yanbi-ml 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dd991aa7bffd5dd520fc71f40954b3603c8390a0
4
+ data.tar.gz: ff8d4ae0a916964c018d325f5c07ca75bcd5dc3f
5
+ SHA512:
6
+ metadata.gz: 8ee84e2f9a7c28dba34ab09af791f5be93b019317b2839597f40a55b3e88a1795e0e8474e6615a9c8dcfb708c3d72fac64920136c495e24c5c0a575233770b84
7
+ data.tar.gz: 2f874ba50d8f5538b3941be0f70cedfc5a0936a810d2cba0ac0a598e86b4dc220a9bc5c561affd701c4d020fd80a3a41cfc3ac1338bcfaa9dc573a233544227f
data/README.md CHANGED
@@ -201,6 +201,17 @@ classifier.set_significance(12)
201
201
 
202
202
  classifier.set_significance(24, :odd)
203
203
  ```
204
+ Additionally, you can call the `scores` method (for word bags) or the `scores_raw` method (for raw text) to get a hash of the raw numerical category scores for a document:
205
+
206
+ ```ruby
207
+ classifier = Yanbi.default(:even, :odd)
208
+
209
+ #...tons of training happens here...
210
+
211
+ classifier.scores_raw('one two three') # => {:even => -4.18383, :odd => -2.12391}
212
+ ```
213
+
214
+ Raw numerical scores are useful when you want to set a threshold for valid classification scores, require a minimum difference between scores for a valid classification, or break ties.
204
215
 
205
216
  ## Persisting
206
217
 
@@ -271,3 +282,4 @@ Bug reports, corrections of any tragic mathematical misunderstandings, and pull
271
282
  ## License
272
283
 
273
284
  The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
285
+
data/lib/bayes/bayes.rb CHANGED
@@ -61,9 +61,14 @@ module Yanbi
61
61
 
62
62
  def classify(document)
63
63
  return nil if document.empty?
64
- max_score(document) do |cat, doc|
65
- score(cat, doc)
66
- end
64
+ weights = scores(document)
65
+ weights.max_by(&:last).first
66
+ end
67
+
68
+ def scores(document)
69
+ scores = {}
70
+ @categories.each {|c| scores[c] = score(c, document)}
71
+ scores
67
72
  end
68
73
 
69
74
  def train_raw(category, text)
@@ -73,6 +78,10 @@ module Yanbi
73
78
  def classify_raw(text)
74
79
  classify(self.newdoc(text))
75
80
  end
81
+
82
+ def scores_raw(text)
83
+ scores(self.newdoc(text))
84
+ end
76
85
 
77
86
  def set_significance(cutoff, category=nil)
78
87
  categories = (category.nil? ? @categories : [category])
@@ -100,18 +109,6 @@ module Yanbi
100
109
  Math.log(count / @category_sizes[cat])
101
110
  end
102
111
 
103
- def max_score(document)
104
- scores = []
105
-
106
- @categories.each do |c|
107
- score = yield c, document
108
- scores << score
109
- end
110
-
111
- i = scores.rindex(scores.max)
112
- @categories[i]
113
- end
114
-
115
112
  def category_size(cat)
116
113
  @category_counts[cat].values.reduce(&:+).to_i
117
114
  end
data/lib/corpus.rb CHANGED
@@ -33,6 +33,8 @@ module Yanbi
33
33
  infile = File.open(docpath, 'r')
34
34
  raw = infile.read
35
35
  infile.close
36
+
37
+ raw = raw.encode("UTF-8", invalid: :replace, replace: "")
36
38
 
37
39
  if delim
38
40
  docs = raw.split(delim)
data/lib/version.rb CHANGED
@@ -3,5 +3,5 @@
3
3
  # License:: MIT
4
4
 
5
5
  module Yanbi
6
- VERSION = "0.2.3"
6
+ VERSION = "0.2.4"
7
7
  end
data/yanbi-ml.gemspec CHANGED
@@ -17,6 +17,7 @@ Gem::Specification.new do |spec|
17
17
  spec.bindir = "exe"
18
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
19
  spec.require_paths = ["lib"]
20
+ spec.required_ruby_version = '>= 1.9.2.330'
20
21
 
21
22
  spec.add_development_dependency "bundler", "~> 1.11"
22
23
  spec.add_development_dependency "rake", "~> 10.0"
metadata CHANGED
@@ -1,78 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yanbi-ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
5
- prerelease:
4
+ version: 0.2.4
6
5
  platform: ruby
7
6
  authors:
8
7
  - Robert Dormer
9
8
  autorequire:
10
9
  bindir: exe
11
10
  cert_chain: []
12
- date: 2016-07-14 00:00:00.000000000 Z
11
+ date: 2016-07-31 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bundler
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: '1.11'
22
20
  type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ~>
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: '1.11'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: rake
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ~>
31
+ - - "~>"
36
32
  - !ruby/object:Gem::Version
37
33
  version: '10.0'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ~>
38
+ - - "~>"
44
39
  - !ruby/object:Gem::Version
45
40
  version: '10.0'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rspec
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ~>
45
+ - - "~>"
52
46
  - !ruby/object:Gem::Version
53
47
  version: 3.4.0
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ~>
52
+ - - "~>"
60
53
  - !ruby/object:Gem::Version
61
54
  version: 3.4.0
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: fast-stemmer
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ~>
59
+ - - "~>"
68
60
  - !ruby/object:Gem::Version
69
61
  version: 1.0.2
70
62
  type: :runtime
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ~>
66
+ - - "~>"
76
67
  - !ruby/object:Gem::Version
77
68
  version: 1.0.2
78
69
  description:
@@ -82,7 +73,7 @@ executables: []
82
73
  extensions: []
83
74
  extra_rdoc_files: []
84
75
  files:
85
- - .travis.yml
76
+ - ".travis.yml"
86
77
  - Gemfile
87
78
  - README.md
88
79
  - Rakefile
@@ -101,26 +92,25 @@ files:
101
92
  homepage: http://github.com/rdormer/yanbi-ml
102
93
  licenses:
103
94
  - MIT
95
+ metadata: {}
104
96
  post_install_message:
105
97
  rdoc_options: []
106
98
  require_paths:
107
99
  - lib
108
100
  required_ruby_version: !ruby/object:Gem::Requirement
109
- none: false
110
101
  requirements:
111
- - - ! '>='
102
+ - - ">="
112
103
  - !ruby/object:Gem::Version
113
- version: '0'
104
+ version: 1.9.2.330
114
105
  required_rubygems_version: !ruby/object:Gem::Requirement
115
- none: false
116
106
  requirements:
117
- - - ! '>='
107
+ - - ">="
118
108
  - !ruby/object:Gem::Version
119
109
  version: '0'
120
110
  requirements: []
121
111
  rubyforge_project:
122
- rubygems_version: 1.8.25
112
+ rubygems_version: 2.4.8
123
113
  signing_key:
124
- specification_version: 3
114
+ specification_version: 4
125
115
  summary: Yet Another Naive Bayes Implementation
126
116
  test_files: []