yanbi-ml 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dd991aa7bffd5dd520fc71f40954b3603c8390a0
4
+ data.tar.gz: ff8d4ae0a916964c018d325f5c07ca75bcd5dc3f
5
+ SHA512:
6
+ metadata.gz: 8ee84e2f9a7c28dba34ab09af791f5be93b019317b2839597f40a55b3e88a1795e0e8474e6615a9c8dcfb708c3d72fac64920136c495e24c5c0a575233770b84
7
+ data.tar.gz: 2f874ba50d8f5538b3941be0f70cedfc5a0936a810d2cba0ac0a598e86b4dc220a9bc5c561affd701c4d020fd80a3a41cfc3ac1338bcfaa9dc573a233544227f
data/README.md CHANGED
@@ -201,6 +201,17 @@ classifier.set_significance(12)
201
201
 
202
202
  classifier.set_significance(24, :odd)
203
203
  ```
204
+ Additionally, you can call the `scores` method (for word bags) or the `scores_raw` method (for raw text) to get a hash of the raw numerical score of each category for a document:
205
+
206
+ ```ruby
207
+ classifier = Yanbi.default(:even, :odd)
208
+
209
+ #...tons of training happens here...
210
+
211
+ classifier.scores_raw('one two three') # => {:even => -4.18383, :odd => -2.12391}
212
+ ```
213
+
214
+ Raw numerical scores are useful when you want to set a threshold for valid classification scores, require a minimum difference between scores for a valid classification, or break ties.
204
215
 
205
216
  ## Persisting
206
217
 
@@ -271,3 +282,4 @@ Bug reports, corrections of any tragic mathematical misunderstandings, and pull
271
282
  ## License
272
283
 
273
284
  The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
285
+
data/lib/bayes/bayes.rb CHANGED
@@ -61,9 +61,14 @@ module Yanbi
61
61
 
62
62
  def classify(document)
63
63
  return nil if document.empty?
64
- max_score(document) do |cat, doc|
65
- score(cat, doc)
66
- end
64
+ weights = scores(document)
65
+ weights.max_by(&:last).first
66
+ end
67
+
68
+ def scores(document)
69
+ scores = {}
70
+ @categories.each {|c| scores[c] = score(c, document)}
71
+ scores
67
72
  end
68
73
 
69
74
  def train_raw(category, text)
@@ -73,6 +78,10 @@ module Yanbi
73
78
  def classify_raw(text)
74
79
  classify(self.newdoc(text))
75
80
  end
81
+
82
+ def scores_raw(text)
83
+ scores(self.newdoc(text))
84
+ end
76
85
 
77
86
  def set_significance(cutoff, category=nil)
78
87
  categories = (category.nil? ? @categories : [category])
@@ -100,18 +109,6 @@ module Yanbi
100
109
  Math.log(count / @category_sizes[cat])
101
110
  end
102
111
 
103
- def max_score(document)
104
- scores = []
105
-
106
- @categories.each do |c|
107
- score = yield c, document
108
- scores << score
109
- end
110
-
111
- i = scores.rindex(scores.max)
112
- @categories[i]
113
- end
114
-
115
112
  def category_size(cat)
116
113
  @category_counts[cat].values.reduce(&:+).to_i
117
114
  end
data/lib/corpus.rb CHANGED
@@ -33,6 +33,8 @@ module Yanbi
33
33
  infile = File.open(docpath, 'r')
34
34
  raw = infile.read
35
35
  infile.close
36
+
37
+ raw = raw.encode("UTF-8", invalid: :replace, replace: "")
36
38
 
37
39
  if delim
38
40
  docs = raw.split(delim)
data/lib/version.rb CHANGED
@@ -3,5 +3,5 @@
3
3
  # License:: MIT
4
4
 
5
5
  module Yanbi
6
- VERSION = "0.2.3"
6
+ VERSION = "0.2.4"
7
7
  end
data/yanbi-ml.gemspec CHANGED
@@ -17,6 +17,7 @@ Gem::Specification.new do |spec|
17
17
  spec.bindir = "exe"
18
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
19
  spec.require_paths = ["lib"]
20
+ spec.required_ruby_version = '>= 1.9.2.330'
20
21
 
21
22
  spec.add_development_dependency "bundler", "~> 1.11"
22
23
  spec.add_development_dependency "rake", "~> 10.0"
metadata CHANGED
@@ -1,78 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yanbi-ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
5
- prerelease:
4
+ version: 0.2.4
6
5
  platform: ruby
7
6
  authors:
8
7
  - Robert Dormer
9
8
  autorequire:
10
9
  bindir: exe
11
10
  cert_chain: []
12
- date: 2016-07-14 00:00:00.000000000 Z
11
+ date: 2016-07-31 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bundler
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: '1.11'
22
20
  type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ~>
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: '1.11'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: rake
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ~>
31
+ - - "~>"
36
32
  - !ruby/object:Gem::Version
37
33
  version: '10.0'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ~>
38
+ - - "~>"
44
39
  - !ruby/object:Gem::Version
45
40
  version: '10.0'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rspec
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ~>
45
+ - - "~>"
52
46
  - !ruby/object:Gem::Version
53
47
  version: 3.4.0
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ~>
52
+ - - "~>"
60
53
  - !ruby/object:Gem::Version
61
54
  version: 3.4.0
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: fast-stemmer
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ~>
59
+ - - "~>"
68
60
  - !ruby/object:Gem::Version
69
61
  version: 1.0.2
70
62
  type: :runtime
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ~>
66
+ - - "~>"
76
67
  - !ruby/object:Gem::Version
77
68
  version: 1.0.2
78
69
  description:
@@ -82,7 +73,7 @@ executables: []
82
73
  extensions: []
83
74
  extra_rdoc_files: []
84
75
  files:
85
- - .travis.yml
76
+ - ".travis.yml"
86
77
  - Gemfile
87
78
  - README.md
88
79
  - Rakefile
@@ -101,26 +92,25 @@ files:
101
92
  homepage: http://github.com/rdormer/yanbi-ml
102
93
  licenses:
103
94
  - MIT
95
+ metadata: {}
104
96
  post_install_message:
105
97
  rdoc_options: []
106
98
  require_paths:
107
99
  - lib
108
100
  required_ruby_version: !ruby/object:Gem::Requirement
109
- none: false
110
101
  requirements:
111
- - - ! '>='
102
+ - - ">="
112
103
  - !ruby/object:Gem::Version
113
- version: '0'
104
+ version: 1.9.2.330
114
105
  required_rubygems_version: !ruby/object:Gem::Requirement
115
- none: false
116
106
  requirements:
117
- - - ! '>='
107
+ - - ">="
118
108
  - !ruby/object:Gem::Version
119
109
  version: '0'
120
110
  requirements: []
121
111
  rubyforge_project:
122
- rubygems_version: 1.8.25
112
+ rubygems_version: 2.4.8
123
113
  signing_key:
124
- specification_version: 3
114
+ specification_version: 4
125
115
  summary: Yet Another Naive Bayes Implementation
126
116
  test_files: []