yanbi-ml 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +12 -0
- data/lib/bayes/bayes.rb +12 -15
- data/lib/corpus.rb +2 -0
- data/lib/version.rb +1 -1
- data/yanbi-ml.gemspec +1 -0
- metadata +17 -27
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: dd991aa7bffd5dd520fc71f40954b3603c8390a0
|
4
|
+
data.tar.gz: ff8d4ae0a916964c018d325f5c07ca75bcd5dc3f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8ee84e2f9a7c28dba34ab09af791f5be93b019317b2839597f40a55b3e88a1795e0e8474e6615a9c8dcfb708c3d72fac64920136c495e24c5c0a575233770b84
|
7
|
+
data.tar.gz: 2f874ba50d8f5538b3941be0f70cedfc5a0936a810d2cba0ac0a598e86b4dc220a9bc5c561affd701c4d020fd80a3a41cfc3ac1338bcfaa9dc573a233544227f
|
data/README.md
CHANGED
@@ -201,6 +201,17 @@ classifier.set_significance(12)
|
|
201
201
|
|
202
202
|
classifier.set_significance(24, :odd)
|
203
203
|
```
|
204
|
+
Additionally, you can call the `scores` method (for word bags) or the `scores_raw` method (for raw text) to get a hash of the raw numerical category scores for a document:
|
205
|
+
|
206
|
+
```ruby
|
207
|
+
classifier = Yanbi.default(:even, :odd)
|
208
|
+
|
209
|
+
#...tons of training happens here...
|
210
|
+
|
211
|
+
classifier.scores_raw('one two three') => {:even => -4.18383, :odd => -2.12391}
|
212
|
+
```
|
213
|
+
|
214
|
+
Raw numerical scores are useful when you want to set a threshold for valid classification scores, require a minimum difference between scores for a valid classification, or break ties.
|
204
215
|
|
205
216
|
## Persisting
|
206
217
|
|
@@ -271,3 +282,4 @@ Bug reports, corrections of any tragic mathematical misunderstandings, and pull
|
|
271
282
|
## License
|
272
283
|
|
273
284
|
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
285
|
+
|
data/lib/bayes/bayes.rb
CHANGED
@@ -61,9 +61,14 @@ module Yanbi
|
|
61
61
|
|
62
62
|
def classify(document)
|
63
63
|
return nil if document.empty?
|
64
|
-
|
65
|
-
|
66
|
-
|
64
|
+
weights = scores(document)
|
65
|
+
weights.max_by(&:last).first
|
66
|
+
end
|
67
|
+
|
68
|
+
def scores(document)
|
69
|
+
scores = {}
|
70
|
+
@categories.each {|c| scores[c] = score(c, document)}
|
71
|
+
scores
|
67
72
|
end
|
68
73
|
|
69
74
|
def train_raw(category, text)
|
@@ -73,6 +78,10 @@ module Yanbi
|
|
73
78
|
def classify_raw(text)
|
74
79
|
classify(self.newdoc(text))
|
75
80
|
end
|
81
|
+
|
82
|
+
def scores_raw(text)
|
83
|
+
scores(self.newdoc(text))
|
84
|
+
end
|
76
85
|
|
77
86
|
def set_significance(cutoff, category=nil)
|
78
87
|
categories = (category.nil? ? @categories : [category])
|
@@ -100,18 +109,6 @@ module Yanbi
|
|
100
109
|
Math.log(count / @category_sizes[cat])
|
101
110
|
end
|
102
111
|
|
103
|
-
def max_score(document)
|
104
|
-
scores = []
|
105
|
-
|
106
|
-
@categories.each do |c|
|
107
|
-
score = yield c, document
|
108
|
-
scores << score
|
109
|
-
end
|
110
|
-
|
111
|
-
i = scores.rindex(scores.max)
|
112
|
-
@categories[i]
|
113
|
-
end
|
114
|
-
|
115
112
|
def category_size(cat)
|
116
113
|
@category_counts[cat].values.reduce(&:+).to_i
|
117
114
|
end
|
data/lib/corpus.rb
CHANGED
data/lib/version.rb
CHANGED
data/yanbi-ml.gemspec
CHANGED
@@ -17,6 +17,7 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.bindir = "exe"
|
18
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
19
|
spec.require_paths = ["lib"]
|
20
|
+
spec.required_ruby_version = '>= 1.9.2.330'
|
20
21
|
|
21
22
|
spec.add_development_dependency "bundler", "~> 1.11"
|
22
23
|
spec.add_development_dependency "rake", "~> 10.0"
|
metadata
CHANGED
@@ -1,78 +1,69 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yanbi-ml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
5
|
-
prerelease:
|
4
|
+
version: 0.2.4
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Robert Dormer
|
9
8
|
autorequire:
|
10
9
|
bindir: exe
|
11
10
|
cert_chain: []
|
12
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-31 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.11'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '1.11'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rake
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- - ~>
|
31
|
+
- - "~>"
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '10.0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- - ~>
|
38
|
+
- - "~>"
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '10.0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: rspec
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- - ~>
|
45
|
+
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: 3.4.0
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- - ~>
|
52
|
+
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: 3.4.0
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: fast-stemmer
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- - ~>
|
59
|
+
- - "~>"
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: 1.0.2
|
70
62
|
type: :runtime
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- - ~>
|
66
|
+
- - "~>"
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: 1.0.2
|
78
69
|
description:
|
@@ -82,7 +73,7 @@ executables: []
|
|
82
73
|
extensions: []
|
83
74
|
extra_rdoc_files: []
|
84
75
|
files:
|
85
|
-
- .travis.yml
|
76
|
+
- ".travis.yml"
|
86
77
|
- Gemfile
|
87
78
|
- README.md
|
88
79
|
- Rakefile
|
@@ -101,26 +92,25 @@ files:
|
|
101
92
|
homepage: http://github.com/rdormer/yanbi-ml
|
102
93
|
licenses:
|
103
94
|
- MIT
|
95
|
+
metadata: {}
|
104
96
|
post_install_message:
|
105
97
|
rdoc_options: []
|
106
98
|
require_paths:
|
107
99
|
- lib
|
108
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
-
none: false
|
110
101
|
requirements:
|
111
|
-
- -
|
102
|
+
- - ">="
|
112
103
|
- !ruby/object:Gem::Version
|
113
|
-
version:
|
104
|
+
version: 1.9.2.330
|
114
105
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
-
none: false
|
116
106
|
requirements:
|
117
|
-
- -
|
107
|
+
- - ">="
|
118
108
|
- !ruby/object:Gem::Version
|
119
109
|
version: '0'
|
120
110
|
requirements: []
|
121
111
|
rubyforge_project:
|
122
|
-
rubygems_version:
|
112
|
+
rubygems_version: 2.4.8
|
123
113
|
signing_key:
|
124
|
-
specification_version:
|
114
|
+
specification_version: 4
|
125
115
|
summary: Yet Another Naive Bayes Implementation
|
126
116
|
test_files: []
|