ankusa 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -2
- data/docs/classes/Ankusa/Classifier.html +6 -4
- data/docs/created.rid +1 -1
- data/docs/files/lib/ankusa/classifier_rb.html +1 -1
- data/lib/ankusa/classifier.rb +2 -0
- metadata +6 -6
data/Rakefile
CHANGED
@@ -22,7 +22,7 @@ Rake::TestTask.new("test") { |t|
|
|
22
22
|
|
23
23
|
spec = Gem::Specification.new do |s|
|
24
24
|
s.name = "ankusa"
|
25
|
-
s.version = "0.0.3"
|
25
|
+
s.version = "0.0.4"
|
26
26
|
s.authors = ["Brian Muller"]
|
27
27
|
s.date = %q{2010-12-02}
|
28
28
|
s.description = "Naive Bayes classifier with HBase storage"
|
@@ -32,7 +32,7 @@ spec = Gem::Specification.new do |s|
|
|
32
32
|
s.homepage = "https://github.com/livingsocial/ankusa"
|
33
33
|
s.require_paths = ["lib"]
|
34
34
|
s.rubygems_version = "1.3.5"
|
35
|
-
s.add_dependency('hbaserb', '>= 0.0.
|
35
|
+
s.add_dependency('hbaserb', '>= 0.0.3')
|
36
36
|
s.add_dependency('fast-stemmer', '>= 1.0.0')
|
37
37
|
end
|
38
38
|
|
@@ -167,7 +167,7 @@
|
|
167
167
|
onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
|
168
168
|
<div class="method-source-code" id="M000007-source">
|
169
169
|
<pre>
|
170
|
-
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 44</span>
|
170
|
+
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 46</span>
|
171
171
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
|
172
172
|
<span class="ruby-identifier">result</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
|
173
173
|
|
@@ -206,7 +206,7 @@
|
|
206
206
|
onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
|
207
207
|
<div class="method-source-code" id="M000006-source">
|
208
208
|
<pre>
|
209
|
-
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 39</span>
|
209
|
+
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 41</span>
|
210
210
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
|
211
211
|
<span class="ruby-comment cmt"># return the most probable class</span>
|
212
212
|
<span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">c</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">first</span>
|
@@ -244,6 +244,7 @@ text can be either an array of strings or a string klass is a symbol
|
|
244
244
|
<span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
|
245
245
|
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">doccount</span>
|
246
246
|
<span class="ruby-ivar">@classnames</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">klass</span> <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">klass</span>
|
247
|
+
<span class="ruby-identifier">th</span>
|
247
248
|
<span class="ruby-keyword kw">end</span>
|
248
249
|
</pre>
|
249
250
|
</div>
|
@@ -267,7 +268,7 @@ text can be either an array of strings or a string klass is a symbol
|
|
267
268
|
onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
|
268
269
|
<div class="method-source-code" id="M000005-source">
|
269
270
|
<pre>
|
270
|
-
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 28</span>
|
271
|
+
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 29</span>
|
271
272
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">text</span>)
|
272
273
|
<span class="ruby-identifier">th</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>)
|
273
274
|
<span class="ruby-identifier">th</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
|
@@ -277,6 +278,7 @@ text can be either an array of strings or a string klass is a symbol
|
|
277
278
|
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_total_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">th</span>.<span class="ruby-identifier">word_count</span>
|
278
279
|
<span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
|
279
280
|
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">doccount</span>
|
281
|
+
<span class="ruby-identifier">th</span>
|
280
282
|
<span class="ruby-keyword kw">end</span>
|
281
283
|
</pre>
|
282
284
|
</div>
|
@@ -299,7 +301,7 @@ text can be either an array of strings or a string klass is a symbol
|
|
299
301
|
onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
|
300
302
|
<div class="method-source-code" id="M000008-source">
|
301
303
|
<pre>
|
302
|
-
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 65</span>
|
304
|
+
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 67</span>
|
303
305
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>)
|
304
306
|
<span class="ruby-identifier">probs</span> = <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
|
305
307
|
<span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cn</span><span class="ruby-operator">|</span>
|
data/docs/created.rid
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
Fri, 03 Dec 2010 10:45:32 -0500
|
data/lib/ankusa/classifier.rb
CHANGED
@@ -21,6 +21,7 @@ module Ankusa
|
|
21
21
|
doccount = (text.kind_of? Array) ? text.length : 1
|
22
22
|
@storage.incr_doc_count klass, doccount
|
23
23
|
@classnames << klass if not @classnames.include? klass
|
24
|
+
th
|
24
25
|
end
|
25
26
|
|
26
27
|
# text can be either an array of strings or a string
|
@@ -34,6 +35,7 @@ module Ankusa
|
|
34
35
|
@storage.incr_total_word_count klass, -th.word_count
|
35
36
|
doccount = (text.kind_of? Array) ? text.length : 1
|
36
37
|
@storage.incr_doc_count klass, -doccount
|
38
|
+
th
|
37
39
|
end
|
38
40
|
|
39
41
|
def classify(text)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ankusa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 3
|
10
|
-
version: 0.0.3
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brian Muller
|
@@ -26,12 +26,12 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 25
|
30
30
|
segments:
|
31
31
|
- 0
|
32
32
|
- 0
|
33
|
-
-
|
34
|
-
version: 0.0.
|
33
|
+
- 3
|
34
|
+
version: 0.0.3
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: *id001
|
37
37
|
- !ruby/object:Gem::Dependency
|