ankusa 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +2 -2
- data/docs/classes/Ankusa/Classifier.html +6 -4
- data/docs/created.rid +1 -1
- data/docs/files/lib/ankusa/classifier_rb.html +1 -1
- data/lib/ankusa/classifier.rb +2 -0
- metadata +6 -6
data/Rakefile
CHANGED
@@ -22,7 +22,7 @@ Rake::TestTask.new("test") { |t|
|
|
22
22
|
|
23
23
|
spec = Gem::Specification.new do |s|
|
24
24
|
s.name = "ankusa"
|
25
|
-
s.version = "0.0.3"
|
25
|
+
s.version = "0.0.4"
|
26
26
|
s.authors = ["Brian Muller"]
|
27
27
|
s.date = %q{2010-12-02}
|
28
28
|
s.description = "Naive Bayes classifier with HBase storage"
|
@@ -32,7 +32,7 @@ spec = Gem::Specification.new do |s|
|
|
32
32
|
s.homepage = "https://github.com/livingsocial/ankusa"
|
33
33
|
s.require_paths = ["lib"]
|
34
34
|
s.rubygems_version = "1.3.5"
|
35
|
-
s.add_dependency('hbaserb', '>= 0.0.
|
35
|
+
s.add_dependency('hbaserb', '>= 0.0.3')
|
36
36
|
s.add_dependency('fast-stemmer', '>= 1.0.0')
|
37
37
|
end
|
38
38
|
|
@@ -167,7 +167,7 @@
|
|
167
167
|
onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
|
168
168
|
<div class="method-source-code" id="M000007-source">
|
169
169
|
<pre>
|
170
|
-
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 44</span>
|
170
|
+
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 46</span>
|
171
171
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
|
172
172
|
<span class="ruby-identifier">result</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
|
173
173
|
|
@@ -206,7 +206,7 @@
|
|
206
206
|
onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
|
207
207
|
<div class="method-source-code" id="M000006-source">
|
208
208
|
<pre>
|
209
|
-
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 39</span>
|
209
|
+
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 41</span>
|
210
210
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
|
211
211
|
<span class="ruby-comment cmt"># return the most probable class</span>
|
212
212
|
<span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">c</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">first</span>
|
@@ -244,6 +244,7 @@ text can be either an array of strings or a string klass is a symbol
|
|
244
244
|
<span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
|
245
245
|
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">doccount</span>
|
246
246
|
<span class="ruby-ivar">@classnames</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">klass</span> <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">klass</span>
|
247
|
+
<span class="ruby-identifier">th</span>
|
247
248
|
<span class="ruby-keyword kw">end</span>
|
248
249
|
</pre>
|
249
250
|
</div>
|
@@ -267,7 +268,7 @@ text can be either an array of strings or a string klass is a symbol
|
|
267
268
|
onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
|
268
269
|
<div class="method-source-code" id="M000005-source">
|
269
270
|
<pre>
|
270
|
-
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 28</span>
|
271
|
+
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 29</span>
|
271
272
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">text</span>)
|
272
273
|
<span class="ruby-identifier">th</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>)
|
273
274
|
<span class="ruby-identifier">th</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
|
@@ -277,6 +278,7 @@ text can be either an array of strings or a string klass is a symbol
|
|
277
278
|
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_total_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">th</span>.<span class="ruby-identifier">word_count</span>
|
278
279
|
<span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
|
279
280
|
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">doccount</span>
|
281
|
+
<span class="ruby-identifier">th</span>
|
280
282
|
<span class="ruby-keyword kw">end</span>
|
281
283
|
</pre>
|
282
284
|
</div>
|
@@ -299,7 +301,7 @@ text can be either an array of strings or a string klass is a symbol
|
|
299
301
|
onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
|
300
302
|
<div class="method-source-code" id="M000008-source">
|
301
303
|
<pre>
|
302
|
-
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 65</span>
|
304
|
+
<span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 67</span>
|
303
305
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>)
|
304
306
|
<span class="ruby-identifier">probs</span> = <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
|
305
307
|
<span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cn</span><span class="ruby-operator">|</span>
|
data/docs/created.rid
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
Fri, 03 Dec 2010 10:45:32 -0500
|
data/lib/ankusa/classifier.rb
CHANGED
@@ -21,6 +21,7 @@ module Ankusa
|
|
21
21
|
doccount = (text.kind_of? Array) ? text.length : 1
|
22
22
|
@storage.incr_doc_count klass, doccount
|
23
23
|
@classnames << klass if not @classnames.include? klass
|
24
|
+
th
|
24
25
|
end
|
25
26
|
|
26
27
|
# text can be either an array of strings or a string
|
@@ -34,6 +35,7 @@ module Ankusa
|
|
34
35
|
@storage.incr_total_word_count klass, -th.word_count
|
35
36
|
doccount = (text.kind_of? Array) ? text.length : 1
|
36
37
|
@storage.incr_doc_count klass, -doccount
|
38
|
+
th
|
37
39
|
end
|
38
40
|
|
39
41
|
def classify(text)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ankusa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 3
|
10
|
-
version: 0.0.3
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brian Muller
|
@@ -26,12 +26,12 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 25
|
30
30
|
segments:
|
31
31
|
- 0
|
32
32
|
- 0
|
33
|
-
-
|
34
|
-
version: 0.0.
|
33
|
+
- 3
|
34
|
+
version: 0.0.3
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: *id001
|
37
37
|
- !ruby/object:Gem::Dependency
|