ankusa 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -22,7 +22,7 @@ Rake::TestTask.new("test") { |t|
22
22
 
23
23
  spec = Gem::Specification.new do |s|
24
24
  s.name = "ankusa"
25
- s.version = "0.0.3"
25
+ s.version = "0.0.4"
26
26
  s.authors = ["Brian Muller"]
27
27
  s.date = %q{2010-12-02}
28
28
  s.description = "Naive Bayes classifier with HBase storage"
@@ -32,7 +32,7 @@ spec = Gem::Specification.new do |s|
32
32
  s.homepage = "https://github.com/livingsocial/ankusa"
33
33
  s.require_paths = ["lib"]
34
34
  s.rubygems_version = "1.3.5"
35
- s.add_dependency('hbaserb', '>= 0.0.1')
35
+ s.add_dependency('hbaserb', '>= 0.0.3')
36
36
  s.add_dependency('fast-stemmer', '>= 1.0.0')
37
37
  end
38
38
 
@@ -167,7 +167,7 @@
167
167
  onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
168
168
  <div class="method-source-code" id="M000007-source">
169
169
  <pre>
170
- <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 44</span>
170
+ <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 46</span>
171
171
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
172
172
  <span class="ruby-identifier">result</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
173
173
 
@@ -206,7 +206,7 @@
206
206
  onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
207
207
  <div class="method-source-code" id="M000006-source">
208
208
  <pre>
209
- <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 39</span>
209
+ <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 41</span>
210
210
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
211
211
  <span class="ruby-comment cmt"># return the most probable class</span>
212
212
  <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">c</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">first</span>
@@ -244,6 +244,7 @@ text can be either an array of strings or a string klass is a symbol
244
244
  <span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
245
245
  <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">doccount</span>
246
246
  <span class="ruby-ivar">@classnames</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">klass</span> <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">klass</span>
247
+ <span class="ruby-identifier">th</span>
247
248
  <span class="ruby-keyword kw">end</span>
248
249
  </pre>
249
250
  </div>
@@ -267,7 +268,7 @@ text can be either an array of strings or a string klass is a symbol
267
268
  onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
268
269
  <div class="method-source-code" id="M000005-source">
269
270
  <pre>
270
- <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 28</span>
271
+ <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 29</span>
271
272
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">text</span>)
272
273
  <span class="ruby-identifier">th</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>)
273
274
  <span class="ruby-identifier">th</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
@@ -277,6 +278,7 @@ text can be either an array of strings or a string klass is a symbol
277
278
  <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_total_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">th</span>.<span class="ruby-identifier">word_count</span>
278
279
  <span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
279
280
  <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">doccount</span>
281
+ <span class="ruby-identifier">th</span>
280
282
  <span class="ruby-keyword kw">end</span>
281
283
  </pre>
282
284
  </div>
@@ -299,7 +301,7 @@ text can be either an array of strings or a string klass is a symbol
299
301
  onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
300
302
  <div class="method-source-code" id="M000008-source">
301
303
  <pre>
302
- <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 65</span>
304
+ <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 67</span>
303
305
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>)
304
306
  <span class="ruby-identifier">probs</span> = <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
305
307
  <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cn</span><span class="ruby-operator">|</span>
data/docs/created.rid CHANGED
@@ -1 +1 @@
1
- Thu, 02 Dec 2010 16:24:45 -0500
1
+ Fri, 03 Dec 2010 10:45:32 -0500
@@ -56,7 +56,7 @@
56
56
  </tr>
57
57
  <tr class="top-aligned-row">
58
58
  <td><strong>Last Update:</strong></td>
59
- <td>Thu Dec 02 16:20:44 -0500 2010</td>
59
+ <td>Fri Dec 03 07:36:13 -0500 2010</td>
60
60
  </tr>
61
61
  </table>
62
62
  </div>
@@ -21,6 +21,7 @@ module Ankusa
21
21
  doccount = (text.kind_of? Array) ? text.length : 1
22
22
  @storage.incr_doc_count klass, doccount
23
23
  @classnames << klass if not @classnames.include? klass
24
+ th
24
25
  end
25
26
 
26
27
  # text can be either an array of strings or a string
@@ -34,6 +35,7 @@ module Ankusa
34
35
  @storage.incr_total_word_count klass, -th.word_count
35
36
  doccount = (text.kind_of? Array) ? text.length : 1
36
37
  @storage.incr_doc_count klass, -doccount
38
+ th
37
39
  end
38
40
 
39
41
  def classify(text)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ankusa
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Muller
@@ -26,12 +26,12 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- hash: 29
29
+ hash: 25
30
30
  segments:
31
31
  - 0
32
32
  - 0
33
- - 1
34
- version: 0.0.1
33
+ - 3
34
+ version: 0.0.3
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency