ankusa 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -22,7 +22,7 @@ Rake::TestTask.new("test") { |t|
22
22
 
23
23
  spec = Gem::Specification.new do |s|
24
24
  s.name = "ankusa"
25
- s.version = "0.0.3"
25
+ s.version = "0.0.4"
26
26
  s.authors = ["Brian Muller"]
27
27
  s.date = %q{2010-12-02}
28
28
  s.description = "Naive Bayes classifier with HBase storage"
@@ -32,7 +32,7 @@ spec = Gem::Specification.new do |s|
32
32
  s.homepage = "https://github.com/livingsocial/ankusa"
33
33
  s.require_paths = ["lib"]
34
34
  s.rubygems_version = "1.3.5"
35
- s.add_dependency('hbaserb', '>= 0.0.1')
35
+ s.add_dependency('hbaserb', '>= 0.0.3')
36
36
  s.add_dependency('fast-stemmer', '>= 1.0.0')
37
37
  end
38
38
 
@@ -167,7 +167,7 @@
167
167
  onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
168
168
  <div class="method-source-code" id="M000007-source">
169
169
  <pre>
170
- <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 44</span>
170
+ <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 46</span>
171
171
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
172
172
  <span class="ruby-identifier">result</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
173
173
 
@@ -206,7 +206,7 @@
206
206
  onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
207
207
  <div class="method-source-code" id="M000006-source">
208
208
  <pre>
209
- <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 39</span>
209
+ <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 41</span>
210
210
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
211
211
  <span class="ruby-comment cmt"># return the most probable class</span>
212
212
  <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">c</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">first</span>
@@ -244,6 +244,7 @@ text can be either an array of strings or a string klass is a symbol
244
244
  <span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
245
245
  <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">doccount</span>
246
246
  <span class="ruby-ivar">@classnames</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">klass</span> <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">klass</span>
247
+ <span class="ruby-identifier">th</span>
247
248
  <span class="ruby-keyword kw">end</span>
248
249
  </pre>
249
250
  </div>
@@ -267,7 +268,7 @@ text can be either an array of strings or a string klass is a symbol
267
268
  onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
268
269
  <div class="method-source-code" id="M000005-source">
269
270
  <pre>
270
- <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 28</span>
271
+ <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 29</span>
271
272
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">text</span>)
272
273
  <span class="ruby-identifier">th</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>)
273
274
  <span class="ruby-identifier">th</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
@@ -277,6 +278,7 @@ text can be either an array of strings or a string klass is a symbol
277
278
  <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_total_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">th</span>.<span class="ruby-identifier">word_count</span>
278
279
  <span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
279
280
  <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">doccount</span>
281
+ <span class="ruby-identifier">th</span>
280
282
  <span class="ruby-keyword kw">end</span>
281
283
  </pre>
282
284
  </div>
@@ -299,7 +301,7 @@ text can be either an array of strings or a string klass is a symbol
299
301
  onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
300
302
  <div class="method-source-code" id="M000008-source">
301
303
  <pre>
302
- <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 65</span>
304
+ <span class="ruby-comment cmt"># File lib/ankusa/classifier.rb, line 67</span>
303
305
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>)
304
306
  <span class="ruby-identifier">probs</span> = <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
305
307
  <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cn</span><span class="ruby-operator">|</span>
data/docs/created.rid CHANGED
@@ -1 +1 @@
1
- Thu, 02 Dec 2010 16:24:45 -0500
1
+ Fri, 03 Dec 2010 10:45:32 -0500
@@ -56,7 +56,7 @@
56
56
  </tr>
57
57
  <tr class="top-aligned-row">
58
58
  <td><strong>Last Update:</strong></td>
59
- <td>Thu Dec 02 16:20:44 -0500 2010</td>
59
+ <td>Fri Dec 03 07:36:13 -0500 2010</td>
60
60
  </tr>
61
61
  </table>
62
62
  </div>
@@ -21,6 +21,7 @@ module Ankusa
21
21
  doccount = (text.kind_of? Array) ? text.length : 1
22
22
  @storage.incr_doc_count klass, doccount
23
23
  @classnames << klass if not @classnames.include? klass
24
+ th
24
25
  end
25
26
 
26
27
  # text can be either an array of strings or a string
@@ -34,6 +35,7 @@ module Ankusa
34
35
  @storage.incr_total_word_count klass, -th.word_count
35
36
  doccount = (text.kind_of? Array) ? text.length : 1
36
37
  @storage.incr_doc_count klass, -doccount
38
+ th
37
39
  end
38
40
 
39
41
  def classify(text)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ankusa
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Muller
@@ -26,12 +26,12 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- hash: 29
29
+ hash: 25
30
30
  segments:
31
31
  - 0
32
32
  - 0
33
- - 1
34
- version: 0.0.1
33
+ - 3
34
+ version: 0.0.3
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency