ankusa 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
module Ankusa

  # Storage backend that keeps all classifier counts in plain Ruby hashes
  # held by the instance; nothing here is written anywhere else.
  #
  # NOTE(review): the method set (drop_tables, init_tables, close, ...)
  # presumably mirrors a persistent backend such as the HBase storage
  # shipped in the same gem -- confirm against that file.
  class MemoryStorage
    # Builds an empty store.
    def initialize
      init_tables
    end

    # Returns the classes that have had a document count recorded via
    # incr_doc_count.
    def classnames
      @total_doc_counts.keys
    end

    # Discards every recorded count and starts over with empty tables.
    def reset
      init_tables
    end

    # Intentionally a no-op: this backend has no tables to drop.
    def drop_tables
    end

    # (Re)creates the empty hashes backing the store.
    def init_tables
      # word => { klass => count }
      @freqs = {}
      # klass => count; unseen classes read as 0 without being inserted.
      @total_word_counts = Hash.new(0)
      @total_doc_counts = Hash.new(0)
      # Initialised here but not read by any method of this class.
      @klass_word_counts = {}
      @klass_doc_counts = {}
    end

    # Returns the per-class counts recorded for +word+ as a hash of
    # klass => count. An unseen word yields a fresh empty hash whose
    # default value is 0, so lookups on the result never return nil.
    def get_word_counts(word)
      @freqs.fetch(word) { Hash.new(0) }
    end

    # Returns the total word count recorded for +klass+ (0 if unseen).
    def get_total_word_count(klass)
      @total_word_counts[klass]
    end

    # Returns the document count recorded for +klass+ (0 if unseen).
    def get_doc_count(klass)
      @total_doc_counts[klass]
    end

    # Adds +count+ to the tally of +word+ within +klass+ and returns the
    # updated tally.
    def incr_word_count(klass, word, count)
      @freqs[word] ||= Hash.new(0)
      @freqs[word][klass] += count
    end

    # Adds +count+ to the total word count of +klass+ and returns the
    # updated total.
    def incr_total_word_count(klass, count)
      @total_word_counts[klass] += count
    end

    # Adds +count+ to the document count of +klass+ and returns the
    # updated count.
    def incr_doc_count(klass, count)
      @total_doc_counts[klass] += count
    end

    # Returns the number of documents recorded across all classes.
    #
    # The fold is seeded with 0 so that an empty store reports 0; an
    # unseeded inject over an empty collection returns nil, which breaks
    # any arithmetic done on the result.
    def doc_count_total
      @total_doc_counts.values.inject(0) { |sum, n| sum + n }
    end

    # Intentionally a no-op: this backend holds no resource to release.
    def close
    end
  end

end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ankusa
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Muller
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-11-29 00:00:00 -05:00
18
+ date: 2010-12-02 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -60,21 +60,27 @@ extra_rdoc_files: []
60
60
 
61
61
  files:
62
62
  - lib/ankusa/classifier.rb
63
+ - lib/ankusa/extensions.rb
63
64
  - lib/ankusa/hasher.rb
64
- - lib/ankusa/nbclass.rb
65
+ - lib/ankusa/hbase_storage.rb
66
+ - lib/ankusa/memory_storage.rb
65
67
  - lib/ankusa/stopwords.rb
66
68
  - lib/ankusa.rb
67
69
  - LICENSE
68
70
  - Rakefile
69
71
  - README.rdoc
70
72
  - docs/classes/Ankusa/Classifier.html
71
- - docs/classes/Ankusa/NBClass.html
73
+ - docs/classes/Ankusa/HBaseStorage.html
74
+ - docs/classes/Ankusa/MemoryStorage.html
72
75
  - docs/classes/Ankusa/TextHash.html
73
76
  - docs/classes/Ankusa.html
77
+ - docs/classes/String.html
74
78
  - docs/created.rid
75
79
  - docs/files/lib/ankusa/classifier_rb.html
80
+ - docs/files/lib/ankusa/extensions_rb.html
76
81
  - docs/files/lib/ankusa/hasher_rb.html
77
- - docs/files/lib/ankusa/nbclass_rb.html
82
+ - docs/files/lib/ankusa/hbase_storage_rb.html
83
+ - docs/files/lib/ankusa/memory_storage_rb.html
78
84
  - docs/files/lib/ankusa/stopwords_rb.html
79
85
  - docs/files/lib/ankusa_rb.html
80
86
  - docs/files/README_rdoc.html
@@ -1,168 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
- <head>
8
- <title>Class: Ankusa::NBClass</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
- <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
- <script type="text/javascript">
13
- // <![CDATA[
14
-
15
- function popupCode( url ) {
16
- window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
- }
18
-
19
- function toggleCode( id ) {
20
- if ( document.getElementById )
21
- elem = document.getElementById( id );
22
- else if ( document.all )
23
- elem = eval( "document.all." + id );
24
- else
25
- return false;
26
-
27
- elemStyle = elem.style;
28
-
29
- if ( elemStyle.display != "block" ) {
30
- elemStyle.display = "block"
31
- } else {
32
- elemStyle.display = "none"
33
- }
34
-
35
- return true;
36
- }
37
-
38
- // Make codeblocks hidden by default
39
- document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
-
41
- // ]]>
42
- </script>
43
-
44
- </head>
45
- <body>
46
-
47
-
48
-
49
- <div id="classHeader">
50
- <table class="header-table">
51
- <tr class="top-aligned-row">
52
- <td><strong>Class</strong></td>
53
- <td class="class-name-in-header">Ankusa::NBClass</td>
54
- </tr>
55
- <tr class="top-aligned-row">
56
- <td><strong>In:</strong></td>
57
- <td>
58
- <a href="../../files/lib/ankusa/nbclass_rb.html">
59
- lib/ankusa/nbclass.rb
60
- </a>
61
- <br />
62
- </td>
63
- </tr>
64
-
65
- <tr class="top-aligned-row">
66
- <td><strong>Parent:</strong></td>
67
- <td>
68
- Object
69
- </td>
70
- </tr>
71
- </table>
72
- </div>
73
- <!-- banner header -->
74
-
75
- <div id="bodyContent">
76
-
77
-
78
-
79
- <div id="contextContent">
80
-
81
-
82
-
83
- </div>
84
-
85
- <div id="method-list">
86
- <h3 class="section-bar">Methods</h3>
87
-
88
- <div class="name-list">
89
- <a href="#M000014">new</a>&nbsp;&nbsp;
90
- </div>
91
- </div>
92
-
93
- </div>
94
-
95
-
96
- <!-- if includes -->
97
-
98
- <div id="section">
99
-
100
-
101
-
102
-
103
-
104
- <div id="attribute-list">
105
- <h3 class="section-bar">Attributes</h3>
106
-
107
- <div class="name-list">
108
- <table>
109
- <tr class="top-aligned-row context-row">
110
- <td class="context-item-name">doc_count</td>
111
- <td class="context-item-value">&nbsp;[R]&nbsp;</td>
112
- <td class="context-item-desc"></td>
113
- </tr>
114
- <tr class="top-aligned-row context-row">
115
- <td class="context-item-name">word_count</td>
116
- <td class="context-item-value">&nbsp;[R]&nbsp;</td>
117
- <td class="context-item-desc"></td>
118
- </tr>
119
- </table>
120
- </div>
121
- </div>
122
-
123
-
124
-
125
- <!-- if method_list -->
126
- <div id="methods">
127
- <h3 class="section-bar">Public Class methods</h3>
128
-
129
- <div id="method-M000014" class="method-detail">
130
- <a name="M000014"></a>
131
-
132
- <div class="method-heading">
133
- <a href="#M000014" class="method-signature">
134
- <span class="method-name">new</span><span class="method-args">(name, summary_table, freq_table)</span>
135
- </a>
136
- </div>
137
-
138
- <div class="method-description">
139
- <p><a class="source-toggle" href="#"
140
- onclick="toggleCode('M000014-source');return false;">[Source]</a></p>
141
- <div class="method-source-code" id="M000014-source">
142
- <pre>
143
- <span class="ruby-comment cmt"># File lib/ankusa/nbclass.rb, line 6</span>
144
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">name</span>, <span class="ruby-identifier">summary_table</span>, <span class="ruby-identifier">freq_table</span>)
145
- <span class="ruby-ivar">@name</span> = <span class="ruby-identifier">name</span>
146
- <span class="ruby-ivar">@summary_table</span> = <span class="ruby-identifier">summary_table</span>
147
- <span class="ruby-ivar">@freq_table</span> = <span class="ruby-identifier">freq_table</span>
148
- <span class="ruby-ivar">@word_count</span> = <span class="ruby-ivar">@summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-ivar">@name</span>, <span class="ruby-value str">&quot;totals:wordcount&quot;</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
149
- <span class="ruby-ivar">@doc_count</span> = <span class="ruby-ivar">@summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-ivar">@name</span>, <span class="ruby-value str">&quot;totals:doccount&quot;</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
150
- <span class="ruby-keyword kw">end</span>
151
- </pre>
152
- </div>
153
- </div>
154
- </div>
155
-
156
-
157
- </div>
158
-
159
-
160
- </div>
161
-
162
-
163
- <div id="validator-badges">
164
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
165
- </div>
166
-
167
- </body>
168
- </html>
@@ -1,15 +0,0 @@
1
# Ankusa::NBClass -- this file is deleted by the 0.0.3 release shown in this diff.
#
# Holds two totals for one class, exposed read-only as +word_count+ and
# +doc_count+.
#
# initialize(name, summary_table, freq_table):
#   * stores all three arguments in instance variables;
#   * reads the "totals:wordcount" and "totals:doccount" entries for the row
#     +name+ from +summary_table+ via #get, takes the first element of each
#     result, and converts it with to_i64 and then to_f, so both totals end
#     up as Floats;
#   * +freq_table+ is stored but not otherwise used within this class.
#
# NOTE(review): to_i64 is not a core Ruby method; presumably it is added by a
# project extension and decodes a stored 64-bit counter -- confirm.
# NOTE(review): summary_table is presumably an HBase-style table handle;
# nothing here handles a missing row (#first returning nil) -- verify callers.
- module Ankusa
2
-
3
- class NBClass
4
- attr_reader :doc_count, :word_count
5
-
6
- def initialize(name, summary_table, freq_table)
7
- @name = name
8
- @summary_table = summary_table
9
- @freq_table = freq_table
10
- @word_count = @summary_table.get(@name, "totals:wordcount").first.to_i64.to_f
11
- @doc_count = @summary_table.get(@name, "totals:doccount").first.to_i64.to_f
12
- end
13
- end
14
-
15
- end