ankusa 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. data/Rakefile +3 -3
  2. data/docs/Ankusa.html +229 -0
  3. data/docs/Ankusa/CassandraStorage.html +801 -0
  4. data/docs/Ankusa/Classifier.html +440 -0
  5. data/docs/Ankusa/FileSystemStorage.html +376 -0
  6. data/docs/Ankusa/HBaseStorage.html +845 -0
  7. data/docs/Ankusa/KLDivergenceClassifier.html +265 -0
  8. data/docs/Ankusa/MemoryStorage.html +672 -0
  9. data/docs/Ankusa/NaiveBayesClassifier.html +313 -0
  10. data/docs/Ankusa/TextHash.html +390 -0
  11. data/docs/README_rdoc.html +268 -0
  12. data/docs/String.html +241 -0
  13. data/docs/created.rid +14 -0
  14. data/docs/images/brick.png +0 -0
  15. data/docs/images/brick_link.png +0 -0
  16. data/docs/images/bug.png +0 -0
  17. data/docs/images/bullet_black.png +0 -0
  18. data/docs/images/bullet_toggle_minus.png +0 -0
  19. data/docs/images/bullet_toggle_plus.png +0 -0
  20. data/docs/images/date.png +0 -0
  21. data/docs/images/find.png +0 -0
  22. data/docs/images/loadingAnimation.gif +0 -0
  23. data/docs/images/macFFBgHack.png +0 -0
  24. data/docs/images/package.png +0 -0
  25. data/docs/images/page_green.png +0 -0
  26. data/docs/images/page_white_text.png +0 -0
  27. data/docs/images/page_white_width.png +0 -0
  28. data/docs/images/plugin.png +0 -0
  29. data/docs/images/ruby.png +0 -0
  30. data/docs/images/tag_green.png +0 -0
  31. data/docs/images/wrench.png +0 -0
  32. data/docs/images/wrench_orange.png +0 -0
  33. data/docs/images/zoom.png +0 -0
  34. data/docs/index.html +212 -0
  35. data/docs/js/darkfish.js +116 -0
  36. data/docs/js/jquery.js +32 -0
  37. data/docs/js/quicksearch.js +114 -0
  38. data/docs/js/thickbox-compressed.js +10 -0
  39. data/docs/lib/ankusa/cassandra_storage_rb.html +54 -0
  40. data/docs/lib/ankusa/classifier_rb.html +52 -0
  41. data/docs/lib/ankusa/extensions_rb.html +54 -0
  42. data/docs/lib/ankusa/file_system_storage_rb.html +54 -0
  43. data/docs/lib/ankusa/hasher_rb.html +56 -0
  44. data/docs/lib/ankusa/hbase_storage_rb.html +54 -0
  45. data/docs/lib/ankusa/kl_divergence_rb.html +52 -0
  46. data/docs/lib/ankusa/memory_storage_rb.html +52 -0
  47. data/docs/lib/ankusa/naive_bayes_rb.html +52 -0
  48. data/docs/lib/ankusa/stopwords_rb.html +52 -0
  49. data/docs/lib/ankusa/version_rb.html +52 -0
  50. data/docs/lib/ankusa_rb.html +64 -0
  51. data/docs/rdoc.css +759 -0
  52. data/lib/ankusa/cassandra_storage.rb +2 -2
  53. data/lib/ankusa/classifier.rb +2 -2
  54. data/lib/ankusa/hasher.rb +17 -17
  55. data/lib/ankusa/hbase_storage.rb +2 -2
  56. data/lib/ankusa/stopwords.rb +1 -1
  57. data/lib/ankusa/version.rb +1 -1
  58. metadata +56 -8
data/Rakefile CHANGED
@@ -1,12 +1,12 @@
1
1
  require 'rubygems'
2
2
  require 'bundler'
3
3
  require 'rake/testtask'
4
- require 'rake/rdoctask'
4
+ require 'rdoc/task'
5
5
 
6
6
  Bundler::GemHelper.install_tasks
7
7
 
8
8
  desc "Create documentation"
9
- Rake::RDocTask.new("doc") { |rdoc|
9
+ RDoc::Task.new("doc") { |rdoc|
10
10
  rdoc.title = "Ankusa - Naive Bayes classifier with big data storage"
11
11
  rdoc.rdoc_dir = 'docs'
12
12
  rdoc.rdoc_files.include('README.rdoc')
@@ -23,7 +23,7 @@ Rake::TestTask.new("test_memory") { |t|
23
23
  desc "Run all unit tests with HBase storage"
24
24
  Rake::TestTask.new("test_hbase") { |t|
25
25
  t.libs << "lib"
26
- t.test_files = FileList['test/hasher_test.rb', 'test/memory_hbase_test.rb']
26
+ t.test_files = FileList['test/hasher_test.rb']
27
27
  t.verbose = true
28
28
  }
29
29
 
data/docs/Ankusa.html ADDED
@@ -0,0 +1,229 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
+ <head>
6
+ <meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
7
+
8
+ <title>Module: Ankusa</title>
9
+
10
+ <link rel="stylesheet" href="./rdoc.css" type="text/css" media="screen" />
11
+
12
+ <script src="./js/jquery.js" type="text/javascript" charset="utf-8"></script>
13
+ <script src="./js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
14
+ <script src="./js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
15
+ <script src="./js/darkfish.js" type="text/javascript" charset="utf-8"></script>
16
+
17
+ </head>
18
+ <body id="top" class="module">
19
+
20
+ <div id="metadata">
21
+ <div id="home-metadata">
22
+ <div id="home-section" class="section">
23
+ <h3 class="section-header">
24
+ <a href="./index.html">Home</a>
25
+ <a href="./index.html#classes">Classes</a>
26
+ <a href="./index.html#methods">Methods</a>
27
+ </h3>
28
+ </div>
29
+ </div>
30
+
31
+ <div id="file-metadata">
32
+ <div id="file-list-section" class="section">
33
+ <h3 class="section-header">In Files</h3>
34
+ <div class="section-body">
35
+ <ul>
36
+
37
+ <li><a href="./lib/ankusa/cassandra_storage_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
38
+ class="thickbox" title="lib/ankusa/cassandra_storage.rb">lib/ankusa/cassandra_storage.rb</a></li>
39
+
40
+ <li><a href="./lib/ankusa/classifier_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
41
+ class="thickbox" title="lib/ankusa/classifier.rb">lib/ankusa/classifier.rb</a></li>
42
+
43
+ <li><a href="./lib/ankusa/file_system_storage_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
44
+ class="thickbox" title="lib/ankusa/file_system_storage.rb">lib/ankusa/file_system_storage.rb</a></li>
45
+
46
+ <li><a href="./lib/ankusa/hasher_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
47
+ class="thickbox" title="lib/ankusa/hasher.rb">lib/ankusa/hasher.rb</a></li>
48
+
49
+ <li><a href="./lib/ankusa/hbase_storage_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
50
+ class="thickbox" title="lib/ankusa/hbase_storage.rb">lib/ankusa/hbase_storage.rb</a></li>
51
+
52
+ <li><a href="./lib/ankusa/kl_divergence_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
53
+ class="thickbox" title="lib/ankusa/kl_divergence.rb">lib/ankusa/kl_divergence.rb</a></li>
54
+
55
+ <li><a href="./lib/ankusa/memory_storage_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
56
+ class="thickbox" title="lib/ankusa/memory_storage.rb">lib/ankusa/memory_storage.rb</a></li>
57
+
58
+ <li><a href="./lib/ankusa/naive_bayes_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
59
+ class="thickbox" title="lib/ankusa/naive_bayes.rb">lib/ankusa/naive_bayes.rb</a></li>
60
+
61
+ <li><a href="./lib/ankusa/stopwords_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
62
+ class="thickbox" title="lib/ankusa/stopwords.rb">lib/ankusa/stopwords.rb</a></li>
63
+
64
+ <li><a href="./lib/ankusa/version_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
65
+ class="thickbox" title="lib/ankusa/version.rb">lib/ankusa/version.rb</a></li>
66
+
67
+ </ul>
68
+ </div>
69
+ </div>
70
+
71
+
72
+ </div>
73
+
74
+ <div id="class-metadata">
75
+
76
+
77
+
78
+
79
+
80
+ <!-- Namespace Contents -->
81
+ <div id="namespace-list-section" class="section">
82
+ <h3 class="section-header">Namespace</h3>
83
+ <ul class="link-list">
84
+
85
+ <li><span class="type">MODULE</span> <a href="Ankusa/Classifier.html">Ankusa::Classifier</a></li>
86
+
87
+ <li><span class="type">CLASS</span> <a href="Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
88
+
89
+ <li><span class="type">CLASS</span> <a href="Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
90
+
91
+ <li><span class="type">CLASS</span> <a href="Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
92
+
93
+ <li><span class="type">CLASS</span> <a href="Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
94
+
95
+ <li><span class="type">CLASS</span> <a href="Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
96
+
97
+ <li><span class="type">CLASS</span> <a href="Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
98
+
99
+ <li><span class="type">CLASS</span> <a href="Ankusa/TextHash.html">Ankusa::TextHash</a></li>
100
+
101
+ </ul>
102
+ </div>
103
+
104
+
105
+
106
+
107
+
108
+ </div>
109
+
110
+ <div id="project-metadata">
111
+
112
+
113
+ <div id="fileindex-section" class="section project-section">
114
+ <h3 class="section-header">Files</h3>
115
+ <ul>
116
+
117
+ <li class="file"><a href="./README_rdoc.html">README.rdoc</a></li>
118
+
119
+ </ul>
120
+ </div>
121
+
122
+
123
+ <div id="classindex-section" class="section project-section">
124
+ <h3 class="section-header">Class/Module Index
125
+ <span class="search-toggle"><img src="./images/find.png"
126
+ height="16" width="16" alt="[+]"
127
+ title="show/hide quicksearch" /></span></h3>
128
+ <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
129
+ <fieldset>
130
+ <legend>Quicksearch</legend>
131
+ <input type="text" name="quicksearch" value=""
132
+ class="quicksearch-field" />
133
+ </fieldset>
134
+ </form>
135
+
136
+ <ul class="link-list">
137
+
138
+ <li><a href="./Ankusa.html">Ankusa</a></li>
139
+
140
+ <li><a href="./Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
141
+
142
+ <li><a href="./Ankusa/Classifier.html">Ankusa::Classifier</a></li>
143
+
144
+ <li><a href="./Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
145
+
146
+ <li><a href="./Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
147
+
148
+ <li><a href="./Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
149
+
150
+ <li><a href="./Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
151
+
152
+ <li><a href="./Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
153
+
154
+ <li><a href="./Ankusa/TextHash.html">Ankusa::TextHash</a></li>
155
+
156
+ <li><a href="./String.html">String</a></li>
157
+
158
+ </ul>
159
+ <div id="no-class-search-results" style="display: none;">No matching classes.</div>
160
+ </div>
161
+
162
+
163
+ </div>
164
+ </div>
165
+
166
+ <div id="documentation">
167
+ <h1 class="module">Ankusa</h1>
168
+
169
+ <div id="description" class="description">
170
+
171
+ <p>At the moment you’ll have to do:</p>
172
+
173
+ <p>create keyspace ankusa with replication_factor = 1</p>
174
+
175
+ <p>from the cassandra-cli. This should be fixed with new release candidate for
176
+ cassandra</p>
177
+
178
+ </div><!-- description -->
179
+
180
+
181
+ <div id="5Buntitled-5D" class="documentation-section">
182
+
183
+
184
+
185
+
186
+
187
+ <!-- Constants -->
188
+ <div id="constants-list" class="section">
189
+ <h3 class="section-header">Constants</h3>
190
+ <dl>
191
+
192
+ <dt><a name="INFTY">INFTY</a></dt>
193
+
194
+ <dd class="description"></dd>
195
+
196
+
197
+ <dt><a name="STOPWORDS">STOPWORDS</a></dt>
198
+
199
+ <dd class="description"><p>These are taken from MySQL - <a
200
+ href="http://dev.mysql.com/tech-resources/articles/full-text-revealed.html">dev.mysql.com/tech-resources/articles/full-text-revealed.html</a></p></dd>
201
+
202
+
203
+ <dt><a name="VERSION">VERSION</a></dt>
204
+
205
+ <dd class="description"></dd>
206
+
207
+
208
+ </dl>
209
+ </div>
210
+
211
+
212
+
213
+
214
+ <!-- Methods -->
215
+
216
+ </div><!-- 5Buntitled-5D -->
217
+
218
+
219
+ </div><!-- documentation -->
220
+
221
+ <div id="validator-badges">
222
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
223
+ <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
224
+ Rdoc Generator</a> 2</small>.</p>
225
+ </div>
226
+
227
+ </body>
228
+ </html>
229
+
@@ -0,0 +1,801 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
+ <head>
6
+ <meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
7
+
8
+ <title>Class: Ankusa::CassandraStorage</title>
9
+
10
+ <link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
11
+
12
+ <script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
13
+ <script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
14
+ <script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
15
+ <script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
16
+
17
+ </head>
18
+ <body id="top" class="class">
19
+
20
+ <div id="metadata">
21
+ <div id="home-metadata">
22
+ <div id="home-section" class="section">
23
+ <h3 class="section-header">
24
+ <a href="../index.html">Home</a>
25
+ <a href="../index.html#classes">Classes</a>
26
+ <a href="../index.html#methods">Methods</a>
27
+ </h3>
28
+ </div>
29
+ </div>
30
+
31
+ <div id="file-metadata">
32
+ <div id="file-list-section" class="section">
33
+ <h3 class="section-header">In Files</h3>
34
+ <div class="section-body">
35
+ <ul>
36
+
37
+ <li><a href="../lib/ankusa/cassandra_storage_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
38
+ class="thickbox" title="lib/ankusa/cassandra_storage.rb">lib/ankusa/cassandra_storage.rb</a></li>
39
+
40
+ </ul>
41
+ </div>
42
+ </div>
43
+
44
+
45
+ </div>
46
+
47
+ <div id="class-metadata">
48
+
49
+ <!-- Parent Class -->
50
+ <div id="parent-class-section" class="section">
51
+ <h3 class="section-header">Parent</h3>
52
+
53
+ <p class="link">Object</p>
54
+
55
+ </div>
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+ <!-- Method Quickref -->
64
+ <div id="method-list-section" class="section">
65
+ <h3 class="section-header">Methods</h3>
66
+ <ul class="link-list">
67
+
68
+ <li><a href="#method-c-new">::new</a></li>
69
+
70
+ <li><a href="#method-i-classnames">#classnames</a></li>
71
+
72
+ <li><a href="#method-i-close">#close</a></li>
73
+
74
+ <li><a href="#method-i-doc_count_totals">#doc_count_totals</a></li>
75
+
76
+ <li><a href="#method-i-drop_tables">#drop_tables</a></li>
77
+
78
+ <li><a href="#method-i-get_doc_count">#get_doc_count</a></li>
79
+
80
+ <li><a href="#method-i-get_summary">#get_summary</a></li>
81
+
82
+ <li><a href="#method-i-get_total_word_count">#get_total_word_count</a></li>
83
+
84
+ <li><a href="#method-i-get_vocabulary_sizes">#get_vocabulary_sizes</a></li>
85
+
86
+ <li><a href="#method-i-get_word_counts">#get_word_counts</a></li>
87
+
88
+ <li><a href="#method-i-incr_doc_count">#incr_doc_count</a></li>
89
+
90
+ <li><a href="#method-i-incr_total_word_count">#incr_total_word_count</a></li>
91
+
92
+ <li><a href="#method-i-incr_word_count">#incr_word_count</a></li>
93
+
94
+ <li><a href="#method-i-init_tables">#init_tables</a></li>
95
+
96
+ <li><a href="#method-i-reset">#reset</a></li>
97
+
98
+ </ul>
99
+ </div>
100
+
101
+
102
+
103
+ </div>
104
+
105
+ <div id="project-metadata">
106
+
107
+
108
+ <div id="fileindex-section" class="section project-section">
109
+ <h3 class="section-header">Files</h3>
110
+ <ul>
111
+
112
+ <li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
113
+
114
+ </ul>
115
+ </div>
116
+
117
+
118
+ <div id="classindex-section" class="section project-section">
119
+ <h3 class="section-header">Class/Module Index
120
+ <span class="search-toggle"><img src="../images/find.png"
121
+ height="16" width="16" alt="[+]"
122
+ title="show/hide quicksearch" /></span></h3>
123
+ <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
124
+ <fieldset>
125
+ <legend>Quicksearch</legend>
126
+ <input type="text" name="quicksearch" value=""
127
+ class="quicksearch-field" />
128
+ </fieldset>
129
+ </form>
130
+
131
+ <ul class="link-list">
132
+
133
+ <li><a href="../Ankusa.html">Ankusa</a></li>
134
+
135
+ <li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
136
+
137
+ <li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
138
+
139
+ <li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
140
+
141
+ <li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
142
+
143
+ <li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
144
+
145
+ <li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
146
+
147
+ <li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
148
+
149
+ <li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
150
+
151
+ <li><a href="../String.html">String</a></li>
152
+
153
+ </ul>
154
+ <div id="no-class-search-results" style="display: none;">No matching classes.</div>
155
+ </div>
156
+
157
+
158
+ </div>
159
+ </div>
160
+
161
+ <div id="documentation">
162
+ <h1 class="class">Ankusa::CassandraStorage</h1>
163
+
164
+ <div id="description" class="description">
165
+
166
+ </div><!-- description -->
167
+
168
+
169
+ <div id="5Buntitled-5D" class="documentation-section">
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+ <!-- Attributes -->
178
+ <div id="attribute-method-details" class="method-section section">
179
+ <h3 class="section-header">Attributes</h3>
180
+
181
+
182
+ <div id="cassandra-attribute-method" class="method-detail">
183
+ <a name="cassandra"></a>
184
+
185
+ <div class="method-heading attribute-method-heading">
186
+ <span class="method-name">cassandra</span><span
187
+ class="attribute-access-type">[R]</span>
188
+ </div>
189
+
190
+ <div class="method-description">
191
+
192
+
193
+
194
+ </div>
195
+ </div>
196
+
197
+ </div><!-- attribute-method-details -->
198
+
199
+
200
+ <!-- Methods -->
201
+
202
+ <div id="public-class-method-details" class="method-section section">
203
+ <h3 class="section-header">Public Class Methods</h3>
204
+
205
+
206
+ <div id="new-method" class="method-detail ">
207
+ <a name="method-c-new"></a>
208
+
209
+
210
+ <div class="method-heading">
211
+ <span class="method-name">new</span><span
212
+ class="method-args">(host='127.0.0.1', port=9160, keyspace = 'ankusa', max_classes = 100)</span>
213
+ <span class="method-click-advice">click to toggle source</span>
214
+ </div>
215
+
216
+
217
+ <div class="method-description">
218
+
219
+ <p>Necessary to set max classes since current implementation of ruby cassandra
220
+ client doesn’t support table scans. Using crufty get_range method at the
221
+ moment.</p>
222
+
223
+
224
+
225
+ <div class="method-source-code" id="new-source">
226
+ <pre>
227
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 21</span>
228
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">host</span>=<span class="ruby-string">'127.0.0.1'</span>, <span class="ruby-identifier">port</span>=<span class="ruby-value">9160</span>, <span class="ruby-identifier">keyspace</span> = <span class="ruby-string">'ankusa'</span>, <span class="ruby-identifier">max_classes</span> = <span class="ruby-value">100</span>)
229
+ <span class="ruby-ivar">@cassandra</span> = <span class="ruby-constant">Cassandra</span>.<span class="ruby-identifier">new</span>(<span class="ruby-string">'system'</span>, <span class="ruby-node">&quot;#{host}:#{port}&quot;</span>)
230
+ <span class="ruby-ivar">@klass_word_counts</span>, <span class="ruby-ivar">@klass_doc_counts</span> = {}
231
+ <span class="ruby-ivar">@keyspace</span> = <span class="ruby-identifier">keyspace</span>
232
+ <span class="ruby-ivar">@max_classes</span> = <span class="ruby-identifier">max_classes</span>
233
+ <span class="ruby-identifier">init_tables</span>
234
+ <span class="ruby-keyword">end</span></pre>
235
+ </div><!-- new-source -->
236
+
237
+ </div>
238
+
239
+
240
+
241
+
242
+ </div><!-- new-method -->
243
+
244
+
245
+ </div><!-- public-class-method-details -->
246
+
247
+ <div id="public-instance-method-details" class="method-section section">
248
+ <h3 class="section-header">Public Instance Methods</h3>
249
+
250
+
251
+ <div id="classnames-method" class="method-detail ">
252
+ <a name="method-i-classnames"></a>
253
+
254
+
255
+ <div class="method-heading">
256
+ <span class="method-name">classnames</span><span
257
+ class="method-args">()</span>
258
+ <span class="method-click-advice">click to toggle source</span>
259
+ </div>
260
+
261
+
262
+ <div class="method-description">
263
+
264
+ <p>Fetch the names of the distinct classes for classification: eg. :spam,
265
+ :good, etc</p>
266
+
267
+
268
+
269
+ <div class="method-source-code" id="classnames-source">
270
+ <pre>
271
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 33</span>
272
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">classnames</span>
273
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get_range</span>(<span class="ruby-value">:totals</span>, {<span class="ruby-value">:start</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">''</span>, <span class="ruby-value">:finish</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">''</span>, <span class="ruby-value">:count</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@max_classes</span>}).<span class="ruby-identifier">inject</span>([]) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">cs</span>, <span class="ruby-identifier">key_slice</span><span class="ruby-operator">|</span>
274
+ <span class="ruby-identifier">cs</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">key</span>.<span class="ruby-identifier">to_sym</span>
275
+ <span class="ruby-keyword">end</span>
276
+ <span class="ruby-keyword">end</span></pre>
277
+ </div><!-- classnames-source -->
278
+
279
+ </div>
280
+
281
+
282
+
283
+
284
+ </div><!-- classnames-method -->
285
+
286
+
287
+ <div id="close-method" class="method-detail ">
288
+ <a name="method-i-close"></a>
289
+
290
+
291
+ <div class="method-heading">
292
+ <span class="method-name">close</span><span
293
+ class="method-args">()</span>
294
+ <span class="method-click-advice">click to toggle source</span>
295
+ </div>
296
+
297
+
298
+ <div class="method-description">
299
+
300
+ <p>Doesn’t do anything</p>
301
+
302
+
303
+
304
+ <div class="method-source-code" id="close-source">
305
+ <pre>
306
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 172</span>
307
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">close</span>
308
+ <span class="ruby-keyword">end</span></pre>
309
+ </div><!-- close-source -->
310
+
311
+ </div>
312
+
313
+
314
+
315
+
316
+ </div><!-- close-method -->
317
+
318
+
319
+ <div id="doc_count_totals-method" class="method-detail ">
320
+ <a name="method-i-doc_count_totals"></a>
321
+
322
+
323
+ <div class="method-heading">
324
+ <span class="method-name">doc_count_totals</span><span
325
+ class="method-args">()</span>
326
+ <span class="method-click-advice">click to toggle source</span>
327
+ </div>
328
+
329
+
330
+ <div class="method-description">
331
+
332
+
333
+
334
+
335
+
336
+ <div class="method-source-code" id="doc_count_totals-source">
337
+ <pre>
338
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 165</span>
339
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">doc_count_totals</span>
340
+ <span class="ruby-identifier">get_summary</span> <span class="ruby-string">&quot;doc_count&quot;</span>
341
+ <span class="ruby-keyword">end</span></pre>
342
+ </div><!-- doc_count_totals-source -->
343
+
344
+ </div>
345
+
346
+
347
+
348
+
349
+ </div><!-- doc_count_totals-method -->
350
+
351
+
352
+ <div id="drop_tables-method" class="method-detail ">
353
+ <a name="method-i-drop_tables"></a>
354
+
355
+
356
+ <div class="method-heading">
357
+ <span class="method-name">drop_tables</span><span
358
+ class="method-args">()</span>
359
+ <span class="method-click-advice">click to toggle source</span>
360
+ </div>
361
+
362
+
363
+ <div class="method-description">
364
+
365
+ <p>Drop ankusa keyspace, reset internal caches</p>
366
+
367
+ <p>FIXME: truncate doesn’t work with cassandra-beta2</p>
368
+
369
+
370
+
371
+ <div class="method-source-code" id="drop_tables-source">
372
+ <pre>
373
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 49</span>
374
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">drop_tables</span>
375
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">truncate!</span>(<span class="ruby-string">'classes'</span>)
376
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">truncate!</span>(<span class="ruby-string">'totals'</span>)
377
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">drop_keyspace</span>(<span class="ruby-ivar">@keyspace</span>)
378
+ <span class="ruby-ivar">@klass_word_counts</span>, <span class="ruby-ivar">@klass_doc_counts</span> = {}
379
+ <span class="ruby-keyword">end</span></pre>
380
+ </div><!-- drop_tables-source -->
381
+
382
+ </div>
383
+
384
+
385
+
386
+
387
+ </div><!-- drop_tables-method -->
388
+
389
+
390
+ <div id="get_doc_count-method" class="method-detail ">
391
+ <a name="method-i-get_doc_count"></a>
392
+
393
+
394
+ <div class="method-heading">
395
+ <span class="method-name">get_doc_count</span><span
396
+ class="method-args">(klass)</span>
397
+ <span class="method-click-advice">click to toggle source</span>
398
+ </div>
399
+
400
+
401
+ <div class="method-description">
402
+
403
+ <p>Fetch total documents for a given class and cache it</p>
404
+
405
+
406
+
407
+ <div class="method-source-code" id="get_doc_count-source">
408
+ <pre>
409
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 107</span>
410
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
411
+ <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-string">&quot;doc_count&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>
412
+ <span class="ruby-keyword">end</span></pre>
413
+ </div><!-- get_doc_count-source -->
414
+
415
+ </div>
416
+
417
+
418
+
419
+
420
+ </div><!-- get_doc_count-method -->
421
+
422
+
423
+ <div id="get_total_word_count-method" class="method-detail ">
424
+ <a name="method-i-get_total_word_count"></a>
425
+
426
+
427
+ <div class="method-heading">
428
+ <span class="method-name">get_total_word_count</span><span
429
+ class="method-args">(klass)</span>
430
+ <span class="method-click-advice">click to toggle source</span>
431
+ </div>
432
+
433
+
434
+ <div class="method-description">
435
+
436
+ <p>Fetch total word count for a given class and cache it</p>
437
+
438
+
439
+
440
+ <div class="method-source-code" id="get_total_word_count-source">
441
+ <pre>
442
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 100</span>
443
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
444
+ <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-string">&quot;wordcount&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>
445
+ <span class="ruby-keyword">end</span></pre>
446
+ </div><!-- get_total_word_count-source -->
447
+
448
+ </div>
449
+
450
+
451
+
452
+
453
+ </div><!-- get_total_word_count-method -->
454
+
455
+
456
+ <div id="get_vocabulary_sizes-method" class="method-detail ">
457
+ <a name="method-i-get_vocabulary_sizes"></a>
458
+
459
+
460
+ <div class="method-heading">
461
+ <span class="method-name">get_vocabulary_sizes</span><span
462
+ class="method-args">()</span>
463
+ <span class="method-click-advice">click to toggle source</span>
464
+ </div>
465
+
466
+
467
+ <div class="method-description">
468
+
469
+ <p>Does a table ‘scan’ of summary table pulling out the ‘vocabsize’
470
+ column from each row. Generates a hash of (class, vocab_size) key value
471
+ pairs</p>
472
+
473
+
474
+
475
+ <div class="method-source-code" id="get_vocabulary_sizes-source">
476
+ <pre>
477
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 93</span>
478
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_vocabulary_sizes</span>
479
+ <span class="ruby-identifier">get_summary</span> <span class="ruby-string">&quot;vocabsize&quot;</span>
480
+ <span class="ruby-keyword">end</span></pre>
481
+ </div><!-- get_vocabulary_sizes-source -->
482
+
483
+ </div>
484
+
485
+
486
+
487
+
488
+ </div><!-- get_vocabulary_sizes-method -->
489
+
490
+
491
+ <div id="get_word_counts-method" class="method-detail ">
492
+ <a name="method-i-get_word_counts"></a>
493
+
494
+
495
+ <div class="method-heading">
496
+ <span class="method-name">get_word_counts</span><span
497
+ class="method-args">(word)</span>
498
+ <span class="method-click-advice">click to toggle source</span>
499
+ </div>
500
+
501
+
502
+ <div class="method-description">
503
+
504
+ <p>Fetch hash of word counts as a single row from cassandra. Here column_name
505
+ is the class and column value is the count</p>
506
+
507
+
508
+
509
+ <div class="method-source-code" id="get_word_counts-source">
510
+ <pre>
511
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 82</span>
512
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
513
+ <span class="ruby-comment"># fetch all (class,count) pairs for a given word</span>
514
+ <span class="ruby-identifier">row</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>.<span class="ruby-identifier">to_s</span>)
515
+ <span class="ruby-keyword">return</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">to_hash</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">empty?</span>
516
+ <span class="ruby-identifier">row</span>.<span class="ruby-identifier">inject</span>({}){<span class="ruby-operator">|</span><span class="ruby-identifier">counts</span>, <span class="ruby-identifier">col</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">col</span>.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_sym</span>] = [<span class="ruby-identifier">col</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>,<span class="ruby-value">0</span>].<span class="ruby-identifier">max</span>; <span class="ruby-identifier">counts</span>}
517
+ <span class="ruby-keyword">end</span></pre>
518
+ </div><!-- get_word_counts-source -->
519
+
520
+ </div>
521
+
522
+
523
+
524
+
525
+ </div><!-- get_word_counts-method -->
526
+
527
+
528
+ <div id="incr_doc_count-method" class="method-detail ">
529
+ <a name="method-i-incr_doc_count"></a>
530
+
531
+
532
+ <div class="method-heading">
533
+ <span class="method-name">incr_doc_count</span><span
534
+ class="method-args">(klass, count)</span>
535
+ <span class="method-click-advice">click to toggle source</span>
536
+ </div>
537
+
538
+
539
+ <div class="method-description">
540
+
541
+ <p>Increment total document count for a given class by ‘count’</p>
542
+
543
+
544
+
545
+ <div class="method-source-code" id="incr_doc_count-source">
546
+ <pre>
547
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 157</span>
548
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
549
+ <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
550
+ <span class="ruby-identifier">doc_count</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;doc_count&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
551
+ <span class="ruby-identifier">doc_count</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
552
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">&quot;doc_count&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">doc_count</span>.<span class="ruby-identifier">to_s</span>})
553
+ <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">doc_count</span>
554
+ <span class="ruby-keyword">end</span></pre>
555
+ </div><!-- incr_doc_count-source -->
556
+
557
+ </div>
558
+
559
+
560
+
561
+
562
+ </div><!-- incr_doc_count-method -->
563
+
564
+
565
+ <div id="incr_total_word_count-method" class="method-detail ">
566
+ <a name="method-i-incr_total_word_count"></a>
567
+
568
+
569
+ <div class="method-heading">
570
+ <span class="method-name">incr_total_word_count</span><span
571
+ class="method-args">(klass, count)</span>
572
+ <span class="method-click-advice">click to toggle source</span>
573
+ </div>
574
+
575
+
576
+ <div class="method-description">
577
+
578
+ <p>Increment total word count for a given class by ‘count’</p>
579
+
580
+
581
+
582
+ <div class="method-source-code" id="incr_total_word_count-source">
583
+ <pre>
584
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 146</span>
585
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
586
+ <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
587
+ <span class="ruby-identifier">wordcount</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;wordcount&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
588
+ <span class="ruby-identifier">wordcount</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
589
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">&quot;wordcount&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">wordcount</span>.<span class="ruby-identifier">to_s</span>})
590
+ <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">wordcount</span>
591
+ <span class="ruby-keyword">end</span></pre>
592
+ </div><!-- incr_total_word_count-source -->
593
+
594
+ </div>
595
+
596
+
597
+
598
+
599
+ </div><!-- incr_total_word_count-method -->
600
+
601
+
602
+ <div id="incr_word_count-method" class="method-detail ">
603
+ <a name="method-i-incr_word_count"></a>
604
+
605
+
606
+ <div class="method-heading">
607
+ <span class="method-name">incr_word_count</span><span
608
+ class="method-args">(klass, word, count)</span>
609
+ <span class="method-click-advice">click to toggle source</span>
610
+ </div>
611
+
612
+
613
+ <div class="method-description">
614
+
615
+ <p>Increment the count for a given (word,class) pair. Evidently, Cassandra
616
+ does not support atomic increment/decrement. Psh. HBase uses ZooKeeper to
617
+ implement atomic operations, ain’t it special?</p>
618
+
619
+
620
+
621
+ <div class="method-source-code" id="incr_word_count-source">
622
+ <pre>
623
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 116</span>
624
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
625
+ <span class="ruby-comment"># Only wants strings</span>
626
+ <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
627
+ <span class="ruby-identifier">word</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">to_s</span>
628
+
629
+ <span class="ruby-identifier">prior_count</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">klass</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
630
+ <span class="ruby-identifier">new_count</span> = <span class="ruby-identifier">prior_count</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">count</span>
631
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>, {<span class="ruby-identifier">klass</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">new_count</span>.<span class="ruby-identifier">to_s</span>})
632
+
633
+ <span class="ruby-keyword">if</span> (<span class="ruby-identifier">prior_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">count</span> <span class="ruby-operator">&gt;</span> <span class="ruby-value">0</span>)
634
+ <span class="ruby-comment">#</span>
635
+ <span class="ruby-comment"># we've never seen this word before and we're not trying to unlearn it</span>
636
+ <span class="ruby-comment">#</span>
637
+ <span class="ruby-identifier">vocab_size</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;vocabsize&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
638
+ <span class="ruby-identifier">vocab_size</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
639
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">&quot;vocabsize&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">vocab_size</span>.<span class="ruby-identifier">to_s</span>})
640
+ <span class="ruby-keyword">elsif</span> <span class="ruby-identifier">new_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
641
+ <span class="ruby-comment">#</span>
642
+ <span class="ruby-comment"># we've seen this word before but we're trying to unlearn it</span>
643
+ <span class="ruby-comment">#</span>
644
+ <span class="ruby-identifier">vocab_size</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;vocabsize&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
645
+ <span class="ruby-identifier">vocab_size</span> <span class="ruby-operator">-=</span> <span class="ruby-value">1</span>
646
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">&quot;vocabsize&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">vocab_size</span>.<span class="ruby-identifier">to_s</span>})
647
+ <span class="ruby-keyword">end</span>
648
+ <span class="ruby-identifier">new_count</span>
649
+ <span class="ruby-keyword">end</span></pre>
650
+ </div><!-- incr_word_count-source -->
651
+
652
+ </div>
653
+
654
+
655
+
656
+
657
+ </div><!-- incr_word_count-method -->
658
+
659
+
660
+ <div id="init_tables-method" class="method-detail ">
661
+ <a name="method-i-init_tables"></a>
662
+
663
+
664
+ <div class="method-heading">
665
+ <span class="method-name">init_tables</span><span
666
+ class="method-args">()</span>
667
+ <span class="method-click-advice">click to toggle source</span>
668
+ </div>
669
+
670
+
671
+ <div class="method-description">
672
+
673
+ <p>Create required keyspace and column families</p>
674
+
675
+
676
+
677
+ <div class="method-source-code" id="init_tables-source">
678
+ <pre>
679
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 60</span>
680
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">init_tables</span>
681
+ <span class="ruby-comment"># Do nothing if keyspace already exists</span>
682
+ <span class="ruby-keyword">if</span> <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspaces</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-ivar">@keyspace</span>)
683
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspace</span> = <span class="ruby-ivar">@keyspace</span>
684
+ <span class="ruby-keyword">else</span>
685
+ <span class="ruby-identifier">freq_table</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">ColumnFamily</span>.<span class="ruby-identifier">new</span>({<span class="ruby-value">:keyspace</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>, <span class="ruby-value">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">&quot;classes&quot;</span>}) <span class="ruby-comment"># word =&gt; {classname =&gt; count}</span>
686
+ <span class="ruby-identifier">summary_table</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">ColumnFamily</span>.<span class="ruby-identifier">new</span>({<span class="ruby-value">:keyspace</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>, <span class="ruby-value">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">&quot;totals&quot;</span>}) <span class="ruby-comment"># class =&gt; {wordcount =&gt; count}</span>
687
+ <span class="ruby-identifier">ks_def</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">Keyspace</span>.<span class="ruby-identifier">new</span>({
688
+ <span class="ruby-value">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>,
689
+ <span class="ruby-value">:strategy_class</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">'org.apache.cassandra.locator.SimpleStrategy'</span>,
690
+ <span class="ruby-value">:replication_factor</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value">1</span>,
691
+ <span class="ruby-value">:cf_defs</span> =<span class="ruby-operator">&gt;</span> [<span class="ruby-identifier">freq_table</span>, <span class="ruby-identifier">summary_table</span>]
692
+ })
693
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">add_keyspace</span> <span class="ruby-identifier">ks_def</span>
694
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspace</span> = <span class="ruby-ivar">@keyspace</span>
695
+ <span class="ruby-keyword">end</span>
696
+ <span class="ruby-keyword">end</span></pre>
697
+ </div><!-- init_tables-source -->
698
+
699
+ </div>
700
+
701
+
702
+
703
+
704
+ </div><!-- init_tables-method -->
705
+
706
+
707
+ <div id="reset-method" class="method-detail ">
708
+ <a name="method-i-reset"></a>
709
+
710
+
711
+ <div class="method-heading">
712
+ <span class="method-name">reset</span><span
713
+ class="method-args">()</span>
714
+ <span class="method-click-advice">click to toggle source</span>
715
+ </div>
716
+
717
+
718
+ <div class="method-description">
719
+
720
+
721
+
722
+
723
+
724
+ <div class="method-source-code" id="reset-source">
725
+ <pre>
726
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 39</span>
727
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">reset</span>
728
+ <span class="ruby-identifier">drop_tables</span>
729
+ <span class="ruby-identifier">init_tables</span>
730
+ <span class="ruby-keyword">end</span></pre>
731
+ </div><!-- reset-source -->
732
+
733
+ </div>
734
+
735
+
736
+
737
+
738
+ </div><!-- reset-method -->
739
+
740
+
741
+ </div><!-- public-instance-method-details -->
742
+
743
+ <div id="protected-instance-method-details" class="method-section section">
744
+ <h3 class="section-header">Protected Instance Methods</h3>
745
+
746
+
747
+ <div id="get_summary-method" class="method-detail ">
748
+ <a name="method-i-get_summary"></a>
749
+
750
+
751
+ <div class="method-heading">
752
+ <span class="method-name">get_summary</span><span
753
+ class="method-args">(name)</span>
754
+ <span class="method-click-advice">click to toggle source</span>
755
+ </div>
756
+
757
+
758
+ <div class="method-description">
759
+
760
+ <p>Fetch rows from the summary table, capped at @max_classes (nominally 100); increase that limit if necessary</p>
761
+
762
+
763
+
764
+ <div class="method-source-code" id="get_summary-source">
765
+ <pre>
766
+ <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 180</span>
767
+ <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_summary</span>(<span class="ruby-identifier">name</span>)
768
+ <span class="ruby-identifier">counts</span> = {}
769
+ <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get_range</span>(<span class="ruby-value">:totals</span>, {<span class="ruby-value">:start</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">''</span>, <span class="ruby-value">:finish</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">''</span>, <span class="ruby-value">:count</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@max_classes</span>}).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">key_slice</span><span class="ruby-operator">|</span>
770
+ <span class="ruby-comment"># keyslice is a clunky thrift object, map into a ruby hash</span>
771
+ <span class="ruby-identifier">row</span> = <span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">columns</span>.<span class="ruby-identifier">inject</span>({}){<span class="ruby-operator">|</span><span class="ruby-identifier">hsh</span>, <span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-identifier">hsh</span>[<span class="ruby-identifier">c</span>.<span class="ruby-identifier">column</span>.<span class="ruby-identifier">name</span>] = <span class="ruby-identifier">c</span>.<span class="ruby-identifier">column</span>.<span class="ruby-identifier">value</span>; <span class="ruby-identifier">hsh</span>}
772
+ <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">key</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">row</span>[<span class="ruby-identifier">name</span>].<span class="ruby-identifier">to_f</span>
773
+ <span class="ruby-keyword">end</span>
774
+ <span class="ruby-identifier">counts</span>
775
+ <span class="ruby-keyword">end</span></pre>
776
+ </div><!-- get_summary-source -->
777
+
778
+ </div>
779
+
780
+
781
+
782
+
783
+ </div><!-- get_summary-method -->
784
+
785
+
786
+ </div><!-- protected-instance-method-details -->
787
+
788
+ </div><!-- 5Buntitled-5D -->
789
+
790
+
791
+ </div><!-- documentation -->
792
+
793
+ <div id="validator-badges">
794
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
795
+ <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
796
+ Rdoc Generator</a> 2</small>.</p>
797
+ </div>
798
+
799
+ </body>
800
+ </html>
801
+