ankusa 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +3 -3
- data/docs/Ankusa.html +229 -0
- data/docs/Ankusa/CassandraStorage.html +801 -0
- data/docs/Ankusa/Classifier.html +440 -0
- data/docs/Ankusa/FileSystemStorage.html +376 -0
- data/docs/Ankusa/HBaseStorage.html +845 -0
- data/docs/Ankusa/KLDivergenceClassifier.html +265 -0
- data/docs/Ankusa/MemoryStorage.html +672 -0
- data/docs/Ankusa/NaiveBayesClassifier.html +313 -0
- data/docs/Ankusa/TextHash.html +390 -0
- data/docs/README_rdoc.html +268 -0
- data/docs/String.html +241 -0
- data/docs/created.rid +14 -0
- data/docs/images/brick.png +0 -0
- data/docs/images/brick_link.png +0 -0
- data/docs/images/bug.png +0 -0
- data/docs/images/bullet_black.png +0 -0
- data/docs/images/bullet_toggle_minus.png +0 -0
- data/docs/images/bullet_toggle_plus.png +0 -0
- data/docs/images/date.png +0 -0
- data/docs/images/find.png +0 -0
- data/docs/images/loadingAnimation.gif +0 -0
- data/docs/images/macFFBgHack.png +0 -0
- data/docs/images/package.png +0 -0
- data/docs/images/page_green.png +0 -0
- data/docs/images/page_white_text.png +0 -0
- data/docs/images/page_white_width.png +0 -0
- data/docs/images/plugin.png +0 -0
- data/docs/images/ruby.png +0 -0
- data/docs/images/tag_green.png +0 -0
- data/docs/images/wrench.png +0 -0
- data/docs/images/wrench_orange.png +0 -0
- data/docs/images/zoom.png +0 -0
- data/docs/index.html +212 -0
- data/docs/js/darkfish.js +116 -0
- data/docs/js/jquery.js +32 -0
- data/docs/js/quicksearch.js +114 -0
- data/docs/js/thickbox-compressed.js +10 -0
- data/docs/lib/ankusa/cassandra_storage_rb.html +54 -0
- data/docs/lib/ankusa/classifier_rb.html +52 -0
- data/docs/lib/ankusa/extensions_rb.html +54 -0
- data/docs/lib/ankusa/file_system_storage_rb.html +54 -0
- data/docs/lib/ankusa/hasher_rb.html +56 -0
- data/docs/lib/ankusa/hbase_storage_rb.html +54 -0
- data/docs/lib/ankusa/kl_divergence_rb.html +52 -0
- data/docs/lib/ankusa/memory_storage_rb.html +52 -0
- data/docs/lib/ankusa/naive_bayes_rb.html +52 -0
- data/docs/lib/ankusa/stopwords_rb.html +52 -0
- data/docs/lib/ankusa/version_rb.html +52 -0
- data/docs/lib/ankusa_rb.html +64 -0
- data/docs/rdoc.css +759 -0
- data/lib/ankusa/cassandra_storage.rb +2 -2
- data/lib/ankusa/classifier.rb +2 -2
- data/lib/ankusa/hasher.rb +17 -17
- data/lib/ankusa/hbase_storage.rb +2 -2
- data/lib/ankusa/stopwords.rb +1 -1
- data/lib/ankusa/version.rb +1 -1
- metadata +56 -8
data/Rakefile
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bundler'
|
3
3
|
require 'rake/testtask'
|
4
|
-
require '
|
4
|
+
require 'rdoc/task'
|
5
5
|
|
6
6
|
Bundler::GemHelper.install_tasks
|
7
7
|
|
8
8
|
desc "Create documentation"
|
9
|
-
|
9
|
+
RDoc::Task.new("doc") { |rdoc|
|
10
10
|
rdoc.title = "Ankusa - Naive Bayes classifier with big data storage"
|
11
11
|
rdoc.rdoc_dir = 'docs'
|
12
12
|
rdoc.rdoc_files.include('README.rdoc')
|
@@ -23,7 +23,7 @@ Rake::TestTask.new("test_memory") { |t|
|
|
23
23
|
desc "Run all unit tests with HBase storage"
|
24
24
|
Rake::TestTask.new("test_hbase") { |t|
|
25
25
|
t.libs << "lib"
|
26
|
-
t.test_files = FileList['test/hasher_test.rb'
|
26
|
+
t.test_files = FileList['test/hasher_test.rb']
|
27
27
|
t.verbose = true
|
28
28
|
}
|
29
29
|
|
data/docs/Ankusa.html
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
+
<head>
|
6
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
7
|
+
|
8
|
+
<title>Module: Ankusa</title>
|
9
|
+
|
10
|
+
<link rel="stylesheet" href="./rdoc.css" type="text/css" media="screen" />
|
11
|
+
|
12
|
+
<script src="./js/jquery.js" type="text/javascript" charset="utf-8"></script>
|
13
|
+
<script src="./js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
|
14
|
+
<script src="./js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
|
15
|
+
<script src="./js/darkfish.js" type="text/javascript" charset="utf-8"></script>
|
16
|
+
|
17
|
+
</head>
|
18
|
+
<body id="top" class="module">
|
19
|
+
|
20
|
+
<div id="metadata">
|
21
|
+
<div id="home-metadata">
|
22
|
+
<div id="home-section" class="section">
|
23
|
+
<h3 class="section-header">
|
24
|
+
<a href="./index.html">Home</a>
|
25
|
+
<a href="./index.html#classes">Classes</a>
|
26
|
+
<a href="./index.html#methods">Methods</a>
|
27
|
+
</h3>
|
28
|
+
</div>
|
29
|
+
</div>
|
30
|
+
|
31
|
+
<div id="file-metadata">
|
32
|
+
<div id="file-list-section" class="section">
|
33
|
+
<h3 class="section-header">In Files</h3>
|
34
|
+
<div class="section-body">
|
35
|
+
<ul>
|
36
|
+
|
37
|
+
<li><a href="./lib/ankusa/cassandra_storage_rb.html?TB_iframe=true&height=550&width=785"
|
38
|
+
class="thickbox" title="lib/ankusa/cassandra_storage.rb">lib/ankusa/cassandra_storage.rb</a></li>
|
39
|
+
|
40
|
+
<li><a href="./lib/ankusa/classifier_rb.html?TB_iframe=true&height=550&width=785"
|
41
|
+
class="thickbox" title="lib/ankusa/classifier.rb">lib/ankusa/classifier.rb</a></li>
|
42
|
+
|
43
|
+
<li><a href="./lib/ankusa/file_system_storage_rb.html?TB_iframe=true&height=550&width=785"
|
44
|
+
class="thickbox" title="lib/ankusa/file_system_storage.rb">lib/ankusa/file_system_storage.rb</a></li>
|
45
|
+
|
46
|
+
<li><a href="./lib/ankusa/hasher_rb.html?TB_iframe=true&height=550&width=785"
|
47
|
+
class="thickbox" title="lib/ankusa/hasher.rb">lib/ankusa/hasher.rb</a></li>
|
48
|
+
|
49
|
+
<li><a href="./lib/ankusa/hbase_storage_rb.html?TB_iframe=true&height=550&width=785"
|
50
|
+
class="thickbox" title="lib/ankusa/hbase_storage.rb">lib/ankusa/hbase_storage.rb</a></li>
|
51
|
+
|
52
|
+
<li><a href="./lib/ankusa/kl_divergence_rb.html?TB_iframe=true&height=550&width=785"
|
53
|
+
class="thickbox" title="lib/ankusa/kl_divergence.rb">lib/ankusa/kl_divergence.rb</a></li>
|
54
|
+
|
55
|
+
<li><a href="./lib/ankusa/memory_storage_rb.html?TB_iframe=true&height=550&width=785"
|
56
|
+
class="thickbox" title="lib/ankusa/memory_storage.rb">lib/ankusa/memory_storage.rb</a></li>
|
57
|
+
|
58
|
+
<li><a href="./lib/ankusa/naive_bayes_rb.html?TB_iframe=true&height=550&width=785"
|
59
|
+
class="thickbox" title="lib/ankusa/naive_bayes.rb">lib/ankusa/naive_bayes.rb</a></li>
|
60
|
+
|
61
|
+
<li><a href="./lib/ankusa/stopwords_rb.html?TB_iframe=true&height=550&width=785"
|
62
|
+
class="thickbox" title="lib/ankusa/stopwords.rb">lib/ankusa/stopwords.rb</a></li>
|
63
|
+
|
64
|
+
<li><a href="./lib/ankusa/version_rb.html?TB_iframe=true&height=550&width=785"
|
65
|
+
class="thickbox" title="lib/ankusa/version.rb">lib/ankusa/version.rb</a></li>
|
66
|
+
|
67
|
+
</ul>
|
68
|
+
</div>
|
69
|
+
</div>
|
70
|
+
|
71
|
+
|
72
|
+
</div>
|
73
|
+
|
74
|
+
<div id="class-metadata">
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
<!-- Namespace Contents -->
|
81
|
+
<div id="namespace-list-section" class="section">
|
82
|
+
<h3 class="section-header">Namespace</h3>
|
83
|
+
<ul class="link-list">
|
84
|
+
|
85
|
+
<li><span class="type">MODULE</span> <a href="Ankusa/Classifier.html">Ankusa::Classifier</a></li>
|
86
|
+
|
87
|
+
<li><span class="type">CLASS</span> <a href="Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
|
88
|
+
|
89
|
+
<li><span class="type">CLASS</span> <a href="Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
|
90
|
+
|
91
|
+
<li><span class="type">CLASS</span> <a href="Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
|
92
|
+
|
93
|
+
<li><span class="type">CLASS</span> <a href="Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
|
94
|
+
|
95
|
+
<li><span class="type">CLASS</span> <a href="Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
|
96
|
+
|
97
|
+
<li><span class="type">CLASS</span> <a href="Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
|
98
|
+
|
99
|
+
<li><span class="type">CLASS</span> <a href="Ankusa/TextHash.html">Ankusa::TextHash</a></li>
|
100
|
+
|
101
|
+
</ul>
|
102
|
+
</div>
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
</div>
|
109
|
+
|
110
|
+
<div id="project-metadata">
|
111
|
+
|
112
|
+
|
113
|
+
<div id="fileindex-section" class="section project-section">
|
114
|
+
<h3 class="section-header">Files</h3>
|
115
|
+
<ul>
|
116
|
+
|
117
|
+
<li class="file"><a href="./README_rdoc.html">README.rdoc</a></li>
|
118
|
+
|
119
|
+
</ul>
|
120
|
+
</div>
|
121
|
+
|
122
|
+
|
123
|
+
<div id="classindex-section" class="section project-section">
|
124
|
+
<h3 class="section-header">Class/Module Index
|
125
|
+
<span class="search-toggle"><img src="./images/find.png"
|
126
|
+
height="16" width="16" alt="[+]"
|
127
|
+
title="show/hide quicksearch" /></span></h3>
|
128
|
+
<form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
|
129
|
+
<fieldset>
|
130
|
+
<legend>Quicksearch</legend>
|
131
|
+
<input type="text" name="quicksearch" value=""
|
132
|
+
class="quicksearch-field" />
|
133
|
+
</fieldset>
|
134
|
+
</form>
|
135
|
+
|
136
|
+
<ul class="link-list">
|
137
|
+
|
138
|
+
<li><a href="./Ankusa.html">Ankusa</a></li>
|
139
|
+
|
140
|
+
<li><a href="./Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
|
141
|
+
|
142
|
+
<li><a href="./Ankusa/Classifier.html">Ankusa::Classifier</a></li>
|
143
|
+
|
144
|
+
<li><a href="./Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
|
145
|
+
|
146
|
+
<li><a href="./Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
|
147
|
+
|
148
|
+
<li><a href="./Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
|
149
|
+
|
150
|
+
<li><a href="./Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
|
151
|
+
|
152
|
+
<li><a href="./Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
|
153
|
+
|
154
|
+
<li><a href="./Ankusa/TextHash.html">Ankusa::TextHash</a></li>
|
155
|
+
|
156
|
+
<li><a href="./String.html">String</a></li>
|
157
|
+
|
158
|
+
</ul>
|
159
|
+
<div id="no-class-search-results" style="display: none;">No matching classes.</div>
|
160
|
+
</div>
|
161
|
+
|
162
|
+
|
163
|
+
</div>
|
164
|
+
</div>
|
165
|
+
|
166
|
+
<div id="documentation">
|
167
|
+
<h1 class="module">Ankusa</h1>
|
168
|
+
|
169
|
+
<div id="description" class="description">
|
170
|
+
|
171
|
+
<p>At the moment you’ll have to do:</p>
|
172
|
+
|
173
|
+
<p>create keyspace ankusa with replication_factor = 1</p>
|
174
|
+
|
175
|
+
<p>from the cassandra-cli. This should be fixed with new release candidate for
|
176
|
+
cassandra</p>
|
177
|
+
|
178
|
+
</div><!-- description -->
|
179
|
+
|
180
|
+
|
181
|
+
<div id="5Buntitled-5D" class="documentation-section">
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
<!-- Constants -->
|
188
|
+
<div id="constants-list" class="section">
|
189
|
+
<h3 class="section-header">Constants</h3>
|
190
|
+
<dl>
|
191
|
+
|
192
|
+
<dt><a name="INFTY">INFTY</a></dt>
|
193
|
+
|
194
|
+
<dd class="description"></dd>
|
195
|
+
|
196
|
+
|
197
|
+
<dt><a name="STOPWORDS">STOPWORDS</a></dt>
|
198
|
+
|
199
|
+
<dd class="description"><p>These are taken from MySQL - <a
|
200
|
+
href="http://dev.mysql.com/tech-resources/articles/full-text-revealed.html">dev.mysql.com/tech-resources/articles/full-text-revealed.html</a></p></dd>
|
201
|
+
|
202
|
+
|
203
|
+
<dt><a name="VERSION">VERSION</a></dt>
|
204
|
+
|
205
|
+
<dd class="description"></dd>
|
206
|
+
|
207
|
+
|
208
|
+
</dl>
|
209
|
+
</div>
|
210
|
+
|
211
|
+
|
212
|
+
|
213
|
+
|
214
|
+
<!-- Methods -->
|
215
|
+
|
216
|
+
</div><!-- 5Buntitled-5D -->
|
217
|
+
|
218
|
+
|
219
|
+
</div><!-- documentation -->
|
220
|
+
|
221
|
+
<div id="validator-badges">
|
222
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
223
|
+
<p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
|
224
|
+
Rdoc Generator</a> 2</small>.</p>
|
225
|
+
</div>
|
226
|
+
|
227
|
+
</body>
|
228
|
+
</html>
|
229
|
+
|
@@ -0,0 +1,801 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
+
<head>
|
6
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
7
|
+
|
8
|
+
<title>Class: Ankusa::CassandraStorage</title>
|
9
|
+
|
10
|
+
<link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
|
11
|
+
|
12
|
+
<script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
|
13
|
+
<script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
|
14
|
+
<script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
|
15
|
+
<script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
|
16
|
+
|
17
|
+
</head>
|
18
|
+
<body id="top" class="class">
|
19
|
+
|
20
|
+
<div id="metadata">
|
21
|
+
<div id="home-metadata">
|
22
|
+
<div id="home-section" class="section">
|
23
|
+
<h3 class="section-header">
|
24
|
+
<a href="../index.html">Home</a>
|
25
|
+
<a href="../index.html#classes">Classes</a>
|
26
|
+
<a href="../index.html#methods">Methods</a>
|
27
|
+
</h3>
|
28
|
+
</div>
|
29
|
+
</div>
|
30
|
+
|
31
|
+
<div id="file-metadata">
|
32
|
+
<div id="file-list-section" class="section">
|
33
|
+
<h3 class="section-header">In Files</h3>
|
34
|
+
<div class="section-body">
|
35
|
+
<ul>
|
36
|
+
|
37
|
+
<li><a href="../lib/ankusa/cassandra_storage_rb.html?TB_iframe=true&height=550&width=785"
|
38
|
+
class="thickbox" title="lib/ankusa/cassandra_storage.rb">lib/ankusa/cassandra_storage.rb</a></li>
|
39
|
+
|
40
|
+
</ul>
|
41
|
+
</div>
|
42
|
+
</div>
|
43
|
+
|
44
|
+
|
45
|
+
</div>
|
46
|
+
|
47
|
+
<div id="class-metadata">
|
48
|
+
|
49
|
+
<!-- Parent Class -->
|
50
|
+
<div id="parent-class-section" class="section">
|
51
|
+
<h3 class="section-header">Parent</h3>
|
52
|
+
|
53
|
+
<p class="link">Object</p>
|
54
|
+
|
55
|
+
</div>
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
<!-- Method Quickref -->
|
64
|
+
<div id="method-list-section" class="section">
|
65
|
+
<h3 class="section-header">Methods</h3>
|
66
|
+
<ul class="link-list">
|
67
|
+
|
68
|
+
<li><a href="#method-c-new">::new</a></li>
|
69
|
+
|
70
|
+
<li><a href="#method-i-classnames">#classnames</a></li>
|
71
|
+
|
72
|
+
<li><a href="#method-i-close">#close</a></li>
|
73
|
+
|
74
|
+
<li><a href="#method-i-doc_count_totals">#doc_count_totals</a></li>
|
75
|
+
|
76
|
+
<li><a href="#method-i-drop_tables">#drop_tables</a></li>
|
77
|
+
|
78
|
+
<li><a href="#method-i-get_doc_count">#get_doc_count</a></li>
|
79
|
+
|
80
|
+
<li><a href="#method-i-get_summary">#get_summary</a></li>
|
81
|
+
|
82
|
+
<li><a href="#method-i-get_total_word_count">#get_total_word_count</a></li>
|
83
|
+
|
84
|
+
<li><a href="#method-i-get_vocabulary_sizes">#get_vocabulary_sizes</a></li>
|
85
|
+
|
86
|
+
<li><a href="#method-i-get_word_counts">#get_word_counts</a></li>
|
87
|
+
|
88
|
+
<li><a href="#method-i-incr_doc_count">#incr_doc_count</a></li>
|
89
|
+
|
90
|
+
<li><a href="#method-i-incr_total_word_count">#incr_total_word_count</a></li>
|
91
|
+
|
92
|
+
<li><a href="#method-i-incr_word_count">#incr_word_count</a></li>
|
93
|
+
|
94
|
+
<li><a href="#method-i-init_tables">#init_tables</a></li>
|
95
|
+
|
96
|
+
<li><a href="#method-i-reset">#reset</a></li>
|
97
|
+
|
98
|
+
</ul>
|
99
|
+
</div>
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
</div>
|
104
|
+
|
105
|
+
<div id="project-metadata">
|
106
|
+
|
107
|
+
|
108
|
+
<div id="fileindex-section" class="section project-section">
|
109
|
+
<h3 class="section-header">Files</h3>
|
110
|
+
<ul>
|
111
|
+
|
112
|
+
<li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
|
113
|
+
|
114
|
+
</ul>
|
115
|
+
</div>
|
116
|
+
|
117
|
+
|
118
|
+
<div id="classindex-section" class="section project-section">
|
119
|
+
<h3 class="section-header">Class/Module Index
|
120
|
+
<span class="search-toggle"><img src="../images/find.png"
|
121
|
+
height="16" width="16" alt="[+]"
|
122
|
+
title="show/hide quicksearch" /></span></h3>
|
123
|
+
<form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
|
124
|
+
<fieldset>
|
125
|
+
<legend>Quicksearch</legend>
|
126
|
+
<input type="text" name="quicksearch" value=""
|
127
|
+
class="quicksearch-field" />
|
128
|
+
</fieldset>
|
129
|
+
</form>
|
130
|
+
|
131
|
+
<ul class="link-list">
|
132
|
+
|
133
|
+
<li><a href="../Ankusa.html">Ankusa</a></li>
|
134
|
+
|
135
|
+
<li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
|
136
|
+
|
137
|
+
<li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
|
138
|
+
|
139
|
+
<li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
|
140
|
+
|
141
|
+
<li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
|
142
|
+
|
143
|
+
<li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
|
144
|
+
|
145
|
+
<li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
|
146
|
+
|
147
|
+
<li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
|
148
|
+
|
149
|
+
<li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
|
150
|
+
|
151
|
+
<li><a href="../String.html">String</a></li>
|
152
|
+
|
153
|
+
</ul>
|
154
|
+
<div id="no-class-search-results" style="display: none;">No matching classes.</div>
|
155
|
+
</div>
|
156
|
+
|
157
|
+
|
158
|
+
</div>
|
159
|
+
</div>
|
160
|
+
|
161
|
+
<div id="documentation">
|
162
|
+
<h1 class="class">Ankusa::CassandraStorage</h1>
|
163
|
+
|
164
|
+
<div id="description" class="description">
|
165
|
+
|
166
|
+
</div><!-- description -->
|
167
|
+
|
168
|
+
|
169
|
+
<div id="5Buntitled-5D" class="documentation-section">
|
170
|
+
|
171
|
+
|
172
|
+
|
173
|
+
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
<!-- Attributes -->
|
178
|
+
<div id="attribute-method-details" class="method-section section">
|
179
|
+
<h3 class="section-header">Attributes</h3>
|
180
|
+
|
181
|
+
|
182
|
+
<div id="cassandra-attribute-method" class="method-detail">
|
183
|
+
<a name="cassandra"></a>
|
184
|
+
|
185
|
+
<div class="method-heading attribute-method-heading">
|
186
|
+
<span class="method-name">cassandra</span><span
|
187
|
+
class="attribute-access-type">[R]</span>
|
188
|
+
</div>
|
189
|
+
|
190
|
+
<div class="method-description">
|
191
|
+
|
192
|
+
|
193
|
+
|
194
|
+
</div>
|
195
|
+
</div>
|
196
|
+
|
197
|
+
</div><!-- attribute-method-details -->
|
198
|
+
|
199
|
+
|
200
|
+
<!-- Methods -->
|
201
|
+
|
202
|
+
<div id="public-class-method-details" class="method-section section">
|
203
|
+
<h3 class="section-header">Public Class Methods</h3>
|
204
|
+
|
205
|
+
|
206
|
+
<div id="new-method" class="method-detail ">
|
207
|
+
<a name="method-c-new"></a>
|
208
|
+
|
209
|
+
|
210
|
+
<div class="method-heading">
|
211
|
+
<span class="method-name">new</span><span
|
212
|
+
class="method-args">(host='127.0.0.1', port=9160, keyspace = 'ankusa', max_classes = 100)</span>
|
213
|
+
<span class="method-click-advice">click to toggle source</span>
|
214
|
+
</div>
|
215
|
+
|
216
|
+
|
217
|
+
<div class="method-description">
|
218
|
+
|
219
|
+
<p>Necessary to set max classes since current implementation of ruby cassandra
|
220
|
+
client doesn’t support table scans. Using crufty get_range method at the
|
221
|
+
moment.</p>
|
222
|
+
|
223
|
+
|
224
|
+
|
225
|
+
<div class="method-source-code" id="new-source">
|
226
|
+
<pre>
|
227
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 21</span>
|
228
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">host</span>=<span class="ruby-string">'127.0.0.1'</span>, <span class="ruby-identifier">port</span>=<span class="ruby-value">9160</span>, <span class="ruby-identifier">keyspace</span> = <span class="ruby-string">'ankusa'</span>, <span class="ruby-identifier">max_classes</span> = <span class="ruby-value">100</span>)
|
229
|
+
<span class="ruby-ivar">@cassandra</span> = <span class="ruby-constant">Cassandra</span>.<span class="ruby-identifier">new</span>(<span class="ruby-string">'system'</span>, <span class="ruby-node">"#{host}:#{port}"</span>)
|
230
|
+
<span class="ruby-ivar">@klass_word_counts</span>, <span class="ruby-ivar">@klass_doc_counts</span> = {}
|
231
|
+
<span class="ruby-ivar">@keyspace</span> = <span class="ruby-identifier">keyspace</span>
|
232
|
+
<span class="ruby-ivar">@max_classes</span> = <span class="ruby-identifier">max_classes</span>
|
233
|
+
<span class="ruby-identifier">init_tables</span>
|
234
|
+
<span class="ruby-keyword">end</span></pre>
|
235
|
+
</div><!-- new-source -->
|
236
|
+
|
237
|
+
</div>
|
238
|
+
|
239
|
+
|
240
|
+
|
241
|
+
|
242
|
+
</div><!-- new-method -->
|
243
|
+
|
244
|
+
|
245
|
+
</div><!-- public-class-method-details -->
|
246
|
+
|
247
|
+
<div id="public-instance-method-details" class="method-section section">
|
248
|
+
<h3 class="section-header">Public Instance Methods</h3>
|
249
|
+
|
250
|
+
|
251
|
+
<div id="classnames-method" class="method-detail ">
|
252
|
+
<a name="method-i-classnames"></a>
|
253
|
+
|
254
|
+
|
255
|
+
<div class="method-heading">
|
256
|
+
<span class="method-name">classnames</span><span
|
257
|
+
class="method-args">()</span>
|
258
|
+
<span class="method-click-advice">click to toggle source</span>
|
259
|
+
</div>
|
260
|
+
|
261
|
+
|
262
|
+
<div class="method-description">
|
263
|
+
|
264
|
+
<p>Fetch the names of the distinct classes for classification: eg. :spam,
|
265
|
+
:good, etc</p>
|
266
|
+
|
267
|
+
|
268
|
+
|
269
|
+
<div class="method-source-code" id="classnames-source">
|
270
|
+
<pre>
|
271
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 33</span>
|
272
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">classnames</span>
|
273
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get_range</span>(<span class="ruby-value">:totals</span>, {<span class="ruby-value">:start</span> =<span class="ruby-operator">></span> <span class="ruby-string">''</span>, <span class="ruby-value">:finish</span> =<span class="ruby-operator">></span> <span class="ruby-string">''</span>, <span class="ruby-value">:count</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@max_classes</span>}).<span class="ruby-identifier">inject</span>([]) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">cs</span>, <span class="ruby-identifier">key_slice</span><span class="ruby-operator">|</span>
|
274
|
+
<span class="ruby-identifier">cs</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">key</span>.<span class="ruby-identifier">to_sym</span>
|
275
|
+
<span class="ruby-keyword">end</span>
|
276
|
+
<span class="ruby-keyword">end</span></pre>
|
277
|
+
</div><!-- classnames-source -->
|
278
|
+
|
279
|
+
</div>
|
280
|
+
|
281
|
+
|
282
|
+
|
283
|
+
|
284
|
+
</div><!-- classnames-method -->
|
285
|
+
|
286
|
+
|
287
|
+
<div id="close-method" class="method-detail ">
|
288
|
+
<a name="method-i-close"></a>
|
289
|
+
|
290
|
+
|
291
|
+
<div class="method-heading">
|
292
|
+
<span class="method-name">close</span><span
|
293
|
+
class="method-args">()</span>
|
294
|
+
<span class="method-click-advice">click to toggle source</span>
|
295
|
+
</div>
|
296
|
+
|
297
|
+
|
298
|
+
<div class="method-description">
|
299
|
+
|
300
|
+
<p>Doesn’t do anything</p>
|
301
|
+
|
302
|
+
|
303
|
+
|
304
|
+
<div class="method-source-code" id="close-source">
|
305
|
+
<pre>
|
306
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 172</span>
|
307
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">close</span>
|
308
|
+
<span class="ruby-keyword">end</span></pre>
|
309
|
+
</div><!-- close-source -->
|
310
|
+
|
311
|
+
</div>
|
312
|
+
|
313
|
+
|
314
|
+
|
315
|
+
|
316
|
+
</div><!-- close-method -->
|
317
|
+
|
318
|
+
|
319
|
+
<div id="doc_count_totals-method" class="method-detail ">
|
320
|
+
<a name="method-i-doc_count_totals"></a>
|
321
|
+
|
322
|
+
|
323
|
+
<div class="method-heading">
|
324
|
+
<span class="method-name">doc_count_totals</span><span
|
325
|
+
class="method-args">()</span>
|
326
|
+
<span class="method-click-advice">click to toggle source</span>
|
327
|
+
</div>
|
328
|
+
|
329
|
+
|
330
|
+
<div class="method-description">
|
331
|
+
|
332
|
+
|
333
|
+
|
334
|
+
|
335
|
+
|
336
|
+
<div class="method-source-code" id="doc_count_totals-source">
|
337
|
+
<pre>
|
338
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 165</span>
|
339
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">doc_count_totals</span>
|
340
|
+
<span class="ruby-identifier">get_summary</span> <span class="ruby-string">"doc_count"</span>
|
341
|
+
<span class="ruby-keyword">end</span></pre>
|
342
|
+
</div><!-- doc_count_totals-source -->
|
343
|
+
|
344
|
+
</div>
|
345
|
+
|
346
|
+
|
347
|
+
|
348
|
+
|
349
|
+
</div><!-- doc_count_totals-method -->
|
350
|
+
|
351
|
+
|
352
|
+
<div id="drop_tables-method" class="method-detail ">
|
353
|
+
<a name="method-i-drop_tables"></a>
|
354
|
+
|
355
|
+
|
356
|
+
<div class="method-heading">
|
357
|
+
<span class="method-name">drop_tables</span><span
|
358
|
+
class="method-args">()</span>
|
359
|
+
<span class="method-click-advice">click to toggle source</span>
|
360
|
+
</div>
|
361
|
+
|
362
|
+
|
363
|
+
<div class="method-description">
|
364
|
+
|
365
|
+
<p>Drop ankusa keyspace, reset internal caches</p>
|
366
|
+
|
367
|
+
<p>FIXME: truncate doesn’t work with cassandra-beta2</p>
|
368
|
+
|
369
|
+
|
370
|
+
|
371
|
+
<div class="method-source-code" id="drop_tables-source">
|
372
|
+
<pre>
|
373
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 49</span>
|
374
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">drop_tables</span>
|
375
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">truncate!</span>(<span class="ruby-string">'classes'</span>)
|
376
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">truncate!</span>(<span class="ruby-string">'totals'</span>)
|
377
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">drop_keyspace</span>(<span class="ruby-ivar">@keyspace</span>)
|
378
|
+
<span class="ruby-ivar">@klass_word_counts</span>, <span class="ruby-ivar">@klass_doc_counts</span> = {}
|
379
|
+
<span class="ruby-keyword">end</span></pre>
|
380
|
+
</div><!-- drop_tables-source -->
|
381
|
+
|
382
|
+
</div>
|
383
|
+
|
384
|
+
|
385
|
+
|
386
|
+
|
387
|
+
</div><!-- drop_tables-method -->
|
388
|
+
|
389
|
+
|
390
|
+
<div id="get_doc_count-method" class="method-detail ">
|
391
|
+
<a name="method-i-get_doc_count"></a>
|
392
|
+
|
393
|
+
|
394
|
+
<div class="method-heading">
|
395
|
+
<span class="method-name">get_doc_count</span><span
|
396
|
+
class="method-args">(klass)</span>
|
397
|
+
<span class="method-click-advice">click to toggle source</span>
|
398
|
+
</div>
|
399
|
+
|
400
|
+
|
401
|
+
<div class="method-description">
|
402
|
+
|
403
|
+
<p>Fetch total documents for a given class and cache it</p>
|
404
|
+
|
405
|
+
|
406
|
+
|
407
|
+
<div class="method-source-code" id="get_doc_count-source">
|
408
|
+
<pre>
|
409
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 107</span>
|
410
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
|
411
|
+
<span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-string">"doc_count"</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>
|
412
|
+
<span class="ruby-keyword">end</span></pre>
|
413
|
+
</div><!-- get_doc_count-source -->
|
414
|
+
|
415
|
+
</div>
|
416
|
+
|
417
|
+
|
418
|
+
|
419
|
+
|
420
|
+
</div><!-- get_doc_count-method -->
|
421
|
+
|
422
|
+
|
423
|
+
<div id="get_total_word_count-method" class="method-detail ">
|
424
|
+
<a name="method-i-get_total_word_count"></a>
|
425
|
+
|
426
|
+
|
427
|
+
<div class="method-heading">
|
428
|
+
<span class="method-name">get_total_word_count</span><span
|
429
|
+
class="method-args">(klass)</span>
|
430
|
+
<span class="method-click-advice">click to toggle source</span>
|
431
|
+
</div>
|
432
|
+
|
433
|
+
|
434
|
+
<div class="method-description">
|
435
|
+
|
436
|
+
<p>Fetch total word count for a given class and cache it</p>
|
437
|
+
|
438
|
+
|
439
|
+
|
440
|
+
<div class="method-source-code" id="get_total_word_count-source">
|
441
|
+
<pre>
|
442
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 100</span>
|
443
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
|
444
|
+
<span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-string">"wordcount"</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>
|
445
|
+
<span class="ruby-keyword">end</span></pre>
|
446
|
+
</div><!-- get_total_word_count-source -->
|
447
|
+
|
448
|
+
</div>
|
449
|
+
|
450
|
+
|
451
|
+
|
452
|
+
|
453
|
+
</div><!-- get_total_word_count-method -->
|
454
|
+
|
455
|
+
|
456
|
+
<div id="get_vocabulary_sizes-method" class="method-detail ">
|
457
|
+
<a name="method-i-get_vocabulary_sizes"></a>
|
458
|
+
|
459
|
+
|
460
|
+
<div class="method-heading">
|
461
|
+
<span class="method-name">get_vocabulary_sizes</span><span
|
462
|
+
class="method-args">()</span>
|
463
|
+
<span class="method-click-advice">click to toggle source</span>
|
464
|
+
</div>
|
465
|
+
|
466
|
+
|
467
|
+
<div class="method-description">
|
468
|
+
|
469
|
+
<p>Does a table ‘scan’ of summary table pulling out the ‘vocabsize’
|
470
|
+
column from each row. Generates a hash of (class, vocab_size) key value
|
471
|
+
pairs</p>
|
472
|
+
|
473
|
+
|
474
|
+
|
475
|
+
<div class="method-source-code" id="get_vocabulary_sizes-source">
|
476
|
+
<pre>
|
477
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 93</span>
|
478
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_vocabulary_sizes</span>
|
479
|
+
<span class="ruby-identifier">get_summary</span> <span class="ruby-string">"vocabsize"</span>
|
480
|
+
<span class="ruby-keyword">end</span></pre>
|
481
|
+
</div><!-- get_vocabulary_sizes-source -->
|
482
|
+
|
483
|
+
</div>
|
484
|
+
|
485
|
+
|
486
|
+
|
487
|
+
|
488
|
+
</div><!-- get_vocabulary_sizes-method -->
|
489
|
+
|
490
|
+
|
491
|
+
<div id="get_word_counts-method" class="method-detail ">
|
492
|
+
<a name="method-i-get_word_counts"></a>
|
493
|
+
|
494
|
+
|
495
|
+
<div class="method-heading">
|
496
|
+
<span class="method-name">get_word_counts</span><span
|
497
|
+
class="method-args">(word)</span>
|
498
|
+
<span class="method-click-advice">click to toggle source</span>
|
499
|
+
</div>
|
500
|
+
|
501
|
+
|
502
|
+
<div class="method-description">
|
503
|
+
|
504
|
+
<p>Fetch the hash of word counts as a single row from Cassandra. Here column_name
|
505
|
+
is the class and column value is the count</p>
|
506
|
+
|
507
|
+
|
508
|
+
|
509
|
+
<div class="method-source-code" id="get_word_counts-source">
|
510
|
+
<pre>
|
511
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 82</span>
|
512
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
|
513
|
+
<span class="ruby-comment"># fetch all (class,count) pairs for a given word</span>
|
514
|
+
<span class="ruby-identifier">row</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>.<span class="ruby-identifier">to_s</span>)
|
515
|
+
<span class="ruby-keyword">return</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">to_hash</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">empty?</span>
|
516
|
+
<span class="ruby-identifier">row</span>.<span class="ruby-identifier">inject</span>({}){<span class="ruby-operator">|</span><span class="ruby-identifier">counts</span>, <span class="ruby-identifier">col</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">col</span>.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_sym</span>] = [<span class="ruby-identifier">col</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>,<span class="ruby-value">0</span>].<span class="ruby-identifier">max</span>; <span class="ruby-identifier">counts</span>}
|
517
|
+
<span class="ruby-keyword">end</span></pre>
|
518
|
+
</div><!-- get_word_counts-source -->
|
519
|
+
|
520
|
+
</div>
|
521
|
+
|
522
|
+
|
523
|
+
|
524
|
+
|
525
|
+
</div><!-- get_word_counts-method -->
|
526
|
+
|
527
|
+
|
528
|
+
<div id="incr_doc_count-method" class="method-detail ">
|
529
|
+
<a name="method-i-incr_doc_count"></a>
|
530
|
+
|
531
|
+
|
532
|
+
<div class="method-heading">
|
533
|
+
<span class="method-name">incr_doc_count</span><span
|
534
|
+
class="method-args">(klass, count)</span>
|
535
|
+
<span class="method-click-advice">click to toggle source</span>
|
536
|
+
</div>
|
537
|
+
|
538
|
+
|
539
|
+
<div class="method-description">
|
540
|
+
|
541
|
+
<p>Increment total document count for a given class by ‘count’</p>
|
542
|
+
|
543
|
+
|
544
|
+
|
545
|
+
<div class="method-source-code" id="incr_doc_count-source">
|
546
|
+
<pre>
|
547
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 157</span>
|
548
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
|
549
|
+
<span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
|
550
|
+
<span class="ruby-identifier">doc_count</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">"doc_count"</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
|
551
|
+
<span class="ruby-identifier">doc_count</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
|
552
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">"doc_count"</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">doc_count</span>.<span class="ruby-identifier">to_s</span>})
|
553
|
+
<span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">doc_count</span>
|
554
|
+
<span class="ruby-keyword">end</span></pre>
|
555
|
+
</div><!-- incr_doc_count-source -->
|
556
|
+
|
557
|
+
</div>
|
558
|
+
|
559
|
+
|
560
|
+
|
561
|
+
|
562
|
+
</div><!-- incr_doc_count-method -->
|
563
|
+
|
564
|
+
|
565
|
+
<div id="incr_total_word_count-method" class="method-detail ">
|
566
|
+
<a name="method-i-incr_total_word_count"></a>
|
567
|
+
|
568
|
+
|
569
|
+
<div class="method-heading">
|
570
|
+
<span class="method-name">incr_total_word_count</span><span
|
571
|
+
class="method-args">(klass, count)</span>
|
572
|
+
<span class="method-click-advice">click to toggle source</span>
|
573
|
+
</div>
|
574
|
+
|
575
|
+
|
576
|
+
<div class="method-description">
|
577
|
+
|
578
|
+
<p>Increment total word count for a given class by ‘count’</p>
|
579
|
+
|
580
|
+
|
581
|
+
|
582
|
+
<div class="method-source-code" id="incr_total_word_count-source">
|
583
|
+
<pre>
|
584
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 146</span>
|
585
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
|
586
|
+
<span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
|
587
|
+
<span class="ruby-identifier">wordcount</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">"wordcount"</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
|
588
|
+
<span class="ruby-identifier">wordcount</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
|
589
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">"wordcount"</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">wordcount</span>.<span class="ruby-identifier">to_s</span>})
|
590
|
+
<span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">wordcount</span>
|
591
|
+
<span class="ruby-keyword">end</span></pre>
|
592
|
+
</div><!-- incr_total_word_count-source -->
|
593
|
+
|
594
|
+
</div>
|
595
|
+
|
596
|
+
|
597
|
+
|
598
|
+
|
599
|
+
</div><!-- incr_total_word_count-method -->
|
600
|
+
|
601
|
+
|
602
|
+
<div id="incr_word_count-method" class="method-detail ">
|
603
|
+
<a name="method-i-incr_word_count"></a>
|
604
|
+
|
605
|
+
|
606
|
+
<div class="method-heading">
|
607
|
+
<span class="method-name">incr_word_count</span><span
|
608
|
+
class="method-args">(klass, word, count)</span>
|
609
|
+
<span class="method-click-advice">click to toggle source</span>
|
610
|
+
</div>
|
611
|
+
|
612
|
+
|
613
|
+
<div class="method-description">
|
614
|
+
|
615
|
+
<p>Increment the count for a given (word,class) pair. Evidently, Cassandra
|
616
|
+
does not support atomic increment/decrement. Psh. HBase uses ZooKeeper to
|
617
|
+
implement atomic operations, ain’t it special?</p>
|
618
|
+
|
619
|
+
|
620
|
+
|
621
|
+
<div class="method-source-code" id="incr_word_count-source">
|
622
|
+
<pre>
|
623
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 116</span>
|
624
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
|
625
|
+
<span class="ruby-comment"># Only wants strings</span>
|
626
|
+
<span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
|
627
|
+
<span class="ruby-identifier">word</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">to_s</span>
|
628
|
+
|
629
|
+
<span class="ruby-identifier">prior_count</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">klass</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
|
630
|
+
<span class="ruby-identifier">new_count</span> = <span class="ruby-identifier">prior_count</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">count</span>
|
631
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>, {<span class="ruby-identifier">klass</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">new_count</span>.<span class="ruby-identifier">to_s</span>})
|
632
|
+
|
633
|
+
<span class="ruby-keyword">if</span> (<span class="ruby-identifier">prior_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span> <span class="ruby-operator">&&</span> <span class="ruby-identifier">count</span> <span class="ruby-operator">></span> <span class="ruby-value">0</span>)
|
634
|
+
<span class="ruby-comment">#</span>
|
635
|
+
<span class="ruby-comment"># we've never seen this word before and we're not trying to unlearn it</span>
|
636
|
+
<span class="ruby-comment">#</span>
|
637
|
+
<span class="ruby-identifier">vocab_size</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">"vocabsize"</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
|
638
|
+
<span class="ruby-identifier">vocab_size</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
|
639
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">"vocabsize"</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">vocab_size</span>.<span class="ruby-identifier">to_s</span>})
|
640
|
+
<span class="ruby-keyword">elsif</span> <span class="ruby-identifier">new_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
|
641
|
+
<span class="ruby-comment">#</span>
|
642
|
+
<span class="ruby-comment"># we've seen this word before but we're trying to unlearn it</span>
|
643
|
+
<span class="ruby-comment">#</span>
|
644
|
+
<span class="ruby-identifier">vocab_size</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">"vocabsize"</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
|
645
|
+
<span class="ruby-identifier">vocab_size</span> <span class="ruby-operator">-=</span> <span class="ruby-value">1</span>
|
646
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">"vocabsize"</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">vocab_size</span>.<span class="ruby-identifier">to_s</span>})
|
647
|
+
<span class="ruby-keyword">end</span>
|
648
|
+
<span class="ruby-identifier">new_count</span>
|
649
|
+
<span class="ruby-keyword">end</span></pre>
|
650
|
+
</div><!-- incr_word_count-source -->
|
651
|
+
|
652
|
+
</div>
|
653
|
+
|
654
|
+
|
655
|
+
|
656
|
+
|
657
|
+
</div><!-- incr_word_count-method -->
|
658
|
+
|
659
|
+
|
660
|
+
<div id="init_tables-method" class="method-detail ">
|
661
|
+
<a name="method-i-init_tables"></a>
|
662
|
+
|
663
|
+
|
664
|
+
<div class="method-heading">
|
665
|
+
<span class="method-name">init_tables</span><span
|
666
|
+
class="method-args">()</span>
|
667
|
+
<span class="method-click-advice">click to toggle source</span>
|
668
|
+
</div>
|
669
|
+
|
670
|
+
|
671
|
+
<div class="method-description">
|
672
|
+
|
673
|
+
<p>Create required keyspace and column families</p>
|
674
|
+
|
675
|
+
|
676
|
+
|
677
|
+
<div class="method-source-code" id="init_tables-source">
|
678
|
+
<pre>
|
679
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 60</span>
|
680
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">init_tables</span>
|
681
|
+
<span class="ruby-comment"># Do nothing if keyspace already exists</span>
|
682
|
+
<span class="ruby-keyword">if</span> <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspaces</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-ivar">@keyspace</span>)
|
683
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspace</span> = <span class="ruby-ivar">@keyspace</span>
|
684
|
+
<span class="ruby-keyword">else</span>
|
685
|
+
<span class="ruby-identifier">freq_table</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">ColumnFamily</span>.<span class="ruby-identifier">new</span>({<span class="ruby-value">:keyspace</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@keyspace</span>, <span class="ruby-value">:name</span> =<span class="ruby-operator">></span> <span class="ruby-string">"classes"</span>}) <span class="ruby-comment"># word => {classname => count}</span>
|
686
|
+
<span class="ruby-identifier">summary_table</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">ColumnFamily</span>.<span class="ruby-identifier">new</span>({<span class="ruby-value">:keyspace</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@keyspace</span>, <span class="ruby-value">:name</span> =<span class="ruby-operator">></span> <span class="ruby-string">"totals"</span>}) <span class="ruby-comment"># class => {wordcount => count}</span>
|
687
|
+
<span class="ruby-identifier">ks_def</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">Keyspace</span>.<span class="ruby-identifier">new</span>({
|
688
|
+
<span class="ruby-value">:name</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@keyspace</span>,
|
689
|
+
<span class="ruby-value">:strategy_class</span> =<span class="ruby-operator">></span> <span class="ruby-string">'org.apache.cassandra.locator.SimpleStrategy'</span>,
|
690
|
+
<span class="ruby-value">:replication_factor</span> =<span class="ruby-operator">></span> <span class="ruby-value">1</span>,
|
691
|
+
<span class="ruby-value">:cf_defs</span> =<span class="ruby-operator">></span> [<span class="ruby-identifier">freq_table</span>, <span class="ruby-identifier">summary_table</span>]
|
692
|
+
})
|
693
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">add_keyspace</span> <span class="ruby-identifier">ks_def</span>
|
694
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspace</span> = <span class="ruby-ivar">@keyspace</span>
|
695
|
+
<span class="ruby-keyword">end</span>
|
696
|
+
<span class="ruby-keyword">end</span></pre>
|
697
|
+
</div><!-- init_tables-source -->
|
698
|
+
|
699
|
+
</div>
|
700
|
+
|
701
|
+
|
702
|
+
|
703
|
+
|
704
|
+
</div><!-- init_tables-method -->
|
705
|
+
|
706
|
+
|
707
|
+
<div id="reset-method" class="method-detail ">
|
708
|
+
<a name="method-i-reset"></a>
|
709
|
+
|
710
|
+
|
711
|
+
<div class="method-heading">
|
712
|
+
<span class="method-name">reset</span><span
|
713
|
+
class="method-args">()</span>
|
714
|
+
<span class="method-click-advice">click to toggle source</span>
|
715
|
+
</div>
|
716
|
+
|
717
|
+
|
718
|
+
<div class="method-description">
|
719
|
+
|
720
|
+
|
721
|
+
|
722
|
+
|
723
|
+
|
724
|
+
<div class="method-source-code" id="reset-source">
|
725
|
+
<pre>
|
726
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 39</span>
|
727
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">reset</span>
|
728
|
+
<span class="ruby-identifier">drop_tables</span>
|
729
|
+
<span class="ruby-identifier">init_tables</span>
|
730
|
+
<span class="ruby-keyword">end</span></pre>
|
731
|
+
</div><!-- reset-source -->
|
732
|
+
|
733
|
+
</div>
|
734
|
+
|
735
|
+
|
736
|
+
|
737
|
+
|
738
|
+
</div><!-- reset-method -->
|
739
|
+
|
740
|
+
|
741
|
+
</div><!-- public-instance-method-details -->
|
742
|
+
|
743
|
+
<div id="protected-instance-method-details" class="method-section section">
|
744
|
+
<h3 class="section-header">Protected Instance Methods</h3>
|
745
|
+
|
746
|
+
|
747
|
+
<div id="get_summary-method" class="method-detail ">
|
748
|
+
<a name="method-i-get_summary"></a>
|
749
|
+
|
750
|
+
|
751
|
+
<div class="method-heading">
|
752
|
+
<span class="method-name">get_summary</span><span
|
753
|
+
class="method-args">(name)</span>
|
754
|
+
<span class="method-click-advice">click to toggle source</span>
|
755
|
+
</div>
|
756
|
+
|
757
|
+
|
758
|
+
<div class="method-description">
|
759
|
+
|
760
|
+
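<p>Fetch up to @max_classes rows from the summary table and build a hash of (class, value) pairs for the named column; increase @max_classes if there are more classes than that</p>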
|
761
|
+
|
762
|
+
|
763
|
+
|
764
|
+
<div class="method-source-code" id="get_summary-source">
|
765
|
+
<pre>
|
766
|
+
<span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 180</span>
|
767
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_summary</span>(<span class="ruby-identifier">name</span>)
|
768
|
+
<span class="ruby-identifier">counts</span> = {}
|
769
|
+
<span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get_range</span>(<span class="ruby-value">:totals</span>, {<span class="ruby-value">:start</span> =<span class="ruby-operator">></span> <span class="ruby-string">''</span>, <span class="ruby-value">:finish</span> =<span class="ruby-operator">></span> <span class="ruby-string">''</span>, <span class="ruby-value">:count</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@max_classes</span>}).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">key_slice</span><span class="ruby-operator">|</span>
|
770
|
+
<span class="ruby-comment"># keyslice is a clunky thrift object, map into a ruby hash</span>
|
771
|
+
<span class="ruby-identifier">row</span> = <span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">columns</span>.<span class="ruby-identifier">inject</span>({}){<span class="ruby-operator">|</span><span class="ruby-identifier">hsh</span>, <span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-identifier">hsh</span>[<span class="ruby-identifier">c</span>.<span class="ruby-identifier">column</span>.<span class="ruby-identifier">name</span>] = <span class="ruby-identifier">c</span>.<span class="ruby-identifier">column</span>.<span class="ruby-identifier">value</span>; <span class="ruby-identifier">hsh</span>}
|
772
|
+
<span class="ruby-identifier">counts</span>[<span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">key</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">row</span>[<span class="ruby-identifier">name</span>].<span class="ruby-identifier">to_f</span>
|
773
|
+
<span class="ruby-keyword">end</span>
|
774
|
+
<span class="ruby-identifier">counts</span>
|
775
|
+
<span class="ruby-keyword">end</span></pre>
|
776
|
+
</div><!-- get_summary-source -->
|
777
|
+
|
778
|
+
</div>
|
779
|
+
|
780
|
+
|
781
|
+
|
782
|
+
|
783
|
+
</div><!-- get_summary-method -->
|
784
|
+
|
785
|
+
|
786
|
+
</div><!-- protected-instance-method-details -->
|
787
|
+
|
788
|
+
</div><!-- 5Buntitled-5D -->
|
789
|
+
|
790
|
+
|
791
|
+
</div><!-- documentation -->
|
792
|
+
|
793
|
+
<div id="validator-badges">
|
794
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
795
|
+
<p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
|
796
|
+
Rdoc Generator</a> 2</small>.</p>
|
797
|
+
</div>
|
798
|
+
|
799
|
+
</body>
|
800
|
+
</html>
|
801
|
+
|