ankusa 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/README.rdoc +2 -1
  2. data/Rakefile +4 -26
  3. data/lib/ankusa.rb +1 -0
  4. data/lib/ankusa/classifier.rb +3 -0
  5. data/lib/ankusa/naive_bayes.rb +8 -4
  6. data/lib/ankusa/version.rb +3 -0
  7. metadata +6 -33
  8. data/docs/classes/Ankusa.html +0 -182
  9. data/docs/classes/Ankusa/CassandraStorage.html +0 -615
  10. data/docs/classes/Ankusa/Classifier.html +0 -315
  11. data/docs/classes/Ankusa/FileSystemStorage.html +0 -272
  12. data/docs/classes/Ankusa/HBaseStorage.html +0 -594
  13. data/docs/classes/Ankusa/KLDivergenceClassifier.html +0 -194
  14. data/docs/classes/Ankusa/MemoryStorage.html +0 -467
  15. data/docs/classes/Ankusa/NaiveBayesClassifier.html +0 -231
  16. data/docs/classes/Ankusa/TextHash.html +0 -275
  17. data/docs/classes/String.html +0 -172
  18. data/docs/created.rid +0 -1
  19. data/docs/files/README_rdoc.html +0 -294
  20. data/docs/files/lib/ankusa/cassandra_storage_rb.html +0 -108
  21. data/docs/files/lib/ankusa/classifier_rb.html +0 -101
  22. data/docs/files/lib/ankusa/extensions_rb.html +0 -108
  23. data/docs/files/lib/ankusa/file_system_storage_rb.html +0 -108
  24. data/docs/files/lib/ankusa/hasher_rb.html +0 -109
  25. data/docs/files/lib/ankusa/hbase_storage_rb.html +0 -108
  26. data/docs/files/lib/ankusa/kl_divergence_rb.html +0 -101
  27. data/docs/files/lib/ankusa/memory_storage_rb.html +0 -101
  28. data/docs/files/lib/ankusa/naive_bayes_rb.html +0 -101
  29. data/docs/files/lib/ankusa/stopwords_rb.html +0 -101
  30. data/docs/files/lib/ankusa_rb.html +0 -112
  31. data/docs/fr_class_index.html +0 -36
  32. data/docs/fr_file_index.html +0 -38
  33. data/docs/fr_method_index.html +0 -95
  34. data/docs/index.html +0 -24
  35. data/docs/rdoc-style.css +0 -208
@@ -20,7 +20,8 @@ Using the naive Bayes classifier:
20
20
  require 'ankusa'
21
21
  require 'ankusa/hbase_storage'
22
22
 
23
- # connect to HBase
23
+ # connect to HBase. Alternatively, just for this test, use in memory storage with
24
+ # storage = Ankusa::MemoryStorage.new
24
25
  storage = Ankusa::HBaseStorage.new 'localhost'
25
26
  c = Ankusa::NaiveBayesClassifier.new storage
26
27
 
data/Rakefile CHANGED
@@ -1,12 +1,13 @@
1
1
  require 'rubygems'
2
- require 'rake'
2
+ require 'bundler'
3
3
  require 'rake/testtask'
4
4
  require 'rake/rdoctask'
5
- require 'rake/gempackagetask'
5
+
6
+ Bundler::GemHelper.install_tasks
6
7
 
7
8
  desc "Create documentation"
8
9
  Rake::RDocTask.new("doc") { |rdoc|
9
- rdoc.title = "HBaseRb - Naive Bayes classifier with HBase storage"
10
+ rdoc.title = "Ankusa - Naive Bayes classifier with big data storage"
10
11
  rdoc.rdoc_dir = 'docs'
11
12
  rdoc.rdoc_files.include('README.rdoc')
12
13
  rdoc.rdoc_files.include('lib/**/*.rb')
@@ -39,26 +40,3 @@ Rake::TestTask.new("test_filesystem") { |t|
39
40
  t.test_files = FileList['test/hasher_test.rb', 'test/file_system_classifier_test.rb']
40
41
  t.verbose = true
41
42
  }
42
-
43
- spec = Gem::Specification.new do |s|
44
- s.name = "ankusa"
45
- s.version = "0.0.8"
46
- s.authors = ["Brian Muller"]
47
- s.date = %q{2011-01-05}
48
- s.description = "Text classifier with HBase or Cassandra storage"
49
- s.summary = "Text classifier in Ruby that uses Hadoop's HBase or Cassandra for storage"
50
- s.email = "brian.muller@livingsocial.com"
51
- s.files = FileList["lib/**/*", "[A-Z]*", "Rakefile", "docs/**/*"]
52
- s.homepage = "https://github.com/livingsocial/ankusa"
53
- s.require_paths = ["lib"]
54
- s.add_dependency('fast-stemmer', '>= 1.0.0')
55
- s.requirements << "Either hbaserb >= 0.0.3 or cassandra >= 0.7"
56
- end
57
-
58
- Rake::GemPackageTask.new(spec) do |pkg|
59
- pkg.need_zip = true
60
- pkg.need_tar = true
61
- end
62
-
63
- desc "Default task: builds gem and runs tests"
64
- task :default => [ :gem, :test ]
@@ -1,3 +1,4 @@
1
+ require 'ankusa/version'
1
2
  require 'ankusa/extensions'
2
3
  require 'ankusa/classifier'
3
4
  require 'ankusa/naive_bayes'
@@ -50,6 +50,9 @@ module Ankusa
50
50
  @storage.get_word_counts(word).each { |k,v| probs[k] = v if classnames.include? k }
51
51
  vs = vocab_sizes
52
52
  classnames.each { |cn|
53
+ # if we've never seen the class, the word prob is 0
54
+ next if not vs.has_key? cn
55
+
53
56
  # use a laplacian smoother
54
57
  probs[cn] = (probs[cn] + 1).to_f / (@storage.get_total_word_count(cn) + vs[cn]).to_f
55
58
  }
@@ -1,4 +1,5 @@
1
1
  module Ankusa
2
+ INFTY = 1.0 / 0.0
2
3
 
3
4
  class NaiveBayesClassifier
4
5
  include Classifier
@@ -12,7 +13,7 @@ module Ankusa
12
13
  def classifications(text, classnames=nil)
13
14
  result = log_likelihoods text, classnames
14
15
  result.keys.each { |k|
15
- result[k] = Math.exp result[k]
16
+ result[k] = (result[k] == INFTY) ? 0 : Math.exp(result[k])
16
17
  }
17
18
 
18
19
  # normalize to get probs
@@ -28,16 +29,19 @@ module Ankusa
28
29
 
29
30
  TextHash.new(text).each { |word, count|
30
31
  probs = get_word_probs(word, classnames)
31
- classnames.each { |k| result[k] += (Math.log(probs[k]) * count) }
32
+ classnames.each { |k|
33
+ # log likelihood should be infinity if we've never seen the klass
34
+ result[k] += probs[k] > 0 ? (Math.log(probs[k]) * count) : INFTY
35
+ }
32
36
  }
33
37
 
34
- # add the prior and exponentiate
38
+ # add the prior
35
39
  doc_counts = doc_count_totals.select { |k,v| classnames.include? k }.map { |k,v| v }
36
40
  doc_count_total = (doc_counts.inject { |x,y| x+y } + classnames.length).to_f
37
41
  classnames.each { |k|
38
42
  result[k] += Math.log((@storage.get_doc_count(k) + 1).to_f / doc_count_total)
39
43
  }
40
-
44
+
41
45
  result
42
46
  end
43
47
 
@@ -0,0 +1,3 @@
1
+ module Ankusa
2
+ VERSION = "0.0.9"
3
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ankusa
3
3
  version: !ruby/object:Gem::Version
4
- hash: 15
4
+ hash: 13
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 8
10
- version: 0.0.8
9
+ - 9
10
+ version: 0.0.9
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Muller
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-05 00:00:00 -05:00
18
+ date: 2011-05-28 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -53,38 +53,11 @@ files:
53
53
  - lib/ankusa/memory_storage.rb
54
54
  - lib/ankusa/naive_bayes.rb
55
55
  - lib/ankusa/stopwords.rb
56
+ - lib/ankusa/version.rb
56
57
  - lib/ankusa.rb
57
58
  - LICENSE
58
59
  - Rakefile
59
60
  - README.rdoc
60
- - docs/classes/Ankusa/CassandraStorage.html
61
- - docs/classes/Ankusa/Classifier.html
62
- - docs/classes/Ankusa/FileSystemStorage.html
63
- - docs/classes/Ankusa/HBaseStorage.html
64
- - docs/classes/Ankusa/KLDivergenceClassifier.html
65
- - docs/classes/Ankusa/MemoryStorage.html
66
- - docs/classes/Ankusa/NaiveBayesClassifier.html
67
- - docs/classes/Ankusa/TextHash.html
68
- - docs/classes/Ankusa.html
69
- - docs/classes/String.html
70
- - docs/created.rid
71
- - docs/files/lib/ankusa/cassandra_storage_rb.html
72
- - docs/files/lib/ankusa/classifier_rb.html
73
- - docs/files/lib/ankusa/extensions_rb.html
74
- - docs/files/lib/ankusa/file_system_storage_rb.html
75
- - docs/files/lib/ankusa/hasher_rb.html
76
- - docs/files/lib/ankusa/hbase_storage_rb.html
77
- - docs/files/lib/ankusa/kl_divergence_rb.html
78
- - docs/files/lib/ankusa/memory_storage_rb.html
79
- - docs/files/lib/ankusa/naive_bayes_rb.html
80
- - docs/files/lib/ankusa/stopwords_rb.html
81
- - docs/files/lib/ankusa_rb.html
82
- - docs/files/README_rdoc.html
83
- - docs/fr_class_index.html
84
- - docs/fr_file_index.html
85
- - docs/fr_method_index.html
86
- - docs/index.html
87
- - docs/rdoc-style.css
88
61
  has_rdoc: true
89
62
  homepage: https://github.com/livingsocial/ankusa
90
63
  licenses: []
@@ -114,7 +87,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
114
87
  version: "0"
115
88
  requirements:
116
89
  - Either hbaserb >= 0.0.3 or cassandra >= 0.7
117
- rubyforge_project:
90
+ rubyforge_project: ankusa
118
91
  rubygems_version: 1.3.7
119
92
  signing_key:
120
93
  specification_version: 3
@@ -1,182 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
- <head>
8
- <title>Module: Ankusa</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
- <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
- <script type="text/javascript">
13
- // <![CDATA[
14
-
15
- function popupCode( url ) {
16
- window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
- }
18
-
19
- function toggleCode( id ) {
20
- if ( document.getElementById )
21
- elem = document.getElementById( id );
22
- else if ( document.all )
23
- elem = eval( "document.all." + id );
24
- else
25
- return false;
26
-
27
- elemStyle = elem.style;
28
-
29
- if ( elemStyle.display != "block" ) {
30
- elemStyle.display = "block"
31
- } else {
32
- elemStyle.display = "none"
33
- }
34
-
35
- return true;
36
- }
37
-
38
- // Make codeblocks hidden by default
39
- document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
-
41
- // ]]>
42
- </script>
43
-
44
- </head>
45
- <body>
46
-
47
-
48
-
49
- <div id="classHeader">
50
- <table class="header-table">
51
- <tr class="top-aligned-row">
52
- <td><strong>Module</strong></td>
53
- <td class="class-name-in-header">Ankusa</td>
54
- </tr>
55
- <tr class="top-aligned-row">
56
- <td><strong>In:</strong></td>
57
- <td>
58
- <a href="../files/lib/ankusa/cassandra_storage_rb.html">
59
- lib/ankusa/cassandra_storage.rb
60
- </a>
61
- <br />
62
- <a href="../files/lib/ankusa/classifier_rb.html">
63
- lib/ankusa/classifier.rb
64
- </a>
65
- <br />
66
- <a href="../files/lib/ankusa/file_system_storage_rb.html">
67
- lib/ankusa/file_system_storage.rb
68
- </a>
69
- <br />
70
- <a href="../files/lib/ankusa/hasher_rb.html">
71
- lib/ankusa/hasher.rb
72
- </a>
73
- <br />
74
- <a href="../files/lib/ankusa/hbase_storage_rb.html">
75
- lib/ankusa/hbase_storage.rb
76
- </a>
77
- <br />
78
- <a href="../files/lib/ankusa/kl_divergence_rb.html">
79
- lib/ankusa/kl_divergence.rb
80
- </a>
81
- <br />
82
- <a href="../files/lib/ankusa/memory_storage_rb.html">
83
- lib/ankusa/memory_storage.rb
84
- </a>
85
- <br />
86
- <a href="../files/lib/ankusa/naive_bayes_rb.html">
87
- lib/ankusa/naive_bayes.rb
88
- </a>
89
- <br />
90
- <a href="../files/lib/ankusa/stopwords_rb.html">
91
- lib/ankusa/stopwords.rb
92
- </a>
93
- <br />
94
- </td>
95
- </tr>
96
-
97
- </table>
98
- </div>
99
- <!-- banner header -->
100
-
101
- <div id="bodyContent">
102
-
103
-
104
-
105
- <div id="contextContent">
106
-
107
- <div id="description">
108
- <p>
109
- At the moment you&#8216;ll have to do:
110
- </p>
111
- <p>
112
- create keyspace ankusa with replication_factor = 1
113
- </p>
114
- <p>
115
- from the cassandra-cli. This should be fixed with new release candidate for
116
- cassandra
117
- </p>
118
-
119
- </div>
120
-
121
-
122
- </div>
123
-
124
-
125
- </div>
126
-
127
-
128
- <!-- if includes -->
129
-
130
- <div id="section">
131
-
132
- <div id="class-list">
133
- <h3 class="section-bar">Classes and Modules</h3>
134
-
135
- Module <a href="Ankusa/Classifier.html" class="link">Ankusa::Classifier</a><br />
136
- Class <a href="Ankusa/CassandraStorage.html" class="link">Ankusa::CassandraStorage</a><br />
137
- Class <a href="Ankusa/FileSystemStorage.html" class="link">Ankusa::FileSystemStorage</a><br />
138
- Class <a href="Ankusa/HBaseStorage.html" class="link">Ankusa::HBaseStorage</a><br />
139
- Class <a href="Ankusa/KLDivergenceClassifier.html" class="link">Ankusa::KLDivergenceClassifier</a><br />
140
- Class <a href="Ankusa/MemoryStorage.html" class="link">Ankusa::MemoryStorage</a><br />
141
- Class <a href="Ankusa/NaiveBayesClassifier.html" class="link">Ankusa::NaiveBayesClassifier</a><br />
142
- Class <a href="Ankusa/TextHash.html" class="link">Ankusa::TextHash</a><br />
143
-
144
- </div>
145
-
146
- <div id="constants-list">
147
- <h3 class="section-bar">Constants</h3>
148
-
149
- <div class="name-list">
150
- <table summary="Constants">
151
- <tr class="top-aligned-row context-row">
152
- <td class="context-item-name">STOPWORDS</td>
153
- <td>=</td>
154
- <td class="context-item-value">&quot;a able about above according accordingly across actually after afterwards again against ain't all allow allows almost alone along already also although always am among amongst an and another any anybody anyhow anyone anything anyway anyways anywhere apart appear appreciate appropriate are aren't around as aside ask asking associated at available away awfully be became because become becomes becoming been before beforehand behind being believe below beside besides best better between beyond both brief but by c'mon c's came can can't cannot cant cause causes certain certainly changes clearly co com come comes concerning consequently consider considering contain containing contains corresponding could couldn't course currently definitely described despite did didn't different do does doesn't doing don't done down downwards during each edu eg eight either else elsewhere enough entirely especially et etc even ever every everybody everyone everything everywhere ex exactly example except far few fifth first five followed following follows for former formerly forth four from further furthermore get gets getting given gives go goes going gone got gotten greetings had hadn't happens hardly has hasn't have haven't having he he's hello help hence her here here's hereafter hereby herein hereupon hers herself hi him himself his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored immediate in inasmuch inc indeed indicate indicated indicates inner insofar instead into inward is isn't it it'd it'll it's its itself just keep keeps kept know knows known last lately later latter latterly least less lest let let's like liked likely little look looking looks ltd mainly many may maybe me mean meanwhile merely might more moreover most mostly much must my myself name namely nd near nearly necessary need needs neither never nevertheless new next nine no nobody non none noone nor normally not nothing novel now nowhere obviously of off often oh ok okay old on once one ones only onto or other others otherwise ought our ours ourselves out outside over overall own particular particularly per perhaps placed please plus possible presumably probably provides que quite qv rather rd re really reasonably regarding regardless regards relatively respectively right said same saw say saying says second secondly see seeing seem seemed seeming seems seen self selves sensible sent serious seriously seven several shall she should shouldn't since six so some somebody somehow someone something sometime sometimes somewhat somewhere soon sorry specified specify specifying still sub such sup sure t's take taken tell tends th than thank thanks thanx that that's thats the their theirs them themselves then thence there there's thereafter thereby therefore therein theres thereupon these they they'd they'll they're they've think third this thorough thoroughly those though three through throughout thru thus to together too took toward towards tried tries truly try trying twice two un under unfortunately unless unlikely until unto up upon us use used useful uses using usually value various very via viz vs want wants was wasn't way we we'd we'll we're we've welcome well went were weren't what what's whatever when whence whenever where where's whereafter whereas whereby wherein whereupon wherever whether which while whither who who's whoever whole whom whose why will willing wish with within without won't wonder would would wouldn't yes yet you you'd you'll you're you've your yours yourself yourselves zero&quot;.split</td>
155
- <td width="3em">&nbsp;</td>
156
- <td class="context-item-desc">
157
- These are taken from MySQL - <a
158
- href="http://dev.mysql.com/tech-resources/articles/full-text-revealed.html">dev.mysql.com/tech-resources/articles/full-text-revealed.html</a>
159
-
160
- </td>
161
- </tr>
162
- </table>
163
- </div>
164
- </div>
165
-
166
-
167
-
168
-
169
-
170
-
171
- <!-- if method_list -->
172
-
173
-
174
- </div>
175
-
176
-
177
- <div id="validator-badges">
178
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
179
- </div>
180
-
181
- </body>
182
- </html>
@@ -1,615 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
- <head>
8
- <title>Class: Ankusa::CassandraStorage</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
- <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
- <script type="text/javascript">
13
- // <![CDATA[
14
-
15
- function popupCode( url ) {
16
- window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
- }
18
-
19
- function toggleCode( id ) {
20
- if ( document.getElementById )
21
- elem = document.getElementById( id );
22
- else if ( document.all )
23
- elem = eval( "document.all." + id );
24
- else
25
- return false;
26
-
27
- elemStyle = elem.style;
28
-
29
- if ( elemStyle.display != "block" ) {
30
- elemStyle.display = "block"
31
- } else {
32
- elemStyle.display = "none"
33
- }
34
-
35
- return true;
36
- }
37
-
38
- // Make codeblocks hidden by default
39
- document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
-
41
- // ]]>
42
- </script>
43
-
44
- </head>
45
- <body>
46
-
47
-
48
-
49
- <div id="classHeader">
50
- <table class="header-table">
51
- <tr class="top-aligned-row">
52
- <td><strong>Class</strong></td>
53
- <td class="class-name-in-header">Ankusa::CassandraStorage</td>
54
- </tr>
55
- <tr class="top-aligned-row">
56
- <td><strong>In:</strong></td>
57
- <td>
58
- <a href="../../files/lib/ankusa/cassandra_storage_rb.html">
59
- lib/ankusa/cassandra_storage.rb
60
- </a>
61
- <br />
62
- </td>
63
- </tr>
64
-
65
- <tr class="top-aligned-row">
66
- <td><strong>Parent:</strong></td>
67
- <td>
68
- Object
69
- </td>
70
- </tr>
71
- </table>
72
- </div>
73
- <!-- banner header -->
74
-
75
- <div id="bodyContent">
76
-
77
-
78
-
79
- <div id="contextContent">
80
-
81
-
82
-
83
- </div>
84
-
85
- <div id="method-list">
86
- <h3 class="section-bar">Methods</h3>
87
-
88
- <div class="name-list">
89
- <a href="#M000010">classnames</a>&nbsp;&nbsp;
90
- <a href="#M000022">close</a>&nbsp;&nbsp;
91
- <a href="#M000021">doc_count_totals</a>&nbsp;&nbsp;
92
- <a href="#M000012">drop_tables</a>&nbsp;&nbsp;
93
- <a href="#M000017">get_doc_count</a>&nbsp;&nbsp;
94
- <a href="#M000023">get_summary</a>&nbsp;&nbsp;
95
- <a href="#M000016">get_total_word_count</a>&nbsp;&nbsp;
96
- <a href="#M000015">get_vocabulary_sizes</a>&nbsp;&nbsp;
97
- <a href="#M000014">get_word_counts</a>&nbsp;&nbsp;
98
- <a href="#M000020">incr_doc_count</a>&nbsp;&nbsp;
99
- <a href="#M000019">incr_total_word_count</a>&nbsp;&nbsp;
100
- <a href="#M000018">incr_word_count</a>&nbsp;&nbsp;
101
- <a href="#M000013">init_tables</a>&nbsp;&nbsp;
102
- <a href="#M000009">new</a>&nbsp;&nbsp;
103
- <a href="#M000011">reset</a>&nbsp;&nbsp;
104
- </div>
105
- </div>
106
-
107
- </div>
108
-
109
-
110
- <!-- if includes -->
111
-
112
- <div id="section">
113
-
114
-
115
-
116
-
117
-
118
- <div id="attribute-list">
119
- <h3 class="section-bar">Attributes</h3>
120
-
121
- <div class="name-list">
122
- <table>
123
- <tr class="top-aligned-row context-row">
124
- <td class="context-item-name">cassandra</td>
125
- <td class="context-item-value">&nbsp;[R]&nbsp;</td>
126
- <td class="context-item-desc"></td>
127
- </tr>
128
- </table>
129
- </div>
130
- </div>
131
-
132
-
133
-
134
- <!-- if method_list -->
135
- <div id="methods">
136
- <h3 class="section-bar">Public Class methods</h3>
137
-
138
- <div id="method-M000009" class="method-detail">
139
- <a name="M000009"></a>
140
-
141
- <div class="method-heading">
142
- <a href="#M000009" class="method-signature">
143
- <span class="method-name">new</span><span class="method-args">(host='127.0.0.1', port=9160, keyspace = 'ankusa', max_classes = 100)</span>
144
- </a>
145
- </div>
146
-
147
- <div class="method-description">
148
- <p>
149
- Necessary to set max classes since current implementation of ruby cassandra
150
- client doesn&#8216;t support table scans. Using crufty get_range method at
151
- the moment.
152
- </p>
153
- <p><a class="source-toggle" href="#"
154
- onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
155
- <div class="method-source-code" id="M000009-source">
156
- <pre>
157
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 21</span>
158
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">host</span>=<span class="ruby-value str">'127.0.0.1'</span>, <span class="ruby-identifier">port</span>=<span class="ruby-value">9160</span>, <span class="ruby-identifier">keyspace</span> = <span class="ruby-value str">'ankusa'</span>, <span class="ruby-identifier">max_classes</span> = <span class="ruby-value">100</span>)
159
- <span class="ruby-ivar">@cassandra</span> = <span class="ruby-constant">Cassandra</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'system'</span>, <span class="ruby-node">&quot;#{host}:#{port}&quot;</span>)
160
- <span class="ruby-ivar">@klass_word_counts</span> = {}
161
- <span class="ruby-ivar">@klass_doc_counts</span> = {}
162
- <span class="ruby-ivar">@keyspace</span> = <span class="ruby-identifier">keyspace</span>
163
- <span class="ruby-ivar">@max_classes</span> = <span class="ruby-identifier">max_classes</span>
164
- <span class="ruby-identifier">init_tables</span>
165
- <span class="ruby-keyword kw">end</span>
166
- </pre>
167
- </div>
168
- </div>
169
- </div>
170
-
171
- <h3 class="section-bar">Public Instance methods</h3>
172
-
173
- <div id="method-M000010" class="method-detail">
174
- <a name="M000010"></a>
175
-
176
- <div class="method-heading">
177
- <a href="#M000010" class="method-signature">
178
- <span class="method-name">classnames</span><span class="method-args">()</span>
179
- </a>
180
- </div>
181
-
182
- <div class="method-description">
183
- <p>
184
- Fetch the names of the distinct classes for classification: eg. :spam,
185
- :good, etc
186
- </p>
187
- <p><a class="source-toggle" href="#"
188
- onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
189
- <div class="method-source-code" id="M000010-source">
190
- <pre>
191
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 34</span>
192
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classnames</span>
193
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get_range</span>(<span class="ruby-identifier">:totals</span>, {<span class="ruby-identifier">:start</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value str">''</span>, <span class="ruby-identifier">:finish</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value str">''</span>, <span class="ruby-identifier">:count</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@max_classes</span>}).<span class="ruby-identifier">inject</span>([]) <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">cs</span>, <span class="ruby-identifier">key_slice</span><span class="ruby-operator">|</span>
194
- <span class="ruby-identifier">cs</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">key</span>.<span class="ruby-identifier">to_sym</span>
195
- <span class="ruby-keyword kw">end</span>
196
- <span class="ruby-keyword kw">end</span>
197
- </pre>
198
- </div>
199
- </div>
200
- </div>
201
-
202
- <div id="method-M000022" class="method-detail">
203
- <a name="M000022"></a>
204
-
205
- <div class="method-heading">
206
- <a href="#M000022" class="method-signature">
207
- <span class="method-name">close</span><span class="method-args">()</span>
208
- </a>
209
- </div>
210
-
211
- <div class="method-description">
212
- <p>
213
- Doesn&#8216;t do anything
214
- </p>
215
- <p><a class="source-toggle" href="#"
216
- onclick="toggleCode('M000022-source');return false;">[Source]</a></p>
217
- <div class="method-source-code" id="M000022-source">
218
- <pre>
219
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 174</span>
220
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">close</span>
221
- <span class="ruby-keyword kw">end</span>
222
- </pre>
223
- </div>
224
- </div>
225
- </div>
226
-
227
- <div id="method-M000021" class="method-detail">
228
- <a name="M000021"></a>
229
-
230
- <div class="method-heading">
231
- <a href="#M000021" class="method-signature">
232
- <span class="method-name">doc_count_totals</span><span class="method-args">()</span>
233
- </a>
234
- </div>
235
-
236
- <div class="method-description">
237
- <p><a class="source-toggle" href="#"
238
- onclick="toggleCode('M000021-source');return false;">[Source]</a></p>
239
- <div class="method-source-code" id="M000021-source">
240
- <pre>
241
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 167</span>
242
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">doc_count_totals</span>
243
- <span class="ruby-identifier">get_summary</span> <span class="ruby-value str">&quot;doc_count&quot;</span>
244
- <span class="ruby-keyword kw">end</span>
245
- </pre>
246
- </div>
247
- </div>
248
- </div>
249
-
250
- <div id="method-M000012" class="method-detail">
251
- <a name="M000012"></a>
252
-
253
- <div class="method-heading">
254
- <a href="#M000012" class="method-signature">
255
- <span class="method-name">drop_tables</span><span class="method-args">()</span>
256
- </a>
257
- </div>
258
-
259
- <div class="method-description">
260
- <p>
261
- Drop ankusa keyspace, <a href="CassandraStorage.html#M000011">reset</a>
262
- internal caches
263
- </p>
264
- <p>
265
- FIXME: truncate doesn&#8216;t work with cassandra-beta2
266
- </p>
267
- <p><a class="source-toggle" href="#"
268
- onclick="toggleCode('M000012-source');return false;">[Source]</a></p>
269
- <div class="method-source-code" id="M000012-source">
270
- <pre>
271
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 50</span>
272
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">drop_tables</span>
273
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">truncate!</span>(<span class="ruby-value str">'classes'</span>)
274
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">truncate!</span>(<span class="ruby-value str">'totals'</span>)
275
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">drop_keyspace</span>(<span class="ruby-ivar">@keyspace</span>)
276
- <span class="ruby-ivar">@klass_word_counts</span> = {}
277
- <span class="ruby-ivar">@klass_doc_counts</span> = {}
278
- <span class="ruby-keyword kw">end</span>
279
- </pre>
280
- </div>
281
- </div>
282
- </div>
283
-
284
- <div id="method-M000017" class="method-detail">
285
- <a name="M000017"></a>
286
-
287
- <div class="method-heading">
288
- <a href="#M000017" class="method-signature">
289
- <span class="method-name">get_doc_count</span><span class="method-args">(klass)</span>
290
- </a>
291
- </div>
292
-
293
- <div class="method-description">
294
- <p>
295
- Fetch total documents for a given class and cache it
296
- </p>
297
- <p><a class="source-toggle" href="#"
298
- onclick="toggleCode('M000017-source');return false;">[Source]</a></p>
299
- <div class="method-source-code" id="M000017-source">
300
- <pre>
301
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 109</span>
302
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
303
- <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-value str">&quot;doc_count&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>
304
- <span class="ruby-keyword kw">end</span>
305
- </pre>
306
- </div>
307
- </div>
308
- </div>
309
-
310
- <div id="method-M000016" class="method-detail">
311
- <a name="M000016"></a>
312
-
313
- <div class="method-heading">
314
- <a href="#M000016" class="method-signature">
315
- <span class="method-name">get_total_word_count</span><span class="method-args">(klass)</span>
316
- </a>
317
- </div>
318
-
319
- <div class="method-description">
320
- <p>
321
- Fetch total word count for a given class and cache it
322
- </p>
323
- <p><a class="source-toggle" href="#"
324
- onclick="toggleCode('M000016-source');return false;">[Source]</a></p>
325
- <div class="method-source-code" id="M000016-source">
326
- <pre>
327
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 102</span>
328
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
329
- <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-value str">&quot;wordcount&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>
330
- <span class="ruby-keyword kw">end</span>
331
- </pre>
332
- </div>
333
- </div>
334
- </div>
335
-
336
- <div id="method-M000015" class="method-detail">
337
- <a name="M000015"></a>
338
-
339
- <div class="method-heading">
340
- <a href="#M000015" class="method-signature">
341
- <span class="method-name">get_vocabulary_sizes</span><span class="method-args">()</span>
342
- </a>
343
- </div>
344
-
345
- <div class="method-description">
346
- <p>
347
- Does a table &#8216;scan&#8217; of summary table pulling out the
348
- &#8216;vocabsize&#8217; column from each row. Generates a hash of (class,
349
- vocab_size) key value pairs
350
- </p>
351
- <p><a class="source-toggle" href="#"
352
- onclick="toggleCode('M000015-source');return false;">[Source]</a></p>
353
- <div class="method-source-code" id="M000015-source">
354
- <pre>
355
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 95</span>
356
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_vocabulary_sizes</span>
357
- <span class="ruby-identifier">get_summary</span> <span class="ruby-value str">&quot;vocabsize&quot;</span>
358
- <span class="ruby-keyword kw">end</span>
359
- </pre>
360
- </div>
361
- </div>
362
- </div>
363
-
364
- <div id="method-M000014" class="method-detail">
365
- <a name="M000014"></a>
366
-
367
- <div class="method-heading">
368
- <a href="#M000014" class="method-signature">
369
- <span class="method-name">get_word_counts</span><span class="method-args">(word)</span>
370
- </a>
371
- </div>
372
-
373
- <div class="method-description">
374
- <p>
375
- Fetch hash of word counts as a single row from cassandra. Here column_name
376
- is the class and column value is the count
377
- </p>
378
- <p><a class="source-toggle" href="#"
379
- onclick="toggleCode('M000014-source');return false;">[Source]</a></p>
380
- <div class="method-source-code" id="M000014-source">
381
- <pre>
382
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 84</span>
383
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
384
- <span class="ruby-comment cmt"># fetch all (class,count) pairs for a given word</span>
385
- <span class="ruby-identifier">row</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">:classes</span>, <span class="ruby-identifier">word</span>.<span class="ruby-identifier">to_s</span>)
386
- <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">to_hash</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">empty?</span>
387
- <span class="ruby-identifier">row</span>.<span class="ruby-identifier">inject</span>({}){<span class="ruby-operator">|</span><span class="ruby-identifier">counts</span>, <span class="ruby-identifier">col</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">col</span>.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_sym</span>] = [<span class="ruby-identifier">col</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>,<span class="ruby-value">0</span>].<span class="ruby-identifier">max</span>; <span class="ruby-identifier">counts</span>}
388
- <span class="ruby-keyword kw">end</span>
389
- </pre>
390
- </div>
391
- </div>
392
- </div>
393
-
394
- <div id="method-M000020" class="method-detail">
395
- <a name="M000020"></a>
396
-
397
- <div class="method-heading">
398
- <a href="#M000020" class="method-signature">
399
- <span class="method-name">incr_doc_count</span><span class="method-args">(klass, count)</span>
400
- </a>
401
- </div>
402
-
403
- <div class="method-description">
404
- <p>
405
- Increment total document count for a given class by &#8216;count&#8216;
406
- </p>
407
- <p><a class="source-toggle" href="#"
408
- onclick="toggleCode('M000020-source');return false;">[Source]</a></p>
409
- <div class="method-source-code" id="M000020-source">
410
- <pre>
411
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 159</span>
412
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
413
- <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
414
- <span class="ruby-identifier">doc_count</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;doc_count&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
415
- <span class="ruby-identifier">doc_count</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
416
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-value str">&quot;doc_count&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">doc_count</span>.<span class="ruby-identifier">to_s</span>})
417
- <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">doc_count</span>
418
- <span class="ruby-keyword kw">end</span>
419
- </pre>
420
- </div>
421
- </div>
422
- </div>
423
-
424
- <div id="method-M000019" class="method-detail">
425
- <a name="M000019"></a>
426
-
427
- <div class="method-heading">
428
- <a href="#M000019" class="method-signature">
429
- <span class="method-name">incr_total_word_count</span><span class="method-args">(klass, count)</span>
430
- </a>
431
- </div>
432
-
433
- <div class="method-description">
434
- <p>
435
- Increment total word count for a given class by &#8216;count&#8216;
436
- </p>
437
- <p><a class="source-toggle" href="#"
438
- onclick="toggleCode('M000019-source');return false;">[Source]</a></p>
439
- <div class="method-source-code" id="M000019-source">
440
- <pre>
441
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 148</span>
442
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
443
- <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
444
- <span class="ruby-identifier">wordcount</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;wordcount&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
445
- <span class="ruby-identifier">wordcount</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
446
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-value str">&quot;wordcount&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">wordcount</span>.<span class="ruby-identifier">to_s</span>})
447
- <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">wordcount</span>
448
- <span class="ruby-keyword kw">end</span>
449
- </pre>
450
- </div>
451
- </div>
452
- </div>
453
-
454
- <div id="method-M000018" class="method-detail">
455
- <a name="M000018"></a>
456
-
457
- <div class="method-heading">
458
- <a href="#M000018" class="method-signature">
459
- <span class="method-name">incr_word_count</span><span class="method-args">(klass, word, count)</span>
460
- </a>
461
- </div>
462
-
463
- <div class="method-description">
464
- <p>
465
- Increment the count for a given (word,class) pair. Evidently, cassandra
466
- does not support atomic increment/decrement. Psh. HBase uses ZooKeeper to
467
- implement atomic operations, ain&#8216;t it special?
468
- </p>
469
- <p><a class="source-toggle" href="#"
470
- onclick="toggleCode('M000018-source');return false;">[Source]</a></p>
471
- <div class="method-source-code" id="M000018-source">
472
- <pre>
473
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 118</span>
474
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
475
- <span class="ruby-comment cmt"># Only wants strings</span>
476
- <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
477
- <span class="ruby-identifier">word</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">to_s</span>
478
-
479
- <span class="ruby-identifier">prior_count</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">:classes</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">klass</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
480
- <span class="ruby-identifier">new_count</span> = <span class="ruby-identifier">prior_count</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">count</span>
481
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-identifier">:classes</span>, <span class="ruby-identifier">word</span>, {<span class="ruby-identifier">klass</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">new_count</span>.<span class="ruby-identifier">to_s</span>})
482
-
483
- <span class="ruby-keyword kw">if</span> (<span class="ruby-identifier">prior_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">count</span> <span class="ruby-operator">&gt;</span> <span class="ruby-value">0</span>)
484
- <span class="ruby-comment cmt">#</span>
485
- <span class="ruby-comment cmt"># we've never seen this word before and we're not trying to unlearn it</span>
486
- <span class="ruby-comment cmt">#</span>
487
- <span class="ruby-identifier">vocab_size</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;vocabsize&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
488
- <span class="ruby-identifier">vocab_size</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
489
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-value str">&quot;vocabsize&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">vocab_size</span>.<span class="ruby-identifier">to_s</span>})
490
- <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">new_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
491
- <span class="ruby-comment cmt">#</span>
492
- <span class="ruby-comment cmt"># we've seen this word before but we're trying to unlearn it</span>
493
- <span class="ruby-comment cmt">#</span>
494
- <span class="ruby-identifier">vocab_size</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;vocabsize&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
495
- <span class="ruby-identifier">vocab_size</span> <span class="ruby-operator">-=</span> <span class="ruby-value">1</span>
496
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-identifier">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-value str">&quot;vocabsize&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">vocab_size</span>.<span class="ruby-identifier">to_s</span>})
497
- <span class="ruby-keyword kw">end</span>
498
- <span class="ruby-identifier">new_count</span>
499
- <span class="ruby-keyword kw">end</span>
500
- </pre>
501
- </div>
502
- </div>
503
- </div>
504
-
505
- <div id="method-M000013" class="method-detail">
506
- <a name="M000013"></a>
507
-
508
- <div class="method-heading">
509
- <a href="#M000013" class="method-signature">
510
- <span class="method-name">init_tables</span><span class="method-args">()</span>
511
- </a>
512
- </div>
513
-
514
- <div class="method-description">
515
- <p>
516
- Create required keyspace and column families
517
- </p>
518
- <p><a class="source-toggle" href="#"
519
- onclick="toggleCode('M000013-source');return false;">[Source]</a></p>
520
- <div class="method-source-code" id="M000013-source">
521
- <pre>
522
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 62</span>
523
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">init_tables</span>
524
- <span class="ruby-comment cmt"># Do nothing if keyspace already exists</span>
525
- <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspaces</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-ivar">@keyspace</span>)
526
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspace</span> = <span class="ruby-ivar">@keyspace</span>
527
- <span class="ruby-keyword kw">else</span>
528
- <span class="ruby-identifier">freq_table</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">ColumnFamily</span>.<span class="ruby-identifier">new</span>({<span class="ruby-identifier">:keyspace</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>, <span class="ruby-identifier">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value str">&quot;classes&quot;</span>}) <span class="ruby-comment cmt"># word =&gt; {classname =&gt; count}</span>
529
- <span class="ruby-identifier">summary_table</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">ColumnFamily</span>.<span class="ruby-identifier">new</span>({<span class="ruby-identifier">:keyspace</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>, <span class="ruby-identifier">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value str">&quot;totals&quot;</span>}) <span class="ruby-comment cmt"># class =&gt; {wordcount =&gt; count}</span>
530
- <span class="ruby-identifier">ks_def</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">Keyspace</span>.<span class="ruby-identifier">new</span>({
531
- <span class="ruby-identifier">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>,
532
- <span class="ruby-identifier">:strategy_class</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value str">'org.apache.cassandra.locator.SimpleStrategy'</span>,
533
- <span class="ruby-identifier">:replication_factor</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value">1</span>,
534
- <span class="ruby-identifier">:cf_defs</span> =<span class="ruby-operator">&gt;</span> [<span class="ruby-identifier">freq_table</span>, <span class="ruby-identifier">summary_table</span>]
535
- })
536
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">add_keyspace</span> <span class="ruby-identifier">ks_def</span>
537
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspace</span> = <span class="ruby-ivar">@keyspace</span>
538
- <span class="ruby-keyword kw">end</span>
539
- <span class="ruby-keyword kw">end</span>
540
- </pre>
541
- </div>
542
- </div>
543
- </div>
544
-
545
- <div id="method-M000011" class="method-detail">
546
- <a name="M000011"></a>
547
-
548
- <div class="method-heading">
549
- <a href="#M000011" class="method-signature">
550
- <span class="method-name">reset</span><span class="method-args">()</span>
551
- </a>
552
- </div>
553
-
554
- <div class="method-description">
555
- <p><a class="source-toggle" href="#"
556
- onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
557
- <div class="method-source-code" id="M000011-source">
558
- <pre>
559
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 40</span>
560
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">reset</span>
561
- <span class="ruby-identifier">drop_tables</span>
562
- <span class="ruby-identifier">init_tables</span>
563
- <span class="ruby-keyword kw">end</span>
564
- </pre>
565
- </div>
566
- </div>
567
- </div>
568
-
569
- <h3 class="section-bar">Protected Instance methods</h3>
570
-
571
- <div id="method-M000023" class="method-detail">
572
- <a name="M000023"></a>
573
-
574
- <div class="method-heading">
575
- <a href="#M000023" class="method-signature">
576
- <span class="method-name">get_summary</span><span class="method-args">(name)</span>
577
- </a>
578
- </div>
579
-
580
- <div class="method-description">
581
- <p>
582
- Fetch 100 rows from summary table, yes, increase if necessary
583
- </p>
584
- <p><a class="source-toggle" href="#"
585
- onclick="toggleCode('M000023-source');return false;">[Source]</a></p>
586
- <div class="method-source-code" id="M000023-source">
587
- <pre>
588
- <span class="ruby-comment cmt"># File lib/ankusa/cassandra_storage.rb, line 182</span>
589
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_summary</span>(<span class="ruby-identifier">name</span>)
590
- <span class="ruby-identifier">counts</span> = {}
591
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get_range</span>(<span class="ruby-identifier">:totals</span>, {<span class="ruby-identifier">:start</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value str">''</span>, <span class="ruby-identifier">:finish</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value str">''</span>, <span class="ruby-identifier">:count</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@max_classes</span>}).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">key_slice</span><span class="ruby-operator">|</span>
592
- <span class="ruby-comment cmt"># keyslice is a clunky thrift object, map into a ruby hash</span>
593
- <span class="ruby-identifier">row</span> = <span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">columns</span>.<span class="ruby-identifier">inject</span>({}){<span class="ruby-operator">|</span><span class="ruby-identifier">hsh</span>, <span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-identifier">hsh</span>[<span class="ruby-identifier">c</span>.<span class="ruby-identifier">column</span>.<span class="ruby-identifier">name</span>] = <span class="ruby-identifier">c</span>.<span class="ruby-identifier">column</span>.<span class="ruby-identifier">value</span>; <span class="ruby-identifier">hsh</span>}
594
- <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">key</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">row</span>[<span class="ruby-identifier">name</span>].<span class="ruby-identifier">to_f</span>
595
- <span class="ruby-keyword kw">end</span>
596
- <span class="ruby-identifier">counts</span>
597
- <span class="ruby-keyword kw">end</span>
598
- </pre>
599
- </div>
600
- </div>
601
- </div>
602
-
603
-
604
- </div>
605
-
606
-
607
- </div>
608
-
609
-
610
- <div id="validator-badges">
611
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
612
- </div>
613
-
614
- </body>
615
- </html>