cassandra_datum 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ require 'rails/railtie'
2
+
# NOTE(review): this railtie ships with cassandra_datum but lives in the
# AppletonResque namespace -- looks like a copy/paste leftover; confirm before renaming.
module AppletonResque
  # Hooks into Rails so that 'cassandra_datum/tasks' is required whenever
  # the host application loads its rake tasks.
  class Railtie < Rails::Railtie
    rake_tasks { require 'cassandra_datum/tasks' }
  end
end
@@ -0,0 +1,147 @@
1
+ namespace :cassandra do
2
+
3
+ # TODO (davebenvenuti 10/4/12) these tasks should use the hosts field from configuration, but we need to figure out a way to deal with the embedded ruby first
4
+
# Rebuilds the schema from scratch: drop, then create, then migrate.
# A Thrift error raised by the drop step is reported and ignored so that a
# missing keyspace does not stop the rebuild.
desc "Reset (Drop, Create, Remigrate) the Cassandra schema"
task :reset do
  begin
    Rake::Task['cassandra:drop'].invoke
  rescue Thrift::Exception => error
    puts "ignoring thrift exception #{error} (keyspace probably doesn't exist)"
  end

  # Remaining steps run in order; any failure here propagates.
  ['cassandra:create', 'cassandra:migrate'].each do |task_name|
    Rake::Task[task_name].invoke
  end
end
16
+
# Drops the configured keyspace through a client connected to the "system"
# keyspace on this machine (hostname taken from the `hostname` command, port 9160).
# Thrift errors are re-raised unless IGNORE_THRIFT_EXCEPTIONS is set in the
# environment; the client is always disconnected afterwards.
desc "Drop the keyspace from Cassandra as defined in config/cassandra.yml"
task :drop do
  server = "#{`hostname`.strip}:9160"
  client = Cassandra.new "system", [server], { :connect_timeout => 1 }

  begin
    puts "Dropping keyspace #{keyspace_name}..."

    with_thrift_timeout_retry { client.drop_keyspace(keyspace_name) }
  rescue Thrift::Exception => error
    # Bare raise re-raises the exception currently being handled.
    raise unless ENV['IGNORE_THRIFT_EXCEPTIONS']

    puts "ignoring thrift exception #{error}"
  ensure
    client.disconnect!
  end
end
38
+
# Creates the configured keyspace (SimpleStrategy, replication factor '1',
# no column families) through a client connected to the "system" keyspace on
# this machine. Thrift errors are re-raised unless IGNORE_THRIFT_EXCEPTIONS is
# set in the environment; the client is always disconnected afterwards.
desc "Create the keyspace in Cassandra as defined in config/cassandra.yml"
task :create do
  server = "#{`hostname`.strip}:9160"
  client = Cassandra.new "system", [server], { :connect_timeout => 1 }

  begin
    puts "Creating keyspace #{keyspace_name}..."

    keyspace_attributes = {
      :name => keyspace_name,
      :strategy_class => 'org.apache.cassandra.locator.SimpleStrategy',
      :strategy_options => { 'replication_factor' => '1' },
      :cf_defs => []
    }
    keyspace_definition = CassandraThrift::KsDef.new(keyspace_attributes)

    with_thrift_timeout_retry { client.add_keyspace(keyspace_definition) }
  rescue Thrift::Exception => error
    # Bare raise re-raises the exception currently being handled.
    raise unless ENV['IGNORE_THRIFT_EXCEPTIONS']

    puts "ignoring thrift exception #{error}"
  ensure
    client.disconnect!
  end

  true
end
69
+
# Creates every column family listed in the configuration that the keyspace
# does not already contain. Existing column families are skipped.
#
# Thrift errors are re-raised unless IGNORE_THRIFT_EXCEPTIONS is set in the
# environment; the client is always disconnected afterwards.
desc "Create column families as defined in config/cassandra.yml"
task :migrate do
  client = Cassandra.new keyspace_name, ["#{`hostname`.strip}:9160"], { :connect_timeout => 1 }

  # the migrate task needs a little more resilience with respect to timeouts and
  # thrift errors, so each column family goes through with_thrift_timeout_retry

  begin
    # A configuration without a 'column_families' entry means there is nothing to migrate.
    (column_families || []).each do |cf|
      cf_def = CassandraThrift::CfDef.new({
        :name => cf['name'],
        :column_type => cf['column_type'],
        :comparator_type => cf['compare_with'],
        :subcomparator_type => cf['compare_subcolumns_with'],
        :keyspace => keyspace_name
      })

      with_thrift_timeout_retry do
        client.keyspace = keyspace_name # reloads the schema so the column_families are up to date

        if client.column_families.has_key?(cf['name'])
          puts "Skipping column family #{cf['name']}, already exists"
        else
          puts "Creating column family #{cf['name']}"

          client.add_column_family cf_def
        end
      end
    end
  rescue Thrift::Exception => e
    # Bare raise re-raises the exception currently being handled.
    raise unless ENV['IGNORE_THRIFT_EXCEPTIONS']

    puts "ignoring thrift exception #{e}"
  ensure
    client.disconnect!
  end

  true
end
116
+
# Returns the 'keyspace' entry of CassandraDatum.configuration.
def keyspace_name
  settings = CassandraDatum.configuration
  settings['keyspace']
end
120
+
# Returns the 'column_families' entry of CassandraDatum.configuration.
def column_families
  settings = CassandraDatum.configuration
  settings['column_families']
end
124
+
# Runs the given block, retrying it when Thrift reports a read timeout.
#
# The block is attempted at most max_tries (3) times in total. Before each
# retry the method prints a notice and sleeps for rand(5) seconds. A transport
# exception that is not a read timeout, or a read timeout on the last
# attempt, is re-raised to the caller.
#
# Returns the value of the block.
def with_thrift_timeout_retry
  max_tries = 3
  current_try = 1

  begin
    yield
  rescue CassandraThrift::Cassandra::Client::TransportException => e
    # NOTE(review): in the Thrift Ruby library TransportException#type is a
    # numeric code and the "Timed out reading" text is carried by the message,
    # so the message has to be inspected. The type is still checked in case a
    # client version puts text there -- confirm against the thrift gem in use.
    timed_out = [e.type, e.message].any? { |detail| detail.to_s =~ /Timed out reading/ }

    # Bare raise re-raises the exception currently being handled.
    raise unless timed_out && current_try < max_tries

    puts "Encountered thrift exception #{e}, retrying..."

    sleep rand(5)

    current_try += 1

    retry
  end
end
146
+
147
+ end
@@ -0,0 +1,41 @@
module CassandraDatum
  # Assertion helpers for comparing datum objects, lists of them, and
  # attribute hashes inside test cases.
  module TestHelper
    extend ActiveSupport::Concern

    included do
      # extra paranoid with an operation like this: only clear the keyspace
      # when Rails is loaded and running in the test environment
      if defined?(Rails) && Rails.env.test?
        CASSANDRA_CLIENT.clear_keyspace!
      end
    end

    # Asserts that two lists have the same size and equal elements at each
    # position. Nested arrays are compared recursively, CassandraDatum::Base
    # elements with assert_datum_equal, anything else with assert_equal.
    def assert_data_equal(list1, list2, explanation=nil)
      assert_equal list1.size, list2.size, explanation

      list1.each_with_index do |expected, index|
        actual = list2[index]

        case expected
        when Array
          assert_data_equal expected, actual, explanation
        when CassandraDatum::Base
          assert_datum_equal expected, actual, explanation
        else
          assert_equal expected, actual, explanation
        end
      end
    end

    # Asserts that two datum objects agree on row_id, column_name and attributes.
    def assert_datum_equal(datum1, datum2, explanation=nil)
      [:row_id, :column_name].each do |field|
        assert_equal datum1.send(field), datum2.send(field), explanation
      end

      assert_hashes_equal datum1.attributes, datum2.attributes, explanation
    end

    # Asserts that two hashes have the same number of keys and that every key
    # of hash1 maps to an equal value in hash2. DateTime values are compared
    # by their to_i results.
    def assert_hashes_equal(hash1, hash2, explanation=nil)
      assert_equal hash1.keys.size, hash2.keys.size, explanation

      hash1.keys.each do |key|
        expected = hash1[key]

        if expected.is_a?(DateTime)
          assert_equal expected.to_i, hash2[key].to_i, explanation
        else
          assert_equal expected, hash2[key], explanation
        end
      end
    end
  end

end
@@ -0,0 +1,4 @@
module CassandraDatum
  # Version string of this gem release.
  VERSION = '0.0.9'
end
4
+
@@ -0,0 +1,365 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/helper.rb')
2
+
module CassandraDatum
  # Tests for CassandraDatum::Base, exercised through MockCassandraDatum and
  # related fixture classes, with records built by FactoryGirl and raw
  # reads done through CASSANDRA_CLIENT.
  class BaseTest < Test::Unit::TestCase

    should 'use timestamp long in column_name' do
      time = DateTime.now
      datum = FactoryGirl.create(:cassandra_datum, :timestamp => time)

      assert datum.column_name.end_with?(time.to_i.to_s)
    end

    should "have a reload function that pulls from cassandra" do
      datum = FactoryGirl.create(:cassandra_datum)

      assert_datum_equal datum, datum.reload
    end

    should 'properly encode string' do
      name = "No\u00eblle"

      assert_equal "UTF-8", name.encoding.to_s

      encoded_name = CassandraDatum::Base.encode_for_cassandra(name)

      assert_equal "ASCII-8BIT", encoded_name.encoding.to_s
      assert_equal name.encode('UTF-8').force_encoding('ASCII-8BIT'), encoded_name
    end

    should "handle encodings" do
      enc = "\u20ACuro"
      assert_equal "UTF-8", enc.encoding.to_s

      datum = FactoryGirl.create(:cassandra_datum, :payload => enc)

      datum = MockCassandraDatum.find(datum.key)

      assert_equal enc, datum.payload
      assert_equal 'UTF-8', datum.payload.encoding.to_s
    end

    should "populate type field if possible" do
      datum = FactoryGirl.create(:polymorphic_cassandra_datum)
      assert_equal datum.class.to_s, datum.type
    end

    context 'save' do
      should 'save attributes to cassandra' do
        datum = FactoryGirl.create(:cassandra_datum)

        cass_entry = MockCassandraDatum.find(datum.key)

        assert cass_entry.present?

        cass_entry.attributes.each do |k, v|
          assert !v.nil?
          assert_equal v, datum.send(k).to_s
        end
      end

      should 'reject nil values during save' do
        datum = FactoryGirl.create(:cassandra_datum)

        cass_td = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        assert cass_td.keys.include?('payload')

        datum = FactoryGirl.create(:cassandra_datum, :payload => nil)

        cass_td = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        assert !cass_td.keys.include?('payload')
      end

      should 'not save an invalid datum' do
        datum = FactoryGirl.build(:cassandra_datum, :timestamp => nil)
        assert !datum.valid?
        assert !datum.save
        assert_raises(ActiveRecord::RecordInvalid) { datum.save! }
      end

      should 'strip invalid characters when encoding to UTF-8' do
        datum = FactoryGirl.build(:cassandra_datum, :payload => "my payload\xEF")

        assert_nothing_raised { datum.save! }

        fetched_datum = MockCassandraDatum.find(datum.key)
        assert_equal 'my payload', fetched_datum.payload
      end

      should 'convert arrays and hashes to json' do
        array_value = ['some', 'values']
        hash_value = { 'foo' => 'bar' }

        datum = FactoryGirl.create(:datum_with_array_and_hash, :an_array => array_value, :a_hash => hash_value)

        res = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        assert_equal array_value.to_json, res['an_array']
        assert_equal hash_value.to_json, res['a_hash']
      end
    end


    context 'destroy' do
      should 'remove datum from cassandra' do
        datum = FactoryGirl.create(:cassandra_datum)

        datum.destroy

        assert MockCassandraDatum.find_by_key(datum.key).blank?
      end
    end

    context "delete_all" do
      setup do
        @row_id = SecureRandom.hex(8)
        3.times { FactoryGirl.create(:cassandra_datum, :row_id => @row_id) }
      end

      should "delete an entire row from cassandra" do
        MockCassandraDatum.delete_all(@row_id)
        assert_equal [], MockCassandraDatum.all(:row_id => @row_id)
      end
    end


    context 'delete' do
      setup do
        @row_id = SecureRandom.hex(8)
        @data = 3.times.collect { FactoryGirl.create(:cassandra_datum, :row_id => @row_id) }
      end

      should "delete a list of column ids" do
        MockCassandraDatum.delete(@row_id, @data[0].column_name, @data[1].column_name)

        @data[0..1].each { |datum| assert_nil MockCassandraDatum.find_by_key datum.key }
        assert MockCassandraDatum.find(@data[2].key).present?
      end

      should "flatten arguments" do
        MockCassandraDatum.delete(@row_id, [@data[0].column_name], @data[1].column_name)

        @data[0..1].each { |datum| assert_nil MockCassandraDatum.find_by_key datum.key }
        assert MockCassandraDatum.find(@data[2].key).present?
      end
    end


    context 'document lookup' do
      setup do
        @row_id = SecureRandom.hex(8)
      end

      context 'find' do

        should 'find by key' do
          datum = FactoryGirl.create(:cassandra_datum)

          doc = MockCassandraDatum.find(datum.key)

          assert_datum_equal datum, doc
        end

        should 'find by key, initialize polymorphically ' do
          datum = FactoryGirl.create(:polymorphic_cassandra_datum)

          # when we fetch with the base class, it should initialize an instance of the constantized :type attribute
          doc = MockCassandraDatum.find(datum.key)

          assert_datum_equal datum, doc
          assert_instance_of PolymorphicCassandraDatum, doc
        end

      end

      context 'all' do

        should 'be sorted by timestamp in reverse order' do
          data = 3.times.collect { |i| FactoryGirl.build(:cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now + i) }

          data.shuffle!
          data.each {|d| d.save! } #save in random order
          data = data.sort_by(&:timestamp).reverse #reverse sort by timestamp

          res = MockCassandraDatum.all(:row_id => @row_id)

          assert_data_equal data, res, "not sorted properly: #{res.collect(&:column_name)}.\n expected: #{data.collect(&:column_name)}"
        end

        should 'convert count option to integer' do
          3.times.collect { |i| FactoryGirl.create(:cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now + i) }

          res = MockCassandraDatum.all(:row_id => @row_id, :count => '2')

          assert_equal 2, res.size
        end

        should 'honor polymorphic :type column' do
          data = [
            FactoryGirl.create(:cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now),
            FactoryGirl.create(:polymorphic_cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now - 1),
            FactoryGirl.create(:cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now - 2)
          ]

          res = MockCassandraDatum.all(:row_id => @row_id)

          assert_data_equal data, res
        end

      end

    end

    context "URL ID encoding" do
      setup do
        @datum = FactoryGirl.create(:cassandra_datum)
      end

      should "encode to_param" do
        assert_equal @datum.key, @datum.to_param
      end
    end

    context '#new_record?' do
      should 'be a new record before saving' do
        datum = FactoryGirl.build(:cassandra_datum)

        assert datum.new_record?
      end

      should 'not be a new record after saving' do
        datum = FactoryGirl.create(:cassandra_datum)

        assert !datum.new_record?
      end

      should 'not be a new record when coming from #find' do
        datum = FactoryGirl.create(:cassandra_datum)
        retrieved = MockCassandraDatum.find(*datum.key)

        assert !retrieved.new_record?
      end

      should 'not be a new record when coming from #all' do
        row_id = SecureRandom.uuid
        3.times { FactoryGirl.create(:cassandra_datum, :row_id => row_id) }

        retrieved = MockCassandraDatum.all(:row_id => row_id)

        retrieved.each do |retrieved_datum|
          assert !retrieved_datum.new_record?
        end
      end
    end

    context "find_each and find_each_key" do

      setup do
        @row_id = SecureRandom.uuid
      end

      should 'yield nothing for service with no records' do
        yielded = false
        MockCassandraDatum.find_each(@row_id){ yielded = true }
        MockCassandraDatum.find_each_key(@row_id){ yielded = true }
        assert !yielded, "CassandraDatum#each should not have yielded anything"
      end

      should 'yield all records and keys' do
        data = []

        # cover all cases while crossing the per-page boundary of walk_row:
        # with :count => 3, rows of 1 to 4 records are each walked in turn
        4.times do |i|
          data << FactoryGirl.create(:cassandra_datum, :row_id => @row_id, :timestamp => i.days.ago)

          yielded_data = []
          yielded_keys = []
          MockCassandraDatum.find_each(@row_id, :count => 3) { |datum| yielded_data << datum }
          MockCassandraDatum.find_each_key(@row_id, :count => 3) { |key| yielded_keys << key }

          assert_data_equal data, yielded_data
          assert_data_equal data.collect(&:key), yielded_keys

          #reversed should work as well
          yielded_data = []
          yielded_keys = []
          MockCassandraDatum.find_each(@row_id, :count => 3, :reversed => true) { |datum| yielded_data << datum }
          MockCassandraDatum.find_each_key(@row_id, :count => 3, :reversed => true) { |key| yielded_keys << key }

          assert_data_equal data.reverse, yielded_data
          assert_data_equal data.reverse.collect(&:key), yielded_keys
        end
      end
    end

    context "updated_at" do
      # note for all of these tests that cassandra timestamp values are in microseconds by default, hence the / 1000000

      should "return correct updated_at for a single object" do
        datum = FactoryGirl.create(:cassandra_datum)

        cassandra_tr = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        cassandra_time = cassandra_tr.timestamps.values.max / 1000000

        assert_equal cassandra_time, MockCassandraDatum.find(datum.key).updated_at.to_i
      end

      should "return correct updated_at for a multi get" do
        row_id = SecureRandom.uuid
        3.times { FactoryGirl.create(:cassandra_datum, :row_id => row_id) }

        MockCassandraDatum.find_each(row_id) do |datum|
          cassandra_tr = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
          cassandra_time = cassandra_tr.timestamps.values.max / 1000000

          assert_equal cassandra_time, datum.updated_at.to_i
        end
      end

      should "have its value populated with the initialize method when given a Cassandra::OrderedHash" do
        datum = FactoryGirl.create(:cassandra_datum)
        ordered_hash = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        expected_time = ordered_hash.timestamps.values.max / 1000000

        datum = MockCassandraDatum.new(ordered_hash)
        assert datum.updated_at.is_a?(DateTime)
        assert_equal expected_time, datum.updated_at.to_time.to_i
      end
    end

    context "column_family" do
      should "default to the pluralization of the class name" do
        assert_equal 'MockCassandraData', MockCassandraDatum.column_family
      end

      should "allow override in declaration" do
        assert_equal 'MockCassandraData', OverrideColumnFamilyDatum.column_family
        datum = OverrideColumnFamilyDatum.create :payload => 'mock payload'

        assert_datum_equal datum, OverrideColumnFamilyDatum.find(datum.key)
        assert MockCassandraDatum.find(datum.key).present? #both objects are using the same column family
      end
    end

    # This test reads the counter kept by the model class itself, so it is
    # the callback test (its description used to be swapped with the next one).
    should 'support activerecord before/after callbacks' do
      MockCassandraDatum.reset_before_save_counts!

      datum = FactoryGirl.create(:cassandra_datum)

      # see MockCassandraDatum definition (presumably in helper.rb -- confirm)
      assert_equal 1, MockCassandraDatum.before_save_counts[datum]
    end

    # This test registers MockCassandraDatumObserver and reads its counter,
    # so it is the observer test.
    should 'support observers' do
      MockCassandraDatumObserver.reset_before_save_counts!

      ActiveRecord::Base.observers = MockCassandraDatumObserver
      ActiveRecord::Base.instantiate_observers

      datum = FactoryGirl.create(:cassandra_datum)

      # see MockCassandraDatumObserver definition in helper.rb
      assert_equal 1, MockCassandraDatumObserver.before_save_counts[datum]
    end

  end
end