cassandra_datum 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ require 'rails/railtie'
2
+
3
# Rails integration: makes the gem's rake tasks available to the host application.
#
# NOTE(review): the namespace is AppletonResque although the tasks it loads live under
# cassandra_datum/ -- this looks like a copy/paste leftover. Confirm nothing references
# AppletonResque::Railtie before renaming it.
module AppletonResque
  class Railtie < Rails::Railtie
    # The require sits inside the rake_tasks block, so it runs when Rails
    # evaluates that block rather than when this file is loaded.
    rake_tasks { require 'cassandra_datum/tasks' }
  end
end
@@ -0,0 +1,147 @@
1
+ namespace :cassandra do
2
+
3
+ # TODO (davebenvenuti 10/4/12) these tasks should use the hosts field from configuration, but we need to figure out a way to deal with the embedded ruby first
4
+
5
# cassandra:reset -- drops the keyspace, then recreates and migrates it.
desc "Reset (Drop, Create, Remigrate) the Cassandra schema"
task :reset do
  # A Thrift failure while dropping is tolerated so the remaining steps still run.
  begin
    Rake::Task['cassandra:drop'].invoke
  rescue Thrift::Exception => drop_error
    puts "ignoring thrift exception #{drop_error} (keyspace probably doesn't exist)"
  end

  # Order matters: the keyspace must exist before column families are added.
  %w[cassandra:create cassandra:migrate].each do |task_name|
    Rake::Task[task_name].invoke
  end
end
16
+
17
# cassandra:drop -- removes the configured keyspace.
desc "Drop the keyspace from Cassandra as defined in config/cassandra.yml"
task :drop do
  # The request goes through a client bound to the "system" keyspace on this
  # machine's hostname, port 9160.
  local_node = "#{`hostname`.strip}:9160"
  system_client = Cassandra.new("system", [local_node], { :connect_timeout => 1 })

  begin
    puts "Dropping keyspace #{keyspace_name}..."
    with_thrift_timeout_retry { system_client.drop_keyspace(keyspace_name) }
  rescue Thrift::Exception => thrift_error
    # Thrift failures abort the task unless IGNORE_THRIFT_EXCEPTIONS is set.
    raise thrift_error unless ENV['IGNORE_THRIFT_EXCEPTIONS']

    puts "ignoring thrift exception #{thrift_error}"
  ensure
    system_client.disconnect!
  end
end
38
+
39
# cassandra:create -- adds the configured keyspace.
desc "Create the keyspace in Cassandra as defined in config/cassandra.yml"
task :create do
  # The request goes through a client bound to the "system" keyspace on this
  # machine's hostname, port 9160.
  local_node = "#{`hostname`.strip}:9160"
  system_client = Cassandra.new("system", [local_node], { :connect_timeout => 1 })

  begin
    puts "Creating keyspace #{keyspace_name}..."

    # SimpleStrategy with replication factor 1 and no column families yet.
    ks_def = CassandraThrift::KsDef.new({
      :name => keyspace_name,
      :strategy_class => 'org.apache.cassandra.locator.SimpleStrategy',
      :strategy_options => { 'replication_factor' => '1' },
      :cf_defs => []
    })

    with_thrift_timeout_retry { system_client.add_keyspace(ks_def) }
  rescue Thrift::Exception => thrift_error
    # Thrift failures abort the task unless IGNORE_THRIFT_EXCEPTIONS is set.
    raise thrift_error unless ENV['IGNORE_THRIFT_EXCEPTIONS']

    puts "ignoring thrift exception #{thrift_error}"
  ensure
    system_client.disconnect!
  end

  true
end
69
+
70
# cassandra:migrate -- creates every configured column family that does not exist yet.
#
# Thrift errors are handled per column family: when IGNORE_THRIFT_EXCEPTIONS is set,
# a failure is logged and the task moves on to the next column family instead of
# silently skipping all remaining ones; otherwise the error aborts the task.
desc "Create column families as defined in config/cassandra.yml"
task :migrate do
  client = Cassandra.new keyspace_name, ["#{`hostname`.strip}:9160"], { :connect_timeout => 1 }

  begin
    column_families.each do |cf|
      cf_def = CassandraThrift::CfDef.new({
        :name => cf['name'],
        :column_type => cf['column_type'],
        :comparator_type => cf['compare_with'],
        :subcomparator_type => cf['compare_subcolumns_with'],
        :keyspace => keyspace_name
      })

      begin
        # the migrate task needs a little more resilience with respect to timeouts and
        # thrift errors, so each column family goes through with_thrift_timeout_retry
        with_thrift_timeout_retry do
          client.keyspace = keyspace_name # reloads the schema so the column_families are up to date

          if client.column_families.has_key?(cf['name'])
            puts "Skipping column family #{cf['name']}, already exists"
          else
            puts "Creating column family #{cf['name']}"

            client.add_column_family cf_def
          end
        end
      rescue Thrift::Exception => e
        raise unless ENV['IGNORE_THRIFT_EXCEPTIONS']

        puts "ignoring thrift exception #{e}"
      end
    end
  ensure
    # Always release the connection, whether or not a column family failed.
    client.disconnect!
  end

  true
end
116
+
117
# Returns the 'keyspace' entry of CassandraDatum.configuration (nil when absent).
def keyspace_name
  settings = CassandraDatum.configuration
  settings['keyspace']
end
120
+
121
# Returns the 'column_families' entry of CassandraDatum.configuration (nil when absent).
def column_families
  settings = CassandraDatum.configuration
  settings['column_families']
end
124
+
125
# Runs the given block, retrying it when the Thrift transport reports a read timeout.
#
# The block is attempted at most max_tries (3) times in total. Before each retry the
# method sleeps for rand(5) seconds. A transport exception that is not a read timeout,
# or a read timeout on the last attempt, is re-raised to the caller; other exception
# classes are not rescued at all.
#
# Returns the value of the block.
def with_thrift_timeout_retry
  max_tries = 3
  attempts = 0

  begin
    attempts += 1
    yield
  rescue CassandraThrift::Cassandra::Client::TransportException => e
    # The timeout text may sit in #type (when it holds a String) or in #message
    # (when #type is an Integer code), so both are checked as strings.
    timed_out = [e.type, e.message].any? { |detail| detail.to_s =~ /Timed out reading/ }
    raise unless timed_out && attempts < max_tries

    puts "Encountered thrift exception #{e}, retrying..."

    sleep rand(5)

    retry
  end
end
146
+
147
+ end
@@ -0,0 +1,41 @@
1
module CassandraDatum
  # Assertion helpers for tests that compare CassandraDatum records and collections of them.
  #
  # When the module is included, the keyspace is cleared through CASSANDRA_CLIENT, but only
  # if Rails is defined and Rails.env.test? is true.
  module TestHelper
    extend ActiveSupport::Concern

    included do
      CASSANDRA_CLIENT.clear_keyspace! if defined?(Rails) && Rails.env.test? # extra paranoid with an operation like this
    end

    # Asserts that two lists have the same size and that elements at the same index match.
    # Array elements are compared recursively, CassandraDatum::Base elements with
    # assert_datum_equal, and everything else with assert_equal.
    #
    # explanation - optional failure message handed to every underlying assertion.
    def assert_data_equal(list1, list2, explanation=nil)
      assert_equal list1.size, list2.size, explanation
      list1.each_with_index do |x, i|
        y = list2[i]
        if x.is_a? Array
          assert_data_equal x, y, explanation
        elsif x.is_a? CassandraDatum::Base
          assert_datum_equal x, y, explanation
        else
          assert_equal x, y, explanation
        end
      end
    end

    # Asserts that two datums have the same row_id, column_name and attributes.
    #
    # explanation - optional failure message handed to every underlying assertion.
    def assert_datum_equal(datum1, datum2, explanation=nil)
      assert_equal datum1.row_id, datum2.row_id, explanation
      assert_equal datum1.column_name, datum2.column_name, explanation
      assert_hashes_equal datum1.attributes, datum2.attributes, explanation
    end

    # Asserts that two hashes have the same keys and equal values.
    # DateTime values in hash1 are compared through #to_i on both sides; all other
    # values are compared with assert_equal.
    #
    # explanation - optional failure message handed to every underlying assertion.
    def assert_hashes_equal(hash1, hash2, explanation=nil)
      assert_equal hash1.keys.size, hash2.keys.size, explanation
      hash1.keys.each do |k|
        # A key missing from hash2 reads as nil and would otherwise compare equal
        # to a nil value in hash1, hiding a key mismatch.
        assert hash2.key?(k), explanation || "expected key #{k.inspect} to be present in both hashes"

        if hash1[k].is_a? DateTime
          assert_equal hash1[k].to_i, hash2[k].to_i, explanation
        else
          assert_equal hash1[k], hash2[k], explanation
        end
      end
    end
  end

end
@@ -0,0 +1,4 @@
1
# Holds the gem's version string.
module CassandraDatum
  VERSION = '0.0.9'
end
4
+
@@ -0,0 +1,365 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/helper.rb')
2
+
3
module CassandraDatum
  # Tests for CassandraDatum::Base, exercised through the MockCassandraDatum,
  # PolymorphicCassandraDatum and OverrideColumnFamilyDatum classes and the
  # FactoryGirl factories loaded by helper.rb. Most tests read back through
  # CASSANDRA_CLIENT or the finder methods to check what was actually stored.
  class BaseTest < Test::Unit::TestCase

    should 'use timestamp long in column_name' do
      time = DateTime.now
      datum = FactoryGirl.create(:cassandra_datum, :timestamp => time)

      assert datum.column_name.end_with?(time.to_i.to_s)
    end

    should "have a reload function that pulls from cassandra" do
      datum = FactoryGirl.create(:cassandra_datum)

      assert_datum_equal datum, datum.reload
    end

    should 'properly encode string' do
      name = "No\u00eblle"

      assert_equal "UTF-8", name.encoding.to_s

      encoded_name = CassandraDatum::Base.encode_for_cassandra(name)

      # same bytes, relabelled as binary
      assert_equal "ASCII-8BIT", encoded_name.encoding.to_s
      assert_equal name.encode('UTF-8').force_encoding('ASCII-8BIT'), encoded_name
    end

    should "handle encodings" do
      enc = "\u20ACuro"
      assert_equal "UTF-8", enc.encoding.to_s

      datum = FactoryGirl.create(:cassandra_datum, :payload => enc)

      # re-fetch so the assertions below see the stored value, not the in-memory one
      datum = MockCassandraDatum.find(datum.key)

      assert_equal enc, datum.payload
      assert_equal 'UTF-8', datum.payload.encoding.to_s
    end

    should "populate type field if possible" do
      datum = FactoryGirl.create(:polymorphic_cassandra_datum)
      assert_equal datum.class.to_s, datum.type
    end

    context 'save' do
      should 'save attributes to cassandra' do
        datum = FactoryGirl.create(:cassandra_datum)

        cass_entry = MockCassandraDatum.find(datum.key)

        assert cass_entry.present?

        # fetched attribute values are compared against the string form of the originals
        cass_entry.attributes.each do |k, v|
          assert !v.nil?
          assert_equal v, datum.send(k).to_s
        end
      end

      should 'reject nil values during save' do
        datum = FactoryGirl.create(:cassandra_datum)

        cass_td = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        assert cass_td.keys.include?('payload')

        datum = FactoryGirl.create(:cassandra_datum, :payload => nil)

        cass_td = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        assert !cass_td.keys.include?('payload')
      end

      should 'not save an invalid datum' do
        datum = FactoryGirl.build(:cassandra_datum, :timestamp => nil)
        assert !datum.valid?
        assert !datum.save
        assert_raises(ActiveRecord::RecordInvalid) { datum.save! }
      end

      should 'strip invalid characters when encoding to UTF-8' do
        # "\xEF" on its own is not a complete UTF-8 sequence
        datum = FactoryGirl.build(:cassandra_datum, :payload => "my payload\xEF")

        assert_nothing_raised { datum.save! }

        fetched_datum = MockCassandraDatum.find(datum.key)
        assert_equal 'my payload', fetched_datum.payload
      end

      should 'convert arrays and hashes to json' do
        array_value = ['some', 'values']
        hash_value = { 'foo' => 'bar' }

        datum = FactoryGirl.create(:datum_with_array_and_hash, :an_array => array_value, :a_hash => hash_value)

        # read the raw columns to check the stored representation
        res = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        assert_equal array_value.to_json, res['an_array']
        assert_equal hash_value.to_json, res['a_hash']
      end
    end


    context 'destroy' do
      should 'remove datum from cassandra' do
        datum = FactoryGirl.create(:cassandra_datum)

        datum.destroy

        assert MockCassandraDatum.find_by_key(datum.key).blank?
      end
    end

    context "delete_all" do
      setup do
        @row_id = SecureRandom.hex(8)
        3.times { FactoryGirl.create(:cassandra_datum, :row_id => @row_id) }
      end

      should "delete an entire row from cassandra" do
        MockCassandraDatum.delete_all(@row_id)
        assert_equal [], MockCassandraDatum.all(:row_id => @row_id)
      end
    end


    context 'delete' do
      setup do
        @row_id = SecureRandom.hex(8)
        @data = 3.times.collect { FactoryGirl.create(:cassandra_datum, :row_id => @row_id) }
      end

      should "delete a list of column ids" do
        MockCassandraDatum.delete(@row_id, @data[0].column_name, @data[1].column_name)

        # the two deleted columns are gone, the third is untouched
        @data[0..1].each { |datum| assert_nil MockCassandraDatum.find_by_key datum.key }
        assert MockCassandraDatum.find(@data[2].key).present?
      end

      should "flatten arguments" do
        # column names may be passed as a mix of arrays and plain values
        MockCassandraDatum.delete(@row_id, [@data[0].column_name], @data[1].column_name)

        @data[0..1].each { |datum| assert_nil MockCassandraDatum.find_by_key datum.key }
        assert MockCassandraDatum.find(@data[2].key).present?
      end
    end


    context 'document lookup' do
      setup do
        @row_id = SecureRandom.hex(8)
      end

      context 'find' do

        should 'find by key' do
          datum = FactoryGirl.create(:cassandra_datum)

          doc = MockCassandraDatum.find(datum.key)

          assert_datum_equal datum, doc
        end

        should 'find by key, initialize polymorphically ' do
          datum = FactoryGirl.create(:polymorphic_cassandra_datum)

          # when we fetch with the base class, it should initialize an instance of the constantized :type attribute
          doc = MockCassandraDatum.find(datum.key)

          assert_datum_equal datum, doc
          assert_instance_of PolymorphicCassandraDatum, doc
        end

      end

      context 'all' do

        should 'be sorted by timestamp in reverse order' do
          # DateTime + i moves the timestamp forward by i days
          data = 3.times.collect { |i| FactoryGirl.build(:cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now + i) }

          data.shuffle!
          data.each {|d| d.save! } #save in random order
          data = data.sort_by(&:timestamp).reverse #reverse sort by timestamp

          res = MockCassandraDatum.all(:row_id => @row_id)

          assert_data_equal data, res, "not sorted properly: #{res.collect(&:column_name)}.\n expected: #{data.collect(&:column_name)}"
        end

        should 'convert count option to integer' do
          3.times.collect { |i| FactoryGirl.create(:cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now + i) }

          # :count is deliberately passed as a String
          res = MockCassandraDatum.all(:row_id => @row_id, :count => '2')

          assert_equal 2, res.size
        end

        should 'honor polymorphic :type column' do
          # created newest-first, which is the order .all is expected to return
          data = [
            FactoryGirl.create(:cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now),
            FactoryGirl.create(:polymorphic_cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now - 1),
            FactoryGirl.create(:cassandra_datum, :row_id => @row_id, :timestamp => DateTime.now - 2)
          ]

          res = MockCassandraDatum.all(:row_id => @row_id)

          assert_data_equal data, res
        end

      end

    end

    context "URL ID encoding" do
      setup do
        @datum = FactoryGirl.create(:cassandra_datum)
      end

      should "encode to_param" do
        assert_equal @datum.key, @datum.to_param
      end
    end

    context '#new_record?' do
      should 'be a new record before saving' do
        datum = FactoryGirl.build(:cassandra_datum)

        assert datum.new_record?
      end

      should 'not be a new record after saving' do
        datum = FactoryGirl.create(:cassandra_datum)

        assert !datum.new_record?
      end

      should 'not be a new record when coming from #find' do
        datum = FactoryGirl.create(:cassandra_datum)
        # NOTE(review): the key is splatted here but passed as a single argument in the other tests -- confirm this is intentional
        retrieved = MockCassandraDatum.find(*datum.key)

        assert !retrieved.new_record?
      end

      should 'not be a new record when coming from #all' do
        row_id = SecureRandom.uuid
        3.times { FactoryGirl.create(:cassandra_datum, :row_id => row_id) }

        retrieved = MockCassandraDatum.all(:row_id => row_id)

        retrieved.each do |retrieved_datum|
          assert !retrieved_datum.new_record?
        end
      end
    end

    context "find_each and find_each_key" do

      setup do
        @row_id = SecureRandom.uuid
      end

      should 'yield nothing for service with with no records' do
        yielded = false
        MockCassandraDatum.find_each(@row_id){ yielded = true }
        MockCassandraDatum.find_each_key(@row_id){ yielded = true }
        assert !yielded, "CassandraDatum#each should not have yielded anything"
      end

      should 'yield all records and keys' do
        data = []

        # cover all cases while crossing the per-page boundary of walk_row:
        # the row grows from 1 to 4 records while each call uses :count => 3
        4.times do |i|
          data << FactoryGirl.create(:cassandra_datum, :row_id => @row_id, :timestamp => i.days.ago)

          yielded_data = []
          yielded_keys = []
          MockCassandraDatum.find_each(@row_id, :count => 3) { |datum| yielded_data << datum }
          MockCassandraDatum.find_each_key(@row_id, :count => 3) { |key| yielded_keys << key }

          assert_data_equal data, yielded_data
          assert_data_equal data.collect(&:key), yielded_keys

          #reversed should work as well
          yielded_data = []
          yielded_keys = []
          MockCassandraDatum.find_each(@row_id, :count => 3, :reversed => true) { |datum| yielded_data << datum }
          MockCassandraDatum.find_each_key(@row_id, :count => 3, :reversed => true) { |key| yielded_keys << key }

          assert_data_equal data.reverse, yielded_data
          assert_data_equal data.reverse.collect(&:key), yielded_keys
        end
      end
    end

    context "updated_at" do
      # note for all of these tests that cassandra timestamp values are in microseconds by default, hence the / 1000000

      should "return correct updated_at for a single object" do
        datum = FactoryGirl.create(:cassandra_datum)

        cassandra_tr = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        # the newest column timestamp is the expected updated_at
        cassandra_time = cassandra_tr.timestamps.values.max / 1000000

        assert_equal cassandra_time, MockCassandraDatum.find(datum.key).updated_at.to_i
      end

      should "return correct updated_at for a multi get" do
        row_id = SecureRandom.uuid
        3.times { FactoryGirl.create(:cassandra_datum, :row_id => row_id) }

        MockCassandraDatum.find_each(row_id) do |datum|
          cassandra_tr = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
          cassandra_time = cassandra_tr.timestamps.values.max / 1000000

          assert_equal cassandra_time, datum.updated_at.to_i
        end
      end

      should "have its value populated with the initialize method when given a Cassandra::OrderedHash" do
        datum = FactoryGirl.create(:cassandra_datum)
        ordered_hash = CASSANDRA_CLIENT.get(datum.class.column_family, datum.row_id, datum.column_name)
        expected_time = ordered_hash.timestamps.values.max / 1000000

        datum = MockCassandraDatum.new(ordered_hash)
        assert datum.updated_at.is_a?(DateTime)
        assert_equal expected_time, datum.updated_at.to_time.to_i
      end
    end

    context "column_family" do
      should "default to the pluralization of the class name" do
        assert_equal 'MockCassandraData', MockCassandraDatum.column_family
      end

      should "allow override in declaration" do
        assert_equal 'MockCassandraData', OverrideColumnFamilyDatum.column_family
        datum = OverrideColumnFamilyDatum.create :payload => 'mock payload'

        assert_datum_equal datum, OverrideColumnFamilyDatum.find(datum.key)
        assert MockCassandraDatum.find(datum.key).present? #both objects are using the same column family
      end
    end

    # NOTE(review): the names of this test and the next look swapped -- this one checks
    # counts kept on MockCassandraDatum itself, while the next one registers
    # MockCassandraDatumObserver. Confirm before renaming.
    should 'support observers' do
      MockCassandraDatum.reset_before_save_counts!

      datum = FactoryGirl.create(:cassandra_datum)

      # see MockCassandraDatum definition (presumably in helper.rb, like the observer below)
      assert_equal 1, MockCassandraDatum.before_save_counts[datum]
    end

    should 'support activerecord before/after callbacks' do
      MockCassandraDatumObserver.reset_before_save_counts!

      ActiveRecord::Base.observers = MockCassandraDatumObserver
      ActiveRecord::Base.instantiate_observers

      datum = FactoryGirl.create(:cassandra_datum)

      # see MockCassandraDatumObserver definition in helper.rb
      assert_equal 1, MockCassandraDatumObserver.before_save_counts[datum]
    end

  end
end