mcmire-cassandra 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data/CHANGELOG +108 -0
  2. data/LICENSE +202 -0
  3. data/Manifest +63 -0
  4. data/README.md +352 -0
  5. data/Rakefile +169 -0
  6. data/bin/cassandra_helper +16 -0
  7. data/conf/0.6/cassandra.in.sh +47 -0
  8. data/conf/0.6/log4j.properties +38 -0
  9. data/conf/0.6/schema.json +57 -0
  10. data/conf/0.6/storage-conf.xml +352 -0
  11. data/conf/0.7/cassandra.in.sh +46 -0
  12. data/conf/0.7/cassandra.yaml +336 -0
  13. data/conf/0.7/log4j-server.properties +41 -0
  14. data/conf/0.7/schema.json +57 -0
  15. data/conf/0.7/schema.txt +45 -0
  16. data/conf/0.8/cassandra.in.sh +41 -0
  17. data/conf/0.8/cassandra.yaml +61 -0
  18. data/conf/0.8/log4j-server.properties +40 -0
  19. data/conf/0.8/schema.json +66 -0
  20. data/conf/0.8/schema.txt +51 -0
  21. data/lib/cassandra/0.6/cassandra.rb +113 -0
  22. data/lib/cassandra/0.6/columns.rb +78 -0
  23. data/lib/cassandra/0.6/protocol.rb +90 -0
  24. data/lib/cassandra/0.6.rb +7 -0
  25. data/lib/cassandra/0.7/cassandra.rb +2 -0
  26. data/lib/cassandra/0.7/columns.rb +4 -0
  27. data/lib/cassandra/0.7/protocol.rb +5 -0
  28. data/lib/cassandra/0.7.rb +7 -0
  29. data/lib/cassandra/0.8/cassandra.rb +10 -0
  30. data/lib/cassandra/0.8/columns.rb +4 -0
  31. data/lib/cassandra/0.8/protocol.rb +21 -0
  32. data/lib/cassandra/0.8.rb +7 -0
  33. data/lib/cassandra/array.rb +8 -0
  34. data/lib/cassandra/cassandra.rb +1070 -0
  35. data/lib/cassandra/column_family.rb +3 -0
  36. data/lib/cassandra/columns.rb +144 -0
  37. data/lib/cassandra/comparable.rb +28 -0
  38. data/lib/cassandra/constants.rb +11 -0
  39. data/lib/cassandra/debug.rb +9 -0
  40. data/lib/cassandra/helpers.rb +41 -0
  41. data/lib/cassandra/keyspace.rb +3 -0
  42. data/lib/cassandra/long.rb +58 -0
  43. data/lib/cassandra/mock.rb +511 -0
  44. data/lib/cassandra/ordered_hash.rb +192 -0
  45. data/lib/cassandra/protocol.rb +120 -0
  46. data/lib/cassandra/time.rb +11 -0
  47. data/lib/cassandra.rb +38 -0
  48. data/mcmire-cassandra.gemspec +43 -0
  49. data/test/cassandra_client_test.rb +20 -0
  50. data/test/cassandra_mock_test.rb +116 -0
  51. data/test/cassandra_test.rb +863 -0
  52. data/test/comparable_types_test.rb +45 -0
  53. data/test/eventmachine_test.rb +42 -0
  54. data/test/ordered_hash_test.rb +386 -0
  55. data/test/test_helper.rb +15 -0
  56. data/vendor/0.6/gen-rb/cassandra.rb +1481 -0
  57. data/vendor/0.6/gen-rb/cassandra_constants.rb +12 -0
  58. data/vendor/0.6/gen-rb/cassandra_types.rb +482 -0
  59. data/vendor/0.7/gen-rb/cassandra.rb +1936 -0
  60. data/vendor/0.7/gen-rb/cassandra_constants.rb +12 -0
  61. data/vendor/0.7/gen-rb/cassandra_types.rb +681 -0
  62. data/vendor/0.8/gen-rb/cassandra.rb +2215 -0
  63. data/vendor/0.8/gen-rb/cassandra_constants.rb +12 -0
  64. data/vendor/0.8/gen-rb/cassandra_types.rb +824 -0
  65. metadata +200 -0
@@ -0,0 +1,1070 @@
1
+
2
+ =begin rdoc
3
+ Create a new Cassandra client instance. Accepts a keyspace name, and optional host and port.
4
+
5
+ client = Cassandra.new('twitter', '127.0.0.1:9160')
6
+
7
+ If the server requires authentication, you must authenticate before making calls
8
+
9
+ client.login!('username','password')
10
+
11
+ You can then make calls to the server via the <tt>client</tt> instance.
12
+
13
+ client.insert(:UserRelationships, "5", {"user_timeline" => {SimpleUUID::UUID.new => "1"}})
14
+ client.get(:UserRelationships, "5", "user_timeline")
15
+
16
+ For read methods, valid option parameters are:
17
+
18
+ <tt>:count</tt>:: How many results to return. Defaults to 100.
19
+ <tt>:start</tt>:: Column name token at which to start iterating, inclusive. Defaults to nil, which means the first column in the collation order.
20
+ <tt>:finish</tt>:: Column name token at which to stop iterating, inclusive. Defaults to nil, which means no boundary.
21
+ <tt>:reversed</tt>:: Swap the direction of the collation order.
22
+ <tt>:consistency</tt>:: The consistency level of the request. Defaults to <tt>Cassandra::Consistency::ONE</tt> (one node must respond). Other valid options are <tt>Cassandra::Consistency::ZERO</tt>, <tt>Cassandra::Consistency::QUORUM</tt>, and <tt>Cassandra::Consistency::ALL</tt>.
23
+
24
+ Note that some read options have no relevance in some contexts.
25
+
26
+ For write methods, valid option parameters are:
27
+
28
+ <tt>:timestamp </tt>:: The transaction timestamp. Defaults to the current time in milliseconds. This is used for conflict resolution by the server; you normally never need to change it.
29
+ <tt>:consistency</tt>:: See above.
30
+
31
+ For the initial client instantiation, you may also pass in <tt>:thrift_client_class</tt> with a ThriftClient subclass attached. On connection, that class will be used instead of the default ThriftClient class, allowing you to add additional behavior to the connection (e.g. query logging).
32
+
33
+ =end
34
+
35
+ class Cassandra
36
+ include Columns
37
+ include Protocol
38
+ include Helpers
39
+
40
+ class AccessError < StandardError #:nodoc:
41
+ end
42
+
43
+ module Consistency
44
+ include CassandraThrift::ConsistencyLevel
45
+ end
46
+
47
+ WRITE_DEFAULTS = {
48
+ :count => 1000,
49
+ :timestamp => nil,
50
+ :consistency => Consistency::ONE,
51
+ :ttl => nil
52
+ }
53
+
54
+ READ_DEFAULTS = {
55
+ :count => 100,
56
+ :start => nil,
57
+ :finish => nil,
58
+ :reversed => false,
59
+ :consistency => Consistency::ONE
60
+ }
61
+
62
+ THRIFT_DEFAULTS = {
63
+ :transport_wrapper => Thrift::FramedTransport,
64
+ :thrift_client_class => ThriftClient
65
+ }
66
+
67
+ attr_reader :keyspace, :servers, :schema, :thrift_client_options, :thrift_client_class, :auth_request
68
+
69
# Transport wrapper class that #initialize falls back to when no
# <tt>:transport_wrapper</tt> is given in the Thrift client options.
def self.DEFAULT_TRANSPORT_WRAPPER
  default_wrapper = Thrift::FramedTransport
  default_wrapper
end
72
+
73
# Create a new Cassandra instance. This only records configuration; no
# connection is opened in this method.
#
# * keyspace - The keyspace name to use.
# * servers - A single "host:port" string or a list of them; stored as an Array.
# * thrift_client_options - Options layered on top of THRIFT_DEFAULTS. A
#   missing, nil or false <tt>:transport_wrapper</tt> is replaced by
#   Cassandra.DEFAULT_TRANSPORT_WRAPPER.
#
# The hash passed as +thrift_client_options+ is never modified.
def initialize(keyspace, servers = "127.0.0.1:9160", thrift_client_options = {})
  @is_super = {}
  @column_name_class = {}
  @sub_column_name_class = {}
  @auto_discover_nodes = true
  # Work on a copy so the default below does not leak into the caller's hash.
  client_options = thrift_client_options.dup
  client_options[:transport_wrapper] ||= Cassandra.DEFAULT_TRANSPORT_WRAPPER
  @thrift_client_options = THRIFT_DEFAULTS.merge(client_options)
  @thrift_client_class = @thrift_client_options[:thrift_client_class]
  @keyspace = keyspace
  @servers = Array(servers)
end
85
+
86
##
# Switch off server auto-discovery by clearing the @auto_discover_nodes
# flag, so that only the servers given at initialization are used.
#
# This helps when the cluster talks internally over different addresses
# than the ones clients connect to. The typical case is a cluster hosted
# on EC2: nodes use the local addresses issued by Amazon, while clients
# outside EC2 have to use the public ones.
#
def disable_node_auto_discovery!
  @auto_discover_nodes = false
end
101
+
102
##
# Disconnect the current client connection, if there is one, and forget
# it. Does nothing when no client is set. Always returns nil.
#
def disconnect!
  return unless @client

  @client.disconnect!
  @client = nil
end
111
+
112
##
# Issues a login attempt using the username and password specified and
# returns whatever the client's login call returns.
#
# * username
# * password
#
def login!(username, password)
  request = CassandraThrift::AuthenticationRequest.new.tap do |auth|
    auth.credentials = {'username' => username, 'password' => password}
  end

  # @auth_request is only stored once the login call has returned, which
  # avoids a double login on the initial connect.
  client.login(request).tap { @auth_request = request }
end
129
+
130
+ def inspect
131
+ "#<Cassandra:#{object_id}, @keyspace=#{keyspace.inspect}, @schema={#{
132
+ Array(schema(false).cf_defs).map {|cfdef| ":#{cfdef.name} => #{cfdef.column_type}"}.join(', ')
133
+ }}, @servers=#{servers.inspect}>"
134
+ end
135
+
136
##
# Set the keyspace to use: tells the client to switch, drops the cached
# schema and remembers the new keyspace name.
#
# Please note that this only works on version 0.7.0 and higher; on older
# versions nothing is changed and false is returned.
def keyspace=(ks)
  return false unless Cassandra.VERSION.to_f >= 0.7

  client.set_keyspace(ks)
  @schema = nil
  @keyspace = ks
end
146
+
147
##
# Return an array with the name of every keyspace definition reported by
# the client's describe_keyspaces call.
#
# Please note that this only works on version 0.7.0 and higher (false is
# returned otherwise).
def keyspaces
  return false unless Cassandra.VERSION.to_f >= 0.7

  ks_defs = client.describe_keyspaces.to_a
  ks_defs.map { |ks_def| ks_def.name }
end
156
+
157
##
# Return a hash of the schema's column family definitions, indexed by
# their names.
#
# Returns false on versions older than 0.7.
def column_families
  return false unless Cassandra.VERSION.to_f >= 0.7

  definitions_by_name = {}
  schema.cf_defs.each do |cf_def|
    definitions_by_name[cf_def.name] = cf_def
  end
  definitions_by_name
end
165
+
166
##
# Return the current keyspace's schema as described by the server. The
# result is cached in @schema after the first fetch.
#
# * load - When false and nothing is cached yet, an empty
#   Cassandra::Keyspace is returned instead of querying the server
#   (it is not cached).
#
# Please note that this only works on version 0.7.0 and higher.
def schema(load=true)
  return false unless Cassandra.VERSION.to_f >= 0.7
  return @schema if @schema
  return Cassandra::Keyspace.new unless load

  @schema = client.describe_keyspace(@keyspace)
end
180
+
181
##
# This returns true if all servers are in agreement on the schema, i.e.
# the client reports exactly one schema version.
#
# Please note that this only works on version 0.7.0 and higher.
def schema_agreement?
  return false unless Cassandra.VERSION.to_f >= 0.7

  schema_versions = client.describe_schema_versions()
  schema_versions.length == 1
end
190
+
191
##
# Lists the current cassandra.thrift version, as reported by the client.
#
# Please note that this only works on version 0.7.0 and higher.
def version
  Cassandra.VERSION.to_f < 0.7 ? false : client.describe_version()
end
200
+
201
##
# Returns the string name specified for the cluster. The value is asked
# from the client once and then kept in @cluster_name.
#
# Please note that this only works on version 0.7.0 and higher.
def cluster_name
  return false unless Cassandra.VERSION.to_f >= 0.7

  @cluster_name = client.describe_cluster_name() unless @cluster_name
  @cluster_name
end
210
+
211
##
# Returns the client's description of the ring for the current keyspace:
# an array of CassandraThrift::TokenRange objects indicating which
# servers make up the ring, their start and end tokens, and their
# endpoints.
#
# Please note that this only works on version 0.7.0 and higher.
def ring
  if Cassandra.VERSION.to_f < 0.7
    false
  else
    client.describe_ring(@keyspace)
  end
end
222
+
223
##
# Returns a string identifying which partitioner is in use by the
# current cluster. Typically, this will be RandomPartitioner, but it
# could be OrderPreservingPartitioner as well.
#
# Please note that this only works on version 0.7.0 and higher.
def partitioner
  return false unless Cassandra.VERSION.to_f >= 0.7

  client.describe_partitioner()
end
234
+
235
##
# Remove all rows in the column family you request by asking the client
# to truncate it. Also available as clear_column_family!.
#
# * column_family - The column family to truncate; converted with to_s.
#
# This method takes no options.
#
def truncate!(column_family)
  cf_name = column_family.to_s
  client.truncate(cf_name)
end
alias clear_column_family! truncate!
247
+
248
##
# Empty every column family in the keyspace.
#
# This method calls Cassandra#truncate! for each column family found in
# the schema, and returns the list of column family definitions.
#
# Please note that this only works on version 0.7.0 and higher.
#
def clear_keyspace!
  return false unless Cassandra.VERSION.to_f >= 0.7

  schema.cf_defs.each do |cf_def|
    truncate!(cf_def.name)
  end
end
261
+
262
##
# Creates a new column family from the passed in
# Cassandra::ColumnFamily instance, and returns the schema id.
#
# A CassandraThrift::TimedOutException is reported on standard output
# and results in nil. The cached schema is dropped afterwards.
# Returns false on versions older than 0.7.
#
def add_column_family(cf_def)
  return false unless Cassandra.VERSION.to_f >= 0.7

  schema_id =
    begin
      client.system_add_column_family(cf_def)
    rescue CassandraThrift::TimedOutException => timeout
      puts "Timed out: #{timeout.inspect}"
    end
  @schema = nil
  schema_id
end
277
+
278
##
# Delete the specified column family. Return the new schema id.
#
# * column_family - The column_family name to drop.
#
# A CassandraThrift::TimedOutException is reported on standard output
# and results in nil. The cached schema is dropped afterwards.
# Returns false on versions older than 0.7.
#
def drop_column_family(column_family)
  return false unless Cassandra.VERSION.to_f >= 0.7

  schema_id =
    begin
      client.system_drop_column_family(column_family)
    rescue CassandraThrift::TimedOutException => timeout
      puts "Timed out: #{timeout.inspect}"
    end
  @schema = nil
  schema_id
end
294
+
295
##
# Rename a column family. Returns the new schema id.
#
# * old_name - The current column_family name.
# * new_name - The desired column_family name.
#
# Only attempted when the version is exactly 0.7; any other version
# returns false. A CassandraThrift::TimedOutException is reported on
# standard output and results in nil. The cached schema is dropped
# afterwards.
#
def rename_column_family(old_name, new_name)
  return false unless Cassandra.VERSION.to_f == 0.7

  schema_id =
    begin
      client.system_rename_column_family(old_name, new_name)
    rescue CassandraThrift::TimedOutException => timeout
      puts "Timed out: #{timeout.inspect}"
    end
  @schema = nil
  schema_id
end
312
+
313
##
# Update the column family based on the passed in definition and return
# the result of the client call.
#
# A CassandraThrift::TimedOutException is reported on standard output
# and results in nil. The cached schema is dropped afterwards.
# Returns false on versions older than 0.7.
#
def update_column_family(cf_def)
  return false unless Cassandra.VERSION.to_f >= 0.7

  schema_id =
    begin
      client.system_update_column_family(cf_def)
    rescue CassandraThrift::TimedOutException => timeout
      puts "Timed out: #{timeout.inspect}"
    end
  @schema = nil
  schema_id
end
327
+
328
##
# Add keyspace using the passed in keyspace definition.
#
# Returns the new schema id. A CassandraThrift::TimedOutException or a
# Thrift::TransportException is reported on standard output and results
# in nil. @keyspaces is reset afterwards.
# Returns false on versions older than 0.7.
#
def add_keyspace(ks_def)
  return false unless Cassandra.VERSION.to_f >= 0.7

  schema_id =
    begin
      client.system_add_keyspace(ks_def)
    rescue CassandraThrift::TimedOutException => timeout
      puts "Timed out: #{timeout.inspect}"
    rescue Thrift::TransportException => transport_error
      puts "Timed out: #{transport_error.inspect}"
    end
  @keyspaces = nil
  schema_id
end
346
+
347
##
# Deletes keyspace using the passed in keyspace name.
#
# Returns the new schema id. A CassandraThrift::TimedOutException or a
# Thrift::TransportException is reported on standard output and results
# in nil. @keyspaces is reset afterwards.
#
# When the keyspace that was dropped is the one this client is using and
# the drop call returned a result, the client is switched to the
# "system" keyspace.
#
# Returns false on versions older than 0.7.
#
def drop_keyspace(keyspace)
  return false if Cassandra.VERSION.to_f < 0.7

  begin
    res = client.system_drop_keyspace(keyspace)
  rescue CassandraThrift::TimedOutException => toe
    puts "Timed out: #{toe.inspect}"
  rescue Thrift::TransportException => te
    puts "Timed out: #{te.inspect}"
  end
  # Must go through the keyspace= writer: a bare `keyspace = ...` would
  # only rebind the method parameter and leave the client where it was.
  # Skipped when the call failed, since the outcome is then unknown.
  self.keyspace = "system" if res && keyspace.eql?(@keyspace)
  @keyspaces = nil
  res
end
366
+
367
##
# Renames keyspace.
#
# * old_name - Current keyspace name.
# * new_name - Desired keyspace name.
#
# Returns the new schema id. A CassandraThrift::TimedOutException or a
# Thrift::TransportException is reported on standard output and results
# in nil. @keyspaces is reset afterwards.
#
# When the renamed keyspace is the one this client is using and the
# rename call returned a result, the client is switched to +new_name+.
#
# Returns false on versions older than 0.7.
def rename_keyspace(old_name, new_name)
  return false if Cassandra.VERSION.to_f < 0.7

  begin
    res = client.system_rename_keyspace(old_name, new_name)
  rescue CassandraThrift::TimedOutException => toe
    puts "Timed out: #{toe.inspect}"
  rescue Thrift::TransportException => te
    puts "Timed out: #{te.inspect}"
  end
  # Must go through the keyspace= writer: a bare `keyspace = ...` would
  # only create a local variable and leave @keyspace on the old name.
  # Skipped when the call failed, since the outcome is then unknown.
  self.keyspace = new_name if res && old_name.eql?(@keyspace)
  @keyspaces = nil
  res
end
388
+
389
##
# Update the keyspace using the passed in keyspace definition and return
# the result of the client call.
#
# A CassandraThrift::TimedOutException or a Thrift::TransportException
# is reported on standard output and results in nil. @keyspaces is reset
# afterwards. Returns false on versions older than 0.7.
#
def update_keyspace(ks_def)
  return false unless Cassandra.VERSION.to_f >= 0.7

  schema_id =
    begin
      client.system_update_keyspace(ks_def)
    rescue CassandraThrift::TimedOutException => timeout
      puts "Timed out: #{timeout.inspect}"
    rescue Thrift::TransportException => transport_error
      puts "Timed out: #{transport_error.inspect}"
    end
  @keyspaces = nil
  schema_id
end
405
##
# Override the default write consistency (initially ONE) with the given
# value, so it need not be specified on each insert statement.
#
# The value is stored in the WRITE_DEFAULTS constant itself, so the
# change is not limited to this instance.
#
def default_write_consistency=(value)
  WRITE_DEFAULTS.store(:consistency, value)
end
413
+
414
##
# Override the default read consistency (initially ONE) with the given
# value, so it need not be specified on each query.
#
# The value is stored in the READ_DEFAULTS constant itself, so the
# change is not limited to this instance.
#
def default_read_consistency=(value)
  READ_DEFAULTS.store(:consistency, value)
end
422
+
423
##
# Main entry point for writing a row. For a super column family the
# hash must be nested (super column => columns); otherwise it is a flat
# hash of column => value.
#
# One mutation is built per top-level hash entry. When a batch is open
# (@batch is set) the mutation map is appended to the batch together
# with the consistency level; otherwise it is sent immediately.
#
# * column\_family - The column\_family that you are inserting into.
# * key - The row key to insert.
# * hash - The columns or super columns to insert.
# * options - Valid options are:
#   * :timestamp - Uses the current time if none specified.
#   * :consistency - Uses the default write consistency if none specified.
#   * :ttl - If specified this is the number of seconds after the insert that this value will be available.
#
def insert(column_family, key, hash, options = {})
  column_family, _, _, options = extract_and_validate_params(column_family, key, [options], WRITE_DEFAULTS)

  timestamp = options[:timestamp] || Time.stamp
  mutations =
    if is_super(column_family)
      hash.map { |name, sub_columns| _super_insert_mutation(column_family, name, sub_columns, timestamp, options[:ttl]) }
    else
      hash.map { |name, value| _standard_insert_mutation(column_family, name, value, timestamp, options[:ttl]) }
    end
  mutation_map = { key => { column_family => mutations } }

  if @batch
    @batch << [mutation_map, options[:consistency]]
  else
    _mutate(mutation_map, options[:consistency])
  end
end
461
+
462
+
463
##
# Delete (i.e. mark as deleted with a tombstone) a row, a column or a
# super column, depending on the parameters passed. With only a key the
# entire row is marked as deleted; with a column name that column is
# deleted.
#
# When a batch is open (@batch is set) a deletion mutation is queued on
# the batch; otherwise the thrift 'remove' call is issued right away.
#
# * column\_family - The column\_family to delete from.
# * key - The row key to delete from.
# * columns - Either a single super_column or a single column.
# * sub_columns - The sub\_column to delete.
# * options - Valid options are:
#   * :timestamp - Uses the current time if none specified.
#   * :consistency - Uses the default write consistency if none specified.
#
# TODO: we could change this function or add another that support multi-column removal (by list or predicate)
#
def remove(column_family, key, *columns_and_options)
  column_family, column, sub_column, options = extract_and_validate_params(column_family, key, columns_and_options, WRITE_DEFAULTS)

  unless @batch
    # Keep using the thrift 'remove' call outside of batches; the
    # implications of switching to a mutation here are not known.
    path_fields = {:column_family => column_family}
    if is_super(column_family)
      path_fields[:super_column] = column
      path_fields[:column] = sub_column
    else
      path_fields[:column] = column
    end
    column_path = CassandraThrift::ColumnPath.new(path_fields)
    return _remove(key, column_path, options[:timestamp] || Time.stamp, options[:consistency])
  end

  deletion = _delete_mutation(column_family, column, sub_column, options[:timestamp] || Time.stamp)
  @batch << [{ key => { column_family => [deletion] } }, options[:consistency]]
end
502
+
503
##
# Count the columns for the provided parameters.
#
# * column_family - The column_family to count in.
# * key - The row key to count columns for.
# * columns - An optional super_column to count within.
# * options - Valid options are:
#   * :start - The column name to start from.
#   * :finish - The column name to stop at.
#   * :count - The maximum count of columns to return. (By default cassandra will count up to 100 columns)
#   * :consistency - Uses the default read consistency if none specified.
#
def count_columns(column_family, key, *columns_and_options)
  column_family, super_column, _, options =
    extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  # READ_DEFAULTS names the upper bound :finish. The previous code read
  # :stop, which READ_DEFAULTS does not define, so the bound was dropped.
  # :stop is still honoured in case it reaches this point.
  finish = options[:finish] || options[:stop]
  _count_columns(column_family, key, super_column, options[:start], finish, options[:count], options[:consistency])
end
521
+
522
##
# Multi-key version of Cassandra#count_columns. Please note that this
# queries the server once for each key passed in.
#
# Supports same parameters as Cassandra#count_columns.
#
# * column_family - The column_family to count in.
# * keys - The row keys to count columns for.
# * options - Passed through unchanged to Cassandra#count_columns.
#
# Returns an OrderedHash of key => count.
#
# FIXME: Not real multi; needs server support
def multi_count_columns(column_family, keys, *options)
  key_count_pairs = keys.map do |key|
    [key, count_columns(column_family, key, *options)]
  end
  OrderedHash[*key_count_pairs._flatten_once]
end
539
+
540
##
# Fetch specific columns for the path you request and return the result
# of the underlying _get_columns call.
#
# * column_family - The column_family to read from.
# * key - The row key to read.
# * columns - Either a single super_column or a list of columns.
# * sub_columns - The list of sub_columns to select.
# * options - Valid options are:
#   * :consistency - Uses the default read consistency if none specified.
#
def get_columns(column_family, key, *columns_and_options)
  cf_name, columns, sub_columns, read_options =
    extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  consistency = read_options[:consistency]
  _get_columns(cf_name, key, columns, sub_columns, consistency)
end
555
+
556
##
# Multi-key version of Cassandra#get_columns. Please note that this
# queries the server once for each key passed in.
#
# Supports same parameters as Cassandra#get_columns
#
# * column_family - The column_family to read from.
# * keys - The row keys to read.
# * options - Passed through unchanged to Cassandra#get_columns.
#
# Returns an OrderedHash of key => result.
#
# FIXME Not real multi; needs to use a Column predicate
def multi_get_columns(column_family, keys, *options)
  key_result_pairs = keys.map do |key|
    [key, get_columns(column_family, key, *options)]
  end
  OrderedHash[*key_result_pairs._flatten_once]
end
573
+
574
##
# Return a hash (actually, a Cassandra::OrderedHash) or a single value
# representing the element at the column_family:key:[column]:[sub_column]
# path you request. Implemented as a one-key Cassandra#multi_get.
#
# * column_family - The column_family to read from.
# * key - The row key to read.
# * columns - Either a single super_column or a list of columns.
# * sub_columns - The list of sub_columns to select.
# * options - Valid options are:
#   * :count - The number of columns requested to be returned.
#   * :start - The starting value for selecting a range of columns.
#   * :finish - The final value for selecting a range of columns.
#   * :reversed - If set to true the results will be returned in
#     reverse order.
#   * :consistency - Uses the default read consistency if none specified.
#
def get(column_family, key, *columns_and_options)
  rows = multi_get(column_family, [key], *columns_and_options)
  rows[key]
end
594
+
595
##
# Multi-key version of Cassandra#get.
#
# This method allows you to select multiple rows with a single query.
# The result is an OrderedHash in the same order as +keys+. A key with
# no data maps to an empty OrderedHash when the column family is a super
# column family and no sub_column was given, and to nil otherwise.
#
# Supports the same parameters as Cassandra#get.
#
# * column_family - The column_family to read from.
# * keys - An array of keys to select.
# * columns - Either a single super_column or a list of columns.
# * sub_columns - The list of sub_columns to select.
# * options - Valid options are:
#   * :count - The number of columns requested to be returned.
#   * :start - The starting value for selecting a range of columns.
#   * :finish - The final value for selecting a range of columns.
#   * :reversed - If set to true the results will be returned in reverse order.
#   * :consistency - Uses the default read consistency if none specified.
#
def multi_get(column_family, keys, *columns_and_options)
  column_family, column, sub_column, options =
    extract_and_validate_params(column_family, keys, columns_and_options, READ_DEFAULTS)

  rows = _multiget(column_family, keys, column, sub_column, options[:count], options[:start], options[:finish], options[:reversed], options[:consistency])

  # Rebuild the result in the order the keys were requested.
  keys.inject(OrderedHash.new) do |ordered, key|
    ordered[key] = rows[key] || (OrderedHash.new if is_super(column_family) && !sub_column)
    ordered
  end
end
626
+
627
##
# Return true if the column_family:key:[column]:[sub_column] path you
# request exists.
#
# If passed in only a row key it will query for any columns (limiting
# to 1) for that row key. If a column is passed in it will query for
# that specific column/super column.
#
# The answer is false when the lookup yields nil or an empty hash, and
# true otherwise.
#
# * column_family - The column_family to look in.
# * key - The row key to look for.
# * columns - Either a single super_column or a list of columns.
# * sub_columns - The list of sub_columns to select.
# * options - Valid options are:
#   * :consistency - Uses the default read consistency if none specified.
#
def exists?(column_family, key, *columns_and_options)
  column_family, column, sub_column, options =
    extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  # Without a column the whole row is probed and sub_column is ignored.
  column_arg, sub_column_arg = column ? [column, sub_column] : [nil, nil]
  found = _multiget(column_family, [key], column_arg, sub_column_arg, 1, '', '', false, options[:consistency])[key]

  !({} == found || nil == found)
end
655
+
656
##
# Return a Cassandra::OrderedHash containing the columns specified for the given
# range of keys in the column_family you request.
#
# This method is just a convenience wrapper around Cassandra#get_range_single
# and Cassandra#get_range_batch. If :key_count, :batch_size, or a block
# is passed in Cassandra#get_range_batch will be called. Otherwise
# Cassandra#get_range_single will be used.
#
# With the RandomPartitioner, the start_key and finish_key parameters are
# only useful for iterating over all records, as is done in the
# Cassandra#each and Cassandra#each_key methods.
#
# If the table is partitioned with OrderPreservingPartitioner you may
# use the start_key and finish_key params to select all records with
# the same prefix value.
#
# If a block is passed in we will yield the row key and columns for
# each record returned.
#
# Please note that Cassandra returns a row for each row that has existed in the
# system since gc_grace_seconds. This is because deleted row keys are marked as
# deleted, but left in the system until the cluster has had reasonable time to
# replicate the deletion. This function attempts to suppress deleted rows
# (actually any row returned without columns is suppressed).
#
# Please note that when enabling the :reversed option, :start and :finish should be swapped (e.g.
# reversal happens before selecting the range).
#
# * column_family - The column_family to read from.
# * options - Valid options are:
#   * :start_key - The starting value for selecting a range of keys (only useful with OPP).
#   * :finish_key - The final value for selecting a range of keys (only useful with OPP).
#   * :key_count - The total number of keys to return from the query. (see note regarding deleted records)
#   * :batch_size - The maximum number of keys to return per query. If specified will loop until :key_count is obtained or all records have been returned.
#   * :columns - A list of columns to return.
#   * :count - The number of columns requested to be returned.
#   * :start - The starting value for selecting a range of columns.
#   * :finish - The final value for selecting a range of columns.
#   * :reversed - If set to true the results will be returned in reverse order.
#   * :consistency - Uses the default read consistency if none specified.
#
def get_range(column_family, options = {}, &blk)
  batched = blk || options[:key_count] || options[:batch_size]
  # On the single-query path there is never a block to forward.
  return get_range_single(column_family, options) unless batched

  get_range_batch(column_family, options, &blk)
end
705
+
706
##
# Return a Cassandra::OrderedHash containing the columns specified for the given
# range of keys in the column_family you request, using a single range
# query.
#
# Besides the options listed on Cassandra#get_range this accepts
# <tt>:return_empty_rows</tt> (default false), which is handed to
# multi_key_slices_to_hash.
#
# The +options+ hash passed in is not modified.
#
# See Cassandra#get_range for more details.
#
def get_range_single(column_family, options = {})
  # Work on a copy: :return_empty_rows must be removed before the options
  # are validated, and that removal must not reach the caller's hash.
  options = options.dup
  return_empty_rows = options.delete(:return_empty_rows) || false

  column_family, _, _, options =
    extract_and_validate_params(column_family, "", [options],
                                READ_DEFAULTS.merge(:start_key => '',
                                                    :finish_key => '',
                                                    :key_count => 100,
                                                    :columns => nil,
                                                    :reversed => false
                                                   )
                               )

  results = _get_range( column_family,
                        options[:start_key].to_s,
                        options[:finish_key].to_s,
                        options[:key_count],
                        options[:columns],
                        options[:start].to_s,
                        options[:finish].to_s,
                        options[:count],
                        options[:consistency],
                        options[:reversed] )

  multi_key_slices_to_hash(column_family, results, return_empty_rows)
end
738
+
739
##
# Return a Hash containing the columns specified for the given range of
# keys in the column_family you request, fetched page by page through
# Cassandra#get_range_single.
#
# If a block is passed in we will yield the row key and columns for
# each record returned and return a nil value instead of a hash.
#
# * :batch_size - Keys requested per query (default 100). Each page
#   after the first starts at the last key already seen, so a batch size
#   of 1 cannot advance past the first key.
# * :key_count - Stop after this many non-empty rows (default: no limit).
#
# Rows without columns are skipped and do not count towards :key_count.
# The +options+ hash passed in is not modified.
#
# See Cassandra#get_range for more details.
#
def get_range_batch(column_family, options = {})
  # Work on a copy so the paging state (:start_key, :key_count,
  # :return_empty_rows) never ends up in the caller's hash.
  options = options.dup
  batch_size = options.delete(:batch_size) || 100
  count = options.delete(:key_count)
  result = block_given? ? nil : {}
  num_results = 0

  start_key = options[:start_key] || ''
  last_key = nil

  while count.nil? || count > num_results
    res = get_range_single(column_family, options.merge(:start_key => last_key || start_key,
                                                        :key_count => batch_size,
                                                        :return_empty_rows => true
                                                       ))
    # No new key since the previous page: the range is exhausted.
    break if res.keys.last == last_key

    res.each do |key, columns|
      next if last_key == key      # first row of a page repeats the previous last row
      next if num_results == count # limit reached in the middle of a page

      unless columns == {}
        if block_given?
          yield key, columns
        else
          result[key] = columns
        end
        num_results += 1
      end

      last_key = key
    end
  end

  result
end
783
+
784
+ ##
785
+ # Count all rows in the column_family you request.
786
+ #
787
+ # This method just calls Cassandra#get_range_keys and returns the
788
+ # number of records returned.
789
+ #
790
+ # See Cassandra#get_range for options.
791
+ #
792
def count_range(column_family, options = {})
  # The count is simply the number of row keys in the range.
  matching_keys = get_range_keys(column_family, options)
  matching_keys.length
end
795
+
796
+ ##
797
+ # Return an Array containing all of the keys within a given range.
798
+ #
799
+ # This method just calls Cassandra#get_range and returns the
800
+ # row keys for the records returned.
801
+ #
802
+ # See Cassandra#get_range for options.
803
+ #
804
def get_range_keys(column_family, options = {})
  # Only the row keys are needed, so :count is forced to 1 (presumably the
  # per-row column limit -- confirm against get_range). A non-destructive
  # merge keeps the forced value out of the caller's hash.
  get_range(column_family, options.merge(:count => 1)).keys
end
807
+
808
+ ##
809
+ # Iterate through each key within the given parameters. This function can be
810
+ # used to iterate over each key in the given column family.
811
+ #
812
+ # This method just calls Cassandra#get_range and yields each row key.
813
+ #
814
+ # See Cassandra#get_range for options.
815
+ #
816
def each_key(column_family, options = {})
  # Page through the range and hand only the row key to the caller's block.
  get_range_batch(column_family, options) { |row_key, _row_columns| yield row_key }
end
821
+
822
+ ##
823
+ # Iterate through each row in the given column family
824
+ #
825
+ # This method just calls Cassandra#get_range and yields the key and
826
+ # columns.
827
+ #
828
+ # See Cassandra#get_range for options.
829
+ #
830
def each(column_family, options = {})
  # Page through the range, passing every row key with its columns on to
  # the caller's block.
  get_range_batch(column_family, options) { |row_key, row_columns| yield row_key, row_columns }
end
835
+
836
+ ##
837
+ # Open a batch operation and yield self. Inserts and deletes will be queued
838
+ # until the block closes, and then sent atomically to the server.
839
+ #
840
+ # Supports the :consistency option, which overrides the consistency set in
841
+ # the individual commands.
842
+ #
843
def batch(options = {})
  # Only the validated options are used; the first column family of the
  # schema is passed as the column family argument of the validator.
  _, _, _, options =
    extract_and_validate_params(schema.cf_defs.first.name, "", [options], WRITE_DEFAULTS)

  # While @batch is set, mutations are queued in it; they are rolled up by
  # compact_mutations! once the block returns.
  @batch = []
  yield(self)
  compacted_map, seen_clevels = compact_mutations!

  override = options[:consistency]
  clevel =
    if !override.nil?
      # An explicit :consistency wins over whatever the queued mutations asked for.
      override
    elsif seen_clevels.length > 1
      # No override and the queued mutations disagree: there is no safe choice.
      raise "Multiple consistency levels used in the batch, and no override...cannot pick one"
    else
      # No override, and at most one level was seen: use it.
      seen_clevels.first
    end

  _mutate(compacted_map, clevel)
ensure
  # Always leave batch mode, even when the block or the mutation raised.
  @batch = nil
end
862
+
863
+ ##
864
+ # Create secondary index.
865
+ #
866
+ # * keyspace
867
+ # * column_family
868
+ # * column_name
869
+ # * validation_class
870
+ #
871
def create_index(keyspace, column_family, column_name, validation_class)
  # Not available when the configured Cassandra version is below 0.7.
  return false if Cassandra.VERSION.to_f < 0.7

  cf_def = client.describe_keyspace(keyspace).cf_defs.find { |cf| cf.name == column_family }
  # Nothing to do for an unknown column family ...
  return if cf_def.nil?
  # ... or when the column already has metadata.
  return if cf_def.column_metadata.find { |col| col.name == column_name }

  index_def = CassandraThrift::ColumnDef.new do |cd|
    cd.name = column_name
    cd.validation_class = "org.apache.cassandra.db.marshal."+validation_class
    cd.index_type = CassandraThrift::IndexType::KEYS
  end
  cf_def.column_metadata.push(index_def)
  update_column_family(cf_def)
end
885
+
886
+ ##
887
+ # Delete secondary index.
888
+ #
889
+ # * keyspace
890
+ # * column_family
891
+ # * column_name
892
+ #
893
def drop_index(keyspace, column_family, column_name)
  # Not available when the configured Cassandra version is below 0.7.
  return false if Cassandra.VERSION.to_f < 0.7

  cf_def = client.describe_keyspace(keyspace).cf_defs.find { |cf| cf.name == column_family }
  return if cf_def.nil?

  # Only touch the column family when it actually has metadata for the column.
  return unless cf_def.column_metadata.find { |col| col.name == column_name }

  cf_def.column_metadata.delete_if { |col| col.name == column_name }
  update_column_family(cf_def)
end
902
+
903
+ ##
904
+ # This method is mostly used internally by get_indexed_slices to create
905
+ # a CassandraThrift::IndexExpression for the given options.
906
+ #
907
+ # * column_name - Column to be compared
908
+ # * value - Value to compare against
909
+ # * comparison - Type of comparison to do.
910
+ #
911
def create_index_expression(column_name, value, comparison)
  # Not available when the configured Cassandra version is below 0.7.
  return false if Cassandra.VERSION.to_f < 0.7

  # Translate the textual comparison into a Thrift index operator. A nil
  # comparison means equality; any unrecognised value leaves the operator nil.
  operator =
    case comparison
    when nil, "EQ", "eq", "==" then CassandraThrift::IndexOperator::EQ
    when "GTE", "gte", ">="    then CassandraThrift::IndexOperator::GTE
    when "GT", "gt", ">"       then CassandraThrift::IndexOperator::GT
    when "LTE", "lte", "<="    then CassandraThrift::IndexOperator::LTE
    when "LT", "lt", "<"       then CassandraThrift::IndexOperator::LT
    end

  CassandraThrift::IndexExpression.new(
    :column_name => column_name,
    :value => value,
    :op => operator)
end
alias :create_idx_expr :create_index_expression
931
+
932
+ ##
933
+ # This method takes an array of CassandraThrift::IndexExpression
934
+ # objects and creates a CassandraThrift::IndexClause for use in the
935
+ # Cassandra#get_indexed_slices
936
+ #
937
+ # * index_expressions - Array of CassandraThrift::IndexExpressions.
938
+ # * start - The starting row key.
939
+ # * count - The count of items to be returned
940
+ #
941
def create_index_clause(index_expressions, start = "", count = 100)
  # Not available when the configured Cassandra version is below 0.7.
  return false if Cassandra.VERSION.to_f < 0.7

  # Bundle the expressions with the starting row key and the row limit.
  CassandraThrift::IndexClause.new(:start_key => start,
                                   :expressions => index_expressions,
                                   :count => count)
end
alias :create_idx_clause :create_index_clause
950
+
951
+ ##
952
+ # This method is used to query a secondary index with a set of
953
+ # provided search parameters
954
+ #
955
+ # Please note that you can either specify a
956
+ # CassandraThrift::IndexClause or an array of hashes with the
957
+ # format as below.
958
+ #
959
+ # * column_family - The Column Family this operation will be run on.
960
+ # * index_clause - This can either be a CassandraThrift::IndexClause or an array of hashes with the following keys:
961
+ # * :column_name - Column to be compared
962
+ # * :value - Value to compare against
963
+ # * :comparison - Type of comparison to do.
964
+ # * options
965
+ # * :key_count - Set maximum number of rows to return. (Only works if CassandraThrift::IndexClause is not passed in.)
966
+ # * :key_start - Set starting row key for search. (Only works if CassandraThrift::IndexClause is not passed in.)
967
+ # * :consistency
968
+ #
969
+ # TODO: Supercolumn support.
970
def get_indexed_slices(column_family, index_clause, *columns_and_options)
  # Not available when the configured Cassandra version is below 0.7.
  return false if Cassandra.VERSION.to_f < 0.7

  defaults = READ_DEFAULTS.merge(:key_count => 100, :key_start => "")
  column_family, columns, _, options =
    extract_and_validate_params(column_family, [], columns_and_options, defaults)

  # Anything that is not already an IndexClause is treated as a list of
  # expression hashes and converted; :key_start and :key_count only apply
  # in that case.
  unless index_clause.instance_of?(CassandraThrift::IndexClause)
    expressions = index_clause.map do |expression|
      create_index_expression(expression[:column_name], expression[:value], expression[:comparison])
    end

    index_clause = create_index_clause(expressions, options[:key_start], options[:key_count])
  end

  key_slices = _get_indexed_slices(column_family, index_clause, columns,
                                   options[:count], options[:start], options[:finish],
                                   options[:reversed], options[:consistency])

  # Map each row key to the columns of its slice.
  slices_by_key = {}
  key_slices.each { |key_slice| slices_by_key[key_slice.key] = key_slice.columns }
  slices_by_key
end
989
+
990
+ protected
991
+
992
# Returns "<class of self>#<name of the method that called calling_method>".
#
# caller[0] is the frame that invoked this helper. Its label is rendered as
#   "file.rb:12:in `get'"        on Ruby < 3.4
#   "file.rb:12:in 'Klass#get'"  on Ruby >= 3.4
# The previous implementation split on the backtick and cut two trailing
# characters, which removed the last letter of the method name along with
# the closing quote and did not handle the newer format at all.
def calling_method
  frame = caller[0].to_s
  # Text between the opening quote (backtick or apostrophe) and the final apostrophe.
  label = frame[/in [`'](.*)'\z/, 1] || ''
  # Drop a "Klass#" / "Klass." qualifier when the runtime includes one.
  method_name = label.sub(/\A.*[#.]/, '')
  "#{self.class}##{method_name}"
end
995
+
996
+ ##
997
+ # Roll up queued mutations, to improve atomicity (and performance).
998
+ #
999
def compact_mutations!
  # Consistency levels seen in the batch; used as a set (clevel => true).
  seen_clevels = {}
  # row key => { column family => [mutations] }
  merged = Hash.new { |hash, row_key| hash[row_key] = {} }

  # Every @batch entry is a two-item array:
  #   [ { row_key => { cf_name => [mutation, ...], ... }, ... },  # [0] mutation map
  #     consistency ]                                             # [1]
  @batch.each do |mutation_op|
    mutation_map, clevel = mutation_op[0], mutation_op[1]
    seen_clevels[clevel] = true

    mutation_map.each do |row_key, mutations_by_cf|
      mutations_by_cf.each do |cf, mutations|
        # Append to a list owned by the merged map, so the queued
        # mutation arrays themselves are never modified.
        merged[row_key][cf] ||= []
        merged[row_key][cf].concat(mutations)
      end
    end
  end

  # The merged mutation map plus the list of distinct consistency levels.
  [merged, seen_clevels.keys]
end
1029
+
1030
+ ##
1031
+ # Creates a new client as specified by Cassandra.thrift_client_options[:thrift_client_class]
1032
+ #
1033
def new_client
  # Instantiate the configured thrift client class for the Cassandra Thrift
  # service, using the current server list and client options.
  client_class = thrift_client_class
  client_class.new(CassandraThrift::Cassandra::Client, @servers, @thrift_client_options)
end
1036
+
1037
def client
  # (Re)connect when there is no client yet or it reports no current server.
  unusable = @client.nil? || @client.current_server.nil?
  reconnect! if unusable
  @client
end
1043
+
1044
def reconnect!
  # Refresh the server list first: new_client reads @servers.
  @servers = all_nodes
  @client = new_client
  @client.add_callback(:post_connect) do |connection|
    # Set the active keyspace after connecting
    connection.set_keyspace(@keyspace)

    # If using an authenticated keyspace, ensure we relogin
    connection.login(@auth_request) if @auth_request
  end
end
1055
+
1056
def all_nodes
  # Discovery is skipped unless it is enabled, and always for the "system" keyspace.
  discover = @auto_discover_nodes && !@keyspace.eql?("system")
  return @servers unless discover

  ring_client = new_client
  begin
    endpoint_lists = ring_client.describe_ring(@keyspace).map { |range| range.endpoints }
    # Discovered nodes reuse the port of the first configured server.
    port = @servers.first.split(':').last
    endpoint_lists.flatten.uniq.map { |ip| "#{ip}:#{port}" }
  ensure
    # The temporary client is only needed for the ring lookup.
    ring_client.disconnect!
  end
end
1070
+ end