cassilds 0.9.2 → 0.12.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. data/CHANGELOG +51 -1
  2. data/LICENSE +0 -0
  3. data/Manifest +25 -7
  4. data/README.md +352 -0
  5. data/Rakefile +169 -1
  6. data/cassilds.gemspec +45 -0
  7. data/conf/{cassandra.in.sh → 0.6/cassandra.in.sh} +0 -0
  8. data/conf/{log4j.properties → 0.6/log4j.properties} +0 -0
  9. data/conf/0.6/schema.json +57 -0
  10. data/conf/{storage-conf.xml → 0.6/storage-conf.xml} +15 -5
  11. data/conf/0.7/cassandra.in.sh +46 -0
  12. data/conf/0.7/cassandra.yaml +336 -0
  13. data/conf/0.7/log4j-server.properties +41 -0
  14. data/conf/0.7/schema.json +57 -0
  15. data/conf/0.7/schema.txt +45 -0
  16. data/conf/0.8/cassandra.in.sh +41 -0
  17. data/conf/0.8/cassandra.yaml +61 -0
  18. data/conf/0.8/log4j-server.properties +40 -0
  19. data/conf/0.8/schema.json +66 -0
  20. data/conf/0.8/schema.txt +51 -0
  21. data/lib/cassandra/0.6/cassandra.rb +58 -13
  22. data/lib/cassandra/0.6/columns.rb +43 -0
  23. data/lib/cassandra/0.6/protocol.rb +16 -18
  24. data/lib/cassandra/0.6.rb +0 -0
  25. data/lib/cassandra/0.7/cassandra.rb +0 -270
  26. data/lib/cassandra/0.7/columns.rb +1 -64
  27. data/lib/cassandra/0.7/protocol.rb +0 -134
  28. data/lib/cassandra/0.7.rb +0 -0
  29. data/lib/cassandra/0.8/cassandra.rb +10 -0
  30. data/lib/cassandra/0.8/columns.rb +4 -0
  31. data/lib/cassandra/0.8/protocol.rb +23 -0
  32. data/lib/cassandra/0.8.rb +7 -0
  33. data/lib/cassandra/array.rb +0 -0
  34. data/lib/cassandra/cassandra.rb +877 -111
  35. data/lib/cassandra/{0.7/column_family.rb → column_family.rb} +0 -0
  36. data/lib/cassandra/columns.rb +72 -6
  37. data/lib/cassandra/comparable.rb +0 -0
  38. data/lib/cassandra/constants.rb +0 -0
  39. data/lib/cassandra/debug.rb +0 -0
  40. data/lib/cassandra/helpers.rb +1 -0
  41. data/lib/cassandra/{0.7/keyspace.rb → keyspace.rb} +0 -0
  42. data/lib/cassandra/long.rb +5 -0
  43. data/lib/cassandra/mock.rb +259 -85
  44. data/lib/cassandra/ordered_hash.rb +10 -18
  45. data/lib/cassandra/protocol.rb +120 -0
  46. data/lib/cassandra/time.rb +0 -0
  47. data/lib/cassandra.rb +6 -7
  48. data/test/cassandra_client_test.rb +0 -0
  49. data/test/cassandra_mock_test.rb +52 -28
  50. data/test/cassandra_test.rb +465 -44
  51. data/test/comparable_types_test.rb +0 -0
  52. data/test/eventmachine_test.rb +30 -30
  53. data/test/ordered_hash_test.rb +6 -0
  54. data/test/test_helper.rb +3 -2
  55. data/vendor/0.6/gen-rb/cassandra.rb +0 -0
  56. data/vendor/0.6/gen-rb/cassandra_constants.rb +0 -0
  57. data/vendor/0.6/gen-rb/cassandra_types.rb +0 -0
  58. data/vendor/0.7/gen-rb/cassandra.rb +0 -0
  59. data/vendor/0.7/gen-rb/cassandra_constants.rb +0 -0
  60. data/vendor/0.7/gen-rb/cassandra_types.rb +4 -2
  61. data/vendor/0.8/gen-rb/cassandra.rb +2215 -0
  62. data/vendor/0.8/gen-rb/cassandra_constants.rb +12 -0
  63. data/vendor/0.8/gen-rb/cassandra_types.rb +816 -0
  64. metadata +50 -27
  65. data/README.rdoc +0 -83
  66. data/cassandra.gemspec +0 -46
  67. data/conf/cassandra.yaml +0 -113
@@ -30,7 +30,7 @@ For write methods, valid option parameters are:
30
30
 
31
31
  For the initial client instantiation, you may also pass in <tt>:thrift_client</tt> with a ThriftClient subclass attached. On connection, that class will be used instead of the default ThriftClient class, allowing you to add additional behavior to the connection (e.g. query logging).
32
32
 
33
- =end rdoc
33
+ =end
34
34
 
35
35
  class Cassandra
36
36
  include Columns
@@ -49,7 +49,7 @@ class Cassandra
49
49
  :timestamp => nil,
50
50
  :consistency => Consistency::ONE,
51
51
  :ttl => nil
52
- }.freeze
52
+ }
53
53
 
54
54
  READ_DEFAULTS = {
55
55
  :count => 100,
@@ -57,15 +57,19 @@ class Cassandra
57
57
  :finish => nil,
58
58
  :reversed => false,
59
59
  :consistency => Consistency::ONE
60
- }.freeze
61
-
60
+ }
61
+
62
62
  THRIFT_DEFAULTS = {
63
- :transport_wrapper => Thrift::BufferedTransport,
64
- :thrift_client_class => ThriftClient
65
- }.freeze
63
+ :transport_wrapper => Thrift::FramedTransport,
64
+ :thrift_client_class => ThriftClient
65
+ }
66
66
 
67
67
  attr_reader :keyspace, :servers, :schema, :thrift_client_options, :thrift_client_class, :auth_request
68
68
 
69
+ def self.DEFAULT_TRANSPORT_WRAPPER
70
+ Thrift::FramedTransport
71
+ end
72
+
69
73
  # Create a new Cassandra instance and open the connection.
70
74
  def initialize(keyspace, servers = "127.0.0.1:9160", thrift_client_options = {})
71
75
  @is_super = {}
@@ -79,10 +83,25 @@ class Cassandra
79
83
  @servers = Array(servers)
80
84
  end
81
85
 
86
+ ##
87
+ # This method will prevent us from trying to auto-discover all the
88
+ # server addresses, and only use the list of servers provided on
89
+ # initialization.
90
+
91
+ # This is primarily helpful when the cassandra cluster is communicating
92
+ # internally on a different ip address than what you are using to connect.
93
+ # A prime example of this would be when using EC2 to host a cluster.
94
+ # Typically, the cluster would be communicating over the local ip
95
+ # addresses issued by Amazon, but any clients connecting from outside EC2
96
+ # would need to use the public ip.
97
+ #
82
98
  def disable_node_auto_discovery!
83
99
  @auto_discover_nodes = false
84
100
  end
85
101
 
102
+ ##
103
+ # Disconnect the current client connection.
104
+ #
86
105
  def disconnect!
87
106
  if @client
88
107
  @client.disconnect!
@@ -90,27 +109,335 @@ class Cassandra
90
109
  end
91
110
  end
92
111
 
93
- def keyspaces
94
- @keyspaces ||= client.describe_keyspaces()
95
- end
96
-
112
+ ##
113
+ # Issues a login attempt using the username and password specified.
114
+ #
115
+ # * username
116
+ # * password
117
+ #
97
118
  def login!(username, password)
98
- @auth_request = CassandraThrift::AuthenticationRequest.new
99
- @auth_request.credentials = {'username' => username, 'password' => password}
100
- client.login(@keyspace, @auth_request)
119
+ request = CassandraThrift::AuthenticationRequest.new
120
+ request.credentials = {'username' => username, 'password' => password}
121
+ ret = client.login(request)
122
+
123
+ # To avoid a double login on the initial connect, we set
124
+ # @auth_request after the first successful login.
125
+ #
126
+ @auth_request = request
127
+ ret
101
128
  end
102
-
129
+
103
130
  def inspect
104
131
  "#<Cassandra:#{object_id}, @keyspace=#{keyspace.inspect}, @schema={#{
105
- schema(false).map {|name, hash| ":#{name} => #{hash['type'].inspect}"}.join(', ')
132
+ Array(schema(false).cf_defs).map {|cfdef| ":#{cfdef.name} => #{cfdef.column_type}"}.join(', ')
106
133
  }}, @servers=#{servers.inspect}>"
107
134
  end
108
135
 
109
- ### Write
136
+ ##
137
+ # Set the keyspace to use.
138
+ #
139
+ # Please note that this only works on version 0.7.0 and higher.
140
+ def keyspace=(ks)
141
+ return false if Cassandra.VERSION.to_f < 0.7
142
+
143
+ client.set_keyspace(ks)
144
+ @schema = nil; @keyspace = ks
145
+ end
146
+
147
+ ##
148
+ # Return an array of the keyspace names available.
149
+ #
150
+ # Please note that this only works on version 0.7.0 and higher.
151
+ def keyspaces
152
+ return false if Cassandra.VERSION.to_f < 0.7
153
+
154
+ client.describe_keyspaces.to_a.collect {|ksdef| ksdef.name }
155
+ end
156
+
157
+ ##
158
+ # Return a hash of column_family definitions indexed by their
159
+ # names
160
+ def column_families
161
+ return false if Cassandra.VERSION.to_f < 0.7
162
+
163
+ schema.cf_defs.inject(Hash.new){|memo, cf_def| memo[cf_def.name] = cf_def; memo;}
164
+ end
165
+
166
+ ##
167
+ # Return a Cassandra::Keyspace object loaded with the current
168
+ # keyspace's schema.
169
+ #
170
+ # Please note that this only works on version 0.7.0 and higher.
171
+ def schema(load=true)
172
+ return false if Cassandra.VERSION.to_f < 0.7
173
+
174
+ if !load && !@schema
175
+ Cassandra::Keyspace.new
176
+ else
177
+ @schema ||= client.describe_keyspace(@keyspace)
178
+ end
179
+ end
180
+
181
+ ##
182
+ # This returns true if all servers are in agreement on the schema.
183
+ #
184
+ # Please note that this only works on version 0.7.0 and higher.
185
+ def schema_agreement?
186
+ return false if Cassandra.VERSION.to_f < 0.7
187
+
188
+ client.describe_schema_versions().length == 1
189
+ end
190
+
191
+ ##
192
+ # Lists the current cassandra.thrift version.
193
+ #
194
+ # Please note that this only works on version 0.7.0 and higher.
195
+ def version
196
+ return false if Cassandra.VERSION.to_f < 0.7
197
+
198
+ client.describe_version()
199
+ end
200
+
201
+ ##
202
+ # Returns the string name specified for the cluster.
203
+ #
204
+ # Please note that this only works on version 0.7.0 and higher.
205
+ def cluster_name
206
+ return false if Cassandra.VERSION.to_f < 0.7
207
+
208
+ @cluster_name ||= client.describe_cluster_name()
209
+ end
210
+
211
+ ##
212
+ # Returns an array of CassandraThrift::TokenRange objects indicating
213
+ # which servers make up the current ring. What their start and end
214
+ # tokens are, and their list of endpoints.
215
+ #
216
+ # Please note that this only works on version 0.7.0 and higher.
217
+ def ring
218
+ return false if Cassandra.VERSION.to_f < 0.7
219
+
220
+ client.describe_ring(@keyspace)
221
+ end
222
+
223
+ ##
224
+ # Returns a string identifying which partitioner is in use by the
225
+ # current cluster. Typically, this will be RandomPartitioner, but it
226
+ # could be OrderPreservingPartitioner as well.
227
+ #
228
+ # Please note that this only works on version 0.7.0 and higher.
229
+ def partitioner
230
+ return false if Cassandra.VERSION.to_f < 0.7
231
+
232
+ client.describe_partitioner()
233
+ end
234
+
235
+ ##
236
+ # Remove all rows in the column family you request.
237
+ #
238
+ # * column_family
239
+ # * options
240
+ # * consistency
241
+ # * timestamp
242
+ #
243
+ def truncate!(column_family)
244
+ client.truncate(column_family.to_s)
245
+ end
246
+ alias clear_column_family! truncate!
247
+
248
+ ##
249
+ # Remove all column families in the keyspace.
250
+ #
251
+ # This method calls Cassandra#truncate! for each column family in the
252
+ # keyspace.
253
+ #
254
+ # Please note that this only works on version 0.7.0 and higher.
255
+ #
256
+ def clear_keyspace!
257
+ return false if Cassandra.VERSION.to_f < 0.7
258
+
259
+ schema.cf_defs.each { |cfdef| truncate!(cfdef.name) }
260
+ end
261
+
262
+ ##
263
+ # Creates a new column family from the passed in
264
+ # Cassandra::ColumnFamily instance, and returns the schema id.
265
+ #
266
+ def add_column_family(cf_def)
267
+ return false if Cassandra.VERSION.to_f < 0.7
268
+
269
+ begin
270
+ res = client.system_add_column_family(cf_def)
271
+ rescue CassandraThrift::TimedOutException => te
272
+ puts "Timed out: #{te.inspect}"
273
+ end
274
+ @schema = nil
275
+ res
276
+ end
277
+
278
+ ##
279
+ # Delete the specified column family. Return the new schema id.
280
+ #
281
+ # * column_family - The column_family name to drop.
282
+ #
283
+ def drop_column_family(column_family)
284
+ return false if Cassandra.VERSION.to_f < 0.7
285
+
286
+ begin
287
+ res = client.system_drop_column_family(column_family)
288
+ rescue CassandraThrift::TimedOutException => te
289
+ puts "Timed out: #{te.inspect}"
290
+ end
291
+ @schema = nil
292
+ res
293
+ end
294
+
295
+ ##
296
+ # Rename a column family. Returns the new schema id.
297
+ #
298
+ # * old_name - The current column_family name.
299
+ # * new_name - The desired column_family name.
300
+ #
301
+ def rename_column_family(old_name, new_name)
302
+ return false if Cassandra.VERSION.to_f != 0.7
303
+
304
+ begin
305
+ res = client.system_rename_column_family(old_name, new_name)
306
+ rescue CassandraThrift::TimedOutException => te
307
+ puts "Timed out: #{te.inspect}"
308
+ end
309
+ @schema = nil
310
+ res
311
+ end
312
+
313
+ ##
314
+ # Update the column family based on the passed in definition.
315
+ #
316
+ def update_column_family(cf_def)
317
+ return false if Cassandra.VERSION.to_f < 0.7
318
+
319
+ begin
320
+ res = client.system_update_column_family(cf_def)
321
+ rescue CassandraThrift::TimedOutException => te
322
+ puts "Timed out: #{te.inspect}"
323
+ end
324
+ @schema = nil
325
+ res
326
+ end
327
+
328
+ ##
329
+ # Add keyspace using the passed in keyspace definition.
330
+ #
331
+ # Returns the new schema id.
332
+ #
333
+ def add_keyspace(ks_def)
334
+ return false if Cassandra.VERSION.to_f < 0.7
335
+
336
+ begin
337
+ res = client.system_add_keyspace(ks_def)
338
+ rescue CassandraThrift::TimedOutException => toe
339
+ puts "Timed out: #{toe.inspect}"
340
+ rescue Thrift::TransportException => te
341
+ puts "Timed out: #{te.inspect}"
342
+ end
343
+ @keyspaces = nil
344
+ res
345
+ end
346
+
347
+ ##
348
+ # Deletes keyspace using the passed in keyspace name.
349
+ #
350
+ # Returns the new schema id.
351
+ #
352
+ def drop_keyspace(keyspace)
353
+ return false if Cassandra.VERSION.to_f < 0.7
354
+
355
+ begin
356
+ res = client.system_drop_keyspace(keyspace)
357
+ rescue CassandraThrift::TimedOutException => toe
358
+ puts "Timed out: #{toe.inspect}"
359
+ rescue Thrift::TransportException => te
360
+ puts "Timed out: #{te.inspect}"
361
+ end
362
+ keyspace = "system" if keyspace.eql?(@keyspace)
363
+ @keyspaces = nil
364
+ res
365
+ end
366
+
367
+ ##
368
+ # Renames keyspace.
369
+ #
370
+ # * old_name - Current keyspace name.
371
+ # * new_name - Desired keyspace name.
372
+ #
373
+ # Returns the new schema id
374
+ def rename_keyspace(old_name, new_name)
375
+ return false if Cassandra.VERSION.to_f < 0.7
376
+
377
+ begin
378
+ res = client.system_rename_keyspace(old_name, new_name)
379
+ rescue CassandraThrift::TimedOutException => toe
380
+ puts "Timed out: #{toe.inspect}"
381
+ rescue Thrift::TransportException => te
382
+ puts "Timed out: #{te.inspect}"
383
+ end
384
+ keyspace = new_name if old_name.eql?(@keyspace)
385
+ @keyspaces = nil
386
+ res
387
+ end
388
+
389
+ ##
390
+ # Update the keyspace using the passed in keyspace definition.
391
+ #
392
+ def update_keyspace(ks_def)
393
+ return false if Cassandra.VERSION.to_f < 0.7
394
+
395
+ begin
396
+ res = client.system_update_keyspace(ks_def)
397
+ rescue CassandraThrift::TimedOutException => toe
398
+ puts "Timed out: #{toe.inspect}"
399
+ rescue Thrift::TransportException => te
400
+ puts "Timed out: #{te.inspect}"
401
+ end
402
+ @keyspaces = nil
403
+ res
404
+ end
405
+ ##
406
+ # The initial default consistency is set to ONE, but you can use this method
407
+ # to override the normal default with your specified value. Use this if you
408
+ # do not want to specify a write consistency for each insert statement.
409
+ #
410
+ def default_write_consistency=(value)
411
+ WRITE_DEFAULTS[:consistency] = value
412
+ end
413
+
414
+ ##
415
+ # The initial default consistency is set to ONE, but you can use this method
416
+ # to override the normal default with your specified value. Use this if you
417
+ # do not want to specify a read consistency for each query.
418
+ #
419
+ def default_read_consistency=(value)
420
+ READ_DEFAULTS[:consistency] = value
421
+ end
110
422
 
111
- # Insert a row for a key. Pass a flat hash for a regular column family, and
112
- # a nested hash for a super column family. Supports the <tt>:consistency</tt>,
113
- # <tt>:timestamp</tt> and <tt>:ttl</tt> options.
423
+ ##
424
+ # This is the main method used to insert rows into cassandra. If the
425
+ # column\_family that you are inserting into is a SuperColumnFamily then
426
+ # the hash passed in should be a nested hash, otherwise it should be a
427
+ # flat hash.
428
+ #
429
+ # This method can also be called while in batch mode. If in batch mode
430
+ # then we queue up the mutations (an insert in this case) and pass them to
431
+ # cassandra in a single batch at the end of the block.
432
+ #
433
+ # * column\_family - The column\_family that you are inserting into.
434
+ # * key - The row key to insert.
435
+ # * hash - The columns or super columns to insert.
436
+ # * options - Valid options are:
437
+ # * :timestamp - Uses the current time if none specified.
438
+ # * :consistency - Uses the default write consistency if none specified.
439
+ # * :ttl - If specified this is the number of seconds after the insert that this value will be available.
440
+ #
114
441
  def insert(column_family, key, hash, options = {})
115
442
  column_family, _, _, options = extract_and_validate_params(column_family, key, [options], WRITE_DEFAULTS)
116
443
 
@@ -133,66 +460,158 @@ class Cassandra
133
460
  end
134
461
 
135
462
 
136
- ## Delete
137
-
138
- # _mutate the element at the column_family:key:[column]:[sub_column]
139
- # path you request. Supports the <tt>:consistency</tt> and <tt>:timestamp</tt>
140
- # options.
463
+ ##
464
+ # This method is used to delete (actually marking them as deleted with a
465
+ # tombstone) rows, columns, or super columns depending on the parameters
466
+ # passed. If only a key is passed the entire row will be marked as deleted.
467
+ # If a column name is passed in that column will be deleted.
468
+ #
469
+ # This method can also be used in batch mode. If in batch mode then we
470
+ # queue up the mutations (a deletion in this case)
471
+ #
472
+ # * column\_family - The column\_family that you are deleting from.
473
+ # * key - The row key to delete (or to delete columns from).
474
+ # * columns - Either a single super_column or a list of columns.
475
+ # * sub_columns - The list of sub\_columns to select.
476
+ # * options - Valid options are:
477
+ # * :timestamp - Uses the current time if none specified.
478
+ # * :consistency - Uses the default write consistency if none specified.
479
+ #
480
+ # TODO: we could change this function or add another that support multi-column removal (by list or predicate)
481
+ #
141
482
  def remove(column_family, key, *columns_and_options)
142
483
  column_family, column, sub_column, options = extract_and_validate_params(column_family, key, columns_and_options, WRITE_DEFAULTS)
143
484
 
144
- args = {:column_family => column_family}
145
- columns = is_super(column_family) ? {:super_column => column, :column => sub_column} : {:column => column}
146
- column_path = CassandraThrift::ColumnPath.new(args.merge(columns))
147
-
148
- mutation = [:remove, [key, column_path, options[:timestamp] || Time.stamp, options[:consistency]]]
149
-
150
- @batch ? @batch << mutation : _remove(*mutation[1])
485
+ if @batch
486
+ mutation_map =
487
+ {
488
+ key => {
489
+ column_family => [ _delete_mutation(column_family, column, sub_column, options[:timestamp]|| Time.stamp) ]
490
+ }
491
+ }
492
+ @batch << [mutation_map, options[:consistency]]
493
+ else
494
+ # Let's continue using the 'remove' thrift method...not sure about the implications/performance of using the mutate instead
495
+ # Otherwise we could use the mutation_map above, and do _mutate(mutation_map, options[:consistency])
496
+ args = {:column_family => column_family}
497
+ columns = is_super(column_family) ? {:super_column => column, :column => sub_column} : {:column => column}
498
+ column_path = CassandraThrift::ColumnPath.new(args.merge(columns))
499
+ _remove(key, column_path, options[:timestamp] || Time.stamp, options[:consistency])
500
+ end
151
501
  end
152
502
 
153
- ### Read
154
-
155
- # Count the elements at the column_family:key:[super_column] path you
156
- # request. Supports the <tt>:consistency</tt> option.
503
+ ##
504
+ # Count the columns for the provided parameters.
505
+ #
506
+ # * column_family - The column_family that you are inserting into.
507
+ # * key - The row key to insert.
508
+ # * columns - Either a single super_column or a list of columns.
509
+ # * sub_columns - The list of sub_columns to select.
510
+ # * options - Valid options are:
511
+ # * :start - The column name to start from.
512
+ # * :stop - The column name to stop at.
513
+ # * :count - The maximum count of columns to return. (By default cassandra will count up to 100 columns)
514
+ # * :consistency - Uses the default read consistency if none specified.
515
+ #
157
516
  def count_columns(column_family, key, *columns_and_options)
158
517
  column_family, super_column, _, options =
159
518
  extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
160
- _count_columns(column_family, key, super_column, options[:consistency])
519
+ _count_columns(column_family, key, super_column, options[:start], options[:stop], options[:count], options[:consistency])
161
520
  end
162
521
 
163
- # Multi-key version of Cassandra#count_columns. Supports options <tt>:count</tt>,
164
- # <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
165
- # FIXME Not real multi; needs server support
522
+ ##
523
+ # Multi-key version of Cassandra#count_columns. Please note that this
524
+ # queries the server for each key passed in.
525
+ #
526
+ # Supports same parameters as Cassandra#count_columns.
527
+ #
528
+ # * column_family - The column_family that you are inserting into.
529
+ # * key - The row key to insert.
530
+ # * columns - Either a single super_column or a list of columns.
531
+ # * sub_columns - The list of sub_columns to select.
532
+ # * options - Valid options are:
533
+ # * :consistency - Uses the default read consistency if none specified.
534
+ #
535
+ # FIXME: Not real multi; needs server support
166
536
  def multi_count_columns(column_family, keys, *options)
167
537
  OrderedHash[*keys.map { |key| [key, count_columns(column_family, key, *options)] }._flatten_once]
168
538
  end
169
539
 
170
- # Return a list of single values for the elements at the
171
- # column_family:key:column[s]:[sub_columns] path you request. Supports the
172
- # <tt>:consistency</tt> option.
540
+ ##
541
+ # Return a hash of column value pairs for the path you request.
542
+ #
543
+ # * column_family - The column_family that you are inserting into.
544
+ # * key - The row key to insert.
545
+ # * columns - Either a single super_column or a list of columns.
546
+ # * sub_columns - The list of sub_columns to select.
547
+ # * options - Valid options are:
548
+ # * :consistency - Uses the default read consistency if none specified.
549
+ #
173
550
  def get_columns(column_family, key, *columns_and_options)
174
551
  column_family, columns, sub_columns, options =
175
552
  extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
176
553
  _get_columns(column_family, key, columns, sub_columns, options[:consistency])
177
554
  end
178
555
 
179
- # Multi-key version of Cassandra#get_columns. Supports the <tt>:consistency</tt>
180
- # option.
556
+ ##
557
+ # Multi-key version of Cassandra#get_columns. Please note that this
558
+ # queries the server for each key passed in.
559
+ #
560
+ # Supports same parameters as Cassandra#get_columns
561
+ #
562
+ # * column_family - The column_family that you are inserting into.
563
+ # * key - The row key to insert.
564
+ # * columns - Either a single super_column or a list of columns.
565
+ # * sub_columns - The list of sub_columns to select.
566
+ # * options - Valid options are:
567
+ # * :consistency - Uses the default read consistency if none specified.
568
+ #
181
569
  # FIXME Not real multi; needs to use a Column predicate
182
570
  def multi_get_columns(column_family, keys, *options)
183
571
  OrderedHash[*keys.map { |key| [key, get_columns(column_family, key, *options)] }._flatten_once]
184
572
  end
185
573
 
574
+ ##
186
575
  # Return a hash (actually, a Cassandra::OrderedHash) or a single value
187
576
  # representing the element at the column_family:key:[column]:[sub_column]
188
- # path you request. Supports options <tt>:count</tt>, <tt>:start</tt>,
189
- # <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
577
+ # path you request.
578
+ #
579
+ # * column_family - The column_family that you are inserting into.
580
+ # * key - The row key to insert.
581
+ # * columns - Either a single super_column or a list of columns.
582
+ # * sub_columns - The list of sub_columns to select.
583
+ # * options - Valid options are:
584
+ # * :count - The number of columns requested to be returned.
585
+ # * :start - The starting value for selecting a range of columns.
586
+ # * :finish - The final value for selecting a range of columns.
587
+ # * :reversed - If set to true the results will be returned in
588
+ # reverse order.
589
+ # * :consistency - Uses the default read consistency if none specified.
590
+ #
190
591
  def get(column_family, key, *columns_and_options)
191
592
  multi_get(column_family, [key], *columns_and_options)[key]
192
593
  end
193
594
 
194
- # Multi-key version of Cassandra#get. Supports options <tt>:count</tt>,
195
- # <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
595
+ ##
596
+ # Multi-key version of Cassandra#get.
597
+ #
598
+ # This method allows you to select multiple rows with a single query.
599
+ # If a key that is passed in doesn't exist an empty hash will be
600
+ # returned.
601
+ #
602
+ # Supports the same parameters as Cassandra#get.
603
+ #
604
+ # * column_family - The column_family that you are inserting into.
605
+ # * keys - An array of keys to select.
606
+ # * columns - Either a single super_column or a list of columns.
607
+ # * sub_columns - The list of sub_columns to select.
608
+ # * options - Valid options are:
609
+ # * :count - The number of columns requested to be returned.
610
+ # * :start - The starting value for selecting a range of columns.
611
+ # * :finish - The final value for selecting a range of columns.
612
+ # * :reversed - If set to true the results will be returned in reverse order.
613
+ # * :consistency - Uses the default read consistency if none specified.
614
+ #
196
615
  def multi_get(column_family, keys, *columns_and_options)
197
616
  column_family, column, sub_column, options =
198
617
  extract_and_validate_params(column_family, keys, columns_and_options, READ_DEFAULTS)
@@ -206,97 +625,444 @@ class Cassandra
206
625
  ordered_hash
207
626
  end
208
627
 
628
+ ##
209
629
  # Return true if the column_family:key:[column]:[sub_column] path you
210
- # request exists. Supports the <tt>:consistency</tt> option.
630
+ # request exists.
631
+ #
632
+ # If passed in only a row key it will query for any columns (limiting
633
+ # to 1) for that row key. If a column is passed in it will query for
634
+ # that specific column/super column.
635
+ #
636
+ # This method will return true or false.
637
+ #
638
+ # * column_family - The column_family that you are querying.
639
+ # * key - The row key to look up.
640
+ # * columns - Either a single super_column or a list of columns.
641
+ # * sub_columns - The list of sub_columns to select.
642
+ # * options - Valid options are:
643
+ # * :consistency - Uses the default read consistency if none specified.
644
+ #
211
645
  def exists?(column_family, key, *columns_and_options)
212
646
  column_family, column, sub_column, options =
213
647
  extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
214
- ret = nil
215
- if column
216
- ret = _multiget(column_family, [key], column, sub_column, '', '', 1, false, options[:consistency])[key]
648
+ result = if column
649
+ _multiget(column_family, [key], column, sub_column, '', '', 1, false, options[:consistency])[key]
650
+ else
651
+ _multiget(column_family, [key], nil, nil, '', '', 1, false, options[:consistency])[key]
652
+ end
653
+
654
+ ![{}, nil].include?(result)
655
+ end
656
+
657
+ ##
658
+ # Return a Cassandra::OrderedHash containing the columns specified for the given
659
+ # range of keys in the column_family you request.
660
+ #
661
+ # This method is just a convenience wrapper around Cassandra#get_range_single
662
+ # and Cassandra#get_range_batch. If :key_count, :batch_size, or a block
663
+ # is passed in Cassandra#get_range_batch will be called. Otherwise
664
+ # Cassandra#get_range_single will be used.
665
+ #
666
+ # The start_key and finish_key parameters are only useful for iterating over all records
667
+ # as is done in the Cassandra#each and Cassandra#each_key methods if you are using the
668
+ # RandomPartitioner.
669
+ #
670
+ # If the table is partitioned with OrderPreservingPartitioner you may
671
+ # use the start_key and finish_key params to select all records with
672
+ # the same prefix value.
673
+ #
674
+ # If a block is passed in we will yield the row key and columns for
675
+ # each record returned.
676
+ #
677
+ # Please note that Cassandra returns a row for each row that has existed in the
678
+ # system since gc_grace_seconds. This is because deleted row keys are marked as
679
+ # deleted, but left in the system until the cluster has had reasonable time to replicate the deletion.
680
+ # This function attempts to suppress deleted rows (actually any row returned without
681
+ # columns is suppressed).
682
+ #
683
+ # Please note that when enabling the :reversed option, :start and :finish should be swapped (e.g.
684
+ # reversal happens before selecting the range).
685
+ #
686
+ # * column_family - The column_family that you are querying.
687
+ # * options - Valid options are:
688
+ # * :start_key - The starting value for selecting a range of keys (only useful with OPP).
689
+ # * :finish_key - The final value for selecting a range of keys (only useful with OPP).
690
+ # * :key_count - The total number of keys to return from the query. (see note regarding deleted records)
691
+ # * :batch_size - The maximum number of keys to return per query. If specified will loop until :key_count is obtained or all records have been returned.
692
+ # * :columns - A list of columns to return.
693
+ # * :count - The number of columns requested to be returned.
694
+ # * :start - The starting value for selecting a range of columns.
695
+ # * :finish - The final value for selecting a range of columns.
696
+ # * :reversed - If set to true the results will be returned in reverse order.
697
+ # * :consistency - Uses the default read consistency if none specified.
698
+ #
699
+ def get_range(column_family, options = {}, &blk)
700
+ if block_given? || options[:key_count] || options[:batch_size]
701
+ get_range_batch(column_family, options, &blk)
217
702
  else
218
- ret = _multiget(column_family, [key], nil, nil, '', '', 1, false, options[:consistency])[key]
703
+ get_range_single(column_family, options, &blk)
219
704
  end
220
705
  return (!ret.nil? and ret.send(:length) != 0)
221
706
  end
222
707
 
223
- # Return a list of keys in the column_family you request. Requires the
224
- # table to be partitioned with OrderPreservingHash. Supports the
225
- # <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and <tt>:consistency</tt>
226
- # options.
227
- def get_range(column_family, options = {})
708
+ ##
709
+ # Return a Cassandra::OrderedHash containing the columns specified for the given
710
+ # range of keys in the column_family you request.
711
+ #
712
+ # See Cassandra#get_range for more details.
713
+ #
714
+ def get_range_single(column_family, options = {})
715
+ return_empty_rows = options.delete(:return_empty_rows) || false
716
+
228
717
  column_family, _, _, options =
229
- extract_and_validate_params(column_family, "", [options], READ_DEFAULTS)
230
- _get_range(column_family, options[:start].to_s, options[:finish].to_s,
231
- options[:count], options[:consistency])
232
- end
233
-
234
- # Return a list of keys in the column_family you request. Requires the
235
- # table to be partitioned with OrderPreservingHash. Supports the
236
- # <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and <tt>:consistency</tt>
237
- # options.
238
- def get_range_hash(column_family, options = {})
239
- column_family, _, _, options =
240
- extract_and_validate_params(column_family, "", [options], READ_DEFAULTS)
241
- _get_range_hash(column_family, options[:start].to_s, options[:finish].to_s,
242
- options[:count], options[:consistency])
243
- end
244
-
245
- # Return a list of keys in the column_family you request. Requires the
246
- # table to be partitioned with OrderPreservingHash. Supports the
247
- # <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and <tt>:consistency</tt>
248
- # options.
249
- def get_range_columns(column_family, *columns_and_options)
250
- column_family, columns, sub_columns, options =
251
- extract_and_validate_params(column_family, "", columns_and_options, READ_DEFAULTS)
252
- _get_range_columns(column_family, columns, sub_columns, options[:start].to_s,
253
- options[:finish].to_s, options[:count], options[:consistency])
254
- end
255
-
256
- # Count all rows in the column_family you request. Requires the table
257
- # to be partitioned with OrderPreservingHash. Supports the <tt>:start</tt>,
258
- # <tt>:finish</tt>, and <tt>:consistency</tt> options.
718
+ extract_and_validate_params(column_family, "", [options],
719
+ READ_DEFAULTS.merge(:start_key => '',
720
+ :finish_key => '',
721
+ :key_count => 100,
722
+ :columns => nil,
723
+ :reversed => false
724
+ )
725
+ )
726
+
727
+ results = _get_range( column_family,
728
+ options[:start_key].to_s,
729
+ options[:finish_key].to_s,
730
+ options[:key_count],
731
+ options[:columns],
732
+ options[:start].to_s,
733
+ options[:finish].to_s,
734
+ options[:count],
735
+ options[:consistency],
736
+ options[:reversed] )
737
+
738
+ multi_key_slices_to_hash(column_family, results, return_empty_rows)
739
+ end
740
+
741
+ ##
742
+ # Return a Cassandra::OrderedHash containing the columns specified for the given
743
+ # range of keys in the column_family you request.
744
+ #
745
+ # If a block is passed in we will yield the row key and columns for
746
+ # each record returned.
747
+ #
748
+ # See Cassandra#get_range for more details.
749
+ #
750
+ def get_range_batch(column_family, options = {})
751
+ batch_size = options.delete(:batch_size) || 100
752
+ count = options.delete(:key_count)
753
+ result = {}
754
+
755
+ options[:start_key] ||= ''
756
+ last_key = nil
757
+
758
+ while options[:start_key] != last_key && (count.nil? || count > result.length)
759
+ options[:start_key] = last_key
760
+ res = get_range_single(column_family, options.merge!(:start_key => last_key,
761
+ :key_count => batch_size,
762
+ :return_empty_rows => true
763
+ ))
764
+ res.each do |key, columns|
765
+ next if options[:start_key] == key
766
+ next if result.length == count
767
+
768
+ unless columns == {}
769
+ if block_given?
770
+ yield key, columns
771
+ else
772
+ result[key] = columns
773
+ end
774
+ end
775
+ last_key = key
776
+ end
777
+ end
778
+
779
+ result if !block_given?
780
+ end
781
+
782
+ ##
783
+ # Count all rows in the column_family you request.
784
+ #
785
+ # This method just calls Cassandra#get_range_keys and returns the
786
+ # number of records returned.
787
+ #
788
+ # See Cassandra#get_range for options.
789
+ #
259
790
  def count_range(column_family, options = {})
260
- get_range(column_family, options).select{|r| r.columns.length > 0}.compact.length
791
+ get_range_keys(column_family, options).length
792
+ end
793
+
794
+ ##
795
+ # Return an Array containing all of the keys within a given range.
796
+ #
797
+ # This method just calls Cassandra#get_range and returns the
798
+ # row keys for the records returned.
799
+ #
800
+ # See Cassandra#get_range for options.
801
+ #
802
+ def get_range_keys(column_family, options = {})
803
+ get_range(column_family,options.merge!(:count => 1)).keys
261
804
  end
262
805
 
806
+ ##
807
+ # Iterate through each key within the given parameters. This function can be
808
+ # used to iterate over each key in the given column family.
809
+ #
810
+ # This method just calls Cassandra#get_range and yields each row key.
811
+ #
812
+ # See Cassandra#get_range for options.
813
+ #
814
+ def each_key(column_family, options = {})
815
+ get_range_batch(column_family, options) do |key, columns|
816
+ yield key
817
+ end
818
+ end
819
+
820
+ ##
821
+ # Iterate through each row in the given column family
822
+ #
823
+ # This method just calls Cassandra#get_range and yields the key and
824
+ # columns.
825
+ #
826
+ # See Cassandra#get_range for options.
827
+ #
828
+ def each(column_family, options = {})
829
+ get_range_batch(column_family, options) do |key, columns|
830
+ yield key, columns
831
+ end
832
+ end
833
+
834
+ ##
263
835
  # Open a batch operation and yield self. Inserts and deletes will be queued
264
- # until the block closes, and then sent atomically to the server. Supports
265
- # the <tt>:consistency</tt> option, which overrides the consistency set in
836
+ # until the block closes, and then sent atomically to the server.
837
+ #
838
+ # Supports the :consistency option, which overrides the consistency set in
266
839
  # the individual commands.
840
+ #
267
841
  def batch(options = {})
268
- _, _, _, options =
269
- extract_and_validate_params(schema.keys.first, "", [options], WRITE_DEFAULTS)
270
-
271
- @batch = []
272
- yield(self)
273
- compact_mutations!
274
-
275
- @batch.each do |mutation|
276
- case mutation.first
277
- when :remove
278
- _remove(*mutation[1])
279
- else
280
- _mutate(*mutation)
281
- end
282
- end
842
+ _, _, _, options =
843
+ extract_and_validate_params(schema.cf_defs.first.name, "", [options], WRITE_DEFAULTS)
844
+
845
+ @batch = []
846
+ yield(self)
847
+ compacted_map,seen_clevels = compact_mutations!
848
+ clevel = if options[:consistency] != nil # Override any clevel from individual mutations if
849
+ options[:consistency]
850
+ elsif seen_clevels.length > 1 # Cannot choose which CLevel to use if there are several ones
851
+ raise "Multiple consistency levels used in the batch, and no override...cannot pick one"
852
+ else # if no consistency override has been provided but all the clevels in the batch are the same: use that one
853
+ seen_clevels.first
854
+ end
855
+
856
+ _mutate(compacted_map,clevel)
283
857
  ensure
284
858
  @batch = nil
285
859
  end
286
-
860
+
861
+ ##
862
+ # Create secondary index.
863
+ #
864
+ # * keyspace
865
+ # * column_family
866
+ # * column_name
867
+ # * validation_class
868
+ #
869
+ def create_index(keyspace, column_family, column_name, validation_class)
870
+ return false if Cassandra.VERSION.to_f < 0.7
871
+
872
+ cf_def = client.describe_keyspace(keyspace).cf_defs.find{|x| x.name == column_family}
873
+ if !cf_def.nil? and !cf_def.column_metadata.find{|x| x.name == column_name}
874
+ c_def = CassandraThrift::ColumnDef.new do |cd|
875
+ cd.name = column_name
876
+ cd.validation_class = "org.apache.cassandra.db.marshal."+validation_class
877
+ cd.index_type = CassandraThrift::IndexType::KEYS
878
+ end
879
+ cf_def.column_metadata.push(c_def)
880
+ update_column_family(cf_def)
881
+ end
882
+ end
883
+
884
+ ##
885
+ # Delete secondary index.
886
+ #
887
+ # * keyspace
888
+ # * column_family
889
+ # * column_name
890
+ #
891
+ def drop_index(keyspace, column_family, column_name)
892
+ return false if Cassandra.VERSION.to_f < 0.7
893
+
894
+ cf_def = client.describe_keyspace(keyspace).cf_defs.find{|x| x.name == column_family}
895
+ if !cf_def.nil? and cf_def.column_metadata.find{|x| x.name == column_name}
896
+ cf_def.column_metadata.delete_if{|x| x.name == column_name}
897
+ update_column_family(cf_def)
898
+ end
899
+ end
900
+
901
+ ##
902
+ # This method is mostly used internally by get_indexed_slices to create
903
+ # a CassandraThrift::IndexExpression for the given options.
904
+ #
905
+ # * column_name - Column to be compared
906
+ # * value - Value to compare against
907
+ # * comparison - Type of comparison to do.
908
+ #
909
+ def create_index_expression(column_name, value, comparison)
910
+ return false if Cassandra.VERSION.to_f < 0.7
911
+
912
+ CassandraThrift::IndexExpression.new(
913
+ :column_name => column_name,
914
+ :value => value,
915
+ :op => (case comparison
916
+ when nil, "EQ", "eq", "=="
917
+ CassandraThrift::IndexOperator::EQ
918
+ when "GTE", "gte", ">="
919
+ CassandraThrift::IndexOperator::GTE
920
+ when "GT", "gt", ">"
921
+ CassandraThrift::IndexOperator::GT
922
+ when "LTE", "lte", "<="
923
+ CassandraThrift::IndexOperator::LTE
924
+ when "LT", "lt", "<"
925
+ CassandraThrift::IndexOperator::LT
926
+ end ))
927
+ end
928
+ alias :create_idx_expr :create_index_expression
929
+
930
+ ##
931
+ # This method takes an array of CassandraThrift::IndexExpression
932
+ # objects and creates a CassandraThrift::IndexClause for use in the
933
+ # Cassandra#get_indexed_slices method.
934
+ #
935
+ # * index_expressions - Array of CassandraThrift::IndexExpressions.
936
+ # * start - The starting row key.
937
+ # * count - The count of items to be returned
938
+ #
939
+ def create_index_clause(index_expressions, start = "", count = 100)
940
+ return false if Cassandra.VERSION.to_f < 0.7
941
+
942
+ CassandraThrift::IndexClause.new(
943
+ :start_key => start,
944
+ :expressions => index_expressions,
945
+ :count => count)
946
+ end
947
+ alias :create_idx_clause :create_index_clause
948
+
949
+ ##
950
+ # This method is used to query a secondary index with a set of
951
+ # provided search parameters
952
+ #
953
+ # Please note that you can either specify a
954
+ # CassandraThrift::IndexClause or an array of hashes with the
955
+ # format as below.
956
+ #
957
+ # * column_family - The Column Family this operation will be run on.
958
+ # * index_clause - This can either be a CassandraThrift::IndexClause or an array of hashes with the following keys:
959
+ # * :column_name - Column to be compared
960
+ # * :value - Value to compare against
961
+ # * :comparison - Type of comparison to do.
962
+ # * options
963
+ # * :key_count - Set maximum number of rows to return. (Only works if CassandraThrift::IndexClause is not passed in.)
964
+ # * :key_start - Set starting row key for search. (Only works if CassandraThrift::IndexClause is not passed in.)
965
+ # * :consistency
966
+ #
967
+ # TODO: Supercolumn support.
968
+ def get_indexed_slices(column_family, index_clause, *columns_and_options)
969
+ return false if Cassandra.VERSION.to_f < 0.7
970
+
971
+ column_family, columns, _, options =
972
+ extract_and_validate_params(column_family, [], columns_and_options, READ_DEFAULTS.merge(:key_count => 100, :key_start => ""))
973
+
974
+ if index_clause.class != CassandraThrift::IndexClause
975
+ index_expressions = index_clause.collect do |expression|
976
+ create_index_expression(expression[:column_name], expression[:value], expression[:comparison])
977
+ end
978
+
979
+ index_clause = create_index_clause(index_expressions, options[:key_start], options[:key_count])
980
+ end
981
+
982
+ key_slices = _get_indexed_slices(column_family, index_clause, columns, options[:count], options[:start],
983
+ options[:finish], options[:reversed], options[:consistency])
984
+
985
+ key_slices.inject({}){|h, key_slice| h[key_slice.key] = key_slice.columns; h}
986
+ end
987
+
287
988
  protected
288
989
 
289
990
  def calling_method
290
991
  "#{self.class}##{caller[0].split('`').last[0..-3]}"
291
992
  end
292
993
 
293
- # Roll up queued mutations, to improve atomicity.
994
+ ##
995
+ # Roll up queued mutations, to improve atomicity (and performance).
996
+ #
294
997
  def compact_mutations!
295
- #TODO re-do this rollup
998
+ used_clevels = {} # hash that lists the consistency levels seen in the batch array. key is the clevel, value is true
999
+ by_key = Hash.new{|h,k | h[k] = {}}
1000
+ # @batch is an array of mutation_ops.
1001
+ # A mutation op is a 2-item array containing [mutationmap, consistency_number]
1002
+ # a mutation map is a hash, by key (string) that has a hash by CF name, containing a list of column_mutations)
1003
+ @batch.each do |mutation_op|
1004
+ # A single mutation op looks like:
1005
+ # For an insert/update
1006
+ #[ { key1 =>
1007
+ # { CF1 => [several of CassThrift:Mutation(colname,value,TS,ttl)]
1008
+ # CF2 => [several mutations]
1009
+ # },
1010
+ # key2 => {...} # Not sure if they can come batched like this...so there might only be a single key (and CF)
1011
+ # }, # [0]
1012
+ # consistency # [1]
1013
+ #]
1014
+ mmap = mutation_op[0] # :remove OR a hash like {"key"=> {"CF"=>[mutationclass1,...] } }
1015
+ used_clevels[mutation_op[1]] = true #save the clevel required for this operation
1016
+
1017
+ mmap.keys.each do |k|
1018
+ mmap[k].keys.each do |cf| # For each CF in that key
1019
+ by_key[k][cf] ||= []
1020
+ by_key[k][cf].concat(mmap[k][cf]) # Append the list of mutations for that key and CF
1021
+ end
1022
+ end
1023
+ end
1024
+ # Returns the batch mutations map, and an array with the consistency levels 'seen' in the batch
1025
+ [by_key, used_clevels.keys]
296
1026
  end
297
1027
 
1028
+ ##
1029
+ # Creates a new client as specified by Cassandra.thrift_client_options[:thrift_client_class]
1030
+ #
298
1031
  def new_client
299
1032
  thrift_client_class.new(CassandraThrift::Cassandra::Client, @servers, @thrift_client_options)
300
1033
  end
301
-
1034
+
1035
+ def client
1036
+ if @client.nil? || @client.current_server.nil?
1037
+ reconnect!
1038
+ end
1039
+ @client
1040
+ end
1041
+
1042
+ def reconnect!
1043
+ @servers = all_nodes
1044
+ @client = new_client
1045
+ @client.add_callback :post_connect do |cli|
1046
+ # Set the active keyspace after connecting
1047
+ cli.set_keyspace(@keyspace)
1048
+
1049
+ # If using an authenticated keyspace, ensure we relogin
1050
+ cli.login(@auth_request) if @auth_request
1051
+ end
1052
+ end
1053
+
1054
+ def all_nodes
1055
+ if @auto_discover_nodes && !@keyspace.eql?("system")
1056
+ temp_client = new_client
1057
+ begin
1058
+ ips = (temp_client.describe_ring(@keyspace).map {|range| range.endpoints}).flatten.uniq
1059
+ port = @servers.first.split(':').last
1060
+ ips.map{|ip| "#{ip}:#{port}" }
1061
+ ensure
1062
+ temp_client.disconnect!
1063
+ end
1064
+ else
1065
+ @servers
1066
+ end
1067
+ end
302
1068
  end