jamesgolick-cassandra 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'thrift_client'
3
+ require 'json' unless defined?(JSON)
4
+ require 'simple_uuid'
5
+ here = File.expand_path(File.dirname(__FILE__))
6
+
7
+ $LOAD_PATH << "#{here}/../vendor/gen-rb"
8
+ require "#{here}/../vendor/gen-rb/cassandra"
9
+
10
+ $LOAD_PATH << "#{here}"
11
+
12
+ require 'cassandra/helpers'
13
+ require 'cassandra/array'
14
+ require 'cassandra/time'
15
+ require 'cassandra/comparable'
16
+ require 'cassandra/long'
17
+ require 'cassandra/ordered_hash'
18
+ require 'cassandra/columns'
19
+ require 'cassandra/protocol'
20
+ require 'cassandra/cassandra'
21
+ require 'cassandra/constants'
22
+ require 'cassandra/debug' if ENV['DEBUG']
@@ -0,0 +1,8 @@
1
+
2
class Array
  # Returns a new array in which every element has been passed through
  # Kernel#Array and spliced in, so nested arrays are flattened by exactly
  # one level. The receiver is not modified.
  def _flatten_once
    inject([]) { |flat, element| flat.concat(Array(element)) }
  end
end
@@ -0,0 +1,314 @@
1
+
2
+ =begin rdoc
3
+ Create a new Cassandra client instance. Accepts a keyspace name, and optional host and port.
4
+
5
+ client = Cassandra.new('twitter', '127.0.0.1:9160')
6
+
7
+ You can then make calls to the server via the <tt>client</tt> instance.
8
+
9
+ client.insert(:UserRelationships, "5", {"user_timeline" => {SimpleUUID::UUID.new => "1"}})
10
+ client.get(:UserRelationships, "5", "user_timeline")
11
+
12
+ For read methods, valid option parameters are:
13
+
14
+ <tt>:count</tt>:: How many results to return. Defaults to 100.
15
+ <tt>:start</tt>:: Column name token at which to start iterating, inclusive. Defaults to nil, which means the first column in the collation order.
16
+ <tt>:finish</tt>:: Column name token at which to stop iterating, inclusive. Defaults to nil, which means no boundary.
17
+ <tt>:reversed</tt>:: Swap the direction of the collation order.
18
+ <tt>:consistency</tt>:: The consistency level of the request. Defaults to <tt>Cassandra::Consistency::ONE</tt> (one node must respond). Other valid options are <tt>Cassandra::Consistency::ZERO</tt>, <tt>Cassandra::Consistency::QUORUM</tt>, and <tt>Cassandra::Consistency::ALL</tt>.
19
+
20
+ Note that some read options have no relevance in some contexts.
21
+
22
+ For write methods, valid option parameters are:
23
+
24
+ <tt>:timestamp</tt>:: The transaction timestamp. Defaults to the current time in milliseconds. This is used for conflict resolution by the server; you normally never need to change it.
25
+ <tt>:consistency</tt>:: See above.
26
+
27
+ For the initial client instantiation, you may also pass in <tt>:thrift_client_class</tt> with a ThriftClient subclass attached. On connection, that class will be used instead of the default ThriftClient class, allowing you to add additional behavior to the connection (e.g. query logging).
28
+
29
+ =end rdoc
30
+
31
+ class Cassandra
32
+ include Columns
33
+ include Protocol
34
+ include Helpers
35
+
36
+ class AccessError < StandardError #:nodoc:
37
+ end
38
+
39
+ module Consistency
40
+ include CassandraThrift::ConsistencyLevel
41
+ end
42
+
43
+ WRITE_DEFAULTS = {
44
+ :count => 1000,
45
+ :timestamp => nil,
46
+ :consistency => Consistency::ONE
47
+ }.freeze
48
+
49
+ READ_DEFAULTS = {
50
+ :count => 100,
51
+ :start => nil,
52
+ :finish => nil,
53
+ :reversed => false,
54
+ :consistency => Consistency::ONE
55
+ }.freeze
56
+
57
+ THRIFT_DEFAULTS = {
58
+ :transport_wrapper => Thrift::BufferedTransport,
59
+ :thrift_client_class => ThriftClient
60
+ }.freeze
61
+
62
+ attr_reader :keyspace, :servers, :schema, :thrift_client_options, :thrift_client_class
63
+
64
# Set up a new Cassandra instance for +keyspace+. +servers+ may be a single
# "host:port" string or a list of them; +thrift_client_options+ is merged
# over THRIFT_DEFAULTS. This method only assigns instance state.
def initialize(keyspace, servers = "127.0.0.1:9160", thrift_client_options = {})
  @keyspace = keyspace
  @servers = Array(servers)
  @thrift_client_options = THRIFT_DEFAULTS.merge(thrift_client_options)
  @thrift_client_class = @thrift_client_options[:thrift_client_class]
  @auto_discover_nodes = true
  # Per-column-family lookup caches.
  @is_super = {}
  @column_name_class = {}
  @sub_column_name_class = {}
end
75
+
76
# Clear the auto-discovery flag so that the configured server list is used
# unchanged instead of being replaced by the discovered node list.
def disable_node_auto_discovery!
  @auto_discover_nodes = false
end
79
+
80
# Close the current connection, if any, and forget the client so that the
# next request creates a fresh one. Safe to call when no connection has been
# opened yet, or more than once in a row. Always returns nil.
def disconnect!
  # Nothing to close: never connected, or already disconnected.
  return unless @client

  @client.disconnect!
  @client = nil
end
84
+
85
# Keyspace names as reported by the server's "keyspaces" string-list
# property. The first truthy answer is memoized.
def keyspaces
  return @keyspaces if @keyspaces

  @keyspaces = client.get_string_list_property("keyspaces")
end
88
+
89
# Human-readable summary: object id, keyspace, each known column family with
# its type, and the server list. Uses schema(false), so no schema is loaded
# just for inspection.
def inspect
  # The schema entries are keyed by 'Type' (capitalised), the same key
  # Columns#is_super reads; looking up 'type' always produced nil.
  families = schema(false).map { |name, properties| ":#{name} => #{properties['Type'].inspect}" }
  "#<Cassandra:#{object_id}, @keyspace=#{keyspace.inspect}, @schema={#{families.join(', ')}}, @servers=#{servers.inspect}>"
end
94
+
95
+ ### Write
96
+
97
# Insert a row for a key. Pass a flat hash for a regular column family, and
# a nested hash for a super column family. Supports the <tt>:consistency</tt>
# and <tt>:timestamp</tt> options. While a batch is open the mutation is
# queued on it instead of being sent.
def insert(column_family, key, hash, options = {})
  column_family, _, _, options = extract_and_validate_params(column_family, key, [options], WRITE_DEFAULTS)

  timestamp = options[:timestamp] || Time.stamp
  # Pick the mutation builder once; both share the same argument list.
  builder = is_super(column_family) ? :_super_insert_mutation : :_standard_insert_mutation
  mutations = hash.collect { |name, value| send(builder, column_family, name, value, timestamp) }
  mutation_map = { key => { column_family => mutations } }

  if @batch
    @batch << [mutation_map, options[:consistency]]
  else
    _mutate(mutation_map, options[:consistency])
  end
end
120
+
121
+
122
+ ## Delete
123
+
124
# Remove the element at the column_family:key:[column]:[sub_column]
# path you request. Supports the <tt>:consistency</tt> and <tt>:timestamp</tt>
# options. While a batch is open the removal is queued on it instead of
# being sent.
def remove(column_family, key, *columns_and_options)
  column_family, column, sub_column, options =
    extract_and_validate_params(column_family, key, columns_and_options, WRITE_DEFAULTS)

  path_fields = {:column_family => column_family}
  if is_super(column_family)
    path_fields[:super_column] = column
    path_fields[:column] = sub_column
  else
    path_fields[:column] = column
  end
  column_path = CassandraThrift::ColumnPath.new(path_fields)

  removal_args = [key, column_path, options[:timestamp] || Time.stamp, options[:consistency]]

  if @batch
    @batch << [:remove, removal_args]
  else
    _remove(*removal_args)
  end
end
138
+
139
# Remove every row that each_key yields for the column family you request.
# Supports options <tt>:consistency</tt> and <tt>:timestamp</tt>, which are
# forwarded to each remove call.
# FIXME May not currently delete all records without multiple calls. Waiting
# for ranged remove support in Cassandra.
def clear_column_family!(column_family, options = {})
  each_key(column_family) { |row_key| remove(column_family, row_key, options) }
end
148
+
149
# Remove all rows in the keyspace by clearing every column family listed in
# the schema. Supports options <tt>:consistency</tt> and <tt>:timestamp</tt>.
# FIXME May not currently delete all records without multiple calls. Waiting
# for ranged remove support in Cassandra.
def clear_keyspace!(options = {})
  schema.keys.each do |column_family|
    clear_column_family!(column_family, options)
  end
end
156
+
157
+ ### Read
158
+
159
# Count the elements at the column_family:key:[super_column] path you
# request. Supports the <tt>:consistency</tt> option.
def count_columns(column_family, key, *columns_and_options)
  validated = extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  column_family, super_column, _, options = validated
  _count_columns(column_family, key, super_column, options[:consistency])
end
166
+
167
# Multi-key version of Cassandra#count_columns. Returns an OrderedHash of
# key => count in the order of +keys+; +options+ are forwarded unchanged to
# each count_columns call.
# FIXME Not real multi; needs server support
def multi_count_columns(column_family, keys, *options)
  pairs = keys.map do |key|
    [key, count_columns(column_family, key, *options)]
  end
  OrderedHash[*pairs._flatten_once]
end
173
+
174
# Return a list of single values for the elements at the
# column_family:key:column[s]:[sub_columns] path you request. Supports the
# <tt>:consistency</tt> option.
def get_columns(column_family, key, *columns_and_options)
  validated = extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  column_family, columns, sub_columns, options = validated
  _get_columns(column_family, key, columns, sub_columns, options[:consistency])
end
182
+
183
# Multi-key version of Cassandra#get_columns. Returns an OrderedHash of
# key => values in the order of +keys+. Supports the <tt>:consistency</tt>
# option.
# FIXME Not real multi; needs to use a Column predicate
def multi_get_columns(column_family, keys, *options)
  pairs = keys.map do |key|
    [key, get_columns(column_family, key, *options)]
  end
  OrderedHash[*pairs._flatten_once]
end
189
+
190
# Return a hash (actually, a Cassandra::OrderedHash) or a single value
# representing the element at the column_family:key:[column]:[sub_column]
# path you request. Supports options <tt>:count</tt>, <tt>:start</tt>,
# <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
# Implemented as a one-key multi_get.
def get(column_family, key, *columns_and_options)
  rows = multi_get(column_family, [key], *columns_and_options)
  rows[key]
end
197
+
198
# Multi-key version of Cassandra#get. Supports options <tt>:count</tt>,
# <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
# The result is an OrderedHash that follows the order of +keys+.
def multi_get(column_family, keys, *columns_and_options)
  column_family, column, sub_column, options =
    extract_and_validate_params(column_family, keys, columns_and_options, READ_DEFAULTS)

  fetched = _multiget(column_family, keys, column, sub_column,
                      options[:count], options[:start], options[:finish],
                      options[:reversed], options[:consistency])

  # Rebuild the result in the caller's key order. A missing row becomes an
  # empty OrderedHash only for a super column family queried without a
  # sub-column; otherwise it stays nil.
  keys.inject(OrderedHash.new) do |ordered, key|
    ordered[key] = fetched[key] || (OrderedHash.new if is_super(column_family) && !sub_column)
    ordered
  end
end
210
+
211
# Return a truthy value if the column_family:key:[column]:[sub_column] path
# you request exists (the fetched entry itself is returned, not a strict
# boolean). Supports the <tt>:consistency</tt> option.
def exists?(column_family, key, *columns_and_options)
  column_family, column, sub_column, options =
    extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)

  # Arguments after the key list: column, sub_column, count, start, finish, reversed.
  probe = column ? [column, sub_column, 1, nil, nil, nil] : [nil, nil, 1, '', '', false]
  _multiget(column_family, [key], *(probe + [options[:consistency]]))[key]
end
222
+
223
# Return a list of keys in the column_family you request. Requires the
# table to be partitioned with OrderPreservingHash. Supports the
# <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and <tt>:consistency</tt>
# options. <tt>:start</tt> and <tt>:finish</tt> are converted with to_s, so
# nil becomes the empty string.
def get_range(column_family, options = {})
  column_family, _, _, options =
    extract_and_validate_params(column_family, "", [options], READ_DEFAULTS)

  start_key = options[:start].to_s
  finish_key = options[:finish].to_s
  _get_range(column_family, start_key, finish_key, options[:count], options[:consistency])
end
232
+
233
# Count all rows in the column_family you request. Requires the table
# to be partitioned with OrderPreservingHash. Supports the <tt>:start</tt>,
# <tt>:finish</tt>, and <tt>:consistency</tt> options. Only entries from
# get_range that have at least one column are counted.
def count_range(column_family, options = {})
  populated = get_range(column_family, options).select { |row| row.columns.length > 0 }
  populated.compact.length
end
239
+
240
# Open a batch operation and yield. Inserts and deletes are queued until
# the block closes and are then replayed against the server in the order
# they were queued. The queue is always discarded afterwards, even if the
# block raises.
#
# NOTE(review): +options+ is validated against WRITE_DEFAULTS but is not
# applied to the queued mutations anywhere in this method; each mutation is
# sent with the consistency it was queued with.
def batch(options = {})
  _, _, _, options =
    extract_and_validate_params(schema.keys.first, "", [options], WRITE_DEFAULTS)

  @batch = []
  yield
  compact_mutations!

  @batch.each do |mutation|
    # Removals are queued as [:remove, args]; everything else is a
    # [mutation_map, consistency] pair for _mutate.
    if mutation.first == :remove
      _remove(*mutation[1])
    else
      _mutate(*mutation)
    end
  end
ensure
  @batch = nil
end
263
+
264
+ protected
265
+
266
# Build a "ClassName#method" label from the immediate caller's backtrace
# frame: the text after the last backtick, minus its final two characters.
def calling_method
  frame = caller[0]
  method_part = frame.split('`').last
  "#{self.class}##{method_part[0..-3]}"
end
269
+
270
# Roll up queued mutations, to improve atomicity.
# Currently a placeholder: the body is empty, so the queue is left as is.
def compact_mutations!
  #TODO re-do this rollup
end
274
+
275
# The keyspace description, fetched from the server on first use and then
# memoized. With +load+ false, an unloaded schema is reported as an empty
# array instead of triggering a fetch.
def schema(load=true)
  return [] unless load || @schema

  @schema ||= client.describe_keyspace(@keyspace)
end
282
+
283
# The current client, connecting first (via reconnect!) if none is held.
def client
  if @client.nil?
    reconnect!
  end
  @client
end
287
+
288
# Refresh the server list, build a new client for it, then verify that the
# keyspace exists. The order matters: new_client reads @servers.
def reconnect!
  @servers = all_nodes
  @client = new_client
  check_keyspace
end
293
+
294
# Raise AccessError unless the server's "keyspaces" list contains the
# configured keyspace. Returns nil when it does.
def check_keyspace
  available = client.get_string_list_property("keyspaces")
  return if available.include?(@keyspace)

  raise AccessError, "Keyspace #{@keyspace.inspect} not found. Available: #{available.inspect}"
end
299
+
300
# Instantiate the configured Thrift client class for the Cassandra Thrift
# service, using the current server list and the merged client options.
def new_client
  service = CassandraThrift::Cassandra::Client
  thrift_client_class.new(service, @servers, @thrift_client_options)
end
303
+
304
# The list of "host:port" servers to connect to. With auto-discovery on,
# the hosts come from the values of the server's 'token map' property (a
# JSON object) and the port is taken from the first configured server;
# otherwise the configured list is returned unchanged.
def all_nodes
  return @servers unless @auto_discover_nodes

  # The discovery client is only needed for this one call; close it
  # afterwards so it does not linger as a leaked connection.
  discovery_client = new_client
  begin
    ips = ::JSON.parse(discovery_client.get_string_property('token map')).values
  ensure
    discovery_client.disconnect!
  end

  port = @servers.first.split(':').last
  ips.map { |ip| "#{ip}:#{port}" }
end
313
+
314
+ end
@@ -0,0 +1,106 @@
1
+
2
class Cassandra
  # Helpers for introspecting column family properties in the schema and for
  # converting between Thrift column structures and Ruby hashes. Expects the
  # including object to provide #schema and the @is_super, @column_name_class
  # and @sub_column_name_class hashes used as per-column-family caches.
  module Columns #:nodoc:
    private

    # True when the column family's 'Type' property is "Super". The answer
    # is cached per column family.
    def is_super(column_family)
      # Hash#fetch instead of ||= so that a cached +false+ is honoured and
      # standard column families are not looked up again on every call.
      @is_super.fetch(column_family) do
        @is_super[column_family] = column_family_property(column_family, 'Type') == "Super"
      end
    end

    # Class used for column names, chosen from "CompareWith" (cached).
    def column_name_class(column_family)
      @column_name_class[column_family] ||= column_name_class_for_key(column_family, "CompareWith")
    end

    # Class used for sub-column names, chosen from "CompareSubcolumnsWith" (cached).
    def sub_column_name_class(column_family)
      @sub_column_name_class[column_family] ||= column_name_class_for_key(column_family, "CompareSubcolumnsWith")
    end

    # Map the comparator named by +comparator_key+ to a Ruby class, based on
    # the text after the last "." in the property value. Anything that is
    # not recognised (including a missing property) maps to String.
    def column_name_class_for_key(column_family, comparator_key)
      property = column_family_property(column_family, comparator_key)
      property =~ /.*\.(.*?)$/
      case $1
      when "LongType" then Long
      when "LexicalUUIDType", "TimeUUIDType" then SimpleUUID::UUID
      else
        String # UTF8, Ascii, Bytes, anything else
      end
    end

    # Look up +key+ in the schema entry for +column_family+.
    # Raises AccessError when the schema has no such column family.
    def column_family_property(column_family, key)
      unless schema[column_family]
        raise AccessError, "Invalid column family \"#{column_family}\""
      end
      schema[column_family][key]
    end

    # Replace each value in +hash+, in place, with its column's value, or
    # nil when the entry carries no column.
    def multi_column_to_hash!(hash)
      hash.each do |key, column_or_supercolumn|
        hash[key] = (column_or_supercolumn.column.value if column_or_supercolumn.column)
      end
    end

    # Replace each value in +hash+, in place, with columns_to_hash of it.
    def multi_columns_to_hash!(column_family, hash)
      hash.each do |key, columns|
        hash[key] = columns_to_hash(column_family, columns)
      end
    end

    # Replace each value in +hash+, in place, with sub_columns_to_hash of it.
    def multi_sub_columns_to_hash!(column_family, hash)
      hash.each do |key, sub_columns|
        hash[key] = sub_columns_to_hash(column_family, sub_columns)
      end
    end

    # Convert +columns+ to an OrderedHash using the column family's column
    # and sub-column name classes.
    def columns_to_hash(column_family, columns)
      columns_to_hash_for_classes(columns, column_name_class(column_family), sub_column_name_class(column_family))
    end

    # Convert +columns+ to an OrderedHash keyed by the sub-column name class.
    def sub_columns_to_hash(column_family, columns)
      columns_to_hash_for_classes(columns, sub_column_name_class(column_family))
    end

    # Build an OrderedHash from Thrift columns. Names are wrapped in
    # +column_name_class+; a SuperColumn becomes a nested hash keyed with
    # +sub_column_name_class+, a Column becomes its value.
    def columns_to_hash_for_classes(columns, column_name_class, sub_column_name_class = nil)
      hash = OrderedHash.new
      Array(columns).each do |c|
        # Unwrap the ColumnOrSuperColumn container first.
        c = c.super_column || c.column if c.is_a?(CassandraThrift::ColumnOrSuperColumn)
        hash[column_name_class.new(c.name)] = case c
        when CassandraThrift::SuperColumn
          columns_to_hash_for_classes(c.columns, sub_column_name_class) # Pop the class stack, and recurse
        when CassandraThrift::Column
          c.value
        end
      end
      hash
    end

    # Build the Thrift Mutation that writes one column of a standard column
    # family. The name is normalised through the column name class.
    def _standard_insert_mutation(column_family, column_name, value, timestamp)
      CassandraThrift::Mutation.new(
        :column_or_supercolumn => CassandraThrift::ColumnOrSuperColumn.new(
          :column => CassandraThrift::Column.new(
            :name      => column_name_class(column_family).new(column_name).to_s,
            :value     => value,
            :timestamp => timestamp
          )
        )
      )
    end

    # Build the Thrift Mutation that writes one super column together with
    # its sub-columns. Sub-column values are converted with to_s.
    def _super_insert_mutation(column_family, super_column_name, sub_columns, timestamp)
      CassandraThrift::Mutation.new(:column_or_supercolumn =>
        CassandraThrift::ColumnOrSuperColumn.new(
          :super_column => CassandraThrift::SuperColumn.new(
            :name => column_name_class(column_family).new(super_column_name).to_s,
            :columns => sub_columns.collect { |sub_column_name, sub_column_value|
              CassandraThrift::Column.new(
                :name      => sub_column_name_class(column_family).new(sub_column_name).to_s,
                :value     => sub_column_value.to_s,
                :timestamp => timestamp
              )
            }
          )
        )
      )
    end
  end
end