jamesgolick-cassandra 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'thrift_client'
3
+ require 'json' unless defined?(JSON)
4
+ require 'simple_uuid'
5
+ here = File.expand_path(File.dirname(__FILE__))
6
+
7
+ $LOAD_PATH << "#{here}/../vendor/gen-rb"
8
+ require "#{here}/../vendor/gen-rb/cassandra"
9
+
10
+ $LOAD_PATH << "#{here}"
11
+
12
+ require 'cassandra/helpers'
13
+ require 'cassandra/array'
14
+ require 'cassandra/time'
15
+ require 'cassandra/comparable'
16
+ require 'cassandra/long'
17
+ require 'cassandra/ordered_hash'
18
+ require 'cassandra/columns'
19
+ require 'cassandra/protocol'
20
+ require 'cassandra/cassandra'
21
+ require 'cassandra/constants'
22
+ require 'cassandra/debug' if ENV['DEBUG']
@@ -0,0 +1,8 @@
1
+
2
class Array
  # Flatten exactly one level of nesting into a new array; the receiver is
  # left untouched.
  #
  # Every element is coerced with Kernel#Array before being appended, so
  # nested arrays lose one level, scalars are kept, +nil+ elements vanish
  # and Hash elements expand into [key, value] pairs.
  def _flatten_once
    inject([]) { |flat, element| flat.concat(Array(element)) }
  end
end
@@ -0,0 +1,314 @@
1
+
2
+ =begin rdoc
3
+ Create a new Cassandra client instance. Accepts a keyspace name, and optional host and port.
4
+
5
+ client = Cassandra.new('twitter', '127.0.0.1:9160')
6
+
7
+ You can then make calls to the server via the <tt>client</tt> instance.
8
+
9
+ client.insert(:UserRelationships, "5", {"user_timeline" => {SimpleUUID::UUID.new => "1"}})
10
+ client.get(:UserRelationships, "5", "user_timeline")
11
+
12
+ For read methods, valid option parameters are:
13
+
14
+ <tt>:count</tt>:: How many results to return. Defaults to 100.
15
+ <tt>:start</tt>:: Column name token at which to start iterating, inclusive. Defaults to nil, which means the first column in the collation order.
16
+ <tt>:finish</tt>:: Column name token at which to stop iterating, inclusive. Defaults to nil, which means no boundary.
17
+ <tt>:reversed</tt>:: Swap the direction of the collation order.
18
+ <tt>:consistency</tt>:: The consistency level of the request. Defaults to <tt>Cassandra::Consistency::ONE</tt> (one node must respond). Other valid options are <tt>Cassandra::Consistency::ZERO</tt>, <tt>Cassandra::Consistency::QUORUM</tt>, and <tt>Cassandra::Consistency::ALL</tt>.
19
+
20
+ Note that some read options have no relevance in some contexts.
21
+
22
+ For write methods, valid option parameters are:
23
+
24
+ <tt>:timestamp </tt>:: The transaction timestamp. Defaults to the current time in milliseconds. This is used for conflict resolution by the server; you normally never need to change it.
25
+ <tt>:consistency</tt>:: See above.
26
+
27
+ For the initial client instantiation, you may also pass in <tt>:thrift_client_class</tt> with a ThriftClient subclass attached. On connection, that class will be used instead of the default ThriftClient class, allowing you to add additional behavior to the connection (e.g. query logging).
28
+
29
+ =end rdoc
30
+
31
+ class Cassandra
32
+ include Columns
33
+ include Protocol
34
+ include Helpers
35
+
36
+ class AccessError < StandardError #:nodoc:
37
+ end
38
+
39
+ module Consistency
40
+ include CassandraThrift::ConsistencyLevel
41
+ end
42
+
43
+ WRITE_DEFAULTS = {
44
+ :count => 1000,
45
+ :timestamp => nil,
46
+ :consistency => Consistency::ONE
47
+ }.freeze
48
+
49
+ READ_DEFAULTS = {
50
+ :count => 100,
51
+ :start => nil,
52
+ :finish => nil,
53
+ :reversed => false,
54
+ :consistency => Consistency::ONE
55
+ }.freeze
56
+
57
+ THRIFT_DEFAULTS = {
58
+ :transport_wrapper => Thrift::BufferedTransport,
59
+ :thrift_client_class => ThriftClient
60
+ }.freeze
61
+
62
+ attr_reader :keyspace, :servers, :schema, :thrift_client_options, :thrift_client_class
63
+
64
# Set up a new Cassandra instance for +keyspace+. Nothing in this method
# touches the network; it only records configuration.
#
# +servers+ may be a single "host:port" string or a list of them, and
# +thrift_client_options+ is merged over THRIFT_DEFAULTS. The
# <tt>:thrift_client_class</tt> entry of the merged options selects the
# class that will be instantiated for connections.
def initialize(keyspace, servers = "127.0.0.1:9160", thrift_client_options = {})
  @keyspace = keyspace
  @servers  = Array(servers)

  @thrift_client_options = THRIFT_DEFAULTS.merge(thrift_client_options)
  @thrift_client_class   = @thrift_client_options[:thrift_client_class]

  # Node discovery is on until disable_node_auto_discovery! is called.
  @auto_discover_nodes = true

  # Per-column-family lookup caches.
  @is_super              = {}
  @column_name_class     = {}
  @sub_column_name_class = {}
end
75
+
76
# Turn off node auto-discovery for this instance by clearing the
# <tt>@auto_discover_nodes</tt> flag. Returns +false+.
def disable_node_auto_discovery!
  @auto_discover_nodes = false
end
79
+
80
# Close the current connection, if there is one, and forget it so the next
# request opens a fresh one. Calling this before any connection was made,
# or calling it twice in a row, is a harmless no-op. Returns +nil+.
def disconnect!
  return unless @client

  @client.disconnect!
  @client = nil
end
84
+
85
# List the keyspaces reported by the server. The answer is fetched through
# the client once and memoized in <tt>@keyspaces</tt>.
def keyspaces
  return @keyspaces if @keyspaces

  @keyspaces = client.get_string_list_property("keyspaces")
end
88
+
89
+ def inspect
90
+ "#<Cassandra:#{object_id}, @keyspace=#{keyspace.inspect}, @schema={#{
91
+ schema(false).map {|name, hash| ":#{name} => #{hash['type'].inspect}"}.join(', ')
92
+ }}, @servers=#{servers.inspect}>"
93
+ end
94
+
95
+ ### Write
96
+
97
# Insert a row for a key. Pass a flat hash (column => value) for a regular
# column family, and a nested hash (super column => {sub column => value})
# for a super column family. Supports the <tt>:consistency</tt> and
# <tt>:timestamp</tt> options; the timestamp falls back to
# <tt>Time.stamp</tt>.
#
# Inside a #batch block the mutation is queued instead of being sent.
def insert(column_family, key, hash, options = {})
  column_family, _, _, options =
    extract_and_validate_params(column_family, key, [options], WRITE_DEFAULTS)
  timestamp = options[:timestamp] || Time.stamp

  # One Thrift mutation per top-level hash entry.
  mutations =
    if is_super(column_family)
      hash.map { |name, sub_columns| _super_insert_mutation(column_family, name, sub_columns, timestamp) }
    else
      hash.map { |name, value| _standard_insert_mutation(column_family, name, value, timestamp) }
    end
  mutation_map = { key => { column_family => mutations } }

  if @batch
    @batch << [mutation_map, options[:consistency]]
  else
    _mutate(mutation_map, options[:consistency])
  end
end
120
+
121
+
122
+ ## Delete
123
+
124
# Remove the element at the column_family:key:[column]:[sub_column]
# path you request. Supports the <tt>:consistency</tt> and
# <tt>:timestamp</tt> options; the timestamp falls back to
# <tt>Time.stamp</tt>.
#
# Inside a #batch block the removal is queued instead of being sent.
def remove(column_family, key, *columns_and_options)
  column_family, column, sub_column, options =
    extract_and_validate_params(column_family, key, columns_and_options, WRITE_DEFAULTS)

  # For super column families the first name is the super column and the
  # second the sub column; otherwise only a single column name applies.
  path_attributes = { :column_family => column_family }
  if is_super(column_family)
    path_attributes[:super_column] = column
    path_attributes[:column] = sub_column
  else
    path_attributes[:column] = column
  end
  column_path = CassandraThrift::ColumnPath.new(path_attributes)

  timestamp = options[:timestamp] || Time.stamp
  remove_args = [key, column_path, timestamp, options[:consistency]]

  if @batch
    @batch << [:remove, remove_args]
  else
    _remove(*remove_args)
  end
end
138
+
139
# Remove every row of the requested column family by calling #remove for
# each key yielded by +each_key+. +options+ is handed to #remove unchanged
# (<tt>:consistency</tt>, <tt>:timestamp</tt>).
# FIXME May not currently delete all records without multiple calls. Waiting
# for ranged remove support in Cassandra.
def clear_column_family!(column_family, options = {})
  each_key(column_family) { |key| remove(column_family, key, options) }
end
148
+
149
# Remove all rows in the keyspace by running #clear_column_family! on every
# column family named in the schema. +options+ is passed through unchanged
# (<tt>:consistency</tt>, <tt>:timestamp</tt>).
# FIXME May not currently delete all records without multiple calls. Waiting
# for ranged remove support in Cassandra.
def clear_keyspace!(options = {})
  schema.keys.each do |column_family|
    clear_column_family!(column_family, options)
  end
end
156
+
157
+ ### Read
158
+
159
# Count the elements at the column_family:key:[super_column] path you
# request. Only the <tt>:consistency</tt> option is forwarded to the
# underlying count call.
def count_columns(column_family, key, *columns_and_options)
  params = extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  column_family, super_column, _, options = params

  _count_columns(column_family, key, super_column, options[:consistency])
end
166
+
167
# Multi-key version of Cassandra#count_columns. Returns an OrderedHash of
# key => count, in the order of +keys+. Any extra arguments are forwarded
# to #count_columns as-is.
# FIXME Not real multi; needs server support
def multi_count_columns(column_family, keys, *options)
  pairs = keys.map do |key|
    [key, count_columns(column_family, key, *options)]
  end

  OrderedHash[*pairs._flatten_once]
end
173
+
174
# Return a list of single values for the elements at the
# column_family:key:column[s]:[sub_columns] path you request. Only the
# <tt>:consistency</tt> option is forwarded to the underlying fetch.
def get_columns(column_family, key, *columns_and_options)
  params = extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  column_family, columns, sub_columns, options = params

  _get_columns(column_family, key, columns, sub_columns, options[:consistency])
end
182
+
183
# Multi-key version of Cassandra#get_columns. Returns an OrderedHash of
# key => column values, in the order of +keys+. Any extra arguments are
# forwarded to #get_columns as-is.
# FIXME Not real multi; needs to use a Column predicate
def multi_get_columns(column_family, keys, *options)
  pairs = keys.map do |key|
    [key, get_columns(column_family, key, *options)]
  end

  OrderedHash[*pairs._flatten_once]
end
189
+
190
# Return a hash (actually, a Cassandra::OrderedHash) or a single value
# representing the element at the column_family:key:[column]:[sub_column]
# path you request. Implemented as a one-key #multi_get, so it accepts the
# same options: <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>,
# <tt>:reversed</tt>, and <tt>:consistency</tt>.
def get(column_family, key, *columns_and_options)
  rows = multi_get(column_family, [key], *columns_and_options)
  rows[key]
end
197
+
198
# Multi-key version of Cassandra#get. Supports options <tt>:count</tt>,
# <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and
# <tt>:consistency</tt>.
#
# The result is an OrderedHash with one entry per requested key, in the
# order the keys were given. A key with no result maps to an empty
# OrderedHash when the family is a super column family and no sub column
# was requested, and to +nil+ otherwise.
def multi_get(column_family, keys, *columns_and_options)
  column_family, column, sub_column, options =
    extract_and_validate_params(column_family, keys, columns_and_options, READ_DEFAULTS)

  fetched = _multiget(column_family, keys, column, sub_column,
                      options[:count], options[:start], options[:finish],
                      options[:reversed], options[:consistency])

  # Re-key the result in caller order.
  keys.inject(OrderedHash.new) do |ordered, key|
    ordered[key] = fetched[key] || (OrderedHash.new if is_super(column_family) && !sub_column)
    ordered
  end
end
210
+
211
# Check whether the column_family:key:[column]:[sub_column] path you
# request exists. Supports the <tt>:consistency</tt> option.
#
# The return value is whatever the one-row fetch holds for +key+, so treat
# it as truthy/falsy rather than as a strict boolean.
def exists?(column_family, key, *columns_and_options)
  column_family, column, sub_column, options =
    extract_and_validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  consistency = options[:consistency]

  rows =
    if column
      _multiget(column_family, [key], column, sub_column, 1, nil, nil, nil, consistency)
    else
      # Row-level check: ask for at most one column of the row.
      _multiget(column_family, [key], nil, nil, 1, '', '', false, consistency)
    end

  rows[key]
end
222
+
223
# Return a list of keys in the column_family you request. Requires the
# table to be partitioned with OrderPreservingHash. Supports the
# <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and
# <tt>:consistency</tt> options. <tt>:start</tt> and <tt>:finish</tt> are
# converted with +to_s+, so +nil+ becomes the empty string.
def get_range(column_family, options = {})
  column_family, _, _, options =
    extract_and_validate_params(column_family, "", [options], READ_DEFAULTS)

  start_key  = options[:start].to_s
  finish_key = options[:finish].to_s
  _get_range(column_family, start_key, finish_key, options[:count], options[:consistency])
end
232
+
233
# Count all rows in the column_family you request. Requires the table
# to be partitioned with OrderPreservingHash. Supports the <tt>:start</tt>,
# <tt>:finish</tt>, and <tt>:consistency</tt> options.
#
# Only rows that carry at least one column are counted.
def count_range(column_family, options = {})
  rows = get_range(column_family, options)
  populated = rows.select { |row| row.columns.length > 0 }
  populated.length
end
239
+
240
# Open a batch operation and yield. Inserts and deletes issued inside the
# block are queued in <tt>@batch</tt> and sent, in order, once the block
# returns. The queue is always discarded afterwards, even if the block or a
# send raises.
#
# NOTE(review): +options+ is validated against WRITE_DEFAULTS but never
# used afterwards; each queued mutation is sent with the consistency it
# was queued with. Confirm whether a batch-wide <tt>:consistency</tt>
# override was intended.
def batch(options = {})
  _, _, _, options =
    extract_and_validate_params(schema.keys.first, "", [options], WRITE_DEFAULTS)

  @batch = []
  yield
  compact_mutations!

  @batch.each do |mutation|
    if mutation.first == :remove
      # Queued by #remove as [:remove, argument_list].
      _remove(*mutation[1])
    else
      # Queued by #insert as [mutation_map, consistency].
      _mutate(*mutation)
    end
  end
ensure
  @batch = nil
end
263
+
264
+ protected
265
+
266
# Name the method that invoked this helper, formatted as "Class#method".
#
# The method name is taken from the first +caller+ frame. Both the older
# "in `meth'" and the newer "in 'Class#meth'" frame formats are accepted;
# any class prefix in the frame label is dropped, because the receiver's
# class is prepended here.
def calling_method
  label = caller[0].to_s[/in [`'](.*)'\z/, 1].to_s
  "#{self.class}##{label.split(/[#.]/).last}"
end
269
+
270
# Hook invoked by #batch before queued mutations are sent; meant to roll
# them up to improve atomicity. Currently a no-op that returns +nil+.
def compact_mutations!
  # TODO: re-do this rollup
end
274
+
275
# Return the keyspace description, a hash of column family name =>
# properties, memoized in <tt>@schema</tt>.
#
# With +load+ false the schema is never fetched: if it has not been loaded
# yet an empty Hash is returned, so the result has the same shape as a
# loaded schema.
def schema(load=true)
  return {} unless load || @schema

  @schema ||= client.describe_keyspace(@keyspace)
end
282
+
283
# Return the current connection, calling #reconnect! first when none is
# held yet.
def client
  if @client.nil?
    reconnect!
  end
  @client
end
287
+
288
# Open a fresh connection. The order matters: the server list is refreshed
# from #all_nodes first, the new client is built against that list, and
# only then is the keyspace verified.
def reconnect!
  @servers = all_nodes
  @client  = new_client
  check_keyspace
end
293
+
294
# Verify that the configured keyspace is among those the server reports.
# Returns +nil+ when it is.
#
# Raises AccessError, listing the available keyspaces, when it is not.
def check_keyspace
  available = client.get_string_list_property("keyspaces")
  return if available.include?(@keyspace)

  raise AccessError, "Keyspace #{@keyspace.inspect} not found. Available: #{available.inspect}"
end
299
+
300
# Build a new, independent Thrift client of the configured class for the
# current server list and client options.
def new_client
  thrift_client_class.new(
    CassandraThrift::Cassandra::Client,
    @servers,
    @thrift_client_options
  )
end
303
+
304
# Return the "host:port" list to connect to.
#
# With auto-discovery disabled this is just the configured server list.
# Otherwise the 'token map' property (a JSON object whose values are node
# addresses) is fetched through a temporary client, and each address is
# paired with the port of the first configured server. The temporary
# client is always disconnected afterwards so discovery does not leak a
# connection.
def all_nodes
  return @servers unless @auto_discover_nodes

  discovery_client = new_client
  begin
    ips  = ::JSON.parse(discovery_client.get_string_property('token map')).values
    port = @servers.first.split(':').last
    ips.map { |ip| "#{ip}:#{port}" }
  ensure
    discovery_client.disconnect!
  end
end
313
+
314
+ end
@@ -0,0 +1,106 @@
1
+
2
+ class Cassandra
3
+ # A bunch of crap, mostly related to introspecting on column types
4
+ module Columns #:nodoc:
5
+ private
6
+
7
# Whether +column_family+ is a super column family, i.e. its 'Type'
# property equals "Super".
#
# The answer is cached per family. Hash#fetch is used instead of
# <tt>||=</tt> so that a +false+ answer is cached as well, rather than
# being looked up again on every call.
def is_super(column_family)
  @is_super.fetch(column_family) do
    @is_super[column_family] = column_family_property(column_family, 'Type') == "Super"
  end
end
10
+
11
# Class used to wrap column names of +column_family+ (super column names
# for a super family), resolved from its "CompareWith" property and cached
# per family.
def column_name_class(column_family)
  cached = @column_name_class[column_family]
  return cached if cached

  @column_name_class[column_family] = column_name_class_for_key(column_family, "CompareWith")
end
14
+
15
# Class used to wrap sub column names of +column_family+, resolved from
# its "CompareSubcolumnsWith" property and cached per family.
def sub_column_name_class(column_family)
  cached = @sub_column_name_class[column_family]
  return cached if cached

  @sub_column_name_class[column_family] = column_name_class_for_key(column_family, "CompareSubcolumnsWith")
end
18
+
19
# Map the comparator stored under +comparator_key+ for +column_family+ to
# the Ruby class used for column names.
#
# Only the part after the last dot of the property is examined:
# "LongType" gives Long, "LexicalUUIDType" and "TimeUUIDType" give
# SimpleUUID::UUID, and anything else (including a missing property or a
# name without a dot) gives String.
def column_name_class_for_key(column_family, comparator_key)
  comparator = column_family_property(column_family, comparator_key)
  type_name = comparator.to_s[/.*\.(.*?)$/, 1]

  if type_name == "LongType"
    Long
  elsif ["LexicalUUIDType", "TimeUUIDType"].include?(type_name)
    SimpleUUID::UUID
  else
    String # UTF8, Ascii, Bytes, anything else
  end
end
29
+
30
# Look up the property +key+ of +column_family+ in the schema. Returns
# +nil+ when the family has no such property.
#
# Raises AccessError when the schema has no entry for +column_family+.
def column_family_property(column_family, key)
  definition = schema[column_family]
  raise AccessError, "Invalid column family \"#{column_family}\"" unless definition

  definition[key]
end
36
+
37
# Replace, in place, every value of +hash+ (an object responding to
# +column+) with that column's value, or with +nil+ when it holds no
# column. Returns +hash+.
def multi_column_to_hash!(hash)
  hash.each do |key, wrapper|
    column = wrapper.column
    hash[key] = column ? column.value : nil
  end
end
42
+
43
# Replace, in place, every value of +hash+ (a column list) with the hash
# built by #columns_to_hash for +column_family+. Returns +hash+.
def multi_columns_to_hash!(column_family, hash)
  hash.keys.each do |key|
    hash[key] = columns_to_hash(column_family, hash[key])
  end
  hash
end
48
+
49
# Replace, in place, every value of +hash+ (a sub column list) with the
# hash built by #sub_columns_to_hash for +column_family+. Returns +hash+.
def multi_sub_columns_to_hash!(column_family, hash)
  hash.keys.each do |key|
    hash[key] = sub_columns_to_hash(column_family, hash[key])
  end
  hash
end
54
+
55
# Convert +columns+ of +column_family+ to a hash, using the family's
# column name class for the top level and its sub column name class for
# any nested level.
def columns_to_hash(column_family, columns)
  name_class = column_name_class(column_family)
  sub_name_class = sub_column_name_class(column_family)
  columns_to_hash_for_classes(columns, name_class, sub_name_class)
end
58
+
59
# Convert a list of sub columns of +column_family+ to a hash keyed by the
# family's sub column name class.
def sub_columns_to_hash(column_family, columns)
  name_class = sub_column_name_class(column_family)
  columns_to_hash_for_classes(columns, name_class)
end
62
+
63
# Build an OrderedHash from a list of Thrift columns.
#
# Each entry may be a ColumnOrSuperColumn wrapper (unwrapped first, super
# column taking precedence), a SuperColumn or a Column. Keys are the entry
# names wrapped in +column_name_class+. A Column maps to its value; a
# SuperColumn maps to a nested hash of its columns keyed with
# +sub_column_name_class+; any other entry maps to +nil+.
def columns_to_hash_for_classes(columns, column_name_class, sub_column_name_class = nil)
  Array(columns).inject(OrderedHash.new) do |result, entry|
    if entry.is_a?(CassandraThrift::ColumnOrSuperColumn)
      entry = entry.super_column || entry.column
    end

    name = column_name_class.new(entry.name)
    result[name] =
      if entry.is_a?(CassandraThrift::SuperColumn)
        # One level down: the sub column class becomes the name class.
        columns_to_hash_for_classes(entry.columns, sub_column_name_class)
      elsif entry.is_a?(CassandraThrift::Column)
        entry.value
      end
    result
  end
end
76
+
77
# Build the Thrift Mutation that writes a single column of a standard
# column family. The column name is normalised through the family's
# column name class and serialised with +to_s+; +value+ is passed through
# untouched.
def _standard_insert_mutation(column_family, column_name, value, timestamp)
  column = CassandraThrift::Column.new(
    :name => column_name_class(column_family).new(column_name).to_s,
    :value => value,
    :timestamp => timestamp
  )
  wrapper = CassandraThrift::ColumnOrSuperColumn.new(:column => column)
  CassandraThrift::Mutation.new(:column_or_supercolumn => wrapper)
end
88
+
89
# Build the Thrift Mutation that writes one super column together with its
# sub columns. +sub_columns+ is a hash of sub column name => value. Names
# are normalised through the family's column name classes and serialised
# with +to_s+; sub column values are converted with +to_s+ as well.
def _super_insert_mutation(column_family, super_column_name, sub_columns, timestamp)
  super_name = column_name_class(column_family).new(super_column_name).to_s

  columns = sub_columns.map do |sub_column_name, sub_column_value|
    CassandraThrift::Column.new(
      :name => sub_column_name_class(column_family).new(sub_column_name).to_s,
      :value => sub_column_value.to_s,
      :timestamp => timestamp
    )
  end

  super_column = CassandraThrift::SuperColumn.new(:name => super_name, :columns => columns)
  wrapper = CassandraThrift::ColumnOrSuperColumn.new(:super_column => super_column)
  CassandraThrift::Mutation.new(:column_or_supercolumn => wrapper)
end
105
+ end
106
+ end