cassandra 0.5.6 → 0.5.6.1

@@ -0,0 +1,226 @@
+ <!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~     http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+ <Storage>
+   <!--======================================================================-->
+   <!-- Basic Configuration                                                  -->
+   <!--======================================================================-->
+   <ClusterName>Test</ClusterName>
+
+   <!-- Tables and ColumnFamilies
+        Think of a table as a namespace, not a relational table.
+        (ColumnFamilies are closer in meaning to those.)
+
+        There is an implicit table named 'system' for Cassandra internals.
+   -->
+   <Keyspaces>
+     <Keyspace Name="Twitter">
+       <KeysCachedFraction>0.01</KeysCachedFraction>
+       <ColumnFamily CompareWith="UTF8Type" Name="Users" />
+       <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
+       <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
+       <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
+       <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
+       <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
+       <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
+     </Keyspace>
+
+     <Keyspace Name="Multiblog">
+       <KeysCachedFraction>0.01</KeysCachedFraction>
+       <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
+       <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
+     </Keyspace>
+
+     <Keyspace Name="MultiblogLong">
+       <KeysCachedFraction>0.01</KeysCachedFraction>
+       <ColumnFamily CompareWith="LongType" Name="Blogs"/>
+       <ColumnFamily CompareWith="LongType" Name="Comments"/>
+     </Keyspace>
+   </Keyspaces>
+
+   <!-- Partitioner: any IPartitioner may be used, including your own,
+        as long as it is on the classpath. Out of the box,
+        Cassandra provides
+        org.apache.cassandra.dht.RandomPartitioner and
+        org.apache.cassandra.dht.OrderPreservingPartitioner.
+        Range queries require OrderPreservingPartitioner or a subclass.
+
+        Warning! Changing this parameter requires wiping your data directories,
+        since the partitioner can modify the sstable on-disk format.
+   -->
+   <Partitioner>org.apache.cassandra.dht.OrderPreservingPartitioner</Partitioner>
+
+   <!-- If you are using the OrderPreservingPartitioner and you know your key
+        distribution, you can specify the token for this node to use.
+        (Keys are sent to the node with the "closest" token, so distributing
+        your tokens equally along the key distribution space will spread
+        keys evenly across your cluster.) This setting is only checked the
+        first time a node is started.
+
+        This can also be useful with RandomPartitioner to force equal
+        spacing of tokens around the hash space, especially for clusters
+        with a small number of nodes (see the sketch after this file). -->
+   <InitialToken></InitialToken>
+
+
+   <!-- EndPointSnitch: the class that implements IEndPointSnitch, used to
+        determine whether two endpoints are in the same data center or on
+        the same rack. Out of the box, Cassandra provides
+        org.apache.cassandra.locator.EndPointSnitch
+   -->
+   <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+
+   <!-- Strategy: the class that implements IReplicaPlacementStrategy, which
+        controls how replicas are placed.
+        Out of the box, Cassandra provides
+        org.apache.cassandra.locator.RackUnawareStrategy
+        org.apache.cassandra.locator.RackAwareStrategy
+        (the latter places one replica in a different data center, and the
+        others on different racks in the same one.)
+   -->
+   <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+
+   <!-- Number of replicas of the data -->
+   <ReplicationFactor>1</ReplicationFactor>
+
+   <!-- Directories: Specify where Cassandra should store different data on disk.
+        Keep the data disks and the CommitLog disks separate for best performance.
+   -->
+   <CommitLogDirectory>data/commitlog</CommitLogDirectory>
+   <DataFileDirectories>
+     <DataFileDirectory>data/data</DataFileDirectory>
+   </DataFileDirectories>
+   <CalloutLocation>data/callouts</CalloutLocation>
+   <BootstrapFileDirectory>data/bootstrap</BootstrapFileDirectory>
+   <StagingFileDirectory>data/staging</StagingFileDirectory>
+
+   <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
+        this list of hosts to find each other and learn the topology of the ring.
+        You must change this if you are running multiple nodes!
+   -->
+   <Seeds>
+     <Seed>127.0.0.1</Seed>
+   </Seeds>
+
+
+   <!-- Miscellaneous -->
+
+   <!-- Time to wait for a reply from other nodes before failing the command -->
+   <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
+   <!-- Size to allow the commitlog to grow to before creating a new segment -->
+   <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
+
+
+   <!-- Local hosts and ports -->
+
+   <!-- Address to bind to and tell other nodes to connect to.
+        You _must_ change this if you want multiple nodes to be able
+        to communicate!
+
+        Leaving it blank leaves it up to InetAddress.getLocalHost().
+        This will always do the Right Thing *if* the node is properly
+        configured (hostname, name resolution, etc.), and the Right
+        Thing is to use the address associated with the hostname (it
+        might not be). -->
+   <ListenAddress>localhost</ListenAddress>
+   <!-- TCP port, for commands and data -->
+   <StoragePort>7000</StoragePort>
+   <!-- UDP port, for membership communications (gossip) -->
+   <ControlPort>7001</ControlPort>
+
+   <!-- The address to bind the Thrift RPC service to. Unlike
+        ListenAddress above, you *can* specify 0.0.0.0 here if you want
+        Thrift to listen on all interfaces.
+
+        Leaving this blank has the same effect as it does for ListenAddress
+        (i.e. it will be based on the configured hostname of the node).
+   -->
+   <ThriftAddress>localhost</ThriftAddress>
+   <!-- Thrift RPC port (the port clients connect to). -->
+   <ThriftPort>9160</ThriftPort>
+
+
+   <!--======================================================================-->
+   <!-- Memory, Disk, and Performance                                        -->
+   <!--======================================================================-->
+
+   <!-- Add column indexes to a row after its contents reach this size -->
+   <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
+
+   <!--
+        The maximum amount of data to store in memory before flushing to
+        disk. Note: there is one memtable per column family, and this threshold
+        is based solely on the amount of data stored, not actual heap memory
+        usage (there is some overhead in indexing the columns).
+   -->
+   <MemtableSizeInMB>32</MemtableSizeInMB>
+
+   <!--
+        The maximum number of columns (in millions) to store in memory
+        before flushing to disk. This is also a per-memtable setting.
+        Use with MemtableSizeInMB to tune memory usage.
+   -->
+   <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
+
+   <!-- Unlike in most systems, writes in Cassandra are faster than
+        reads, so you can afford more of those in parallel.
+        A good rule of thumb is 2 concurrent reads per processor core.
+        You especially want more concurrent writes if you are using
+        CommitLogSync + CommitLogSyncDelay. -->
+   <ConcurrentReads>8</ConcurrentReads>
+   <ConcurrentWrites>32</ConcurrentWrites>
+
+   <!-- CommitLogSync may be either "periodic" or "batch."
+        When in batch mode, Cassandra won't ack writes until the commit log
+        has been fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
+        milliseconds for other writes before performing the sync.
+
+        This is less necessary in Cassandra
+        than in traditional databases, since replication reduces the
+        odds of losing data from a failure after writing the log
+        entry but before it actually reaches the disk. So the other
+        option is "periodic," where writes may be acked immediately
+        and the CommitLog is simply synced every CommitLogSyncPeriodInMS
+        milliseconds.
+   -->
+   <CommitLogSync>periodic</CommitLogSync>
+   <!-- Interval at which to perform syncs of the CommitLog in periodic
+        mode. Usually the default of 1000ms is fine; increase it
+        only if the CommitLog PendingTasks backlog in JMX shows that
+        you are frequently scheduling a second sync while the first
+        has not yet been processed.
+   -->
+   <CommitLogSyncPeriodInMS>1000</CommitLogSyncPeriodInMS>
+   <!-- Delay (in milliseconds) during which additional commit log
+        entries may be written before fsync in batch mode. This will increase
+        latency slightly, but can vastly improve throughput where
+        there are many writers. Set to zero to disable
+        (each entry will be synced individually).
+        Reasonable values range from a minimal 0.1 to 10 or even more
+        if throughput matters more than latency.
+   -->
+   <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
+
+   <!-- Time to wait before garbage-collecting deletion markers (tombstones).
+        Set this to a large enough value that you are confident
+        that the deletion marker will be propagated to all replicas
+        by the time this many seconds has elapsed, even in the
+        face of hardware failures. The default value is ten days.
+   -->
+   <GCGraceSeconds>864000</GCGraceSeconds>
+ </Storage>
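The InitialToken comment above mentions forcing equal token spacing under RandomPartitioner. A minimal sketch of computing such tokens, assuming this version's RandomPartitioner hashes keys into the MD5 space 0..2**127 (that range, the node count, and the script itself are illustrative, not part of the gem):

    # Hypothetical helper: print evenly spaced <InitialToken> values
    # for an N-node cluster using RandomPartitioner.
    node_count = 3
    (0...node_count).each do |i|
      puts "node #{i}: <InitialToken>#{i * (2**127 / node_count)}</InitialToken>"
    end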
data/lib/cassandra.rb ADDED
@@ -0,0 +1,23 @@
+
+ require 'zlib'
+ require 'rubygems'
+ require 'thrift'
+
+ HERE = File.expand_path(File.dirname(__FILE__))
+
+ $LOAD_PATH << "#{HERE}/../vendor/gen-rb"
+ require "#{HERE}/../vendor/gen-rb/cassandra"
+
+ $LOAD_PATH << "#{HERE}"
+ require 'cassandra/array'
+ require 'cassandra/time'
+ require 'cassandra/comparable'
+ require 'cassandra/uuid'
+ require 'cassandra/long'
+ require 'cassandra/safe_client'
+ require 'cassandra/ordered_hash'
+ require 'cassandra/columns'
+ require 'cassandra/protocol'
+ require 'cassandra/cassandra'
+ require 'cassandra/constants'
+ require 'cassandra/debug' if ENV['DEBUG']
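This entry point loads the Thrift-generated bindings from vendor/gen-rb before the gem's own classes, which reference CassandraThrift constants as soon as they are defined. A minimal smoke test, assuming a local node running the storage-conf shown earlier:

    require 'cassandra'

    client = Cassandra.new('Twitter')  # host and port default to 127.0.0.1:9160
    client.exists?(:Statuses, '1')     # nil when the row is absent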
data/lib/cassandra/array.rb ADDED
@@ -0,0 +1,8 @@
+
+ class Array
+   def _flatten_once
+     result = []
+     each { |el| result.concat(Array(el)) }
+     result
+   end
+ end
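Unlike Array#flatten, _flatten_once flattens exactly one level, which is what the multi_* methods later in this diff rely on when splatting [key, value] pairs into OrderedHash[]. Its behavior, traced from the method above:

    [[1, 2], [3], 4]._flatten_once   # => [1, 2, 3, 4]
    [[1, [2]], 3]._flatten_once      # => [1, [2], 3] -- deeper nesting survives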
data/lib/cassandra/cassandra.rb ADDED
@@ -0,0 +1,306 @@
+
+ =begin rdoc
+ Create a new Cassandra client instance. Accepts a keyspace name, and optional host and port.
+
+   client = Cassandra.new('twitter', '127.0.0.1', 9160)
+
+ You can then make calls to the server via the <tt>client</tt> instance.
+
+   client.insert(:UserRelationships, "5", {"user_timeline" => {UUID.new => "1"}})
+   client.get(:UserRelationships, "5", "user_timeline")
+
+ For read methods, valid option parameters are:
+
+ <tt>:count</tt>:: How many results to return. Defaults to 100.
+ <tt>:start</tt>:: Column name token at which to start iterating, inclusive. Defaults to nil, which means the first column in the collation order.
+ <tt>:finish</tt>:: Column name token at which to stop iterating, inclusive. Defaults to nil, which means no boundary.
+ <tt>:reversed</tt>:: Swap the direction of the collation order.
+ <tt>:consistency</tt>:: The consistency level of the request. Defaults to <tt>Cassandra::Consistency::ONE</tt> (one node must respond). Other valid options are <tt>Cassandra::Consistency::ZERO</tt>, <tt>Cassandra::Consistency::QUORUM</tt>, and <tt>Cassandra::Consistency::ALL</tt>.
+
+ Note that some read options have no relevance in some contexts.
+
+ For write methods, valid option parameters are:
+
+ <tt>:timestamp</tt>:: The transaction timestamp. Defaults to the current time in milliseconds. This is used for conflict resolution by the server; you normally never need to change it.
+ <tt>:consistency</tt>:: See above.
+
+ =end rdoc
+
+ class Cassandra
+   include Columns
+   include Protocol
+
+   class AccessError < StandardError #:nodoc:
+   end
+
+   module Consistency
+     include CassandraThrift::ConsistencyLevel
+   end
+
+   MAX_INT = 2**31 - 1
+
+   WRITE_DEFAULTS = {
+     :count => MAX_INT,
+     :timestamp => nil,
+     :consistency => Consistency::ONE
+   }.freeze
+
+   READ_DEFAULTS = {
+     :count => 100,
+     :start => nil,
+     :finish => nil,
+     :reversed => false,
+     :consistency => Consistency::ONE
+   }.freeze
+
+   attr_reader :keyspace, :host, :port, :serializer, :transport, :client, :schema
+
+   # Instantiate a new Cassandra and open the connection.
+   def initialize(keyspace, host = '127.0.0.1', port = 9160, buffer = true)
+     @is_super = {}
+     @column_name_class = {}
+     @sub_column_name_class = {}
+
+     @keyspace = keyspace
+     @host = host
+     @port = port
+
+     transport = Thrift::BufferedTransport.new(Thrift::Socket.new(@host, @port))
+     transport.open
+
+     @client = CassandraThrift::Cassandra::SafeClient.new(
+       CassandraThrift::Cassandra::Client.new(Thrift::BinaryProtocol.new(transport)),
+       transport,
+       !buffer)
+
+     keyspaces = @client.get_string_list_property("keyspaces")
+     unless keyspaces.include?(@keyspace)
+       raise AccessError, "Keyspace #{@keyspace.inspect} not found. Available: #{keyspaces.inspect}"
+     end
+
+     @schema = @client.describe_keyspace(@keyspace)
+   end
+
+   def inspect
+     "#<Cassandra:#{object_id}, @keyspace=#{keyspace.inspect}, @schema={#{
+       schema.map { |name, hash| ":#{name} => #{hash['type'].inspect}" }.join(', ')
+     }}, @host=#{host.inspect}, @port=#{port}>"
+   end
+
+   ### Write
+
+   # Insert a row for a key. Pass a flat hash for a regular column family, and
+   # a nested hash for a super column family. Supports the <tt>:consistency</tt>
+   # and <tt>:timestamp</tt> options.
+   def insert(column_family, key, hash, options = {})
+     column_family, _, _, options =
+       validate_params(column_family, key, [options], WRITE_DEFAULTS)
+
+     args = [column_family, hash, options[:timestamp] || Time.stamp]
+     columns = is_super(column_family) ? hash_to_super_columns(*args) : hash_to_columns(*args)
+     mutation = CassandraThrift::BatchMutation.new(
+       :key => key,
+       :cfmap => {column_family => columns},
+       :column_paths => [])
+
+     @batch ? @batch << mutation : _mutate([mutation], options[:consistency])
+   end
+
+   ### Delete
+
+   # Remove the element at the column_family:key:[column]:[sub_column]
+   # path you request. Supports the <tt>:consistency</tt> and <tt>:timestamp</tt>
+   # options.
+   def remove(column_family, key, *columns_and_options)
+     column_family, column, sub_column, options =
+       validate_params(column_family, key, columns_and_options, WRITE_DEFAULTS)
+
+     args = {:column_family => column_family, :timestamp => options[:timestamp] || Time.stamp}
+     columns = is_super(column_family) ? {:super_column => column, :column => sub_column} : {:column => column}
+     mutation = CassandraThrift::BatchMutation.new(
+       :key => key,
+       :cfmap => {},
+       :column_paths => [CassandraThrift::ColumnPath.new(args.merge(columns))])
+
+     @batch ? @batch << mutation : _mutate([mutation], options[:consistency])
+   end
+
+   # Remove all rows in the column family you request. Supports options
+   # <tt>:consistency</tt> and <tt>:timestamp</tt>.
+   # FIXME May not currently delete all records without multiple calls. Waiting
+   # for ranged remove support in Cassandra.
+   def clear_column_family!(column_family, options = {})
+     get_range(column_family).each { |key| remove(column_family, key, options) }
+   end
+
+   # Remove all rows in the keyspace. Supports options <tt>:consistency</tt> and
+   # <tt>:timestamp</tt>.
+   # FIXME May not currently delete all records without multiple calls. Waiting
+   # for ranged remove support in Cassandra.
+   def clear_keyspace!(options = {})
+     @schema.keys.each { |column_family| clear_column_family!(column_family, options) }
+   end
+
+   ### Read
+
+   # Count the elements at the column_family:key:[super_column] path you
+   # request. Supports the <tt>:consistency</tt> option.
+   def count_columns(column_family, key, *columns_and_options)
+     column_family, super_column, _, options =
+       validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
+     _count_columns(column_family, key, super_column, options[:consistency])
+   end
+
+   # Multi-key version of Cassandra#count_columns. Supports options <tt>:count</tt>,
+   # <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
+   # FIXME Not real multi; needs server support
+   def multi_count_columns(column_family, keys, *options)
+     OrderedHash[*keys.map { |key| [key, count_columns(column_family, key, *options)] }._flatten_once]
+   end
+
+   # Return a list of single values for the elements at the
+   # column_family:key:column[s]:[sub_columns] path you request. Supports the
+   # <tt>:consistency</tt> option.
+   def get_columns(column_family, key, *columns_and_options)
+     column_family, columns, sub_columns, options =
+       validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
+     _get_columns(column_family, key, columns, sub_columns, options[:consistency])
+   end
+
+   # Multi-key version of Cassandra#get_columns. Supports the <tt>:consistency</tt>
+   # option.
+   # FIXME Not real multi; needs to use a Column predicate
+   def multi_get_columns(column_family, keys, *options)
+     OrderedHash[*keys.map { |key| [key, get_columns(column_family, key, *options)] }._flatten_once]
+   end
+
+   # Return a hash (actually, a Cassandra::OrderedHash) or a single value
+   # representing the element at the column_family:key:[column]:[sub_column]
+   # path you request. Supports options <tt>:count</tt>, <tt>:start</tt>,
+   # <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
+   def get(column_family, key, *columns_and_options)
+     multi_get(column_family, [key], *columns_and_options)[key]
+   end
+
+   # Multi-key version of Cassandra#get. Supports options <tt>:count</tt>,
+   # <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
+   def multi_get(column_family, keys, *columns_and_options)
+     column_family, column, sub_column, options =
+       validate_params(column_family, keys, columns_and_options, READ_DEFAULTS)
+
+     hash = _multiget(column_family, keys, column, sub_column, options[:count], options[:start], options[:finish], options[:reversed], options[:consistency])
+     # Restore order
+     ordered_hash = OrderedHash.new
+     keys.each { |key| ordered_hash[key] = hash[key] || (OrderedHash.new if is_super(column_family) and !sub_column) }
+     ordered_hash
+   end
+
+   # Return true if the column_family:key:[column]:[sub_column] path you
+   # request exists. Supports the <tt>:consistency</tt> option.
+   def exists?(column_family, key, *columns_and_options)
+     column_family, column, sub_column, options =
+       validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
+     _multiget(column_family, [key], column, sub_column, 1, nil, nil, nil, options[:consistency])[key]
+   end
+
+   # Return a list of keys in the column_family you request. Requires the
+   # table to be partitioned with OrderPreservingPartitioner. Supports the
+   # <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and <tt>:consistency</tt>
+   # options.
+   def get_range(column_family, options = {})
+     column_family, _, _, options =
+       validate_params(column_family, "", [options], READ_DEFAULTS)
+     _get_range(column_family, options[:start].to_s, options[:finish].to_s, options[:count], options[:consistency])
+   end
+
+   # Count all rows in the column_family you request. Requires the table
+   # to be partitioned with OrderPreservingPartitioner. Supports the <tt>:start</tt>,
+   # <tt>:finish</tt>, and <tt>:consistency</tt> options.
+   # FIXME will count only MAX_INT records
+   def count_range(column_family, options = {})
+     get_range(column_family, options.merge(:count => MAX_INT)).size
+   end
+
+   # Open a batch operation and yield. Inserts and deletes will be queued until
+   # the block closes, and then sent atomically to the server. Supports the
+   # <tt>:consistency</tt> option, which overrides the consistency set in
+   # the individual commands.
+   def batch(options = {})
+     _, _, _, options =
+       validate_params(@schema.keys.first, "", [options], WRITE_DEFAULTS)
+
+     @batch = []
+     yield
+     compact_mutations!
+     _mutate(@batch, options[:consistency])
+     @batch = nil
+   end
+
+   private
+
+   # Extract and validate options.
+   # FIXME Should be done as a decorator
+   def validate_params(column_family, keys, args, options)
+     options = options.dup
+     column_family = column_family.to_s
+
+     # Keys
+     Array(keys).each do |key|
+       raise ArgumentError, "Key #{key.inspect} must be a String for #{calling_method}" unless key.is_a?(String)
+     end
+
+     # Options
+     if args.last.is_a?(Hash)
+       extras = args.last.keys - options.keys
+       raise ArgumentError, "Invalid options #{extras.inspect[1..-2]} for #{calling_method}" if extras.any?
+       options.merge!(args.pop)
+     end
+
+     # Ranges
+     column, sub_column = args[0], args[1]
+     klass, sub_klass = column_name_class(column_family), sub_column_name_class(column_family)
+     range_class = column ? sub_klass : klass
+     options[:start] = options[:start] ? range_class.new(options[:start]).to_s : ""
+     options[:finish] = options[:finish] ? range_class.new(options[:finish]).to_s : ""
+
+     [column_family, s_map(column, klass), s_map(sub_column, sub_klass), options]
+   end
+
+   def calling_method
+     "#{self.class}##{caller[0].split('`').last[0..-3]}"
+   end
+
+   # Convert stuff to strings.
+   def s_map(el, klass)
+     case el
+     when Array then el.map { |i| s_map(i, klass) }
+     when NilClass then nil
+     else
+       klass.new(el).to_s
+     end
+   end
+
+   # Roll up queued mutations, to improve atomicity.
+   def compact_mutations!
+     mutations = {}
+
+     # Nested hash merge
+     @batch.each do |m|
+       if mutation = mutations[m.key]
+         # Inserts
+         if columns = mutation.cfmap[m.cfmap.keys.first]
+           columns.concat(m.cfmap.values.first)
+         else
+           mutation.cfmap.merge!(m.cfmap)
+         end
+         # Deletes
+         mutation.column_paths.concat(m.column_paths)
+       else
+         mutations[m.key] = m
+       end
+     end
+
+     # FIXME Return atomic thrift thingy
+     @batch = mutations.values
+   end
+ end
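Putting the batch API and the rdoc's read options together, a short usage sketch against the Twitter keyspace from the storage-conf above (keys and column values are illustrative):

    client = Cassandra.new('Twitter')

    # Queued mutations are compacted per key and sent in a single _mutate
    # call when the block closes.
    client.batch do
      client.insert(:Statuses, '3', {'body' => 'hello'})
      client.remove(:Statuses, '2')
    end

    # Read options from the rdoc: :count, :start/:finish, :reversed, :consistency.
    client.get(:Statuses, '3', :count => 10, :reversed => true,
               :consistency => Cassandra::Consistency::QUORUM)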