cassandra 0.5.6 → 0.5.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ <!--
2
+ ~ Licensed to the Apache Software Foundation (ASF) under one
3
+ ~ or more contributor license agreements. See the NOTICE file
4
+ ~ distributed with this work for additional information
5
+ ~ regarding copyright ownership. The ASF licenses this file
6
+ ~ to you under the Apache License, Version 2.0 (the
7
+ ~ "License"); you may not use this file except in compliance
8
+ ~ with the License. You may obtain a copy of the License at
9
+ ~
10
+ ~ http://www.apache.org/licenses/LICENSE-2.0
11
+ ~
12
+ ~ Unless required by applicable law or agreed to in writing,
13
+ ~ software distributed under the License is distributed on an
14
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ ~ KIND, either express or implied. See the License for the
16
+ ~ specific language governing permissions and limitations
17
+ ~ under the License.
18
+ -->
19
+ <Storage>
20
+ <!--======================================================================-->
21
+ <!-- Basic Configuration -->
22
+ <!--======================================================================-->
23
+ <ClusterName>Test</ClusterName>
24
+
25
+ <!-- Tables and ColumnFamilies
26
+ Think of a table as a namespace, not a relational table.
27
+ (ColumnFamilies are closer in meaning to those.)
28
+
29
+ There is an implicit table named 'system' for Cassandra internals.
30
+ -->
31
+ <Keyspaces>
32
+ <Keyspace Name="Twitter">
33
+ <KeysCachedFraction>0.01</KeysCachedFraction>
34
+ <ColumnFamily CompareWith="UTF8Type" Name="Users" />
35
+ <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
36
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
37
+ <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
38
+ <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
39
+ <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
40
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
41
+ </Keyspace>
42
+
43
+ <Keyspace Name="Multiblog">
44
+ <KeysCachedFraction>0.01</KeysCachedFraction>
45
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
46
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
47
+ </Keyspace>
48
+
49
+ <Keyspace Name="MultiblogLong">
50
+ <KeysCachedFraction>0.01</KeysCachedFraction>
51
+ <ColumnFamily CompareWith="LongType" Name="Blogs"/>
52
+ <ColumnFamily CompareWith="LongType" Name="Comments"/>
53
+ </Keyspace>
54
+ </Keyspaces>
55
+
56
+ <!-- Partitioner: any IPartitioner may be used, including your own
57
+ as long as it is on the classpath. Out of the box,
58
+ Cassandra provides
59
+ org.apache.cassandra.dht.RandomPartitioner and
60
+ org.apache.cassandra.dht.OrderPreservingPartitioner.
61
+ Range queries require using OrderPreservingPartitioner or a subclass.
62
+
63
+ Achtung! Changing this parameter requires wiping your data directories,
64
+ since the partitioner can modify the sstable on-disk format.
65
+ -->
66
+ <Partitioner>org.apache.cassandra.dht.OrderPreservingPartitioner</Partitioner>
67
+
68
+ <!-- If you are using the OrderPreservingPartitioner and you know your key
69
+ distribution, you can specify the token for this node to use.
70
+ (Keys are sent to the node with the "closest" token, so distributing
71
+ your tokens equally along the key distribution space will spread
72
+ keys evenly across your cluster.) This setting is only checked the
73
+ first time a node is started.
74
+
75
+ This can also be useful with RandomPartitioner to force equal
76
+ spacing of tokens around the hash space, especially for
77
+ clusters with a small number of nodes. -->
78
+ <InitialToken></InitialToken>
79
+
80
+
81
+ <!-- EndPointSnitch: Setting this to the class that implements IEndPointSnitch
82
+ which will see if two endpoints are in the same data center or on the same rack.
83
+ Out of the box, Cassandra provides
84
+ org.apache.cassandra.locator.EndPointSnitch
85
+ -->
86
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
87
+
88
+ <!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
89
+ will change the way the node picker works.
90
+ Out of the box, Cassandra provides
91
+ org.apache.cassandra.locator.RackUnawareStrategy
92
+ org.apache.cassandra.locator.RackAwareStrategy
93
+ (place one replica in a different datacenter, and the
94
+ others on different racks in the same one.)
95
+ -->
96
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
97
+
98
+ <!-- Number of replicas of the data-->
99
+ <ReplicationFactor>1</ReplicationFactor>
100
+
101
+ <!-- Directories: Specify where Cassandra should store different data on disk
102
+ Keep the data disks and the CommitLog disks separate for best performance
103
+ -->
104
+ <CommitLogDirectory>data/commitlog</CommitLogDirectory>
105
+ <DataFileDirectories>
106
+ <DataFileDirectory>data/data</DataFileDirectory>
107
+ </DataFileDirectories>
108
+ <CalloutLocation>data/callouts</CalloutLocation>
109
+ <BootstrapFileDirectory>data/bootstrap</BootstrapFileDirectory>
110
+ <StagingFileDirectory>data/staging</StagingFileDirectory>
111
+
112
+ <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
113
+ this list of hosts to find each other and learn the topology of the ring.
114
+ You must change this if you are running multiple nodes!
115
+ -->
116
+ <Seeds>
117
+ <Seed>127.0.0.1</Seed>
118
+ </Seeds>
119
+
120
+
121
+ <!-- Miscellaneous -->
122
+
123
+ <!-- time to wait for a reply from other nodes before failing the command -->
124
+ <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
125
+ <!-- size to allow commitlog to grow to before creating a new segment -->
126
+ <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
127
+
128
+
129
+ <!-- Local hosts and ports -->
130
+
131
+ <!-- Address to bind to and tell other nodes to connect to.
132
+ You _must_ change this if you want multiple nodes to be able
133
+ to communicate!
134
+
135
+ Leaving it blank leaves it up to InetAddress.getLocalHost().
136
+ This will always do the Right Thing *if* the node is properly
137
+ configured (hostname, name resolution, etc), and the Right
138
+ Thing is to use the address associated with the hostname (it
139
+ might not be). -->
140
+ <ListenAddress>localhost</ListenAddress>
141
+ <!-- TCP port, for commands and data -->
142
+ <StoragePort>7000</StoragePort>
143
+ <!-- UDP port, for membership communications (gossip) -->
144
+ <ControlPort>7001</ControlPort>
145
+
146
+ <!-- The address to bind the Thrift RPC service to. Unlike
147
+ ListenAddress above, you *can* specify 0.0.0.0 here if you want
148
+ Thrift to listen on all interfaces.
149
+
150
+ Leaving this blank has the same effect it does for ListenAddress,
151
+ (i.e. it will be based on the configured hostname of the node).
152
+ -->
153
+ <ThriftAddress>localhost</ThriftAddress>
154
+ <!-- Thrift RPC port (the port clients connect to). -->
155
+ <ThriftPort>9160</ThriftPort>
156
+
157
+
158
+ <!--======================================================================-->
159
+ <!-- Memory, Disk, and Performance -->
160
+ <!--======================================================================-->
161
+
162
+ <!-- Add column indexes to a row after its contents reach this size -->
163
+ <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
164
+
165
+ <!--
166
+ The maximum amount of data to store in memory before flushing to
167
+ disk. Note: There is one memtable per column family, and this threshold
168
+ is based solely on the amount of data stored, not actual heap memory
169
+ usage (there is some overhead in indexing the columns).
170
+ -->
171
+ <MemtableSizeInMB>32</MemtableSizeInMB>
172
+
173
+ <!--
174
+ The maximum number of columns in millions to store in memory
175
+ before flushing to disk. This is also a per-memtable setting.
176
+ Use with MemtableSizeInMB to tune memory usage.
177
+ -->
178
+ <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
179
+
180
+ <!-- Unlike most systems, in Cassandra writes are faster than
181
+ reads, so you can afford more of those in parallel.
182
+ A good rule of thumb is 2 concurrent reads per processor core.
183
+ You especially want more concurrent writes if you are using
184
+ CommitLogSync + CommitLogSyncDelay. -->
185
+ <ConcurrentReads>8</ConcurrentReads>
186
+ <ConcurrentWrites>32</ConcurrentWrites>
187
+
188
+ <!-- CommitLogSync may be either "periodic" or "batch."
189
+ When in batch mode, Cassandra won't ack writes until the commit log
190
+ has been fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
191
+ milliseconds for other writes, before performing the sync.
192
+
193
+ This is less necessary in Cassandra
194
+ than in traditional databases since replication reduces the
195
+ odds of losing data from a failure after writing the log
196
+ entry but before it actually reaches the disk. So the other
197
+ option is "periodic," where writes may be acked immediately
198
+ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
199
+ milliseconds.
200
+ -->
201
+ <CommitLogSync>periodic</CommitLogSync>
202
+ <!-- Interval at which to perform syncs of the CommitLog in periodic
203
+ mode. Usually the default of 1000ms is fine; increase it
204
+ only if the CommitLog PendingTasks backlog in jmx shows that
205
+ you are frequently scheduling a second sync while the first
206
+ has not yet been processed.
207
+ -->
208
+ <CommitLogSyncPeriodInMS>1000</CommitLogSyncPeriodInMS>
209
+ <!-- Delay (in milliseconds) during which additional commit log
210
+ entries may be written before fsync in batch mode. This will increase
211
+ latency slightly, but can vastly improve throughput where
212
+ there are many writers. Set to zero to disable
213
+ (each entry will be synced individually).
214
+ Reasonable values range from a minimal 0.1 to 10 or even more
215
+ if throughput matters more than latency.
216
+ -->
217
+ <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
218
+
219
+ <!-- Time to wait before garbage-collecting deletion markers.
220
+ Set this to a large enough value that you are confident
221
+ that the deletion marker will be propagated to all replicas
222
+ by the time this many seconds has elapsed, even in the
223
+ face of hardware failures. The default value is ten days.
224
+ -->
225
+ <GCGraceSeconds>864000</GCGraceSeconds>
226
+ </Storage>
data/lib/cassandra.rb ADDED
@@ -0,0 +1,23 @@
1
+
2
+ require 'zlib'
3
+ require 'rubygems'
4
+ require 'thrift'
5
+
6
+ HERE = File.expand_path(File.dirname(__FILE__))
7
+
8
+ $LOAD_PATH << "#{HERE}/../vendor/gen-rb"
9
+ require "#{HERE}/../vendor/gen-rb/cassandra"
10
+
11
+ $LOAD_PATH << "#{HERE}"
12
+ require 'cassandra/array'
13
+ require 'cassandra/time'
14
+ require 'cassandra/comparable'
15
+ require 'cassandra/uuid'
16
+ require 'cassandra/long'
17
+ require 'cassandra/safe_client'
18
+ require 'cassandra/ordered_hash'
19
+ require 'cassandra/columns'
20
+ require 'cassandra/protocol'
21
+ require 'cassandra/cassandra'
22
+ require 'cassandra/constants'
23
+ require 'cassandra/debug' if ENV['DEBUG']
@@ -0,0 +1,8 @@
1
+
2
class Array
  # Flatten exactly one level of nesting and return the result as a new Array.
  #
  # Only elements that are themselves Arrays are spliced into the result.
  # Every other element is appended untouched, so +nil+ entries are kept and
  # Hash elements are not exploded into key/value pairs (which is what
  # coercing each element with Array() would do). The receiver is not
  # modified.
  def _flatten_once
    result = []
    each do |el|
      if el.is_a?(Array)
        result.concat(el)
      else
        result << el
      end
    end
    result
  end
end
@@ -0,0 +1,306 @@
1
+
2
+ =begin rdoc
3
+ Create a new Cassandra client instance. Accepts a keyspace name, and optional host and port.
4
+
5
+ client = Cassandra.new('Twitter', '127.0.0.1', 9160)
6
+
7
+ You can then make calls to the server via the <tt>client</tt> instance.
8
+
9
+ client.insert(:UserRelationships, "5", {"user_timeline" => {UUID.new => "1"}})
10
+ client.get(:UserRelationships, "5", "user_timeline")
11
+
12
+ For read methods, valid option parameters are:
13
+
14
+ <tt>:count</tt>:: How many results to return. Defaults to 100.
15
+ <tt>:start</tt>:: Column name token at which to start iterating, inclusive. Defaults to nil, which means the first column in the collation order.
16
+ <tt>:finish</tt>:: Column name token at which to stop iterating, inclusive. Defaults to nil, which means no boundary.
17
+ <tt>:reversed</tt>:: Swap the direction of the collation order.
18
+ <tt>:consistency</tt>:: The consistency level of the request. Defaults to <tt>Cassandra::Consistency::ONE</tt> (one node must respond). Other valid options are <tt>Cassandra::Consistency::ZERO</tt>, <tt>Cassandra::Consistency::QUORUM</tt>, and <tt>Cassandra::Consistency::ALL</tt>.
19
+
20
+ Note that some read options have no relevance in some contexts.
21
+
22
+ For write methods, valid option parameters are:
23
+
24
+ <tt>:timestamp</tt>:: The transaction timestamp. Defaults to the current time in milliseconds. This is used for conflict resolution by the server; you normally never need to change it.
25
+ <tt>:consistency</tt>:: See above.
26
+
27
+ =end rdoc
28
+
29
+ class Cassandra
30
+ include Columns
31
+ include Protocol
32
+
33
# Raised by Cassandra#initialize when the requested keyspace is not among
# the keyspaces reported by the server.
class AccessError < StandardError #:nodoc:
end

# Exposes the Thrift-generated consistency level constants under
# Cassandra::Consistency.
module Consistency
  include CassandraThrift::ConsistencyLevel
end

# Largest signed 32-bit integer; used as the "no limit" count.
MAX_INT = (1 << 31) - 1

# Option defaults (and the set of accepted option keys) for write calls.
WRITE_DEFAULTS = {
  :count => MAX_INT,
  :timestamp => nil,
  :consistency => Consistency::ONE
}.freeze

# Option defaults (and the set of accepted option keys) for read calls.
READ_DEFAULTS = {
  :count => 100,
  :start => nil,
  :finish => nil,
  :reversed => false,
  :consistency => Consistency::ONE
}.freeze

attr_reader :keyspace, :host, :port, :serializer, :transport, :client, :schema
57
+
58
# Instantiate a new Cassandra and open the connection.
#
# keyspace:: Name of the keyspace to work with. It must be one of the
#            keyspaces the server lists.
# host::     Server address. Defaults to <tt>'127.0.0.1'</tt>.
# port::     Thrift port. Defaults to <tt>9160</tt>.
# buffer::   Negated and handed to the SafeClient as its third argument.
#
# Raises AccessError if the server does not list +keyspace+. When anything
# fails after the transport has been opened, the transport is closed again
# before the error is re-raised.
def initialize(keyspace, host = '127.0.0.1', port = 9160, buffer = true)
  @is_super = {}
  @column_name_class = {}
  @sub_column_name_class = {}

  @keyspace = keyspace
  @host = host
  @port = port

  # Stored in an instance variable so that the :transport reader returns the
  # live transport instead of nil.
  @transport = Thrift::BufferedTransport.new(Thrift::Socket.new(@host, @port))
  @transport.open

  begin
    @client = CassandraThrift::Cassandra::SafeClient.new(
      CassandraThrift::Cassandra::Client.new(Thrift::BinaryProtocol.new(@transport)),
      @transport,
      !buffer)

    keyspaces = @client.get_string_list_property("keyspaces")
    unless keyspaces.include?(@keyspace)
      raise AccessError, "Keyspace #{@keyspace.inspect} not found. Available: #{keyspaces.inspect}"
    end

    @schema = @client.describe_keyspace(@keyspace)
  rescue StandardError
    # Construction failed: nobody will ever hold this object, so do not
    # leave its socket open.
    @transport.close
    raise
  end
end
83
+
84
# Short, human-readable description of the client: object id, keyspace,
# one <tt>:name => type</tt> entry per schema element (taken from the
# element's <tt>'type'</tt> entry), host and port.
def inspect
  families = schema.map do |name, properties|
    ":#{name} => #{properties['type'].inspect}"
  end
  summary = families.join(', ')

  "#<Cassandra:#{object_id}, @keyspace=#{keyspace.inspect}, @schema={#{summary}}, @host=#{host.inspect}, @port=#{port}>"
end
89
+
90
+ ### Write
91
+
92
# Write the contents of +hash+ under +key+ in +column_family+. Use a flat
# hash for a regular column family and a nested hash for a super column
# family. Supports the <tt>:consistency</tt> and <tt>:timestamp</tt>
# options.
#
# Inside a #batch block the mutation is only queued; otherwise it is sent
# immediately.
def insert(column_family, key, hash, options = {})
  column_family, _, _, options =
    validate_params(column_family, key, [options], WRITE_DEFAULTS)

  timestamp = options[:timestamp] || Time.stamp
  columns =
    if is_super(column_family)
      hash_to_super_columns(column_family, hash, timestamp)
    else
      hash_to_columns(column_family, hash, timestamp)
    end

  mutation = CassandraThrift::BatchMutation.new(
    :key => key,
    :cfmap => {column_family => columns},
    :column_paths => [])

  return @batch << mutation if @batch

  _mutate([mutation], options[:consistency])
end
108
+
109
+ ## Delete
110
+
111
# Remove the element at the column_family:key:[column]:[sub_column]
# path you request. Supports the <tt>:consistency</tt> and <tt>:timestamp</tt>
# options.
#
# Inside a #batch block the deletion is only queued; otherwise it is sent
# immediately.
def remove(column_family, key, *columns_and_options)
  column_family, column, sub_column, options =
    validate_params(column_family, key, columns_and_options, WRITE_DEFAULTS)

  path = {:column_family => column_family, :timestamp => options[:timestamp] || Time.stamp}
  if is_super(column_family)
    path[:super_column] = column
    path[:column] = sub_column
  else
    path[:column] = column
  end

  # A deletion is a mutation with no inserts and a single column path.
  mutation = CassandraThrift::BatchMutation.new(
    :key => key,
    :cfmap => {},
    :column_paths => [CassandraThrift::ColumnPath.new(path)])

  if @batch
    @batch << mutation
  else
    _mutate([mutation], options[:consistency])
  end
end
127
+
128
# Remove every row that #get_range reports for +column_family+. +options+
# (<tt>:consistency</tt>, <tt>:timestamp</tt>) are handed to each #remove
# call. Returns the list of keys that were removed.
# FIXME May not currently delete all records without multiple calls. Waiting
# for ranged remove support in Cassandra.
def clear_column_family!(column_family, options = {})
  row_keys = get_range(column_family)
  row_keys.each do |row_key|
    remove(column_family, row_key, options)
  end
end
135
+
136
# Run #clear_column_family! for every column family in the schema, passing
# +options+ (<tt>:consistency</tt>, <tt>:timestamp</tt>) along. Returns the
# list of column family names.
# FIXME May not currently delete all records without multiple calls. Waiting
# for ranged remove support in Cassandra.
def clear_keyspace!(options = {})
  family_names = @schema.keys
  family_names.each do |family_name|
    clear_column_family!(family_name, options)
  end
end
143
+
144
+ ### Read
145
+
146
# Return the number of elements found at the
# column_family:key:[super_column] path. Supports the <tt>:consistency</tt>
# option.
def count_columns(column_family, key, *columns_and_options)
  params = validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  family, super_column, options = params[0], params[1], params[3]

  _count_columns(family, key, super_column, options[:consistency])
end
153
+
154
# Multi-key version of Cassandra#count_columns: returns an OrderedHash that
# maps each key, in the order given, to its column count. Any further
# arguments are forwarded unchanged to #count_columns.
# FIXME Not real multi; needs server support
def multi_count_columns(column_family, keys, *options)
  flat = []
  keys.each do |key|
    flat << key << count_columns(column_family, key, *options)
  end
  OrderedHash[*flat]
end
160
+
161
# Fetch the values stored at the column_family:key:column[s]:[sub_columns]
# path, as returned by the protocol-level lookup. Supports the
# <tt>:consistency</tt> option.
def get_columns(column_family, key, *columns_and_options)
  params = validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
  family, columns, sub_columns, options = params

  _get_columns(family, key, columns, sub_columns, options[:consistency])
end
169
+
170
# Multi-key version of Cassandra#get_columns: returns an OrderedHash that
# maps each key, in the order given, to the result of #get_columns for that
# key. Any further arguments (columns, <tt>:consistency</tt>) are forwarded
# unchanged.
# FIXME Not real multi; needs to use a Column predicate
def multi_get_columns(column_family, keys, *options)
  flat = []
  keys.each do |key|
    flat << key << get_columns(column_family, key, *options)
  end
  OrderedHash[*flat]
end
176
+
177
# Single-key lookup built on #multi_get: returns the entry (a
# Cassandra::OrderedHash or a single value) for the
# column_family:key:[column]:[sub_column] path. Supports options
# <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>,
# and <tt>:consistency</tt>.
def get(column_family, key, *columns_and_options)
  rows = multi_get(column_family, [key], *columns_and_options)
  rows[key]
end
184
+
185
# Multi-key version of Cassandra#get. Returns an OrderedHash with one entry
# per requested key, in the order the keys were given. Supports options
# <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>,
# and <tt>:consistency</tt>.
#
# A key with no result maps to an empty OrderedHash when a super column
# family is read without a sub column, and to +nil+ otherwise.
def multi_get(column_family, keys, *columns_and_options)
  column_family, column, sub_column, options =
    validate_params(column_family, keys, columns_and_options, READ_DEFAULTS)

  found = _multiget(
    column_family, keys, column, sub_column,
    options[:count], options[:start], options[:finish],
    options[:reversed], options[:consistency])

  # Rebuild the result in the order the caller listed the keys.
  keys.inject(OrderedHash.new) do |ordered, key|
    value = found[key]
    unless value
      value = (is_super(column_family) && !sub_column) ? OrderedHash.new : nil
    end
    ordered[key] = value
    ordered
  end
end
197
+
198
# Check whether the column_family:key:[column]:[sub_column] path exists.
# At most one column is fetched, and the entry found for +key+ is returned
# as-is, so the result is truthy when something is there and +nil+ when the
# lookup has no entry for the key. Supports the <tt>:consistency</tt> option.
def exists?(column_family, key, *columns_and_options)
  column_family, column, sub_column, options =
    validate_params(column_family, key, columns_and_options, READ_DEFAULTS)

  rows = _multiget(column_family, [key], column, sub_column, 1, nil, nil, nil, options[:consistency])
  rows[key]
end
205
+
206
# Return a list of keys in the column_family you request. Requires the
# cluster to be partitioned with OrderPreservingPartitioner. Supports the
# <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and <tt>:consistency</tt>
# options.
def get_range(column_family, options = {})
  column_family, _, _, options =
    validate_params(column_family, "", [options], READ_DEFAULTS)

  first_key = options[:start].to_s
  last_key = options[:finish].to_s
  _get_range(column_family, first_key, last_key, options[:count], options[:consistency])
end
215
+
216
# Count all rows in the column_family you request, by fetching the key
# range with the count limit raised to MAX_INT. Requires the cluster to be
# partitioned with OrderPreservingPartitioner. Supports the <tt>:start</tt>,
# <tt>:finish</tt>, and <tt>:consistency</tt> options.
# FIXME will count only MAX_INT records
def count_range(column_family, options = {})
  unlimited = options.merge(:count => MAX_INT)
  get_range(column_family, unlimited).size
end
223
+
224
# Open a batch operation and yield. Inserts and deletes issued inside the
# block are queued instead of being sent; when the block finishes they are
# compacted and sent to the server in a single call. Supports the
# <tt>:consistency</tt> option, which overrides the consistency set in
# the individual commands.
#
# Nothing is sent if the block raises. The queue is always discarded when
# this method exits, so a failed batch cannot leave the client stuck in
# queueing mode. Returns +nil+.
def batch(options = {})
  _, _, _, options =
    validate_params(@schema.keys.first, "", [options], WRITE_DEFAULTS)

  @batch = []
  begin
    yield
    compact_mutations!
    _mutate(@batch, options[:consistency])
  ensure
    # Without this reset an exception would leave @batch set, and every
    # later insert/remove would be queued silently and never sent.
    @batch = nil
  end
  nil
end
238
+
239
+ private
240
+
241
# Extract and validate the arguments shared by the public API methods.
#
# column_family:: Name of the column family; converted to a String.
# keys::          One key or a list of keys; every one must be a String.
# args::          Optional column, optional sub column and, last, an
#                 optional Hash of options. A trailing Hash is popped off
#                 +args+.
# options::       The defaults to start from; its keys are the only option
#                 names accepted.
#
# Returns <tt>[column_family, column, sub_column, options]</tt> with the
# column names and the <tt>:start</tt>/<tt>:finish</tt> bounds converted to
# strings through the column family's name classes. Raises ArgumentError
# for a non-String key or an unknown option.
# FIXME Should be done as a decorator
def validate_params(column_family, keys, args, options)
  options = options.dup
  column_family = column_family.to_s

  # Keys
  Array(keys).each do |key|
    next if key.is_a?(String)
    raise ArgumentError, "Key #{key.inspect} must be a String for #{calling_method}"
  end

  # Options
  supplied = args.last
  if supplied.is_a?(Hash)
    unknown = supplied.keys - options.keys
    if unknown.any?
      raise ArgumentError, "Invalid options #{unknown.inspect[1..-2]} for #{calling_method}"
    end
    options.merge!(args.pop)
  end

  # Ranges: the bounds apply to sub columns once a column has been named.
  column = args[0]
  sub_column = args[1]
  klass = column_name_class(column_family)
  sub_klass = sub_column_name_class(column_family)
  range_class = column ? sub_klass : klass
  [:start, :finish].each do |bound|
    options[bound] = options[bound] ? range_class.new(options[bound]).to_s : ""
  end

  [column_family, s_map(column, klass), s_map(sub_column, sub_klass), options]
end
268
+
269
# Name of the API method on whose behalf parameters are being validated,
# formatted as "Class#method" for use in error messages.
#
# The call stack is searched for the first frame that is not part of the
# validation machinery (validate_params, its blocks, or the +each+ that runs
# them). Looking only at the immediate caller would always report
# validate_params itself. Frame labels are parsed in both the
# <tt>`name'</tt> and the <tt>'Class#name'</tt> backtrace formats.
def calling_method
  helpers = ['validate_params', 'each']

  caller.each do |frame|
    label = frame[/in [`'](.+)'\z/, 1]
    next unless label

    # "block (2 levels) in Foo#bar" -> "bar"
    name = label.sub(/\A.* in /, '').split(/[#.]/).last
    return "#{self.class}##{name}" unless helpers.include?(name)
  end

  "#{self.class}#unknown"
end
272
+
273
# Convert a column name (or a possibly nested list of names) to its string
# form by wrapping each value in +klass+ and calling +to_s+ on it. +nil+
# stays +nil+; Arrays are converted element by element.
def s_map(el, klass)
  return nil if el.nil?
  return el.map { |item| s_map(item, klass) } if el.is_a?(Array)

  klass.new(el).to_s
end
282
+
283
# Roll up queued mutations, to improve atomicity: all mutations queued for
# the same row key are merged into the first mutation seen for that key,
# and @batch is replaced by the merged list.
def compact_mutations!
  by_key = {}

  @batch.each do |queued|
    target = by_key[queued.key]
    unless target
      # First mutation for this key becomes the merge target.
      by_key[queued.key] = queued
      next
    end

    # Inserts: append to the columns already queued for the same column
    # family, or adopt the column family if it is new to this key.
    family = queued.cfmap.keys.first
    existing_columns = target.cfmap[family]
    if existing_columns
      existing_columns.concat(queued.cfmap.values.first)
    else
      target.cfmap.merge!(queued.cfmap)
    end

    # Deletes
    target.column_paths.concat(queued.column_paths)
  end

  # FIXME Return atomic thrift thingy
  @batch = by_key.values
end
306
+ end