RubyGems - cassandra - Versions diffs - 0.5.6 → 0.5.6.1 - Mend

cassandra 0.5.6 → 0.5.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

data.tar.gz.sig +3 -2
data/CHANGELOG +28 -0
data/LICENSE +202 -0
data/Manifest +28 -0
data/README +65 -0
data/bin/cassandra_helper +16 -0
data/cassandra.gemspec +5 -2
data/conf/cassandra.in.sh +51 -0
data/conf/log4j.properties +38 -0
data/conf/storage-conf.xml +226 -0
data/lib/cassandra.rb +23 -0
data/lib/cassandra/array.rb +8 -0
data/lib/cassandra/cassandra.rb +306 -0
data/lib/cassandra/columns.rb +101 -0
data/lib/cassandra/comparable.rb +28 -0
data/lib/cassandra/constants.rb +12 -0
data/lib/cassandra/debug.rb +7 -0
data/lib/cassandra/long.rb +58 -0
data/lib/cassandra/ordered_hash.rb +135 -0
data/lib/cassandra/protocol.rb +72 -0
data/lib/cassandra/safe_client.rb +26 -0
data/lib/cassandra/time.rb +11 -0
data/lib/cassandra/uuid.rb +111 -0
data/vendor/gen-rb/cassandra.rb +853 -0
data/vendor/gen-rb/cassandra_constants.rb +10 -0
data/vendor/gen-rb/cassandra_types.rb +238 -0
metadata +49 -6
metadata.gz.sig +0 -0

data/conf/storage-conf.xml ADDED Viewed

@@ -0,0 +1,226 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~    http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+<Storage>
+    <!--======================================================================-->
+    <!-- Basic Configuration                                                  -->
+    <!--======================================================================-->
+    <ClusterName>Test</ClusterName>
+    <!-- Tables and ColumnFamilies
+         Think of a table as a namespace, not a relational table.
+         (ColumnFamilies are closer in meaning to those.)
+         There is an implicit table named 'system' for Cassandra internals.
+    -->
+    <Keyspaces>
+      <Keyspace Name="Twitter">
+          <KeysCachedFraction>0.01</KeysCachedFraction>
+          <ColumnFamily CompareWith="UTF8Type" Name="Users" />
+          <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
+          <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
+          <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
+          <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
+          <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
+          <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
+      </Keyspace>
+      <Keyspace Name="Multiblog">
+        <KeysCachedFraction>0.01</KeysCachedFraction>
+        <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
+        <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
+      </Keyspace>
+      <Keyspace Name="MultiblogLong">
+        <KeysCachedFraction>0.01</KeysCachedFraction>
+        <ColumnFamily CompareWith="LongType" Name="Blogs"/>
+        <ColumnFamily CompareWith="LongType" Name="Comments"/>
+      </Keyspace>
+    </Keyspaces>
+    <!-- Partitioner: any IPartitioner may be used, including your own
+         as long as it is on the classpath.  Out of the box,
+         Cassandra provides
+         org.apache.cassandra.dht.RandomPartitioner and
+         org.apache.cassandra.dht.OrderPreservingPartitioner.
+         Range queries require using OrderPreservingPartitioner or a subclass.
+         Achtung!  Changing this parameter requires wiping your data directories,
+         since the partitioner can modify the sstable on-disk format.
+    -->
+    <Partitioner>org.apache.cassandra.dht.OrderPreservingPartitioner</Partitioner>
+    <!-- If you are using the OrderPreservingPartitioner and you know your key
+         distribution, you can specify the token for this node to use.
+         (Keys are sent to the node with the "closest" token, so distributing
+         your tokens equally along the key distribution space will spread
+         keys evenly across your cluster.)  This setting is only checked the
+         first time a node is started.
+         This can also be useful with RandomPartitioner to force equal
+         spacing of tokens around the hash space, especially for
+         clusters with a small number of nodes. -->
+    <InitialToken></InitialToken>
+    <!-- EndPointSnitch: Set this to a class that implements IEndPointSnitch,
+         which determines whether two endpoints are in the same data center or on the same rack.
+         Out of the box, Cassandra provides
+         org.apache.cassandra.locator.EndPointSnitch
+    -->
+    <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+    <!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
+	     will change the way the node picker works.
+         Out of the box, Cassandra provides
+         org.apache.cassandra.locator.RackUnawareStrategy
+         org.apache.cassandra.locator.RackAwareStrategy
+		 (place one replica in a different datacenter, and the
+         others on different racks in the same one.)
+    -->
+    <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+    <!-- Number of replicas of the data-->
+    <ReplicationFactor>1</ReplicationFactor>
+    <!-- Directories: Specify where Cassandra should store different data on disk
+         Keep the data disks and the CommitLog disks separate for best performance
+    -->
+    <CommitLogDirectory>data/commitlog</CommitLogDirectory>
+    <DataFileDirectories>
+        <DataFileDirectory>data/data</DataFileDirectory>
+    </DataFileDirectories>
+    <CalloutLocation>data/callouts</CalloutLocation>
+    <BootstrapFileDirectory>data/bootstrap</BootstrapFileDirectory>
+    <StagingFileDirectory>data/staging</StagingFileDirectory>
+    <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
+         this list of hosts to find each other and learn the topology of the ring.
+         You must change this if you are running multiple nodes!
+    -->
+    <Seeds>
+        <Seed>127.0.0.1</Seed>
+    </Seeds>
+    <!-- Miscellaneous -->
+    <!-- time to wait for a reply from other nodes before failing the command -->
+    <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
+    <!-- size to allow commitlog to grow to before creating a new segment -->
+    <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
+    <!-- Local hosts and ports -->
+    <!-- Address to bind to and tell other nodes to connect to.
+         You _must_ change this if you want multiple nodes to be able
+         to communicate!
+         Leaving it blank leaves it up to InetAddress.getLocalHost().
+         This will always do the Right Thing *if* the node is properly
+         configured (hostname, name resolution, etc), and the Right
+         Thing is to use the address associated with the hostname (it
+         might not be). -->
+    <ListenAddress>localhost</ListenAddress>
+    <!-- TCP port, for commands and data -->
+    <StoragePort>7000</StoragePort>
+    <!-- UDP port, for membership communications (gossip) -->
+    <ControlPort>7001</ControlPort>
+    <!-- The address to bind the Thrift RPC service to. Unlike
+         ListenAddress above, you *can* specify 0.0.0.0 here if you want
+         Thrift to listen on all interfaces.
+         Leaving this blank has the same effect it does for ListenAddress,
+         (i.e. it will be based on the configured hostname of the node).
+    -->
+    <ThriftAddress>localhost</ThriftAddress>
+    <!-- Thrift RPC port (the port clients connect to). -->
+    <ThriftPort>9160</ThriftPort>
+    <!--======================================================================-->
+    <!-- Memory, Disk, and Performance                                        -->
+    <!--======================================================================-->
+    <!-- Add column indexes to a row after its contents reach this size -->
+    <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
+    <!--
+      The maximum amount of data to store in memory before flushing to
+      disk. Note: There is one memtable per column family, and this threshold
+      is based solely on the amount of data stored, not actual heap memory
+      usage (there is some overhead in indexing the columns).
+    -->
+    <MemtableSizeInMB>32</MemtableSizeInMB>
+    <!--
+      The maximum number of columns in millions to store in memory
+      before flushing to disk.  This is also a per-memtable setting.
+      Use with MemtableSizeInMB to tune memory usage.
+    -->
+    <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
+    <!-- Unlike most systems, in Cassandra writes are faster than
+         reads, so you can afford more of those in parallel.
+	 A good rule of thumb is 2 concurrent reads per processor core.
+         You especially want more concurrent writes if you are using
+         CommitLogSync + CommitLogSyncDelay. -->
+    <ConcurrentReads>8</ConcurrentReads>
+    <ConcurrentWrites>32</ConcurrentWrites>
+    <!-- CommitLogSync may be either "periodic" or "batch."
+         When in batch mode, Cassandra won't ack writes until the commit log
+         has been fsynced to disk.  It will wait up to CommitLogSyncBatchWindowInMS
+         milliseconds for other writes, before performing the sync.
+         This is less necessary in Cassandra
+         than in traditional databases since replication reduces the
+         odds of losing data from a failure after writing the log
+         entry but before it actually reaches the disk.  So the other
+         option is "periodic," where writes may be acked immediately
+         and the CommitLog is simply synced every CommitLogSyncPeriodInMS
+         milliseconds.
+    -->
+    <CommitLogSync>periodic</CommitLogSync>
+    <!-- Interval at which to perform syncs of the CommitLog in periodic
+         mode.  Usually the default of 1000ms is fine; increase it
+         only if the CommitLog PendingTasks backlog in jmx shows that
+         you are frequently scheduling a second sync while the first
+         has not yet been processed.
+    -->
+    <CommitLogSyncPeriodInMS>1000</CommitLogSyncPeriodInMS>
+    <!-- Delay (in milliseconds) during which additional commit log
+         entries may be written before fsync in batch mode.  This will increase
+         latency slightly, but can vastly improve throughput where
+         there are many writers.  Set to zero to disable
+         (each entry will be synced individually).
+	 Reasonable values range from a minimal 0.1 to 10 or even more
+         if throughput matters more than latency.
+    -->
+    <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
+    <!-- Time to wait before garbage-collecting deletion markers.
+         Set this to a large enough value that you are confident
+         that the deletion marker will be propagated to all replicas
+         by the time this many seconds have elapsed, even in the
+         face of hardware failures.  The default value is ten days.
+    -->
+    <GCGraceSeconds>864000</GCGraceSeconds>
+</Storage>

data/lib/cassandra.rb ADDED Viewed

@@ -0,0 +1,23 @@
+require 'zlib'
+require 'rubygems'
+require 'thrift'
+HERE = File.expand_path(File.dirname(__FILE__))
+$LOAD_PATH << "#{HERE}/../vendor/gen-rb"
+require "#{HERE}/../vendor/gen-rb/cassandra"
+$LOAD_PATH << "#{HERE}"
+require 'cassandra/array'
+require 'cassandra/time'
+require 'cassandra/comparable'
+require 'cassandra/uuid'
+require 'cassandra/long'
+require 'cassandra/safe_client'
+require 'cassandra/ordered_hash'
+require 'cassandra/columns'
+require 'cassandra/protocol'
+require 'cassandra/cassandra'
+require 'cassandra/constants'
+require 'cassandra/debug' if ENV['DEBUG']

data/lib/cassandra/array.rb ADDED Viewed

@@ -0,0 +1,8 @@
+class Array
+  def _flatten_once
+    result = []
+    each { |el| result.concat(Array(el)) }
+    result
+  end
+end

data/lib/cassandra/cassandra.rb ADDED Viewed

@@ -0,0 +1,306 @@
+=begin rdoc
+Create a new Cassandra client instance. Accepts a keyspace name, and optional host and port.
+  client = Cassandra.new('Twitter', '127.0.0.1', 9160)
+You can then make calls to the server via the <tt>client</tt> instance.
+  client.insert(:UserRelationships, "5", {"user_timeline" => {UUID.new => "1"}})
+  client.get(:UserRelationships, "5", "user_timeline")
+For read methods, valid option parameters are:
+<tt>:count</tt>:: How many results to return. Defaults to 100.
+<tt>:start</tt>:: Column name token at which to start iterating, inclusive. Defaults to nil, which means the first column in the collation order.
+<tt>:finish</tt>:: Column name token at which to stop iterating, inclusive. Defaults to nil, which means no boundary.
+<tt>:reversed</tt>:: Swap the direction of the collation order.
+<tt>:consistency</tt>:: The consistency level of the request. Defaults to <tt>Cassandra::Consistency::ONE</tt> (one node must respond). Other valid options are <tt>Cassandra::Consistency::ZERO</tt>, <tt>Cassandra::Consistency::QUORUM</tt>, and <tt>Cassandra::Consistency::ALL</tt>.
+Note that some read options have no relevance in some contexts.
+For write methods, valid option parameters are:
+<tt>:timestamp</tt>:: The transaction timestamp. Defaults to the current time in milliseconds. This is used for conflict resolution by the server; you normally never need to change it.
+<tt>:consistency</tt>:: See above.
+=end rdoc
+class Cassandra
+  include Columns
+  include Protocol
+  class AccessError < StandardError #:nodoc:
+  end
+  module Consistency
+    include CassandraThrift::ConsistencyLevel
+  end
+  MAX_INT = 2**31 - 1
+  WRITE_DEFAULTS = {
+    :count => MAX_INT,
+    :timestamp => nil,
+    :consistency => Consistency::ONE
+  }.freeze
+  READ_DEFAULTS = {
+    :count => 100,
+    :start => nil,
+    :finish => nil,
+    :reversed => false,
+    :consistency => Consistency::ONE
+  }.freeze
+  attr_reader :keyspace, :host, :port, :serializer, :transport, :client, :schema
+  # Instantiate a new Cassandra and open the connection.
+  def initialize(keyspace, host = '127.0.0.1', port = 9160, buffer = true)
+    @is_super = {}
+    @column_name_class = {}
+    @sub_column_name_class = {}
+    @keyspace = keyspace
+    @host = host
+    @port = port
+    transport = Thrift::BufferedTransport.new(Thrift::Socket.new(@host, @port))
+    transport.open
+    @client = CassandraThrift::Cassandra::SafeClient.new(
+      CassandraThrift::Cassandra::Client.new(Thrift::BinaryProtocol.new(transport)),
+      transport,
+      !buffer)
+    keyspaces = @client.get_string_list_property("keyspaces")
+    unless keyspaces.include?(@keyspace)
+      raise AccessError, "Keyspace #{@keyspace.inspect} not found. Available: #{keyspaces.inspect}"
+    end
+    @schema = @client.describe_keyspace(@keyspace)
+  end
+  def inspect
+    "#<Cassandra:#{object_id}, @keyspace=#{keyspace.inspect}, @schema={#{
+      schema.map {|name, hash| ":#{name} => #{hash['type'].inspect}"}.join(', ')
+    }}, @host=#{host.inspect}, @port=#{port}>"
+  end
+### Write
+  # Insert a row for a key. Pass a flat hash for a regular column family, and
+  # a nested hash for a super column family. Supports the <tt>:consistency</tt>
+  # and <tt>:timestamp</tt> options.
+  def insert(column_family, key, hash, options = {})
+    column_family, _, _, options =
+      validate_params(column_family, key, [options], WRITE_DEFAULTS)
+    args = [column_family, hash, options[:timestamp] || Time.stamp]
+    columns = is_super(column_family) ? hash_to_super_columns(*args) : hash_to_columns(*args)
+    mutation = CassandraThrift::BatchMutation.new(
+      :key => key,
+      :cfmap => {column_family => columns},
+      :column_paths => [])
+    @batch ? @batch << mutation : _mutate([mutation], options[:consistency])
+  end
+  ## Delete
+  # _mutate the element at the column_family:key:[column]:[sub_column]
+  # path you request. Supports the <tt>:consistency</tt> and <tt>:timestamp</tt>
+  # options.
+  def remove(column_family, key, *columns_and_options)
+    column_family, column, sub_column, options =
+      validate_params(column_family, key, columns_and_options, WRITE_DEFAULTS)
+    args = {:column_family => column_family, :timestamp => options[:timestamp] || Time.stamp}
+    columns = is_super(column_family) ? {:super_column => column, :column => sub_column} : {:column => column}
+    mutation = CassandraThrift::BatchMutation.new(
+      :key => key,
+      :cfmap => {},
+      :column_paths => [CassandraThrift::ColumnPath.new(args.merge(columns))])
+    @batch ? @batch << mutation : _mutate([mutation], options[:consistency])
+  end
+  # Remove all rows in the column family you request. Supports options
+  # <tt>:consistency</tt> and <tt>:timestamp</tt>.
+  # FIXME May not currently delete all records without multiple calls. Waiting
+  # for ranged remove support in Cassandra.
+  def clear_column_family!(column_family, options = {})
+    get_range(column_family).each { |key| remove(column_family, key, options) }
+  end
+  # Remove all rows in the keyspace. Supports options <tt>:consistency</tt> and
+  # <tt>:timestamp</tt>.
+  # FIXME May not currently delete all records without multiple calls. Waiting
+  # for ranged remove support in Cassandra.
+  def clear_keyspace!(options = {})
+    @schema.keys.each { |column_family| clear_column_family!(column_family, options) }
+  end
+### Read
+  # Count the elements at the column_family:key:[super_column] path you
+  # request. Supports the <tt>:consistency</tt> option.
+  def count_columns(column_family, key, *columns_and_options)
+    column_family, super_column, _, options =
+      validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
+    _count_columns(column_family, key, super_column, options[:consistency])
+  end
+  # Multi-key version of Cassandra#count_columns. Supports options <tt>:count</tt>,
+  # <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
+  # FIXME Not real multi; needs server support
+  def multi_count_columns(column_family, keys, *options)
+    OrderedHash[*keys.map { |key| [key, count_columns(column_family, key, *options)] }._flatten_once]
+  end
+  # Return a list of single values for the elements at the
+  # column_family:key:column[s]:[sub_columns] path you request. Supports the
+  # <tt>:consistency</tt> option.
+  def get_columns(column_family, key, *columns_and_options)
+    column_family, columns, sub_columns, options =
+      validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
+    _get_columns(column_family, key, columns, sub_columns, options[:consistency])
+  end
+  # Multi-key version of Cassandra#get_columns. Supports the <tt>:consistency</tt>
+  # option.
+  # FIXME Not real multi; needs to use a Column predicate
+  def multi_get_columns(column_family, keys, *options)
+    OrderedHash[*keys.map { |key| [key, get_columns(column_family, key, *options)] }._flatten_once]
+  end
+  # Return a hash (actually, a Cassandra::OrderedHash) or a single value
+  # representing the element at the column_family:key:[column]:[sub_column]
+  # path you request. Supports options <tt>:count</tt>, <tt>:start</tt>,
+  # <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
+  def get(column_family, key, *columns_and_options)
+    multi_get(column_family, [key], *columns_and_options)[key]
+  end
+  # Multi-key version of Cassandra#get. Supports options <tt>:count</tt>,
+  # <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
+  def multi_get(column_family, keys, *columns_and_options)
+    column_family, column, sub_column, options =
+      validate_params(column_family, keys, columns_and_options, READ_DEFAULTS)
+    hash = _multiget(column_family, keys, column, sub_column, options[:count], options[:start], options[:finish], options[:reversed], options[:consistency])
+    # Restore order
+    ordered_hash = OrderedHash.new
+    keys.each { |key| ordered_hash[key] = hash[key] || (OrderedHash.new if is_super(column_family) and !sub_column) }
+    ordered_hash
+  end
+  # Return true if the column_family:key:[column]:[sub_column] path you
+  # request exists. Supports the <tt>:consistency</tt> option.
+  def exists?(column_family, key, *columns_and_options)
+    column_family, column, sub_column, options =
+      validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
+    _multiget(column_family, [key], column, sub_column, 1, nil, nil, nil, options[:consistency])[key]
+  end
+  # Return a list of keys in the column_family you request. Requires the
+  # table to be partitioned with OrderPreservingHash. Supports the
+  # <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and <tt>:consistency</tt>
+  # options.
+  def get_range(column_family, options = {})
+    column_family, _, _, options =
+      validate_params(column_family, "", [options], READ_DEFAULTS)
+    _get_range(column_family, options[:start].to_s, options[:finish].to_s, options[:count], options[:consistency])
+  end
+  # Count all rows in the column_family you request. Requires the table
+  # to be partitioned with OrderPreservingHash. Supports the <tt>:start</tt>,
+  # <tt>:finish</tt>, and <tt>:consistency</tt> options.
+  # FIXME will count only MAX_INT records
+  def count_range(column_family, options = {})
+    get_range(column_family, options.merge(:count => MAX_INT)).size
+  end
+  # Open a batch operation and yield. Inserts and deletes will be queued until
+  # the block closes, and then sent atomically to the server.  Supports the
+  # <tt>:consistency</tt> option, which overrides the consistency set in
+  # the individual commands.
+  def batch(options = {})
+    _, _, _, options =
+      validate_params(@schema.keys.first, "", [options], WRITE_DEFAULTS)
+    @batch = []
+    yield
+    compact_mutations!
+    _mutate(@batch, options[:consistency])
+    @batch = nil
+  end
+  private
+  # Extract and validate options.
+  # FIXME Should be done as a decorator
+  def validate_params(column_family, keys, args, options)
+    options = options.dup
+    column_family = column_family.to_s
+    # Keys
+    Array(keys).each do |key|
+      raise ArgumentError, "Key #{key.inspect} must be a String for #{calling_method}" unless key.is_a?(String)
+    end
+    # Options
+    if args.last.is_a?(Hash)
+      extras = args.last.keys - options.keys
+      raise ArgumentError, "Invalid options #{extras.inspect[1..-2]} for #{calling_method}" if extras.any?
+      options.merge!(args.pop)
+    end
+    # Ranges
+    column, sub_column = args[0], args[1]
+    klass, sub_klass = column_name_class(column_family), sub_column_name_class(column_family)
+    range_class = column ? sub_klass : klass
+    options[:start] = options[:start] ? range_class.new(options[:start]).to_s : ""
+    options[:finish] = options[:finish] ? range_class.new(options[:finish]).to_s : ""
+    [column_family, s_map(column, klass), s_map(sub_column, sub_klass), options]
+  end
+  def calling_method
+     "#{self.class}##{caller[0].split('`').last[0..-3]}"
+  end
+  # Convert stuff to strings.
+  def s_map(el, klass)
+    case el
+    when Array then el.map { |i| s_map(i, klass) }
+    when NilClass then nil
+    else
+      klass.new(el).to_s
+    end
+  end
+  # Roll up queued mutations, to improve atomicity.
+  def compact_mutations!
+    mutations = {}
+    # Nested hash merge
+    @batch.each do |m|
+      if mutation = mutations[m.key]
+        # Inserts
+        if columns = mutation.cfmap[m.cfmap.keys.first]
+          columns.concat(m.cfmap.values.first)
+        else
+          mutation.cfmap.merge!(m.cfmap)
+        end
+        # Deletes
+        mutation.column_paths.concat(m.column_paths)
+      else
+        mutations[m.key] = m
+      end
+    end
+    # FIXME Return atomic thrift thingy
+    @batch = mutations.values
+  end
+end