wukong 1.5.3 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.textile +4 -0
- data/bin/hdp-bin +44 -0
- data/bin/hdp-ls +2 -1
- data/docpages/avro/performance.textile +36 -0
- data/examples/cassandra_streaming/avromapper.rb +85 -0
- data/examples/cassandra_streaming/berlitz_for_cassandra.textile +22 -0
- data/examples/cassandra_streaming/cassandra.avpr +468 -0
- data/examples/cassandra_streaming/cassandra_random_partitioner.rb +62 -0
- data/examples/cassandra_streaming/catter.sh +45 -0
- data/examples/cassandra_streaming/client_interface_notes.textile +200 -0
- data/examples/cassandra_streaming/client_schema.avpr +211 -0
- data/examples/cassandra_streaming/client_schema.textile +318 -0
- data/examples/cassandra_streaming/foofile.avr +0 -0
- data/examples/cassandra_streaming/pymap.sh +1 -0
- data/examples/cassandra_streaming/pyreduce.sh +1 -0
- data/examples/cassandra_streaming/smutation.avpr +188 -0
- data/examples/cassandra_streaming/streamer.sh +51 -0
- data/examples/cassandra_streaming/struct_loader.rb +24 -0
- data/examples/cassandra_streaming/tuning.textile +73 -0
- data/examples/emr/README-elastic_map_reduce.textile +26 -0
- data/examples/emr/dot_wukong_dir/credentials.json +7 -0
- data/examples/emr/{emr.yaml → dot_wukong_dir/emr.yaml} +33 -16
- data/{bin/bootstrap.sh → examples/emr/dot_wukong_dir/emr_bootstrap.sh} +1 -1
- data/examples/emr/elastic_mapreduce_example.rb +1 -0
- data/lib/wukong/encoding/asciize.rb +108 -0
- data/lib/wukong/extensions/date_time.rb +33 -7
- data/lib/wukong/extensions/emittable.rb +12 -25
- data/lib/wukong/extensions/hash_like.rb +13 -6
- data/lib/wukong/filename_pattern.rb +8 -7
- data/lib/wukong/schema.rb +47 -0
- data/lib/wukong/script.rb +7 -0
- data/lib/wukong/script/cassandra_loader_script.rb +40 -0
- data/lib/wukong/script/emr_command.rb +74 -43
- data/lib/wukong/script/hadoop_command.rb +89 -72
- data/lib/wukong/store.rb +2 -7
- data/lib/wukong/store/cassandra.rb +10 -0
- data/lib/wukong/store/cassandra/streaming.rb +75 -0
- data/lib/wukong/store/cassandra/struct_loader.rb +21 -0
- data/lib/wukong/store/cassandra_model.rb +90 -0
- data/lib/wukong/store/chh_chunked_flat_file_store.rb +1 -1
- data/lib/wukong/store/chunked_flat_file_store.rb +24 -20
- data/wukong.gemspec +32 -4
- metadata +33 -14
| @@ -0,0 +1,62 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            require 'rubygems'
         | 
| 3 | 
            +
            require 'avro'
         | 
| 4 | 
            +
            require 'wukong'
         | 
| 5 | 
            +
            require 'wukong/periodic_monitor'
         | 
| 6 | 
            +
            # Declares the :log_interval setting with a default of 10_000.
            # NOTE(review): presumably read by PeriodicMonitor to pace its output -- confirm.
            Settings.define :log_interval, :default => 10_000
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            require 'digest/md5'
         | 
| 9 | 
            +
            # Declares a :ring_nodes setting (no default).
            # NOTE(review): nothing in this script reads it; the RING_NODES constant below is hard-coded.
            Settings.define :ring_nodes
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            # Upper bound of the folded hash space: partition_hash returns values in 0..MAX_HASH.
            MAX_HASH = 2**127
            # Number of equal-width slices the hash space is divided into (valid partitions are 0..RING_NODES-1).
            RING_NODES = 72
            # Width of one slice. Integer division rounds down, so RING_NODES * RING_WIDTH
            # is slightly smaller than MAX_HASH (see the clamp in #partition).
            RING_WIDTH = MAX_HASH / RING_NODES
            # Directory that receives one chunk-NNN file per partition.
            OUT_DIR    = '/mnt/tmp/partitioned_words'

            # Example invocation (one background mapper per input part file):
            #
            #   for foo in pw0/part-000* ; do echo $foo ; time cat $foo | ~/ics/wukong/examples/cassandra_streaming/cassandra_random_partitioner.rb --map 2>/tmp/split-`basename $foo`.log & done

            #
            # Mixin that assigns string keys to one of RING_NODES slices of an
            # MD5-derived hash space and hands out one output file per slice.
            #
            # NOTE(review): the folding in #partition_hash is intended to mirror
            # Cassandra's RandomPartitioner token calculation -- confirm against the
            # Cassandra version in use.
            #
            module CassandraRandomPartitioner
              # Hash a key into the range 0..MAX_HASH.
              #
              # The MD5 digest is read as an unsigned 128-bit integer; values in the
              # upper half are folded back (2**128 - value), i.e. the absolute value
              # of the digest interpreted as a signed 128-bit number.
              #
              # @param key [String] the row key
              # @return [Integer] folded hash, 0..MAX_HASH inclusive
              def partition_hash(key)
                uval = Digest::MD5.hexdigest(key).to_i(16)
                uval > MAX_HASH ? (2 * MAX_HASH - uval) : uval
              end

              # Index of the ring slice that owns +key+.
              #
              # RING_WIDTH is rounded down, so the handful of hashes at the very top
              # of the range would otherwise divide out to RING_NODES -- one past the
              # last slice. Those are clamped into the final slice.
              #
              # @param key [String] the row key
              # @return [Integer] slice index, 0..RING_NODES-1
              def partition(key)
                [partition_hash(key) / RING_WIDTH, RING_NODES - 1].min
              end

              # Lazily-populated table of output files, keyed by partition index.
              # Looking up a partition for the first time opens
              # OUT_DIR/chunk-NNN (zero-padded to three digits) for writing; later
              # lookups return the same handle. The handles are never closed
              # explicitly here.
              #
              # @return [Hash{Integer => File}]
              def files
                @files ||= Hash.new do |handles, part|
                  handles[part] = File.open(File.join(OUT_DIR, format('chunk-%03d', part)), 'w')
                end
              end
            end
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            #
            # Mixin giving the including object a single, lazily-built
            # PeriodicMonitor, reachable through #log.
            #
            module PeriodicLog
              # @return [PeriodicMonitor] the monitor for this object, created on first use
              def log
                return @log if @log
                @log = PeriodicMonitor.new
              end
            end
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            #
            # Map-side streamer: appends each (word, count) record to the chunk file
            # of the partition that owns the word. Nothing is emitted to the output
            # stream -- the records go straight to the files under OUT_DIR.
            #
            class HashingStreamer < Wukong::Streamer::RecordStreamer
              include CassandraRandomPartitioner
              include PeriodicLog

              # @param word  [String] record key; decides the partition
              # @param count [String] value written next to the key
              # Any further fields in the record are ignored.
              def process(word, count, *_)
                log.periodically(word, count)
                slot = partition(word)
                # (emitting [slot, word, count] instead was the earlier, now disabled, behaviour)
                line = [word, count].join("\t")
                files[slot] << line << "\n"
              end
            end
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            #
            # Reduce-side streamer: drops the leading partition field and re-emits
            # the (word, count) pair.
            #
            class HashingReducer < Wukong::Streamer::RecordStreamer
              include CassandraRandomPartitioner
              include PeriodicLog

              # @param part  [String] partition field of the incoming record (not used)
              # @param word  [String] record key
              # @param count [String] record value
              # @yield [Array] the two-element record [word, count]
              def process(part, word, count, *_)
                log.periodically(word, count)
                record = [word, count]
                yield record
              end
            end
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            # Build the Wukong script from the two streamer classes above and run it.
            # NOTE(review): HashingStreamer is presumably the mapper and HashingReducer the reducer, and
            # :map_speculative => false presumably disables speculative map attempts (relevant because the
            # mapper writes to local files) -- confirm against Wukong::Script.
            Wukong::Script.new(HashingStreamer, HashingReducer, :map_speculative => false).run
         | 
| @@ -0,0 +1,45 @@ | |
| 1 | 
            +
            #!/usr/bin/env bash
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            # Cat a binary-encoded avro file into the bulk loader
         | 
| 5 | 
            +
            #
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # Positional arguments (each is consumed with shift; whatever remains is
            # handed to hadoop verbatim):
            #   $1 input_file    - passed to hadoop streaming as -input  (required)
            #   $2 output_file   - passed to hadoop streaming as -output (required)
            #   $3 map_script    - parsed but not used: the mapper below is always `cat`
            #   $4 reduce_script - parsed but not used: the job sets mapred.reduce.tasks=0
            input_file="$1"                  ; shift
            output_file="$1"                 ; shift
            map_script=${1-/bin/cat}         ; shift
            reduce_script=${1-/usr/bin/uniq} ; shift

            # Fail early with a usage line instead of letting hadoop choke on empty paths.
            if [ -z "$input_file" ] || [ -z "$output_file" ] ; then
                echo "usage: $(basename "$0") input_file output_file [map_script [reduce_script [hadoop args...]]]" >&2
                exit 1
            fi

            # Destination in Cassandra; each can be overridden from the environment.
            dest_keyspace=${dest_keyspace-soc_net_tw}
            dest_col_family=${dest_col_family-Wordbag}
            cassandra_thrift_address=${cassandra_thrift_address-10.104.9.68}
            cassandra_thrift_port=${cassandra_thrift_port-9160}

            hostname=$(hostname)

            # Path to cassandra and hadoop dirs
            script_dir=$(readlink -f "$(dirname "$0")")
            CASSANDRA_HOME=${CASSANDRA_HOME-/usr/local/share/cassandra}
            HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}
            avro_file=${avro_file-$CASSANDRA_HOME/interface/avro/cassandra.avpr}

            # Comma-separated list of every cassandra jar, for -libjars. Built from
            # globs rather than `ls` output so that several matches (or paths with
            # spaces) cannot split into separate words on the hadoop command line.
            ARCHIVES=
            for jar in "$CASSANDRA_HOME"/build/apache-cassandra*.jar \
                       "$CASSANDRA_HOME"/build/lib/jars/*.jar        \
                       "$CASSANDRA_HOME"/lib/*.jar ; do
                [ -e "$jar" ] || continue        # skip patterns that matched nothing
                ARCHIVES=${ARCHIVES:+$ARCHIVES,}$jar
            done

            # Map-only streaming job: `cat` passes the avro records through unchanged and
            # ColumnFamilyOutputFormat writes them to the destination column family.
            # The streaming jar path is left unquoted after HADOOP_HOME so its glob expands.
            "${HADOOP_HOME}"/bin/hadoop                                                                      \
                 jar "${HADOOP_HOME}"/contrib/streaming/hadoop-*streaming*.jar                               \
                -D stream.map.output=cassandra_avro_output                                                   \
                -D stream.io.identifier.resolver.class=org.apache.cassandra.hadoop.streaming.AvroResolver    \
                -D cassandra.output.keyspace="$dest_keyspace"                                                \
                -D cassandra.output.columnfamily="$dest_col_family"                                          \
                -D cassandra.partitioner.class=org.apache.cassandra.dht.RandomPartitioner                    \
                -D cassandra.thrift.address="$cassandra_thrift_address"                                      \
                -D cassandra.thrift.port="$cassandra_thrift_port"                                            \
                -D mapred.reduce.tasks=0                                                                     \
                -libjars "$ARCHIVES"                                                                         \
                -file "$avro_file"                                                                           \
                -outputformat org.apache.cassandra.hadoop.ColumnFamilyOutputFormat                           \
                -mapper      "$(command -v cat)"                                                             \
                -input       "$input_file"                                                                   \
                -output      "$output_file"                                                                  \
                "$@"
         | 
| 45 | 
            +
             | 
| @@ -0,0 +1,200 @@ | |
| 1 | 
            +
            Method calls
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            get
         | 
| 4 | 
            +
              * ColumnOrSuperColumn get(string keyspace, string key, ColumnPath column_path, ConsistencyLevel consistency_level) 
         | 
| 5 | 
            +
                Get the Column or SuperColumn at the given column_path. If no value is present, NotFoundException is thrown. (This is the only method that can throw an exception under non-failure conditions.)
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            get_slice
         | 
| 8 | 
            +
              * list<ColumnOrSuperColumn> get_slice(string keyspace, string key, ColumnParent column_parent, SlicePredicate predicate, ConsistencyLevel consistency_level) 
         | 
| 9 | 
            +
                Get the group of columns contained by column_parent (either a ColumnFamily name or a ColumnFamily/SuperColumn name pair) specified by the given SlicePredicate struct.
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            multiget_slice
         | 
| 12 | 
            +
              * map<string,list<ColumnOrSuperColumn>> multiget_slice(string keyspace, list<string> keys, ColumnParent column_parent, SlicePredicate predicate, ConsistencyLevel consistency_level) 
         | 
| 13 | 
            +
                Retrieves slices for column_parent and predicate on each of the given keys in parallel. Keys are a list<string> of the keys to get slices for.
         | 
| 14 | 
            +
                This is similar to get_range_slices (Cassandra 0.6) or get_range_slice (Cassandra 0.5) except operating on a set of non-contiguous keys instead of a range of keys.
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            get_count
         | 
| 17 | 
            +
              * i32 get_count(string keyspace, string key, ColumnParent column_parent, ConsistencyLevel consistency_level) 
         | 
| 18 | 
            +
                Counts the columns present in column_parent.
         | 
| 19 | 
            +
                This method is not O(1): it reads all the columns from disk to calculate the answer. Its only benefit is that you do not need to pull all the columns over the Thrift interface to count them.
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            get_range_slices        Requires Cassandra 0.6
         | 
| 22 | 
            +
              * list<KeySlice> get_range_slices(string keyspace, ColumnParent column_parent, SlicePredicate predicate, KeyRange range, ConsistencyLevel consistency_level) 
         | 
| 23 | 
            +
                Replaces get_range_slice. Returns a list of slices for the keys within the specified KeyRange. Unlike get_key_range, this applies the given predicate to all keys in the range, not just those with undeleted matching data.
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            insert
         | 
| 26 | 
            +
              * void insert(string keyspace, string key, ColumnPath column_path, binary value, i64 timestamp, ConsistencyLevel consistency_level) 
         | 
| 27 | 
            +
                Insert or update a Column consisting of (column_path.column, value, timestamp) at the given column_path.column_family and optional column_path.super_column. Note that column_path.column is required here, since a SuperColumn cannot directly contain binary values -- it can only contain sub-Columns.
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            batch_mutate    Requires Cassandra 0.6
         | 
| 30 | 
            +
              * void batch_mutate(string keyspace, map<string,map<string,list<Mutation>>> mutation_map, ConsistencyLevel consistency_level) 
         | 
| 31 | 
            +
                Executes the specified mutations on the keyspace. mutation_map is a map<string, map<string, list<Mutation>>>; the outer map maps the key to the inner map, which maps the column family to the Mutation; can be read as: map<key : string, map<column_family : string, list<Mutation>>>. To be more specific, the outer map key is a row key, the inner map key is the column family name.
         | 
| 32 | 
            +
                A Mutation specifies columns to insert, update or delete. See Mutation and Deletion above for more details.
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            remove
         | 
| 35 | 
            +
              * void remove(string keyspace, string key, ColumnPath column_path, i64 timestamp, ConsistencyLevel consistency_level) 
         | 
| 36 | 
            +
                Remove data from the row specified by key at the granularity specified by column_path, and the given timestamp. Note that all the values in column_path besides column_path.column_family are truly optional: you can remove the entire row by just specifying the ColumnFamily, or you can remove a SuperColumn or a single Column by specifying those levels too. Note that the timestamp is needed, so that if the commands are replayed in a different order on different nodes, the same result is produced. 
         | 
| 37 | 
            +
             | 
| 38 | 
            +
             | 
| 39 | 
            +
            ===========================================================================
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            To use the standard interface, create a ColumnFamily instance.
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                >>> cf = pycassa.ColumnFamily(client, 'Test ColumnFamily')
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            The value returned by an insert is the timestamp used for the insertion, which by default is int(time.time() * 1e6). You may replace this timestamp function with your own (see Extra Documentation).
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                >>> cf.insert('foo', {'column1': 'val1'})
         | 
| 48 | 
            +
                1261349837816957
         | 
| 49 | 
            +
                >>> cf.get('foo')
         | 
| 50 | 
            +
                {'column1': 'val1'}
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            Insert also acts to update values.
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                >>> cf.insert('foo', {'column1': 'val2'})
         | 
| 55 | 
            +
                1261349910511572
         | 
| 56 | 
            +
                >>> cf.get('foo')
         | 
| 57 | 
            +
                {'column1': 'val2'}
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            You may insert multiple columns at once.
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                >>> cf.insert('bar', {'column1': 'val3', 'column2': 'val4'})
         | 
| 62 | 
            +
                1261350013606860
         | 
| 63 | 
            +
                >>> cf.multiget(['foo', 'bar'])
         | 
| 64 | 
            +
                {'foo': {'column1': 'val2'}, 'bar': {'column1': 'val3', 'column2': 'val4'}}
         | 
| 65 | 
            +
                >>> cf.get_count('bar')
         | 
| 66 | 
            +
                2
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            get_range() returns an iterable. Call it with list() to convert it to a list.
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                >>> list(cf.get_range())
         | 
| 71 | 
            +
                [('bar', {'column1': 'val3', 'column2': 'val4'}), ('foo', {'column1': 'val2'})]
         | 
| 72 | 
            +
                >>> list(cf.get_range(row_count=1))
         | 
| 73 | 
            +
                [('bar', {'column1': 'val3', 'column2': 'val4'})]
         | 
| 74 | 
            +
             | 
| 75 | 
            +
            You can remove entire keys or just a certain column.
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                >>> cf.remove('bar', columns=['column1'])
         | 
| 78 | 
            +
                1261350220106863
         | 
| 79 | 
            +
                >>> cf.get('bar')
         | 
| 80 | 
            +
                {'column2': 'val4'}
         | 
| 81 | 
            +
                >>> cf.remove('bar')
         | 
| 82 | 
            +
                1261350226926859
         | 
| 83 | 
            +
                >>> cf.get('bar')
         | 
| 84 | 
            +
                Traceback (most recent call last):
         | 
| 85 | 
            +
                ...
         | 
| 86 | 
            +
                cassandra.ttypes.NotFoundException: NotFoundException()
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            pycassa retains the behavior of Cassandra in that get_range() may return removed keys for a while. Cassandra will eventually delete them, so that they disappear.
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                >>> cf.remove('foo')
         | 
| 91 | 
            +
                >>> cf.remove('bar')
         | 
| 92 | 
            +
                >>> list(cf.get_range())
         | 
| 93 | 
            +
                [('bar', {}), ('foo', {})]
         | 
| 94 | 
            +
             | 
| 95 | 
            +
                ... After some amount of time
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                >>> list(cf.get_range())
         | 
| 98 | 
            +
                []
         | 
| 99 | 
            +
             | 
| 100 | 
            +
            Class Mapping
         | 
| 101 | 
            +
            -------------
         | 
| 102 | 
            +
             | 
| 103 | 
            +
            You can also map existing classes using ColumnFamilyMap.
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                >>> class Test(object):
         | 
| 106 | 
            +
                ...     string_column       = pycassa.String(default='Your Default')
         | 
| 107 | 
            +
                ...     int_str_column      = pycassa.IntString(default=5)
         | 
| 108 | 
            +
                ...     float_str_column    = pycassa.FloatString(default=8.0)
         | 
| 109 | 
            +
                ...     float_column        = pycassa.Float64(default=0.0)
         | 
| 110 | 
            +
                ...     datetime_str_column = pycassa.DateTimeString() # default=None
         | 
| 111 | 
            +
             | 
| 112 | 
            +
            The defaults will be filled in whenever you retrieve instances from the Cassandra server and the column doesn't exist. If, for example, you add columns in the future, you simply add the relevant column and the default will be there when you get old instances.
         | 
| 113 | 
            +
             | 
| 114 | 
            +
            IntString, FloatString, and DateTimeString all use string representations for storage. Float64 is stored as a double and is native-endian. Be aware of any endian issues if you use it on different architectures, or perhaps make your own column type.
         | 
| 115 | 
            +
             | 
| 116 | 
            +
                >>> Test.objects = pycassa.ColumnFamilyMap(Test, cf)
         | 
| 117 | 
            +
             | 
| 118 | 
            +
            All the functions are exactly the same, except that they return instances of the supplied class when possible.
         | 
| 119 | 
            +
             | 
| 120 | 
            +
                >>> t = Test()
         | 
| 121 | 
            +
                >>> t.key = 'maptest'
         | 
| 122 | 
            +
                >>> t.string_column = 'string test'
         | 
| 123 | 
            +
                >>> t.int_str_column = 18
         | 
| 124 | 
            +
                >>> t.float_column = t.float_str_column = 35.8
         | 
| 125 | 
            +
                >>> from datetime import datetime
         | 
| 126 | 
            +
                >>> t.datetime_str_column = datetime.now()
         | 
| 127 | 
            +
                >>> Test.objects.insert(t)
         | 
| 128 | 
            +
                1261395560186855
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                >>> Test.objects.get(t.key).string_column
         | 
| 131 | 
            +
                'string test'
         | 
| 132 | 
            +
                >>> Test.objects.get(t.key).int_str_column
         | 
| 133 | 
            +
                18
         | 
| 134 | 
            +
                >>> Test.objects.get(t.key).float_column
         | 
| 135 | 
            +
                35.799999999999997
         | 
| 136 | 
            +
                >>> Test.objects.get(t.key).datetime_str_column
         | 
| 137 | 
            +
                datetime.datetime(2009, 12, 23, 17, 6, 3)
         | 
| 138 | 
            +
             | 
| 139 | 
            +
                >>> Test.objects.multiget([t.key])
         | 
| 140 | 
            +
                {'maptest': <__main__.Test object at 0x7f8ddde0b9d0>}
         | 
| 141 | 
            +
                >>> list(Test.objects.get_range())
         | 
| 142 | 
            +
                [<__main__.Test object at 0x7f8ddde0b710>]
         | 
| 143 | 
            +
                >>> Test.objects.get_count(t.key)
         | 
| 144 | 
            +
                7
         | 
| 145 | 
            +
             | 
| 146 | 
            +
                >>> Test.objects.remove(t)
         | 
| 147 | 
            +
                1261395603906864
         | 
| 148 | 
            +
                >>> Test.objects.get(t.key)
         | 
| 149 | 
            +
                Traceback (most recent call last):
         | 
| 150 | 
            +
                ...
         | 
| 151 | 
            +
                cassandra.ttypes.NotFoundException: NotFoundException()
         | 
| 152 | 
            +
             | 
| 153 | 
            +
            Note that, as mentioned previously, get_range() may continue to return removed rows for some time:
         | 
| 154 | 
            +
             | 
| 155 | 
            +
                >>> Test.objects.remove(t)
         | 
| 156 | 
            +
                1261395603756875
         | 
| 157 | 
            +
                >>> list(Test.objects.get_range())
         | 
| 158 | 
            +
                [<__main__.Test object at 0x7fac9c85ea90>]
         | 
| 159 | 
            +
                >>> list(Test.objects.get_range())[0].string_column
         | 
| 160 | 
            +
                'Your Default'
         | 
| 161 | 
            +
             | 
| 162 | 
            +
            SuperColumns
         | 
| 163 | 
            +
            ------------
         | 
| 164 | 
            +
             | 
| 165 | 
            +
            To use SuperColumns, pass super=True to the ColumnFamily constructor.
         | 
| 166 | 
            +
             | 
| 167 | 
            +
                >>> cf = pycassa.ColumnFamily(client, 'Test SuperColumnFamily', super=True)
         | 
| 168 | 
            +
                >>> cf.insert('key1', {'1': {'sub1': 'val1', 'sub2': 'val2'}, '2': {'sub3': 'val3', 'sub4': 'val4'}})
         | 
| 169 | 
            +
             | 
| 170 | 
            +
                >>> cf.get('key1')
         | 
| 171 | 
            +
                {'1': {'sub2': 'val2', 'sub1': 'val1'}, '2': {'sub4': 'val4', 'sub3': 'val3'}}
         | 
| 172 | 
            +
                >>> cf.remove('key1', super_column='1')
         | 
| 173 | 
            +
                1261490176976864
         | 
| 174 | 
            +
                >>> cf.get('key1')
         | 
| 175 | 
            +
                {'2': {'sub4': 'val4', 'sub3': 'val3'}}
         | 
| 176 | 
            +
                >>> cf.get('key1', super_column='2')
         | 
| 177 | 
            +
                {'sub3': 'val3', 'sub4': 'val4'}
         | 
| 178 | 
            +
                >>> cf.multiget(['key1'], super_column='2')
         | 
| 179 | 
            +
                {'key1': {'sub3': 'val3', 'sub4': 'val4'}}
         | 
| 180 | 
            +
                >>> list(cf.get_range(super_column='2'))
         | 
| 181 | 
            +
                [('key1', {'sub3': 'val3', 'sub4': 'val4'})]
         | 
| 182 | 
            +
             | 
| 183 | 
            +
            You may also use a ColumnFamilyMap with SuperColumns:
         | 
| 184 | 
            +
             | 
| 185 | 
            +
                >>> Test.objects = pycassa.ColumnFamilyMap(Test, cf)
         | 
| 186 | 
            +
                >>> t = Test()
         | 
| 187 | 
            +
                >>> t.key = 'key1'
         | 
| 188 | 
            +
                >>> t.super_column = 'super1'
         | 
| 189 | 
            +
                >>> t.string_column = 'foobar'
         | 
| 190 | 
            +
                >>> t.int_str_column = 5
         | 
| 191 | 
            +
                >>> t.float_column = t.float_str_column = 35.8
         | 
| 192 | 
            +
                >>> t.datetime_str_column = datetime.now()
         | 
| 193 | 
            +
                >>> Test.objects.insert(t)
         | 
| 194 | 
            +
                >>> Test.objects.get(t.key)
         | 
| 195 | 
            +
                {'super1': <__main__.Test object at 0x20ab350>}
         | 
| 196 | 
            +
                >>> Test.objects.multiget([t.key])
         | 
| 197 | 
            +
                {'key1': {'super1': <__main__.Test object at 0x20ab550>}}
         | 
| 198 | 
            +
             | 
| 199 | 
            +
            These output values retain the same format as given by the Cassandra thrift interface.
         | 
| 200 | 
            +
            2
         | 
| @@ -0,0 +1,211 @@ | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "protocol" : "Cassandra",
         | 
| 3 | 
            +
                "namespace" : "org.apache.cassandra.avro",  "types" : [
         | 
| 4 | 
            +
                
         | 
| 5 | 
            +
            Add/insert one value
         | 
| 6 | 
            +
                
         | 
| 7 | 
            +
              Mutate        ks, [col_ref], 'val', ts, ttl }
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            Add/insert multiple cols to same row
         | 
| 10 | 
            +
              
         | 
| 11 | 
            +
              MutateRow     ks, supercol_or_nil, { [col, val, ts, ttl], [col,val,ts,ttl],...}}
         | 
| 12 | 
            +
              MutateCRow    ks,                  { [col, val, ts, ttl], [col,val,ts,ttl],...}}
         | 
| 13 | 
            +
              MutateSCRow   ks, supercol,        { [col, val, ts, ttl], [col,val,ts,ttl],...}}
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            Get one, many or all columns from given row
         | 
| 16 | 
            +
             | 
| 17 | 
            +
              get
         | 
| 18 | 
            +
              Multiget      ks, supercol_or_nil, [col1, col2, ...] or nil
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            Get one, many or all columns from a slice of sequential rows
         | 
| 21 | 
            +
             | 
| 22 | 
            +
              get_range
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            Remove one column from a row
         | 
| 25 | 
            +
              
         | 
| 26 | 
            +
              remove
         | 
| 27 | 
            +
              
         | 
| 28 | 
            +
            Remove many columns from a row
         | 
| 29 | 
            +
              
         | 
| 30 | 
            +
            Remove all columns in a row
         | 
| 31 | 
            +
             | 
| 32 | 
            +
             | 
| 33 | 
            +
             | 
| 34 | 
            +
            h3. Mo
         | 
| 35 | 
            +
                
         | 
| 36 | 
            +
                    
         | 
| 37 | 
            +
                    { "name" : "AccessLevel",               "type" : "enum",       "symbols" : [ "NONE", "READONLY", "READWRITE", "FULL" ]  },
         | 
| 38 | 
            +
                    { "name" : "ColumnPath",                "type" : "record",    "fields" : [
         | 
| 39 | 
            +
                        { "name" : "column_family",         "type" : "string"},
         | 
| 40 | 
            +
                        { "name" : "super_column",          "type" : [ "bytes", "null" ]},
         | 
| 41 | 
            +
                        { "name" : "column",                "type" : [ "bytes", "null" ] } ]},
         | 
| 42 | 
            +
                    { "name" : "ColumnParent",              "type" : "record",	"fields" : [
         | 
| 43 | 
            +
                        { "name" : "column_family",         "type" : "string"},
         | 
| 44 | 
            +
                        { "name" : "super_column",          "type" : [ "bytes", "null" ] } ]},
         | 
| 45 | 
            +
                    { "name" : "SliceRange",                "type" : "record",	"fields" : [
         | 
| 46 | 
            +
                        { "name" : "start",                 "type" : "bytes"},
         | 
| 47 | 
            +
                        { "name" : "finish",                "type" : "bytes"},
         | 
| 48 | 
            +
                        { "name" : "reversed",              "type" : "boolean"},
         | 
| 49 | 
            +
                        { "name" : "count",                 "type" : "int"},
         | 
| 50 | 
            +
                        { "name" : "bitmasks",              "type" : [ { "type" : "array",        "items" : "bytes"},  "null" ]    } ]},
         | 
| 51 | 
            +
                    { "name" : "SlicePredicate",            "type" : "record",	"fields" : [
         | 
| 52 | 
            +
                        { "name" : "column_names",          "type" : [ { "type" : "array",        "items" : "bytes"},  "null" ]},
         | 
| 53 | 
            +
                        { "name" : "slice_range",           "type" : [ "SliceRange", "null" ]    } ]},
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                    { "name" : "Clock",                     "type" : "record",	"fields" : [
         | 
| 56 | 
            +
                        { "name" : "timestamp",	"type" : "long"   } ]},
         | 
| 57 | 
            +
                    { "name" : "Column",                    "type" : "record",	"fields" : [
         | 
| 58 | 
            +
                        { "name" : "name",                  "type" : "bytes"},
         | 
| 59 | 
            +
                        { "name" : "value",                 "type" : "bytes"},
         | 
| 60 | 
            +
                        { "name" : "clock",                 "type" : "Clock"},
         | 
| 61 | 
            +
                        { "name" : "ttl",                   "type" : "int"   } ]},
         | 
| 62 | 
            +
                    { "name" : "SuperColumn",               "type" : "record",	"fields" : [
         | 
| 63 | 
            +
                        { "name" : "name",                  "type" : "bytes"},
         | 
| 64 | 
            +
                        { "name" : "columns",               "type" : { "type" : "array",        "items" : "Column" } } ]},
         | 
| 65 | 
            +
                    { "name" : "ColumnOrSuperColumn",       "type" : "record",	"fields" : [
         | 
| 66 | 
            +
                        { "name" : "column",                "type" : "Column" },
         | 
| 67 | 
            +
                        { "name" : "super_column",          "type" : "null"     } ]},
         | 
| 68 | 
            +
                    { "name" : "Deletion",                  "type" : "record",	"fields" : [
         | 
| 69 | 
            +
                        { "name" : "clock",                 "type" : "Clock"},
         | 
| 70 | 
            +
                        { "name" : "super_column",          "type" : [ "bytes", "null" ]},
         | 
| 71 | 
            +
                        { "name" : "predicate",             "type" : [ "SlicePredicate", "null" ]    } ]},
         | 
| 72 | 
            +
                    { "name" : "Mutation",                  "type" : "record",	"fields" : [
         | 
| 73 | 
            +
                        { "name" : "column_or_supercolumn", "type" : "ColumnOrSuperColumn" },
         | 
| 74 | 
            +
                        { "name" : "deletion",              "type" : "null" }
         | 
| 75 | 
            +
                    ]},
         | 
| 76 | 
            +
                    { "name" : "StreamingMutation",         "type" : "record",	"fields" : [
         | 
| 77 | 
            +
                        { "name" : "key",	                "type" : "bytes"       },
         | 
| 78 | 
            +
                        { "name" : "mutation",	        "type" : "Mutation"    } ]},
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                    { "name" : "IndexType",                 "type" : "enum",	"symbols" : [ "KEYS" ]},
         | 
| 81 | 
            +
                    { "name" : "ColumnDef",                 "type" : "record",	"fields" : [
         | 
| 82 | 
            +
                        { "name" : "name",                  "type" : "bytes"},
         | 
| 83 | 
            +
                        { "name" : "validation_class",      "type" : "string"},
         | 
| 84 | 
            +
                        { "name" : "index_type",            "type" : [ "IndexType", "null" ]},
         | 
| 85 | 
            +
                        { "name" : "index_name",            "type" : [ "string", "null" ]    } ]},
         | 
| 86 | 
            +
                    { "name" : "CfDef",                     "type" : "record",	"fields" : [
         | 
| 87 | 
            +
                        { "name" : "keyspace",              "type" : "string"},
         | 
| 88 | 
            +
                        { "name" : "name",                  "type" : "string"},
         | 
| 89 | 
            +
                        { "name" : "column_type",           "type" : [ "string", "null" ]},
         | 
| 90 | 
            +
                        { "name" : "clock_type",            "type" : [ "string", "null" ]},
         | 
| 91 | 
            +
                        { "name" : "comparator_type",       "type" : [ "string", "null" ]},
         | 
| 92 | 
            +
                        { "name" : "subcomparator_type",    "type" : [ "string", "null" ]},
         | 
| 93 | 
            +
                        { "name" : "reconciler",            "type" : [ "string", "null" ]},
         | 
| 94 | 
            +
                        { "name" : "comment",               "type" : [ "string", "null" ]},
         | 
| 95 | 
            +
                        { "name" : "row_cache_size",        "type" : [ "double", "null" ]},
         | 
| 96 | 
            +
                        { "name" : "preload_row_cache",     "type" : [ "boolean", "null" ]},
         | 
| 97 | 
            +
                        { "name" : "key_cache_size",        "type" : [ "double", "null" ]},
         | 
| 98 | 
            +
                        { "name" : "read_repair_chance",    "type" : [ "double", "null" ]},
         | 
| 99 | 
            +
                        { "name" : "gc_grace_seconds",      "type" : [ "int", "null" ]},
         | 
| 100 | 
            +
                        { "name" : "column_metadata",       "type" : [ { "type" : "array",        "items" : "ColumnDef"},  "null" ]},
         | 
| 101 | 
            +
                        { "name" : "id",                    "type" : [ "int", "null" ]    } ]},
         | 
| 102 | 
            +
                    { "name" : "KsDef",                     "type" : "record",	"fields" : [
         | 
| 103 | 
            +
                        { "name" : "name",                  "type" : "string"},  { "name" : "strategy_class",	"type" : "string"},
         | 
| 104 | 
            +
                        { "name" : "strategy_options",      "type" : [ { "type" : "map",        "values" : "string"},  "null" ]},
         | 
| 105 | 
            +
                        { "name" : "replication_factor",    "type" : "int"},  { "name" : "cf_defs",	"type" : { "type" : "array",        "items" : "CfDef"      }    } ]},
         | 
| 106 | 
            +
                    { "name" : "MutationsMapEntry",         "type" : "record",	"fields" : [ { "name" : "key",	"type" : "bytes"},  { "name" : "mutations",	"type" : { "type" : "map",        "values" : { "type" : "array",          "items" : "Mutation"        }      }    } ]},
         | 
| 107 | 
            +
                    { "name" : "CoscsMapEntry",             "type" : "record",	"fields" : [ { "name" : "key",	"type" : "bytes"},  { "name" : "columns",	"type" : { "type" : "array",        "items" : "ColumnOrSuperColumn"      }    } ]},
         | 
| 108 | 
            +
                    { "name" : "ConsistencyLevel",          "type" : "enum",	"symbols" : [ "ZERO", "ONE", "QUORUM", "DCQUORUM", "DCQUORUMSYNC", "ALL" ]},
         | 
| 109 | 
            +
                    { "name" : "InvalidRequestException",   "type" : "error",	"fields" : [ { "name" : "why",	"type" : [ "string", "null" ]    } ]},
         | 
| 110 | 
            +
                    { "name" : "NotFoundException",         "type" : "error",	"fields" : [ { "name" : "why",	"type" : [ "string", "null" ]    } ]},
         | 
| 111 | 
            +
                    { "name" : "UnavailableException",      "type" : "error",	"fields" : [ { "name" : "why",	"type" : [ "string", "null" ]    } ]},
         | 
| 112 | 
            +
                    { "name" : "TimedOutException",         "type" : "error",	"fields" : [ { "name" : "why",	"type" : [ "string", "null" ]    } ]  }
         | 
| 113 | 
            +
                ],
         | 
| 114 | 
            +
             | 
| 115 | 
            +
                
         | 
| 116 | 
            +
              "messages" : { "get" : {
         | 
| 117 | 
            +
                  "request" : [ { "name" : "key",	"type" : "bytes"},
         | 
| 118 | 
            +
              { "name" : "column_path",	"type" : "ColumnPath"},
         | 
| 119 | 
            +
              { "name" : "consistency_level",	"type" : "ConsistencyLevel"
         | 
| 120 | 
            +
                  } ],
         | 
| 121 | 
            +
                  "response" : "ColumnOrSuperColumn",
         | 
| 122 | 
            +
                  "errors" : [ "InvalidRequestException", "NotFoundException", "UnavailableException", "TimedOutException" ]
         | 
| 123 | 
            +
                },
         | 
| 124 | 
            +
                "get_slice" : {
         | 
| 125 | 
            +
                  "request" : [ { "name" : "key",	"type" : "bytes"},
         | 
| 126 | 
            +
              { "name" : "column_parent",	"type" : "ColumnParent"},
         | 
| 127 | 
            +
              { "name" : "predicate",	"type" : "SlicePredicate"},
         | 
| 128 | 
            +
              { "name" : "consistency_level",	"type" : "ConsistencyLevel"
         | 
| 129 | 
            +
                  } ],
         | 
| 130 | 
            +
                  "response" : { "type" : "array",
         | 
| 131 | 
            +
                    "items" : "ColumnOrSuperColumn"
         | 
| 132 | 
            +
                  },
         | 
| 133 | 
            +
                  "errors" : [ "InvalidRequestException", "UnavailableException", "TimedOutException" ]
         | 
| 134 | 
            +
                },
         | 
| 135 | 
            +
                "multiget_slice" : {
         | 
| 136 | 
            +
                  "request" : [ { "name" : "keys",	"type" : { "type" : "array",
         | 
| 137 | 
            +
                      "items" : "bytes"
         | 
| 138 | 
            +
                    }},
         | 
| 139 | 
            +
              { "name" : "column_parent",	"type" : "ColumnParent"},
         | 
| 140 | 
            +
              { "name" : "predicate",	"type" : "SlicePredicate"},
         | 
| 141 | 
            +
              { "name" : "consistency_level",	"type" : "ConsistencyLevel"
         | 
| 142 | 
            +
                  } ],
         | 
| 143 | 
            +
                  "response" : { "type" : "array",
         | 
| 144 | 
            +
                    "items" : "CoscsMapEntry"
         | 
| 145 | 
            +
                  },
         | 
| 146 | 
            +
                  "errors" : [ "InvalidRequestException", "UnavailableException", "TimedOutException" ]
         | 
| 147 | 
            +
                },
         | 
| 148 | 
            +
                "get_count" : {
         | 
| 149 | 
            +
                  "request" : [ { "name" : "key",	"type" : "bytes"},
         | 
| 150 | 
            +
              { "name" : "column_parent",	"type" : "ColumnParent"},
         | 
| 151 | 
            +
              { "name" : "predicate",	"type" : "SlicePredicate"},
         | 
| 152 | 
            +
              { "name" : "consistency_level",	"type" : "ConsistencyLevel"
         | 
| 153 | 
            +
                  } ],
         | 
| 154 | 
            +
                  "response" : "int",
         | 
| 155 | 
            +
                  "errors" : [ "InvalidRequestException", "UnavailableException", "TimedOutException" ]
         | 
| 156 | 
            +
                },
         | 
| 157 | 
            +
                "insert" : {
         | 
| 158 | 
            +
                  "request" : [ { "name" : "key",	"type" : "bytes"},
         | 
| 159 | 
            +
              { "name" : "column_parent",	"type" : "ColumnParent"},
         | 
| 160 | 
            +
              { "name" : "column",	"type" : "Column"},
         | 
| 161 | 
            +
              { "name" : "consistency_level",	"type" : "ConsistencyLevel"
         | 
| 162 | 
            +
                  } ],
         | 
| 163 | 
            +
                  "response" : "null",
         | 
| 164 | 
            +
                  "errors" : [ "InvalidRequestException", "UnavailableException", "TimedOutException" ]
         | 
| 165 | 
            +
                },
         | 
| 166 | 
            +
                "remove" : {
         | 
| 167 | 
            +
                  "request" : [ { "name" : "key",	"type" : "bytes"},
         | 
| 168 | 
            +
              { "name" : "column_path",	"type" : "ColumnPath"},
         | 
| 169 | 
            +
              { "name" : "clock",	"type" : "Clock"},
         | 
| 170 | 
            +
              { "name" : "consistency_level",	"type" : "ConsistencyLevel"
         | 
| 171 | 
            +
                  } ],
         | 
| 172 | 
            +
                  "response" : "null",
         | 
| 173 | 
            +
                  "errors" : [ "InvalidRequestException", "UnavailableException", "TimedOutException" ]
         | 
| 174 | 
            +
                },
         | 
| 175 | 
            +
                "batch_mutate" : {
         | 
| 176 | 
            +
                  "request" : [ { "name" : "mutation_map",	"type" : { "type" : "array",
         | 
| 177 | 
            +
                      "items" : "MutationsMapEntry"
         | 
| 178 | 
            +
                    }},
         | 
| 179 | 
            +
              { "name" : "consistency_level",	"type" : "ConsistencyLevel"
         | 
| 180 | 
            +
                  } ],
         | 
| 181 | 
            +
                  "response" : "null",
         | 
| 182 | 
            +
                  "errors" : [ "InvalidRequestException", "UnavailableException", "TimedOutException" ]
         | 
| 183 | 
            +
                },
         | 
| 184 | 
            +
                "system_add_keyspace" : {
         | 
| 185 | 
            +
                  "request" : [ { "name" : "ks_def",	"type" : "KsDef"
         | 
| 186 | 
            +
                  } ],
         | 
| 187 | 
            +
                  "response" : "null",
         | 
| 188 | 
            +
                  "errors" : [ "InvalidRequestException" ]
         | 
| 189 | 
            +
                },
         | 
| 190 | 
            +
                "set_keyspace" : {
         | 
| 191 | 
            +
                  "request" : [ { "name" : "keyspace",	"type" : "string"
         | 
| 192 | 
            +
                  } ],
         | 
| 193 | 
            +
                  "response" : "null",
         | 
| 194 | 
            +
                  "errors" : [ "InvalidRequestException" ]
         | 
| 195 | 
            +
                },
         | 
| 196 | 
            +
                "describe_keyspaces" : {
         | 
| 197 | 
            +
                  "request" : [ ],
         | 
| 198 | 
            +
                  "response" : { "type" : "array",
         | 
| 199 | 
            +
                    "items" : "string"
         | 
| 200 | 
            +
                  }
         | 
| 201 | 
            +
                },
         | 
| 202 | 
            +
                "describe_cluster_name" : {
         | 
| 203 | 
            +
                  "request" : [ ],
         | 
| 204 | 
            +
                  "response" : "string"
         | 
| 205 | 
            +
                },
         | 
| 206 | 
            +
                "describe_version" : {
         | 
| 207 | 
            +
                  "request" : [ ],
         | 
| 208 | 
            +
                  "response" : "string"
         | 
| 209 | 
            +
                }
         | 
| 210 | 
            +
              }
         | 
| 211 | 
            +
            }
         |