cassandra_client 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,134 @@
1
+ # Copyright (c) 2004-2009 David Heinemeier Hansson
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining
4
+ # a copy of this software and associated documentation files (the
5
+ # "Software"), to deal in the Software without restriction, including
6
+ # without limitation the rights to use, copy, modify, merge, publish,
7
+ # distribute, sublicense, and/or sell copies of the Software, and to
8
+ # permit persons to whom the Software is furnished to do so, subject to
9
+ # the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be
12
+ # included in all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
class CassandraClient
  # Insertion-ordered hash.
  #
  # Hash is ordered in Ruby 1.9 and later, so there OrderedHash is simply
  # ::Hash. On older interpreters a Hash subclass records the insertion order
  # of its keys in a parallel @keys array and routes iteration through it.
  if RUBY_VERSION >= '1.9'
    OrderedHash = ::Hash
  else
    class OrderedHash < Hash #:nodoc:
      require 'enumerator'

      # Build a hash from a flat list of alternating keys and values.
      def self.[](*array)
        hash = new
        array.each_slice(2) { |key, value| hash[key] = value }
        hash
      end

      def initialize(*args, &block)
        super
        @keys = []
      end

      def initialize_copy(other)
        super
        # #keys hands back a fresh array, so the copy never shares its
        # ordering state with the original.
        @keys = other.keys
      end

      def []=(key, value)
        # Only a brand-new key gets a position; overwriting keeps the old one.
        @keys << key unless has_key?(key)
        super
      end

      def delete(key)
        if has_key? key
          index = @keys.index(key)
          @keys.delete_at index
        end
        super
      end

      def delete_if
        super
        sync_keys!
        self
      end

      def reject!
        super
        sync_keys!
        self
      end

      def reject(&block)
        dup.reject!(&block)
      end

      # Keys in insertion order (a copy; mutating it does not affect the hash).
      def keys
        @keys.dup
      end

      # Values in insertion order of their keys.
      def values
        @keys.collect { |key| self[key] }
      end

      # [key, value] pairs in insertion order. Without this override the
      # inherited Hash#to_a bypasses @keys and loses the ordering.
      def to_a
        @keys.collect { |key| [key, self[key]] }
      end

      def to_hash
        self
      end

      # The iterators return self, like their Hash counterparts, rather than
      # leaking the internal key array.
      def each_key
        @keys.each { |key| yield key }
        self
      end

      def each_value
        @keys.each { |key| yield self[key] }
        self
      end

      def each
        @keys.each { |key| yield [key, self[key]] }
        self
      end

      alias_method :each_pair, :each

      def clear
        super
        @keys.clear
        self
      end

      # Remove and return the oldest [key, value] pair. An empty hash defers
      # to Hash#shift instead of fabricating a [nil, nil] pair.
      def shift
        return super if @keys.empty?
        k = @keys.first
        v = delete(k)
        [k, v]
      end

      # Merge other_hash into self. When a block is given it is called with
      # (key, old_value, new_value) for keys present on both sides, as with
      # Hash#merge!.
      def merge!(other_hash)
        other_hash.each do |k, v|
          self[k] = (block_given? && has_key?(k)) ? yield(k, self[k], v) : v
        end
        self
      end

      def merge(other_hash, &block)
        dup.merge!(other_hash, &block)
      end

      def inspect
        "#<OrderedHash #{super}>"
      end

      private

      # Drop ordering entries for keys that were removed behind our back
      # (delete_if / reject! operate on the underlying Hash directly).
      def sync_keys!
        @keys.delete_if { |k| !has_key?(k) }
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+
2
class CassandraClient
  # Pluggable value serializers. Each module provides a dump/load pair and is
  # meant to be mixed into an object with `extend`. For every serializer
  # defined here a Compressed<Name> variant is generated that additionally
  # deflates on dump and inflates on load.
  module Serialization
    # Stores values as their #to_s representation; loads them back untouched.
    module String
      def dump(object)
        object.to_s
      end

      def load(object)
        object
      end
    end

    # Round-trips arbitrary Ruby objects through ::Marshal.
    #
    # NOTE(review): Marshal.load can instantiate arbitrary objects. Only use
    # this serializer when everything stored in the database is trusted.
    module Marshal
      def dump(object)
        ::Marshal.dump(object)
      end

      def load(object)
        ::Marshal.load(object)
      end
    end

    # Round-trips values through JSON, preferring yajl when it is installed.
    module JSON
      def dump(object)
        ::JSON.dump(object)
      end

      begin
        require 'yajl/json_gem'
        def load(object)
          ::JSON.load(object)
        end
      rescue LoadError
        require 'json/ext'
        def load(object)
          # Wrap the document in an array and unwrap the single element
          # after parsing.
          ::JSON.load("[#{object}]").first # :-(
        end
      end
    end

    # module Avro
    #  # Someday!
    # end

    # Decorate all available modules with compression. `constants` returns a
    # snapshot, so the Compressed* modules added below are not wrapped again.
    constants.each do |module_name|
      serializer = const_get(module_name)

      compressed = Module.new do
        include serializer

        def dump(object)
          Zlib::Deflate.deflate(super(object))
        end

        def load(object)
          super(Zlib::Inflate.inflate(object))
        end
      end

      const_set("Compressed#{module_name}", compressed)
    end
  end
end
64
+
@@ -0,0 +1,202 @@
1
class CassandraClient
  # A handle on a single table. It talks to the server through the client
  # object owned by +parent+, and values are encoded and decoded with the
  # dump/load pair of the serialization module supplied by +parent+.
  class Table
    attr_reader :name, :schema, :parent

    MAX_INT = 2**31 - 1

    # name   - table name passed to every client call.
    # parent - object providing #client, #block_for and #serialization.
    def initialize(name, parent)
      @parent = parent
      @client = parent.client
      @block_for = parent.block_for

      @name = name
      @schema = @client.describeTable(@name)
      # Mix the serializer's dump/load into this instance only.
      extend(parent.serialization)
    end

    # Short description of the table. With full = false the schema and the
    # parent are left out.
    def inspect(full = true)
      string = "#<CassandraClient::Table:#{object_id}, @name=#{name.inspect}"
      if full
        families = schema.map { |family, properties| ":#{family} => #{properties['type'].inspect}" }.join(', ')
        string += ", @schema={#{families}}, @parent=#{parent.inspect(false)}"
      end
      string + ">"
    end

    ## Write

    # Insert a row for a key. Pass a flat hash for a regular column family, and
    # a nested hash for a super column family.
    def insert(key, column_family, hash, timestamp = now)
      column_family = column_family.to_s
      writer = is_super(column_family) ? :insert_super : :insert_standard
      send(writer, key, column_family, hash, timestamp)
    end

    private

    def insert_standard(key, column_family, hash, timestamp = now)
      mutation = Batch_mutation_t.new(
        :table => @name,
        :key => key,
        :cfmap => {column_family => hash_to_columns(hash, timestamp)})
      @client.batch_insert(mutation, @block_for)
    end

    def insert_super(key, column_family, hash, timestamp = now)
      mutation = Batch_mutation_super_t.new(
        :table => @name,
        :key => key,
        :cfmap => {column_family => hash_to_super_columns(hash, timestamp)})
      @client.batch_insert_superColumn(mutation, @block_for)
    end

    public

    ## Delete

    # Remove the element at the column_family:key:super_column:column
    # path you request.
    def remove(key, column_family, super_column = nil, column = nil, timestamp = now)
      column_family = column_family.to_s
      column_family += ":#{super_column}" if super_column
      column_family += ":#{column}" if column
      @client.remove(@name, key, column_family, timestamp, @block_for)
    end

    # Remove all rows in the column family you request.
    def remove_all(column_family)
      # Ask for every key explicitly: get_key_range's default limit of 100
      # would silently leave any further rows in place.
      get_key_range(''..'', column_family, MAX_INT).each do |key|
        remove(key, column_family)
      end
    end

    ## Read

    # Count the elements at the column_family:key:super_column path you
    # request.
    def count_columns(key, column_family, super_column = nil)
      column_family = column_family.to_s
      column_family += ":#{super_column}" if super_column
      @client.get_column_count(@name, key, column_family)
    end

    # Return a list of single values for the elements at the
    # column_family:key:super_column:column path you request.
    #
    # With only +super_columns+ given, its entries are the names to fetch.
    # With both arguments, +super_columns+ is appended to the path and
    # +columns+ names the entries to fetch below it. The result has one entry
    # per requested name, in request order (nil where nothing was returned).
    def get_columns(key, column_family, super_columns, columns = nil)
      column_family = column_family.to_s
      get_slice_by_names = (is_super(column_family) && !columns) ? :get_slice_super_by_names : :get_slice_by_names
      if super_columns && columns
        column_family += ":#{super_columns}"
        columns = Array(columns)
      else
        columns = Array(super_columns)
      end

      hash = columns_to_hash(@client.send(get_slice_by_names, @name, key, column_family, columns))
      columns.map { |column| hash[column] }
    end

    # Return a hash (actually, a CassandraClient::OrderedHash) or a single value
    # representing the element at the column_family:key:super_column:column
    # path you request.
    #
    # +offset+ and +limit+ only apply when a whole row or a whole super column
    # family row is requested. When the client raises NotFoundException the
    # result is {} for a super column family queried without +column+, and nil
    # otherwise.
    def get(key, column_family, super_column = nil, column = nil, offset = -1, limit = 100)
      column_family = column_family.to_s
      column_family += ":#{super_column}" if super_column
      column_family += ":#{column}" if column

      # You have got to be kidding
      if is_super(column_family)
        if column
          load(@client.get_column(@name, key, column_family).value)
        elsif super_column
          columns_to_hash(@client.get_superColumn(@name, key, column_family).columns)
        else
          columns_to_hash(@client.get_slice_super(@name, key, "#{column_family}:", offset, limit))
        end
      else
        if super_column
          # For a standard column family the third argument is the column name.
          load(@client.get_column(@name, key, column_family).value)
        elsif is_sorted_by_time(column_family)
          result = columns_to_hash(@client.get_columns_since(@name, key, column_family, 0))

          # FIXME Hack until get_slice on a time-sorted column family works again.
          # Array#[] returns nil for an out-of-range window; treat that as empty.
          result = OrderedHash[*flatten_once(result.to_a[offset, limit] || [])] if offset > -1
          result
        else
          columns_to_hash(@client.get_slice(@name, key, "#{column_family}:", offset, limit))
        end
      end
    rescue NotFoundException
      is_super(column_family) && !column ? {} : nil
    end

    # FIXME
    # def get_recent(key, column_family, super_column = nil, column = nil, timestamp = 0)
    # end

    # Return a list of keys in the column_family you request. Requires the
    # table to be partitioned with OrderPreservingHash.
    #
    # May also be called with the column family as the only argument, in which
    # case the key range defaults to ''..''.
    def get_key_range(key_range, column_family = nil, limit = 100)
      column_family, key_range = key_range, ''..'' unless column_family
      column_families = Array(column_family).map { |c| c.to_s }
      @client.get_key_range(@name, column_families, key_range.begin, key_range.end, limit)
    end

    # Count all rows in the column_family you request. Requires the table
    # to be partitioned with OrderPreservingHash.
    def count(key_range, column_family = nil, limit = MAX_INT)
      get_key_range(key_range, column_family, limit).size
    end

    private

    def is_super(column_family)
      column_family_property(column_family, 'type') == 'Super'
    end

    def is_sorted_by_time(column_family)
      column_family_property(column_family, 'sort') == 'Time'
    end

    # Look up a schema property for a column family, given either its name or
    # a "family:super_column:column" path. Raises AccessError when the schema
    # has no such column family.
    def column_family_property(column_family_or_path, key)
      column_family = column_family_or_path.to_s.split(':').first
      properties = @schema && @schema[column_family]
      raise AccessError, "Invalid column family \":#{column_family}\"" unless properties
      properties[key]
    end

    # Convert a list of columns (or super columns, recursively) returned by
    # the client into an ordered hash of name => loaded value.
    def columns_to_hash(columns)
      hash = ::CassandraClient::OrderedHash.new
      Array(columns).each do |c|
        if c.is_a?(SuperColumn_t)
          hash[c.name] = columns_to_hash(c.columns)
        else
          hash[c.columnName] = load(c.value)
        end
      end
      hash
    end

    def hash_to_columns(hash, timestamp)
      hash.map do |column, value|
        Column_t.new(:columnName => column, :value => dump(value), :timestamp => timestamp)
      end
    end

    def hash_to_super_columns(hash, timestamp)
      hash.map do |super_column, columns|
        SuperColumn_t.new(:name => super_column, :columns => hash_to_columns(columns, timestamp))
      end
    end

    # Current wall-clock time as integer microseconds since the epoch.
    def time_in_microseconds
      time = Time.now
      time.to_i * 1_000_000 + time.usec
    end
    alias :now :time_in_microseconds

    # Flatten exactly one level: [[k, v], ...] => [k, v, ...].
    def flatten_once(array)
      result = []
      array.each { |el| result.concat(el) }
      result
    end
  end
end
@@ -0,0 +1,14 @@
1
+
2
+ require 'zlib'
3
+ require 'rubygems'
4
+ require 'thrift'
5
+
6
+ HERE = File.expand_path(File.dirname(__FILE__))
7
+
8
+ require "#{HERE}/cassandra_client/client"
9
+ require "#{HERE}/cassandra_client/table"
10
+ require "#{HERE}/cassandra_client/serialization"
11
+ require "#{HERE}/cassandra_client/ordered_hash"
12
+
13
+ $LOAD_PATH << "#{HERE}/../vendor/gen-rb"
14
+ require 'cassandra'
data/quickstart.sh ADDED
@@ -0,0 +1,10 @@
1
+
2
# Abort on the first failing command, so a missing vendor directory or a bad
# archive cannot leave later commands running in the wrong place.
set -e

# On first run, unpack the vendored snapshot straight into the current
# directory. The archive is expected to contain a top-level
# cassandra-r789419 directory.
if [ ! -e cassandra-r789419 ]; then
  tar xjvf vendor/cassandra-r789419.tar.bz2
fi

# Quote the path so a working directory containing spaces survives.
CASSANDRA_INCLUDE="$(pwd)/conf/cassandra.in.sh"
export CASSANDRA_INCLUDE
cassandra-r789419/bin/cassandra -f
@@ -0,0 +1,204 @@
1
+
2
+ require 'test/unit'
3
+ require "#{File.expand_path(File.dirname(__FILE__))}/../lib/cassandra_client"
4
+
5
+ begin; require 'ruby-debug'; rescue LoadError; end
6
+
7
# Integration tests for CassandraClient. setup connects to 127.0.0.1 and
# calls remove_all on the client, then uses the 'Statuses' and 'Users'
# tables. Each test uses its own method name (see #key) as the row key.
class CassandraClientTest < Test::Unit::TestCase
  def setup
    @client = CassandraClient.new('127.0.0.1')
    @client.remove_all
    @statuses = @client.table('Statuses')
    @users = @client.table('Users')
  end

  def test_inspect
    assert_nothing_raised do
      @statuses.inspect
      @client.inspect
    end
  end

  def test_connection_reopens
    assert_raises(NoMethodError) do
      @statuses.insert(1, :row, {'body' => 'v'})
    end
    assert_nothing_raised do
      @statuses.insert(key, :row, {'body' => 'v'})
    end
  end

  def test_get_key_name_sorted
    @users.insert(key, :row, {'body' => 'v', 'user' => 'v'})
    assert_equal({'body' => 'v', 'user' => 'v'}, @users.get(key, :row))
    assert_equal({}, @users.get('bogus', :row))
  end

  def test_get_key_name_sorted_preserving_order
    # In-order hash is preserved
    hash = CassandraClient::OrderedHash['a', '', 'b', '', 'c', '', 'd', '']
    @users.insert(key, :row, hash)
    assert_equal(hash.keys, @users.get(key, :row).keys)

    @users.remove(key, :row)

    # Out-of-order hash is returned sorted
    hash = CassandraClient::OrderedHash['b', '', 'c', '', 'd', '', 'a', '']
    @users.insert(key, :row, hash)
    assert_equal(hash.keys.sort, @users.get(key, :row).keys)
    assert_not_equal(hash.keys, @users.get(key, :row).keys)
  end

  def test_get_key_time_sorted
    @statuses.insert(key, :row, {'body' => 'v', 'user' => 'v'})
    assert_equal({'body' => 'v', 'user' => 'v'}, @statuses.get(key, :row))
    assert_equal({}, @statuses.get('bogus', :row))
  end

  def test_get_key_time_sorted_with_limit
    @statuses.insert(key, :row, {'first' => 'v'})
    @statuses.insert(key, :row, {'second' => 'v'})
    assert_equal({'second' => 'v'}, @statuses.get(key, :row, nil, nil, 0, 1))
  end

  def test_get_value
    @statuses.insert(key, :row, {'body' => 'v'})
    assert_equal 'v', @statuses.get(key, :row, 'body')
    assert_nil @statuses.get('bogus', :row, 'body')
  end

  def test_get_super_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'4' => 'v', '5' => 'v'}})
    assert_equal({'user_timelines' => {'4' => 'v', '5' => 'v'}}, @statuses.get(key, :relationships))
    assert_equal({}, @statuses.get('bogus', :relationships))
  end

  def test_get_super_key_multi
    @statuses.insert(key, :relationships, {
      'user_timelines' => {'1' => 'v1'},
      'mentions_timelines' => {'2' => 'v2'}})
    assert_equal({
      'user_timelines' => {'1' => 'v1'},
      'mentions_timelines' => {'2' => 'v2'}}, @statuses.get(key, :relationships))
    assert_equal({}, @statuses.get('bogus', :relationships))
  end

  def test_get_super_sub_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'4' => 'v', '5' => 'v'}})
    assert_equal({'4' => 'v', '5' => 'v'}, @statuses.get(key, :relationships, 'user_timelines'))
    assert_equal({}, @statuses.get('bogus', :relationships, 'user_timelines'))
  end

  def test_get_super_value
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v'}})
    assert_equal('v', @statuses.get(key, :relationships, 'user_timelines', '1'))
    assert_nil @statuses.get('bogus', :relationships, 'user_timelines', '1')
  end

  def test_get_key_range
    @statuses.insert('3', :row, {'body' => 'v'})
    @statuses.insert('4', :row, {'body' => 'v'})
    @statuses.insert('5', :row, {'body' => 'v'})
    assert_equal(['3', '4', '5'], @statuses.get_key_range('3'..'5', :row))
  end

  # Not supported
  # def test_get_key_range_super
  #   @statuses.insert('3', :relationships, {'user_timelines' => {'1' => 'v'}})
  #   @statuses.insert('4', :relationships, {'user_timelines' => {'1' => 'v'}})
  #   @statuses.insert('5', :relationships, {'user_timelines' => {'1' => 'v'}})
  #   assert_equal(['3', '4', '5'], @statuses.get_key_range('3'..'5', :relationships, 'user_timelines'))
  # end

  def test_remove_key
    @statuses.insert(key, :row, {'body' => 'v'})
    @statuses.remove(key, :row)
    assert_equal({}, @statuses.get(key, :row))
  end

  def test_remove_value
    @statuses.insert(key, :row, {'body' => 'v'})
    @statuses.remove(key, :row, 'body')
    assert_nil @statuses.get(key, :row, 'body')
  end

  def test_remove_super_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v'}})
    @statuses.remove(key, :relationships)
    assert_equal({}, @statuses.get(key, :relationships))
  end

  def test_remove_super_sub_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v'}})
    @statuses.remove(key, :relationships, 'user_timelines')
    assert_equal({}, @statuses.get(key, :relationships, 'user_timelines'))
  end

  def test_remove_super_value
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v'}})
    @statuses.remove(key, :relationships, 'user_timelines', '1')
    assert_nil @statuses.get(key, :relationships, 'user_timelines', '1')
  end

  def test_insert_key
    @statuses.insert(key, :row, {'body' => 'v', 'user' => 'v'})
    assert_equal({'body' => 'v', 'user' => 'v'}, @statuses.get(key, :row))
  end

  def test_insert_super_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v', key => 'v'}})
    assert_equal({'1' => 'v', key => 'v'}, @statuses.get(key, :relationships, 'user_timelines'))
  end

  def test_get_column_values
    @statuses.insert(key, :row, {'body' => 'v1', 'user' => 'v2'})
    assert_equal(['v1', 'v2'], @statuses.get_columns(key, :row, ['body', 'user']))
  end

  def test_get_column_values_super
    @statuses.insert(key, :relationships, {
      'user_timelines' => {'1' => 'v1'},
      'mentions_timelines' => {'2' => 'v2'}})
    assert_equal [{'1' => 'v1'}, {'2' => 'v2'}],
      @statuses.get_columns(key, :relationships, ['user_timelines', 'mentions_timelines'])
  end

  # Not supported
  # def test_get_columns_super_sub
  #   @statuses.insert(key, :relationships, {
  #     'user_timelines' => {'1' => 'v1'},
  #     'mentions_timelines' => {'2' => 'v2'}})
  #   assert_equal ['v1', 'v2'],
  #     @statuses.get_columns(key, :relationships, 'user_timelines', ['1', key])
  # end

  def test_count_keys
    @statuses.insert(key + "1", :row, {'body' => 'v1'})
    @statuses.insert(key + "2", :row, {'body' => 'v1'})
    @statuses.insert(key + "3", :row, {'body' => 'v1'})
    assert_equal 3, @statuses.count(:row)
  end

  def test_count_columns
    @statuses.insert(key, :row, {'body' => 'v1', 'user' => 'v2'})
    assert_equal 2, @statuses.count_columns(key, :row)
  end

  def test_count_super_columns
    @statuses.insert(key, :relationships, {
      'user_timelines' => {'1' => 'v1'},
      'mentions_timelines' => {'2' => 'v2'}})
    assert_equal 2, @statuses.count_columns(key, :relationships)
  end

  def test_count_super_sub_columns
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v1', key => 'v2'}})
    assert_equal 2, @statuses.count_columns(key, :relationships, 'user_timelines')
  end

  private

  # Name of the calling test method, used as a per-test row key.
  #
  # Backtrace frames end in "in `test_foo'" on older Rubies and in
  # "in 'CassandraClientTest#test_foo'" from Ruby 3.4 on. Accept either
  # opening quote and drop an optional "Class#" prefix, so the key is never
  # nil.
  def key
    caller.first[/[`'](?:[^`'#]*#)?([^`']*)'\z/, 1]
  end
end