cassandra_client 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,134 @@
1
+ # Copyright (c) 2004-2009 David Heinemeier Hansson
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining
4
+ # a copy of this software and associated documentation files (the
5
+ # "Software"), to deal in the Software without restriction, including
6
+ # without limitation the rights to use, copy, modify, merge, publish,
7
+ # distribute, sublicense, and/or sell copies of the Software, and to
8
+ # permit persons to whom the Software is furnished to do so, subject to
9
+ # the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be
12
+ # included in all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
class CassandraClient
  # Hash is ordered in Ruby 1.9!
  if RUBY_VERSION >= '1.9'
    OrderedHash = ::Hash
  else
    # Insertion-ordered Hash for Ruby 1.8. The order of insertion is tracked
    # in the @keys array next to the regular Hash storage; iteration and the
    # keys/values accessors walk @keys instead of the underlying Hash.
    class OrderedHash < Hash #:nodoc:
      require 'enumerator'

      # Build an OrderedHash from a flat key, value, key, value... list,
      # from a single Hash-like object, or from a single array of
      # [key, value] pairs. Raises ArgumentError for an odd-length flat
      # list, matching what Hash.[] does on the Ruby 1.9 code path.
      def self.[](*array)
        hash = new
        if array.size == 1 && array.first.respond_to?(:to_hash)
          array.first.to_hash.each { |key, value| hash[key] = value }
        elsif array.size == 1 && array.first.is_a?(Array)
          array.first.each do |pair|
            hash[pair[0]] = pair[1] if pair.is_a?(Array)
          end
        elsif array.size % 2 != 0
          raise ArgumentError, "odd number of arguments for Hash"
        else
          array.each_slice(2) { |key, value| hash[key] = value }
        end
        hash
      end

      def initialize(*args, &block)
        super
        @keys = []
      end

      def initialize_copy(other)
        super
        # other.keys returns a fresh array, so the copy gets its own ordering
        # list (the key objects themselves are shared, not duplicated).
        @keys = other.keys
      end

      # Store a value, remembering the key's position on first insertion.
      def []=(key, value)
        @keys << key unless has_key?(key)
        super
      end

      # Remove a key from both the ordering list and the Hash storage.
      def delete(key)
        # Match with eql?, as Hash lookup does, so that e.g. 1 and 1.0 are
        # never confused when both are present as keys.
        @keys.delete_if { |k| k.eql?(key) } if has_key?(key)
        super
      end

      def delete_if
        super
        sync_keys!
        self
      end

      def reject!
        super
        sync_keys!
        self
      end

      def reject(&block)
        dup.reject!(&block)
      end

      # Keys in insertion order. Returns a copy so callers cannot corrupt
      # the internal ordering list.
      def keys
        @keys.dup
      end

      # Values in key insertion order.
      def values
        @keys.collect { |key| self[key] }
      end

      def to_hash
        self
      end

      def each_key
        @keys.each { |key| yield key }
        self
      end

      def each_value
        @keys.each { |key| yield self[key] }
        self
      end

      # Yield [key, value] pairs in insertion order.
      def each
        @keys.each { |key| yield [key, self[key]] }
        self
      end

      alias_method :each_pair, :each

      def clear
        super
        @keys.clear
        self
      end

      # Remove and return the oldest [key, value] pair, or nil when the
      # hash is empty (rather than a bogus [nil, nil] pair).
      def shift
        return nil if @keys.empty?
        k = @keys.first
        v = delete(k)
        [k, v]
      end

      # Merge other_hash into self. New keys are appended in the order
      # other_hash yields them. When a block is given it is called with
      # (key, old_value, new_value) for keys present on both sides and its
      # result is stored, as Hash#merge! does.
      def merge!(other_hash)
        other_hash.each do |k, v|
          self[k] = (block_given? && has_key?(k)) ? yield(k, self[k], v) : v
        end
        self
      end

      def merge(other_hash, &block)
        dup.merge!(other_hash, &block)
      end

      def inspect
        "#<OrderedHash #{super}>"
      end

    private

      # Drop ordering entries whose key is no longer in the Hash storage.
      def sync_keys!
        @keys.delete_if { |k| !has_key?(k) }
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+
2
# The Compressed* serializers below call Zlib directly, so make sure it is
# loaded even when this file is required on its own.
require 'zlib'

class CassandraClient
  # Value serializers. Each nested module provides a dump/load pair; a
  # serializer is used by extending an object with one of these modules.
  # For every serializer a Compressed<Name> variant is generated that
  # deflates the dumped value and inflates it again before loading.
  module Serialization
    # Stores values as their to_s representation; reads are returned as-is.
    module String
      def dump(object)
        object.to_s
      end

      def load(object)
        object
      end
    end

    # Round-trips arbitrary Ruby objects through ::Marshal.
    module Marshal
      def dump(object)
        ::Marshal.dump(object)
      end

      # NOTE(review): Marshal.load can instantiate arbitrary objects; only
      # use this serializer when the stored data is fully trusted.
      def load(object)
        ::Marshal.load(object)
      end
    end

    # Round-trips values through ::JSON, preferring yajl's JSON-gem
    # compatibility layer when it is installed.
    module JSON
      def dump(object)
        ::JSON.dump(object)
      end

      begin
        require 'yajl/json_gem'
        def load(object)
          ::JSON.load(object)
        end
      rescue LoadError
        require 'json/ext'
        # Wrap the payload in an array and unwrap the result afterwards.
        def load(object)
          ::JSON.load("[#{object}]").first # :-(
        end
      end
    end

    # module Avro
    #   # Someday!
    # end

    # Decorate all available modules with compression
    constants.each do |module_name|
      # Skip already-generated variants so reloading this file does not
      # produce CompressedCompressed* modules.
      next if module_name.to_s =~ /\ACompressed/

      base = const_get(module_name)
      next unless base.instance_of?(::Module)

      compressed = ::Module.new do
        include base

        def dump(object)
          ::Zlib::Deflate.deflate(super(object))
        end

        def load(object)
          super(::Zlib::Inflate.inflate(object))
        end
      end

      const_set("Compressed#{module_name}", compressed)
    end

  end
end
64
+
@@ -0,0 +1,202 @@
1
class CassandraClient
  # Read/write access to one table through the parent's client. The table
  # schema is fetched once at construction, and the instance is extended
  # with the parent's serialization module, which supplies the dump/load
  # methods used to encode and decode column values.
  class Table
    attr_reader :name, :schema, :parent

    MAX_INT = 2**31 - 1

    # name   - table name, passed to the client on every call.
    # parent - object providing #client, #block_for and #serialization.
    def initialize(name, parent)
      @parent = parent
      @client = parent.client
      @block_for = parent.block_for

      @name = name
      @schema = @client.describeTable(@name)
      extend(parent.serialization)
    end

    # Short description; pass false to leave out the schema and the parent.
    def inspect(full = true)
      string = "#<CassandraClient::Table:#{object_id}, @name=#{name.inspect}"
      string += ", @schema={#{schema.map {|cf_name, hash| ":#{cf_name} => #{hash['type'].inspect}"}.join(', ')}}, @parent=#{parent.inspect(false)}" if full
      string + ">"
    end

    ## Write

    # Insert a row for a key. Pass a flat hash for a regular column family, and
    # a nested hash for a super column family.
    def insert(key, column_family, hash, timestamp = now)
      column_family = column_family.to_s
      insert = is_super(column_family) ? :insert_super : :insert_standard
      send(insert, key, column_family, hash, timestamp)
    end

    private

    # Write a flat column => value hash as one batch mutation.
    def insert_standard(key, column_family, hash, timestamp = now)
      mutation = Batch_mutation_t.new(
        :table => @name,
        :key => key,
        :cfmap => {column_family => hash_to_columns(hash, timestamp)})
      @client.batch_insert(mutation, @block_for)
    end

    # Write a nested super_column => {column => value} hash as one batch
    # mutation.
    def insert_super(key, column_family, hash, timestamp = now)
      mutation = Batch_mutation_super_t.new(
        :table => @name,
        :key => key,
        :cfmap => {column_family => hash_to_super_columns(hash, timestamp)})
      @client.batch_insert_superColumn(mutation, @block_for)
    end

    public

    ## Delete

    # Remove the element at the column_family:key:super_column:column
    # path you request.
    def remove(key, column_family, super_column = nil, column = nil, timestamp = now)
      column_family = column_family.to_s
      column_family += ":#{super_column}" if super_column
      column_family += ":#{column}" if column
      @client.remove(@name, key, column_family, timestamp, @block_for )
    end

    # Remove all rows in the column family you request.
    def remove_all(column_family)
      # Ask for every key explicitly: get_key_range defaults to a limit of
      # 100, which would silently leave the remaining rows in place.
      get_key_range(column_family, nil, MAX_INT).each do |key|
        remove(key, column_family)
      end
    end

    ## Read

    # Count the elements at the column_family:key:super_column path you
    # request.
    def count_columns(key, column_family, super_column = nil)
      column_family = column_family.to_s
      column_family += ":#{super_column}" if super_column
      @client.get_column_count(@name, key, column_family)
    end

    # Return a list of single values for the elements at the
    # column_family:key:super_column:column path you request. The result
    # is in the order the column names were given; names with no stored
    # value yield nil.
    def get_columns(key, column_family, super_columns, columns = nil)
      column_family = column_family.to_s
      get_slice_by_names = (is_super(column_family) && !columns) ? :get_slice_super_by_names : :get_slice_by_names
      if super_columns && columns
        column_family += ":#{super_columns}"
        columns = Array(columns)
      else
        columns = Array(super_columns)
      end

      hash = columns_to_hash(@client.send(get_slice_by_names, @name, key, column_family, columns))
      columns.map { |column| hash[column] }
    end

    # Return a hash (actually, a CassandraClient::OrderedHash) or a single value
    # representing the element at the column_family:key:super_column:column
    # path you request. When the client raises NotFoundException, returns {}
    # for a super column family lookup without a column, and nil otherwise.
    def get(key, column_family, super_column = nil, column = nil, offset = -1, limit = 100)
      column_family = column_family.to_s
      column_family += ":#{super_column}" if super_column
      column_family += ":#{column}" if column

      # You have got to be kidding
      if is_super(column_family)
        if column
          load(@client.get_column(@name, key, column_family).value)
        elsif super_column
          columns_to_hash(@client.get_superColumn(@name, key, column_family).columns)
        else
          columns_to_hash(@client.get_slice_super(@name, key, "#{column_family}:", offset, limit))
        end
      else
        if super_column
          # In a standard column family the third argument is the column name.
          load(@client.get_column(@name, key, column_family).value)
        elsif is_sorted_by_time(column_family)
          result = columns_to_hash(@client.get_columns_since(@name, key, column_family, 0))

          # FIXME Hack until get_slice on a time-sorted column family works again
          if offset > -1
            # Array#[] returns nil when offset is past the end; treat that
            # as an empty window instead of failing on nil.
            window = result.to_a[offset, limit] || []
            result = OrderedHash[*flatten_once(window)]
          end
          result
        else
          columns_to_hash(@client.get_slice(@name, key, "#{column_family}:", offset, limit))
        end
      end
    rescue NotFoundException
      is_super(column_family) && !column ? {} : nil
    end

    # FIXME
    # def get_recent(key, column_family, super_column = nil, column = nil, timestamp = 0)
    # end

    # Return a list of keys in the column_family you request. Requires the
    # table to be partitioned with OrderPreservingHash.
    #
    # May be called as get_key_range(column_family), in which case the key
    # range defaults to ''..''.
    def get_key_range(key_range, column_family = nil, limit = 100)
      column_family, key_range = key_range, ''..'' unless column_family
      column_families = Array(column_family).map {|c| c.to_s}
      @client.get_key_range(@name, column_families, key_range.begin, key_range.end, limit)
    end

    # Count all rows in the column_family you request. Requires the table
    # to be partitioned with OrderPreservingHash.
    def count(key_range, column_family = nil, limit = MAX_INT)
      get_key_range(key_range, column_family, limit).size
    end

    private

    def is_super(column_family)
      column_family_property(column_family, 'type') == 'Super'
    end

    def is_sorted_by_time(column_family)
      column_family_property(column_family, 'sort') == 'Time'
    end

    # Look up a schema property for a column family. Accepts either a bare
    # column family name or a "family:super_column:column" path; only the
    # part before the first ':' is used. Raises AccessError when the schema
    # has no entry for that column family.
    def column_family_property(column_family_or_path, key)
      column_family = column_family_or_path.to_s.split(':').first
      properties = @schema[column_family]
      raise AccessError, "Invalid column family \":#{column_family}\"" unless properties
      properties[key]
    end

    # Convert a list of columns (or super columns, recursively) returned by
    # the client into an OrderedHash of name => loaded value.
    def columns_to_hash(columns)
      hash = ::CassandraClient::OrderedHash.new
      Array(columns).each do |c|
        if c.is_a?(SuperColumn_t)
          hash[c.name] = columns_to_hash(c.columns)
        else
          hash[c.columnName] = load(c.value)
        end
      end
      hash
    end

    # Convert a column => value hash into Column_t objects, dumping each
    # value with the configured serializer.
    def hash_to_columns(hash, timestamp)
      hash.map do |column, value|
        Column_t.new(:columnName => column, :value => dump(value), :timestamp => timestamp)
      end
    end

    # Convert a super_column => {column => value} hash into SuperColumn_t
    # objects.
    def hash_to_super_columns(hash, timestamp)
      hash.map do |super_column, columns|
        SuperColumn_t.new(:name => super_column, :columns => hash_to_columns(columns, timestamp))
      end
    end

    # Current wall-clock time as integer microseconds since the epoch; used
    # as the default write/remove timestamp.
    def time_in_microseconds
      time = Time.now
      time.to_i * 1_000_000 + time.usec
    end
    alias :now :time_in_microseconds

    # Flatten exactly one level: [[k1, v1], [k2, v2]] => [k1, v1, k2, v2].
    def flatten_once(array)
      result = []
      array.each { |el| result.concat(el) }
      result
    end
  end
end
@@ -0,0 +1,14 @@
1
+
2
+ require 'zlib'
3
+ require 'rubygems'
4
+ require 'thrift'
5
+
6
+ HERE = File.expand_path(File.dirname(__FILE__))
7
+
8
+ require "#{HERE}/cassandra_client/client"
9
+ require "#{HERE}/cassandra_client/table"
10
+ require "#{HERE}/cassandra_client/serialization"
11
+ require "#{HERE}/cassandra_client/ordered_hash"
12
+
13
+ $LOAD_PATH << "#{HERE}/../vendor/gen-rb"
14
+ require 'cassandra'
data/quickstart.sh ADDED
@@ -0,0 +1,10 @@
1
+
2
# Unpack the vendored Cassandra build into the current directory on first
# run, then start it in the foreground with this project's configuration.
if [ ! -e cassandra-r789419 ]; then
  # Run the unpack steps in a subshell and chain them with &&, so a failed
  # step (for example, no vendor/ directory here) cannot leave the script in
  # the wrong working directory or go on to launch a missing binary.
  (cd vendor && tar xjvf cassandra-r789419.tar.bz2 && mv cassandra-r789419 ..) || exit 1
fi

CASSANDRA_INCLUDE="$(pwd)/conf/cassandra.in.sh"
export CASSANDRA_INCLUDE
cassandra-r789419/bin/cassandra -f
@@ -0,0 +1,204 @@
1
+
2
+ require 'test/unit'
3
+ require "#{File.expand_path(File.dirname(__FILE__))}/../lib/cassandra_client"
4
+
5
+ begin; require 'ruby-debug'; rescue LoadError; end
6
+
7
# Integration tests for CassandraClient::Table. setup connects to 127.0.0.1
# and uses the 'Statuses' and 'Users' tables, so a running server with a
# matching schema is needed.
class CassandraClientTest < Test::Unit::TestCase
  def setup
    @client = CassandraClient.new('127.0.0.1')
    @client.remove_all
    @statuses = @client.table('Statuses')
    @users = @client.table('Users')
  end

  def test_inspect
    assert_nothing_raised do
      @statuses.inspect
      @client.inspect
    end
  end

  def test_connection_reopens
    assert_raises(NoMethodError) do
      @statuses.insert(1, :row, {'body' => 'v'})
    end
    assert_nothing_raised do
      @statuses.insert(key, :row, {'body' => 'v'})
    end
  end

  def test_get_key_name_sorted
    @users.insert(key, :row, {'body' => 'v', 'user' => 'v'})
    assert_equal({'body' => 'v', 'user' => 'v'}, @users.get(key, :row))
    assert_equal({}, @users.get('bogus', :row))
  end

  def test_get_key_name_sorted_preserving_order
    # In-order hash is preserved
    hash = CassandraClient::OrderedHash['a', '', 'b', '', 'c', '', 'd', '']
    @users.insert(key, :row, hash)
    assert_equal(hash.keys, @users.get(key, :row).keys)

    @users.remove(key, :row)

    # Out-of-order hash is returned sorted
    hash = CassandraClient::OrderedHash['b', '', 'c', '', 'd', '', 'a', '']
    @users.insert(key, :row, hash)
    assert_equal(hash.keys.sort, @users.get(key, :row).keys)
    assert_not_equal(hash.keys, @users.get(key, :row).keys)
  end

  def test_get_key_time_sorted
    @statuses.insert(key, :row, {'body' => 'v', 'user' => 'v'})
    assert_equal({'body' => 'v', 'user' => 'v'}, @statuses.get(key, :row))
    assert_equal({}, @statuses.get('bogus', :row))
  end

  def test_get_key_time_sorted_with_limit
    @statuses.insert(key, :row, {'first' => 'v'})
    @statuses.insert(key, :row, {'second' => 'v'})
    assert_equal({'second' => 'v'}, @statuses.get(key, :row, nil, nil, 0, 1))
  end

  def test_get_value
    @statuses.insert(key, :row, {'body' => 'v'})
    assert_equal 'v', @statuses.get(key, :row, 'body')
    assert_nil @statuses.get('bogus', :row, 'body')
  end

  def test_get_super_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'4' => 'v', '5' => 'v'}})
    assert_equal({'user_timelines' => {'4' => 'v', '5' => 'v'}}, @statuses.get(key, :relationships))
    assert_equal({}, @statuses.get('bogus', :relationships))
  end

  def test_get_super_key_multi
    @statuses.insert(key, :relationships, {
      'user_timelines' => {'1' => 'v1'},
      'mentions_timelines' => {'2' => 'v2'}})
    assert_equal({
      'user_timelines' => {'1' => 'v1'},
      'mentions_timelines' => {'2' => 'v2'}}, @statuses.get(key, :relationships))
    assert_equal({}, @statuses.get('bogus', :relationships))
  end

  def test_get_super_sub_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'4' => 'v', '5' => 'v'}})
    assert_equal({'4' => 'v', '5' => 'v'}, @statuses.get(key, :relationships, 'user_timelines'))
    assert_equal({}, @statuses.get('bogus', :relationships, 'user_timelines'))
  end

  def test_get_super_value
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v'}})
    assert_equal('v', @statuses.get(key, :relationships, 'user_timelines', '1'))
    assert_nil @statuses.get('bogus', :relationships, 'user_timelines', '1')
  end

  def test_get_key_range
    @statuses.insert('3', :row, {'body' => 'v'})
    @statuses.insert('4', :row, {'body' => 'v'})
    @statuses.insert('5', :row, {'body' => 'v'})
    assert_equal(['3', '4', '5'], @statuses.get_key_range('3'..'5', :row))
  end

  # Not supported
  # def test_get_key_range_super
  #   @statuses.insert('3', :relationships, {'user_timelines' => {'1' => 'v'}})
  #   @statuses.insert('4', :relationships, {'user_timelines' => {'1' => 'v'}})
  #   @statuses.insert('5', :relationships, {'user_timelines' => {'1' => 'v'}})
  #   assert_equal(['3', '4', '5'], @statuses.get_key_range('3'..'5', :relationships, 'user_timelines'))
  # end

  def test_remove_key
    @statuses.insert(key, :row, {'body' => 'v'})
    @statuses.remove(key, :row)
    assert_equal({}, @statuses.get(key, :row))
  end

  def test_remove_value
    @statuses.insert(key, :row, {'body' => 'v'})
    @statuses.remove(key, :row, 'body')
    assert_nil @statuses.get(key, :row, 'body')
  end

  def test_remove_super_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v'}})
    @statuses.remove(key, :relationships)
    assert_equal({}, @statuses.get(key, :relationships))
  end

  def test_remove_super_sub_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v'}})
    @statuses.remove(key, :relationships, 'user_timelines')
    assert_equal({}, @statuses.get(key, :relationships, 'user_timelines'))
  end

  def test_remove_super_value
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v'}})
    @statuses.remove(key, :relationships, 'user_timelines', '1')
    assert_nil @statuses.get(key, :relationships, 'user_timelines', '1')
  end

  def test_insert_key
    @statuses.insert(key, :row, {'body' => 'v', 'user' => 'v'})
    assert_equal({'body' => 'v', 'user' => 'v'}, @statuses.get(key, :row))
  end

  def test_insert_super_key
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v', key => 'v'}})
    assert_equal({'1' => 'v' , key => 'v'}, @statuses.get(key, :relationships, 'user_timelines'))
  end

  def test_get_column_values
    @statuses.insert(key, :row, {'body' => 'v1', 'user' => 'v2'})
    assert_equal(['v1' , 'v2'], @statuses.get_columns(key, :row, ['body', 'user']))
  end

  def test_get_column_values_super
    @statuses.insert(key, :relationships, {
      'user_timelines' => {'1' => 'v1'},
      'mentions_timelines' => {'2' => 'v2'}})
    assert_equal [{'1' => 'v1'}, {'2' => 'v2'}],
      @statuses.get_columns(key, :relationships, ['user_timelines', 'mentions_timelines'])
  end

  # Not supported
  # def test_get_columns_super_sub
  #   @statuses.insert(key, :relationships, {
  #     'user_timelines' => {'1' => 'v1'},
  #     'mentions_timelines' => {'2' => 'v2'}})
  #   assert_equal ['v1', 'v2'],
  #     @statuses.get_columns(key, :relationships, 'user_timelines', ['1', key])
  # end

  def test_count_keys
    @statuses.insert(key + "1", :row, {'body' => 'v1'})
    @statuses.insert(key + "2", :row, {'body' => 'v1'})
    @statuses.insert(key + "3", :row, {'body' => 'v1'})
    assert_equal 3, @statuses.count(:row)
  end

  def test_count_columns
    @statuses.insert(key, :row, {'body' => 'v1', 'user' => 'v2'})
    assert_equal 2, @statuses.count_columns(key, :row)
  end

  def test_count_super_columns
    @statuses.insert(key, :relationships, {
      'user_timelines' => {'1' => 'v1'},
      'mentions_timelines' => {'2' => 'v2'}})
    assert_equal 2, @statuses.count_columns(key, :relationships)
  end

  def test_count_super_sub_columns
    @statuses.insert(key, :relationships, {'user_timelines' => {'1' => 'v1', key => 'v2'}})
    assert_equal 2, @statuses.count_columns(key, :relationships, 'user_timelines')
  end

  private

  # Row key for the current test: the label of the calling frame, taken
  # from the backtrace (normally the test method's name).
  def key
    # Backtrace labels are quoted as `name' by older Rubies and as 'name'
    # by newer ones; accept either so this never returns nil.
    caller.first[/in [`'](.*?)'/, 1]
  end
end