cassandra 0.5.5 → 0.5.6
- data.tar.gz.sig +0 -0
- data/Rakefile +3 -4
- data/cassandra.gemspec +3 -6
- data/test/cassandra_test.rb +12 -6
- data/test/comparable_types_test.rb +2 -0
- metadata +7 -50
- metadata.gz.sig +0 -0
- data/CHANGELOG +0 -24
- data/LICENSE +0 -202
- data/Manifest +0 -28
- data/README +0 -65
- data/bin/cassandra_helper +0 -16
- data/conf/cassandra.in.sh +0 -51
- data/conf/log4j.properties +0 -38
- data/conf/storage-conf.xml +0 -226
- data/lib/cassandra.rb +0 -23
- data/lib/cassandra/array.rb +0 -8
- data/lib/cassandra/cassandra.rb +0 -291
- data/lib/cassandra/columns.rb +0 -87
- data/lib/cassandra/comparable.rb +0 -28
- data/lib/cassandra/constants.rb +0 -12
- data/lib/cassandra/debug.rb +0 -7
- data/lib/cassandra/long.rb +0 -55
- data/lib/cassandra/ordered_hash.rb +0 -135
- data/lib/cassandra/protocol.rb +0 -74
- data/lib/cassandra/safe_client.rb +0 -21
- data/lib/cassandra/time.rb +0 -11
- data/lib/cassandra/uuid.rb +0 -109
- data/vendor/gen-rb/cassandra.rb +0 -706
- data/vendor/gen-rb/cassandra_constants.rb +0 -10
- data/vendor/gen-rb/cassandra_types.rb +0 -225
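To reproduce this comparison locally, both versions can be fetched and unpacked with the standard gem commands. A minimal sketch; the working directory and the final diff invocation are arbitrary choices, not part of this release:

    # fetch_and_unpack.rb -- pull both gem versions named in the header above
    %w[0.5.5 0.5.6].each do |version|
      system('gem', 'fetch', 'cassandra', '-v', version) or abort 'fetch failed'
      system('gem', 'unpack', "cassandra-#{version}.gem") or abort 'unpack failed'
    end
    # Then compare the trees, e.g.: diff -ru cassandra-0.5.5/ cassandra-0.5.6/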
data/bin/cassandra_helper
DELETED
@@ -1,16 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'rubygems'
-require 'rake'
-require 'cassandra'
-
-gem_path = $LOAD_PATH.last.sub(/lib$/, "")
-
-Dir.chdir(gem_path) do
-  if !ENV["CASSANDRA_INCLUDE"]
-    puts "Set the CASSANDRA_INCLUDE environment variable to use a non-default cassandra.in.sh and friends."
-  end
-
-  ARGV << "-T" if ARGV.empty?
-  exec("env FROM_BIN_CASSANDRA_HELPER=1 rake #{ARGV.join(' ')}")
-end
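The deleted helper honored the CASSANDRA_INCLUDE environment variable and fell back to listing the bundled Rake tasks (rake -T) when run with no arguments. A hedged invocation sketch; the include path is hypothetical and assumes the binary is on PATH:

    # Point CASSANDRA_INCLUDE at a custom cassandra.in.sh, then run the
    # helper with no arguments to list the available tasks.
    system({ 'CASSANDRA_INCLUDE' => '/etc/cassandra/cassandra.in.sh' },
           'cassandra_helper')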
data/conf/cassandra.in.sh
DELETED
@@ -1,51 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# The directory where Cassandra's configs live (required)
-CASSANDRA_CONF=$CASSANDRA_CONF
-
-# This can be the path to a jar file, or a directory containing the
-# compiled classes.
-cassandra_bin=$CASSANDRA_HOME/build/classes
-
-# The java classpath (required)
-CLASSPATH=$CASSANDRA_CONF:$cassandra_bin
-
-for jar in $CASSANDRA_HOME/lib/*.jar; do
-    CLASSPATH=$CLASSPATH:$jar
-done
-
-echo "CASSANDRA_HOME: $CASSANDRA_HOME"
-echo "CASSANDRA_CONF: $CASSANDRA_CONF"
-
-# Arguments to pass to the JVM
-JVM_OPTS=" \
-        -ea \
-        -Xdebug \
-        -Xrunjdwp:transport=dt_socket,server=y,address=8888,suspend=n \
-        -Xms128M \
-        -Xmx1G \
-        -XX:SurvivorRatio=8 \
-        -XX:TargetSurvivorRatio=90 \
-        -XX:+AggressiveOpts \
-        -XX:+UseParNewGC \
-        -XX:+UseConcMarkSweepGC \
-        -XX:CMSInitiatingOccupancyFraction=1 \
-        -XX:+CMSParallelRemarkEnabled \
-        -XX:+HeapDumpOnOutOfMemoryError \
-        -Dcom.sun.management.jmxremote.port=8080 \
-        -Dcom.sun.management.jmxremote.ssl=false \
-        -Dcom.sun.management.jmxremote.authenticate=false"
data/conf/log4j.properties
DELETED
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# for production, you should probably set the root to INFO
-# and the pattern to %c instead of %l. (%l is slower.)
-
-# output messages into a rolling log file as well as stdout
-log4j.rootLogger=DEBUG,stdout,R
-
-# stdout
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.layout=org.apache.log4j.SimpleLayout
-
-# rolling log file ("system.log")
-log4j.appender.R=org.apache.log4j.DailyRollingFileAppender
-log4j.appender.R.DatePattern='.'yyyy-MM-dd-HH
-log4j.appender.R.layout=org.apache.log4j.PatternLayout
-log4j.appender.R.layout.ConversionPattern=%5p [%t] %d{ISO8601} %F (line %L) %m%n
-# Edit the next line to point to your logs directory
-log4j.appender.R.File=data/logs/system.log
-
-# Application logging options
-#log4j.logger.com.facebook=DEBUG
-#log4j.logger.com.facebook.infrastructure.gms=DEBUG
-#log4j.logger.com.facebook.infrastructure.db=DEBUG
data/conf/storage-conf.xml
DELETED
@@ -1,226 +0,0 @@
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~     http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing,
- ~ software distributed under the License is distributed on an
- ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- ~ KIND, either express or implied. See the License for the
- ~ specific language governing permissions and limitations
- ~ under the License.
--->
-<Storage>
-  <!--======================================================================-->
-  <!-- Basic Configuration                                                  -->
-  <!--======================================================================-->
-  <ClusterName>Test</ClusterName>
-
-  <!-- Tables and ColumnFamilies
-       Think of a table as a namespace, not a relational table.
-       (ColumnFamilies are closer in meaning to those.)
-
-       There is an implicit table named 'system' for Cassandra internals.
-  -->
-  <Keyspaces>
-    <Keyspace Name="Twitter">
-      <KeysCachedFraction>0.01</KeysCachedFraction>
-      <ColumnFamily CompareWith="UTF8Type" Name="Users" />
-      <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
-      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
-      <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
-      <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
-      <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
-      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
-    </Keyspace>
-
-    <Keyspace Name="Multiblog">
-      <KeysCachedFraction>0.01</KeysCachedFraction>
-      <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
-      <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
-    </Keyspace>
-
-    <Keyspace Name="MultiblogLong">
-      <KeysCachedFraction>0.01</KeysCachedFraction>
-      <ColumnFamily CompareWith="LongType" Name="Blogs"/>
-      <ColumnFamily CompareWith="LongType" Name="Comments"/>
-    </Keyspace>
-  </Keyspaces>
-
-  <!-- Partitioner: any IPartitioner may be used, including your own
-       as long as it is on the classpath. Out of the box, Cassandra provides
-       org.apache.cassandra.dht.RandomPartitioner and
-       org.apache.cassandra.dht.OrderPreservingPartitioner.
-       Range queries require using OrderPreservingPartitioner or a subclass.
-
-       Achtung! Changing this parameter requires wiping your data directories,
-       since the partitioner can modify the sstable on-disk format.
-  -->
-  <Partitioner>org.apache.cassandra.dht.OrderPreservingPartitioner</Partitioner>
-
-  <!-- If you are using the OrderPreservingPartitioner and you know your key
-       distribution, you can specify the token for this node to use.
-       (Keys are sent to the node with the "closest" token, so distributing
-       your tokens equally along the key distribution space will spread
-       keys evenly across your cluster.) This setting is only checked the
-       first time a node is started.
-
-       This can also be useful with RandomPartitioner to force equal
-       spacing of tokens around the hash space, especially for
-       clusters with a small number of nodes. -->
-  <InitialToken></InitialToken>
-
-
-  <!-- EndPointSnitch: Set this to the class that implements IEndPointSnitch,
-       which determines whether two endpoints are in the same data center or
-       on the same rack. Out of the box, Cassandra provides
-       org.apache.cassandra.locator.EndPointSnitch
-  -->
-  <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
-
-  <!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
-       will change the way the node picker works.
-       Out of the box, Cassandra provides
-       org.apache.cassandra.locator.RackUnawareStrategy and
-       org.apache.cassandra.locator.RackAwareStrategy
-       (place one replica in a different datacenter, and the
-       others on different racks in the same one.)
-  -->
-  <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
-
-  <!-- Number of replicas of the data -->
-  <ReplicationFactor>1</ReplicationFactor>
-
-  <!-- Directories: Specify where Cassandra should store different data on disk.
-       Keep the data disks and the CommitLog disks separate for best performance.
-  -->
-  <CommitLogDirectory>data/commitlog</CommitLogDirectory>
-  <DataFileDirectories>
-    <DataFileDirectory>data/data</DataFileDirectory>
-  </DataFileDirectories>
-  <CalloutLocation>data/callouts</CalloutLocation>
-  <BootstrapFileDirectory>data/bootstrap</BootstrapFileDirectory>
-  <StagingFileDirectory>data/staging</StagingFileDirectory>
-
-  <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
-       this list of hosts to find each other and learn the topology of the ring.
-       You must change this if you are running multiple nodes!
-  -->
-  <Seeds>
-    <Seed>127.0.0.1</Seed>
-  </Seeds>
-
-
-  <!-- Miscellaneous -->
-
-  <!-- time to wait for a reply from other nodes before failing the command -->
-  <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
-  <!-- size to allow commitlog to grow to before creating a new segment -->
-  <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
-
-
-  <!-- Local hosts and ports -->
-
-  <!-- Address to bind to and tell other nodes to connect to.
-       You _must_ change this if you want multiple nodes to be able
-       to communicate!
-
-       Leaving it blank leaves it up to InetAddress.getLocalHost().
-       This will always do the Right Thing *if* the node is properly
-       configured (hostname, name resolution, etc), and the Right
-       Thing is to use the address associated with the hostname (it
-       might not be). -->
-  <ListenAddress>localhost</ListenAddress>
-  <!-- TCP port, for commands and data -->
-  <StoragePort>7000</StoragePort>
-  <!-- UDP port, for membership communications (gossip) -->
-  <ControlPort>7001</ControlPort>
-
-  <!-- The address to bind the Thrift RPC service to. Unlike
-       ListenAddress above, you *can* specify 0.0.0.0 here if you want
-       Thrift to listen on all interfaces.
-
-       Leaving this blank has the same effect it does for ListenAddress
-       (i.e. it will be based on the configured hostname of the node).
-  -->
-  <ThriftAddress>localhost</ThriftAddress>
-  <!-- Thrift RPC port (the port clients connect to). -->
-  <ThriftPort>9160</ThriftPort>
-
-
-  <!--======================================================================-->
-  <!-- Memory, Disk, and Performance                                        -->
-  <!--======================================================================-->
-
-  <!-- Add column indexes to a row after its contents reach this size -->
-  <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
-
-  <!--
-    The maximum amount of data to store in memory before flushing to
-    disk. Note: There is one memtable per column family, and this threshold
-    is based solely on the amount of data stored, not actual heap memory
-    usage (there is some overhead in indexing the columns).
-  -->
-  <MemtableSizeInMB>32</MemtableSizeInMB>
-
-  <!--
-    The maximum number of columns in millions to store in memory
-    before flushing to disk. This is also a per-memtable setting.
-    Use with MemtableSizeInMB to tune memory usage.
-  -->
-  <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
-
-  <!-- Unlike most systems, in Cassandra writes are faster than
-       reads, so you can afford more of those in parallel.
-       A good rule of thumb is 2 concurrent reads per processor core.
-       You especially want more concurrent writes if you are using
-       CommitLogSync + CommitLogSyncDelay. -->
-  <ConcurrentReads>8</ConcurrentReads>
-  <ConcurrentWrites>32</ConcurrentWrites>
-
-  <!-- CommitLogSync may be either "periodic" or "batch."
-       When in batch mode, Cassandra won't ack writes until the commit log
-       has been fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
-       milliseconds for other writes before performing the sync.
-
-       This is less necessary in Cassandra
-       than in traditional databases since replication reduces the
-       odds of losing data from a failure after writing the log
-       entry but before it actually reaches the disk. So the other
-       option is "periodic," where writes may be acked immediately
-       and the CommitLog is simply synced every CommitLogSyncPeriodInMS
-       milliseconds.
-  -->
-  <CommitLogSync>periodic</CommitLogSync>
-  <!-- Interval at which to perform syncs of the CommitLog in periodic
-       mode. Usually the default of 1000ms is fine; increase it
-       only if the CommitLog PendingTasks backlog in jmx shows that
-       you are frequently scheduling a second sync while the first
-       has not yet been processed.
-  -->
-  <CommitLogSyncPeriodInMS>1000</CommitLogSyncPeriodInMS>
-  <!-- Delay (in microseconds) during which additional commit log
-       entries may be written before fsync in batch mode. This will increase
-       latency slightly, but can vastly improve throughput where
-       there are many writers. Set to zero to disable
-       (each entry will be synced individually).
-       Reasonable values range from a minimal 0.1 to 10 or even more
-       if throughput matters more than latency.
-  -->
-  <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
-
-  <!-- Time to wait before garbage-collecting deletion markers.
-       Set this to a large enough value that you are confident
-       that the deletion marker will be propagated to all replicas
-       by the time this many seconds has elapsed, even in the
-       face of hardware failures. The default value is ten days.
-  -->
-  <GCGraceSeconds>864000</GCGraceSeconds>
-</Storage>
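The Twitter keyspace above is the one the client's documentation examples run against: Users is a regular UTF8-compared column family, while UserRelationships is a super column family whose subcolumns collate as TimeUUIDType. A minimal sketch, assuming a local node loaded with this storage-conf.xml; column names and values are illustrative:

    require 'cassandra'

    # Connects to the Twitter keyspace defined in the config above.
    client = Cassandra.new('Twitter', '127.0.0.1', 9160)

    # Regular column family: a flat hash of column name => value.
    client.insert(:Users, '5', { 'screen_name' => 'buttonscat' })

    # Super column family: a nested hash; subcolumn names are TimeUUIDs.
    client.insert(:UserRelationships, '5', { 'user_timeline' => { UUID.new => '1' } })
    client.get(:UserRelationships, '5', 'user_timeline')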
data/lib/cassandra.rb
DELETED
@@ -1,23 +0,0 @@
-
-require 'zlib'
-require 'rubygems'
-require 'thrift'
-
-HERE = File.expand_path(File.dirname(__FILE__))
-
-$LOAD_PATH << "#{HERE}/../vendor/gen-rb"
-require "#{HERE}/../vendor/gen-rb/cassandra"
-
-$LOAD_PATH << "#{HERE}"
-require 'cassandra/array'
-require 'cassandra/time'
-require 'cassandra/comparable'
-require 'cassandra/uuid'
-require 'cassandra/long'
-require 'cassandra/safe_client'
-require 'cassandra/ordered_hash'
-require 'cassandra/columns'
-require 'cassandra/protocol'
-require 'cassandra/cassandra'
-require 'cassandra/constants'
-require 'cassandra/debug' if ENV['DEBUG']
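Note the last require in the loader above: debugging support is wired in only when the DEBUG environment variable is set. A minimal sketch of enabling it:

    # Must be set before the require, since the loader checks ENV['DEBUG']
    # at load time.
    ENV['DEBUG'] = '1'
    require 'cassandra'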
data/lib/cassandra/array.rb
DELETED
data/lib/cassandra/cassandra.rb
DELETED
@@ -1,291 +0,0 @@
-
-=begin rdoc
-Create a new Cassandra client instance. Accepts a keyspace name, and optional host and port.
-
-  client = Cassandra.new('twitter', '127.0.0.1', 9160)
-
-You can then make calls to the server via the <tt>client</tt> instance.
-
-  client.insert(:UserRelationships, "5", {"user_timeline" => {UUID.new => "1"}})
-  client.get(:UserRelationships, "5", "user_timeline")
-
-For read methods, valid option parameters are:
-
-<tt>:count</tt>:: How many results to return. Defaults to 100.
-<tt>:start</tt>:: Column name token at which to start iterating, inclusive. Defaults to nil, which means the first column in the collation order.
-<tt>:finish</tt>:: Column name token at which to stop iterating, inclusive. Defaults to nil, which means no boundary.
-<tt>:reversed</tt>:: Swap the direction of the collation order.
-<tt>:consistency</tt>:: The consistency level of the request. Defaults to <tt>Cassandra::Consistency::ONE</tt> (one node must respond). Other valid options are <tt>Cassandra::Consistency::ZERO</tt>, <tt>Cassandra::Consistency::QUORUM</tt>, and <tt>Cassandra::Consistency::ALL</tt>.
-
-Note that some read options have no relevance in some contexts.
-
-For write methods, valid option parameters are:
-
-<tt>:timestamp</tt>:: The transaction timestamp. Defaults to the current time in milliseconds. This is used for conflict resolution by the server; you normally never need to change it.
-<tt>:consistency</tt>:: See above.
-
-=end rdoc
-
-class Cassandra
-  include Columns
-  include Protocol
-
-  class AccessError < StandardError #:nodoc:
-  end
-
-  module Consistency
-    include CassandraThrift::ConsistencyLevel
-  end
-
-  MAX_INT = 2**31 - 1
-
-  WRITE_DEFAULTS = {
-    :count => MAX_INT,
-    :timestamp => nil,
-    :consistency => Consistency::ONE
-  }.freeze
-
-  READ_DEFAULTS = {
-    :count => 100,
-    :start => nil,
-    :finish => nil,
-    :reversed => false,
-    :consistency => Consistency::ONE
-  }.freeze
-
-  attr_reader :keyspace, :host, :port, :serializer, :transport, :client, :schema
-
-  # Instantiate a new Cassandra and open the connection.
-  def initialize(keyspace, host = '127.0.0.1', port = 9160)
-    @is_super = {}
-    @column_name_class = {}
-    @sub_column_name_class = {}
-
-    @keyspace = keyspace
-    @host = host
-    @port = port
-
-    @transport = Thrift::BufferedTransport.new(Thrift::Socket.new(@host, @port))
-    @transport.open
-    @client = CassandraThrift::Cassandra::SafeClient.new(
-      CassandraThrift::Cassandra::Client.new(Thrift::BinaryProtocol.new(@transport)),
-      @transport)
-
-    keyspaces = @client.get_string_list_property("keyspaces")
-    unless keyspaces.include?(@keyspace)
-      raise AccessError, "Keyspace #{@keyspace.inspect} not found. Available: #{keyspaces.inspect}"
-    end
-
-    @schema = @client.describe_keyspace(@keyspace)
-  end
-
-  def inspect
-    "#<Cassandra:#{object_id}, @keyspace=#{keyspace.inspect}, @schema={#{
-      schema.map {|name, hash| ":#{name} => #{hash['type'].inspect}"}.join(', ')
-    }}, @host=#{host.inspect}, @port=#{port}>"
-  end
-
-  ### Write
-
-  # Insert a row for a key. Pass a flat hash for a regular column family, and
-  # a nested hash for a super column family. Supports the <tt>:consistency</tt>
-  # and <tt>:timestamp</tt> options.
-  def insert(column_family, key, hash, options = {})
-    column_family, _, _, options =
-      validate_params(column_family, key, [options], WRITE_DEFAULTS)
-
-    args = [column_family, hash, options[:timestamp] || Time.stamp]
-    columns = is_super(column_family) ? hash_to_super_columns(*args) : hash_to_columns(*args)
-    mutation = CassandraThrift::BatchMutation.new(
-      :key => key,
-      :cfmap => {column_family => columns},
-      :column_paths => [])
-
-    @batch ? @batch << mutation : _mutate([mutation], options[:consistency])
-  end
-
-  ## Delete
-
-  # _mutate the element at the column_family:key:[column]:[sub_column]
-  # path you request. Supports the <tt>:consistency</tt> and <tt>:timestamp</tt>
-  # options.
-  def remove(column_family, key, *columns_and_options)
-    column_family, column, sub_column, options =
-      validate_params(column_family, key, columns_and_options, WRITE_DEFAULTS)
-
-    args = {:column_family => column_family, :timestamp => options[:timestamp] || Time.stamp}
-    columns = is_super(column_family) ? {:super_column => column, :column => sub_column} : {:column => column}
-    mutation = CassandraThrift::BatchMutation.new(
-      :key => key,
-      :cfmap => {},
-      :column_paths => [CassandraThrift::ColumnPath.new(args.merge(columns))])
-
-    @batch ? @batch << mutation : _mutate([mutation], options[:consistency])
-  end
-
-  # Remove all rows in the column family you request. Supports options
-  # <tt>:consistency</tt> and <tt>:timestamp</tt>.
-  # FIXME May not currently delete all records without multiple calls. Waiting
-  # for ranged remove support in Cassandra.
-  def clear_column_family!(column_family, options = {})
-    get_range(column_family).each { |key| remove(column_family, key, options) }
-  end
-
-  # Remove all rows in the keyspace. Supports options <tt>:consistency</tt> and
-  # <tt>:timestamp</tt>.
-  # FIXME May not currently delete all records without multiple calls. Waiting
-  # for ranged remove support in Cassandra.
-  def clear_keyspace!(options = {})
-    @schema.keys.each { |column_family| clear_column_family!(column_family, options) }
-  end
-
-  ### Read
-
-  # Count the elements at the column_family:key:[super_column] path you
-  # request. Supports options <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>,
-  # <tt>:reversed</tt>, and <tt>:consistency</tt>.
-  def count_columns(column_family, key, *columns_and_options)
-    column_family, super_column, _, options =
-      validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
-    _count_columns(column_family, key, super_column, options[:consistency])
-  end
-
-  # Multi-key version of Cassandra#count_columns. Supports options <tt>:count</tt>,
-  # <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
-  def multi_count_columns(column_family, keys, *options)
-    OrderedHash[*keys.map { |key| [key, count_columns(column_family, key, *options)] }._flatten_once]
-  end
-
-  # Return a list of single values for the elements at the
-  # column_family:key:column[s]:[sub_columns] path you request. Supports the
-  # <tt>:consistency</tt> option.
-  def get_columns(column_family, key, *columns_and_options)
-    column_family, columns, sub_columns, options =
-      validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
-    _get_columns(column_family, key, columns, sub_columns, options[:consistency])
-  end
-
-  # Multi-key version of Cassandra#get_columns. Supports the <tt>:consistency</tt>
-  # option.
-  def multi_get_columns(column_family, keys, *options)
-    OrderedHash[*keys.map { |key| [key, get_columns(column_family, key, *options)] }._flatten_once]
-  end
-
-  # Return a hash (actually, a Cassandra::OrderedHash) or a single value
-  # representing the element at the column_family:key:[column]:[sub_column]
-  # path you request. Supports options <tt>:count</tt>, <tt>:start</tt>,
-  # <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
-  def get(column_family, key, *columns_and_options)
-    column_family, column, sub_column, options =
-      validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
-    _get(column_family, key, column, sub_column, options[:count], options[:start], options[:finish], options[:reversed], options[:consistency])
-  rescue CassandraThrift::NotFoundException
-    is_super(column_family) && !sub_column ? OrderedHash.new : nil
-  end
-
-  # Multi-key version of Cassandra#get. Supports options <tt>:count</tt>,
-  # <tt>:start</tt>, <tt>:finish</tt>, <tt>:reversed</tt>, and <tt>:consistency</tt>.
-  def multi_get(column_family, keys, *options)
-    OrderedHash[*keys.map { |key| [key, get(column_family, key, *options)] }._flatten_once]
-  end
-
-  # Return true if the column_family:key:[column]:[sub_column] path you
-  # request exists. Supports the <tt>:consistency</tt> option.
-  def exists?(column_family, key, *columns_and_options)
-    column_family, column, sub_column, options =
-      validate_params(column_family, key, columns_and_options, READ_DEFAULTS)
-    _get(column_family, key, column, sub_column, 1, nil, nil, nil, options[:consistency])
-    true
-  rescue CassandraThrift::NotFoundException
-  end
-
-  # Return a list of keys in the column_family you request. Requires the
-  # table to be partitioned with OrderPreservingPartitioner. Supports the
-  # <tt>:count</tt>, <tt>:start</tt>, <tt>:finish</tt>, and <tt>:consistency</tt>
-  # options.
-  def get_range(column_family, options = {})
-    column_family, _, _, options =
-      validate_params(column_family, "", [options], READ_DEFAULTS)
-    _get_range(column_family, options[:start], options[:finish], options[:count], options[:consistency])
-  end
-
-  # Count all rows in the column_family you request. Requires the table
-  # to be partitioned with OrderPreservingPartitioner. Supports the <tt>:start</tt>,
-  # <tt>:finish</tt>, and <tt>:consistency</tt> options.
-  # FIXME will count only MAX_INT records
-  def count_range(column_family, options = {})
-    get_range(column_family, options.merge(:count => MAX_INT)).size
-  end
-
-  # Open a batch operation and yield. Inserts and deletes will be queued until
-  # the block closes, and then sent atomically to the server. Supports the
-  # <tt>:consistency</tt> option, which overrides the consistency set in
-  # the individual commands.
-  def batch(options = {})
-    _, _, _, options =
-      validate_params(@schema.keys.first, "", [options], WRITE_DEFAULTS)
-
-    @batch = []
-    yield
-    compact_mutations!
-    _mutate(@batch, options[:consistency])
-    @batch = nil
-  end
-
-  private
-
-  # Extract and validate options.
-  # FIXME Should be done as a decorator
-  def validate_params(column_family, key, args, options)
-    if !key.is_a?(String)
-      raise ArgumentError, "Key #{key.inspect} must be a String for #{calling_method}"
-    elsif args.last.is_a?(Hash)
-      extras = args.last.keys - options.keys
-      raise ArgumentError, "Invalid options #{extras.inspect[1..-2]} for #{calling_method}" if extras.any?
-      options = options.merge(args.pop)
-    end
-
-    column_family, column, sub_column = column_family.to_s, args[0], args[1]
-    assert_column_name_classes(column_family, column, sub_column)
-    [column_family, map_to_s(column), map_to_s(sub_column), options]
-  end
-
-  def calling_method
-    "#{self.class}##{caller[0].split('`').last[0..-3]}"
-  end
-
-  # Convert stuff to strings.
-  def map_to_s(el)
-    case el
-    when NilClass # nil
-    when Array then el.map { |i| map_to_s(i) }
-    when Comparable, String, Symbol then el.to_s
-    else
-      raise Comparable::TypeError, "Can't map #{el.inspect}"
-    end
-  end
-
-  # Roll up queued mutations, to improve atomicity.
-  def compact_mutations!
-    mutations = {}
-
-    # Nested hash merge
-    @batch.each do |m|
-      if mutation = mutations[m.key]
-        # Inserts
-        if columns = mutation.cfmap[m.cfmap.keys.first]
-          columns.concat(m.cfmap.values.first)
-        else
-          mutation.cfmap.merge!(m.cfmap)
-        end
-        # Deletes
-        mutation.column_paths.concat(m.column_paths)
-      else
-        mutations[m.key] = m
-      end
-    end
-
-    # FIXME Return atomic thrift thingy
-    @batch = mutations.values
-  end
-end
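Taken together, the rdoc options and the batch method above imply usage along these lines. A hedged sketch; keys and values are illustrative, and the column families come from the bundled storage-conf.xml:

    client = Cassandra.new('Twitter')

    # Reads accept :count, :start, :finish, :reversed, and :consistency.
    client.get(:Statuses, '12', :reversed => true, :count => 10)

    # Mutations queue inside the block and are sent in one _mutate call when
    # it closes; :consistency here overrides the per-call setting.
    client.batch(:consistency => Cassandra::Consistency::QUORUM) do
      client.insert(:Statuses, '12', { 'body' => 'v1' })
      client.remove(:StatusAudits, '12')
    end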