dm-cassandra-adapter 0.0.1
- data/LICENSE +20 -0
- data/Rakefile +32 -0
- data/VERSION +1 -0
- data/conf/storage-conf.xml +354 -0
- data/lib/dm-cassandra-adapter.rb +9 -0
- data/lib/dm-cassandra-adapter/adapter.rb +169 -0
- data/lib/dm-cassandra-adapter/spec/setup.rb +15 -0
- data/spec/fixtures/book.rb +8 -0
- data/spec/fixtures/difficult_book.rb +10 -0
- data/spec/rcov.opts +6 -0
- data/spec/semipublic/cassandra_adapter_spec.rb +116 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +30 -0
- data/tasks/spec.rake +41 -0
- metadata +145 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
Copyright (c) 2010 Michael Rykov

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
require 'rubygems'
require 'rake'

begin
  gem 'jeweler', '~> 1.4'
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    gem.name        = 'dm-cassandra-adapter'
    gem.summary     = 'Cassandra Adapter for DataMapper'
    gem.description = gem.summary
    gem.email       = 'mrykov [a] gmail [d] com'
    gem.homepage    = 'http://github.com/rykov/%s' % gem.name
    gem.authors     = [ 'Michael Rykov' ]
    gem.has_rdoc    = 'yard'

    gem.rubyforge_project = 'datamapper'

    gem.add_dependency 'dm-core',       '~> 1.0.2'
    gem.add_dependency 'dm-serializer', '~> 1.0.2'

    gem.add_development_dependency 'rspec',          '~> 1.3'
    gem.add_development_dependency 'dm-validations', '~> 1.0.2'
    gem.add_development_dependency 'fakeweb',        '~> 1.3'
  end

  Jeweler::GemcutterTasks.new

  FileList['tasks/**/*.rake'].each { |task| import task }
rescue LoadError
  puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler'
end
data/VERSION
ADDED
@@ -0,0 +1 @@
0.0.1
data/conf/storage-conf.xml
ADDED
@@ -0,0 +1,354 @@
<!--
 ~ Licensed to the Apache Software Foundation (ASF) under one
 ~ or more contributor license agreements.  See the NOTICE file
 ~ distributed with this work for additional information
 ~ regarding copyright ownership.  The ASF licenses this file
 ~ to you under the Apache License, Version 2.0 (the
 ~ "License"); you may not use this file except in compliance
 ~ with the License.  You may obtain a copy of the License at
 ~
 ~     http://www.apache.org/licenses/LICENSE-2.0
 ~
 ~ Unless required by applicable law or agreed to in writing,
 ~ software distributed under the License is distributed on an
 ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 ~ KIND, either express or implied.  See the License for the
 ~ specific language governing permissions and limitations
 ~ under the License.
-->
<Storage>
  <!--======================================================================-->
  <!-- Basic Configuration                                                  -->
  <!--======================================================================-->

  <!--
   ~ The name of this cluster.  This is mainly used to prevent machines in
   ~ one logical cluster from joining another.
  -->
  <ClusterName>Test</ClusterName>

  <!--
   ~ Turn on to make new [non-seed] nodes automatically migrate the right data
   ~ to themselves.  (If no InitialToken is specified, they will pick one
   ~ such that they will get half the range of the most-loaded node.)
   ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
   ~ so that you can't subsequently accidentally bootstrap a node with
   ~ data on it.  (You can reset this by wiping your data and commitlog
   ~ directories.)
   ~
   ~ Off by default so that new clusters and upgraders from 0.4 don't
   ~ bootstrap immediately.  You should turn this on when you start adding
   ~ new nodes to a cluster that already has data on it.  (If you are upgrading
   ~ from 0.4, start your cluster with it off once before changing it to true.
   ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
   ~ I/O before your cluster starts up.)
  -->
  <AutoBootstrap>false</AutoBootstrap>

  <!--
   ~ Keyspaces and ColumnFamilies:
   ~ A ColumnFamily is the Cassandra concept closest to a relational
   ~ table.  Keyspaces are separate groups of ColumnFamilies.  Except in
   ~ very unusual circumstances you will have one Keyspace per application.

   ~ There is an implicit keyspace named 'system' for Cassandra internals.
  -->
  <Keyspaces>
    <Keyspace Name="AdapterTest">
      <KeysCachedFraction>0.01</KeysCachedFraction>

      <!-- 'Books' is for our own tests -->
      <ColumnFamily CompareWith="UTF8Type" Name="Books" />
      <!-- 'Heffalumps' is for shared DataMapper adapter tests -->
      <ColumnFamily CompareWith="UTF8Type" Name="Heffalumps" />

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>

    <Keyspace Name="Twitter">
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <ColumnFamily CompareWith="UTF8Type" Name="Users" />
      <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
      <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
      <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
      <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
      <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
      <ColumnFamily CompareWith="BytesType" ColumnType="Standard" Name="TimelinishThings" />

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>

    <Keyspace Name="Multiblog">
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
      <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>

    <Keyspace Name="MultiblogLong">
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <ColumnFamily CompareWith="LongType" Name="Blogs"/>
      <ColumnFamily CompareWith="LongType" Name="Comments"/>

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>

    <Keyspace Name="CassandraObject">
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
      <ColumnFamily CompareWith="TimeUUIDType" Name="CustomersByLastName" />
      <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
      <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
      <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
      <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
      <!-- <ColumnFamily CompareWith="UTF8Type" Name="FirstNames" /> -->

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>
  </Keyspaces>

  <!--
   ~ Authenticator: any IAuthenticator may be used, including your own as long
   ~ as it is on the classpath.  Out of the box, Cassandra provides
   ~ org.apache.cassandra.auth.AllowAllAuthenticator and
   ~ org.apache.cassandra.auth.SimpleAuthenticator
   ~ (SimpleAuthenticator uses access.properties and passwd.properties by
   ~ default).
   ~
   ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
  -->
  <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>

  <!--
   ~ Partitioner: any IPartitioner may be used, including your own as long
   ~ as it is on the classpath.  Out of the box, Cassandra provides
   ~ org.apache.cassandra.dht.RandomPartitioner,
   ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
   ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
   ~ (CollatingOPP collates according to EN,US rules, not naive byte
   ~ ordering.  Use this as an example if you need locale-aware collation.)
   ~ Range queries require using an order-preserving partitioner.
   ~
   ~ Achtung!  Changing this parameter requires wiping your data
   ~ directories, since the partitioner can modify the sstable on-disk
   ~ format.
  -->
  <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>

  <!--
   ~ If you are using an order-preserving partitioner and you know your key
   ~ distribution, you can specify the token for this node to use.  (Keys
   ~ are sent to the node with the "closest" token, so distributing your
   ~ tokens equally along the key distribution space will spread keys
   ~ evenly across your cluster.)  This setting is only checked the first
   ~ time a node is started.

   ~ This can also be useful with RandomPartitioner to force equal spacing
   ~ of tokens around the hash space, especially for clusters with a small
   ~ number of nodes.
  -->
  <InitialToken></InitialToken>

  <!--
   ~ Directories: Specify where Cassandra should store different data on
   ~ disk.  Keep the data disks and the CommitLog disks separate for best
   ~ performance
  -->
  <CommitLogDirectory>data/cassandra/commitlog</CommitLogDirectory>
  <DataFileDirectories>
      <DataFileDirectory>data/cassandra/data</DataFileDirectory>
  </DataFileDirectories>
  <CalloutLocation>data/cassandra/callouts</CalloutLocation>
  <StagingFileDirectory>data/cassandra/staging</StagingFileDirectory>

  <!--
   ~ Addresses of hosts that are deemed contact points. Cassandra nodes
   ~ use this list of hosts to find each other and learn the topology of
   ~ the ring.  You must change this if you are running multiple nodes!
  -->
  <Seeds>
      <Seed>127.0.0.1</Seed>
  </Seeds>

  <!-- Miscellaneous -->

  <!-- Time to wait for a reply from other nodes before failing the command -->
  <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
  <!-- Size to allow commitlog to grow to before creating a new segment -->
  <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>

  <!-- Local hosts and ports -->

  <!--
   ~ Address to bind to and tell other nodes to connect to.  You _must_
   ~ change this if you want multiple nodes to be able to communicate!
   ~
   ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
   ~ will always do the Right Thing *if* the node is properly configured
   ~ (hostname, name resolution, etc), and the Right Thing is to use the
   ~ address associated with the hostname (it might not be).
  -->
  <ListenAddress>localhost</ListenAddress>
  <!-- internal communications port -->
  <StoragePort>7000</StoragePort>

  <!--
   ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
   ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
   ~ all interfaces.
   ~
   ~ Leaving this blank has the same effect it does for ListenAddress,
   ~ (i.e. it will be based on the configured hostname of the node).
  -->
  <ThriftAddress>localhost</ThriftAddress>
  <!-- Thrift RPC port (the port clients connect to). -->
  <ThriftPort>9160</ThriftPort>
  <!--
   ~ Whether or not to use a framed transport for Thrift. If this option
   ~ is set to true then you must also use a framed transport on the
   ~ client-side, (framed and non-framed transports are not compatible).
  -->
  <ThriftFramedTransport>false</ThriftFramedTransport>


  <!--======================================================================-->
  <!-- Memory, Disk, and Performance                                        -->
  <!--======================================================================-->

  <!--
   ~ Access mode.  mmapped i/o is substantially faster, but only practical on
   ~ a 64bit machine (which notably does not include EC2 "small" instances)
   ~ or relatively small datasets.  "auto", the safe choice, will enable
   ~ mmapping on a 64bit JVM.  Other values are "mmap", "mmap_index_only"
   ~ (which may allow you to get part of the benefits of mmap on a 32bit
   ~ machine by mmapping only index files) and "standard".
   ~ (The buffer size settings that follow only apply to standard,
   ~ non-mmapped i/o.)
  -->
  <DiskAccessMode>auto</DiskAccessMode>

  <!--
   ~ Buffer size to use when performing contiguous column slices. Increase
   ~ this to the size of the column slices you typically perform.
   ~ (Name-based queries are performed with a buffer size of
   ~ ColumnIndexSizeInKB.)
  -->
  <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>

  <!--
   ~ Buffer size to use when flushing memtables to disk. (Only one
   ~ memtable is ever flushed at a time.) Increase (decrease) the index
   ~ buffer size relative to the data buffer if you have few (many)
   ~ columns per key.  Bigger is only better _if_ your memtables get large
   ~ enough to use the space. (Check in your data directory after your
   ~ app has been running long enough.) -->
  <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
  <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>

  <!--
   ~ Add column indexes to a row after its contents reach this size.
   ~ Increase if your column values are large, or if you have a very large
   ~ number of columns.  The competing causes are, Cassandra has to
   ~ deserialize this much of the row to read a single column, so you want
   ~ it to be small - at least if you do many partial-row reads - but all
   ~ the index data is read for each access, so you don't want to generate
   ~ that wastefully either.
  -->
  <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>

  <!--
   ~ Flush memtable after this much data has been inserted, including
   ~ overwritten data.  There is one memtable per column family, and
   ~ this threshold is based solely on the amount of data stored, not
   ~ actual heap memory usage (there is some overhead in indexing the
   ~ columns).
  -->
  <MemtableThroughputInMB>64</MemtableThroughputInMB>
  <!--
   ~ Throughput setting for Binary Memtables.  Typically these are
   ~ used for bulk load so you want them to be larger.
  -->
  <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
  <!--
   ~ The maximum number of columns in millions to store in memory per
   ~ ColumnFamily before flushing to disk.  This is also a per-memtable
   ~ setting.  Use with MemtableThroughputInMB to tune memory usage.
  -->
  <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
  <!--
   ~ The maximum time to leave a dirty memtable unflushed.
   ~ (While any affected columnfamilies have unflushed data from a
   ~ commit log segment, that segment cannot be deleted.)
   ~ This needs to be large enough that it won't cause a flush storm
   ~ of all your memtables flushing at once because none has hit
   ~ the size or count thresholds yet.  For production, a larger
   ~ value such as 1440 is recommended.
  -->
  <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>

  <!--
   ~ Unlike most systems, in Cassandra writes are faster than reads, so
   ~ you can afford more of those in parallel.  A good rule of thumb is 2
   ~ concurrent reads per processor core.  Increase ConcurrentWrites to
   ~ the number of clients writing at once if you enable CommitLogSync +
   ~ CommitLogSyncDelay. -->
  <ConcurrentReads>8</ConcurrentReads>
  <ConcurrentWrites>32</ConcurrentWrites>

  <!--
   ~ CommitLogSync may be either "periodic" or "batch."  When in batch
   ~ mode, Cassandra won't ack writes until the commit log has been
   ~ fsynced to disk.  It will wait up to CommitLogSyncBatchWindowInMS
   ~ milliseconds for other writes, before performing the sync.

   ~ This is less necessary in Cassandra than in traditional databases
   ~ since replication reduces the odds of losing data from a failure
   ~ after writing the log entry but before it actually reaches the disk.
   ~ So the other option is "timed," where writes may be acked immediately
   ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
   ~ milliseconds.
  -->
  <CommitLogSync>periodic</CommitLogSync>
  <!--
   ~ Interval at which to perform syncs of the CommitLog in periodic mode.
   ~ Usually the default of 10000ms is fine; increase it if your i/o
   ~ load is such that syncs are taking excessively long times.
  -->
  <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
  <!--
   ~ Delay (in milliseconds) during which additional commit log entries
   ~ may be written before fsync in batch mode.  This will increase
   ~ latency slightly, but can vastly improve throughput where there are
   ~ many writers.  Set to zero to disable (each entry will be synced
   ~ individually).  Reasonable values range from a minimal 0.1 to 10 or
   ~ even more if throughput matters more than latency.
  -->
  <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->

  <!--
   ~ Time to wait before garbage-collecting deletion markers.  Set this to
   ~ a large enough value that you are confident that the deletion marker
   ~ will be propagated to all replicas by the time this many seconds has
   ~ elapsed, even in the face of hardware failures.  The default value is
   ~ ten days.
  -->
  <GCGraceSeconds>864000</GCGraceSeconds>
</Storage>
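The AdapterTest keyspace above is what the adapter's spec suite runs against; the Books and Heffalumps column families use UTF8Type comparators so string-keyed property hashes round-trip cleanly. A minimal smoke test of this schema with the 'cassandra' client gem (the same library the adapter wraps, see adapter.rb below), assuming a local node started with this storage-conf.xml:

require 'cassandra'

# Same connection parameters the adapter derives from its URI options
client = Cassandra.new('AdapterTest', '127.0.0.1:9160')

# Write and read back one row in the Books column family
client.insert(:Books, '42', 'title' => 'DataMapper')
client.get(:Books, '42')  # => { 'title' => 'DataMapper' }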
data/lib/dm-cassandra-adapter/adapter.rb
ADDED
@@ -0,0 +1,169 @@
module DataMapperCassandra
  # TODO: Do not store IDs in the object hash ????

  class Adapter < DataMapper::Adapters::AbstractAdapter
    def create(resources)
      client.batch do
        resources.each do |resource|
          repository = resource.repository
          model      = resource.model
          attributes = resource.attributes
          properties = model.properties(repository.name)

          ## Figure out or generate the key
          kind = self.column_family(model)
          keys = properties.key
          raise "Multiple keys in #{resource.inspect}" if keys.size > 1
          if keys.size == 1
            name     = keys.first.name
            property = properties[name]
            key      = convert_value(property, attributes[name])
          end
          if keys.first.serial? && (key.nil? || key == 0 || key == '')
            name     = keys.first.name
            property = properties[name]
            key = if property.primitive == Integer
              # BAD: for Serial
              Time.stamp & 0x7FFFFFFF
            else
              # GOOD: for UUID/:key => true
              SimpleUUID::UUID.new.to_guid
            end
          end

          initialize_serial(resource, key)
          attributes = resource.attributes

          #puts "#{key} => #{attributes.inspect}"

          ## Convert to serialized data ##
          data = {}
          attributes.each do |name, value|
            property = properties[name]
            data[property.field] = convert_value(property, value)
          end

          # Insert this resource into Cassandra
          client.insert(kind, key.to_s, data)
        end
      end
      resources
    end

    def column_family(model)
      model.storage_name(self.name)
    end

    def convert_value(property, value)
      property.dump(value)
    end

    def read(query)
      model = query.model
      kind  = self.column_family(model)

      records = if id = extract_id_from_query(query)
        data = client.get(kind, id.to_s)
        [ load_resource(data, model) ]
      else
        # raise NotImplementedError.new("SimpleDB supports only a single order clause")
        # FIXME - This is terrible, we should not get all keys
        all_keys  = client.get_range(kind)
        data_hash = client.multi_get(kind, all_keys)
        data_hash.map do |id, data|
          load_resource(data, model)
        end
      end

      query.filter_records(records)
    end

    def update(dirty_attributes, collection)
      client.batch do
        count = collection.select do |resource|
          model = resource.model
          kind  = self.column_family(model)
          key   = model.key
          id    = key.get(resource).join

          data = {}
          dirty_attributes.each do |property, value|
            property.set!(resource, value)
            data[property.field] = convert_value(property, value)
          end

          client.insert(kind, id, data)
        end
      end.size
    end

    def delete(collection)
      client.batch do
        count = collection.select do |resource|
          model = resource.model
          kind  = self.column_family(model)
          key   = model.key
          id    = key.get(resource).join

          client.remove(kind, id)
        end
      end.size
    end

    private

    def initialize(*)
      super
      @resource_naming_convention = lambda do |value|
        Extlib::Inflection.pluralize(Extlib::Inflection.camelize(value))
      end
    end

    def client
      @client ||= begin
        keyspace = @options[:path][1..-1] # Without leading slash
        if @options[:host] == 'memory'
          require 'cassandra/mock'
          this_dir = File.dirname(__FILE__)
          conf_xml = File.expand_path('../../conf/storage-conf.xml', this_dir)
          Cassandra::Mock.new(keyspace, conf_xml)
        else
          server = "#{@options[:host]}:#{@options[:port] || 9160}"
          Cassandra.new(keyspace, server)
        end
      end
    end

    def extract_id_from_query(query)
      return nil unless query.limit == 1

      conditions = query.conditions

      return nil unless conditions.kind_of?(DataMapper::Query::Conditions::AndOperation)
      return nil unless (key_condition = conditions.select { |o| o.subject.key? }).size == 1

      key_condition.first.value
    end

    def extract_params_from_query(query)
      conditions = query.conditions

      return {} unless conditions.kind_of?(DataMapper::Query::Conditions::AndOperation)
      return {} if conditions.any? { |o| o.subject.key? }

      query.options
    end

    ## CASSANDRA ###
    def load_resource(data, model)
      field_to_property = model.properties(name).map { |p| [ p.field, p ] }.to_hash

      record = {}
      data.each do |key, value|
        next unless property = field_to_property[key]
        record[key] = property.load(value)
      end
      record
    end
  end
end
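A usage sketch for this adapter, assuming a node running the bundled storage-conf.xml. The Book model below is hypothetical (the gem's real fixture lives in spec/fixtures/book.rb, whose body is not shown in this diff), but the mapping is grounded in the code above: the resource naming convention pluralizes and camelizes the model name, so Book lands in the Books column family, and an empty Serial key gets a timestamp-derived Integer in #create:

require 'dm-core'
require 'dm-cassandra-adapter'

# Host 'memory' would select Cassandra::Mock instead -- see #client above
DataMapper.setup(:default, 'cassandra://127.0.0.1/AdapterTest')

class Book
  include DataMapper::Resource

  property :id,     Serial   # generated via Time.stamp & 0x7FFFFFFF on create
  property :title,  String
  property :author, String
end

book = Book.create(:title => 'DataMapper', :author => 'Dan Kubb')
Book.get(book.id)  # key-scoped query, served by a single client.get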
data/spec/rcov.opts
ADDED
data/spec/semipublic/cassandra_adapter_spec.rb
ADDED
@@ -0,0 +1,116 @@
require 'spec_helper'
require DataMapper.root / 'lib' / 'dm-core' / 'spec' / 'shared' / 'adapter_spec'

describe DataMapper::Adapters::CassandraAdapter do
  before :all do
    @adapter = DataMapper::Repository.adapters[:default]
    @adapter.send(:client).clear_keyspace!
  end

  # Shared DataMapper::Adapter specs
  it_should_behave_like 'An Adapter'


  describe 'with one created resource' do
    before :all do
      @input_hash = {
        :created_at => DateTime.parse('2009-05-17T22:38:42-07:00'),
        :title      => 'DataMapper',
        :author     => 'Dan Kubb'
      }

      # Create resource
      @resource  = Book.new(@input_hash)
      @resources = [ @resource ]
      @response  = @adapter.create(@resources)
      @generated_id = @resource.id

      # Stringify keys and add the Generated ID
      @output_hash = @input_hash.inject('id' => @generated_id) do |s, kv|
        s[kv[0].to_s] = kv[1]
        s
      end
    end

    it 'should return an Array containing the Resource' do
      @response.should equal(@resources)
    end

    it 'should set the identity field' do
      @generated_id.should be_present
    end

    describe '#read' do
      describe 'with unscoped query' do
        before :all do
          @query    = Book.all.query
          @response = @adapter.read(@query)
        end

        it 'should return an Array with the matching Records' do
          @response.should == [ @output_hash ]
        end
      end
    end

    describe 'with query scoped by a key' do
      before :all do
        @query    = Book.all(:id => @generated_id, :limit => 1).query
        @response = @adapter.read(@query)
      end

      it 'should return an Array with the matching Records' do
        @response.should == [ @output_hash ]
      end
    end


    describe 'with query scoped by a non-key' do
      before :all do
        @query    = Book.all(:author => 'Dan Kubb').query
        @response = @adapter.read(@query)
      end

      it 'should return an Array with the matching Records' do
        @response.should == [ @output_hash ]
      end
    end

    describe 'with a non-standard model <=> storage_name relationship' do
      before :all do
        @query    = DifficultBook.all.query
        @response = @adapter.read(@query)
      end

      it 'should return an Array with the matching Records' do
        @response.should == [ @output_hash ]
      end
    end

    describe '#update' do
      before :all do
        @resources = Book.all
        @response  = @adapter.update({ Book.properties[:author] => 'John Doe' }, @resources)
      end

      it 'should return the number of updated Resources' do
        @response.should == 1
      end

      it 'should modify the Resource' do
        @resources.first.author.should == 'John Doe'
      end
    end

    describe '#delete' do
      before :all do
        @resources = Book.all
        @response  = @adapter.delete(@resources)
      end

      it 'should return the number of deleted Resources' do
        @response.should == 1
      end
    end
  end
end
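The 'scoped by a key' example above is the only query shape that avoids a full scan: per extract_id_from_query in adapter.rb, a query needs :limit => 1 and exactly one key condition to be served by a single client.get, while every other query goes through get_range/multi_get and in-memory filter_records. A sketch of the two paths against the spec's Book fixture, assuming the spec environment is loaded:

book = Book.create(:title => 'DataMapper', :author => 'Dan Kubb')

Book.all(:id => book.id, :limit => 1).to_a  # fast path: one client.get call
Book.all(:author => 'Dan Kubb').to_a        # slow path: fetch every row, filter in Ruby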
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,30 @@
require 'rubygems'
require 'pathname'
require 'simple_uuid'

# use local dm-core if running from a typical dev checkout.
lib = File.join('..', '..', '..', 'dm-core', 'lib')
$LOAD_PATH.unshift(lib) if File.directory?(lib)

# use local dm-validations if running from a typical dev checkout.
lib = File.join('..', '..', 'dm-validations', 'lib')
$LOAD_PATH.unshift(lib) if File.directory?(lib)
require 'dm-validations'

# use local dm-serializer if running from a typical dev checkout.
lib = File.join('..', '..', 'dm-serializer', 'lib')
$LOAD_PATH.unshift(lib) if File.directory?(lib)

# Support running specs with 'rake spec' and 'spec'
$LOAD_PATH.unshift('lib') unless $LOAD_PATH.include?('lib')

require 'simple_uuid'
require 'dm-cassandra-adapter'

ROOT = Pathname(__FILE__).dirname.parent

DataMapper.setup(:default, 'cassandra://memory/AdapterTest')

Dir[ROOT / 'spec' / 'fixtures' / '**' / '*.rb'].each { |rb| require rb }

####FakeWeb.allow_net_connect = false
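The 'memory' host in the connection URI is what makes Adapter#client build a Cassandra::Mock seeded from conf/storage-conf.xml, so the suite runs without a live node. The adapter spec resets that mock between runs with:

DataMapper::Repository.adapters[:default].send(:client).clear_keyspace!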
data/tasks/spec.rake
ADDED
@@ -0,0 +1,41 @@
spec_defaults = lambda do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.libs << 'lib' << 'spec'
  spec.spec_opts << '--options' << 'spec/spec.opts'
end

begin
  require 'spec/rake/spectask'

  Spec::Rake::SpecTask.new(:spec, &spec_defaults)
rescue LoadError
  task :spec do
    abort 'rspec is not available. In order to run spec, you must: gem install rspec'
  end
end

begin
  require 'rcov'
  require 'spec/rake/verify_rcov'

  Spec::Rake::SpecTask.new(:rcov) do |rcov|
    spec_defaults.call(rcov)
    rcov.rcov      = true
    rcov.rcov_opts = File.read('spec/rcov.opts').split(/\s+/)
  end

  RCov::VerifyTask.new(:verify_rcov => :rcov) do |rcov|
    rcov.threshold = 100
  end
rescue LoadError
  %w[ rcov verify_rcov ].each do |name|
    task name do
      abort "rcov is not available. In order to run #{name}, you must: gem install rcov"
    end
  end
end

#task :spec => :check_dependencies
#task :rcov => :check_dependencies

task :default => :spec
metadata
ADDED
@@ -0,0 +1,145 @@
--- !ruby/object:Gem::Specification
name: dm-cassandra-adapter
version: !ruby/object:Gem::Version
  prerelease: false
  segments:
  - 0
  - 0
  - 1
  version: 0.0.1
platform: ruby
authors:
- Michael Rykov
autorequire:
bindir: bin
cert_chain: []

date: 2010-12-19 00:00:00 -08:00
default_executable:
dependencies:
- !ruby/object:Gem::Dependency
  name: dm-core
  prerelease: false
  requirement: &id001 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 0
        - 2
        version: 1.0.2
  type: :runtime
  version_requirements: *id001
- !ruby/object:Gem::Dependency
  name: dm-serializer
  prerelease: false
  requirement: &id002 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 0
        - 2
        version: 1.0.2
  type: :runtime
  version_requirements: *id002
- !ruby/object:Gem::Dependency
  name: rspec
  prerelease: false
  requirement: &id003 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 3
        version: "1.3"
  type: :development
  version_requirements: *id003
- !ruby/object:Gem::Dependency
  name: dm-validations
  prerelease: false
  requirement: &id004 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 0
        - 2
        version: 1.0.2
  type: :development
  version_requirements: *id004
- !ruby/object:Gem::Dependency
  name: fakeweb
  prerelease: false
  requirement: &id005 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 3
        version: "1.3"
  type: :development
  version_requirements: *id005
description: Cassandra Adapter for DataMapper
email: mrykov [a] gmail [d] com
executables: []

extensions: []

extra_rdoc_files:
- LICENSE
files:
- LICENSE
- Rakefile
- VERSION
- conf/storage-conf.xml
- lib/dm-cassandra-adapter.rb
- lib/dm-cassandra-adapter/adapter.rb
- lib/dm-cassandra-adapter/spec/setup.rb
- spec/fixtures/book.rb
- spec/fixtures/difficult_book.rb
- spec/rcov.opts
- spec/semipublic/cassandra_adapter_spec.rb
- spec/spec.opts
- spec/spec_helper.rb
- tasks/spec.rake
has_rdoc: true
homepage: http://github.com/rykov/dm-cassandra-adapter
licenses: []

post_install_message:
rdoc_options: []

require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      segments:
      - 0
      version: "0"
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      segments:
      - 0
      version: "0"
requirements: []

rubyforge_project: datamapper
rubygems_version: 1.3.6
signing_key:
specification_version: 3
summary: Cassandra Adapter for DataMapper
test_files:
- spec/fixtures/book.rb
- spec/fixtures/difficult_book.rb
- spec/semipublic/cassandra_adapter_spec.rb
- spec/spec_helper.rb