dm-cassandra-adapter 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/Rakefile +32 -0
- data/VERSION +1 -0
- data/conf/storage-conf.xml +354 -0
- data/lib/dm-cassandra-adapter.rb +9 -0
- data/lib/dm-cassandra-adapter/adapter.rb +169 -0
- data/lib/dm-cassandra-adapter/spec/setup.rb +15 -0
- data/spec/fixtures/book.rb +8 -0
- data/spec/fixtures/difficult_book.rb +10 -0
- data/spec/rcov.opts +6 -0
- data/spec/semipublic/cassandra_adapter_spec.rb +116 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +30 -0
- data/tasks/spec.rake +41 -0
- metadata +145 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
Copyright (c) 2010 Michael Rykov

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
require 'rubygems'
require 'rake'

begin
  gem 'jeweler', '~> 1.4'
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    gem.name        = 'dm-cassandra-adapter'
    gem.summary     = 'Cassandra Adapter for DataMapper'
    gem.description = gem.summary
    gem.email       = 'mrykov [a] gmail [d] com'
    gem.homepage    = 'http://github.com/rykov/%s' % gem.name
    gem.authors     = [ 'Michael Rykov' ]
    gem.has_rdoc    = 'yard'

    gem.rubyforge_project = 'datamapper'

    gem.add_dependency 'dm-core',       '~> 1.0.2'
    gem.add_dependency 'dm-serializer', '~> 1.0.2'

    gem.add_development_dependency 'rspec',          '~> 1.3'
    gem.add_development_dependency 'dm-validations', '~> 1.0.2'
    gem.add_development_dependency 'fakeweb',        '~> 1.3'
  end

  Jeweler::GemcutterTasks.new

  FileList['tasks/**/*.rake'].each { |task| import task }
rescue LoadError
  puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler'
end
data/VERSION
ADDED
@@ -0,0 +1 @@
0.0.1
data/conf/storage-conf.xml
ADDED
@@ -0,0 +1,354 @@
<!--
 ~ Licensed to the Apache Software Foundation (ASF) under one
 ~ or more contributor license agreements. See the NOTICE file
 ~ distributed with this work for additional information
 ~ regarding copyright ownership. The ASF licenses this file
 ~ to you under the Apache License, Version 2.0 (the
 ~ "License"); you may not use this file except in compliance
 ~ with the License. You may obtain a copy of the License at
 ~
 ~    http://www.apache.org/licenses/LICENSE-2.0
 ~
 ~ Unless required by applicable law or agreed to in writing,
 ~ software distributed under the License is distributed on an
 ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 ~ KIND, either express or implied. See the License for the
 ~ specific language governing permissions and limitations
 ~ under the License.
-->
<Storage>
  <!--======================================================================-->
  <!-- Basic Configuration                                                  -->
  <!--======================================================================-->

  <!--
   ~ The name of this cluster. This is mainly used to prevent machines in
   ~ one logical cluster from joining another.
  -->
  <ClusterName>Test</ClusterName>

  <!--
   ~ Turn on to make new [non-seed] nodes automatically migrate the right data
   ~ to themselves. (If no InitialToken is specified, they will pick one
   ~ such that they will get half the range of the most-loaded node.)
   ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
   ~ so that you can't subsequently accidently bootstrap a node with
   ~ data on it. (You can reset this by wiping your data and commitlog
   ~ directories.)
   ~
   ~ Off by default so that new clusters and upgraders from 0.4 don't
   ~ bootstrap immediately. You should turn this on when you start adding
   ~ new nodes to a cluster that already has data on it. (If you are upgrading
   ~ from 0.4, start your cluster with it off once before changing it to true.
   ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
   ~ I/O before your cluster starts up.)
  -->
  <AutoBootstrap>false</AutoBootstrap>

  <!--
   ~ Keyspaces and ColumnFamilies:
   ~ A ColumnFamily is the Cassandra concept closest to a relational
   ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
   ~ very unusual circumstances you will have one Keyspace per application.

   ~ There is an implicit keyspace named 'system' for Cassandra internals.
  -->
  <Keyspaces>
    <Keyspace Name="AdapterTest">
      <KeysCachedFraction>0.01</KeysCachedFraction>

      <!-- 'Books' is for my our own tests -->
      <ColumnFamily CompareWith="UTF8Type" Name="Books" />
      <!-- 'Heffalumps' is for shared DataMapper adapter tests -->
      <ColumnFamily CompareWith="UTF8Type" Name="Heffalumps" />

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>

    <Keyspace Name="Twitter">
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <ColumnFamily CompareWith="UTF8Type" Name="Users" />
      <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
      <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
      <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
      <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
      <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
      <ColumnFamily CompareWith="BytesType" ColumnType="Standard" Name="TimelinishThings" />

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>

    <Keyspace Name="Multiblog">
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
      <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>

    <Keyspace Name="MultiblogLong">
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <ColumnFamily CompareWith="LongType" Name="Blogs"/>
      <ColumnFamily CompareWith="LongType" Name="Comments"/>

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>

    <Keyspace Name="CassandraObject">
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
      <ColumnFamily CompareWith="TimeUUIDType" Name="CustomersByLastName" />
      <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
      <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
      <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
      <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
      <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
      <!-- <ColumnFamily CompareWith="UTF8Type" Name="FirstNames" /> -->

      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>1</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>
  </Keyspaces>

  <!--
   ~ Authenticator: any IAuthenticator may be used, including your own as long
   ~ as it is on the classpath. Out of the box, Cassandra provides
   ~ org.apache.cassandra.auth.AllowAllAuthenticator and,
   ~ org.apache.cassandra.auth.SimpleAuthenticator
   ~ (SimpleAuthenticator uses access.properties and passwd.properties by
   ~ default).
   ~
   ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
  -->
  <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>

  <!--
   ~ Partitioner: any IPartitioner may be used, including your own as long
   ~ as it is on the classpath. Out of the box, Cassandra provides
   ~ org.apache.cassandra.dht.RandomPartitioner,
   ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
   ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
   ~ (CollatingOPP colates according to EN,US rules, not naive byte
   ~ ordering. Use this as an example if you need locale-aware collation.)
   ~ Range queries require using an order-preserving partitioner.
   ~
   ~ Achtung! Changing this parameter requires wiping your data
   ~ directories, since the partitioner can modify the sstable on-disk
   ~ format.
  -->
  <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>

  <!--
   ~ If you are using an order-preserving partitioner and you know your key
   ~ distribution, you can specify the token for this node to use. (Keys
   ~ are sent to the node with the "closest" token, so distributing your
   ~ tokens equally along the key distribution space will spread keys
   ~ evenly across your cluster.) This setting is only checked the first
   ~ time a node is started.

   ~ This can also be useful with RandomPartitioner to force equal spacing
   ~ of tokens around the hash space, especially for clusters with a small
   ~ number of nodes.
  -->
  <InitialToken></InitialToken>

  <!--
   ~ Directories: Specify where Cassandra should store different data on
   ~ disk. Keep the data disks and the CommitLog disks separate for best
   ~ performance
  -->
  <CommitLogDirectory>data/cassandra/commitlog</CommitLogDirectory>
  <DataFileDirectories>
      <DataFileDirectory>data/cassandra/data</DataFileDirectory>
  </DataFileDirectories>
  <CalloutLocation>data/cassandra/callouts</CalloutLocation>
  <StagingFileDirectory>data/cassandra/staging</StagingFileDirectory>


  <!--
   ~ Addresses of hosts that are deemed contact points. Cassandra nodes
   ~ use this list of hosts to find each other and learn the topology of
   ~ the ring. You must change this if you are running multiple nodes!
  -->
  <Seeds>
      <Seed>127.0.0.1</Seed>
  </Seeds>


  <!-- Miscellaneous -->

  <!-- Time to wait for a reply from other nodes before failing the command -->
  <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
  <!-- Size to allow commitlog to grow to before creating a new segment -->
  <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>


  <!-- Local hosts and ports -->

  <!--
   ~ Address to bind to and tell other nodes to connect to. You _must_
   ~ change this if you want multiple nodes to be able to communicate!
   ~
   ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
   ~ will always do the Right Thing *if* the node is properly configured
   ~ (hostname, name resolution, etc), and the Right Thing is to use the
   ~ address associated with the hostname (it might not be).
  -->
  <ListenAddress>localhost</ListenAddress>
  <!-- internal communications port -->
  <StoragePort>7000</StoragePort>

  <!--
   ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
   ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
   ~ all interfaces.
   ~
   ~ Leaving this blank has the same effect it does for ListenAddress,
   ~ (i.e. it will be based on the configured hostname of the node).
  -->
  <ThriftAddress>localhost</ThriftAddress>
  <!-- Thrift RPC port (the port clients connect to). -->
  <ThriftPort>9160</ThriftPort>
  <!--
   ~ Whether or not to use a framed transport for Thrift. If this option
   ~ is set to true then you must also use a framed transport on the
   ~ client-side, (framed and non-framed transports are not compatible).
  -->
  <ThriftFramedTransport>false</ThriftFramedTransport>


  <!--======================================================================-->
  <!-- Memory, Disk, and Performance                                        -->
  <!--======================================================================-->

  <!--
   ~ Access mode. mmapped i/o is substantially faster, but only practical on
   ~ a 64bit machine (which notably does not include EC2 "small" instances)
   ~ or relatively small datasets. "auto", the safe choice, will enable
   ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
   ~ (which may allow you to get part of the benefits of mmap on a 32bit
   ~ machine by mmapping only index files) and "standard".
   ~ (The buffer size settings that follow only apply to standard,
   ~ non-mmapped i/o.)
  -->
  <DiskAccessMode>auto</DiskAccessMode>

  <!--
   ~ Buffer size to use when performing contiguous column slices. Increase
   ~ this to the size of the column slices you typically perform.
   ~ (Name-based queries are performed with a buffer size of
   ~ ColumnIndexSizeInKB.)
  -->
  <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>

  <!--
   ~ Buffer size to use when flushing memtables to disk. (Only one
   ~ memtable is ever flushed at a time.) Increase (decrease) the index
   ~ buffer size relative to the data buffer if you have few (many)
   ~ columns per key. Bigger is only better _if_ your memtables get large
   ~ enough to use the space. (Check in your data directory after your
   ~ app has been running long enough.) -->
  <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
  <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>

  <!--
   ~ Add column indexes to a row after its contents reach this size.
   ~ Increase if your column values are large, or if you have a very large
   ~ number of columns. The competing causes are, Cassandra has to
   ~ deserialize this much of the row to read a single column, so you want
   ~ it to be small - at least if you do many partial-row reads - but all
   ~ the index data is read for each access, so you don't want to generate
   ~ that wastefully either.
  -->
  <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>

  <!--
   ~ Flush memtable after this much data has been inserted, including
   ~ overwritten data. There is one memtable per column family, and
   ~ this threshold is based solely on the amount of data stored, not
   ~ actual heap memory usage (there is some overhead in indexing the
   ~ columns).
  -->
  <MemtableThroughputInMB>64</MemtableThroughputInMB>
  <!--
   ~ Throughput setting for Binary Memtables. Typically these are
   ~ used for bulk load so you want them to be larger.
  -->
  <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
  <!--
   ~ The maximum number of columns in millions to store in memory per
   ~ ColumnFamily before flushing to disk. This is also a per-memtable
   ~ setting. Use with MemtableThroughputInMB to tune memory usage.
  -->
  <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
  <!--
   ~ The maximum time to leave a dirty memtable unflushed.
   ~ (While any affected columnfamilies have unflushed data from a
   ~ commit log segment, that segment cannot be deleted.)
   ~ This needs to be large enough that it won't cause a flush storm
   ~ of all your memtables flushing at once because none has hit
   ~ the size or count thresholds yet. For production, a larger
   ~ value such as 1440 is recommended.
  -->
  <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>

  <!--
   ~ Unlike most systems, in Cassandra writes are faster than reads, so
   ~ you can afford more of those in parallel. A good rule of thumb is 2
   ~ concurrent reads per processor core. Increase ConcurrentWrites to
   ~ the number of clients writing at once if you enable CommitLogSync +
   ~ CommitLogSyncDelay. -->
  <ConcurrentReads>8</ConcurrentReads>
  <ConcurrentWrites>32</ConcurrentWrites>

  <!--
   ~ CommitLogSync may be either "periodic" or "batch." When in batch
   ~ mode, Cassandra won't ack writes until the commit log has been
   ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
   ~ milliseconds for other writes, before performing the sync.

   ~ This is less necessary in Cassandra than in traditional databases
   ~ since replication reduces the odds of losing data from a failure
   ~ after writing the log entry but before it actually reaches the disk.
   ~ So the other option is "timed," where writes may be acked immediately
   ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
   ~ milliseconds.
  -->
  <CommitLogSync>periodic</CommitLogSync>
  <!--
   ~ Interval at which to perform syncs of the CommitLog in periodic mode.
   ~ Usually the default of 10000ms is fine; increase it if your i/o
   ~ load is such that syncs are taking excessively long times.
  -->
  <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
  <!--
   ~ Delay (in milliseconds) during which additional commit log entries
   ~ may be written before fsync in batch mode. This will increase
   ~ latency slightly, but can vastly improve throughput where there are
   ~ many writers. Set to zero to disable (each entry will be synced
   ~ individually). Reasonable values range from a minimal 0.1 to 10 or
   ~ even more if throughput matters more than latency.
  -->
  <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->

  <!--
   ~ Time to wait before garbage-collection deletion markers. Set this to
   ~ a large enough value that you are confident that the deletion marker
   ~ will be propagated to all replicas by the time this many seconds has
   ~ elapsed, even in the face of hardware failures. The default value is
   ~ ten days.
  -->
  <GCGraceSeconds>864000</GCGraceSeconds>
</Storage>
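Editor's note: the AdapterTest keyspace above is what the adapter's specs run against. A minimal sketch (not part of the package) of talking to that keyspace directly with the Ruby cassandra gem, which this adapter wraps; it assumes a local node booted with this config and uses only the client calls the adapter itself relies on:

require 'rubygems'
require 'cassandra'

# Connect to the Thrift port configured above (9160) and the
# 'AdapterTest' keyspace; 'Books' compares column names as UTF8 strings.
client = Cassandra.new('AdapterTest', '127.0.0.1:9160')

client.insert('Books', '1', 'title' => 'DataMapper', 'author' => 'Dan Kubb')
client.get('Books', '1')     # => hash of columns for row key '1'
client.remove('Books', '1')  # delete the row again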
data/lib/dm-cassandra-adapter/adapter.rb
ADDED
@@ -0,0 +1,169 @@
module DataMapperCassandra
  # TODO: Do not store IDs in the object hash ????

  class Adapter < DataMapper::Adapters::AbstractAdapter
    def create(resources)
      client.batch do
        resources.each do |resource|
          repository = resource.repository
          model      = resource.model
          attributes = resource.attributes
          properties = model.properties(repository.name)

          ## Figure out or generate the key
          kind = self.column_family(model)
          keys = properties.key
          raise "Multiple keys in #{resource.inspect}" if keys.size > 1
          if keys.size == 1
            name     = keys.first.name
            property = properties[name]
            key      = convert_value(property, attributes[name])
          end
          if keys.first.serial? && (key.nil? || key == 0 || key == '')
            name     = keys.first.name
            property = properties[name]
            key = if property.primitive == Integer
              # BAD: for Serial
              Time.stamp & 0x7FFFFFFF
            else
              # GOOD: for UUID/:key => true
              SimpleUUID::UUID.new.to_guid
            end
          end

          initialize_serial(resource, key)
          attributes = resource.attributes

          #puts "#{key} => #{attributes.inspect}"

          ## Convert to serialized data ##
          data = {}
          attributes.each do |name, value|
            property = properties[name]
            data[property.field] = convert_value(property, value)
          end

          # Insert this resource into Cassandra
          client.insert(kind, key.to_s, data);
        end
      end
      resources
    end

    def column_family(model)
      model.storage_name(self.name)
    end

    def convert_value(property, value)
      property.dump(value)
    end

    def read(query)
      model = query.model
      kind  = self.column_family(model)

      records = if id = extract_id_from_query(query)
        data = client.get(kind, id.to_s)
        [ load_resource(data, model) ]
      else
        # raise NotImplementedError.new("SimpleDB supports only a single order clause")
        # FIXME - This is terrible, we should not get all keys
        all_keys  = client.get_range(kind)
        data_hash = client.multi_get(kind, all_keys)
        data_hash.map do |id, data|
          load_resource(data, model)
        end
      end

      query.filter_records(records)
    end

    def update(dirty_attributes, collection)
      client.batch do
        count = collection.select do |resource|
          model = resource.model
          kind  = self.column_family(model)
          key   = model.key
          id    = key.get(resource).join

          data = {}
          dirty_attributes.each do |property, value|
            property.set!(resource, value)
            data[property.field] = convert_value(property, value)
          end

          client.insert(kind, id, data);
        end
      end.size
    end

    def delete(collection)
      client.batch do
        count = collection.select do |resource|
          model = resource.model
          kind  = self.column_family(model)
          key   = model.key
          id    = key.get(resource).join

          client.remove(kind, id)
        end
      end.size
    end

    private

    def initialize(*)
      super
      @resource_naming_convention = lambda do |value|
        Extlib::Inflection.pluralize(Extlib::Inflection.camelize(value))
      end
    end

    def client
      @client ||= begin
        keyspace = @options[:path][1..-1] # Without leading slash
        if @options[:host] == 'memory'
          require 'cassandra/mock'
          this_dir = File.dirname(__FILE__)
          conf_xml = File.expand_path('../../conf/storage-conf.xml', this_dir)
          Cassandra::Mock.new(keyspace, conf_xml)
        else
          server = "#{@options[:host]}:#{@options[:port] || 9160}"
          Cassandra.new(keyspace, server)
        end
      end
    end

    def extract_id_from_query(query)
      return nil unless query.limit == 1

      conditions = query.conditions

      return nil unless conditions.kind_of?(DataMapper::Query::Conditions::AndOperation)
      return nil unless (key_condition = conditions.select { |o| o.subject.key? }).size == 1

      key_condition.first.value
    end

    def extract_params_from_query(query)
      conditions = query.conditions

      return {} unless conditions.kind_of?(DataMapper::Query::Conditions::AndOperation)
      return {} if conditions.any? { |o| o.subject.key? }

      query.options
    end

    ## CASSANDRA ###
    def load_resource(data, model)
      field_to_property = model.properties(name).map { |p| [ p.field, p ] }.to_hash

      record = {}
      data.each do |key, value|
        next unless property = field_to_property[key]
        record[key] = property.load(value)
      end
      record
    end
  end
end
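Editor's note: the private client method above admits two connection styles, both exercised by the bundled specs. A sketch of both, inferred from the @options handling (the cassandra:// URI scheme itself is presumably registered by lib/dm-cassandra-adapter.rb, whose body is not shown in this diff):

require 'dm-cassandra-adapter'

# Real node: the URI host and port become the Thrift server address,
# and the path names a keyspace from conf/storage-conf.xml.
DataMapper.setup(:default, 'cassandra://127.0.0.1:9160/AdapterTest')

# In-memory: the literal host 'memory' makes the adapter build a
# Cassandra::Mock from the bundled storage-conf.xml; this is exactly
# the setup line used in spec/spec_helper.rb below.
DataMapper.setup(:default, 'cassandra://memory/AdapterTest')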
data/spec/semipublic/cassandra_adapter_spec.rb
ADDED
@@ -0,0 +1,116 @@
require 'spec_helper'
require DataMapper.root / 'lib' / 'dm-core' / 'spec' / 'shared' / 'adapter_spec'

describe DataMapper::Adapters::CassandraAdapter do
  before :all do
    @adapter = DataMapper::Repository.adapters[:default]
    @adapter.send(:client).clear_keyspace!
  end

  # Shared DataMapper::Adapter specs
  it_should_behave_like 'An Adapter'


  describe 'with one created resource' do
    before :all do
      @input_hash = {
        :created_at => DateTime.parse('2009-05-17T22:38:42-07:00'),
        :title      => 'DataMapper',
        :author     => 'Dan Kubb'
      }

      # Create resource
      @resource  = Book.new(@input_hash)
      @resources = [ @resource ]
      @response  = @adapter.create(@resources)
      @generated_id = @resource.id

      # Stringify keys and add the Generated ID
      @output_hash = @input_hash.inject('id' => @generated_id) do |s, kv|
        s[kv[0].to_s] = kv[1]
        s
      end
    end

    it 'should return an Array containing the Resource' do
      @response.should equal(@resources)
    end

    it 'should set the identity field' do
      @generated_id.should be_present
    end

    describe '#read' do
      describe 'with unscoped query' do
        before :all do
          @query    = Book.all.query
          @response = @adapter.read(@query)
        end

        it 'should return an Array with the matching Records' do
          @response.should == [ @output_hash ]
        end
      end
    end

    describe 'with query scoped by a key' do
      before :all do
        @query    = Book.all(:id => @generated_id, :limit => 1).query
        @response = @adapter.read(@query)
      end

      it 'should return an Array with the matching Records' do
        @response.should == [ @output_hash ]
      end
    end


    describe 'with query scoped by a non-key' do
      before :all do
        @query    = Book.all(:author => 'Dan Kubb').query
        @response = @adapter.read(@query)
      end

      it 'should return an Array with the matching Records' do
        @response.should == [ @output_hash ]
      end
    end

    describe 'with a non-standard model <=> storage_name relationship' do
      before :all do
        @query    = DifficultBook.all.query
        @response = @adapter.read(@query)
      end

      it 'should return an Array with the matching Records' do
        @response.should == [ @output_hash ]
      end
    end

    describe '#update' do
      before :all do
        @resources = Book.all
        @response  = @adapter.update({ Book.properties[:author] => 'John Doe' }, @resources)
      end

      it 'should return the number of updated Resources' do
        @response.should == 1
      end

      it 'should modify the Resource' do
        @resources.first.author.should == 'John Doe'
      end
    end

    describe '#delete' do
      before :all do
        @resources = Book.all
        @response  = @adapter.delete(@resources)
      end

      it 'should return the number of updated Resources' do
        @response.should == 1
      end
    end
  end
end
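Editor's note: the Book and DifficultBook fixtures this spec requires are listed in the diff summary (spec/fixtures/book.rb, spec/fixtures/difficult_book.rb) but their bodies are not included in the diff. A hypothetical model consistent with how the spec uses Book, for orientation only:

# Hypothetical sketch; the packaged spec/fixtures/book.rb is not shown.
# The spec assigns title, author and created_at, and expects a
# generated id after create.
class Book
  include DataMapper::Resource

  property :id,         Serial
  property :title,      String
  property :author,     String
  property :created_at, DateTime
end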
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,30 @@
require 'rubygems'
require 'pathname'
require 'simple_uuid'

# use local dm-core if running from a typical dev checkout.
lib = File.join('..', '..', '..', 'dm-core', 'lib')
$LOAD_PATH.unshift(lib) if File.directory?(lib)

# use local dm-validations if running from a typical dev checkout.
lib = File.join('..', '..', 'dm-validations', 'lib')
$LOAD_PATH.unshift(lib) if File.directory?(lib)
require 'dm-validations'

# use local dm-serializer if running from a typical dev checkout.
lib = File.join('..', '..', 'dm-serializer', 'lib')
$LOAD_PATH.unshift(lib) if File.directory?(lib)

# Support running specs with 'rake spec' and 'spec'
$LOAD_PATH.unshift('lib') unless $LOAD_PATH.include?('lib')

require 'simple_uuid'
require 'dm-cassandra-adapter'

ROOT = Pathname(__FILE__).dirname.parent

DataMapper.setup(:default, 'cassandra://memory/AdapterTest')

Dir[ROOT / 'spec' / 'fixtures' / '**' / '*.rb'].each { |rb| require rb }

####FakeWeb.allow_net_connect = false
data/tasks/spec.rake
ADDED
@@ -0,0 +1,41 @@
spec_defaults = lambda do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.libs << 'lib' << 'spec'
  spec.spec_opts << '--options' << 'spec/spec.opts'
end

begin
  require 'spec/rake/spectask'

  Spec::Rake::SpecTask.new(:spec, &spec_defaults)
rescue LoadError
  task :spec do
    abort 'rspec is not available. In order to run spec, you must: gem install rspec'
  end
end

begin
  require 'rcov'
  require 'spec/rake/verify_rcov'

  Spec::Rake::SpecTask.new(:rcov) do |rcov|
    spec_defaults.call(rcov)
    rcov.rcov      = true
    rcov.rcov_opts = File.read('spec/rcov.opts').split(/\s+/)
  end

  RCov::VerifyTask.new(:verify_rcov => :rcov) do |rcov|
    rcov.threshold = 100
  end
rescue LoadError
  %w[ rcov verify_rcov ].each do |name|
    task name do
      abort "rcov is not available. In order to run #{name}, you must: gem install rcov"
    end
  end
end

#task :spec => :check_dependencies
#task :rcov => :check_dependencies

task :default => :spec
metadata
ADDED
@@ -0,0 +1,145 @@
--- !ruby/object:Gem::Specification
name: dm-cassandra-adapter
version: !ruby/object:Gem::Version
  prerelease: false
  segments:
  - 0
  - 0
  - 1
  version: 0.0.1
platform: ruby
authors:
- Michael Rykov
autorequire:
bindir: bin
cert_chain: []

date: 2010-12-19 00:00:00 -08:00
default_executable:
dependencies:
- !ruby/object:Gem::Dependency
  name: dm-core
  prerelease: false
  requirement: &id001 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 0
        - 2
        version: 1.0.2
  type: :runtime
  version_requirements: *id001
- !ruby/object:Gem::Dependency
  name: dm-serializer
  prerelease: false
  requirement: &id002 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 0
        - 2
        version: 1.0.2
  type: :runtime
  version_requirements: *id002
- !ruby/object:Gem::Dependency
  name: rspec
  prerelease: false
  requirement: &id003 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 3
        version: "1.3"
  type: :development
  version_requirements: *id003
- !ruby/object:Gem::Dependency
  name: dm-validations
  prerelease: false
  requirement: &id004 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 0
        - 2
        version: 1.0.2
  type: :development
  version_requirements: *id004
- !ruby/object:Gem::Dependency
  name: fakeweb
  prerelease: false
  requirement: &id005 !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        segments:
        - 1
        - 3
        version: "1.3"
  type: :development
  version_requirements: *id005
description: Cassandra Adapter for DataMapper
email: mrykov [a] gmail [d] com
executables: []

extensions: []

extra_rdoc_files:
- LICENSE
files:
- LICENSE
- Rakefile
- VERSION
- conf/storage-conf.xml
- lib/dm-cassandra-adapter.rb
- lib/dm-cassandra-adapter/adapter.rb
- lib/dm-cassandra-adapter/spec/setup.rb
- spec/fixtures/book.rb
- spec/fixtures/difficult_book.rb
- spec/rcov.opts
- spec/semipublic/cassandra_adapter_spec.rb
- spec/spec.opts
- spec/spec_helper.rb
- tasks/spec.rake
has_rdoc: true
homepage: http://github.com/rykov/dm-cassandra-adapter
licenses: []

post_install_message:
rdoc_options: []

require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      segments:
      - 0
      version: "0"
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      segments:
      - 0
      version: "0"
requirements: []

rubyforge_project: datamapper
rubygems_version: 1.3.6
signing_key:
specification_version: 3
summary: Cassandra Adapter for DataMapper
test_files:
- spec/fixtures/book.rb
- spec/fixtures/difficult_book.rb
- spec/semipublic/cassandra_adapter_spec.rb
- spec/spec_helper.rb