dm-cassandra-adapter 0.0.1

data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2010 Michael Rykov
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,32 @@
+ require 'rubygems'
+ require 'rake'
+
+ begin
+ gem 'jeweler', '~> 1.4'
+ require 'jeweler'
+
+ Jeweler::Tasks.new do |gem|
+ gem.name = 'dm-cassandra-adapter'
+ gem.summary = 'Cassandra Adapter for DataMapper'
+ gem.description = gem.summary
+ gem.email = 'mrykov [a] gmail [d] com'
+ gem.homepage = 'http://github.com/rykov/%s' % gem.name
+ gem.authors = [ 'Michael Rykov' ]
+ gem.has_rdoc = 'yard'
+
+ gem.rubyforge_project = 'datamapper'
+
+ gem.add_dependency 'dm-core', '~> 1.0.2'
+ gem.add_dependency 'dm-serializer', '~> 1.0.2'
+
+ gem.add_development_dependency 'rspec', '~> 1.3'
+ gem.add_development_dependency 'dm-validations', '~> 1.0.2'
+ gem.add_development_dependency 'fakeweb', '~> 1.3'
+ end
+
+ Jeweler::GemcutterTasks.new
+
+ FileList['tasks/**/*.rake'].each { |task| import task }
+ rescue LoadError
+ puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler'
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.0.1
data/conf/storage-conf.xml ADDED
@@ -0,0 +1,354 @@
+ <!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+ <Storage>
+ <!--======================================================================-->
+ <!-- Basic Configuration -->
+ <!--======================================================================-->
+
+ <!--
+ ~ The name of this cluster. This is mainly used to prevent machines in
+ ~ one logical cluster from joining another.
+ -->
+ <ClusterName>Test</ClusterName>
+
+ <!--
+ ~ Turn on to make new [non-seed] nodes automatically migrate the right data
+ ~ to themselves. (If no InitialToken is specified, they will pick one
+ ~ such that they will get half the range of the most-loaded node.)
+ ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
+ so that you can't subsequently accidentally bootstrap a node with
+ data on it. (You can reset this by wiping your data and commitlog
+ directories.)
+ ~
+ ~ Off by default so that new clusters and upgraders from 0.4 don't
+ ~ bootstrap immediately. You should turn this on when you start adding
+ ~ new nodes to a cluster that already has data on it. (If you are upgrading
+ ~ from 0.4, start your cluster with it off once before changing it to true.
+ ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
+ ~ I/O before your cluster starts up.)
+ -->
+ <AutoBootstrap>false</AutoBootstrap>
+
+ <!--
+ ~ Keyspaces and ColumnFamilies:
+ ~ A ColumnFamily is the Cassandra concept closest to a relational
+ ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
+ ~ very unusual circumstances you will have one Keyspace per application.
+
+ ~ There is an implicit keyspace named 'system' for Cassandra internals.
+ -->
+ <Keyspaces>
+ <Keyspace Name="AdapterTest">
+ <KeysCachedFraction>0.01</KeysCachedFraction>
+
+ <!-- 'Books' is for our own tests -->
+ <ColumnFamily CompareWith="UTF8Type" Name="Books" />
+ <!-- 'Heffalumps' is for shared DataMapper adapter tests -->
+ <ColumnFamily CompareWith="UTF8Type" Name="Heffalumps" />
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+
+ <Keyspace Name="Twitter">
+ <KeysCachedFraction>0.01</KeysCachedFraction>
+ <ColumnFamily CompareWith="UTF8Type" Name="Users" />
+ <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
+ <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
+ <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
+ <ColumnFamily CompareWith="BytesType" ColumnType="Standard" Name="TimelinishThings" />
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+
+ <Keyspace Name="Multiblog">
+ <KeysCachedFraction>0.01</KeysCachedFraction>
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+
+ <Keyspace Name="MultiblogLong">
+ <KeysCachedFraction>0.01</KeysCachedFraction>
+ <ColumnFamily CompareWith="LongType" Name="Blogs"/>
+ <ColumnFamily CompareWith="LongType" Name="Comments"/>
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+
+ <Keyspace Name="CassandraObject">
+ <KeysCachedFraction>0.01</KeysCachedFraction>
+ <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
+ <ColumnFamily CompareWith="TimeUUIDType" Name="CustomersByLastName" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
+ <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
+ <!-- <ColumnFamily CompareWith="UTF8Type" Name="FirstNames" /> -->
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+ </Keyspaces>
+
+ <!--
+ ~ Authenticator: any IAuthenticator may be used, including your own as long
+ ~ as it is on the classpath. Out of the box, Cassandra provides
+ ~ org.apache.cassandra.auth.AllowAllAuthenticator and,
+ ~ org.apache.cassandra.auth.SimpleAuthenticator
+ ~ (SimpleAuthenticator uses access.properties and passwd.properties by
+ ~ default).
+ ~
+ ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
+ -->
+ <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
+
+ <!--
+ ~ Partitioner: any IPartitioner may be used, including your own as long
+ ~ as it is on the classpath. Out of the box, Cassandra provides
+ ~ org.apache.cassandra.dht.RandomPartitioner,
+ ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
+ ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
+ ~ (CollatingOPP collates according to EN,US rules, not naive byte
+ ~ ordering. Use this as an example if you need locale-aware collation.)
+ ~ Range queries require using an order-preserving partitioner.
+ ~
+ ~ Achtung! Changing this parameter requires wiping your data
+ ~ directories, since the partitioner can modify the sstable on-disk
+ ~ format.
+ -->
+ <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
+
+ <!--
+ ~ If you are using an order-preserving partitioner and you know your key
+ ~ distribution, you can specify the token for this node to use. (Keys
+ ~ are sent to the node with the "closest" token, so distributing your
+ ~ tokens equally along the key distribution space will spread keys
+ ~ evenly across your cluster.) This setting is only checked the first
+ ~ time a node is started.
+
+ ~ This can also be useful with RandomPartitioner to force equal spacing
+ ~ of tokens around the hash space, especially for clusters with a small
+ ~ number of nodes.
+ -->
+ <InitialToken></InitialToken>
+
+ <!--
+ ~ Directories: Specify where Cassandra should store different data on
+ ~ disk. Keep the data disks and the CommitLog disks separate for best
+ ~ performance
+ -->
+ <CommitLogDirectory>data/cassandra/commitlog</CommitLogDirectory>
+ <DataFileDirectories>
+ <DataFileDirectory>data/cassandra/data</DataFileDirectory>
+ </DataFileDirectories>
+ <CalloutLocation>data/cassandra/callouts</CalloutLocation>
+ <StagingFileDirectory>data/cassandra/staging</StagingFileDirectory>
+
+
+ <!--
+ ~ Addresses of hosts that are deemed contact points. Cassandra nodes
+ ~ use this list of hosts to find each other and learn the topology of
+ ~ the ring. You must change this if you are running multiple nodes!
+ -->
+ <Seeds>
+ <Seed>127.0.0.1</Seed>
+ </Seeds>
+
+
+ <!-- Miscellaneous -->
+
+ <!-- Time to wait for a reply from other nodes before failing the command -->
+ <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
+ <!-- Size to allow commitlog to grow to before creating a new segment -->
+ <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
+
+
+ <!-- Local hosts and ports -->
+
+ <!--
+ ~ Address to bind to and tell other nodes to connect to. You _must_
+ ~ change this if you want multiple nodes to be able to communicate!
+ ~
+ ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+ ~ will always do the Right Thing *if* the node is properly configured
+ ~ (hostname, name resolution, etc), and the Right Thing is to use the
+ ~ address associated with the hostname (it might not be).
+ -->
+ <ListenAddress>localhost</ListenAddress>
+ <!-- internal communications port -->
+ <StoragePort>7000</StoragePort>
+
+ <!--
+ ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
+ ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
+ ~ all interfaces.
+ ~
+ ~ Leaving this blank has the same effect it does for ListenAddress,
+ ~ (i.e. it will be based on the configured hostname of the node).
+ -->
+ <ThriftAddress>localhost</ThriftAddress>
+ <!-- Thrift RPC port (the port clients connect to). -->
+ <ThriftPort>9160</ThriftPort>
+ <!--
+ ~ Whether or not to use a framed transport for Thrift. If this option
+ ~ is set to true then you must also use a framed transport on the
+ ~ client-side, (framed and non-framed transports are not compatible).
+ -->
+ <ThriftFramedTransport>false</ThriftFramedTransport>
+
+
+ <!--======================================================================-->
+ <!-- Memory, Disk, and Performance -->
+ <!--======================================================================-->
+
+ <!--
+ ~ Access mode. mmapped i/o is substantially faster, but only practical on
+ ~ a 64bit machine (which notably does not include EC2 "small" instances)
+ ~ or relatively small datasets. "auto", the safe choice, will enable
+ ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
+ ~ (which may allow you to get part of the benefits of mmap on a 32bit
+ ~ machine by mmapping only index files) and "standard".
+ ~ (The buffer size settings that follow only apply to standard,
+ ~ non-mmapped i/o.)
+ -->
+ <DiskAccessMode>auto</DiskAccessMode>
+
+ <!--
+ ~ Buffer size to use when performing contiguous column slices. Increase
+ ~ this to the size of the column slices you typically perform.
+ ~ (Name-based queries are performed with a buffer size of
+ ~ ColumnIndexSizeInKB.)
+ -->
+ <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
+
+ <!--
+ ~ Buffer size to use when flushing memtables to disk. (Only one
+ ~ memtable is ever flushed at a time.) Increase (decrease) the index
+ ~ buffer size relative to the data buffer if you have few (many)
+ ~ columns per key. Bigger is only better _if_ your memtables get large
+ ~ enough to use the space. (Check in your data directory after your
+ ~ app has been running long enough.) -->
+ <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
+ <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
+
+ <!--
+ ~ Add column indexes to a row after its contents reach this size.
+ ~ Increase if your column values are large, or if you have a very large
+ ~ number of columns. The competing causes are, Cassandra has to
+ ~ deserialize this much of the row to read a single column, so you want
+ ~ it to be small - at least if you do many partial-row reads - but all
+ ~ the index data is read for each access, so you don't want to generate
+ ~ that wastefully either.
+ -->
+ <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
+
+ <!--
+ ~ Flush memtable after this much data has been inserted, including
+ ~ overwritten data. There is one memtable per column family, and
+ ~ this threshold is based solely on the amount of data stored, not
+ ~ actual heap memory usage (there is some overhead in indexing the
+ ~ columns).
+ -->
+ <MemtableThroughputInMB>64</MemtableThroughputInMB>
+ <!--
+ ~ Throughput setting for Binary Memtables. Typically these are
+ ~ used for bulk load so you want them to be larger.
+ -->
+ <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
+ <!--
+ ~ The maximum number of columns in millions to store in memory per
+ ~ ColumnFamily before flushing to disk. This is also a per-memtable
+ ~ setting. Use with MemtableThroughputInMB to tune memory usage.
+ -->
+ <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
+ <!--
+ ~ The maximum time to leave a dirty memtable unflushed.
+ ~ (While any affected columnfamilies have unflushed data from a
+ ~ commit log segment, that segment cannot be deleted.)
+ ~ This needs to be large enough that it won't cause a flush storm
+ ~ of all your memtables flushing at once because none has hit
+ ~ the size or count thresholds yet. For production, a larger
+ ~ value such as 1440 is recommended.
+ -->
+ <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
+
+ <!--
+ ~ Unlike most systems, in Cassandra writes are faster than reads, so
+ ~ you can afford more of those in parallel. A good rule of thumb is 2
+ ~ concurrent reads per processor core. Increase ConcurrentWrites to
+ ~ the number of clients writing at once if you enable CommitLogSync +
+ ~ CommitLogSyncDelay. -->
+ <ConcurrentReads>8</ConcurrentReads>
+ <ConcurrentWrites>32</ConcurrentWrites>
+
+ <!--
+ ~ CommitLogSync may be either "periodic" or "batch." When in batch
+ ~ mode, Cassandra won't ack writes until the commit log has been
+ ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
+ ~ milliseconds for other writes, before performing the sync.
+
+ ~ This is less necessary in Cassandra than in traditional databases
+ ~ since replication reduces the odds of losing data from a failure
+ ~ after writing the log entry but before it actually reaches the disk.
+ ~ So the other option is "periodic," where writes may be acked immediately
+ ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
+ ~ milliseconds.
+ -->
+ <CommitLogSync>periodic</CommitLogSync>
+ <!--
+ ~ Interval at which to perform syncs of the CommitLog in periodic mode.
+ ~ Usually the default of 10000ms is fine; increase it if your i/o
+ ~ load is such that syncs are taking excessively long times.
+ -->
+ <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
+ <!--
+ ~ Delay (in milliseconds) during which additional commit log entries
+ ~ may be written before fsync in batch mode. This will increase
+ ~ latency slightly, but can vastly improve throughput where there are
+ ~ many writers. Set to zero to disable (each entry will be synced
+ ~ individually). Reasonable values range from a minimal 0.1 to 10 or
+ ~ even more if throughput matters more than latency.
+ -->
+ <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
+
+ <!--
+ ~ Time to wait before garbage-collecting deletion markers. Set this to
+ ~ a large enough value that you are confident that the deletion marker
+ ~ will be propagated to all replicas by the time this many seconds has
+ ~ elapsed, even in the face of hardware failures. The default value is
+ ~ ten days.
+ -->
+ <GCGraceSeconds>864000</GCGraceSeconds>
+ </Storage>
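The AdapterTest keyspace above is the one the adapter's own specs connect to ('Books' for this gem's tests, 'Heffalumps' for the shared DataMapper adapter specs). A quick smoke test of this configuration, using the cassandra gem the adapter wraps — a sketch, not part of the gem, assuming a local node started with this storage-conf.xml (Thrift on port 9160, framed transport off):

    require 'cassandra'

    # Connect to the AdapterTest keyspace defined above.
    client = Cassandra.new('AdapterTest', '127.0.0.1:9160')

    # 'Books' is one of its UTF8Type-compared column families.
    client.insert('Books', '1', 'title' => 'DataMapper', 'author' => 'Dan Kubb')
    client.get('Books', '1') # => the columns written above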
data/lib/dm-cassandra-adapter.rb ADDED
@@ -0,0 +1,9 @@
+ require 'extlib'
+ require 'dm-core'
+ require 'dm-serializer'
+ require 'cassandra'
+
+ require 'dm-cassandra-adapter/adapter'
+
+ DataMapper::Adapters::CassandraAdapter = DataMapperCassandra::Adapter
+ DataMapper::Adapters.const_added(:CassandraAdapter)
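Once the constant is registered, DataMapper selects this adapter from a cassandra:// URI. A minimal setup sketch (assumptions: a running node with the AdapterTest keyspace from conf/storage-conf.xml, and a model like spec/fixtures/book.rb; the 'Books' column family name comes from the pluralize/camelize convention in adapter.rb):

    require 'dm-cassandra-adapter'

    # The path names the keyspace; the port defaults to 9160.
    # A host of 'memory' selects Cassandra::Mock instead (see Adapter#client).
    DataMapper.setup(:default, 'cassandra://127.0.0.1:9160/AdapterTest')

    class Book
      include DataMapper::Resource
      property :id,    Serial
      property :title, String
    end

    Book.create(:title => 'DataMapper') # inserts a row into the 'Books' column family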
data/lib/dm-cassandra-adapter/adapter.rb ADDED
@@ -0,0 +1,169 @@
+ module DataMapperCassandra
+ # TODO: Do not store IDs in the object hash ????
+
+ class Adapter < DataMapper::Adapters::AbstractAdapter
+ def create(resources)
+ client.batch do
+ resources.each do |resource|
+ repository = resource.repository
+ model = resource.model
+ attributes = resource.attributes
+ properties = model.properties(repository.name)
+
+ ## Figure out or generate the key
+ kind = self.column_family(model)
+ keys = properties.key
+ raise "Multiple keys in #{resource.inspect}" if keys.size > 1
+ if keys.size == 1
+ name = keys.first.name
+ property = properties[name]
+ key = convert_value(property, attributes[name])
+ end
+ if keys.first.serial? && (key.nil? || key == 0 || key == '')
+ name = keys.first.name
+ property = properties[name]
+ key = if property.primitive == Integer
+ # BAD: for Serial
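+ # (the stamp truncated to its low 31 bits fits a positive Integer but is not guaranteed unique)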
+ Time.stamp & 0x7FFFFFFF
+ else
+ # GOOD: for UUID/:key => true
+ SimpleUUID::UUID.new.to_guid
+ end
+ end
+
+ initialize_serial(resource, key)
+ attributes = resource.attributes
+
+ #puts "#{key} => #{attributes.inspect}"
+
+ ## Convert to serialized data ##
+ data = {}
+ attributes.each do |name, value|
+ property = properties[name]
+ data[property.field] = convert_value(property, value)
+ end
+
+ # Insert this resource into Cassandra
+ client.insert(kind, key.to_s, data);
+ end
+ end
+ resources
+ end
+
+ def column_family(model)
+ model.storage_name(self.name)
+ end
+
+ def convert_value(property, value)
+ property.dump(value)
+ end
+
+ def read(query)
+ model = query.model
+ kind = self.column_family(model)
+
+ records = if id = extract_id_from_query(query)
+ data = client.get(kind, id.to_s)
+ [ load_resource(data, model) ]
+ else
+ # raise NotImplementedError.new("SimpleDB supports only a single order clause")
+ # FIXME - This is terrible, we should not get all keys
+ all_keys = client.get_range(kind)
+ data_hash = client.multi_get(kind, all_keys)
+ data_hash.map do |id, data|
+ load_resource(data, model)
+ end
+ end
+
+ query.filter_records(records)
+ end
+
+ def update(dirty_attributes, collection)
+ count = 0
+ client.batch do
+ count = collection.select do |resource|
+ model = resource.model
+ kind = self.column_family(model)
+ key = model.key
+ id = key.get(resource).join
+
+ data = {}
+ dirty_attributes.each do |property, value|
+ property.set!(resource, value)
+ data[property.field] = convert_value(property, value)
+ end
+
+ client.insert(kind, id, data);
+ end.size
+ end
+ count
+ end
+
+ def delete(collection)
+ count = 0
+ client.batch do
+ count = collection.select do |resource|
+ model = resource.model
+ kind = self.column_family(model)
+ key = model.key
+ id = key.get(resource).join
+
+ client.remove(kind, id)
+ end.size
+ end
+ count
+ end
+
+ private
+
+ def initialize(*)
+ super
+ @resource_naming_convention = lambda do |value|
+ Extlib::Inflection.pluralize(Extlib::Inflection.camelize(value))
+ end
+ end
+
+ def client
+ @client ||= begin
+ keyspace = @options[:path][1..-1] # Without leading slash
+ if @options[:host] == 'memory'
+ require 'cassandra/mock'
+ this_dir = File.dirname(__FILE__)
+ conf_xml = File.expand_path('../../conf/storage-conf.xml', this_dir)
+ Cassandra::Mock.new(keyspace, conf_xml)
+ else
+ server = "#{@options[:host]}:#{@options[:port] || 9160}"
+ Cassandra.new(keyspace, server)
+ end
+ end
+ end
+
+ def extract_id_from_query(query)
+ return nil unless query.limit == 1
+
+ conditions = query.conditions
+
+ return nil unless conditions.kind_of?(DataMapper::Query::Conditions::AndOperation)
+ return nil unless (key_condition = conditions.select { |o| o.subject.key? }).size == 1
+
+ key_condition.first.value
+ end
+
+ def extract_params_from_query(query)
+ conditions = query.conditions
+
+ return {} unless conditions.kind_of?(DataMapper::Query::Conditions::AndOperation)
+ return {} if conditions.any? { |o| o.subject.key? }
+
+ query.options
+ end
+
+ ## CASSANDRA ###
+ def load_resource(data, model)
+ field_to_property = model.properties(name).map { |p| [ p.field, p ] }.to_hash
+
+ record = {}
+ data.each do |key, value|
+ next unless property = field_to_property[key]
+ record[key] = property.load(value)
+ end
+ record
+ end
+ end
+ end
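A hypothetical trace of how #read above maps queries onto cassandra-gem calls (column family 'Books' per the naming convention; all method names are from the code above):

    # A query scoped to a single key with :limit => 1 takes the fast path
    # through extract_id_from_query:
    Book.all(:id => 42, :limit => 1).to_a
    #   -> client.get('Books', '42')          one row, fetched directly

    # Every other query takes the full-scan branch flagged by the FIXME:
    Book.all(:author => 'Dan Kubb').to_a
    #   -> keys = client.get_range('Books')   every row key in the column family
    #   -> client.multi_get('Books', keys)    every row loaded
    #   -> query.filter_records(records)      conditions applied in memory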
data/lib/dm-cassandra-adapter/spec/setup.rb ADDED
@@ -0,0 +1,15 @@
+ require 'dm-cassandra-adapter'
+ require 'dm-core/spec/setup'
+
+ module DataMapper
+ module Spec
+ module Adapters
+
+ class CassandraAdapter < Adapter
+ end
+
+ use CassandraAdapter
+
+ end
+ end
+ end
data/spec/fixtures/book.rb ADDED
@@ -0,0 +1,8 @@
+ class Book
+ include DataMapper::Resource
+
+ property :id, Serial
+ property :created_at, DateTime
+ property :title, String
+ property :author, String
+ end
data/spec/fixtures/difficult_book.rb ADDED
@@ -0,0 +1,10 @@
+ class DifficultBook
+ include DataMapper::Resource
+
+ storage_names[:default] = 'Books'
+
+ property :id, Serial
+ property :created_at, DateTime
+ property :title, String
+ property :author, String
+ end
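DifficultBook exercises the non-standard model <=> storage_name mapping: both fixtures read and write the same column family. A sketch of the two naming paths (the derived name comes from the pluralize/camelize convention set in Adapter#initialize):

    Book.storage_name(:default)          # => 'Books' (derived from the model name)
    DifficultBook.storage_name(:default) # => 'Books' (explicit override above)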
data/spec/rcov.opts ADDED
@@ -0,0 +1,6 @@
+ --exclude "spec,^/"
+ --sort coverage
+ --callsites
+ --xrefs
+ --profile
+ --text-summary
data/spec/semipublic/cassandra_adapter_spec.rb ADDED
@@ -0,0 +1,116 @@
+ require 'spec_helper'
+ require DataMapper.root / 'lib' / 'dm-core' / 'spec' / 'shared' / 'adapter_spec'
+
+ describe DataMapper::Adapters::CassandraAdapter do
+ before :all do
+ @adapter = DataMapper::Repository.adapters[:default]
+ @adapter.send(:client).clear_keyspace!
+ end
+
+ # Shared DataMapper::Adapter specs
+ it_should_behave_like 'An Adapter'
+
+
+ describe 'with one created resource' do
+ before :all do
+ @input_hash = {
+ :created_at => DateTime.parse('2009-05-17T22:38:42-07:00'),
+ :title => 'DataMapper',
+ :author => 'Dan Kubb'
+ }
+
+ # Create resource
+ @resource = Book.new(@input_hash)
+ @resources = [ @resource ]
+ @response = @adapter.create(@resources)
+ @generated_id = @resource.id
+
+ # Stringify keys and add the Generated ID
+ @output_hash = @input_hash.inject('id' => @generated_id) do |s, kv|
+ s[kv[0].to_s] = kv[1]
+ s
+ end
+ end
+
+ it 'should return an Array containing the Resource' do
+ @response.should equal(@resources)
+ end
+
+ it 'should set the identity field' do
+ @generated_id.should be_present
+ end
+
+ describe '#read' do
+ describe 'with unscoped query' do
+ before :all do
+ @query = Book.all.query
+ @response = @adapter.read(@query)
+ end
+
+ it 'should return an Array with the matching Records' do
+ @response.should == [ @output_hash ]
+ end
+ end
+ end
+
+ describe 'with query scoped by a key' do
+ before :all do
+ @query = Book.all(:id => @generated_id, :limit => 1).query
+ @response = @adapter.read(@query)
+ end
+
+ it 'should return an Array with the matching Records' do
+ @response.should == [ @output_hash ]
+ end
+ end
+
+
+ describe 'with query scoped by a non-key' do
+ before :all do
+ @query = Book.all(:author => 'Dan Kubb').query
+ @response = @adapter.read(@query)
+ end
+
+ it 'should return an Array with the matching Records' do
+ @response.should == [ @output_hash ]
+ end
+ end
+
+ describe 'with a non-standard model <=> storage_name relationship' do
+ before :all do
+ @query = DifficultBook.all.query
+ @response = @adapter.read(@query)
+ end
+
+ it 'should return an Array with the matching Records' do
+ @response.should == [ @output_hash ]
+ end
+ end
+
+ describe '#update' do
+ before :all do
+ @resources = Book.all
+ @response = @adapter.update({ Book.properties[:author] => 'John Doe' }, @resources)
+ end
+
+ it 'should return the number of updated Resources' do
+ @response.should == 1
+ end
+
+ it 'should modify the Resource' do
+ @resources.first.author.should == 'John Doe'
+ end
+ end
+
+ describe '#delete' do
+ before :all do
+ @resources = Book.all
+ @response = @adapter.delete(@resources)
+ end
+
+ it 'should return the number of deleted Resources' do
+ @response.should == 1
+ end
+ end
+ end
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,4 @@
+ --colour
+ --loadby random
+ --format profile
+ --backtrace
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,30 @@
+ require 'rubygems'
+ require 'pathname'
+ require 'simple_uuid'
+
+ # use local dm-core if running from a typical dev checkout.
+ lib = File.join('..', '..', '..', 'dm-core', 'lib')
+ $LOAD_PATH.unshift(lib) if File.directory?(lib)
+
+ # use local dm-validations if running from a typical dev checkout.
+ lib = File.join('..', '..', 'dm-validations', 'lib')
+ $LOAD_PATH.unshift(lib) if File.directory?(lib)
+ require 'dm-validations'
+
+ # use local dm-serializer if running from a typical dev checkout.
+ lib = File.join('..', '..', 'dm-serializer', 'lib')
+ $LOAD_PATH.unshift(lib) if File.directory?(lib)
+
+ # Support running specs with 'rake spec' and 'spec'
+ $LOAD_PATH.unshift('lib') unless $LOAD_PATH.include?('lib')
+
+ require 'dm-cassandra-adapter'
+
+ ROOT = Pathname(__FILE__).dirname.parent
+
+ DataMapper.setup(:default, 'cassandra://memory/AdapterTest')
+
+ Dir[ROOT / 'spec' / 'fixtures' / '**' / '*.rb'].each { |rb| require rb }
+
+ ####FakeWeb.allow_net_connect = false
data/tasks/spec.rake ADDED
@@ -0,0 +1,41 @@
+ spec_defaults = lambda do |spec|
+ spec.pattern = 'spec/**/*_spec.rb'
+ spec.libs << 'lib' << 'spec'
+ spec.spec_opts << '--options' << 'spec/spec.opts'
+ end
+
+ begin
+ require 'spec/rake/spectask'
+
+ Spec::Rake::SpecTask.new(:spec, &spec_defaults)
+ rescue LoadError
+ task :spec do
+ abort 'rspec is not available. In order to run spec, you must: gem install rspec'
+ end
+ end
+
+ begin
+ require 'rcov'
+ require 'spec/rake/verify_rcov'
+
+ Spec::Rake::SpecTask.new(:rcov) do |rcov|
+ spec_defaults.call(rcov)
+ rcov.rcov = true
+ rcov.rcov_opts = File.read('spec/rcov.opts').split(/\s+/)
+ end
+
+ RCov::VerifyTask.new(:verify_rcov => :rcov) do |rcov|
+ rcov.threshold = 100
+ end
+ rescue LoadError
+ %w[ rcov verify_rcov ].each do |name|
+ task name do
+ abort "rcov is not available. In order to run #{name}, you must: gem install rcov"
+ end
+ end
+ end
+
+ #task :spec => :check_dependencies
+ #task :rcov => :check_dependencies
+
+ task :default => :spec
metadata ADDED
@@ -0,0 +1,145 @@
+ --- !ruby/object:Gem::Specification
+ name: dm-cassandra-adapter
+ version: !ruby/object:Gem::Version
+ prerelease: false
+ segments:
+ - 0
+ - 0
+ - 1
+ version: 0.0.1
+ platform: ruby
+ authors:
+ - Michael Rykov
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2010-12-19 00:00:00 -08:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+ name: dm-core
+ prerelease: false
+ requirement: &id001 !ruby/object:Gem::Requirement
+ requirements:
+ - - ~>
+ - !ruby/object:Gem::Version
+ segments:
+ - 1
+ - 0
+ - 2
+ version: 1.0.2
+ type: :runtime
+ version_requirements: *id001
+ - !ruby/object:Gem::Dependency
+ name: dm-serializer
+ prerelease: false
+ requirement: &id002 !ruby/object:Gem::Requirement
+ requirements:
+ - - ~>
+ - !ruby/object:Gem::Version
+ segments:
+ - 1
+ - 0
+ - 2
+ version: 1.0.2
+ type: :runtime
+ version_requirements: *id002
+ - !ruby/object:Gem::Dependency
+ name: rspec
+ prerelease: false
+ requirement: &id003 !ruby/object:Gem::Requirement
+ requirements:
+ - - ~>
+ - !ruby/object:Gem::Version
+ segments:
+ - 1
+ - 3
+ version: "1.3"
+ type: :development
+ version_requirements: *id003
+ - !ruby/object:Gem::Dependency
+ name: dm-validations
+ prerelease: false
+ requirement: &id004 !ruby/object:Gem::Requirement
+ requirements:
+ - - ~>
+ - !ruby/object:Gem::Version
+ segments:
+ - 1
+ - 0
+ - 2
+ version: 1.0.2
+ type: :development
+ version_requirements: *id004
+ - !ruby/object:Gem::Dependency
+ name: fakeweb
+ prerelease: false
+ requirement: &id005 !ruby/object:Gem::Requirement
+ requirements:
+ - - ~>
+ - !ruby/object:Gem::Version
+ segments:
+ - 1
+ - 3
+ version: "1.3"
+ type: :development
+ version_requirements: *id005
+ description: Cassandra Adapter for DataMapper
+ email: mrykov [a] gmail [d] com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files:
+ - LICENSE
+ files:
+ - LICENSE
+ - Rakefile
+ - VERSION
+ - conf/storage-conf.xml
+ - lib/dm-cassandra-adapter.rb
+ - lib/dm-cassandra-adapter/adapter.rb
+ - lib/dm-cassandra-adapter/spec/setup.rb
+ - spec/fixtures/book.rb
+ - spec/fixtures/difficult_book.rb
+ - spec/rcov.opts
+ - spec/semipublic/cassandra_adapter_spec.rb
+ - spec/spec.opts
+ - spec/spec_helper.rb
+ - tasks/spec.rake
+ has_rdoc: true
+ homepage: http://github.com/rykov/dm-cassandra-adapter
+ licenses: []
+
+ post_install_message:
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ segments:
+ - 0
+ version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ segments:
+ - 0
+ version: "0"
+ requirements: []
+
+ rubyforge_project: datamapper
+ rubygems_version: 1.3.6
+ signing_key:
+ specification_version: 3
+ summary: Cassandra Adapter for DataMapper
+ test_files:
+ - spec/fixtures/book.rb
+ - spec/fixtures/difficult_book.rb
+ - spec/semipublic/cassandra_adapter_spec.rb
+ - spec/spec_helper.rb