dm-cassandra-adapter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2010 Michael Rykov
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,32 @@
+ require 'rubygems'
+ require 'rake'
+
+ begin
+   gem 'jeweler', '~> 1.4'
+   require 'jeweler'
+
+   Jeweler::Tasks.new do |gem|
+     gem.name        = 'dm-cassandra-adapter'
+     gem.summary     = 'Cassandra Adapter for DataMapper'
+     gem.description = gem.summary
+     gem.email       = 'mrykov [a] gmail [d] com'
+     gem.homepage    = 'http://github.com/rykov/%s' % gem.name
+     gem.authors     = [ 'Michael Rykov' ]
+     gem.has_rdoc    = 'yard'
+
+     gem.rubyforge_project = 'datamapper'
+
+     gem.add_dependency 'dm-core',       '~> 1.0.2'
+     gem.add_dependency 'dm-serializer', '~> 1.0.2'
+
+     gem.add_development_dependency 'rspec',          '~> 1.3'
+     gem.add_development_dependency 'dm-validations', '~> 1.0.2'
+     gem.add_development_dependency 'fakeweb',        '~> 1.3'
+   end
+
+   Jeweler::GemcutterTasks.new
+
+   FileList['tasks/**/*.rake'].each { |task| import task }
+ rescue LoadError
+   puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler'
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.0.1
data/conf/storage-conf.xml ADDED
@@ -0,0 +1,354 @@
+ <!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements. See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership. The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License. You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied. See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+ -->
+ <Storage>
+   <!--======================================================================-->
+   <!-- Basic Configuration                                                  -->
+   <!--======================================================================-->
+
+   <!--
+    ~ The name of this cluster. This is mainly used to prevent machines in
+    ~ one logical cluster from joining another.
+   -->
+   <ClusterName>Test</ClusterName>
+
+   <!--
+    ~ Turn on to make new [non-seed] nodes automatically migrate the right data
+    ~ to themselves. (If no InitialToken is specified, they will pick one
+    ~ such that they will get half the range of the most-loaded node.)
+    ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
+    ~ so that you can't subsequently accidentally bootstrap a node with
+    ~ data on it. (You can reset this by wiping your data and commitlog
+    ~ directories.)
+    ~
+    ~ Off by default so that new clusters and upgraders from 0.4 don't
+    ~ bootstrap immediately. You should turn this on when you start adding
+    ~ new nodes to a cluster that already has data on it. (If you are upgrading
+    ~ from 0.4, start your cluster with it off once before changing it to true.
+    ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
+    ~ I/O before your cluster starts up.)
+   -->
+   <AutoBootstrap>false</AutoBootstrap>
+
+   <!--
+    ~ Keyspaces and ColumnFamilies:
+    ~ A ColumnFamily is the Cassandra concept closest to a relational
+    ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
+    ~ very unusual circumstances you will have one Keyspace per application.
+
+    ~ There is an implicit keyspace named 'system' for Cassandra internals.
+   -->
+   <Keyspaces>
+     <Keyspace Name="AdapterTest">
+       <KeysCachedFraction>0.01</KeysCachedFraction>
+
+       <!-- 'Books' is for our own tests -->
+       <ColumnFamily CompareWith="UTF8Type" Name="Books" />
+       <!-- 'Heffalumps' is for shared DataMapper adapter tests -->
+       <ColumnFamily CompareWith="UTF8Type" Name="Heffalumps" />
+
+       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+       <ReplicationFactor>1</ReplicationFactor>
+       <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+     </Keyspace>
+
+     <Keyspace Name="Twitter">
+       <KeysCachedFraction>0.01</KeysCachedFraction>
+       <ColumnFamily CompareWith="UTF8Type" Name="Users" />
+       <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
+       <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
+       <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
+       <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
+       <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
+       <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
+       <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
+       <ColumnFamily CompareWith="BytesType" ColumnType="Standard" Name="TimelinishThings" />
+
+       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+       <ReplicationFactor>1</ReplicationFactor>
+       <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+     </Keyspace>
+
+     <Keyspace Name="Multiblog">
+       <KeysCachedFraction>0.01</KeysCachedFraction>
+       <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
+       <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
+
+       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+       <ReplicationFactor>1</ReplicationFactor>
+       <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+     </Keyspace>
+
+     <Keyspace Name="MultiblogLong">
+       <KeysCachedFraction>0.01</KeysCachedFraction>
+       <ColumnFamily CompareWith="LongType" Name="Blogs"/>
+       <ColumnFamily CompareWith="LongType" Name="Comments"/>
+
+       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+       <ReplicationFactor>1</ReplicationFactor>
+       <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+     </Keyspace>
+
+     <Keyspace Name="CassandraObject">
+       <KeysCachedFraction>0.01</KeysCachedFraction>
+       <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
+       <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
+       <ColumnFamily CompareWith="TimeUUIDType" Name="CustomersByLastName" />
+       <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
+       <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
+       <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
+       <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
+       <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
+       <!-- <ColumnFamily CompareWith="UTF8Type" Name="FirstNames" /> -->
+
+       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+       <ReplicationFactor>1</ReplicationFactor>
+       <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+     </Keyspace>
+   </Keyspaces>
+
+   <!--
+    ~ Authenticator: any IAuthenticator may be used, including your own as long
+    ~ as it is on the classpath. Out of the box, Cassandra provides
+    ~ org.apache.cassandra.auth.AllowAllAuthenticator and
+    ~ org.apache.cassandra.auth.SimpleAuthenticator
+    ~ (SimpleAuthenticator uses access.properties and passwd.properties by
+    ~ default).
+    ~
+    ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
+   -->
+   <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
+
+   <!--
+    ~ Partitioner: any IPartitioner may be used, including your own as long
+    ~ as it is on the classpath. Out of the box, Cassandra provides
+    ~ org.apache.cassandra.dht.RandomPartitioner,
+    ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
+    ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
+    ~ (CollatingOPP collates according to EN,US rules, not naive byte
+    ~ ordering. Use this as an example if you need locale-aware collation.)
+    ~ Range queries require using an order-preserving partitioner.
+    ~
+    ~ Achtung! Changing this parameter requires wiping your data
+    ~ directories, since the partitioner can modify the sstable on-disk
+    ~ format.
+   -->
+   <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
+
+   <!--
+    ~ If you are using an order-preserving partitioner and you know your key
+    ~ distribution, you can specify the token for this node to use. (Keys
+    ~ are sent to the node with the "closest" token, so distributing your
+    ~ tokens equally along the key distribution space will spread keys
+    ~ evenly across your cluster.) This setting is only checked the first
+    ~ time a node is started.
+
+    ~ This can also be useful with RandomPartitioner to force equal spacing
+    ~ of tokens around the hash space, especially for clusters with a small
+    ~ number of nodes.
+   -->
+   <InitialToken></InitialToken>
+
+   <!--
+    ~ Directories: Specify where Cassandra should store different data on
+    ~ disk. Keep the data disks and the CommitLog disks separate for best
+    ~ performance
+   -->
+   <CommitLogDirectory>data/cassandra/commitlog</CommitLogDirectory>
+   <DataFileDirectories>
+     <DataFileDirectory>data/cassandra/data</DataFileDirectory>
+   </DataFileDirectories>
+   <CalloutLocation>data/cassandra/callouts</CalloutLocation>
+   <StagingFileDirectory>data/cassandra/staging</StagingFileDirectory>
+
+
+   <!--
+    ~ Addresses of hosts that are deemed contact points. Cassandra nodes
+    ~ use this list of hosts to find each other and learn the topology of
+    ~ the ring. You must change this if you are running multiple nodes!
+   -->
+   <Seeds>
+     <Seed>127.0.0.1</Seed>
+   </Seeds>
+
+
+   <!-- Miscellaneous -->
+
+   <!-- Time to wait for a reply from other nodes before failing the command -->
+   <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
+   <!-- Size to allow commitlog to grow to before creating a new segment -->
+   <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
+
+
+   <!-- Local hosts and ports -->
+
+   <!--
+    ~ Address to bind to and tell other nodes to connect to. You _must_
+    ~ change this if you want multiple nodes to be able to communicate!
+    ~
+    ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+    ~ will always do the Right Thing *if* the node is properly configured
+    ~ (hostname, name resolution, etc), and the Right Thing is to use the
+    ~ address associated with the hostname (it might not be).
+   -->
+   <ListenAddress>localhost</ListenAddress>
+   <!-- internal communications port -->
+   <StoragePort>7000</StoragePort>
+
+   <!--
+    ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
+    ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
+    ~ all interfaces.
+    ~
+    ~ Leaving this blank has the same effect it does for ListenAddress,
+    ~ (i.e. it will be based on the configured hostname of the node).
+   -->
+   <ThriftAddress>localhost</ThriftAddress>
+   <!-- Thrift RPC port (the port clients connect to). -->
+   <ThriftPort>9160</ThriftPort>
+   <!--
+    ~ Whether or not to use a framed transport for Thrift. If this option
+    ~ is set to true then you must also use a framed transport on the
+    ~ client-side, (framed and non-framed transports are not compatible).
+   -->
+   <ThriftFramedTransport>false</ThriftFramedTransport>
+
+
+   <!--======================================================================-->
+   <!-- Memory, Disk, and Performance                                        -->
+   <!--======================================================================-->
+
+   <!--
+    ~ Access mode. mmapped i/o is substantially faster, but only practical on
+    ~ a 64bit machine (which notably does not include EC2 "small" instances)
+    ~ or relatively small datasets. "auto", the safe choice, will enable
+    ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
+    ~ (which may allow you to get part of the benefits of mmap on a 32bit
+    ~ machine by mmapping only index files) and "standard".
+    ~ (The buffer size settings that follow only apply to standard,
+    ~ non-mmapped i/o.)
+   -->
+   <DiskAccessMode>auto</DiskAccessMode>
+
+   <!--
+    ~ Buffer size to use when performing contiguous column slices. Increase
+    ~ this to the size of the column slices you typically perform.
+    ~ (Name-based queries are performed with a buffer size of
+    ~ ColumnIndexSizeInKB.)
+   -->
+   <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
+
+   <!--
+    ~ Buffer size to use when flushing memtables to disk. (Only one
+    ~ memtable is ever flushed at a time.) Increase (decrease) the index
+    ~ buffer size relative to the data buffer if you have few (many)
+    ~ columns per key. Bigger is only better _if_ your memtables get large
+    ~ enough to use the space. (Check in your data directory after your
+    ~ app has been running long enough.) -->
+   <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
+   <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
+
+   <!--
+    ~ Add column indexes to a row after its contents reach this size.
+    ~ Increase if your column values are large, or if you have a very large
+    ~ number of columns. The competing causes are, Cassandra has to
+    ~ deserialize this much of the row to read a single column, so you want
+    ~ it to be small - at least if you do many partial-row reads - but all
+    ~ the index data is read for each access, so you don't want to generate
+    ~ that wastefully either.
+   -->
+   <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
+
+   <!--
+    ~ Flush memtable after this much data has been inserted, including
+    ~ overwritten data. There is one memtable per column family, and
+    ~ this threshold is based solely on the amount of data stored, not
+    ~ actual heap memory usage (there is some overhead in indexing the
+    ~ columns).
+   -->
+   <MemtableThroughputInMB>64</MemtableThroughputInMB>
+   <!--
+    ~ Throughput setting for Binary Memtables. Typically these are
+    ~ used for bulk load so you want them to be larger.
+   -->
+   <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
+   <!--
+    ~ The maximum number of columns in millions to store in memory per
+    ~ ColumnFamily before flushing to disk. This is also a per-memtable
+    ~ setting. Use with MemtableThroughputInMB to tune memory usage.
+   -->
+   <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
+   <!--
+    ~ The maximum time to leave a dirty memtable unflushed.
+    ~ (While any affected columnfamilies have unflushed data from a
+    ~ commit log segment, that segment cannot be deleted.)
+    ~ This needs to be large enough that it won't cause a flush storm
+    ~ of all your memtables flushing at once because none has hit
+    ~ the size or count thresholds yet. For production, a larger
+    ~ value such as 1440 is recommended.
+   -->
+   <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
+
+   <!--
+    ~ Unlike most systems, in Cassandra writes are faster than reads, so
+    ~ you can afford more of those in parallel. A good rule of thumb is 2
+    ~ concurrent reads per processor core. Increase ConcurrentWrites to
+    ~ the number of clients writing at once if you enable CommitLogSync +
+    ~ CommitLogSyncDelay. -->
+   <ConcurrentReads>8</ConcurrentReads>
+   <ConcurrentWrites>32</ConcurrentWrites>
+
+   <!--
+    ~ CommitLogSync may be either "periodic" or "batch." When in batch
+    ~ mode, Cassandra won't ack writes until the commit log has been
+    ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
+    ~ milliseconds for other writes, before performing the sync.
+
+    ~ This is less necessary in Cassandra than in traditional databases
+    ~ since replication reduces the odds of losing data from a failure
+    ~ after writing the log entry but before it actually reaches the disk.
+    ~ So the other option is "timed," where writes may be acked immediately
+    ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
+    ~ milliseconds.
+   -->
+   <CommitLogSync>periodic</CommitLogSync>
+   <!--
+    ~ Interval at which to perform syncs of the CommitLog in periodic mode.
+    ~ Usually the default of 10000ms is fine; increase it if your i/o
+    ~ load is such that syncs are taking excessively long times.
+   -->
+   <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
+   <!--
+    ~ Delay (in milliseconds) during which additional commit log entries
+    ~ may be written before fsync in batch mode. This will increase
+    ~ latency slightly, but can vastly improve throughput where there are
+    ~ many writers. Set to zero to disable (each entry will be synced
+    ~ individually). Reasonable values range from a minimal 0.1 to 10 or
+    ~ even more if throughput matters more than latency.
+   -->
+   <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
+
+   <!--
+    ~ Time to wait before garbage-collecting deletion markers. Set this to
+    ~ a large enough value that you are confident that the deletion marker
+    ~ will be propagated to all replicas by the time this many seconds has
+    ~ elapsed, even in the face of hardware failures. The default value is
+    ~ ten days.
+   -->
+   <GCGraceSeconds>864000</GCGraceSeconds>
+ </Storage>
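
As a side note, the AdapterTest keyspace above is the one this gem's specs run against. Below is a minimal sketch of talking to it directly over Thrift with the cassandra gem, mirroring the calls the adapter itself makes (insert/get against a column family); the row key '42' and the locally running node are assumptions for illustration only:

    require 'cassandra'

    # Connect to the AdapterTest keyspace on the default Thrift port (9160),
    # the same way the adapter builds its client for non-memory hosts.
    client = Cassandra.new('AdapterTest', '127.0.0.1:9160')

    # 'Books' is one of the UTF8Type column families declared above.
    client.insert('Books', '42', 'title' => 'DataMapper', 'author' => 'Dan Kubb')
    client.get('Books', '42') # => the columns written above, keyed by name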
data/lib/dm-cassandra-adapter.rb ADDED
@@ -0,0 +1,9 @@
+ require 'extlib'
+ require 'dm-core'
+ require 'dm-serializer'
+ require 'cassandra'
+
+ require 'dm-cassandra-adapter/adapter'
+
+ DataMapper::Adapters::CassandraAdapter = DataMapperCassandra::Adapter
+ DataMapper::Adapters.const_added(:CassandraAdapter)
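
This entry file registers DataMapperCassandra::Adapter with dm-core under the cassandra URI scheme. A minimal setup sketch, using the same URI the gem's own spec_helper uses (the host 'memory' selects the Cassandra::Mock backend; the URI path names the keyspace):

    require 'dm-cassandra-adapter'

    # In-memory mock backed by the bundled conf/storage-conf.xml:
    DataMapper.setup(:default, 'cassandra://memory/AdapterTest')

    # Or against a real node; the adapter defaults the port to 9160 when omitted:
    # DataMapper.setup(:default, 'cassandra://127.0.0.1:9160/AdapterTest')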
data/lib/dm-cassandra-adapter/adapter.rb ADDED
@@ -0,0 +1,169 @@
+ module DataMapperCassandra
+   # TODO: Do not store IDs in the object hash ????
+
+   class Adapter < DataMapper::Adapters::AbstractAdapter
+     def create(resources)
+       client.batch do
+         resources.each do |resource|
+           repository = resource.repository
+           model      = resource.model
+           attributes = resource.attributes
+           properties = model.properties(repository.name)
+
+           ## Figure out or generate the key
+           kind = self.column_family(model)
+           keys = properties.key
+           raise "Multiple keys in #{resource.inspect}" if keys.size > 1
+           if keys.size == 1
+             name     = keys.first.name
+             property = properties[name]
+             key      = convert_value(property, attributes[name])
+           end
+           if keys.first.serial? && (key.nil? || key == 0 || key == '')
+             name     = keys.first.name
+             property = properties[name]
+             key = if property.primitive == Integer
+               # BAD: for Serial
+               Time.stamp & 0x7FFFFFFF
+             else
+               # GOOD: for UUID/:key => true
+               SimpleUUID::UUID.new.to_guid
+             end
+           end
+
+           initialize_serial(resource, key)
+           attributes = resource.attributes
+
+           #puts "#{key} => #{attributes.inspect}"
+
+           ## Convert to serialized data ##
+           data = {}
+           attributes.each do |name, value|
+             property = properties[name]
+             data[property.field] = convert_value(property, value)
+           end
+
+           # Insert this resource into Cassandra
+           client.insert(kind, key.to_s, data)
+         end
+       end
+       resources
+     end
+
+     def column_family(model)
+       model.storage_name(self.name)
+     end
+
+     def convert_value(property, value)
+       property.dump(value)
+     end
+
+     def read(query)
+       model = query.model
+       kind  = self.column_family(model)
+
+       records = if id = extract_id_from_query(query)
+         data = client.get(kind, id.to_s)
+         [ load_resource(data, model) ]
+       else
+         # raise NotImplementedError.new("SimpleDB supports only a single order clause")
+         # FIXME - This is terrible, we should not get all keys
+         all_keys  = client.get_range(kind)
+         data_hash = client.multi_get(kind, all_keys)
+         data_hash.map do |id, data|
+           load_resource(data, model)
+         end
+       end
+
+       query.filter_records(records)
+     end
+
+     def update(dirty_attributes, collection)
+       client.batch do
+         count = collection.select do |resource|
+           model = resource.model
+           kind  = self.column_family(model)
+           key   = model.key
+           id    = key.get(resource).join
+
+           data = {}
+           dirty_attributes.each do |property, value|
+             property.set!(resource, value)
+             data[property.field] = convert_value(property, value)
+           end
+
+           client.insert(kind, id, data)
+         end
+       end.size
+     end
+
+     def delete(collection)
+       client.batch do
+         count = collection.select do |resource|
+           model = resource.model
+           kind  = self.column_family(model)
+           key   = model.key
+           id    = key.get(resource).join
+
+           client.remove(kind, id)
+         end
+       end.size
+     end
+
+     private
+
+     def initialize(*)
+       super
+       @resource_naming_convention = lambda do |value|
+         Extlib::Inflection.pluralize(Extlib::Inflection.camelize(value))
+       end
+     end
+
+     def client
+       @client ||= begin
+         keyspace = @options[:path][1..-1] # Without leading slash
+         if @options[:host] == 'memory'
+           require 'cassandra/mock'
+           this_dir = File.dirname(__FILE__)
+           conf_xml = File.expand_path('../../conf/storage-conf.xml', this_dir)
+           Cassandra::Mock.new(keyspace, conf_xml)
+         else
+           server = "#{@options[:host]}:#{@options[:port] || 9160}"
+           Cassandra.new(keyspace, server)
+         end
+       end
+     end
+
+     def extract_id_from_query(query)
+       return nil unless query.limit == 1
+
+       conditions = query.conditions
+
+       return nil unless conditions.kind_of?(DataMapper::Query::Conditions::AndOperation)
+       return nil unless (key_condition = conditions.select { |o| o.subject.key? }).size == 1
+
+       key_condition.first.value
+     end
+
+     def extract_params_from_query(query)
+       conditions = query.conditions
+
+       return {} unless conditions.kind_of?(DataMapper::Query::Conditions::AndOperation)
+       return {} if conditions.any? { |o| o.subject.key? }
+
+       query.options
+     end
+
+     ## CASSANDRA ###
+     def load_resource(data, model)
+       field_to_property = model.properties(name).map { |p| [ p.field, p ] }.to_hash
+
+       record = {}
+       data.each do |key, value|
+         next unless property = field_to_property[key]
+         record[key] = property.load(value)
+       end
+       record
+     end
+   end
+ end
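
Taken together: create generates a key for serial properties (a truncated Time.stamp for Integer keys, a SimpleUUID GUID otherwise) and writes one row per resource, while read serves a key-scoped, limit-1 query with a single get and falls back to scanning every key for anything else. A hedged usage sketch against the Book fixture defined later in this gem:

    book = Book.create(:title => 'DataMapper', :author => 'Dan Kubb')
    book.id                                # Integer from Time.stamp & 0x7FFFFFFF

    Book.all(:id => book.id, :limit => 1)  # one client.get on the 'Books' row
    Book.all(:author => 'Dan Kubb')        # get_range + multi_get, filtered in memory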
data/lib/dm-cassandra-adapter/spec/setup.rb ADDED
@@ -0,0 +1,15 @@
+ require 'dm-cassandra-adapter'
+ require 'dm-core/spec/setup'
+
+ module DataMapper
+   module Spec
+     module Adapters
+
+       class CassandraAdapter < Adapter
+       end
+
+       use CassandraAdapter
+
+     end
+   end
+ end
data/spec/fixtures/book.rb ADDED
@@ -0,0 +1,8 @@
+ class Book
+   include DataMapper::Resource
+
+   property :id,         Serial
+   property :created_at, DateTime
+   property :title,      String
+   property :author,     String
+ end
data/spec/fixtures/difficult_book.rb ADDED
@@ -0,0 +1,10 @@
+ class DifficultBook
+   include DataMapper::Resource
+
+   storage_names[:default] = 'Books'
+
+   property :id,         Serial
+   property :created_at, DateTime
+   property :title,      String
+   property :author,     String
+ end
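
Both fixtures persist to the same 'Books' column family: Book reaches it through the adapter's naming convention (pluralized CamelCase of the model name), DifficultBook through the explicit storage_names override, which is what the 'non-standard model <=> storage_name relationship' spec below exercises:

    Book.storage_name(:default)          # => "Books" (derived by convention)
    DifficultBook.storage_name(:default) # => "Books" (explicit override above)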
data/spec/rcov.opts ADDED
@@ -0,0 +1,6 @@
+ --exclude "spec,^/"
+ --sort coverage
+ --callsites
+ --xrefs
+ --profile
+ --text-summary
data/spec/semipublic/cassandra_adapter_spec.rb ADDED
@@ -0,0 +1,116 @@
+ require 'spec_helper'
+ require DataMapper.root / 'lib' / 'dm-core' / 'spec' / 'shared' / 'adapter_spec'
+
+ describe DataMapper::Adapters::CassandraAdapter do
+   before :all do
+     @adapter = DataMapper::Repository.adapters[:default]
+     @adapter.send(:client).clear_keyspace!
+   end
+
+   # Shared DataMapper::Adapter specs
+   it_should_behave_like 'An Adapter'
+
+
+   describe 'with one created resource' do
+     before :all do
+       @input_hash = {
+         :created_at => DateTime.parse('2009-05-17T22:38:42-07:00'),
+         :title      => 'DataMapper',
+         :author     => 'Dan Kubb'
+       }
+
+       # Create resource
+       @resource     = Book.new(@input_hash)
+       @resources    = [ @resource ]
+       @response     = @adapter.create(@resources)
+       @generated_id = @resource.id
+
+       # Stringify keys and add the Generated ID
+       @output_hash = @input_hash.inject('id' => @generated_id) do |s, kv|
+         s[kv[0].to_s] = kv[1]
+         s
+       end
+     end
+
+     it 'should return an Array containing the Resource' do
+       @response.should equal(@resources)
+     end
+
+     it 'should set the identity field' do
+       @generated_id.should be_present
+     end
+
+     describe '#read' do
+       describe 'with unscoped query' do
+         before :all do
+           @query    = Book.all.query
+           @response = @adapter.read(@query)
+         end
+
+         it 'should return an Array with the matching Records' do
+           @response.should == [ @output_hash ]
+         end
+       end
+     end
+
+     describe 'with query scoped by a key' do
+       before :all do
+         @query    = Book.all(:id => @generated_id, :limit => 1).query
+         @response = @adapter.read(@query)
+       end
+
+       it 'should return an Array with the matching Records' do
+         @response.should == [ @output_hash ]
+       end
+     end
+
+
+     describe 'with query scoped by a non-key' do
+       before :all do
+         @query    = Book.all(:author => 'Dan Kubb').query
+         @response = @adapter.read(@query)
+       end
+
+       it 'should return an Array with the matching Records' do
+         @response.should == [ @output_hash ]
+       end
+     end
+
+     describe 'with a non-standard model <=> storage_name relationship' do
+       before :all do
+         @query    = DifficultBook.all.query
+         @response = @adapter.read(@query)
+       end
+
+       it 'should return an Array with the matching Records' do
+         @response.should == [ @output_hash ]
+       end
+     end
+
+     describe '#update' do
+       before :all do
+         @resources = Book.all
+         @response  = @adapter.update({ Book.properties[:author] => 'John Doe' }, @resources)
+       end
+
+       it 'should return the number of updated Resources' do
+         @response.should == 1
+       end
+
+       it 'should modify the Resource' do
+         @resources.first.author.should == 'John Doe'
+       end
+     end
+
+     describe '#delete' do
+       before :all do
+         @resources = Book.all
+         @response  = @adapter.delete(@resources)
+       end
+
+       it 'should return the number of deleted Resources' do
+         @response.should == 1
+       end
+     end
+   end
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,4 @@
+ --colour
+ --loadby random
+ --format profile
+ --backtrace
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,30 @@
+ require 'rubygems'
+ require 'pathname'
+ require 'simple_uuid'
+
+ # use local dm-core if running from a typical dev checkout.
+ lib = File.join('..', '..', '..', 'dm-core', 'lib')
+ $LOAD_PATH.unshift(lib) if File.directory?(lib)
+
+ # use local dm-validations if running from a typical dev checkout.
+ lib = File.join('..', '..', 'dm-validations', 'lib')
+ $LOAD_PATH.unshift(lib) if File.directory?(lib)
+ require 'dm-validations'
+
+ # use local dm-serializer if running from a typical dev checkout.
+ lib = File.join('..', '..', 'dm-serializer', 'lib')
+ $LOAD_PATH.unshift(lib) if File.directory?(lib)
+
+ # Support running specs with 'rake spec' and 'spec'
+ $LOAD_PATH.unshift('lib') unless $LOAD_PATH.include?('lib')
+
+ require 'simple_uuid'
+ require 'dm-cassandra-adapter'
+
+ ROOT = Pathname(__FILE__).dirname.parent
+
+ DataMapper.setup(:default, 'cassandra://memory/AdapterTest')
+
+ Dir[ROOT / 'spec' / 'fixtures' / '**' / '*.rb'].each { |rb| require rb }
+
+ ####FakeWeb.allow_net_connect = false
data/tasks/spec.rake ADDED
@@ -0,0 +1,41 @@
+ spec_defaults = lambda do |spec|
+   spec.pattern = 'spec/**/*_spec.rb'
+   spec.libs << 'lib' << 'spec'
+   spec.spec_opts << '--options' << 'spec/spec.opts'
+ end
+
+ begin
+   require 'spec/rake/spectask'
+
+   Spec::Rake::SpecTask.new(:spec, &spec_defaults)
+ rescue LoadError
+   task :spec do
+     abort 'rspec is not available. In order to run spec, you must: gem install rspec'
+   end
+ end
+
+ begin
+   require 'rcov'
+   require 'spec/rake/verify_rcov'
+
+   Spec::Rake::SpecTask.new(:rcov) do |rcov|
+     spec_defaults.call(rcov)
+     rcov.rcov = true
+     rcov.rcov_opts = File.read('spec/rcov.opts').split(/\s+/)
+   end
+
+   RCov::VerifyTask.new(:verify_rcov => :rcov) do |rcov|
+     rcov.threshold = 100
+   end
+ rescue LoadError
+   %w[ rcov verify_rcov ].each do |name|
+     task name do
+       abort "rcov is not available. In order to run #{name}, you must: gem install rcov"
+     end
+   end
+ end
+
+ #task :spec => :check_dependencies
+ #task :rcov => :check_dependencies
+
+ task :default => :spec
metadata ADDED
@@ -0,0 +1,145 @@
+ --- !ruby/object:Gem::Specification
+ name: dm-cassandra-adapter
+ version: !ruby/object:Gem::Version
+   prerelease: false
+   segments:
+   - 0
+   - 0
+   - 1
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Michael Rykov
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2010-12-19 00:00:00 -08:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: dm-core
+   prerelease: false
+   requirement: &id001 !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         segments:
+         - 1
+         - 0
+         - 2
+         version: 1.0.2
+   type: :runtime
+   version_requirements: *id001
+ - !ruby/object:Gem::Dependency
+   name: dm-serializer
+   prerelease: false
+   requirement: &id002 !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         segments:
+         - 1
+         - 0
+         - 2
+         version: 1.0.2
+   type: :runtime
+   version_requirements: *id002
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   prerelease: false
+   requirement: &id003 !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         segments:
+         - 1
+         - 3
+         version: "1.3"
+   type: :development
+   version_requirements: *id003
+ - !ruby/object:Gem::Dependency
+   name: dm-validations
+   prerelease: false
+   requirement: &id004 !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         segments:
+         - 1
+         - 0
+         - 2
+         version: 1.0.2
+   type: :development
+   version_requirements: *id004
+ - !ruby/object:Gem::Dependency
+   name: fakeweb
+   prerelease: false
+   requirement: &id005 !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         segments:
+         - 1
+         - 3
+         version: "1.3"
+   type: :development
+   version_requirements: *id005
+ description: Cassandra Adapter for DataMapper
+ email: mrykov [a] gmail [d] com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files:
+ - LICENSE
+ files:
+ - LICENSE
+ - Rakefile
+ - VERSION
+ - conf/storage-conf.xml
+ - lib/dm-cassandra-adapter.rb
+ - lib/dm-cassandra-adapter/adapter.rb
+ - lib/dm-cassandra-adapter/spec/setup.rb
+ - spec/fixtures/book.rb
+ - spec/fixtures/difficult_book.rb
+ - spec/rcov.opts
+ - spec/semipublic/cassandra_adapter_spec.rb
+ - spec/spec.opts
+ - spec/spec_helper.rb
+ - tasks/spec.rake
+ has_rdoc: true
+ homepage: http://github.com/rykov/dm-cassandra-adapter
+ licenses: []
+
+ post_install_message:
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       segments:
+       - 0
+       version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       segments:
+       - 0
+       version: "0"
+ requirements: []
+
+ rubyforge_project: datamapper
+ rubygems_version: 1.3.6
+ signing_key:
+ specification_version: 3
+ summary: Cassandra Adapter for DataMapper
+ test_files:
+ - spec/fixtures/book.rb
+ - spec/fixtures/difficult_book.rb
+ - spec/semipublic/cassandra_adapter_spec.rb
+ - spec/spec_helper.rb
+ - spec/spec_helper.rb