dm-adapter-simpledb 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. data/.gitignore +1 -0
  2. data/History.txt +21 -0
  3. data/README +21 -8
  4. data/Rakefile +35 -23
  5. data/VERSION +1 -1
  6. data/dm-adapter-simpledb.gemspec +44 -24
  7. data/lib/dm-adapter-simpledb.rb +17 -0
  8. data/lib/dm-adapter-simpledb/adapters/simpledb_adapter.rb +339 -0
  9. data/lib/dm-adapter-simpledb/chunked_string.rb +54 -0
  10. data/lib/dm-adapter-simpledb/migrations/simpledb_adapter.rb +45 -0
  11. data/lib/dm-adapter-simpledb/rake.rb +43 -0
  12. data/lib/dm-adapter-simpledb/record.rb +318 -0
  13. data/lib/{simpledb_adapter → dm-adapter-simpledb}/sdb_array.rb +0 -0
  14. data/lib/dm-adapter-simpledb/table.rb +40 -0
  15. data/lib/dm-adapter-simpledb/utils.rb +15 -0
  16. data/lib/simpledb_adapter.rb +2 -469
  17. data/scripts/simple_benchmark.rb +1 -1
  18. data/spec/{associations_spec.rb → integration/associations_spec.rb} +0 -0
  19. data/spec/{compliance_spec.rb → integration/compliance_spec.rb} +0 -0
  20. data/spec/{date_spec.rb → integration/date_spec.rb} +0 -0
  21. data/spec/{limit_and_order_spec.rb → integration/limit_and_order_spec.rb} +0 -0
  22. data/spec/{migrations_spec.rb → integration/migrations_spec.rb} +0 -0
  23. data/spec/{multiple_records_spec.rb → integration/multiple_records_spec.rb} +0 -0
  24. data/spec/{nils_spec.rb → integration/nils_spec.rb} +0 -0
  25. data/spec/{sdb_array_spec.rb → integration/sdb_array_spec.rb} +4 -5
  26. data/spec/{simpledb_adapter_spec.rb → integration/simpledb_adapter_spec.rb} +65 -0
  27. data/spec/{spec_helper.rb → integration/spec_helper.rb} +8 -3
  28. data/spec/unit/record_spec.rb +346 -0
  29. data/spec/unit/simpledb_adapter_spec.rb +80 -0
  30. data/spec/unit/unit_spec_helper.rb +26 -0
  31. metadata +58 -24
  32. data/tasks/devver.rake +0 -167
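
The main structural change in 1.1.0 is that the adapter code moves out of the single data/lib/simpledb_adapter.rb file into the data/lib/dm-adapter-simpledb/ tree (adapter, record, table, chunked string, migrations, rake tasks), and the specs are split into integration/ and unit/ suites. The old entry point is kept as a one-line compatibility shim (see the last hunk below), so both require forms in this sketch should keep loading the library; the sketch is inferred from the file moves listed above rather than from the gem's README:

    # New canonical entry point in 1.1.0
    require 'dm-adapter-simpledb'

    # Legacy entry point, retained only as a shim that requires the file above
    require 'simpledb_adapter'
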
@@ -0,0 +1,40 @@
+ module DmAdapterSimpledb
+   class Table
+
+     def self.name_from_metadata(metadata)
+       Array(metadata).grep(/^table:(.*)$/) do |match|
+         return $1
+       end
+       nil
+     end
+
+     def self.token_for(name)
+       "table:#{name}"
+     end
+
+     attr_reader :model
+
+     def initialize(model)
+       @model = model
+     end
+
+     # Returns a string so we know what type of
+     def simpledb_type
+       model.storage_name(repository_name)
+     end
+
+     def repository_name
+       # TODO this should probably take into account the adapter
+       model.repository.name
+     end
+
+     # Returns the keys for model sorted in alphabetical order
+     def keys_for_model
+       model.key(repository_name).sort {|a,b| a.name.to_s <=> b.name.to_s }
+     end
+
+     def token
+       self.class.token_for(simpledb_type)
+     end
+   end
+ end
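
For reference, a minimal sketch of how the Table token helpers added above behave, assuming the class is loaded via the gem's top-level require; the model name and metadata array are made up for illustration:

    require 'dm-adapter-simpledb'

    token = DmAdapterSimpledb::Table.token_for("posts")
    # => "table:posts"

    # name_from_metadata greps an array of metadata strings for a table token
    # and returns the captured name, or nil when no token is present.
    DmAdapterSimpledb::Table.name_from_metadata([token, "other metadata"])
    # => "posts"
    DmAdapterSimpledb::Table.name_from_metadata(["other metadata"])
    # => nil
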
@@ -0,0 +1,15 @@
+ module DmAdapterSimpledb
+   module Utils
+     def transform_hash(original, options={}, &block)
+       original.inject({}){|result, (key,value)|
+         value = if (options[:deep] && Hash === value)
+                   transform_hash(value, options, &block)
+                 else
+                   value
+                 end
+         block.call(result,key,value)
+         result
+       }
+     end
+   end
+ end
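
A quick illustration of the transform_hash helper added above: it builds a new hash by yielding (result, key, value) to the block and, when :deep is set, recurses into nested hashes first. The sample data here is made up:

    include DmAdapterSimpledb::Utils

    h = { "name" => "record", :nested => { :count => 1 } }

    # Symbolize only the top-level keys
    transform_hash(h) { |result, key, value| result[key.to_sym] = value }
    # => {:name=>"record", :nested=>{:count=>1}}

    # Stringify keys at every level
    transform_hash(h, :deep => true) { |result, key, value| result[key.to_s] = value }
    # => {"name"=>"record", "nested"=>{"count"=>1}}
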
@@ -1,469 +1,2 @@
- require 'rubygems'
- require 'dm-core'
- require 'digest/sha1'
- require 'dm-aggregates'
- require 'right_aws'
- require 'uuidtools'
- require File.expand_path('simpledb_adapter/sdb_array', File.dirname(__FILE__))
-
- module DataMapper
-
-   module Migrations
-     #integrated from http://github.com/edward/dm-simpledb/tree/master
-     module SimpledbAdapter
-
-       module ClassMethods
-
-       end
-
-       def self.included(other)
-         other.extend ClassMethods
-
-         DataMapper.extend(::DataMapper::Migrations::SingletonMethods)
-
-         [ :Repository, :Model ].each do |name|
-           ::DataMapper.const_get(name).send(:include, Migrations.const_get(name))
-         end
-       end
-
-       # Returns whether the storage_name exists.
-       # @param storage_name<String> a String defining the name of a domain
-       # @return <Boolean> true if the storage exists
-       def storage_exists?(storage_name)
-         domains = sdb.list_domains[:domains]
-         domains.detect {|d| d == storage_name }!=nil
-       end
-
-       def create_model_storage(model)
-         sdb.create_domain(@sdb_options[:domain])
-       end
-
-       #On SimpleDB you probably don't want to destroy the whole domain
-       #if you are just adding fields it is automatically supported
-       #default to non destructive migrate, to destroy run
-       #rake db:automigrate destroy=true
-       def destroy_model_storage(model)
-         if ENV['destroy']!=nil && ENV['destroy']=='true'
-           sdb.delete_domain(@sdb_options[:domain])
-         end
-       end
-
-     end # module Migration
-   end # module Migration
-
-   module Adapters
-     class SimpleDBAdapter < AbstractAdapter
-
-       attr_reader :sdb_options
-
-       # For testing purposes ONLY. Seriously, don't enable this for production
-       # code.
-       attr_accessor :consistency_policy
-
-       def initialize(name, normalised_options)
-         super
-         @sdb_options = {}
-         @sdb_options[:access_key] = options.fetch(:access_key) {
-           options[:user]
-         }
-         @sdb_options[:secret_key] = options.fetch(:secret_key) {
-           options[:password]
-         }
-         @sdb_options[:logger] = options.fetch(:logger) { DataMapper.logger }
-         @sdb_options[:server] = options.fetch(:host) { 'sdb.amazonaws.com' }
-         @sdb_options[:port] = options[:port] || 443 # port may be set but nil
-         @sdb_options[:domain] = options.fetch(:domain) {
-           options[:path].to_s.gsub(%r{(^/+)|(/+$)},"") # remove slashes
-         }
-         @consistency_policy =
-           normalised_options.fetch(:wait_for_consistency) { false }
-       end
-
-       def create(resources)
-         created = 0
-         time = Benchmark.realtime do
-           resources.each do |resource|
-             uuid = UUIDTools::UUID.timestamp_create
-             initialize_serial(resource, uuid.to_i)
-             item_name = item_name_for_resource(resource)
-             sdb_type = simpledb_type(resource.model)
-             attributes = resource.attributes.merge(:simpledb_type => sdb_type)
-             attributes = adjust_to_sdb_attributes(attributes)
-             attributes.reject!{|name, value| value.nil?}
-             sdb.put_attributes(domain, item_name, attributes)
-             created += 1
-           end
-         end
-         DataMapper.logger.debug(format_log_entry("(#{created}) INSERT #{resources.inspect}", time))
-         modified!
-         created
-       end
-
-       def delete(collection)
-         deleted = 0
-         time = Benchmark.realtime do
-           collection.each do |resource|
-             item_name = item_name_for_resource(resource)
-             sdb.delete_attributes(domain, item_name)
-             deleted += 1
-           end
-           raise NotImplementedError.new('Only :eql on delete at the moment') if not_eql_query?(collection.query)
-         end; DataMapper.logger.debug(format_log_entry("(#{deleted}) DELETE #{collection.query.conditions.inspect}", time))
-         modified!
-         deleted
-       end
-
-       def read(query)
-         maybe_wait_for_consistency
-         sdb_type = simpledb_type(query.model)
-
-         conditions, order, unsupported_conditions =
-           set_conditions_and_sort_order(query, sdb_type)
-         results = get_results(query, conditions, order)
-         proto_resources = results.map do |result|
-           name, attributes = *result.to_a.first
-           proto_resource = query.fields.inject({}) do |proto_resource, property|
-             value = attributes[property.field.to_s]
-             if value != nil
-               if value.size > 1
-                 if property.type == String
-                   value = chunks_to_string(value)
-                 else
-                   value = value.map {|v| property.typecast(v) }
-                 end
-               else
-                 value = property.typecast(value.first)
-               end
-             else
-               value = property.typecast(nil)
-             end
-             proto_resource[property.name.to_s] = value
-             proto_resource
-           end
-           proto_resource
-         end
-         query.conditions.operands.reject!{ |op|
-           !unsupported_conditions.include?(op)
-         }
-         records = query.filter_records(proto_resources)
-
-         records
-       end
-
-       def update(attributes, collection)
-         updated = 0
-         attrs_to_update, attrs_to_delete = prepare_attributes(attributes)
-         time = Benchmark.realtime do
-           collection.each do |resource|
-             item_name = item_name_for_resource(resource)
-             unless attrs_to_update.empty?
-               sdb.put_attributes(domain, item_name, attrs_to_update, :replace)
-             end
-             unless attrs_to_delete.empty?
-               sdb.delete_attributes(domain, item_name, attrs_to_delete)
-             end
-             updated += 1
-           end
-           raise NotImplementedError.new('Only :eql on delete at the moment') if not_eql_query?(collection.query)
-         end
-         DataMapper.logger.debug(format_log_entry("UPDATE #{collection.query.conditions.inspect} (#{updated} times)", time))
-         modified!
-         updated
-       end
-
-       def query(query_call, query_limit = 999999999)
-         select(query_call, query_limit).collect{|x| x.values[0]}
-       end
-
-       def aggregate(query)
-         raise ArgumentError.new("Only count is supported") unless (query.fields.first.operator == :count)
-         sdb_type = simpledb_type(query.model)
-         conditions, order, unsupported_conditions = set_conditions_and_sort_order(query, sdb_type)
-
-         query_call = "SELECT count(*) FROM #{domain} "
-         query_call << "WHERE #{conditions.compact.join(' AND ')}" if conditions.length > 0
-         results = nil
-         time = Benchmark.realtime do
-           results = sdb.select(query_call)
-         end; DataMapper.logger.debug(format_log_entry(query_call, time))
-         [results[:items][0].values.first["Count"].first.to_i]
-       end
-
-       # For testing purposes only.
-       def wait_for_consistency
-         return unless @current_consistency_token
-         token = :none
-         begin
-           results = sdb.get_attributes(domain, '__dm_consistency_token', '__dm_consistency_token')
-           tokens = results[:attributes]['__dm_consistency_token']
-         end until tokens.include?(@current_consistency_token)
-       end
-
-       private
-
-       # hack for converting and storing strings longer than 1024 one thing to
-       # note if you use string longer than 1019 chars you will loose the ability
-       # to do full text matching on queries as the string can be broken at any
-       # place during chunking
-       def adjust_to_sdb_attributes(attrs)
-         attrs.each_pair do |key, value|
-           if value.kind_of?(String)
-             # Strings need to be inside arrays in order to prevent RightAws from
-             # inadvertantly splitting them on newlines when it calls
-             # Array(value).
-             attrs[key] = [value]
-           end
-           if value.is_a?(String) && value.length > 1019
-             chunked = string_to_chunks(value)
-             attrs[key] = chunked
-           end
-         end
-         attrs
-       end
-
-       def string_to_chunks(value)
-         chunks = value.to_s.scan(%r/.{1,1019}/) # 1024 - '1024:'.size
-         i = -1
-         fmt = '%04d:'
-         chunks.map!{|chunk| [(fmt % (i += 1)), chunk].join}
-         raise ArgumentError, 'that is just too big yo!' if chunks.size >= 256
-         chunks
-       end
-
-       def chunks_to_string(value)
-         begin
-           chunks =
-             Array(value).flatten.map do |chunk|
-               index, text = chunk.split(%r/:/, 2)
-               [Float(index).to_i, text]
-             end
-           chunks.replace chunks.sort_by{|index, text| index}
-           string_result = chunks.map!{|index, text| text}.join
-           string_result
-         rescue ArgumentError, TypeError
-           #return original value, they could have put strings in the system not using the adapter or previous versions
-           #that are larger than chunk size, but less than 1024
-           value
-         end
-       end
-
-       # Returns the domain for the model
-       def domain
-         @sdb_options[:domain]
-       end
-
-       #sets the conditions and order for the SDB query
-       def set_conditions_and_sort_order(query, sdb_type)
-         unsupported_conditions = []
-         conditions = ["simpledb_type = '#{sdb_type}'"]
-         # look for query.order.first and insure in conditions
-         # raise if order if greater than 1
-
-         if query.order && query.order.length > 0
-           query_object = query.order[0]
-           #anything sorted on must be a condition for SDB
-           conditions << "#{query_object.target.name} IS NOT NULL"
-           order = "ORDER BY #{query_object.target.name} #{query_object.operator}"
-         else
-           order = ""
-         end
-         query.conditions.each do |op|
-           case op.slug
-           when :regexp
-             unsupported_conditions << op
-           when :eql
-             conditions << if op.value.nil?
-               "#{op.subject.name} IS NULL"
-             else
-               "#{op.subject.name} = '#{op.value}'"
-             end
-           when :not then
-             comp = op.operands.first
-             if comp.slug == :like
-               conditions << "#{comp.subject.name} not like '#{comp.value}'"
-               next
-             end
-             case comp.value
-             when Range, Set, Array, Regexp
-               unsupported_conditions << op
-             when nil
-               conditions << "#{comp.subject.name} IS NOT NULL"
-             else
-               conditions << "#{comp.subject.name} != '#{comp.value}'"
-             end
-           when :gt then conditions << "#{op.subject.name} > '#{op.value}'"
-           when :gte then conditions << "#{op.subject.name} >= '#{op.value}'"
-           when :lt then conditions << "#{op.subject.name} < '#{op.value}'"
-           when :lte then conditions << "#{op.subject.name} <= '#{op.value}'"
-           when :like then conditions << "#{op.subject.name} like '#{op.value}'"
-           when :in
-             case op.value
-             when Array, Set
-               values = op.value.collect{|v| "'#{v}'"}.join(',')
-               values = "'__NULL__'" if values.empty?
-               conditions << "#{op.subject.name} IN (#{values})"
-             when Range
-               if op.value.exclude_end?
-                 unsupported_conditions << op
-               else
-                 conditions << "#{op.subject.name} between '#{op.value.first}' and '#{op.value.last}'"
-               end
-             else
-               raise ArgumentError, "Unsupported inclusion op: #{op.value.inspect}"
-             end
-           else raise "Invalid query op: #{op.inspect}"
-           end
-         end
-         [conditions,order,unsupported_conditions]
-       end
-
-       def select(query_call, query_limit)
-         items = []
-         time = Benchmark.realtime do
-           sdb_continuation_key = nil
-           while (results = sdb.select(query_call, sdb_continuation_key)) do
-             sdb_continuation_key = results[:next_token]
-             items += results[:items]
-             break if items.length > query_limit
-             break if sdb_continuation_key.nil?
-           end
-         end; DataMapper.logger.debug(format_log_entry(query_call, time))
-         items[0...query_limit]
-       end
-
-       #gets all results or proper number of results depending on the :limit
-       def get_results(query, conditions, order)
-         output_list = query.fields.map{|f| f.field}.join(', ')
-         query_call = "SELECT #{output_list} FROM #{domain} "
-         query_call << "WHERE #{conditions.compact.join(' AND ')}" if conditions.length > 0
-         query_call << " #{order}"
-         if query.limit!=nil
-           query_limit = query.limit
-           query_call << " LIMIT #{query.limit}"
-         else
-           #on large items force the max limit
-           query_limit = 999999999 #TODO hack for query.limit being nil
-           #query_call << " limit 2500" #this doesn't work with continuation keys as it halts at the limit passed not just a limit per query.
-         end
-         records = select(query_call, query_limit)
-       end
-
-       # Creates an item name for a query
-       def item_name_for_query(query)
-         sdb_type = simpledb_type(query.model)
-
-         item_name = "#{sdb_type}+"
-         keys = keys_for_model(query.model)
-         conditions = query.conditions.sort {|a,b| a[1].name.to_s <=> b[1].name.to_s }
-         item_name += conditions.map do |property|
-           property[2].to_s
-         end.join('-')
-         Digest::SHA1.hexdigest(item_name)
-       end
-
-       # Creates an item name for a resource
-       def item_name_for_resource(resource)
-         sdb_type = simpledb_type(resource.model)
-
-         item_name = "#{sdb_type}+"
-         keys = keys_for_model(resource.model)
-         item_name += keys.map do |property|
-           property.get(resource)
-         end.join('-')
-
-         Digest::SHA1.hexdigest(item_name)
-       end
-
-       # Returns the keys for model sorted in alphabetical order
-       def keys_for_model(model)
-         model.key(self.name).sort {|a,b| a.name.to_s <=> b.name.to_s }
-       end
-
-       def not_eql_query?(query)
-         # Curosity check to make sure we are only dealing with a delete
-         conditions = query.conditions.map {|c| c.slug }.uniq
-         selectors = [ :gt, :gte, :lt, :lte, :not, :like, :in ]
-         return (selectors - conditions).size != selectors.size
-       end
-
-       # Returns an SimpleDB instance to work with
-       def sdb
-         access_key = @sdb_options[:access_key]
-         secret_key = @sdb_options[:secret_key]
-         @sdb ||= RightAws::SdbInterface.new(access_key,secret_key,@sdb_options)
-         @sdb
-       end
-
-       # Returns a string so we know what type of
-       def simpledb_type(model)
-         model.storage_name(model.repository.name)
-       end
-
-       def format_log_entry(query, ms = 0)
-         'SDB (%.1fs) %s' % [ms, query.squeeze(' ')]
-       end
-
-       def prepare_attributes(attributes)
-         attributes = attributes.to_a.map {|a| [a.first.name.to_s, a.last]}.to_hash
-         attributes = adjust_to_sdb_attributes(attributes)
-         updates, deletes = attributes.partition{|name,value|
-           !value.nil? && !(value.respond_to?(:to_ary) && value.to_ary.empty?)
-         }
-         attrs_to_update = Hash[updates]
-         attrs_to_delete = Hash[deletes].keys
-         [attrs_to_update, attrs_to_delete]
-       end
-
-       def update_consistency_token
-         @current_consistency_token = UUIDTools::UUID.timestamp_create.to_s
-         sdb.put_attributes(
-           domain,
-           '__dm_consistency_token',
-           {'__dm_consistency_token' => [@current_consistency_token]})
-       end
-
-       def maybe_wait_for_consistency
-         if consistency_policy == :automatic && @current_consistency_token
-           wait_for_consistency
-         end
-       end
-
-       # SimpleDB supports "eventual consistency", which mean your data will be
-       # there... eventually. Obviously this can make tests a little flaky. One
-       # option is to just wait a fixed amount of time after every write, but
-       # this can quickly add up to a lot of waiting. The strategy implemented
-       # here is based on the theory that while consistency is only eventual,
-       # chances are writes will at least be linear. That is, once the results of
-       # write #2 show up we can probably assume that the results of write #1 are
-       # in as well.
-       #
-       # When a consistency policy is enabled, the adapter writes a new unique
-       # "consistency token" to the database after every write (i.e. every
-       # create, update, or delete). If the policy is :manual, it only writes the
-       # consistency token. If the policy is :automatic, writes will not return
-       # until the token has been successfully read back.
-       #
-       # When waiting for the consistency token to show up, we use progressively
-       # longer timeouts until finally giving up and raising an exception.
-       def modified!
-         case @consistency_policy
-         when :manual, :automatic then
-           update_consistency_token
-         when false then
-           # do nothing
-         else
-           raise "Invalid :wait_for_consistency option: #{@consistency_policy.inspect}"
-         end
-       end
-
-     end # class SimpleDBAdapter
-
-     # Required naming scheme.
-     SimpledbAdapter = SimpleDBAdapter
-
-     const_added(:SimpledbAdapter)
-
-   end # module Adapters
-
-
- end # module DataMapper
+ # This file is for backwards compatibility
+ require 'dm-adapter-simpledb'
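
For context, the connection options read by the 1.0.0 initializer shown above (access_key falling back to :user, secret_key falling back to :password, domain falling back to :path, plus :wait_for_consistency) correspond to a DataMapper.setup call roughly like the following. This is a sketch inferred from the removed code, with placeholder credentials and an assumed 'simpledb' adapter name; in 1.1.0 the adapter itself now lives under data/lib/dm-adapter-simpledb/adapters/simpledb_adapter.rb:

    require 'dm-adapter-simpledb'

    DataMapper.setup(:default,
      :adapter              => 'simpledb',
      :access_key           => 'YOUR_AWS_ACCESS_KEY',    # falls back to :user
      :secret_key           => 'YOUR_AWS_SECRET_KEY',    # falls back to :password
      :domain               => 'my_sdb_domain',          # falls back to :path, slashes stripped
      :wait_for_consistency => false                     # or :manual / :automatic (testing only)
    )
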