cassandra_mapper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/README.rdoc +98 -0
  2. data/Rakefile.rb +11 -0
  3. data/lib/cassandra_mapper.rb +5 -0
  4. data/lib/cassandra_mapper/base.rb +19 -0
  5. data/lib/cassandra_mapper/connection.rb +9 -0
  6. data/lib/cassandra_mapper/core_ext/array/extract_options.rb +29 -0
  7. data/lib/cassandra_mapper/core_ext/array/wrap.rb +22 -0
  8. data/lib/cassandra_mapper/core_ext/class/inheritable_attributes.rb +232 -0
  9. data/lib/cassandra_mapper/core_ext/kernel/reporting.rb +62 -0
  10. data/lib/cassandra_mapper/core_ext/kernel/singleton_class.rb +13 -0
  11. data/lib/cassandra_mapper/core_ext/module/aliasing.rb +70 -0
  12. data/lib/cassandra_mapper/core_ext/module/attribute_accessors.rb +66 -0
  13. data/lib/cassandra_mapper/core_ext/object/duplicable.rb +65 -0
  14. data/lib/cassandra_mapper/core_ext/string/inflections.rb +160 -0
  15. data/lib/cassandra_mapper/core_ext/string/multibyte.rb +72 -0
  16. data/lib/cassandra_mapper/exceptions.rb +10 -0
  17. data/lib/cassandra_mapper/identity.rb +29 -0
  18. data/lib/cassandra_mapper/indexing.rb +465 -0
  19. data/lib/cassandra_mapper/observable.rb +36 -0
  20. data/lib/cassandra_mapper/persistence.rb +309 -0
  21. data/lib/cassandra_mapper/support/callbacks.rb +136 -0
  22. data/lib/cassandra_mapper/support/concern.rb +31 -0
  23. data/lib/cassandra_mapper/support/dependencies.rb +60 -0
  24. data/lib/cassandra_mapper/support/descendants_tracker.rb +41 -0
  25. data/lib/cassandra_mapper/support/inflections.rb +58 -0
  26. data/lib/cassandra_mapper/support/inflector.rb +7 -0
  27. data/lib/cassandra_mapper/support/inflector/inflections.rb +213 -0
  28. data/lib/cassandra_mapper/support/inflector/methods.rb +143 -0
  29. data/lib/cassandra_mapper/support/inflector/transliterate.rb +99 -0
  30. data/lib/cassandra_mapper/support/multibyte.rb +46 -0
  31. data/lib/cassandra_mapper/support/multibyte/utils.rb +62 -0
  32. data/lib/cassandra_mapper/support/observing.rb +218 -0
  33. data/lib/cassandra_mapper/support/support_callbacks.rb +593 -0
  34. data/test/test_helper.rb +11 -0
  35. data/test/unit/callbacks_test.rb +100 -0
  36. data/test/unit/identity_test.rb +51 -0
  37. data/test/unit/indexing_test.rb +406 -0
  38. data/test/unit/observer_test.rb +56 -0
  39. data/test/unit/persistence_test.rb +561 -0
  40. metadata +192 -0
@@ -0,0 +1,10 @@
1
module CassandraMapper
  # Root of the CassandraMapper error hierarchy.
  #
  # Derives from StandardError (rather than ::Exception directly) so that a
  # bare +rescue+ / <tt>rescue => e</tt> catches library errors, while
  # <tt>rescue CassandraMapper::Exception</tt> and <tt>rescue ::Exception</tt>
  # keep working exactly as before.
  class Exception < ::StandardError
  end

  # Error class for invalid arguments (the raise sites are outside this file).
  class InvalidArgumentException < Exception
  end

  # Error class for lookups that find no record (the raise sites are outside this file).
  class RecordNotFoundException < Exception
  end

  # Error class for a missing/undefined key (the raise sites are outside this file).
  class UndefinedKeyException < Exception
  end
end
@@ -0,0 +1,29 @@
1
# Written in nested form so this file loads even when the CassandraMapper
# namespace has not been defined yet (the compact
# "module CassandraMapper::Identity" form raises NameError in that case).
module CassandraMapper
  # Mixin that gives a class a configurable key attribute and gives its
  # instances a +key+ reader plus new-record tracking.
  #
  # Including classes must provide +read_attribute(name)+; it is called by
  # the instance-level +key+ method.
  module Identity
    module ClassMethods
      # With an argument, records _attribute_ as the name of the key
      # attribute for this class. Always returns the current key attribute
      # name, falling back to +default_key_name+ when none has been set.
      def key(attribute = nil)
        @cassandra_mapper_key = attribute if attribute
        @cassandra_mapper_key ||= default_key_name
      end

      # The key attribute name used when none was configured via +key+.
      def default_key_name
        :key
      end
    end

    # Extends the including class with ClassMethods.
    def self.included(klass)
      klass.extend ClassMethods
    end

    # Returns this instance's key value, read through +read_attribute+ using
    # the class-level key attribute name.
    def key
      read_attribute(self.class.key)
    end

    # Sets the new-record flag, coercing _flag_ to a strict true/false.
    def new_record=(flag)
      @cassandra_mapper_new_record = !!flag
    end

    # Returns the new-record flag; an instance whose flag was never assigned
    # is treated as new (true).
    def new_record?
      @cassandra_mapper_new_record = true unless defined?(@cassandra_mapper_new_record)
      @cassandra_mapper_new_record
    end
  end
end
@@ -0,0 +1,465 @@
1
+ module CassandraMapper
2
+ # Provides indexing behavior for CassandraMapper::Base objects.
3
+ # Rather than maintaining indexes in Cassandra yourself, use the higher-level
4
+ # functionality provided by CassandraMapper::Indexing, and CassandraMapper will
5
+ # manage the underlying index state for you.
6
+ #
7
+ # An index needs a standard column family into which index data is placed.
8
+ # A given searchable value (an indexed value) becomes a row key in the column family.
9
+ # The columns and values in the row provide the keys in your indexed column family
10
+ # that have the indexed value.
11
+ #
12
+ # Suppose we have column family _A_ with rows:
13
+ # 'foo': {
14
+ # 'key' : 'foo',
15
+ # 'value': 'a',
16
+ # };
17
+ # 'bar': {
18
+ # 'key' : 'bar',
19
+ # 'value': 'b',
20
+ # };
21
+ # 'fu': {
22
+ # 'key' : 'fu',
23
+ # 'value': 'a',
24
+ # }
25
+ #
26
+ # Suppose further that in column family _B_ we want to index on _A_'s _value_ column.
27
+ # We would therefore expect _B_ to have rows:
28
+ # 'a': {
29
+ # 'foo': 'foo',
30
+ # 'fu' : 'fu',
31
+ # };
32
+ # 'b': {
33
+ # 'bar': 'bar',
34
+ # };
35
+ #
36
+ # Cassandra automatically sorts columns within a row, based on the configuration for
37
+ # the column family in question. Therefore, while the redundant data for column
38
+ # keys and values shown above seems somewhat awkward, the column keys can be designed
39
+ # to give smarter sorting of results; for instance, were each row to have a _created_at_
40
+ # timestamp string, we could index on _value_ as before but sort by _created_at_.
41
+ #
42
+ # So, with _A_ values:
43
+ # 'foo': {
44
+ # 'key' : 'foo',
45
+ # 'value' : 'a',
46
+ # 'created_at': '20100601 093000',
47
+ # };
48
+ # 'bar': {
49
+ # 'key' : 'bar',
50
+ # 'value' : 'b',
51
+ # 'created_at': '20100529 172500',
52
+ # };
53
+ # 'fu': {
54
+ # 'key' : 'fu',
55
+ # 'value' : 'a',
56
+ # 'created_at': '20100602 121500',
57
+ # };
58
+ #
59
+ # We could index on _value_ with results sorted in ascending order of _created_at_
60
+ # with _B_ rows:
61
+ # 'a': {
62
+ # '20100601 093000 foo': 'foo',
63
+ # '20100602 121500 fu': 'fu',
64
+ # };
65
+ # 'b': {
66
+ # '20100529 172500 bar': 'bar',
67
+ # };
68
+ #
69
+ # The end result is that rows in _A_ could be looked up via _data_ values using
70
+ # the desired _data_ value as the key of _B_ for finding identifiers. Those results
71
+ # can be structured (via column name) to ensure that keys come back in the desired order
72
+ # (in this case, by _created_at_ order).
73
+ #
74
+ # The column family that stores the index can be used for one index or multiple
75
+ # indexes, depending on your use case.
76
+ module Indexing
77
+ module ClassMethods
78
+ # Build an index object and install it into the calling class.
79
+ # * The _index_ argument should be a symbol, which will be the name of the index
80
+ # and the name of the accessor method for that index at both the class level
81
+ # and the instance level.
82
+ # * The _options_ hash is passed through to the CassandraMapper::Index constructor,
83
+ # with some minor mapping logic. See the CassandraMapper::Index documentation for
84
+ # most options. Some options specific to this method:
85
+ # * _class_: the class object to use for the index object; use this if you want to
86
+ # provide your own custom index behavior. Defaults to CassandraMapper::Index. This
87
+ # option determines the class to be instantiated and is not passed along to the
88
+ # constructor.
89
+ # * _indexed_class_: always gets set to the receiver, even if you set it explicitly.
90
+ # This ensures that the index binds to the class against which _has_index_ was
91
+ # called.
92
+ # * _name_: always gets set to the _index_ argument provided to the _has_index_ call,
93
+ # even if you set it explicitly in _options_.
94
+ # * If a _&block_ is provided, it will be evaluated in the context of the newly-created
95
+ # index object; this makes it easy to build indexes that have specialized logic for
96
+ # formatting sortable identifiers, etc.
97
+ #
98
+ # The index is installed as the _index_ attribute of the class object, so all index
99
+ # operations can be accomplished from there. Additionally, the _index_ name is used as
100
+ # an instance attribute, in which an instance's state relative to the index is tracked.
101
+ # Therefore, choose an _index_ value that you're happy having on both class and instances.
102
+ #
103
+ # The index object is activated after installation, so its observer goes into effect
104
+ # immediately.
105
+ #
106
+ # Given the example class and index described at CassandraMapper::Index, the same
107
+ # strategy could be achieved less verbosely with:
108
+ # class ToBeIndexed < CassandraMapper::Base
109
+ # column_family :ToBeIndexed
110
+ # maps :key, :type => :simple_uuid
111
+ #
112
+ # maps :data
113
+ # maps :created_at, :type => :timestamp, :default => :from_type
114
+ #
115
+ # def timestamped_key
116
+ # "#{created_at.to_s}_#{key}"
117
+ # end
118
+ #
119
+ # has_index :data_index, :source => :data,
120
+ # :indexed_identifier => :timestamped_key,
121
+ # :column_family => :Indexes
122
+ # end
123
+ #
124
+ # The +has_index+ invocation takes care of the details for creating the :data_index
125
+ # class and instance attributes, the CassandraMapper::Index instance, its installation
126
+ # and activation, etc.
127
+ #
128
+ # Finally, if the timestamped key only pertains to this index (as is the case in this
129
+ # example), we could arguably reduce clutter in the main model class and keep the key
130
+ # generation encapsulated in the index by using the block-style invocation.
131
+ # class ToBeIndexed < CassandraMapper::Base
132
+ # column_family :ToBeIndexed
133
+ # maps :key, :type => :simple_uuid
134
+ #
135
+ # maps :data
136
+ # maps :created_at, :type => :timestamp, :default => :from_type
137
+ #
138
+ # has_index :data_index, :source => :data, :column_family => :Indexes do
139
+ # def indexed_identifier_for(instance)
140
+ # "#{instance.created_at.to_s}_#{instance.key}"
141
+ # end
142
+ # end
143
+ # end
144
+ #
145
# Builds an index object named _index_ and installs it on the receiver via
# +install_index+, returning whatever +install_index+ returns.
#
# * _index_: the index name; passed to the index constructor as +:name+.
# * _options_: constructor options. +:class+ selects the class to
#   instantiate (default CassandraMapper::Index) and is not forwarded;
#   +:indexed_class+ and +:name+ are always overridden with the receiver and
#   _index_ respectively.
# * _&block_: if given, evaluated with +instance_eval+ on the new index object.
#
# The caller's _options_ hash is left untouched.
def has_index(index, options={}, &block)
  # Work on a copy: deleting :class from the caller's hash would corrupt an
  # options hash that is reused for several has_index calls.
  settings = options.dup
  klass = settings.delete(:class) || CassandraMapper::Index
  object = klass.new(settings.merge(:indexed_class => self, :name => index))
  object.instance_eval(&block) if block
  install_index(index, object)
end
151
+
152
# Installs _index_ on the receiver under _name_ and activates it.
#
# * Stores _index_ in a class-level instance variable named after _name_ and
#   defines a class-level reader of the same name.
# * Defines an instance-level method of the same name that lazily creates
#   and memoizes a CassandraMapper::Index::State per instance.
# * Calls +activate!+ on _index_ and returns _index_.
#
# Accessors are generated with attr_reader/define_method instead of
# string eval, so _name_ is never interpolated into evaluated source.
def install_index(name, index)
  accessor = name.to_s
  ivar = :"@#{accessor}"
  instance_variable_set(ivar, index)
  # Class-level reader for the index object stored above.
  singleton_class.send(:attr_reader, accessor)
  # Instance-level accessor for the per-instance index state.
  define_method(accessor) do
    instance_variable_get(ivar) ||
      instance_variable_set(ivar, CassandraMapper::Index::State.new)
  end
  index.activate!
  index
end
160
+ end
161
+
162
# Hook run when this module is included: adds the ClassMethods
# module to the including class as class-level methods.
def self.included(klass)
  klass.send(:extend, ClassMethods)
end
165
+ end
166
+
167
+ # The fundamental implementation of an index in Cassandra. Once installed into the
168
+ # class to be indexed, the CassandraMapper::Index maintains index values for all
169
+ # instances of the indexed class as those instances are written out to the database.
170
+ #
171
+ # For any given instance of an indexed class, CassandraMapper::Index will update
172
+ # the index information based on the following criteria:
173
+ # * The class being indexed should be provided through _indexed_class_. The index uses
174
+ # an observer under the hood to track state changes per instance, and therefore requires
175
+ # the _indexed_class_ to be provided to hook into the observer/callback machinery. Additionally,
176
+ # the index needs to know the class to instantiate when reading objects out of the index.
177
+ # * The column family to contain the indexing data is specified with the _column_family_
178
+ # attribute. CassandraMapper::Index handles writes/removes to that column family directly;
179
+ # there is no need for a CassandraMapper::Base model fronting the column family.
180
+ # * The actual indexed value is determined by invoking the method specified in the
181
+ # index's _source_ attribute on the object written to the database. If a class
182
+ # should have an index on its +:foo+ attribute, then the index object should have
183
+ # _source_ set to +:foo+. This determines the row key for the index.
184
+ # * Entries can be sorted within the index, provided an identifier is available per
185
+ # object that is sensibly sortable. The _indexed_identifier_ attribute specifies the
186
+ # method to call to provide that sortable identifier, which will correspond to the column
187
+ # name used within the index row for the given object. The _indexed_identifier_ defaults
188
+ # to +:key+, and does not need to be changed unless you have some criteria for sorting
189
+ # entries within the index. Like _source_, the _indexed_identifier_ identifies a method
190
+ # on the object being saved, not a method on the index object itself.
191
+ # * The _name_ identifies the name of the index. This ultimately must match up to the
192
+ # name of an attribute on objects being indexed that holds the instance index state information,
193
+ # in an instance of CassandraMapper::Index::State. Without this, index operations will
194
+ # fail because indexing of an object requires tracking state changes from one save to the
195
+ # next (to determine at save time in the case of an update whether the index needs to be
196
+ # changed and consequently requires a delete and a write).
197
+ #
198
+ # Say we have the following model class:
199
+ # class ToBeIndexed < CassandraMapper::Base
200
+ # column_family :ToBeIndexed
201
+ # maps :key, :type => :simple_uuid
202
+ #
203
+ # # We'll be indexing this attribute.
204
+ # maps :data
205
+ #
206
+ # # and within the index, we'll sort by create date from this attribute.
207
+ # maps :created_at, :type => :timestamp, :default => :from_type
208
+ #
209
+ # # we'll need this to match up with the :name attribute, as described above.
210
+ # def data_index
211
+ # @data_index ||= CassandraMapper::Index::State.new
212
+ # end
213
+ #
214
+ # # we'll use this to generate the sortable identifiers; it'll output
215
+ # # a string like "2010-06-02T09:45:21-04:00_47118d04-6e4e-11df-911a-e141fbb809ab".
216
+ # # It should be unique to each indexed object, as it includes the object's key.
217
+ # # But it is structured so it is effectively sortable according to create timestamp.
218
+ # def timestamped_key
219
+ # "#{created_at.to_s}_#{key}"
220
+ # end
221
+ # end
222
+ #
223
+ # We can index this class using the +Indexes+ column family to hold index data.
224
+ # index = CassandraMapper::Index.new(:indexed_identifier => :timestamped_key,
225
+ # :source => :data,
226
+ # :name => :data_index,
227
+ # :indexed_class => ToBeIndexed,
228
+ # :column_family => :Indexes)
229
+ # # activate it to install the observer and start indexing.
230
+ # index.activate!
231
+ #
232
+ # Then supposing we ran this code:
233
+ # # supposing key 47118d04-6e4e-11df-911a-e141fbb809ab is generated
234
+ # ToBeIndexed.new(:data => 'this data').save
235
+ # sleep 1
236
+ # # say that key 5a7e65fa-6e4f-11df-9554-d05c3d9715f7 is generated
237
+ # ToBeIndexed.new(:data => 'that data').save
238
+ # sleep 1
239
+ # # and finally say key gets 68985128-6e4f-11df-8e08-093a2b8b1253
240
+ # ToBeIndexed.new(:data => 'this data').save
241
+ #
242
+ # The resulting index structure in the +Indexes+ column family would look like:
243
+ # 'this data': {
244
+ # '2010-06-02T10:01:00-04:00_47118d04-6e4e-11df-911a-e141fbb809ab': '47118d04-6e4e-11df-911a-e141fbb809ab',
245
+ # '2010-06-02T10:01:02-04:00_68985128-6e4f-11df-8e08-093a2b8b1253': '68985128-6e4f-11df-8e08-093a2b8b1253'
246
+ # },
247
+ # 'that data': {
248
+ # '2010-06-02T10:01:01-04:00_5a7e65fa-6e4f-11df-9554-d05c3d9715f7': '5a7e65fa-6e4f-11df-9554-d05c3d9715f7'
249
+ # }
250
+ #
251
+ # Thus, the +Indexes+ column family could be used to retrieve +ToBeIndexed+ instances that
252
+ # have particular values for +:data+, and retrieve those instances sorted by create timestamp
253
+ # (thanks to the sortable column names).
254
+ #
255
+ # Ultimately, the structure that goes to the index column family for an instance of an indexed
256
+ # class would look like this (relative to the index attributes and the instance being indexed):
257
+ # :source : {
258
+ # :indexed_identifier : :key
259
+ # }
260
+ class Index
261
+ ATTRS = [:source, :indexed_class, :column_family, :name, :indexed_identifier]
262
+ attr_accessor *ATTRS
263
+
264
+ DEFAULTS = {:indexed_identifier => :key}
265
+
266
# Builds an index from _options_, layered over DEFAULTS. Each name listed in
# ATTRS whose resulting value is non-nil is assigned through its writer;
# keys not listed in ATTRS are ignored.
def initialize(options={})
  settings = DEFAULTS.merge(options)
  ATTRS.each do |attrib|
    setting = settings[attrib]
    next if setting.nil?
    send("#{attrib}=".to_sym, setting)
  end
end
273
+
274
+ # Returns the CassandraMapper::Index::State instance pertaining to the receiver
275
+ # on _instance_, determined by the receiver's _name_ attribute.
276
+ #
277
+ # The _instance_ is expected to implement that interface, ensuring that an accessor
278
+ # with name matching index's _name_ returns an object conforming to the state object
279
+ # interface.
280
# Looks up the index state object on _instance_ by calling the method
# named by this index's +name+ attribute.
def state_for(instance)
  accessor = name
  instance.send(accessor)
end
283
+
284
+ # Returns the "source" value (the index row key) for _instance_ based on the method
285
+ # specified in the receiver's _source_ attribute.
286
+ #
287
+ # This could be overridden to have more sophisticated index row key generation techniques
288
+ # applied for a particular index.
289
# Computes the index row key for _instance_ by calling the method named by
# this index's +source+ attribute on it.
def source_for(instance)
  reader = source
  instance.send(reader)
end
292
+
293
+ # Returns the "indexed identifier" (the sort-friendly column name) for _instance_ based
294
+ # on the method specified in the receiver's _indexed_identifier_ attribute.
295
+ #
296
+ # This could be overridden to have more sophisticated sort logic within an index for
297
+ # a particular index object.
298
# Computes the index column name for _instance_ by calling the method named
# by this index's +indexed_identifier+ attribute on it.
def indexed_identifier_for(instance)
  reader = indexed_identifier
  instance.send(reader)
end
301
+
302
+ # If the value to be indexed is non-nil, performs an insert into the appropriate
303
+ # column family of the index structure for the _instance_ provided. Also updates
304
+ # the state information at the index's _name_ on _instance_ to reflect the latest
305
+ # source and indexed identifier values.
306
+ #
307
+ # This is typically managed under the hood by observer callbacks during the _instance_
308
+ # lifecycle, but you could invoke it directly if you need to force certain index values
309
+ # to be present.
310
# Writes the index entry for _instance_ and records what was written in the
# instance's index state. Does nothing when the source value is nil.
# Always returns _instance_.
def create(instance)
  row_key = source_for(instance)
  return instance if row_key.nil?

  column_name = indexed_identifier_for(instance)
  # Index row: indexed value => { sortable identifier => instance key }.
  instance.connection.insert(column_family, row_key, {column_name => instance.key})
  tracked = state_for(instance)
  tracked.source_value = row_key
  tracked.identifier_value = column_name
  instance
end
321
+
322
+ # Given non-nil values in the _instance_'s index state for the index's _name_,
323
+ # performs a +:remove+ against the appropriate column family to remove that old
324
+ # state from the index. Also clears the index state object for the _instance_.
325
+ #
326
+ # Like :create, this is intended to be managed automatically during the _instance_
327
+ # lifecycle, but you could invoke it directly if necessary. In this case, take care
328
+ # to note that the remove acts against the index state object at _name_ on _instance_,
329
+ # *not* against the current source/identifier values.
330
# Removes the index entry recorded in _instance_'s index state (not the
# instance's current values) and then clears that state. Does nothing when
# either recorded value is nil. Always returns _instance_.
def remove(instance)
  tracked = state_for(instance)
  stale_row = tracked.source_value
  stale_column = tracked.identifier_value
  return instance if stale_row.nil? || stale_column.nil?

  instance.connection.remove(column_family, stale_row, stale_column)
  tracked.source_value = nil
  tracked.identifier_value = nil
  instance
end
339
+
340
+ # If the source or indexed identifier values are found to have changed on _instance_
341
+ # (current values compared to the state preserved in the index state object at the index's
342
+ # _name_ on _instance_), performs a +:remove+ followed by a +:create+ to keep the index
343
+ # up to date.
344
# Re-indexes _instance_ when its current source value or indexed identifier
# differs from what its index state recorded: the recorded entry is removed
# and a fresh one created. Always returns _instance_.
def update(instance)
  tracked = state_for(instance)
  stale = tracked.source_value != source_for(instance) ||
          tracked.identifier_value != indexed_identifier_for(instance)
  if stale
    remove(instance)
    create(instance)
  end
  instance
end
352
+
353
+ # Creates the necessary observer for the class to be indexed and thus activates the callbacks
354
+ # for index management.
355
# Creates an anonymous subclass of Observer for this index, keeps it in
# @observer, and activates it with the receiver. Returns the result of the
# observer class's +activate!+.
def activate!
  observer_class = Class.new(Observer)
  @observer = observer_class
  observer_class.activate!(self)
end
359
+
360
+ # Retrieve a hash of indexed identifier to row key mappings from the index for
361
+ # all indexed _values_. The _values_ may be an array of indexed values to check,
362
+ # or a single such value. The result set is collapsed such that it cannot be determined
363
+ # which result corresponds to which index. Additionally, if a particular row key is
364
+ # present in multiple indexes, it'll be redundantly represented here (as redundant values in
365
+ # the result hash).
366
+ #
367
+ # The _options_ are passed directly to the underlying Cassandra +get+/+multi_get+ invocations,
368
+ # and can be used to control paging through results, result set size limits, etc.
369
# Fetches index entries for _values_, forwarding _options_ to the lookup.
# A non-array value, or an array holding exactly one value, is fetched with
# +_single_get+; any other array (including an empty one) goes through
# +_multi_get+.
def get(values, options={})
  many = Array === values
  if many && values.size != 1
    _multi_get(values, options)
  else
    _single_get(many ? values[0] : values, options)
  end
end
381
+
382
+ # Retrieve the row keys for objects that have the indexed values specified
383
+ # in _values_. The handling of _values_ and _options_ is done by the
384
+ # CassandraMapper::Index#get method, and row keys from the result set are
385
+ # collapsed into a unique list matching the original sort order.
386
+ #
387
+ # The resulting list could be passed to a find call or manipulated in some
388
+ # other delightful fashion.
389
# Returns the de-duplicated row keys found in the index for _values_, in
# the order +get+ produced them. _values_ and _options_ go straight to +get+.
def keys(values, options={})
  hits = get(values, options)
  hits.values.uniq
end
392
+
393
+ # Retrieve the objects that have the indexed values specified in
394
+ # _values_. The operations are analogous to CassandraMapper::Index#keys,
395
+ # except that a +find+ call is made on the receiver's _indexed_class_.
396
+ #
397
+ # If you are potentially dealing with large sets of objects, consider using
398
+ # the +:start+, +:finish+, and +:count+ options supported by the underlying
399
+ # Cassandra#get and Cassandra#multi_get functionality.
400
# Loads the indexed objects matching _values_: resolves row keys via +keys+
# and hands them to +find+ on the indexed class with :allow_missing => true.
# Returns an empty array when no keys are found.
def objects(values, options={})
  ids = keys(values, options)
  return [] unless ids && ids.size > 0

  indexed_class.find(ids, {:allow_missing => true})
end
407
+
408
# Fetches the index row for a single indexed _value_ through the indexed
# class's connection, passing _options_ along unchanged.
def _single_get(value, options)
  store = indexed_class.connection
  store.get(column_family, value, options)
end
411
+
412
# Fetches the index rows for several indexed _values_ with one +multi_get+
# and flattens them into a single Cassandra::OrderedHash; later rows
# overwrite earlier ones on duplicate column names. A falsy multi_get
# result gives an empty hash.
def _multi_get(values, options)
  rows = indexed_class.connection.multi_get(column_family, values, options)
  merged = Cassandra::OrderedHash.new
  (rows ? rows.values : []).each { |columns| merged.merge!(columns) }
  merged
end
422
+
423
# Observer that forwards lifecycle callbacks of the indexed class to one
# index object. The index is held at class level, so every index is expected
# to work with its own subclass of this observer.
class Observer < CassandraMapper::Observer
  class << self
    # The index object this observer class serves.
    attr_reader :index
    attr_writer :index

    # Registers the observer against the index's indexed class, remembers
    # the index, and returns +instance+.
    # NOTE(review): +observe+ and +instance+ are presumably inherited from
    # CassandraMapper::Observer, which is not visible here - confirm.
    def activate!(index_object)
      observe(index_object.indexed_class)
      self.index = index_object
      instance
    end
  end

  # The index object configured on this observer's class.
  def index
    self.class.index
  end

  # After a record is loaded, seeds its index state with the record's
  # current source value and indexed identifier.
  def after_load(instance)
    tracked = index.state_for(instance)
    tracked.source_value = index.source_for(instance)
    tracked.identifier_value = index.indexed_identifier_for(instance)
    instance
  end

  # After a record is created, writes its index entry.
  def after_create(instance)
    index.create(instance)
    instance
  end

  # After a record is updated, lets the index decide whether to re-index it.
  def after_update(instance)
    index.update(instance)
    instance
  end

  # After a record is destroyed, removes its index entry.
  def after_destroy(instance)
    index.remove(instance)
    instance
  end
end
460
+
461
# Per-instance record of what an index last stored for that instance.
class State
  # The index row key (source value) last recorded.
  attr_accessor :source_value
  # The index column name (indexed identifier) last recorded.
  attr_accessor :identifier_value
end
464
+ end
465
+ end