cassandra_mapper 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/README.rdoc +98 -0
  2. data/Rakefile.rb +11 -0
  3. data/lib/cassandra_mapper.rb +5 -0
  4. data/lib/cassandra_mapper/base.rb +19 -0
  5. data/lib/cassandra_mapper/connection.rb +9 -0
  6. data/lib/cassandra_mapper/core_ext/array/extract_options.rb +29 -0
  7. data/lib/cassandra_mapper/core_ext/array/wrap.rb +22 -0
  8. data/lib/cassandra_mapper/core_ext/class/inheritable_attributes.rb +232 -0
  9. data/lib/cassandra_mapper/core_ext/kernel/reporting.rb +62 -0
  10. data/lib/cassandra_mapper/core_ext/kernel/singleton_class.rb +13 -0
  11. data/lib/cassandra_mapper/core_ext/module/aliasing.rb +70 -0
  12. data/lib/cassandra_mapper/core_ext/module/attribute_accessors.rb +66 -0
  13. data/lib/cassandra_mapper/core_ext/object/duplicable.rb +65 -0
  14. data/lib/cassandra_mapper/core_ext/string/inflections.rb +160 -0
  15. data/lib/cassandra_mapper/core_ext/string/multibyte.rb +72 -0
  16. data/lib/cassandra_mapper/exceptions.rb +10 -0
  17. data/lib/cassandra_mapper/identity.rb +29 -0
  18. data/lib/cassandra_mapper/indexing.rb +465 -0
  19. data/lib/cassandra_mapper/observable.rb +36 -0
  20. data/lib/cassandra_mapper/persistence.rb +309 -0
  21. data/lib/cassandra_mapper/support/callbacks.rb +136 -0
  22. data/lib/cassandra_mapper/support/concern.rb +31 -0
  23. data/lib/cassandra_mapper/support/dependencies.rb +60 -0
  24. data/lib/cassandra_mapper/support/descendants_tracker.rb +41 -0
  25. data/lib/cassandra_mapper/support/inflections.rb +58 -0
  26. data/lib/cassandra_mapper/support/inflector.rb +7 -0
  27. data/lib/cassandra_mapper/support/inflector/inflections.rb +213 -0
  28. data/lib/cassandra_mapper/support/inflector/methods.rb +143 -0
  29. data/lib/cassandra_mapper/support/inflector/transliterate.rb +99 -0
  30. data/lib/cassandra_mapper/support/multibyte.rb +46 -0
  31. data/lib/cassandra_mapper/support/multibyte/utils.rb +62 -0
  32. data/lib/cassandra_mapper/support/observing.rb +218 -0
  33. data/lib/cassandra_mapper/support/support_callbacks.rb +593 -0
  34. data/test/test_helper.rb +11 -0
  35. data/test/unit/callbacks_test.rb +100 -0
  36. data/test/unit/identity_test.rb +51 -0
  37. data/test/unit/indexing_test.rb +406 -0
  38. data/test/unit/observer_test.rb +56 -0
  39. data/test/unit/persistence_test.rb +561 -0
  40. metadata +192 -0
@@ -0,0 +1,10 @@
1
# Error hierarchy for CassandraMapper.
#
# Every error class declared here descends from CassandraMapper::Exception, so
# a single `rescue CassandraMapper::Exception` traps all of them.
module CassandraMapper
  # Root of the hierarchy. It derives from StandardError rather than
  # ::Exception so that a bare `rescue` / `rescue => e` clause catches mapper
  # errors; classes derived directly from ::Exception slip past such clauses.
  class Exception < ::StandardError
  end

  # NOTE(review): presumably signals an unusable argument -- confirm at the raise sites.
  class InvalidArgumentException < Exception
  end

  # NOTE(review): presumably signals a lookup that found no row -- confirm at the raise sites.
  class RecordNotFoundException < Exception
  end

  # NOTE(review): presumably signals an operation on a record without a key -- confirm at the raise sites.
  class UndefinedKeyException < Exception
  end
end
@@ -0,0 +1,29 @@
1
# Declared in nested form so the file loads even when CassandraMapper has not
# been defined yet (the compact `module CassandraMapper::Identity` form raises
# NameError in that situation).
module CassandraMapper
  # Mixin that gives a class a configurable key attribute and tracks whether
  # an instance is still a new record.
  #
  # The including class must implement +read_attribute+, which the instance
  # level #key uses to fetch the key value.
  module Identity
    module ClassMethods
      # Reads or sets the name of the key attribute.
      # * With an _attribute_ argument, stores it as the key name.
      # * Without one, returns the stored name, falling back to (and
      #   remembering) #default_key_name.
      def key(attribute = nil)
        @cassandra_mapper_key = attribute if attribute
        @cassandra_mapper_key ||= default_key_name
      end

      # Key attribute name used when none was configured.
      def default_key_name
        :key
      end
    end

    # Adds the class-level API to any class that includes this module.
    def self.included(klass)
      klass.extend ClassMethods
    end

    # Returns the value of the attribute the class designated as its key.
    def key
      read_attribute(self.class.key)
    end

    # Records the new-record flag, coerced to a strict true/false.
    def new_record=(flag)
      @cassandra_mapper_new_record = flag ? true : false
    end

    # True until the flag has been explicitly set otherwise; an instance
    # whose flag was never assigned is treated as new.
    def new_record?
      @cassandra_mapper_new_record = true unless defined? @cassandra_mapper_new_record
      @cassandra_mapper_new_record
    end
  end
end
@@ -0,0 +1,465 @@
1
+ module CassandraMapper
2
+ # Provides indexing behavior for CassandraMapper::Base objects.
3
+ # Rather than maintaining indexes in Cassandra yourself, use the higher-level
4
+ # functionality provided by CassandraMapper::Indexing, and CassandraMapper will
5
+ # manage the underlying index state for you.
6
+ #
7
+ # An index needs a standard column family into which index data is placed.
8
+ # A given searchable value (an indexed value) becomes a row key in the column family.
9
+ # The columns and values in the row provide the keys in your indexed column family
10
+ # that have the indexed value.
11
+ #
12
+ # Suppose we have column family _A_ with rows:
13
+ # 'foo': {
14
+ # 'key' : 'foo',
15
+ # 'value': 'a',
16
+ # };
17
+ # 'bar': {
18
+ # 'key' : 'bar',
19
+ # 'value': 'b',
20
+ # };
21
+ # 'fu': {
22
+ # 'key' : 'fu',
23
+ # 'value': 'a',
24
+ # }
25
+ #
26
+ # Suppose further that in column family _B_ we want to index on _A_'s _value_ column.
27
+ # We would therefore expect _B_ to have rows:
28
+ # 'a': {
29
+ # 'foo': 'foo',
30
+ # 'fu' : 'fu',
31
+ # };
32
+ # 'b': {
33
+ # 'bar': 'bar',
34
+ # };
35
+ #
36
+ # Cassandra automatically sorts columns within a row, based on the configuration for
37
+ # the column family in question. Therefore, while the redundant data for column
38
+ # keys and values shown above seems somewhat awkward, the column keys can be designed
39
+ # to give smarter sorting of results; for instance, were each row to have a _created_at_
40
+ # timestamp string, we could index on _value_ as before but sort by _created_at_.
41
+ #
42
+ # So, with _A_ values:
43
+ # 'foo': {
44
+ # 'key' : 'foo',
45
+ # 'value' : 'a',
46
+ # 'created_at': '20100601 093000',
47
+ # };
48
+ # 'bar': {
49
+ # 'key' : 'bar',
50
+ # 'value' : 'b',
51
+ # 'created_at': '20100529 172500',
52
+ # };
53
+ # 'fu': {
54
+ # 'key' : 'fu',
55
+ # 'value' : 'a',
56
+ # 'created_at': '20100602 121500',
57
+ # };
58
+ #
59
+ # We could index on _value_ with results sorted in ascending order of _created_at_
60
+ # with _B_ rows:
61
+ # 'a': {
62
+ # '20100601 093000 foo': 'foo',
63
+ # '20100602 121500 fu': 'fu',
64
+ # };
65
+ # 'b': {
66
+ # '20100529 172500 bar': 'bar',
67
+ # };
68
+ #
69
+ # The end result is that rows in _A_ could be looked up via _data_ values using
70
+ # the desired _data_ value as the key of _B_ for finding identifiers. Those results
71
+ # can be structured (via column name) to ensure that keys come back in the desired order
72
+ # (in this case, by _created_at_ order).
73
+ #
74
+ # The column family that stores the index can be used for one index or multiple
75
+ # indexes, depending on your use case.
76
+ module Indexing
77
+ module ClassMethods
78
+ # Build an index object and install it into the calling class.
79
+ # * The _index_ argument should be a symbol, which will be the name of the index
80
+ # and the name of the accessor method for that index at both the class level
81
+ # and the instance level.
82
+ # * The _options_ hash is passed through to the CassandraMapper::Index constructor,
83
+ # with some minor mapping logic. See the CassandraMapper::Index documentation for
84
+ # most options. Some options specific to this method:
85
+ # * _class_: the class object to use for the index object; use this if you want to
86
+ # provide your own custom index behavior. Defaults to CassandraMapper::Index. This
87
+ # option determines the class to be instantiated and is not passed along to the
88
+ # constructor.
89
+ # * _indexed_class_: always gets set to the receiver, even if you set it explicitly.
90
+ # This ensures that the index binds to the class against which _has_index_ was
91
+ # called.
92
+ # * _name_: always gets set to the _index_ argument provided to the _has_index_ call,
93
+ # even if you set it explicitly in _options_.
94
+ # * If a _&block_ is provided, it will be evaluated in the context of the newly-created
95
+ # index object; this makes it easy to build indexes that have specialized logic for
96
+ # formatting sortable identifiers, etc.
97
+ #
98
+ # The index is installed as the _index_ attribute of the class object, so all index
99
+ # operations can be accomplished from there. Additionally, the _index_ name is used as
100
+ # an instance attribute, in which an instance's state relative to the index is tracked.
101
+ # Therefore, choose an _index_ value that you're happy having on both class and instances.
102
+ #
103
+ # The index object is activated after installation, so its observer goes into effect
104
+ # immediately.
105
+ #
106
+ # Given the example class and index described at CassandraMapper::Index, the same
107
+ # strategy could be achieved less verbosely with:
108
+ # class ToBeIndexed < CassandraMapper::Base
109
+ # column_family :ToBeIndexed
110
+ # maps :key, :type => :simple_uuid
111
+ #
112
+ # maps :data
113
+ # maps :created_at, :type => :timestamp, :default => :from_type
114
+ #
115
+ # def timestamped_key
116
+ # "#{created_at.to_s}_#{key}"
117
+ # end
118
+ #
119
+ # has_index :data_index, :source => :data,
120
+ # :indexed_identifier => :timestamped_key,
121
+ # :column_family => :Indexes
122
+ # end
123
+ #
124
+ # The +has_index+ invocation takes care of the details for creating the :data_index
125
+ # class and instance attributes, the CassandraMapper::Index instance, its installation
126
+ # and activation, etc.
127
+ #
128
+ # Finally, if the timestamped key only pertains to this index (as is the case in this
129
+ # example), we could arguably reduce clutter in the main model class and keep the key
130
+ # generation encapsulated in the index by using the block-style invocation.
131
+ # class ToBeIndexed < CassandraMapper::Base
132
+ # column_family :ToBeIndexed
133
+ # maps :key, :type => :simple_uuid
134
+ #
135
+ # maps :data
136
+ # maps :created_at, :type => :timestamp, :default => :from_type
137
+ #
138
+ # has_index :data_index, :source => :data, :column_family => :Indexes do
139
+ # def indexed_identifier_for(instance)
140
+ # "#{instance.created_at.to_s}_#{instance.key}"
141
+ # end
142
+ # end
143
+ # end
144
+ #
145
def has_index(index, options={}, &block)
  # Work on a copy: removing :class from the caller's hash would silently
  # alter an options hash that the caller may reuse for another index.
  settings = options.dup
  # :class selects the index implementation and is not forwarded to its constructor.
  klass = settings.delete(:class) || CassandraMapper::Index
  # :indexed_class and :name always win over anything supplied in the options.
  object = klass.new(settings.merge(:indexed_class => self, :name => index))
  # An optional block is evaluated against the new index object.
  object.instance_eval(&block) if block
  install_index(index, object)
end
151
+
152
# Installs _index_ on the receiving class under _name_ and activates it.
# * Stores the index in a class-level instance variable named after _name_.
# * Defines a class-level reader _name_ returning that variable.
# * Defines an instance-level reader _name_ that lazily creates and memoizes
#   a CassandraMapper::Index::State per instance.
# Returns the index.
#
# The accessors are built with define_method rather than by evaluating
# interpolated source strings, so the name is never parsed as code.
def install_index(name, index)
  ivar = :"@#{name}"
  instance_variable_set(ivar, index)

  # Class-level reader; evaluated against whichever class receives the call.
  singleton_class.send(:define_method, name) do
    instance_variable_get(ivar)
  end

  # Instance-level reader holding that instance's state for this index.
  define_method(name) do
    instance_variable_get(ivar) || instance_variable_set(ivar, CassandraMapper::Index::State.new)
  end

  index.activate!
  index
end
160
+ end
161
+
162
# Hook invoked with the class that mixes this module in; it extends that
# class with ClassMethods so the class-level API becomes available on it.
def self.included(base)
  base.extend ClassMethods
end
165
+ end
166
+
167
+ # The fundamental implementation of an index in Cassandra. Once installed into the
168
+ # class to be indexed, the CassandraMapper::Index maintains index values for all
169
+ # instances of the indexed class as those instances are written out to the database.
170
+ #
171
+ # For any given instance of an indexed class, CassandraMapper::Index will update
172
+ # the index information based on the following criteria:
173
+ # * The class being indexed should be provided through _indexed_class_. The index uses
174
+ # an observer under the hood to track state changes per instance, and therefore requires
175
+ # the _indexed_class_ to be provided to hook into the observer/callback machinery. Additionally,
176
+ # the index needs to know the class to instantiate when reading objects out of the index.
177
+ # * The column family to contain the indexing data is specified with the _column_family_
178
+ # attribute. CassandraMapper::Index handles writes/removes to that column family directly;
179
+ # there is no need for a CassandraMapper::Base model fronting the column family.
180
+ # * The actual indexed value is determined by invoking the method specified in the
181
+ # index's _source_ attribute on the object written to the database. If a class
182
+ # should have an index on its +:foo+ attribute, then the index object should have
183
+ # _source_ set to +:foo+. This determines the row key for the index.
184
+ # * Entries can be sorted within the index, provided an identifier is available per
185
+ # object that is sensibly sortable. The _indexed_identifier_ attribute specifies the
186
+ # method to call to provide that sortable identifier, which will correspond to the column
187
+ # name used within the index row for the given object. The _indexed_identifier_ defaults
188
+ # to +:key+, and does not need to be changed unless you have some criteria for sorting
189
+ # entries within the index. Like _source_, the _indexed_identifier_ identifies a method
190
+ # on the object being saved, not a method on the index object itself.
191
+ # * The _name_ identifies the name of the index. This ultimately must match up to the
192
+ # name of an attribute on objects being indexed that holds the instance index state information,
193
+ # in an instance of CassandraMapper::Index::State. Without this, index operations will
194
+ # fail because indexing of an object requires tracking state changes from one save to the
195
+ # next (to determine at save time in the case of an update whether the index needs to be
196
+ # changed and consequently requires a delete and a write).
197
+ #
198
+ # Say we have the following model class:
199
+ # class ToBeIndexed < CassandraMapper::Base
200
+ # column_family :ToBeIndexed
201
+ # maps :key, :type => :simple_uuid
202
+ #
203
+ # # We'll be indexing this attribute.
204
+ # maps :data
205
+ #
206
+ # # and within the index, we'll sort by create date from this attribute.
207
+ # maps :created_at, :type => :timestamp, :default => :from_type
208
+ #
209
+ # # we'll need this to match up with the :name attribute, as described above.
210
+ # def data_index
211
+ # @data_index ||= CassandraMapper::Index::State.new
212
+ # end
213
+ #
214
+ # # we'll use this to generate the sortable identifiers; it'll output
215
+ # # a string like "2010-06-02T09:45:21-04:00_47118d04-6e4e-11df-911a-e141fbb809ab".
216
+ # # It should be unique to each indexed object, as it includes the object's key.
217
+ # # But it is structured so it is effectively sortable according to create timestamp.
218
+ # def timestamped_key
219
+ # "#{created_at.to_s}_#{key}"
220
+ # end
221
+ # end
222
+ #
223
+ # We can index this class using the +Indexes+ column family to hold index data.
224
+ # index = CassandraMapper::Index.new(:indexed_identifier => :timestamped_key,
225
+ # :source => :data,
226
+ # :name => :data_index,
227
+ # :indexed_class => ToBeIndexed,
228
+ # :column_family => :Indexes)
229
+ # # activate it to install the observer and start indexing.
230
+ # index.activate!
231
+ #
232
+ # Then supposing we ran this code:
233
+ # # supposing key 47118d04-6e4e-11df-911a-e141fbb809ab is generated
234
+ # ToBeIndexed.new(:data => 'this data').save
235
+ # sleep 1
236
+ # # say that key 5a7e65fa-6e4f-11df-9554-d05c3d9715f7 is generated
237
+ # ToBeIndexed.new(:data => 'that data').save
238
+ # sleep 1
239
+ # # and finally say key gets 68985128-6e4f-11df-8e08-093a2b8b1253
240
+ # ToBeIndexed.new(:data => 'this data').save
241
+ #
242
+ # The resulting index structure in the +Indexes+ column family would look like:
243
+ # 'this data': {
244
+ # '2010-06-02T10:01:00-04:00_47118d04-6e4e-11df-911a-e141fbb809ab': '47118d04-6e4e-11df-911a-e141fbb809ab',
245
+ # '2010-06-02T10:01:02-04:00_68985128-6e4f-11df-8e08-093a2b8b1253': '68985128-6e4f-11df-8e08-093a2b8b1253'
246
+ # },
247
+ # 'that data': {
248
+ # '2010-06-02T10:01:01-04:00_5a7e65fa-6e4f-11df-9554-d05c3d9715f7': '5a7e65fa-6e4f-11df-9554-d05c3d9715f7'
249
+ # }
250
+ #
251
+ # Thus, the +Indexes+ column family could be used to retrieve +ToBeIndexed+ instances that
252
+ # have particular values for +:data+, and retrieve those instances sorted by create timestamp
253
+ # (thanks to the sortable column names).
254
+ #
255
+ # Ultimately, the structure that goes to the index column family for an instance of an indexed
256
+ # class would look like this (relative to the index attributes and the instance being indexed):
257
+ # :source : {
258
+ # :indexed_identifier : :key
259
+ # }
260
+ class Index
261
# Names of the configurable index attributes; each gets a reader and a writer.
# Frozen so the shared list cannot be altered at runtime.
ATTRS = [:source, :indexed_class, :column_family, :name, :indexed_identifier].freeze
attr_accessor(*ATTRS)

# Settings applied when the caller does not supply a value. Frozen for the
# same reason as ATTRS; #initialize merges into a fresh hash.
DEFAULTS = {:indexed_identifier => :key}.freeze

# Builds an index from an _options_ hash keyed by the names in ATTRS.
# Options are layered over DEFAULTS, and each non-nil value is assigned
# through the matching writer; keys not listed in ATTRS are ignored.
def initialize(options={})
  settings = DEFAULTS.merge(options)
  ATTRS.each do |attrib|
    value = settings[attrib]
    send(:"#{attrib}=", value) unless value.nil?
  end
end
273
+
274
# Looks up the index state held by _instance_ for this index.
#
# The state is obtained by calling, on _instance_, the method whose name is
# the receiver's _name_ attribute. The _instance_ is expected to answer that
# call with an object exposing the state interface
# (see CassandraMapper::Index::State).
def state_for(instance)
  accessor = name
  instance.send(accessor)
end
283
+
284
# Computes the index row key (the "source" value) for _instance_ by calling,
# on _instance_, the method named in the receiver's _source_ attribute.
#
# Override this to generate index row keys with more elaborate logic for a
# particular index.
def source_for(instance)
  reader = source
  instance.send(reader)
end
292
+
293
# Computes the "indexed identifier" (the sort-friendly column name) for
# _instance_ by calling, on _instance_, the method named in the receiver's
# _indexed_identifier_ attribute.
#
# Override this to give a particular index object its own sort logic.
def indexed_identifier_for(instance)
  reader = indexed_identifier
  instance.send(reader)
end
301
+
302
# Writes the index entry for _instance_ and records what was written.
#
# When the source value for _instance_ is nil nothing is written. Otherwise
# a single column (indexed identifier => instance key) is inserted, through
# the instance's connection, into the index column family under the source
# value as row key; the instance's index state is then updated with that
# source value and identifier.
#
# Normally driven by the observer callbacks during the _instance_ lifecycle,
# but it may be called directly to force an index entry into existence.
# Always returns _instance_.
def create(instance)
  row_key = source_for(instance)
  return instance if row_key.nil?

  column_name = indexed_identifier_for(instance)
  instance.connection.insert(column_family, row_key, {column_name => instance.key})

  tracked = state_for(instance)
  tracked.source_value = row_key
  tracked.identifier_value = column_name
  instance
end
321
+
322
# Deletes the index entry previously recorded for _instance_.
#
# Acts only when both the source value and the identifier value stored in
# the instance's index state are non-nil: a +remove+ is issued through the
# instance's connection against the index column family, and both state
# values are then reset to nil.
#
# Note that the entry removed is the one described by the stored state,
# *not* by the instance's current source/identifier values. Normally driven
# by the observer callbacks, but callable directly. Always returns _instance_.
def remove(instance)
  tracked = state_for(instance)
  if !tracked.source_value.nil? && !tracked.identifier_value.nil?
    instance.connection.remove(column_family, tracked.source_value, tracked.identifier_value)
    tracked.source_value = nil
    tracked.identifier_value = nil
  end
  instance
end
339
+
340
# Brings the index entry for _instance_ up to date.
#
# The current source and indexed identifier values are compared with those
# stored in the instance's index state. If either differs, the old entry is
# dropped with #remove and a new one written with #create; if both match,
# nothing happens. Always returns _instance_.
def update(instance)
  tracked = state_for(instance)
  unchanged = tracked.source_value == source_for(instance) &&
              tracked.identifier_value == indexed_identifier_for(instance)
  unless unchanged
    remove(instance)
    create(instance)
  end
  instance
end
352
+
353
# Switches the index on: creates a fresh anonymous subclass of Observer,
# keeps it in @observer, and activates it against the receiver so that the
# index-maintenance callbacks take effect. Returns whatever the observer
# class's +activate!+ returns.
def activate!
  observer_class = Class.new(Observer)
  @observer = observer_class
  observer_class.activate!(self)
end
359
+
360
# Fetches indexed-identifier => row-key mappings from the index for the
# given indexed _values_.
#
# _values_ may be a single indexed value or an array of them:
# * a non-array, or an array holding exactly one element, is looked up with
#   a single-row read;
# * any other array is looked up with a multi-row read, whose per-row
#   results are collapsed into one hash. After collapsing it can no longer
#   be told which entry came from which index row, and a row key present
#   under several indexed values appears redundantly among the hash values.
#
# The _options_ are handed straight to the underlying Cassandra
# +get+/+multi_get+ calls and can be used to control paging, result set size
# limits, etc.
def get(values, options={})
  return _single_get(values, options) unless Array === values
  return _single_get(values[0], options) if values.size == 1

  _multi_get(values, options)
end
381
+
382
# Returns the row keys of the objects indexed under _values_.
#
# _values_ and _options_ are interpreted by CassandraMapper::Index#get. The
# values of the hash it returns are de-duplicated, keeping the first
# occurrence of each key in the original order.
#
# The resulting list can be passed to a find call or processed further.
def keys(values, options={})
  matches = get(values, options)
  matches.values.uniq
end
392
+
393
# Returns the objects indexed under _values_.
#
# Row keys are gathered exactly as in CassandraMapper::Index#keys. When at
# least one key comes back, they are passed to +find+ on the receiver's
# _indexed_class_ with +:allow_missing+ set to true; otherwise an empty
# array is returned and no +find+ is issued.
#
# When large result sets are possible, consider the +:start+, +:finish+ and
# +:count+ options supported by the underlying Cassandra#get and
# Cassandra#multi_get functionality.
def objects(values, options={})
  ids = keys(values, options)
  return [] unless ids && ids.size > 0

  indexed_class.find(ids, {:allow_missing => true})
end
407
+
408
# Reads one index row: issues +get+ on the indexed class's connection for
# row _value_ in the index column family, forwarding _options_ unchanged.
def _single_get(value, options)
  connection = indexed_class.connection
  connection.get(column_family, value, options)
end
411
+
412
# Reads several index rows with one +multi_get+ on the indexed class's
# connection and folds the columns of every returned row into a single
# Cassandra::OrderedHash. On a column-name collision the later row wins.
# A falsy +multi_get+ result yields the empty hash.
def _multi_get(values, options)
  merged = Cassandra::OrderedHash.new
  rows = indexed_class.connection.multi_get(column_family, values, options)
  return merged unless rows

  rows.values.each { |columns| merged.merge!(columns) }
  merged
end
422
+
423
# Observer that keeps a single index in step with the lifecycle of the
# indexed class. Index#activate! creates one anonymous subclass per index, so
# the class-level +index+ attribute identifies the index being served.
#
# NOTE(review): +observe+ and +instance+ are not defined here; presumably
# they come from CassandraMapper::Observer -- confirm there.
class Observer < CassandraMapper::Observer
  class << self
    # The index object this observer class maintains.
    attr_accessor :index

    # Observes the index's _indexed_class_, remembers the index on this
    # observer class, and returns the result of +instance+.
    def activate!(index_object)
      observe(index_object.indexed_class)
      self.index = index_object
      instance
    end
  end

  # The index held by this observer's class.
  def index
    self.class.index
  end

  # Seeds the instance's index state with the current source and identifier
  # values, so that later updates are compared against what was loaded.
  def after_load(instance)
    served = index
    tracked = served.state_for(instance)
    tracked.source_value = served.source_for(instance)
    tracked.identifier_value = served.indexed_identifier_for(instance)
    instance
  end

  # Writes the index entry for a newly created instance.
  def after_create(instance)
    index.create(instance)
    instance
  end

  # Refreshes the index entry if the indexed values changed.
  def after_update(instance)
    index.update(instance)
    instance
  end

  # Drops the index entry of a destroyed instance.
  def after_destroy(instance)
    index.remove(instance)
    instance
  end
end
460
+
461
# Per-instance record of what an index last stored for that instance.
class State
  # Index row key last recorded for the instance (nil when none).
  attr_accessor :source_value
  # Column name last recorded for the instance within that row (nil when none).
  attr_accessor :identifier_value
end
464
+ end
465
+ end