hyper_record 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,636 @@
1
+ require File.dirname(__FILE__) + '/hypertable/thrift_client'
2
+ require File.dirname(__FILE__) + '/active_record/connection_adapters/hypertable_adapter'
3
+ require File.dirname(__FILE__) + '/associations/hyper_has_many_association_extension'
4
+ require File.dirname(__FILE__) + '/associations/hyper_has_and_belongs_to_many_association_extension'
5
+
6
+ module ActiveRecord
7
# Reopens ActiveRecord::Base so that HyperBase itself is never recorded
# in the subclass registry; every other child class is recorded and then
# handed on to the next inherited hook.
class Base
  def self.inherited(child) #:nodoc:
    unless child == ActiveRecord::HyperBase
      (@@subclasses[self] ||= []) << child
      super
    end
  end
end
16
+
17
+ class HyperBase < Base
18
# Checked by the read/write helpers below before logging a call.
cattr_accessor :log_calls

# All records must include a ROW key
validates_presence_of :ROW

BILLION = 1_000_000_000.0

# Position of each field inside a cell in native array format.
ROW_KEY_OFFSET, COLUMN_FAMILY_OFFSET, COLUMN_QUALIFIER_OFFSET,
  VALUE_OFFSET, TIMESTAMP_OFFSET = 0, 1, 2, 3, 4
30
+
31
# Builds a new record and assigns the row key explicitly when one is given.
#
# attrs - Hash of attribute values (nil is tolerated). A truthy :ROW entry
#         is assigned through the ROW= writer once the superclass
#         initializer has run.
def initialize(attrs={})
  super(attrs)
  # Guard against nil attrs before indexing into it.
  self.ROW = attrs[:ROW] if attrs && attrs[:ROW]
end
35
+
36
+ # Instance Methods
37
# Writes the record's quoted attributes (primary key and readonly
# attributes excluded) to this model's table through the given mutator.
# Always returns true.
def update(mutator=self.class.mutator)
  quoted = attributes_with_quotes(false, false)
  write_quoted_attributes(quoted, self.class.table_name, mutator)
  true
end
42
+
43
# Writes the record's quoted attributes (primary key and readonly
# attributes excluded) through the given mutator, marks the record as
# persisted and returns the value stored under the primary key.
def create(mutator=self.class.mutator)
  quoted = attributes_with_quotes(false, false)
  write_quoted_attributes(quoted, self.class.table_name, mutator)
  @new_record = false
  attributes[self.class.primary_key]
end
49
+
50
+ # Allows the save operation to be performed with a specific
51
+ # mutator. By default, a new mutator is opened, flushed and closed for
52
+ # each save operation. Write-heavy application may wish to manually
53
+ # manage mutator flushes (which happens when the mutator is closed) at
54
+ # the application-layer in order to increase write throughput.
55
+ #
56
+ # m = Page.open_mutator
57
+ #
58
+ # p1 = Page.new({:ROW => 'created_with_mutator_1', :url => 'url_1'})
59
+ # p1.save_with_mutator!(m)
60
+ # p2 = Page.new({:ROW => 'created_with_mutator_2', :url => 'url_2'})
61
+ # p2.save_with_mutator!(m)
62
+ #
63
+ # Page.close_mutator(m)
64
+ #
65
+ # Future versions of hypertable will provide a mutator that automatically
66
+ # periodically flushes. This feature is expected by Summer 2009. At
67
+ # that time, manually managing the mutator at the application layer should no longer be necessary.
68
# Saves the record through the supplied mutator and returns the boolean
# outcome reported by create_or_update_with_mutator.
def save_with_mutator(mutator)
  outcome = create_or_update_with_mutator(mutator)
  outcome
end
71
+
72
# Same as save_with_mutator, but raises RecordNotSaved when the save
# reports failure.
def save_with_mutator!(mutator)
  saved = create_or_update_with_mutator(mutator)
  raise(RecordNotSaved) unless saved
  saved
end
75
+
76
# Dispatches to create or update depending on new_record?, using the
# supplied mutator. Raises ReadOnlyRecord for readonly records. Returns
# false only when the underlying call returned false.
def create_or_update_with_mutator(mutator)
  raise ReadOnlyRecord if readonly?

  outcome = if new_record?
    create(mutator)
  else
    update(mutator)
  end
  outcome != false
end
81
+
82
+ # Destroy an object. Since Hypertable does not have foreign keys,
83
+ # association cells must be removed manually.
84
def destroy
  # Walk every reflection; only has_and_belongs_to_many associations
  # leave cells on the associated rows that must be cleaned up.
  self.class.reflections.keys.each do |reflection_key|
    reflection = self.class.reflections[reflection_key]
    next unless reflection.macro == :has_and_belongs_to_many

    # One cell per associated row: (associated row key, column, this ROW)
    associated_row_keys = send(reflection.association_foreign_key).keys
    cells_to_delete = associated_row_keys.map do |row_key|
      connection.cell_native_array(row_key, reflection.primary_key_name, self.ROW)
    end

    delete_cells(cells_to_delete, reflection.klass.table_name)
  end

  # Finally remove the row itself.
  self.class.connection.delete_rows(self.class.table_name, [self.ROW])
end
102
+
103
+ # Casts the attribute to an integer before performing the increment. This
104
+ # is necessary because Hypertable only supports integer types at the
105
+ # moment. The cast has the effect of initializing nil values (and most
106
+ # string values) to zero.
107
# attribute - name of the attribute to bump
# by        - amount to add (default 1)
#
# The attribute is first stored back as an integer (to_i), then the
# amount is added. Returns self.
def increment(attribute, by=1)
  self[attribute] = self[attribute].to_i
  self[attribute] = self[attribute] + by
  self
end
112
+
113
# Increments the attribute in memory, then saves the record.
# Returns whatever save returns.
def increment!(attribute, by=1)
  increment(attribute, by)
  save
end
117
+
118
# Decrements the attribute by delegating to increment with the negated
# amount. Returns whatever increment returns.
def decrement(attribute, by=1)
  negated = -by
  increment(attribute, negated)
end
121
+
122
# Decrements the attribute and saves, by delegating to increment! with
# the negated amount.
def decrement!(attribute, by=1)
  negated = -by
  increment!(attribute, negated)
end
125
+
126
+ # Returns a copy of the attributes hash where all the values have been
127
+ # safely quoted for insertion. Translates qualified columns from a Hash
128
+ # value in Ruby to a flat list of attributes.
129
+ #
130
+ # => {
131
+ # "ROW" => "page_1",
132
+ # "name" => "name",
133
+ # "url" => "http://www.icanhascheezburger.com"
134
+ # }
135
def attributes_with_quotes(include_primary_key = true, include_readonly_attributes = true)
  quoted = {}

  attributes.each do |name, value|
    column = column_for_attribute(name)
    # Attributes without a matching column are skipped.
    next unless column

    if column.is_a?(ConnectionAdapters::QualifiedColumn) && value.is_a?(Hash)
      # Flatten a qualified column: one quoted entry per qualifier.
      value.keys.each do |qualifier|
        flat_name = self.class.connection.qualified_column_name(column.name, qualifier)
        quoted[flat_name] = quote_value(value[qualifier], column)
      end
    elsif include_primary_key || !column.primary
      quoted[name] = quote_value(value, column)
    end
  end

  include_readonly_attributes ? quoted : remove_readonly_attributes(quoted)
end
150
+
151
+ # Translates the output of attributes_with_quotes into an array of
152
+ # cells suitable for writing into Hypertable (through the write_cells
153
+ # method). Data format is native array format for cells.
154
+ # [
155
+ # ["row_key", "column_family", "column_qualifier", "value"],
156
+ # ]
157
def quoted_attributes_to_cells(quoted_attrs, table=self.class.table_name)
  row_key = attributes[self.class.primary_key]

  # One cell per quoted attribute; the Hypertable column name is split
  # into family and (optional) qualifier on the first ':'.
  quoted_attrs.keys.map do |key|
    family, qualifier = connection.hypertable_column_name(key, table).split(':', 2)
    connection.cell_native_array(row_key, family, qualifier, quoted_attrs[key])
  end
end
166
+
167
# Converts the quoted attributes into cells and writes them to the table
# through the given mutator.
def write_quoted_attributes(quoted_attrs, table=self.class.table_name, mutator=self.class.mutator)
  cells = quoted_attributes_to_cells(quoted_attrs, table)
  write_cells(cells, table, mutator)
end
170
+
171
+ # Write an array of cells to Hypertable
172
def write_cells(cells, table=self.class.table_name, mutator=self.class.mutator)
  if HyperBase.log_calls
    headline = "Writing #{cells.length} cells to #{table} table"
    # One tab-separated line per cell; values are cut to 20 characters.
    cell_lines = cells.map do |c|
      [c[0], c[1], c[2], c[3].to_s.first(20)].compact.join("\t")
    end
    RAILS_DEFAULT_LOGGER.info([headline, cell_lines].join("\n"))
  end

  connection.write_cells(table, cells, mutator, self.class.mutator_flags, self.class.mutator_flush_interval)
end
184
+
185
+ # Delete an array of cells from Hypertable
186
+ # cells is an array of cell keys [["row", "column"], ...]
187
def delete_cells(cells, table=self.class.table_name)
  if HyperBase.log_calls
    headline = "Deleting #{cells.length} cells from #{table} table"
    # One tab-separated "row<TAB>column" line per cell key.
    cell_lines = cells.map { |c| [c[0], c[1]].compact.join("\t") }
    RAILS_DEFAULT_LOGGER.info([headline, cell_lines].join("\n"))
  end

  connection.delete_cells(table, cells)
end
199
+
200
# Delete an array of rows from Hypertable.
#
# row_keys - Array of row keys, e.g. ["row1", "row2", ...]
# table    - name of the table to delete from (defaults to this model's
#            table)
#
# Returns whatever the adapter's delete_rows returns.
def delete_rows(row_keys, table=self.class.table_name)
  # Forward the row keys that were passed in.
  connection.delete_rows(table, row_keys)
end
205
+
206
+ # Class Methods
207
+ class << self
208
# True only for ActiveRecord::HyperBase itself, never for its subclasses.
def abstract_class?
  is_hyper_base = (self == ActiveRecord::HyperBase)
  is_hyper_base
end
211
+
212
# Reports whether at least one matching row exists.
#
# id_or_conditions - an Integer or String row key, or a Hash of finder
#                    conditions.
#
# Returns true or false. Raises RuntimeError for any other argument type.
def exists?(id_or_conditions)
  case id_or_conditions
  # Integer rather than Fixnum: Fixnum is gone from current Ruby, and
  # Integer matches everything Fixnum did.
  when Integer, String
    !find(:first, :row_keys => [id_or_conditions]).nil?
  when Hash
    !find(:first, :conditions => id_or_conditions).nil?
  else
    # Message kept verbatim for callers that match on it.
    raise "only Fixnum, String and Hash arguments supported"
  end
end
222
+
223
# Deletes the rows with the given keys from this model's table. Keys may
# be passed individually or as (nested) arrays; they are flattened first.
def delete(*ids)
  adapter = connection
  adapter.delete_rows(table_name, ids.flatten)
end
226
+
227
# Finder entry point. A trailing options Hash is split off first; the
# first remaining argument then selects the strategy:
#   :first    -> find_initial
#   :all      -> find_by_options
#   otherwise -> the arguments are treated as row keys (find_from_ids)
def find(*args)
  options = args.extract_options!
  mode = args.first

  if :first == mode
    find_initial(options)
  elsif :all == mode
    find_by_options(options)
  else
    find_from_ids(args, options)
  end
end
236
+
237
+ # Converts incoming finder options into a scan spec. A scan spec
238
+ # is an object used to describe query parameters (columns to retrieve,
239
+ # number of rows to retrieve, row key ranges) for Hypertable queries.
240
def find_to_scan_spec(*args)
  finder_options = args.extract_options!
  # Ask the finder for a scan spec instead of records.
  finder_options[:scan_spec] = true
  args.push(finder_options)
  find(*args)
end
246
+
247
+ # Returns a scanner object that allows you to iterate over the
248
+ # result set using the lower-level Thrift client APIs methods that
249
+ # require a scanner object. e.g.,
250
+ #
251
+ # Page.find_with_scanner(:all, :limit => 1) do |scanner|
252
+ # Page.each_cell_as_arrays(scanner) do |cell|
253
+ # ...
254
+ # end
255
+ # end
256
+ #
257
+ # See the Thrift Client API documentation for more detail.
258
+ # http://hypertable.org/thrift-api-ref/index.html
259
def find_with_scanner(*args, &block)
  # Build the scan spec from the finder arguments, then hand the block
  # on to with_scanner.
  with_scanner(find_to_scan_spec(*args), &block)
end
263
+
264
+ # Returns each row matching the finder options as a HyperRecord
265
+ # object. Each object is yielded to the caller so that large queries
266
+ # can be processed one object at a time without pulling the entire
267
+ # result set into memory.
268
+ #
269
+ # Page.find_each_row(:all) do |page|
270
+ # ...
271
+ # end
272
def find_each_row(*args)
  find_each_row_as_arrays(*args) do |row_cells|
    # Each yielded array holds the cells of one row, so instantiating
    # them gives a single object.
    record = convert_cells_to_instantiated_rows(row_cells).first
    yield record
  end
end
277
+
278
+ # Returns each row matching the finder options as an array of cells
279
+ # in native array format. Each row is yielded to the caller so that
280
+ # large queries can be processed one row at a time without pulling
281
+ # the entire result set into memory.
282
+ #
283
+ # Page.find_each_row_as_arrays(:all) do |page_as_array_of_cells|
284
+ # ...
285
+ # end
286
def find_each_row_as_arrays(*args)
  with_scanner(find_to_scan_spec(*args)) do |scanner|
    pending_cells = []
    pending_key = nil

    each_cell_as_arrays(scanner) do |cell|
      cell_key = cell[ROW_KEY_OFFSET]
      pending_key ||= cell_key

      # A change of row key closes the row collected so far.
      unless cell_key == pending_key
        yield pending_cells
        pending_cells = []
        pending_key = cell_key
      end

      pending_cells << cell
    end

    # Flush the final row once the scanner is exhausted.
    yield pending_cells unless pending_cells.empty?
  end
end
307
+
308
+ # Each hypertable query requires some default options (e.g., table name)
309
+ # that are set here if not specified in the query.
310
def set_default_options(options)
  options[:table_name] ||= table_name
  options[:columns] ||= columns

  # Don't request the ROW key explicitly, it always comes back
  options[:select] ||= begin
    column_names = qualified_column_names_without_row_key
    column_names.map { |column_name| connection.hypertable_column_name(column_name, table_name) }
  end
end
319
+
320
+ # Return the first record that matches the finder options.
321
def find_initial(options)
  options.update(:limit => 1)

  # In scan-spec mode the finder returns the spec itself, not an array,
  # so there is nothing to unwrap.
  wants_scan_spec = options[:scan_spec]
  result = find_by_options(options)
  wants_scan_spec ? result : result.first
end
330
+
331
# Return an array of records matching the finder options.
#
# options - finder options Hash; missing defaults are filled in place by
#           set_default_options. When options[:scan_spec] is set, the
#           scan spec built by the adapter is returned instead of records.
def find_by_options(options)
  set_default_options(options)

  # If requested, instead of retrieving the matching cells from
  # Hypertable, simply return a scan spec that matches the finder
  # options.
  if options[:scan_spec]
    return connection.convert_options_to_scan_spec(options)
  end

  cells = connection.execute_with_options(options)

  if HyperBase.log_calls
    msg = [ "Select" ]
    options.keys.each do |key|
      case key
      when :columns
        # Column objects are logged by name only.
        msg << " columns\t#{options[:columns].map{|c| c.name}.join(',')}"
      else
        msg << " #{key}\t#{options[key]}"
      end
    end
    msg << "Returned #{cells.length} cells"

    # Join into one string, as the other logging helpers do.
    RAILS_DEFAULT_LOGGER.info(msg.join("\n"))
  end

  convert_cells_to_instantiated_rows(cells, options)
end
362
+
363
+ # Converts cells that come back from Hypertable into hashes. Each
364
+ # hash represents a separate record (where each cell that has the same
365
+ # row key is considered one record).
366
def convert_cells_to_hashes(cells, options={})
  rows = []
  current_row = {}

  # Cells arrive in row key order, so a row is complete as soon as the
  # following cell carries a different row key (or there is no following
  # cell at all).
  cells.each_with_index do |cell, index|
    current_row['ROW'] = cell[ROW_KEY_OFFSET]

    family = connection.rubify_column_name(cell[COLUMN_FAMILY_OFFSET])
    qualifier = cell[COLUMN_QUALIFIER_OFFSET]

    if qualifier.blank?
      current_row[family] = cell[VALUE_OFFSET]
    else
      # Qualified columns are collected into a Hash keyed by qualifier.
      (current_row[family] ||= {})[qualifier] = cell[VALUE_OFFSET]
    end

    next_index = index + 1
    following = cells[next_index]
    row_complete = if following
      following[ROW_KEY_OFFSET] != current_row['ROW']
    else
      next_index >= cells.length
    end
    next unless row_complete

    # Make sure that the resulting hash has an entry for every column,
    # even ones that are not in the response (due to a limited select).
    column_families_without_row_key.each do |col|
      next if options[:instantiate_only_requested_columns] && !options[:select].include?(col.name)
      next if current_row.has_key?(col.name)

      current_row[col.name] = col.is_a?(ActiveRecord::ConnectionAdapters::QualifiedColumn) ? {} : nil
    end

    rows << current_row
    current_row = {}
  end

  rows
end
415
+
416
# Groups the cells into one hash per row, then instantiates a record
# from each hash.
def convert_cells_to_instantiated_rows(cells, options={})
  row_hashes = convert_cells_to_hashes(cells, options)
  row_hashes.map { |row_hash| instantiate(row_hash) }
end
419
+
420
+ # Return the records that match a specific HQL query.
421
def find_by_hql(hql)
  # Re-shape the cells of the HQL result into native array format so
  # they can go through the normal instantiation path.
  native_cells = connection.execute(hql).cells.map do |c|
    connection.cell_native_array(c.row_key, c.column_family, c.column_qualifier, c.value)
  end
  convert_cells_to_instantiated_rows(native_cells)
end
alias :find_by_sql :find_by_hql
429
+
430
+ # Return multiple records by row keys.
431
def find_from_ids(ids, options)
  first_arg = ids.first
  # When the caller passed an array of keys, an array is handed back.
  expects_array = first_arg.kind_of?(Array)
  return first_arg if expects_array && first_arg.empty?

  row_keys = ids.flatten.compact.uniq

  if row_keys.size == 0
    raise RecordNotFound, "Couldn't find #{name} without an ID"
  elsif row_keys.size == 1
    record = find_one(row_keys.first, options)
    expects_array ? [ record ] : record
  else
    find_some(row_keys, options)
  end
end
446
+
447
+ # Return a single record identified by a row key.
448
def find_one(id, options)
  return nil if id.blank?

  # Row keys are always looked up as strings.
  options[:row_keys] = [id.to_s]

  record = find_initial(options)
  unless record
    raise ::ActiveRecord::RecordNotFound, "Couldn't find #{name} with ID=#{id}"
  end
  record
end
459
+
460
# Return the records for several row keys. The stringified keys are
# stored as one nested array under options[:row_keys].
def find_some(ids, options)
  stringified_keys = ids.map { |id| id.to_s }
  options[:row_keys] = [stringified_keys]
  find_by_options(options)
end
464
+
465
# True when the adapter's table list contains the given name (this
# model's table by default).
def table_exists?(name=table_name)
  known_tables = connection.tables
  known_tables.include?(name)
end
468
+
469
# Drops this model's table when it exists; otherwise does nothing and
# returns nil.
def drop_table
  return unless table_exists?
  connection.drop_table(table_name)
end
472
+
473
+ # Returns the primary key field for a table. In Hypertable, a single
474
+ # row key exists for each row. The row key is referred to as ROW
475
+ # in HQL, so we'll refer to it the same way here.
476
def primary_key
  # The row key is called ROW in HQL, so the same name is used here.
  'ROW'
end
479
+
480
+ # Returns array of column objects for table associated with this class.
481
def columns
  # Memoized: the column list is only built on the first call.
  return @columns if @columns

  @columns = connection.columns(table_name, "#{name} Columns")
  @qualified_columns ||= []

  @qualified_columns.each do |qc|
    family_name = qc[:column_name].to_s

    # Remove the column family from the column list
    @columns = @columns.reject { |c| c.name == family_name }
    connection.remove_column_from_name_map(table_name, family_name)

    # Add the new qualified column family to the column list
    @columns << connection.add_qualified_column(table_name, family_name, qc[:qualifiers])
  end

  # Flag the column whose name matches the primary key.
  @columns.each { |column| column.primary = column.name == primary_key }
  @columns
end
497
+
498
# Reports whether the named column was declared with qualified_column.
#
# column_name - String or Symbol column family name.
#
# Returns true or false; false when no qualified columns have been
# declared yet.
def qualified?(column_name)
  wanted = column_name.to_sym
  # @qualified_columns stays nil until qualified_column or columns runs.
  (@qualified_columns || []).any? { |qc| qc[:column_name] == wanted }
end
501
+
502
# Returns the quoted column name for every key of the given attributes
# hash, as quoted by the adapter for this model's table.
#
# attributes - Hash keyed by column name. NOTE(review): the default calls
#              attributes_with_quotes, which is an instance method in this
#              file, so class-level callers presumably always pass the
#              hash explicitly - confirm.
def quoted_column_names(attributes=attributes_with_quotes)
  # This is a class-level method, so use the class's own connection;
  # self.class here would be Class, which has no connection.
  attributes.keys.map do |column_name|
    connection.quote_column_name_for_table(column_name, table_name)
  end
end
507
+
508
# Every column except the first one in the column list.
# NOTE(review): this relies on the ROW column being listed first - confirm
# against the adapter.
def column_families_without_row_key
  all_columns = columns
  all_columns[1, all_columns.length]
end
511
+
512
# Column names without the row key, where every qualified column family
# is replaced by one "family:qualifier" entry per declared qualifier.
def qualified_column_names_without_row_key
  names = column_families_without_row_key.map { |c| c.name }

  @qualified_columns.each do |qc|
    names.delete(qc[:column_name].to_s)
    qc[:qualifiers].each do |qualifier|
      names << "#{qc[:column_name]}:#{qualifier}"
    end
  end

  names
end
522
+
523
+ # qualified_column :misc, :qualifiers => [:name, :url]
524
+ attr_accessor :qualified_columns
525
# Declares a qualified column family, e.g.
#   qualified_column :misc, :qualifiers => [:name, :url]
# The declaration is appended to @qualified_columns; the qualifier list
# defaults to an empty array.
def qualified_column(*attrs)
  name, options = attrs.shift, attrs.shift
  qualifiers = options ? options.symbolize_keys[:qualifiers] : nil

  (@qualified_columns ||= []) << {
    :column_name => name,
    :qualifiers => qualifiers || []
  }
end
536
+
537
+ # row_key_attributes :regex => /_(\d{4}-\d{2}-\d{2}_\d{2}:\d{2})$/, :attribute_names => [:timestamp]
538
+ attr_accessor :row_key_attributes
539
# Declares attributes that are extracted from the row key, e.g.
#   row_key_attributes :regex => /_(\d{4}-\d{2}-\d{2}_\d{2}:\d{2})$/,
#                      :attribute_names => [:timestamp]
#
# For each name a reader is generated that matches ROW against the regex
# and returns the capture group at that name's position ('' when there is
# no match or the group is empty). The value is cached per record and
# recomputed when ROW_changed? is true. When at least one name is given,
# a class-level assemble_row_key_from_attributes(attributes) is generated
# as well; it joins the non-nil values of those attributes with '_'.
#
# attrs - a single options Hash with :regex and :attribute_names.
def row_key_attributes(*attrs)
  symbolized_attrs = attrs.first.symbolize_keys
  regex = symbolized_attrs[:regex]
  # Array() turns a missing :attribute_names entry into an empty list.
  names = Array(symbolized_attrs[:attribute_names])

  names.each_with_index do |name, i|
    self.class_eval %{
      def #{name}
        @_row_key_attributes ||= {}

        if !@_row_key_attributes['#{name}'] || self.ROW_changed?
          matches = self.ROW.to_s.match(#{regex.to_s.inspect})
          @_row_key_attributes['#{name}'] = if matches
            (matches[#{i + 1}] || '')
          else
            ''
          end
        end

        @_row_key_attributes['#{name}']
      end
    }
  end

  unless names.empty?
    self.class_eval %{
      def self.assemble_row_key_from_attributes(attributes)
        %w(#{names.join(' ')}).map do |n|
          attributes[n.to_sym]
        end.compact.join('_')
      end
    }
  end
end
573
+
574
+ attr_accessor :mutator, :mutator_flags, :mutator_flush_interval
575
# Stores the mutator flags and flush interval (both coerced with to_i)
# from the given options Hash. When :persistent is set, a mutator is
# opened right away and kept in @mutator.
def mutator_options(*attrs)
  settings = attrs.first.symbolize_keys
  @mutator_flags = settings[:flags].to_i
  @mutator_flush_interval = settings[:flush_interval].to_i
  return unless settings[:persistent]

  @mutator = open_mutator(@mutator_flags, @mutator_flush_interval)
end
583
+
584
+ # Mutator methods - passed through straight to the Hypertable Adapter.
585
+
586
+ # Return an open mutator on this table.
587
# flags and flush_interval default to the values stored by
# mutator_options (0 when none were stored).
def open_mutator(flags=@mutator_flags.to_i, flush_interval=@mutator_flush_interval.to_i)
  adapter = connection
  adapter.open_mutator(table_name, flags, flush_interval)
end
590
+
591
+ # As of Hypertable 0.9.2.5, flush is automatically performed on a
592
+ # close_mutator call (so flush should default to 0).
593
# Passes the mutator and flush flag straight through to the adapter.
def close_mutator(mutator, flush=0)
  adapter = connection
  adapter.close_mutator(mutator, flush)
end
596
+
597
# Passes the mutator straight through to the adapter's flush_mutator.
def flush_mutator(mutator)
  adapter = connection
  adapter.flush_mutator(mutator)
end
600
+
601
+ # Scanner methods
602
# Opens a scanner on this model's table for the given scan spec.
def open_scanner(scan_spec)
  adapter = connection
  adapter.open_scanner(table_name, scan_spec)
end
605
+
606
# Passes the scanner straight through to the adapter's close_scanner.
def close_scanner(scanner)
  adapter = connection
  adapter.close_scanner(scanner)
end
609
+
610
# Hands the scan spec and block to the adapter's with_scanner for this
# model's table.
def with_scanner(scan_spec, &block)
  adapter = connection
  adapter.with_scanner(table_name, scan_spec, &block)
end
613
+
614
+ # Iterator methods
615
# Forwards the scanner and block to the adapter's each_cell.
def each_cell(scanner, &block)
  adapter = connection
  adapter.each_cell(scanner, &block)
end
618
+
619
# Forwards the scanner and block to the adapter's each_cell_as_arrays.
def each_cell_as_arrays(scanner, &block)
  adapter = connection
  adapter.each_cell_as_arrays(scanner, &block)
end
622
+
623
# Forwards the scanner and block to the adapter's each_row.
def each_row(scanner, &block)
  adapter = connection
  adapter.each_row(scanner, &block)
end
626
+
627
# Forwards the scanner and block to the adapter's each_row_as_arrays.
def each_row_as_arrays(scanner, &block)
  adapter = connection
  adapter.each_row_as_arrays(scanner, &block)
end
630
+
631
# Forwards the block to the adapter's raw_thrift_client.
def with_thrift_client(&block)
  adapter = connection
  adapter.raw_thrift_client(&block)
end
634
+ end
635
+ end
636
+ end
@@ -0,0 +1,12 @@
1
+ #
2
+ # Autogenerated by Thrift
3
+ #
4
+ # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
5
+ #
6
+
7
+ require 'client_types'
8
+
9
# Empty namespace declared by the Thrift-generated constants file.
module Hypertable
  module ThriftGen; end
end