hyper_record 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,636 @@
1
+ require File.dirname(__FILE__) + '/hypertable/thrift_client'
2
+ require File.dirname(__FILE__) + '/active_record/connection_adapters/hypertable_adapter'
3
+ require File.dirname(__FILE__) + '/associations/hyper_has_many_association_extension'
4
+ require File.dirname(__FILE__) + '/associations/hyper_has_and_belongs_to_many_association_extension'
5
+
6
+ module ActiveRecord
7
+ class Base
8
+ def self.inherited(child) #:nodoc:
9
+ return if child == ActiveRecord::HyperBase
10
+
11
+ @@subclasses[self] ||= []
12
+ @@subclasses[self] << child
13
+ super
14
+ end
15
+ end
16
+
17
+ class HyperBase < Base
18
+ cattr_accessor :log_calls
19
+
20
+ # All records must include a ROW key
21
+ validates_presence_of :ROW
22
+
23
+ BILLION = 1_000_000_000.0
24
+
25
+ ROW_KEY_OFFSET = 0
26
+ COLUMN_FAMILY_OFFSET = 1
27
+ COLUMN_QUALIFIER_OFFSET = 2
28
+ VALUE_OFFSET = 3
29
+ TIMESTAMP_OFFSET = 4
30
+
31
# Builds a new record and assigns the row key explicitly.
#
# The attributes are handed to the superclass initializer first; the :ROW
# entry, when present, is then written through the ROW= accessor.
# NOTE(review): presumably required because the superclass skips the primary
# key during mass assignment - confirm against the ActiveRecord version used.
#
# attrs - Hash of initial attributes (may be nil).
def initialize(attrs={})
  super(attrs)
  # The nil guard keeps a nil argument from raising NoMethodError here; the
  # previous condition simply tested attrs[:ROW] twice.
  self.ROW = attrs[:ROW] if attrs && attrs[:ROW]
end
35
+
36
+ # Instance Methods
37
+ def update(mutator=self.class.mutator)
38
+ write_quoted_attributes(attributes_with_quotes(false, false),
39
+ self.class.table_name, mutator)
40
+ true
41
+ end
42
+
43
+ def create(mutator=self.class.mutator)
44
+ write_quoted_attributes(attributes_with_quotes(false, false),
45
+ self.class.table_name, mutator)
46
+ @new_record = false
47
+ self.attributes[self.class.primary_key]
48
+ end
49
+
50
+ # Allows the save operation to be performed with a specific
51
+ # mutator. By default, a new mutator is opened, flushed and closed for
52
+ # each save operation. Write-heavy application may wish to manually
53
+ # manage mutator flushes (which happens when the mutator is closed) at
54
+ # the application-layer in order to increase write throughput.
55
+ #
56
+ # m = Page.open_mutator
57
+ #
58
+ # p1 = Page.new({:ROW => 'created_with_mutator_1', :url => 'url_1'})
59
+ # p1.save_with_mutator!(m)
60
+ # p2 = Page.new({:ROW => 'created_with_mutator_2', :url => 'url_2'})
61
+ # p2.save_with_mutator!(m)
62
+ #
63
+ # Page.close_mutator(m)
64
+ #
65
+ # Future versions of hypertable will provide a mutator that automatically
66
+ # periodically flushes. This feature is expected by Summer 2009. At
67
+ # that time, manually managing the mutator at the application layer should no longer be necessary.
68
+ def save_with_mutator(mutator)
69
+ create_or_update_with_mutator(mutator)
70
+ end
71
+
72
+ def save_with_mutator!(mutator)
73
+ create_or_update_with_mutator(mutator) || raise(RecordNotSaved)
74
+ end
75
+
76
+ def create_or_update_with_mutator(mutator)
77
+ raise ReadOnlyRecord if readonly?
78
+ result = new_record? ? create(mutator) : update(mutator)
79
+ result != false
80
+ end
81
+
82
+ # Destroy an object. Since Hypertable does not have foreign keys,
83
+ # association cells must be removed manually.
84
+ def destroy
85
+ # check for associations and delete association cells as necessary
86
+ for reflection_key in self.class.reflections.keys
87
+ case self.class.reflections[reflection_key].macro
88
+ when :has_and_belongs_to_many
89
+ # remove all the association cells from the associated objects
90
+ cells_to_delete = []
91
+
92
+ for row_key in self.send(self.class.reflections[reflection_key].association_foreign_key).keys
93
+ cells_to_delete << connection.cell_native_array(row_key, self.class.reflections[reflection_key].primary_key_name, self.ROW)
94
+ end
95
+
96
+ self.delete_cells(cells_to_delete, self.class.reflections[reflection_key].klass.table_name)
97
+ end
98
+ end
99
+
100
+ self.class.connection.delete_rows(self.class.table_name, [self.ROW])
101
+ end
102
+
103
+ # Casts the attribute to an integer before performing the increment. This
104
+ # is necessary because Hypertable only supports integer types at the
105
+ # moment. The cast has the effect of initializing nil values (and most
106
+ # string values) to zero.
107
# Adds +by+ to the named attribute in memory and returns the record itself.
# Nothing is saved here.
#
# The current value is coerced with to_i and written back before the
# addition, so nil (and most strings) start from zero.
#
# attribute - name of the attribute to change.
# by        - amount to add (defaults to 1).
def increment(attribute, by=1)
  coerced = self[attribute].to_i
  self[attribute] = coerced
  self[attribute] = self[attribute] + by
  self
end
112
+
113
+ def increment!(attribute, by=1)
114
+ increment(attribute, by)
115
+ self.save
116
+ end
117
+
118
+ def decrement(attribute, by=1)
119
+ increment(attribute, -by)
120
+ end
121
+
122
+ def decrement!(attribute, by=1)
123
+ increment!(attribute, -by)
124
+ end
125
+
126
+ # Returns a copy of the attributes hash where all the values have been
127
+ # safely quoted for insertion. Translates qualified columns from a Hash
128
+ # value in Ruby to a flat list of attributes.
129
+ #
130
+ # => {
131
+ # "ROW" => "page_1",
132
+ # "name" => "name",
133
+ # "url" => "http://www.icanhascheezburger.com"
134
+ # }
135
+ def attributes_with_quotes(include_primary_key = true, include_readonly_attributes = true)
136
+ quoted = attributes.inject({}) do |quoted, (name, value)|
137
+ if column = column_for_attribute(name)
138
+ if column.is_a?(ConnectionAdapters::QualifiedColumn) and value.is_a?(Hash)
139
+ value.keys.each{|k|
140
+ quoted[self.class.connection.qualified_column_name(column.name, k)] = quote_value(value[k], column)
141
+ }
142
+ else
143
+ quoted[name] = quote_value(value, column) unless !include_primary_key && column.primary
144
+ end
145
+ end
146
+ quoted
147
+ end
148
+ include_readonly_attributes ? quoted : remove_readonly_attributes(quoted)
149
+ end
150
+
151
+ # Translates the output of attributes_with_quotes into an array of
152
+ # cells suitable for writing into Hypertable (through the write_cells
153
+ # method). Data format is native array format for cells.
154
+ # [
155
+ # ["row_key", "column_family", "column_qualifier", "value"],
156
+ # ]
157
# Turns a hash of quoted attributes into one cell per attribute, built by
# the connection's cell_native_array.
#
# Each key is mapped through connection.hypertable_column_name and split at
# the first ':' into column family and qualifier. The row key of every cell
# is this record's primary-key attribute.
#
# quoted_attrs - Hash of attribute name => quoted value.
# table        - table name used for the column-name lookup.
#
# Returns an Array of cells in the same order as quoted_attrs.keys.
def quoted_attributes_to_cells(quoted_attrs, table=self.class.table_name)
  row_key = attributes[self.class.primary_key]

  quoted_attrs.keys.map do |attr_name|
    family, qualifier = connection.hypertable_column_name(attr_name, table).split(':', 2)
    connection.cell_native_array(row_key, family, qualifier, quoted_attrs[attr_name])
  end
end
166
+
167
+ def write_quoted_attributes(quoted_attrs, table=self.class.table_name, mutator=self.class.mutator)
168
+ write_cells(quoted_attributes_to_cells(quoted_attrs, table), table, mutator)
169
+ end
170
+
171
+ # Write an array of cells to Hypertable
172
+ def write_cells(cells, table=self.class.table_name, mutator=self.class.mutator)
173
+ if HyperBase.log_calls
174
+ msg = [
175
+ "Writing #{cells.length} cells to #{table} table",
176
+ cells.map{|c| [c[0], c[1], c[2], c[3].to_s.first(20)].compact.join("\t")}
177
+ ].join("\n")
178
+ RAILS_DEFAULT_LOGGER.info(msg)
179
+ # puts msg
180
+ end
181
+
182
+ connection.write_cells(table, cells, mutator, self.class.mutator_flags, self.class.mutator_flush_interval)
183
+ end
184
+
185
+ # Delete an array of cells from Hypertable
186
+ # cells is an array of cell keys [["row", "column"], ...]
187
+ def delete_cells(cells, table=self.class.table_name)
188
+ if HyperBase.log_calls
189
+ msg = [
190
+ "Deleting #{cells.length} cells from #{table} table",
191
+ cells.map{|c| [ c[0], c[1] ].compact.join("\t")}
192
+ ].join("\n")
193
+ RAILS_DEFAULT_LOGGER.info(msg)
194
+ # puts msg
195
+ end
196
+
197
+ connection.delete_cells(table, cells)
198
+ end
199
+
200
+ # Delete an array of rows from Hypertable
201
+ # row_keys is an array of row keys ["row1", "row2", ...]
202
# Removes whole rows from a table through the connection.
#
# row_keys - Array of row keys to delete.
# table    - name of the table to delete from (defaults to this model's
#            table).
def delete_rows(row_keys, table=self.class.table_name)
  # Forward the row keys that were passed in. The previous body referenced
  # an undefined local (cells) and raised NameError on every call.
  connection.delete_rows(table, row_keys)
end
205
+
206
+ # Class Methods
207
+ class << self
208
+ def abstract_class?
209
+ self == ActiveRecord::HyperBase
210
+ end
211
+
212
# Reports whether at least one matching row can be found.
#
# id_or_conditions - an Integer or String row key, or a Hash of finder
#                    conditions.
#
# Returns true or false. Raises RuntimeError for any other argument type.
def exists?(id_or_conditions)
  # Integer is matched instead of Fixnum: it covers Fixnum (and Bignum) on
  # old Rubies and still exists on Ruby 3.2+, where Fixnum was removed.
  case id_or_conditions
  when Integer, String
    !find(:first, :row_keys => [id_or_conditions]).nil?
  when Hash
    !find(:first, :conditions => id_or_conditions).nil?
  else
    # Message kept unchanged for callers that may match on it.
    raise "only Fixnum, String and Hash arguments supported"
  end
end
222
+
223
+ def delete(*ids)
224
+ self.connection.delete_rows(table_name, ids.flatten)
225
+ end
226
+
227
+ def find(*args)
228
+ options = args.extract_options!
229
+
230
+ case args.first
231
+ when :first then find_initial(options)
232
+ when :all then find_by_options(options)
233
+ else find_from_ids(args, options)
234
+ end
235
+ end
236
+
237
+ # Converts incoming finder options into a scan spec. A scan spec
238
+ # is an object used to describe query parameters (columns to retrieve,
239
+ # number of rows to retrieve, row key ranges) for Hypertable queries.
240
+ def find_to_scan_spec(*args)
241
+ options = args.extract_options!
242
+ options[:scan_spec] = true
243
+ args << options
244
+ find(*args)
245
+ end
246
+
247
+ # Returns a scanner object that allows you to iterate over the
248
+ # result set using the lower-level Thrift client APIs methods that
249
+ # require a scanner object. e.g.,
250
+ #
251
+ # Page.find_with_scanner(:all, :limit => 1) do |scanner|
252
+ # Page.each_cell_as_arrays(scanner) do |cell|
253
+ # ...
254
+ # end
255
+ # end
256
+ #
257
+ # See the Thrift Client API documentation for more detail.
258
+ # http://hypertable.org/thrift-api-ref/index.html
259
+ def find_with_scanner(*args, &block)
260
+ scan_spec = find_to_scan_spec(*args)
261
+ with_scanner(scan_spec, &block)
262
+ end
263
+
264
+ # Returns each row matching the finder options as a HyperRecord
265
+ # object. Each object is yielded to the caller so that large queries
266
+ # can be processed one object at a time without pulling the entire
267
+ # result set into memory.
268
+ #
269
+ # Page.find_each_row(:all) do |page|
270
+ # ...
271
+ # end
272
+ def find_each_row(*args)
273
+ find_each_row_as_arrays(*args) do |row|
274
+ yield convert_cells_to_instantiated_rows(row).first
275
+ end
276
+ end
277
+
278
+ # Returns each row matching the finder options as an array of cells
279
+ # in native array format. Each row is yielded to the caller so that
280
+ # large queries can be processed one row at a time without pulling
281
+ # the entire result set into memory.
282
+ #
283
+ # Page.find_each_row_as_arrays(:all) do |page_as_array_of_cells|
284
+ # ...
285
+ # end
286
# Streams the cells matching the finder options and yields them one row at
# a time, each row being an Array of the consecutive cells that share a row
# key (the element at ROW_KEY_OFFSET).
#
# The finder arguments are first turned into a scan spec, and the cells are
# then read through with_scanner / each_cell_as_arrays.
def find_each_row_as_arrays(*args)
  spec = find_to_scan_spec(*args)

  with_scanner(spec) do |scanner|
    pending = []
    active_key = nil

    each_cell_as_arrays(scanner) do |cell|
      cell_key = cell[ROW_KEY_OFFSET]
      active_key = cell_key unless active_key

      # A different row key means the buffered row is complete.
      unless cell_key == active_key
        yield pending
        pending = []
        active_key = cell_key
      end

      pending << cell
    end

    # Hand over the final row, if any cells were seen at all.
    yield pending unless pending.empty?
  end
end
307
+
308
+ # Each hypertable query requires some default options (e.g., table name)
309
+ # that are set here if not specified in the query.
310
+ def set_default_options(options)
311
+ options[:table_name] ||= table_name
312
+ options[:columns] ||= columns
313
+
314
+ # Don't request the ROW key explicitly, it always comes back
315
+ options[:select] ||= qualified_column_names_without_row_key.map{|c|
316
+ connection.hypertable_column_name(c, table_name)
317
+ }
318
+ end
319
+
320
+ # Return the first record that matches the finder options.
321
+ def find_initial(options)
322
+ options.update(:limit => 1)
323
+
324
+ if options[:scan_spec]
325
+ find_by_options(options)
326
+ else
327
+ find_by_options(options).first
328
+ end
329
+ end
330
+
331
+ # Return an array of records matching the finder options.
332
# Returns the records matching the finder options.
#
# options - Hash of finder options. It is passed to set_default_options
#           first, then to the connection.
#
# Returns the scan spec built by the connection when options[:scan_spec] is
# set; otherwise the result of convert_cells_to_instantiated_rows applied to
# the cells the connection returned.
def find_by_options(options)
  set_default_options(options)

  # If requested, instead of retrieving the matching cells from Hypertable,
  # simply return a scan spec that matches the finder options.
  return connection.convert_options_to_scan_spec(options) if options[:scan_spec]

  cells = connection.execute_with_options(options)

  if HyperBase.log_calls
    msg = [ "Select" ]
    options.keys.each do |key|
      case key
      when :columns
        msg << " columns\t#{options[:columns].map{|c| c.name}.join(',')}"
      else
        msg << " #{key}\t#{options[key]}"
      end
    end
    # Count the cells actually returned; the previous code interpolated an
    # undefined name (cell_count) and raised NameError when logging was on.
    msg << "Returned #{cells.length} cells"

    # Log one newline-separated string, as write_cells/delete_cells do.
    RAILS_DEFAULT_LOGGER.info(msg.join("\n"))
  end

  convert_cells_to_instantiated_rows(cells, options)
end
362
+
363
+ # Converts cells that come back from Hypertable into hashes. Each
364
+ # hash represents a separate record (where each cell that has the same
365
+ # row key is considered one record).
366
+ def convert_cells_to_hashes(cells, options={})
367
+ rows = []
368
+ current_row = {}
369
+
370
+ # Cells are guaranteed to come back in row key order, so assemble
371
+ # a row by iterating over each cell and checking to see if the row key
372
+ # has changed. If it has, then the row is complete and needs to be
373
+ # instantiated before processing the next cell.
374
+ cells.each_with_index do |cell, i|
375
+ current_row['ROW'] = cell[ROW_KEY_OFFSET]
376
+
377
+ family = connection.rubify_column_name(cell[COLUMN_FAMILY_OFFSET])
378
+
379
+ if !cell[COLUMN_QUALIFIER_OFFSET].blank?
380
+ current_row[family] ||= {}
381
+ current_row[family][cell[COLUMN_QUALIFIER_OFFSET]] = cell[VALUE_OFFSET]
382
+ else
383
+ current_row[family] = cell[VALUE_OFFSET]
384
+ end
385
+
386
+ # Instantiate the row if we've processed all cells for the row
387
+ next_index = i + 1
388
+
389
+ # Check to see if next cell has different row key or if we're at
390
+ # the end of the cell stream.
391
+ if (cells[next_index] and cells[next_index][ROW_KEY_OFFSET] != current_row['ROW']) or next_index >= cells.length
392
+ # Make sure that the resulting object has attributes for all
393
+ # columns - even ones that aren't in the response (due to limited
394
+ # select)
395
+
396
+ for col in column_families_without_row_key
397
+ next if options[:instantiate_only_requested_columns] && !options[:select].include?(col.name)
398
+
399
+ if !current_row.has_key?(col.name)
400
+ if col.is_a?(ActiveRecord::ConnectionAdapters::QualifiedColumn)
401
+ current_row[col.name] = {}
402
+ else
403
+ current_row[col.name] = nil
404
+ end
405
+ end
406
+ end
407
+
408
+ rows << current_row
409
+ current_row = {}
410
+ end
411
+ end
412
+
413
+ rows
414
+ end
415
+
416
+ def convert_cells_to_instantiated_rows(cells, options={})
417
+ convert_cells_to_hashes(cells, options).map{|row| instantiate(row)}
418
+ end
419
+
420
+ # Return the records that match a specific HQL query.
421
+ def find_by_hql(hql)
422
+ hql_result = connection.execute(hql)
423
+ cells_in_native_array_format = hql_result.cells.map do |c|
424
+ connection.cell_native_array(c.row_key, c.column_family, c.column_qualifier, c.value)
425
+ end
426
+ convert_cells_to_instantiated_rows(cells_in_native_array_format)
427
+ end
428
+ alias :find_by_sql :find_by_hql
429
+
430
+ # Return multiple records by row keys.
431
# Looks up records by one or more row keys.
#
# ids     - Array of finder arguments: either row keys, or a single Array
#           of row keys. Nested arrays are flattened; nils and duplicates
#           are dropped.
# options - finder options handed on to find_one / find_some.
#
# Returns a single result for one key, a one-element Array when that key
# was given inside an Array, the result of find_some for several keys, and
# the given Array itself when it is empty.
# Raises RecordNotFound when no usable key remains.
def find_from_ids(ids, options)
  head = ids.first
  wants_list = head.kind_of?(Array)
  return head if wants_list && head.empty?

  keys = ids.flatten.compact.uniq

  if keys.empty?
    raise RecordNotFound, "Couldn't find #{name} without an ID"
  end
  return find_some(keys, options) if keys.size > 1

  record = find_one(keys.first, options)
  wants_list ? [ record ] : record
end
446
+
447
+ # Return a single record identified by a row key.
448
+ def find_one(id, options)
449
+ return nil if id.blank?
450
+
451
+ options[:row_keys] = [id.to_s]
452
+
453
+ if result = find_initial(options)
454
+ result
455
+ else
456
+ raise ::ActiveRecord::RecordNotFound, "Couldn't find #{name} with ID=#{id}"
457
+ end
458
+ end
459
+
460
+ def find_some(ids, options)
461
+ options[:row_keys] = [ids.map{|i| i.to_s}]
462
+ find_by_options(options)
463
+ end
464
+
465
+ def table_exists?(name=table_name)
466
+ connection.tables.include?(name)
467
+ end
468
+
469
+ def drop_table
470
+ connection.drop_table(table_name) if table_exists?
471
+ end
472
+
473
+ # Returns the primary key field for a table. In Hypertable, a single
474
+ # row key exists for each row. The row key is referred to as ROW
475
+ # in HQL, so we'll refer to it the same way here.
476
+ def primary_key
477
+ "ROW"
478
+ end
479
+
480
+ # Returns array of column objects for table associated with this class.
481
+ def columns
482
+ unless @columns
483
+ @columns = connection.columns(table_name, "#{name} Columns")
484
+ @qualified_columns ||= []
485
+ @qualified_columns.each{|qc|
486
+ # Remove the column family from the column list
487
+ @columns = @columns.reject{|c| c.name == qc[:column_name].to_s}
488
+ connection.remove_column_from_name_map(table_name, qc[:column_name].to_s)
489
+
490
+ # Add the new qualified column family to the column list
491
+ @columns << connection.add_qualified_column(table_name, qc[:column_name].to_s, qc[:qualifiers])
492
+ }
493
+ @columns.each {|column| column.primary = column.name == primary_key}
494
+ end
495
+ @columns
496
+ end
497
+
498
# Reports whether column_name is one of the column families recorded in
# @qualified_columns.
#
# column_name - String or Symbol naming the column family.
#
# Returns true or false. Returns false when no qualified columns have been
# recorded yet (@qualified_columns is still nil) instead of raising.
def qualified?(column_name)
  wanted = column_name.to_s
  # Compare as strings, the same way the columns method matches
  # qc[:column_name], so entries stored as Strings or Symbols both match.
  (@qualified_columns || []).any? { |qc| qc[:column_name].to_s == wanted }
end
501
+
502
# Returns the keys of +attributes+ quoted for this table by the connection's
# quote_column_name_for_table.
#
# attributes - Hash whose keys are column names.
# NOTE(review): the default calls attributes_with_quotes on the receiver; in
# this file that method is only defined at instance level, so callers should
# probably pass the hash explicitly - confirm.
def quoted_column_names(attributes=attributes_with_quotes)
  attributes.keys.map do |column_name|
    # Call connection on the receiver, as the neighbouring methods do. The
    # previous self.class.connection resolved to Class.connection here and
    # raised NoMethodError.
    connection.quote_column_name_for_table(column_name, table_name)
  end
end
507
+
508
+ def column_families_without_row_key
509
+ columns[1,columns.length]
510
+ end
511
+
512
# Lists the column names of column_families_without_row_key, with every
# family recorded in @qualified_columns replaced by one "family:qualifier"
# entry per declared qualifier (appended at the end of the list).
#
# A qualified family that declares no qualifiers is removed from the list
# and nothing is added for it.
def qualified_column_names_without_row_key
  names = column_families_without_row_key.map { |family| family.name }

  @qualified_columns.each do |declared|
    family_name = declared[:column_name]
    names.delete(family_name.to_s)
    names.concat(declared[:qualifiers].map { |qualifier| "#{family_name}:#{qualifier}" })
  end

  names
end
522
+
523
+ # qualified_column :misc, :qualifiers => [:name, :url]
524
+ attr_accessor :qualified_columns
525
+ def qualified_column(*attrs)
526
+ @qualified_columns ||= []
527
+ name = attrs.shift
528
+
529
+ qualifiers = attrs.shift
530
+ qualifiers = qualifiers.symbolize_keys[:qualifiers] if qualifiers
531
+ @qualified_columns << {
532
+ :column_name => name,
533
+ :qualifiers => qualifiers || []
534
+ }
535
+ end
536
+
537
+ # row_key_attributes :regex => /_(\d{4}-\d{2}-\d{2}_\d{2}:\d{2})$/, :attribute_names => [:timestamp]
538
+ attr_accessor :row_key_attributes
539
+ def row_key_attributes(*attrs)
540
+ symbolized_attrs = attrs.first.symbolize_keys
541
+ regex = symbolized_attrs[:regex]
542
+ names = symbolized_attrs[:attribute_names]
543
+
544
+ names.each_with_index do |name, i|
545
+ self.class_eval %{
546
+ def #{name}
547
+ @_row_key_attributes ||= {}
548
+
549
+ if !@_row_key_attributes['#{name}'] || self.ROW_changed?
550
+ matches = self.ROW.to_s.match(#{regex.to_s.inspect})
551
+ @_row_key_attributes['#{name}'] = if matches
552
+ (matches[#{i + 1}] || '')
553
+ else
554
+ ''
555
+ end
556
+ end
557
+
558
+ @_row_key_attributes['#{name}']
559
+ end
560
+ }
561
+ end
562
+
563
+ if !names.blank?
564
+ self.class_eval %{
565
+ def self.assemble_row_key_from_attributes(attributes)
566
+ %w(#{names.join(' ')}).map do |n|
567
+ attributes[n.to_sym]
568
+ end.compact.join('_')
569
+ end
570
+ }
571
+ end
572
+ end
573
+
574
+ attr_accessor :mutator, :mutator_flags, :mutator_flush_interval
575
+ def mutator_options(*attrs)
576
+ symbolized_attrs = attrs.first.symbolize_keys
577
+ @mutator_flags = symbolized_attrs[:flags].to_i
578
+ @mutator_flush_interval = symbolized_attrs[:flush_interval].to_i
579
+ if symbolized_attrs[:persistent]
580
+ @mutator = self.open_mutator(@mutator_flags, @mutator_flush_interval)
581
+ end
582
+ end
583
+
584
+ # Mutator methods - passed through straight to the Hypertable Adapter.
585
+
586
+ # Return an open mutator on this table.
587
+ def open_mutator(flags=@mutator_flags.to_i, flush_interval=@mutator_flush_interval.to_i)
588
+ self.connection.open_mutator(table_name, flags, flush_interval)
589
+ end
590
+
591
+ # As of Hypertable 0.9.2.5, flush is automatically performed on a
592
+ # close_mutator call (so flush should default to 0).
593
+ def close_mutator(mutator, flush=0)
594
+ self.connection.close_mutator(mutator, flush)
595
+ end
596
+
597
+ def flush_mutator(mutator)
598
+ self.connection.flush_mutator(mutator)
599
+ end
600
+
601
+ # Scanner methods
602
+ def open_scanner(scan_spec)
603
+ self.connection.open_scanner(self.table_name, scan_spec)
604
+ end
605
+
606
+ def close_scanner(scanner)
607
+ self.connection.close_scanner(scanner)
608
+ end
609
+
610
+ def with_scanner(scan_spec, &block)
611
+ self.connection.with_scanner(self.table_name, scan_spec, &block)
612
+ end
613
+
614
+ # Iterator methods
615
+ def each_cell(scanner, &block)
616
+ self.connection.each_cell(scanner, &block)
617
+ end
618
+
619
+ def each_cell_as_arrays(scanner, &block)
620
+ self.connection.each_cell_as_arrays(scanner, &block)
621
+ end
622
+
623
+ def each_row(scanner, &block)
624
+ self.connection.each_row(scanner, &block)
625
+ end
626
+
627
+ def each_row_as_arrays(scanner, &block)
628
+ self.connection.each_row_as_arrays(scanner, &block)
629
+ end
630
+
631
+ def with_thrift_client(&block)
632
+ self.connection.raw_thrift_client(&block)
633
+ end
634
+ end
635
+ end
636
+ end
@@ -0,0 +1,12 @@
1
+ #
2
+ # Autogenerated by Thrift
3
+ #
4
+ # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
5
+ #
6
+
7
+ require 'client_types'
8
+
9
+ module Hypertable
10
+ module ThriftGen
11
+ end
12
+ end