hyper_record 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/CHANGELOG +83 -0
- data/LICENSE +20 -0
- data/README +49 -0
- data/Rakefile +43 -0
- data/VERSION.yml +4 -0
- data/benchmark/save.rb +58 -0
- data/hyper_record.gemspec +76 -0
- data/init.rb +1 -0
- data/lib/active_record/connection_adapters/hyper_table_definition.rb +26 -0
- data/lib/active_record/connection_adapters/hypertable_adapter.rb +680 -0
- data/lib/active_record/connection_adapters/qualified_column.rb +57 -0
- data/lib/associations/hyper_has_and_belongs_to_many_association_extension.rb +107 -0
- data/lib/associations/hyper_has_many_association_extension.rb +87 -0
- data/lib/hyper_record.rb +636 -0
- data/lib/hypertable/gen-rb/client_constants.rb +12 -0
- data/lib/hypertable/gen-rb/client_service.rb +1436 -0
- data/lib/hypertable/gen-rb/client_types.rb +253 -0
- data/lib/hypertable/gen-rb/hql_constants.rb +12 -0
- data/lib/hypertable/gen-rb/hql_service.rb +281 -0
- data/lib/hypertable/gen-rb/hql_types.rb +73 -0
- data/lib/hypertable/thrift_client.rb +94 -0
- data/lib/hypertable/thrift_transport_monkey_patch.rb +29 -0
- data/pkg/hyper_record-0.2.8.gem +0 -0
- data/spec/fixtures/pages.yml +8 -0
- data/spec/fixtures/qualified_pages.yml +1 -0
- data/spec/lib/associations_spec.rb +235 -0
- data/spec/lib/hyper_record_spec.rb +948 -0
- data/spec/lib/hypertable_adapter_spec.rb +121 -0
- data/spec/spec_helper.rb +130 -0
- data/test/test_helper.rb +10 -0
- data/test/thrift_client_test.rb +590 -0
- metadata +99 -0
@@ -0,0 +1,680 @@
|
|
1
|
+
# For each supported data store, ActiveRecord has an adapter that implements
# functionality specific to that store as well as providing metadata for
# data held within the store. Features implemented by adapters typically
# include connection handling, metadata listings (tables and schema),
# statement execution (selects, writes, etc.), latency measurement, and
# fixture handling.
#
# This file implements the adapter for Hypertable used by ActiveRecord
# (HyperRecord). The adapter communicates with Hypertable using the
# Thrift client API documented here:
# http://hypertable.org/thrift-api-ref/index.html
#
# Refer to the main hypertable site (http://hypertable.org/) for additional
# information and documentation (http://hypertable.org/documentation.html)
# on Hypertable and the Thrift client API.
|
16
|
+
|
17
|
+
unless defined?(ActiveRecord::ConnectionAdapters::AbstractAdapter)
|
18
|
+
# running into some situations where rails has already loaded this, without
|
19
|
+
# require realizing it, and loading again is unsafe (alias_method_chain is a
|
20
|
+
# great way to create infinite recursion loops)
|
21
|
+
require 'active_record/connection_adapters/abstract_adapter'
|
22
|
+
end
|
23
|
+
require 'active_record/connection_adapters/qualified_column'
|
24
|
+
require 'active_record/connection_adapters/hyper_table_definition'
|
25
|
+
|
26
|
+
module ActiveRecord
|
27
|
+
class Base
|
28
|
+
# Include the thrift driver if one hasn't already been loaded.
# Does nothing (and returns nil) when Hypertable::ThriftClient is already
# defined; otherwise activates the hypertable-thrift-client gem and
# requires 'thrift_client'.
def self.require_hypertable_thrift_client
  return if defined?(Hypertable::ThriftClient)

  gem 'hypertable-thrift-client'
  require_dependency 'thrift_client'
end
|
35
|
+
|
36
|
+
# Establishes a connection to the Thrift Broker (which brokers requests
# to Hypertable itself). The connection details must appear in
# config/database.yml, e.g.,
#   hypertable_dev:
#     host: localhost
#     port: 38088
#     timeout: 20000
#
# Options:
# * <tt>:host</tt> - Required. A RuntimeError is raised when it is missing.
# * <tt>:port</tt> - Defaults to 38088
# * <tt>:timeout</tt> - Timeout for queries in milliseconds. Defaults to 20000
#
# Returns a ConnectionAdapters::HypertableAdapter wrapping a newly created
# Hypertable::ThriftClient.
def self.hypertable_connection(config)
  config = config.symbolize_keys
  require_hypertable_thrift_client

  raise "Hypertable config missing :host in database.yml" unless config[:host]

  # :host is guaranteed to be present here, so only the optional
  # settings need defaults.
  config[:port] ||= 38088
  config[:timeout] ||= 20000

  connection = Hypertable::ThriftClient.new(config[:host], config[:port],
    config[:timeout])

  ConnectionAdapters::HypertableAdapter.new(connection, logger, config)
end
|
63
|
+
end
|
64
|
+
|
65
|
+
module ConnectionAdapters
|
66
|
+
class HypertableAdapter < AbstractAdapter
|
67
|
+
# The following cattr_accessors are used to record and access query
# performance statistics. The counters are class variables, so they are
# shared by every adapter instance.
@@read_latency = 0.0
@@write_latency = 0.0
@@cells_read = 0
cattr_accessor :read_latency, :write_latency, :cells_read

# Used by retry_on_connection_error() to determine whether to retry.
# NOTE(review): this assignment runs in the class body, so it sets an
# instance variable on the class object rather than on adapter instances;
# retry_on_connection_error assigns the per-instance value itself.
@retry_on_failure = true
attr_accessor :retry_on_failure
|
77
|
+
|
78
|
+
# Build an adapter around an existing Thrift client.
#
# connection - client object, handed to AbstractAdapter#initialize
# logger     - logger, handed to AbstractAdapter#initialize
# config     - connection settings hash; raw_thrift_client reads
#              :host, :port and :timeout from it
def initialize(connection, logger, config)
  super(connection, logger)
  @config = config
  # table name => { Ruby-safe column name => original Hypertable name }
  @hypertable_column_names = {}
end
|
83
|
+
|
84
|
+
# Hand the given block to Hypertable.with_thrift_client, using the host,
# port and timeout stored in this adapter's config. Returns whatever
# Hypertable.with_thrift_client returns.
def raw_thrift_client(&block)
  Hypertable.with_thrift_client(@config[:host], @config[:port],
    @config[:timeout], &block)
end
|
88
|
+
|
89
|
+
# Return the current set of performance statistics. The application
# can retrieve (and reset) these statistics after every query or
# request for its own logging purposes.
#
# Returns [read_latency, write_latency, cells_read].
def self.get_timing
  [@@read_latency, @@write_latency, @@cells_read]
end
|
95
|
+
|
96
|
+
# Reset performance metrics: both latencies go back to 0.0 and the
# cells-read counter goes back to 0.
def self.reset_timing
  @@read_latency = 0.0
  @@write_latency = 0.0
  @@cells_read = 0
end
|
102
|
+
|
103
|
+
# Name reported for this adapter.
def adapter_name
  'Hypertable'
end
|
106
|
+
|
107
|
+
# This adapter reports that it supports migrations.
def supports_migrations?
  true
end
|
110
|
+
|
111
|
+
# Hypertable only supports string types at the moment, so treat
# all values as strings and leave it to the application to handle
# types.
def native_database_types
  {
    :string => { :name => "varchar", :limit => 255 }
  }
end
|
119
|
+
|
120
|
+
# Reject any finder :conditions, since none can be translated into a
# scan spec.
#
# options - finder options hash; only options[:conditions] is inspected
#
# Returns nil when no conditions are given.
# Raises RuntimeError for every non-nil value of options[:conditions].
def sanitize_conditions(options)
  conditions = options[:conditions]
  return if conditions.nil?

  if conditions.is_a?(Hash)
    # requires Hypertable API to support query by arbitrary cell value
    raise "HyperRecord does not support specifying conditions by Hash"
  end

  # The previous message here claimed that hash conditions were supported,
  # which contradicted the branch above.
  raise "HyperRecord does not support specifying conditions by #{conditions.class}"
end
|
131
|
+
|
132
|
+
# Execute an HQL query against Hypertable and return the native
# HqlResult object that comes back from the Thrift client API.
#
# hql  - HQL statement string
# name - optional label passed to log together with the statement
#
# The query runs inside retry_on_connection_error.
def execute(hql, name=nil)
  log(hql, name) do
    retry_on_connection_error { @connection.hql_query(hql) }
  end
end
|
139
|
+
|
140
|
+
# Execute a query against Hypertable and return the matching cells.
# The query parameters are denoted in an options hash, which is
# converted to a "scan spec" by convert_options_to_scan_spec.
# A "scan spec" is the mechanism used to specify query parameters
# (e.g., the columns to retrieve, the number of rows to retrieve, etc.)
# to Hypertable.
#
# options[:table_name] names the table to scan. The elapsed time and the
# number of cells returned are added to the class-level read statistics.
def execute_with_options(options)
  scan_spec = convert_options_to_scan_spec(options)
  started_at = Time.now

  # Use native array method (get_cells_as_arrays) for cell retrieval -
  # much faster than get_cells that returns Hypertable::ThriftGen::Cell
  # objects.
  # [
  #   ["page_1", "name", "", "LOLcats and more", "1237331693147619001"],
  #   ["page_1", "url", "", "http://...", "1237331693147619002"]
  # ]
  cells = retry_on_connection_error do
    @connection.get_cells_as_arrays(options[:table_name], scan_spec)
  end

  # Capture performance metrics
  @@read_latency += Time.now - started_at
  @@cells_read += cells.length

  cells
end
|
167
|
+
|
168
|
+
# Convert an options hash to a scan spec. A scan spec is the native
# representation of the query parameters that must be sent to
# Hypertable.
# http://hypertable.org/thrift-api-ref/Client.html#Struct_ScanSpec
#
# Note that the options hash is modified in place: defaults are written
# for :start_inclusive, :end_inclusive, :revs and :return_deletes, and
# :row_intervals is replaced by an array of RowInterval objects.
#
# Raises RuntimeError for unsupported conditions (see sanitize_conditions),
# for :start_row without :end_row, and for unrecognized option keys.
def convert_options_to_scan_spec(options={})
  sanitize_conditions(options)

  options[:start_inclusive] = true unless options.has_key?(:start_inclusive)
  options[:end_inclusive] = true unless options.has_key?(:end_inclusive)

  # Rows can be specified using a number of different options:
  #   :row_keys => [row_key_1, row_key_2, ...]
  #   :start_row and :end_row
  #   :row_intervals => [[start_1, end_1], [start_2, end_2]]
  row_intervals =
    if options[:row_keys]
      options[:row_keys].flatten.map { |rk| [rk, rk] }
    elsif options[:row_intervals]
      options[:row_intervals].map { |ri| [ri.first, ri.last] }
    elsif options[:start_row]
      raise "missing :end_row" if !options[:end_row]
      [[options[:start_row], options[:end_row]]]
    else
      []
    end

  # Add each row interval to the scan spec
  options[:row_intervals] = row_intervals.map do |row_interval|
    ri = Hypertable::ThriftGen::RowInterval.new
    ri.start_row = row_interval.first
    ri.start_inclusive = options[:start_inclusive]
    ri.end_row = row_interval.last
    ri.end_inclusive = options[:end_inclusive]
    ri
  end

  scan_spec = Hypertable::ThriftGen::ScanSpec.new

  # Hypertable can store multiple revisions for each cell but this
  # feature does not map into an ORM very well. By default, just
  # retrieve the latest revision of each cell. Since this is the most
  # common config when using HyperRecord, tables should be defined
  # with MAX_VERSIONS=1 at creation time to save space and reduce
  # query time.
  options[:revs] ||= 1

  # Most of the time, we're not interested in cells that have been
  # marked deleted but have not actually been deleted yet.
  options[:return_deletes] ||= false

  options.keys.each do |key|
    case key.to_sym
    when :row_intervals
      scan_spec.row_intervals = options[key]
    when :cell_intervals
      scan_spec.cell_intervals = options[key]
    when :start_time
      scan_spec.start_time = options[key]
    when :end_time
      scan_spec.end_time = options[key]
    when :limit
      scan_spec.row_limit = options[key]
    when :revs
      scan_spec.revs = options[key]
    when :return_deletes
      scan_spec.return_deletes = options[key]
    when :select
      # Columns listed here can only be column families (not
      # column qualifiers) at this time.
      requested_columns =
        case options[key]
        when String then requested_columns_from_string(options[key])
        when Symbol then requested_columns_from_string(options[key].to_s)
        when Array then options[key].map { |k| k.to_s }
        else options[key]
        end

      # Keep only the column family part of each requested column.
      scan_spec.columns = requested_columns.map do |column|
        is_qualified_column_name?(column)[1]
      end.uniq
    when :table_name, :start_row, :end_row, :start_inclusive, :end_inclusive, :columns, :row_keys, :conditions, :include, :readonly, :scan_spec, :instantiate_only_requested_columns
      # ignore
    else
      raise "Unrecognized scan spec option: #{key}"
    end
  end

  scan_spec
end
|
263
|
+
|
264
|
+
# Turn a :select string into a list of column names.
#
# s - comma-separated column names, or '*'
#
# Returns [] for '*', otherwise the names with surrounding whitespace
# stripped.
def requested_columns_from_string(s)
  return [] if s == '*'

  s.split(',').map { |column| column.strip }
end
|
271
|
+
|
272
|
+
# Exceptions generated by Thrift IDL do not set a message.
# This causes a lot of problems for Rails which expects a String
# value and throws an exception when it encounters NilClass.
# Unfortunately, you cannot always assign a message to exceptions, so a
# singleton method is defined to accomplish the same goal.
#
# Yields to the block and returns its value. Any exception is re-raised;
# when its message is nil, the message is first filled in from err.what
# (or '' when that is nil too).
def handle_thrift_exceptions_with_missing_message
  yield
rescue Exception => err
  # Every exception is re-raised below, so rescuing Exception here only
  # patches the message and never swallows anything.
  unless err.message
    if err.respond_to?("message=")
      err.message = err.what || ''
    else
      def err.message
        self.what || ''
      end
    end
  end

  raise err
end
|
294
|
+
|
295
|
+
# Attempt to reconnect to the Thrift Broker once before aborting.
# This ensures graceful recovery in the case that the Thrift Broker
# goes down and then comes back up.
#
# Yields to the block and returns its value. On a Thrift transport,
# application or protocol exception, or an IOError, the connection is
# closed and reopened and the block is run a second time; a second
# failure is re-raised to the caller.
def retry_on_connection_error
  @retry_on_failure = true
  begin
    handle_thrift_exceptions_with_missing_message { yield }
  rescue Thrift::TransportException, IOError, Thrift::ApplicationException, Thrift::ProtocolException => err
    raise err unless @retry_on_failure

    # Clear the flag before retrying so the block runs at most twice.
    @retry_on_failure = false
    @connection.close
    @connection.open
    retry
  end
end
|
313
|
+
|
314
|
+
# Column Operations
|
315
|
+
|
316
|
+
# Returns array of column objects for the table associated with this
# class. Hypertable allows columns to include dashes in the name.
# This doesn't play well with Ruby (can't have dashes in method names),
# so we maintain a mapping of original column names to Ruby-safe
# names.
#
# table_name - table whose schema is read through describe_table
# name       - unused
#
# The Ruby-safe => original name mapping for the table is recorded in
# @hypertable_column_names as a side effect.
def columns(table_name, name = nil)
  # Each table always has a row key called 'ROW'
  result = [ Column.new('ROW', '') ]

  schema = describe_table(table_name)
  doc = REXML::Document.new(schema)
  column_families = doc.get_elements('Schema/AccessGroup/ColumnFamily')

  @hypertable_column_names[table_name] ||= {}
  column_families.each do |cf|
    # Columns are lazily-deleted in Hypertable so still may show up
    # in describe table output. Ignore. A family with no <deleted>
    # element is treated as not deleted.
    deleted = cf.elements['deleted']
    next if deleted && deleted.text == 'true'

    column_name = cf.elements['Name'].text
    rubified_name = rubify_column_name(column_name)
    @hypertable_column_names[table_name][rubified_name] = column_name
    result << new_column(rubified_name, '')
  end

  result
end
|
344
|
+
|
345
|
+
# Forget the name mapping recorded for a column.
#
# table_name - table the column belongs to
# name       - column name; it is rubified before lookup
#
# Returns the original Hypertable name that was removed, or nil when
# nothing was recorded for the table or column.
def remove_column_from_name_map(table_name, name)
  name_map = @hypertable_column_names[table_name]
  name_map && name_map.delete(rubify_column_name(name))
end
|
348
|
+
|
349
|
+
# Record the mapping from the rubified form of name to name itself for
# the given table. Returns name.
#
# NOTE(review): this expects an entry for table_name to exist already in
# @hypertable_column_names (columns creates it); confirm that callers
# guarantee this.
def add_column_to_name_map(table_name, name)
  @hypertable_column_names[table_name][rubify_column_name(name)] = name
end
|
352
|
+
|
353
|
+
# Build a QualifiedColumn for a column family and register every
# "family:qualifier" name in the table's column name map.
#
# table_name    - table the column family belongs to
# column_family - column family name
# qualifiers    - list of qualifiers declared for the family
# default, sql_type, null - passed through to QualifiedColumn.new
#
# Returns the new QualifiedColumn.
def add_qualified_column(table_name, column_family, qualifiers=[], default='', sql_type=nil, null=true)
  qc = QualifiedColumn.new(column_family, default, sql_type, null)
  qc.qualifiers = qualifiers
  qualifiers.each do |qualifier|
    add_column_to_name_map(table_name, qualified_column_name(column_family, qualifier))
  end
  qc
end
|
359
|
+
|
360
|
+
# Build a Column whose name is the rubified form of column_name.
def new_column(column_name, default_value='')
  Column.new(rubify_column_name(column_name), default_value)
end
|
363
|
+
|
364
|
+
# Join a column family and an optional qualifier as "family:qualifier".
# A nil qualifier yields just the family name.
def qualified_column_name(column_family, qualifier=nil)
  [column_family, qualifier].compact.join(':')
end
|
367
|
+
|
368
|
+
# Convert a Hypertable column name into a Ruby-safe one: each run of
# one or more dashes becomes a single underscore.
def rubify_column_name(column_name)
  column_name.to_s.gsub(/-+/, '_')
end
|
371
|
+
|
372
|
+
# Split a column name on its first ':' into family and qualifier.
#
# Returns [true, family, qualifier] when a qualifier part is present,
# otherwise [false, column_name, nil].
def is_qualified_column_name?(column_name)
  column_family, qualifier = column_name.split(':', 2)
  return [true, column_family, qualifier] if qualifier

  [false, column_name, nil]
end
|
380
|
+
|
381
|
+
# Schema alterations
|
382
|
+
|
383
|
+
# Not supported by this adapter; always raises RuntimeError.
def rename_column(table_name, column_name, new_column_name)
  raise "rename_column operation not supported by Hypertable."
end
|
386
|
+
|
387
|
+
# Not supported by this adapter; always raises RuntimeError.
def change_column(table_name, column_name, new_column_name)
  raise "change_column operation not supported by Hypertable."
end
|
390
|
+
|
391
|
+
# Translate "sexy" ActiveRecord::Migration syntax to an HQL
# CREATE TABLE statement.
#
# table_name - name of the table to create
# options    - :force drops an existing table of the same name first;
#              :options is appended verbatim after the column list
#
# Yields a HyperTableDefinition on which the columns are declared.
# Returns the HQL string.
def create_table_hql(table_name, options={}, &block)
  table_definition = HyperTableDefinition.new(self)

  yield table_definition

  drop_table(table_name, options) if options[:force] && table_exists?(table_name)

  column_sql = table_definition.columns.map do |col|
    [
      quote_table_name(col.name),
      col.max_versions ? "MAX_VERSIONS=#{col.max_versions}" : ''
    ].join(' ')
  end

  [
    "CREATE TABLE #{quote_table_name(table_name)} (",
    column_sql.join(', '),
    ") #{options[:options]}"
  ].join(' ').strip
end
|
415
|
+
|
416
|
+
# Create a table by executing the HQL produced by create_table_hql.
# Takes the same arguments and block as create_table_hql.
def create_table(table_name, options={}, &block)
  execute(create_table_hql(table_name, options, &block))
end
|
419
|
+
|
420
|
+
# Drop a table through the Thrift client.
#
# options[:if_exists] is passed as the second argument to the client's
# drop_table call (false when not given). The call runs inside
# retry_on_connection_error.
def drop_table(table_name, options = {})
  retry_on_connection_error do
    @connection.drop_table(table_name, options[:if_exists] || false)
  end
end
|
425
|
+
|
426
|
+
# Not supported by this adapter; always raises RuntimeError.
def rename_table(table_name, options = {})
  raise "rename_table operation not supported by Hypertable."
end
|
429
|
+
|
430
|
+
# Not supported by this adapter; always raises RuntimeError.
def change_column_default(table_name, column_name, default)
  raise "change_column_default operation not supported by Hypertable."
end
|
433
|
+
|
434
|
+
# Not supported by this adapter; always raises RuntimeError.
def change_column_null(table_name, column_name, null, default = nil)
  raise "change_column_null operation not supported by Hypertable."
end
|
437
|
+
|
438
|
+
# Add a column to a table by executing an HQL ALTER TABLE ... ADD
# statement.
#
# type is accepted but not used when building the statement.
# options[:max_versions], when not blank, adds a MAX_VERSIONS clause.
def add_column(table_name, column_name, type=:string, options = {})
  hql = [ "ALTER TABLE #{quote_table_name(table_name)} ADD (" ]
  hql << quote_column_name(column_name)
  hql << "MAX_VERSIONS=#{options[:max_versions]}" unless options[:max_versions].blank?
  hql << ")"
  execute(hql.join(' '))
end
|
445
|
+
|
446
|
+
# Append column options to an HQL fragment in place.
#
# hql     - string that is appended to
# options - column options; :default, :column and :null are read
#
# NOTE(review): the default value is emitted right after a literal
# " MAX_VERSIONS =1 " prefix; confirm this is the intended HQL.
def add_column_options!(hql, options)
  if options_include_default?(options)
    hql << " MAX_VERSIONS =1 #{quote(options[:default], options[:column])}"
  end

  # must explicitly check for :null to allow change_column to work on migrations
  hql << " NOT NULL" if options[:null] == false
end
|
453
|
+
|
454
|
+
# Drop one or more columns, executing one HQL ALTER TABLE ... DROP
# statement per column. Column names may be given as separate arguments
# or as (nested) arrays.
def remove_column(table_name, *column_names)
  column_names.flatten.each do |column_name|
    execute "ALTER TABLE #{quote_table_name(table_name)} DROP(#{quote_column_name(column_name)})"
  end
end
alias :remove_columns :remove_column
|
460
|
+
|
461
|
+
# Quote a value for use in a statement.
#
# nil becomes the empty string and String values are returned unchanged;
# every other value is delegated to the superclass implementation.
def quote(value, column = nil)
  case value
  when NilClass then ''
  when String then value
  else super(value, column)
  end
end
|
468
|
+
|
469
|
+
# Wrap a column name in single quotes. The name itself is not escaped.
def quote_column_name(name)
  "'#{name}'"
end
|
472
|
+
|
473
|
+
# Quote the Hypertable name that corresponds to a (possibly rubified)
# column name of the given table.
def quote_column_name_for_table(name, table_name)
  quote_column_name(hypertable_column_name(name, table_name))
end
|
476
|
+
|
477
|
+
# Look up the original Hypertable column name for a Ruby-safe name.
#
# name                  - Ruby-safe column name
# table_name            - table the column belongs to
# declared_columns_only - when true, return nil for names that are not
#                         in the table's name map instead of echoing name
#
# The table's name map is loaded through columns when it is still blank.
# Raises RuntimeError, with the table, column and current name map in the
# message, when the lookup fails.
def hypertable_column_name(name, table_name, declared_columns_only=false)
  begin
    columns(table_name) if @hypertable_column_names[table_name].blank?
    n = @hypertable_column_names[table_name][name]
    n ||= name if !declared_columns_only
    n
  rescue StandardError => err
    # Use inspect rather than pp: pp writes to stdout as a side effect.
    raise [
      "hypertable_column_name exception",
      err.message,
      "table: #{table_name}",
      "column: #{name}",
      "@htcn: #{@hypertable_column_names.inspect}"
    ].join("\n")
  end
end
|
493
|
+
|
494
|
+
# Return an XML document describing the table named in the first
# argument. Output is equivalent to that returned by the DESCRIBE
# TABLE command available in the Hypertable CLI.
# <Schema generation="2">
#   <AccessGroup name="default">
#     <ColumnFamily id="1">
#       <Generation>1</Generation>
#       <Name>date</Name>
#       <deleted>false</deleted>
#     </ColumnFamily>
#   </AccessGroup>
# </Schema>
def describe_table(table_name)
  retry_on_connection_error do
    @connection.get_schema(table_name)
  end
end
|
511
|
+
|
512
|
+
# Returns an array of tables available in the current Hypertable
# instance. The name argument is accepted but not used.
def tables(name=nil)
  retry_on_connection_error do
    @connection.get_tables
  end
end
|
519
|
+
|
520
|
+
# Write an array of cells to the named table. By default, write_cells
# will open and close a mutator for this operation. Closing the
# mutator flushes the data, which guarantees it is stored in
# Hypertable before the call returns. This also slows down the
# operation, so if you're doing lots of writes and want to manage
# mutator flushes at the application layer then you can pass in a
# mutator as argument. Mutators can be created with the open_mutator
# method. In the near future (Summer 2009), Hypertable will provide
# a periodic mutator that automatically flushes at specific intervals.
#
# table_name     - table to write to
# cells          - cells in native array format; nothing happens when blank
# mutator        - optional caller-managed mutator; it is not closed here
# flags, flush_interval - passed to open_mutator when a mutator is opened
#
# The elapsed time is added to the class-level write latency.
def write_cells(table_name, cells, mutator=nil, flags=nil, flush_interval=nil)
  return if cells.blank?

  retry_on_connection_error {
    # Only a mutator opened by this call is closed by this call.
    local_mutator_created = !mutator

    begin
      t1 = Time.now
      mutator ||= open_mutator(table_name, flags, flush_interval)
      @connection.set_cells_as_arrays(mutator, cells)
    ensure
      if local_mutator_created && mutator
        close_mutator(mutator)
        # Cleared so that a retry of this block opens a fresh mutator.
        mutator = nil
      end
      @@write_latency += Time.now - t1
    end
  }
end
|
548
|
+
|
549
|
+
# Return a Hypertable::ThriftGen::Cell object from a cell passed in
# as an array of format:
#   [row_key, column_family, column_qualifier, value, timestamp]
# Hypertable::ThriftGen::Cell objects are required when setting a flag
# on write - used by special operations (e.g., delete).
#
# The qualifier is only set when it is not blank; value and timestamp
# are only set when present in the array.
def thrift_cell_from_native_array(array)
  cell = Hypertable::ThriftGen::Cell.new
  cell.row_key = array[0]
  cell.column_family = array[1]
  cell.column_qualifier = array[2] unless array[2].blank?
  cell.value = array[3] if array[3]
  cell.timestamp = array[4] if array[4]
  cell
end
|
562
|
+
|
563
|
+
# Create native array format for cell. Most HyperRecord operations
# deal with cells in native array format since operations on an
# array are much faster than operations on Hypertable::ThriftGen::Cell
# objects.
#   ["row_key", "column_family", "column_qualifier", "value"]
#
# Every element is converted with to_s and, where the Ruby version
# supports encodings, returned as a binary (ASCII-8BIT) copy, so the
# caller's strings are left untouched and frozen strings (such as
# nil.to_s on newer Rubies) are safe to pass.
#
# NOTE(review): timestamp is accepted but is not part of the returned
# array; confirm whether callers expect it to be written.
def cell_native_array(row_key, column_family, column_qualifier, value=nil, timestamp=nil)
  [row_key, column_family, column_qualifier, value].map do |part|
    s = part.to_s
    s.respond_to?(:force_encoding) ? s.dup.force_encoding('ascii-8bit') : s
  end
end
|
578
|
+
|
579
|
+
# Delete cells from a table.
#
# table_name - table to delete from
# cells      - cells in native array format; each one is converted to a
#              Thrift cell flagged DELETE_CELL
#
# The elapsed time is added to the class-level write latency.
def delete_cells(table_name, cells)
  t1 = Time.now

  retry_on_connection_error do
    @connection.with_mutator(table_name) do |mutator|
      thrift_cells = cells.map do |c|
        cell = thrift_cell_from_native_array(c)
        cell.flag = Hypertable::ThriftGen::CellFlag::DELETE_CELL
        cell
      end
      @connection.set_cells(mutator, thrift_cells)
    end
  end

  @@write_latency += Time.now - t1
end
|
596
|
+
|
597
|
+
# Delete rows from a table.
#
# table_name - table to delete from
# row_keys   - keys of the rows to delete; one Thrift cell flagged
#              DELETE_ROW is written per key
#
# The elapsed time is added to the class-level write latency.
def delete_rows(table_name, row_keys)
  t1 = Time.now

  cells = row_keys.map do |row_key|
    cell = Hypertable::ThriftGen::Cell.new
    cell.row_key = row_key
    cell.flag = Hypertable::ThriftGen::CellFlag::DELETE_ROW
    cell
  end

  retry_on_connection_error do
    @connection.with_mutator(table_name) do |mutator|
      @connection.set_cells(mutator, cells)
    end
  end

  @@write_latency += Time.now - t1
end
|
615
|
+
|
616
|
+
# Insert a test fixture into a table.
#
# fixture    - object responding to to_hash; the 'ROW' entry is the row
#              key, 'timestamp' is passed along for every cell, and every
#              other key is a column name ("family" or "family:qualifier")
# table_name - table to write the cells to
#
# Works on a copy of the fixture's hash so the fixture keeps its 'ROW'
# and 'timestamp' entries.
def insert_fixture(fixture, table_name)
  fixture_hash = fixture.to_hash.dup
  timestamp = fixture_hash.delete('timestamp')
  row_key = fixture_hash.delete('ROW')

  cells = fixture_hash.keys.map do |k|
    column_family, column_qualifier = k.split(':', 2)
    cell_native_array(row_key, column_family, column_qualifier, fixture_hash[k], timestamp)
  end

  write_cells(table_name, cells)
end
|
628
|
+
|
629
|
+
# Mutator methods
|
630
|
+
|
631
|
+
# Open a mutator on the named table through the Thrift client and
# return it. flags and flush_interval are passed through unchanged.
def open_mutator(table_name, flags=0, flush_interval=0)
  @connection.open_mutator(table_name, flags, flush_interval)
end
|
634
|
+
|
635
|
+
# Close a mutator through the Thrift client.
#
# Flush is always called in a mutator's destructor due to recent
# no_log_sync changes. Adding an explicit flush here just adds
# one round trip for an extra flush call, so the default is
# flush=0. Consider removing this argument and always sending 0.
def close_mutator(mutator, flush=0)
  @connection.close_mutator(mutator, flush)
end
|
642
|
+
|
643
|
+
# Flush a mutator through the Thrift client.
def flush_mutator(mutator)
  @connection.flush_mutator(mutator)
end
|
646
|
+
|
647
|
+
# Scanner methods
|
648
|
+
|
649
|
+
# Open a scanner on the named table for the given scan spec and return
# it. The third argument to the client call is hard-coded to true; its
# meaning is defined by the Thrift client API.
def open_scanner(table_name, scan_spec)
  @connection.open_scanner(table_name, scan_spec, true)
end
|
652
|
+
|
653
|
+
# Close a scanner through the Thrift client.
def close_scanner(scanner)
  @connection.close_scanner(scanner)
end
|
656
|
+
|
657
|
+
# Delegate to the Thrift client's with_scanner, forwarding the table
# name, scan spec and block.
def with_scanner(table_name, scan_spec, &block)
  @connection.with_scanner(table_name, scan_spec, &block)
end
|
660
|
+
|
661
|
+
# Iterator methods
|
662
|
+
|
663
|
+
# Delegate to the Thrift client's each_cell, forwarding the scanner and
# block.
def each_cell(scanner, &block)
  @connection.each_cell(scanner, &block)
end
|
666
|
+
|
667
|
+
# Delegate to the Thrift client's each_cell_as_arrays, forwarding the
# scanner and block.
def each_cell_as_arrays(scanner, &block)
  @connection.each_cell_as_arrays(scanner, &block)
end
|
670
|
+
|
671
|
+
# Delegate to the Thrift client's each_row, forwarding the scanner and
# block.
def each_row(scanner, &block)
  @connection.each_row(scanner, &block)
end
|
674
|
+
|
675
|
+
# Delegate to the Thrift client's each_row_as_arrays, forwarding the
# scanner and block.
def each_row_as_arrays(scanner, &block)
  @connection.each_row_as_arrays(scanner, &block)
end
|
678
|
+
end
|
679
|
+
end
|
680
|
+
end
|