hyper_record 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/CHANGELOG +83 -0
- data/LICENSE +20 -0
- data/README +49 -0
- data/Rakefile +43 -0
- data/VERSION.yml +4 -0
- data/benchmark/save.rb +58 -0
- data/hyper_record.gemspec +76 -0
- data/init.rb +1 -0
- data/lib/active_record/connection_adapters/hyper_table_definition.rb +26 -0
- data/lib/active_record/connection_adapters/hypertable_adapter.rb +680 -0
- data/lib/active_record/connection_adapters/qualified_column.rb +57 -0
- data/lib/associations/hyper_has_and_belongs_to_many_association_extension.rb +107 -0
- data/lib/associations/hyper_has_many_association_extension.rb +87 -0
- data/lib/hyper_record.rb +636 -0
- data/lib/hypertable/gen-rb/client_constants.rb +12 -0
- data/lib/hypertable/gen-rb/client_service.rb +1436 -0
- data/lib/hypertable/gen-rb/client_types.rb +253 -0
- data/lib/hypertable/gen-rb/hql_constants.rb +12 -0
- data/lib/hypertable/gen-rb/hql_service.rb +281 -0
- data/lib/hypertable/gen-rb/hql_types.rb +73 -0
- data/lib/hypertable/thrift_client.rb +94 -0
- data/lib/hypertable/thrift_transport_monkey_patch.rb +29 -0
- data/pkg/hyper_record-0.2.8.gem +0 -0
- data/spec/fixtures/pages.yml +8 -0
- data/spec/fixtures/qualified_pages.yml +1 -0
- data/spec/lib/associations_spec.rb +235 -0
- data/spec/lib/hyper_record_spec.rb +948 -0
- data/spec/lib/hypertable_adapter_spec.rb +121 -0
- data/spec/spec_helper.rb +130 -0
- data/test/test_helper.rb +10 -0
- data/test/thrift_client_test.rb +590 -0
- metadata +99 -0
@@ -0,0 +1,680 @@
|
|
1
|
+
# For each supported data store, ActiveRecord has an adapter that implements
|
2
|
+
# functionality specific to that store as well as providing metadata for
|
3
|
+
# data held within the store. Features implemented by adapters typically
|
4
|
+
# include connection handling, listings metadata (tables and schema),
|
5
|
+
# statement execution (selects, writes, etc.), latency measurement, fixture
|
6
|
+
# handling.
|
7
|
+
#
|
8
|
+
# This file implements the adapter for Hypertable used by ActiveRecord
|
9
|
+
# (HyperRecord). The adapter communicates with Hypertable using the
|
10
|
+
# Thrift client API documented here:
|
11
|
+
# http://hypertable.org/thrift-api-ref/index.html
|
12
|
+
#
|
13
|
+
# Refer to the main hypertable site (http://hypertable.org/) for additional
|
14
|
+
# information and documentation (http://hypertable.org/documentation.html)
|
15
|
+
# on Hypertable and the Thrift client API.
|
16
|
+
|
17
|
+
unless defined?(ActiveRecord::ConnectionAdapters::AbstractAdapter)
|
18
|
+
# running into some situations where rails has already loaded this, without
|
19
|
+
# require realizing it, and loading again is unsafe (alias_method_chain is a
|
20
|
+
# great way to create infinite recursion loops)
|
21
|
+
require 'active_record/connection_adapters/abstract_adapter'
|
22
|
+
end
|
23
|
+
require 'active_record/connection_adapters/qualified_column'
|
24
|
+
require 'active_record/connection_adapters/hyper_table_definition'
|
25
|
+
|
26
|
+
module ActiveRecord
|
27
|
+
class Base
|
28
|
+
# Include the thrift driver if one hasn't already been loaded
|
29
|
+
# Load the Hypertable Thrift driver, but only when no
# Hypertable::ThriftClient constant is defined yet.
def self.require_hypertable_thrift_client
  return if defined?(Hypertable::ThriftClient)

  gem 'hypertable-thrift-client'
  require_dependency 'thrift_client'
end
|
35
|
+
|
36
|
+
# Establishes a connection to the Thrift Broker (which brokers requests
# to Hypertable itself). The connection details must appear in
# config/database.yml. e.g.,
#   hypertable_dev:
#     host: localhost
#     port: 38088
#     timeout: 20000
#
# Options:
# * <tt>:host</tt> - Required. A RuntimeError is raised when it is missing.
# * <tt>:port</tt> - Defaults to 38088
# * <tt>:timeout</tt> - Timeout for queries in milliseconds. Defaults to 20000
#
# Returns a ConnectionAdapters::HypertableAdapter wrapping a new
# Hypertable::ThriftClient.
def self.hypertable_connection(config)
  config = config.symbolize_keys
  require_hypertable_thrift_client

  # :host is mandatory, so there is no localhost fallback after this check.
  raise "Hypertable config missing :host in database.yml" if !config[:host]

  config[:port] ||= 38088
  config[:timeout] ||= 20000

  connection = Hypertable::ThriftClient.new(config[:host], config[:port],
                                            config[:timeout])

  ConnectionAdapters::HypertableAdapter.new(connection, logger, config)
end
|
63
|
+
end
|
64
|
+
|
65
|
+
module ConnectionAdapters
|
66
|
+
class HypertableAdapter < AbstractAdapter
|
67
|
+
# Following cattr_accessors are used to record and access query
|
68
|
+
# performance statistics.
|
69
|
+
@@read_latency = 0.0
|
70
|
+
@@write_latency = 0.0
|
71
|
+
@@cells_read = 0
|
72
|
+
cattr_accessor :read_latency, :write_latency, :cells_read
|
73
|
+
|
74
|
+
# Used by retry_on_connection_error() to determine whether to retry
|
75
|
+
@retry_on_failure = true
|
76
|
+
attr_accessor :retry_on_failure
|
77
|
+
|
78
|
+
# Build an adapter around an already-constructed Thrift client.
#
# connection - client object handed on to AbstractAdapter
# logger     - logger handed on to AbstractAdapter
# config     - connection settings hash; raw_thrift_client reads its
#              :host, :port and :timeout entries
def initialize(connection, logger, config)
  super(connection, logger)
  @config = config
  @hypertable_column_names = {}
  # An @retry_on_failure assignment in the class body only sets a
  # class-level instance variable, which the instance attr_accessor never
  # reads. Seed the per-instance flag here so retry_on_failure is true
  # before the first call to retry_on_connection_error.
  @retry_on_failure = true
end
|
83
|
+
|
84
|
+
def raw_thrift_client(&block)
|
85
|
+
Hypertable.with_thrift_client(@config[:host], @config[:port],
|
86
|
+
@config[:timeout], &block)
|
87
|
+
end
|
88
|
+
|
89
|
+
# Return the current set of performance statistics. The application
|
90
|
+
# can retrieve (and reset) these statistics after every query or
|
91
|
+
# request for its own logging purposes.
|
92
|
+
def self.get_timing
|
93
|
+
[@@read_latency, @@write_latency, @@cells_read]
|
94
|
+
end
|
95
|
+
|
96
|
+
# Reset performance metrics.
|
97
|
+
def self.reset_timing
|
98
|
+
@@read_latency = 0.0
|
99
|
+
@@write_latency = 0.0
|
100
|
+
@@cells_read = 0
|
101
|
+
end
|
102
|
+
|
103
|
+
def adapter_name
|
104
|
+
'Hypertable'
|
105
|
+
end
|
106
|
+
|
107
|
+
def supports_migrations?
|
108
|
+
true
|
109
|
+
end
|
110
|
+
|
111
|
+
# Hypertable only supports string types at the moment, so treat
|
112
|
+
# all values as strings and leave it to the application to handle
|
113
|
+
# types.
|
114
|
+
def native_database_types
|
115
|
+
{
|
116
|
+
:string => { :name => "varchar", :limit => 255 }
|
117
|
+
}
|
118
|
+
end
|
119
|
+
|
120
|
+
# Validate the :conditions entry of a finder options hash. Every kind of
# condition is rejected; only an absent (nil) :conditions entry passes.
#
# options - finder options hash; only options[:conditions] is inspected
#
# Returns nil when no conditions were supplied.
# Raises RuntimeError when any conditions are present.
def sanitize_conditions(options)
  conditions = options[:conditions]
  return nil if conditions.nil?

  if conditions.is_a?(Hash)
    # requires Hypertable API to support query by arbitrary cell value
    raise "HyperRecord does not support specifying conditions by Hash"
  end

  # The previous message here ("Only hash conditions are supported")
  # contradicted the Hash branch above, which rejects hashes as well.
  raise "HyperRecord does not support specifying conditions by #{conditions.class}"
end
|
131
|
+
|
132
|
+
# Execute an HQL query against Hypertable and return the native
|
133
|
+
# HqlResult object that comes back from the Thrift client API.
|
134
|
+
def execute(hql, name=nil)
|
135
|
+
log(hql, name) {
|
136
|
+
retry_on_connection_error { @connection.hql_query(hql) }
|
137
|
+
}
|
138
|
+
end
|
139
|
+
|
140
|
+
# Execute a query against Hypertable and return the matching cells.
|
141
|
+
# The query parameters are denoted in an options hash, which is
|
142
|
+
# converted to a "scan spec" by convert_options_to_scan_spec.
|
143
|
+
# A "scan spec" is the mechanism used to specify query parameters
|
144
|
+
# (e.g., the columns to retrieve, the number of rows to retrieve, etc.)
|
145
|
+
# to Hypertable.
|
146
|
+
def execute_with_options(options)
|
147
|
+
scan_spec = convert_options_to_scan_spec(options)
|
148
|
+
t1 = Time.now
|
149
|
+
|
150
|
+
# Use native array method (get_cells_as_arrays) for cell retrieval -
|
151
|
+
# much faster than get_cells that returns Hypertable::ThriftGen::Cell
|
152
|
+
# objects.
|
153
|
+
# [
|
154
|
+
# ["page_1", "name", "", "LOLcats and more", "1237331693147619001"],
|
155
|
+
# ["page_1", "url", "", "http://...", "1237331693147619002"]
|
156
|
+
# ]
|
157
|
+
cells = retry_on_connection_error {
|
158
|
+
@connection.get_cells_as_arrays(options[:table_name], scan_spec)
|
159
|
+
}
|
160
|
+
|
161
|
+
# Capture performance metrics
|
162
|
+
@@read_latency += Time.now - t1
|
163
|
+
@@cells_read += cells.length
|
164
|
+
|
165
|
+
cells
|
166
|
+
end
|
167
|
+
|
168
|
+
# Convert an options hash to a scan spec. A scan spec is the native
|
169
|
+
# representation of the query parameters that must be sent to
|
170
|
+
# Hypertable.
|
171
|
+
# http://hypertable.org/thrift-api-ref/Client.html#Struct_ScanSpec
|
172
|
+
# Build a Hypertable::ThriftGen::ScanSpec from a finder options hash.
#
# options - hash of query parameters (keys may be symbols or strings).
#           Rows can be specified using a number of different options,
#           checked in this order:
#             :row_keys      => [row_key_1, row_key_2, ...]
#             :row_intervals => [[start_1, end_1], [start_2, end_2]]
#             :start_row and :end_row
#           Other recognized keys: :cell_intervals, :start_time, :end_time,
#           :limit, :revs, :return_deletes, :select.
#
# NOTE: the options hash is modified in place (defaults are filled in and
# :row_intervals is replaced by RowInterval objects), as it always was.
#
# Returns the populated ScanSpec.
# Raises RuntimeError for a :start_row without :end_row, for unknown keys,
# and for any :conditions (via sanitize_conditions).
def convert_options_to_scan_spec(options={})
  sanitize_conditions(options)

  # Interval bounds are inclusive unless the caller says otherwise.
  options[:start_inclusive] = true unless options.has_key?(:start_inclusive)
  options[:end_inclusive] = true unless options.has_key?(:end_inclusive)

  row_intervals =
    if options[:row_keys]
      # A single key is expressed as the interval [key, key].
      options[:row_keys].flatten.map { |row_key| [row_key, row_key] }
    elsif options[:row_intervals]
      options[:row_intervals].map { |interval| [interval.first, interval.last] }
    elsif options[:start_row]
      raise "missing :end_row" if !options[:end_row]
      [[options[:start_row], options[:end_row]]]
    else
      []
    end

  # Add each row interval to the scan spec
  options[:row_intervals] = row_intervals.map do |start_row, end_row|
    ri = Hypertable::ThriftGen::RowInterval.new
    ri.start_row = start_row
    ri.start_inclusive = options[:start_inclusive]
    ri.end_row = end_row
    ri.end_inclusive = options[:end_inclusive]
    ri
  end

  # Hypertable can store multiple revisions for each cell but this
  # feature does not map into an ORM very well. By default, just
  # retrieve the latest revision of each cell.
  options[:revs] ||= 1

  # Most of the time, we're not interested in cells that have been
  # marked deleted but have not actually been deleted yet.
  options[:return_deletes] ||= false

  scan_spec = Hypertable::ThriftGen::ScanSpec.new

  options.each do |key, value|
    case key.to_sym
    when :row_intervals
      scan_spec.row_intervals = value
    when :cell_intervals
      scan_spec.cell_intervals = value
    when :start_time
      scan_spec.start_time = value
    when :end_time
      scan_spec.end_time = value
    when :limit
      scan_spec.row_limit = value
    when :revs
      scan_spec.revs = value
    when :return_deletes
      scan_spec.return_deletes = value
    when :select
      # Columns listed here can only be column families (not
      # column qualifiers) at this time.
      requested_columns =
        case value
        when String, Symbol then requested_columns_from_string(value.to_s)
        when Array then value.map { |column| column.to_s }
        when nil then [] # an explicit :select => nil used to crash on nil.map
        else value
        end

      # Reduce "family:qualifier" names to their family and de-duplicate.
      scan_spec.columns = requested_columns.map do |column|
        is_qualified_column_name?(column)[1]
      end.uniq
    when :table_name, :start_row, :end_row, :start_inclusive, :end_inclusive, :columns, :row_keys, :conditions, :include, :readonly, :scan_spec, :instantiate_only_requested_columns
      # ignore
    else
      raise "Unrecognized scan spec option: #{key}"
    end
  end

  scan_spec
end
|
263
|
+
|
264
|
+
# Turn a comma-separated column list into an array of trimmed names.
# The wildcard '*' yields an empty array.
def requested_columns_from_string(s)
  return [] if s == '*'

  s.split(',').map { |column| column.strip }
end
|
271
|
+
|
272
|
+
# Exceptions generated by Thrift IDL do not set a message.
|
273
|
+
# This causes a lot of problems for Rails which expects a String
|
274
|
+
# value and throws exception when it encounters NilClass.
|
275
|
+
# Unfortunately, you cannot assign a message to exceptions so define
|
276
|
+
# a singleton to accomplish same goal.
|
277
|
+
# Run the given block and make sure any exception escaping it answers
# #message with a String rather than nil.
#
# When the rescued exception's message is nil, the replacement text is
# taken from err.what if the exception responds to #what, otherwise ''.
# The exception is always re-raised; the block's value is returned when
# nothing is raised.
def handle_thrift_exceptions_with_missing_message
  yield
rescue Exception => err
  # Deliberately broad: nothing is swallowed, every exception is re-raised
  # below after its message has been patched up.
  if !err.message
    if err.respond_to?("message=")
      # Guard the #what lookup so an exception without it does not turn
      # into a NoMethodError that hides the original failure.
      err.message = (err.respond_to?(:what) && err.what) || ''
    else
      # No writer available, so override the reader on this one object.
      def err.message
        (respond_to?(:what) && what) || ''
      end
    end
  end

  raise err
end
|
294
|
+
|
295
|
+
# Attempt to reconnect to the Thrift Broker once before aborting.
|
296
|
+
# This ensures graceful recovery in the case that the Thrift Broker
|
297
|
+
# goes down and then comes back up.
|
298
|
+
# Run the block; on a Thrift transport/application/protocol error or an
# IOError, close and reopen @connection and run the block one more time.
# A second failure is re-raised to the caller.
def retry_on_connection_error
  @retry_on_failure = true

  begin
    handle_thrift_exceptions_with_missing_message { yield }
  rescue Thrift::TransportException, IOError, Thrift::ApplicationException, Thrift::ProtocolException => failure
    # The flag is cleared before reconnecting, so a repeat failure lands here.
    raise failure unless @retry_on_failure

    @retry_on_failure = false
    @connection.close
    @connection.open
    retry
  end
end
|
313
|
+
|
314
|
+
# Column Operations
|
315
|
+
|
316
|
+
# Returns array of column objects for the table associated with this
|
317
|
+
# class. Hypertable allows columns to include dashes in the name.
|
318
|
+
# This doesn't play well with Ruby (can't have dashes in method names),
|
319
|
+
# so we maintain a mapping of original column names to Ruby-safe
|
320
|
+
# names.
|
321
|
+
# Return the Column objects for a table and refresh the rubified-name to
# Hypertable-name mapping kept in @hypertable_column_names[table_name].
#
# table_name - table whose schema is fetched through describe_table
# name       - unused here
#
# Returns an Array that starts with the 'ROW' column followed by one
# column per column family that is not flagged as deleted.
def columns(table_name, name = nil)
  # Each table always has a row key called 'ROW'
  result = [ Column.new('ROW', '') ]

  schema = describe_table(table_name)
  doc = REXML::Document.new(schema)
  column_families = doc.elements.to_a('Schema/AccessGroup/ColumnFamily')

  @hypertable_column_names[table_name] ||= {}
  column_families.each do |cf|
    # Columns are lazily-deleted in Hypertable so still may show up
    # in describe table output. Ignore. A family without a <deleted>
    # element is treated as live instead of raising on nil.
    deleted = cf.elements['deleted']
    next if deleted && deleted.text == 'true'

    column_name = cf.elements['Name'].text
    rubified_name = rubify_column_name(column_name)
    @hypertable_column_names[table_name][rubified_name] = column_name
    result << new_column(rubified_name, '')
  end

  result
end
|
344
|
+
|
345
|
+
# Drop a column from the cached rubified-name map of a table.
#
# Returns the stored Hypertable column name, or nil when the column (or
# the whole table) has no cached entry.
def remove_column_from_name_map(table_name, name)
  names = @hypertable_column_names[table_name]
  # Nothing cached for this table yet, so there is nothing to remove.
  return nil if names.nil?

  names.delete(rubify_column_name(name))
end
|
348
|
+
|
349
|
+
# Record a column in the cached rubified-name map of a table, keyed by
# its rubified name and pointing at the name as given.
#
# Returns the name that was stored.
def add_column_to_name_map(table_name, name)
  # The per-table map is otherwise only created by #columns; create it on
  # demand so registering a column first does not raise on nil.
  @hypertable_column_names[table_name] ||= {}
  @hypertable_column_names[table_name][rubify_column_name(name)] = name
end
|
352
|
+
|
353
|
+
def add_qualified_column(table_name, column_family, qualifiers=[], default='', sql_type=nil, null=true)
|
354
|
+
qc = QualifiedColumn.new(column_family, default, sql_type, null)
|
355
|
+
qc.qualifiers = qualifiers
|
356
|
+
qualifiers.each{|q| add_column_to_name_map(table_name, qualified_column_name(column_family, q))}
|
357
|
+
qc
|
358
|
+
end
|
359
|
+
|
360
|
+
def new_column(column_name, default_value='')
|
361
|
+
Column.new(rubify_column_name(column_name), default_value)
|
362
|
+
end
|
363
|
+
|
364
|
+
# Join a column family and an optional qualifier with ':'.
# nil parts are left out, so a bare family comes back unchanged.
def qualified_column_name(column_family, qualifier=nil)
  parts = [column_family, qualifier].reject { |part| part.nil? }
  parts.join(':')
end
|
367
|
+
|
368
|
+
def rubify_column_name(column_name)
|
369
|
+
column_name.to_s.gsub(/-+/, '_')
|
370
|
+
end
|
371
|
+
|
372
|
+
# Split a column name at its first ':'.
#
# Returns [true, family, qualifier] when a ':' is present, otherwise
# [false, column_name, nil].
def is_qualified_column_name?(column_name)
  family, qualifier = column_name.split(':', 2)
  return [false, column_name, nil] unless qualifier

  [true, family, qualifier]
end
|
380
|
+
|
381
|
+
# Schema alterations
|
382
|
+
|
383
|
+
def rename_column(table_name, column_name, new_column_name)
|
384
|
+
raise "rename_column operation not supported by Hypertable."
|
385
|
+
end
|
386
|
+
|
387
|
+
# Hypertable cannot alter an existing column, so this always raises.
#
# The trailing options parameter exists so the call shape used by
# ActiveRecord migrations, change_column(table, column, type, options),
# reaches the explanatory error below instead of an ArgumentError.
#
# Raises RuntimeError unconditionally.
def change_column(table_name, column_name, new_column_name, options = {})
  raise "change_column operation not supported by Hypertable."
end
|
390
|
+
|
391
|
+
# Translate "sexy" ActiveRecord::Migration syntax to an HQL
|
392
|
+
# CREATE TABLE statement.
|
393
|
+
# Build the HQL CREATE TABLE statement for a migration-style block.
#
# The block receives a HyperTableDefinition to declare columns on. With
# options[:force] set and the table already present, the table is dropped
# first. options[:options] is appended verbatim after the column list.
#
# Returns the HQL string.
def create_table_hql(table_name, options={}, &block)
  definition = HyperTableDefinition.new(self)
  yield definition

  drop_table(table_name, options) if options[:force] && table_exists?(table_name)

  header = "CREATE TABLE #{quote_table_name(table_name)} ("
  column_clauses = definition.columns.map do |col|
    versions = col.max_versions ? "MAX_VERSIONS=#{col.max_versions}" : ''
    [quote_table_name(col.name), versions].join(' ')
  end
  footer = ") #{options[:options]}"

  [header, column_clauses.join(', '), footer].join(' ').strip
end
|
415
|
+
|
416
|
+
def create_table(table_name, options={}, &block)
|
417
|
+
execute(create_table_hql(table_name, options, &block))
|
418
|
+
end
|
419
|
+
|
420
|
+
def drop_table(table_name, options = {})
|
421
|
+
retry_on_connection_error {
|
422
|
+
@connection.drop_table(table_name, options[:if_exists] || false)
|
423
|
+
}
|
424
|
+
end
|
425
|
+
|
426
|
+
def rename_table(table_name, options = {})
|
427
|
+
raise "rename_table operation not supported by Hypertable."
|
428
|
+
end
|
429
|
+
|
430
|
+
def change_column_default(table_name, column_name, default)
|
431
|
+
raise "change_column_default operation not supported by Hypertable."
|
432
|
+
end
|
433
|
+
|
434
|
+
def change_column_null(table_name, column_name, null, default = nil)
|
435
|
+
raise "change_column_null operation not supported by Hypertable."
|
436
|
+
end
|
437
|
+
|
438
|
+
def add_column(table_name, column_name, type=:string, options = {})
|
439
|
+
hql = [ "ALTER TABLE #{quote_table_name(table_name)} ADD (" ]
|
440
|
+
hql << quote_column_name(column_name)
|
441
|
+
hql << "MAX_VERSIONS=#{options[:max_versions]}" if !options[:max_versions].blank?
|
442
|
+
hql << ")"
|
443
|
+
execute(hql.join(' '))
|
444
|
+
end
|
445
|
+
|
446
|
+
def add_column_options!(hql, options)
|
447
|
+
hql << " MAX_VERSIONS =1 #{quote(options[:default], options[:column])}" if options_include_default?(options)
|
448
|
+
# must explicitly check for :null to allow change_column to work on migrations
|
449
|
+
if options[:null] == false
|
450
|
+
hql << " NOT NULL"
|
451
|
+
end
|
452
|
+
end
|
453
|
+
|
454
|
+
def remove_column(table_name, *column_names)
|
455
|
+
column_names.flatten.each do |column_name|
|
456
|
+
execute "ALTER TABLE #{quote_table_name(table_name)} DROP(#{quote_column_name(column_name)})"
|
457
|
+
end
|
458
|
+
end
|
459
|
+
alias :remove_columns :remove_column
|
460
|
+
|
461
|
+
# Quote a value for use in HQL. nil becomes the empty string, Strings are
# returned as they are, and every other value is handed to the superclass
# implementation.
def quote(value, column = nil)
  return '' if NilClass === value
  return value if String === value

  super(value, column)
end
|
468
|
+
|
469
|
+
def quote_column_name(name)
|
470
|
+
"'#{name}'"
|
471
|
+
end
|
472
|
+
|
473
|
+
def quote_column_name_for_table(name, table_name)
|
474
|
+
quote_column_name(hypertable_column_name(name, table_name))
|
475
|
+
end
|
476
|
+
|
477
|
+
# Translate a rubified column name back to the name stored in Hypertable.
#
# name                  - rubified column name to look up
# table_name            - table whose name map is consulted; the map is
#                         loaded through #columns when it is blank
# declared_columns_only - when true, unknown names yield nil instead of
#                         falling back to +name+ itself
#
# Returns the Hypertable column name, +name+, or nil (see above).
# Raises RuntimeError, with the table, column and cached map included in
# the message, when the lookup fails.
def hypertable_column_name(name, table_name, declared_columns_only=false)
  begin
    columns(table_name) if @hypertable_column_names[table_name].blank?
    n = @hypertable_column_names[table_name][name]
    n ||= name if !declared_columns_only
    n
  rescue StandardError => err
    # StandardError only: signals and exits must not be converted into a
    # RuntimeError. The map is rendered with #inspect because pp would
    # also print it to stdout while the message is being built.
    raise [
      "hypertable_column_name exception",
      err.message,
      "table: #{table_name}",
      "column: #{name}",
      "@htcn: #{@hypertable_column_names.inspect}"
    ].join("\n")
  end
end
|
493
|
+
|
494
|
+
# Return an XML document describing the table named in the first
|
495
|
+
# argument. Output is equivalent to that returned by the DESCRIBE
|
496
|
+
# TABLE command available in the Hypertable CLI.
|
497
|
+
# <Schema generation="2">
|
498
|
+
# <AccessGroup name="default">
|
499
|
+
# <ColumnFamily id="1">
|
500
|
+
# <Generation>1</Generation>
|
501
|
+
# <Name>date</Name>
|
502
|
+
# <deleted>false</deleted>
|
503
|
+
# </ColumnFamily>
|
504
|
+
# </AccessGroup>
|
505
|
+
# </Schema>
|
506
|
+
def describe_table(table_name)
|
507
|
+
retry_on_connection_error {
|
508
|
+
@connection.get_schema(table_name)
|
509
|
+
}
|
510
|
+
end
|
511
|
+
|
512
|
+
# Returns an array of tables available in the current Hypertable
|
513
|
+
# instance.
|
514
|
+
def tables(name=nil)
|
515
|
+
retry_on_connection_error {
|
516
|
+
@connection.get_tables
|
517
|
+
}
|
518
|
+
end
|
519
|
+
|
520
|
+
# Write an array of cells to the named table. By default, write_cells
|
521
|
+
# will open and close a mutator for this operation. Closing the
|
522
|
+
# mutator flushes the data, which guarantees that it is stored in
|
523
|
+
# Hypertable before the call returns. This also slows down the
|
524
|
+
# operation, so if you're doing lots of writes and want to manage
|
525
|
+
# mutator flushes at the application layer then you can pass in a
|
526
|
+
# mutator as argument. Mutators can be created with the open_mutator
|
527
|
+
# method. In the near future (Summer 2009), Hypertable will provide
|
528
|
+
# a periodic mutator that automatically flushes at specific intervals.
|
529
|
+
def write_cells(table_name, cells, mutator=nil, flags=nil, flush_interval=nil)
|
530
|
+
return if cells.blank?
|
531
|
+
|
532
|
+
retry_on_connection_error {
|
533
|
+
local_mutator_created = !mutator
|
534
|
+
|
535
|
+
begin
|
536
|
+
t1 = Time.now
|
537
|
+
mutator ||= open_mutator(table_name, flags, flush_interval)
|
538
|
+
@connection.set_cells_as_arrays(mutator, cells)
|
539
|
+
ensure
|
540
|
+
if local_mutator_created && mutator
|
541
|
+
close_mutator(mutator)
|
542
|
+
mutator = nil
|
543
|
+
end
|
544
|
+
@@write_latency += Time.now - t1
|
545
|
+
end
|
546
|
+
}
|
547
|
+
end
|
548
|
+
|
549
|
+
# Return a Hypertable::ThriftGen::Cell object from a cell passed in
|
550
|
+
# as an array of format: [row_key, column_family, column_qualifier, value, timestamp]
|
551
|
+
# Hypertable::ThriftGen::Cell objects are required when setting a flag
|
552
|
+
# on write - used by special operations (e.g., delete)
|
553
|
+
# Build a Hypertable::ThriftGen::Cell from a native cell array laid out as
# [row_key, column_family, column_qualifier, value, timestamp].
# The qualifier is only set when not blank; value and timestamp only
# when truthy.
def thrift_cell_from_native_array(array)
  row_key, family, qualifier, value, timestamp = array[0], array[1], array[2], array[3], array[4]

  cell = Hypertable::ThriftGen::Cell.new
  cell.row_key = row_key
  cell.column_family = family
  cell.column_qualifier = qualifier unless qualifier.blank?
  cell.value = value if value
  cell.timestamp = timestamp if timestamp
  cell
end
|
562
|
+
|
563
|
+
# Create native array format for cell. Most HyperRecord operations
|
564
|
+
# deal with cells in native array format since operations on an
|
565
|
+
# array are much faster than operations on Hypertable::ThriftGen::Cell
|
566
|
+
# objects.
|
567
|
+
# ["row_key", "column_family", "column_qualifier", "value"],
|
568
|
+
# Create the native array form of a cell:
#   ["row_key", "column_family", "column_qualifier", "value"]
#
# Every part is converted with #to_s and, where Strings support encodings,
# tagged as ascii-8bit (binary).
#
# timestamp is accepted for call compatibility but is not part of the
# returned array.
#
# Returns a four-element Array of Strings.
def cell_native_array(row_key, column_family, column_qualifier, value=nil, timestamp=nil)
  [row_key, column_family, column_qualifier, value].map do |part|
    s = part.to_s
    # String#to_s returns the receiver itself and nil.to_s may be a frozen
    # literal, so re-tag a copy: the caller's string keeps its encoding
    # and frozen inputs no longer raise.
    s.respond_to?(:force_encoding) ? s.dup.force_encoding('ascii-8bit') : s
  end
end
|
578
|
+
|
579
|
+
# Delete cells from a table.
|
580
|
+
def delete_cells(table_name, cells)
|
581
|
+
t1 = Time.now
|
582
|
+
|
583
|
+
retry_on_connection_error {
|
584
|
+
@connection.with_mutator(table_name) do |mutator|
|
585
|
+
thrift_cells = cells.map{|c|
|
586
|
+
cell = thrift_cell_from_native_array(c)
|
587
|
+
cell.flag = Hypertable::ThriftGen::CellFlag::DELETE_CELL
|
588
|
+
cell
|
589
|
+
}
|
590
|
+
@connection.set_cells(mutator, thrift_cells)
|
591
|
+
end
|
592
|
+
}
|
593
|
+
|
594
|
+
@@write_latency += Time.now - t1
|
595
|
+
end
|
596
|
+
|
597
|
+
# Delete rows from a table.
|
598
|
+
def delete_rows(table_name, row_keys)
|
599
|
+
t1 = Time.now
|
600
|
+
cells = row_keys.map do |row_key|
|
601
|
+
cell = Hypertable::ThriftGen::Cell.new
|
602
|
+
cell.row_key = row_key
|
603
|
+
cell.flag = Hypertable::ThriftGen::CellFlag::DELETE_ROW
|
604
|
+
cell
|
605
|
+
end
|
606
|
+
|
607
|
+
retry_on_connection_error {
|
608
|
+
@connection.with_mutator(table_name) do |mutator|
|
609
|
+
@connection.set_cells(mutator, cells)
|
610
|
+
end
|
611
|
+
}
|
612
|
+
|
613
|
+
@@write_latency += Time.now - t1
|
614
|
+
end
|
615
|
+
|
616
|
+
# Insert a test fixture into a table.
|
617
|
+
# Write one test fixture to a table. The fixture's 'ROW' entry supplies
# the row key and its 'timestamp' entry is passed along with every cell;
# each remaining key is split at its first ':' into family and qualifier.
def insert_fixture(fixture, table_name)
  attributes = fixture.to_hash
  timestamp = attributes.delete('timestamp')
  row_key = attributes.delete('ROW')

  cells = attributes.keys.map do |qualified_name|
    family, qualifier = qualified_name.split(':', 2)
    cell_native_array(row_key, family, qualifier, attributes[qualified_name], timestamp)
  end

  write_cells(table_name, cells)
end
|
628
|
+
|
629
|
+
# Mutator methods
|
630
|
+
|
631
|
+
def open_mutator(table_name, flags=0, flush_interval=0)
|
632
|
+
@connection.open_mutator(table_name, flags, flush_interval)
|
633
|
+
end
|
634
|
+
|
635
|
+
# Flush is always called in a mutator's destructor due to recent
|
636
|
+
# no_log_sync changes. Adding an explicit flush here just adds
|
637
|
+
# one round trip for an extra flush call, so change the default to
|
638
|
+
# flush=0. Consider removing this argument and always sending 0.
|
639
|
+
def close_mutator(mutator, flush=0)
|
640
|
+
@connection.close_mutator(mutator, flush)
|
641
|
+
end
|
642
|
+
|
643
|
+
def flush_mutator(mutator)
|
644
|
+
@connection.flush_mutator(mutator)
|
645
|
+
end
|
646
|
+
|
647
|
+
# Scanner methods
|
648
|
+
|
649
|
+
def open_scanner(table_name, scan_spec)
|
650
|
+
@connection.open_scanner(table_name, scan_spec, true)
|
651
|
+
end
|
652
|
+
|
653
|
+
def close_scanner(scanner)
|
654
|
+
@connection.close_scanner(scanner)
|
655
|
+
end
|
656
|
+
|
657
|
+
def with_scanner(table_name, scan_spec, &block)
|
658
|
+
@connection.with_scanner(table_name, scan_spec, &block)
|
659
|
+
end
|
660
|
+
|
661
|
+
# Iterator methods
|
662
|
+
|
663
|
+
def each_cell(scanner, &block)
|
664
|
+
@connection.each_cell(scanner, &block)
|
665
|
+
end
|
666
|
+
|
667
|
+
def each_cell_as_arrays(scanner, &block)
|
668
|
+
@connection.each_cell_as_arrays(scanner, &block)
|
669
|
+
end
|
670
|
+
|
671
|
+
def each_row(scanner, &block)
|
672
|
+
@connection.each_row(scanner, &block)
|
673
|
+
end
|
674
|
+
|
675
|
+
def each_row_as_arrays(scanner, &block)
|
676
|
+
@connection.each_row_as_arrays(scanner, &block)
|
677
|
+
end
|
678
|
+
end
|
679
|
+
end
|
680
|
+
end
|