hbase-jruby 0.2.6-java → 0.3.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -7,52 +7,99 @@
7
7
  - ActiveRecord-like method chaining for data retrieval
8
8
  - Automatic Hadoop/HBase dependency resolution
9
9
 
10
+ ## Installation
11
+
12
+ gem install hbase-jruby
13
+
10
14
  ## A quick example
11
15
 
12
16
  ```ruby
13
17
  require 'hbase-jruby'
14
18
 
15
- HBase.resolve_dependency! 'cdh4.2.0'
19
+ # Load required JAR files from CDH distribution using Maven
20
+ HBase.resolve_dependency! 'cdh4.2.1'
16
21
 
22
+ # Connect to HBase on localhost
17
23
  hbase = HBase.new
18
- table = hbase[:test_table]
24
+
25
+ # Define table schema for easier data access
26
+ hbase.schema = {
27
+ book: {
28
+ # Columns in cf1 family
29
+ cf1: {
30
+ title: :string,
31
+ author: :string,
32
+ category: :string,
33
+ year: :short,
34
+ pages: :fixnum,
35
+ price: :bigdecimal,
36
+ weight: :float,
37
+ in_print: :boolean
38
+ },
39
+ # Columns in cf2 family
40
+ cf2: {
41
+ summary: :string,
42
+ reviews: :fixnum,
43
+ stars: :fixnum,
44
+ /^comment\d+/ => :string
45
+ }
46
+ }
47
+ }
48
+
49
+ # Create book table with two column families
50
+ table = hbase[:book]
51
+ unless table.exists?
52
+ table.create! cf1: { min_versions: 2 },
53
+ cf2: { bloomfilter: :rowcol, versions: 5 }
54
+ end
19
55
 
20
56
  # PUT
21
- table.put :rowkey1 => { 'cf1:a' => 100, 'cf2:b' => "Hello" }
57
+ table.put 1 => {
58
+ title: 'The Golden Bough: A Study of Magic and Religion',
59
+ author: 'Sir James G. Frazer',
60
+ category: 'Occult',
61
+ year: 1890,
62
+ pages: 1006,
63
+ price: BigDecimal('21.50'),
64
+ weight: 3.0,
65
+ in_print: true,
66
+ summary: 'A wide-ranging, comparative study of mythology and religion',
67
+ reviews: 52,
68
+ stars: 226,
69
+ comment1: 'A must-have',
70
+ comment2: 'Rewarding purchase'
71
+ }
22
72
 
23
73
  # GET
24
- row = table.get(:rowkey1)
25
- number = row.fixnum('cf1:a')
26
- string = row.string('cf1:b')
74
+ book = table.get(1)
75
+ title = book[:title]
76
+ comment2 = book[:comment2]
77
+ as_hash = book.to_h
27
78
 
28
79
  # SCAN
29
- table.range('rowkey1'..'rowkey9').
30
- filter('cf1:a' => 100..200, # cf1:a between 100 and 200
31
- 'cf1:b' => 'Hello', # cf1:b = 'Hello'
32
- 'cf2:c' => /world/i, # cf2:c matches /world/i
33
- 'cf2:d' => ['foo', /^BAR/i]). # cf2:d = 'foo' OR matches /^BAR/i
34
- project('cf1:a', 'cf2').
35
- each do |row|
36
- puts row.fixnum('cf1:a')
80
+ table.range(0..100)
81
+ .filter(year: 1880...1900,
82
+ in_print: true,
83
+ category: ['Comics', 'Fiction', /cult/i],
84
+ price: { lt: BigDecimal('30.00') },
85
+ summary: /myth/i)
86
+ .project(:cf1, :reviews)
87
+ .each do |book|
88
+
89
+ # Update columns
90
+ table.put book.rowkey, price: book[:price] + BigDecimal('1')
91
+
92
+ # Atomic increment
93
+ table.increment book.rowkey, reviews: 1, stars: 5
94
+
95
+ # Delete a column
96
+ table.delete book.rowkey, :comment1
37
97
  end
38
98
 
39
- # DELETE
40
- table.delete(:rowkey9)
99
+ # Delete row
100
+ table.delete 1
41
101
  ```
42
102
 
43
- ## Installation
44
-
45
- ### From Rubygems
46
-
47
- gem install hbase-jruby
48
-
49
- ### From source
50
-
51
- git clone -b devel https://github.com/junegunn/hbase-jruby.git
52
- cd hbase-jruby
53
- rake build
54
- gem install pkg/hbase-jruby-0.2.2-java.gem
55
-
56
103
  ## Setting up
57
104
 
58
105
  ### Resolving Hadoop/HBase dependency
@@ -72,7 +119,7 @@ Call `HBase.resolve_dependency!` helper method passing one of the arguments list
72
119
  | cdh4.1[.*] | Cloudera CDH4.1 | cdh4.1.4 | mvn |
73
120
  | cdh3[u*] | Cloudera CDH3 | cdh3u6 | mvn |
74
121
  | 0.95[.*] | Apache HBase 0.95 | 0.95.0 | mvn |
75
- | 0.94[.*] | Apache HBase 0.94 | 0.94.6.1 | mvn |
122
+ | 0.94[.*] | Apache HBase 0.94 | 0.94.7 | mvn |
76
123
  | 0.92[.*] | Apache HBase 0.92 | 0.92.2 | mvn |
77
124
  | *POM PATH* | Custom Maven POM file | - | mvn |
78
125
  | `:local` | Local HBase installation | - | hbase |
@@ -84,16 +131,16 @@ Call `HBase.resolve_dependency!` helper method passing one of the arguments list
84
131
 
85
132
  ```ruby
86
133
  # Load JAR files from CDH4 using Maven
87
- HBase.resolve_dependency! 'cdh4.2.0'
134
+ HBase.resolve_dependency! 'cdh4.2.1'
88
135
  HBase.resolve_dependency! 'cdh4.1.3'
89
136
 
90
137
  # Load JAR files of HBase 0.94.x using Maven
91
- HBase.resolve_dependency! '0.94.6.1'
92
- HBase.resolve_dependency! '0.94.2', :verbose => true
138
+ HBase.resolve_dependency! '0.94.7'
139
+ HBase.resolve_dependency! '0.94.2', verbose: true
93
140
 
94
141
  # Dependency resolution with custom POM file
95
142
  HBase.resolve_dependency! '/path/to/my/pom.xml'
96
- HBase.resolve_dependency! '/path/to/my/pom.xml', :profile => 'trunk'
143
+ HBase.resolve_dependency! '/path/to/my/pom.xml', profile: 'trunk'
97
144
 
98
145
  # Load JAR files from local HBase installation
99
146
  # (equivalent to: export CLASSPATH=$CLASSPATH:`hbase classpath`)
@@ -126,10 +173,10 @@ hbase = HBase.new
126
173
  hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net'
127
174
 
128
175
  # Extra configuration
129
- hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net',
130
- 'hbase.client.retries.number' => 3,
176
+ hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net',
177
+ 'hbase.client.retries.number' => 3,
131
178
  'hbase.client.scanner.caching' => 1000,
132
- 'hbase.rpc.timeout' => 120000
179
+ 'hbase.rpc.timeout' => 120000
133
180
 
134
181
  # Close HBase connection
135
182
  hbase.close
@@ -154,132 +201,190 @@ table = hbase[:test_table]
154
201
  table.drop! if table.exists?
155
202
 
156
203
  # Create table with two column families
157
- table.create! :cf1 => {},
158
- :cf2 => { :compression => :snappy, :bloomfilter => :row }
204
+ table.create! cf1: {},
205
+ cf2: { compression: :snappy, bloomfilter: :row }
159
206
  ```
160
207
 
161
208
  ## Basic operations
162
209
 
210
+ ### Defining table schema for easier data access
211
+
212
+ HBase stores everything as plain Java byte arrays. So it's completely up to
213
+ users to encode and decode column values of various types into and from byte
214
+ arrays, and that is quite a tedious and error-prone task.
215
+
216
+ To remedy this situation, `hbase-jruby` implements the concept of table schema.
217
+
218
+ Using table schema greatly simplifies the way you access data:
219
+ - With schema, byte array conversion becomes automatic
220
+ - It allows you to omit column family names (e.g. `:title` instead of `"cf1:title"`)
221
+
222
+ We'll use the following schema throughout the examples.
223
+
224
+ ```ruby
225
+ hbase.schema = {
226
+ # Schema for `book` table
227
+ book: {
228
+ # Columns in cf1 family
229
+ cf1: {
230
+ title: :string,
231
+ author: :string,
232
+ category: :string,
233
+ year: :short,
234
+ pages: :fixnum,
235
+ price: :bigdecimal,
236
+ weight: :float,
237
+ in_print: :boolean
238
+ },
239
+ # Columns in cf2 family
240
+ cf2: {
241
+ summary: :string,
242
+ reviews: :fixnum,
243
+ stars: :fixnum,
244
+ /^comment\d+/ => :string
245
+ }
246
+ }
247
+ }
248
+ ```
249
+
250
+ Columns that are not defined in the schema can be referenced
251
+ using `FAMILY:QUALIFIER` notation or a 2-element Array of column family name (as Symbol) and qualifier,
252
+ however since there's no type information, they are returned as Java byte arrays,
253
+ which have to be decoded manually.
254
+
163
255
  ### PUT
164
256
 
165
257
  ```ruby
166
258
  # Putting a single row
167
- table.put 'rowkey1', 'cf1:col1' => "Hello", 'cf2:col2' => "World"
259
+ # - Row keys can be of any type, in this case, we use String type
260
+ table.put 'rowkey1', title: "Hello World", year: 2013
168
261
 
169
262
  # Putting multiple rows
170
- table.put 'rowkey1' => { 'cf1:col1' => "Hello", 'cf2:col2' => "World" },
171
- 'rowkey2' => { 'cf1:col1' => "Howdy", 'cf2:col2' => "World" },
172
- 'rowkey3' => { 'cf1:col1' => "So long", 'cf2:col2' => "World" }
263
+ table.put 'rowkey1' => { title: 'foo', year: 2013 },
264
+ 'rowkey2' => { title: "bar", year: 2014 },
265
+ 'rowkey3' => { title: 'foobar', year: 2015 }
173
266
 
174
267
  # Putting values with timestamps
175
268
  table.put 'rowkey1' => {
176
- 'cf1:col1' => {
177
- 1353143856665 => "Hello",
178
- 1352978648642 => "Goodbye" },
179
- 'cf2:col2' => "World"
269
+ title: {
270
+ 1353143856665 => "Hello world",
271
+ 1352978648642 => "Goodbye world"
272
+ },
273
+ year: 2013
180
274
  }
181
275
  ```
182
276
 
183
277
  ### GET
184
278
 
185
- HBase stores everything as a byte array, so when you fetch data from HBase,
186
- you need to explicitly specify the type of each value stored.
187
-
188
279
  ```ruby
189
- row = table.get('rowkey1')
280
+ book = table.get('rowkey1')
190
281
 
191
282
  # Rowkey
192
- rowk = row.rowkey
283
+ rowkey = book.rowkey # Rowkey as raw Java byte array
284
+ rowkey = book.rowkey :string # Rowkey as String
285
+
286
+ # Access columns in schema
287
+ title = book[:title]
288
+ author = book[:author]
289
+ year = book[:year]
193
290
 
194
- # Column value as a raw Java byte array
195
- col0 = row.raw 'cf1:col0'
291
+ # Convert to simple Hash
292
+ hash = book.to_h
196
293
 
197
- # Decode column values
198
- col1 = row.string 'cf1:col1'
199
- col2 = row.fixnum 'cf1:col2'
200
- col3 = row.bigdecimal 'cf1:col3'
201
- col4 = row.float 'cf1:col4'
202
- col5 = row.boolean 'cf1:col5'
203
- col6 = row.symbol 'cf1:col6'
294
+ # Convert to Hash containing all versions of values indexed by their timestamps
295
+ all_hash = book.to_H
204
296
 
205
- # Decode multiple columns at once
206
- row.string ['cf1:str1', 'cf1:str2']
207
- # [ "Hello", "World" ]
297
+ # Columns not defined in the schema are returned as Java byte arrays
298
+ # They need to be decoded manually
299
+ extra = HBase::Util.from_bytes(:bigdecimal, book['cf2:extra'])
300
+ # or, simply
301
+ extra = book.bigdecimal 'cf2:extra'
208
302
  ```
209
303
 
210
- #### Batch GET
304
+ ### Batch-GET
211
305
 
212
306
  ```ruby
213
307
  # Pass an array of row keys as the parameter
214
- rows = table.get(['rowkey1', 'rowkey2', 'rowkey3'])
308
+ books = table.get(['rowkey1', 'rowkey2', 'rowkey3'])
215
309
  ```
216
310
 
217
- #### Decode all versions with plural-form (-s) methods
311
+ #### `to_h`
218
312
 
219
- ```ruby
220
- # Decode all versions as Hash indexed by their timestamps
221
- row.strings 'cf1:str'
222
- # {1353143856665=>"Hello", 1353143856662=>"Goodbye"}
313
+ `to_h` and `to_H` return the Hash representation of the row.
314
+ (The latter returns all versions of each value, indexed by their timestamps.)
315
+
316
+ If a column is defined in the schema, it is referenced using its qualifier in Symbol type.
317
+ If a column is not defined, it is represented as a 2-element Array
318
+ of column family in Symbol and column qualifier as ByteArray.
319
+ Even so, to make it easier to reference those columns, an extended version of
320
+ Hash is returned with which you can also reference them with `FAMILY:QUALIFIER`
321
+ notation or `[cf, cq]` array notation.
223
322
 
224
- # Decode all versions of multiple columns
225
- row.strings ['cf1:str1', 'cf1:str2']
226
- # [
227
- # {1353143856665=>"Hello", 1353143856662=>"Goodbye"},
228
- # {1353143856665=>"World", 1353143856662=>"Cruel world"}
229
- # ]
323
+ ```ruby
324
+ table.put 1000 => {
325
+ title: 'Hello world', # Known column
326
+ comment100: 'foo', # Known column
327
+ 'cf2:extra' => 'bar', # Unknown column
328
+ [:cf2, 10] => 'foobar' # Unknown column, non-string qualifier
329
+ }
230
330
 
231
- # Plural-form methods are provided for any other data types as well
232
- cols0 = row.raws 'cf1:col0'
233
- cols1 = row.strings 'cf1:col1'
234
- cols2 = row.fixnums 'cf1:col2'
235
- cols3 = row.bigdecimals 'cf1:col3'
236
- cols4 = row.floats 'cf1:col4'
237
- cols5 = row.booleans 'cf1:col5'
238
- cols6 = row.symbols 'cf1:col6'
331
+ book = table.get 1000
332
+ hash = book.to_h
333
+ # {
334
+ # :title => "Hello world",
335
+ # [:cf2, HBase::ByteArray<0, 0, 0, 0, 0, 0, 0, 10>] =>
336
+ # byte[102, 111, 111, 98, 97, 114]@6f28bb44,
337
+ # :comment100 => "foo",
338
+ # [:cf2, HBase::ByteArray<101, 120, 116, 114, 97>] =>
339
+ # byte[98, 97, 114]@77190cfc}
340
+ # }
341
+
342
+ hash['cf2:extra']
343
+ # byte[98, 97, 114]@77190cfc
344
+
345
+ hash[%w[cf2 extra]]
346
+ # byte[98, 97, 114]@77190cfc
347
+
348
+ hash[[:cf2, HBase::ByteArray['extra']]]
349
+ # byte[98, 97, 114]@77190cfc
350
+
351
+ hash['cf2:extra'].to_s
352
+ # 'bar'
353
+
354
+ # Columns with non-string qualifiers must be referenced using 2-element Array notation
355
+ hash['cf2:10']
356
+ # nil
357
+ hash[[:cf2, 10]]
358
+ # byte[102, 111, 111, 98, 97, 114]@6f28bb44
359
+
360
+ hash_with_versions = book.to_H
361
+ # {
362
+ # :title => {1369019227766 => "Hello world"},
363
+ # [:cf2, HBase::ByteArray<0, 0, 0, 0, 0, 0, 0, 10>] =>
364
+ # {1369019227766 => byte[102, 111, 111, 98, 97, 114]@6f28bb44},
365
+ # :comment100 => {1369019227766 => "foo"},
366
+ # [:cf2, HBase::ByteArray<101, 120, 116, 114, 97>] =>
367
+ # {1369019227766 => byte[98, 97, 114]@77190cfc}}
368
+ # }
239
369
  ```
240
370
 
241
371
  #### Intra-row scan
242
372
 
243
- Intra-row scan can be done with `each` method which yields `HBase::Cell` instances.
373
+ Intra-row scan can be done using `each` method which yields `HBase::Cell` instances.
244
374
 
245
375
  ```ruby
246
376
  # Intra-row scan (all versions)
247
377
  row.each do |cell|
248
378
  family = cell.family
249
- qualifier = cell.qualifier(:string) # Column qualifier as String
379
+ qualifier = cell.qualifier :string # Column qualifier as String
250
380
  timestamp = cell.timestamp
251
-
252
- # Cell value as Java byte array
253
- bytes = cell.raw
254
-
255
- # Typed access
256
- # value_as_string = cell.string
257
- # value_as_fixnum = cell.fixnum
258
- # ...
381
+ value = cell.value
259
382
  end
260
383
 
261
384
  # Array of HBase::Cells
262
385
  cells = row.to_a
263
386
  ```
264
387
 
265
- #### `to_hash`
266
-
267
- ```ruby
268
- # Returns the Hash representation of the record with the specified schema
269
- schema = {
270
- 'cf1:col1' => :string,
271
- 'cf1:col2' => :fixnum,
272
- 'cf1:col3' => :bigdecimal,
273
- 'cf1:col4' => :float,
274
- 'cf1:col5' => :boolean,
275
- 'cf1:col6' => :symbol }
276
-
277
- table.get('rowkey1').to_hash(schema)
278
-
279
- # Returns all versions for each column indexed by their timestamps
280
- table.get('rowkey1').to_hash_with_versions(schema)
281
- ```
282
-
283
388
  ### DELETE
284
389
 
285
390
  ```ruby
@@ -287,23 +392,23 @@ table.get('rowkey1').to_hash_with_versions(schema)
287
392
  table.delete('rowkey1')
288
393
 
289
394
  # Deletes all columns in the specified column family
290
- table.delete('rowkey1', 'cf1')
395
+ table.delete('rowkey1', :cf1)
291
396
 
292
397
  # Deletes a column
293
- table.delete('rowkey1', 'cf1:col1')
398
+ table.delete('rowkey1', :author)
294
399
 
295
400
  # Deletes a column with empty qualifier.
296
401
  # (!= deleting all the columns in the family. See the trailing colon.)
297
402
  table.delete('rowkey1', 'cf1:')
298
403
 
299
404
  # Deletes a version of a column
300
- table.delete('rowkey1', 'cf1:col1', 1352978648642)
405
+ table.delete('rowkey1', :author, 1352978648642)
301
406
 
302
407
  # Deletes multiple versions of a column
303
- table.delete('rowkey1', 'cf1:col1', 1352978648642, 1352978649642)
408
+ table.delete('rowkey1', :author, 1352978648642, 1352978649642)
304
409
 
305
410
  # Batch delete
306
- table.delete(['rowkey1'], ['rowkey2'], ['rowkey3', 'cf1:col1', 1352978648642, 135297864964])
411
+ table.delete(['rowkey1'], ['rowkey2'], ['rowkey3', :author, 1352978648642, 1352978649642])
307
412
  ```
308
413
 
309
414
  However, the last syntax seems a bit unwieldy when you just wish to delete a few rows.
@@ -318,11 +423,16 @@ table.delete_row 'rowkey1', 'rowkey2', 'rowkey3'
318
423
  ### Atomic increment of column values
319
424
 
320
425
  ```ruby
321
- # Atomically increase cf1:counter by one
322
- table.increment('rowkey1', 'cf1:counter', 1)
426
+ # Atomically increase cf2:reviews by one
427
+ table.increment('rowkey1', reviews: 1)
323
428
 
324
- # Atomically increase two columns by one and two respectively
325
- table.increment('rowkey1', 'cf1:counter' => 1, 'cf1:counter2' => 2)
429
+ # Atomically increase two columns by one and five respectively
430
+ table.increment('rowkey1', reviews: 1, stars: 5)
431
+
432
+ # Increase column values of multiple rows.
433
+ # - Atomicity is only guaranteed within each row.
434
+ table.increment 'rowkey1' => { reviews: 1, stars: 5 },
435
+ 'rowkey2' => { reviews: 1, stars: 3 }
326
436
  ```
327
437
 
328
438
  ### SCAN
@@ -332,10 +442,11 @@ table.increment('rowkey1', 'cf1:counter' => 1, 'cf1:counter2' => 2)
332
442
  ```ruby
333
443
  # Full scan
334
444
  table.each do |row|
335
- age = row.fixnum('cf:age')
336
- name = row.string('cf:name')
337
- # ...
445
+ p row.to_h
338
446
  end
447
+
448
+ # Returns Enumerator when block is not given
449
+ table.each.with_index.each_slice(10).to_a
339
450
  ```
340
451
 
341
452
  ## Scoped access
@@ -374,13 +485,13 @@ you can retrieve data with the following methods.
374
485
  import org.apache.hadoop.hbase.filter.RandomRowFilter
375
486
 
376
487
  table.range('A'..'Z'). # Row key range,
377
- project('cf1:a'). # Select cf1:a column
488
+ project(:author). # Select cf1:author column
378
489
  project('cf2'). # Select cf2 family as well
379
- filter('cf1:a' => 'Hello'). # Filter by cf1:a value
380
- filter('cf2:d' => 100..200). # Range filter on cf2:d
381
- filter('cf2:e' => [10, 20..30]). # Set-inclusion condition on cf2:e
490
+ filter(category: 'Comics'). # Filter by cf1:category value
491
+ filter(year: [1990, 2000, 2010]). # Set-inclusion condition on cf1:year
492
+ filter(weight: 2.0..4.0). # Range filter on cf1:weight
382
493
  filter(RandomRowFilter.new(0.5)). # Any Java HBase filter
383
- while('cf2:f' => { ne: 'OPEN' }). # Early termination of scan
494
+ while(reviews: { gt: 20 }). # Early termination of scan
384
495
  time_range(Time.now - 600, Time.now). # Scan data of the last 10 minutes
385
496
  limit(10). # Limits the size of the result set
386
497
  versions(2). # Only fetches 2 versions for each value
@@ -389,7 +500,7 @@ table.range('A'..'Z'). # Row key range,
389
500
  with_java_scan { |scan| # Directly access Java Scan object
390
501
  scan.setCacheBlocks false
391
502
  }.
392
- to_a # To Array
503
+ to_a # To Array of HBase::Rows
393
504
  ```
394
505
 
395
506
  ### *range*
@@ -420,15 +531,15 @@ Optionally, prefix filter can be applied as follows.
420
531
  # Row keys with "APPLE" prefix
421
532
  # Start key is automatically set to "APPLE",
422
533
  # stop key "APPLF" to avoid unnecessary disk access
423
- table.range(:prefix => 'APPLE')
534
+ table.range(prefix: 'APPLE')
424
535
 
425
536
  # Row keys with "ACE", "BLUE" or "APPLE" prefix
426
537
  # Start key is automatically set to "ACE",
427
538
  # stop key "BLUF"
428
- table.range(:prefix => ['ACE', 'BLUE', 'APPLE'])
539
+ table.range(prefix: ['ACE', 'BLUE', 'APPLE'])
429
540
 
430
541
  # Prefix filter with start key and stop key.
431
- table.range('ACE', 'BLUEMARINE', :prefix => ['ACE', 'BLUE', 'APPLE'])
542
+ table.range('ACE', 'BLUEMARINE', prefix: ['ACE', 'BLUE', 'APPLE'])
432
543
  ```
433
544
 
434
545
  Subsequent calls to `#range` override the range previously defined.
@@ -437,7 +548,7 @@ Subsequent calls to `#range` override the range previously defined.
437
548
  # Previous ranges are discarded
438
549
  scope.range(1, 100).
439
550
  range(50..100).
440
- range(:prefix => 'A').
551
+ range(prefix: 'A').
441
552
  range(1, 1000)
442
553
  # Same as `scope.range(1, 1000)`
443
554
  ```
@@ -451,27 +562,24 @@ Multiple calls have conjunctive effects.
451
562
  # Range scanning the table with filters
452
563
  table.range(nil, 1000).
453
564
  filter(
454
- # Numbers and characters: Checks if the value is equal to the given value
455
- 'cf1:a' => 'Hello',
456
- 'cf1:b' => 1024,
565
+ # Equality match
566
+ year: 2013,
457
567
 
458
568
  # Range of numbers or characters: Checks if the value falls within the range
459
- 'cf1:c' => 100..200,
460
- 'cf1:d' => 'A'..'C',
569
+ weight: 2.0..4.0,
570
+ author: 'A'..'C',
461
571
 
462
572
  # Regular expression: Checks if the value matches the regular expression
463
- 'cf1:e' => /world$/i,
573
+ summary: /classic$/i,
464
574
 
465
575
  # Hash: Tests the value with 6 types of operators (:gt, :lt, :gte, :lte, :eq, :ne)
466
- 'cf1:f' => { gt: 1000, lte: 2000 },
467
- 'cf1:g' => { ne: 1000 },
576
+ reviews: { gt: 100, lte: 200 },
468
577
 
469
578
  # Array of the aforementioned types: OR condition (disjunctive)
470
- 'cf1:h' => %w[A B C],
471
- 'cf1:i' => ['A'...'B', 'C', /^D/, { lt: 'F' }]).
579
+ category: ['Fiction', 'Comic', /science/i, { ne: 'Political Science' }]).
472
580
 
473
581
  # Multiple calls for conjunctive filtering
474
- filter('cf1:j' => ['Alice'..'Bob', 'Cat']).
582
+ filter(summary: /instant/i).
475
583
 
476
584
  # Any number of Java filters can be applied
477
585
  filter(org.apache.hadoop.hbase.filter.RandomRowFilter.new(0.5)).
@@ -489,12 +597,12 @@ See the following example.
489
597
 
490
598
  ```ruby
491
599
  (0...30).each do |idx|
492
- table.put idx, 'cf1:a' => idx % 10
600
+ table.put idx, year: 2000 + idx % 10
493
601
  end
494
602
 
495
- table.filter('cf1:a' => { lte: 1 }).map { |r| r.rowkey :fixnum }
603
+ table.filter(year: { lte: 2001 }).map { |r| r.rowkey :fixnum }
496
604
  # [0, 1, 10, 11, 20, 21]
497
- table.while('cf1:a' => { lte: 1 }).map { |r| r.rowkey :fixnum }
605
+ table.while(year: { lte: 2001 }).map { |r| r.rowkey :fixnum }
498
606
  # [0, 1]
499
607
  # Scan terminates immediately when condition not met.
500
608
  ```
@@ -505,9 +613,9 @@ table.while('cf1:a' => { lte: 1 }).map { |r| r.rowkey :fixnum }
505
613
  Multiple calls have additive effects.
506
614
 
507
615
  ```ruby
508
- # Fetches cf1:a and all columns in column family cf2 and cf3
509
- scoped.project('cf1:a', 'cf2').
510
- project('cf3')
616
+ # Fetches cf1:title, cf1:author, and all columns in column family cf2 and cf3
617
+ scoped.project(:title, :author, :cf2).
618
+ project(:cf3)
511
619
  ```
512
620
 
513
621
  HBase filters can not only filter rows but also columns.
@@ -519,17 +627,17 @@ to pass column filter to filter method.
519
627
  ```ruby
520
628
  # Column prefix filter:
521
629
  # Fetch columns whose qualifiers start with the specified prefixes
522
- scoped.project(:prefix => 'alice').
523
- project(:prefix => %w[alice bob])
630
+ scoped.project(prefix: 'alice').
631
+ project(prefix: %w[alice bob])
524
632
 
525
633
  # Column range filter:
526
634
  # Fetch columns whose qualifiers within the ranges
527
- scoped.project(:range => 'a'...'c').
528
- project(:range => ['i'...'k', 'x'...'z'])
635
+ scoped.project(range: 'a'...'c').
636
+ project(range: ['i'...'k', 'x'...'z'])
529
637
 
530
638
  # Column pagination filter:
531
639
  # Fetch columns within the specified intra-scan offset and limit
532
- scoped.project(:offset => 1000, :limit => 10)
640
+ scoped.project(offset: 1000, limit: 10)
533
641
  ```
534
642
 
535
643
  When using column filters on *fat* rows with many columns,
@@ -540,7 +648,7 @@ However setting batch size allows multiple rows with the same row key are return
540
648
  ```ruby
541
649
  # Let's say that we have rows with more than 10 columns whose qualifiers start with `str`
542
650
  puts scoped.range(1..100).
543
- project(:prefix => 'str').
651
+ project(prefix: 'str').
544
652
  batch(10).
545
653
  map { |row| [row.rowkey(:fixnum), row.count].map(&:to_s).join ': ' }
546
654
 
@@ -556,12 +664,10 @@ puts scoped.range(1..100).
556
664
  ### Scoped SCAN / GET
557
665
 
558
666
  ```ruby
559
- scoped = table.versions(1). # Limits the number of versions
560
- filter('cf1:a' => 'Hello', # With filters
561
- 'cf1:b' => 100...200,
562
- 'cf1:c' => 'Alice'..'Bob').
563
- range('rowkey0'..'rowkey2') # Range of rowkeys.
564
- project('cf1', 'cf2:x') # Projection
667
+ scoped = table.versions(1) # Limits the number of versions
668
+ .filter(year: 1990...2000)
669
+ .range('rowkey0'..'rowkey2') # Range of rowkeys.
670
+ .project('cf1', 'cf2:x') # Projection
565
671
 
566
672
  # Scoped GET
567
673
  # Nonexistent or filtered rows are returned as nils
@@ -603,22 +709,22 @@ of the projected columns.
603
709
 
604
710
  ```ruby
605
711
  # cf2:reviews must hold 8-byte integer values
606
- table.project('cf1:a').aggregate(:sum)
607
- table.project('cf1:a').aggregate(:avg)
608
- table.project('cf1:a').aggregate(:min)
609
- table.project('cf1:a').aggregate(:max)
610
- table.project('cf1:a').aggregate(:std)
611
- table.project('cf1:a').aggregate(:row_count)
712
+ table.project(:reviews).aggregate(:sum)
713
+ table.project(:reviews).aggregate(:avg)
714
+ table.project(:reviews).aggregate(:min)
715
+ table.project(:reviews).aggregate(:max)
716
+ table.project(:reviews).aggregate(:std)
717
+ table.project(:reviews).aggregate(:row_count)
612
718
 
613
719
  # Aggregation of multiple columns
614
- table.project('cf1:a', 'cf1:b').aggregate(:sum)
720
+ table.project(:reviews, :stars).aggregate(:sum)
615
721
  ```
616
722
 
617
723
  By default, aggregate method assumes that the projected values are 8-byte integers.
618
724
  For other data types, you can pass your own ColumnInterpreter.
619
725
 
620
726
  ```ruby
621
- table.project('cf1:b').aggregate(:sum, MyColumnInterpreter.new)
727
+ table.project(:price).aggregate(:sum, MyColumnInterpreter.new)
622
728
  ```
623
729
 
624
730
  ## Table inspection
@@ -691,8 +797,7 @@ With `regions` method, you can even presplit the new table just like the old one
691
797
  ```ruby
692
798
  hbase[:dupe_table].create!(
693
799
  table.raw_families,
694
- table.raw_properties.merge(
695
- :splits => table.regions.map { |r| r[:start_key] }.compact))
800
+ table.raw_properties.merge(splits: table.regions.map { |r| r[:start_key] }.compact))
696
801
  ```
697
802
 
698
803
  ## Table administration
@@ -709,21 +814,22 @@ and come with non-bang, asynchronous counterparts.
709
814
  table.create!(
710
815
  # 1st Hash: Column family specification
711
816
  {
712
- :cf1 => { :compression => :snappy },
713
- :cf2 => { :bloomfilter => :row }
817
+ cf1: { compression: :snappy },
818
+ cf2: { bloomfilter: :row }
714
819
  },
715
820
 
716
821
  # 2nd Hash: Table properties
717
- :max_filesize => 256 * 1024 ** 2,
718
- :deferred_log_flush => false,
719
- :splits => [1000, 2000, 3000])
822
+ max_filesize: 256 * 1024 ** 2,
823
+ deferred_log_flush: false,
824
+ splits: [1000, 2000, 3000]
825
+ )
720
826
 
721
827
  # Alter table properties (synchronous with optional block)
722
828
  table.alter!(
723
- :max_filesize => 512 * 1024 ** 2,
724
- :memstore_flushsize => 64 * 1024 ** 2,
725
- :readonly => false,
726
- :deferred_log_flush => true
829
+ max_filesize: 512 * 1024 ** 2,
830
+ memstore_flushsize: 64 * 1024 ** 2,
831
+ readonly: false,
832
+ deferred_log_flush: true
727
833
  ) { |progress, total|
728
834
  # Progress report with an optional block
729
835
  puts [progress, total].join('/')
@@ -731,10 +837,10 @@ table.alter!(
731
837
 
732
838
  # Alter table properties (asynchronous)
733
839
  table.alter(
734
- :max_filesize => 512 * 1024 ** 2,
735
- :memstore_flushsize => 64 * 1024 ** 2,
736
- :readonly => false,
737
- :deferred_log_flush => true
840
+ max_filesize: 512 * 1024 ** 2,
841
+ memstore_flushsize: 64 * 1024 ** 2,
842
+ readonly: false,
843
+ deferred_log_flush: true
738
844
  )
739
845
  ```
740
846
 
@@ -780,11 +886,10 @@ http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html
780
886
 
781
887
  ```ruby
782
888
  # Add column family
783
- table.add_family! :cf3, :compression => :snappy,
784
- :bloomfilter => :row
889
+ table.add_family! :cf3, compression: :snappy, bloomfilter: :row
785
890
 
786
891
  # Alter column family
787
- table.alter_family! :cf2, :bloomfilter => :rowcol
892
+ table.alter_family! :cf2, bloomfilter: :rowcol
788
893
 
789
894
  # Remove column family
790
895
  table.delete_family! :cf1
@@ -797,8 +902,7 @@ table.delete_family! :cf1
797
902
  unless table.has_coprocessor?(cp_class_name1)
798
903
  table.add_coprocessor! cp_class_name1
799
904
  end
800
- table.add_coprocessor! cp_class_name2,
801
- :path => path, :priority => priority, :params => params
905
+ table.add_coprocessor! cp_class_name2, path: path, priority: priority, params: params
802
906
 
803
907
  # Remove coprocessor
804
908
  table.remove_coprocessor! cp_class_name1
@@ -868,17 +972,14 @@ table.range('1'..'3').map { |r| r.rowkey :string }
868
972
 
869
973
  ### Non-string column qualifier
870
974
 
871
- If a column qualifier is not a String, *an HBase::ColumnKey instance* should be used
872
- instead of a conventional `FAMILY:QUALIFIER` String.
975
+ If a column qualifier is not a String, a 2-element Array should be used.
873
976
 
874
977
  ```ruby
875
978
  table.put 'rowkey',
876
- 'cf1:col1' => 'Hello world',
877
- HBase::ColumnKey(:cf1, 100) => "Byte representation of an 8-byte integer",
878
- HBase::ColumnKey(:cf1, bytes) => "Qualifier is an arbitrary byte array"
979
+ [:cf1, 100 ] => "Byte representation of an 8-byte integer",
980
+ [:cf1, bytes] => "Qualifier is an arbitrary byte array"
879
981
 
880
- table.get('rowkey').string('cf1:col1')
881
- table.get('rowkey').string(HBase::ColumnKey(:cf1, 100))
982
+ table.get('rowkey')[:cf1, 100]
882
983
  # ...
883
984
  ```
884
985
 
@@ -895,12 +996,7 @@ table.put({ int: 12345 }, 'cf1:a' => { byte: 100 }, # 1-byte integer
895
996
  'cf1:c' => { int: 300 }, # 4-byte integer
896
997
  'cf1:4' => 400) # Ordinary 8-byte integer
897
998
 
898
- result = table.get(int: 12345)
899
-
900
- result.byte('cf1:a') # 100
901
- result.short('cf1:b') # 200
902
- result.int('cf1:c') # 300
903
- # ...
999
+ row = table.get(int: 12345)
904
1000
  ```
905
1001
 
906
1002
  ### Working with byte arrays
@@ -919,7 +1015,7 @@ which makes byte array manipulation much easier.
919
1015
  A ByteArray can be created as a concatenation of any number of objects.
920
1016
 
921
1017
  ```ruby
922
- ba = HBase::ByteArray(100, 3.14, {int: 300}, "Hello World")
1018
+ ba = HBase::ByteArray[100, 3.14, {int: 300}, "Hello World"]
923
1019
  ```
924
1020
 
925
1021
  Then you can slice it and decode each part,
@@ -943,7 +1039,7 @@ ba << { short: 300 }
943
1039
  concatenate another ByteArray,
944
1040
 
945
1041
  ```ruby
946
- ba += HBase::ByteArray(1024)
1042
+ ba += HBase::ByteArray[1024]
947
1043
  ```
948
1044
 
949
1045
  or shift decoded objects from it.