hbase-jruby 0.1.1-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +623 -0
- data/Rakefile +7 -0
- data/hbase-jruby.gemspec +23 -0
- data/lib/hbase-jruby.rb +16 -0
- data/lib/hbase-jruby/admin.rb +29 -0
- data/lib/hbase-jruby/byte_array.rb +39 -0
- data/lib/hbase-jruby/cell.rb +122 -0
- data/lib/hbase-jruby/column_key.rb +63 -0
- data/lib/hbase-jruby/dependency.rb +69 -0
- data/lib/hbase-jruby/hbase.rb +77 -0
- data/lib/hbase-jruby/pom/cdh3u5.xml +40 -0
- data/lib/hbase-jruby/pom/cdh4.1.2.xml +47 -0
- data/lib/hbase-jruby/result.rb +382 -0
- data/lib/hbase-jruby/scoped.rb +489 -0
- data/lib/hbase-jruby/table.rb +486 -0
- data/lib/hbase-jruby/util.rb +171 -0
- data/lib/hbase-jruby/version.rb +5 -0
- data/test/helper.rb +53 -0
- data/test/test_byte_array.rb +40 -0
- data/test/test_cell.rb +51 -0
- data/test/test_column_key.rb +49 -0
- data/test/test_hbase.rb +36 -0
- data/test/test_scoped.rb +318 -0
- data/test/test_table.rb +211 -0
- data/test/test_table_admin.rb +148 -0
- data/test/test_util.rb +80 -0
- metadata +116 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Junegunn Choi
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,623 @@
|
|
1
|
+
# hbase-jruby
|
2
|
+
|
3
|
+
*hbase-jruby* is a Ruby-esque interface for accessing HBase from JRuby.
|
4
|
+
|
5
|
+
You can of course just use the native Java APIs of HBase,
|
6
|
+
but doing so requires a lot of keystrokes even for the most basic operations and
|
7
|
+
can easily lead to overly verbose code that will be frowned upon by Rubyists.
|
8
|
+
Anyhow, JRuby is Ruby, not Java, right?
|
9
|
+
|
10
|
+
*hbase-jruby* provides the following:
|
11
|
+
- Easy, Ruby-esque interface for the fundamental HBase operations
|
12
|
+
- ActiveRecord-like method chaining for scanning tables
|
13
|
+
- Automatic Hadoop/HBase dependency resolution
|
14
|
+
|
15
|
+
## A quick example
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require 'hbase-jruby'
|
19
|
+
|
20
|
+
hbase = HBase.new
|
21
|
+
table = hbase.table(:test_table)
|
22
|
+
|
23
|
+
# PUT
|
24
|
+
table.put :rowkey1 => { 'cf1:a' => 100, 'cf2:b' => "Hello" }
|
25
|
+
|
26
|
+
# GET
|
27
|
+
row = table.get(:rowkey1)
|
28
|
+
number = row.integer('cf1:a')
|
29
|
+
string = row.string('cf2:b')
|
30
|
+
|
31
|
+
# SCAN
|
32
|
+
table.range('rowkey1'..'rowkey9').
|
33
|
+
filter('cf1:a' => 100..200, # cf1:a between 100 and 200
|
34
|
+
'cf2:b' => 'Hello', # cf2:b = 'Hello'
|
35
|
+
'cf2:c' => ['foo', 'bar']). # cf2:c in ('foo', 'bar')
|
36
|
+
project('cf1:a', 'cf2').each do |row|
|
37
|
+
puts row.integer('cf1:a')
|
38
|
+
end
|
39
|
+
|
40
|
+
# DELETE
|
41
|
+
table.delete(:rowkey9)
|
42
|
+
```
|
43
|
+
|
44
|
+
## Installation
|
45
|
+
|
46
|
+
Add this line to your application's Gemfile:
|
47
|
+
|
48
|
+
gem 'hbase-jruby'
|
49
|
+
|
50
|
+
And then execute:
|
51
|
+
|
52
|
+
$ bundle
|
53
|
+
|
54
|
+
Or install it yourself as:
|
55
|
+
|
56
|
+
$ gem install hbase-jruby
|
57
|
+
|
58
|
+
## Setting up
|
59
|
+
|
60
|
+
### Resolving Hadoop/HBase dependency
|
61
|
+
|
62
|
+
To be able to access HBase from JRuby, Hadoop/HBase dependency must be satisfied.
|
63
|
+
This can be done by setting up CLASSPATH variable beforehand
|
64
|
+
or by `require`ing relevant JAR files after launch.
|
65
|
+
However, downloading all the JAR files and manually putting them in CLASSPATH is a PITA,
|
66
|
+
especially when HBase is not installed on local system.
|
67
|
+
|
68
|
+
*hbase-jruby* includes `HBase.resolve_dependency!` helper method,
|
69
|
+
which resolves Hadoop/HBase dependency.
|
70
|
+
|
71
|
+
#### Preconfigured dependencies
|
72
|
+
|
73
|
+
Apache Maven is the de facto standard dependency management mechanism for Java projects.
|
74
|
+
Current version of *hbase-jruby* is shipped with Maven dependency specifications
|
75
|
+
for the following Hadoop/HBase distributions.
|
76
|
+
|
77
|
+
* cdh4.1.2
|
78
|
+
* Recommended as of now
|
79
|
+
* cdh3u5
|
80
|
+
* Does not support some features
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
require 'hbase-jruby'
|
84
|
+
|
85
|
+
HBase.resolve_dependency! 'cdh4.1.2'
|
86
|
+
```
|
87
|
+
|
88
|
+
#### Customized dependencies
|
89
|
+
|
90
|
+
If you use another version of HBase and Hadoop,
|
91
|
+
you can use your own Maven pom.xml file with its customized Hadoop/HBase dependency.
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
HBase.resolve_dependency! '/project/my-hbase/pom.xml'
|
95
|
+
```
|
96
|
+
|
97
|
+
#### Using `hbase classpath` command
|
98
|
+
|
99
|
+
If you have HBase installed on your system, it's possible to find the JAR files
|
100
|
+
for that local installation with `hbase classpath` command.
|
101
|
+
You can tell `resolve_dependency!` method to do so by passing it special `:hbase` parameter.
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
HBase.resolve_dependency! :hbase
|
105
|
+
```
|
106
|
+
|
107
|
+
### Connecting to HBase
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
# HBase on localhost
|
111
|
+
hbase = HBase.new
|
112
|
+
|
113
|
+
# HBase on remote host
|
114
|
+
hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net'
|
115
|
+
|
116
|
+
# Extra configuration
|
117
|
+
hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net',
|
118
|
+
'hbase.client.retries.number' => 3
|
119
|
+
|
120
|
+
# Close HBase connection
|
121
|
+
hbase.close
|
122
|
+
```
|
123
|
+
|
124
|
+
## Accessing data with HBase::Table instance
|
125
|
+
|
126
|
+
`HBase#table` method creates an `HBase::Table` instance which represents a table on HBase.
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
table = hbase.table(:test_table)
|
130
|
+
```
|
131
|
+
|
132
|
+
`HBase::Table` instance must be closed after use.
|
133
|
+
|
134
|
+
```ruby
|
135
|
+
# Always close table instance after use
|
136
|
+
table.close
|
137
|
+
|
138
|
+
# If block is given, table is automatically closed at the end of the block
|
139
|
+
hbase.table(:test_table) do |table|
|
140
|
+
# ...
|
141
|
+
end
|
142
|
+
```
|
143
|
+
|
144
|
+
## Basic table administration
|
145
|
+
|
146
|
+
### Creating tables
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
table = hbase.table(:my_table)
|
150
|
+
|
151
|
+
# Drop table if exists
|
152
|
+
table.drop! if table.exists?
|
153
|
+
|
154
|
+
# Create table with two column families
|
155
|
+
table.create! :cf1 => {},
|
156
|
+
:cf2 => { :compression => :snappy, :bloomfilter => :row }
|
157
|
+
```
|
158
|
+
|
159
|
+
### Table inspection
|
160
|
+
|
161
|
+
```ruby
|
162
|
+
puts table.inspect
|
163
|
+
```
|
164
|
+
|
165
|
+
## Basic operations
|
166
|
+
|
167
|
+
### PUT
|
168
|
+
|
169
|
+
```ruby
|
170
|
+
# Putting a single row
|
171
|
+
table.put 'rowkey1', 'cf1:col1' => "Hello", 'cf2:col2' => "World"
|
172
|
+
|
173
|
+
# Putting multiple rows
|
174
|
+
table.put 'rowkey1' => { 'cf1:col1' => "Hello", 'cf2:col2' => "World" },
|
175
|
+
'rowkey2' => { 'cf1:col1' => "Howdy", 'cf2:col2' => "World" },
|
176
|
+
'rowkey3' => { 'cf1:col1' => "So long", 'cf2:col2' => "World" }
|
177
|
+
```
|
178
|
+
|
179
|
+
### GET
|
180
|
+
|
181
|
+
HBase stores everything as a byte array, so when you fetch data from HBase,
|
182
|
+
you need to explicitly specify the type of each value stored.
|
183
|
+
|
184
|
+
```ruby
|
185
|
+
row = table.get('rowkey1')
|
186
|
+
|
187
|
+
# Rowkey
|
188
|
+
rowk = row.rowkey
|
189
|
+
|
190
|
+
# Column value as a raw Java byte array
|
191
|
+
col0 = row.raw 'cf1:col0'
|
192
|
+
|
193
|
+
# Decode column values
|
194
|
+
col1 = row.string 'cf1:col1'
|
195
|
+
col2 = row.fixnum 'cf1:col2'
|
196
|
+
col3 = row.bignum 'cf1:col3'
|
197
|
+
col4 = row.float 'cf1:col4'
|
198
|
+
col5 = row.boolean 'cf1:col5'
|
199
|
+
col6 = row.symbol 'cf1:col6'
|
200
|
+
|
201
|
+
# Decode multiple columns at once
|
202
|
+
row.string ['cf1:str1', 'cf1:str2']
|
203
|
+
# [ "Hello", "World" ]
|
204
|
+
```
|
205
|
+
|
206
|
+
#### Batch GET
|
207
|
+
|
208
|
+
```ruby
|
209
|
+
# Pass an array of row keys as the parameter
|
210
|
+
rows = table.get(['rowkey1', 'rowkey2', 'rowkey3'])
|
211
|
+
```
|
212
|
+
|
213
|
+
#### Decode all versions with plural-form (-s) methods
|
214
|
+
|
215
|
+
```ruby
|
216
|
+
# Decode all versions as Hash indexed by their timestamps
|
217
|
+
row.strings 'cf1:str'
|
218
|
+
# {1353143856665=>"Hello", 1353143856662=>"Goodbye"}
|
219
|
+
|
220
|
+
# Decode all versions of multiple columns
|
221
|
+
row.strings ['cf1:str1', 'cf1:str2']
|
222
|
+
# [
|
223
|
+
# {1353143856665=>"Hello", 1353143856662=>"Goodbye"},
|
224
|
+
# {1353143856665=>"World", 1353143856662=>"Cruel world"}
|
225
|
+
# ]
|
226
|
+
|
227
|
+
# Plural-form methods are provided for any other data types as well
|
228
|
+
cols0 = row.raws 'cf1:col0'
|
229
|
+
cols1 = row.strings 'cf1:col1'
|
230
|
+
cols2 = row.fixnums 'cf1:col2'
|
231
|
+
cols3 = row.bignums 'cf1:col3'
|
232
|
+
cols4 = row.floats 'cf1:col4'
|
233
|
+
cols5 = row.booleans 'cf1:col5'
|
234
|
+
cols6 = row.symbols 'cf1:col6'
|
235
|
+
```
|
236
|
+
|
237
|
+
#### Intra-row scan
|
238
|
+
|
239
|
+
Intra-row scan can be done with `each` method which yields `HBase::Cell` instances.
|
240
|
+
|
241
|
+
```ruby
|
242
|
+
# Intra-row scan (all versions)
|
243
|
+
row.each do |cell|
|
244
|
+
family = cell.family
|
245
|
+
qualifier = cell.qualifier(:string) # Column qualifier as String
|
246
|
+
timestamp = cell.timestamp
|
247
|
+
|
248
|
+
# Cell value as Java byte array
|
249
|
+
bytes = cell.bytes
|
250
|
+
|
251
|
+
# Typed access
|
252
|
+
# value_as_string = cell.string
|
253
|
+
# value_as_fixnum = cell.fixnum
|
254
|
+
# ...
|
255
|
+
end
|
256
|
+
```
|
257
|
+
|
258
|
+
#### `to_hash`
|
259
|
+
|
260
|
+
```ruby
|
261
|
+
# Returns the Hash representation of the record with the specified schema
|
262
|
+
schema = {
|
263
|
+
'cf1:col1' => :string,
|
264
|
+
'cf1:col2' => :fixnum,
|
265
|
+
'cf1:col3' => :bignum,
|
266
|
+
'cf1:col4' => :float,
|
267
|
+
'cf1:col5' => :boolean,
|
268
|
+
'cf1:col6' => :symbol }
|
269
|
+
|
270
|
+
table.get('rowkey1').to_hash(schema)
|
271
|
+
|
272
|
+
# Returns all versions for each column indexed by their timestamps
|
273
|
+
table.get('rowkey1').to_hash_with_versions(schema)
|
274
|
+
```
|
275
|
+
|
276
|
+
### DELETE
|
277
|
+
|
278
|
+
```ruby
|
279
|
+
# Deletes a row
|
280
|
+
table.delete('rowkey1')
|
281
|
+
|
282
|
+
# Deletes all columns in the specified column family
|
283
|
+
table.delete('rowkey1', 'cf1')
|
284
|
+
|
285
|
+
# Deletes a column
|
286
|
+
table.delete('rowkey1', 'cf1:col1')
|
287
|
+
|
288
|
+
# Deletes a column with empty qualifier.
|
289
|
+
# (!= deleting the entire columns in the family. See the trailing colon.)
|
290
|
+
table.delete('rowkey1', 'cf1:')
|
291
|
+
|
292
|
+
# Deletes a version of a column
|
293
|
+
table.delete('rowkey1', 'cf1:col1', 1352978648642)
|
294
|
+
|
295
|
+
# Deletes multiple versions of a column
|
296
|
+
table.delete('rowkey1', 'cf1:col1', 1352978648642, 1352978649642)
|
297
|
+
|
298
|
+
# Batch delete
|
299
|
+
table.delete(['rowkey1'], ['rowkey2'], ['rowkey3', 'cf1:col1'])
|
300
|
+
|
301
|
+
# Truncate table
|
302
|
+
table.truncate!
|
303
|
+
```
|
304
|
+
|
305
|
+
### Atomic increment of column values
|
306
|
+
|
307
|
+
```ruby
|
308
|
+
# Atomically increase cf1:counter by one
|
309
|
+
table.increment('rowkey1', 'cf1:counter', 1)
|
310
|
+
|
311
|
+
# Atomically increase two columns by one and two respectively
|
312
|
+
table.increment('rowkey1', 'cf1:counter' => 1, 'cf1:counter2' => 2)
|
313
|
+
```
|
314
|
+
|
315
|
+
### SCAN
|
316
|
+
|
317
|
+
`HBase::Table` itself is an enumerable object.
|
318
|
+
|
319
|
+
```ruby
|
320
|
+
# Full scan
|
321
|
+
table.each do |row|
|
322
|
+
# ...
|
323
|
+
end
|
324
|
+
```
|
325
|
+
|
326
|
+
## Scoped access
|
327
|
+
|
328
|
+
SCAN and GET operations are actually implemented in enumerable `HBase::Scoped` class,
|
329
|
+
whose instance is created by `HBase::Table#each` call.
|
330
|
+
|
331
|
+
```ruby
|
332
|
+
scoped = table.each
|
333
|
+
scoped.get(1)
|
334
|
+
scoped.to_a
|
335
|
+
```
|
336
|
+
|
337
|
+
An `HBase::Scoped` object provides a set of methods for controlling data retrieval
|
338
|
+
such as `range`, `filter`, `project`, `versions`, `caching`, et cetera.
|
339
|
+
However, it doesn't respond to data manipulation methods (`put`, `delete` and `increment`),
|
340
|
+
and methods for table administration.
|
341
|
+
|
342
|
+
An `HBase::Table` object also responds to the data retrieval methods described above,
|
343
|
+
but those calls are simply forwarded to a new `HBase::Scoped` object implicitly created.
|
344
|
+
For example, `table.range(start, end)` is just a shorthand notation for
|
345
|
+
`table.each.range(start, end)`.
|
346
|
+
|
347
|
+
### Chaining methods
|
348
|
+
|
349
|
+
Methods of `HBase::Scoped` can be chained as follows.
|
350
|
+
|
351
|
+
```ruby
|
352
|
+
# Chaining methods
|
353
|
+
import org.apache.hadoop.hbase.filter.RandomRowFilter
|
354
|
+
|
355
|
+
table.range('A'..'Z'). # Row key range,
|
356
|
+
project('cf1:a'). # Select cf1:a column
|
357
|
+
project('cf2'). # Select cf2 family as well
|
358
|
+
filter('cf1:a' => 'Hello'). # Filter by cf1:a value
|
359
|
+
filter('cf2:d' => 100..200). # Range filter on cf2:d
|
360
|
+
filter('cf2:e' => [10, 20..30]). # Set-inclusion condition on cf2:e
|
361
|
+
filter(RandomRowFilter.new(0.5)). # Any Java HBase filter
|
362
|
+
limit(10). # Limits the size of the result set
|
363
|
+
versions(2). # Only fetches 2 versions for each value
|
364
|
+
batch(100). # Batch size for scan set to 100
|
365
|
+
caching(100). # Caching 100 rows
|
366
|
+
to_a # To Array
|
367
|
+
```
|
368
|
+
|
369
|
+
### range
|
370
|
+
|
371
|
+
`HBase::Scoped#range` method is used to filter rows based on their row keys.
|
372
|
+
|
373
|
+
```ruby
|
374
|
+
# 100 ~ 900 (inclusive end)
|
375
|
+
table.range(100..900)
|
376
|
+
|
377
|
+
# 100 ~ 900 (exclusive end)
|
378
|
+
table.range(100...900)
|
379
|
+
|
380
|
+
# 100 ~ 900 (exclusive end)
|
381
|
+
table.range(100, 900)
|
382
|
+
|
383
|
+
# 100 ~
|
384
|
+
table.range(100)
|
385
|
+
|
386
|
+
# ~ 900 (exclusive end)
|
387
|
+
table.range(nil, 900)
|
388
|
+
```
|
389
|
+
|
390
|
+
Optionally, prefix filter can be applied as follows.
|
391
|
+
|
392
|
+
```ruby
|
393
|
+
# Prefix filter
|
394
|
+
# Row keys with "APPLE" prefix
|
395
|
+
# Start key is automatically set to "APPLE",
|
396
|
+
# stop key "APPLF" to avoid unnecessary disk access
|
397
|
+
table.range(:prefix => 'APPLE')
|
398
|
+
|
399
|
+
# Row keys with "ACE", "BLUE" or "APPLE" prefix
|
400
|
+
# Start key is automatically set to "ACE",
|
401
|
+
# stop key "BLUF"
|
402
|
+
table.range(:prefix => ['ACE', 'BLUE', 'APPLE'])
|
403
|
+
|
404
|
+
# Prefix filter with start key and stop key.
|
405
|
+
table.range('ACE', 'BLUEMARINE', :prefix => ['ACE', 'BLUE', 'APPLE'])
|
406
|
+
```
|
407
|
+
|
408
|
+
Subsequent calls to `#range` override the range previously defined.
|
409
|
+
|
410
|
+
```ruby
|
411
|
+
# Previous ranges are discarded
|
412
|
+
scope.range(1, 100).
|
413
|
+
range(50..100).
|
414
|
+
range(:prefix => 'A').
|
415
|
+
range(1, 1000)
|
416
|
+
# Same as `scope.range(1, 1000)`
|
417
|
+
```
|
418
|
+
|
419
|
+
### filter
|
420
|
+
|
421
|
+
You can configure server-side filtering of rows and columns with `HBase::Scoped#filter` calls.
|
422
|
+
Multiple calls have conjunctive effects.
|
423
|
+
|
424
|
+
```ruby
|
425
|
+
# Range scanning the table with filters
|
426
|
+
table.range(nil, 1000).
|
427
|
+
filter('cf1:a' => 'Hello', # cf1:a = 'Hello'
|
428
|
+
'cf1:b' => 100...200, # cf1:b between 100 and 200
|
429
|
+
'cf1:c' => %w[A B C], # cf1:c in ('A', 'B', 'C')
|
430
|
+
'cf1:d' => ['A'...'B', 'C'], # ('A' <= cf1:d < 'B') or cf1:d = 'C'
|
431
|
+
'cf1:e' => { gt: 1000, lte: 2000 }, # cf1:e > 1000 and cf1:e <= 2000
|
432
|
+
'cf1:f' => { ne: 1000 }). # cf1:f != 1000
|
433
|
+
# Supported operators: gt, lt, gte, lte, eq, ne
|
434
|
+
filter('cf1:g' => ['Alice'..'Bob', 'Cat']). # Multiple calls for conjunctive filtering
|
435
|
+
filter(org.apache.hadoop.hbase.filter.RandomRowFilter.new(0.5)).
|
436
|
+
# Any number of Java filters can be applied
|
437
|
+
each do |record|
|
438
|
+
# ...
|
439
|
+
end
|
440
|
+
```
|
441
|
+
|
442
|
+
### project
|
443
|
+
|
444
|
+
`HBase::Scoped#project` allows you to fetch only a subset of columns from each row.
|
445
|
+
Multiple calls have additive effects.
|
446
|
+
|
447
|
+
```ruby
|
448
|
+
# Fetches cf1:a and all columns in column family cf2 and cf3
|
449
|
+
scoped.project('cf1:a', 'cf2').
|
450
|
+
project('cf3')
|
451
|
+
```
|
452
|
+
|
453
|
+
HBase filters can not only filter rows but also columns.
|
454
|
+
Since column filtering can be thought of as a kind of projection,
|
455
|
+
it makes sense to internally apply column filters in `HBase::Scoped#project`,
|
456
|
+
instead of in `HBase::Scoped#filter`, although it's still perfectly valid
|
457
|
+
to pass column filter to filter method.
|
458
|
+
|
459
|
+
```ruby
|
460
|
+
# Column prefix filter:
|
461
|
+
# Fetch columns whose qualifiers start with the specified prefixes
|
462
|
+
scoped.project(:prefix => 'alice').
|
463
|
+
project(:prefix => %w[alice bob])
|
464
|
+
|
465
|
+
# Column range filter:
|
466
|
+
# Fetch columns whose qualifiers are within the ranges
|
467
|
+
scoped.project(:range => 'a'...'c').
|
468
|
+
project(:range => ['i'...'k', 'x'...'z'])
|
469
|
+
|
470
|
+
# Column pagination filter (Cannot be chained. Must be called exactly once.):
|
471
|
+
# Fetch columns within the specified intra-scan offset and limit
|
472
|
+
scoped.project(:offset => 1000, :limit => 10)
|
473
|
+
```
|
474
|
+
|
475
|
+
When using column filters on *fat* rows with many columns,
|
476
|
+
it's advised that you limit the batch size with `HBase::Scoped#batch` call
|
477
|
+
to avoid fetching all columns at once.
|
478
|
+
However, setting the batch size allows multiple rows with the same row key to be returned during a scan.
|
479
|
+
|
480
|
+
```ruby
|
481
|
+
# Let's say that we have rows with more than 10 columns whose qualifiers start with `str`
|
482
|
+
puts scoped.range(1..100).
|
483
|
+
project(:prefix => 'str').
|
484
|
+
batch(10).
|
485
|
+
map { |row| [row.rowkey(:fixnum), row.count].map(&:to_s).join ': ' }
|
486
|
+
|
487
|
+
# 1: 10
|
488
|
+
# 1: 10
|
489
|
+
# 1: 5
|
490
|
+
# 2: 10
|
491
|
+
# 2: 2
|
492
|
+
# 3: 10
|
493
|
+
# ...
|
494
|
+
```
|
495
|
+
|
496
|
+
### Scoped SCAN / GET
|
497
|
+
|
498
|
+
```ruby
|
499
|
+
scoped = table.versions(1). # Limits the number of versions
|
500
|
+
filter('cf1:a' => 'Hello', # With filters
|
501
|
+
'cf1:b' => 100...200,
|
502
|
+
'cf1:c' => 'Alice'..'Bob').
|
503
|
+
range('rowkey0'..'rowkey2'). # Range of rowkeys
|
504
|
+
project('cf1', 'cf2:x') # Projection
|
505
|
+
|
506
|
+
# Scoped GET
|
507
|
+
# Nonexistent or filtered rows are returned as nils
|
508
|
+
scoped.get(['rowkey1', 'rowkey2', 'rowkey4'])
|
509
|
+
|
510
|
+
# Scoped SCAN
|
511
|
+
scoped.each do |row|
|
512
|
+
row.each do |cell|
|
513
|
+
# Intra-row scan
|
514
|
+
end
|
515
|
+
end
|
516
|
+
|
517
|
+
# Scoped COUNT
|
518
|
+
# When counting the number of rows, use `HBase::Scoped#count`
|
519
|
+
# instead of just iterating through the scope, as it internally
|
520
|
+
# minimizes amount of data fetched with KeyOnlyFilter
|
521
|
+
scoped.count
|
522
|
+
```
|
523
|
+
|
524
|
+
## Advanced topics
|
525
|
+
|
526
|
+
### Lexicographic scan order
|
527
|
+
|
528
|
+
HBase stores rows in the lexicographic order of the rowkeys in their byte array representations.
|
529
|
+
Thus the type of row key affects the scan order.
|
530
|
+
|
531
|
+
```ruby
|
532
|
+
(1..15).each do |i|
|
533
|
+
table.put i, data
|
534
|
+
table.put i.to_s, data
|
535
|
+
end
|
536
|
+
|
537
|
+
table.range(1..3).map { |r| r.rowkey :fixnum }
|
538
|
+
# [1, 2, 3]
|
539
|
+
table.range('1'..'3').map { |r| r.rowkey :string }
|
540
|
+
# %w[1 10 11 12 13 14 15 2 3]
|
541
|
+
```
|
542
|
+
|
543
|
+
### Non-string column qualifier
|
544
|
+
|
545
|
+
If a column qualifier is not a String, *an HBase::ColumnKey instance* should be used
|
546
|
+
instead of a conventional `FAMILY:QUALIFIER` String.
|
547
|
+
|
548
|
+
```ruby
|
549
|
+
table.put 'rowkey',
|
550
|
+
'cf1:col1' => 'Hello world',
|
551
|
+
HBase::ColumnKey(:cf1, 100) => "Byte representation of an 8-byte integer",
|
552
|
+
HBase::ColumnKey(:cf1, bytes) => "Qualifier is an arbitrary byte array"
|
553
|
+
|
554
|
+
table.get('rowkey').string('cf1:col1')
|
555
|
+
table.get('rowkey').string(HBase::ColumnKey(:cf1, 100))
|
556
|
+
# ...
|
557
|
+
```
|
558
|
+
|
559
|
+
|
560
|
+
### Table administration
|
561
|
+
|
562
|
+
`HBase::Table` provides a few *synchronous* table administration methods.
|
563
|
+
|
564
|
+
```ruby
|
565
|
+
# Create a table with configurable table-level properties
|
566
|
+
table.create!(
|
567
|
+
# 1st Hash: Column family specification
|
568
|
+
{ :cf1 => { :compression => :snappy }, :cf2 => {} },
|
569
|
+
|
570
|
+
# 2nd Hash: Table properties
|
571
|
+
:max_filesize => 256 * 1024 ** 2,
|
572
|
+
:deferred_log_flush => false)
|
573
|
+
|
574
|
+
# Alter table properties
|
575
|
+
table.alter!(
|
576
|
+
:max_filesize => 512 * 1024 ** 2,
|
577
|
+
:memstore_flushsize => 64 * 1024 ** 2,
|
578
|
+
:readonly => false,
|
579
|
+
:deferred_log_flush => true
|
580
|
+
)
|
581
|
+
|
582
|
+
# Add column family
|
583
|
+
table.add_family! :cf3, :compression => :snappy,
|
584
|
+
:bloomfilter => :row
|
585
|
+
|
586
|
+
# Alter column family
|
587
|
+
table.alter_family! :cf2, :bloomfilter => :rowcol
|
588
|
+
|
589
|
+
# Remove column family
|
590
|
+
table.delete_family! :cf1
|
591
|
+
```
|
592
|
+
|
593
|
+
You can perform other types of administrative tasks
|
594
|
+
with Native Java [HBaseAdmin object](http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html),
|
595
|
+
which can be obtained by `HBase#admin` method. When a block is given, the object is automatically closed at the end of the block.
|
596
|
+
|
597
|
+
```ruby
|
598
|
+
# Advanced table administration with HBaseAdmin object
|
599
|
+
# http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html
|
600
|
+
hbase.admin do |admin|
|
601
|
+
# ...
|
602
|
+
end
|
603
|
+
|
604
|
+
# Without the block
|
605
|
+
admin = hbase.admin
|
606
|
+
# ...
|
607
|
+
admin.close
|
608
|
+
```
|
609
|
+
|
610
|
+
## Test
|
611
|
+
|
612
|
+
```
|
613
|
+
export HBASE_JRUBY_TEST_ZK='your-hbase.domain.net'
|
614
|
+
rake test
|
615
|
+
```
|
616
|
+
|
617
|
+
## Contributing
|
618
|
+
|
619
|
+
1. Fork it
|
620
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
621
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
622
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
623
|
+
5. Create new Pull Request
|