hbase-jruby 0.1.6-java → 0.2.0-java
- data/CHANGELOG.md +14 -0
- data/README.md +227 -94
- data/lib/hbase-jruby/admin.rb +3 -1
- data/lib/hbase-jruby/dependency.rb +37 -12
- data/lib/hbase-jruby/hbase.rb +53 -10
- data/lib/hbase-jruby/pom/pom.xml +1 -1
- data/lib/hbase-jruby/scoped.rb +40 -2
- data/lib/hbase-jruby/table/admin.rb +442 -0
- data/lib/hbase-jruby/table/inspection.rb +109 -0
- data/lib/hbase-jruby/table.rb +29 -388
- data/lib/hbase-jruby/util.rb +16 -1
- data/lib/hbase-jruby/version.rb +1 -1
- data/lib/hbase-jruby.rb +2 -0
- data/test/helper.rb +4 -7
- data/test/test_cell.rb +2 -2
- data/test/test_hbase.rb +10 -3
- data/test/test_scoped.rb +35 -0
- data/test/test_table.rb +36 -26
- data/test/test_table_admin.rb +118 -31
- metadata +4 -2
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,20 @@
 Changelog
 =========

+0.2.0
+-----
+- Deprecated `HBase::Table#close`. You don't need to close Table instances.
+- Added `HBase::Table#split` and `HBase::Table#split!` methods
+- Added `:splits` option to `HBase::Table#create!` method to pre-split the table
+- Added table inspection methods: `properties`, `families`, and `regions`
+- Added raw inspection methods: `raw_properties` and `raw_families`
+- Added `HBase.log4j=` method
+- Added `HBase::Scoped#at` and `HBase::Scoped#time_range` methods
+- Changed parameters to `HBase.resolve_dependency!` method
+- Ruby Time objects can be used as timestamps in put and delete methods
+- Using a closed HBase connection is disallowed
+- Ruby 1.8 compatibility mode (Oops!)
+
 0.1.6
 -----
 - Maven dependencies for 0.94 and 0.92
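To see how the 0.2.0 entries above fit together, here is a minimal sketch assembled only from the README excerpts that follow; the table name, column family, row key, and split points are made up for illustration:

```ruby
require 'hbase-jruby'

HBase.resolve_dependency! '0.94'                # parameters changed in 0.2.0 (see below)
HBase.log4j = { 'log4j.threshold' => 'ERROR' }  # new HBase.log4j= setter

hbase = HBase.new
table = hbase[:test_table]                      # Table#close is deprecated; no cleanup needed

# :splits pre-splits the table at creation time (new in 0.2.0)
table.create!({ :cf1 => {} }, :splits => [1000, 2000, 3000]) unless table.exists?

table.put :row1 => { 'cf1:a' => 100 }
table.split!(1500)      # asynchronous region split
p table.properties      # new inspection methods
p table.families
```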
data/README.md
CHANGED
@@ -20,7 +20,7 @@ require 'hbase-jruby'
 HBase.resolve_dependency! 'cdh4.1'

 hbase = HBase.new
-table = hbase
+table = hbase[:test_table]

 # PUT
 table.put :rowkey1 => { 'cf1:a' => 100, 'cf2:b' => "Hello" }
@@ -56,7 +56,7 @@ table.delete(:rowkey9)
 git clone -b devel https://github.com/junegunn/hbase-jruby.git
 cd hbase-jruby
 rake build
-gem install pkg/hbase-jruby-0.
+gem install pkg/hbase-jruby-0.2.0-java.gem

 ## Setting up

@@ -65,47 +65,51 @@ table.delete(:rowkey9)
 To be able to access HBase from JRuby, Hadoop/HBase dependency must be satisfied.
 This can be done by either setting up CLASSPATH variable beforehand
 or by `require`ing relevant JAR files after launching JRuby.
-However, that's a lot of work, so *hbase-jruby* provides `HBase.resolve_dependency!` helper method,
-which automatically resolves Hadoop/HBase dependency.

-
+### `HBase.resolve_dependency!`

-
-
-[Maven dependency specifications](https://github.com/junegunn/hbase-jruby/blob/master/lib/hbase-jruby/pom/pom.xml)
-for the following Hadoop/HBase distributions.
+Well, there's an easier way.
+You can call `HBase.resolve_dependency!` helper method passing one of the arguments listed below.

-
-
-
-
+| Argument   | Description                                                | Required executable |
+|------------|------------------------------------------------------------|---------------------|
+| 'cdh4.1'   | Predefined Maven profile for Cloudera CDH4.1               | mvn                 |
+| 'cdh3'     | Predefined Maven profile for Cloudera CDH3                 | mvn                 |
+| '0.94'     | Predefined Maven profile for Apache HBase 0.94             | mvn                 |
+| '0.92'     | Predefined Maven profile for Apache HBase 0.92             | mvn                 |
+| *POM PATH* | Follow dependency described in the given POM file          | mvn                 |
+| *:local*   | Resolve HBase dependency using `hbase classpath` command   | hbase               |

 ```ruby
-
+# Examples

+# Load JAR files from CDH4.1 distribution of HBase using Maven
 HBase.resolve_dependency! 'cdh4.1'
+
+# Load JAR files for HBase 0.94 using Maven
+HBase.resolve_dependency! '0.94', :verbose => true
+
+# Dependency resolution with your own POM file
+HBase.resolve_dependency! '/path/to/my/pom.xml'
+HBase.resolve_dependency! '/path/to/my/pom.xml', :profile => 'trunk'
+
+# Resolve JAR files from local HBase installation
+HBase.resolve_dependency! :local
 ```

 (If you're behind an http proxy, set up your ~/.m2/settings.xml file
 as described in [this page](http://maven.apache.org/guides/mini/guide-proxies.html))

-
+### Log4j logs from HBase

-
-you can use your own Maven pom.xml file with its Hadoop/HBase dependency.
+You may want to suppress (or customize) log messages from HBase.

 ```ruby
-
-
-
-#### Using `hbase classpath` command
+# With an external log4j.properties file
+HBase.log4j = '/your/log4j.properties'

-
-
-You can tell `resolve_dependency!` method to do so by passing it special `:hbase` parameter.
-
-```ruby
-HBase.resolve_dependency! :hbase
+# With a Hash
+HBase.log4j = { 'log4j.threshold' => 'ERROR' }
 ```

 ### Connecting to HBase
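Note that the `:verbose` flag shown above also still accepts the pre-0.2.0 positional Boolean form; a short sketch of both spellings, grounded in the backward-compatibility shim visible in the `dependency.rb` hunk further down:

```ruby
# 0.1.x style: positional Boolean (still accepted for backward compatibility)
HBase.resolve_dependency! 'cdh4.1', true

# 0.2.0 style: options Hash
HBase.resolve_dependency! 'cdh4.1', :verbose => true
HBase.resolve_dependency! '/path/to/my/pom.xml', :profile => 'trunk', :verbose => false
```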
@@ -129,30 +133,23 @@ hbase.close

 ## Accessing data with HBase::Table instance

-`HBase#
+`HBase#[]` method (or `HBase#table`) returns an `HBase::Table` instance
+which represents the table of the given name.

 ```ruby
 table = hbase.table(:test_table)
-```
-
-`HBase::Table` instance must be closed after use.

-
-
-table.close
-
-# If block is given, table is automatically closed at the end of the block
-hbase.table(:test_table) do |table|
-  # ...
-end
+# Or simply,
+table = hbase[:test_table]
 ```

+
 ## Basic table administration

-### Creating
+### Creating a table

 ```ruby
-table = hbase
+table = hbase[:my_table]

 # Drop table if exists
 table.drop! if table.exists?
@@ -165,7 +162,75 @@ table.create! :cf1 => {},
 ### Table inspection

 ```ruby
-
+# Table properties
+table.properties
+# {:max_filesize => 2147483648,
+#  :readonly => false,
+#  :memstore_flushsize => 134217728,
+#  :deferred_log_flush => false}
+
+# Properties of the column families
+table.families
+# {"cf"=>
+#   {:blockcache => true,
+#    :blocksize => 65536,
+#    :bloomfilter => "NONE",
+#    :cache_blooms_on_write => false,
+#    :cache_data_on_write => false,
+#    :cache_index_on_write => false,
+#    :compression => "NONE",
+#    :compression_compact => "NONE",
+#    :data_block_encoding => "NONE",
+#    :evict_blocks_on_close => false,
+#    :in_memory => false,
+#    :keep_deleted_cells => false,
+#    :min_versions => 0,
+#    :replication_scope => 0,
+#    :ttl => 2147483647,
+#    :versions => 3}}
+```
+
+There are also `raw_` variants of `properties` and `families`.
+They return properties in their internal String format (mainly used in HBase shell).
+(See [HTableDescriptor.values](http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html#values) and
+[HColumnDescriptor.values](http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html#values))
+
+```ruby
+table.raw_properties
+# {"IS_ROOT" => "false",
+#  "IS_META" => "false",
+#  "MAX_FILESIZE" => "2147483648"}
+
+table.raw_families
+# {"cf" =>
+#   {"DATA_BLOCK_ENCODING" => "NONE",
+#    "BLOOMFILTER" => "NONE",
+#    "REPLICATION_SCOPE" => "0",
+#    "VERSIONS" => "3",
+#    "COMPRESSION" => "NONE",
+#    "MIN_VERSIONS" => "0",
+#    "TTL" => "2147483647",
+#    "KEEP_DELETED_CELLS" => "false",
+#    "BLOCKSIZE" => "65536",
+#    "IN_MEMORY" => "false",
+#    "ENCODE_ON_DISK" => "true",
+#    "BLOCKCACHE" => "true"}}
+```
+
+These String key-value pairs are not really a part of the public API of HBase, and thus might change over time.
+However, they are most useful when you need to create a table with the same properties as the existing one.
+
+```ruby
+hbase[:dupe_table].create!(table.raw_families, table.raw_properties)
+```
+
+With the `regions` method, you can even presplit the new table just like the old one.
+
+```ruby
+hbase[:dupe_table].create!(
+  table.raw_families,
+  table.raw_properties.merge(
+    :splits => table.regions.map { |r| r[:start_key] }.compact))
 ```

 ## Basic operations
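Because `properties` and `families` return plain Ruby Hashes, quick sanity checks can be written directly against them; a small hypothetical sketch, using only the keys and value types visible in the inspection output above:

```ruby
# Flag families with no compression or an effectively unlimited TTL
table.families.each do |name, props|
  puts "#{name}: no compression" if props[:compression] == 'NONE'
  puts "#{name}: unlimited TTL"  if props[:ttl] == 2147483647
end

puts 'table is read-only' if table.properties[:readonly]
```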
@@ -333,52 +398,59 @@ table.increment('rowkey1', 'cf1:counter' => 1, 'cf1:counter2' => 2)
 ```ruby
 # Full scan
 table.each do |row|
+  age  = row.fixnum('cf:age')
+  name = row.string('cf:name')
   # ...
 end
 ```

 ## Scoped access

-
-
-
-```ruby
-scoped = table.each
-scoped.get(1)
-scoped.to_a
-```
+You can control how you retrieve data by chaining
+the following methods of `HBase::Table` (or `HBase::Scoped`).

-
-
-
-
+| Method       | Description                                                      |
+|--------------|------------------------------------------------------------------|
+| `range`      | Specifies the rowkey range of scan                               |
+| `project`    | To retrieve only a subset of columns                             |
+| `filter`     | Filtering conditions of scan                                     |
+| `while`      | Allows early termination of scan (server-side)                   |
+| `at`         | Only retrieve data with the specified timestamp                  |
+| `time_range` | Only retrieve data within the specified time range               |
+| `limit`      | Limits the number of rows                                        |
+| `versions`   | Limits the number of versions of each column                     |
+| `caching`    | Sets the number of rows for caching during scan                  |
+| `batch`      | Limits the maximum number of values returned for each iteration  |

-
-
-For example, `table.range(start, end)` is just a shorthand notation for
-`table.each.range(start, end)`.
+Each invocation of these methods returns an `HBase::Scoped` instance with which
+you can retrieve data with the following methods.

-
+| Method      | Description                                                              |
+|-------------|--------------------------------------------------------------------------|
+| `get`       | Fetches rows by the given rowkeys                                        |
+| `each`      | Scans the scope of the table (`HBase::Scoped` instance is `Enumerable`)  |
+| `count`     | Efficiently counts the number of rows in the scope                       |
+| `aggregate` | Performs aggregation using Coprocessor (To be described shortly)         |

-
+### Example of scoped access

 ```ruby
-# Chaining methods
 import org.apache.hadoop.hbase.filter.RandomRowFilter

-table.range('A'..'Z').
-      project('cf1:a').
-      project('cf2').
-      filter('cf1:a' => 'Hello').
-      filter('cf2:d' => 100..200).
-      filter('cf2:e' => [10, 20..30]).
-      filter(RandomRowFilter.new(0.5)).
-      while('cf2:f' => { ne: 'OPEN' }).
-
-
-
-
-
+table.range('A'..'Z').                      # Row key range
+      project('cf1:a').                     # Select cf1:a column
+      project('cf2').                       # Select cf2 family as well
+      filter('cf1:a' => 'Hello').           # Filter by cf1:a value
+      filter('cf2:d' => 100..200).          # Range filter on cf2:d
+      filter('cf2:e' => [10, 20..30]).      # Set-inclusion condition on cf2:e
+      filter(RandomRowFilter.new(0.5)).     # Any Java HBase filter
+      while('cf2:f' => { ne: 'OPEN' }).     # Early termination of scan
+      time_range(Time.now - 600, Time.now). # Scan data of the last 10 minutes
+      limit(10).                            # Limits the size of the result set
+      versions(2).                          # Only fetches 2 versions for each value
+      batch(100).                           # Batch size for scan set to 100
+      caching(1000).                        # Caching 1000 rows
+      to_a                                  # To Array
 ```

 ### *range*
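The new `at` and `time_range` scopes compose with the retrieval methods like any other; a short hypothetical sketch, assuming `at` accepts a Ruby Time the same way `time_range` does in the example above:

```ruby
ts = Time.now - 60  # hypothetical timestamp of interest

# Only cells written exactly at ts, at most 10 rows
table.at(ts).limit(10).to_a

# Rows written during the last 10 minutes (same range as in the example above)
table.time_range(Time.now - 600, Time.now).count
```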
@@ -570,7 +642,7 @@ end
 scoped.count

 # This should be even faster as it dramatically reduces the number of RPC calls
-scoped.caching(
+scoped.caching(5000).count
 ```

 ## Basic aggregation using coprocessor
@@ -728,30 +800,28 @@ ba.java # Returns the native Java byte array (byte[])

 ### Table administration

-`HBase#Table` provides a
-
-
-
+`HBase::Table` provides a number of *bang_methods!* for table administration tasks.
+They run synchronously, except when mentioned otherwise (e.g. `HBase::Table#split!`).
+Some of them take an optional block to allow progress monitoring
+and come with non-bang, asynchronous counterparts.
+
+#### Creation and alteration

 ```ruby
 # Create a table with configurable table-level properties
 table.create!(
   # 1st Hash: Column family specification
-  {
+  {
+    :cf1 => { :compression => :snappy },
+    :cf2 => { :bloomfilter => :row }
+  },

   # 2nd Hash: Table properties
   :max_filesize => 256 * 1024 ** 2,
-  :deferred_log_flush => false
+  :deferred_log_flush => false,
+  :splits => [1000, 2000, 3000])

-# Alter table properties (
-table.alter(
-  :max_filesize => 512 * 1024 ** 2,
-  :memstore_flushsize => 64 * 1024 ** 2,
-  :readonly => false,
-  :deferred_log_flush => true
-)
-
-# Alter table properties (synchronous)
+# Alter table properties (synchronous with optional block)
 table.alter!(
   :max_filesize => 512 * 1024 ** 2,
   :memstore_flushsize => 64 * 1024 ** 2,
@@ -762,6 +832,56 @@ table.alter!(
   puts [progress, total].join('/')
 }

+# Alter table properties (asynchronous)
+table.alter(
+  :max_filesize => 512 * 1024 ** 2,
+  :memstore_flushsize => 64 * 1024 ** 2,
+  :readonly => false,
+  :deferred_log_flush => true
+)
+```
+
+##### List of column family properties
+
+http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html
+
+Some of the properties are only available on recent versions of HBase.
+
+| Property                 | Type          | Description                                                                                                        |
+|--------------------------|---------------|--------------------------------------------------------------------------------------------------------------------|
+| `:blockcache`            | Boolean       | If MapFile blocks should be cached                                                                                 |
+| `:blocksize`             | Fixnum        | Blocksize to use when writing out storefiles/hfiles on this column family                                          |
+| `:bloomfilter`           | Symbol/String | Bloom filter type: `:none`, `:row`, `:rowcol`, or uppercase Strings                                                |
+| `:cache_blooms_on_write` | Boolean       | If we should cache bloomfilter blocks on write                                                                     |
+| `:cache_data_on_write`   | Boolean       | If we should cache data blocks on write                                                                            |
+| `:cache_index_on_write`  | Boolean       | If we should cache index blocks on write                                                                           |
+| `:compression`           | Symbol/String | Compression type: `:none`, `:gz`, `:lzo`, `:lz4`, `:snappy`, or uppercase Strings                                  |
+| `:compression_compact`   | Symbol/String | Compression type: `:none`, `:gz`, `:lzo`, `:lz4`, `:snappy`, or uppercase Strings                                  |
+| `:data_block_encoding`   | Symbol/String | Data block encoding algorithm used in block cache: `:none`, `:diff`, `:fast_diff`, `:prefix`, or uppercase Strings |
+| `:encode_on_disk`        | Boolean       | If we want to encode data block in cache and on disk                                                               |
+| `:evict_blocks_on_close` | Boolean       | If we should evict cached blocks from the blockcache on close                                                      |
+| `:in_memory`             | Boolean       | If we are to keep all values in the HRegionServer cache                                                            |
+| `:keep_deleted_cells`    | Boolean       | If deleted rows should not be collected immediately                                                                |
+| `:min_versions`          | Fixnum        | The minimum number of versions to keep (used when timeToLive is set)                                               |
+| `:replication_scope`     | Fixnum        | Replication scope                                                                                                  |
+| `:ttl`                   | Fixnum        | Time-to-live of cell contents, in seconds                                                                          |
+| `:versions`              | Fixnum        | The maximum number of versions (by default, all available versions are retrieved)                                  |
+
+##### List of table properties
+
+http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html
+
+| Property              | Type    | Description                                                                                               |
+|-----------------------|---------|-----------------------------------------------------------------------------------------------------------|
+| `:max_filesize`       | Fixnum  | The maximum size up to which a region can grow, after which a region split is triggered                   |
+| `:readonly`           | Boolean | If the table is read-only                                                                                 |
+| `:memstore_flushsize` | Fixnum  | The maximum size of the memstore, after which the contents of the memstore are flushed to the filesystem  |
+| `:deferred_log_flush` | Boolean | Defer the log edits syncing to the file system                                                            |
+| `:splits`             | Array   | Region split points                                                                                       |
+
+#### Managing column families
+
+```ruby
 # Add column family
 table.add_family! :cf3, :compression => :snappy,
                         :bloomfilter => :row
@@ -771,7 +891,11 @@ table.alter_family! :cf2, :bloomfilter => :rowcol

 # Remove column family
 table.delete_family! :cf1
+```

+#### Coprocessors
+
+```ruby
 # Add Coprocessor
 unless table.has_coprocessor?(cp_class_name1)
   table.add_coprocessor! cp_class_name1
@@ -783,14 +907,21 @@ table.add_coprocessor! cp_class_name2,
 table.remove_coprocessor! cp_class_name1
 ```

+#### Region splits (asynchronous)
+
+```ruby
+table.split!(1000)
+table.split!(2000, 3000)
+```
+
+#### Advanced table administration
+
 You can perform other types of administrative tasks
 with native Java [HBaseAdmin object](http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html),
 which can be obtained by `HBase#admin` method. Optionally, a block can be given
 so that the HBaseAdmin object is automatically closed at the end of the given block.

 ```ruby
-# Advanced table administration with HBaseAdmin object
-# http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html
 admin = hbase.admin
 # ...
 admin.close
@@ -804,8 +935,10 @@ end
 ## Test

 ```bash
-
-
+#!/bin/bash
+
+# Test HBase 0.94 on localhost
+export HBASE_JRUBY_TEST_ZK='127.0.0.1'
 export HBASE_JRUBY_TEST_DIST='0.94'

 # Test both for 1.8 and 1.9
data/lib/hbase-jruby/admin.rb
CHANGED
@@ -3,6 +3,8 @@ class HBase
 module Admin
 private
   def with_admin
+    check_closed
+
     begin
       admin = HBaseAdmin.new(@config)
       yield admin
@@ -19,7 +21,7 @@ private
       total = pair.getSecond

       if block && yet != prev_yet
-        block.call
+        block.call(total - yet, total)
         prev_yet = yet
       end

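The widened callback above (`block.call(total - yet, total)`) is what drives the README's progress-reporting form of `alter!`; a hedged sketch of a caller, matching the `|progress, total|` block shown in the README hunk earlier:

```ruby
table.alter!(:max_filesize => 512 * 1024 ** 2) do |progress, total|
  # progress is total - yet: the number of regions already updated
  puts "#{progress}/#{total}"
end
```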
data/lib/hbase-jruby/dependency.rb
CHANGED
@@ -5,26 +5,44 @@ require 'tempfile'
 # HBase connection
 class HBase
   class << self
-    #
-    #
-    #
-    #
-
+    # @overload resolve_dependency!(dist, options)
+    #   Resolve Hadoop and HBase dependency with a predefined Maven profile
+    #   @param [String] dist HBase distribution: cdh4.1, cdh3, 0.94, 0.92, local
+    #   @param [Hash] options Options
+    #   @option options [Boolean] :verbose Enable verbose output
+    #   @return [Array<String>] Loaded JAR files
+    # @overload resolve_dependency!(pom_path, options)
+    #   Resolve Hadoop and HBase dependency with the given Maven POM file
+    #   @param [String] pom_path Path to POM file
+    #   @param [Hash] options Options
+    #   @option options [Boolean] :verbose Enable verbose output
+    #   @option options [String] :profile Maven profile
+    #   @return [Array<String>] Loaded JAR files
+    def resolve_dependency! dist, options = {}
+      # Backward-compatibility
+      options = { :verbose => options } if [true, false].include?(options)
+      options = { :verbose => false }.merge(options)
+
+      dist    = dist.to_s
+      verbose = options[:verbose]
+
       silencer  = verbose ? '' : '> /dev/null'
       tempfiles = []
+
       jars =
-        if dist
+        if %w[hbase local].include?(dist)
           # Check for hbase executable
           hbase = `which hbase`
-          raise RuntimeError, "Cannot find
+          raise RuntimeError, "Cannot find `hbase` executable" if hbase.empty?
           `hbase classpath`.split(':')
         else
           # Check for Maven executable
           mvn = `which mvn`
-          raise RuntimeError, "Cannot find
+          raise RuntimeError, "Cannot find `mvn` executable" if mvn.empty?

           if File.exists?(dist)
             path = dist
+            profile = options[:profile] && "-P #{options[:profile]}"
           else
             path = File.expand_path("../pom/pom.xml", __FILE__)
             profile = "-P #{dist}"
@@ -35,10 +53,17 @@ class HBase
           tf.close(false)
           system "mvn org.apache.maven.plugins:maven-dependency-plugin:2.5.1:resolve org.apache.maven.plugins:maven-dependency-plugin:2.5.1:build-classpath -Dsilent=true -Dmdep.outputFile=#{tf.path} #{profile} -f #{path} #{silencer}"

-          raise RuntimeError.new("Error occurred. Set verbose
+          raise RuntimeError.new("Error occurred. Set verbose option to see the log.") unless $?.exitstatus == 0

-
-
+          if File.read(tf.path).empty?
+            desc =
+              if options[:profile]
+                "#{dist} (#{options[:profile]})"
+              else
+                dist
+              end
+            raise ArgumentError.new("Invalid profile: #{desc}")
+          end
           File.read(tf.path).split(':')
         end

@@ -68,7 +93,7 @@ class HBase
       base.class_eval do
         classes.map { |klass|
           begin
-
+            java_import klass
             nil
           rescue NameError => e
             klass
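Given the failure modes visible in the two hunks above (a RuntimeError when the `hbase` or `mvn` executable is missing or the Maven run fails, and an ArgumentError when the resolved classpath comes back empty), a defensive caller might look like this sketch; the rescue messages are illustrative, not quoted from the library:

```ruby
begin
  jars = HBase.resolve_dependency! 'cdh4.1', :verbose => true
  puts "Loaded #{jars.size} JAR files"
rescue ArgumentError => e
  warn "Invalid profile: #{e.message}"    # Maven produced an empty classpath
rescue RuntimeError => e
  warn "Resolution failed: #{e.message}"  # missing executable or Maven error
end
```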