ok_hbase 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/Gemfile +17 -0
- data/LICENSE.txt +22 -0
- data/README.md +47 -0
- data/Rakefile +22 -0
- data/examples/README.md +46 -0
- data/examples/advanced/README.md +36 -0
- data/examples/advanced/perf_read.rb +146 -0
- data/examples/advanced/perf_write.rb +143 -0
- data/examples/advanced/table_read.rb +115 -0
- data/examples/advanced/table_write.rb +128 -0
- data/examples/table_scan.rb +97 -0
- data/examples/table_write.rb +97 -0
- data/lib/ok_hbase/active_model.rb +35 -0
- data/lib/ok_hbase/client.rb +42 -0
- data/lib/ok_hbase/concerns/custom_row/class_methods.rb +13 -0
- data/lib/ok_hbase/concerns/custom_row.rb +40 -0
- data/lib/ok_hbase/concerns/indexable/class_methods.rb +13 -0
- data/lib/ok_hbase/concerns/indexable.rb +101 -0
- data/lib/ok_hbase/concerns/row.rb +85 -0
- data/lib/ok_hbase/concerns/table/batch.rb +95 -0
- data/lib/ok_hbase/concerns/table/class_methods.rb +13 -0
- data/lib/ok_hbase/concerns/table/instrumentation.rb +48 -0
- data/lib/ok_hbase/concerns/table.rb +241 -0
- data/lib/ok_hbase/concerns.rb +13 -0
- data/lib/ok_hbase/connection.rb +157 -0
- data/lib/ok_hbase/row.rb +21 -0
- data/lib/ok_hbase/table.rb +10 -0
- data/lib/ok_hbase/version.rb +3 -0
- data/lib/ok_hbase.rb +39 -0
- data/lib/thrift/hbase/hbase.rb +2643 -0
- data/lib/thrift/hbase/hbase_constants.rb +14 -0
- data/lib/thrift/hbase/hbase_types.rb +252 -0
- data/ok-hbase.gemspec +23 -0
- data/spec/ok_hbase/connection_spec.rb +99 -0
- data/spec/ok_hbase/table_spec.rb +149 -0
- data/spec/ok_hbase_spec.rb +24 -0
- data/spec/spec_helper.rb +20 -0
- data/tasks/bump.rb +30 -0
- metadata +122 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use --create --install ruby-1.9.3@ok_hbase
|
data/Gemfile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in ok_hbase.gemspec
|
4
|
+
gemspec
|
5
|
+
|
6
|
+
|
7
|
+
group :development, :test do
|
8
|
+
gem 'awesome_print'
|
9
|
+
gem 'bundler'
|
10
|
+
end
|
11
|
+
|
12
|
+
group :test do
|
13
|
+
gem 'json', '~> 1.7.7'
|
14
|
+
gem 'rake'
|
15
|
+
gem 'rspec'
|
16
|
+
gem 'simplecov', require: false
|
17
|
+
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Nathan Keyes
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# ok_hbase
|
2
|
+
|
3
|
+
Welcome HBase cowboys.
|
4
|
+
|
5
|
+
Read the [wiki](https://github.com/okcwest/ok-hbase/wiki)!
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'ok_hbase'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install ok_hbase
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```bash
|
24
|
+
$ bundle console
|
25
|
+
Resolving dependencies...
|
26
|
+
irb(main):030:0> connection = OkHbase::Connection.new(host: "hbase-dev")
|
27
|
+
=> #<OkHbase::Connection:0x00000002440140 <snip>
|
28
|
+
irb(main):031:0> table = OkHbase::Table.new(mytable, connection)
|
29
|
+
=> #<OkHbase::Table:0x00000002449f38 <snip>
|
30
|
+
irb(main):032:0> count = 0
|
31
|
+
=> 0
|
32
|
+
irb(main):033:0> table.scan row_prefix: [ myid, 1, 5, 1, 0 ].pack("L>CCCC") do |row, col|
|
33
|
+
irb(main):034:1* count += 1
|
34
|
+
irb(main):035:1> end
|
35
|
+
=> nil
|
36
|
+
irb(main):036:0> count
|
37
|
+
=> 1072
|
38
|
+
irb(main):037:0> connection.close
|
39
|
+
```
|
40
|
+
|
41
|
+
## Contributing
|
42
|
+
|
43
|
+
1. Fork it
|
44
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
45
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
46
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
47
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
Bundler.setup :default, :test, :development
|
4
|
+
|
5
|
+
Bundler::GemHelper.install_tasks
|
6
|
+
|
7
|
+
require 'rspec/core/rake_task'
|
8
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
9
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
10
|
+
end
|
11
|
+
|
12
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
13
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
14
|
+
spec.rcov = true
|
15
|
+
end
|
16
|
+
|
17
|
+
task :spec
|
18
|
+
task :default => :spec
|
19
|
+
|
20
|
+
Dir['tasks/**/*.{rb}'].each do |file|
|
21
|
+
require_relative file
|
22
|
+
end
|
data/examples/README.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# okhbase examples
|
2
|
+
Simple examples to illustrate using the api to do basic work.
|
3
|
+
|
4
|
+
## Usage
|
5
|
+
|
6
|
+
### Create table and Write Data
|
7
|
+
|
8
|
+
First we create a table, and put a bunch of data in it.
|
9
|
+
This command will create a table named 'ok_hbase_test', with a single column family: 'd'.
|
10
|
+
It then sequentially creates rows with row keys a through zzz, 18278 rows in total.
|
11
|
+
That may take a few minutes, as we are not batching the writes.
|
12
|
+
|
13
|
+
```bash
|
14
|
+
$ ./table_write.rb --host localhost --port 9090 --table ok_hbase_test
|
15
|
+
```
|
16
|
+
|
17
|
+
### Read Data
|
18
|
+
Now that we have data in the table, we want to read it.
|
19
|
+
The following script will scan the table we just made for any rows that begin with 'hba'
|
20
|
+
|
21
|
+
```bash
|
22
|
+
$ ./table_scan.rb --host localhost --table ok_hbase_test --prefix hba
|
23
|
+
```
|
24
|
+
|
25
|
+
The output should look like:
|
26
|
+
```bash
|
27
|
+
Nathans-MacBook-Pro-2:ok_hbase nkeyes$ ./examples/table_scan.rb --host localhost --table ok_hbase_test --prefix hba
|
28
|
+
2013-06-11 08:36:13 -0700 DEBUG: Setting up connection
|
29
|
+
2013-06-11 08:36:13 -0700 DEBUG: Connecting to localhost
|
30
|
+
2013-06-11 08:36:13 -0700 DEBUG: Get instance for table ok_hbase_test
|
31
|
+
{
|
32
|
+
"hba" => {
|
33
|
+
"d:message" => "this is row number 5461",
|
34
|
+
"d:row_number" => "5461"
|
35
|
+
}
|
36
|
+
}
|
37
|
+
Nathans-MacBook-Pro-2:ok_hbase nkeyes$
|
38
|
+
```
|
39
|
+
|
40
|
+
Experiment with shorter prefixes to see more rows returned.
|
41
|
+
An empty prefix will return all rows:
|
42
|
+
|
43
|
+
```bash
|
44
|
+
./table_scan.rb --host localhost --table ok_hbase_test --prefix ''
|
45
|
+
```
|
46
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# okhbase examples
|
2
|
+
Examples to illustrate using the api to do basic work.
|
3
|
+
|
4
|
+
## Usage
|
5
|
+
|
6
|
+
See: http://hbase.apache.org/book.html if you are new to hbase.
|
7
|
+
|
8
|
+
With bundler up to date, usage():
|
9
|
+
|
10
|
+
```bash
|
11
|
+
$ bundle exec examples/perf_read.rb -h
|
12
|
+
Usage: examples/perf_read.rb [options]
|
13
|
+
-h, --help Display this help
|
14
|
+
-v, --verbose Output json result
|
15
|
+
-n, --host HOSTNAME hostname of RegionServer or master
|
16
|
+
-t, --table TABLE hbase table name
|
17
|
+
--port PORT port number of thrift server, defaults to 9090
|
18
|
+
--timeout TIMEOUT connect timeout, defaults to 600
|
19
|
+
-a, --array ARRAY array values for pack, in csv, no whitespace in the format of "11111111,1,1,1,1"
|
20
|
+
-p, --pack PACK template string to build binary sequence from literal passed to -a
|
21
|
+
-i, --iterations NUM number of iterations to run the benchmark, defaults to 10
|
22
|
+
```
|
23
|
+
|
24
|
+
The table reading examples are built around the Ruby Array API, using Array#pack to build binary sequences to needle our data out of the HBase haystack. Most people use keys composed of timestamps, IDs and other data.
|
25
|
+
|
26
|
+
If we had a rowkey with an id and flags in a table called "chat", we could run the perf_read.rb example like this:
|
27
|
+
|
28
|
+
```bash
|
29
|
+
$ bundle exec examples/perf_read.rb -n 127.0.0.1 -t chat --array='22221111,7,3,4,0' -p "L>CCCC" -i 10 -v
|
30
|
+
```
|
31
|
+
|
32
|
+
## WIP
|
33
|
+
* perf_write.rb: currently a wip, use at your own risk
|
34
|
+
* table_write.rb: currently a wip, use at your own risk
|
35
|
+
* table_read.rb: tested
|
36
|
+
* perf_read.rb: tested
|
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# perf_read.rb - basic read perf test
|
4
|
+
|
5
|
+
# Put the gem's lib/ directory at the front of the load path. This script
# lives in examples/advanced/, so lib/ is three path components above
# __FILE__ (the file name itself counts as one component for expand_path).
$LOAD_PATH.unshift File.expand_path('../../../lib', __FILE__)
|
6
|
+
$stdout.sync = true
|
7
|
+
|
8
|
+
require 'awesome_print'
|
9
|
+
require 'ok_hbase'
|
10
|
+
require 'optparse'
|
11
|
+
require 'logger'
|
12
|
+
|
13
|
+
$options = {}
|
14
|
+
$logger = Logger.new(STDOUT)
|
15
|
+
$logger.formatter = proc { |severity, datetime, progname, msg| "#{datetime} #{severity}: #{msg}\n" }
|
16
|
+
$logger.level = Logger::FATAL
|
17
|
+
|
18
|
+
# Prints the optional error message, then the option summary held in
# $optparse, and terminates the script with exit status 1.
#
# error - message to show before the summary; nothing is shown when nil/false.
def usage(error = nil)
  banner = error ? "Error: #{error}\n\n" : nil
  puts banner if banner
  puts $optparse
  exit(1)
end
|
23
|
+
|
24
|
+
# Opens a connection using the host, port and timeout stored in $options and
# returns an OkHbase::Table bound to it (despite the name, the return value is
# the table object, not the connection).
#
# table - name of the table; when nil a fatal message is logged and nil is
#         returned without connecting.
def get_connection(table = nil)
  $logger.debug "Setting up connection for table #{table}"

  if table.nil?
    $logger.fatal "Must specify a table"
    return nil
  end

  host = $options[:hostname]
  $logger.debug "Connecting to #{host}"
  connection = OkHbase::Connection.new(
    auto_connect: true,
    host: host,
    port: $options[:port],
    timeout: $options[:timeout]
  )

  $logger.debug "Get instance for table #{table}"
  OkHbase::Table.new(table, connection)
end
|
37
|
+
|
38
|
+
# Builds the binary row-key prefix by packing $options[:filter_array] with the
# template string in $options[:filter_pack], and returns the packed string.
def get_filter
  values = $options[:filter_array]
  template = $options[:filter_pack].to_s
  $logger.debug "Setup byte sequence using #{values}.pack(#{template})"
  values.pack(template)
end
|
42
|
+
|
43
|
+
# Counts the rows yielded by conn.scan for the given row prefix.
#
# conn   - object responding to scan(row_prefix:, caching:) and yielding once
#          per row (the callers in this script pass an OkHbase::Table).
# prefix - row-key prefix handed to the scan.
#
# Returns the number of times the scan block was invoked.
def get_row_count(conn, prefix)
  total = 0
  conn.scan(row_prefix: prefix, caching: 5000) { |_row, _cols| total += 1 }
  total
end
|
51
|
+
|
52
|
+
# Runs the prefix scan $options[:iterations] times under Benchmark.bm and
# prints one report line per run followed by ">total:" and ">avg:" rows.
#
# The totals are folded with Tms#+ via inject: Array#sum either does not exist
# (older Rubies) or starts from the Integer 0, which cannot be added to a
# Benchmark::Tms.
def perf_test_a
  # Required here so the method works even if nothing else loaded Benchmark.
  require 'benchmark'

  c = get_connection($options[:table])
  filter = get_filter

  Benchmark.bm(7, ">total:", ">avg:") do |x|
    runs = $options[:iterations].times.map do |i|
      x.report("Run #{i + 1}:") do
        count = get_row_count(c, filter).to_s
        $logger.debug "Found #{count} row(s)"
      end
    end

    # With zero iterations there is nothing to total or average.
    next [] if runs.empty?

    total = runs.inject(:+)
    [total, total / runs.size]
  end
end
|
66
|
+
|
67
|
+
# Times the prefix scan $options[:iterations] times with Benchmark.realtime
# and prints each run followed by median, average, highest and lowest times.
#
# The median is taken from the sorted timings (mean of the two middle values
# for an even number of runs); the list is sorted once and reused for the
# highest/lowest figures.
def perf_test_b
  # Required here so the method works even if nothing else loaded Benchmark.
  require 'benchmark'

  c = get_connection($options[:table])
  filter = get_filter

  bench_times = $options[:iterations].times.map do |i|
    elapsed = Benchmark.realtime { get_row_count(c, filter) }
    $logger.debug "Benchmark iteration ##{i + 1}: #{elapsed} second(s)"
    elapsed
  end

  # Nothing to summarise (and the average would divide by zero).
  if bench_times.empty?
    puts "No benchmark iterations were run"
    return
  end

  bench_times.each_with_index do |seconds, index|
    puts "Run #{index + 1}: #{seconds} second(s)"
  end

  sorted = bench_times.sort
  mid = sorted.length / 2
  median = sorted.length.odd? ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2.0

  puts "\nMedian: #{median} second(s)"
  puts "Average: #{bench_times.inject(:+) / bench_times.length} second(s)"
  puts "Highest Time: #{sorted.last} second(s)"
  puts "Lowest Time: #{sorted.first} second(s)"
end
|
85
|
+
|
86
|
+
# Entry point: parses the command line into $options, checks that the required
# options (host, table, array, pack template) were given, then runs
# perf_test_b. Any validation or parse failure goes through `usage`, which
# prints the option summary and exits.
def main
  $optparse = OptionParser.new do |opts|
    opts.banner = "Usage: #{__FILE__} [options]"

    # Defaults, set before the option descriptions below interpolate them.
    $options[:verbose] = false
    $options[:port] = 9090
    $options[:timeout] = 600
    $options[:iterations] = 10

    opts.on('-h', '--help', 'Display this help') do
      usage
    end

    opts.on('-v', '--verbose', 'Output json result') do
      $options[:verbose] = true
      $logger.level = Logger::DEBUG
    end

    opts.on('-n', '--host HOSTNAME', 'hostname of RegionServer or master') do |hostname|
      $options[:hostname] = hostname
    end

    opts.on('-t', '--table TABLE', 'hbase table name') do |table|
      $options[:table] = table
    end

    # Long form only: the short switch -p is used by --pack below, and two
    # options cannot share one short switch.
    opts.on('--port PORT', "port number of thrift server, defaults to #{$options[:port]}") do |port|
      $options[:port] = port.to_i
    end

    opts.on('--timeout TIMEOUT', "connect timeout, defaults to #{$options[:timeout]}") do |timeout|
      $options[:timeout] = timeout.to_i
    end

    opts.on('-a', '--array ARRAY', Array, "array values for pack, in csv, no whitespace in the format of \"11111111,1,1,1,1\"") do |ar|
      $options[:filter_array] = ar.map(&:to_i)
    end

    opts.on('-p', '--pack PACK', "template string to build binary sequence from literal passed to -a") do |pack|
      $options[:filter_pack] = pack.to_s
    end

    opts.on('-i', '--iterations NUM', "number of iterations to run the benchmark, defaults to #{$options[:iterations]}") do |num|
      $options[:iterations] = num.to_i
    end
  end

  usage "You didn't specify any options" if not ARGV[0]

  # Report unknown switches / missing arguments through the normal usage
  # output instead of an uncaught exception and backtrace.
  begin
    $optparse.parse!
  rescue OptionParser::ParseError => e
    usage e.message
  end

  usage "You didn't specify a hostname" if not $options[:hostname]
  usage "You didn't specify a table" if not $options[:table]
  usage "You didn't specify an array literal" if not $options[:filter_array]
  usage "You didn't specify a binary sequence template" if not $options[:filter_pack]

  perf_test_b()
end
|
145
|
+
|
146
|
+
main() if __FILE__ == $0
|
@@ -0,0 +1,143 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# perf_write.rb - basic table write benchmarks
|
4
|
+
|
5
|
+
# Put the gem's lib/ directory at the front of the load path. This script
# lives in examples/advanced/, so lib/ is three path components above
# __FILE__ (the file name itself counts as one component for expand_path).
$LOAD_PATH.unshift File.expand_path('../../../lib', __FILE__)
|
6
|
+
$stdout.sync = true
|
7
|
+
|
8
|
+
require 'awesome_print'
|
9
|
+
require 'ok_hbase'
|
10
|
+
require 'optparse'
|
11
|
+
require 'logger'
|
12
|
+
|
13
|
+
$options = {}
|
14
|
+
$logger = Logger.new(STDOUT)
|
15
|
+
$logger.formatter = proc { |severity, datetime, progname, msg| "#{datetime} #{severity}: #{msg}\n" }
|
16
|
+
$logger.level = Logger::FATAL
|
17
|
+
|
18
|
+
# Prints the optional error message, then the option summary held in
# $optparse, and terminates the script with exit status 1.
#
# error - message to show before the summary; nothing is shown when nil/false.
def usage(error = nil)
  banner = error ? "Error: #{error}\n\n" : nil
  puts banner if banner
  puts $optparse
  exit(1)
end
|
23
|
+
|
24
|
+
# Opens a connection using the host, port and timeout stored in $options and
# returns an OkHbase::Table bound to it (despite the name, the return value is
# the table object, not the connection).
#
# table - name of the table; when nil a fatal message is logged and nil is
#         returned without connecting.
def get_connection(table = nil)
  $logger.debug "Setting up connection for table #{table}"

  if table.nil?
    $logger.fatal "Must specify a table"
    return nil
  end

  host = $options[:hostname]
  $logger.debug "Connecting to #{host}"
  connection = OkHbase::Connection.new(
    auto_connect: true,
    host: host,
    port: $options[:port],
    timeout: $options[:timeout]
  )

  $logger.debug "Get instance for table #{table}"
  OkHbase::Table.new(table, connection)
end
|
37
|
+
|
38
|
+
# Placeholder for a single-row write. No HBase call is made yet; both
# arguments are currently unused and only a marker line is printed.
#
# TODO: set the column family values to write.
# TODO: build the binary sequence with Array#pack.
def write_test_row(conn, rowkey)
  puts('wrote all the things')
end
|
43
|
+
|
44
|
+
# Placeholder for a batched write: prints one marker line per row requested in
# $options[:rowcount]. No HBase call is made yet and both arguments are
# currently unused.
#
# TODO: set the column family values, then increment the key and write.
#
# Returns $options[:rowcount] (the receiver of #times).
def write_batch_row(conn, rowkey)
  $options[:rowcount].times { |_i| puts('wrote something things') }
end
|
52
|
+
|
53
|
+
|
54
|
+
# Placeholder for row-key construction. It currently only prints "rowkey" and
# therefore returns nil (the result of puts).
#
# Planned steps, none implemented yet:
#   * get an incrementor value if needed
#   * set the attributes for the row key and any time data
#   * build the binary sequence with Array#pack
#   * return the binary (or decimal) sequence to ok-hbase
def get_rowkey
  puts('rowkey')
end
|
62
|
+
|
63
|
+
# Times write_batch_row over a number of iterations with Benchmark.realtime
# and prints each run followed by median, average, highest and lowest times.
#
# The iteration count comes from $options[:iterations] and falls back to 10
# when that key is absent, so the loop never calls #times on nil.
#
# The median is taken from the sorted timings (mean of the two middle values
# for an even number of runs).
def perf_test
  # Required here so the method works even if nothing else loaded Benchmark.
  require 'benchmark'

  c = get_connection($options[:table])
  row_key = get_rowkey
  iterations = $options[:iterations] || 10

  bench_times = iterations.times.map do |i|
    elapsed = Benchmark.realtime { write_batch_row(c, row_key) }
    $logger.debug "Benchmark iteration ##{i + 1}: #{elapsed} second(s)"
    elapsed
  end

  # Nothing to summarise (and the average would divide by zero).
  if bench_times.empty?
    puts "No benchmark iterations were run"
    return
  end

  bench_times.each_with_index do |seconds, index|
    puts "Run #{index + 1}: #{seconds} second(s)"
  end

  sorted = bench_times.sort
  mid = sorted.length / 2
  median = sorted.length.odd? ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2.0

  puts "\nMedian: #{median} second(s)"
  puts "Average: #{bench_times.inject(:+) / bench_times.length} second(s)"
  puts "Highest Time: #{sorted.last} second(s)"
  puts "Lowest Time: #{sorted.first} second(s)"
end
|
81
|
+
|
82
|
+
|
83
|
+
# Entry point: parses the command line into $options, checks that the required
# options (host, table, array, pack template) were given, then runs perf_test.
# Any validation or parse failure goes through `usage`, which prints the
# option summary and exits.
def main
  $optparse = OptionParser.new do |opts|
    opts.banner = "Usage: #{__FILE__} [options]"

    # Defaults, set before the option descriptions below interpolate them.
    $options[:verbose] = false
    $options[:port] = 9090
    $options[:timeout] = 600
    $options[:rowcount] = 1
    # perf_test loops over $options[:iterations]; give it a default and a
    # switch, mirroring the read benchmark script.
    $options[:iterations] = 10

    opts.on('-h', '--help', 'Display this help') do
      usage
    end

    opts.on('-v', '--verbose', 'Output json result') do
      $options[:verbose] = true
      $logger.level = Logger::DEBUG
    end

    opts.on('-n', '--host HOSTNAME', 'hostname of RegionServer or master') do |hostname|
      $options[:hostname] = hostname
    end

    opts.on('-t', '--table TABLE', 'hbase table name') do |table|
      $options[:table] = table
    end

    # Long form only: the short switch -p is used by --pack below, and two
    # options cannot share one short switch.
    opts.on('--port PORT', "port number of thrift server, defaults to #{$options[:port]}") do |port|
      $options[:port] = port.to_i
    end

    opts.on('--timeout TIMEOUT', "connect timeout, defaults to #{$options[:timeout]}") do |timeout|
      $options[:timeout] = timeout.to_i
    end

    opts.on('-a', '--array ARRAY', Array, "array values for pack for rowkey, comma separated, no whitespace in the format of \"11111111,1,1,1,1\"") do |ar|
      $options[:filter_array] = ar.map(&:to_i)
    end

    opts.on('-p', '--pack PACK', "template string to build binary sequence from literal passed to -a") do |pack|
      $options[:filter_pack] = pack.to_s
    end

    opts.on('-w', '--write ROWS', "how many times to write with a row key defaults to #{$options[:rowcount]}") do |row|
      $options[:rowcount] = row.to_i
    end

    opts.on('-i', '--iterations NUM', "number of iterations to run the benchmark, defaults to #{$options[:iterations]}") do |num|
      $options[:iterations] = num.to_i
    end
  end

  usage "You didn't specify any options" if not ARGV[0]

  # Report unknown switches / missing arguments through the normal usage
  # output instead of an uncaught exception and backtrace.
  begin
    $optparse.parse!
  rescue OptionParser::ParseError => e
    usage e.message
  end

  usage "You didn't specify a hostname" if not $options[:hostname]
  usage "You didn't specify a table" if not $options[:table]
  usage "You didn't specify an array literal" if not $options[:filter_array]
  usage "You didn't specify a binary sequence template" if not $options[:filter_pack]

  perf_test()
end
|
142
|
+
|
143
|
+
main() if __FILE__ == $0
|
@@ -0,0 +1,115 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# table_read.rb - basic table read examples
|
4
|
+
|
5
|
+
# Put the gem's lib/ directory at the front of the load path. This script
# lives in examples/advanced/, so lib/ is three path components above
# __FILE__ (the file name itself counts as one component for expand_path).
$LOAD_PATH.unshift File.expand_path('../../../lib', __FILE__)
|
6
|
+
$stdout.sync = true
|
7
|
+
|
8
|
+
require 'awesome_print'
|
9
|
+
require 'ok_hbase'
|
10
|
+
require 'optparse'
|
11
|
+
require 'logger'
|
12
|
+
|
13
|
+
$options = {}
|
14
|
+
$logger = Logger.new(STDOUT)
|
15
|
+
# $logger.formatter = proc do |severity, date, progname, msg| "#{date} #{severity}: #{msg}\n" end
|
16
|
+
$logger.formatter = proc { |severity, datetime, progname, msg| "#{datetime} #{severity}: #{msg}\n" }
|
17
|
+
$logger.level = Logger::FATAL
|
18
|
+
|
19
|
+
# Prints the optional error message, then the option summary held in
# $optparse, and terminates the script with exit status 1.
#
# error - message to show before the summary; nothing is shown when nil/false.
def usage(error = nil)
  banner = error ? "Error: #{error}\n\n" : nil
  puts banner if banner
  puts $optparse
  exit(1)
end
|
24
|
+
|
25
|
+
# Opens a connection using the host, port and timeout stored in $options and
# returns an OkHbase::Table bound to it (despite the name, the return value is
# the table object, not the connection).
#
# table - name of the table; when nil a fatal message is logged and nil is
#         returned without connecting.
def get_connection(table = nil)
  $logger.debug "Setting up connection for table #{table}"

  if table.nil?
    $logger.fatal "Must specify a table"
    return nil
  end

  host = $options[:hostname]
  $logger.debug "Connecting to #{host}"
  connection = OkHbase::Connection.new(
    auto_connect: true,
    host: host,
    port: $options[:port],
    timeout: $options[:timeout]
  )

  $logger.debug "Get instance for table #{table}"
  OkHbase::Table.new(table, connection)
end
|
38
|
+
|
39
|
+
# Builds the binary row-key prefix by packing $options[:filter_array] with the
# template string in $options[:filter_pack], and returns the packed string.
def get_filter
  values = $options[:filter_array]
  template = $options[:filter_pack].to_s
  $logger.debug "Setup byte sequence using #{values}.pack(#{template})"
  values.pack(template)
end
|
43
|
+
|
44
|
+
# Counts the rows yielded by conn.scan for the given row prefix, logging each
# row at debug level as it is seen.
#
# conn   - object responding to scan(row_prefix:, caching:) and yielding once
#          per row (main passes an OkHbase::Table).
# prefix - row-key prefix handed to the scan.
#
# Returns the number of times the scan block was invoked.
def get_row_count(conn, prefix)
  $logger.debug "Getting row count"
  seen = 0
  conn.scan(row_prefix: prefix, caching: 5000) do |row, _cols|
    $logger.debug row.to_s
    seen += 1
  end
  seen
end
|
54
|
+
|
55
|
+
# Entry point: parses the command line into $options, checks that the required
# options (host, table, array, pack template) were given, then connects, scans
# for rows matching the packed prefix and prints the row count together with
# the elapsed wall-clock time. Any validation or parse failure goes through
# `usage`, which prints the option summary and exits.
def main
  $optparse = OptionParser.new do |opts|
    opts.banner = "Usage: #{__FILE__} [options]"

    # Defaults, set before the option descriptions below interpolate them.
    $options[:verbose] = false
    $options[:port] = 9090
    $options[:timeout] = 600

    opts.on('-h', '--help', 'Display this help') do
      usage
    end

    opts.on('-v', '--verbose', 'Output json result') do
      $options[:verbose] = true
      $logger.level = Logger::DEBUG
    end

    opts.on('-n', '--host HOSTNAME', 'hostname of RegionServer or master') do |hostname|
      $options[:hostname] = hostname
    end

    opts.on('-t', '--table TABLE', 'hbase table name') do |table|
      $options[:table] = table
    end

    # Long form only: the short switch -p is used by --pack below, and two
    # options cannot share one short switch.
    opts.on('--port PORT', "port number of thrift server, defaults to #{$options[:port]}") do |port|
      $options[:port] = port.to_i
    end

    opts.on('--timeout TIMEOUT', "connect timeout, defaults to #{$options[:timeout]}") do |timeout|
      $options[:timeout] = timeout.to_i
    end

    opts.on('-a', '--array ARRAY', Array, "array values for pack, in csv, no whitespace in the format of \"11111111,1,1,1,1\"") do |ar|
      $options[:filter_array] = ar.map(&:to_i)
    end

    opts.on('-p', '--pack PACK', "template string to build binary sequence from literal passed to -a") do |pack|
      $options[:filter_pack] = pack.to_s
    end
  end

  usage "You didn't specify any options" if not ARGV[0]

  # Report unknown switches / missing arguments through the normal usage
  # output instead of an uncaught exception and backtrace.
  begin
    $optparse.parse!
  rescue OptionParser::ParseError => e
    usage e.message
  end

  usage "You didn't specify a hostname" if not $options[:hostname]
  usage "You didn't specify a table" if not $options[:table]
  usage "You didn't specify an array literal" if not $options[:filter_array]
  usage "You didn't specify a binary sequence template" if not $options[:filter_pack]

  # The timing deliberately includes connection setup as well as the scan.
  start_time = Time.now
  c = get_connection($options[:table])
  filter = get_filter()
  count = get_row_count(c, filter)
  total_time = Time.now - start_time
  puts "Found #{count} row(s) in #{total_time} second(s)"
end
|
114
|
+
|
115
|
+
main() if __FILE__ == $0
|