ok_hbase 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/.gitignore +18 -0
  2. data/.rspec +2 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +17 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +47 -0
  7. data/Rakefile +22 -0
  8. data/examples/README.md +46 -0
  9. data/examples/advanced/README.md +36 -0
  10. data/examples/advanced/perf_read.rb +146 -0
  11. data/examples/advanced/perf_write.rb +143 -0
  12. data/examples/advanced/table_read.rb +115 -0
  13. data/examples/advanced/table_write.rb +128 -0
  14. data/examples/table_scan.rb +97 -0
  15. data/examples/table_write.rb +97 -0
  16. data/lib/ok_hbase/active_model.rb +35 -0
  17. data/lib/ok_hbase/client.rb +42 -0
  18. data/lib/ok_hbase/concerns/custom_row/class_methods.rb +13 -0
  19. data/lib/ok_hbase/concerns/custom_row.rb +40 -0
  20. data/lib/ok_hbase/concerns/indexable/class_methods.rb +13 -0
  21. data/lib/ok_hbase/concerns/indexable.rb +101 -0
  22. data/lib/ok_hbase/concerns/row.rb +85 -0
  23. data/lib/ok_hbase/concerns/table/batch.rb +95 -0
  24. data/lib/ok_hbase/concerns/table/class_methods.rb +13 -0
  25. data/lib/ok_hbase/concerns/table/instrumentation.rb +48 -0
  26. data/lib/ok_hbase/concerns/table.rb +241 -0
  27. data/lib/ok_hbase/concerns.rb +13 -0
  28. data/lib/ok_hbase/connection.rb +157 -0
  29. data/lib/ok_hbase/row.rb +21 -0
  30. data/lib/ok_hbase/table.rb +10 -0
  31. data/lib/ok_hbase/version.rb +3 -0
  32. data/lib/ok_hbase.rb +39 -0
  33. data/lib/thrift/hbase/hbase.rb +2643 -0
  34. data/lib/thrift/hbase/hbase_constants.rb +14 -0
  35. data/lib/thrift/hbase/hbase_types.rb +252 -0
  36. data/ok-hbase.gemspec +23 -0
  37. data/spec/ok_hbase/connection_spec.rb +99 -0
  38. data/spec/ok_hbase/table_spec.rb +149 -0
  39. data/spec/ok_hbase_spec.rb +24 -0
  40. data/spec/spec_helper.rb +20 -0
  41. data/tasks/bump.rb +30 -0
  42. metadata +122 -0
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # table_write.rb - basic table batch and single write operations
4
+
5
+ $:.unshift File.expand_path('../../lib', __FILE__)
6
+ $stdout.sync = true
7
+
8
+ require 'awesome_print'
9
+ require 'ok_hbase'
10
+ require 'optparse'
11
+ require 'logger'
12
+
13
+ $options = {}
14
+ $logger = Logger.new(STDOUT)
15
+ $logger.formatter = proc { |severity, datetime, progname, msg| "#{datetime} #{severity}: #{msg}\n" }
16
+ $logger.level = Logger::FATAL
17
+
18
# Print the option summary, preceded by an error message when one is given,
# and terminate the script.
#
# error - optional description of what was wrong with the invocation.
#
# Exits with status 1 when an error is reported and with status 0 when the
# summary was simply requested (e.g. through --help). Never returns.
def usage(error = nil)
  puts "Error: #{error}\n\n" if error
  puts $optparse
  exit(error ? 1 : 0)
end
23
+
24
# Open a connection using the host, port and timeout stored in $options and
# wrap the named table.
#
# table - name of the HBase table to work with.
#
# Returns an OkHbase::Table, or nil (after logging a fatal message) when no
# table name was supplied.
def get_connection(table = nil)
  $logger.debug "Setting up connection for table #{table}"
  if table.nil?
    $logger.fatal "Must specify a table"
    return
  end

  hostname = $options[:hostname]
  $logger.debug "Connecting to #{hostname}"
  settings = {
    auto_connect: true,
    host: hostname,
    port: $options[:port],
    timeout: $options[:timeout]
  }
  connection = OkHbase::Connection.new(**settings)

  $logger.debug "Get instance for table #{table}"
  OkHbase::Table.new(table, connection)
end
37
+
38
# Placeholder for a single-row write. Neither argument is used yet; the
# method only prints a confirmation message.
#
# TODO: populate the column family values.
# TODO: build the binary sequence with a pack template.
def write_test_row(conn, rowkey)
  puts 'wrote all the things'
end
43
+
44
# Placeholder for a batch write. Prints one message per requested row
# ($options[:rowcount]); conn and rowkey are not used yet.
#
# TODO: populate the column family values, then increment the key and write
# on every iteration.
def write_batch_row(conn, rowkey)
  $options[:rowcount].times { puts 'wrote something things' }
end
52
+
53
+
54
# Placeholder for row key construction. It currently only prints "rowkey"
# and therefore returns nil (the result of puts).
#
# TODO: fetch an incrementor value if needed, set the row key attributes and
# time data, build the binary sequence with a pack template, and return the
# binary or decimal sequence to ok-hbase.
def get_rowkey
  puts 'rowkey'
end
62
+
63
# Entry point: parse the command line, validate the required options, run the
# (currently stubbed) batch write and print the elapsed time.
#
# Populates the $options global, stores the parser in $optparse so that usage
# can print it, and raises the $logger level to DEBUG when -v is given.
def main
  $optparse = OptionParser.new do |opts|
    opts.banner = "Usage: #{__FILE__} [options]"

    # Defaults are assigned before the switches are declared because the help
    # strings below interpolate them.
    $options[:verbose] = false
    $options[:port] = 9090
    $options[:timeout] = 600
    $options[:rowcount] = 1

    opts.on('-h', '--help', 'Display this help') do
      usage
    end

    # The description used to read "Output json result", which is not what
    # this switch does.
    opts.on('-v', '--verbose', 'Enable debug logging') do
      $options[:verbose] = true
      $logger.level = Logger::DEBUG
    end

    opts.on('-n', '--host HOSTNAME', 'hostname of RegionServer or master') do |hostname|
      $options[:hostname] = hostname
    end

    opts.on('-t', '--table TABLE', 'hbase table name') do |table|
      $options[:table] = table
    end

    # Long form only. -p used to be declared both here and for --pack; the
    # later declaration (--pack) took over the short switch while the help
    # output still advertised -p for the port. -p stays with --pack so that
    # invocations that worked before keep working.
    opts.on('--port PORT', "port number of thrift server, defaults to #{$options[:port]}") do |port|
      $options[:port] = port.to_i
    end

    opts.on('--timeout TIMEOUT', "connect timeout, defaults to #{$options[:timeout]}") do |timeout|
      $options[:timeout] = timeout.to_i
    end

    opts.on('-a', '--array ARRAY', Array, "array values for pack for rowkey, comma separated, no whitespace in the format of \"11111111,1,1,1,1\"") do |ar|
      $options[:filter_array] = ar.map(&:to_i)
    end

    opts.on('-p', '--pack PACK', "template string to build binary sequence from literal passed to -a") do |pack|
      $options[:filter_pack] = pack.to_s
    end

    opts.on('-w', '--write ROWS', "how many times to write with a row key defaults to #{$options[:rowcount]}") do |row|
      $options[:rowcount] = row.to_i
    end
  end

  usage "You didn't specify any options" if ARGV.empty?

  $optparse.parse!

  usage "You didn't specify a hostname" unless $options[:hostname]
  usage "You didn't specify a table" unless $options[:table]
  usage "You didn't specify an array literal" unless $options[:filter_array]
  usage "You didn't specify a binary sequence template" unless $options[:filter_pack]

  start_time = Time.now
  c = get_connection($options[:table])
  row_key = get_rowkey
  write_batch_row(c, row_key)
  total_time = Time.now - start_time
  puts "Wrote #{$options[:rowcount]} row(s) in #{total_time} second(s)"
end
127
+
128
+ main() if __FILE__ == $0
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # table_scan.rb - basic table scan example (prints the rows matching a row prefix)
4
+
5
+ $:.unshift File.expand_path('../../lib', __FILE__)
6
+ $stdout.sync = true
7
+
8
+ require 'awesome_print'
9
+ require 'ok_hbase'
10
+ require 'optparse'
11
+ require 'logger'
12
+
13
+ $options = {}
14
+ $logger = Logger.new(STDOUT)
15
+ $logger.formatter = proc { |severity, datetime, progname, msg| "#{datetime} #{severity}: #{msg}\n" }
16
+ $logger.level = Logger::DEBUG
17
+
18
# Print the option summary, preceded by an error message when one is given,
# and terminate the script.
#
# error - optional description of what was wrong with the invocation.
#
# Exits with status 1 when an error is reported and with status 0 when the
# summary was simply requested (e.g. through --help). Never returns.
def usage(error = nil)
  puts "Error: #{error}\n\n" if error
  puts $optparse
  exit(error ? 1 : 0)
end
23
+
24
# Build an auto-connecting OkHbase::Connection from the host, port and
# timeout stored in $options.
#
# Returns the new OkHbase::Connection.
def get_connection
  $logger.debug 'Setting up connection'

  host = $options[:host]
  $logger.debug "Connecting to #{host}"

  settings = { auto_connect: true, host: host, port: $options[:port], timeout: $options[:timeout] }
  OkHbase::Connection.new(**settings)
end
36
+
37
# Wrap the named table on an existing connection.
#
# table - name of the HBase table.
# conn  - connection handed to OkHbase::Table.new.
#
# Returns an OkHbase::Table, or nil (after logging a fatal message) when the
# table name is nil.
def get_table(table, conn)
  if table.nil?
    $logger.fatal 'Must specify a table'
    return
  end

  $logger.debug "Get instance for table #{table}"
  OkHbase::Table.new(table, conn)
end
45
+
46
+
47
# Entry point: parse the command line, require a table and a row prefix, then
# scan the table for that prefix and pretty-print every row.
#
# Populates the $options global and stores the parser in $optparse so that
# usage can print it.
def main
  $optparse = OptionParser.new do |opts|
    opts.banner = "Usage: #{__FILE__} [options]"

    # Defaults come first because the help strings below interpolate them.
    $options[:host] = 'localhost'
    $options[:port] = 9090
    $options[:timeout] = 10

    opts.on('-h', '--help', 'Display this help') { usage }

    opts.on('-H', '--host HOST', "host or ip address where thrift server is running, defaults to #{$options[:host]}") do |host|
      $options[:host] = host
    end

    opts.on('-t', '--table TABLE', 'hbase table name') { |name| $options[:table] = name }

    opts.on('-p', '--port PORT', "port number of thrift server, defaults to #{$options[:port]}") do |port|
      $options[:port] = port.to_i
    end

    opts.on('--timeout TIMEOUT', "connect timeout, defaults to #{$options[:timeout]}") do |seconds|
      $options[:timeout] = seconds.to_i
    end

    opts.on('-P', '--prefix ROW_PREFIX', "row prefix to use in scan") { |prefix| $options[:prefix] = prefix }
  end

  usage "You didn't specify any options" unless ARGV[0]

  $optparse.parse!

  usage "You didn't specify a table" unless $options[:table]
  usage "You didn't specify a prefix" unless $options[:prefix]

  connection = get_connection
  table = get_table($options[:table], connection)

  table.scan(row_prefix: $options[:prefix]) { |key, cells| ap(key => cells) }
end
96
+
97
+ main() if __FILE__ == $0
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # table_write.rb - basic table write example (creates the table if needed and writes sample rows)
4
+
5
+ $:.unshift File.expand_path('../../lib', __FILE__)
6
+ $stdout.sync = true
7
+
8
+ require 'awesome_print'
9
+ require 'ok_hbase'
10
+ require 'optparse'
11
+ require 'logger'
12
+
13
+ $options = {}
14
+ $logger = Logger.new(STDOUT)
15
+ $logger.formatter = proc { |severity, datetime, progname, msg| "#{datetime} #{severity}: #{msg}\n" }
16
+ $logger.level = Logger::DEBUG
17
+
18
# Print the option summary, preceded by an error message when one is given,
# and terminate the script.
#
# error - optional description of what was wrong with the invocation.
#
# Exits with status 1 when an error is reported and with status 0 when the
# summary was simply requested (e.g. through --help). Never returns.
def usage(error = nil)
  puts "Error: #{error}\n\n" if error
  puts $optparse
  exit(error ? 1 : 0)
end
23
+
24
# Build an auto-connecting OkHbase::Connection from the host, port and
# timeout stored in $options.
#
# Returns the new OkHbase::Connection.
def get_connection
  $logger.debug 'Setting up connection'

  host = $options[:host]
  $logger.debug "Connecting to #{host}"

  settings = { auto_connect: true, host: host, port: $options[:port], timeout: $options[:timeout] }
  OkHbase::Connection.new(**settings)
end
36
+
37
# Return a handle for the named table, creating the table first when the
# connection does not list it yet.
#
# table - name of the HBase table.
# conn  - connection used to list, create and wrap tables.
#
# Returns an OkHbase::Table for an existing table, otherwise whatever
# conn.create_table returns (the table is created with a single column
# family "d"). Returns nil, after logging a fatal message, when the table
# name is nil.
def create_table(table, conn)
  if table.nil?
    $logger.fatal 'Must specify a table'
    return
  end

  $logger.debug "Get instance for table #{table}"
  return conn.create_table(table, d: {}) unless conn.tables.include?(table)

  OkHbase::Table.new(table, conn)
end
49
+
50
+
51
# Entry point: parse the command line, require a table name, make sure the
# table exists and write one sample row for every key from 'a' to 'zzz'.
#
# Populates the $options global and stores the parser in $optparse so that
# usage can print it.
def main
  $optparse = OptionParser.new do |opts|
    opts.banner = "Usage: #{__FILE__} [options]"

    # Defaults come first because the help strings below interpolate them.
    $options[:host] = 'localhost'
    $options[:port] = 9090
    $options[:timeout] = 10

    opts.on('-h', '--help', 'Display this help') do
      usage
    end

    opts.on('-H', '--host HOST', "host or ip address where thrift server is running, defaults to #{$options[:host]}") do |host|
      $options[:host] = host
    end

    opts.on('-t', '--table TABLE', 'hbase table name') do |table|
      $options[:table] = table
    end

    opts.on('-p', '--port PORT', "port number of thrift server, defaults to #{$options[:port]}") do |port|
      $options[:port] = port.to_i
    end

    opts.on('--timeout TIMEOUT', "connect timeout, defaults to #{$options[:timeout]}") do |timeout|
      $options[:timeout] = timeout.to_i
    end
  end

  usage "You didn't specify any options" if ARGV.empty?

  $optparse.parse!

  usage "You didn't specify a table" unless $options[:table]

  connection = get_connection
  table = create_table($options[:table], connection)

  ('a'..'zzz').each_with_index do |row_key, index|
    row_number = index + 1
    # Logged before the put; this message used to claim the row was already
    # written.
    $logger.debug "writing row: #{row_key}"
    table.put(row_key, { 'd:row_number' => row_number.to_s, 'd:message' => "this is row number #{row_number}" })
    $logger.debug "wrote row: #{row_key}"
  end
end
96
+
97
+ main() if __FILE__ == $0
@@ -0,0 +1,35 @@
1
+ require 'ok_hbase/concerns'
2
+ require 'ok_hbase/row'
3
+
4
module OkHbase
  # Row subclass whose class doubles as its table: the Table, CustomRow and
  # Indexable concerns are mixed in through their ClassMethods wrappers, and
  # every instance is created with `table: self.class`.
  class ActiveModel < OkHbase::Row
    include OkHbase::Concerns::Table::ClassMethods
    include OkHbase::Concerns::CustomRow::ClassMethods
    include OkHbase::Concerns::Indexable::ClassMethods
    include OkHbase::Concerns::Table::Instrumentation

    # raw_data - hash of column data. When it carries a truthy :raw_data
    #            entry, that nested value is used as the column data instead.
    def initialize(raw_data = {})
      columns = raw_data.with_indifferent_access
      nested = columns[:raw_data]
      columns = nested if nested

      super({
        table: self.class,
        default_column_family: self.class.default_column_family,
        raw_data: columns,
      })
    end

    # Instantiate a record from raw_data, save it and return the instance.
    def self.create(raw_data = {})
      new(raw_data).tap { |record| record.save! }
    end

    # Delete this row through the class-level delete.
    #
    # indexes - index names forwarded as the fourth argument of the
    #           class-level delete.
    def delete(indexes = [])
      self.class.delete(row_key, nil, nil, indexes)
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'thrift'
2
+ require 'thrift/transport/socket'
3
+ require 'thrift/protocol/binary_protocol'
4
+
5
+ require 'thrift/hbase/hbase_constants'
6
+ require 'thrift/hbase/hbase_types'
7
+ require 'thrift/hbase/hbase'
8
+
9
module OkHbase
  # Thrift HBase client that (re)opens its transport before every message
  # and retries recoverable failures.
  #
  # A message is attempted until it succeeds, a non-recoverable error is
  # raised, or max_tries attempts have failed. With the default max_tries of
  # 0 the first failure is re-raised immediately.
  class Client < Apache::Hadoop::Hbase::Thrift::Hbase::Client

    attr_accessor :max_tries

    # iprot     - input protocol, forwarded to the Thrift client.
    # oprot     - optional output protocol, forwarded to the Thrift client.
    # max_tries - maximum number of attempts per message (nil means 0).
    def initialize(iprot, oprot = nil, max_tries = nil)
      @max_tries = max_tries || 0
      super(iprot, oprot)
    end

    # Both low-level Thrift hooks get the same retry wrapper, so it is
    # generated from their signatures instead of being written twice.
    signatures = ['send_message(name, args_class, args = {})', 'receive_message(result_klass)']

    signatures.each do |signature|
      # __LINE__ + 1: the heredoc body starts on the line after this one, so
      # the offset keeps backtraces pointing at the right source line.
      module_eval <<-RUBY, __FILE__, __LINE__ + 1
        def #{signature}
          tries = 0
          begin
            @iprot.trans.open unless @iprot.trans.open?
            super
          rescue => e
            tries += 1
            raise unless tries < max_tries && recoverable?(e)
            retry
          end
        end
      RUBY
    end

    # True for the errors worth retrying: HBase Thrift IOError and Thrift
    # transport exceptions.
    def recoverable?(e)
      e.is_a?(Apache::Hadoop::Hbase::Thrift::IOError) ||
        e.is_a?(Thrift::TransportException)
    end
  end
end
@@ -0,0 +1,13 @@
1
module OkHbase
  module Concerns
    module CustomRow
      # Concern wrapper that exposes the CustomRow methods at class level:
      # the nested ClassMethods module is what ActiveSupport::Concern extends
      # onto a class that includes this module.
      module ClassMethods
        extend ActiveSupport::Concern

        module ClassMethods
          include OkHbase::Concerns::CustomRow
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module OkHbase
  module Concerns
    # Wraps the raw results of row, rows and scan (provided further up the
    # ancestor chain and reached through super) in instances of a
    # configurable row class.
    #
    # NOTE(review): the row class and default column family live in class
    # variables of this module, so they appear to be shared by everything
    # that mixes the concern in - confirm that this is intended.
    module CustomRow
      extend ActiveSupport::Concern

      # Fetch one row and wrap it in row_class.
      # NOTE(review): unlike rows and scan, the row key is not handed to the
      # row object here - confirm whether callers rely on that.
      def row(row_key, columns = nil, timestamp = nil, include_timestamp = false)
        self.row_class.new table: self, default_column_family: self.default_column_family, raw_data: super
      end

      # Fetch several rows and wrap each in row_class, pairing every result
      # with the key at the same position in row_keys.
      def rows(row_keys, columns = nil, timestamp = nil, include_timestamp = false)
        # Enumerator has no with_index! method (the previous call raised
        # NoMethodError); with_index is the non-bang, existing equivalent.
        super.map.with_index do |data, i|
          self.row_class.new table: self, row_key: row_keys[i], default_column_family: self.default_column_family, raw_data: data
        end
      end

      # Scan the table. With a block, yields one row_class instance per
      # result; without one, returns the instances as an array.
      def scan(opts={})
        if block_given?
          super { |row_key, data| yield self.row_class.new(table: self, row_key: row_key, default_column_family: self.default_column_family, raw_data: data) }
        else
          super.map { |row_key, data| self.row_class.new(table: self, row_key: row_key, default_column_family: self.default_column_family, raw_data: data) }
        end
      end

      # Class used to wrap results; set with use_row_class.
      def row_class
        @@_row_class
      end

      # Column family handed to every wrapped row; set with
      # use_default_column_family.
      def default_column_family
        @@default_column_family
      end

      def use_row_class(klass)
        @@_row_class = klass
      end

      def use_default_column_family(column_family)
        @@default_column_family = column_family
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module OkHbase
  module Concerns
    module Indexable
      # Concern wrapper that exposes the Indexable methods at class level:
      # the nested ClassMethods module is what ActiveSupport::Concern extends
      # onto a class that includes this module.
      module ClassMethods
        extend ActiveSupport::Concern

        module ClassMethods
          include OkHbase::Concerns::Indexable
        end
      end
    end
  end
end
@@ -0,0 +1,101 @@
1
module OkHbase
  module Concerns
    # Index support: rows are stored under row keys packed from selected
    # attributes, and each declared index gets a prefix-scan finder.
    #
    # NOTE(review): the index registry is a class variable of this module, so
    # it appears to be shared by everything that mixes the concern in -
    # confirm that this is intended.
    module Indexable
      extend ActiveSupport::Concern

      # Declare an index and define the singleton helpers that serve it.
      #
      # index_name - name of the index; also the name of the finder defined
      #              on the receiver.
      # opts       - :attributes    ordered attribute names forming the row
      #                             key (:index_id stands for the value of
      #                             the :index_id option)
      #              :pack_pattern  array of pack directives, one per
      #                             attribute
      #              :prefix_length number of leading attributes used as the
      #                             scan prefix by the finder
      #              :index_id      value substituted for :index_id
      #              :auto_create   when truthy, put always writes this index
      def use_index(index_name, opts={})
        options = opts.with_indifferent_access
        attributes = options[:attributes]
        prefix_length = options[:prefix_length]
        index_id = options[:index_id]
        pack_pattern = options[:pack_pattern]

        @@_indexes ||= {}
        @@_indexes = @@_indexes.with_indifferent_access
        @@_indexes[index_name] = options

        define_singleton_method :indexes do
          @@_indexes
        end

        define_singleton_method :encode_for_row_key do |value|
          # coerce booleans to ints for packing
          value = 1 if value.to_s.downcase == "true"
          value = 0 if value.to_s.downcase == "false"

          # coerce hbase i64s (8-byte big-endian strings) to Integer
          value = value.unpack('Q>').first if value.is_a?(String)

          value
        end

        # Build the packed row key of index_name for the given column data.
        define_singleton_method :key_for_index do |index_name, data|
          options = @@_indexes[index_name]

          row = self.row_class.new(table: self, default_column_family: self.default_column_family, raw_data: data)
          row_key_components = options[:attributes].map do |attribute|
            value = if attribute == :index_id
              options[:index_id]
            else
              row.attributes[attribute] || row.send(attribute)
            end
            encode_for_row_key(value)
          end

          row_key_components.pack(options[:pack_pattern].join(''))
        end

        # Finder named after the index: packs the first prefix_length
        # attributes from idx_options into a row prefix and scans for it.
        define_singleton_method index_name do |idx_options, &block|
          expected_option_keys = attributes[0...prefix_length]
          prefix_pack_pattern = pack_pattern[0...prefix_length].join('')

          prefix_components = expected_option_keys.map do |key|
            value = key == :index_id ? index_id : idx_options[key]
            encode_for_row_key(value)
          end

          row_prefix = prefix_components.pack(prefix_pack_pattern)

          scan(row_prefix: row_prefix, &block)
        end
      end

      # Write data under the key of every index that is auto-created or
      # named in extra_indexes, inside one batch transaction.
      #
      # row_key       - accepted for interface compatibility; not used here,
      #                 the index keys are derived from data.
      # data          - column data to store.
      # timestamp     - optional timestamp handed to the batch.
      # extra_indexes - additional index names (strings or symbols).
      def put(row_key, data, timestamp = nil, extra_indexes=[])
        # The registry is an indifferent-access hash, so it yields its keys
        # as strings; normalising both sides lets symbol names match too.
        requested = Array(extra_indexes).map(&:to_s)

        batch(timestamp).transaction do |batch|
          @@_indexes.each_pair do |index_name, options|
            next unless options[:auto_create] || requested.include?(index_name.to_s)

            index_row_key = key_for_index(index_name, data)

            batch.put(index_row_key, data)
          end
        end
      end

      # Delete the entries of a row from the given indexes (all declared
      # indexes when none are given). Does nothing when the row has neither
      # a row key nor any other attribute.
      def delete(row_key, columns=nil, timestamp=nil, indexes=[])
        row = self.row(row_key)
        attributes = row.attributes
        if attributes[:row_key].blank? && attributes.except(:row_key).blank?
          return
        end

        indexes = Array(indexes)

        if indexes.empty?
          indexes = @@_indexes.keys
        end
        self.batch(timestamp).transaction do |batch|
          indexes.each do |index_name|
            index_row_key = key_for_index(index_name, row.attributes)
            batch.delete(index_row_key, columns)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
module OkHbase
  module Concerns
    # Behaviour of a single row: column data is kept in raw_data under
    # "family:qualifier" keys and is reachable through dynamic accessors.
    module Row
      attr_accessor :table, :row_key, :timestamp, :default_column_family
      attr_reader :raw_data

      # Alias for row_key.
      def id
        self.row_key
      end

      def id=(val)
        self.row_key = val
      end

      # raw_data with every value run through _encode, as an
      # indifferent-access hash.
      def encoded_data
        Hash[@raw_data.map { |k, v| [k, _encode(v)] }].with_indifferent_access
      end

      # Indifferent-access hash mapping each column qualifier (the part after
      # the first ':') to the value its accessor returns, plus :row_key.
      def attributes
        pairs = @raw_data.keys.map do |column|
          qualifier = column.split(':', 2).last
          [qualifier, send(qualifier)]
        end

        hash = Hash[pairs].with_indifferent_access
        hash[:row_key] = @row_key
        hash
      end

      # Store the encoded data in the table under row_key.
      def save!()
        #raise ArgumentError.new "row_key must be a non-empty string" unless !@row_key.blank? && @row_key.is_a?(String)

        table.put(row_key, encoded_data, timestamp)
      end

      def delete
        table.delete(row_key)
      end

      # Dynamic column accessors: `foo = value` writes and `foo` reads the
      # "<default_column_family>:foo" entry of raw_data. A name that already
      # contains ':' is used as the column key unchanged.
      def method_missing(method, *arguments, &block)
        name = method.to_s

        if name[-1, 1] == '='
          key = name[0...-1]
          key = "#{default_column_family}:#{key}" unless key.include?(':')
          raw_data[key] = arguments.last
        else
          # A qualified name used to leave the key unset, so the lookup was
          # done with nil; it now uses the name itself.
          key = name.include?(':') ? name : "#{default_column_family}:#{name}"
          raw_data[key]
        end
      end

      private

      # Convert a value to the UTF-8 tagged string stored in HBase.
      #
      # Strings are copied, integers become 8 big-endian bytes, booleans
      # become "true"/"false", nil stays nil; any other value yields nil.
      def _encode(value)
        case value
        when String
          value.dup.force_encoding(Encoding::UTF_8)
        when Integer
          # Integer covers the former Fixnum and Bignum classes, which no
          # longer exist as constants in current Ruby.
          [value].pack('Q>').force_encoding(Encoding::UTF_8)
        when TrueClass, FalseClass
          # true.to_s / false.to_s may return frozen strings; dup before
          # changing the encoding in place.
          value.to_s.dup.force_encoding(Encoding::UTF_8)
        when NilClass
          value
        end
      end

    end
  end
end