hbase-ruby 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/History.txt +11 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.textile +81 -0
  4. data/Rakefile +24 -0
  5. data/VERSION +1 -0
  6. data/lib/hbase.rb +44 -0
  7. data/lib/hbase/client.rb +80 -0
  8. data/lib/hbase/exception.rb +19 -0
  9. data/lib/hbase/model.rb +19 -0
  10. data/lib/hbase/model/column.rb +9 -0
  11. data/lib/hbase/model/column_descriptor.rb +32 -0
  12. data/lib/hbase/model/region_descriptor.rb +9 -0
  13. data/lib/hbase/model/row.rb +11 -0
  14. data/lib/hbase/model/scanner.rb +13 -0
  15. data/lib/hbase/model/table_descriptor.rb +8 -0
  16. data/lib/hbase/operation/meta_operation.rb +10 -0
  17. data/lib/hbase/operation/row_operation.rb +83 -0
  18. data/lib/hbase/operation/scanner_operation.rb +82 -0
  19. data/lib/hbase/operation/table_operation.rb +95 -0
  20. data/lib/hbase/request.rb +7 -0
  21. data/lib/hbase/request/basic_request.rb +27 -0
  22. data/lib/hbase/request/meta_request.rb +17 -0
  23. data/lib/hbase/request/row_request.rb +34 -0
  24. data/lib/hbase/request/scanner_request.rb +25 -0
  25. data/lib/hbase/request/table_request.rb +43 -0
  26. data/lib/hbase/response.rb +7 -0
  27. data/lib/hbase/response/basic_response.rb +16 -0
  28. data/lib/hbase/response/meta_response.rb +35 -0
  29. data/lib/hbase/response/row_response.rb +31 -0
  30. data/lib/hbase/response/scanner_response.rb +37 -0
  31. data/lib/hbase/response/table_response.rb +26 -0
  32. data/spec/hbase/model/column_descriptor_spec.rb +23 -0
  33. data/spec/hbase/model/column_spec.rb +12 -0
  34. data/spec/hbase/model/region_descriptor_spec.rb +4 -0
  35. data/spec/hbase/model/row_spec.rb +12 -0
  36. data/spec/hbase/model/scanner.rb +7 -0
  37. data/spec/hbase/model/table_descriptor_spec.rb +12 -0
  38. data/spec/hbase/operation/meta_operation_spec.rb +18 -0
  39. data/spec/hbase/operation/row_operation_spec.rb +39 -0
  40. data/spec/hbase/operation/scanner_operation_spec.rb +81 -0
  41. data/spec/hbase/operation/table_operation_spec.rb +57 -0
  42. data/spec/hbase/record_spec.rb +25 -0
  43. data/spec/hbase/request/meta_request_spec.rb +10 -0
  44. data/spec/hbase/request/row_request_spec.rb +5 -0
  45. data/spec/hbase/request/scanner_request_spec.rb +5 -0
  46. data/spec/hbase/request/table_request_spec.rb +4 -0
  47. data/spec/hbase/response/meta_response_spec.rb +4 -0
  48. data/spec/hbase/response/row_response_spec.rb +4 -0
  49. data/spec/hbase/response/scanner_response_spec.rb +4 -0
  50. data/spec/hbase/response/table_response_spec.rb +4 -0
  51. data/spec/spec.opts +5 -0
  52. data/spec/spec_helper.rb +7 -0
  53. data/tasks/rspec.rake +7 -0
  54. metadata +147 -0
@@ -0,0 +1,11 @@
1
+ v1.1.0 Nov 7, 2009
2
+ * Bumped to version 1.1.0 to avoid confusion with an existing hbase-ruby on gemcutter that was at version 1.0
3
+ * Reimplemented entirely with HBase Stargate (old REST API was obsoleted by this)
4
+ * Since Stargate can return JSON, parsing is now done with JSON instead of XML
5
+ * API change to open_scanner: it now expects open_scanner(table_name, options), with the available options listed in HBase::Model::Scanner::AVAILABLE_OPTS
6
+
7
+ v0.4 Oct 12, 2008
8
+ * support 'scanner'
9
+
10
+ v0.1 Jul 29, 2008
11
+ * initial version
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2007 Dingding Ye
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,81 @@
1
+ hbase-ruby is a pure ruby client for HBase (http://hadoop.apache.org/hbase) that works with the HBase Stargate interface.
2
+ Stargate is the RESTful web service front end for HBase that can serve up a number of formats including XML, JSON, and protobufs.
3
+
4
+ This was originally created by Dingding Ye <yedingding@gmail.com> at http://github.com/sishen/hbase-ruby
5
+
6
+
7
+ h2. Versions
8
+
9
+ * **"hbase-0.20.1 version" - rewritten for Stargate**: http://github.com/greglu/hbase-ruby/tree/master
10
+ * **"hbase-0.20dev version"**: http://github.com/sishen/hbase-ruby/tree/master
11
+ * **"hbase-0.19 version"**: http://github.com/sishen/hbase-ruby/tree/hbase-0.19
12
+
13
+
14
+ h2. Installation
15
+
16
+ <pre>
17
+ <code>$ gem install hbase-ruby -s http://gemcutter.org</code>
18
+ </pre>
19
+
20
+ To work with this gem in your Rails application, add this to the environment.rb file:
21
+ <pre><code>config.gem 'hbase-ruby', :lib => "hbase", :source => "http://gemcutter.org"</code></pre>
22
+
23
+ To build the gem yourself:
24
+ <pre><code>$ rake gem</code></pre>
25
+
26
+
27
+ h2. Getting Started
28
+
29
+ # Download and unpack the most recent release of HBase from http://hadoop.apache.org/hbase/releases.html#Download
30
+ # Edit <hbase-dir>/conf/hbase-env.sh and uncomment/modify the following line to correspond to your Java home path:
31
+ export JAVA_HOME=/usr/lib/jvm/java-6-sun
32
+ # Copy <hbase-dir>/contrib/stargate/hbase-<version>-stargate.jar into <hbase-dir>/lib
33
+ # Copy all the files in the <hbase-dir>/contrib/stargate/lib folder into <hbase-dir>/lib
34
+ # Start up HBase:
35
+ $ <hbase-dir>/bin/start-hbase.sh
36
+ # Start up Stargate (append "-p 1234" at the end if you want to change the port):
37
+ $ <hbase-dir>/bin/hbase org.apache.hadoop.hbase.stargate.Main
38
+
39
+
40
+ h2. Usage
41
+
42
+ Here are some examples:
43
+
44
+ <pre><code>
45
+ require 'hbase'
46
+
47
+ client = HBase::Client.new("http://localhost:8080") # this url is the default for stargate.
48
+
49
+ # Table Operation
50
+ tables = client.list_tables # list available tables
51
+ table = client.create_table('users', 'habbit') # create a table whose column_family is habbit
52
+ table = client.show_table('users') # show the meta info of table users
53
+ client.delete_table('users') # delete 'users' table
54
+
55
+ # Row Operation
56
+ row = client.show_row('users', 'sishen') # show the data of row 'sishen' in table 'users'
57
+ row2 = client.create_row('users', 'sishen', Time.now.to_i, {:name => 'habbit:football', :value => 'i like football'}) # create the row 'sishen' with the data in the table 'users'
58
+ client.delete_row('users', 'sishen', nil, 'habbit:football') # delete the row 'sishen' of table 'users' with the optional column 'habbit:football'
59
+
60
+ # Scanner Operation
61
+ scanner = client.open_scanner('users', {:start_row => "row2", :batch => 5, :columns => ["habbit:"]}) # See more options in HBase::Model::Scanner::AVAILABLE_OPTS
62
+ rows = client.get_rows(scanner)
63
+ client.close_scanner(scanner)
64
+ </code></pre>
65
+
66
+
67
+ h2. Testing
68
+
69
+ Run the specs with the following rake task:
70
+
71
+ $ rake spec
72
+
73
+ or pass it the URL to the HBase Stargate server as an argument:
74
+
75
+ $ rake spec HBASE_URL=http://localhost:8080
76
+
77
+
78
+ h2. Copyright
79
+
80
+ Copyright (c) 2008 Dingding Ye <yedingding@gmail.com>
81
+ Distributed under MIT License
@@ -0,0 +1,24 @@
1
+ require 'rubygems'
2
+
3
# Gem packaging tasks come from Jeweler. When it cannot be loaded, print an
# install hint instead of failing so the task files below are still loaded.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name        = "hbase-ruby"
    spec.authors     = ['Ye Dingding', 'Greg Lu']
    spec.email       = 'greg.lu@gmail.com'
    spec.homepage    = "http://github.com/greglu/hbase-ruby"
    spec.summary     = "A pure ruby client for HBase using the Stargate interface."
    spec.description = "A pure ruby client used to interact with HBase through its Stargate interface which serves up XML, JSON, protobuf, and more."

    # Files packaged into the gem, and the subset listed as extra rdoc files.
    spec.files            = FileList["{lib,spec,tasks}/**/*","Rakefile","VERSION","History.txt","MIT-LICENSE","README.textile"].to_a
    spec.extra_rdoc_files = FileList["MIT-LICENSE","README.textile"].to_a

    spec.add_development_dependency "rspec"
    spec.add_dependency "json"
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install jeweler"
end

# Load every additional task definition found under tasks/.
Dir['tasks/**/*.rake'].each do |rake_file|
  load rake_file
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.1.0
@@ -0,0 +1,44 @@
1
# Put this file's directory on the load path unless it is already there,
# either as given or in its expanded form.
lib_dir = File.dirname(__FILE__)
unless $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
  $:.unshift(lib_dir)
end
3
+
4
+ require 'hbase/client'
5
+ require 'hbase/exception'
6
+ require 'hbase/model'
7
+ require 'hbase/request'
8
+ require 'hbase/response'
9
+
10
# Namespace module for the library.
module HBase
end
11
+
12
# Extensions mixed into every object.
class Object
  # Builds a proc that sends +self+ (used as the method name) to its first
  # argument, forwarding any remaining arguments.
  def to_proc
    Proc.new { |receiver, *rest| receiver.send(self, *rest) }
  end

  # Objects that respond to +empty?+ are blank when empty; all other objects
  # are blank only when they are falsy.
  def blank?
    return empty? if respond_to?(:empty?)

    !self
  end
end
21
+
22
class NilClass #:nodoc:
  # +nil+ always counts as blank.
  def blank?; true; end
end
27
+
28
class Array #:nodoc:
  # An array is blank exactly when it is empty.
  alias blank? empty?
end
31
+
32
class Hash #:nodoc:
  # A hash is blank exactly when it is empty.
  alias blank? empty?
end
35
+
36
class String #:nodoc:
  # A string is blank when it contains no non-whitespace character.
  def blank?
    (self =~ /\S/).nil?
  end

  # Only the exact text "true" converts to +true+; anything else gives +false+.
  def to_b
    self == "true"
  end
end
@@ -0,0 +1,80 @@
1
+ require 'net/http'
2
+ require 'hbase/operation/meta_operation'
3
+ require 'hbase/operation/table_operation'
4
+ require 'hbase/operation/row_operation'
5
+ require 'hbase/operation/scanner_operation'
6
+
7
module HBase
  # HTTP client for an HBase Stargate endpoint. The meta, table, row and
  # scanner operations are mixed in from the Operation modules; this class
  # supplies the HTTP verbs they call.
  class Client
    include Operation::MetaOperation
    include Operation::TableOperation
    include Operation::RowOperation
    include Operation::ScannerOperation

    attr_reader :url, :connection

    # url  - base URL of the Stargate server (http or https).
    # opts - optional settings; :timeout sets the connection's read timeout.
    #
    # Raises RuntimeError when url does not parse to an HTTP(S) URI.
    def initialize(url = "http://localhost:8080", opts = {})
      @url = URI.parse(url)
      unless @url.kind_of? URI::HTTP
        raise "invalid http url: #{url}"
      end

      # Net::HTTP.new only builds the connection object; nothing is opened
      # until a request is made.
      @connection = Net::HTTP.new(@url.host, @url.port)

      # URI::HTTPS is a URI::HTTP, so https URLs pass the check above. Without
      # TLS switched on they would be spoken to in plain HTTP.
      if @url.scheme == 'https'
        require 'net/https' # loaded lazily so plain-http users never need OpenSSL
        @connection.use_ssl = true
      end

      @connection.read_timeout = opts[:timeout] if opts[:timeout]
    end

    # GETs path (relative to the base URL), asking for JSON, and returns the
    # response body.
    def get(path)
      safe_request { @connection.get(@url.path + path, {"Accept" => "application/json"}) }
    end

    # POSTs data as text/xml and returns the response body.
    def post(path, data = nil)
      safe_request { @connection.post(@url.path + path, data, {'Content-Type' => 'text/xml'}) }
    end

    # Same request as #post, but returns the response object itself.
    # Needed for scanner functionality.
    def post_response(path, data = nil)
      safe_response { @connection.post(@url.path + path, data, {'Content-Type' => 'text/xml'}) }
    end

    # Sends DELETE for path and returns the response body.
    def delete(path)
      safe_request { @connection.delete(@url.path + path) }
    end

    # Same request as #delete, but returns the response object itself.
    # Needed for scanner functionality.
    def delete_response(path)
      safe_response { @connection.delete(@url.path + path) }
    end

    # PUTs data as text/xml and returns the response body.
    def put(path, data = nil)
      safe_request { @connection.put(@url.path + path, data, {'Content-Type' => 'text/xml'}) }
    end

    private

    # Runs the given block and returns its result (the HTTP response),
    # translating low-level failures into the library's errors.
    #
    # Part of safe_request was broken up into safe_response because when working with scanners
    # in Stargate, you need to have access to the response itself, and not just the body.
    #
    # Raises ConnectionNotEstablishedError when the connection is refused and
    # ConnectionTimeoutError when the request times out.
    def safe_response(&block)
      yield
    rescue Errno::ECONNREFUSED
      raise ConnectionNotEstablishedError, "can't connect to #{@url}"
    rescue Timeout::Error => e
      # Attach the original backtrace to the raised error rather than
      # printing it to stdout from library code.
      raise ConnectionTimeoutError, "execution expired. Maybe query disabled tables", e.backtrace
    end

    # Runs the block through safe_response and returns the body of a
    # successful response. Any other response is raised through
    # Net::HTTPResponse#error!.
    def safe_request(&block)
      response = safe_response(&block)

      case response
      when Net::HTTPSuccess
        response.body
      else
        response.error!
      end
    end

  end
end
@@ -0,0 +1,19 @@
1
# Error classes of the library. The HBase namespace is opened here (rather
# than written as `class HBase::...`) so this file loads even when no other
# file has defined HBase yet.
module HBase
  # Common base class of the library's own error classes.
  class Exception < StandardError; end

  class ConnectionNotEstablishedError < HBase::Exception; end

  class ConnectionTimeoutError < HBase::Exception; end

  class TableNotFoundError < HBase::Exception; end

  class TableExistsError < HBase::Exception; end

  class TableFailCreateError < HBase::Exception; end

  class TableNotDisabledError < HBase::Exception; end

  class TableFailDisableError < HBase::Exception; end

  class TableFailEnableError < HBase::Exception; end

  class RowNotFoundError < HBase::Exception; end
end
@@ -0,0 +1,19 @@
1
module HBase
  module Model
    # Base class for the model objects: copies matching entries of a params
    # hash into instance variables.
    class Record
      # params - key/value pairs (keys may be symbols or strings). A pair is
      #          stored in the instance variable "@<key>" only when the object
      #          responds to a method named <key>; other keys are ignored.
      #          Defaults to an empty hash, and nil is treated the same way.
      def initialize(params = {})
        (params || {}).each do |key, value|
          name = key.to_s
          instance_variable_set("@#{name}", value) if respond_to?(name)
        end
      end
    end
  end
end
13
+
14
+ require 'hbase/model/column'
15
+ require 'hbase/model/column_descriptor'
16
+ require 'hbase/model/region_descriptor'
17
+ require 'hbase/model/row'
18
+ require 'hbase/model/table_descriptor'
19
+ require 'hbase/model/scanner'
@@ -0,0 +1,9 @@
1
module HBase
  module Model
    # Record exposing name, value and timestamp attributes.
    class Column < Record
      attr_accessor :name, :value, :timestamp
    end
  end
end
@@ -0,0 +1,32 @@
1
module HBase
  module Model
    # Compression type names, plus a helper that maps any input onto one of them.
    module CompressionType
      NONE   = "NONE"
      RECORD = "RECORD"
      BLOCK  = "BLOCK"

      CTYPES = [NONE, RECORD, BLOCK]

      # Returns type_string unchanged when it is listed in CTYPES, else NONE.
      def to_compression_type(type_string)
        return type_string if CTYPES.include?(type_string)

        NONE
      end

      module_function :to_compression_type
    end

    # Record describing a column family.
    class ColumnDescriptor < Record
      # Option keys and the names they map to. Several keys are aliases that
      # share one target name (:max_versions/:versions, :block_cache/:blockcache).
      AVAILABLE_OPTS = {
        :name         => "name",
        :max_versions => "VERSIONS",
        :versions     => "VERSIONS",
        :compression  => "COMPRESSION",
        :in_memory    => "IN_MEMORY",
        :block_cache  => "BLOCKCACHE",
        :blockcache   => "BLOCKCACHE",
        :blocksize    => "BLOCKSIZE",
        :length       => "LENGTH",
        :ttl          => "TTL",
        :bloomfilter  => "BLOOMFILTER"
      }

      attr_accessor :name, :compression, :bloomfilter, :maximum_cell_size, :max_versions

      attr_accessor :versions
    end
  end
end
@@ -0,0 +1,9 @@
1
module HBase
  module Model
    # Record subclasses that declare no attributes of their own.
    class Region < Record; end

    class RegionDescriptor < Record; end
  end
end
@@ -0,0 +1,11 @@
1
module HBase
  module Model
    # Record exposing a row's table name, name, timestamp, total count and columns.
    class Row < Record
      attr_accessor :table_name, :name, :timestamp, :total_count, :columns
    end
  end
end
@@ -0,0 +1,13 @@
1
module HBase
  module Model
    # Record holding a scanner's table name, id and url.
    class Scanner < Record
      # Option keys and the names they map to; :batch and :limit both map to "batch".
      AVAILABLE_OPTS = {
        :start_row  => "startRow",
        :end_row    => "endRow",
        :start_time => "startTime",
        :end_time   => "endTime",
        :batch      => "batch",
        :limit      => "batch"
      }

      attr_accessor :table_name, :scanner_id, :scanner_url
    end
  end
end
@@ -0,0 +1,8 @@
1
module HBase
  module Model
    # Record exposing a table's name and its column families.
    class TableDescriptor < Record
      attr_accessor :name, :column_families
    end
  end
end
@@ -0,0 +1,10 @@
1
module HBase
  module Operation
    # Mixin providing list_tables. The including object must supply +get+.
    module MetaOperation
      # Requests the path given by Request::MetaRequest#list_tables through
      # +get+ and returns the result of parsing it as a :list_tables
      # MetaResponse.
      def list_tables
        path = Request::MetaRequest.new.list_tables
        Response::MetaResponse.new(get(path), :list_tables).parse
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
module HBase
  module Operation
    # Row-level operations. The including object must provide +get+, +post+
    # and +delete+.
    module RowOperation
      # XML entity replacements applied to column names in create_row.
      Converter = {
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        "'" => '&apos;',
        '"' => '&quot;'
      }

      # Always raises NotImplementedError.
      def row_timestamps(table_name, name)
        raise NotImplementedError, "Currently not supported in native hbase client"
      end

      # Fetches row +name+ of +table_name+ and returns the first parsed row,
      # with its table_name and timestamp set from the arguments.
      #
      # options - passed to the request; :version defaults to 1. The caller's
      #           hash is not modified.
      #
      # Raises TableNotFoundError / RowNotFoundError when the protocol error
      # message mentions "Table" / "Row"; other protocol errors are re-raised.
      def show_row(table_name, name, timestamp = nil, columns = nil, options = { })
        options = options.merge(:version => 1) unless options[:version]
        request = Request::RowRequest.new(table_name, name, timestamp)
        row = Response::RowResponse.new(get(request.show(columns, options))).parse.first
        row.table_name = table_name
        row.timestamp = timestamp
        row
      rescue Net::ProtocolError => e
        translate_row_protocol_error(e, table_name, name)
      end

      # Posts a CellSet document that creates row +name+ in +table_name+.
      #
      # columns - a Hash with :name and :value, an Array of such hashes, or
      #           nil for a row without cells.
      #
      # Returns whatever +post+ returns.
      # Raises StandardError when columns is neither an Array nor a Hash, and
      # the same errors as show_row for protocol failures.
      def create_row(table_name, name, timestamp = nil, columns = nil)
        request = Request::RowRequest.new(table_name, name, timestamp)
        data = case columns
               when nil, false then []
               when Array then columns
               when Hash then [columns]
               else raise StandardError, "Only Array or Hash data accepted"
               end

        # Key, column and value are Base64-encoded; anything that cannot be
        # packed is sent as an empty string (best effort, as before).
        xml_data = "<?xml version='1.0' encoding='UTF-8' standalone='yes'?><CellSet>"
        xml_data << "<Row key='#{[name].pack('m') rescue ''}'>"
        data.each do |d|
          # NOTE(review): the name is XML-escaped *before* Base64 encoding, so
          # the encoded column contains the entity text. Kept as is because
          # the reading side is not visible here -- confirm whether intended.
          escape_name = d[:name].gsub(/[&<>'"]/) { |match| Converter[match] }
          xml_data << "<Cell "
          # The trailing space separates this attribute from column=...;
          # XML requires whitespace between attributes.
          xml_data << "timestamp='#{timestamp}' " if timestamp
          xml_data << "column='#{[escape_name].pack('m') rescue ''}'>"
          xml_data << "#{[d[:value]].pack("m") rescue ''}"
          xml_data << "</Cell>"
        end
        xml_data << "</Row></CellSet>"

        post(request.create(data.map{|col| col[:name]}), xml_data)
      rescue Net::ProtocolError => e
        translate_row_protocol_error(e, table_name, name)
      end

      # Deletes row +name+ of +table_name+, optionally restricted to
      # +columns+, and returns a RowResponse wrapping the reply.
      #
      # Raises the same errors as show_row for protocol failures.
      def delete_row(table_name, name, timestamp = nil, columns = nil)
        request = Request::RowRequest.new(table_name, name, timestamp)
        Response::RowResponse.new(delete(request.delete(columns)))
      rescue Net::ProtocolError => e
        translate_row_protocol_error(e, table_name, name)
      end

      private

      # Converts a protocol error into TableNotFoundError or RowNotFoundError
      # based on its message; any other protocol error is re-raised unchanged
      # instead of being swallowed.
      def translate_row_protocol_error(error, table_name, name)
        message = error.to_s
        raise TableNotFoundError, "Table '#{table_name}' Not Found" if message.include?("Table")
        raise RowNotFoundError, "Row '#{name}' Not Found" if message.include?("Row")

        raise error
      end
    end
  end
end