hbase-ruby 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,6 @@
1
+ v1.1.1 Nov 7, 2009
2
+ * Improved the scanner functionality. Decided it was worth a version bump and release.
3
+
1
4
  v1.1.0 Nov 7, 2009
2
5
  * Bumped to version 1.1.0 to avoid confusion with an existing hbase-ruby on gemcutter that was at version 1.0
3
6
  * Reimplemented entirely with HBase Stargate (old REST API was obsoleted by this)
@@ -57,7 +57,7 @@ row = client.show_row('users', 'sishen') # show the data of row
57
57
  row2 = client.create_row('users', 'sishen', Time.now.to_i, {:name => 'habbit:football', :value => 'i like football'}) # create the row 'sishen' with the data in the table 'users'
58
58
  client.delete_row('users', 'sishen', nil, 'habbit:football') # delete the row 'sishen' of table 'users' with the optional column 'habbit:football'
59
59
 
60
- # Scanner Operation
60
+ # Scanner Operation (see spec/hbase/operation/scanner_operation_spec.rb for more examples)
61
61
  scanner = client.open_scanner('users', {:start_row => "row2", :batch => 5, :columns => ["habbit:"]}) # See HBase::Model::Scanner::AVAILABLE_OPTS for more options
62
62
  rows = client.get_rows(scanner)
63
63
  client.close_scanner(scanner)
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ begin
5
5
 
6
6
  Jeweler::Tasks.new do |gemspec|
7
7
  gemspec.name = "hbase-ruby"
8
- gemspec.authors = ['Ye Dingding', 'Greg Lu']
8
+ gemspec.authors = ['Ye Dingding', 'Openplaces']
9
9
  gemspec.email = 'greg.lu@gmail.com'
10
10
  gemspec.homepage = "http://github.com/greglu/hbase-ruby"
11
11
  gemspec.summary = "A pure ruby client for HBase using the Stargate interface."
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.0
1
+ 1.1.1
@@ -28,6 +28,11 @@ module HBase
28
28
  safe_request { @connection.get(@url.path + path, {"Accept" => "application/json"}) }
29
29
  end
30
30
 
31
+ # Needed for scanner functionality
32
+ def get_response(path)
33
+ safe_response { @connection.get(@url.path + path, {"Accept" => "application/json"}) }
34
+ end
35
+
31
36
  def post(path, data = nil)
32
37
  safe_request { @connection.post(@url.path + path, data, {'Content-Type' => 'text/xml'}) }
33
38
  end
@@ -17,3 +17,5 @@ class HBase::TableFailDisableError < HBase::Exception; end
17
17
  class HBase::TableFailEnableError < HBase::Exception; end
18
18
 
19
19
  class HBase::RowNotFoundError < HBase::Exception; end
20
+
21
+ class HBase::ScannerError < HBase::Exception; end
@@ -3,11 +3,14 @@ module HBase
3
3
  class Scanner < Record
4
4
  AVAILABLE_OPTS = { :start_row => "startRow", :end_row => "endRow",
5
5
  :start_time => "startTime", :end_time => "endTime",
6
- :batch => "batch", :limit => "batch" }
6
+ :batch => "batch" }
7
7
 
8
8
  attr_accessor :table_name
9
- attr_accessor :scanner_id
10
9
  attr_accessor :scanner_url
10
+ attr_accessor :batch_size
11
+
12
+ # Deprecated: use scanner_url (the scanner's full URL) instead of the bare scanner ID
13
+ attr_accessor :scanner_id
11
14
  end
12
15
  end
13
16
  end
@@ -3,7 +3,7 @@ module HBase
3
3
  module ScannerOperation
4
4
  # Trying to maintain some API stability for now
5
5
  def open_scanner(table_name, columns, start_row, stop_row = nil, timestamp = nil)
6
- warn "[DEPRECATION] This method is deprecated. Use #open_scanner(table_name, options) instead."
6
+ warn "[DEPRECATION] This method is deprecated. Use #open_scanner(table_name, options = {}) instead."
7
7
 
8
8
  open_scanner(table_name, {:columns => columns, :start_row => start_row, :stop_row => stop_row, :timestamp => timestamp})
9
9
  end
@@ -11,16 +11,15 @@ module HBase
11
11
  def open_scanner(table_name, options = {})
12
12
  raise ArgumentError, "options should be given as a Hash" unless options.instance_of? Hash
13
13
  columns = options.delete(:columns)
14
+ batch = options.delete(:batch) || "10"
14
15
 
15
16
  begin
16
17
  request = Request::ScannerRequest.new(table_name)
17
18
 
18
- xml_data = "<?xml version='1.0' encoding='UTF-8' standalone='yes'?><Scanner "
19
+ xml_data = "<?xml version='1.0' encoding='UTF-8' standalone='yes'?><Scanner batch='#{batch}' "
19
20
  options.each do |key,value|
20
21
  if Model::Scanner::AVAILABLE_OPTS.include? key
21
- xml_data << "#{Model::Scanner::AVAILABLE_OPTS[key]}='"
22
- xml_data << ( (key == :batch) ? value.to_s : [value.to_s].flatten.pack('m') )
23
- xml_data << "' "
22
+ xml_data << "#{Model::Scanner::AVAILABLE_OPTS[key]}='#{[value.to_s].flatten.pack('m')}' "
24
23
  else
25
24
  warn "[open_scanner] Received invalid option key :#{key}"
26
25
  end
@@ -37,25 +36,34 @@ module HBase
37
36
 
38
37
  scanner = Response::ScannerResponse.new(post_response(request.open, xml_data), :open_scanner).parse
39
38
  scanner.table_name = table_name
39
+ scanner.batch_size = batch
40
40
  scanner
41
41
  rescue Net::ProtocolError => e
42
- if e.to_s.include?("TableNotFoundException")
43
- raise TableNotFoundError, "Table #{table_name} Not Found!"
44
- else
45
- raise StandardError, e.to_s
46
- end
42
+ raise StandardError, e.to_s
47
43
  end
48
44
  end
49
45
 
50
46
  def get_rows(scanner, limit = nil)
51
- warn "[DEPRECATION] Use of 'limit' here is deprecated. Instead, define the batch size when creating the scanner." if limit
52
47
  begin
53
48
  request = Request::ScannerRequest.new(scanner.table_name)
54
- rows = Response::ScannerResponse.new(get(request.get_rows(scanner)), :get_rows).parse
55
- rows.each do |row|
56
- row.table_name = scanner.table_name
49
+ request_url = request.get_rows(scanner) # The url to the scanner is the same for each batch
50
+
51
+ rows = []
52
+ begin
53
+ # Loop until we've reached the limit, or the scanner was exhausted (HTTP 204 returned)
54
+ until (limit && rows.size >= limit) || (response = get_response(request_url)).code == "204"
55
+ rows.concat Response::ScannerResponse.new(response.body, :get_rows).parse
56
+
57
+ rows.each do |row|
58
+ row.table_name = scanner.table_name
59
+ end
60
+ end
61
+ rescue Exception => e
62
+ raise HBase::ScannerError, "Scanner failed while getting rows. #{e.message}"
57
63
  end
58
- rows
64
+
65
+ # If a limit was given, trim any rows fetched beyond it.
66
+ (limit) ? rows.slice(0, limit) : rows
59
67
  rescue StandardError => e
60
68
  if e.to_s.include?("TableNotFoundException")
61
69
  raise TableNotFoundError, "Table #{table_name} Not Found!"
@@ -15,7 +15,11 @@ module HBase
15
15
  when Net::HTTPCreated
16
16
  HBase::Model::Scanner.new(:scanner_url => raw_data["Location"])
17
17
  else
18
- raise StandardError, "Unable to open scanner. Received the following message: #{raw_data.message}"
18
+ if raw_data.message.include?("TableNotFoundException")
19
+ raise TableNotFoundError, "Table #{table_name} Not Found!"
20
+ else
21
+ raise StandardError, "Unable to open scanner. Received the following message: #{raw_data.message}"
22
+ end
19
23
  end
20
24
  when :get_rows
21
25
  # Dispatch it to RowResponse, since that method is made
@@ -12,6 +12,12 @@ describe HBase::Operation::ScannerOperation do
12
12
  @client.create_row('test-hbase-ruby', 'row3', nil, {:name => 'col1:', :value => "row3-col1"})
13
13
  end
14
14
 
15
+ it "should throw TableNotFoundError if a scanner is requested for an non-existant table" do
16
+ lambda {
17
+ scanner = @client.open_scanner("test-dsg-ruby")
18
+ }.should raise_error
19
+ end
20
+
15
21
  it "should open a scanner and close it successfully" do
16
22
  scanner = @client.open_scanner("test-hbase-ruby")
17
23
  scanner.should.is_a? HBase::Model::Scanner
@@ -25,17 +31,33 @@ describe HBase::Operation::ScannerOperation do
25
31
  }.should_not raise_error
26
32
  end
27
33
 
28
- it "should scan the whole table when given no options" do
34
+ it "should scan the whole table when given no options and no limit" do
29
35
  scanner = @client.open_scanner("test-hbase-ruby")
30
36
 
31
37
  rows = @client.get_rows(scanner)
32
- rows.size.should == 1
33
- rows.first.name.should == "row1"
38
+ rows.size.should == 3
39
+ rows.each do |row|
40
+ row.should be_an_instance_of HBase::Model::Row
41
+ ["row1", "row2", "row3"].should include(row.name)
42
+ end
43
+
44
+ @client.close_scanner(scanner).should be_true
45
+ end
46
+
47
+ it "should scan the whole table but limit the results when given a limit" do
48
+ scanner = @client.open_scanner("test-hbase-ruby")
49
+
50
+ rows = @client.get_rows(scanner, 2)
51
+ rows.size.should == 2
52
+ rows.each do |row|
53
+ row.should be_an_instance_of HBase::Model::Row
54
+ ["row1", "row2"].should include(row.name)
55
+ end
34
56
 
35
57
  @client.close_scanner(scanner).should be_true
36
58
  end
37
59
 
38
- it "should scan the whole table when given a batch size larger than the number of rows" do
60
+ it "should return all rows when given a batch size larger than the number of rows" do
39
61
  scanner = @client.open_scanner("test-hbase-ruby", {:batch => 5})
40
62
 
41
63
  rows = @client.get_rows(scanner)
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hbase-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ye Dingding
8
- - Greg Lu
8
+ - Openplaces
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []