hbase-ruby 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,6 @@
1
+ v1.1.1 Nov 7, 2009
2
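+ * Improved the scanner functionality: get_rows now keeps fetching batches until the scanner is exhausted or the optional limit is reached. Decided it was worth a version bump and release.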
3
+
1
4
  v1.1.0 Nov 7, 2009
2
5
  * Bumped to version 1.1.0 to avoid confusion with an existing hbase-ruby on gemcutter that was at version 1.0
3
6
  * Reimplemented entirely with HBase Stargate (old REST API was obsoleted by this)
@@ -57,7 +57,7 @@ row = client.show_row('users', 'sishen') # show the data of row
57
57
  row2 = client.create_row('users', 'sishen', Time.now.to_i, {:name => 'habbit:football', :value => 'i like football'}) # create the row 'sishen' with the data in the table 'users'
58
58
  client.delete_row('users', 'sishen', nil, 'habbit:football') # delete the row 'sishen' of table 'users' with the optional column 'habbit:football'
59
59
 
60
- # Scanner Operation
60
+ # Scanner Operation (see spec/hbase/operation/scanner_operation_spec.rb for more examples)
61
61
  scanner = client.open_scanner('users', {:start_row => "row2", :batch => 5, :columns => ["habbit:"]}) # See more options in HBase::Model::Scanner::AVAILABLE_OPTS
62
62
  rows = client.get_rows(scanner)
63
63
  client.close_scanner(scanner)
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ begin
5
5
 
6
6
  Jeweler::Tasks.new do |gemspec|
7
7
  gemspec.name = "hbase-ruby"
8
- gemspec.authors = ['Ye Dingding', 'Greg Lu']
8
+ gemspec.authors = ['Ye Dingding', 'Openplaces']
9
9
  gemspec.email = 'greg.lu@gmail.com'
10
10
  gemspec.homepage = "http://github.com/greglu/hbase-ruby"
11
11
  gemspec.summary = "A pure ruby client for HBase using the Stargate interface."
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.0
1
+ 1.1.1
@@ -28,6 +28,11 @@ module HBase
28
28
  safe_request { @connection.get(@url.path + path, {"Accept" => "application/json"}) }
29
29
  end
30
30
 
31
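+ # Needed for scanner functionality: get_rows checks the response's status code (204 means the scanner is exhausted) as well as its body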
32
+ def get_response(path)
33
+ safe_response { @connection.get(@url.path + path, {"Accept" => "application/json"}) }
34
+ end
35
+
31
36
  def post(path, data = nil)
32
37
  safe_request { @connection.post(@url.path + path, data, {'Content-Type' => 'text/xml'}) }
33
38
  end
@@ -17,3 +17,5 @@ class HBase::TableFailDisableError < HBase::Exception; end
17
17
  class HBase::TableFailEnableError < HBase::Exception; end
18
18
 
19
19
  class HBase::RowNotFoundError < HBase::Exception; end
20
+
21
+ class HBase::ScannerError < HBase::Exception; end
@@ -3,11 +3,14 @@ module HBase
3
3
  class Scanner < Record
4
4
  AVAILABLE_OPTS = { :start_row => "startRow", :end_row => "endRow",
5
5
  :start_time => "startTime", :end_time => "endTime",
6
- :batch => "batch", :limit => "batch" }
6
+ :batch => "batch" }
7
7
 
8
8
  attr_accessor :table_name
9
- attr_accessor :scanner_id
10
9
  attr_accessor :scanner_url
10
+ attr_accessor :batch_size
11
+
12
+ # Deprecated: scanner_url is now used instead of just the scanner ID
13
+ attr_accessor :scanner_id
11
14
  end
12
15
  end
13
16
  end
@@ -3,7 +3,7 @@ module HBase
3
3
  module ScannerOperation
4
4
  # Trying to maintain some API stability for now
5
5
  def open_scanner(table_name, columns, start_row, stop_row = nil, timestamp = nil)
6
- warn "[DEPRECATION] This method is deprecated. Use #open_scanner(table_name, options) instead."
6
+ warn "[DEPRECATION] This method is deprecated. Use #open_scanner(table_name, options = {}) instead."
7
7
 
8
8
  open_scanner(table_name, {:columns => columns, :start_row => start_row, :stop_row => stop_row, :timestamp => timestamp})
9
9
  end
@@ -11,16 +11,15 @@ module HBase
11
11
  def open_scanner(table_name, options = {})
12
12
  raise ArgumentError, "options should be given as a Hash" unless options.instance_of? Hash
13
13
  columns = options.delete(:columns)
14
+ batch = options.delete(:batch) || "10"
14
15
 
15
16
  begin
16
17
  request = Request::ScannerRequest.new(table_name)
17
18
 
18
- xml_data = "<?xml version='1.0' encoding='UTF-8' standalone='yes'?><Scanner "
19
+ xml_data = "<?xml version='1.0' encoding='UTF-8' standalone='yes'?><Scanner batch='#{batch}' "
19
20
  options.each do |key,value|
20
21
  if Model::Scanner::AVAILABLE_OPTS.include? key
21
- xml_data << "#{Model::Scanner::AVAILABLE_OPTS[key]}='"
22
- xml_data << ( (key == :batch) ? value.to_s : [value.to_s].flatten.pack('m') )
23
- xml_data << "' "
22
+ xml_data << "#{Model::Scanner::AVAILABLE_OPTS[key]}='#{[value.to_s].flatten.pack('m')}' "
24
23
  else
25
24
  warn "[open_scanner] Received invalid option key :#{key}"
26
25
  end
@@ -37,25 +36,34 @@ module HBase
37
36
 
38
37
  scanner = Response::ScannerResponse.new(post_response(request.open, xml_data), :open_scanner).parse
39
38
  scanner.table_name = table_name
39
+ scanner.batch_size = batch
40
40
  scanner
41
41
  rescue Net::ProtocolError => e
42
- if e.to_s.include?("TableNotFoundException")
43
- raise TableNotFoundError, "Table #{table_name} Not Found!"
44
- else
45
- raise StandardError, e.to_s
46
- end
42
+ raise StandardError, e.to_s
47
43
  end
48
44
  end
49
45
 
50
46
  def get_rows(scanner, limit = nil)
51
- warn "[DEPRECATION] Use of 'limit' here is deprecated. Instead, define the batch size when creating the scanner." if limit
52
47
  begin
53
48
  request = Request::ScannerRequest.new(scanner.table_name)
54
- rows = Response::ScannerResponse.new(get(request.get_rows(scanner)), :get_rows).parse
55
- rows.each do |row|
56
- row.table_name = scanner.table_name
49
+ request_url = request.get_rows(scanner) # The url to the scanner is the same for each batch
50
+
51
+ rows = []
52
+ begin
53
+ # Loop until we've reached the limit, or the scanner was exhausted (HTTP 204 returned)
54
+ until (limit && rows.size >= limit) || (response = get_response(request_url)).code == "204"
55
+ rows.concat Response::ScannerResponse.new(response.body, :get_rows).parse
56
+
57
+ rows.each do |row|
58
+ row.table_name = scanner.table_name
59
+ end
60
+ end
61
+ rescue Exception => e
62
+ raise HBase::ScannerError, "Scanner failed while getting rows. #{e.message}"
57
63
  end
58
- rows
64
+
65
+ # Trim any rows beyond the limit, since the last batch fetched may overshoot it.
66
+ (limit) ? rows.slice(0, limit) : rows
59
67
  rescue StandardError => e
60
68
  if e.to_s.include?("TableNotFoundException")
61
69
  raise TableNotFoundError, "Table #{table_name} Not Found!"
@@ -15,7 +15,11 @@ module HBase
15
15
  when Net::HTTPCreated
16
16
  HBase::Model::Scanner.new(:scanner_url => raw_data["Location"])
17
17
  else
18
- raise StandardError, "Unable to open scanner. Received the following message: #{raw_data.message}"
18
+ if raw_data.message.include?("TableNotFoundException")
19
+ raise TableNotFoundError, "Table #{table_name} Not Found!"
20
+ else
21
+ raise StandardError, "Unable to open scanner. Received the following message: #{raw_data.message}"
22
+ end
19
23
  end
20
24
  when :get_rows
21
25
  # Dispatch it to RowResponse, since that method is made
@@ -12,6 +12,12 @@ describe HBase::Operation::ScannerOperation do
12
12
  @client.create_row('test-hbase-ruby', 'row3', nil, {:name => 'col1:', :value => "row3-col1"})
13
13
  end
14
14
 
15
+ it "should throw TableNotFoundError if a scanner is requested for an non-existant table" do
16
+ lambda {
17
+ scanner = @client.open_scanner("test-dsg-ruby")
18
+ }.should raise_error
19
+ end
20
+
15
21
  it "should open a scanner and close it successfully" do
16
22
  scanner = @client.open_scanner("test-hbase-ruby")
17
23
  scanner.should.is_a? HBase::Model::Scanner
@@ -25,17 +31,33 @@ describe HBase::Operation::ScannerOperation do
25
31
  }.should_not raise_error
26
32
  end
27
33
 
28
- it "should scan the whole table when given no options" do
34
+ it "should scan the whole table when given no options and no limit" do
29
35
  scanner = @client.open_scanner("test-hbase-ruby")
30
36
 
31
37
  rows = @client.get_rows(scanner)
32
- rows.size.should == 1
33
- rows.first.name.should == "row1"
38
+ rows.size.should == 3
39
+ rows.each do |row|
40
+ row.should be_an_instance_of HBase::Model::Row
41
+ ["row1", "row2", "row3"].should include(row.name)
42
+ end
43
+
44
+ @client.close_scanner(scanner).should be_true
45
+ end
46
+
47
+ it "should scan the whole table but limit the results when given a limit" do
48
+ scanner = @client.open_scanner("test-hbase-ruby")
49
+
50
+ rows = @client.get_rows(scanner, 2)
51
+ rows.size.should == 2
52
+ rows.each do |row|
53
+ row.should be_an_instance_of HBase::Model::Row
54
+ ["row1", "row2"].should include(row.name)
55
+ end
34
56
 
35
57
  @client.close_scanner(scanner).should be_true
36
58
  end
37
59
 
38
- it "should scan the whole table when given a batch size larger than the number of rows" do
60
+ it "should return all rows when given a batch size larger than the number of rows" do
39
61
  scanner = @client.open_scanner("test-hbase-ruby", {:batch => 5})
40
62
 
41
63
  rows = @client.get_rows(scanner)
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hbase-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ye Dingding
8
- - Greg Lu
8
+ - Openplaces
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []