impala 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,18 +1,18 @@
1
1
  require "bundler/gem_tasks"
2
+ require "rake/testtask"
2
3
 
3
4
  task :default => [:test]
4
5
 
5
- task :test do
6
- ret = true
7
- Dir["test/**/*.rb"].each do |f|
8
- ret = ret && ruby(f, '')
9
- end
10
- exit(ret)
6
+ Rake::TestTask.new do |t|
7
+ t.libs.push "lib"
8
+ t.test_files = FileList['test/test_*.rb']
9
+ t.verbose = true
11
10
  end
12
11
 
13
12
  THRIFT_FILES = FileList['./thrift/*.thrift']
14
13
  GENNED_FILES = FileList['./lib/impala/protocol/*']
15
14
 
15
+ # eden only works on 1.8, unfortunately =(
16
16
  task :gen do
17
17
  THRIFT_FILES.each do |f|
18
18
  sh "thrift -out lib/impala/protocol --gen rb #{f}"
data/impala.gemspec CHANGED
@@ -18,8 +18,7 @@ Gem::Specification.new do |gem|
18
18
  gem.add_development_dependency('eden')
19
19
  gem.add_development_dependency('pry')
20
20
 
21
- gem.add_development_dependency('test-unit')
22
- gem.add_development_dependency('shoulda')
21
+ gem.add_development_dependency('minitest', '~> 4.5')
23
22
  gem.add_development_dependency('mocha')
24
23
 
25
24
  gem.add_development_dependency('yard')
data/lib/impala.rb CHANGED
@@ -21,6 +21,14 @@ module Impala
21
21
  class CursorError < StandardError; end
22
22
  class ParsingError < StandardError; end
23
23
 
24
+ # Connect to an Impala server. If a block is given, it will close the
25
+ # connection after calling the block with the connection.
26
+ # @param [String] host the hostname or IP address of the Impala server
27
+ # @param [int] port the port that the Impala server is listening on
28
+ # @yieldparam [Connection] conn the open connection. Will be closed once the block
29
+ # finishes
30
+ # @return [Connection] the open connection, or, if a block is
31
+ # passed, the return value of the block
24
32
  def self.connect(host=DEFAULT_HOST, port=DEFAULT_PORT)
25
33
  connection = Connection.new(host, port)
26
34
 
@@ -1,7 +1,10 @@
1
1
  module Impala
2
+ # This object represents a connection to an Impala server. It can be used to
3
+ # perform queries on the database.
2
4
  class Connection
3
5
  SLEEP_INTERVAL = 0.1
4
6
 
7
+ # Don't instantiate Connections directly; instead, use {Impala.connect}.
5
8
  def initialize(host, port)
6
9
  @host = host
7
10
  @port = port
@@ -9,6 +12,11 @@ module Impala
9
12
  open
10
13
  end
11
14
 
15
+ def inspect
16
+ "#<#{self.class} #{@host}:#{@port}#{open? ? '' : ' (DISCONNECTED)'}>"
17
+ end
18
+
19
+ # Open the connection if it's currently closed.
12
20
  def open
13
21
  return if @connected
14
22
 
@@ -22,19 +30,31 @@ module Impala
22
30
  @connected = true
23
31
  end
24
32
 
33
+ # Close this connection. It can still be reopened with {#open}.
25
34
  def close
35
+ return unless @connected
36
+
26
37
  @transport.close
27
38
  @connected = false
28
39
  end
29
40
 
41
+ # Returns true if the connection is currently open.
30
42
  def open?
31
43
  @connected
32
44
  end
33
45
 
46
+ # Perform a query and return all the results. This will
47
+ # load the entire result set into memory, so if you're dealing with lots
48
+ # of rows, {#execute} may work better.
49
+ # @param [String] raw_query the query you want to run
50
+ # @return [Array<Hash>] an array of hashes, one for each row.
34
51
  def query(raw_query)
35
52
  execute(raw_query).fetch_all
36
53
  end
37
54
 
55
+ # Perform a query and return a cursor for iterating over the results.
56
+ # @param [String] raw_query the query you want to run
57
+ # @return [Cursor] a cursor for the result rows
38
58
  def execute(raw_query)
39
59
  raise ConnectionError.new("Connection closed") unless open?
40
60
 
data/lib/impala/cursor.rb CHANGED
@@ -1,4 +1,7 @@
1
1
  module Impala
2
+ # Cursors are used to iterate over result sets without loading them all
3
+ # into memory at once. This can be useful if you're dealing with lots of
4
+ # rows. It implements Enumerable, so you can use each/select/map/etc.
2
5
  class Cursor
3
6
  include Enumerable
4
7
 
@@ -11,7 +14,11 @@ module Impala
11
14
  @row_buffer = []
12
15
 
13
16
  @done = false
14
- @closed = false
17
+ @open = true
18
+ end
19
+
20
+ def inspect
21
+ "#<#{self.class}#{open? ? '' : ' (CLOSED)'}>"
15
22
  end
16
23
 
17
24
  def each
@@ -20,8 +27,12 @@ module Impala
20
27
  end
21
28
  end
22
29
 
30
+ # Returns the next available row as a hash, or nil if there are none left.
31
+ # @return [Hash, nil] the next available row, or nil if there are none
32
+ # left
33
+ # @see #fetch_all
23
34
  def fetch_row
24
- raise CursorError.new("Cursor has expired or been closed") if @closed
35
+ raise CursorError.new("Cursor has expired or been closed") unless @open
25
36
 
26
37
  if @row_buffer.empty?
27
38
  if @done
@@ -34,15 +45,30 @@ module Impala
34
45
  @row_buffer.shift
35
46
  end
36
47
 
48
+ # Returns all the remaining rows in the result set.
49
+ # @return [Array<Hash>] the remaining rows in the result set
50
+ # @see #fetch_row
37
51
  def fetch_all
38
52
  self.to_a
39
53
  end
40
54
 
55
+ # Close the cursor on the remote server. Once a cursor is closed, you
56
+ # can no longer fetch any rows from it.
41
57
  def close
42
- @closed = true
58
+ @open = false
43
59
  @service.close(@handle)
44
60
  end
45
61
 
62
+ # Returns true if the cursor is still open.
63
+ def open?
64
+ @open
65
+ end
66
+
67
+ # Returns true if there are any more rows to fetch.
68
+ def has_more?
69
+ !@done || !@row_buffer.empty?
70
+ end
71
+
46
72
  private
47
73
 
48
74
  def fetch_more
@@ -1,6 +1,8 @@
1
1
  require 'impala/protocol/impala_service'
2
2
 
3
3
  module Impala
4
+ # Taken as a whole, this module contains all the thrift-generated stuff that
5
+ # defines the Impala protocol.
4
6
  module Protocol
5
7
  end
6
8
  end
@@ -1,3 +1,3 @@
1
1
  module Impala
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
data/test/test_impala.rb CHANGED
@@ -1,34 +1,64 @@
1
1
  require 'impala'
2
- require 'test-unit'
3
- require 'shoulda'
2
+ require 'minitest/autorun'
4
3
  require 'mocha'
5
4
 
6
- class TestImpala < Test::Unit::TestCase
7
- include Mocha
5
+ describe 'Impala.connect' do
6
+ before do
7
+ Thrift::Socket.expects(:new).with('host', 12345)
8
+ Thrift::BufferedTransport.expects(:new).once.returns(stub(:open => nil))
9
+ Thrift::BinaryProtocol.expects(:new).once
10
+ Impala::Protocol::ImpalaService::Client.expects(:new).once
11
+ end
12
+
13
+ it 'should return an open connection when passed a block' do
14
+ connection = Impala.connect('host', 12345)
15
+ assert_equal(Impala::Connection, connection.class)
16
+ assert_equal(true, connection.open?)
17
+ end
8
18
 
9
- context 'Impala.connect' do
10
- setup do
11
- Thrift::Socket.expects(:new).with('host', 12345)
12
- Thrift::BufferedTransport.expects(:new).once.returns(stub(:open => nil))
13
- Thrift::BinaryProtocol.expects(:new).once
14
- Impala::Protocol::ImpalaService::Client.expects(:new).once
19
+ it 'should return the results of the query when given a block with a query, and then close tho connection' do
20
+ Impala::Connection.any_instance.stubs(:query => 'result')
21
+ Impala::Connection.any_instance.expects(:close).once
22
+
23
+ ret = Impala.connect('host', 12345) do |conn|
24
+ conn.query('query')
15
25
  end
16
26
 
17
- should 'return an open connection when passed a block' do
18
- connection = Impala.connect('host', 12345)
19
- assert_equal(Impala::Connection, connection.class)
20
- assert_equal(true, connection.open?)
27
+ assert_equal('result', ret)
28
+ end
29
+ end
30
+
31
+ describe Impala::Connection do
32
+ describe '#sanitize_query' do
33
+ before do
34
+ Impala::Connection.any_instance.stubs(:open)
35
+ @connection = Impala::Connection.new('test', 1234)
21
36
  end
22
37
 
23
- should 'return the results of the query when given a block with a query, and then close tho connection' do
24
- Impala::Connection.any_instance.stubs(:query => 'result')
25
- Impala::Connection.any_instance.expects(:close).once
38
+ it 'should downcase the command but nothing else' do
39
+ query = 'SELECT blah FROM Blah'
40
+ assert_equal('select blah FROM Blah', @connection.send(:sanitize_query, query))
41
+ end
26
42
 
27
- ret = Impala.connect('host', 12345) do |conn|
28
- conn.query('query')
29
- end
43
+ it 'should reject empty or invalid queries' do
44
+ assert_raises(Impala::InvalidQueryError) { @connection.send(:sanitize_query, '')}
45
+ assert_raises(Impala::InvalidQueryError) { @connection.send(:sanitize_query, 'HERRO herro herro')}
46
+ end
47
+ end
48
+
49
+ describe '#wait_for_result' do
50
+ before do
51
+ Impala::Connection.any_instance.stubs(:open)
52
+ @connection = Impala::Connection.new('test', 1234)
53
+ @service = stub(:get_state => nil)
54
+ @connection.instance_variable_set('@service', @service)
55
+ end
30
56
 
31
- assert_equal('result', ret)
57
+ it 'should close the handle if an exception is raised, and then re-raise' do
58
+ handle = stub()
59
+ @service.expects(:close).with(handle).once
60
+ @service.expects(:get_state).raises(StandardError)
61
+ assert_raises(StandardError) { @connection.send(:wait_for_result, handle) }
32
62
  end
33
63
  end
34
64
  end
@@ -0,0 +1,47 @@
1
+ require 'impala'
2
+ require 'minitest/autorun'
3
+ require 'mocha'
4
+
5
+ # these are tests that require an available Impala server. To run them,
6
+ # declare an IMPALA_SERVER env var, e.g. `IMPALA_SERVER=localhost:21000 rake`
7
+ IMPALA_SERVER = ENV['IMPALA_SERVER']
8
+
9
+ def connect
10
+ parts = IMPALA_SERVER.split(':')
11
+ if parts.length != 2 || parts.any? { |p| p.empty? }
12
+ raise "Invalid IMPALA_SERVER: #{IMPALA_SERVER}"
13
+ end
14
+
15
+ host, port = parts
16
+ Impala.connect(host, port)
17
+ end
18
+
19
+ describe 'connected tests' do
20
+ before do
21
+ skip unless IMPALA_SERVER
22
+ @connection = connect
23
+ end
24
+
25
+ it 'can successfully connect' do
26
+ assert_instance_of(Impala::Connection, @connection)
27
+ assert(@connection.open?, "the connection should be open")
28
+ end
29
+
30
+ it 'can run a basic query' do
31
+ ret = @connection.query('SELECT 1 AS a')
32
+ assert_equal([{:a=>1}], ret, "the result should be a list of hashes")
33
+ end
34
+
35
+ # TODO: this test sucks because there's no way to get multiple records
36
+ # with a literal select. perhaps there should be importable test data?
37
+ it 'can get a cursor and fetch one row at a time' do
38
+ cursor = @connection.execute('SELECT 1 AS a')
39
+ assert_instance_of(Impala::Cursor, cursor, "the result should be a cursor")
40
+
41
+ row = cursor.fetch_row
42
+ assert_equal({:a=>1}, row, "the row should be a hash")
43
+
44
+ assert_equal(false, cursor.has_more?, "has_more? should be false")
45
+ assert_nil(cursor.fetch_row, "subsequent calls to fetch_row should be nil")
46
+ end
47
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: impala
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-25 00:00:00.000000000 Z
12
+ date: 2013-01-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: thrift
@@ -76,37 +76,21 @@ dependencies:
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
78
  - !ruby/object:Gem::Dependency
79
- name: test-unit
79
+ name: minitest
80
80
  requirement: !ruby/object:Gem::Requirement
81
81
  none: false
82
82
  requirements:
83
- - - ! '>='
84
- - !ruby/object:Gem::Version
85
- version: '0'
86
- type: :development
87
- prerelease: false
88
- version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
- requirements:
91
- - - ! '>='
92
- - !ruby/object:Gem::Version
93
- version: '0'
94
- - !ruby/object:Gem::Dependency
95
- name: shoulda
96
- requirement: !ruby/object:Gem::Requirement
97
- none: false
98
- requirements:
99
- - - ! '>='
83
+ - - ~>
100
84
  - !ruby/object:Gem::Version
101
- version: '0'
85
+ version: '4.5'
102
86
  type: :development
103
87
  prerelease: false
104
88
  version_requirements: !ruby/object:Gem::Requirement
105
89
  none: false
106
90
  requirements:
107
- - - ! '>='
91
+ - - ~>
108
92
  - !ruby/object:Gem::Version
109
- version: '0'
93
+ version: '4.5'
110
94
  - !ruby/object:Gem::Dependency
111
95
  name: mocha
112
96
  requirement: !ruby/object:Gem::Requirement
@@ -242,6 +226,7 @@ files:
242
226
  - lib/impala/protocol/types_types.rb
243
227
  - lib/impala/version.rb
244
228
  - test/test_impala.rb
229
+ - test/test_impala_connected.rb
245
230
  - thrift/Data.thrift
246
231
  - thrift/DataSinks.thrift
247
232
  - thrift/Descriptors.thrift
@@ -290,4 +275,5 @@ specification_version: 3
290
275
  summary: A ruby client for Cloudera's Impala
291
276
  test_files:
292
277
  - test/test_impala.rb
278
+ - test/test_impala_connected.rb
293
279
  has_rdoc: