rbhive 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -37,6 +37,7 @@ module RBHive
37
37
  @client = ThriftHive::Client.new(@protocol)
38
38
  @logger = logger
39
39
  @logger.info("Connecting to #{server} on port #{port}")
40
+ @mutex = Mutex.new
40
41
  end
41
42
 
42
43
  def open
@@ -52,8 +53,7 @@ module RBHive
52
53
  end
53
54
 
54
55
  def execute(query)
55
- @logger.info("Executing Hive Query: #{query}")
56
- client.execute(query)
56
+ execute_safe(query)
57
57
  end
58
58
 
59
59
  def priority=(priority)
@@ -70,21 +70,31 @@ module RBHive
70
70
  end
71
71
 
72
72
  def fetch(query)
73
- execute(query)
74
- ResultSet.new(client.fetchAll, client.getSchema)
73
+ safe do
74
+ execute_unsafe(query)
75
+ rows = client.fetchAll
76
+ schema = SchemaDefinition.new(client.getSchema, rows.first)
77
+ ResultSet.new(rows, schema)
78
+ end
75
79
  end
76
80
 
77
- def fetch_in_batch(query, batch_size=100)
78
- execute(query)
79
- schema = client.getSchema
80
- until (next_batch = client.fetchN(batch_size)).empty?
81
- yield ResultSet.new(next_batch, schema)
81
+ def fetch_in_batch(query, batch_size=1_000)
82
+ safe do
83
+ execute_unsafe(query)
84
+ until (next_batch = client.fetchN(batch_size)).empty?
85
+ schema ||= SchemaDefinition.new(client.getSchema, next_batch.first)
86
+ yield ResultSet.new(next_batch, schema)
87
+ end
82
88
  end
83
89
  end
84
90
 
85
91
  def first(query)
86
- execute(query)
87
- ResultSet.new([client.fetchOne], client.getSchema)
92
+ safe do
93
+ execute_unsafe(query)
94
+ row = client.fetchOne
95
+ schema = SchemaDefinition.new(client.getSchema, row)
96
+ ResultSet.new([row], schema).first
97
+ end
88
98
  end
89
99
 
90
100
  def create_table(schema)
@@ -107,5 +117,22 @@ module RBHive
107
117
  def method_missing(meth, *args)
108
118
  client.send(meth, *args)
109
119
  end
120
+
121
+ private
122
+
123
+ def execute_safe(query)
124
+ safe { execute_unsafe(query) }
125
+ end
126
+
127
+ def execute_unsafe(query)
128
+ @logger.info("Executing Hive Query: #{query}")
129
+ client.execute(query)
130
+ end
131
+
132
+ def safe
133
+ ret = nil
134
+ @mutex.synchronize { ret = yield }
135
+ ret
136
+ end
110
137
  end
111
138
  end
@@ -1,7 +1,7 @@
1
1
  module RBHive
2
2
  class ResultSet < Array
3
- def initialize(rows, schema=[])
4
- @schema = SchemaDefinition.new(schema, rows.first)
3
+ def initialize(rows, schema)
4
+ @schema = schema
5
5
  super(rows.map {|r| @schema.coerce_row(r) })
6
6
  end
7
7
 
@@ -14,16 +14,16 @@ module RBHive
14
14
  end
15
15
 
16
16
  def to_csv(out_file=nil)
17
- output(",", out_file)
17
+ to_seperated_output(",", out_file)
18
18
  end
19
19
 
20
20
  def to_tsv(out_file=nil)
21
- output("\t", out_file)
21
+ to_seperated_output("\t", out_file)
22
22
  end
23
23
 
24
24
  private
25
25
 
26
- def output(sep, out_file)
26
+ def to_seperated_output(sep, out_file)
27
27
  rows = self.map { |r| @schema.coerce_row_to_array(r).join(sep) }
28
28
  sv = rows.join("\n")
29
29
  return sv if out_file.nil?
@@ -3,19 +3,19 @@ module RBHive
3
3
  attr_reader :schema
4
4
 
5
5
  TYPES = {
6
- :boolean => :to_s,
7
- :string => :to_s,
8
- :bigint => :to_i,
9
- :float => :to_f,
10
- :double => :to_f,
11
- :int => :to_i,
12
- :smallint => :to_i,
13
- :tinyint => :to_i
6
+ :boolean => :to_s,
7
+ :string => :to_s,
8
+ :bigint => :to_i,
9
+ :float => :to_f,
10
+ :double => :to_f,
11
+ :int => :to_i,
12
+ :smallint => :to_i,
13
+ :tinyint => :to_i,
14
14
  }
15
15
 
16
16
  def initialize(schema, example_row)
17
17
  @schema = schema
18
- @example_row = example_row.split("\t")
18
+ @example_row = example_row ? example_row.split("\t") : []
19
19
  end
20
20
 
21
21
  def column_names
metadata CHANGED
@@ -1,34 +1,48 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rbhive
3
- version: !ruby/object:Gem::Version
4
- version: 0.2.2
3
+ version: !ruby/object:Gem::Version
4
+ hash: 17
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 3
10
+ version: 0.2.3
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Forward Internet Group
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2010-12-07 00:00:00.000000000 +00:00
17
+
18
+ date: 2010-12-07 00:00:00 +00:00
13
19
  default_executable:
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
16
22
  name: thrift
17
- requirement: &2160558040 !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
18
25
  none: false
19
- requirements:
20
- - - ! '>='
21
- - !ruby/object:Gem::Version
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 15
30
+ segments:
31
+ - 0
32
+ - 4
33
+ - 0
22
34
  version: 0.4.0
23
35
  type: :runtime
24
- prerelease: false
25
- version_requirements: *2160558040
36
+ version_requirements: *id001
26
37
  description: Simple lib for executing Hive queries
27
38
  email: andy@forward.co.uk
28
39
  executables: []
40
+
29
41
  extensions: []
42
+
30
43
  extra_rdoc_files: []
31
- files:
44
+
45
+ files:
32
46
  - lib/rbhive.rb
33
47
  - lib/rbhive/connection.rb
34
48
  - lib/rbhive/table_schema.rb
@@ -52,26 +66,36 @@ files:
52
66
  has_rdoc: true
53
67
  homepage: http://github.com/trfficbroker/rbhive
54
68
  licenses: []
69
+
55
70
  post_install_message:
56
71
  rdoc_options: []
57
- require_paths:
72
+
73
+ require_paths:
58
74
  - lib
59
- required_ruby_version: !ruby/object:Gem::Requirement
75
+ required_ruby_version: !ruby/object:Gem::Requirement
60
76
  none: false
61
- requirements:
62
- - - ! '>='
63
- - !ruby/object:Gem::Version
64
- version: '0'
65
- required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ hash: 3
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
85
  none: false
67
- requirements:
68
- - - ! '>='
69
- - !ruby/object:Gem::Version
70
- version: '0'
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
71
93
  requirements: []
94
+
72
95
  rubyforge_project:
73
96
  rubygems_version: 1.6.2
74
97
  signing_key:
75
98
  specification_version: 3
76
99
  summary: Simple lib for executing Hive queries
77
100
  test_files: []
101
+