rbhive 0.1.17 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbhive/connection.rb +1 -23
- data/lib/rbhive/result_set.rb +33 -0
- data/lib/rbhive/schema_definition.rb +60 -0
- data/lib/rbhive/table_schema.rb +88 -0
- data/lib/rbhive.rb +3 -1
- metadata +30 -48
- data/lib/rbhive/schema.rb +0 -73
data/lib/rbhive/connection.rb
CHANGED
@@ -71,7 +71,7 @@ module RBHive
|
|
71
71
|
|
72
72
|
def fetch(query)
|
73
73
|
execute(query)
|
74
|
-
ResultSet.new(client.fetchAll)
|
74
|
+
ResultSet.new(client.fetchAll, client.getSchema)
|
75
75
|
end
|
76
76
|
|
77
77
|
def fetch_in_batch(query, batch_size=100)
|
@@ -107,26 +107,4 @@ module RBHive
|
|
107
107
|
client.send(meth, *args)
|
108
108
|
end
|
109
109
|
end
|
110
|
-
|
111
|
-
class ResultSet < Array
|
112
|
-
def initialize(rows)
|
113
|
-
super(rows.map {|r| r.split("\t") })
|
114
|
-
end
|
115
|
-
|
116
|
-
def to_csv(out_file=nil)
|
117
|
-
output(",", out_file)
|
118
|
-
end
|
119
|
-
|
120
|
-
def to_tsv(out_file=nil)
|
121
|
-
output("\t", out_file)
|
122
|
-
end
|
123
|
-
|
124
|
-
private
|
125
|
-
|
126
|
-
def output(sep, out_file)
|
127
|
-
sv = self.map { |r| r.join(sep) }.join("\n")
|
128
|
-
return sv if out_file.nil?
|
129
|
-
File.open(out_file, 'w') { |f| f << sv }
|
130
|
-
end
|
131
|
-
end
|
132
110
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module RBHive
  # An Array of result rows. Each element is a Hash of column name => value
  # as produced by SchemaDefinition#coerce_row for one raw row.
  class ResultSet < Array
    # rows   - the raw rows handed to SchemaDefinition#coerce_row (one per
    #          result row).
    # schema - the schema object passed through to SchemaDefinition.new;
    #          defaults to [].
    def initialize(rows, schema=[])
      @schema = SchemaDefinition.new(schema, rows.first)
      super(rows.map { |raw_row| @schema.coerce_row(raw_row) })
    end

    # Column names in result order, as reported by the schema definition.
    def column_names
      @schema.column_names
    end

    # Hash of column name => column type, as reported by the schema definition.
    def column_type_map
      @schema.column_type_map
    end

    # Renders the rows as comma-separated text, one line per row.
    #
    # Fields that contain a comma, a double quote or a line break are wrapped
    # in double quotes (with embedded quotes doubled) so the output stays
    # parseable; all other fields are written verbatim.
    #
    # Returns the text when out_file is nil, otherwise writes it to out_file.
    def to_csv(out_file=nil)
      output(",", out_file) { |value| csv_escape(value) }
    end

    # Renders the rows as tab-separated text, one line per row.
    #
    # Returns the text when out_file is nil, otherwise writes it to out_file.
    def to_tsv(out_file=nil)
      output("\t", out_file)
    end

    private

    # Joins every row with +sep+ (columns ordered as in column_names) and the
    # rows with "\n". An optional block transforms each value before joining.
    def output(sep, out_file)
      lines = map do |row|
        values = @schema.coerce_row_to_array(row)
        values = values.map { |value| yield(value) } if block_given?
        values.join(sep)
      end
      sv = lines.join("\n")
      return sv if out_file.nil?
      File.open(out_file, 'w+') { |f| f << sv }
    end

    # Quotes a single CSV field when it contains a character that would
    # otherwise break the row structure.
    def csv_escape(value)
      text = value.to_s
      return text unless text =~ /[",\r\n]/
      %("#{text.gsub('"', '""')}")
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module RBHive
  # Describes the columns of a result set and converts raw tab-separated rows
  # into Hashes of column name => coerced Ruby value.
  class SchemaDefinition
    attr_reader :schema

    # Column type => name of the String method used to coerce a raw field.
    # Fields whose type is not listed here are passed through unchanged.
    TYPES = {
      :boolean => :to_s,
      :string => :to_s,
      :bigint => :to_i,
      :float => :to_f,
      :double => :to_f,
      :int => :to_i,
      :smallint => :to_i,
      :tinyint => :to_i
    }

    # schema      - object whose #fieldSchemas lists entries responding to
    #               #name and #type. Anything without #fieldSchemas (for
    #               example the [] default used by ResultSet) is treated as
    #               declaring no columns.
    # example_row - one raw tab-separated row, used only to count how many
    #               columns the data really has; nil when there are no rows.
    def initialize(schema, example_row)
      @schema = schema
      # An empty result set has no first row; treat it as zero observed columns
      # instead of calling split on nil.
      @example_row = example_row.nil? ? [] : split_row(example_row)
    end

    # Column names as symbols, in row order.
    def column_names
      @column_names ||= begin
        names = field_schemas.map { |c| c.name.to_sym }
        # Hive doesn't return schema data for partitions on SELECT * queries.
        # For now the extra columns are called :_p1, :_p2, etc. to avoid collisions.
        missing = @example_row.length - names.length
        1.upto(missing) { |i| names << :"_p#{i}" }
        names
      end
    end

    # Hash of column name => type symbol. Columns that are not in the schema
    # (eg partitions in SELECT * queries) are assumed to be strings.
    def column_type_map
      @column_type_map ||= begin
        # Index the declared fields once; the first declaration of a name wins.
        declared = {}
        field_schemas.each do |field|
          key = field.name.to_sym
          declared[key] = field.type.to_sym unless declared.key?(key)
        end
        column_names.each_with_object({}) do |name, map|
          map[name] = declared.fetch(name, :string)
        end
      end
    end

    # Converts one raw tab-separated row into a Hash of column name => coerced
    # value. Fields beyond the known columns are dropped; columns without a
    # field get the coercion of nil.
    def coerce_row(row)
      column_names.zip(split_row(row)).each_with_object({}) do |(column_name, value), hsh|
        hsh[column_name] = coerce_column(column_name, value)
      end
    end

    # Coerces a single raw value using the conversion registered in TYPES for
    # the column's type; values of unlisted types are returned untouched.
    def coerce_column(column_name, value)
      type = column_type_map[column_name]
      conversion_method = TYPES[type]
      conversion_method ? value.send(conversion_method) : value
    end

    # Turns a coerced row Hash back into an Array ordered like column_names.
    def coerce_row_to_array(row)
      column_names.map { |n| row[n] }
    end

    private

    # Splits a raw row on tabs. The -1 limit keeps trailing empty fields, which
    # a plain split would silently discard.
    def split_row(row)
      row.split("\t", -1)
    end

    # The schema's declared fields, or [] when the schema object has none.
    def field_schemas
      return [] unless @schema.respond_to?(:fieldSchemas)
      @schema.fieldSchemas || []
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module RBHive
  # Small DSL for describing a table and generating the matching
  # CREATE TABLE / ALTER TABLE statements as strings.
  class TableSchema
    attr_accessor :name
    attr_reader :columns, :partitions

    # name    - table name.
    # comment - optional table comment.
    # options - Hash with optional keys:
    #           :location       - storage location; when given the table is
    #                             created as EXTERNAL.
    #           :field_sep      - field delimiter (default "\t").
    #           :line_sep       - line delimiter (default "\n").
    #           :collection_sep - collection item delimiter (default "|").
    # blk     - optional block evaluated against the new instance, so it can
    #           call #column and #partition directly.
    def initialize(name, comment=nil, options={}, &blk)
      @name, @comment = name, comment
      @location = options[:location]
      @field_sep = options[:field_sep] || "\t"
      @line_sep = options[:line_sep] || "\n"
      @collection_sep = options[:collection_sep] || "|"
      @columns = []
      @partitions = []
      instance_eval(&blk) if blk
    end

    # Appends a regular column to the table definition.
    def column(name, type, comment=nil)
      @columns << Column.new(name, type, comment)
    end

    # Appends a partition column to the table definition.
    def partition(name, type, comment=nil)
      @partitions << Column.new(name, type, comment)
    end

    # Builds the CREATE TABLE statement, one clause per line.
    #
    # COLLECTION ITEMS TERMINATED BY is emitted before LINES TERMINATED BY
    # because Hive's row_format grammar only accepts the delimiter clauses in
    # that order. The LOCATION clause is left out when no location was given.
    def create_table_statement()
      clauses = [
        "CREATE #{external}TABLE #{table_statement}",
        "ROW FORMAT DELIMITED",
        "FIELDS TERMINATED BY '#{@field_sep}'",
        "COLLECTION ITEMS TERMINATED BY '#{@collection_sep}'",
        "LINES TERMINATED BY '#{@line_sep}'",
        "STORED AS TEXTFILE",
        location
      ]
      clauses.reject { |clause| clause.empty? }.join("\n")
    end

    # ALTER TABLE ... REPLACE COLUMNS statement for the current columns.
    def replace_columns_statement
      alter_columns_statement("REPLACE")
    end

    # ALTER TABLE ... ADD COLUMNS statement for the current columns.
    def add_columns_statement
      alter_columns_statement("ADD")
    end

    # The table name, column list, comment and partition clause.
    def to_s
      table_statement
    end

    private

    # "EXTERNAL " when a location is configured, otherwise an empty string.
    def external
      @location.nil? ? '' : 'EXTERNAL '
    end

    # Quoted name, column list, optional comment, then the partition clause on
    # its own line (the line is empty when there are no partitions).
    def table_statement
      comment_string = (@comment.nil? ? '' : " COMMENT '#{@comment}'")
      %[`#{@name}` #{column_statement}#{comment_string}\n#{partition_statement}]
    end

    # LOCATION clause, or an empty string when no location is configured.
    def location
      @location.nil? ? '' : "LOCATION '#{@location}'"
    end

    def alter_columns_statement(add_or_replace)
      %[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
    end

    # Parenthesised, comma-separated column definitions, one per line.
    def column_statement
      cols = @columns.join(",\n")
      "(\n#{cols}\n)"
    end

    # PARTITIONED BY clause, or an empty string when there are no partitions.
    def partition_statement
      return "" if @partitions.nil? || @partitions.empty?
      cols = @partitions.join(",\n")
      "PARTITIONED BY (\n#{cols}\n)"
    end

    # A single column (or partition column) definition.
    class Column
      attr_reader :name, :type, :comment

      def initialize(name, type, comment=nil)
        @name, @type, @comment = name, type, comment
      end

      # Renders the column as "`name` TYPE" with an optional COMMENT clause.
      def to_s
        comment_string = @comment.nil? ? '' : " COMMENT '#{@comment}'"
        "`#{@name}` #{@type.to_s.upcase}#{comment_string}"
      end
    end
  end
end
|
data/lib/rbhive.rb
CHANGED
@@ -1,2 +1,4 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'rbhive', 'connection')
|
2
|
-
require File.join(File.dirname(__FILE__), 'rbhive', 'schema')
|
2
|
+
require File.join(File.dirname(__FILE__), 'rbhive', 'table_schema')
|
3
|
+
require File.join(File.dirname(__FILE__), 'rbhive', 'result_set')
|
4
|
+
require File.join(File.dirname(__FILE__), 'rbhive', 'schema_definition')
|
metadata
CHANGED
@@ -1,49 +1,39 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbhive
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 1
|
8
|
-
- 17
|
9
|
-
version: 0.1.17
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Forward Internet Group
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
date: 2010-12-07 00:00:00 +00:00
|
12
|
+
date: 2010-12-07 00:00:00.000000000 +00:00
|
18
13
|
default_executable:
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
21
16
|
name: thrift
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &2153944980 !ruby/object:Gem::Requirement
|
24
18
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
- 4
|
31
|
-
- 0
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
32
22
|
version: 0.4.0
|
33
23
|
type: :runtime
|
34
|
-
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *2153944980
|
35
26
|
description: Simple lib for executing Hive queries
|
36
27
|
email: andy@forward.co.uk
|
37
28
|
executables: []
|
38
|
-
|
39
29
|
extensions: []
|
40
|
-
|
41
30
|
extra_rdoc_files: []
|
42
|
-
|
43
|
-
files:
|
31
|
+
files:
|
44
32
|
- lib/rbhive.rb
|
45
33
|
- lib/rbhive/connection.rb
|
46
|
-
- lib/rbhive/schema.rb
|
34
|
+
- lib/rbhive/table_schema.rb
|
35
|
+
- lib/rbhive/result_set.rb
|
36
|
+
- lib/rbhive/schema_definition.rb
|
47
37
|
- lib/thrift/facebook_service.rb
|
48
38
|
- lib/thrift/fb303_constants.rb
|
49
39
|
- lib/thrift/fb303_types.rb
|
@@ -62,34 +52,26 @@ files:
|
|
62
52
|
has_rdoc: true
|
63
53
|
homepage: http://github.com/trfficbroker/rbhive
|
64
54
|
licenses: []
|
65
|
-
|
66
55
|
post_install_message:
|
67
56
|
rdoc_options: []
|
68
|
-
|
69
|
-
require_paths:
|
57
|
+
require_paths:
|
70
58
|
- lib
|
71
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
60
|
none: false
|
73
|
-
requirements:
|
74
|
-
- -
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
|
77
|
-
|
78
|
-
version: "0"
|
79
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
66
|
none: false
|
81
|
-
requirements:
|
82
|
-
- -
|
83
|
-
- !ruby/object:Gem::Version
|
84
|
-
|
85
|
-
- 0
|
86
|
-
version: "0"
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
87
71
|
requirements: []
|
88
|
-
|
89
72
|
rubyforge_project:
|
90
|
-
rubygems_version: 1.
|
73
|
+
rubygems_version: 1.6.2
|
91
74
|
signing_key:
|
92
75
|
specification_version: 3
|
93
76
|
summary: Simple lib for executing Hive queries
|
94
77
|
test_files: []
|
95
|
-
|
data/lib/rbhive/schema.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
class TableSchema
|
2
|
-
attr_accessor :name
|
3
|
-
attr_reader :columns, :partitions
|
4
|
-
def initialize(name, comment=nil, field_sep='\t', line_sep='\n', &blk)
|
5
|
-
@name, @comment, @field_sep, @line_sep = name, comment, field_sep, line_sep
|
6
|
-
@columns = []
|
7
|
-
@partitions = []
|
8
|
-
instance_eval(&blk) if blk
|
9
|
-
end
|
10
|
-
|
11
|
-
def column(name, type, comment=nil)
|
12
|
-
@columns << Column.new(name, type, comment)
|
13
|
-
end
|
14
|
-
|
15
|
-
def partition(name, type, comment=nil)
|
16
|
-
@partitions << Column.new(name, type, comment)
|
17
|
-
end
|
18
|
-
|
19
|
-
def create_table_statement()
|
20
|
-
%[CREATE TABLE #{table_statement}
|
21
|
-
ROW FORMAT DELIMITED
|
22
|
-
FIELDS TERMINATED BY '#{@field_sep}'
|
23
|
-
LINES TERMINATED BY '#{@line_sep}'
|
24
|
-
STORED AS TEXTFILE]
|
25
|
-
end
|
26
|
-
|
27
|
-
def replace_columns_statement
|
28
|
-
alter_columns_statement("REPLACE")
|
29
|
-
end
|
30
|
-
|
31
|
-
def add_columns_statement
|
32
|
-
alter_columns_statement("ADD")
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
table_statement
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def table_statement
|
42
|
-
comment_string = (@comment.nil? ? '' : " COMMENT '#{@comment}'")
|
43
|
-
%[`#{@name}` #{column_statement}#{comment_string}\n#{partition_statement}]
|
44
|
-
end
|
45
|
-
|
46
|
-
def alter_columns_statement(add_or_replace)
|
47
|
-
%[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
|
48
|
-
end
|
49
|
-
|
50
|
-
def column_statement
|
51
|
-
cols = @columns.join(",\n")
|
52
|
-
"(\n#{cols}\n)"
|
53
|
-
end
|
54
|
-
|
55
|
-
def partition_statement
|
56
|
-
return "" if @partitions.nil? || @partitions.empty?
|
57
|
-
|
58
|
-
cols = @partitions.join(",\n")
|
59
|
-
"PARTITIONED BY (\n#{cols}\n)"
|
60
|
-
end
|
61
|
-
|
62
|
-
class Column
|
63
|
-
attr_reader :name, :type, :comment
|
64
|
-
def initialize(name, type, comment=nil)
|
65
|
-
@name, @type, @comment = name, type, comment
|
66
|
-
end
|
67
|
-
|
68
|
-
def to_s
|
69
|
-
comment_string = @comment.nil? ? '' : " COMMENT '#{@comment}'"
|
70
|
-
"`#{@name}` #{@type.to_s.upcase}#{comment_string}"
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|