rbhive 0.1.17 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbhive/connection.rb +1 -23
- data/lib/rbhive/result_set.rb +33 -0
- data/lib/rbhive/schema_definition.rb +60 -0
- data/lib/rbhive/table_schema.rb +88 -0
- data/lib/rbhive.rb +3 -1
- metadata +30 -48
- data/lib/rbhive/schema.rb +0 -73
data/lib/rbhive/connection.rb
CHANGED
@@ -71,7 +71,7 @@ module RBHive
|
|
71
71
|
|
72
72
|
def fetch(query)
|
73
73
|
execute(query)
|
74
|
-
ResultSet.new(client.fetchAll)
|
74
|
+
ResultSet.new(client.fetchAll, client.getSchema)
|
75
75
|
end
|
76
76
|
|
77
77
|
def fetch_in_batch(query, batch_size=100)
|
@@ -107,26 +107,4 @@ module RBHive
|
|
107
107
|
client.send(meth, *args)
|
108
108
|
end
|
109
109
|
end
|
110
|
-
|
111
|
-
class ResultSet < Array
|
112
|
-
def initialize(rows)
|
113
|
-
super(rows.map {|r| r.split("\t") })
|
114
|
-
end
|
115
|
-
|
116
|
-
def to_csv(out_file=nil)
|
117
|
-
output(",", out_file)
|
118
|
-
end
|
119
|
-
|
120
|
-
def to_tsv(out_file=nil)
|
121
|
-
output("\t", out_file)
|
122
|
-
end
|
123
|
-
|
124
|
-
private
|
125
|
-
|
126
|
-
def output(sep, out_file)
|
127
|
-
sv = self.map { |r| r.join(sep) }.join("\n")
|
128
|
-
return sv if out_file.nil?
|
129
|
-
File.open(out_file, 'w') { |f| f << sv }
|
130
|
-
end
|
131
|
-
end
|
132
110
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module RBHive
|
2
|
+
class ResultSet < Array
|
3
|
+
def initialize(rows, schema=[])
|
4
|
+
@schema = SchemaDefinition.new(schema, rows.first)
|
5
|
+
super(rows.map {|r| @schema.coerce_row(r) })
|
6
|
+
end
|
7
|
+
|
8
|
+
def column_names
|
9
|
+
@schema.column_names
|
10
|
+
end
|
11
|
+
|
12
|
+
def column_type_map
|
13
|
+
@schema.column_type_map
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_csv(out_file=nil)
|
17
|
+
output(",", out_file)
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_tsv(out_file=nil)
|
21
|
+
output("\t", out_file)
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def output(sep, out_file)
|
27
|
+
rows = self.map { |r| @schema.coerce_row_to_array(r).join(sep) }
|
28
|
+
sv = rows.join("\n")
|
29
|
+
return sv if out_file.nil?
|
30
|
+
File.open(out_file, 'w+') { |f| f << sv }
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module RBHive
|
2
|
+
class SchemaDefinition
|
3
|
+
attr_reader :schema
|
4
|
+
|
5
|
+
TYPES = {
|
6
|
+
:boolean => :to_s,
|
7
|
+
:string => :to_s,
|
8
|
+
:bigint => :to_i,
|
9
|
+
:float => :to_f,
|
10
|
+
:double => :to_f,
|
11
|
+
:int => :to_i,
|
12
|
+
:smallint => :to_i,
|
13
|
+
:tinyint => :to_i
|
14
|
+
}
|
15
|
+
|
16
|
+
def initialize(schema, example_row)
|
17
|
+
@schema = schema
|
18
|
+
@example_row = example_row.split("\t")
|
19
|
+
end
|
20
|
+
|
21
|
+
def column_names
|
22
|
+
@column_names ||= begin
|
23
|
+
schema_names = @schema.fieldSchemas.map {|c| c.name.to_sym }
|
24
|
+
# Lets fix the fact that Hive doesn't return schema data for partitions on SELECT * queries
|
25
|
+
# For now we will call them :_p1, :_p2, etc. to avoid collisions.
|
26
|
+
offset = 0
|
27
|
+
while schema_names.length < @example_row.length
|
28
|
+
schema_names.push(:"_p#{offset+=1}")
|
29
|
+
end
|
30
|
+
schema_names
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def column_type_map
|
35
|
+
@column_type_map ||= column_names.inject({}) do |hsh, c|
|
36
|
+
definition = @schema.fieldSchemas.find {|s| s.name.to_sym == c }
|
37
|
+
# If the column isn't in the schema (eg partitions in SELECT * queries) assume they are strings
|
38
|
+
hsh[c] = definition ? definition.type.to_sym : :string
|
39
|
+
hsh
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def coerce_row(row)
|
44
|
+
column_names.zip(row.split("\t")).inject({}) do |hsh, (column_name, value)|
|
45
|
+
hsh[column_name] = coerce_column(column_name, value)
|
46
|
+
hsh
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def coerce_column(column_name, value)
|
51
|
+
type = column_type_map[column_name]
|
52
|
+
conversion_method = TYPES[type]
|
53
|
+
conversion_method ? value.send(conversion_method) : value
|
54
|
+
end
|
55
|
+
|
56
|
+
def coerce_row_to_array(row)
|
57
|
+
column_names.map { |n| row[n] }
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module RBHive
|
2
|
+
class TableSchema
|
3
|
+
attr_accessor :name
|
4
|
+
attr_reader :columns, :partitions
|
5
|
+
def initialize(name, comment=nil, options={}, &blk)
|
6
|
+
@name, @comment = name, comment
|
7
|
+
@location = options[:location] || nil
|
8
|
+
@field_sep = options[:field_sep] || "\t"
|
9
|
+
@line_sep = options[:line_sep] || "\n"
|
10
|
+
@collection_sep = options[:collection_sep] || "|"
|
11
|
+
@columns = []
|
12
|
+
@partitions = []
|
13
|
+
instance_eval(&blk) if blk
|
14
|
+
end
|
15
|
+
|
16
|
+
def column(name, type, comment=nil)
|
17
|
+
@columns << Column.new(name, type, comment)
|
18
|
+
end
|
19
|
+
|
20
|
+
def partition(name, type, comment=nil)
|
21
|
+
@partitions << Column.new(name, type, comment)
|
22
|
+
end
|
23
|
+
|
24
|
+
def create_table_statement()
|
25
|
+
%[CREATE #{external}TABLE #{table_statement}
|
26
|
+
ROW FORMAT DELIMITED
|
27
|
+
FIELDS TERMINATED BY '#{@field_sep}'
|
28
|
+
LINES TERMINATED BY '#{@line_sep}'
|
29
|
+
COLLECTION ITEMS TERMINATED BY '#{@collection_sep}'
|
30
|
+
STORED AS TEXTFILE
|
31
|
+
#{location}]
|
32
|
+
end
|
33
|
+
|
34
|
+
def replace_columns_statement
|
35
|
+
alter_columns_statement("REPLACE")
|
36
|
+
end
|
37
|
+
|
38
|
+
def add_columns_statement
|
39
|
+
alter_columns_statement("ADD")
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_s
|
43
|
+
table_statement
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
def external
|
49
|
+
@location.nil? ? '' : 'EXTERNAL '
|
50
|
+
end
|
51
|
+
|
52
|
+
def table_statement
|
53
|
+
comment_string = (@comment.nil? ? '' : " COMMENT '#{@comment}'")
|
54
|
+
%[`#{@name}` #{column_statement}#{comment_string}\n#{partition_statement}]
|
55
|
+
end
|
56
|
+
|
57
|
+
def location
|
58
|
+
@location.nil? ? '' : "LOCATION '#{@location}'"
|
59
|
+
end
|
60
|
+
|
61
|
+
def alter_columns_statement(add_or_replace)
|
62
|
+
%[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
|
63
|
+
end
|
64
|
+
|
65
|
+
def column_statement
|
66
|
+
cols = @columns.join(",\n")
|
67
|
+
"(\n#{cols}\n)"
|
68
|
+
end
|
69
|
+
|
70
|
+
def partition_statement
|
71
|
+
return "" if @partitions.nil? || @partitions.empty?
|
72
|
+
cols = @partitions.join(",\n")
|
73
|
+
"PARTITIONED BY (\n#{cols}\n)"
|
74
|
+
end
|
75
|
+
|
76
|
+
class Column
|
77
|
+
attr_reader :name, :type, :comment
|
78
|
+
def initialize(name, type, comment=nil)
|
79
|
+
@name, @type, @comment = name, type, comment
|
80
|
+
end
|
81
|
+
|
82
|
+
def to_s
|
83
|
+
comment_string = @comment.nil? ? '' : " COMMENT '#{@comment}'"
|
84
|
+
"`#{@name}` #{@type.to_s.upcase}#{comment_string}"
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
data/lib/rbhive.rb
CHANGED
@@ -1,2 +1,4 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'rbhive', 'connection')
|
2
|
-
require File.join(File.dirname(__FILE__), 'rbhive', 'schema')
|
2
|
+
require File.join(File.dirname(__FILE__), 'rbhive', 'table_schema')
|
3
|
+
require File.join(File.dirname(__FILE__), 'rbhive', 'result_set')
|
4
|
+
require File.join(File.dirname(__FILE__), 'rbhive', 'schema_definition')
|
metadata
CHANGED
@@ -1,49 +1,39 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbhive
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 1
|
8
|
-
- 17
|
9
|
-
version: 0.1.17
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Forward Internet Group
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
date: 2010-12-07 00:00:00 +00:00
|
12
|
+
date: 2010-12-07 00:00:00.000000000 +00:00
|
18
13
|
default_executable:
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
21
16
|
name: thrift
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &2153944980 !ruby/object:Gem::Requirement
|
24
18
|
none: false
|
25
|
-
requirements:
|
26
|
-
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
- 4
|
31
|
-
- 0
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
32
22
|
version: 0.4.0
|
33
23
|
type: :runtime
|
34
|
-
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *2153944980
|
35
26
|
description: Simple lib for executing Hive queries
|
36
27
|
email: andy@forward.co.uk
|
37
28
|
executables: []
|
38
|
-
|
39
29
|
extensions: []
|
40
|
-
|
41
30
|
extra_rdoc_files: []
|
42
|
-
|
43
|
-
files:
|
31
|
+
files:
|
44
32
|
- lib/rbhive.rb
|
45
33
|
- lib/rbhive/connection.rb
|
46
|
-
- lib/rbhive/schema.rb
|
34
|
+
- lib/rbhive/table_schema.rb
|
35
|
+
- lib/rbhive/result_set.rb
|
36
|
+
- lib/rbhive/schema_definition.rb
|
47
37
|
- lib/thrift/facebook_service.rb
|
48
38
|
- lib/thrift/fb303_constants.rb
|
49
39
|
- lib/thrift/fb303_types.rb
|
@@ -62,34 +52,26 @@ files:
|
|
62
52
|
has_rdoc: true
|
63
53
|
homepage: http://github.com/trfficbroker/rbhive
|
64
54
|
licenses: []
|
65
|
-
|
66
55
|
post_install_message:
|
67
56
|
rdoc_options: []
|
68
|
-
|
69
|
-
require_paths:
|
57
|
+
require_paths:
|
70
58
|
- lib
|
71
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
60
|
none: false
|
73
|
-
requirements:
|
74
|
-
- - ">="
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
|
77
|
-
|
78
|
-
version: "0"
|
79
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
66
|
none: false
|
81
|
-
requirements:
|
82
|
-
- - ">="
|
83
|
-
- !ruby/object:Gem::Version
|
84
|
-
|
85
|
-
- 0
|
86
|
-
version: "0"
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
87
71
|
requirements: []
|
88
|
-
|
89
72
|
rubyforge_project:
|
90
|
-
rubygems_version: 1.
|
73
|
+
rubygems_version: 1.6.2
|
91
74
|
signing_key:
|
92
75
|
specification_version: 3
|
93
76
|
summary: Simple lib for executing Hive queries
|
94
77
|
test_files: []
|
95
|
-
|
data/lib/rbhive/schema.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
class TableSchema
|
2
|
-
attr_accessor :name
|
3
|
-
attr_reader :columns, :partitions
|
4
|
-
def initialize(name, comment=nil, field_sep='\t', line_sep='\n', &blk)
|
5
|
-
@name, @comment, @field_sep, @line_sep = name, comment, field_sep, line_sep
|
6
|
-
@columns = []
|
7
|
-
@partitions = []
|
8
|
-
instance_eval(&blk) if blk
|
9
|
-
end
|
10
|
-
|
11
|
-
def column(name, type, comment=nil)
|
12
|
-
@columns << Column.new(name, type, comment)
|
13
|
-
end
|
14
|
-
|
15
|
-
def partition(name, type, comment=nil)
|
16
|
-
@partitions << Column.new(name, type, comment)
|
17
|
-
end
|
18
|
-
|
19
|
-
def create_table_statement()
|
20
|
-
%[CREATE TABLE #{table_statement}
|
21
|
-
ROW FORMAT DELIMITED
|
22
|
-
FIELDS TERMINATED BY '#{@field_sep}'
|
23
|
-
LINES TERMINATED BY '#{@line_sep}'
|
24
|
-
STORED AS TEXTFILE]
|
25
|
-
end
|
26
|
-
|
27
|
-
def replace_columns_statement
|
28
|
-
alter_columns_statement("REPLACE")
|
29
|
-
end
|
30
|
-
|
31
|
-
def add_columns_statement
|
32
|
-
alter_columns_statement("ADD")
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
table_statement
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def table_statement
|
42
|
-
comment_string = (@comment.nil? ? '' : " COMMENT '#{@comment}'")
|
43
|
-
%[`#{@name}` #{column_statement}#{comment_string}\n#{partition_statement}]
|
44
|
-
end
|
45
|
-
|
46
|
-
def alter_columns_statement(add_or_replace)
|
47
|
-
%[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
|
48
|
-
end
|
49
|
-
|
50
|
-
def column_statement
|
51
|
-
cols = @columns.join(",\n")
|
52
|
-
"(\n#{cols}\n)"
|
53
|
-
end
|
54
|
-
|
55
|
-
def partition_statement
|
56
|
-
return "" if @partitions.nil? || @partitions.empty?
|
57
|
-
|
58
|
-
cols = @partitions.join(",\n")
|
59
|
-
"PARTITIONED BY (\n#{cols}\n)"
|
60
|
-
end
|
61
|
-
|
62
|
-
class Column
|
63
|
-
attr_reader :name, :type, :comment
|
64
|
-
def initialize(name, type, comment=nil)
|
65
|
-
@name, @type, @comment = name, type, comment
|
66
|
-
end
|
67
|
-
|
68
|
-
def to_s
|
69
|
-
comment_string = @comment.nil? ? '' : " COMMENT '#{@comment}'"
|
70
|
-
"`#{@name}` #{@type.to_s.upcase}#{comment_string}"
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|