rbhive 0.1.17 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -71,7 +71,7 @@ module RBHive
71
71
 
72
72
  def fetch(query)
73
73
  execute(query)
74
- ResultSet.new(client.fetchAll)
74
+ ResultSet.new(client.fetchAll, client.getSchema)
75
75
  end
76
76
 
77
77
  def fetch_in_batch(query, batch_size=100)
@@ -107,26 +107,4 @@ module RBHive
107
107
  client.send(meth, *args)
108
108
  end
109
109
  end
110
-
111
- class ResultSet < Array
112
- def initialize(rows)
113
- super(rows.map {|r| r.split("\t") })
114
- end
115
-
116
- def to_csv(out_file=nil)
117
- output(",", out_file)
118
- end
119
-
120
- def to_tsv(out_file=nil)
121
- output("\t", out_file)
122
- end
123
-
124
- private
125
-
126
- def output(sep, out_file)
127
- sv = self.map { |r| r.join(sep) }.join("\n")
128
- return sv if out_file.nil?
129
- File.open(out_file, 'w') { |f| f << sv }
130
- end
131
- end
132
110
  end
@@ -0,0 +1,33 @@
1
module RBHive
  # An Array of result rows. Each raw tab-separated row string handed to the
  # constructor is converted by SchemaDefinition#coerce_row, and the schema
  # definition is kept so rows can be written back out in column order.
  class ResultSet < Array
    # rows   - Array of tab-separated row strings.
    # schema - schema object handed straight to SchemaDefinition.new.
    def initialize(rows, schema=[])
      # An empty result set has no first row; hand SchemaDefinition an empty
      # example row instead of nil so a query returning zero rows does not raise.
      @schema = SchemaDefinition.new(schema, rows.first || "")
      super(rows.map { |r| @schema.coerce_row(r) })
    end

    # Column names as reported by the schema definition.
    def column_names
      @schema.column_names
    end

    # Column name => type mapping as reported by the schema definition.
    def column_type_map
      @schema.column_type_map
    end

    # Comma-joined rendering of the rows. Returns the text when out_file is
    # nil, otherwise writes it to out_file. Values are joined as-is (no quoting).
    def to_csv(out_file=nil)
      output(",", out_file)
    end

    # Tab-joined rendering of the rows. Returns the text when out_file is nil,
    # otherwise writes it to out_file.
    def to_tsv(out_file=nil)
      output("\t", out_file)
    end

    private

    # Joins each row's values (in column order) with sep and rows with "\n".
    def output(sep, out_file)
      rows = self.map { |r| @schema.coerce_row_to_array(r).join(sep) }
      sv = rows.join("\n")
      return sv if out_file.nil?
      File.open(out_file, 'w+') { |f| f << sv }
    end
  end
end
@@ -0,0 +1,60 @@
1
module RBHive
  # Pairs a schema object (anything exposing #fieldSchemas, whose entries
  # respond to #name and #type) with an example row, and uses them to name
  # and convert the values of tab-separated result rows.
  class SchemaDefinition
    attr_reader :schema

    # Declared column type => String method used to convert the raw value.
    # Types missing from this table are left unconverted.
    TYPES = {
      :boolean => :to_s,
      :string => :to_s,
      :bigint => :to_i,
      :float => :to_f,
      :double => :to_f,
      :int => :to_i,
      :smallint => :to_i,
      :tinyint => :to_i
    }.freeze

    # schema      - schema object; one without #fieldSchemas is treated as
    #               declaring no columns.
    # example_row - a tab-separated row string used to count columns, or nil
    #               when there are no rows.
    def initialize(schema, example_row)
      @schema = schema
      @example_row = example_row ? split_row(example_row) : []
    end

    # Column names as symbols, in schema order, padded with placeholder names
    # when the example row has more fields than the schema declares.
    def column_names
      @column_names ||= begin
        schema_names = field_schemas.map { |c| c.name.to_sym }
        # Let's fix the fact that Hive doesn't return schema data for partitions on SELECT * queries
        # For now we will call them :_p1, :_p2, etc. to avoid collisions.
        offset = 0
        while schema_names.length < @example_row.length
          schema_names.push(:"_p#{offset += 1}")
        end
        schema_names
      end
    end

    # Hash of column name => declared type symbol.
    def column_type_map
      @column_type_map ||= column_names.inject({}) do |hsh, c|
        definition = field_schemas.find { |s| s.name.to_sym == c }
        # If the column isn't in the schema (eg partitions in SELECT * queries) assume they are strings
        hsh[c] = definition ? definition.type.to_sym : :string
        hsh
      end
    end

    # Converts a tab-separated row string into a Hash of column name =>
    # converted value. Fields beyond the known columns are dropped; columns
    # with no field get nil run through the same conversion.
    def coerce_row(row)
      column_names.zip(split_row(row)).inject({}) do |hsh, (column_name, value)|
        hsh[column_name] = coerce_column(column_name, value)
        hsh
      end
    end

    # Converts one value according to its column's type (see TYPES).
    def coerce_column(column_name, value)
      type = column_type_map[column_name]
      conversion_method = TYPES[type]
      conversion_method ? value.send(conversion_method) : value
    end

    # Turns a row Hash back into an Array of values in column order.
    def coerce_row_to_array(row)
      column_names.map { |n| row[n] }
    end

    private

    # Field descriptors from the schema, or [] when the schema offers none.
    def field_schemas
      return [] unless @schema.respond_to?(:fieldSchemas)
      @schema.fieldSchemas || []
    end

    # Splits on tabs; the -1 limit keeps trailing empty fields so the field
    # count matches the row (a plain split silently drops them).
    def split_row(row)
      row.split("\t", -1)
    end
  end
end
@@ -0,0 +1,88 @@
1
module RBHive
  # Builds HiveQL DDL text (CREATE TABLE / ALTER TABLE ... COLUMNS) from a
  # table name, optional comment, and columns/partitions declared either via
  # #column / #partition or inside the block given to the constructor.
  class TableSchema
    attr_accessor :name
    attr_reader :columns, :partitions

    # Backslash-escapes single quotes so the text can sit inside a
    # single-quoted HiveQL literal. Quotes that already carry a backslash are
    # left alone so pre-escaped input is not escaped twice.
    def self.escape_quotes(text)
      text.to_s.gsub(/\\?'/) { |m| m == "'" ? "\\'" : m }
    end

    # name    - table name.
    # comment - optional table comment.
    # options - :location (when set, the table is created EXTERNAL at that
    #           location), :field_sep (default tab), :line_sep (default
    #           newline), :collection_sep (default "|").
    # blk     - optional block evaluated against the new instance, so it can
    #           call column / partition directly.
    def initialize(name, comment=nil, options={}, &blk)
      @name, @comment = name, comment
      @location = options[:location] || nil
      @field_sep = options[:field_sep] || "\t"
      @line_sep = options[:line_sep] || "\n"
      @collection_sep = options[:collection_sep] || "|"
      @columns = []
      @partitions = []
      instance_eval(&blk) if blk
    end

    # Declares a regular column.
    def column(name, type, comment=nil)
      @columns << Column.new(name, type, comment)
    end

    # Declares a partition column.
    def partition(name, type, comment=nil)
      @partitions << Column.new(name, type, comment)
    end

    # Full CREATE TABLE statement, one clause per line. The final line is the
    # LOCATION clause, or empty when no location was given.
    def create_table_statement()
      [
        "CREATE #{external}TABLE #{table_statement}",
        "ROW FORMAT DELIMITED",
        "FIELDS TERMINATED BY '#{@field_sep}'",
        "LINES TERMINATED BY '#{@line_sep}'",
        "COLLECTION ITEMS TERMINATED BY '#{@collection_sep}'",
        "STORED AS TEXTFILE",
        location
      ].join("\n")
    end

    # ALTER TABLE ... REPLACE COLUMNS statement for the declared columns.
    def replace_columns_statement
      alter_columns_statement("REPLACE")
    end

    # ALTER TABLE ... ADD COLUMNS statement for the declared columns.
    def add_columns_statement
      alter_columns_statement("ADD")
    end

    # The table name, column list, comment and partition clause.
    def to_s
      table_statement
    end

    private

    def external
      @location.nil? ? '' : 'EXTERNAL '
    end

    def table_statement
      comment_string = (@comment.nil? ? '' : " COMMENT '#{TableSchema.escape_quotes(@comment)}'")
      %[`#{@name}` #{column_statement}#{comment_string}\n#{partition_statement}]
    end

    def location
      @location.nil? ? '' : "LOCATION '#{TableSchema.escape_quotes(@location)}'"
    end

    def alter_columns_statement(add_or_replace)
      %[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
    end

    def column_statement
      cols = @columns.join(",\n")
      "(\n#{cols}\n)"
    end

    def partition_statement
      return "" if @partitions.nil? || @partitions.empty?
      cols = @partitions.join(",\n")
      "PARTITIONED BY (\n#{cols}\n)"
    end

    # A single column (or partition column) definition.
    class Column
      attr_reader :name, :type, :comment

      def initialize(name, type, comment=nil)
        @name, @type, @comment = name, type, comment
      end

      # Renders as: `name` TYPE [COMMENT '...'], with the type upcased.
      def to_s
        comment_string = @comment.nil? ? '' : " COMMENT '#{TableSchema.escape_quotes(@comment)}'"
        "`#{@name}` #{@type.to_s.upcase}#{comment_string}"
      end
    end
  end
end
data/lib/rbhive.rb CHANGED
@@ -1,2 +1,4 @@
1
1
  require File.join(File.dirname(__FILE__), 'rbhive', 'connection')
2
- require File.join(File.dirname(__FILE__), 'rbhive', 'schema')
2
+ require File.join(File.dirname(__FILE__), 'rbhive', 'table_schema')
3
+ require File.join(File.dirname(__FILE__), 'rbhive', 'result_set')
4
+ require File.join(File.dirname(__FILE__), 'rbhive', 'schema_definition')
metadata CHANGED
@@ -1,49 +1,39 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rbhive
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 1
8
- - 17
9
- version: 0.1.17
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Forward Internet Group
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2010-12-07 00:00:00 +00:00
12
+ date: 2010-12-07 00:00:00.000000000 +00:00
18
13
  default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
21
16
  name: thrift
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &2153944980 !ruby/object:Gem::Requirement
24
18
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- - 4
31
- - 0
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
32
22
  version: 0.4.0
33
23
  type: :runtime
34
- version_requirements: *id001
24
+ prerelease: false
25
+ version_requirements: *2153944980
35
26
  description: Simple lib for executing Hive queries
36
27
  email: andy@forward.co.uk
37
28
  executables: []
38
-
39
29
  extensions: []
40
-
41
30
  extra_rdoc_files: []
42
-
43
- files:
31
+ files:
44
32
  - lib/rbhive.rb
45
33
  - lib/rbhive/connection.rb
46
- - lib/rbhive/schema.rb
34
+ - lib/rbhive/table_schema.rb
35
+ - lib/rbhive/result_set.rb
36
+ - lib/rbhive/schema_definition.rb
47
37
  - lib/thrift/facebook_service.rb
48
38
  - lib/thrift/fb303_constants.rb
49
39
  - lib/thrift/fb303_types.rb
@@ -62,34 +52,26 @@ files:
62
52
  has_rdoc: true
63
53
  homepage: http://github.com/trfficbroker/rbhive
64
54
  licenses: []
65
-
66
55
  post_install_message:
67
56
  rdoc_options: []
68
-
69
- require_paths:
57
+ require_paths:
70
58
  - lib
71
- required_ruby_version: !ruby/object:Gem::Requirement
59
+ required_ruby_version: !ruby/object:Gem::Requirement
72
60
  none: false
73
- requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- segments:
77
- - 0
78
- version: "0"
79
- required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
66
  none: false
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- segments:
85
- - 0
86
- version: "0"
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
87
71
  requirements: []
88
-
89
72
  rubyforge_project:
90
- rubygems_version: 1.3.7
73
+ rubygems_version: 1.6.2
91
74
  signing_key:
92
75
  specification_version: 3
93
76
  summary: Simple lib for executing Hive queries
94
77
  test_files: []
95
-
data/lib/rbhive/schema.rb DELETED
@@ -1,73 +0,0 @@
1
- class TableSchema
2
- attr_accessor :name
3
- attr_reader :columns, :partitions
4
- def initialize(name, comment=nil, field_sep='\t', line_sep='\n', &blk)
5
- @name, @comment, @field_sep, @line_sep = name, comment, field_sep, line_sep
6
- @columns = []
7
- @partitions = []
8
- instance_eval(&blk) if blk
9
- end
10
-
11
- def column(name, type, comment=nil)
12
- @columns << Column.new(name, type, comment)
13
- end
14
-
15
- def partition(name, type, comment=nil)
16
- @partitions << Column.new(name, type, comment)
17
- end
18
-
19
- def create_table_statement()
20
- %[CREATE TABLE #{table_statement}
21
- ROW FORMAT DELIMITED
22
- FIELDS TERMINATED BY '#{@field_sep}'
23
- LINES TERMINATED BY '#{@line_sep}'
24
- STORED AS TEXTFILE]
25
- end
26
-
27
- def replace_columns_statement
28
- alter_columns_statement("REPLACE")
29
- end
30
-
31
- def add_columns_statement
32
- alter_columns_statement("ADD")
33
- end
34
-
35
- def to_s
36
- table_statement
37
- end
38
-
39
- private
40
-
41
- def table_statement
42
- comment_string = (@comment.nil? ? '' : " COMMENT '#{@comment}'")
43
- %[`#{@name}` #{column_statement}#{comment_string}\n#{partition_statement}]
44
- end
45
-
46
- def alter_columns_statement(add_or_replace)
47
- %[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
48
- end
49
-
50
- def column_statement
51
- cols = @columns.join(",\n")
52
- "(\n#{cols}\n)"
53
- end
54
-
55
- def partition_statement
56
- return "" if @partitions.nil? || @partitions.empty?
57
-
58
- cols = @partitions.join(",\n")
59
- "PARTITIONED BY (\n#{cols}\n)"
60
- end
61
-
62
- class Column
63
- attr_reader :name, :type, :comment
64
- def initialize(name, type, comment=nil)
65
- @name, @type, @comment = name, type, comment
66
- end
67
-
68
- def to_s
69
- comment_string = @comment.nil? ? '' : " COMMENT '#{@comment}'"
70
- "`#{@name}` #{@type.to_s.upcase}#{comment_string}"
71
- end
72
- end
73
- end