rbhive 0.1.17 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -71,7 +71,7 @@ module RBHive
71
71
 
72
72
  def fetch(query)
73
73
  execute(query)
74
- ResultSet.new(client.fetchAll)
74
+ ResultSet.new(client.fetchAll, client.getSchema)
75
75
  end
76
76
 
77
77
  def fetch_in_batch(query, batch_size=100)
@@ -107,26 +107,4 @@ module RBHive
107
107
  client.send(meth, *args)
108
108
  end
109
109
  end
110
-
111
- class ResultSet < Array
112
- def initialize(rows)
113
- super(rows.map {|r| r.split("\t") })
114
- end
115
-
116
- def to_csv(out_file=nil)
117
- output(",", out_file)
118
- end
119
-
120
- def to_tsv(out_file=nil)
121
- output("\t", out_file)
122
- end
123
-
124
- private
125
-
126
- def output(sep, out_file)
127
- sv = self.map { |r| r.join(sep) }.join("\n")
128
- return sv if out_file.nil?
129
- File.open(out_file, 'w') { |f| f << sv }
130
- end
131
- end
132
110
  end
@@ -0,0 +1,33 @@
1
+ module RBHive
2
+ class ResultSet < Array
3
+ def initialize(rows, schema=[])
4
+ @schema = SchemaDefinition.new(schema, rows.first)
5
+ super(rows.map {|r| @schema.coerce_row(r) })
6
+ end
7
+
8
+ def column_names
9
+ @schema.column_names
10
+ end
11
+
12
+ def column_type_map
13
+ @schema.column_type_map
14
+ end
15
+
16
+ def to_csv(out_file=nil)
17
+ output(",", out_file)
18
+ end
19
+
20
+ def to_tsv(out_file=nil)
21
+ output("\t", out_file)
22
+ end
23
+
24
+ private
25
+
26
+ def output(sep, out_file)
27
+ rows = self.map { |r| @schema.coerce_row_to_array(r).join(sep) }
28
+ sv = rows.join("\n")
29
+ return sv if out_file.nil?
30
+ File.open(out_file, 'w+') { |f| f << sv }
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,60 @@
1
+ module RBHive
2
+ class SchemaDefinition
3
+ attr_reader :schema
4
+
5
+ TYPES = {
6
+ :boolean => :to_s,
7
+ :string => :to_s,
8
+ :bigint => :to_i,
9
+ :float => :to_f,
10
+ :double => :to_f,
11
+ :int => :to_i,
12
+ :smallint => :to_i,
13
+ :tinyint => :to_i
14
+ }
15
+
16
+ def initialize(schema, example_row)
17
+ @schema = schema
18
+ @example_row = example_row.split("\t")
19
+ end
20
+
21
+ def column_names
22
+ @column_names ||= begin
23
+ schema_names = @schema.fieldSchemas.map {|c| c.name.to_sym }
24
+ # Let's work around the fact that Hive doesn't return schema data for partitions on SELECT * queries.
25
+ # For now we will call them :_p1, :_p2, etc. to avoid collisions.
26
+ offset = 0
27
+ while schema_names.length < @example_row.length
28
+ schema_names.push(:"_p#{offset+=1}")
29
+ end
30
+ schema_names
31
+ end
32
+ end
33
+
34
+ def column_type_map
35
+ @column_type_map ||= column_names.inject({}) do |hsh, c|
36
+ definition = @schema.fieldSchemas.find {|s| s.name.to_sym == c }
37
+ # If the column isn't in the schema (e.g. partitions in SELECT * queries), assume it is a string
38
+ hsh[c] = definition ? definition.type.to_sym : :string
39
+ hsh
40
+ end
41
+ end
42
+
43
+ def coerce_row(row)
44
+ column_names.zip(row.split("\t")).inject({}) do |hsh, (column_name, value)|
45
+ hsh[column_name] = coerce_column(column_name, value)
46
+ hsh
47
+ end
48
+ end
49
+
50
+ def coerce_column(column_name, value)
51
+ type = column_type_map[column_name]
52
+ conversion_method = TYPES[type]
53
+ conversion_method ? value.send(conversion_method) : value
54
+ end
55
+
56
+ def coerce_row_to_array(row)
57
+ column_names.map { |n| row[n] }
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,88 @@
1
+ module RBHive
2
+ class TableSchema
3
+ attr_accessor :name
4
+ attr_reader :columns, :partitions
5
+ def initialize(name, comment=nil, options={}, &blk)
6
+ @name, @comment = name, comment
7
+ @location = options[:location] || nil
8
+ @field_sep = options[:field_sep] || "\t"
9
+ @line_sep = options[:line_sep] || "\n"
10
+ @collection_sep = options[:collection_sep] || "|"
11
+ @columns = []
12
+ @partitions = []
13
+ instance_eval(&blk) if blk
14
+ end
15
+
16
+ def column(name, type, comment=nil)
17
+ @columns << Column.new(name, type, comment)
18
+ end
19
+
20
+ def partition(name, type, comment=nil)
21
+ @partitions << Column.new(name, type, comment)
22
+ end
23
+
24
+ def create_table_statement()
25
+ %[CREATE #{external}TABLE #{table_statement}
26
+ ROW FORMAT DELIMITED
27
+ FIELDS TERMINATED BY '#{@field_sep}'
28
+ LINES TERMINATED BY '#{@line_sep}'
29
+ COLLECTION ITEMS TERMINATED BY '#{@collection_sep}'
30
+ STORED AS TEXTFILE
31
+ #{location}]
32
+ end
33
+
34
+ def replace_columns_statement
35
+ alter_columns_statement("REPLACE")
36
+ end
37
+
38
+ def add_columns_statement
39
+ alter_columns_statement("ADD")
40
+ end
41
+
42
+ def to_s
43
+ table_statement
44
+ end
45
+
46
+ private
47
+
48
+ def external
49
+ @location.nil? ? '' : 'EXTERNAL '
50
+ end
51
+
52
+ def table_statement
53
+ comment_string = (@comment.nil? ? '' : " COMMENT '#{@comment}'")
54
+ %[`#{@name}` #{column_statement}#{comment_string}\n#{partition_statement}]
55
+ end
56
+
57
+ def location
58
+ @location.nil? ? '' : "LOCATION '#{@location}'"
59
+ end
60
+
61
+ def alter_columns_statement(add_or_replace)
62
+ %[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
63
+ end
64
+
65
+ def column_statement
66
+ cols = @columns.join(",\n")
67
+ "(\n#{cols}\n)"
68
+ end
69
+
70
+ def partition_statement
71
+ return "" if @partitions.nil? || @partitions.empty?
72
+ cols = @partitions.join(",\n")
73
+ "PARTITIONED BY (\n#{cols}\n)"
74
+ end
75
+
76
+ class Column
77
+ attr_reader :name, :type, :comment
78
+ def initialize(name, type, comment=nil)
79
+ @name, @type, @comment = name, type, comment
80
+ end
81
+
82
+ def to_s
83
+ comment_string = @comment.nil? ? '' : " COMMENT '#{@comment}'"
84
+ "`#{@name}` #{@type.to_s.upcase}#{comment_string}"
85
+ end
86
+ end
87
+ end
88
+ end
data/lib/rbhive.rb CHANGED
@@ -1,2 +1,4 @@
1
1
  require File.join(File.dirname(__FILE__), 'rbhive', 'connection')
2
- require File.join(File.dirname(__FILE__), 'rbhive', 'schema')
2
+ require File.join(File.dirname(__FILE__), 'rbhive', 'table_schema')
3
+ require File.join(File.dirname(__FILE__), 'rbhive', 'result_set')
4
+ require File.join(File.dirname(__FILE__), 'rbhive', 'schema_definition')
metadata CHANGED
@@ -1,49 +1,39 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rbhive
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 1
8
- - 17
9
- version: 0.1.17
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Forward Internet Group
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2010-12-07 00:00:00 +00:00
12
+ date: 2010-12-07 00:00:00.000000000 +00:00
18
13
  default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
21
16
  name: thrift
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &2153944980 !ruby/object:Gem::Requirement
24
18
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- - 4
31
- - 0
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
32
22
  version: 0.4.0
33
23
  type: :runtime
34
- version_requirements: *id001
24
+ prerelease: false
25
+ version_requirements: *2153944980
35
26
  description: Simple lib for executing Hive queries
36
27
  email: andy@forward.co.uk
37
28
  executables: []
38
-
39
29
  extensions: []
40
-
41
30
  extra_rdoc_files: []
42
-
43
- files:
31
+ files:
44
32
  - lib/rbhive.rb
45
33
  - lib/rbhive/connection.rb
46
- - lib/rbhive/schema.rb
34
+ - lib/rbhive/table_schema.rb
35
+ - lib/rbhive/result_set.rb
36
+ - lib/rbhive/schema_definition.rb
47
37
  - lib/thrift/facebook_service.rb
48
38
  - lib/thrift/fb303_constants.rb
49
39
  - lib/thrift/fb303_types.rb
@@ -62,34 +52,26 @@ files:
62
52
  has_rdoc: true
63
53
  homepage: http://github.com/trfficbroker/rbhive
64
54
  licenses: []
65
-
66
55
  post_install_message:
67
56
  rdoc_options: []
68
-
69
- require_paths:
57
+ require_paths:
70
58
  - lib
71
- required_ruby_version: !ruby/object:Gem::Requirement
59
+ required_ruby_version: !ruby/object:Gem::Requirement
72
60
  none: false
73
- requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- segments:
77
- - 0
78
- version: "0"
79
- required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
66
  none: false
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- segments:
85
- - 0
86
- version: "0"
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
87
71
  requirements: []
88
-
89
72
  rubyforge_project:
90
- rubygems_version: 1.3.7
73
+ rubygems_version: 1.6.2
91
74
  signing_key:
92
75
  specification_version: 3
93
76
  summary: Simple lib for executing Hive queries
94
77
  test_files: []
95
-
data/lib/rbhive/schema.rb DELETED
@@ -1,73 +0,0 @@
1
- class TableSchema
2
- attr_accessor :name
3
- attr_reader :columns, :partitions
4
- def initialize(name, comment=nil, field_sep='\t', line_sep='\n', &blk)
5
- @name, @comment, @field_sep, @line_sep = name, comment, field_sep, line_sep
6
- @columns = []
7
- @partitions = []
8
- instance_eval(&blk) if blk
9
- end
10
-
11
- def column(name, type, comment=nil)
12
- @columns << Column.new(name, type, comment)
13
- end
14
-
15
- def partition(name, type, comment=nil)
16
- @partitions << Column.new(name, type, comment)
17
- end
18
-
19
- def create_table_statement()
20
- %[CREATE TABLE #{table_statement}
21
- ROW FORMAT DELIMITED
22
- FIELDS TERMINATED BY '#{@field_sep}'
23
- LINES TERMINATED BY '#{@line_sep}'
24
- STORED AS TEXTFILE]
25
- end
26
-
27
- def replace_columns_statement
28
- alter_columns_statement("REPLACE")
29
- end
30
-
31
- def add_columns_statement
32
- alter_columns_statement("ADD")
33
- end
34
-
35
- def to_s
36
- table_statement
37
- end
38
-
39
- private
40
-
41
- def table_statement
42
- comment_string = (@comment.nil? ? '' : " COMMENT '#{@comment}'")
43
- %[`#{@name}` #{column_statement}#{comment_string}\n#{partition_statement}]
44
- end
45
-
46
- def alter_columns_statement(add_or_replace)
47
- %[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
48
- end
49
-
50
- def column_statement
51
- cols = @columns.join(",\n")
52
- "(\n#{cols}\n)"
53
- end
54
-
55
- def partition_statement
56
- return "" if @partitions.nil? || @partitions.empty?
57
-
58
- cols = @partitions.join(",\n")
59
- "PARTITIONED BY (\n#{cols}\n)"
60
- end
61
-
62
- class Column
63
- attr_reader :name, :type, :comment
64
- def initialize(name, type, comment=nil)
65
- @name, @type, @comment = name, type, comment
66
- end
67
-
68
- def to_s
69
- comment_string = @comment.nil? ? '' : " COMMENT '#{@comment}'"
70
- "`#{@name}` #{@type.to_s.upcase}#{comment_string}"
71
- end
72
- end
73
- end