hivemeta 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,2 +1,7 @@
1
+ * 2011-05-04 - fsf
2
+ - pull delimiter from SERDE_PARAMS and only default to \t
3
+ - hivemeta_query displays delimiter info
4
+ - removed unused Rakefile
5
+
1
6
  * 2011-05-01 - fsf
2
7
  - initial import
data/README CHANGED
@@ -1,16 +1,22 @@
1
1
  hivemeta
2
2
 
3
3
  A Ruby API for access to the Hive metastore. Useful for querying columns
4
- in map/reduce applications. Includes a demo application to spit out
4
+ in Hadoop map/reduce applications. Includes a demo application to spit out
5
5
  table information from the command-line via table name search or
6
6
  by the table's location in HDFS.
7
7
 
8
+ Installation
9
+
10
+ gem install hivemeta
11
+
12
+ Usage
13
+
8
14
  streaming map/reduce code snippet:
9
15
 
10
16
  require 'hivemeta'
11
17
 
12
18
  h = HiveMeta::Connection.new(...) # see sample-mapper.rb for detail
13
- inventory = h.table 'sample_inventory'
19
+ inv_table = h.table 'sample_inventory'
14
20
 
15
21
  STDIN.each_line do |line|
16
22
  begin
@@ -24,7 +30,6 @@ STDIN.each_line do |line|
24
30
  puts "#{item_id}\t#{count}" if count >= 1000
25
31
  end
26
32
 
27
-
28
33
  sample usage for the demo app:
29
34
 
30
35
  # query by table names
@@ -100,6 +100,15 @@ tables.each do |table|
100
100
  next if list_tables
101
101
  puts table.path
102
102
  next if list_paths
103
+
104
+ delim = table.delimiter
105
+ a = table.delimiter.ord
106
+ delim = '\\t' if a == 9
107
+ if (1..7).include? a
108
+ delim = "^#{('A'.ord + a - 1).chr}"
109
+ end
110
+ puts "delimiter: \"#{delim}\" (ASCII #{a})"
111
+
103
112
  tput_cols = `tput cols`.chomp.to_i rescue tput_cols = 0
104
113
 
105
114
  table.each_with_index do |col_name, i|
@@ -13,7 +13,7 @@ db_name = 'hivemeta'
13
13
  dbi_string = "DBI:Mysql:#{db_name}:#{db_host}"
14
14
  h = HiveMeta::Connection.new(dbi_string, db_user, db_pass)
15
15
 
16
- inventory = h.table 'sample_inventory'
16
+ inv_table = h.table 'sample_inventory'
17
17
 
18
18
  STDIN.each_line do |line|
19
19
  begin
@@ -64,9 +64,11 @@ module HiveMeta
64
64
  #p name
65
65
  table = Table.new(name)
66
66
 
67
- sql = "select c.INTEGER_IDX, c.column_name, c.COMMENT, s.LOCATION
67
+ sql = "select c.INTEGER_IDX, c.column_name, c.COMMENT,
68
+ s.LOCATION, s.SD_ID
68
69
  from TBLS t, COLUMNS c, SDS s
69
- where t.SD_ID = c.SD_ID and t.SD_ID = s.SD_ID and t.TBL_NAME = ?"
70
+ where t.SD_ID = c.SD_ID and t.SD_ID = s.SD_ID
71
+ and t.TBL_NAME = ?"
70
72
  query sql, name do |rec|
71
73
  #puts "REC:"
72
74
  #p rec
@@ -74,10 +76,24 @@ module HiveMeta
74
76
  col_name = rec[1]
75
77
  col_cmt = rec[2]
76
78
  tbl_loc = rec[3]
79
+ sd_id = rec[4]
77
80
  table.columns[col_idx] = col_name
78
81
  table.comments[col_idx] = col_cmt
79
- table.path = tbl_loc
82
+ table.path = tbl_loc
83
+
84
+ end
85
+
86
+ sql = "select sp.PARAM_VALUE
87
+ from SERDE_PARAMS sp, TBLS t
88
+ where t.SD_ID = sp.SERDE_ID
89
+ and PARAM_KEY = 'field.delim'
90
+ and t.TBL_NAME = ?"
91
+ results = query sql, name
92
+ if results and results[0] and results[0][0]
93
+ table.delimiter = results[0][0]
80
94
  end
95
+ #puts "#{name}: found delim '#{table.delimiter}'" if results[0]
96
+ #puts "#{name}: no delim" if not results[0]
81
97
 
82
98
  tables << table
83
99
  end
data/lib/hivemeta.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'hivemeta/connection'
2
2
 
3
3
  module HiveMeta
4
- VERSION = '0.0.1'
4
+ VERSION = '0.0.2'
5
5
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 1
9
- version: 0.0.1
8
+ - 2
9
+ version: 0.0.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Frank Fejes
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-01 00:00:00 -05:00
17
+ date: 2011-05-04 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies: []
20
20