hivemeta 0.0.6 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -0
- data/lib/hivemeta/record.rb +2 -8
- data/lib/hivemeta/table.rb +10 -7
- metadata +3 -3
data/CHANGELOG
CHANGED
data/lib/hivemeta/record.rb
CHANGED
@@ -3,17 +3,13 @@ module HiveMeta
|
|
3
3
|
class FieldCountError < StandardError ; end
|
4
4
|
|
5
5
|
class Record
|
6
|
-
def initialize(line, table)
|
6
|
+
def initialize(line, table, opts = {})
|
7
7
|
@fields = line.chomp.split(table.delimiter, -1)
|
8
8
|
if @fields.size != table.columns.size
|
9
|
-
raise FieldCountError
|
9
|
+
raise FieldCountError if not opts[:ignore_field_count]
|
10
10
|
end
|
11
11
|
|
12
12
|
@table = table
|
13
|
-
#@columns = {}
|
14
|
-
# table.each_col_with_index do |col_name, i|
|
15
|
-
# #@columns[col_name.to_sym] = @fields[i]
|
16
|
-
# end
|
17
13
|
end
|
18
14
|
|
19
15
|
# allow for column access via column name as an index
|
@@ -23,7 +19,6 @@ module HiveMeta
|
|
23
19
|
# example: rec[7]
|
24
20
|
def [] index
|
25
21
|
return "#{@fields[index]}" if index.is_a? Integer
|
26
|
-
#"#{@columns[index.to_sym]}"
|
27
22
|
"#{@fields[@table.indexes[index.to_sym]]}"
|
28
23
|
end
|
29
24
|
|
@@ -31,7 +26,6 @@ module HiveMeta
|
|
31
26
|
# example: rec.col_name
|
32
27
|
def method_missing(id, *args)
|
33
28
|
return @fields[@table.indexes[id]] if @fields[@table.indexes[id]]
|
34
|
-
#return @columns[id] if @columns[id]
|
35
29
|
raise NoMethodError
|
36
30
|
end
|
37
31
|
end
|
data/lib/hivemeta/table.rb
CHANGED
@@ -41,27 +41,30 @@ module HiveMeta
|
|
41
41
|
|
42
42
|
# process a row and return a record that can be queried
|
43
43
|
# by column name in a variety of ways
|
44
|
-
def process_row(line)
|
44
|
+
def process_row(line, opts = {})
|
45
45
|
return nil if not line
|
46
46
|
if block_given?
|
47
|
-
yield Record.new(line, self)
|
47
|
+
yield Record.new(line, self, opts)
|
48
48
|
else
|
49
|
-
return Record.new(line, self)
|
49
|
+
return Record.new(line, self, opts)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
53
|
# process all input (default to STDIN for Hadoop Streaming)
|
54
54
|
# via a provided block
|
55
|
-
def process(f = STDIN)
|
55
|
+
def process(opts = {})
|
56
|
+
f = opts[:file] || STDIN
|
57
|
+
|
56
58
|
if not block_given?
|
57
|
-
return process_row(f.readline)
|
59
|
+
return process_row(f.readline, opts)
|
58
60
|
end
|
59
61
|
|
60
62
|
f.each_line do |line|
|
61
63
|
begin
|
62
|
-
process_row(line) {|row| yield row}
|
64
|
+
process_row(line, opts) {|row| yield row}
|
63
65
|
rescue HiveMeta::FieldCountError
|
64
|
-
warning = "reporter:counter:HiveMeta,FieldCountError,1"
|
66
|
+
warning = opts[:field_count_warning]
|
67
|
+
warning ||= "reporter:counter:HiveMeta,FieldCountError,1"
|
65
68
|
STDERR.puts warning
|
66
69
|
next
|
67
70
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
|
9
|
-
version: 0.0.6
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Frank Fejes
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-05-
|
17
|
+
date: 2011-05-21 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|