hivemeta 0.0.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/lib/hivemeta/record.rb +2 -8
- data/lib/hivemeta/table.rb +10 -7
- metadata +3 -3
data/CHANGELOG
CHANGED
data/lib/hivemeta/record.rb
CHANGED
@@ -3,17 +3,13 @@ module HiveMeta
|
|
3
3
|
class FieldCountError < StandardError ; end
|
4
4
|
|
5
5
|
class Record
|
6
|
-
def initialize(line, table)
|
6
|
+
def initialize(line, table, opts = {})
|
7
7
|
@fields = line.chomp.split(table.delimiter, -1)
|
8
8
|
if @fields.size != table.columns.size
|
9
|
-
raise FieldCountError
|
9
|
+
raise FieldCountError if not opts[:ignore_field_count]
|
10
10
|
end
|
11
11
|
|
12
12
|
@table = table
|
13
|
-
#@columns = {}
|
14
|
-
# table.each_col_with_index do |col_name, i|
|
15
|
-
# #@columns[col_name.to_sym] = @fields[i]
|
16
|
-
# end
|
17
13
|
end
|
18
14
|
|
19
15
|
# allow for column access via column name as an index
|
@@ -23,7 +19,6 @@ module HiveMeta
|
|
23
19
|
# example: rec[7]
|
24
20
|
def [] index
|
25
21
|
return "#{@fields[index]}" if index.is_a? Integer
|
26
|
-
#"#{@columns[index.to_sym]}"
|
27
22
|
"#{@fields[@table.indexes[index.to_sym]]}"
|
28
23
|
end
|
29
24
|
|
@@ -31,7 +26,6 @@ module HiveMeta
|
|
31
26
|
# example: rec.col_name
|
32
27
|
def method_missing(id, *args)
|
33
28
|
return @fields[@table.indexes[id]] if @fields[@table.indexes[id]]
|
34
|
-
#return @columns[id] if @columns[id]
|
35
29
|
raise NoMethodError
|
36
30
|
end
|
37
31
|
end
|
data/lib/hivemeta/table.rb
CHANGED
@@ -41,27 +41,30 @@ module HiveMeta
|
|
41
41
|
|
42
42
|
# process a row and return a record that can be queried
|
43
43
|
# by column name in a variety of ways
|
44
|
-
def process_row(line)
|
44
|
+
def process_row(line, opts = {})
|
45
45
|
return nil if not line
|
46
46
|
if block_given?
|
47
|
-
yield Record.new(line, self)
|
47
|
+
yield Record.new(line, self, opts)
|
48
48
|
else
|
49
|
-
return Record.new(line, self)
|
49
|
+
return Record.new(line, self, opts)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
53
|
# process all input (default to STDIN for Hadoop Streaming)
|
54
54
|
# via a provided block
|
55
|
-
def process(f = STDIN)
|
55
|
+
def process(opts = {})
|
56
|
+
f = opts[:file] || STDIN
|
57
|
+
|
56
58
|
if not block_given?
|
57
|
-
return process_row(f.readline)
|
59
|
+
return process_row(f.readline, opts)
|
58
60
|
end
|
59
61
|
|
60
62
|
f.each_line do |line|
|
61
63
|
begin
|
62
|
-
process_row(line) {|row| yield row}
|
64
|
+
process_row(line, opts) {|row| yield row}
|
63
65
|
rescue HiveMeta::FieldCountError
|
64
|
-
warning = "reporter:counter:HiveMeta,FieldCountError,1"
|
66
|
+
warning = opts[:field_count_warning]
|
67
|
+
warning ||= "reporter:counter:HiveMeta,FieldCountError,1"
|
65
68
|
STDERR.puts warning
|
66
69
|
next
|
67
70
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
|
9
|
-
version: 0.0.6
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Frank Fejes
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-05-
|
17
|
+
date: 2011-05-21 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|