hivemeta 0.0.6 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ * 2011-05-21 - fsf
2
+ - new: added table.process* opts hash
3
+ - new: added :ignore_field_count, :field_count_warning, and :file opts
4
+
1
5
  * 2011-05-19 - fsf
2
6
  - thank you ruby-prof!
3
7
  - perf: 4x+ faster ... now basically on par with manual split into array
@@ -3,17 +3,13 @@ module HiveMeta
3
3
  class FieldCountError < StandardError ; end
4
4
 
5
5
  class Record
6
- def initialize(line, table)
6
+ def initialize(line, table, opts = {})
7
7
  @fields = line.chomp.split(table.delimiter, -1)
8
8
  if @fields.size != table.columns.size
9
- raise FieldCountError
9
+ raise FieldCountError if not opts[:ignore_field_count]
10
10
  end
11
11
 
12
12
  @table = table
13
- #@columns = {}
14
- # table.each_col_with_index do |col_name, i|
15
- # #@columns[col_name.to_sym] = @fields[i]
16
- # end
17
13
  end
18
14
 
19
15
  # allow for column access via column name as an index
@@ -23,7 +19,6 @@ module HiveMeta
23
19
  # example: rec[7]
24
20
  def [] index
25
21
  return "#{@fields[index]}" if index.is_a? Integer
26
- #"#{@columns[index.to_sym]}"
27
22
  "#{@fields[@table.indexes[index.to_sym]]}"
28
23
  end
29
24
 
@@ -31,7 +26,6 @@ module HiveMeta
31
26
  # example: rec.col_name
32
27
  def method_missing(id, *args)
33
28
  return @fields[@table.indexes[id]] if @fields[@table.indexes[id]]
34
- #return @columns[id] if @columns[id]
35
29
  raise NoMethodError
36
30
  end
37
31
  end
@@ -41,27 +41,30 @@ module HiveMeta
41
41
 
42
42
  # process a row and return a record that can be queried
43
43
  # by column name in a variety of ways
44
- def process_row(line)
44
+ def process_row(line, opts = {})
45
45
  return nil if not line
46
46
  if block_given?
47
- yield Record.new(line, self)
47
+ yield Record.new(line, self, opts)
48
48
  else
49
- return Record.new(line, self)
49
+ return Record.new(line, self, opts)
50
50
  end
51
51
  end
52
52
 
53
53
  # process all input (default to STDIN for Hadoop Streaming)
54
54
  # via a provided block
55
- def process(f = STDIN, warning = nil)
55
+ def process(opts = {})
56
+ f = opts[:file] || STDIN
57
+
56
58
  if not block_given?
57
- return process_row f.readline
59
+ return process_row(f.readline, opts)
58
60
  end
59
61
 
60
62
  f.each_line do |line|
61
63
  begin
62
- process_row(line) {|row| yield row}
64
+ process_row(line, opts) {|row| yield row}
63
65
  rescue HiveMeta::FieldCountError
64
- warning ||= "reporter:counter:bad_data,row_size,1"
66
+ warning = opts[:field_count_warning]
67
+ warning ||= "reporter:counter:HiveMeta,FieldCountError,1"
65
68
  STDERR.puts warning
66
69
  next
67
70
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
+ - 1
7
8
  - 0
8
- - 6
9
- version: 0.0.6
9
+ version: 0.1.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Frank Fejes
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-19 00:00:00 -05:00
17
+ date: 2011-05-21 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies: []
20
20