csvreader 0.7.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +4 -0
- data/README.md +175 -10
- data/lib/csvreader.rb +109 -0
- data/lib/csvreader/builder.rb +68 -4
- data/lib/csvreader/converter.rb +155 -0
- data/lib/csvreader/parser.rb +0 -48
- data/lib/csvreader/reader.rb +26 -25
- data/lib/csvreader/reader_hash.rb +160 -59
- data/lib/csvreader/version.rb +2 -2
- data/test/test_converter.rb +169 -0
- data/test/test_reader_converters.rb +54 -0
- data/test/test_reader_hash_converters.rb +109 -0
- metadata +6 -2
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
class CsvReader

##
# A pipeline of converters applied, in order, to a single field value
# (or header name).
#
# Converters are callables. A converter with arity 1 is called with the
# value only; any other converter is called with the value plus the
# zero-based column index (CsvReader) or the header name (CsvHashReader).
#
# Named converters are looked up in a registry hash (CONVERTERS or
# HEADER_CONVERTERS). A registry entry may be a callable or an array of
# other names (a "combo" converter such as :numeric or :all).
class Converter

  # A Regexp used to find and convert some common Date formats.
  DATE_MATCHER = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4}
                          |
                          \d{4}-\d{2}-\d{2} )\z
                 /x

  # A Regexp used to find and convert some common DateTime formats.
  DATE_TIME_MATCHER = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4}
                               |
                               \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}
                               |
                               # ISO-8601
                               \d{4}-\d{2}-\d{2}
                               (?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
                           )\z
                      /x


  ## Registry of built-in value converters.
  ## Every converter returns the value unchanged if it cannot convert it.
  CONVERTERS = {
    ##
    ## todo/fix: use regex INTEGER_MATCH / FLOAT_MATCH
    ##   to avoid rescue (with exception and stacktrace) for every try!!!
    integer:   ->(value) { Integer( value ) rescue value },
    float:     ->(value) { Float( value ) rescue value },
    numeric:   [:integer, :float],
    date:      ->(value) {
      begin
        value.match?( DATE_MATCHER ) ? Date.parse( value ) : value
      rescue  # date parse errors
        value
      end
    },
    date_time: ->(value) {
      begin
        value.match?( DATE_TIME_MATCHER ) ? DateTime.parse( value ) : value
      rescue  # encoding conversion or date parse errors
        value
      end
    },

    ## new - add null and boolean (any others): why? why not?
    null: -> (value) {
      ## turn empty strings into nil
      ##   rename to blank_to_nil or empty_to_nil or add both?
      ##   todo: add NIL, nil too? or #NA, N/A etc. - why? why not?
      if value.empty? || ['NULL', 'null', 'N/A', 'n/a', '#NA', '#na' ].include?( value )
        nil
      else
        value
      end
    },
    boolean: -> (value) {
      ## check yaml for possible true/false values - any missing?
      ##   add more (or less) - why? why not?
      if ['TRUE', 'true', 't', 'ON', 'on', 'YES', 'yes'].include?( value )
        true
      elsif ['FALSE', 'false', 'f', 'OFF', 'off', 'NO', 'no'].include?( value )
        false
      else
        value
      end
    },
    bool: [:boolean],   ## bool convenience alias for boolean

    all:  [:null, :boolean, :date_time, :numeric],
  }


  ## Registry of built-in header (field/column name) converters.
  HEADER_CONVERTERS = {
    downcase: ->(value) { value.downcase },
    symbol:   ->(value) { value.downcase.gsub( /[^\s\w]+/, "" ).strip.
                                         gsub( /\s+/, "_" ).to_sym
                        }
  }


  ## Builds a pipeline whose names are resolved against HEADER_CONVERTERS.
  def self.create_header_converters( converters )
    new( converters, HEADER_CONVERTERS )
  end

  ## Builds a pipeline whose names are resolved against CONVERTERS.
  def self.create_converters( converters )
    new( converters, CONVERTERS )
  end


  ##
  # converters - nil (empty pipeline), a single converter, or an array of
  #              converters; each one is a Proc or a registry key
  # registry   - hash used to resolve named converters
  #
  # Raises ArgumentError if a name is not found in the registry.
  def initialize( converters, registry=CONVERTERS )
    converters = case converters
                 when nil   then []
                 when Array then converters
                 else            [converters]
                 end

    @converters = []

    converters.each do |converter|
      if converter.is_a? Proc  # custom code block
        add_converter( registry, &converter )
      else                     # by name
        add_converter( converter, registry )
      end
    end
  end

  def to_a()   @converters; end   ## todo: rename to/use converters attribute name - why? why not?
  def empty?() @converters.empty?; end

  ##
  # Runs value through the pipeline and returns the final state of the
  # value (converted or original). nil is returned untouched.
  def convert( value, index_or_header=nil )
    return value if value.nil?

    @converters.each do |converter|
      value = if converter.arity == 1   # straight converter
                converter.call( value )
              else
                ## note: for CsvReader pass in the zero-based field/column index (integer)
                ##       for CsvHashReader pass in the header/field/column name (string)
                converter.call( value, index_or_header )
              end
      break unless value.is_a?( String )  # note: short-circuit pipeline for speed
    end
    value  # final state of value, converted or original
  end


private

  ##
  # Appends a converter to the pipeline.
  #
  # Called with a block only: the block is the (custom) converter.
  # Called with a name: the name is looked up in registry; an array entry
  # is expanded recursively, any other entry is appended as-is.
  def add_converter( name=nil, registry, &converter )
    if name.nil?  # custom converter
      @converters << converter
    else          # named converter
      combo = registry[name]
      case combo
      when nil    # unknown name - fail early instead of crashing later in convert
        raise ArgumentError, "unknown converter name #{name.inspect}; expected one of #{registry.keys.inspect}"
      when Array  # combo converter
        combo.each do |converter_name|
          add_converter( converter_name, registry )
        end
      else        # individual named converter
        @converters << combo
      end
    end
  end # method add_converter

end # class Converter

end # class CsvReader
|
data/lib/csvreader/parser.rb
CHANGED
|
@@ -2,54 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
class CsvReader
|
|
4
4
|
|
|
5
|
-
class Parser
|
|
6
|
-
|
|
7
|
-
## use/allow different "backends" e.g. ParserStd, ParserStrict, ParserTab, etc.
|
|
8
|
-
## parser must support parse method (with and without block)
|
|
9
|
-
## e.g. records = parse( data )
|
|
10
|
-
## -or-
|
|
11
|
-
## parse( data ) do |record|
|
|
12
|
-
## end
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
DEFAULT = ParserStd.new
|
|
16
|
-
|
|
17
|
-
RFC4180 = ParserStrict.new
|
|
18
|
-
STRICT = ParserStrict.new ## note: make strict its own instance (so you can change config without "breaking" rfc4180)
|
|
19
|
-
EXCEL = ParserStrict.new ## note: make excel its own instance (so you can change configs without "breaking" rfc4180/strict)
|
|
20
|
-
|
|
21
|
-
MYSQL = ParserStrict.new( sep: "\t",
|
|
22
|
-
quote: false,
|
|
23
|
-
escape: true,
|
|
24
|
-
null: "\\N" )
|
|
25
|
-
|
|
26
|
-
POSTGRES = POSTGRESQL = ParserStrict.new( doublequote: false,
|
|
27
|
-
escape: true,
|
|
28
|
-
null: "" )
|
|
29
|
-
|
|
30
|
-
POSTGRES_TEXT = POSTGRESQL_TEXT = ParserStrict.new( sep: "\t",
|
|
31
|
-
quote: false,
|
|
32
|
-
escape: true,
|
|
33
|
-
null: "\\N" )
|
|
34
|
-
|
|
35
|
-
TAB = ParserTab.new
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
|
39
|
-
def self.strict() STRICT; end ## alternative alias for STRICT
|
|
40
|
-
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
|
41
|
-
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
|
42
|
-
def self.mysql() MYSQL; end
|
|
43
|
-
def self.postgresql() POSTGRESQL; end
|
|
44
|
-
def self.postgres() postgresql; end
|
|
45
|
-
def self.postgresql_text() POSTGRESQL_TEXT; end
|
|
46
|
-
def self.postgres_text() postgresql_text; end
|
|
47
|
-
def self.tab() TAB; end
|
|
48
|
-
|
|
49
|
-
end # class Parser
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
5
|
####################################
|
|
54
6
|
# define errors / exceptions
|
|
55
7
|
# for all parsers for (re)use
|
data/lib/csvreader/reader.rb
CHANGED
|
@@ -2,31 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
class CsvReader
|
|
4
4
|
|
|
5
|
-
DEFAULT = CsvBuilder.new( Parser::DEFAULT )
|
|
6
|
-
STRICT = CsvBuilder.new( Parser::STRICT )
|
|
7
|
-
RFC4180 = CsvBuilder.new( Parser::RFC4180 )
|
|
8
|
-
EXCEL = CsvBuilder.new( Parser::EXCEL )
|
|
9
|
-
TAB = CsvBuilder.new( Parser::TAB )
|
|
10
|
-
|
|
11
|
-
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
|
12
|
-
def self.strict() STRICT; end ## alternative alias for RFC4180
|
|
13
|
-
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
|
14
|
-
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
|
15
|
-
def self.tab() TAB; end ## alternative alias for TAB
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
5
|
#######
|
|
22
6
|
## csv reader
|
|
23
7
|
|
|
24
|
-
def self.open( path, mode=
|
|
8
|
+
def self.open( path, mode=nil,
|
|
25
9
|
sep: nil,
|
|
26
10
|
converters: nil,
|
|
27
11
|
parser: nil, &block ) ## rename path to filename or name - why? why not?
|
|
28
|
-
|
|
29
|
-
|
|
12
|
+
|
|
13
|
+
## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
|
|
14
|
+
f = File.open( path, mode ? mode : 'r:bom|utf-8' )
|
|
15
|
+
csv = new(f, sep: sep,
|
|
16
|
+
converters: converters,
|
|
17
|
+
parser: parser )
|
|
30
18
|
|
|
31
19
|
# handle blocks like Ruby's open(), not like the (old old) CSV library
|
|
32
20
|
if block_given?
|
|
@@ -127,13 +115,15 @@ class CsvReader
|
|
|
127
115
|
# create the IO object we will read from
|
|
128
116
|
@io = data.is_a?(String) ? StringIO.new(data) : data
|
|
129
117
|
|
|
130
|
-
@sep
|
|
131
|
-
|
|
118
|
+
@sep = sep
|
|
119
|
+
|
|
120
|
+
@converters = Converter.create_converters( converters )
|
|
132
121
|
|
|
133
122
|
@parser = parser.nil? ? Parser::DEFAULT : parser
|
|
134
123
|
end
|
|
135
124
|
|
|
136
125
|
|
|
126
|
+
|
|
137
127
|
### IO and StringIO Delegation ###
|
|
138
128
|
extend Forwardable
|
|
139
129
|
def_delegators :@io,
|
|
@@ -152,13 +142,24 @@ class CsvReader
|
|
|
152
142
|
|
|
153
143
|
def each( &block )
|
|
154
144
|
if block_given?
|
|
155
|
-
kwargs = {
|
|
156
|
-
## converters: converters ## todo: add converters
|
|
157
|
-
}
|
|
145
|
+
kwargs = {}
|
|
158
146
|
## note: only add separator if present/defined (not nil)
|
|
159
147
|
kwargs[:sep] = @sep if @sep && @parser.respond_to?( :'sep=' )
|
|
160
148
|
|
|
161
|
-
|
|
149
|
+
## check array / pipeline of converters is empty (size=0 e.g. is [])
|
|
150
|
+
if @converters.empty?
|
|
151
|
+
@parser.parse( @io, kwargs, &block )
|
|
152
|
+
else
|
|
153
|
+
## add "post"-processing with converters pipeline
|
|
154
|
+
## that is, convert all strings to integer, float, date, ... if wanted
|
|
155
|
+
@parser.parse( @io, kwargs ) do |raw_record|
|
|
156
|
+
record = []
|
|
157
|
+
raw_record.each_with_index do | value, i |
|
|
158
|
+
record << @converters.convert( value, i )
|
|
159
|
+
end
|
|
160
|
+
block.call( record )
|
|
161
|
+
end
|
|
162
|
+
end
|
|
162
163
|
else
|
|
163
164
|
to_enum
|
|
164
165
|
end
|
|
@@ -2,87 +2,188 @@
|
|
|
2
2
|
|
|
3
3
|
##
# Reads CSV data record by record and yields every record as a hash
# (header name => value). The header names are either passed in
# (headers: [...]) or taken from the first row returned by the parser.
class CsvHashReader

  ##
  # Opens the file at path and wraps it in a reader.
  #
  # With a block: yields the reader, closes it when the block is done and
  # returns the block's result. Without a block: returns the reader
  # (the caller must close it).
  def self.open( path, mode=nil,
                 headers: nil,
                 sep: nil,
                 converters: nil,
                 header_converters: nil,
                 parser: nil, &block )   ## rename path to filename or name - why? why not?

    ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
    f = File.open( path, mode || 'r:bom|utf-8' )
    begin
      csv = new( f, headers:           headers,
                    sep:               sep,
                    converters:        converters,
                    header_converters: header_converters,
                    parser:            parser )
    rescue StandardError
      f.close   ## do not leak the file handle if the reader cannot be built
      raise
    end

    # handle blocks like Ruby's open(), not like the (old old) CSV library
    return csv unless block_given?

    begin
      block.call( csv )
    ensure
      csv.close
    end
  end # method self.open


  ## Reads the whole file at path and returns an array of records (hashes).
  def self.read( path, headers: nil,
                       sep: nil,
                       converters: nil,
                       header_converters: nil,
                       parser: nil )
    open( path,
          headers:           headers,
          sep:               sep,
          converters:        converters,
          header_converters: header_converters,
          parser:            parser ) { |csv| csv.read }
  end


  ##
  # With a block: yields every record of the file at path and closes the
  # file afterwards. Without a block: returns an enumerator.
  def self.foreach( path, headers: nil,
                          sep: nil,
                          converters: nil,
                          header_converters: nil,
                          parser: nil, &block )
    csv = open( path,
                headers:           headers,
                sep:               sep,
                converters:        converters,
                header_converters: header_converters,
                parser:            parser )

    if block_given?
      begin
        csv.each( &block )
      ensure
        csv.close
      end
    else
      csv.to_enum    ## note: caller (responsible) must close file!!!
      ## remove version without block given - why? why not?
      ## use Csv.open().to_enum or Csv.open().each
      ##   or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
    end
  end # method self.foreach


  ##
  # Parses data (a string or an IO-like object).
  # With a block: yields every record. Without a block: returns all
  # records as an array.
  def self.parse( data, headers: nil,
                        sep: nil,
                        converters: nil,
                        header_converters: nil,
                        parser: nil, &block )
    csv = new( data,
               headers:           headers,
               sep:               sep,
               converters:        converters,
               header_converters: header_converters,
               parser:            parser )

    if block_given?
      csv.each( &block )  ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
    else  # slurp contents, if no block is given
      csv.read            ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
    end
  end # method self.parse


  ##
  # data              - string (wrapped in a StringIO) or IO-like object
  # headers           - optional array of header names e.g. ['A', 'B', 'C']
  # sep               - optional separator, passed on to the parser
  # converters        - value converters (see CsvReader::Converter)
  # header_converters - converters applied to header names read from the data
  # parser            - parser to use; defaults to CsvReader::Parser::DEFAULT
  #
  # Raises ArgumentError if data is nil.
  def initialize( data, headers: nil, sep: nil,
                        converters: nil,
                        header_converters: nil,
                        parser: nil )
    raise ArgumentError, "Cannot parse nil as CSV"  if data.nil?

    # create the IO object we will read from
    @io = data.is_a?(String) ? StringIO.new(data) : data

    ## pass in headers as array e.g. ['A', 'B', 'C']
    ##  double check: run header_converters on passed in headers?
    ##    for now - do NOT auto-convert passed in headers - keep them as-is (1:1)
    @names = headers || nil

    @sep = sep

    @converters        = CsvReader::Converter.create_converters( converters )
    @header_converters = CsvReader::Converter.create_header_converters( header_converters )

    @parser = parser.nil? ? CsvReader::Parser::DEFAULT : parser
  end


  ### IO and StringIO Delegation ###
  extend Forwardable
  def_delegators :@io,
                 :close, :closed?, :eof, :eof?

  ## add more - why? why not?
  ## def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
  ##                      :closed?, :eof, :eof?, :external_encoding, :fcntl,
  ##                      :fileno, :flock, :flush, :fsync, :internal_encoding,
  ##                      :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
  ##                      :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
  ##                      :to_io, :truncate, :tty?


  include Enumerable


  ##
  # Yields every record as a hash. If no header names are known yet, the
  # first row is consumed as the header row (run through the header
  # converters, if any) and is not yielded.
  # Returns an enumerator if no block is given.
  def each( &block )

    ## todo/fix:
    ##   add case for headers/names.size != values.size
    ##   - add rest option? for if less headers than values (see python csv.DictReader - why? why not?)
    ##
    ##   handle case with duplicate and empty header names etc.

    return to_enum  unless block_given?

    kwargs = {}
    ## note: only add separator if present/defined (not nil)
    kwargs[:sep] = @sep    if @sep && @parser.respond_to?( :'sep=' )

    @parser.parse( @io, kwargs ) do |raw_values|     # sep: sep
      if @names.nil?    ## check for (first) headers row
        ## store header row / a.k.a. field/column names
        @names = if @header_converters.empty?
                   raw_values
                 else
                   raw_values.each_with_index.map do |value, i|
                     @header_converters.convert( value, i )
                   end
                 end
      else    ## "regular" record
        raw_record = @names.zip( raw_values ).to_h    ## todo/fix: check for more values than names/headers!!!
        record = if @converters.empty?
                   raw_record
                 else
                   ## add "post"-processing with converters pipeline
                   ##   that is, convert all strings to integer, float, date, ... if wanted
                   raw_record.each_with_object( {} ) do |(key, value), converted|
                     converted[ key ] = @converters.convert( value, key )
                   end
                 end
        block.call( record )
      end
    end
  end # method each

  ## Returns all (remaining) records as an array of hashes.
  def read() to_a; end # method read


end # class CsvHashReader
|