csvreader 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +4 -0
- data/lib/csvreader.rb +2 -1
- data/lib/csvreader/buffer.rb +48 -0
- data/lib/csvreader/parser.rb +251 -0
- data/lib/csvreader/reader.rb +83 -83
- data/lib/csvreader/version.rb +1 -1
- data/test/test_parser.rb +77 -0
- data/test/test_reader.rb +5 -49
- data/test/test_reader_hash.rb +52 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed373a97a0bdb4c45d2980894a32014cdcb8ca7c
|
4
|
+
data.tar.gz: 784adcade81e39ad9accd1a9b2d0c76fd666b6f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5523a8697990c691f55aa7c3b23867104b1c4c5b8e9e25b0424a3191e73cbb32cee369541b712f60fc366ba76a8207a77d6b12b68ea209896b6c26e11c5712de
|
7
|
+
data.tar.gz: 7c33c812c2a53303911b6686d03554d6e388b3f936a3b6b8d995ed237651bd171d3bdb8ab8f38f7f327e1a9d1be26d1fa955918012f37cf6a9e1c2cc6ab08373
|
data/Manifest.txt
CHANGED
@@ -4,10 +4,14 @@ Manifest.txt
|
|
4
4
|
README.md
|
5
5
|
Rakefile
|
6
6
|
lib/csvreader.rb
|
7
|
+
lib/csvreader/buffer.rb
|
8
|
+
lib/csvreader/parser.rb
|
7
9
|
lib/csvreader/reader.rb
|
8
10
|
lib/csvreader/version.rb
|
9
11
|
test/data/beer.csv
|
10
12
|
test/data/beer11.csv
|
11
13
|
test/data/shakespeare.csv
|
12
14
|
test/helper.rb
|
15
|
+
test/test_parser.rb
|
13
16
|
test/test_reader.rb
|
17
|
+
test/test_reader_hash.rb
|
data/lib/csvreader.rb
CHANGED
data/lib/csvreader/buffer.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvReader
  # Read-only character source with one-char lookahead (+peek+) and
  # unlimited pushback (+ungetc+).
  #
  # Wraps either a String (turned into a StringIO) or any object that
  # responds to +getc+ and +eof?+ (for example an opened File).
  class BufferIO   ## todo: find a better name - why? why not? is really just for reading (keep io?)

    # @param data [String, #getc] the text itself or an already opened io
    def initialize( data )
      # create the IO object we will read from
      @io  = data.is_a?(String) ? StringIO.new(data) : data
      @buf = []   ## peeked / pushed-back chars; the next char to hand out is always the first one
    end

    # @return [Boolean] true when neither the buffer nor the underlying io has chars left
    def eof?
      @buf.empty? && @io.eof?
    end

    # Consume one char - buffered chars first, then the underlying io.
    #
    # @return [String, nil] the char, or whatever the underlying io's getc
    #   returns once it is exhausted
    def getc
      @buf.empty? ? @io.getc : @buf.shift
    end

    # Push a char back so that it is the next one returned by getc / peek.
    #
    # note: works last in / first out - to restore "ab" call ungetc("b") first
    # and ungetc("a") second
    def ungetc( c )
      @buf.unshift( c )
    end

    # Look at the next char without consuming it.
    #
    # @return [String] the next char, or "\0" at end of input
    #
    # todo/fix: use Hexadecimal code: 1A, U+001A for eof char - why? why not?
    def peek
      ## note: no (debug) output here - peek gets called at eof on every regular parse
      return "\0" if eof?

      @buf.push( @io.getc ) if @buf.empty?
      @buf.first
    end
  end # class BufferIO
end # class CsvReader
|
data/lib/csvreader/parser.rb
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvReader
  # Character-by-character CSV parser working on top of BufferIO.
  #
  # Rules implemented below:
  # - fields are separated by a comma (,); records by \n and/or \r
  # - a field may be wrapped in double quotes; inside, a doubled quote ("")
  #   stands for one quote and commas / newlines are plain data
  # - lines starting with # (after optional spaces/tabs) are comments
  # - lines holding only spaces/tabs (or nothing) are blank lines
  #
  # Every record is returned as an array of strings.
  class Parser

    ## char constants
    DOUBLE_QUOTE = "\""
    COMMENT      = "#"    ## use COMMENT_HASH or HASH or ??
    SPACE        = " "
    TAB          = "\t"
    LF           = "\n"   ## 0A (hex)  10 (dec)
    CR           = "\r"   ## 0D (hex)  13 (dec)

    # Raised when a field is followed by something other than
    # a field separator (,), a newline or the end of input.
    class ParseError < StandardError; end


    # Parse all records.
    #
    # @param data [String, #getc, BufferIO] csv text or io to read from
    # @return [Array<Array<String>>]
    def self.parse( data )
      new.parse( data )
    end

    # Parse the first record only.
    #
    # @return [Array<String>, nil] nil if there is no record at all
    def self.parse_line( data )
      new.parse( data, limit: 1 ).first
    end

    # Read and parse the file at path (utf-8, byte order mark skipped).
    def self.read( path )
      File.open( path, 'r:bom|utf-8' ) { |file| new.parse( file ) }
    end

    # Parse the file at path and call the block with every record.
    def self.foreach( path, &block )
      File.open( path, 'r:bom|utf-8' ) { |file| new.foreach( file, &block ) }
    end

    # Parse data and call the block with every record.
    def self.parse_lines( data, &block )
      new.parse_lines( data, &block )
    end


    # Read one field; stops in front of the terminating , / newline / eof.
    #
    # @param io [BufferIO]
    # @param trim [Boolean] strip leading and trailing whitespace of unquoted
    #   fields (whitespace around a quoted field always gets dropped)
    # @return [String]
    def parse_field( io, trim: true )
      value = parse_spaces( io )    ## start with the leading spaces

      if end_of_field?( io )        ## empty field
        value = value.strip  if trim
      elsif io.peek == DOUBLE_QUOTE
        value = value.strip         ## note: always strip/trim leading spaces in quoted value
        io.getc                     ## eat-up opening double_quote

        loop do
          ## eat-up everything until the next quote (")
          value << io.getc  until io.peek == DOUBLE_QUOTE || io.eof?
          break if io.eof?          ## unterminated quote - keep what we have

          io.getc                   ## eat-up double_quote
          break unless io.peek == DOUBLE_QUOTE   ## single quote => end of value
          value << io.getc          ## doubled up quote => add one quote and continue
        end

        ## note: always eat-up all trailing spaces (" ") and tabs (\t)
        skip_spaces( io )
      else
        ## consume simple value until we hit "," or "\n" or "\r"
        ##   note: will eat-up quotes too!!!
        value << io.getc  until end_of_field?( io )
        value = value.strip  if trim
      end

      value
    end


    # Read one record (all fields up to and including the newline(s)).
    #
    # @return [Array<String>]
    # @raise [ParseError] if a quoted field is followed by text
    def parse_record( io, trim: true )
      values = []

      loop do
        values << parse_field( io, trim: trim )
        break if io.eof?

        c = io.peek
        if c == LF || c == CR
          skip_newlines( io )
          break
        elsif c == ","
          io.getc                   ## eat-up FS(,)
        else
          ## note: raise (do NOT exit) - let the caller decide what to do
          raise ParseError, "csv parse error: found >#{c} (#{c.ord})< - FS (,) or RS (\\n) expected"
        end
      end

      values
    end


    # Eat-up all consecutive \n and \r.
    def skip_newlines( io )
      return if io.eof?

      while (c=io.peek; c==LF || c==CR)
        io.getc
      end
    end

    # Eat-up everything up to (but not including) the next \n or \r.
    def skip_until_eol( io )
      return if io.eof?

      until (c=io.peek; c==LF || c==CR || io.eof?)
        io.getc
      end
    end

    # Eat-up all consecutive spaces (" ") and tabs (\t).
    def skip_spaces( io )
      return if io.eof?

      while (c=io.peek; c==SPACE || c==TAB)
        io.getc
      end
    end

    # Like skip_spaces but returns the consumed spaces/tabs (helper method).
    #
    # @return [String]
    def parse_spaces( io )
      spaces = ""
      while (c=io.peek; c==SPACE || c==TAB)
        spaces << io.getc
      end
      spaces
    end


    # Parse record by record and call the block with every record.
    #
    # @param io_maybe [String, #getc, BufferIO] gets wrapped into a BufferIO
    #   unless it already is one (allows (re)use of a BufferIO managed from "outside")
    # @param trim [Boolean] passed on to parse_field
    # @param comments [Boolean] skip lines starting with #
    # @param blanks [Boolean] skip blank lines
    #
    # note: requires block - enforce? how? why? why not?
    def parse_lines( io_maybe, trim: true,
                               comments: true,
                               blanks: true,   &block )

      io = io_maybe.is_a?( BufferIO ) ? io_maybe : BufferIO.new( io_maybe )

      ## leading spaces get read upfront to look "behind" them for # or newline
      lookahead = comments || blanks

      until io.eof?
        spaces = lookahead ? parse_spaces( io ) : ""
        c      = io.peek

        if comments && c == COMMENT                            ## comment line
          skip_until_eol( io )
          skip_newlines( io )
        elsif blanks && (c == LF || c == CR || io.eof?)       ## blank line
          skip_newlines( io )
        else
          ## undo lookahead - note: MUST ungetc in "reverse" order
          ##   buffer is last in/first out queue!!!!
          spaces.reverse.each_char { |space| io.ungetc( space ) }

          record = parse_record( io, trim: trim )
          block.call( record )
        end
      end
    end # method parse_lines


    # Parse and collect all records (or the first +limit+ records).
    #
    # @param limit [Integer, nil] stop after this many records;
    #   set limit to 1 for processing a "single" line (that is, get one record)
    # @return [Array<Array<String>>]
    def parse( io_maybe, trim: true,
                         comments: true,
                         blanks: true,
                         limit: nil )
      records = []
      return records  if limit && limit <= 0

      parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks ) do |record|
        records << record
        break if limit && records.size >= limit   ## got enough records
      end

      records
    end ## method parse


    # Same as parse_lines.
    def foreach( io_maybe, trim: true,
                           comments: true,
                           blanks: true,   &block )
      parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks, &block )
    end


    private

    ## true if the next char ends the field, that is, "," or "\n" or "\r" (or end of input)
    def end_of_field?( io )
      c = io.peek
      c == "," || c == LF || c == CR || io.eof?
    end

  end # class Parser
end # class CsvReader
|
data/lib/csvreader/reader.rb
CHANGED
@@ -3,9 +3,6 @@
|
|
3
3
|
|
4
4
|
module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
|
5
5
|
|
6
|
-
## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
|
7
|
-
## use __CSV__ or similar? or just ::CSV ??
|
8
|
-
|
9
6
|
|
10
7
|
class Dialect ## todo: use a module - it's just a namespace/module now - why? why not?
|
11
8
|
###
|
@@ -36,52 +33,35 @@ end # class Dialect
|
|
36
33
|
|
37
34
|
class Configuration
|
38
35
|
|
39
|
-
puts "CSV::VERSION:"
|
40
|
-
puts CSV::VERSION
|
41
|
-
|
42
|
-
puts "builtin CSV::Converters:"
|
43
|
-
pp CSV::Converters
|
44
|
-
|
45
|
-
puts "CSV::DEFAULT_OPTIONS:"
|
46
|
-
pp CSV::DEFAULT_OPTIONS
|
47
|
-
|
48
|
-
## register our own converters
|
49
|
-
## check if strip gets called for nil values too?
|
50
|
-
CSV::Converters[:strip] = ->(field) { field.strip }
|
51
|
-
|
52
36
|
|
53
37
|
attr_accessor :sep ## col_sep (column separator)
|
54
38
|
attr_accessor :na ## not available (string or array of strings or nil) - rename to nas/nils/nulls - why? why not?
|
55
39
|
attr_accessor :trim ### allow ltrim/rtrim/trim - why? why not?
|
40
|
+
attr_accessor :blanks
|
41
|
+
attr_accessor :comments
|
56
42
|
attr_accessor :dialect
|
57
43
|
|
58
44
|
def initialize
|
59
|
-
@sep
|
45
|
+
@sep = ','
|
46
|
+
@blanks = true
|
47
|
+
@comments = true
|
48
|
+
@trim = true
|
60
49
|
## note: do NOT add headers as global - should ALWAYS be explicit
|
61
50
|
## headers (true/false) - changes resultset and requires different processing!!!
|
62
51
|
|
63
52
|
self ## return self for chaining
|
64
53
|
end
|
65
54
|
|
66
|
-
|
55
|
+
## strip leading and trailing spaces
|
56
|
+
def trim?() @trim; end
|
67
57
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
## see skip_blanks in default_options
|
72
|
-
line.empty?
|
73
|
-
end
|
58
|
+
## skip blank lines (with only 1+ spaces)
|
59
|
+
## note: for now blank lines with no spaces will always get skipped
|
60
|
+
def blanks?() @blanks; end
|
74
61
|
|
75
|
-
## lines starting with # (note: only leading spaces allowed)
|
76
|
-
COMMENTS_REGEX = /^\s*#/
|
77
|
-
BLANK_REGEX = /^\s*$/ ## skip all whitespace lines - note: use "" or , for a blank record!!!
|
78
|
-
SKIP_REGEX = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )
|
79
62
|
|
80
|
-
def
|
81
|
-
|
82
|
-
## see skip_lines in default_options
|
83
|
-
line =~ SKIP_REGEX
|
84
|
-
end
|
63
|
+
def comments?() @comments; end
|
64
|
+
|
85
65
|
|
86
66
|
## built-in (default) options
|
87
67
|
## todo: find a better name?
|
@@ -99,9 +79,10 @@ end # class Dialect
|
|
99
79
|
## strip leading and trailing spaces
|
100
80
|
## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
|
101
81
|
defaults = {
|
102
|
-
|
103
|
-
|
104
|
-
:
|
82
|
+
blanks: @blanks, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
|
83
|
+
comments: @comments,
|
84
|
+
trim: @trim
|
85
|
+
## :converters => :strip
|
105
86
|
}
|
106
87
|
defaults
|
107
88
|
end
|
@@ -136,47 +117,51 @@ class CsvReader
|
|
136
117
|
converters: nil)
|
137
118
|
## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
|
138
119
|
csv_options = Csv.config.default_options.merge(
|
139
|
-
headers: false, ## note: always turn off headers!!!!!!
|
140
120
|
col_sep: sep
|
141
121
|
)
|
142
122
|
## pp csv_options
|
143
|
-
|
123
|
+
Parser.parse_line( txt ) ##, csv_options )
|
144
124
|
end
|
145
125
|
|
146
|
-
|
126
|
+
|
127
|
+
##
|
128
|
+
## todo/fix: "unify" parse and parse_lines !!!
|
129
|
+
## check for block_given? - why? why not?
|
130
|
+
|
131
|
+
def self.parse( txt, sep: Csv.config.sep )
|
147
132
|
csv_options = Csv.config.default_options.merge(
|
148
|
-
headers: headers,
|
149
133
|
col_sep: sep
|
150
134
|
)
|
151
135
|
## pp csv_options
|
152
|
-
|
136
|
+
Parser.parse( txt ) ###, csv_options )
|
153
137
|
end
|
154
138
|
|
155
|
-
def self.
|
139
|
+
def self.parse_lines( txt, sep: Csv.config.sep, &block )
|
140
|
+
csv_options = Csv.config.default_options.merge(
|
141
|
+
col_sep: sep
|
142
|
+
)
|
143
|
+
## pp csv_options
|
144
|
+
Parser.parse_lines( txt, &block ) ###, csv_options )
|
145
|
+
end
|
146
|
+
|
147
|
+
def self.read( path, sep: Csv.config.sep )
|
156
148
|
## note: use our own file.open
|
157
149
|
## always use utf-8 for now
|
158
150
|
## check/todo: add skip option bom too - why? why not?
|
159
|
-
txt = File.open( path, 'r:bom|utf-8' )
|
160
|
-
parse( txt, sep: sep
|
151
|
+
txt = File.open( path, 'r:bom|utf-8' ).read
|
152
|
+
parse( txt, sep: sep )
|
161
153
|
end
|
162
154
|
|
163
|
-
|
155
|
+
|
156
|
+
def self.foreach( path, sep: Csv.config.sep, &block )
|
164
157
|
csv_options = Csv.config.default_options.merge(
|
165
|
-
|
166
|
-
col_sep: sep,
|
167
|
-
external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
|
158
|
+
col_sep: sep
|
168
159
|
)
|
169
160
|
|
170
|
-
|
171
|
-
## can use bom e.g. 'bom|utf-8' - how?
|
172
|
-
## raises ArgumentError: unknown encoding name - bom|utf-8
|
173
|
-
|
174
|
-
|
175
|
-
CSV.foreach( path, csv_options ) do |row|
|
176
|
-
yield( row ) ## check/todo: use block.call( row ) ## why? why not?
|
177
|
-
end
|
161
|
+
Parser.foreach( path, &block ) ###, csv_options )
|
178
162
|
end
|
179
163
|
|
164
|
+
|
180
165
|
def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
|
181
166
|
# read first lines (only)
|
182
167
|
# and parse with csv to get header from csv library itself
|
@@ -187,49 +172,64 @@ class CsvReader
|
|
187
172
|
## - NOT a comments line or
|
188
173
|
## - NOT a blank line
|
189
174
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
## todo/check if readline includes \n\r too??
|
196
|
-
## yes! - line include \n e.g.
|
197
|
-
## "Brewery,City,Name,Abv\n" or
|
198
|
-
## "#######\n# try with some comments\n# and blank lines even before header\n\nBrewery,City,Name,Abv\n"
|
199
|
-
loop do
|
200
|
-
line = f.readline
|
201
|
-
lines << line
|
202
|
-
break unless Csv.config.skip?( line ) || Csv.config.blank?( line )
|
203
|
-
end
|
204
|
-
end
|
175
|
+
record = nil
|
176
|
+
File.open( path, 'r:bom|utf-8' ) do |file|
|
177
|
+
record = Parser.parse_line( file )
|
178
|
+
end
|
205
179
|
|
206
|
-
|
207
|
-
## pp lines
|
208
|
-
|
209
|
-
## note: do NOT use headers: true to get "plain" data array (no hash records)
|
210
|
-
## hash record does NOT work for single line/row
|
211
|
-
parse_line( lines, sep: sep )
|
180
|
+
record ## todo/fix: return nil for empty - why? why not?
|
212
181
|
end # method self.header
|
213
182
|
|
214
183
|
end # class CsvReader
|
215
184
|
|
216
185
|
|
217
186
|
|
187
|
+
|
218
188
|
class CsvHashReader
|
219
189
|
|
220
|
-
|
221
|
-
|
190
|
+
|
191
|
+
def self.parse( txt, sep: Csv.config.sep, headers: nil )
|
192
|
+
|
193
|
+
## pass in headers as array e.g. ['A', 'B', 'C']
|
194
|
+
names = headers ? headers : nil
|
195
|
+
|
196
|
+
records = []
|
197
|
+
CsvReader.parse_lines( txt ) do |values| # sep: sep
|
198
|
+
if names.nil?
|
199
|
+
names = values ## store header row / a.k.a. field/column names
|
200
|
+
else
|
201
|
+
record = names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
|
202
|
+
records << record
|
203
|
+
end
|
204
|
+
end
|
205
|
+
records
|
222
206
|
end
|
223
207
|
|
224
|
-
|
225
|
-
|
208
|
+
|
209
|
+
def self.read( path, sep: Csv.config.sep, headers: nil )
|
210
|
+
txt = File.open( path, 'r:bom|utf-8' ).read
|
211
|
+
parse( txt, sep: sep, headers: headers )
|
226
212
|
end
|
227
213
|
|
228
|
-
|
229
|
-
|
214
|
+
|
215
|
+
def self.foreach( path, sep: Csv.config.sep, headers: nil, &block )
|
216
|
+
|
217
|
+
## pass in headers as array e.g. ['A', 'B', 'C']
|
218
|
+
names = headers ? headers : nil
|
219
|
+
|
220
|
+
CsvReader.foreach( path ) do |values| # sep: sep
|
221
|
+
if names.nil?
|
222
|
+
names = values ## store header row / a.k.a. field/column names
|
223
|
+
else
|
224
|
+
record = names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
|
225
|
+
block.call( record )
|
226
|
+
end
|
227
|
+
end
|
230
228
|
end
|
231
229
|
|
230
|
+
|
232
231
|
def self.header( path, sep: Csv.config.sep ) ## add header too? why? why not?
|
232
|
+
## same as "classic" header method - delegate/reuse :-)
|
233
233
|
CsvReader.header( path, sep: sep )
|
234
234
|
end
|
235
235
|
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_parser.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
## Checks CsvReader::Parser.parse against small inline csv samples.
##  note: uses Minitest::Test (the MiniTest alias is no longer loaded by default in newer minitest versions)
class TestParser < Minitest::Test

def test_parse1
  records = [["a", "b", "c"],
             ["1", "2", "3"],
             ["4", "5", "6"]]

  ## don't care about newlines (\r\n)
  assert_equal records, CsvReader::Parser.parse( "a,b,c\n1,2,3\n4,5,6" )
  assert_equal records, CsvReader::Parser.parse( "a,b,c\n1,2,3\n4,5,6\n" )
  assert_equal records, CsvReader::Parser.parse( "a,b,c\r1,2,3\r4,5,6" )
  assert_equal records, CsvReader::Parser.parse( "a,b,c\r\n1,2,3\r\n4,5,6\r\n" )

  ## or leading and trailing spaces
  assert_equal records, CsvReader::Parser.parse( " \n a , b , c \n 1,2 ,3 \n 4,5,6 " )
  assert_equal records, CsvReader::Parser.parse( "\n\na, b,c \n 1, 2, 3\n 4, 5, 6" )
  assert_equal records, CsvReader::Parser.parse( " \"a\" , b , \"c\" \n1, 2,\"3\" \n4,5, \"6\"" )
  assert_equal records, CsvReader::Parser.parse( "a, b, c\n1, 2,3\n\n\n4,5,6\n\n\n" )
  assert_equal records, CsvReader::Parser.parse( " a, b ,c \n 1 , 2 , 3 \n4,5,6 " )
end


## quoted values may include newlines and doubled up quotes ("")
def test_parse_quotes
  records = [["a", "b", "c"],
             ["11 \n 11", "\"2\"", "3"]]

  assert_equal records, CsvReader::Parser.parse( " a, b ,c \n\"11 \n 11\", \"\"\"2\"\"\" , 3 \n" )
  assert_equal records, CsvReader::Parser.parse( "\n\n \"a\", \"b\" ,\"c\" \n \"11 \n 11\" , \"\"\"2\"\"\" , 3 \n" )
end

## empty fields (quoted or not) are empty strings; no input is no records
def test_parse_empties
  records = [["", "", ""]]

  assert_equal records, CsvReader::Parser.parse( ",," )
  assert_equal records, CsvReader::Parser.parse( <<TXT )
"","",""
TXT

  assert_equal [], CsvReader::Parser.parse( "" )
end


## comment lines and blank lines get skipped - before and after the records
def test_parse_comments
  records = [["a", "b", "c"],
             ["1", "2", "3"]]

  assert_equal records, CsvReader::Parser.parse( <<TXT )
# comment
# comment
## comment

a, b, c
1, 2, 3

TXT

  assert_equal records, CsvReader::Parser.parse( <<TXT )
a, b, c
1, 2, 3

# comment
# comment
## comment
TXT
end

end # class TestParser
|
data/test/test_reader.rb
CHANGED
@@ -12,43 +12,17 @@ class TestReader < MiniTest::Test
|
|
12
12
|
|
13
13
|
def test_read
|
14
14
|
puts "== read: beer.csv:"
|
15
|
-
|
15
|
+
rows = CsvReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
|
16
|
+
pp rows
|
16
17
|
|
17
|
-
|
18
|
-
pp data
|
19
|
-
|
20
|
-
data.each do |row|
|
21
|
-
pp row
|
22
|
-
end
|
23
|
-
puts " #{data.size} rows"
|
24
|
-
assert_equal 7, data.size ## note: include header row in count
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_read_hash
|
28
|
-
puts "== read (hash): beer.csv:"
|
29
|
-
table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" ) ## returns CSV::Table
|
30
|
-
|
31
|
-
pp table.class.name
|
32
|
-
pp table
|
33
|
-
pp table.to_a ## note: includes header (first row with column names)
|
34
|
-
|
35
|
-
table.each do |row| ## note: will skip (NOT include) header row!!
|
18
|
+
rows.each do |row|
|
36
19
|
pp row
|
37
20
|
end
|
38
|
-
puts " #{
|
39
|
-
assert_equal
|
21
|
+
puts " #{rows.size} rows"
|
22
|
+
assert_equal 7, rows.size ## note: include header row in count
|
40
23
|
end
|
41
24
|
|
42
25
|
|
43
|
-
def test_read_hash11
|
44
|
-
puts "== read (hash): beer11.csv:"
|
45
|
-
table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
|
46
|
-
pp table
|
47
|
-
pp table.to_a ## note: includes header (first row with column names)
|
48
|
-
|
49
|
-
assert true
|
50
|
-
end
|
51
|
-
|
52
26
|
|
53
27
|
def test_parse_line
|
54
28
|
puts "== parse_line:"
|
@@ -95,25 +69,7 @@ end
|
|
95
69
|
def test_foreach
|
96
70
|
puts "== foreach: beer11.csv:"
|
97
71
|
CsvReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
|
98
|
-
pp row ## note: is Array (no .fields available!!!!!)
|
99
|
-
end
|
100
|
-
assert true
|
101
|
-
end
|
102
|
-
|
103
|
-
def test_foreach_hash
|
104
|
-
puts "== foreach (hash): beer.csv:"
|
105
|
-
CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
|
106
|
-
pp row
|
107
|
-
pp row.fields
|
108
|
-
end
|
109
|
-
assert true
|
110
|
-
end
|
111
|
-
|
112
|
-
def test_foreach_hash11
|
113
|
-
puts "== foreach (hash): beer11.csv:"
|
114
|
-
CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
|
115
72
|
pp row
|
116
|
-
pp row.fields
|
117
73
|
end
|
118
74
|
assert true
|
119
75
|
end
|
data/test/test_reader_hash.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_reader_hash.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
## Smoke tests for CsvHashReader (records as hashes) using the sample files in the test data dir.
##  note: uses Minitest::Test (the MiniTest alias is no longer loaded by default in newer minitest versions)
class TestHashReader < Minitest::Test

def test_read
  puts "== read (hash): beer.csv:"
  rows = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
  pp rows
  pp rows.to_a

  rows.each do |row|    ## note: will skip (NOT include) header row!!
    pp row
  end
  puts " #{rows.size} rows"   ## note: again will skip (NOT include) header row in count!!!
  assert_equal 6, rows.size
end

def test_read11
  puts "== read (hash): beer11.csv:"
  rows = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
  pp rows
  pp rows.to_a    ## note: rows are the records only - the header row is NOT included

  assert true     ## for now - everything ok if we get here
end


def test_foreach
  puts "== foreach (hash): beer.csv:"
  CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
    pp row
  end
  assert true     ## for now - everything ok if we get here
end

def test_foreach11
  puts "== foreach (hash): beer11.csv:"
  CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
    pp row
  end
  assert true     ## for now - everything ok if we get here
end

end # class TestHashReader
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -55,13 +55,17 @@ files:
|
|
55
55
|
- README.md
|
56
56
|
- Rakefile
|
57
57
|
- lib/csvreader.rb
|
58
|
+
- lib/csvreader/buffer.rb
|
59
|
+
- lib/csvreader/parser.rb
|
58
60
|
- lib/csvreader/reader.rb
|
59
61
|
- lib/csvreader/version.rb
|
60
62
|
- test/data/beer.csv
|
61
63
|
- test/data/beer11.csv
|
62
64
|
- test/data/shakespeare.csv
|
63
65
|
- test/helper.rb
|
66
|
+
- test/test_parser.rb
|
64
67
|
- test/test_reader.rb
|
68
|
+
- test/test_reader_hash.rb
|
65
69
|
homepage: https://github.com/csv11/csvreader
|
66
70
|
licenses:
|
67
71
|
- Public Domain
|