csvreader 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +4 -0
- data/lib/csvreader.rb +2 -1
- data/lib/csvreader/buffer.rb +48 -0
- data/lib/csvreader/parser.rb +251 -0
- data/lib/csvreader/reader.rb +83 -83
- data/lib/csvreader/version.rb +1 -1
- data/test/test_parser.rb +77 -0
- data/test/test_reader.rb +5 -49
- data/test/test_reader_hash.rb +52 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ed373a97a0bdb4c45d2980894a32014cdcb8ca7c
+  data.tar.gz: 784adcade81e39ad9accd1a9b2d0c76fd666b6f9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5523a8697990c691f55aa7c3b23867104b1c4c5b8e9e25b0424a3191e73cbb32cee369541b712f60fc366ba76a8207a77d6b12b68ea209896b6c26e11c5712de
+  data.tar.gz: 7c33c812c2a53303911b6686d03554d6e388b3f936a3b6b8d995ed237651bd171d3bdb8ab8f38f7f327e1a9d1be26d1fa955918012f37cf6a9e1c2cc6ab08373
data/Manifest.txt
CHANGED
@@ -4,10 +4,14 @@ Manifest.txt
 README.md
 Rakefile
 lib/csvreader.rb
+lib/csvreader/buffer.rb
+lib/csvreader/parser.rb
 lib/csvreader/reader.rb
 lib/csvreader/version.rb
 test/data/beer.csv
 test/data/beer11.csv
 test/data/shakespeare.csv
 test/helper.rb
+test/test_parser.rb
 test/test_reader.rb
+test/test_reader_hash.rb
data/lib/csvreader.rb
CHANGED

data/lib/csvreader/buffer.rb
ADDED
@@ -0,0 +1,48 @@
+# encoding: utf-8
+
+class CsvReader
+class BufferIO   ## todo: find a better name - why? why not? is really just for reading (keep io?)
+  def initialize( data )
+    # create the IO object we will read from
+    @io = data.is_a?(String) ? StringIO.new(data) : data
+    @buf = []   ## last (buffer) chars (used for peek)
+  end
+
+  def eof?() @buf.size == 0 && @io.eof?; end
+
+  def getc
+    if @buf.size > 0
+      @buf.shift   ## get first char from buffer
+    else
+      @io.getc
+    end
+  end # method getc
+
+
+  def ungetc( c )
+    ## add upfront as first char in buffer
+    ##   last in/first out queue!!!!
+    @buf.unshift( c )
+    ## puts "ungetc - >#{c} (#{c.ord})< => >#{@buf}<"
+  end
+
+
+  def peek
+    ## todo/fix:
+    ##   use Hexadecimal code: 1A, U+001A for eof char - why? why not?
+    if @buf.size == 0 && @io.eof?
+      puts "peek - hitting eof!!!"
+      ## return eof char(s) - exits? is \0 ?? double check
+      return "\0"
+    end
+
+    if @buf.size == 0
+      c = @io.getc
+      @buf.push( c )
+      ## puts "peek - fill buffer >#{c}< (#{c.ord})"
+    end
+
+    @buf.first
+  end # method peek
+end # class BufferIO
+end # class CsvReader
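For orientation only (not part of the diff): a rough usage sketch of the new BufferIO wrapper, assuming the updated lib/csvreader.rb requires buffer.rb as the Manifest above suggests. It wraps a String (via StringIO) or an IO and layers a one-character pushback buffer on top of getc/eof? so the parser can peek and ungetc.

  require 'csvreader'

  io = CsvReader::BufferIO.new( "a,b\n" )   ## also accepts an already-open IO
  io.peek          #=> "a"   -- fills the internal buffer, does not consume
  io.getc          #=> "a"   -- drains the buffer first, then reads from the IO
  io.ungetc( "a" )           ## push the char back (last in/first out)
  io.peek          #=> "a"
  io.eof?          #=> false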
data/lib/csvreader/parser.rb
ADDED
@@ -0,0 +1,251 @@
+# encoding: utf-8
+
+class CsvReader
+class Parser
+
+
+  ## char constants
+  DOUBLE_QUOTE = "\""
+  COMMENT      = "#"    ## use COMMENT_HASH or HASH or ??
+  SPACE        = " "
+  TAB          = "\t"
+  LF           = "\n"   ## 0A (hex)  10 (dec)
+  CR           = "\r"   ## 0D (hex)  13 (dec)
+
+
+  def self.parse( data )
+    puts "parse:"
+    pp data
+
+    parser = new
+    parser.parse( data )
+  end
+
+  def self.parse_line( data )
+    puts "parse_line:"
+
+    parser = new
+    records = parser.parse( data, limit: 1 )
+
+    ## unwrap record if empty return nil - why? why not?
+    ##   return empty record e.g. [] - why? why not?
+    records.size == 0 ? nil : records.first
+  end
+
+
+
+  def self.read( path )
+    parser = new
+    File.open( path, 'r:bom|utf-8' ) do |file|
+      parser.parse( file )
+    end
+  end
+
+  def self.foreach( path, &block )
+    parser = new
+    File.open( path, 'r:bom|utf-8' ) do |file|
+      parser.foreach( file, &block )
+    end
+  end
+
+  def self.parse_lines( data, &block )
+    parser = new
+    parser.parse_lines( data, &block )
+  end
+
+
+
+
+
+  def parse_field( io, trim: true )
+    value = ""
+    value << parse_spaces( io )   ## add leading spaces
+
+    if (c=io.peek; c=="," || c==LF || c==CR || io.eof?)   ## empty field
+      value = value.strip if trim   ## strip all spaces
+      ## return value; do nothing
+    elsif io.peek == DOUBLE_QUOTE
+      puts "start double_quote field - value >#{value}<"
+      value = value.strip   ## note always strip/trim leading spaces in quoted value
+
+      puts "start double_quote field - peek >#{io.peek}< (#{io.peek.ord})"
+      io.getc   ## eat-up double_quote
+
+      loop do
+        while (c=io.peek; !(c==DOUBLE_QUOTE || io.eof?))
+          value << io.getc   ## eat-up everything unit quote (")
+        end
+
+        break if io.eof?
+
+        io.getc   ## eat-up double_quote
+
+        if io.peek == DOUBLE_QUOTE   ## doubled up quote?
+          value << io.getc   ## add doube quote and continue!!!!
+        else
+          break
+        end
+      end
+
+      ## note: always eat-up all trailing spaces (" ") and tabs (\t)
+      skip_spaces( io )
+      puts "end double_quote field - peek >#{io.peek}< (#{io.peek.ord})"
+    else
+      puts "start reg field - peek >#{io.peek}< (#{io.peek.ord})"
+      ## consume simple value
+      ##   until we hit "," or "\n" or "\r"
+      ##   note: will eat-up quotes too!!!
+      while (c=io.peek; !(c=="," || c==LF || c==CR || io.eof?))
+        puts "  add char >#{io.peek}< (#{io.peek.ord})"
+        value << io.getc   ## eat-up all spaces (" ") and tabs (\t)
+      end
+      value = value.strip if trim   ## strip all spaces
+      puts "end reg field - peek >#{io.peek}< (#{io.peek.ord})"
+    end
+
+    value
+  end
+
+
+
+  def parse_record( io, trim: true )
+    values = []
+
+    loop do
+      value = parse_field( io, trim: trim )
+      puts "value: »#{value}«"
+      values << value
+
+      if io.eof?
+        break
+      elsif (c=io.peek; c==LF || c==CR)
+        skip_newlines( io )
+        break
+      elsif io.peek == ","
+        io.getc   ## eat-up FS(,)
+      else
+        puts "*** csv parse error: found >#{io.peek} (#{io.peek.ord})< - FS (,) or RS (\\n) expected!!!!"
+        exit(1)
+      end
+    end
+
+    values
+  end
+
+
+  def skip_newlines( io )
+    return if io.eof?
+
+    while (c=io.peek; c==LF || c==CR)
+      io.getc   ## eat-up all \n and \r
+    end
+  end
+
+
+  def skip_until_eol( io )
+    return if io.eof?
+
+    while (c=io.peek; !(c==LF || c==CR || io.eof?))
+      io.getc   ## eat-up all until end of line
+    end
+  end
+
+  def skip_spaces( io )
+    return if io.eof?
+
+    while (c=io.peek; c==SPACE || c==TAB)
+      io.getc   ## note: always eat-up all spaces (" ") and tabs (\t)
+    end
+  end
+
+
+
+
+  def parse_spaces( io )   ## helper method
+    spaces = ""
+    ## add leading spaces
+    while (c=io.peek; c==SPACE || c==TAB)
+      spaces << io.getc   ## eat-up all spaces (" ") and tabs (\t)
+    end
+    spaces
+  end
+
+
+
+
+  def parse_lines( io_maybe, trim: true,
+                             comments: true,
+                             blanks: true, &block )
+
+    ## find a better name for io_maybe
+    ##   make sure io is a wrapped into BufferIO!!!!!!
+    if io_maybe.is_a?( BufferIO )   ### allow (re)use of BufferIO if managed from "outside"
+      io = io_maybe
+    else
+      io = BufferIO.new( io_maybe )
+    end
+
+
+    loop do
+      break if io.eof?
+
+      ## hack: use own space buffer for peek( x ) lookahead (more than one char)
+      ##   check for comments or blank lines
+      if comments || blanks
+        spaces = parse_spaces( io )
+      end
+
+      if comments && io.peek == COMMENT   ## comment line
+        puts "skipping comment - peek >#{io.peek}< (#{io.peek.ord})"
+        skip_until_eol( io )
+        skip_newlines( io )
+      elsif blanks && (c=io.peek; c==LF || c==CR || io.eof?)
+        puts "skipping blank - peek >#{io.peek}< (#{io.peek.ord})"
+        skip_newlines( io )
+      else   # undo (ungetc spaces)
+        puts "start record - peek >#{io.peek}< (#{io.peek.ord})"
+
+        if comments || blanks
+          ## note: MUST ungetc in "reverse" order
+          ##   ## buffer is last in/first out queue!!!!
+          spaces.reverse.each_char { |space| io.ungetc( space ) }
+        end
+
+        record = parse_record( io, trim: trim )
+
+        ## note: requires block - enforce? how? why? why not?
+        block.call( record )   ## yield( record )
+      end
+    end # loop
+  end # method parse_lines
+
+
+
+
+  def parse( io_maybe, trim: true,
+                       comments: true,
+                       blanks: true,
+                       limit: nil )
+    records = []
+
+    parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks ) do |record|
+      records << record
+
+      ## set limit to 1 for processing "single" line (that is, get one record)
+      return records if limit && limit >= records.size
+    end
+
+    records
+  end ## method parse
+
+
+  def foreach( io_maybe, trim: true,
+                         comments: true,
+                         blanks: true, &block )
+    parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks, &block )
+  end
+
+
+
+end # class Parser
+end # class CsvReader
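A quick usage sketch of the new pure-Ruby Parser (not part of the diff), mirroring the calls exercised in test/test_parser.rb below. Comments (#) and blank lines are skipped and values are trimmed by default; note that this 0.4.0 code still prints its debug trace with puts.

  require 'csvreader'

  CsvReader::Parser.parse( "a,b,c\n1,2,3\n4,5,6" )
  #=> [["a", "b", "c"], ["1", "2", "3"], ["4", "5", "6"]]

  CsvReader::Parser.parse_line( "1, 2, 3" )
  #=> ["1", "2", "3"]

  CsvReader::Parser.parse( "# comment\n\n a , b \n" )
  #=> [["a", "b"]]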
data/lib/csvreader/reader.rb
CHANGED
@@ -3,9 +3,6 @@
 
 module Csv   ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
 
-## STD_CSV_ENGINE = CSV   ## to avoid name confusion use longer name - why? why not? find a better name?
-##   use __CSV__ or similar? or just ::CSV ??
-
 
 class Dialect   ## todo: use a module - it's just a namespace/module now - why? why not?
 ###
@@ -36,52 +33,35 @@ end # class Dialect
 
 class Configuration
 
-  puts "CSV::VERSION:"
-  puts CSV::VERSION
-
-  puts "builtin CSV::Converters:"
-  pp CSV::Converters
-
-  puts "CSV::DEFAULT_OPTIONS:"
-  pp CSV::DEFAULT_OPTIONS
-
-  ## register our own converters
-  ##   check if strip gets called for nil values too?
-  CSV::Converters[:strip] = ->(field) { field.strip }
-
 
   attr_accessor :sep       ## col_sep (column separator)
   attr_accessor :na        ## not available (string or array of strings or nil) - rename to nas/nils/nulls - why? why not?
   attr_accessor :trim      ### allow ltrim/rtrim/trim - why? why not?
+  attr_accessor :blanks
+  attr_accessor :comments
   attr_accessor :dialect
 
   def initialize
-    @sep
+    @sep      = ','
+    @blanks   = true
+    @comments = true
+    @trim     = true
    ## note: do NOT add headers as global - should ALWAYS be explicit
    ##   headers (true/false) - changes resultset and requires different processing!!!
 
     self   ## return self for chaining
   end
 
-
+  ## strip leading and trailing spaces
+  def trim?() @trim; end
 
-
-
-
-    ## see skip_blanks in default_options
-    line.empty?
-  end
+  ## skip blank lines (with only 1+ spaces)
+  ##   note: for now blank lines with no spaces will always get skipped
+  def blanks?() @blanks; end
 
-  ## lines starting with # (note: only leading spaces allowed)
-  COMMENTS_REGEX = /^\s*#/
-  BLANK_REGEX    = /^\s*$/   ## skip all whitespace lines - note: use "" or , for a blank record!!!
-  SKIP_REGEX     = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )
 
-  def
-
-    ## see skip_lines in default_options
-    line =~ SKIP_REGEX
-  end
+  def comments?() @comments; end
+
 
   ## built-in (default) options
   ##   todo: find a better name?
@@ -99,9 +79,10 @@ end # class Dialect
     ## strip leading and trailing spaces
     ## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
     defaults = {
-
-
-      :
+      blanks:   @blanks,   ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
+      comments: @comments,
+      trim:     @trim
+      ##  :converters   => :strip
     }
     defaults
   end
@@ -136,47 +117,51 @@ class CsvReader
                              converters: nil)
     ## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
     csv_options = Csv.config.default_options.merge(
-      headers: false,   ## note: always turn off headers!!!!!!
       col_sep: sep
     )
     ## pp csv_options
-
+    Parser.parse_line( txt )   ##, csv_options )
   end
 
-
+
+  ##
+  ##   todo/fix: "unify" parse and parse_lines !!!
+  ##   check for block_given? - why? why not?
+
+  def self.parse( txt, sep: Csv.config.sep )
     csv_options = Csv.config.default_options.merge(
-      headers: headers,
       col_sep: sep
     )
     ## pp csv_options
-
+    Parser.parse( txt )   ###, csv_options )
  end
 
-  def self.
+  def self.parse_lines( txt, sep: Csv.config.sep, &block )
+    csv_options = Csv.config.default_options.merge(
+      col_sep: sep
+    )
+    ## pp csv_options
+    Parser.parse_lines( txt, &block )   ###, csv_options )
+  end
+
+  def self.read( path, sep: Csv.config.sep )
     ## note: use our own file.open
     ##   always use utf-8 for now
     ##   check/todo: add skip option bom too - why? why not?
-    txt = File.open( path, 'r:bom|utf-8' )
-    parse( txt, sep: sep
+    txt = File.open( path, 'r:bom|utf-8' ).read
+    parse( txt, sep: sep )
  end
 
-
+
+  def self.foreach( path, sep: Csv.config.sep, &block )
    csv_options = Csv.config.default_options.merge(
-
-      col_sep: sep,
-      external_encoding: 'utf-8'   ## note: always (auto-)add utf-8 external encoding for now!!!
+      col_sep: sep
    )
 
-
-    ## can use bom e.g. 'bom|utf-8' - how?
-    ##   raises ArgumentError: unknown encoding name - bom|utf-8
-
-
-    CSV.foreach( path, csv_options ) do |row|
-      yield( row )   ## check/todo: use block.call( row ) ## why? why not?
-    end
+    Parser.foreach( path, &block )   ###, csv_options )
   end
 
+
   def self.header( path, sep: Csv.config.sep )   ## use header or headers - or use both (with alias)?
     # read first lines (only)
     #   and parse with csv to get header from csv library itself
@@ -187,49 +172,64 @@ class CsvReader
     ##   - NOT a comments line or
     ##   - NOT a blank line
 
-
-
-
-
-
-    ## todo/check if readline includes \n\r too??
-    ##   yes! - line include \n e.g.
-    ##   "Brewery,City,Name,Abv\n" or
-    ##   "#######\n# try with some comments\n# and blank lines even before header\n\nBrewery,City,Name,Abv\n"
-    loop do
-      line = f.readline
-      lines << line
-      break unless Csv.config.skip?( line ) || Csv.config.blank?( line )
-    end
-  end
+    record = nil
+    File.open( path, 'r:bom|utf-8' ) do |file|
+      record = Parser.parse_line( file )
+    end
 
-
-    ## pp lines
-
-    ## note: do NOT use headers: true to get "plain" data array (no hash records)
-    ##   hash record does NOT work for single line/row
-    parse_line( lines, sep: sep )
+    record   ## todo/fix: return nil for empty - why? why not?
   end # method self.header
 
 end # class CsvReader
 
 
 
+
 class CsvHashReader
 
-
-
+
+  def self.parse( txt, sep: Csv.config.sep, headers: nil )
+
+    ## pass in headers as array e.g. ['A', 'B', 'C']
+    names = headers ? headers : nil
+
+    records = []
+    CsvReader.parse_lines( txt ) do |values|   # sep: sep
+      if names.nil?
+        names = values   ## store header row / a.k.a. field/column names
+      else
+        record = names.zip( values ).to_h   ## todo/fix: check for more values than names/headers!!!
+        records << record
+      end
+    end
+    records
   end
 
-
-
+
+  def self.read( path, sep: Csv.config.sep, headers: nil )
+    txt = File.open( path, 'r:bom|utf-8' ).read
+    parse( txt, sep: sep, headers: headers )
  end
 
-
-
+
+  def self.foreach( path, sep: Csv.config.sep, headers: nil, &block )
+
+    ## pass in headers as array e.g. ['A', 'B', 'C']
+    names = headers ? headers : nil
+
+    CsvReader.foreach( path ) do |values|   # sep: sep
+      if names.nil?
+        names = values   ## store header row / a.k.a. field/column names
+      else
+        record = names.zip( values ).to_h   ## todo/fix: check for more values than names/headers!!!
+        block.call( record )
+      end
+    end
  end
 
+
   def self.header( path, sep: Csv.config.sep )   ## add header too? why? why not?
+    ## same as "classic" header method - delegate/reuse :-)
     CsvReader.header( path, sep: sep )
   end
 
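Taken together, the reworked readers now delegate to CsvReader::Parser instead of the stdlib CSV. A rough usage sketch (not part of the diff; the file path is a placeholder), matching the tests below — CsvReader returns arrays of arrays with the header row included, while CsvHashReader zips the first row into hash keys:

  require 'csvreader'

  rows = CsvReader.read( "beer.csv" )        ## array of arrays, header row included
  rows.each { |row| pp row }                 ## row is a plain Array

  recs = CsvHashReader.read( "beer.csv" )    ## array of hashes keyed by the header row
  recs.each { |rec| pp rec }                 ## rec is a plain Hash (no CSV::Row, no .fields)

  CsvReader.foreach( "beer.csv" )     { |row| pp row }
  CsvHashReader.foreach( "beer.csv" ) { |rec| pp rec }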
data/lib/csvreader/version.rb
CHANGED
data/test/test_parser.rb
ADDED
@@ -0,0 +1,77 @@
+# encoding: utf-8
+
+###
+#  to run use
+#    ruby -I ./lib -I ./test test/test_parser.rb
+
+
+require 'helper'
+
+class TestParser < MiniTest::Test
+
+
+  def test_parse1
+    records = [["a", "b", "c"],
+               ["1", "2", "3"],
+               ["4", "5", "6"]]
+
+    ## don't care about newlines (\r\n)
+    assert_equal records, CsvReader::Parser.parse( "a,b,c\n1,2,3\n4,5,6" )
+    assert_equal records, CsvReader::Parser.parse( "a,b,c\n1,2,3\n4,5,6\n" )
+    assert_equal records, CsvReader::Parser.parse( "a,b,c\r1,2,3\r4,5,6" )
+    assert_equal records, CsvReader::Parser.parse( "a,b,c\r\n1,2,3\r\n4,5,6\r\n" )
+
+    ## or leading and trailing spaces
+    assert_equal records, CsvReader::Parser.parse( " \n a , b , c \n 1,2 ,3 \n 4,5,6 " )
+    assert_equal records, CsvReader::Parser.parse( "\n\na, b,c \n 1, 2, 3\n 4, 5, 6" )
+    assert_equal records, CsvReader::Parser.parse( " \"a\" , b , \"c\" \n1, 2,\"3\" \n4,5, \"6\"" )
+    assert_equal records, CsvReader::Parser.parse( "a, b, c\n1, 2,3\n\n\n4,5,6\n\n\n" )
+    assert_equal records, CsvReader::Parser.parse( " a, b ,c \n 1 , 2 , 3 \n4,5,6 " )
+  end
+
+
+  def test_parse_quotes
+    records = [["a", "b", "c"],
+               ["11 \n 11", "\"2\"", "3"]]
+
+    assert_equal records, CsvReader::Parser.parse( " a, b ,c \n\"11 \n 11\", \"\"\"2\"\"\" , 3 \n" )
+    assert_equal records, CsvReader::Parser.parse( "\n\n \"a\", \"b\" ,\"c\" \n \"11 \n 11\" , \"\"\"2\"\"\" , 3 \n" )
+  end
+
+  def test_parse_empties
+    records = [["", "", ""]]
+
+    assert_equal records, CsvReader::Parser.parse( ",," )
+    assert_equal records, CsvReader::Parser.parse( <<TXT )
+"","",""
+TXT
+
+    assert_equal [], CsvReader::Parser.parse( "" )
+  end
+
+
+  def test_parse_comments
+    records = [["a", "b", "c"],
+               ["1", "2", "3"]]
+
+    assert_equal records, CsvReader::Parser.parse( <<TXT )
+# comment
+# comment
+## comment
+
+a, b, c
+1, 2, 3
+
+TXT
+
+    assert_equal records, CsvReader::Parser.parse( <<TXT )
+a, b, c
+1, 2, 3
+
+# comment
+# comment
+## comment
+TXT
+
+  end
+end # class TestParser
data/test/test_reader.rb
CHANGED
@@ -12,43 +12,17 @@ class TestReader < MiniTest::Test
 
   def test_read
     puts "== read: beer.csv:"
-
+    rows = CsvReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
+    pp rows
 
-
-    pp data
-
-    data.each do |row|
-      pp row
-    end
-    puts " #{data.size} rows"
-    assert_equal 7, data.size   ## note: include header row in count
-  end
-
-  def test_read_hash
-    puts "== read (hash): beer.csv:"
-    table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" )   ## returns CSV::Table
-
-    pp table.class.name
-    pp table
-    pp table.to_a   ## note: includes header (first row with column names)
-
-    table.each do |row|   ## note: will skip (NOT include) header row!!
+    rows.each do |row|
       pp row
     end
-    puts " #{
-    assert_equal
+    puts " #{rows.size} rows"
+    assert_equal 7, rows.size   ## note: include header row in count
  end
 
 
-  def test_read_hash11
-    puts "== read (hash): beer11.csv:"
-    table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
-    pp table
-    pp table.to_a   ## note: includes header (first row with column names)
-
-    assert true
-  end
-
 
   def test_parse_line
     puts "== parse_line:"
@@ -95,25 +69,7 @@ end
   def test_foreach
     puts "== foreach: beer11.csv:"
     CsvReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
-      pp row   ## note: is Array (no .fields available!!!!!)
-    end
-    assert true
-  end
-
-  def test_foreach_hash
-    puts "== foreach (hash): beer.csv:"
-    CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
-      pp row
-      pp row.fields
-    end
-    assert true
-  end
-
-  def test_foreach_hash11
-    puts "== foreach (hash): beer11.csv:"
-    CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
       pp row
-      pp row.fields
     end
     assert true
   end
data/test/test_reader_hash.rb
ADDED
@@ -0,0 +1,52 @@
+# encoding: utf-8
+
+###
+#  to run use
+#    ruby -I ./lib -I ./test test/test_reader_hash.rb
+
+
+require 'helper'
+
+class TestHashReader < MiniTest::Test
+
+
+  def test_read
+    puts "== read (hash): beer.csv:"
+    rows = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
+    pp rows
+    pp rows.to_a
+
+    rows.each do |row|   ## note: will skip (NOT include) header row!!
+      pp row
+    end
+    puts " #{rows.size} rows"   ## note: again will skip (NOT include) header row in count!!!
+    assert_equal 6, rows.size
+  end
+
+  def test_read11
+    puts "== read (hash): beer11.csv:"
+    rows = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
+    pp rows
+    pp rows.to_a   ## note: includes header (first row with column names)
+
+    assert true
+  end
+
+
+  def test_foreach
+    puts "== foreach (hash): beer.csv:"
+    CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
+      pp row
+    end
+    assert true
+  end
+
+  def test_foreach11
+    puts "== foreach (hash): beer11.csv:"
+    CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
+      pp row
+    end
+    assert true
+  end
+
+end # class TestHashReader
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: csvreader
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.4.0
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-08-
+date: 2018-08-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rdoc
@@ -55,13 +55,17 @@ files:
 - README.md
 - Rakefile
 - lib/csvreader.rb
+- lib/csvreader/buffer.rb
+- lib/csvreader/parser.rb
 - lib/csvreader/reader.rb
 - lib/csvreader/version.rb
 - test/data/beer.csv
 - test/data/beer11.csv
 - test/data/shakespeare.csv
 - test/helper.rb
+- test/test_parser.rb
 - test/test_reader.rb
+- test/test_reader_hash.rb
 homepage: https://github.com/csv11/csvreader
 licenses:
 - Public Domain