csvreader 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a9bc6971bd638abc67e8e82e241dbb370602b0d5
4
- data.tar.gz: 062f2727188a6f3705c21a5cc825194f84bea41c
3
+ metadata.gz: ed373a97a0bdb4c45d2980894a32014cdcb8ca7c
4
+ data.tar.gz: 784adcade81e39ad9accd1a9b2d0c76fd666b6f9
5
5
  SHA512:
6
- metadata.gz: 595f1c779e0457377fe5c09602cba1ce7754803b35b9280e06dfc752759da441c708db830cb159076ce3f445b3b6aaf1fef459ff9eaace4ae6a436988e52455f
7
- data.tar.gz: f4dc02242912ba15bef498838093f85de22c3670b5bcb2b92139adbd9343cbddec753f755729f85b2962df26d724ff069cc73b8f5cccd3edb896dc0d3ac26969
6
+ metadata.gz: 5523a8697990c691f55aa7c3b23867104b1c4c5b8e9e25b0424a3191e73cbb32cee369541b712f60fc366ba76a8207a77d6b12b68ea209896b6c26e11c5712de
7
+ data.tar.gz: 7c33c812c2a53303911b6686d03554d6e388b3f936a3b6b8d995ed237651bd171d3bdb8ab8f38f7f327e1a9d1be26d1fa955918012f37cf6a9e1c2cc6ab08373
@@ -4,10 +4,14 @@ Manifest.txt
4
4
  README.md
5
5
  Rakefile
6
6
  lib/csvreader.rb
7
+ lib/csvreader/buffer.rb
8
+ lib/csvreader/parser.rb
7
9
  lib/csvreader/reader.rb
8
10
  lib/csvreader/version.rb
9
11
  test/data/beer.csv
10
12
  test/data/beer11.csv
11
13
  test/data/shakespeare.csv
12
14
  test/helper.rb
15
+ test/test_parser.rb
13
16
  test/test_reader.rb
17
+ test/test_reader_hash.rb
@@ -8,8 +8,9 @@ require 'pp'
8
8
  ###
9
9
  # our own code
10
10
  require 'csvreader/version' # let version always go first
11
+ require 'csvreader/buffer'
12
+ require 'csvreader/parser'
11
13
  require 'csvreader/reader'
12
14
 
13
15
 
14
-
15
16
  puts CsvReader.banner # say hello
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
class CsvReader
  ## Read-only character reader with a small push-back buffer.
  ##   Wraps a String (via StringIO) or any object that responds to
  ##   getc and eof? and adds peek / ungetc lookahead on top of it.
  ##
  ## todo: find a better name - why? why not? is really just for reading (keep io?)
  class BufferIO
    ## data - a String (gets wrapped in a StringIO) or
    ##        an already open IO-like object (used as is)
    def initialize( data )
      @io  = data.is_a?(String) ? StringIO.new(data) : data
      @buf = []   ## pushed-back / peeked chars; first entry gets read next
    end

    ## true only if nothing is buffered AND the wrapped io is exhausted
    def eof?
      @buf.empty? && @io.eof?
    end

    ## returns the next char - buffered chars first, then the wrapped io
    ##   (the wrapped io's getc result is passed through unchanged)
    def getc
      @buf.empty? ? @io.getc : @buf.shift
    end

    ## push back char c; it becomes the very next char returned by getc/peek
    ##   note: last in/first out - to push back a run of chars
    ##         call ungetc in reverse order
    def ungetc( c )
      @buf.unshift( c )
    end

    ## returns the next char without consuming it
    ##   note: returns the sentinel "\0" at end of file (never nil)
    ##         - callers check eof? to tell it apart from data
    ## todo/fix: use Hexadecimal code: 1A, U+001A for eof char - why? why not?
    def peek
      if eof?
        ## reaching eof is a normal state - only report when running with ruby -d
        puts "peek - hitting eof!!!"  if $DEBUG
        return "\0"
      end

      @buf.push( @io.getc )  if @buf.empty?   ## fill lookahead buffer
      @buf.first
    end
  end # class BufferIO
end # class CsvReader
@@ -0,0 +1,251 @@
1
+ # encoding: utf-8
2
+
3
class CsvReader
  ## Hand-written character-by-character csv parser.
  ##   Reads from a BufferIO (or anything BufferIO.new accepts) and
  ##   returns records as arrays of string values.
  ##
  ##   - fields are separated by a comma (,)
  ##   - records end with one or more \n or \r chars
  ##   - double quoted values may include commas and newlines;
  ##     a doubled-up quote ("") inside is an escaped quote
  ##   - optionally skips comment lines (#) and blank lines
  ##
  ##   note: trace output is only printed when running in debug mode (ruby -d)
  class Parser

    ## raised when a field is followed by something other than
    ##   a comma, a newline or the end of the input
    class ParseError < StandardError; end

    ## char constants
    DOUBLE_QUOTE = "\""
    COMMENT      = "#"      ## use COMMENT_HASH or HASH or ??
    SPACE        = " "
    TAB          = "\t"
    LF           = "\n"     ## 0A (hex)  10 (dec)
    CR           = "\r"     ## 0D (hex)  13 (dec)


    ## parse all records from data (String or IO-like); returns array of records
    def self.parse( data )
      if $DEBUG
        puts "parse:"
        pp data
      end

      new.parse( data )
    end

    ## parse the first record only; returns nil if there is no record at all
    def self.parse_line( data )
      puts "parse_line:"  if $DEBUG

      new.parse( data, limit: 1 ).first
    end

    ## read and parse all records from the (utf-8) file at path
    def self.read( path )
      parser = new
      File.open( path, 'r:bom|utf-8' ) do |file|
        parser.parse( file )
      end
    end

    ## parse the (utf-8) file at path and call block for every record
    def self.foreach( path, &block )
      parser = new
      File.open( path, 'r:bom|utf-8' ) do |file|
        parser.foreach( file, &block )
      end
    end

    ## parse data and call block for every record
    def self.parse_lines( data, &block )
      new.parse_lines( data, &block )
    end


    ## parse one field (value) and return it as a string
    ##   stops in front of the terminating comma, newline or eof (does NOT consume it)
    ##   trim - if true strip leading and trailing spaces of unquoted values
    def parse_field( io, trim: true )
      value = ""
      value << parse_spaces( io )    ## collect leading spaces first

      if field_end?( io )     ## empty field
        value = value.strip   if trim
      elsif io.peek == DOUBLE_QUOTE
        trace { "start double_quote field - value >#{value}<" }
        value = value.strip   ## note: always strip/trim leading spaces in quoted value

        trace { "start double_quote field - peek >#{io.peek}< (#{io.peek.ord})" }
        io.getc   ## eat-up opening double_quote

        loop do
          ## eat-up everything until next quote (")
          value << io.getc   until io.peek == DOUBLE_QUOTE || io.eof?

          break if io.eof?   ## unterminated quoted value - keep what we got

          io.getc   ## eat-up double_quote

          ## doubled-up quote? add a single quote and continue!!!
          break unless io.peek == DOUBLE_QUOTE
          value << io.getc
        end

        ## note: always eat-up all trailing spaces (" ") and tabs (\t)
        skip_spaces( io )
        trace { "end double_quote field - peek >#{io.peek}< (#{io.peek.ord})" }
      else
        trace { "start reg field - peek >#{io.peek}< (#{io.peek.ord})" }
        ## consume simple value
        ##   until we hit "," or "\n" or "\r"
        ##    note: will eat-up quotes too!!!
        until field_end?( io )
          trace { "  add char >#{io.peek}< (#{io.peek.ord})" }
          value << io.getc
        end
        value = value.strip   if trim
        trace { "end reg field - peek >#{io.peek}< (#{io.peek.ord})" }
      end

      value
    end


    ## parse one record (line) and return its values as an array of strings
    ##   consumes the terminating newline(s) too
    ##   raises ParseError if a field is followed by an unexpected char
    def parse_record( io, trim: true )
      values = []

      loop do
        value = parse_field( io, trim: trim )
        trace { "value: »#{value}«" }
        values << value

        break if io.eof?

        c = io.peek
        if newline?( c )
          skip_newlines( io )
          break
        elsif c == ","
          io.getc   ## eat-up FS(,)
        else
          ## note: raise (do NOT exit) - let the calling program decide what to do
          raise ParseError, "csv parse error: found >#{c} (#{c.ord})< - FS (,) or RS (\\n) expected!!!!"
        end
      end

      values
    end


    ## eat-up all \n and \r
    def skip_newlines( io )
      return if io.eof?

      io.getc   while newline?( io.peek )
    end

    ## eat-up all until end of line (the newline itself is NOT consumed)
    def skip_until_eol( io )
      return if io.eof?

      io.getc   until newline?( io.peek ) || io.eof?
    end

    ## eat-up all spaces (" ") and tabs (\t)
    def skip_spaces( io )
      return if io.eof?

      io.getc   while space?( io.peek )
    end

    ## eat-up all spaces (" ") and tabs (\t) and return them as a string
    def parse_spaces( io )    ## helper method
      spaces = ""
      spaces << io.getc   while space?( io.peek )
      spaces
    end


    ## parse records one-by-one and call block for every record
    ##   io_maybe - a BufferIO (allows (re)use if managed from "outside") or
    ##              anything accepted by BufferIO.new
    ##   trim     - strip leading and trailing spaces of unquoted values
    ##   comments - skip lines starting with # (leading spaces allowed)
    ##   blanks   - skip lines with spaces only
    ##
    ## note: requires block - enforce? how? why? why not?
    def parse_lines( io_maybe, trim: true,
                               comments: true,
                               blanks: true,   &block )

      ## make sure io is wrapped into BufferIO!!!
      io = io_maybe.is_a?( BufferIO ) ? io_maybe : BufferIO.new( io_maybe )

      until io.eof?
        ## hack: use own space buffer for lookahead (more than one char)
        ##   to check for comments or blank lines
        spaces = parse_spaces( io )   if comments || blanks

        if comments && io.peek == COMMENT      ## comment line
          trace { "skipping comment - peek >#{io.peek}< (#{io.peek.ord})" }
          skip_until_eol( io )
          skip_newlines( io )
        elsif blanks && (newline?( io.peek ) || io.eof?)
          trace { "skipping blank - peek >#{io.peek}< (#{io.peek.ord})" }
          skip_newlines( io )
        else
          trace { "start record - peek >#{io.peek}< (#{io.peek.ord})" }

          ## undo lookahead - spaces belong to the first field
          ##   note: MUST ungetc in "reverse" order
          ##         buffer is last in/first out queue!!!!
          spaces.reverse.each_char { |space| io.ungetc( space ) }   if spaces

          record = parse_record( io, trim: trim )
          block.call( record )
        end
      end
    end # method parse_lines


    ## parse records and return them all as an array
    ##   limit - stop after that many records (positive number) or
    ##           nil for no limit
    ##           e.g. set limit to 1 for processing a "single" line
    def parse( io_maybe, trim: true,
                         comments: true,
                         blanks: true,
                         limit: nil )
      records = []

      parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks ) do |record|
        records << record

        ## stop parsing once we collected limit records
        break if limit && records.size >= limit
      end

      records
    end ## method parse


    ## same as parse_lines - call block for every record
    def foreach( io_maybe, trim: true,
                           comments: true,
                           blanks: true,   &block )
      parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks, &block )
    end


    private

    ## print (lazy built) trace message in debug mode (ruby -d) only
    def trace
      puts yield   if $DEBUG
    end

    ## is c a record separator char?
    def newline?( c )
      c == LF || c == CR
    end

    ## is c a space or tab?
    def space?( c )
      c == SPACE || c == TAB
    end

    ## are we in front of a comma, a newline or at the end of the input?
    def field_end?( io )
      c = io.peek
      c == "," || newline?( c ) || io.eof?
    end

  end # class Parser
end # class CsvReader
@@ -3,9 +3,6 @@
3
3
 
4
4
  module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
5
5
 
6
- ## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
7
- ## use __CSV__ or similar? or just ::CSV ??
8
-
9
6
 
10
7
  class Dialect ## todo: use a module - it's just a namespace/module now - why? why not?
11
8
  ###
@@ -36,52 +33,35 @@ end # class Dialect
36
33
 
37
34
  class Configuration
38
35
 
39
- puts "CSV::VERSION:"
40
- puts CSV::VERSION
41
-
42
- puts "builtin CSV::Converters:"
43
- pp CSV::Converters
44
-
45
- puts "CSV::DEFAULT_OPTIONS:"
46
- pp CSV::DEFAULT_OPTIONS
47
-
48
- ## register our own converters
49
- ## check if strip gets called for nil values too?
50
- CSV::Converters[:strip] = ->(field) { field.strip }
51
-
52
36
 
53
37
  attr_accessor :sep ## col_sep (column separator)
54
38
  attr_accessor :na ## not available (string or array of strings or nil) - rename to nas/nils/nulls - why? why not?
55
39
  attr_accessor :trim ### allow ltrim/rtrim/trim - why? why not?
40
+ attr_accessor :blanks
41
+ attr_accessor :comments
56
42
  attr_accessor :dialect
57
43
 
58
44
  def initialize
59
- @sep = ','
45
+ @sep = ','
46
+ @blanks = true
47
+ @comments = true
48
+ @trim = true
60
49
  ## note: do NOT add headers as global - should ALWAYS be explicit
61
50
  ## headers (true/false) - changes resultset and requires different processing!!!
62
51
 
63
52
  self ## return self for chaining
64
53
  end
65
54
 
66
- def trim?() @trim; end ## strip leading and trailing spaces
55
+ ## strip leading and trailing spaces
56
+ def trim?() @trim; end
67
57
 
68
- def blank?( line )
69
- ## note: blank line does NOT include "blank" with spaces only!!
70
- ## use BLANK_REGEX in skip_lines to clean-up/skip/remove/ignore
71
- ## see skip_blanks in default_options
72
- line.empty?
73
- end
58
+ ## skip blank lines (with only 1+ spaces)
59
+ ## note: for now blank lines with no spaces will always get skipped
60
+ def blanks?() @blanks; end
74
61
 
75
- ## lines starting with # (note: only leading spaces allowed)
76
- COMMENTS_REGEX = /^\s*#/
77
- BLANK_REGEX = /^\s*$/ ## skip all whitespace lines - note: use "" or , for a blank record!!!
78
- SKIP_REGEX = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )
79
62
 
80
- def skip?( line )
81
- ## check if comment line - skip comments
82
- ## see skip_lines in default_options
83
- line =~ SKIP_REGEX
84
- end
63
+ def comments?() @comments; end
64
+
85
65
 
86
66
  ## built-in (default) options
87
67
  ## todo: find a better name?
@@ -99,9 +79,10 @@ end # class Dialect
99
79
  ## strip leading and trailing spaces
100
80
  ## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
101
81
  defaults = {
102
- skip_blanks: true, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
103
- skip_lines: SKIP_REGEX,
104
- :converters => :strip
82
+ blanks: @blanks, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
83
+ comments: @comments,
84
+ trim: @trim
85
+ ## :converters => :strip
105
86
  }
106
87
  defaults
107
88
  end
@@ -136,47 +117,51 @@ class CsvReader
136
117
  converters: nil)
137
118
  ## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
138
119
  csv_options = Csv.config.default_options.merge(
139
- headers: false, ## note: always turn off headers!!!!!!
140
120
  col_sep: sep
141
121
  )
142
122
  ## pp csv_options
143
- CSV.parse_line( txt, csv_options )
123
+ Parser.parse_line( txt ) ##, csv_options )
144
124
  end
145
125
 
146
- def self.parse( txt, sep: Csv.config.sep, headers: false )
126
+
127
+ ##
128
+ ## todo/fix: "unify" parse and parse_lines !!!
129
+ ## check for block_given? - why? why not?
130
+
131
+ def self.parse( txt, sep: Csv.config.sep )
147
132
  csv_options = Csv.config.default_options.merge(
148
- headers: headers,
149
133
  col_sep: sep
150
134
  )
151
135
  ## pp csv_options
152
- CSV.parse( txt, csv_options )
136
+ Parser.parse( txt ) ###, csv_options )
153
137
  end
154
138
 
155
- def self.read( path, sep: Csv.config.sep, headers: false )
139
+ def self.parse_lines( txt, sep: Csv.config.sep, &block )
140
+ csv_options = Csv.config.default_options.merge(
141
+ col_sep: sep
142
+ )
143
+ ## pp csv_options
144
+ Parser.parse_lines( txt, &block ) ###, csv_options )
145
+ end
146
+
147
+ def self.read( path, sep: Csv.config.sep )
156
148
  ## note: use our own file.open
157
149
  ## always use utf-8 for now
158
150
  ## check/todo: add skip option bom too - why? why not?
159
- txt = File.open( path, 'r:bom|utf-8' )
160
- parse( txt, sep: sep, headers: headers )
151
+ txt = File.open( path, 'r:bom|utf-8' ).read
152
+ parse( txt, sep: sep )
161
153
  end
162
154
 
163
- def self.foreach( path, sep: Csv.config.sep, headers: false )
155
+
156
+ def self.foreach( path, sep: Csv.config.sep, &block )
164
157
  csv_options = Csv.config.default_options.merge(
165
- headers: headers,
166
- col_sep: sep,
167
- external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
158
+ col_sep: sep
168
159
  )
169
160
 
170
- ## todo/check/fix:
171
- ## can use bom e.g. 'bom|utf-8' - how?
172
- ## raises ArgumentError: unknown encoding name - bom|utf-8
173
-
174
-
175
- CSV.foreach( path, csv_options ) do |row|
176
- yield( row ) ## check/todo: use block.call( row ) ## why? why not?
177
- end
161
+ Parser.foreach( path, &block ) ###, csv_options )
178
162
  end
179
163
 
164
+
180
165
  def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
181
166
  # read first lines (only)
182
167
  # and parse with csv to get header from csv library itself
@@ -187,49 +172,64 @@ class CsvReader
187
172
  ## - NOT a comments line or
188
173
  ## - NOT a blank line
189
174
 
190
- lines = ''
191
- File.open( path, 'r:bom|utf-8' ) do |f|
192
-
193
- ## todo/fix: how to handle empty files or files without headers?!
194
-
195
- ## todo/check if readline includes \n\r too??
196
- ## yes! - line include \n e.g.
197
- ## "Brewery,City,Name,Abv\n" or
198
- ## "#######\n# try with some comments\n# and blank lines even before header\n\nBrewery,City,Name,Abv\n"
199
- loop do
200
- line = f.readline
201
- lines << line
202
- break unless Csv.config.skip?( line ) || Csv.config.blank?( line )
203
- end
204
- end
175
+ record = nil
176
+ File.open( path, 'r:bom|utf-8' ) do |file|
177
+ record = Parser.parse_line( file )
178
+ end
205
179
 
206
- ## puts "lines:"
207
- ## pp lines
208
-
209
- ## note: do NOT use headers: true to get "plain" data array (no hash records)
210
- ## hash record does NOT work for single line/row
211
- parse_line( lines, sep: sep )
180
+ record ## todo/fix: return nil for empty - why? why not?
212
181
  end # method self.header
213
182
 
214
183
  end # class CsvReader
215
184
 
216
185
 
217
186
 
187
+
218
188
  class CsvHashReader
219
189
 
220
- def self.parse( txt, sep: Csv.config.sep, headers: true )
221
- CsvReader.parse( txt, sep: sep, headers: headers )
190
+
191
+ def self.parse( txt, sep: Csv.config.sep, headers: nil )
192
+
193
+ ## pass in headers as array e.g. ['A', 'B', 'C']
194
+ names = headers ? headers : nil
195
+
196
+ records = []
197
+ CsvReader.parse_lines( txt ) do |values| # sep: sep
198
+ if names.nil?
199
+ names = values ## store header row / a.k.a. field/column names
200
+ else
201
+ record = names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
202
+ records << record
203
+ end
204
+ end
205
+ records
222
206
  end
223
207
 
224
- def self.read( path, sep: Csv.config.sep, headers: true )
225
- CsvReader.read( path, sep: sep, headers: headers )
208
+
209
+ def self.read( path, sep: Csv.config.sep, headers: nil )
210
+ txt = File.open( path, 'r:bom|utf-8' ).read
211
+ parse( txt, sep: sep, headers: headers )
226
212
  end
227
213
 
228
- def self.foreach( path, sep: Csv.config.sep, headers: true, &block )
229
- CsvReader.foreach( path, sep: sep, headers: headers, &block )
214
+
215
+ def self.foreach( path, sep: Csv.config.sep, headers: nil, &block )
216
+
217
+ ## pass in headers as array e.g. ['A', 'B', 'C']
218
+ names = headers ? headers : nil
219
+
220
+ CsvReader.foreach( path ) do |values| # sep: sep
221
+ if names.nil?
222
+ names = values ## store header row / a.k.a. field/column names
223
+ else
224
+ record = names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
225
+ block.call( record )
226
+ end
227
+ end
230
228
  end
231
229
 
230
+
232
231
  def self.header( path, sep: Csv.config.sep ) ## add header too? why? why not?
232
+ ## same as "classic" header method - delegate/reuse :-)
233
233
  CsvReader.header( path, sep: sep )
234
234
  end
235
235
 
@@ -4,7 +4,7 @@
4
4
  class CsvReader ## note: uses a class for now - change to module - why? why not?
5
5
 
6
6
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
7
- MINOR = 3
7
+ MINOR = 4
8
8
  PATCH = 0
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
@@ -0,0 +1,77 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser.rb
6
+
7
+
8
+ require 'helper'
9
+
10
class TestParser < MiniTest::Test

  ## plain (and simple quoted) values - every input variant must
  ##   result in the very same three records
  def test_parse1
    expected = [["a", "b", "c"],
                ["1", "2", "3"],
                ["4", "5", "6"]]

    inputs = [
      ## don't care about newlines (\r\n)
      "a,b,c\n1,2,3\n4,5,6",
      "a,b,c\n1,2,3\n4,5,6\n",
      "a,b,c\r1,2,3\r4,5,6",
      "a,b,c\r\n1,2,3\r\n4,5,6\r\n",

      ## or leading and trailing spaces
      " \n a , b , c \n 1,2 ,3 \n 4,5,6 ",
      "\n\na, b,c \n 1, 2, 3\n 4, 5, 6",
      " \"a\" , b , \"c\" \n1, 2,\"3\" \n4,5, \"6\"",
      "a, b, c\n1, 2,3\n\n\n4,5,6\n\n\n",
      " a, b ,c \n 1 , 2 , 3 \n4,5,6 ",
    ]

    inputs.each do |txt|
      assert_equal expected, CsvReader::Parser.parse( txt )
    end
  end


  ## quoted values - with a newline inside and with doubled-up (escaped) quotes
  def test_parse_quotes
    expected = [["a", "b", "c"],
                ["11 \n 11", "\"2\"", "3"]]

    inputs = [
      " a, b ,c \n\"11 \n 11\", \"\"\"2\"\"\" , 3 \n",
      "\n\n \"a\", \"b\" ,\"c\" \n \"11 \n 11\" , \"\"\"2\"\"\" , 3 \n",
    ]

    inputs.each do |txt|
      assert_equal expected, CsvReader::Parser.parse( txt )
    end
  end


  ## empty values (unquoted and quoted) and no input at all
  def test_parse_empties
    expected = [["", "", ""]]

    assert_equal expected, CsvReader::Parser.parse( ",," )

    quoted = <<TXT
"","",""
TXT
    assert_equal expected, CsvReader::Parser.parse( quoted )

    assert_equal [], CsvReader::Parser.parse( "" )
  end


  ## comment lines (and blank lines) before and after the records get skipped
  def test_parse_comments
    expected = [["a", "b", "c"],
                ["1", "2", "3"]]

    comments_first = <<TXT
# comment
# comment
## comment

a, b, c
1, 2, 3

TXT
    assert_equal expected, CsvReader::Parser.parse( comments_first )

    comments_last = <<TXT
a, b, c
1, 2, 3

# comment
# comment
## comment
TXT
    assert_equal expected, CsvReader::Parser.parse( comments_last )
  end

end # class TestParser
@@ -12,43 +12,17 @@ class TestReader < MiniTest::Test
12
12
 
13
13
  def test_read
14
14
  puts "== read: beer.csv:"
15
- data = CsvReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
15
+ rows = CsvReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
16
+ pp rows
16
17
 
17
- pp data.class.name
18
- pp data
19
-
20
- data.each do |row|
21
- pp row
22
- end
23
- puts " #{data.size} rows"
24
- assert_equal 7, data.size ## note: include header row in count
25
- end
26
-
27
- def test_read_hash
28
- puts "== read (hash): beer.csv:"
29
- table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" ) ## returns CSV::Table
30
-
31
- pp table.class.name
32
- pp table
33
- pp table.to_a ## note: includes header (first row with column names)
34
-
35
- table.each do |row| ## note: will skip (NOT include) header row!!
18
+ rows.each do |row|
36
19
  pp row
37
20
  end
38
- puts " #{table.size} rows" ## note: again will skip (NOT include) header row in count!!!
39
- assert_equal 6, table.size
21
+ puts " #{rows.size} rows"
22
+ assert_equal 7, rows.size ## note: include header row in count
40
23
  end
41
24
 
42
25
 
43
- def test_read_hash11
44
- puts "== read (hash): beer11.csv:"
45
- table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
46
- pp table
47
- pp table.to_a ## note: includes header (first row with column names)
48
-
49
- assert true
50
- end
51
-
52
26
 
53
27
  def test_parse_line
54
28
  puts "== parse_line:"
@@ -95,25 +69,7 @@ end
95
69
  def test_foreach
96
70
  puts "== foreach: beer11.csv:"
97
71
  CsvReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
98
- pp row ## note: is Array (no .fields available!!!!!)
99
- end
100
- assert true
101
- end
102
-
103
- def test_foreach_hash
104
- puts "== foreach (hash): beer.csv:"
105
- CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
106
- pp row
107
- pp row.fields
108
- end
109
- assert true
110
- end
111
-
112
- def test_foreach_hash11
113
- puts "== foreach (hash): beer11.csv:"
114
- CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
115
72
  pp row
116
- pp row.fields
117
73
  end
118
74
  assert true
119
75
  end
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_reader_hash.rb
6
+
7
+
8
+ require 'helper'
9
+
10
class TestHashReader < MiniTest::Test

  ## read all records of beer.csv and check the record count
  def test_read
    puts "== read (hash): beer.csv:"
    records = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
    pp records
    pp records.to_a

    ## note: the header row is NOT part of the records
    records.each { |record| pp record }

    puts " #{records.size} rows"   ## note: count does NOT include the header row
    assert_equal 6, records.size
  end

  ## smoke test only - read beer11.csv and dump the records
  def test_read11
    puts "== read (hash): beer11.csv:"
    records = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
    pp records
    pp records.to_a

    assert true
  end


  ## smoke test only - dump beer.csv record by record
  def test_foreach
    puts "== foreach (hash): beer.csv:"
    path = "#{CsvReader.test_data_dir}/beer.csv"
    CsvHashReader.foreach( path ) { |record| pp record }
    assert true
  end

  ## smoke test only - dump beer11.csv record by record
  def test_foreach11
    puts "== foreach (hash): beer11.csv:"
    path = "#{CsvReader.test_data_dir}/beer11.csv"
    CsvHashReader.foreach( path ) { |record| pp record }
    assert true
  end

end # class TestHashReader
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-20 00:00:00.000000000 Z
11
+ date: 2018-08-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -55,13 +55,17 @@ files:
55
55
  - README.md
56
56
  - Rakefile
57
57
  - lib/csvreader.rb
58
+ - lib/csvreader/buffer.rb
59
+ - lib/csvreader/parser.rb
58
60
  - lib/csvreader/reader.rb
59
61
  - lib/csvreader/version.rb
60
62
  - test/data/beer.csv
61
63
  - test/data/beer11.csv
62
64
  - test/data/shakespeare.csv
63
65
  - test/helper.rb
66
+ - test/test_parser.rb
64
67
  - test/test_reader.rb
68
+ - test/test_reader_hash.rb
65
69
  homepage: https://github.com/csv11/csvreader
66
70
  licenses:
67
71
  - Public Domain