csvreader 0.3.0 → 0.4.0

This diff shows the content of publicly released package versions as they appear in their public registry and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: a9bc6971bd638abc67e8e82e241dbb370602b0d5
- data.tar.gz: 062f2727188a6f3705c21a5cc825194f84bea41c
+ metadata.gz: ed373a97a0bdb4c45d2980894a32014cdcb8ca7c
+ data.tar.gz: 784adcade81e39ad9accd1a9b2d0c76fd666b6f9
  SHA512:
- metadata.gz: 595f1c779e0457377fe5c09602cba1ce7754803b35b9280e06dfc752759da441c708db830cb159076ce3f445b3b6aaf1fef459ff9eaace4ae6a436988e52455f
- data.tar.gz: f4dc02242912ba15bef498838093f85de22c3670b5bcb2b92139adbd9343cbddec753f755729f85b2962df26d724ff069cc73b8f5cccd3edb896dc0d3ac26969
+ metadata.gz: 5523a8697990c691f55aa7c3b23867104b1c4c5b8e9e25b0424a3191e73cbb32cee369541b712f60fc366ba76a8207a77d6b12b68ea209896b6c26e11c5712de
+ data.tar.gz: 7c33c812c2a53303911b6686d03554d6e388b3f936a3b6b8d995ed237651bd171d3bdb8ab8f38f7f327e1a9d1be26d1fa955918012f37cf6a9e1c2cc6ab08373
@@ -4,10 +4,14 @@ Manifest.txt
  README.md
  Rakefile
  lib/csvreader.rb
+ lib/csvreader/buffer.rb
+ lib/csvreader/parser.rb
  lib/csvreader/reader.rb
  lib/csvreader/version.rb
  test/data/beer.csv
  test/data/beer11.csv
  test/data/shakespeare.csv
  test/helper.rb
+ test/test_parser.rb
  test/test_reader.rb
+ test/test_reader_hash.rb
@@ -8,8 +8,9 @@ require 'pp'
  ###
  # our own code
  require 'csvreader/version' # let version always go first
+ require 'csvreader/buffer'
+ require 'csvreader/parser'
  require 'csvreader/reader'


-
  puts CsvReader.banner # say hello
@@ -0,0 +1,48 @@
+ # encoding: utf-8
+
+ class CsvReader
+ class BufferIO ## todo: find a better name - why? why not? is really just for reading (keep io?)
+ def initialize( data )
+ # create the IO object we will read from
+ @io = data.is_a?(String) ? StringIO.new(data) : data
+ @buf = [] ## last (buffer) chars (used for peek)
+ end
+
+ def eof?() @buf.size == 0 && @io.eof?; end
+
+ def getc
+ if @buf.size > 0
+ @buf.shift ## get first char from buffer
+ else
+ @io.getc
+ end
+ end # method getc
+
+
+ def ungetc( c )
+ ## add upfront as first char in buffer
+ ## last in/first out queue!!!!
+ @buf.unshift( c )
+ ## puts "ungetc - >#{c} (#{c.ord})< => >#{@buf}<"
+ end
+
+
+ def peek
+ ## todo/fix:
+ ## use Hexadecimal code: 1A, U+001A for eof char - why? why not?
+ if @buf.size == 0 && @io.eof?
+ puts "peek - hitting eof!!!"
+ ## return eof char(s) - exits? is \0 ?? double check
+ return "\0"
+ end
+
+ if @buf.size == 0
+ c = @io.getc
+ @buf.push( c )
+ ## puts "peek - fill buffer >#{c}< (#{c.ord})"
+ end
+
+ @buf.first
+ end # method peek
+ end # class BufferIO
+ end # class CsvReader
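A quick usage sketch of the new BufferIO helper (a minimal example, assuming csvreader 0.4.0 is installed; the input string is made up). It wraps a String or IO and adds a one-character pushback buffer so the parser can peek and ungetc:

    require 'csvreader'        # pulls in CsvReader::BufferIO (and Parser/Reader)

    io = CsvReader::BufferIO.new( "a,b\n" )   # a String gets wrapped in a StringIO
    io.peek           #=> "a"    fills the buffer without consuming the char
    c = io.getc       #=> "a"    drains the buffer first, then the wrapped IO
    io.ungetc( c )    #          pushes the char back (last in/first out)
    io.peek           #=> "a"
    io.eof?           #=> false  true only once buffer and IO are both exhausted

Note that peek returns "\0" once the end of input is reached.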
@@ -0,0 +1,251 @@
+ # encoding: utf-8
+
+ class CsvReader
+ class Parser
+
+
+ ## char constants
+ DOUBLE_QUOTE = "\""
+ COMMENT = "#" ## use COMMENT_HASH or HASH or ??
+ SPACE = " "
+ TAB = "\t"
+ LF = "\n" ## 0A (hex) 10 (dec)
+ CR = "\r" ## 0D (hex) 13 (dec)
+
+
+ def self.parse( data )
+ puts "parse:"
+ pp data
+
+ parser = new
+ parser.parse( data )
+ end
+
+ def self.parse_line( data )
+ puts "parse_line:"
+
+ parser = new
+ records = parser.parse( data, limit: 1 )
+
+ ## unwrap record if empty return nil - why? why not?
+ ## return empty record e.g. [] - why? why not?
+ records.size == 0 ? nil : records.first
+ end
+
+
+
+ def self.read( path )
+ parser = new
+ File.open( path, 'r:bom|utf-8' ) do |file|
+ parser.parse( file )
+ end
+ end
+
+ def self.foreach( path, &block )
+ parser = new
+ File.open( path, 'r:bom|utf-8' ) do |file|
+ parser.foreach( file, &block )
+ end
+ end
+
+ def self.parse_lines( data, &block )
+ parser = new
+ parser.parse_lines( data, &block )
+ end
+
+
+
+
+
+ def parse_field( io, trim: true )
+ value = ""
+ value << parse_spaces( io ) ## add leading spaces
+
+ if (c=io.peek; c=="," || c==LF || c==CR || io.eof?) ## empty field
+ value = value.strip if trim ## strip all spaces
+ ## return value; do nothing
+ elsif io.peek == DOUBLE_QUOTE
+ puts "start double_quote field - value >#{value}<"
+ value = value.strip ## note always strip/trim leading spaces in quoted value
+
+ puts "start double_quote field - peek >#{io.peek}< (#{io.peek.ord})"
+ io.getc ## eat-up double_quote
+
+ loop do
+ while (c=io.peek; !(c==DOUBLE_QUOTE || io.eof?))
+ value << io.getc ## eat-up everything unit quote (")
+ end
+
+ break if io.eof?
+
+ io.getc ## eat-up double_quote
+
+ if io.peek == DOUBLE_QUOTE ## doubled up quote?
+ value << io.getc ## add doube quote and continue!!!!
+ else
+ break
+ end
+ end
+
+ ## note: always eat-up all trailing spaces (" ") and tabs (\t)
+ skip_spaces( io )
+ puts "end double_quote field - peek >#{io.peek}< (#{io.peek.ord})"
+ else
+ puts "start reg field - peek >#{io.peek}< (#{io.peek.ord})"
+ ## consume simple value
+ ## until we hit "," or "\n" or "\r"
+ ## note: will eat-up quotes too!!!
+ while (c=io.peek; !(c=="," || c==LF || c==CR || io.eof?))
+ puts " add char >#{io.peek}< (#{io.peek.ord})"
+ value << io.getc ## eat-up all spaces (" ") and tabs (\t)
+ end
+ value = value.strip if trim ## strip all spaces
+ puts "end reg field - peek >#{io.peek}< (#{io.peek.ord})"
+ end
+
+ value
+ end
+
+
+
+ def parse_record( io, trim: true )
+ values = []
+
+ loop do
+ value = parse_field( io, trim: trim )
+ puts "value: »#{value}«"
+ values << value
+
+ if io.eof?
+ break
+ elsif (c=io.peek; c==LF || c==CR)
+ skip_newlines( io )
+ break
+ elsif io.peek == ","
+ io.getc ## eat-up FS(,)
+ else
+ puts "*** csv parse error: found >#{io.peek} (#{io.peek.ord})< - FS (,) or RS (\\n) expected!!!!"
+ exit(1)
+ end
+ end
+
+ values
+ end
+
+
+ def skip_newlines( io )
+ return if io.eof?
+
+ while (c=io.peek; c==LF || c==CR)
+ io.getc ## eat-up all \n and \r
+ end
+ end
+
+
+ def skip_until_eol( io )
+ return if io.eof?
+
+ while (c=io.peek; !(c==LF || c==CR || io.eof?))
+ io.getc ## eat-up all until end of line
+ end
+ end
+
+ def skip_spaces( io )
+ return if io.eof?
+
+ while (c=io.peek; c==SPACE || c==TAB)
+ io.getc ## note: always eat-up all spaces (" ") and tabs (\t)
+ end
+ end
+
+
+
+
+ def parse_spaces( io ) ## helper method
+ spaces = ""
+ ## add leading spaces
+ while (c=io.peek; c==SPACE || c==TAB)
+ spaces << io.getc ## eat-up all spaces (" ") and tabs (\t)
+ end
+ spaces
+ end
+
+
+
+
+ def parse_lines( io_maybe, trim: true,
+ comments: true,
+ blanks: true, &block )
+
+ ## find a better name for io_maybe
+ ## make sure io is a wrapped into BufferIO!!!!!!
+ if io_maybe.is_a?( BufferIO ) ### allow (re)use of BufferIO if managed from "outside"
+ io = io_maybe
+ else
+ io = BufferIO.new( io_maybe )
+ end
+
+
+ loop do
+ break if io.eof?
+
+ ## hack: use own space buffer for peek( x ) lookahead (more than one char)
+ ## check for comments or blank lines
+ if comments || blanks
+ spaces = parse_spaces( io )
+ end
+
+ if comments && io.peek == COMMENT ## comment line
+ puts "skipping comment - peek >#{io.peek}< (#{io.peek.ord})"
+ skip_until_eol( io )
+ skip_newlines( io )
+ elsif blanks && (c=io.peek; c==LF || c==CR || io.eof?)
+ puts "skipping blank - peek >#{io.peek}< (#{io.peek.ord})"
+ skip_newlines( io )
+ else # undo (ungetc spaces)
+ puts "start record - peek >#{io.peek}< (#{io.peek.ord})"
+
+ if comments || blanks
+ ## note: MUST ungetc in "reverse" order
+ ## ## buffer is last in/first out queue!!!!
+ spaces.reverse.each_char { |space| io.ungetc( space ) }
+ end
+
+ record = parse_record( io, trim: trim )
+
+ ## note: requires block - enforce? how? why? why not?
+ block.call( record ) ## yield( record )
+ end
+ end # loop
+ end # method parse_lines
+
+
+
+
+ def parse( io_maybe, trim: true,
+ comments: true,
+ blanks: true,
+ limit: nil )
+ records = []
+
+ parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks ) do |record|
+ records << record
+
+ ## set limit to 1 for processing "single" line (that is, get one record)
+ return records if limit && limit >= records.size
+ end
+
+ records
+ end ## method parse
+
+
+ def foreach( io_maybe, trim: true,
+ comments: true,
+ blanks: true, &block )
+ parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks, &block )
+ end
+
+
+
+ end # class Parser
+ end # class CsvReader
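A hedged sketch of the new hand-rolled parser in action (illustrative input; assumes the gem is installed). Comments, blank lines, and leading/trailing spaces are now handled by the parser itself rather than by CSV converters and skip_lines; note that this 0.4.0 parser still prints debug output via puts:

    require 'csvreader'

    CsvReader::Parser.parse( "# comment\n\na, b, c\n1, 2, 3\n" )
    #=> [["a", "b", "c"], ["1", "2", "3"]]
    #   comment and blank lines skipped, values trimmed (trim/comments/blanks default to true)

    CsvReader::Parser.parse_line( "1, \"hello, world\", 3" )
    #=> ["1", "hello, world", "3"]    first record only (parse with limit: 1)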
@@ -3,9 +3,6 @@

  module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???

- ## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
- ## use __CSV__ or similar? or just ::CSV ??
-

  class Dialect ## todo: use a module - it's just a namespace/module now - why? why not?
  ###
@@ -36,52 +33,35 @@ end # class Dialect

  class Configuration

- puts "CSV::VERSION:"
- puts CSV::VERSION
-
- puts "builtin CSV::Converters:"
- pp CSV::Converters
-
- puts "CSV::DEFAULT_OPTIONS:"
- pp CSV::DEFAULT_OPTIONS
-
- ## register our own converters
- ## check if strip gets called for nil values too?
- CSV::Converters[:strip] = ->(field) { field.strip }
-

  attr_accessor :sep ## col_sep (column separator)
  attr_accessor :na ## not available (string or array of strings or nil) - rename to nas/nils/nulls - why? why not?
  attr_accessor :trim ### allow ltrim/rtrim/trim - why? why not?
+ attr_accessor :blanks
+ attr_accessor :comments
  attr_accessor :dialect

  def initialize
- @sep = ','
+ @sep = ','
+ @blanks = true
+ @comments = true
+ @trim = true
  ## note: do NOT add headers as global - should ALWAYS be explicit
  ## headers (true/false) - changes resultset and requires different processing!!!

  self ## return self for chaining
  end

- def trim?() @trim; end ## strip leading and trailing spaces
+ ## strip leading and trailing spaces
+ def trim?() @trim; end

- def blank?( line )
- ## note: blank line does NOT include "blank" with spaces only!!
- ## use BLANK_REGEX in skip_lines to clean-up/skip/remove/ignore
- ## see skip_blanks in default_options
- line.empty?
- end
+ ## skip blank lines (with only 1+ spaces)
+ ## note: for now blank lines with no spaces will always get skipped
+ def blanks?() @blanks; end

- ## lines starting with # (note: only leading spaces allowed)
- COMMENTS_REGEX = /^\s*#/
- BLANK_REGEX = /^\s*$/ ## skip all whitespace lines - note: use "" or , for a blank record!!!
- SKIP_REGEX = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )

- def skip?( line )
- ## check if comment line - skip comments
- ## see skip_lines in default_options
- line =~ SKIP_REGEX
- end
+ def comments?() @comments; end
+

  ## built-in (default) options
  ## todo: find a better name?
@@ -99,9 +79,10 @@ end # class Dialect
  ## strip leading and trailing spaces
  ## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
  defaults = {
- skip_blanks: true, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
- skip_lines: SKIP_REGEX,
- :converters => :strip
+ blanks: @blanks, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
+ comments: @comments,
+ trim: @trim
+ ## :converters => :strip
  }
  defaults
  end
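The global configuration now carries the parser flags instead of the old CSV options; a short sketch of the new accessors (a minimal example, using the Csv.config singleton referenced throughout this file):

    require 'csvreader'

    Csv.config.comments?    #=> true   skip lines starting with #
    Csv.config.blanks?      #=> true   skip blank lines
    Csv.config.trim?        #=> true   strip leading/trailing spaces

    Csv.config.trim = false            # turn trimming off globally
    Csv.config.default_options         #=> { blanks: true, comments: true, trim: false }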
@@ -136,47 +117,51 @@ class CsvReader
  converters: nil)
  ## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
  csv_options = Csv.config.default_options.merge(
- headers: false, ## note: always turn off headers!!!!!!
  col_sep: sep
  )
  ## pp csv_options
- CSV.parse_line( txt, csv_options )
+ Parser.parse_line( txt ) ##, csv_options )
  end

- def self.parse( txt, sep: Csv.config.sep, headers: false )
+
+ ##
+ ## todo/fix: "unify" parse and parse_lines !!!
+ ## check for block_given? - why? why not?
+
+ def self.parse( txt, sep: Csv.config.sep )
  csv_options = Csv.config.default_options.merge(
- headers: headers,
  col_sep: sep
  )
  ## pp csv_options
- CSV.parse( txt, csv_options )
+ Parser.parse( txt ) ###, csv_options )
  end

- def self.read( path, sep: Csv.config.sep, headers: false )
+ def self.parse_lines( txt, sep: Csv.config.sep, &block )
+ csv_options = Csv.config.default_options.merge(
+ col_sep: sep
+ )
+ ## pp csv_options
+ Parser.parse_lines( txt, &block ) ###, csv_options )
+ end
+
+ def self.read( path, sep: Csv.config.sep )
  ## note: use our own file.open
  ## always use utf-8 for now
  ## check/todo: add skip option bom too - why? why not?
- txt = File.open( path, 'r:bom|utf-8' )
- parse( txt, sep: sep, headers: headers )
+ txt = File.open( path, 'r:bom|utf-8' ).read
+ parse( txt, sep: sep )
  end

- def self.foreach( path, sep: Csv.config.sep, headers: false )
+
+ def self.foreach( path, sep: Csv.config.sep, &block )
  csv_options = Csv.config.default_options.merge(
- headers: headers,
- col_sep: sep,
- external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
+ col_sep: sep
  )

- ## todo/check/fix:
- ## can use bom e.g. 'bom|utf-8' - how?
- ## raises ArgumentError: unknown encoding name - bom|utf-8
-
-
- CSV.foreach( path, csv_options ) do |row|
- yield( row ) ## check/todo: use block.call( row ) ## why? why not?
- end
+ Parser.foreach( path, &block ) ###, csv_options )
  end

+
  def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
  # read first lines (only)
  # and parse with csv to get header from csv library itself
@@ -187,49 +172,64 @@ class CsvReader
  ## - NOT a comments line or
  ## - NOT a blank line

- lines = ''
- File.open( path, 'r:bom|utf-8' ) do |f|
-
- ## todo/fix: how to handle empty files or files without headers?!
-
- ## todo/check if readline includes \n\r too??
- ## yes! - line include \n e.g.
- ## "Brewery,City,Name,Abv\n" or
- ## "#######\n# try with some comments\n# and blank lines even before header\n\nBrewery,City,Name,Abv\n"
- loop do
- line = f.readline
- lines << line
- break unless Csv.config.skip?( line ) || Csv.config.blank?( line )
- end
- end
+ record = nil
+ File.open( path, 'r:bom|utf-8' ) do |file|
+ record = Parser.parse_line( file )
+ end

- ## puts "lines:"
- ## pp lines
-
- ## note: do NOT use headers: true to get "plain" data array (no hash records)
- ## hash record does NOT work for single line/row
- parse_line( lines, sep: sep )
+ record ## todo/fix: return nil for empty - why? why not?
  end # method self.header

  end # class CsvReader



+
  class CsvHashReader

- def self.parse( txt, sep: Csv.config.sep, headers: true )
- CsvReader.parse( txt, sep: sep, headers: headers )
+
+ def self.parse( txt, sep: Csv.config.sep, headers: nil )
+
+ ## pass in headers as array e.g. ['A', 'B', 'C']
+ names = headers ? headers : nil
+
+ records = []
+ CsvReader.parse_lines( txt ) do |values| # sep: sep
+ if names.nil?
+ names = values ## store header row / a.k.a. field/column names
+ else
+ record = names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
+ records << record
+ end
+ end
+ records
  end

- def self.read( path, sep: Csv.config.sep, headers: true )
- CsvReader.read( path, sep: sep, headers: headers )
+
+ def self.read( path, sep: Csv.config.sep, headers: nil )
+ txt = File.open( path, 'r:bom|utf-8' ).read
+ parse( txt, sep: sep, headers: headers )
  end

- def self.foreach( path, sep: Csv.config.sep, headers: true, &block )
- CsvReader.foreach( path, sep: sep, headers: headers, &block )
+
+ def self.foreach( path, sep: Csv.config.sep, headers: nil, &block )
+
+ ## pass in headers as array e.g. ['A', 'B', 'C']
+ names = headers ? headers : nil
+
+ CsvReader.foreach( path ) do |values| # sep: sep
+ if names.nil?
+ names = values ## store header row / a.k.a. field/column names
+ else
+ record = names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
+ block.call( record )
+ end
+ end
  end

+
  def self.header( path, sep: Csv.config.sep ) ## add header too? why? why not?
+ ## same as "classic" header method - delegate/reuse :-)
  CsvReader.header( path, sep: sep )
  end
 
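CsvHashReader no longer returns a CSV::Table; it zips every data row with the header row (or with a headers: array you pass in) into plain Hash records. A small sketch with made-up data:

    require 'csvreader'

    CsvHashReader.parse( "a,b,c\n1,2,3\n4,5,6" )
    #=> [{"a"=>"1", "b"=>"2", "c"=>"3"},
    #    {"a"=>"4", "b"=>"5", "c"=>"6"}]

    ## pass your own column names - the first row is then treated as data
    CsvHashReader.parse( "1,2,3\n4,5,6", headers: ['a', 'b', 'c'] )
    #=> the same two hash records as above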
@@ -4,7 +4,7 @@
  class CsvReader ## note: uses a class for now - change to module - why? why not?

  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
- MINOR = 3
+ MINOR = 4
  PATCH = 0
  VERSION = [MAJOR,MINOR,PATCH].join('.')
 
@@ -0,0 +1,77 @@
+ # encoding: utf-8
+
+ ###
+ # to run use
+ # ruby -I ./lib -I ./test test/test_parser.rb
+
+
+ require 'helper'
+
+ class TestParser < MiniTest::Test
+
+
+ def test_parse1
+ records = [["a", "b", "c"],
+ ["1", "2", "3"],
+ ["4", "5", "6"]]
+
+ ## don't care about newlines (\r\n)
+ assert_equal records, CsvReader::Parser.parse( "a,b,c\n1,2,3\n4,5,6" )
+ assert_equal records, CsvReader::Parser.parse( "a,b,c\n1,2,3\n4,5,6\n" )
+ assert_equal records, CsvReader::Parser.parse( "a,b,c\r1,2,3\r4,5,6" )
+ assert_equal records, CsvReader::Parser.parse( "a,b,c\r\n1,2,3\r\n4,5,6\r\n" )
+
+ ## or leading and trailing spaces
+ assert_equal records, CsvReader::Parser.parse( " \n a , b , c \n 1,2 ,3 \n 4,5,6 " )
+ assert_equal records, CsvReader::Parser.parse( "\n\na, b,c \n 1, 2, 3\n 4, 5, 6" )
+ assert_equal records, CsvReader::Parser.parse( " \"a\" , b , \"c\" \n1, 2,\"3\" \n4,5, \"6\"" )
+ assert_equal records, CsvReader::Parser.parse( "a, b, c\n1, 2,3\n\n\n4,5,6\n\n\n" )
+ assert_equal records, CsvReader::Parser.parse( " a, b ,c \n 1 , 2 , 3 \n4,5,6 " )
+ end
+
+
+ def test_parse_quotes
+ records = [["a", "b", "c"],
+ ["11 \n 11", "\"2\"", "3"]]
+
+ assert_equal records, CsvReader::Parser.parse( " a, b ,c \n\"11 \n 11\", \"\"\"2\"\"\" , 3 \n" )
+ assert_equal records, CsvReader::Parser.parse( "\n\n \"a\", \"b\" ,\"c\" \n \"11 \n 11\" , \"\"\"2\"\"\" , 3 \n" )
+ end
+
+ def test_parse_empties
+ records = [["", "", ""]]
+
+ assert_equal records, CsvReader::Parser.parse( ",," )
+ assert_equal records, CsvReader::Parser.parse( <<TXT )
+ "","",""
+ TXT
+
+ assert_equal [], CsvReader::Parser.parse( "" )
+ end
+
+
+ def test_parse_comments
+ records = [["a", "b", "c"],
+ ["1", "2", "3"]]
+
+ assert_equal records, CsvReader::Parser.parse( <<TXT )
+ # comment
+ # comment
+ ## comment
+
+ a, b, c
+ 1, 2, 3
+
+ TXT
+
+ assert_equal records, CsvReader::Parser.parse( <<TXT )
+ a, b, c
+ 1, 2, 3
+
+ # comment
+ # comment
+ ## comment
+ TXT
+ end
+
+ end # class TestParser
@@ -12,43 +12,17 @@ class TestReader < MiniTest::Test

  def test_read
  puts "== read: beer.csv:"
- data = CsvReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
+ rows = CsvReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
+ pp rows

- pp data.class.name
- pp data
-
- data.each do |row|
- pp row
- end
- puts " #{data.size} rows"
- assert_equal 7, data.size ## note: include header row in count
- end
-
- def test_read_hash
- puts "== read (hash): beer.csv:"
- table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" ) ## returns CSV::Table
-
- pp table.class.name
- pp table
- pp table.to_a ## note: includes header (first row with column names)
-
- table.each do |row| ## note: will skip (NOT include) header row!!
+ rows.each do |row|
  pp row
  end
- puts " #{table.size} rows" ## note: again will skip (NOT include) header row in count!!!
- assert_equal 6, table.size
+ puts " #{rows.size} rows"
+ assert_equal 7, rows.size ## note: include header row in count
  end


- def test_read_hash11
- puts "== read (hash): beer11.csv:"
- table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
- pp table
- pp table.to_a ## note: includes header (first row with column names)
-
- assert true
- end
-

  def test_parse_line
  puts "== parse_line:"
@@ -95,25 +69,7 @@ end
  def test_foreach
  puts "== foreach: beer11.csv:"
  CsvReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
- pp row ## note: is Array (no .fields available!!!!!)
- end
- assert true
- end
-
- def test_foreach_hash
- puts "== foreach (hash): beer.csv:"
- CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
- pp row
- pp row.fields
- end
- assert true
- end
-
- def test_foreach_hash11
- puts "== foreach (hash): beer11.csv:"
- CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
  pp row
- pp row.fields
  end
  assert true
  end
@@ -0,0 +1,52 @@
+ # encoding: utf-8
+
+ ###
+ # to run use
+ # ruby -I ./lib -I ./test test/test_reader_hash.rb
+
+
+ require 'helper'
+
+ class TestHashReader < MiniTest::Test
+
+
+ def test_read
+ puts "== read (hash): beer.csv:"
+ rows = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
+ pp rows
+ pp rows.to_a
+
+ rows.each do |row| ## note: will skip (NOT include) header row!!
+ pp row
+ end
+ puts " #{rows.size} rows" ## note: again will skip (NOT include) header row in count!!!
+ assert_equal 6, rows.size
+ end
+
+ def test_read11
+ puts "== read (hash): beer11.csv:"
+ rows = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
+ pp rows
+ pp rows.to_a ## note: includes header (first row with column names)
+
+ assert true
+ end
+
+
+ def test_foreach
+ puts "== foreach (hash): beer.csv:"
+ CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
+ pp row
+ end
+ assert true
+ end
+
+ def test_foreach11
+ puts "== foreach (hash): beer11.csv:"
+ CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
+ pp row
+ end
+ assert true
+ end
+
+ end # class TestHashReader
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: csvreader
  version: !ruby/object:Gem::Version
- version: 0.3.0
+ version: 0.4.0
  platform: ruby
  authors:
  - Gerald Bauer
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-08-20 00:00:00.000000000 Z
+ date: 2018-08-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rdoc
@@ -55,13 +55,17 @@ files:
  - README.md
  - Rakefile
  - lib/csvreader.rb
+ - lib/csvreader/buffer.rb
+ - lib/csvreader/parser.rb
  - lib/csvreader/reader.rb
  - lib/csvreader/version.rb
  - test/data/beer.csv
  - test/data/beer11.csv
  - test/data/shakespeare.csv
  - test/helper.rb
+ - test/test_parser.rb
  - test/test_reader.rb
+ - test/test_reader_hash.rb
  homepage: https://github.com/csv11/csvreader
  licenses:
  - Public Domain