csvreader 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/README.md +47 -14
- data/Rakefile +3 -3
- data/lib/csvreader/base.rb +17 -2
- data/lib/csvreader/builder.rb +28 -14
- data/lib/csvreader/parser_fixed.rb +90 -0
- data/lib/csvreader/parser_json.rb +4 -1
- data/lib/csvreader/reader.rb +19 -10
- data/lib/csvreader/reader_hash.rb +18 -3
- data/lib/csvreader/version.rb +2 -2
- data/test/helper.rb +1 -0
- data/test/test_parser_fixed.rb +85 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7c45afa79ca462aef1a8a9f1f6da26b7b08e125
|
4
|
+
data.tar.gz: b1a20a1acf5e88331900169f5d3542fb4f866f58
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7cf456f8e886e687ade020c70830332c1b2c3bdb1d14a148ed9a01ce2d7d1e9acffab2a3435907641fad220830f3eee39c20d8ca645e096bae5fe0bb2184a075
|
7
|
+
data.tar.gz: a887567bd67a3080d99adb2a609d8e79a5385b83abe1109f1f4d24a1ebd5fb9536e38701e8fefe5020c16753f674d45b673d00b6300b3422d713a630c3dbf0d6
|
data/Manifest.txt
CHANGED
@@ -9,6 +9,7 @@ lib/csvreader/buffer.rb
|
|
9
9
|
lib/csvreader/builder.rb
|
10
10
|
lib/csvreader/converter.rb
|
11
11
|
lib/csvreader/parser.rb
|
12
|
+
lib/csvreader/parser_fixed.rb
|
12
13
|
lib/csvreader/parser_json.rb
|
13
14
|
lib/csvreader/parser_std.rb
|
14
15
|
lib/csvreader/parser_strict.rb
|
@@ -26,6 +27,7 @@ test/helper.rb
|
|
26
27
|
test/test_buffer.rb
|
27
28
|
test/test_converter.rb
|
28
29
|
test/test_parser.rb
|
30
|
+
test/test_parser_fixed.rb
|
29
31
|
test/test_parser_formats.rb
|
30
32
|
test/test_parser_java.rb
|
31
33
|
test/test_parser_meta.rb
|
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# csvreader - read tabular data in the comma-separated values (csv) format the right way (uses best practices out-of-the-box with zero-configuration)
|
2
2
|
|
3
3
|
|
4
|
-
* home :: [github.com/
|
5
|
-
* bugs :: [github.com/
|
4
|
+
* home :: [github.com/csvreader/csvreader](https://github.com/csvreader/csvreader)
|
5
|
+
* bugs :: [github.com/csvreader/csvreader/issues](https://github.com/csvreader/csvreader/issues)
|
6
6
|
* gem :: [rubygems.org/gems/csvreader](https://rubygems.org/gems/csvreader)
|
7
7
|
* rdoc :: [rubydoc.info/gems/csvreader](http://rubydoc.info/gems/csvreader)
|
8
8
|
* forum :: [wwwmake](http://groups.google.com/group/wwwmake)
|
@@ -13,7 +13,7 @@
|
|
13
13
|
|
14
14
|
|
15
15
|
``` ruby
|
16
|
-
txt
|
16
|
+
txt = <<TXT
|
17
17
|
1,2,3
|
18
18
|
4,5,6
|
19
19
|
TXT
|
@@ -46,7 +46,7 @@ Use the converters keyword option to (auto-)convert strings to nulls, booleans,
|
|
46
46
|
Example:
|
47
47
|
|
48
48
|
``` ruby
|
49
|
-
txt
|
49
|
+
txt = <<TXT
|
50
50
|
1,2,3
|
51
51
|
true,false,null
|
52
52
|
TXT
|
@@ -79,7 +79,7 @@ Csv.parse( 'Ruby, 2020-03-01, 100', converters: [->(v) { Time.parse(v) rescue v
|
|
79
79
|
#=> [["Ruby", 2020-03-01 00:00:00 +0200, "100"]]
|
80
80
|
```
|
81
81
|
|
82
|
-
A custom converter is a method that gets the value passed in
|
82
|
+
A custom converter is a method that gets the value passed in
|
83
83
|
and if successful returns a non-string type (e.g. integer, float, date, etc.)
|
84
84
|
or a string (for further processing with all other converters in the "pipeline" configuration).
|
85
85
|
|
@@ -121,7 +121,7 @@ as an array) and you want your records as hashes instead of arrays of strings.
|
|
121
121
|
Example:
|
122
122
|
|
123
123
|
``` ruby
|
124
|
-
txt
|
124
|
+
txt = <<TXT
|
125
125
|
A,B,C
|
126
126
|
1,2,3
|
127
127
|
4,5,6
|
@@ -132,7 +132,7 @@ pp records
|
|
132
132
|
|
133
133
|
# -or-
|
134
134
|
|
135
|
-
txt2
|
135
|
+
txt2 = <<TXT
|
136
136
|
1,2,3
|
137
137
|
4,5,6
|
138
138
|
TXT
|
@@ -171,7 +171,7 @@ and replace spaces with underscores.
|
|
171
171
|
Example:
|
172
172
|
|
173
173
|
``` ruby
|
174
|
-
txt
|
174
|
+
txt = <<TXT
|
175
175
|
a,b,c
|
176
176
|
1,2,3
|
177
177
|
true,false,null
|
@@ -183,7 +183,7 @@ pp records
|
|
183
183
|
# {a: true, b: false, c: nil}]
|
184
184
|
|
185
185
|
# -or-
|
186
|
-
options = { :converters => :all,
|
186
|
+
options = { :converters => :all,
|
187
187
|
:header_converters => :symbol }
|
188
188
|
|
189
189
|
records = CsvHash.parse( txt, options )
|
@@ -203,7 +203,7 @@ Built-in header converters include:
|
|
203
203
|
|
204
204
|
### What about (typed) structs?
|
205
205
|
|
206
|
-
See the [csvrecord library »](https://github.com/
|
206
|
+
See the [csvrecord library »](https://github.com/csvreader/csvrecord)
|
207
207
|
|
208
208
|
Example from the csvrecord docu:
|
209
209
|
|
@@ -284,7 +284,7 @@ Hofbräu Oktoberfestbier (6.3%) by Staatliches Hofbräuhaus München, München
|
|
284
284
|
|
285
285
|
### What about tabular data packages with pre-defined types / schemas?
|
286
286
|
|
287
|
-
See the [csvpack library »](https://github.com/
|
287
|
+
See the [csvpack library »](https://github.com/csvreader/csvpack)
|
288
288
|
|
289
289
|
|
290
290
|
|
@@ -339,7 +339,7 @@ Use strict if you do NOT want to trim leading and trailing spaces
|
|
339
339
|
and if you do NOT want to skip blank lines. Example:
|
340
340
|
|
341
341
|
``` ruby
|
342
|
-
txt
|
342
|
+
txt = <<TXT
|
343
343
|
1, 2,3
|
344
344
|
4,5 ,6
|
345
345
|
|
@@ -402,7 +402,7 @@ Note: If you use tab (`\t`) use the `TabReader`
|
|
402
402
|
Why? Tab != CSV. Yes, tab is
|
403
403
|
its own (even) simpler format
|
404
404
|
(e.g. no escape rules, no newlines in values, etc.),
|
405
|
-
see [`TabReader` »](https://github.com/
|
405
|
+
see [`TabReader` »](https://github.com/csvreader/tabreader).
|
406
406
|
|
407
407
|
``` ruby
|
408
408
|
Csv.tab.parse( ... ) # note: "classic" strict tab format
|
@@ -421,6 +421,39 @@ Csv.strict.read( ..., sep: "\t" )
|
|
421
421
|
|
422
422
|
|
423
423
|
|
424
|
+
|
425
|
+
### Q: How can I read records with fixed width (and no separator)?
|
426
|
+
|
427
|
+
Pass in the `width` keyword option with the field widths / lengths
|
428
|
+
to the "fixed" parser. Example:
|
429
|
+
|
430
|
+
``` ruby
|
431
|
+
txt = <<TXT
|
432
|
+
12345678123456781234567890123456789012345678901212345678901234
|
433
|
+
TXT
|
434
|
+
|
435
|
+
Csv.fixed.parse( txt, width: [8,8,32,14] )
|
436
|
+
# => [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
|
437
|
+
|
438
|
+
|
439
|
+
txt = <<TXT
|
440
|
+
John Smith john@example.com 1-888-555-6666
|
441
|
+
Michele O'Reileymichele@example.com 1-333-321-8765
|
442
|
+
TXT
|
443
|
+
|
444
|
+
Csv.fixed.parse( txt, width: [8,8,32,14] )
|
445
|
+
# => [["John", "Smith", "john@example.com", "1-888-555-6666"],
|
446
|
+
# ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
|
447
|
+
|
448
|
+
# and so on
|
449
|
+
```
|
450
|
+
|
451
|
+
Note: For your convenience you can use the built-in
|
452
|
+
`Csv.fix` or `Csv.f` aliases / shortcuts.
|
453
|
+
|
454
|
+
|
455
|
+
|
456
|
+
|
424
457
|
### Q: What's broken in the standard library CSV reader?
|
425
458
|
|
426
459
|
Two major design bugs and many many minor.
|
@@ -476,7 +509,7 @@ and some more.
|
|
476
509
|
|
477
510
|
## Alternatives
|
478
511
|
|
479
|
-
See the Libraries & Tools section in the [Awesome CSV](https://github.com/
|
512
|
+
See the Libraries & Tools section in the [Awesome CSV](https://github.com/csvspecs/awesome-csv#libraries--tools) page.
|
480
513
|
|
481
514
|
|
482
515
|
## License
|
data/Rakefile
CHANGED
@@ -8,19 +8,19 @@ Hoe.spec 'csvreader' do
|
|
8
8
|
self.summary = "csvreader - read tabular data in the comma-separated values (csv) format the right way (uses best practices out-of-the-box with zero-configuration)"
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/
|
11
|
+
self.urls = ['https://github.com/csvreader/csvreader']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
14
|
self.email = 'wwwmake@googlegroups.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for github formatting
|
17
|
-
self.readme_file
|
17
|
+
self.readme_file = 'README.md'
|
18
18
|
self.history_file = 'HISTORY.md'
|
19
19
|
|
20
20
|
self.licenses = ['Public Domain']
|
21
21
|
|
22
22
|
self.spec_extras = {
|
23
|
-
|
23
|
+
required_ruby_version: '>= 2.2.2'
|
24
24
|
}
|
25
25
|
|
26
26
|
end
|
data/lib/csvreader/base.rb
CHANGED
@@ -17,6 +17,8 @@ require 'csvreader/buffer'
|
|
17
17
|
require 'csvreader/parser_std' # best practices pre-configured out-of-the-box
|
18
18
|
require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space triming, blanks, etc.), configure for different formats/dialects
|
19
19
|
require 'csvreader/parser_tab'
|
20
|
+
require 'csvreader/parser_fixed'
|
21
|
+
require 'csvreader/parser_json'
|
20
22
|
require 'csvreader/parser'
|
21
23
|
require 'csvreader/converter'
|
22
24
|
require 'csvreader/reader'
|
@@ -62,6 +64,8 @@ class Parser
|
|
62
64
|
|
63
65
|
TAB = ParserTab.new
|
64
66
|
|
67
|
+
FIXED = ParserFixed.new
|
68
|
+
|
65
69
|
|
66
70
|
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
67
71
|
def self.numeric() NUMERIC; end
|
@@ -76,6 +80,9 @@ class Parser
|
|
76
80
|
def self.postgresql_text() POSTGRESQL_TEXT; end
|
77
81
|
def self.postgres_text() postgresql_text; end
|
78
82
|
def self.tab() TAB; end
|
83
|
+
def self.fixed() FIXED; end
|
84
|
+
def self.fix() fixed; end
|
85
|
+
def self.f() fixed; end
|
79
86
|
end # class Parser
|
80
87
|
end # class CsvReader
|
81
88
|
|
@@ -95,7 +102,8 @@ class CsvReader
|
|
95
102
|
POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
|
96
103
|
|
97
104
|
|
98
|
-
TAB
|
105
|
+
TAB = Builder.new( Parser::TAB )
|
106
|
+
FIXED = Builder.new( Parser::FIXED )
|
99
107
|
|
100
108
|
|
101
109
|
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
@@ -111,6 +119,9 @@ class CsvReader
|
|
111
119
|
def self.postgresql_text() POSTGRESQL_TEXT; end
|
112
120
|
def self.postgres_text() postgresql_text; end
|
113
121
|
def self.tab() TAB; end
|
122
|
+
def self.fixed() FIXED; end
|
123
|
+
def self.fix() fixed; end
|
124
|
+
def self.f() fixed; end
|
114
125
|
end # class CsvReader
|
115
126
|
|
116
127
|
|
@@ -129,7 +140,8 @@ class CsvHashReader
|
|
129
140
|
POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
|
130
141
|
|
131
142
|
|
132
|
-
TAB
|
143
|
+
TAB = Builder.new( Parser::TAB )
|
144
|
+
FIXED = Builder.new( Parser::FIXED )
|
133
145
|
|
134
146
|
|
135
147
|
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
@@ -145,6 +157,9 @@ class CsvHashReader
|
|
145
157
|
def self.postgresql_text() POSTGRESQL_TEXT; end
|
146
158
|
def self.postgres_text() postgresql_text; end
|
147
159
|
def self.tab() TAB; end
|
160
|
+
def self.fixed() FIXED; end
|
161
|
+
def self.fix() fixed; end
|
162
|
+
def self.f() fixed; end
|
148
163
|
end # class CsvHashReader
|
149
164
|
|
150
165
|
|
data/lib/csvreader/builder.rb
CHANGED
@@ -8,7 +8,11 @@ class Builder ## rename to CsvReaderBuilder - why? why not?
|
|
8
8
|
@parser = parser
|
9
9
|
end
|
10
10
|
|
11
|
-
|
11
|
+
|
12
|
+
## (auto-)forward to wrapped parser
|
13
|
+
## note/fix: not all parsers use/have a config e.g. ParserTab, ParserFixed, etc.
|
14
|
+
def config() @parser.config; end
|
15
|
+
|
12
16
|
|
13
17
|
## todo/fix:
|
14
18
|
## add parser config (attribute) setter e.g.
|
@@ -19,38 +23,40 @@ class Builder ## rename to CsvReaderBuilder - why? why not?
|
|
19
23
|
def open( path, mode=nil,
|
20
24
|
sep: nil,
|
21
25
|
converters: nil,
|
26
|
+
width: nil,
|
22
27
|
parser: @parser, &block )
|
23
28
|
CsvReader.open( path, mode,
|
24
|
-
sep: sep, converters: converters,
|
29
|
+
sep: sep, converters: converters, width: width,
|
25
30
|
parser: @parser, &block )
|
26
31
|
end
|
27
32
|
|
28
33
|
def read( path, sep: nil,
|
29
|
-
converters: nil
|
34
|
+
converters: nil,
|
35
|
+
width: nil )
|
30
36
|
CsvReader.read( path,
|
31
|
-
sep: sep, converters: converters,
|
37
|
+
sep: sep, converters: converters, width: width,
|
32
38
|
parser: @parser )
|
33
39
|
end
|
34
40
|
|
35
|
-
def header( path, sep: nil )
|
41
|
+
def header( path, sep: nil, width: nil )
|
36
42
|
CsvReader.header( path,
|
37
|
-
sep: sep,
|
43
|
+
sep: sep, width: width,
|
38
44
|
parser: @parser )
|
39
45
|
end
|
40
46
|
|
41
47
|
def foreach( path, sep: nil,
|
42
|
-
converters: nil, &block )
|
48
|
+
converters: nil, width: nil, &block )
|
43
49
|
CsvReader.foreach( path,
|
44
|
-
sep: sep, converters: converters,
|
50
|
+
sep: sep, converters: converters, width: width,
|
45
51
|
parser: @parser, &block )
|
46
52
|
end
|
47
53
|
|
48
54
|
|
49
55
|
|
50
56
|
def parse( data, sep: nil,
|
51
|
-
converters: nil, &block )
|
57
|
+
converters: nil, width: nil, &block )
|
52
58
|
CsvReader.parse( data,
|
53
|
-
sep: sep, converters: converters,
|
59
|
+
sep: sep, converters: converters, width: width,
|
54
60
|
parser: @parser, &block )
|
55
61
|
end
|
56
62
|
end # class Builder
|
@@ -64,7 +70,9 @@ class Builder ## rename to CsvHashReaderBuilder - why? why not?
|
|
64
70
|
@parser = parser
|
65
71
|
end
|
66
72
|
|
67
|
-
|
73
|
+
## (auto-)forward to wrapped parser
|
74
|
+
## note/fix: not all parsers use/have a config e.g. ParserTab, ParserFixed, etc.
|
75
|
+
def config() @parser.config; end
|
68
76
|
|
69
77
|
## todo/fix:
|
70
78
|
## add parser config (attribute) setter e.g.
|
@@ -78,32 +86,37 @@ class Builder ## rename to CsvHashReaderBuilder - why? why not?
|
|
78
86
|
sep: nil,
|
79
87
|
converters: nil,
|
80
88
|
header_converters: nil,
|
89
|
+
width: nil,
|
81
90
|
parser: @parser, &block )
|
82
91
|
CsvHashReader.open( path, mode,
|
83
92
|
headers: headers, sep: sep, converters: converters,
|
84
93
|
header_converters: header_converters,
|
94
|
+
width: width,
|
85
95
|
parser: @parser, &block )
|
86
96
|
end
|
87
97
|
|
88
98
|
def read( path, headers: nil,
|
89
99
|
sep: nil,
|
90
100
|
converters: nil,
|
91
|
-
header_converters: nil
|
101
|
+
header_converters: nil,
|
102
|
+
width: nil )
|
92
103
|
CsvHashReader.read( path,
|
93
104
|
headers: headers,
|
94
105
|
sep: sep, converters: converters,
|
95
106
|
header_converters: header_converters,
|
107
|
+
width: width,
|
96
108
|
parser: @parser )
|
97
109
|
end
|
98
110
|
|
99
111
|
def foreach( path, headers: nil,
|
100
112
|
sep: nil,
|
101
113
|
converters: nil,
|
102
|
-
header_converters: nil, &block )
|
114
|
+
header_converters: nil, width: nil, &block )
|
103
115
|
CsvHashReader.foreach( path,
|
104
116
|
headers: headers,
|
105
117
|
sep: sep, converters: converters,
|
106
118
|
header_converters: header_converters,
|
119
|
+
width: width,
|
107
120
|
parser: @parser, &block )
|
108
121
|
end
|
109
122
|
|
@@ -111,11 +124,12 @@ class Builder ## rename to CsvHashReaderBuilder - why? why not?
|
|
111
124
|
def parse( data, headers: nil,
|
112
125
|
sep: nil,
|
113
126
|
converters: nil,
|
114
|
-
header_converters: nil, &block )
|
127
|
+
header_converters: nil, width: nil, &block )
|
115
128
|
CsvHashReader.parse( data,
|
116
129
|
headers: headers,
|
117
130
|
sep: sep, converters: converters,
|
118
131
|
header_converters: header_converters,
|
132
|
+
width: width,
|
119
133
|
parser: @parser, &block )
|
120
134
|
end
|
121
135
|
end # class Builder
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvReader
|
4
|
+
|
5
|
+
class ParserFixed
|
6
|
+
|
7
|
+
###################################
|
8
|
+
## add simple logger with debug flag/switch
|
9
|
+
#
|
10
|
+
# use Parser.debug = true # to turn on
|
11
|
+
#
|
12
|
+
# todo/fix: use logutils instead of std logger - why? why not?
|
13
|
+
|
14
|
+
def self.build_logger()
|
15
|
+
l = Logger.new( STDOUT )
|
16
|
+
l.level = :info ## set to :info on start; note: is 0 (debug) by default
|
17
|
+
l
|
18
|
+
end
|
19
|
+
def self.logger() @@logger ||= build_logger; end
|
20
|
+
def logger() self.class.logger; end
|
21
|
+
|
22
|
+
|
23
|
+
def parse( data, width:, &block )
|
24
|
+
|
25
|
+
## note: input must respond to each_line (string or io/file for example)
|
26
|
+
|
27
|
+
input = data ## assume it's a string or io/file handle
|
28
|
+
|
29
|
+
if block_given?
|
30
|
+
parse_lines( input, width: width, &block )
|
31
|
+
else
|
32
|
+
records = []
|
33
|
+
|
34
|
+
parse_lines( input, width: width ) do |record|
|
35
|
+
records << record
|
36
|
+
end
|
37
|
+
|
38
|
+
records
|
39
|
+
end
|
40
|
+
end ## method parse
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
def parse_lines( input, width:, &block )
|
47
|
+
|
48
|
+
## note: each line only works with \n (unix) or \r\n (windows)
|
49
|
+
## will NOT work with \r (old mac, any others?) only!!!!
|
50
|
+
input.each_line do |line|
|
51
|
+
|
52
|
+
## note: chomp('') - if the separator is an empty string,
|
53
|
+
## it will remove all trailing newlines from the string.
|
54
|
+
## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
|
55
|
+
line = line.chomp( '' )
|
56
|
+
logger.debug "line:" if logger.debug?
|
57
|
+
logger.debug line.pretty_inspect if logger.debug?
|
58
|
+
|
59
|
+
|
60
|
+
## skip empty lines and comments
|
61
|
+
if line =~ /^[ \t]*$/ ## skip blank lines (with whitespace only)
|
62
|
+
logger.debug "skip blank line" if logger.debug?
|
63
|
+
next
|
64
|
+
end
|
65
|
+
|
66
|
+
if line =~ /^[ \t]*#/ # start_with?( "#" ) -- skip comment lines (note: allow leading whitespaces)
|
67
|
+
logger.debug "skip comment line" if logger.debug?
|
68
|
+
next
|
69
|
+
end
|
70
|
+
|
71
|
+
|
72
|
+
values = []
|
73
|
+
offset = 0 # start position / offset
|
74
|
+
width.each_with_index do |w,i|
|
75
|
+
logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
|
76
|
+
|
77
|
+
value = line[offset, w]
|
78
|
+
value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
|
79
|
+
values << value
|
80
|
+
offset += w
|
81
|
+
end
|
82
|
+
|
83
|
+
## note: requires block - enforce? how? why? why not?
|
84
|
+
block.call( values )
|
85
|
+
end
|
86
|
+
end # method parse_lines
|
87
|
+
|
88
|
+
|
89
|
+
end # class ParserFixed
|
90
|
+
end # class CsvReader
|
data/lib/csvreader/reader.rb
CHANGED
@@ -5,12 +5,14 @@ class CsvReader
|
|
5
5
|
def self.open( path, mode=nil,
|
6
6
|
sep: nil,
|
7
7
|
converters: nil,
|
8
|
+
width: nil,
|
8
9
|
parser: nil, &block ) ## rename path to filename or name - why? why not?
|
9
10
|
|
10
11
|
## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
|
11
12
|
f = File.open( path, mode ? mode : 'r:bom|utf-8' )
|
12
13
|
csv = new(f, sep: sep,
|
13
14
|
converters: converters,
|
15
|
+
width: width,
|
14
16
|
parser: parser )
|
15
17
|
|
16
18
|
# handle blocks like Ruby's open(), not like the (old old) CSV library
|
@@ -28,20 +30,22 @@ class CsvReader
|
|
28
30
|
|
29
31
|
def self.read( path, sep: nil,
|
30
32
|
converters: nil,
|
33
|
+
width: nil,
|
31
34
|
parser: nil )
|
32
35
|
open( path,
|
33
36
|
sep: sep,
|
34
37
|
converters: converters,
|
38
|
+
width: width,
|
35
39
|
parser: parser ) { |csv| csv.read }
|
36
40
|
end
|
37
41
|
|
38
42
|
|
39
|
-
def self.header( path, sep: nil, parser: nil ) ## use header or headers - or use both (with alias)?
|
43
|
+
def self.header( path, sep: nil, width: nil, parser: nil ) ## use header or headers - or use both (with alias)?
|
40
44
|
# read first lines (only)
|
41
45
|
# and parse with csv to get header from csv library itself
|
42
46
|
|
43
47
|
records = []
|
44
|
-
open( path, sep: sep, parser: parser ) do |csv|
|
48
|
+
open( path, sep: sep, width: width, parser: parser ) do |csv|
|
45
49
|
csv.each do |record|
|
46
50
|
records << record
|
47
51
|
break ## only parse/read first record
|
@@ -56,8 +60,8 @@ class CsvReader
|
|
56
60
|
|
57
61
|
|
58
62
|
def self.foreach( path, sep: nil,
|
59
|
-
converters: nil, parser: nil, &block )
|
60
|
-
csv = open( path, sep: sep, converters: converters, parser: parser )
|
63
|
+
converters: nil, width: nil, parser: nil, &block )
|
64
|
+
csv = open( path, sep: sep, converters: converters, width: width, parser: parser )
|
61
65
|
|
62
66
|
if block_given?
|
63
67
|
begin
|
@@ -76,8 +80,9 @@ class CsvReader
|
|
76
80
|
|
77
81
|
def self.parse( data, sep: nil,
|
78
82
|
converters: nil,
|
83
|
+
width: nil,
|
79
84
|
parser: nil, &block )
|
80
|
-
csv = new( data, sep: sep, converters: converters, parser: parser )
|
85
|
+
csv = new( data, sep: sep, converters: converters, width: width, parser: parser )
|
81
86
|
|
82
87
|
if block_given?
|
83
88
|
csv.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
@@ -93,9 +98,9 @@ class CsvReader
|
|
93
98
|
## always use parse (do NOT/NOT/NOT use parse_line) - why? why not?
|
94
99
|
## todo/fix: remove parse_line!!!
|
95
100
|
def self.parse_line( data, sep: nil,
|
96
|
-
converters: nil )
|
101
|
+
converters: nil, width: nil )
|
97
102
|
records = []
|
98
|
-
parse( data, sep: sep, converters: converters ) do |record|
|
103
|
+
parse( data, sep: sep, converters: converters, width: width ) do |record|
|
99
104
|
records << record
|
100
105
|
break # only parse first record
|
101
106
|
end
|
@@ -105,14 +110,15 @@ class CsvReader
|
|
105
110
|
|
106
111
|
|
107
112
|
|
108
|
-
def initialize( data, sep: nil, converters: nil, parser: nil )
|
113
|
+
def initialize( data, sep: nil, converters: nil, width: nil, parser: nil )
|
109
114
|
raise ArgumentError.new( "Cannot parse nil as CSV" ) if data.nil?
|
110
115
|
## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV" if data.nil?
|
111
116
|
|
112
117
|
# create the IO object we will read from
|
113
118
|
@io = data.is_a?(String) ? StringIO.new(data) : data
|
114
119
|
|
115
|
-
@sep
|
120
|
+
@sep = sep # (optional) for ParserStd, ParserStrict
|
121
|
+
@width = width # (optional) for ParserFixed
|
116
122
|
|
117
123
|
@converters = Converter.create_converters( converters )
|
118
124
|
|
@@ -141,7 +147,10 @@ class CsvReader
|
|
141
147
|
if block_given?
|
142
148
|
kwargs = {}
|
143
149
|
## note: only add separator if present/defined (not nil)
|
144
|
-
|
150
|
+
## todo/fix: change sep keyword to "known" classes!!!!
|
151
|
+
kwargs[:sep] = @sep if @sep && @parser.respond_to?( :'sep=' )
|
152
|
+
|
153
|
+
kwargs[:width] = @width if @parser.is_a?( ParserFixed )
|
145
154
|
|
146
155
|
## check array / pipeline of converters is empty (size=0 e.g. is [])
|
147
156
|
if @converters.empty?
|
@@ -4,8 +4,9 @@ class CsvHashReader
|
|
4
4
|
|
5
5
|
|
6
6
|
## add convenience shortcuts / aliases for CsvReader support classes
|
7
|
-
Parser
|
8
|
-
|
7
|
+
Parser = CsvReader::Parser
|
8
|
+
ParserFixed = CsvReader::ParserFixed
|
9
|
+
Converter = CsvReader::Converter
|
9
10
|
|
10
11
|
|
11
12
|
|
@@ -14,6 +15,7 @@ def self.open( path, mode=nil,
|
|
14
15
|
sep: nil,
|
15
16
|
converters: nil,
|
16
17
|
header_converters: nil,
|
18
|
+
width: nil,
|
17
19
|
parser: nil, &block ) ## rename path to filename or name - why? why not?
|
18
20
|
|
19
21
|
## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
|
@@ -22,6 +24,7 @@ def self.open( path, mode=nil,
|
|
22
24
|
sep: sep,
|
23
25
|
converters: converters,
|
24
26
|
header_converters: header_converters,
|
27
|
+
width: width,
|
25
28
|
parser: parser )
|
26
29
|
|
27
30
|
# handle blocks like Ruby's open(), not like the (old old) CSV library
|
@@ -41,12 +44,14 @@ def self.read( path, headers: nil,
|
|
41
44
|
sep: nil,
|
42
45
|
converters: nil,
|
43
46
|
header_converters: nil,
|
47
|
+
width: nil,
|
44
48
|
parser: nil )
|
45
49
|
open( path,
|
46
50
|
headers: headers,
|
47
51
|
sep: sep,
|
48
52
|
converters: converters,
|
49
53
|
header_converters: header_converters,
|
54
|
+
width: width,
|
50
55
|
parser: parser ) { |csv| csv.read }
|
51
56
|
end
|
52
57
|
|
@@ -56,12 +61,14 @@ def self.foreach( path, headers: nil,
|
|
56
61
|
sep: nil,
|
57
62
|
converters: nil,
|
58
63
|
header_converters: nil,
|
64
|
+
width: nil,
|
59
65
|
parser: nil, &block )
|
60
66
|
csv = open( path,
|
61
67
|
headers: headers,
|
62
68
|
sep: sep,
|
63
69
|
converters: converters,
|
64
70
|
header_converters: header_converters,
|
71
|
+
width: width,
|
65
72
|
parser: parser )
|
66
73
|
|
67
74
|
if block_given?
|
@@ -83,12 +90,14 @@ def self.parse( data, headers: nil,
|
|
83
90
|
sep: nil,
|
84
91
|
converters: nil,
|
85
92
|
header_converters: nil,
|
93
|
+
width: nil,
|
86
94
|
parser: nil, &block )
|
87
95
|
csv = new( data,
|
88
96
|
headers: headers,
|
89
97
|
sep: sep,
|
90
98
|
converters: converters,
|
91
99
|
header_converters: header_converters,
|
100
|
+
width: width,
|
92
101
|
parser: parser )
|
93
102
|
|
94
103
|
if block_given?
|
@@ -105,6 +114,7 @@ end # method self.parse
|
|
105
114
|
def initialize( data, headers: nil, sep: nil,
|
106
115
|
converters: nil,
|
107
116
|
header_converters: nil,
|
117
|
+
width: nil,
|
108
118
|
parser: nil )
|
109
119
|
raise ArgumentError.new( "Cannot parse nil as CSV" ) if data.nil?
|
110
120
|
## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV" if data.nil?
|
@@ -117,7 +127,8 @@ def initialize( data, headers: nil, sep: nil,
|
|
117
127
|
## for now - do NOT auto-convert passed in headers - keep them as-is (1:1)
|
118
128
|
@names = headers ? headers : nil
|
119
129
|
|
120
|
-
@sep
|
130
|
+
@sep = sep
|
131
|
+
@width = width
|
121
132
|
|
122
133
|
@converters = Converter.create_converters( converters )
|
123
134
|
@header_converters = Converter.create_header_converters( header_converters )
|
@@ -156,8 +167,12 @@ def_delegators :@io,
|
|
156
167
|
if block_given?
|
157
168
|
kwargs = {}
|
158
169
|
## note: only add separator if present/defined (not nil)
|
170
|
+
## todo/fix: change sep keyword to "known" classes!!!!
|
159
171
|
kwargs[:sep] = @sep if @sep && @parser.respond_to?( :'sep=' )
|
160
172
|
|
173
|
+
kwargs[:width] = @width if @parser.is_a?( ParserFixed )
|
174
|
+
|
175
|
+
|
161
176
|
@parser.parse( @io, kwargs ) do |raw_values| # sep: sep
|
162
177
|
if @names.nil? ## check for (first) headers row
|
163
178
|
if @header_converters.empty?
|
data/lib/csvreader/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -0,0 +1,85 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_fixed.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestParserFixed < MiniTest::Test
|
11
|
+
|
12
|
+
|
13
|
+
def parser() CsvReader::Parser::FIXED; end
|
14
|
+
def reader() CsvReader.fixed; end
|
15
|
+
|
16
|
+
|
17
|
+
def test_width
|
18
|
+
width( parser )
|
19
|
+
width( reader )
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_contacts
|
23
|
+
contacts( parser )
|
24
|
+
contacts( reader )
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
def width( parser )
|
29
|
+
records = [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
|
30
|
+
|
31
|
+
assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
|
32
|
+
# fixed width with comments and blank lines
|
33
|
+
|
34
|
+
12345678123456781234567890123456789012345678901212345678901234
|
35
|
+
|
36
|
+
TXT
|
37
|
+
|
38
|
+
assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
|
39
|
+
12345678123456781234567890123456789012345678901212345678901234
|
40
|
+
TXT
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
def contacts( parser )
|
45
|
+
records = [["John", "Smith", "john@example.com", "1-888-555-6666"],
|
46
|
+
["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
|
47
|
+
|
48
|
+
assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
|
49
|
+
# fixed width with comments and blank lines
|
50
|
+
|
51
|
+
John Smith john@example.com 1-888-555-6666
|
52
|
+
Michele O'Reileymichele@example.com 1-333-321-8765
|
53
|
+
|
54
|
+
TXT
|
55
|
+
|
56
|
+
|
57
|
+
assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
|
58
|
+
John Smith john@example.com 1-888-555-6666
|
59
|
+
Michele O'Reileymichele@example.com 1-333-321-8765
|
60
|
+
TXT
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
def test_contacts
|
65
|
+
records = [["John", "Smith", "john@example.com", "1-888-555-6666"],
|
66
|
+
["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
|
67
|
+
|
68
|
+
assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
|
69
|
+
# fixed width with comments and blank lines
|
70
|
+
|
71
|
+
John Smith john@example.com 1-888-555-6666
|
72
|
+
Michele O'Reileymichele@example.com 1-333-321-8765
|
73
|
+
|
74
|
+
TXT
|
75
|
+
|
76
|
+
|
77
|
+
assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
|
78
|
+
John Smith john@example.com 1-888-555-6666
|
79
|
+
Michele O'Reileymichele@example.com 1-333-321-8765
|
80
|
+
TXT
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
end # class TestParserFixed
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -60,6 +60,7 @@ files:
|
|
60
60
|
- lib/csvreader/builder.rb
|
61
61
|
- lib/csvreader/converter.rb
|
62
62
|
- lib/csvreader/parser.rb
|
63
|
+
- lib/csvreader/parser_fixed.rb
|
63
64
|
- lib/csvreader/parser_json.rb
|
64
65
|
- lib/csvreader/parser_std.rb
|
65
66
|
- lib/csvreader/parser_strict.rb
|
@@ -77,6 +78,7 @@ files:
|
|
77
78
|
- test/test_buffer.rb
|
78
79
|
- test/test_converter.rb
|
79
80
|
- test/test_parser.rb
|
81
|
+
- test/test_parser_fixed.rb
|
80
82
|
- test/test_parser_formats.rb
|
81
83
|
- test/test_parser_java.rb
|
82
84
|
- test/test_parser_meta.rb
|
@@ -89,7 +91,7 @@ files:
|
|
89
91
|
- test/test_reader_hash.rb
|
90
92
|
- test/test_reader_hash_converters.rb
|
91
93
|
- test/test_samples.rb
|
92
|
-
homepage: https://github.com/
|
94
|
+
homepage: https://github.com/csvreader/csvreader
|
93
95
|
licenses:
|
94
96
|
- Public Domain
|
95
97
|
metadata: {}
|