csvreader 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +1 -0
- data/README.md +173 -0
- data/lib/csvreader/reader.rb +38 -19
- data/lib/csvreader/version.rb +1 -1
- data/test/data/shakespeare.csv +9 -0
- data/test/test_reader.rb +31 -29
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af0fcea1b598e6123786a05532a6f5b2e10a4095
|
4
|
+
data.tar.gz: ba2dc18a6076e425847b440c05819e898f0a66b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28f60b98574e5331b53280f27017fae776c787ee1b7a56815c8a8f9c21a0926e6f561ca8a75f1464f1743849f989a52f122fbe4f20086de8159cf2df53b71bbe
|
7
|
+
data.tar.gz: 6d5b80b11e4774bc227bffe62bc829ab19b70f22fd69ca35c54526b85f261fa5c4bf0a7d87c9ba715738f50a6710bdd843f3b6cc1581f0d88744332fdf062796
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -11,6 +11,179 @@
|
|
11
11
|
|
12
12
|
## Usage
|
13
13
|
|
14
|
+
``` ruby
|
15
|
+
line = "1,2,3"
|
16
|
+
values = CsvReader.parse_line( line )
|
17
|
+
pp values
|
18
|
+
# => ["1","2","3"]
|
19
|
+
```
|
20
|
+
|
21
|
+
or use the convenience helpers:
|
22
|
+
|
23
|
+
``` ruby
|
24
|
+
txt = <<TXT
|
25
|
+
1,2,3
|
26
|
+
4,5,6
|
27
|
+
TXT
|
28
|
+
|
29
|
+
records = CsvReader.parse( txt )
|
30
|
+
pp records
|
31
|
+
# => [["1","2","3"],
|
32
|
+
# ["4","5","6"]]
|
33
|
+
|
34
|
+
# -or-
|
35
|
+
|
36
|
+
records = CsvReader.read( "values.csv" )
|
37
|
+
pp records
|
38
|
+
# => [["1","2","3"],
|
39
|
+
# ["4","5","6"]]
|
40
|
+
|
41
|
+
# -or-
|
42
|
+
|
43
|
+
CsvReader.foreach( "values.csv" ) do |rec|
|
44
|
+
pp rec
|
45
|
+
end
|
46
|
+
# => ["1","2","3"]
|
47
|
+
# => ["4","5","6"]
|
48
|
+
```
|
49
|
+
|
50
|
+
|
51
|
+
### What about headers?
|
52
|
+
|
53
|
+
Use the `CsvHashReader`
|
54
|
+
if the first line is a header (or if missing pass in the headers
|
55
|
+
as an array) and you want your records as hashes instead of arrays of strings.
|
56
|
+
Example:
|
57
|
+
|
58
|
+
``` ruby
|
59
|
+
txt = <<TXT
|
60
|
+
A,B,C
|
61
|
+
1,2,3
|
62
|
+
4,5,6
|
63
|
+
TXT
|
64
|
+
|
65
|
+
records = CsvHashReader.parse( txt )
|
66
|
+
pp records
|
67
|
+
|
68
|
+
# -or-
|
69
|
+
|
70
|
+
txt2 = <<TXT
|
71
|
+
1,2,3
|
72
|
+
4,5,6
|
73
|
+
TXT
|
74
|
+
|
75
|
+
records = CsvHashReader.parse( txt2, headers: ["A","B","C"] )
|
76
|
+
pp records
|
77
|
+
|
78
|
+
# => [{"A": "1", "B": "2", "C": "3"},
|
79
|
+
# {"A": "4", "B": "5", "C": "6"}]
|
80
|
+
|
81
|
+
# -or-
|
82
|
+
|
83
|
+
records = CsvHashReader.read( "hash.csv" )
|
84
|
+
pp records
|
85
|
+
# => [{"A": "1", "B": "2", "C": "3"},
|
86
|
+
# {"A": "4", "B": "5", "C": "6"}]
|
87
|
+
|
88
|
+
# -or-
|
89
|
+
|
90
|
+
CsvHashReader.foreach( "hash.csv" ) do |rec|
|
91
|
+
pp rec
|
92
|
+
end
|
93
|
+
# => {"A": "1", "B": "2", "C": "3"}
|
94
|
+
# => {"A": "4", "B": "5", "C": "6"}
|
95
|
+
```
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
## Frequently Asked Questions (FAQ) and Answers
|
100
|
+
|
101
|
+
### Q: What's CSV the right way? What best practices can I use?
|
102
|
+
|
103
|
+
Use best practices out-of-the-box with zero-configuration.
|
104
|
+
Do you know how to skip blank lines or how to add `#` single-line comments?
|
105
|
+
Or how to trim leading and trailing spaces? No worries. It's turned on by default.
|
106
|
+
|
107
|
+
Yes, you can. Use
|
108
|
+
|
109
|
+
```
|
110
|
+
#######
|
111
|
+
# try with some comments
|
112
|
+
# and blank lines even before header (first row)
|
113
|
+
|
114
|
+
Brewery,City,Name,Abv
|
115
|
+
Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
|
116
|
+
Augustiner Bräu München,München,Edelstoff,5.6%
|
117
|
+
|
118
|
+
Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
|
119
|
+
Brauerei Spezial, Bamberg, Rauchbier Märzen, 5.1%
|
120
|
+
Hacker-Pschorr Bräu, München, Münchner Dunkel, 5.0%
|
121
|
+
Staatliches Hofbräuhaus München, München, Hofbräu Oktoberfestbier, 6.3%
|
122
|
+
```
|
123
|
+
|
124
|
+
instead of strict "classic"
|
125
|
+
(no blank lines, no comments, no leading and trailing spaces, etc.):
|
126
|
+
|
127
|
+
```
|
128
|
+
Brewery,City,Name,Abv
|
129
|
+
Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
|
130
|
+
Augustiner Bräu München,München,Edelstoff,5.6%
|
131
|
+
Bayerische Staatsbrauerei Weihenstephan,Freising,Hefe Weissbier,5.4%
|
132
|
+
Brauerei Spezial,Bamberg,Rauchbier Märzen,5.1%
|
133
|
+
Hacker-Pschorr Bräu,München,Münchner Dunkel,5.0%
|
134
|
+
Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
|
135
|
+
```
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
### Q: How can I change the separator to semicolon (`;`) or pipe (`|`)?
|
140
|
+
|
141
|
+
Pass in the `sep` keyword option. Example:
|
142
|
+
|
143
|
+
``` ruby
|
144
|
+
CsvReader.parse_line( ..., sep: ';' )
|
145
|
+
CsvReader.parse( ..., sep: ';' )
|
146
|
+
CsvReader.read( ..., sep: ';' )
|
147
|
+
# ...
|
148
|
+
CsvReader.parse_line( ..., sep: '|' )
|
149
|
+
CsvReader.parse( ..., sep: '|' )
|
150
|
+
CsvReader.read( ..., sep: '|' )
|
151
|
+
# ...
|
152
|
+
# and so on
|
153
|
+
```
|
154
|
+
|
155
|
+
|
156
|
+
Note: If you use tab (`\t`) use the `TabReader`! Why? Tab != CSV. Yes, tab is
|
157
|
+
its own (even) simpler format
|
158
|
+
(e.g. no escape rules, no newlines in values, etc.),
|
159
|
+
see [`TabReader` »](https://github.com/datatxt/tabreader).
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
### Q: What's broken in the standard library CSV reader?
|
164
|
+
|
165
|
+
Two major design bugs and many many minor.
|
166
|
+
|
167
|
+
1) The CSV class uses `line.split(',')` with some kludges (†) with the claim it's faster.
|
168
|
+
What?! The right way: CSV needs its own purpose-built parser. There's no other
|
169
|
+
way you can handle all the (edge) cases with double quotes and escaped doubled up
|
170
|
+
double quotes. Period.
|
171
|
+
|
172
|
+
For example, the CSV class cannot handle leading or trailing spaces
|
173
|
+
for double quoted values `1,•"2","3"•`.
|
174
|
+
Or handling double quotes inside values and so on and on.
|
175
|
+
|
176
|
+
(†): kludge - a workaround or quick-and-dirty solution that is clumsy, inelegant, inefficient, difficult to extend and hard to maintain
|
177
|
+
|
178
|
+
2) The CSV class returns `nil` for `,,` but an empty string (`""`)
|
179
|
+
for `"","",""`. The right way: All values are always strings. Period.
|
180
|
+
|
181
|
+
If you want to use `nil` you MUST configure a string (or strings)
|
182
|
+
such as `NA`, `n/a`, `\N`, or similar that map to `nil`.
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
14
187
|
|
15
188
|
## Alternatives
|
16
189
|
|
data/lib/csvreader/reader.rb
CHANGED
@@ -96,21 +96,7 @@ end # module Csvv
|
|
96
96
|
|
97
97
|
class CsvReader
|
98
98
|
|
99
|
-
|
100
|
-
# helper methods
|
101
|
-
def self.unwrap( row_or_array ) ## unwrap row - find a better name? why? why not?
|
102
|
-
## return row values as array of strings
|
103
|
-
if row_or_array.is_a?( CSV::Row )
|
104
|
-
row = row_or_array
|
105
|
-
row.fields ## gets array of string of field values
|
106
|
-
else ## assume "classic" array of strings
|
107
|
-
array = row_or_array
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
def self.foreach( path, sep: Csv.config.sep, headers: true )
|
99
|
+
def self.foreach( path, sep: Csv.config.sep, headers: false )
|
114
100
|
csv_options = Csv.config.default_options.merge(
|
115
101
|
headers: headers,
|
116
102
|
col_sep: sep,
|
@@ -122,8 +108,7 @@ class CsvReader
|
|
122
108
|
end
|
123
109
|
end
|
124
110
|
|
125
|
-
|
126
|
-
def self.read( path, sep: Csv.config.sep, headers: true )
|
111
|
+
def self.read( path, sep: Csv.config.sep, headers: false )
|
127
112
|
## note: use our own file.open
|
128
113
|
## always use utf-8 for now
|
129
114
|
## check/todo: add skip option bom too - why? why not?
|
@@ -131,7 +116,7 @@ class CsvReader
|
|
131
116
|
parse( txt, sep: sep, headers: headers )
|
132
117
|
end
|
133
118
|
|
134
|
-
def self.parse( txt, sep: Csv.config.sep, headers:
|
119
|
+
def self.parse( txt, sep: Csv.config.sep, headers: false )
|
135
120
|
csv_options = Csv.config.default_options.merge(
|
136
121
|
headers: headers,
|
137
122
|
col_sep: sep
|
@@ -140,6 +125,7 @@ class CsvReader
|
|
140
125
|
CSV.parse( txt, csv_options )
|
141
126
|
end
|
142
127
|
|
128
|
+
|
143
129
|
def self.parse_line( txt, sep: Csv.config.sep )
|
144
130
|
## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
|
145
131
|
csv_options = Csv.config.default_options.merge(
|
@@ -151,7 +137,6 @@ class CsvReader
|
|
151
137
|
end
|
152
138
|
|
153
139
|
|
154
|
-
|
155
140
|
def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
|
156
141
|
# read first lines (only)
|
157
142
|
# and parse with csv to get header from csv library itself
|
@@ -185,4 +170,38 @@ class CsvReader
|
|
185
170
|
## hash record does NOT work for single line/row
|
186
171
|
parse_line( lines, sep: sep )
|
187
172
|
end # method self.header
|
173
|
+
|
174
|
+
####################
|
175
|
+
# helper methods
|
176
|
+
def self.unwrap( row_or_array ) ## unwrap row - find a better name? why? why not?
|
177
|
+
## return row values as array of strings
|
178
|
+
if row_or_array.is_a?( CSV::Row )
|
179
|
+
row = row_or_array
|
180
|
+
row.fields ## gets array of string of field values
|
181
|
+
else ## assume "classic" array of strings
|
182
|
+
array = row_or_array
|
183
|
+
end
|
184
|
+
end
|
188
185
|
end # class CsvReader
|
186
|
+
|
187
|
+
|
188
|
+
|
189
|
+
class CsvHashReader
|
190
|
+
|
191
|
+
def self.read( path, sep: Csv.config.sep, headers: true )
|
192
|
+
CsvReader.read( path, sep: sep, headers: headers )
|
193
|
+
end
|
194
|
+
|
195
|
+
def self.parse( txt, sep: Csv.config.sep, headers: true )
|
196
|
+
CsvReader.parse( txt, sep: sep, headers: headers )
|
197
|
+
end
|
198
|
+
|
199
|
+
def self.foreach( path, sep: Csv.config.sep, headers: true, &block )
|
200
|
+
CsvReader.foreach( path, sep: sep, headers: headers, &block )
|
201
|
+
end
|
202
|
+
|
203
|
+
def self.header( path, sep: Csv.config.sep ) ## add header too? why? why not?
|
204
|
+
CsvReader.header( path, sep: sep )
|
205
|
+
end
|
206
|
+
|
207
|
+
end # class CsvHashReader
|
data/lib/csvreader/version.rb
CHANGED
@@ -0,0 +1,9 @@
|
|
1
|
+
Quote,Play,Cite
|
2
|
+
Sweet are the uses of adversity,As You Like It,"Act 2, scene 1, 12"
|
3
|
+
All the world's a stage,As You Like It,"Act 2, scene 7, 139"
|
4
|
+
"We few, we happy few",Henry V,
|
5
|
+
"""Seems,"" madam! Nay it is; I know not ""seems.""",Hamlet,(1.ii.76)
|
6
|
+
"To be, or not to be",Hamlet,"Act 3, scene 1, 55"
|
7
|
+
What's in a name? That which we call a rose by any other name would smell as sweet.,Romeo and Juliet,"(II, ii, 1-2)"
|
8
|
+
"O Romeo, Romeo, wherefore art thou Romeo?",Romeo and Juliet,"Act 2, scene 2, 33"
|
9
|
+
"Tomorrow, and tomorrow, and tomorrow",Macbeth,"Act 5, scene 5, 19"
|
data/test/test_reader.rb
CHANGED
@@ -9,39 +9,40 @@ require 'helper'
|
|
9
9
|
|
10
10
|
class TestReader < MiniTest::Test
|
11
11
|
|
12
|
+
|
12
13
|
def test_read
|
13
14
|
puts "== read: beer.csv:"
|
14
|
-
|
15
|
+
data = CsvReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
|
15
16
|
|
16
|
-
pp
|
17
|
-
pp
|
18
|
-
pp table.to_a ## note: includes header (first row with column names)
|
17
|
+
pp data.class.name
|
18
|
+
pp data
|
19
19
|
|
20
|
-
|
20
|
+
data.each do |row|
|
21
21
|
pp row
|
22
22
|
end
|
23
|
-
puts " #{
|
24
|
-
assert_equal
|
23
|
+
puts " #{data.size} rows"
|
24
|
+
assert_equal 7, data.size ## note: include header row in count
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
puts "== read (
|
29
|
-
|
27
|
+
def test_read_hash
|
28
|
+
puts "== read (hash): beer.csv:"
|
29
|
+
table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" ) ## returns CSV::Table
|
30
30
|
|
31
|
-
pp
|
32
|
-
pp
|
31
|
+
pp table.class.name
|
32
|
+
pp table
|
33
|
+
pp table.to_a ## note: includes header (first row with column names)
|
33
34
|
|
34
|
-
|
35
|
+
table.each do |row| ## note: will skip (NOT include) header row!!
|
35
36
|
pp row
|
36
37
|
end
|
37
|
-
puts " #{
|
38
|
-
assert_equal
|
38
|
+
puts " #{table.size} rows" ## note: again will skip (NOT include) header row in count!!!
|
39
|
+
assert_equal 6, table.size
|
39
40
|
end
|
40
41
|
|
41
42
|
|
42
|
-
def
|
43
|
-
puts "== read: beer11.csv:"
|
44
|
-
table =
|
43
|
+
def test_read_hash11
|
44
|
+
puts "== read (hash): beer11.csv:"
|
45
|
+
table = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
|
45
46
|
pp table
|
46
47
|
pp table.to_a ## note: includes header (first row with column names)
|
47
48
|
|
@@ -90,28 +91,29 @@ def test_header11
|
|
90
91
|
end
|
91
92
|
|
92
93
|
|
94
|
+
|
93
95
|
def test_foreach
|
94
|
-
puts "== foreach:
|
95
|
-
CsvReader.foreach( "#{CsvReader.test_data_dir}/
|
96
|
-
pp row
|
97
|
-
pp row.fields
|
96
|
+
puts "== foreach: beer11.csv:"
|
97
|
+
CsvReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
|
98
|
+
pp row ## note: is Array (no .fields available!!!!!)
|
98
99
|
end
|
99
100
|
assert true
|
100
101
|
end
|
101
102
|
|
102
|
-
def
|
103
|
-
puts "== foreach:
|
104
|
-
|
103
|
+
def test_foreach_hash
|
104
|
+
puts "== foreach (hash): beer.csv:"
|
105
|
+
CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
|
105
106
|
pp row
|
106
107
|
pp row.fields
|
107
108
|
end
|
108
109
|
assert true
|
109
110
|
end
|
110
111
|
|
111
|
-
def
|
112
|
-
puts "== foreach (
|
113
|
-
|
114
|
-
pp row
|
112
|
+
def test_foreach_hash11
|
113
|
+
puts "== foreach (hash): beer11.csv:"
|
114
|
+
CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
|
115
|
+
pp row
|
116
|
+
pp row.fields
|
115
117
|
end
|
116
118
|
assert true
|
117
119
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -59,6 +59,7 @@ files:
|
|
59
59
|
- lib/csvreader/version.rb
|
60
60
|
- test/data/beer.csv
|
61
61
|
- test/data/beer11.csv
|
62
|
+
- test/data/shakespeare.csv
|
62
63
|
- test/helper.rb
|
63
64
|
- test/test_reader.rb
|
64
65
|
homepage: https://github.com/csv11/csvreader
|