csvreader 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/csvreader/reader.rb +67 -38
- data/lib/csvreader/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9bc6971bd638abc67e8e82e241dbb370602b0d5
|
4
|
+
data.tar.gz: 062f2727188a6f3705c21a5cc825194f84bea41c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 595f1c779e0457377fe5c09602cba1ce7754803b35b9280e06dfc752759da441c708db830cb159076ce3f445b3b6aaf1fef459ff9eaace4ae6a436988e52455f
|
7
|
+
data.tar.gz: f4dc02242912ba15bef498838093f85de22c3670b5bcb2b92139adbd9343cbddec753f755729f85b2962df26d724ff069cc73b8f5cccd3edb896dc0d3ac26969
|
data/README.md
CHANGED
@@ -164,7 +164,7 @@ see [`TabReader` »](https://github.com/datatxt/tabreader).
|
|
164
164
|
|
165
165
|
Two major design bugs and many many minor.
|
166
166
|
|
167
|
-
1) The CSV class uses `line.split(
|
167
|
+
(1) The CSV class uses `line.split(',')` with some kludges (†) with the claim it's faster.
|
168
168
|
What?! The right way: CSV needs its own purpose-built parser. There's no other
|
169
169
|
way you can handle all the (edge) cases with double quotes and escaped doubled up
|
170
170
|
double quotes. Period.
|
@@ -175,7 +175,7 @@ Or handling double quotes inside values and so on and on.
|
|
175
175
|
|
176
176
|
(†): kludge - a workaround or quick-and-dirty solution that is clumsy, inelegant, inefficient, difficult to extend and hard to maintain
|
177
177
|
|
178
|
-
2) The CSV class returns `nil` for `,,` but an empty string (`""`)
|
178
|
+
(2) The CSV class returns `nil` for `,,` but an empty string (`""`)
|
179
179
|
for `"","",""`. The right way: All values are always strings. Period.
|
180
180
|
|
181
181
|
If you want to use `nil` you MUST configure a string (or strings)
|
data/lib/csvreader/reader.rb
CHANGED
@@ -6,6 +6,34 @@ module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar
|
|
6
6
|
## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
|
7
7
|
## use __CSV__ or similar? or just ::CSV ??
|
8
8
|
|
9
|
+
|
10
|
+
class Dialect ## todo: use a module - it's just a namespace/module now - why? why not?
|
11
|
+
###
|
12
|
+
# (auto-)add these flavors/dialects:
|
13
|
+
# :tab -> uses TabReader(!)
|
14
|
+
# :strict|:rfc4180
|
15
|
+
# :unix -> uses unix-style escapes e.g. \n \" etc.
|
16
|
+
# :windows|:excel
|
17
|
+
# :guess|:auto -> guess (auto-detect) separator - why? why not?
|
18
|
+
|
19
|
+
## e.g. use Dialect.registry[:unix] = { ... } etc.
|
20
|
+
## note use @@ - there is only one registry
|
21
|
+
def self.registry() @@registry ||={} end
|
22
|
+
|
23
|
+
## add built-in dialects:
|
24
|
+
## trim - use strip? why? why not? use alias?
|
25
|
+
registry[:tab] = {} ##{ class: TabReader }
|
26
|
+
registry[:strict] = { strict: true, trim: false } ## add no comments, blank lines, etc. ???
|
27
|
+
registry[:rfc4180] = :strict ## alternative name
|
28
|
+
registry[:windows] = {}
|
29
|
+
registry[:excel] = :windows
|
30
|
+
registry[:unix] = {}
|
31
|
+
|
32
|
+
## todo: add some more
|
33
|
+
end # class Dialect
|
34
|
+
|
35
|
+
|
36
|
+
|
9
37
|
class Configuration
|
10
38
|
|
11
39
|
puts "CSV::VERSION:"
|
@@ -23,6 +51,9 @@ module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar
|
|
23
51
|
|
24
52
|
|
25
53
|
attr_accessor :sep ## col_sep (column separator)
|
54
|
+
attr_accessor :na ## not available (string or array of strings or nil) - rename to nas/nils/nulls - why? why not?
|
55
|
+
attr_accessor :trim ### allow ltrim/rtrim/trim - why? why not?
|
56
|
+
attr_accessor :dialect
|
26
57
|
|
27
58
|
def initialize
|
28
59
|
@sep = ','
|
@@ -32,6 +63,8 @@ module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar
|
|
32
63
|
self ## return self for chaining
|
33
64
|
end
|
34
65
|
|
66
|
+
def trim?() @trim; end ## strip leading and trailing spaces
|
67
|
+
|
35
68
|
def blank?( line )
|
36
69
|
## note: blank line does NOT include "blank" with spaces only!!
|
37
70
|
## use BLANK_REGEX in skip_lines to clean-up/skip/remove/ignore
|
@@ -96,46 +129,53 @@ end # module Csvv
|
|
96
129
|
|
97
130
|
class CsvReader
|
98
131
|
|
99
|
-
def self.
|
132
|
+
def self.parse_line( txt, sep: Csv.config.sep,
|
133
|
+
trim: Csv.config.trim?,
|
134
|
+
na: Csv.config.na,
|
135
|
+
dialect: Csv.config.dialect,
|
136
|
+
converters: nil)
|
137
|
+
## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
|
100
138
|
csv_options = Csv.config.default_options.merge(
|
101
|
-
|
102
|
-
|
103
|
-
external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
|
139
|
+
headers: false, ## note: always turn off headers!!!!!!
|
140
|
+
col_sep: sep
|
104
141
|
)
|
142
|
+
## pp csv_options
|
143
|
+
CSV.parse_line( txt, csv_options )
|
144
|
+
end
|
105
145
|
|
106
|
-
|
107
|
-
|
108
|
-
|
146
|
+
def self.parse( txt, sep: Csv.config.sep, headers: false )
|
147
|
+
csv_options = Csv.config.default_options.merge(
|
148
|
+
headers: headers,
|
149
|
+
col_sep: sep
|
150
|
+
)
|
151
|
+
## pp csv_options
|
152
|
+
CSV.parse( txt, csv_options )
|
109
153
|
end
|
110
154
|
|
111
155
|
def self.read( path, sep: Csv.config.sep, headers: false )
|
112
156
|
## note: use our own file.open
|
113
157
|
## always use utf-8 for now
|
114
158
|
## check/todo: add skip option bom too - why? why not?
|
115
|
-
txt = File.open( path, 'r:utf-8' )
|
159
|
+
txt = File.open( path, 'r:bom|utf-8' )
|
116
160
|
parse( txt, sep: sep, headers: headers )
|
117
161
|
end
|
118
162
|
|
119
|
-
def self.
|
163
|
+
def self.foreach( path, sep: Csv.config.sep, headers: false )
|
120
164
|
csv_options = Csv.config.default_options.merge(
|
121
165
|
headers: headers,
|
122
|
-
col_sep: sep
|
166
|
+
col_sep: sep,
|
167
|
+
external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
|
123
168
|
)
|
124
|
-
## pp csv_options
|
125
|
-
CSV.parse( txt, csv_options )
|
126
|
-
end
|
127
169
|
|
170
|
+
## todo/check/fix:
|
171
|
+
## can use bom e.g. 'bom|utf-8' - how?
|
172
|
+
## raises ArgumentError: unknown encoding name - bom|utf-8
|
128
173
|
|
129
|
-
def self.parse_line( txt, sep: Csv.config.sep )
|
130
|
-
## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
|
131
|
-
csv_options = Csv.config.default_options.merge(
|
132
|
-
headers: false, ## note: always turn off headers!!!!!!
|
133
|
-
col_sep: sep
|
134
|
-
)
|
135
|
-
## pp csv_options
|
136
|
-
CSV.parse_line( txt, csv_options )
|
137
|
-
end
|
138
174
|
|
175
|
+
CSV.foreach( path, csv_options ) do |row|
|
176
|
+
yield( row ) ## check/todo: use block.call( row ) ## why? why not?
|
177
|
+
end
|
178
|
+
end
|
139
179
|
|
140
180
|
def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
|
141
181
|
# read first lines (only)
|
@@ -148,7 +188,7 @@ class CsvReader
|
|
148
188
|
## - NOT a blank line
|
149
189
|
|
150
190
|
lines = ''
|
151
|
-
File.open( path, 'r:utf-8' ) do |f|
|
191
|
+
File.open( path, 'r:bom|utf-8' ) do |f|
|
152
192
|
|
153
193
|
## todo/fix: how to handle empty files or files without headers?!
|
154
194
|
|
@@ -171,31 +211,20 @@ class CsvReader
|
|
171
211
|
parse_line( lines, sep: sep )
|
172
212
|
end # method self.header
|
173
213
|
|
174
|
-
####################
|
175
|
-
# helper methods
|
176
|
-
def self.unwrap( row_or_array ) ## unwrap row - find a better name? why? why not?
|
177
|
-
## return row values as array of strings
|
178
|
-
if row_or_array.is_a?( CSV::Row )
|
179
|
-
row = row_or_array
|
180
|
-
row.fields ## gets array of string of field values
|
181
|
-
else ## assume "classic" array of strings
|
182
|
-
array = row_or_array
|
183
|
-
end
|
184
|
-
end
|
185
214
|
end # class CsvReader
|
186
215
|
|
187
216
|
|
188
217
|
|
189
218
|
class CsvHashReader
|
190
219
|
|
191
|
-
def self.read( path, sep: Csv.config.sep, headers: true )
|
192
|
-
CsvReader.read( path, sep: sep, headers: headers )
|
193
|
-
end
|
194
|
-
|
195
220
|
def self.parse( txt, sep: Csv.config.sep, headers: true )
|
196
221
|
CsvReader.parse( txt, sep: sep, headers: headers )
|
197
222
|
end
|
198
223
|
|
224
|
+
def self.read( path, sep: Csv.config.sep, headers: true )
|
225
|
+
CsvReader.read( path, sep: sep, headers: headers )
|
226
|
+
end
|
227
|
+
|
199
228
|
def self.foreach( path, sep: Csv.config.sep, headers: true, &block )
|
200
229
|
CsvReader.foreach( path, sep: sep, headers: headers, &block )
|
201
230
|
end
|
data/lib/csvreader/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|