csvreader 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: af0fcea1b598e6123786a05532a6f5b2e10a4095
4
- data.tar.gz: ba2dc18a6076e425847b440c05819e898f0a66b2
3
+ metadata.gz: a9bc6971bd638abc67e8e82e241dbb370602b0d5
4
+ data.tar.gz: 062f2727188a6f3705c21a5cc825194f84bea41c
5
5
  SHA512:
6
- metadata.gz: 28f60b98574e5331b53280f27017fae776c787ee1b7a56815c8a8f9c21a0926e6f561ca8a75f1464f1743849f989a52f122fbe4f20086de8159cf2df53b71bbe
7
- data.tar.gz: 6d5b80b11e4774bc227bffe62bc829ab19b70f22fd69ca35c54526b85f261fa5c4bf0a7d87c9ba715738f50a6710bdd843f3b6cc1581f0d88744332fdf062796
6
+ metadata.gz: 595f1c779e0457377fe5c09602cba1ce7754803b35b9280e06dfc752759da441c708db830cb159076ce3f445b3b6aaf1fef459ff9eaace4ae6a436988e52455f
7
+ data.tar.gz: f4dc02242912ba15bef498838093f85de22c3670b5bcb2b92139adbd9343cbddec753f755729f85b2962df26d724ff069cc73b8f5cccd3edb896dc0d3ac26969
data/README.md CHANGED
@@ -164,7 +164,7 @@ see [`TabReader` »](https://github.com/datatxt/tabreader).
164
164
 
165
165
  Two major design bugs and many, many minor ones.
166
166
 
167
- 1) The CSV class uses `line.split(`,`)` with some kludges (†) with the claim its faster.
167
+ (1) The CSV class uses `line.split(',')` with some kludges (†) with the claim it's faster.
168
168
  What?! The right way: CSV needs its own purpose-built parser. There's no other
169
169
  way you can handle all the (edge) cases with double quotes and escaped doubled up
170
170
  double quotes. Period.
@@ -175,7 +175,7 @@ Or handling double quotes inside values and so on and on.
175
175
 
176
176
  (†): kludge - a workaround or quick-and-dirty solution that is clumsy, inelegant, inefficient, difficult to extend and hard to maintain
177
177
 
178
- 2) The CSV class returns `nil` for `,,` but an empty string (`""`)
178
+ (2) The CSV class returns `nil` for `,,` but an empty string (`""`)
179
179
  for `"","",""`. The right way: All values are always strings. Period.
180
180
 
181
181
  If you want to use `nil` you MUST configure a string (or strings)
@@ -6,6 +6,34 @@ module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar
6
6
  ## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
7
7
  ## use __CSV__ or similar? or just ::CSV ??
8
8
 
9
+
10
+ class Dialect ## todo: use a module - it's just a namespace/module now - why? why not?
11
+ ###
12
+ # (auto-)add these flavors/dialects:
13
+ # :tab -> uses TabReader(!)
14
+ # :strict|:rfc4180
15
+ # :unix -> uses unix-style escapes e.g. \n \" etc.
16
+ # :windows|:excel
17
+ # :guess|:auto -> guess (auto-detect) separator - why? why not?
18
+
19
+ ## e.g. use Dialect.registry[:unix] = { ... } etc.
20
+ ## note use @@ - there is only one registry
21
+ def self.registry() @@registry ||={} end
22
+
23
+ ## add built-in dialects:
24
+ ## trim - use strip? why? why not? use alias?
25
+ registry[:tab] = {} ##{ class: TabReader }
26
+ registry[:strict] = { strict: true, trim: false } ## add no comments, blank lines, etc. ???
27
+ registry[:rfc4180] = :strict ## alternative name
28
+ registry[:windows] = {}
29
+ registry[:excel] = :windows
30
+ registry[:unix] = {}
31
+
32
+ ## todo: add some more
33
+ end # class Dialect
34
+
35
+
36
+
9
37
  class Configuration
10
38
 
11
39
  puts "CSV::VERSION:"
@@ -23,6 +51,9 @@ module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar
23
51
 
24
52
 
25
53
  attr_accessor :sep ## col_sep (column separator)
54
+ attr_accessor :na ## not available (string or array of strings or nil) - rename to nas/nils/nulls - why? why not?
55
+ attr_accessor :trim ### allow ltrim/rtrim/trim - why? why not?
56
+ attr_accessor :dialect
26
57
 
27
58
  def initialize
28
59
  @sep = ','
@@ -32,6 +63,8 @@ module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar
32
63
  self ## return self for chaining
33
64
  end
34
65
 
66
+ def trim?() @trim; end ## strip leading and trailing spaces
67
+
35
68
  def blank?( line )
36
69
  ## note: blank line does NOT include "blank" with spaces only!!
37
70
  ## use BLANK_REGEX in skip_lines to clean-up/skip/remove/ignore
@@ -96,46 +129,53 @@ end # module Csvv
96
129
 
97
130
  class CsvReader
98
131
 
99
- def self.foreach( path, sep: Csv.config.sep, headers: false )
132
+ def self.parse_line( txt, sep: Csv.config.sep,
133
+ trim: Csv.config.trim?,
134
+ na: Csv.config.na,
135
+ dialect: Csv.config.dialect,
136
+ converters: nil)
137
+ ## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
100
138
  csv_options = Csv.config.default_options.merge(
101
- headers: headers,
102
- col_sep: sep,
103
- external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
139
+ headers: false, ## note: always turn off headers!!!!!!
140
+ col_sep: sep
104
141
  )
142
+ ## pp csv_options
143
+ CSV.parse_line( txt, csv_options )
144
+ end
105
145
 
106
- CSV.foreach( path, csv_options ) do |row|
107
- yield( row ) ## check/todo: use block.call( row ) ## why? why not?
108
- end
146
+ def self.parse( txt, sep: Csv.config.sep, headers: false )
147
+ csv_options = Csv.config.default_options.merge(
148
+ headers: headers,
149
+ col_sep: sep
150
+ )
151
+ ## pp csv_options
152
+ CSV.parse( txt, csv_options )
109
153
  end
110
154
 
111
155
  def self.read( path, sep: Csv.config.sep, headers: false )
112
156
  ## note: use our own file.open
113
157
  ## always use utf-8 for now
114
158
  ## check/todo: add skip option bom too - why? why not?
115
- txt = File.open( path, 'r:utf-8' )
159
+ txt = File.open( path, 'r:bom|utf-8' )
116
160
  parse( txt, sep: sep, headers: headers )
117
161
  end
118
162
 
119
- def self.parse( txt, sep: Csv.config.sep, headers: false )
163
+ def self.foreach( path, sep: Csv.config.sep, headers: false )
120
164
  csv_options = Csv.config.default_options.merge(
121
165
  headers: headers,
122
- col_sep: sep
166
+ col_sep: sep,
167
+ external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
123
168
  )
124
- ## pp csv_options
125
- CSV.parse( txt, csv_options )
126
- end
127
169
 
170
+ ## todo/check/fix:
171
+ ## can use bom e.g. 'bom|utf-8' - how?
172
+ ## raises ArgumentError: unknown encoding name - bom|utf-8
128
173
 
129
- def self.parse_line( txt, sep: Csv.config.sep )
130
- ## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
131
- csv_options = Csv.config.default_options.merge(
132
- headers: false, ## note: always turn off headers!!!!!!
133
- col_sep: sep
134
- )
135
- ## pp csv_options
136
- CSV.parse_line( txt, csv_options )
137
- end
138
174
 
175
+ CSV.foreach( path, csv_options ) do |row|
176
+ yield( row ) ## check/todo: use block.call( row ) ## why? why not?
177
+ end
178
+ end
139
179
 
140
180
  def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
141
181
  # read first lines (only)
@@ -148,7 +188,7 @@ class CsvReader
148
188
  ## - NOT a blank line
149
189
 
150
190
  lines = ''
151
- File.open( path, 'r:utf-8' ) do |f|
191
+ File.open( path, 'r:bom|utf-8' ) do |f|
152
192
 
153
193
  ## todo/fix: how to handle empty files or files without headers?!
154
194
 
@@ -171,31 +211,20 @@ class CsvReader
171
211
  parse_line( lines, sep: sep )
172
212
  end # method self.header
173
213
 
174
- ####################
175
- # helper methods
176
- def self.unwrap( row_or_array ) ## unwrap row - find a better name? why? why not?
177
- ## return row values as array of strings
178
- if row_or_array.is_a?( CSV::Row )
179
- row = row_or_array
180
- row.fields ## gets array of string of field values
181
- else ## assume "classic" array of strings
182
- array = row_or_array
183
- end
184
- end
185
214
  end # class CsvReader
186
215
 
187
216
 
188
217
 
189
218
  class CsvHashReader
190
219
 
191
- def self.read( path, sep: Csv.config.sep, headers: true )
192
- CsvReader.read( path, sep: sep, headers: headers )
193
- end
194
-
195
220
  def self.parse( txt, sep: Csv.config.sep, headers: true )
196
221
  CsvReader.parse( txt, sep: sep, headers: headers )
197
222
  end
198
223
 
224
+ def self.read( path, sep: Csv.config.sep, headers: true )
225
+ CsvReader.read( path, sep: sep, headers: headers )
226
+ end
227
+
199
228
  def self.foreach( path, sep: Csv.config.sep, headers: true, &block )
200
229
  CsvReader.foreach( path, sep: sep, headers: headers, &block )
201
230
  end
@@ -4,7 +4,7 @@
4
4
  class CsvReader ## note: uses a class for now - change to module - why? why not?
5
5
 
6
6
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
7
- MINOR = 2
7
+ MINOR = 3
8
8
  PATCH = 0
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-19 00:00:00.000000000 Z
11
+ date: 2018-08-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc