csvreader 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +1 -0
- data/README.md +10 -5
- data/lib/csvreader/parser_std.rb +37 -1
- data/lib/csvreader/version.rb +1 -1
- data/test/test_parser_autofix.rb +28 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a8e2f4f6e06ec63483735c1e0966b61398df85eb
|
4
|
+
data.tar.gz: 6c867acfa43c261473b6d6300e3ecd8d7042f0dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ddc944ee42de5660c68e057d0cbdbdf81b0f135b603e87231da2258bbd9001fc2483881e1c0ff15d5d7b23d2cb9f1b2c799a77a3551dbaee18094ba9aad5086
|
7
|
+
data.tar.gz: 39743a7df8b49b45a9ad1dd7ce8598616733348ecdbc52b94de4e8fc1fb7a54e33d0cbc36f3750a5940c9d46310d94a4c6d983811fb506efa2349bb0adc94d16
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -12,10 +12,16 @@
|
|
12
12
|
|
13
13
|
## What's New?
|
14
14
|
|
15
|
+
**v1.2.2** Added auto-fix/correction/recovery
|
16
|
+
for double quoted value with extra trailing value
|
17
|
+
to the default parser (`ParserStd`) e.g. `"Freddy" Mercury`
|
18
|
+
will get read "as is" and turned
|
19
|
+
into an "unquoted" value with "literal" quotes e.g. `"Freddy" Mercury`.
|
15
20
|
|
16
|
-
|
21
|
+
|
22
|
+
**v1.2.1** Added support for (optional) hashtag
|
17
23
|
to the default parser (`ParserStd`) for
|
18
|
-
supporting the [Humanitarian eXchange Language (HXL)](
|
24
|
+
supporting the [Humanitarian eXchange Language (HXL)](https://github.com/csvspecs/csv-hxl).
|
19
25
|
Default is turned off (`false`). Use `Csv.human`
|
20
26
|
or `Csv.hum` or `Csv.hxl` for pre-defined with hashtag turned on.
|
21
27
|
|
@@ -53,7 +59,7 @@ With the "strict" parser you will get a firework of "stray" quote errors / excep
|
|
53
59
|
|
54
60
|
|
55
61
|
**v1.1.1**: Added built-in support for (optional) alternative comments (`%`) - used by
|
56
|
-
[ARFF (attribute-relation file format)](https://
|
62
|
+
[ARFF (attribute-relation file format)](https://github.com/csvspecs/csv-meta#attribute-relation-classic) -
|
57
63
|
and support for (optional) directives (`@`) in header (that is, before any records)
|
58
64
|
to default parser ("The Right Way").
|
59
65
|
Now you can use either `#` or `%` for comments, the first one "wins" - you CANNOT use both.
|
@@ -68,13 +74,12 @@ e.g. `Csv.fixed.parse( txt, width: [8,-2,8,-3,32,-2,14] )`.
|
|
68
74
|
|
69
75
|
**v1.0.3**: Added built-in support for an (optional) front matter (`---`) meta data block
|
70
76
|
in header (that is, before any records)
|
71
|
-
to default parser ("The Right Way") - used by [CSVY (yaml front matter for csv file format)](
|
77
|
+
to default parser ("The Right Way") - used by [CSVY (yaml front matter for csv file format)](https://github.com/csvspecs/csv-meta#front-matter-in-yaml).
|
72
78
|
Use `Csv.parser.meta` to get the parsed meta data block hash (or `nil`) if none.
|
73
79
|
|
74
80
|
|
75
81
|
|
76
82
|
|
77
|
-
|
78
83
|
## Usage
|
79
84
|
|
80
85
|
|
data/lib/csvreader/parser_std.rb
CHANGED
@@ -197,6 +197,26 @@ def parse_quote( input, sep:, opening_quote:, closing_quote:)
|
|
197
197
|
end
|
198
198
|
|
199
199
|
|
200
|
+
def parse_field_until_sep( input, sep: )
|
201
|
+
value = ""
|
202
|
+
logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
203
|
+
## consume simple value
|
204
|
+
## until we hit "," or "\n" or "\r"
|
205
|
+
## note: will eat-up quotes too!!!
|
206
|
+
while (c=input.peek; !(c==sep || c==LF || c==CR || input.eof?))
|
207
|
+
if input.peek == BACKSLASH
|
208
|
+
value << parse_escape( input, sep: sep )
|
209
|
+
else
|
210
|
+
logger.debug " add char >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
211
|
+
value << input.getc ## note: eat-up all spaces (" ") and tabs (\t) too (strip trailing spaces at the end)
|
212
|
+
end
|
213
|
+
end
|
214
|
+
## note: only strip **trailing** spaces (space and tab only)
|
215
|
+
## do NOT strip newlines etc. might have been added via escape! e.g. \\\n
|
216
|
+
value = value.sub( /[ \t]+$/, '' )
|
217
|
+
value
|
218
|
+
end
|
219
|
+
|
200
220
|
|
201
221
|
|
202
222
|
def parse_field( input, sep: )
|
@@ -226,7 +246,23 @@ def parse_field( input, sep: )
|
|
226
246
|
closing_quote: DOUBLE_QUOTE )
|
227
247
|
|
228
248
|
## note: always eat-up all trailing spaces (" ") and tabs (\t)
|
229
|
-
skip_spaces( input )
|
249
|
+
spaces_count = skip_spaces( input )
|
250
|
+
|
251
|
+
## check for auto-fix trailing data after quoted value e.g. ---,"Freddy" Mercury,---
|
252
|
+
## todo/fix: add auto-fix for all quote variants!!!!!!!!!!!!!!!!!!!!
|
253
|
+
if (c=input.peek; c==sep || c==LF || c==CR || input.eof?)
|
254
|
+
## everything ok (that is, regular quoted value)!!!
|
255
|
+
else
|
256
|
+
## try auto-fix
|
257
|
+
## todo: report warning/issue error (if configured)!!!
|
258
|
+
extra_value = parse_field_until_sep( input, sep: sep )
|
259
|
+
## "reconstruct" non-quoted value
|
260
|
+
spaces = ' ' * spaces_count ## todo: preserve tab (\t) - why? why not?
|
261
|
+
## note: minor (theoretical) issue (doubled quotes got "collapsed/escaped" to one from two in quoted value)
|
262
|
+
## e.g. "hello """ extra, (becomes)=> "hello "" extra (one quote less/"eaten up")
|
263
|
+
value = %Q{"#{value}"#{spaces}#{extra_value}}
|
264
|
+
end
|
265
|
+
|
230
266
|
logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
231
267
|
elsif input.peek == SINGLE_QUOTE ## allow single quote too (by default)
|
232
268
|
logger.debug "start single_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_parser_autofix.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_autofix.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestParserAutofix < MiniTest::Test
|
12
|
+
|
13
|
+
|
14
|
+
def parser
|
15
|
+
CsvReader::Parser::DEFAULT
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
def test_quote_with_trailing_value
|
20
|
+
recs = [[ "Farrokh", "\"Freddy\" Mercury", "Bulsara" ]]
|
21
|
+
|
22
|
+
assert_equal recs, parser.parse( %Q{Farrokh,"Freddy" Mercury,Bulsara} )
|
23
|
+
assert_equal recs, parser.parse( %Q{ Farrokh , "Freddy" Mercury , Bulsara } )
|
24
|
+
assert_equal recs, parser.parse( %Q{Farrokh, "Freddy" Mercury ,Bulsara} )
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
end # class TestParserAutofix
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -83,6 +83,7 @@ files:
|
|
83
83
|
- test/test_buffer.rb
|
84
84
|
- test/test_converter.rb
|
85
85
|
- test/test_parser.rb
|
86
|
+
- test/test_parser_autofix.rb
|
86
87
|
- test/test_parser_directive.rb
|
87
88
|
- test/test_parser_fixed.rb
|
88
89
|
- test/test_parser_formats.rb
|