csvreader 1.2.1 → 1.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +1 -0
- data/README.md +10 -5
- data/lib/csvreader/parser_std.rb +37 -1
- data/lib/csvreader/version.rb +1 -1
- data/test/test_parser_autofix.rb +28 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a8e2f4f6e06ec63483735c1e0966b61398df85eb
|
4
|
+
data.tar.gz: 6c867acfa43c261473b6d6300e3ecd8d7042f0dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ddc944ee42de5660c68e057d0cbdbdf81b0f135b603e87231da2258bbd9001fc2483881e1c0ff15d5d7b23d2cb9f1b2c799a77a3551dbaee18094ba9aad5086
|
7
|
+
data.tar.gz: 39743a7df8b49b45a9ad1dd7ce8598616733348ecdbc52b94de4e8fc1fb7a54e33d0cbc36f3750a5940c9d46310d94a4c6d983811fb506efa2349bb0adc94d16
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -12,10 +12,16 @@
|
|
12
12
|
|
13
13
|
## What's New?
|
14
14
|
|
15
|
+
**v1.2.2** Added auto-fix/correction/recovery
|
16
|
+
for double quoted value with extra trailing value
|
17
|
+
to the default parser (`ParserStd`) e.g. `"Freddy" Mercury`
|
18
|
+
will get read "as is" and turned
|
19
|
+
into an "unquoted" value with "literal" quotes e.g. `"Freddy" Mercury`.
|
15
20
|
|
16
|
-
|
21
|
+
|
22
|
+
**v1.2.1** Added support for (optional) hashtag
|
17
23
|
to the default parser (`ParserStd`) for
|
18
|
-
supporting the [Humanitarian eXchange Language (HXL)](
|
24
|
+
supporting the [Humanitarian eXchange Language (HXL)](https://github.com/csvspecs/csv-hxl).
|
19
25
|
Default is turned off (`false`). Use `Csv.human`
|
20
26
|
or `Csv.hum` or `Csv.hxl` for pre-defined with hashtag turned on.
|
21
27
|
|
@@ -53,7 +59,7 @@ With the "strict" parser you will get a firework of "stray" quote errors / excep
|
|
53
59
|
|
54
60
|
|
55
61
|
**v1.1.1**: Added built-in support for (optional) alternative comments (`%`) - used by
|
56
|
-
[ARFF (attribute-relation file format)](https://
|
62
|
+
[ARFF (attribute-relation file format)](https://github.com/csvspecs/csv-meta#attribute-relation-classic) -
|
57
63
|
and support for (optional) directives (`@`) in header (that is, before any records)
|
58
64
|
to default parser ("The Right Way").
|
59
65
|
Now you can use either `#` or `%` for comments, the first one "wins" - you CANNOT use both.
|
@@ -68,13 +74,12 @@ e.g. `Csv.fixed.parse( txt, width: [8,-2,8,-3,32,-2,14] )`.
|
|
68
74
|
|
69
75
|
**v1.0.3**: Added built-in support for an (optional) front matter (`---`) meta data block
|
70
76
|
in header (that is, before any records)
|
71
|
-
to default parser ("The Right Way") - used by [CSVY (yaml front matter for csv file format)](
|
77
|
+
to default parser ("The Right Way") - used by [CSVY (yaml front matter for csv file format)](https://github.com/csvspecs/csv-meta#front-matter-in-yaml).
|
72
78
|
Use `Csv.parser.meta` to get the parsed meta data block hash (or `nil`) if none.
|
73
79
|
|
74
80
|
|
75
81
|
|
76
82
|
|
77
|
-
|
78
83
|
## Usage
|
79
84
|
|
80
85
|
|
data/lib/csvreader/parser_std.rb
CHANGED
@@ -197,6 +197,26 @@ def parse_quote( input, sep:, opening_quote:, closing_quote:)
|
|
197
197
|
end
|
198
198
|
|
199
199
|
|
200
|
+
def parse_field_until_sep( input, sep: )
|
201
|
+
value = ""
|
202
|
+
logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
203
|
+
## consume simple value
|
204
|
+
## until we hit "," or "\n" or "\r"
|
205
|
+
## note: will eat-up quotes too!!!
|
206
|
+
while (c=input.peek; !(c==sep || c==LF || c==CR || input.eof?))
|
207
|
+
if input.peek == BACKSLASH
|
208
|
+
value << parse_escape( input, sep: sep )
|
209
|
+
else
|
210
|
+
logger.debug " add char >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
211
|
+
value << input.getc ## note: eat-up all spaces (" ") and tabs (\t) too (strip trailing spaces at the end)
|
212
|
+
end
|
213
|
+
end
|
214
|
+
## note: only strip **trailing** spaces (space and tab only)
|
215
|
+
## do NOT strip newlines etc. might have been added via escape! e.g. \\\n
|
216
|
+
value = value.sub( /[ \t]+$/, '' )
|
217
|
+
value
|
218
|
+
end
|
219
|
+
|
200
220
|
|
201
221
|
|
202
222
|
def parse_field( input, sep: )
|
@@ -226,7 +246,23 @@ def parse_field( input, sep: )
|
|
226
246
|
closing_quote: DOUBLE_QUOTE )
|
227
247
|
|
228
248
|
## note: always eat-up all trailing spaces (" ") and tabs (\t)
|
229
|
-
skip_spaces( input )
|
249
|
+
spaces_count = skip_spaces( input )
|
250
|
+
|
251
|
+
## check for auto-fix trailing data after quoted value e.g. ---,"Freddy" Mercury,---
|
252
|
+
## todo/fix: add auto-fix for all quote variants!!!!!!!!!!!!!!!!!!!!
|
253
|
+
if (c=input.peek; c==sep || c==LF || c==CR || input.eof?)
|
254
|
+
## everything ok (that is, regular quoted value)!!!
|
255
|
+
else
|
256
|
+
## try auto-fix
|
257
|
+
## todo: report warning/issue error (if configured)!!!
|
258
|
+
extra_value = parse_field_until_sep( input, sep: sep )
|
259
|
+
## "reconstruct" non-quoted value
|
260
|
+
spaces = ' ' * spaces_count ## todo: preserve tab (\t) - why? why not?
|
261
|
+
## note: minor (theoretical) issue (doubled quotes got "collapsed/escaped" from two to one in the quoted value)
|
262
|
+
## e.g. "hello """ extra, (becomes)=> "hello "" extra (one quote less/"eaten up")
|
263
|
+
value = %Q{"#{value}"#{spaces}#{extra_value}}
|
264
|
+
end
|
265
|
+
|
230
266
|
logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
231
267
|
elsif input.peek == SINGLE_QUOTE ## allow single quote too (by default)
|
232
268
|
logger.debug "start single_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_parser_autofix.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_autofix.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestParserAutofix < MiniTest::Test
|
12
|
+
|
13
|
+
|
14
|
+
def parser
|
15
|
+
CsvReader::Parser::DEFAULT
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
def test_quote_with_trailing_value
|
20
|
+
recs = [[ "Farrokh", "\"Freddy\" Mercury", "Bulsara" ]]
|
21
|
+
|
22
|
+
assert_equal recs, parser.parse( %Q{Farrokh,"Freddy" Mercury,Bulsara} )
|
23
|
+
assert_equal recs, parser.parse( %Q{ Farrokh , "Freddy" Mercury , Bulsara } )
|
24
|
+
assert_equal recs, parser.parse( %Q{Farrokh, "Freddy" Mercury ,Bulsara} )
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
end # class TestParserAutofix
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -83,6 +83,7 @@ files:
|
|
83
83
|
- test/test_buffer.rb
|
84
84
|
- test/test_converter.rb
|
85
85
|
- test/test_parser.rb
|
86
|
+
- test/test_parser_autofix.rb
|
86
87
|
- test/test_parser_directive.rb
|
87
88
|
- test/test_parser_fixed.rb
|
88
89
|
- test/test_parser_formats.rb
|