csvreader 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -2
- data/lib/csvreader/base.rb +10 -0
- data/lib/csvreader/parser_std.rb +26 -11
- data/lib/csvreader/version.rb +1 -1
- data/test/test_parser_meta.rb +2 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e61cda6f5b0fae762451efa0b0819e53b6da9966
|
4
|
+
data.tar.gz: e6dadbde1d714247046603fbdcb1fbd348cacc4c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6494cb0052000592cff4766946c3b7db0ec026db220f2e3857563d6070f282089034a8405c3eb4d8807f1a2dbe4cce67bd789b2794f803765f5fe9702d62a856
|
7
|
+
data.tar.gz: b47bd4cc6a342c5cc5e01e5ec1a67c7ae2fc3a7fc6fb4c1bcbcac018ab0291c6af076444aafb7ecac2f83ba814a502366583bbb61fa36c961e7009b217a3819b
|
data/README.md
CHANGED
@@ -9,9 +9,18 @@
|
|
9
9
|
|
10
10
|
|
11
11
|
|
12
|
+
|
12
13
|
## What's New?
|
13
14
|
|
14
|
-
|
15
|
+
|
16
|
+
**v1.2.1** Added support for (optional) hashtag
|
17
|
+
to the default parser (`ParserStd`) for
|
18
|
+
supporting the [Humanitarian eXchange Language (HXL)](http://hxlstandard.org).
|
19
|
+
Default is turned off (`false`). Use `Csv.human`
|
20
|
+
or `Csv.hum` or `Csv.hxl` for the pre-defined format with hashtag turned on.
|
21
|
+
|
22
|
+
|
23
|
+
**v1.2** Added support for alternative (non-space) separators (e.g. `;|^:`)
|
15
24
|
to the default parser (`ParserStd`).
|
16
25
|
|
17
26
|
|
@@ -531,7 +540,7 @@ Csv.tab.parse( ... ) # note: "classic" strict tab format
|
|
531
540
|
Csv.tab.read( ... )
|
532
541
|
# ...
|
533
542
|
|
534
|
-
Csv.table.parse( ... ) # note: "classic"
|
543
|
+
Csv.table.parse( ... ) # note: "classic" one or more space (or tab) table format
|
535
544
|
Csv.table.read( ... )
|
536
545
|
# ...
|
537
546
|
```
|
data/lib/csvreader/base.rb
CHANGED
@@ -42,6 +42,7 @@ class Parser
|
|
42
42
|
NUMERIC = ParserStd.new( numeric: true,
|
43
43
|
nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
|
44
44
|
null: "" )
|
45
|
+
HUMAN = ParserStd.new( hashtag: true )
|
45
46
|
|
46
47
|
|
47
48
|
RFC4180 = ParserStrict.new
|
@@ -72,6 +73,10 @@ class Parser
|
|
72
73
|
def self.numeric() NUMERIC; end
|
73
74
|
def self.num() numeric; end
|
74
75
|
def self.n() numeric; end
|
76
|
+
def self.human() HUMAN; end
|
77
|
+
def self.hum() human; end
|
78
|
+
def self.hxl() human; end ## HXL (humanitarian eXchange language)
|
79
|
+
|
75
80
|
def self.strict() STRICT; end ## alternative alias for STRICT
|
76
81
|
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
77
82
|
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
@@ -94,6 +99,7 @@ class CsvReader
|
|
94
99
|
### pre-define CsvReader (built-in) formats/dialect
|
95
100
|
DEFAULT = Builder.new( Parser::DEFAULT )
|
96
101
|
NUMERIC = Builder.new( Parser::NUMERIC )
|
102
|
+
HUMAN = Builder.new( Parser::HUMAN )
|
97
103
|
|
98
104
|
STRICT = Builder.new( Parser::STRICT )
|
99
105
|
RFC4180 = Builder.new( Parser::RFC4180 )
|
@@ -113,6 +119,10 @@ class CsvReader
|
|
113
119
|
def self.numeric() NUMERIC; end
|
114
120
|
def self.num() numeric; end
|
115
121
|
def self.n() numeric; end
|
122
|
+
def self.human() HUMAN; end
|
123
|
+
def self.hum() human; end
|
124
|
+
def self.hxl() human; end ## HXL (humanitarian eXchange language)
|
125
|
+
|
116
126
|
def self.strict() STRICT; end ## alternative alias for STRICT
|
117
127
|
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
118
128
|
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
data/lib/csvreader/parser_std.rb
CHANGED
@@ -13,8 +13,8 @@ class ParserStd
|
|
13
13
|
DOUBLE_QUOTE = "\""
|
14
14
|
SINGLE_QUOTE = "'"
|
15
15
|
BACKSLASH = "\\" ## use BACKSLASH_ESCAPE ??
|
16
|
-
|
17
|
-
|
16
|
+
COMMENT_HASH = "#" ## use COMMENT1 or COMMENT_HASH or HASH or ??
|
17
|
+
COMMENT_PERCENT = "%" ## use COMMENT2 or COMMENT_PERCENT or PERCENT or ??
|
18
18
|
DIRECTIVE = "@" ## use a different name e.g. AT or ??
|
19
19
|
SPACE = " " ## \s == ASCII 32 (dec) = (Space)
|
20
20
|
TAB = "\t" ## \t == ASCII 0x09 (hex) = HT (Tab/horizontal tab)
|
@@ -53,7 +53,8 @@ def initialize( sep: ',',
|
|
53
53
|
null: ['\N', 'NA'], ## note: set to nil for no null values / not available (na)
|
54
54
|
numeric: false, ## (auto-)convert all non-quoted values to float
|
55
55
|
nan: nil, ## note: only if numeric - set mappings for Float::NAN (not a number) values
|
56
|
-
space: nil
|
56
|
+
space: nil,
|
57
|
+
hashtag: false
|
57
58
|
)
|
58
59
|
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
59
60
|
|
@@ -73,6 +74,10 @@ def initialize( sep: ',',
|
|
73
74
|
## todo/check: only use for unquoted values? why? why not?
|
74
75
|
@config[:space] = space
|
75
76
|
|
77
|
+
## hxl - humanitarian eXchange language uses a hashtag row for "meta data"
|
78
|
+
## e.g. #sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
79
|
+
## do NOT treat # as a comment (always use % for now)
|
80
|
+
@config[:hashtag] = hashtag
|
76
81
|
|
77
82
|
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
78
83
|
end
|
@@ -103,6 +108,7 @@ def null=( value ) @config[:null]=value; end
|
|
103
108
|
def numeric=( value ) @config[:numeric]=value; end
|
104
109
|
def nan=( value ) @config[:nan]=value; end
|
105
110
|
def space=( value ) @config[:space]=value; end
|
111
|
+
def hashtag=( value ) @config[:hashtag]=value; end
|
106
112
|
|
107
113
|
|
108
114
|
|
@@ -296,7 +302,7 @@ end
|
|
296
302
|
def parse_record( input, sep: )
|
297
303
|
values = []
|
298
304
|
|
299
|
-
space
|
305
|
+
space = config[:space]
|
300
306
|
|
301
307
|
loop do
|
302
308
|
value = parse_field( input, sep: sep )
|
@@ -424,20 +430,29 @@ def parse_lines( input, sep:, &block )
|
|
424
430
|
## used for meta block (can only start before any records e.g. if record_num == 0)
|
425
431
|
record_num = 0
|
426
432
|
|
427
|
-
## note: can either use '#' or '%' but NOT both; first one "wins"
|
428
|
-
comment = nil
|
429
433
|
|
430
|
-
|
434
|
+
|
435
|
+
hashtag = config[:hashtag]
|
436
|
+
|
437
|
+
if hashtag
|
438
|
+
comment = COMMENT_PERCENT
|
439
|
+
## todo/check: use a "heuristic" to check if its a comment or a hashtag line? why? why not?
|
440
|
+
else
|
441
|
+
## note: can either use '#' or '%' but NOT both; first one "wins"
|
442
|
+
comment = nil
|
443
|
+
end
|
444
|
+
|
445
|
+
|
431
446
|
has_seen_directive = false
|
432
447
|
has_seen_frontmatter = false ## - rename to has_seen_dash (---) - why? why not???
|
433
|
-
|
448
|
+
## note: can either use directives (@) or frontmatter (---) block; first one "wins"
|
434
449
|
|
435
450
|
loop do
|
436
451
|
break if input.eof?
|
437
452
|
|
438
453
|
skipped_spaces = skip_spaces( input )
|
439
454
|
|
440
|
-
if comment.nil? && (c=input.peek; c==
|
455
|
+
if comment.nil? && (c=input.peek; c==COMMENT_HASH || c==COMMENT_PERCENT)
|
441
456
|
logger.debug "skipping comment (first) - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
442
457
|
comment = input.getc ## first comment line (determines/fixes "allowed" comment-style)
|
443
458
|
skip_until_eol( input )
|
@@ -449,13 +464,13 @@ def parse_lines( input, sep:, &block )
|
|
449
464
|
elsif (c=input.peek; c==LF || c==CR || input.eof?)
|
450
465
|
logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
451
466
|
skip_newline( input )
|
452
|
-
elsif record_num == 0 && has_seen_frontmatter == false && input.peek==DIRECTIVE
|
467
|
+
elsif record_num == 0 && hashtag == false && has_seen_frontmatter == false && input.peek==DIRECTIVE
|
453
468
|
## note: "skip" directives for now
|
454
469
|
has_seen_directive = true
|
455
470
|
logger.debug "skip directive" if logger.debug?
|
456
471
|
skip_until_eol( input )
|
457
472
|
skip_newline( input )
|
458
|
-
elsif record_num == 0 && has_seen_directive == false && has_seen_frontmatter == false &&
|
473
|
+
elsif record_num == 0 && hashtag == false && has_seen_directive == false && has_seen_frontmatter == false &&
|
459
474
|
skipped_spaces == 0 && input.peekn(4) =~ /^---[\n\r \t]$/
|
460
475
|
## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
|
461
476
|
has_seen_frontmatter = true
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_parser_meta.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|