csvreader 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -2
- data/lib/csvreader/base.rb +10 -0
- data/lib/csvreader/parser_std.rb +26 -11
- data/lib/csvreader/version.rb +1 -1
- data/test/test_parser_meta.rb +2 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e61cda6f5b0fae762451efa0b0819e53b6da9966
|
4
|
+
data.tar.gz: e6dadbde1d714247046603fbdcb1fbd348cacc4c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6494cb0052000592cff4766946c3b7db0ec026db220f2e3857563d6070f282089034a8405c3eb4d8807f1a2dbe4cce67bd789b2794f803765f5fe9702d62a856
|
7
|
+
data.tar.gz: b47bd4cc6a342c5cc5e01e5ec1a67c7ae2fc3a7fc6fb4c1bcbcac018ab0291c6af076444aafb7ecac2f83ba814a502366583bbb61fa36c961e7009b217a3819b
|
data/README.md
CHANGED
@@ -9,9 +9,18 @@
|
|
9
9
|
|
10
10
|
|
11
11
|
|
12
|
+
|
12
13
|
## What's New?
|
13
14
|
|
14
|
-
|
15
|
+
|
16
|
+
**v1.2.1** Added support for (optional) hashtag
|
17
|
+
to the default parser (`ParserStd`) for
|
18
|
+
supporting the [Humanitarian eXchange Language (HXL)](http://hxlstandard.org).
|
19
|
+
Default is turned off (`false`). Use `Csv.human`
|
20
|
+
or `Csv.hum` or `Csv.hxl` for a pre-defined format with hashtag turned on.
|
21
|
+
|
22
|
+
|
23
|
+
**v1.2** Added support for alternative (non-space) separators (e.g. `;|^:`)
|
15
24
|
to the default parser (`ParserStd`).
|
16
25
|
|
17
26
|
|
@@ -531,7 +540,7 @@ Csv.tab.parse( ... ) # note: "classic" strict tab format
|
|
531
540
|
Csv.tab.read( ... )
|
532
541
|
# ...
|
533
542
|
|
534
|
-
Csv.table.parse( ... ) # note: "classic"
|
543
|
+
Csv.table.parse( ... ) # note: "classic" one or more space (or tab) table format
|
535
544
|
Csv.table.read( ... )
|
536
545
|
# ...
|
537
546
|
```
|
data/lib/csvreader/base.rb
CHANGED
@@ -42,6 +42,7 @@ class Parser
|
|
42
42
|
NUMERIC = ParserStd.new( numeric: true,
|
43
43
|
nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
|
44
44
|
null: "" )
|
45
|
+
HUMAN = ParserStd.new( hashtag: true )
|
45
46
|
|
46
47
|
|
47
48
|
RFC4180 = ParserStrict.new
|
@@ -72,6 +73,10 @@ class Parser
|
|
72
73
|
def self.numeric() NUMERIC; end
|
73
74
|
def self.num() numeric; end
|
74
75
|
def self.n() numeric; end
|
76
|
+
def self.human() HUMAN; end
|
77
|
+
def self.hum() human; end
|
78
|
+
def self.hxl() human; end ## HXL (humanitarian eXchange language)
|
79
|
+
|
75
80
|
def self.strict() STRICT; end ## alternative alias for STRICT
|
76
81
|
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
77
82
|
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
@@ -94,6 +99,7 @@ class CsvReader
|
|
94
99
|
### pre-define CsvReader (built-in) formats/dialect
|
95
100
|
DEFAULT = Builder.new( Parser::DEFAULT )
|
96
101
|
NUMERIC = Builder.new( Parser::NUMERIC )
|
102
|
+
HUMAN = Builder.new( Parser::HUMAN )
|
97
103
|
|
98
104
|
STRICT = Builder.new( Parser::STRICT )
|
99
105
|
RFC4180 = Builder.new( Parser::RFC4180 )
|
@@ -113,6 +119,10 @@ class CsvReader
|
|
113
119
|
def self.numeric() NUMERIC; end
|
114
120
|
def self.num() numeric; end
|
115
121
|
def self.n() numeric; end
|
122
|
+
def self.human() HUMAN; end
|
123
|
+
def self.hum() human; end
|
124
|
+
def self.hxl() human; end ## HXL (humanitarian eXchange language)
|
125
|
+
|
116
126
|
def self.strict() STRICT; end ## alternative alias for STRICT
|
117
127
|
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
118
128
|
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
data/lib/csvreader/parser_std.rb
CHANGED
@@ -13,8 +13,8 @@ class ParserStd
|
|
13
13
|
DOUBLE_QUOTE = "\""
|
14
14
|
SINGLE_QUOTE = "'"
|
15
15
|
BACKSLASH = "\\" ## use BACKSLASH_ESCAPE ??
|
16
|
-
|
17
|
-
|
16
|
+
COMMENT_HASH = "#" ## use COMMENT1 or COMMENT_HASH or HASH or ??
|
17
|
+
COMMENT_PERCENT = "%" ## use COMMENT2 or COMMENT_PERCENT or PERCENT or ??
|
18
18
|
DIRECTIVE = "@" ## use a different name e.g. AT or ??
|
19
19
|
SPACE = " " ## \s == ASCII 32 (dec) = (Space)
|
20
20
|
TAB = "\t" ## \t == ASCII 0x09 (hex) = HT (Tab/horizontal tab)
|
@@ -53,7 +53,8 @@ def initialize( sep: ',',
|
|
53
53
|
null: ['\N', 'NA'], ## note: set to nil for no null vales / not availabe (na)
|
54
54
|
numeric: false, ## (auto-)convert all non-quoted values to float
|
55
55
|
nan: nil, ## note: only if numeric - set mappings for Float::NAN (not a number) values
|
56
|
-
space: nil
|
56
|
+
space: nil,
|
57
|
+
hashtag: false
|
57
58
|
)
|
58
59
|
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
59
60
|
|
@@ -73,6 +74,10 @@ def initialize( sep: ',',
|
|
73
74
|
## todo/check: only use for unquoted values? why? why not?
|
74
75
|
@config[:space] = space
|
75
76
|
|
77
|
+
## hxl - humanitarian eXchange language uses a hashtag row for "meta data"
|
78
|
+
## e.g. #sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
|
79
|
+
## do NOT treat # as a comment (always use % for now)
|
80
|
+
@config[:hashtag] = hashtag
|
76
81
|
|
77
82
|
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
78
83
|
end
|
@@ -103,6 +108,7 @@ def null=( value ) @config[:null]=value; end
|
|
103
108
|
def numeric=( value ) @config[:numeric]=value; end
|
104
109
|
def nan=( value ) @config[:nan]=value; end
|
105
110
|
def space=( value ) @config[:space]=value; end
|
111
|
+
def hashtag=( value ) @config[:hashtag]=value; end
|
106
112
|
|
107
113
|
|
108
114
|
|
@@ -296,7 +302,7 @@ end
|
|
296
302
|
def parse_record( input, sep: )
|
297
303
|
values = []
|
298
304
|
|
299
|
-
space
|
305
|
+
space = config[:space]
|
300
306
|
|
301
307
|
loop do
|
302
308
|
value = parse_field( input, sep: sep )
|
@@ -424,20 +430,29 @@ def parse_lines( input, sep:, &block )
|
|
424
430
|
## used for meta block (can only start before any records e.g. if record_num == 0)
|
425
431
|
record_num = 0
|
426
432
|
|
427
|
-
## note: can either use '#' or '%' but NOT both; first one "wins"
|
428
|
-
comment = nil
|
429
433
|
|
430
|
-
|
434
|
+
|
435
|
+
hashtag = config[:hashtag]
|
436
|
+
|
437
|
+
if hashtag
|
438
|
+
comment = COMMENT_PERCENT
|
439
|
+
## todo/check: use a "heuristic" to check if its a comment or a hashtag line? why? why not?
|
440
|
+
else
|
441
|
+
## note: can either use '#' or '%' but NOT both; first one "wins"
|
442
|
+
comment = nil
|
443
|
+
end
|
444
|
+
|
445
|
+
|
431
446
|
has_seen_directive = false
|
432
447
|
has_seen_frontmatter = false ## - renameto has_seen_dash (---) - why? why not???
|
433
|
-
|
448
|
+
## note: can either use directives (@) or frontmatter (---) block; first one "wins"
|
434
449
|
|
435
450
|
loop do
|
436
451
|
break if input.eof?
|
437
452
|
|
438
453
|
skipped_spaces = skip_spaces( input )
|
439
454
|
|
440
|
-
if comment.nil? && (c=input.peek; c==
|
455
|
+
if comment.nil? && (c=input.peek; c==COMMENT_HASH || c==COMMENT_PERCENT)
|
441
456
|
logger.debug "skipping comment (first) - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
442
457
|
comment = input.getc ## first comment line (determines/fixes "allowed" comment-style)
|
443
458
|
skip_until_eol( input )
|
@@ -449,13 +464,13 @@ def parse_lines( input, sep:, &block )
|
|
449
464
|
elsif (c=input.peek; c==LF || c==CR || input.eof?)
|
450
465
|
logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
451
466
|
skip_newline( input )
|
452
|
-
elsif record_num == 0 && has_seen_frontmatter == false && input.peek==DIRECTIVE
|
467
|
+
elsif record_num == 0 && hashtag == false && has_seen_frontmatter == false && input.peek==DIRECTIVE
|
453
468
|
## note: "skip" directives for now
|
454
469
|
has_seen_directive = true
|
455
470
|
logger.debug "skip directive" if logger.debug?
|
456
471
|
skip_until_eol( input )
|
457
472
|
skip_newline( input )
|
458
|
-
elsif record_num == 0 && has_seen_directive == false && has_seen_frontmatter == false &&
|
473
|
+
elsif record_num == 0 && hashtag == false && has_seen_directive == false && has_seen_frontmatter == false &&
|
459
474
|
skipped_spaces == 0 && input.peekn(4) =~ /^---[\n\r \t]$/
|
460
475
|
## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
|
461
476
|
has_seen_frontmatter = true
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_parser_meta.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|