csvreader 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e83a3e71ad1ec014c4744e80be07aa7b6caef10
4
- data.tar.gz: c181a4d7f379f241e5a8c1a21af99523c8a0c9d3
3
+ metadata.gz: e61cda6f5b0fae762451efa0b0819e53b6da9966
4
+ data.tar.gz: e6dadbde1d714247046603fbdcb1fbd348cacc4c
5
5
  SHA512:
6
- metadata.gz: be9862e8ff97642f27a18e8d9160d534e3197a862e2a8c2d94b2ffe01b264a47b2e0996693dbc57b5cc68bd6113813fce7ea75289fd2a0e227ad0adc476868b3
7
- data.tar.gz: b7696e7342f7676a928c15f6f35b90c78c56e7519de3eb6de95e4fbb3dd3c7823136bda2676f46aea29930c5026ee9d7384e86fb26cffefb1603e9279fbe0ce7
6
+ metadata.gz: 6494cb0052000592cff4766946c3b7db0ec026db220f2e3857563d6070f282089034a8405c3eb4d8807f1a2dbe4cce67bd789b2794f803765f5fe9702d62a856
7
+ data.tar.gz: b47bd4cc6a342c5cc5e01e5ec1a67c7ae2fc3a7fc6fb4c1bcbcac018ab0291c6af076444aafb7ecac2f83ba814a502366583bbb61fa36c961e7009b217a3819b
data/README.md CHANGED
@@ -9,9 +9,18 @@
9
9
 
10
10
 
11
11
 
12
+
12
13
  ## What's News?
13
14
 
14
- **v1.2** Add support for alternative (non-space) separators (e.g. `;|^:`)
15
+
16
+ **v1.2.1** Added support for (optional) hashtag to the
17
+ default parser (`ParserStd`) for
18
+ supporting the [Humanitarian eXchange Language (HXL)](http://hxlstandard.org).
19
+ Default is turned off (`false`). Use `Csv.human`
20
+ or `Csv.hum` or `Csv.hxl` for a pre-defined parser with hashtag turned on.
21
+
22
+
23
+ **v1.2** Added support for alternative (non-space) separators (e.g. `;|^:`)
15
24
  to the default parser (`ParserStd`).
16
25
 
17
26
 
@@ -531,7 +540,7 @@ Csv.tab.parse( ... ) # note: "classic" strict tab format
531
540
  Csv.tab.read( ... )
532
541
  # ...
533
542
 
534
- Csv.table.parse( ... ) # note: "classic" strict tab format
543
+ Csv.table.parse( ... ) # note: "classic" one or more space (or tab) table format
535
544
  Csv.table.read( ... )
536
545
  # ...
537
546
  ```
@@ -42,6 +42,7 @@ class Parser
42
42
  NUMERIC = ParserStd.new( numeric: true,
43
43
  nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
44
44
  null: "" )
45
+ HUMAN = ParserStd.new( hashtag: true )
45
46
 
46
47
 
47
48
  RFC4180 = ParserStrict.new
@@ -72,6 +73,10 @@ class Parser
72
73
  def self.numeric() NUMERIC; end
73
74
  def self.num() numeric; end
74
75
  def self.n() numeric; end
76
+ def self.human() HUMAN; end
77
+ def self.hum() human; end
78
+ def self.hxl() human; end ## HXL (humanitarian eXchange language)
79
+
75
80
  def self.strict() STRICT; end ## alternative alias for STRICT
76
81
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
77
82
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -94,6 +99,7 @@ class CsvReader
94
99
  ### pre-define CsvReader (built-in) formats/dialect
95
100
  DEFAULT = Builder.new( Parser::DEFAULT )
96
101
  NUMERIC = Builder.new( Parser::NUMERIC )
102
+ HUMAN = Builder.new( Parser::HUMAN )
97
103
 
98
104
  STRICT = Builder.new( Parser::STRICT )
99
105
  RFC4180 = Builder.new( Parser::RFC4180 )
@@ -113,6 +119,10 @@ class CsvReader
113
119
  def self.numeric() NUMERIC; end
114
120
  def self.num() numeric; end
115
121
  def self.n() numeric; end
122
+ def self.human() HUMAN; end
123
+ def self.hum() human; end
124
+ def self.hxl() human; end ## HXL (humanitarian eXchange language)
125
+
116
126
  def self.strict() STRICT; end ## alternative alias for STRICT
117
127
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
118
128
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -13,8 +13,8 @@ class ParserStd
13
13
  DOUBLE_QUOTE = "\""
14
14
  SINGLE_QUOTE = "'"
15
15
  BACKSLASH = "\\" ## use BACKSLASH_ESCAPE ??
16
- COMMENT1 = "#" ## use COMMENT_HASH or HASH or ??
17
- COMMENT2 = "%" ## use COMMENT_PERCENT or PERCENT or ??
16
+ COMMENT_HASH = "#" ## use COMMENT1 or COMMENT_HASH or HASH or ??
17
+ COMMENT_PERCENT = "%" ## use COMMENT2 or COMMENT_PERCENT or PERCENT or ??
18
18
  DIRECTIVE = "@" ## use a different name e.g. AT or ??
19
19
  SPACE = " " ## \s == ASCII 32 (dec) = (Space)
20
20
  TAB = "\t" ## \t == ASCII 0x09 (hex) = HT (Tab/horizontal tab)
@@ -53,7 +53,8 @@ def initialize( sep: ',',
53
53
  null: ['\N', 'NA'], ## note: set to nil for no null values / not available (na)
54
54
  numeric: false, ## (auto-)convert all non-quoted values to float
55
55
  nan: nil, ## note: only if numeric - set mappings for Float::NAN (not a number) values
56
- space: nil
56
+ space: nil,
57
+ hashtag: false
57
58
  )
58
59
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
59
60
 
@@ -73,6 +74,10 @@ def initialize( sep: ',',
73
74
  ## todo/check: only use for unquoted values? why? why not?
74
75
  @config[:space] = space
75
76
 
77
+ ## hxl - humanitarian eXchange language uses a hashtag row for "meta data"
78
+ ## e.g. #sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
79
+ ## do NOT treat # as a comment (always use % for now)
80
+ @config[:hashtag] = hashtag
76
81
 
77
82
  @meta = nil ## no meta data block (use empty hash {} - why? why not?)
78
83
  end
@@ -103,6 +108,7 @@ def null=( value ) @config[:null]=value; end
103
108
  def numeric=( value ) @config[:numeric]=value; end
104
109
  def nan=( value ) @config[:nan]=value; end
105
110
  def space=( value ) @config[:space]=value; end
111
+ def hashtag=( value ) @config[:hashtag]=value; end
106
112
 
107
113
 
108
114
 
@@ -296,7 +302,7 @@ end
296
302
  def parse_record( input, sep: )
297
303
  values = []
298
304
 
299
- space = config[:space]
305
+ space = config[:space]
300
306
 
301
307
  loop do
302
308
  value = parse_field( input, sep: sep )
@@ -424,20 +430,29 @@ def parse_lines( input, sep:, &block )
424
430
  ## used for meta block (can only start before any records e.g. if record_num == 0)
425
431
  record_num = 0
426
432
 
427
- ## note: can either use '#' or '%' but NOT both; first one "wins"
428
- comment = nil
429
433
 
430
- ## note: can either use directives (@) or frontmatter (---) block; first one "wins"
434
+
435
+ hashtag = config[:hashtag]
436
+
437
+ if hashtag
438
+ comment = COMMENT_PERCENT
439
+ ## todo/check: use a "heuristic" to check if it's a comment or a hashtag line? why? why not?
440
+ else
441
+ ## note: can either use '#' or '%' but NOT both; first one "wins"
442
+ comment = nil
443
+ end
444
+
445
+
431
446
  has_seen_directive = false
432
447
  has_seen_frontmatter = false ## - rename to has_seen_dash (---) - why? why not???
433
-
448
+ ## note: can either use directives (@) or frontmatter (---) block; first one "wins"
434
449
 
435
450
  loop do
436
451
  break if input.eof?
437
452
 
438
453
  skipped_spaces = skip_spaces( input )
439
454
 
440
- if comment.nil? && (c=input.peek; c==COMMENT1 || c==COMMENT2)
455
+ if comment.nil? && (c=input.peek; c==COMMENT_HASH || c==COMMENT_PERCENT)
441
456
  logger.debug "skipping comment (first) - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
442
457
  comment = input.getc ## first comment line (determines/fixes "allowed" comment-style)
443
458
  skip_until_eol( input )
@@ -449,13 +464,13 @@ def parse_lines( input, sep:, &block )
449
464
  elsif (c=input.peek; c==LF || c==CR || input.eof?)
450
465
  logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
451
466
  skip_newline( input )
452
- elsif record_num == 0 && has_seen_frontmatter == false && input.peek==DIRECTIVE
467
+ elsif record_num == 0 && hashtag == false && has_seen_frontmatter == false && input.peek==DIRECTIVE
453
468
  ## note: "skip" directives for now
454
469
  has_seen_directive = true
455
470
  logger.debug "skip directive" if logger.debug?
456
471
  skip_until_eol( input )
457
472
  skip_newline( input )
458
- elsif record_num == 0 && has_seen_directive == false && has_seen_frontmatter == false &&
473
+ elsif record_num == 0 && hashtag == false && has_seen_directive == false && has_seen_frontmatter == false &&
459
474
  skipped_spaces == 0 && input.peekn(4) =~ /^---[\n\r \t]$/
460
475
  ## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
461
476
  has_seen_frontmatter = true
@@ -6,7 +6,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
6
6
  module Version
7
7
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
8
8
  MINOR = 2
9
- PATCH = 0
9
+ PATCH = 1
10
10
 
11
11
  ## self.to_s - why? why not?
12
12
  end
@@ -16,6 +16,8 @@ end
16
16
 
17
17
 
18
18
  def test_parse
19
+ pp parser.config
20
+
19
21
  records = [["a", "b", "c"],
20
22
  ["1", "2", "3"]]
21
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-05 00:00:00.000000000 Z
11
+ date: 2018-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc