csvreader 1.2.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e83a3e71ad1ec014c4744e80be07aa7b6caef10
4
- data.tar.gz: c181a4d7f379f241e5a8c1a21af99523c8a0c9d3
3
+ metadata.gz: e61cda6f5b0fae762451efa0b0819e53b6da9966
4
+ data.tar.gz: e6dadbde1d714247046603fbdcb1fbd348cacc4c
5
5
  SHA512:
6
- metadata.gz: be9862e8ff97642f27a18e8d9160d534e3197a862e2a8c2d94b2ffe01b264a47b2e0996693dbc57b5cc68bd6113813fce7ea75289fd2a0e227ad0adc476868b3
7
- data.tar.gz: b7696e7342f7676a928c15f6f35b90c78c56e7519de3eb6de95e4fbb3dd3c7823136bda2676f46aea29930c5026ee9d7384e86fb26cffefb1603e9279fbe0ce7
6
+ metadata.gz: 6494cb0052000592cff4766946c3b7db0ec026db220f2e3857563d6070f282089034a8405c3eb4d8807f1a2dbe4cce67bd789b2794f803765f5fe9702d62a856
7
+ data.tar.gz: b47bd4cc6a342c5cc5e01e5ec1a67c7ae2fc3a7fc6fb4c1bcbcac018ab0291c6af076444aafb7ecac2f83ba814a502366583bbb61fa36c961e7009b217a3819b
data/README.md CHANGED
@@ -9,9 +9,18 @@
9
9
 
10
10
 
11
11
 
12
+
12
13
  ## What's New?
13
14
 
14
- **v1.2** Add support for alternative (non-space) separators (e.g. `;|^:`)
15
+
16
+ **v1.2.1** Added support for (optional) hashtags
17
+ to the default parser (`ParserStd`) for
18
+ supporting the [Humanitarian eXchange Language (HXL)](http://hxlstandard.org).
19
+ Default is turned off (`false`). Use `Csv.human`
20
+ or `Csv.hum` or `Csv.hxl` for a pre-defined format with hashtag turned on.
21
+
22
+
23
+ **v1.2** Added support for alternative (non-space) separators (e.g. `;|^:`)
15
24
  to the default parser (`ParserStd`).
16
25
 
17
26
 
@@ -531,7 +540,7 @@ Csv.tab.parse( ... ) # note: "classic" strict tab format
531
540
  Csv.tab.read( ... )
532
541
  # ...
533
542
 
534
- Csv.table.parse( ... ) # note: "classic" strict tab format
543
+ Csv.table.parse( ... ) # note: "classic" one or more space (or tab) table format
535
544
  Csv.table.read( ... )
536
545
  # ...
537
546
  ```
@@ -42,6 +42,7 @@ class Parser
42
42
  NUMERIC = ParserStd.new( numeric: true,
43
43
  nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
44
44
  null: "" )
45
+ HUMAN = ParserStd.new( hashtag: true )
45
46
 
46
47
 
47
48
  RFC4180 = ParserStrict.new
@@ -72,6 +73,10 @@ class Parser
72
73
  def self.numeric() NUMERIC; end
73
74
  def self.num() numeric; end
74
75
  def self.n() numeric; end
76
+ def self.human() HUMAN; end
77
+ def self.hum() human; end
78
+ def self.hxl() human; end ## HXL (humanitarian eXchange language)
79
+
75
80
  def self.strict() STRICT; end ## alternative alias for STRICT
76
81
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
77
82
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -94,6 +99,7 @@ class CsvReader
94
99
  ### pre-define CsvReader (built-in) formats/dialect
95
100
  DEFAULT = Builder.new( Parser::DEFAULT )
96
101
  NUMERIC = Builder.new( Parser::NUMERIC )
102
+ HUMAN = Builder.new( Parser::HUMAN )
97
103
 
98
104
  STRICT = Builder.new( Parser::STRICT )
99
105
  RFC4180 = Builder.new( Parser::RFC4180 )
@@ -113,6 +119,10 @@ class CsvReader
113
119
  def self.numeric() NUMERIC; end
114
120
  def self.num() numeric; end
115
121
  def self.n() numeric; end
122
+ def self.human() HUMAN; end
123
+ def self.hum() human; end
124
+ def self.hxl() human; end ## HXL (humanitarian eXchange language)
125
+
116
126
  def self.strict() STRICT; end ## alternative alias for STRICT
117
127
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
118
128
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -13,8 +13,8 @@ class ParserStd
13
13
  DOUBLE_QUOTE = "\""
14
14
  SINGLE_QUOTE = "'"
15
15
  BACKSLASH = "\\" ## use BACKSLASH_ESCAPE ??
16
- COMMENT1 = "#" ## use COMMENT_HASH or HASH or ??
17
- COMMENT2 = "%" ## use COMMENT_PERCENT or PERCENT or ??
16
+ COMMENT_HASH = "#" ## use COMMENT1 or COMMENT_HASH or HASH or ??
17
+ COMMENT_PERCENT = "%" ## use COMMENT2 or COMMENT_PERCENT or PERCENT or ??
18
18
  DIRECTIVE = "@" ## use a different name e.g. AT or ??
19
19
  SPACE = " " ## \s == ASCII 32 (dec) = (Space)
20
20
  TAB = "\t" ## \t == ASCII 0x09 (hex) = HT (Tab/horizontal tab)
@@ -53,7 +53,8 @@ def initialize( sep: ',',
53
53
  null: ['\N', 'NA'], ## note: set to nil for no null values / not available (na)
54
54
  numeric: false, ## (auto-)convert all non-quoted values to float
55
55
  nan: nil, ## note: only if numeric - set mappings for Float::NAN (not a number) values
56
- space: nil
56
+ space: nil,
57
+ hashtag: false
57
58
  )
58
59
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
59
60
 
@@ -73,6 +74,10 @@ def initialize( sep: ',',
73
74
  ## todo/check: only use for unquoted values? why? why not?
74
75
  @config[:space] = space
75
76
 
77
+ ## hxl - humanitarian eXchange language uses a hashtag row for "meta data"
78
+ ## e.g. #sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
79
+ ## do NOT treat # as a comment (always use % for now)
80
+ @config[:hashtag] = hashtag
76
81
 
77
82
  @meta = nil ## no meta data block (use empty hash {} - why? why not?)
78
83
  end
@@ -103,6 +108,7 @@ def null=( value ) @config[:null]=value; end
103
108
  def numeric=( value ) @config[:numeric]=value; end
104
109
  def nan=( value ) @config[:nan]=value; end
105
110
  def space=( value ) @config[:space]=value; end
111
+ def hashtag=( value ) @config[:hashtag]=value; end
106
112
 
107
113
 
108
114
 
@@ -296,7 +302,7 @@ end
296
302
  def parse_record( input, sep: )
297
303
  values = []
298
304
 
299
- space = config[:space]
305
+ space = config[:space]
300
306
 
301
307
  loop do
302
308
  value = parse_field( input, sep: sep )
@@ -424,20 +430,29 @@ def parse_lines( input, sep:, &block )
424
430
  ## used for meta block (can only start before any records e.g. if record_num == 0)
425
431
  record_num = 0
426
432
 
427
- ## note: can either use '#' or '%' but NOT both; first one "wins"
428
- comment = nil
429
433
 
430
- ## note: can either use directives (@) or frontmatter (---) block; first one "wins"
434
+
435
+ hashtag = config[:hashtag]
436
+
437
+ if hashtag
438
+ comment = COMMENT_PERCENT
439
+ ## todo/check: use a "heuristic" to check if its a comment or a hashtag line? why? why not?
440
+ else
441
+ ## note: can either use '#' or '%' but NOT both; first one "wins"
442
+ comment = nil
443
+ end
444
+
445
+
431
446
  has_seen_directive = false
432
447
  has_seen_frontmatter = false ## - rename to has_seen_dash (---) - why? why not???
433
-
448
+ ## note: can either use directives (@) or frontmatter (---) block; first one "wins"
434
449
 
435
450
  loop do
436
451
  break if input.eof?
437
452
 
438
453
  skipped_spaces = skip_spaces( input )
439
454
 
440
- if comment.nil? && (c=input.peek; c==COMMENT1 || c==COMMENT2)
455
+ if comment.nil? && (c=input.peek; c==COMMENT_HASH || c==COMMENT_PERCENT)
441
456
  logger.debug "skipping comment (first) - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
442
457
  comment = input.getc ## first comment line (determines/fixes "allowed" comment-style)
443
458
  skip_until_eol( input )
@@ -449,13 +464,13 @@ def parse_lines( input, sep:, &block )
449
464
  elsif (c=input.peek; c==LF || c==CR || input.eof?)
450
465
  logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
451
466
  skip_newline( input )
452
- elsif record_num == 0 && has_seen_frontmatter == false && input.peek==DIRECTIVE
467
+ elsif record_num == 0 && hashtag == false && has_seen_frontmatter == false && input.peek==DIRECTIVE
453
468
  ## note: "skip" directives for now
454
469
  has_seen_directive = true
455
470
  logger.debug "skip directive" if logger.debug?
456
471
  skip_until_eol( input )
457
472
  skip_newline( input )
458
- elsif record_num == 0 && has_seen_directive == false && has_seen_frontmatter == false &&
473
+ elsif record_num == 0 && hashtag == false && has_seen_directive == false && has_seen_frontmatter == false &&
459
474
  skipped_spaces == 0 && input.peekn(4) =~ /^---[\n\r \t]$/
460
475
  ## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
461
476
  has_seen_frontmatter = true
@@ -6,7 +6,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
6
6
  module Version
7
7
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
8
8
  MINOR = 2
9
- PATCH = 0
9
+ PATCH = 1
10
10
 
11
11
  ## self.to_s - why? why not?
12
12
  end
@@ -16,6 +16,8 @@ end
16
16
 
17
17
 
18
18
  def test_parse
19
+ pp parser.config
20
+
19
21
  records = [["a", "b", "c"],
20
22
  ["1", "2", "3"]]
21
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-05 00:00:00.000000000 Z
11
+ date: 2018-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc