rdf-tabular 0.4.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +24 -5
- data/VERSION +1 -1
- data/etc/csvw.jsonld +135 -50
- data/lib/rdf/tabular/csvw.rb +215 -181
- data/lib/rdf/tabular/format.rb +8 -6
- data/lib/rdf/tabular/literal.rb +1 -1
- data/lib/rdf/tabular/metadata.rb +61 -80
- data/lib/rdf/tabular/reader.rb +18 -15
- data/lib/rdf/tabular/uax35.rb +143 -38
- data/spec/data/countries-minimal.json +38 -0
- data/spec/data/countries-minimal.ttl +36 -0
- data/spec/data/countries-standard.json +86 -0
- data/spec/data/countries-standard.ttl +75 -0
- data/spec/data/countries.csv +4 -0
- data/spec/data/countries.csv-minimal.json +16 -0
- data/spec/data/countries.csv-minimal.ttl +19 -0
- data/spec/data/countries.csv-standard.json +33 -0
- data/spec/data/countries.csv-standard.ttl +44 -0
- data/spec/data/countries.html +88 -0
- data/spec/data/countries.json +53 -0
- data/spec/data/countries_embed-minimal.json +38 -0
- data/spec/data/countries_embed-minimal.ttl +36 -0
- data/spec/data/countries_embed-standard.json +86 -0
- data/spec/data/countries_embed-standard.ttl +75 -0
- data/spec/data/countries_embed.html +88 -0
- data/spec/data/countries_html-minimal.json +38 -0
- data/spec/data/countries_html-minimal.ttl +36 -0
- data/spec/data/countries_html-standard.json +86 -0
- data/spec/data/countries_html-standard.ttl +75 -0
- data/spec/data/country-codes-and-names-minimal.json +19 -0
- data/spec/data/country-codes-and-names-minimal.ttl +22 -0
- data/spec/data/country-codes-and-names-standard.json +47 -0
- data/spec/data/country-codes-and-names-standard.ttl +45 -0
- data/spec/data/country-codes-and-names.csv +5 -0
- data/spec/data/country_slice.csv +4 -0
- data/spec/data/junior-roles.csv +3 -0
- data/spec/data/junior-roles.json +54 -0
- data/spec/data/roles-minimal.json +32 -0
- data/spec/data/roles-minimal.ttl +36 -0
- data/spec/data/roles-standard.json +56 -0
- data/spec/data/roles-standard.ttl +66 -0
- data/spec/data/roles.json +23 -0
- data/spec/data/senior-roles.csv +3 -0
- data/spec/data/senior-roles.json +52 -0
- data/spec/data/test232-metadata.json +10 -0
- data/spec/data/test232.csv +3 -0
- data/spec/data/tree-ops-atd.json +1 -0
- data/spec/data/tree-ops-ext-minimal.json +42 -0
- data/spec/data/tree-ops-ext-minimal.ttl +34 -0
- data/spec/data/tree-ops-ext-standard.json +93 -0
- data/spec/data/tree-ops-ext-standard.ttl +82 -0
- data/spec/data/tree-ops-ext.csv +4 -0
- data/spec/data/tree-ops-ext.json +81 -0
- data/spec/data/tree-ops-minimal.json +18 -0
- data/spec/data/tree-ops-minimal.ttl +14 -0
- data/spec/data/tree-ops-standard.json +44 -0
- data/spec/data/tree-ops-standard.ttl +44 -0
- data/spec/data/tree-ops-virtual-minimal.json +32 -0
- data/spec/data/tree-ops-virtual-minimal.ttl +25 -0
- data/spec/data/tree-ops-virtual-standard.json +49 -0
- data/spec/data/tree-ops-virtual-standard.ttl +49 -0
- data/spec/data/tree-ops-virtual.json +48 -0
- data/spec/data/tree-ops.csv +3 -0
- data/spec/data/tree-ops.csv-metadata.json +43 -0
- data/spec/data/tree-ops.html +54 -0
- data/spec/data/tree-ops.tsv +3 -0
- data/spec/format_spec.rb +5 -4
- data/spec/metadata_spec.rb +10 -16
- data/spec/suite_helper.rb +2 -2
- data/spec/suite_spec.rb +5 -6
- data/spec/uax35_spec.rb +239 -0
- metadata +149 -36
- data/lib/rdf/tabular/json.rb +0 -0
data/lib/rdf/tabular/reader.rb
CHANGED
@@ -28,16 +28,19 @@ module RDF::Tabular
|
|
28
28
|
RDF::CLI::Option.new(
|
29
29
|
symbol: :metadata,
|
30
30
|
datatype: RDF::URI,
|
31
|
+
control: :url2,
|
31
32
|
on: ["--metadata URI"],
|
32
33
|
description: "user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loade from that location.") {|arg| RDF::URI(arg)},
|
33
34
|
RDF::CLI::Option.new(
|
34
35
|
symbol: :minimal,
|
36
|
+
control: :checkbox,
|
35
37
|
datatype: TrueClass,
|
36
38
|
on: ["--minimal"],
|
37
39
|
description: "Includes only the information gleaned from the cells of the tabular data.") {true},
|
38
40
|
RDF::CLI::Option.new(
|
39
41
|
symbol: :noProv,
|
40
42
|
datatype: TrueClass,
|
43
|
+
control: :checkbox,
|
41
44
|
on: ["--no-prov"],
|
42
45
|
description: "do not output optional provenance information.") {true},
|
43
46
|
]
|
@@ -60,7 +63,7 @@ module RDF::Tabular
|
|
60
63
|
# @yieldparam [RDF::Reader] reader
|
61
64
|
# @yieldreturn [void] ignored
|
62
65
|
# @raise [RDF::ReaderError] if the CSV document cannot be loaded
|
63
|
-
def initialize(input = $stdin, options
|
66
|
+
def initialize(input = $stdin, **options, &block)
|
64
67
|
super do
|
65
68
|
# Base would be how we are to take this
|
66
69
|
@options[:base] ||= base_uri.to_s if base_uri
|
@@ -86,7 +89,7 @@ module RDF::Tabular
|
|
86
89
|
# If input is JSON, then the input is the metadata
|
87
90
|
content_type = @input.respond_to?(:content_type) ? @input.content_type : ""
|
88
91
|
if @options[:base] =~ /\.json(?:ld)?$/ || content_type =~ %r(application/(csvm\+|ld\+)?json)
|
89
|
-
@metadata = Metadata.new(@input,
|
92
|
+
@metadata = Metadata.new(@input, filenames: @options[:base], **@options)
|
90
93
|
# If @metadata is for a Table, turn it into a TableGroup
|
91
94
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
92
95
|
@metadata.normalize!
|
@@ -99,7 +102,7 @@ module RDF::Tabular
|
|
99
102
|
def script.content_type; "application/csvm+json"; end
|
100
103
|
log_debug("Reader#initialize") {"Process HTML script block"}
|
101
104
|
@input = script
|
102
|
-
@metadata = Metadata.new(@input,
|
105
|
+
@metadata = Metadata.new(@input, filenames: @options[:base], **@options)
|
103
106
|
# If @metadata is for a Table, turn it into a TableGroup
|
104
107
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
105
108
|
@metadata.normalize!
|
@@ -116,7 +119,7 @@ module RDF::Tabular
|
|
116
119
|
dialect.separator = "\t" if (input.content_type == "text/tsv" rescue nil)
|
117
120
|
embed_options = @options.dup
|
118
121
|
embed_options[:lang] = dialect_metadata.lang if dialect_metadata.lang
|
119
|
-
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], embed_options)
|
122
|
+
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], **embed_options)
|
120
123
|
|
121
124
|
if (@metadata = @options[:metadata]) && @metadata.tableSchema
|
122
125
|
@metadata.verify_compatible!(embedded_metadata)
|
@@ -133,7 +136,7 @@ module RDF::Tabular
|
|
133
136
|
else
|
134
137
|
# It's tabular data. Find metadata and proceed as if it was specified in the first place
|
135
138
|
@options[:original_input] = @input unless @options[:metadata]
|
136
|
-
@input = @metadata = Metadata.for_input(@input,
|
139
|
+
@input = @metadata = Metadata.for_input(@input, **@options).normalize!
|
137
140
|
end
|
138
141
|
|
139
142
|
log_debug("Reader#initialize") {"input: #{input}, metadata: #{metadata.inspect}"}
|
@@ -183,7 +186,7 @@ module RDF::Tabular
|
|
183
186
|
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
184
187
|
table_resource = RDF::Node.new
|
185
188
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
186
|
-
Reader.new(options[:original_input], options.merge(
|
189
|
+
Reader.new(options[:original_input], **options.merge(
|
187
190
|
metadata: input.tables.first,
|
188
191
|
base: input.tables.first.url,
|
189
192
|
no_found_metadata: true,
|
@@ -202,7 +205,7 @@ module RDF::Tabular
|
|
202
205
|
end.flatten.compact
|
203
206
|
table_resource = table.id || RDF::Node.new
|
204
207
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
205
|
-
Reader.open(table.url, options.merge(
|
208
|
+
Reader.open(table.url, **options.merge(
|
206
209
|
metadata: table,
|
207
210
|
base: table.url,
|
208
211
|
no_found_metadata: true,
|
@@ -418,9 +421,9 @@ module RDF::Tabular
|
|
418
421
|
|
419
422
|
res = if io
|
420
423
|
::JSON::dump_default_options = json_state
|
421
|
-
::JSON.dump(self.send(hash_fn, options), io)
|
424
|
+
::JSON.dump(self.send(hash_fn, **options), io)
|
422
425
|
else
|
423
|
-
hash = self.send(hash_fn, options)
|
426
|
+
hash = self.send(hash_fn, **options)
|
424
427
|
::JSON.generate(hash, json_state)
|
425
428
|
end
|
426
429
|
|
@@ -440,7 +443,7 @@ module RDF::Tabular
|
|
440
443
|
#
|
441
444
|
# @param [Hash{Symbol => Object}] options
|
442
445
|
# @return [Hash, Array]
|
443
|
-
def to_hash(options
|
446
|
+
def to_hash(**options)
|
444
447
|
# Construct metadata from that passed from file open, along with information from the file.
|
445
448
|
if input.is_a?(Metadata)
|
446
449
|
log_debug("each_statement: metadata") {input.inspect}
|
@@ -464,13 +467,13 @@ module RDF::Tabular
|
|
464
467
|
table_group['tables'] = tables
|
465
468
|
|
466
469
|
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
467
|
-
Reader.new(options[:original_input], options.merge(
|
470
|
+
Reader.new(options[:original_input], **options.merge(
|
468
471
|
metadata: input.tables.first,
|
469
472
|
base: input.tables.first.url,
|
470
473
|
minimal: minimal?,
|
471
474
|
no_found_metadata: true,
|
472
475
|
)) do |r|
|
473
|
-
case t = r.to_hash(options)
|
476
|
+
case t = r.to_hash(**options)
|
474
477
|
when Array then tables += t unless input.tables.first.suppressOutput
|
475
478
|
when Hash then tables << t unless input.tables.first.suppressOutput
|
476
479
|
end
|
@@ -478,13 +481,13 @@ module RDF::Tabular
|
|
478
481
|
else
|
479
482
|
input.each_table do |table|
|
480
483
|
next if table.suppressOutput && !validate?
|
481
|
-
Reader.open(table.url, options.merge(
|
484
|
+
Reader.open(table.url, **options.merge(
|
482
485
|
metadata: table,
|
483
486
|
base: table.url,
|
484
487
|
minimal: minimal?,
|
485
488
|
no_found_metadata: true,
|
486
489
|
)) do |r|
|
487
|
-
case t = r.to_hash(options)
|
490
|
+
case t = r.to_hash(**options)
|
488
491
|
when Array then tables += t unless table.suppressOutput
|
489
492
|
when Hash then tables << t unless table.suppressOutput
|
490
493
|
end
|
@@ -557,7 +560,7 @@ module RDF::Tabular
|
|
557
560
|
co['@id'] = subject.to_s unless subject == 'null'
|
558
561
|
prop = case cell.propertyUrl
|
559
562
|
when RDF.type then '@type'
|
560
|
-
when nil then
|
563
|
+
when nil then CGI.unescape(column.name) # Use URI-decoded name
|
561
564
|
else
|
562
565
|
# Compact the property to a term or prefixed name
|
563
566
|
metadata.context.compact_iri(cell.propertyUrl, vocab: true)
|
data/lib/rdf/tabular/uax35.rb
CHANGED
@@ -7,50 +7,99 @@ module RDF::Tabular
|
|
7
7
|
module UAX35
|
8
8
|
|
9
9
|
##
|
10
|
-
# Parse the date
|
11
|
-
# Otherwise, validate
|
10
|
+
# Parse the date pattern (if provided), and match against the value (if provided)
|
11
|
+
# Otherwise, validate pattern and raise an error.
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# Supported patterns are:
|
14
|
+
#
|
15
|
+
# * yyyy-MM-dd
|
16
|
+
# * yyyyMMdd
|
17
|
+
# * dd-MM-yyyy
|
18
|
+
# * d-M-yyyy
|
19
|
+
# * d-M-yy
|
20
|
+
# * d-M-y
|
21
|
+
# * MM-dd-yyyy
|
22
|
+
# * M-d-yyyy
|
23
|
+
# * M-d-yy
|
24
|
+
# * M-d-y
|
25
|
+
# * dd/MM/yyyy
|
26
|
+
# * d/M/yyyy
|
27
|
+
# * d/M/yy
|
28
|
+
# * d/M/y
|
29
|
+
# * MM/dd/yyyy
|
30
|
+
# * M/d/yyyy
|
31
|
+
# * M/d/yy
|
32
|
+
# * M/d/y
|
33
|
+
# * dd.MM.yyyy
|
34
|
+
# * d.M.yyyy
|
35
|
+
# * d.M.yy
|
36
|
+
# * d.M.y
|
37
|
+
# * MM.dd.yyyy
|
38
|
+
# * M.d.yyyy
|
39
|
+
# * M.d.yy
|
40
|
+
# * M.d.y
|
41
|
+
# * yyyy-MM-ddTHH:mm
|
42
|
+
# * yyyy-MM-ddTHH:mm:ss
|
43
|
+
# * yyyy-MM-ddTHH:mm:ss.S+
|
44
|
+
#
|
45
|
+
# Year components of fewer than four digits are normalized to the 1900 or 2000 range: a value from 70 through 99 is considered to be in the 1900 range; any other value is based on 2000.
|
46
|
+
#
|
47
|
+
# @param [String] pattern
|
14
48
|
# @param [String] value
|
15
49
|
# @return [String] XMLSchema version of value
|
16
|
-
# @raise [ArgumentError] if
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
value
|
50
|
+
# @raise [ArgumentError] if pattern is not valid, or nil
|
51
|
+
# @raise [ParseError] if value does not match
|
52
|
+
def parse_uax35_date(pattern, value)
|
53
|
+
date_pattern, time_pattern = nil, nil
|
54
|
+
return value unless pattern
|
55
|
+
orig_value = value ||= ""
|
56
|
+
orig_pattern = pattern
|
21
57
|
|
22
58
|
# Extract tz info
|
23
|
-
if md =
|
24
|
-
|
59
|
+
if md = pattern.match(/^(.*[dyms])+(\s*[xX]+)$/)
|
60
|
+
pattern, tz_pattern = md[1], md[2]
|
25
61
|
end
|
26
62
|
|
27
|
-
|
28
|
-
|
63
|
+
date_pattern, time_pattern = pattern.split(' ')
|
64
|
+
# Snuff out if this is a Time pattern
|
65
|
+
date_pattern, time_pattern = nil, date_pattern if time_pattern.nil? && !date_pattern.match(/[TyMd]/)
|
29
66
|
|
30
67
|
# Extract date, if specified
|
31
|
-
date_part = case
|
68
|
+
date_part = case date_pattern
|
32
69
|
when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
|
33
70
|
when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
|
34
71
|
when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
|
35
72
|
when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
|
73
|
+
when 'd-M-yy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{2})/)
|
74
|
+
when 'd-M-y' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{1,4})/)
|
36
75
|
when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
|
37
76
|
when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
|
38
|
-
when '
|
77
|
+
when 'M-d-yy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{2})/)
|
78
|
+
when 'M-d-y' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{1,4})/)
|
79
|
+
when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{1,4})/)
|
39
80
|
when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
|
40
|
-
when '
|
81
|
+
when 'd/M/yy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{2})/)
|
82
|
+
when 'd/M/y' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{1,4})/)
|
83
|
+
when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{1,4})/)
|
41
84
|
when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
|
85
|
+
when 'M/d/yy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{2})/)
|
86
|
+
when 'M/d/y' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{1,4})/)
|
42
87
|
when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
|
43
88
|
when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
|
89
|
+
when 'd.M.yy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{2})/)
|
90
|
+
when 'd.M.y' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{1,4})/)
|
44
91
|
when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
|
45
92
|
when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
|
93
|
+
when 'M.d.yy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{2})/)
|
94
|
+
when 'M.d.y' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{1,4})/)
|
46
95
|
when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
|
47
96
|
when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
48
97
|
when /yyyy-MM-ddTHH:mm:ss\.S+/
|
49
98
|
md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
50
|
-
num_ms =
|
99
|
+
num_ms = date_pattern.match(/S+/).to_s.length
|
51
100
|
md if md && md[:ms].length <= num_ms
|
52
101
|
else
|
53
|
-
raise ArgumentError, "unrecognized date/time
|
102
|
+
raise ArgumentError, "unrecognized date/time pattern #{date_pattern}" if date_pattern
|
54
103
|
nil
|
55
104
|
end
|
56
105
|
|
@@ -61,25 +110,25 @@ module RDF::Tabular
|
|
61
110
|
end
|
62
111
|
|
63
112
|
# Extract time, if specified
|
64
|
-
time_part = case
|
113
|
+
time_part = case time_pattern
|
65
114
|
when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
66
115
|
when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
|
67
116
|
when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
|
68
117
|
when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
|
69
118
|
when /HH:mm:ss\.S+/
|
70
119
|
md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
71
|
-
num_ms =
|
120
|
+
num_ms = time_pattern.match(/S+/).to_s.length
|
72
121
|
md if md && md[:ms].length <= num_ms
|
73
122
|
else
|
74
|
-
raise ArgumentError, "unrecognized date/time
|
123
|
+
raise ArgumentError, "unrecognized date/time pattern #{pattern}" if time_pattern
|
75
124
|
nil
|
76
125
|
end
|
77
126
|
|
78
|
-
# If there's a
|
79
|
-
|
127
|
+
# If there's a date_pattern but no date_part, match fails
|
128
|
+
raise ParseError, "#{orig_value} does not match pattern #{orig_pattern}" if !orig_value.empty? && date_pattern && date_part.nil?
|
80
129
|
|
81
|
-
# If there's a
|
82
|
-
|
130
|
+
# If there's a time_pattern but no time_part, match fails
|
131
|
+
raise ParseError, "#{orig_value} does not match pattern #{orig_pattern}" if !orig_value.empty? && time_pattern && time_part.nil?
|
83
132
|
|
84
133
|
# Forward past time part
|
85
134
|
value = value[time_part.to_s.length..-1] if time_part
|
@@ -88,8 +137,8 @@ module RDF::Tabular
|
|
88
137
|
time_part = date_part if date_part && date_part.names.include?("hr")
|
89
138
|
|
90
139
|
# If there's a timezone, it may optionally start with whitespace
|
91
|
-
value = value.lstrip if
|
92
|
-
tz_part = case
|
140
|
+
value = value.lstrip if tz_pattern.to_s.start_with?(' ')
|
141
|
+
tz_part = case tz_pattern.to_s.lstrip
|
93
142
|
when 'x' then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)$/)
|
94
143
|
when 'X' then value.match(/^(?:(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)|(?<z>Z))$/)
|
95
144
|
when 'xx' then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2}))|$/)
|
@@ -97,15 +146,30 @@ module RDF::Tabular
|
|
97
146
|
when 'xxx' then value.match(/^(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))$/)
|
98
147
|
when 'XXX' then value.match(/^(?:(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))|(?<z>Z))$/)
|
99
148
|
else
|
100
|
-
raise ArgumentError, "unrecognized timezone
|
149
|
+
raise ArgumentError, "unrecognized timezone pattern #{tz_pattern.to_s.lstrip}" if tz_pattern
|
101
150
|
nil
|
102
151
|
end
|
103
152
|
|
104
|
-
# If there's a
|
105
|
-
|
153
|
+
# If there's a tz_pattern but no tz_part, match fails
|
154
|
+
raise ParseError, "#{orig_value} does not match pattern #{orig_pattern}" if !orig_value.empty? && tz_pattern && tz_part.nil?
|
106
155
|
|
107
156
|
# Compose normalized value
|
108
|
-
vd =
|
157
|
+
vd = if date_part
|
158
|
+
yr, mo, da = [date_part[:yr], date_part[:mo], date_part[:da]].map(&:to_i)
|
159
|
+
|
160
|
+
if date_part[:yr].length < 4
|
161
|
+
# Make sure that yr makes sense, if given
|
162
|
+
yr = case yr
|
163
|
+
when 0..69 then yr + 2000
|
164
|
+
when 100..999 then yr + 2000
|
165
|
+
when 70..99 then yr + 1900
|
166
|
+
else yr
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
("%04d-%02d-%02d" % [yr, mo, da])
|
171
|
+
end
|
172
|
+
|
109
173
|
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
110
174
|
|
111
175
|
# Add milliseconds, if matched
|
@@ -117,37 +181,74 @@ module RDF::Tabular
|
|
117
181
|
end
|
118
182
|
|
119
183
|
##
|
120
|
-
# Parse the date
|
121
|
-
# Otherwise, validate
|
184
|
+
# Parse the number pattern (if provided), and match against the value (if provided)
|
185
|
+
# Otherwise, validate pattern and raise an error
|
122
186
|
#
|
123
187
|
# @param [String] pattern
|
124
188
|
# @param [String] value
|
125
189
|
# @param [String] groupChar
|
126
190
|
# @param [String] decimalChar
|
127
191
|
# @return [String] XMLSchema version of value or nil, if value does not match
|
128
|
-
# @raise [ArgumentError] if
|
192
|
+
# @raise [ArgumentError] if pattern is not valid
|
129
193
|
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
|
130
194
|
value ||= ""
|
131
195
|
|
132
196
|
re = build_number_re(pattern, groupChar, decimalChar)
|
133
197
|
|
198
|
+
raise ParseError, "#{value} has repeating #{groupChar.inspect}" if groupChar.length == 1 && value.include?(groupChar*2)
|
199
|
+
|
134
200
|
# Upcase value and remove internal spaces
|
135
201
|
value = value.upcase
|
136
202
|
|
137
203
|
if value =~ re
|
138
|
-
|
139
204
|
# Upcase value and remove internal spaces
|
140
205
|
value = value.
|
141
|
-
upcase.
|
142
206
|
gsub(/\s+/, '').
|
143
207
|
gsub(groupChar, '').
|
144
208
|
gsub(decimalChar, '.')
|
145
209
|
|
146
210
|
# result re-assembles parts removed from value
|
147
211
|
value
|
148
|
-
|
212
|
+
elsif !value.empty?
|
149
213
|
# no match
|
150
|
-
|
214
|
+
raise ParseError, "#{value.inspect} does not match #{pattern.inspect}"
|
215
|
+
end
|
216
|
+
|
217
|
+
# Extract percent or per-mille sign
|
218
|
+
case value
|
219
|
+
when /%/
|
220
|
+
value = value.sub('%', '')
|
221
|
+
lhs, rhs = value.split('.')
|
222
|
+
|
223
|
+
# Shift decimal
|
224
|
+
value = case lhs.length
|
225
|
+
when 0 then "0.00#{rhs}".sub('E', 'e')
|
226
|
+
when 1 then "0.0#{lhs}#{rhs}".sub('E', 'e')
|
227
|
+
when 2 then "0.#{lhs}#{rhs}".sub('E', 'e')
|
228
|
+
else
|
229
|
+
ll, lr = lhs[0..lhs.length-3], lhs[-2..-1]
|
230
|
+
ll = ll + "0" unless ll =~ /\d+/
|
231
|
+
"#{ll}.#{lr}#{rhs}".sub('E', 'e')
|
232
|
+
end
|
233
|
+
when /‰/
|
234
|
+
value = value.sub('‰', '')
|
235
|
+
lhs, rhs = value.split('.')
|
236
|
+
|
237
|
+
# Shift decimal
|
238
|
+
value = case lhs.length
|
239
|
+
when 0 then "0.000#{rhs}".sub('E', 'e')
|
240
|
+
when 1 then "0.00#{lhs}#{rhs}".sub('E', 'e')
|
241
|
+
when 2 then "0.0#{lhs}#{rhs}".sub('E', 'e')
|
242
|
+
when 3 then "0.#{lhs}#{rhs}".sub('E', 'e')
|
243
|
+
else
|
244
|
+
ll, lr = lhs[0..lhs.length-4], lhs[-3..-1]
|
245
|
+
ll = ll + "0" unless ll =~ /\d+/
|
246
|
+
"#{ll}.#{lr}#{rhs}".sub('E', 'e')
|
247
|
+
end
|
248
|
+
when /NAN/ then value.sub('NAN', 'NaN')
|
249
|
+
when /E/ then value.sub('E', 'e')
|
250
|
+
else
|
251
|
+
value
|
151
252
|
end
|
152
253
|
end
|
153
254
|
|
@@ -157,9 +258,10 @@ module RDF::Tabular
|
|
157
258
|
# @param [String] groupChar
|
158
259
|
# @param [String] decimalChar
|
159
260
|
# @return [Regexp] Regular expression matching value
|
160
|
-
# @raise [ArgumentError] if
|
261
|
+
# @raise [ArgumentError] if pattern is not valid
|
161
262
|
def build_number_re(pattern, groupChar, decimalChar)
|
162
263
|
# pattern must be composed of only 0, #, decimalChar, groupChar, E, %, and ‰
|
264
|
+
|
163
265
|
ge = Regexp.escape groupChar
|
164
266
|
de = Regexp.escape decimalChar
|
165
267
|
|
@@ -320,5 +422,8 @@ module RDF::Tabular
|
|
320
422
|
|
321
423
|
Regexp.new("^(?<prefix>#{prefix})(?<numeric_part>#{integer_str}#{fractional_str}#{exponent_str})(?<suffix>#{suffix})$")
|
322
424
|
end
|
425
|
+
|
426
|
+
# ParseError is raised when a value does not match the pattern
|
427
|
+
class ParseError < RuntimeError; end
|
323
428
|
end
|
324
429
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"@id": "http://example.org/countries.csv#AD",
|
4
|
+
"http://www.geonames.org/ontology#countryCode": "AD",
|
5
|
+
"schema:latitude": 42.546245,
|
6
|
+
"schema:longitude": 1.601554,
|
7
|
+
"schema:name": "Andorra"
|
8
|
+
},
|
9
|
+
{
|
10
|
+
"@id": "http://example.org/countries.csv#AE",
|
11
|
+
"http://www.geonames.org/ontology#countryCode": "AE",
|
12
|
+
"schema:latitude": 23.424076,
|
13
|
+
"schema:longitude": 53.847818,
|
14
|
+
"schema:name": "United Arab Emirates"
|
15
|
+
},
|
16
|
+
{
|
17
|
+
"@id": "http://example.org/countries.csv#AF",
|
18
|
+
"http://www.geonames.org/ontology#countryCode": "AF",
|
19
|
+
"schema:latitude": 33.93911,
|
20
|
+
"schema:longitude": 67.709953,
|
21
|
+
"schema:name": "Afghanistan"
|
22
|
+
},
|
23
|
+
{
|
24
|
+
"countryRef": "http://example.org/countries.csv#AF",
|
25
|
+
"year": "1960",
|
26
|
+
"population": 9616353
|
27
|
+
},
|
28
|
+
{
|
29
|
+
"countryRef": "http://example.org/countries.csv#AF",
|
30
|
+
"year": "1961",
|
31
|
+
"population": 9799379
|
32
|
+
},
|
33
|
+
{
|
34
|
+
"countryRef": "http://example.org/countries.csv#AF",
|
35
|
+
"year": "1962",
|
36
|
+
"population": 9989846
|
37
|
+
}
|
38
|
+
]
|