rdf-tabular 0.4.0 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +24 -5
- data/VERSION +1 -1
- data/etc/csvw.jsonld +135 -50
- data/lib/rdf/tabular/csvw.rb +215 -181
- data/lib/rdf/tabular/format.rb +8 -6
- data/lib/rdf/tabular/literal.rb +1 -1
- data/lib/rdf/tabular/metadata.rb +61 -80
- data/lib/rdf/tabular/reader.rb +18 -15
- data/lib/rdf/tabular/uax35.rb +143 -38
- data/spec/data/countries-minimal.json +38 -0
- data/spec/data/countries-minimal.ttl +36 -0
- data/spec/data/countries-standard.json +86 -0
- data/spec/data/countries-standard.ttl +75 -0
- data/spec/data/countries.csv +4 -0
- data/spec/data/countries.csv-minimal.json +16 -0
- data/spec/data/countries.csv-minimal.ttl +19 -0
- data/spec/data/countries.csv-standard.json +33 -0
- data/spec/data/countries.csv-standard.ttl +44 -0
- data/spec/data/countries.html +88 -0
- data/spec/data/countries.json +53 -0
- data/spec/data/countries_embed-minimal.json +38 -0
- data/spec/data/countries_embed-minimal.ttl +36 -0
- data/spec/data/countries_embed-standard.json +86 -0
- data/spec/data/countries_embed-standard.ttl +75 -0
- data/spec/data/countries_embed.html +88 -0
- data/spec/data/countries_html-minimal.json +38 -0
- data/spec/data/countries_html-minimal.ttl +36 -0
- data/spec/data/countries_html-standard.json +86 -0
- data/spec/data/countries_html-standard.ttl +75 -0
- data/spec/data/country-codes-and-names-minimal.json +19 -0
- data/spec/data/country-codes-and-names-minimal.ttl +22 -0
- data/spec/data/country-codes-and-names-standard.json +47 -0
- data/spec/data/country-codes-and-names-standard.ttl +45 -0
- data/spec/data/country-codes-and-names.csv +5 -0
- data/spec/data/country_slice.csv +4 -0
- data/spec/data/junior-roles.csv +3 -0
- data/spec/data/junior-roles.json +54 -0
- data/spec/data/roles-minimal.json +32 -0
- data/spec/data/roles-minimal.ttl +36 -0
- data/spec/data/roles-standard.json +56 -0
- data/spec/data/roles-standard.ttl +66 -0
- data/spec/data/roles.json +23 -0
- data/spec/data/senior-roles.csv +3 -0
- data/spec/data/senior-roles.json +52 -0
- data/spec/data/test232-metadata.json +10 -0
- data/spec/data/test232.csv +3 -0
- data/spec/data/tree-ops-atd.json +1 -0
- data/spec/data/tree-ops-ext-minimal.json +42 -0
- data/spec/data/tree-ops-ext-minimal.ttl +34 -0
- data/spec/data/tree-ops-ext-standard.json +93 -0
- data/spec/data/tree-ops-ext-standard.ttl +82 -0
- data/spec/data/tree-ops-ext.csv +4 -0
- data/spec/data/tree-ops-ext.json +81 -0
- data/spec/data/tree-ops-minimal.json +18 -0
- data/spec/data/tree-ops-minimal.ttl +14 -0
- data/spec/data/tree-ops-standard.json +44 -0
- data/spec/data/tree-ops-standard.ttl +44 -0
- data/spec/data/tree-ops-virtual-minimal.json +32 -0
- data/spec/data/tree-ops-virtual-minimal.ttl +25 -0
- data/spec/data/tree-ops-virtual-standard.json +49 -0
- data/spec/data/tree-ops-virtual-standard.ttl +49 -0
- data/spec/data/tree-ops-virtual.json +48 -0
- data/spec/data/tree-ops.csv +3 -0
- data/spec/data/tree-ops.csv-metadata.json +43 -0
- data/spec/data/tree-ops.html +54 -0
- data/spec/data/tree-ops.tsv +3 -0
- data/spec/format_spec.rb +5 -4
- data/spec/metadata_spec.rb +10 -16
- data/spec/suite_helper.rb +2 -2
- data/spec/suite_spec.rb +5 -6
- data/spec/uax35_spec.rb +239 -0
- metadata +149 -36
- data/lib/rdf/tabular/json.rb +0 -0
data/lib/rdf/tabular/reader.rb
CHANGED
@@ -28,16 +28,19 @@ module RDF::Tabular
|
|
28
28
|
RDF::CLI::Option.new(
|
29
29
|
symbol: :metadata,
|
30
30
|
datatype: RDF::URI,
|
31
|
+
control: :url2,
|
31
32
|
on: ["--metadata URI"],
|
32
33
|
description: "user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loade from that location.") {|arg| RDF::URI(arg)},
|
33
34
|
RDF::CLI::Option.new(
|
34
35
|
symbol: :minimal,
|
36
|
+
control: :checkbox,
|
35
37
|
datatype: TrueClass,
|
36
38
|
on: ["--minimal"],
|
37
39
|
description: "Includes only the information gleaned from the cells of the tabular data.") {true},
|
38
40
|
RDF::CLI::Option.new(
|
39
41
|
symbol: :noProv,
|
40
42
|
datatype: TrueClass,
|
43
|
+
control: :checkbox,
|
41
44
|
on: ["--no-prov"],
|
42
45
|
description: "do not output optional provenance information.") {true},
|
43
46
|
]
|
@@ -60,7 +63,7 @@ module RDF::Tabular
|
|
60
63
|
# @yieldparam [RDF::Reader] reader
|
61
64
|
# @yieldreturn [void] ignored
|
62
65
|
# @raise [RDF::ReaderError] if the CSV document cannot be loaded
|
63
|
-
def initialize(input = $stdin, options
|
66
|
+
def initialize(input = $stdin, **options, &block)
|
64
67
|
super do
|
65
68
|
# Base would be how we are to take this
|
66
69
|
@options[:base] ||= base_uri.to_s if base_uri
|
@@ -86,7 +89,7 @@ module RDF::Tabular
|
|
86
89
|
# If input is JSON, then the input is the metadata
|
87
90
|
content_type = @input.respond_to?(:content_type) ? @input.content_type : ""
|
88
91
|
if @options[:base] =~ /\.json(?:ld)?$/ || content_type =~ %r(application/(csvm\+|ld\+)?json)
|
89
|
-
@metadata = Metadata.new(@input,
|
92
|
+
@metadata = Metadata.new(@input, filenames: @options[:base], **@options)
|
90
93
|
# If @metadata is for a Table, turn it into a TableGroup
|
91
94
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
92
95
|
@metadata.normalize!
|
@@ -99,7 +102,7 @@ module RDF::Tabular
|
|
99
102
|
def script.content_type; "application/csvm+json"; end
|
100
103
|
log_debug("Reader#initialize") {"Process HTML script block"}
|
101
104
|
@input = script
|
102
|
-
@metadata = Metadata.new(@input,
|
105
|
+
@metadata = Metadata.new(@input, filenames: @options[:base], **@options)
|
103
106
|
# If @metadata is for a Table, turn it into a TableGroup
|
104
107
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
105
108
|
@metadata.normalize!
|
@@ -116,7 +119,7 @@ module RDF::Tabular
|
|
116
119
|
dialect.separator = "\t" if (input.content_type == "text/tsv" rescue nil)
|
117
120
|
embed_options = @options.dup
|
118
121
|
embed_options[:lang] = dialect_metadata.lang if dialect_metadata.lang
|
119
|
-
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], embed_options)
|
122
|
+
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], **embed_options)
|
120
123
|
|
121
124
|
if (@metadata = @options[:metadata]) && @metadata.tableSchema
|
122
125
|
@metadata.verify_compatible!(embedded_metadata)
|
@@ -133,7 +136,7 @@ module RDF::Tabular
|
|
133
136
|
else
|
134
137
|
# It's tabular data. Find metadata and proceed as if it was specified in the first place
|
135
138
|
@options[:original_input] = @input unless @options[:metadata]
|
136
|
-
@input = @metadata = Metadata.for_input(@input,
|
139
|
+
@input = @metadata = Metadata.for_input(@input, **@options).normalize!
|
137
140
|
end
|
138
141
|
|
139
142
|
log_debug("Reader#initialize") {"input: #{input}, metadata: #{metadata.inspect}"}
|
@@ -183,7 +186,7 @@ module RDF::Tabular
|
|
183
186
|
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
184
187
|
table_resource = RDF::Node.new
|
185
188
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
186
|
-
Reader.new(options[:original_input], options.merge(
|
189
|
+
Reader.new(options[:original_input], **options.merge(
|
187
190
|
metadata: input.tables.first,
|
188
191
|
base: input.tables.first.url,
|
189
192
|
no_found_metadata: true,
|
@@ -202,7 +205,7 @@ module RDF::Tabular
|
|
202
205
|
end.flatten.compact
|
203
206
|
table_resource = table.id || RDF::Node.new
|
204
207
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
205
|
-
Reader.open(table.url, options.merge(
|
208
|
+
Reader.open(table.url, **options.merge(
|
206
209
|
metadata: table,
|
207
210
|
base: table.url,
|
208
211
|
no_found_metadata: true,
|
@@ -418,9 +421,9 @@ module RDF::Tabular
|
|
418
421
|
|
419
422
|
res = if io
|
420
423
|
::JSON::dump_default_options = json_state
|
421
|
-
::JSON.dump(self.send(hash_fn, options), io)
|
424
|
+
::JSON.dump(self.send(hash_fn, **options), io)
|
422
425
|
else
|
423
|
-
hash = self.send(hash_fn, options)
|
426
|
+
hash = self.send(hash_fn, **options)
|
424
427
|
::JSON.generate(hash, json_state)
|
425
428
|
end
|
426
429
|
|
@@ -440,7 +443,7 @@ module RDF::Tabular
|
|
440
443
|
#
|
441
444
|
# @param [Hash{Symbol => Object}] options
|
442
445
|
# @return [Hash, Array]
|
443
|
-
def to_hash(options
|
446
|
+
def to_hash(**options)
|
444
447
|
# Construct metadata from that passed from file open, along with information from the file.
|
445
448
|
if input.is_a?(Metadata)
|
446
449
|
log_debug("each_statement: metadata") {input.inspect}
|
@@ -464,13 +467,13 @@ module RDF::Tabular
|
|
464
467
|
table_group['tables'] = tables
|
465
468
|
|
466
469
|
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
467
|
-
Reader.new(options[:original_input], options.merge(
|
470
|
+
Reader.new(options[:original_input], **options.merge(
|
468
471
|
metadata: input.tables.first,
|
469
472
|
base: input.tables.first.url,
|
470
473
|
minimal: minimal?,
|
471
474
|
no_found_metadata: true,
|
472
475
|
)) do |r|
|
473
|
-
case t = r.to_hash(options)
|
476
|
+
case t = r.to_hash(**options)
|
474
477
|
when Array then tables += t unless input.tables.first.suppressOutput
|
475
478
|
when Hash then tables << t unless input.tables.first.suppressOutput
|
476
479
|
end
|
@@ -478,13 +481,13 @@ module RDF::Tabular
|
|
478
481
|
else
|
479
482
|
input.each_table do |table|
|
480
483
|
next if table.suppressOutput && !validate?
|
481
|
-
Reader.open(table.url, options.merge(
|
484
|
+
Reader.open(table.url, **options.merge(
|
482
485
|
metadata: table,
|
483
486
|
base: table.url,
|
484
487
|
minimal: minimal?,
|
485
488
|
no_found_metadata: true,
|
486
489
|
)) do |r|
|
487
|
-
case t = r.to_hash(options)
|
490
|
+
case t = r.to_hash(**options)
|
488
491
|
when Array then tables += t unless table.suppressOutput
|
489
492
|
when Hash then tables << t unless table.suppressOutput
|
490
493
|
end
|
@@ -557,7 +560,7 @@ module RDF::Tabular
|
|
557
560
|
co['@id'] = subject.to_s unless subject == 'null'
|
558
561
|
prop = case cell.propertyUrl
|
559
562
|
when RDF.type then '@type'
|
560
|
-
when nil then
|
563
|
+
when nil then CGI.unescape(column.name) # Use URI-decoded name
|
561
564
|
else
|
562
565
|
# Compact the property to a term or prefixed name
|
563
566
|
metadata.context.compact_iri(cell.propertyUrl, vocab: true)
|
data/lib/rdf/tabular/uax35.rb
CHANGED
@@ -7,50 +7,99 @@ module RDF::Tabular
|
|
7
7
|
module UAX35
|
8
8
|
|
9
9
|
##
|
10
|
-
# Parse the date
|
11
|
-
# Otherwise, validate
|
10
|
+
# Parse the date pattern (if provided), and match against the value (if provided)
|
11
|
+
# Otherwise, validate pattern and raise an error.
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# Supported patterns are:
|
14
|
+
#
|
15
|
+
# * yyyy-MM-dd
|
16
|
+
# * yyyyMMdd
|
17
|
+
# * dd-MM-yyyy
|
18
|
+
# * d-M-yyyy
|
19
|
+
# * d-M-yy
|
20
|
+
# * d-M-y
|
21
|
+
# * MM-dd-yyyy
|
22
|
+
# * M-d-yyyy
|
23
|
+
# * M-d-yy
|
24
|
+
# * M-d-y
|
25
|
+
# * dd/MM/yyyy
|
26
|
+
# * d/M/yyyy
|
27
|
+
# * d/M/yy
|
28
|
+
# * d/M/y
|
29
|
+
# * MM/dd/yyyy
|
30
|
+
# * M/d/yyyy
|
31
|
+
# * M/d/yy
|
32
|
+
# * M/d/y
|
33
|
+
# * dd.MM.yyyy
|
34
|
+
# * d.M.yyyy
|
35
|
+
# * d.M.yy
|
36
|
+
# * d.M.y
|
37
|
+
# * MM.dd.yyyy
|
38
|
+
# * M.d.yyyy
|
39
|
+
# * M.d.yy
|
40
|
+
# * M.d.y
|
41
|
+
# * yyyy-MM-ddTHH:mm
|
42
|
+
# * yyyy-MM-ddTHH:mm:ss
|
43
|
+
# * yyyy-MM-ddTHH:mm:ss.S+
|
44
|
+
#
|
45
|
+
# Year components of fewer than four digits are normalized to a four-digit year: values from 70 to 99 are considered to be in the 1900 range; all other values are based on 2000.
|
46
|
+
#
|
47
|
+
# @param [String] pattern
|
14
48
|
# @param [String] value
|
15
49
|
# @return [String] XMLSchema version of value
|
16
|
-
# @raise [ArgumentError] if
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
value
|
50
|
+
# @raise [ArgumentError] if pattern is not valid, or nil
|
51
|
+
# @raise [ParseError] if value does not match
|
52
|
+
def parse_uax35_date(pattern, value)
|
53
|
+
date_pattern, time_pattern = nil, nil
|
54
|
+
return value unless pattern
|
55
|
+
orig_value = value ||= ""
|
56
|
+
orig_pattern = pattern
|
21
57
|
|
22
58
|
# Extract tz info
|
23
|
-
if md =
|
24
|
-
|
59
|
+
if md = pattern.match(/^(.*[dyms])+(\s*[xX]+)$/)
|
60
|
+
pattern, tz_pattern = md[1], md[2]
|
25
61
|
end
|
26
62
|
|
27
|
-
|
28
|
-
|
63
|
+
date_pattern, time_pattern = pattern.split(' ')
|
64
|
+
# Sniff out if this is a Time pattern
|
65
|
+
date_pattern, time_pattern = nil, date_pattern if time_pattern.nil? && !date_pattern.match(/[TyMd]/)
|
29
66
|
|
30
67
|
# Extract date, if specified
|
31
|
-
date_part = case
|
68
|
+
date_part = case date_pattern
|
32
69
|
when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
|
33
70
|
when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
|
34
71
|
when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
|
35
72
|
when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
|
73
|
+
when 'd-M-yy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{2})/)
|
74
|
+
when 'd-M-y' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{1,4})/)
|
36
75
|
when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
|
37
76
|
when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
|
38
|
-
when '
|
77
|
+
when 'M-d-yy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{2})/)
|
78
|
+
when 'M-d-y' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{1,4})/)
|
79
|
+
when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{1,4})/)
|
39
80
|
when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
|
40
|
-
when '
|
81
|
+
when 'd/M/yy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{2})/)
|
82
|
+
when 'd/M/y' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{1,4})/)
|
83
|
+
when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{1,4})/)
|
41
84
|
when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
|
85
|
+
when 'M/d/yy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{2})/)
|
86
|
+
when 'M/d/y' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{1,4})/)
|
42
87
|
when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
|
43
88
|
when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
|
89
|
+
when 'd.M.yy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{2})/)
|
90
|
+
when 'd.M.y' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{1,4})/)
|
44
91
|
when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
|
45
92
|
when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
|
93
|
+
when 'M.d.yy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{2})/)
|
94
|
+
when 'M.d.y' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{1,4})/)
|
46
95
|
when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
|
47
96
|
when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
48
97
|
when /yyyy-MM-ddTHH:mm:ss\.S+/
|
49
98
|
md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
50
|
-
num_ms =
|
99
|
+
num_ms = date_pattern.match(/S+/).to_s.length
|
51
100
|
md if md && md[:ms].length <= num_ms
|
52
101
|
else
|
53
|
-
raise ArgumentError, "unrecognized date/time
|
102
|
+
raise ArgumentError, "unrecognized date/time pattern #{date_pattern}" if date_pattern
|
54
103
|
nil
|
55
104
|
end
|
56
105
|
|
@@ -61,25 +110,25 @@ module RDF::Tabular
|
|
61
110
|
end
|
62
111
|
|
63
112
|
# Extract time, if specified
|
64
|
-
time_part = case
|
113
|
+
time_part = case time_pattern
|
65
114
|
when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
66
115
|
when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
|
67
116
|
when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
|
68
117
|
when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
|
69
118
|
when /HH:mm:ss\.S+/
|
70
119
|
md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
71
|
-
num_ms =
|
120
|
+
num_ms = time_pattern.match(/S+/).to_s.length
|
72
121
|
md if md && md[:ms].length <= num_ms
|
73
122
|
else
|
74
|
-
raise ArgumentError, "unrecognized date/time
|
123
|
+
raise ArgumentError, "unrecognized date/time pattern #{pattern}" if time_pattern
|
75
124
|
nil
|
76
125
|
end
|
77
126
|
|
78
|
-
# If there's a
|
79
|
-
|
127
|
+
# If there's a date_pattern but no date_part, match fails
|
128
|
+
raise ParseError, "#{orig_value} does not match pattern #{orig_pattern}" if !orig_value.empty? && date_pattern && date_part.nil?
|
80
129
|
|
81
|
-
# If there's a
|
82
|
-
|
130
|
+
# If there's a time_pattern but no time_part, match fails
|
131
|
+
raise ParseError, "#{orig_value} does not match pattern #{orig_pattern}" if !orig_value.empty? && time_pattern && time_part.nil?
|
83
132
|
|
84
133
|
# Forward past time part
|
85
134
|
value = value[time_part.to_s.length..-1] if time_part
|
@@ -88,8 +137,8 @@ module RDF::Tabular
|
|
88
137
|
time_part = date_part if date_part && date_part.names.include?("hr")
|
89
138
|
|
90
139
|
# If there's a timezone, it may optionally start with whitespace
|
91
|
-
value = value.lstrip if
|
92
|
-
tz_part = case
|
140
|
+
value = value.lstrip if tz_pattern.to_s.start_with?(' ')
|
141
|
+
tz_part = case tz_pattern.to_s.lstrip
|
93
142
|
when 'x' then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)$/)
|
94
143
|
when 'X' then value.match(/^(?:(?:(?<hr>[+-]\d{2})(?<mi>\d{2})?)|(?<z>Z))$/)
|
95
144
|
when 'xx' then value.match(/^(?:(?<hr>[+-]\d{2})(?<mi>\d{2}))|$/)
|
@@ -97,15 +146,30 @@ module RDF::Tabular
|
|
97
146
|
when 'xxx' then value.match(/^(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))$/)
|
98
147
|
when 'XXX' then value.match(/^(?:(?:(?<hr>[+-]\d{2}):(?<mi>\d{2}))|(?<z>Z))$/)
|
99
148
|
else
|
100
|
-
raise ArgumentError, "unrecognized timezone
|
149
|
+
raise ArgumentError, "unrecognized timezone pattern #{tz_pattern.to_s.lstrip}" if tz_pattern
|
101
150
|
nil
|
102
151
|
end
|
103
152
|
|
104
|
-
# If there's a
|
105
|
-
|
153
|
+
# If there's a tz_pattern but no tz_part, match fails
|
154
|
+
raise ParseError, "#{orig_value} does not match pattern #{orig_pattern}" if !orig_value.empty? && tz_pattern && tz_part.nil?
|
106
155
|
|
107
156
|
# Compose normalized value
|
108
|
-
vd =
|
157
|
+
vd = if date_part
|
158
|
+
yr, mo, da = [date_part[:yr], date_part[:mo], date_part[:da]].map(&:to_i)
|
159
|
+
|
160
|
+
if date_part[:yr].length < 4
|
161
|
+
# Make sure that yr makes sense, if given
|
162
|
+
yr = case yr
|
163
|
+
when 0..69 then yr + 2000
|
164
|
+
when 100..999 then yr + 2000
|
165
|
+
when 70..99 then yr + 1900
|
166
|
+
else yr
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
("%04d-%02d-%02d" % [yr, mo, da])
|
171
|
+
end
|
172
|
+
|
109
173
|
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
110
174
|
|
111
175
|
# Add milliseconds, if matched
|
@@ -117,37 +181,74 @@ module RDF::Tabular
|
|
117
181
|
end
|
118
182
|
|
119
183
|
##
|
120
|
-
# Parse the date
|
121
|
-
# Otherwise, validate
|
184
|
+
# Parse the date pattern (if provided), and match against the value (if provided)
|
185
|
+
# Otherwise, validate pattern and raise an error
|
122
186
|
#
|
123
187
|
# @param [String] pattern
|
124
188
|
# @param [String] value
|
125
189
|
# @param [String] groupChar
|
126
190
|
# @param [String] decimalChar
|
127
191
|
# @return [String] XMLSchema version of value or nil, if value does not match
|
128
|
-
# @raise [ArgumentError] if
|
192
|
+
# @raise [ArgumentError] if pattern is not valid
|
129
193
|
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
|
130
194
|
value ||= ""
|
131
195
|
|
132
196
|
re = build_number_re(pattern, groupChar, decimalChar)
|
133
197
|
|
198
|
+
raise ParseError, "#{value} has repeating #{groupChar.inspect}" if groupChar.length == 1 && value.include?(groupChar*2)
|
199
|
+
|
134
200
|
# Upcase value
|
135
201
|
value = value.upcase
|
136
202
|
|
137
203
|
if value =~ re
|
138
|
-
|
139
204
|
# Remove internal spaces and group characters, and normalize the decimal character to '.'
|
140
205
|
value = value.
|
141
|
-
upcase.
|
142
206
|
gsub(/\s+/, '').
|
143
207
|
gsub(groupChar, '').
|
144
208
|
gsub(decimalChar, '.')
|
145
209
|
|
146
210
|
# result re-assembles parts removed from value
|
147
211
|
value
|
148
|
-
|
212
|
+
elsif !value.empty?
|
149
213
|
# no match
|
150
|
-
|
214
|
+
raise ParseError, "#{value.inspect} does not match #{pattern.inspect}"
|
215
|
+
end
|
216
|
+
|
217
|
+
# Extract percent or per-mille sign
|
218
|
+
case value
|
219
|
+
when /%/
|
220
|
+
value = value.sub('%', '')
|
221
|
+
lhs, rhs = value.split('.')
|
222
|
+
|
223
|
+
# Shift decimal
|
224
|
+
value = case lhs.length
|
225
|
+
when 0 then "0.00#{rhs}".sub('E', 'e')
|
226
|
+
when 1 then "0.0#{lhs}#{rhs}".sub('E', 'e')
|
227
|
+
when 2 then "0.#{lhs}#{rhs}".sub('E', 'e')
|
228
|
+
else
|
229
|
+
ll, lr = lhs[0..lhs.length-3], lhs[-2..-1]
|
230
|
+
ll = ll + "0" unless ll =~ /\d+/
|
231
|
+
"#{ll}.#{lr}#{rhs}".sub('E', 'e')
|
232
|
+
end
|
233
|
+
when /‰/
|
234
|
+
value = value.sub('‰', '')
|
235
|
+
lhs, rhs = value.split('.')
|
236
|
+
|
237
|
+
# Shift decimal
|
238
|
+
value = case lhs.length
|
239
|
+
when 0 then "0.000#{rhs}".sub('E', 'e')
|
240
|
+
when 1 then "0.00#{lhs}#{rhs}".sub('E', 'e')
|
241
|
+
when 2 then "0.0#{lhs}#{rhs}".sub('E', 'e')
|
242
|
+
when 3 then "0.#{lhs}#{rhs}".sub('E', 'e')
|
243
|
+
else
|
244
|
+
ll, lr = lhs[0..lhs.length-4], lhs[-3..-1]
|
245
|
+
ll = ll + "0" unless ll =~ /\d+/
|
246
|
+
"#{ll}.#{lr}#{rhs}".sub('E', 'e')
|
247
|
+
end
|
248
|
+
when /NAN/ then value.sub('NAN', 'NaN')
|
249
|
+
when /E/ then value.sub('E', 'e')
|
250
|
+
else
|
251
|
+
value
|
151
252
|
end
|
152
253
|
end
|
153
254
|
|
@@ -157,9 +258,10 @@ module RDF::Tabular
|
|
157
258
|
# @param [String] groupChar
|
158
259
|
# @param [String] decimalChar
|
159
260
|
# @return [Regexp] Regular expression matching value
|
160
|
-
# @raise [ArgumentError] if
|
261
|
+
# @raise [ArgumentError] if pattern is not valid
|
161
262
|
def build_number_re(pattern, groupChar, decimalChar)
|
162
263
|
# pattern must be composed of only 0, #, decimalChar, groupChar, E, %, and ‰
|
264
|
+
|
163
265
|
ge = Regexp.escape groupChar
|
164
266
|
de = Regexp.escape decimalChar
|
165
267
|
|
@@ -320,5 +422,8 @@ module RDF::Tabular
|
|
320
422
|
|
321
423
|
Regexp.new("^(?<prefix>#{prefix})(?<numeric_part>#{integer_str}#{fractional_str}#{exponent_str})(?<suffix>#{suffix})$")
|
322
424
|
end
|
425
|
+
|
426
|
+
# ParseError is raised when a value does not match the pattern
|
427
|
+
class ParseError < RuntimeError; end
|
323
428
|
end
|
324
429
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"@id": "http://example.org/countries.csv#AD",
|
4
|
+
"http://www.geonames.org/ontology#countryCode": "AD",
|
5
|
+
"schema:latitude": 42.546245,
|
6
|
+
"schema:longitude": 1.601554,
|
7
|
+
"schema:name": "Andorra"
|
8
|
+
},
|
9
|
+
{
|
10
|
+
"@id": "http://example.org/countries.csv#AE",
|
11
|
+
"http://www.geonames.org/ontology#countryCode": "AE",
|
12
|
+
"schema:latitude": 23.424076,
|
13
|
+
"schema:longitude": 53.847818,
|
14
|
+
"schema:name": "United Arab Emirates"
|
15
|
+
},
|
16
|
+
{
|
17
|
+
"@id": "http://example.org/countries.csv#AF",
|
18
|
+
"http://www.geonames.org/ontology#countryCode": "AF",
|
19
|
+
"schema:latitude": 33.93911,
|
20
|
+
"schema:longitude": 67.709953,
|
21
|
+
"schema:name": "Afghanistan"
|
22
|
+
},
|
23
|
+
{
|
24
|
+
"countryRef": "http://example.org/countries.csv#AF",
|
25
|
+
"year": "1960",
|
26
|
+
"population": 9616353
|
27
|
+
},
|
28
|
+
{
|
29
|
+
"countryRef": "http://example.org/countries.csv#AF",
|
30
|
+
"year": "1961",
|
31
|
+
"population": 9799379
|
32
|
+
},
|
33
|
+
{
|
34
|
+
"countryRef": "http://example.org/countries.csv#AF",
|
35
|
+
"year": "1962",
|
36
|
+
"population": 9989846
|
37
|
+
}
|
38
|
+
]
|