rdf-tabular 0.4.0 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +24 -5
- data/VERSION +1 -1
- data/etc/csvw.jsonld +135 -50
- data/lib/rdf/tabular/csvw.rb +215 -181
- data/lib/rdf/tabular/format.rb +8 -6
- data/lib/rdf/tabular/literal.rb +1 -1
- data/lib/rdf/tabular/metadata.rb +61 -80
- data/lib/rdf/tabular/reader.rb +18 -15
- data/lib/rdf/tabular/uax35.rb +143 -38
- data/spec/data/countries-minimal.json +38 -0
- data/spec/data/countries-minimal.ttl +36 -0
- data/spec/data/countries-standard.json +86 -0
- data/spec/data/countries-standard.ttl +75 -0
- data/spec/data/countries.csv +4 -0
- data/spec/data/countries.csv-minimal.json +16 -0
- data/spec/data/countries.csv-minimal.ttl +19 -0
- data/spec/data/countries.csv-standard.json +33 -0
- data/spec/data/countries.csv-standard.ttl +44 -0
- data/spec/data/countries.html +88 -0
- data/spec/data/countries.json +53 -0
- data/spec/data/countries_embed-minimal.json +38 -0
- data/spec/data/countries_embed-minimal.ttl +36 -0
- data/spec/data/countries_embed-standard.json +86 -0
- data/spec/data/countries_embed-standard.ttl +75 -0
- data/spec/data/countries_embed.html +88 -0
- data/spec/data/countries_html-minimal.json +38 -0
- data/spec/data/countries_html-minimal.ttl +36 -0
- data/spec/data/countries_html-standard.json +86 -0
- data/spec/data/countries_html-standard.ttl +75 -0
- data/spec/data/country-codes-and-names-minimal.json +19 -0
- data/spec/data/country-codes-and-names-minimal.ttl +22 -0
- data/spec/data/country-codes-and-names-standard.json +47 -0
- data/spec/data/country-codes-and-names-standard.ttl +45 -0
- data/spec/data/country-codes-and-names.csv +5 -0
- data/spec/data/country_slice.csv +4 -0
- data/spec/data/junior-roles.csv +3 -0
- data/spec/data/junior-roles.json +54 -0
- data/spec/data/roles-minimal.json +32 -0
- data/spec/data/roles-minimal.ttl +36 -0
- data/spec/data/roles-standard.json +56 -0
- data/spec/data/roles-standard.ttl +66 -0
- data/spec/data/roles.json +23 -0
- data/spec/data/senior-roles.csv +3 -0
- data/spec/data/senior-roles.json +52 -0
- data/spec/data/test232-metadata.json +10 -0
- data/spec/data/test232.csv +3 -0
- data/spec/data/tree-ops-atd.json +1 -0
- data/spec/data/tree-ops-ext-minimal.json +42 -0
- data/spec/data/tree-ops-ext-minimal.ttl +34 -0
- data/spec/data/tree-ops-ext-standard.json +93 -0
- data/spec/data/tree-ops-ext-standard.ttl +82 -0
- data/spec/data/tree-ops-ext.csv +4 -0
- data/spec/data/tree-ops-ext.json +81 -0
- data/spec/data/tree-ops-minimal.json +18 -0
- data/spec/data/tree-ops-minimal.ttl +14 -0
- data/spec/data/tree-ops-standard.json +44 -0
- data/spec/data/tree-ops-standard.ttl +44 -0
- data/spec/data/tree-ops-virtual-minimal.json +32 -0
- data/spec/data/tree-ops-virtual-minimal.ttl +25 -0
- data/spec/data/tree-ops-virtual-standard.json +49 -0
- data/spec/data/tree-ops-virtual-standard.ttl +49 -0
- data/spec/data/tree-ops-virtual.json +48 -0
- data/spec/data/tree-ops.csv +3 -0
- data/spec/data/tree-ops.csv-metadata.json +43 -0
- data/spec/data/tree-ops.html +54 -0
- data/spec/data/tree-ops.tsv +3 -0
- data/spec/format_spec.rb +5 -4
- data/spec/metadata_spec.rb +10 -16
- data/spec/suite_helper.rb +2 -2
- data/spec/suite_spec.rb +5 -6
- data/spec/uax35_spec.rb +239 -0
- metadata +149 -36
- data/lib/rdf/tabular/json.rb +0 -0
data/lib/rdf/tabular/format.rb
CHANGED
@@ -24,10 +24,10 @@ module RDF::Tabular
|
|
24
24
|
#
|
25
25
|
# @see http://www.w3.org/TR/rdf-testcases/#ntriples
|
26
26
|
class Format < RDF::Format
|
27
|
-
content_type 'text/csv',
|
27
|
+
content_type 'text/csv;q=0.4',
|
28
28
|
extensions: [:csv, :tsv],
|
29
29
|
alias: %w{
|
30
|
-
text/tab-separated-values
|
30
|
+
text/tab-separated-values;q=0.4
|
31
31
|
application/csvm+json
|
32
32
|
}
|
33
33
|
content_encoding 'utf-8'
|
@@ -52,10 +52,12 @@ module RDF::Tabular
|
|
52
52
|
# @return [Hash{Symbol => Lambda(Array, Hash)}]
|
53
53
|
def self.cli_commands
|
54
54
|
{
|
55
|
-
|
56
|
-
description: "
|
57
|
-
|
58
|
-
|
55
|
+
"tabular-json": {
|
56
|
+
description: "Serialize using tabular JSON",
|
57
|
+
parse: false,
|
58
|
+
filter: {format: :tabular}, # Only shows output format set
|
59
|
+
option_use: {output_format: :disabled},
|
60
|
+
help: "tabular-json --input-format tabular files ...\nGenerate tabular JSON output, rather than RDF for Tabular data",
|
59
61
|
lambda: ->(argv, opts) do
|
60
62
|
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
61
63
|
out = opts[:output] || $stdout
|
data/lib/rdf/tabular/literal.rb
CHANGED
@@ -13,7 +13,7 @@ module RDF::Tabular
|
|
13
13
|
##
|
14
14
|
# @param [Object] value
|
15
15
|
# @option options [String] :lexical (nil)
|
16
|
-
def initialize(value, options
|
16
|
+
def initialize(value, **options)
|
17
17
|
@datatype = options[:datatype] || DATATYPE
|
18
18
|
@string = options[:lexical] if options.has_key?(:lexical)
|
19
19
|
if value.is_a?(String)
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -136,15 +136,15 @@ module RDF::Tabular
|
|
136
136
|
# see `RDF::Util::File.open_file` in RDF.rb and {new}
|
137
137
|
# @yield [Metadata]
|
138
138
|
# @raise [IOError] if file not found
|
139
|
-
def self.open(path, options
|
139
|
+
def self.open(path, **options)
|
140
140
|
options = options.merge(
|
141
141
|
headers: {
|
142
142
|
'Accept' => 'application/ld+json, application/json'
|
143
143
|
}
|
144
144
|
)
|
145
145
|
path = "file:" + path unless path =~ /^\w+:/
|
146
|
-
RDF::Util::File.open_file(path, options) do |file|
|
147
|
-
self.new(file, options.merge(base: path, filenames: path))
|
146
|
+
RDF::Util::File.open_file(path, **options) do |file|
|
147
|
+
self.new(file, **options.merge(base: path, filenames: path))
|
148
148
|
end
|
149
149
|
end
|
150
150
|
|
@@ -173,16 +173,16 @@ module RDF::Tabular
|
|
173
173
|
# @option options [RDF::URI] :base
|
174
174
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not a URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
175
175
|
# @return [Metadata]
|
176
|
-
def self.for_input(input, options
|
176
|
+
def self.for_input(input, **options)
|
177
177
|
base = options[:base]
|
178
178
|
|
179
179
|
# Use user metadata, if provided
|
180
180
|
metadata = case options[:metadata]
|
181
181
|
when Metadata then options[:metadata]
|
182
182
|
when Hash
|
183
|
-
Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
183
|
+
Metadata.new(options[:metadata], **options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
184
184
|
when String, RDF::URI
|
185
|
-
Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
185
|
+
Metadata.open(options[:metadata], **options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
186
186
|
end
|
187
187
|
|
188
188
|
# Search for metadata until found
|
@@ -191,13 +191,13 @@ module RDF::Tabular
|
|
191
191
|
if !metadata && input.respond_to?(:links) &&
|
192
192
|
link = input.links.find_link(%w(rel describedby))
|
193
193
|
link_loc = RDF::URI(base).join(link.href).to_s
|
194
|
-
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
194
|
+
md = Metadata.open(link_loc, **options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
195
195
|
if md
|
196
196
|
# Metadata must describe file to be useful
|
197
197
|
if md.describes_file?(base)
|
198
198
|
metadata = md
|
199
199
|
else
|
200
|
-
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", **options)
|
201
201
|
end
|
202
202
|
end
|
203
203
|
end
|
@@ -206,28 +206,28 @@ module RDF::Tabular
|
|
206
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
207
207
|
if !metadata && base
|
208
208
|
templates = site_wide_config(base)
|
209
|
-
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
209
|
+
log_debug("for_input", **options) {"templates: #{templates.map(&:to_s).inspect}"}
|
210
210
|
locs = templates.map do |template|
|
211
211
|
t = Addressable::Template.new(template)
|
212
212
|
RDF::URI(base).join(t.expand(url: base).to_s)
|
213
213
|
end
|
214
|
-
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
214
|
+
log_debug("for_input", **options) {"locs: #{locs.map(&:to_s).inspect}"}
|
215
215
|
|
216
216
|
locs.each do |loc|
|
217
217
|
metadata ||= begin
|
218
|
-
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
218
|
+
md = Metadata.open(loc, **options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
219
|
# Metadata must describe file to be useful
|
220
220
|
if md
|
221
221
|
# Metadata must describe file to be useful
|
222
222
|
if md.describes_file?(base)
|
223
223
|
md
|
224
224
|
else
|
225
|
-
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
225
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", **options)
|
226
226
|
nil
|
227
227
|
end
|
228
228
|
end
|
229
229
|
rescue IOError
|
230
|
-
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
230
|
+
log_debug("for_input", **options) {"failed to load found metadata #{loc}: #{$!}"}
|
231
231
|
nil
|
232
232
|
end
|
233
233
|
end
|
@@ -236,8 +236,8 @@ module RDF::Tabular
|
|
236
236
|
# Return either the merge of user- and found-metadata, any of these, or an empty TableGroup
|
237
237
|
metadata = case
|
238
238
|
when metadata then metadata
|
239
|
-
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
|
240
|
-
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
|
239
|
+
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, **options)
|
240
|
+
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, **options)
|
241
241
|
end
|
242
242
|
|
243
243
|
# Make TableGroup, if not already
|
@@ -246,7 +246,7 @@ module RDF::Tabular
|
|
246
246
|
|
247
247
|
##
|
248
248
|
# @private
|
249
|
-
def self.new(input, options
|
249
|
+
def self.new(input, **options)
|
250
250
|
# Trivial case
|
251
251
|
return input if input.is_a?(Metadata)
|
252
252
|
|
@@ -297,7 +297,7 @@ module RDF::Tabular
|
|
297
297
|
end
|
298
298
|
|
299
299
|
md = klass.allocate
|
300
|
-
md.send(:initialize, object, options)
|
300
|
+
md.send(:initialize, object, **options)
|
301
301
|
md
|
302
302
|
rescue ::JSON::ParserError
|
303
303
|
raise Error, "Expected input to be a JSON Object"
|
@@ -318,7 +318,7 @@ module RDF::Tabular
|
|
318
318
|
# @option options [Boolean] :validate Strict metadata validation
|
319
319
|
# @raise [Error]
|
320
320
|
# @return [Metadata]
|
321
|
-
def initialize(input, options
|
321
|
+
def initialize(input, **options)
|
322
322
|
@options = options.dup
|
323
323
|
|
324
324
|
# Parent of this Metadata, if any
|
@@ -467,16 +467,16 @@ module RDF::Tabular
|
|
467
467
|
object[:tableSchema] = case value
|
468
468
|
when String
|
469
469
|
link = context.base.join(value).to_s
|
470
|
-
md = Schema.open(link,
|
470
|
+
md = Schema.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
471
471
|
md[:@id] ||= link
|
472
472
|
md
|
473
473
|
when Hash
|
474
|
-
Schema.new(value,
|
474
|
+
Schema.new(value, **@options.merge(parent: self, context: nil))
|
475
475
|
when Schema
|
476
476
|
value
|
477
477
|
else
|
478
478
|
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
479
|
-
Schema.new({},
|
479
|
+
Schema.new({}, **@options.merge(parent: self, context: nil))
|
480
480
|
end
|
481
481
|
end
|
482
482
|
|
@@ -491,7 +491,7 @@ module RDF::Tabular
|
|
491
491
|
when object[:dialect] then object[:dialect]
|
492
492
|
when parent then parent.dialect
|
493
493
|
when is_a?(Table) || is_a?(TableGroup)
|
494
|
-
d = Dialect.new({},
|
494
|
+
d = Dialect.new({}, **@options.merge(parent: self, context: nil))
|
495
495
|
self.dialect = d unless self.parent
|
496
496
|
d
|
497
497
|
else
|
@@ -514,11 +514,11 @@ module RDF::Tabular
|
|
514
514
|
@dialect = object[:dialect] = case value
|
515
515
|
when String
|
516
516
|
link = context.base.join(value).to_s
|
517
|
-
md = Metadata.open(link,
|
517
|
+
md = Metadata.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
518
518
|
md[:@id] ||= link
|
519
519
|
md
|
520
520
|
when Hash
|
521
|
-
Dialect.new(value,
|
521
|
+
Dialect.new(value, **@options.merge(parent: self, context: nil))
|
522
522
|
when Dialect
|
523
523
|
value
|
524
524
|
else
|
@@ -532,8 +532,8 @@ module RDF::Tabular
|
|
532
532
|
# @raise [Error] if datatype is not valid
|
533
533
|
def datatype=(value)
|
534
534
|
val = case value
|
535
|
-
when Hash then Datatype.new(value,
|
536
|
-
else Datatype.new({base: value},
|
535
|
+
when Hash then Datatype.new(value, **@options.merge(parent: self))
|
536
|
+
else Datatype.new({base: value}, **@options.merge(parent: self))
|
537
537
|
end
|
538
538
|
|
539
539
|
if val.valid? || value.is_a?(Hash)
|
@@ -564,7 +564,7 @@ module RDF::Tabular
|
|
564
564
|
end
|
565
565
|
|
566
566
|
##
|
567
|
-
# Validate metadata, raising an error containing all errors detected during validation
|
567
|
+
# Validate metadata and content, raising an error containing all errors detected during validation
|
568
568
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
569
569
|
# @return [self]
|
570
570
|
def validate
|
@@ -872,7 +872,7 @@ module RDF::Tabular
|
|
872
872
|
csv << data unless data.empty?
|
873
873
|
end
|
874
874
|
else
|
875
|
-
csv = ::CSV.new(input, csv_options)
|
875
|
+
csv = ::CSV.new(input, **csv_options)
|
876
876
|
# Skip skipRows and headerRowCount
|
877
877
|
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
878
|
(1..skipped).each {csv.shift}
|
@@ -891,7 +891,7 @@ module RDF::Tabular
|
|
891
891
|
next
|
892
892
|
end
|
893
893
|
number += 1
|
894
|
-
row = Row.new(data, self, number, number + skipped,
|
894
|
+
row = Row.new(data, self, number, number + skipped, **@options)
|
895
895
|
(self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
|
896
896
|
yield(row)
|
897
897
|
end
|
@@ -1036,13 +1036,13 @@ module RDF::Tabular
|
|
1036
1036
|
end
|
1037
1037
|
index = 0
|
1038
1038
|
object_columns.all? do |cb|
|
1039
|
-
ca = non_virtual_columns[index] || Column.new({},
|
1039
|
+
ca = non_virtual_columns[index] || Column.new({}, **@options)
|
1040
1040
|
ta = ca.titles || {}
|
1041
1041
|
tb = cb.titles || {}
|
1042
1042
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1043
1043
|
true
|
1044
1044
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1045
|
-
raise Error, "
|
1045
|
+
raise Error, "Column #{index + 1} doesn't match on name: #{ca.name || 'no name'}, #{cb.name || 'no name'}" unless ca.name == cb.name
|
1046
1046
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1047
1047
|
# If validating, column compatibility requires strict match between titles
|
1048
1048
|
titles_match = case
|
@@ -1066,10 +1066,10 @@ module RDF::Tabular
|
|
1066
1066
|
true
|
1067
1067
|
elsif !@options[:validate]
|
1068
1068
|
# If not validating, columns don't match, but processing continues
|
1069
|
-
log_warn "
|
1069
|
+
log_warn "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1070
1070
|
true
|
1071
1071
|
else
|
1072
|
-
raise Error, "
|
1072
|
+
raise Error, "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1073
1073
|
end
|
1074
1074
|
end
|
1075
1075
|
index += 1
|
@@ -1235,13 +1235,13 @@ module RDF::Tabular
|
|
1235
1235
|
end
|
1236
1236
|
|
1237
1237
|
# General setter for array properties
|
1238
|
-
def set_array_value(key, value, klass, options
|
1238
|
+
def set_array_value(key, value, klass, **options)
|
1239
1239
|
object[key] = case value
|
1240
1240
|
when Array
|
1241
1241
|
value.map do |v|
|
1242
1242
|
case v
|
1243
1243
|
when Hash
|
1244
|
-
klass.new(v,
|
1244
|
+
klass.new(v, **@options.merge(options).merge(parent: self, context: nil))
|
1245
1245
|
else v
|
1246
1246
|
end
|
1247
1247
|
end
|
@@ -1282,11 +1282,11 @@ module RDF::Tabular
|
|
1282
1282
|
class DebugContext
|
1283
1283
|
include RDF::Util::Logger
|
1284
1284
|
end
|
1285
|
-
def self.log_debug(*args, &block)
|
1286
|
-
DebugContext.new.log_debug(*args, &block)
|
1285
|
+
def self.log_debug(*args, **options, &block)
|
1286
|
+
DebugContext.new.log_debug(*args, **options, &block)
|
1287
1287
|
end
|
1288
|
-
def self.log_warn(*args)
|
1289
|
-
DebugContext.new.log_warn(*args)
|
1288
|
+
def self.log_warn(*args, **options)
|
1289
|
+
DebugContext.new.log_warn(*args, **options)
|
1290
1290
|
end
|
1291
1291
|
end
|
1292
1292
|
|
@@ -1434,7 +1434,7 @@ module RDF::Tabular
|
|
1434
1434
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1435
1435
|
ctx = @context
|
1436
1436
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1437
|
-
tg = TableGroup.new(content,
|
1437
|
+
tg = TableGroup.new(content, **@options.merge(context: ctx, filenames: @filenames, base: base))
|
1438
1438
|
@parent = tg # Link from parent
|
1439
1439
|
tg
|
1440
1440
|
end
|
@@ -1489,7 +1489,7 @@ module RDF::Tabular
|
|
1489
1489
|
number += 1
|
1490
1490
|
case v
|
1491
1491
|
when Hash
|
1492
|
-
Column.new(v,
|
1492
|
+
Column.new(v, **@options.merge(
|
1493
1493
|
table: (parent if parent.is_a?(Table)),
|
1494
1494
|
parent: self,
|
1495
1495
|
context: nil,
|
@@ -1621,8 +1621,8 @@ module RDF::Tabular
|
|
1621
1621
|
def name
|
1622
1622
|
self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
|
1623
1623
|
n = Array(ts).first
|
1624
|
-
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1625
|
-
n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1624
|
+
n0 = RDF::URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1625
|
+
n1 = RDF::URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1626
1626
|
"#{n0}#{n1}"
|
1627
1627
|
end || "_col.#{number}"
|
1628
1628
|
end
|
@@ -1783,12 +1783,12 @@ module RDF::Tabular
|
|
1783
1783
|
# @option options [String] :lang, language to set in table, if any
|
1784
1784
|
# @return [Metadata] Tabular metadata
|
1785
1785
|
# @see http://w3c.github.io/csvw/syntax/#parsing
|
1786
|
-
def embedded_metadata(input, metadata, options
|
1786
|
+
def embedded_metadata(input, metadata, **options)
|
1787
1787
|
options = options.dup
|
1788
1788
|
options.delete(:context) # Don't accidentally use a passed context
|
1789
1789
|
# Normalize input to an IO object
|
1790
1790
|
if input.is_a?(String)
|
1791
|
-
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, options.merge(base: input.to_s))}
|
1791
|
+
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, **options.merge(base: input.to_s))}
|
1792
1792
|
end
|
1793
1793
|
|
1794
1794
|
table = {
|
@@ -1826,7 +1826,7 @@ module RDF::Tabular
|
|
1826
1826
|
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
1827
|
# Skip columns
|
1828
1828
|
skipCols = skipColumns.to_i
|
1829
|
-
next if index < skipCols
|
1829
|
+
next if index < skipCols || value.to_s.empty?
|
1830
1830
|
|
1831
1831
|
# Trim value
|
1832
1832
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1837,11 +1837,11 @@ module RDF::Tabular
|
|
1837
1837
|
column = columns[index - skipCols] ||= {
|
1838
1838
|
"titles" => {lang => []},
|
1839
1839
|
}
|
1840
|
-
column["titles"][lang] << value
|
1840
|
+
column["titles"][lang] << value if value
|
1841
1841
|
end
|
1842
1842
|
end
|
1843
1843
|
else
|
1844
|
-
csv = ::CSV.new(input, csv_options)
|
1844
|
+
csv = ::CSV.new(input, **csv_options)
|
1845
1845
|
(1..skipRows.to_i).each do
|
1846
1846
|
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1847
1847
|
# Trim value
|
@@ -1858,7 +1858,7 @@ module RDF::Tabular
|
|
1858
1858
|
Array(row_data).each_with_index do |value, index|
|
1859
1859
|
# Skip columns
|
1860
1860
|
skipCols = skipColumns.to_i
|
1861
|
-
next if index < skipCols
|
1861
|
+
next if index < skipCols || value.to_s.empty?
|
1862
1862
|
|
1863
1863
|
# Trim value
|
1864
1864
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1876,7 +1876,7 @@ module RDF::Tabular
|
|
1876
1876
|
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1877
1877
|
input.rewind if input.respond_to?(:rewind)
|
1878
1878
|
|
1879
|
-
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1879
|
+
Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1880
1880
|
end
|
1881
1881
|
end
|
1882
1882
|
|
@@ -2026,7 +2026,7 @@ module RDF::Tabular
|
|
2026
2026
|
# @param [Hash{Symbol => Object}] options ({})
|
2027
2027
|
# @option options [Boolean] :validate check for PK/FK consistency
|
2028
2028
|
# @return [Row]
|
2029
|
-
def initialize(row, metadata, number, source_number, options
|
2029
|
+
def initialize(row, metadata, number, source_number, **options)
|
2030
2030
|
@table = metadata
|
2031
2031
|
@number = number
|
2032
2032
|
@sourceNumber = source_number
|
@@ -2058,13 +2058,13 @@ module RDF::Tabular
|
|
2058
2058
|
|
2059
2059
|
# create column if necessary
|
2060
2060
|
columns[index - skipColumns] ||=
|
2061
|
-
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2061
|
+
Column.new({}, **options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2062
2062
|
|
2063
2063
|
column = columns[index - skipColumns]
|
2064
2064
|
|
2065
2065
|
@values << cell = Cell.new(metadata, column, self, value)
|
2066
2066
|
|
2067
|
-
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2067
|
+
datatype = column.datatype || Datatype.new({base: "string"}, **options.merge(parent: column))
|
2068
2068
|
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2069
2069
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2070
2070
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -2110,7 +2110,7 @@ module RDF::Tabular
|
|
2110
2110
|
# Map URLs for row
|
2111
2111
|
@values.each_with_index do |cell, index|
|
2112
2112
|
mapped_values = map_values.merge(
|
2113
|
-
"_name" =>
|
2113
|
+
"_name" => CGI.unescape(cell.column.name),
|
2114
2114
|
"_column" => cell.column.number,
|
2115
2115
|
"_sourceColumn" => cell.column.sourceNumber
|
2116
2116
|
)
|
@@ -2171,33 +2171,13 @@ module RDF::Tabular
|
|
2171
2171
|
decimalChar = format["decimalChar"] || '.'
|
2172
2172
|
pattern = format["pattern"]
|
2173
2173
|
|
2174
|
-
|
2174
|
+
begin
|
2175
|
+
value = datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
|
2176
|
+
rescue UAX35::ParseError
|
2175
2177
|
value_errors << "#{value} does not match numeric pattern #{pattern ? pattern.inspect : 'default'}"
|
2176
2178
|
end
|
2177
2179
|
|
2178
|
-
# pattern facet failed
|
2179
|
-
value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
|
2180
|
-
value = value.gsub(groupChar || ',', '')
|
2181
|
-
value = value.sub(decimalChar, '.')
|
2182
|
-
|
2183
|
-
# Extract percent or per-mille sign
|
2184
|
-
percent = permille = false
|
2185
|
-
case value
|
2186
|
-
when /%/
|
2187
|
-
value = value.sub('%', '')
|
2188
|
-
percent = true
|
2189
|
-
when /‰/
|
2190
|
-
value = value.sub('‰', '')
|
2191
|
-
permille = true
|
2192
|
-
end
|
2193
|
-
|
2194
2180
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
2195
|
-
if percent || permille
|
2196
|
-
o = lit.object
|
2197
|
-
o = o / 100 if percent
|
2198
|
-
o = o / 1000 if permille
|
2199
|
-
lit = RDF::Literal(o, datatype: expanded_dt)
|
2200
|
-
end
|
2201
2181
|
|
2202
2182
|
if !lit.plain? && datatype.minimum && lit < datatype.minimum
|
2203
2183
|
value_errors << "#{value} < minimum #{datatype.minimum}"
|
@@ -2238,10 +2218,11 @@ module RDF::Tabular
|
|
2238
2218
|
end
|
2239
2219
|
end
|
2240
2220
|
when :date, :time, :dateTime, :dateTimeStamp, :datetime
|
2241
|
-
|
2221
|
+
begin
|
2222
|
+
value = datatype.parse_uax35_date(format, value)
|
2242
2223
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
2243
|
-
|
2244
|
-
value_errors << "#{
|
2224
|
+
rescue UAX35::ParseError
|
2225
|
+
value_errors << "#{value} does not match format #{format}"
|
2245
2226
|
end
|
2246
2227
|
when :duration, :dayTimeDuration, :yearMonthDuration
|
2247
2228
|
# SPEC CONFUSION: surely format also includes that for other duration types?
|