rdf-tabular 0.4.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +24 -5
- data/VERSION +1 -1
- data/etc/csvw.jsonld +135 -50
- data/lib/rdf/tabular/csvw.rb +215 -181
- data/lib/rdf/tabular/format.rb +8 -6
- data/lib/rdf/tabular/literal.rb +1 -1
- data/lib/rdf/tabular/metadata.rb +61 -80
- data/lib/rdf/tabular/reader.rb +18 -15
- data/lib/rdf/tabular/uax35.rb +143 -38
- data/spec/data/countries-minimal.json +38 -0
- data/spec/data/countries-minimal.ttl +36 -0
- data/spec/data/countries-standard.json +86 -0
- data/spec/data/countries-standard.ttl +75 -0
- data/spec/data/countries.csv +4 -0
- data/spec/data/countries.csv-minimal.json +16 -0
- data/spec/data/countries.csv-minimal.ttl +19 -0
- data/spec/data/countries.csv-standard.json +33 -0
- data/spec/data/countries.csv-standard.ttl +44 -0
- data/spec/data/countries.html +88 -0
- data/spec/data/countries.json +53 -0
- data/spec/data/countries_embed-minimal.json +38 -0
- data/spec/data/countries_embed-minimal.ttl +36 -0
- data/spec/data/countries_embed-standard.json +86 -0
- data/spec/data/countries_embed-standard.ttl +75 -0
- data/spec/data/countries_embed.html +88 -0
- data/spec/data/countries_html-minimal.json +38 -0
- data/spec/data/countries_html-minimal.ttl +36 -0
- data/spec/data/countries_html-standard.json +86 -0
- data/spec/data/countries_html-standard.ttl +75 -0
- data/spec/data/country-codes-and-names-minimal.json +19 -0
- data/spec/data/country-codes-and-names-minimal.ttl +22 -0
- data/spec/data/country-codes-and-names-standard.json +47 -0
- data/spec/data/country-codes-and-names-standard.ttl +45 -0
- data/spec/data/country-codes-and-names.csv +5 -0
- data/spec/data/country_slice.csv +4 -0
- data/spec/data/junior-roles.csv +3 -0
- data/spec/data/junior-roles.json +54 -0
- data/spec/data/roles-minimal.json +32 -0
- data/spec/data/roles-minimal.ttl +36 -0
- data/spec/data/roles-standard.json +56 -0
- data/spec/data/roles-standard.ttl +66 -0
- data/spec/data/roles.json +23 -0
- data/spec/data/senior-roles.csv +3 -0
- data/spec/data/senior-roles.json +52 -0
- data/spec/data/test232-metadata.json +10 -0
- data/spec/data/test232.csv +3 -0
- data/spec/data/tree-ops-atd.json +1 -0
- data/spec/data/tree-ops-ext-minimal.json +42 -0
- data/spec/data/tree-ops-ext-minimal.ttl +34 -0
- data/spec/data/tree-ops-ext-standard.json +93 -0
- data/spec/data/tree-ops-ext-standard.ttl +82 -0
- data/spec/data/tree-ops-ext.csv +4 -0
- data/spec/data/tree-ops-ext.json +81 -0
- data/spec/data/tree-ops-minimal.json +18 -0
- data/spec/data/tree-ops-minimal.ttl +14 -0
- data/spec/data/tree-ops-standard.json +44 -0
- data/spec/data/tree-ops-standard.ttl +44 -0
- data/spec/data/tree-ops-virtual-minimal.json +32 -0
- data/spec/data/tree-ops-virtual-minimal.ttl +25 -0
- data/spec/data/tree-ops-virtual-standard.json +49 -0
- data/spec/data/tree-ops-virtual-standard.ttl +49 -0
- data/spec/data/tree-ops-virtual.json +48 -0
- data/spec/data/tree-ops.csv +3 -0
- data/spec/data/tree-ops.csv-metadata.json +43 -0
- data/spec/data/tree-ops.html +54 -0
- data/spec/data/tree-ops.tsv +3 -0
- data/spec/format_spec.rb +5 -4
- data/spec/metadata_spec.rb +10 -16
- data/spec/suite_helper.rb +2 -2
- data/spec/suite_spec.rb +5 -6
- data/spec/uax35_spec.rb +239 -0
- metadata +149 -36
- data/lib/rdf/tabular/json.rb +0 -0
data/lib/rdf/tabular/format.rb
CHANGED
@@ -24,10 +24,10 @@ module RDF::Tabular
|
|
24
24
|
#
|
25
25
|
# @see http://www.w3.org/TR/rdf-testcases/#ntriples
|
26
26
|
class Format < RDF::Format
|
27
|
-
content_type 'text/csv',
|
27
|
+
content_type 'text/csv;q=0.4',
|
28
28
|
extensions: [:csv, :tsv],
|
29
29
|
alias: %w{
|
30
|
-
text/tab-separated-values
|
30
|
+
text/tab-separated-values;q=0.4
|
31
31
|
application/csvm+json
|
32
32
|
}
|
33
33
|
content_encoding 'utf-8'
|
@@ -52,10 +52,12 @@ module RDF::Tabular
|
|
52
52
|
# @return [Hash{Symbol => Lambda(Array, Hash)}]
|
53
53
|
def self.cli_commands
|
54
54
|
{
|
55
|
-
|
56
|
-
description: "
|
57
|
-
|
58
|
-
|
55
|
+
"tabular-json": {
|
56
|
+
description: "Serialize using tabular JSON",
|
57
|
+
parse: false,
|
58
|
+
filter: {format: :tabular}, # Only shows output format set
|
59
|
+
option_use: {output_format: :disabled},
|
60
|
+
help: "tabular-json --input-format tabular files ...\nGenerate tabular JSON output, rather than RDF for Tabular data",
|
59
61
|
lambda: ->(argv, opts) do
|
60
62
|
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
61
63
|
out = opts[:output] || $stdout
|
data/lib/rdf/tabular/literal.rb
CHANGED
@@ -13,7 +13,7 @@ module RDF::Tabular
|
|
13
13
|
##
|
14
14
|
# @param [Object] value
|
15
15
|
# @option options [String] :lexical (nil)
|
16
|
-
def initialize(value, options
|
16
|
+
def initialize(value, **options)
|
17
17
|
@datatype = options[:datatype] || DATATYPE
|
18
18
|
@string = options[:lexical] if options.has_key?(:lexical)
|
19
19
|
if value.is_a?(String)
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -136,15 +136,15 @@ module RDF::Tabular
|
|
136
136
|
# see `RDF::Util::File.open_file` in RDF.rb and {new}
|
137
137
|
# @yield [Metadata]
|
138
138
|
# @raise [IOError] if file not found
|
139
|
-
def self.open(path, options
|
139
|
+
def self.open(path, **options)
|
140
140
|
options = options.merge(
|
141
141
|
headers: {
|
142
142
|
'Accept' => 'application/ld+json, application/json'
|
143
143
|
}
|
144
144
|
)
|
145
145
|
path = "file:" + path unless path =~ /^\w+:/
|
146
|
-
RDF::Util::File.open_file(path, options) do |file|
|
147
|
-
self.new(file, options.merge(base: path, filenames: path))
|
146
|
+
RDF::Util::File.open_file(path, **options) do |file|
|
147
|
+
self.new(file, **options.merge(base: path, filenames: path))
|
148
148
|
end
|
149
149
|
end
|
150
150
|
|
@@ -173,16 +173,16 @@ module RDF::Tabular
|
|
173
173
|
# @option options [RDF::URI] :base
|
174
174
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not a URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
175
175
|
# @return [Metadata]
|
176
|
-
def self.for_input(input, options
|
176
|
+
def self.for_input(input, **options)
|
177
177
|
base = options[:base]
|
178
178
|
|
179
179
|
# Use user metadata, if provided
|
180
180
|
metadata = case options[:metadata]
|
181
181
|
when Metadata then options[:metadata]
|
182
182
|
when Hash
|
183
|
-
Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
183
|
+
Metadata.new(options[:metadata], **options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
184
184
|
when String, RDF::URI
|
185
|
-
Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
185
|
+
Metadata.open(options[:metadata], **options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
186
186
|
end
|
187
187
|
|
188
188
|
# Search for metadata until found
|
@@ -191,13 +191,13 @@ module RDF::Tabular
|
|
191
191
|
if !metadata && input.respond_to?(:links) &&
|
192
192
|
link = input.links.find_link(%w(rel describedby))
|
193
193
|
link_loc = RDF::URI(base).join(link.href).to_s
|
194
|
-
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
194
|
+
md = Metadata.open(link_loc, **options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
195
195
|
if md
|
196
196
|
# Metadata must describe file to be useful
|
197
197
|
if md.describes_file?(base)
|
198
198
|
metadata = md
|
199
199
|
else
|
200
|
-
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", **options)
|
201
201
|
end
|
202
202
|
end
|
203
203
|
end
|
@@ -206,28 +206,28 @@ module RDF::Tabular
|
|
206
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
207
207
|
if !metadata && base
|
208
208
|
templates = site_wide_config(base)
|
209
|
-
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
209
|
+
log_debug("for_input", **options) {"templates: #{templates.map(&:to_s).inspect}"}
|
210
210
|
locs = templates.map do |template|
|
211
211
|
t = Addressable::Template.new(template)
|
212
212
|
RDF::URI(base).join(t.expand(url: base).to_s)
|
213
213
|
end
|
214
|
-
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
214
|
+
log_debug("for_input", **options) {"locs: #{locs.map(&:to_s).inspect}"}
|
215
215
|
|
216
216
|
locs.each do |loc|
|
217
217
|
metadata ||= begin
|
218
|
-
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
218
|
+
md = Metadata.open(loc, **options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
219
|
# Metadata must describe file to be useful
|
220
220
|
if md
|
221
221
|
# Metadata must describe file to be useful
|
222
222
|
if md.describes_file?(base)
|
223
223
|
md
|
224
224
|
else
|
225
|
-
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
225
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", **options)
|
226
226
|
nil
|
227
227
|
end
|
228
228
|
end
|
229
229
|
rescue IOError
|
230
|
-
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
230
|
+
log_debug("for_input", **options) {"failed to load found metadata #{loc}: #{$!}"}
|
231
231
|
nil
|
232
232
|
end
|
233
233
|
end
|
@@ -236,8 +236,8 @@ module RDF::Tabular
|
|
236
236
|
# Return either the merge of user- and found-metadata, any of these, or an empty TableGroup
|
237
237
|
metadata = case
|
238
238
|
when metadata then metadata
|
239
|
-
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
|
240
|
-
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
|
239
|
+
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, **options)
|
240
|
+
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, **options)
|
241
241
|
end
|
242
242
|
|
243
243
|
# Make TableGroup, if not already
|
@@ -246,7 +246,7 @@ module RDF::Tabular
|
|
246
246
|
|
247
247
|
##
|
248
248
|
# @private
|
249
|
-
def self.new(input, options
|
249
|
+
def self.new(input, **options)
|
250
250
|
# Trivial case
|
251
251
|
return input if input.is_a?(Metadata)
|
252
252
|
|
@@ -297,7 +297,7 @@ module RDF::Tabular
|
|
297
297
|
end
|
298
298
|
|
299
299
|
md = klass.allocate
|
300
|
-
md.send(:initialize, object, options)
|
300
|
+
md.send(:initialize, object, **options)
|
301
301
|
md
|
302
302
|
rescue ::JSON::ParserError
|
303
303
|
raise Error, "Expected input to be a JSON Object"
|
@@ -318,7 +318,7 @@ module RDF::Tabular
|
|
318
318
|
# @option options [Boolean] :validate Strict metadata validation
|
319
319
|
# @raise [Error]
|
320
320
|
# @return [Metadata]
|
321
|
-
def initialize(input, options
|
321
|
+
def initialize(input, **options)
|
322
322
|
@options = options.dup
|
323
323
|
|
324
324
|
# Parent of this Metadata, if any
|
@@ -467,16 +467,16 @@ module RDF::Tabular
|
|
467
467
|
object[:tableSchema] = case value
|
468
468
|
when String
|
469
469
|
link = context.base.join(value).to_s
|
470
|
-
md = Schema.open(link,
|
470
|
+
md = Schema.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
471
471
|
md[:@id] ||= link
|
472
472
|
md
|
473
473
|
when Hash
|
474
|
-
Schema.new(value,
|
474
|
+
Schema.new(value, **@options.merge(parent: self, context: nil))
|
475
475
|
when Schema
|
476
476
|
value
|
477
477
|
else
|
478
478
|
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
479
|
-
Schema.new({},
|
479
|
+
Schema.new({}, **@options.merge(parent: self, context: nil))
|
480
480
|
end
|
481
481
|
end
|
482
482
|
|
@@ -491,7 +491,7 @@ module RDF::Tabular
|
|
491
491
|
when object[:dialect] then object[:dialect]
|
492
492
|
when parent then parent.dialect
|
493
493
|
when is_a?(Table) || is_a?(TableGroup)
|
494
|
-
d = Dialect.new({},
|
494
|
+
d = Dialect.new({}, **@options.merge(parent: self, context: nil))
|
495
495
|
self.dialect = d unless self.parent
|
496
496
|
d
|
497
497
|
else
|
@@ -514,11 +514,11 @@ module RDF::Tabular
|
|
514
514
|
@dialect = object[:dialect] = case value
|
515
515
|
when String
|
516
516
|
link = context.base.join(value).to_s
|
517
|
-
md = Metadata.open(link,
|
517
|
+
md = Metadata.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
518
518
|
md[:@id] ||= link
|
519
519
|
md
|
520
520
|
when Hash
|
521
|
-
Dialect.new(value,
|
521
|
+
Dialect.new(value, **@options.merge(parent: self, context: nil))
|
522
522
|
when Dialect
|
523
523
|
value
|
524
524
|
else
|
@@ -532,8 +532,8 @@ module RDF::Tabular
|
|
532
532
|
# @raise [Error] if datatype is not valid
|
533
533
|
def datatype=(value)
|
534
534
|
val = case value
|
535
|
-
when Hash then Datatype.new(value,
|
536
|
-
else Datatype.new({base: value},
|
535
|
+
when Hash then Datatype.new(value, **@options.merge(parent: self))
|
536
|
+
else Datatype.new({base: value}, **@options.merge(parent: self))
|
537
537
|
end
|
538
538
|
|
539
539
|
if val.valid? || value.is_a?(Hash)
|
@@ -564,7 +564,7 @@ module RDF::Tabular
|
|
564
564
|
end
|
565
565
|
|
566
566
|
##
|
567
|
-
# Validate metadata, raising an error containing all errors detected during validation
|
567
|
+
# Validate metadata and content, raising an error containing all errors detected during validation
|
568
568
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
569
569
|
# @return [self]
|
570
570
|
def validate
|
@@ -872,7 +872,7 @@ module RDF::Tabular
|
|
872
872
|
csv << data unless data.empty?
|
873
873
|
end
|
874
874
|
else
|
875
|
-
csv = ::CSV.new(input, csv_options)
|
875
|
+
csv = ::CSV.new(input, **csv_options)
|
876
876
|
# Skip skipRows and headerRowCount
|
877
877
|
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
878
|
(1..skipped).each {csv.shift}
|
@@ -891,7 +891,7 @@ module RDF::Tabular
|
|
891
891
|
next
|
892
892
|
end
|
893
893
|
number += 1
|
894
|
-
row = Row.new(data, self, number, number + skipped,
|
894
|
+
row = Row.new(data, self, number, number + skipped, **@options)
|
895
895
|
(self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
|
896
896
|
yield(row)
|
897
897
|
end
|
@@ -1036,13 +1036,13 @@ module RDF::Tabular
|
|
1036
1036
|
end
|
1037
1037
|
index = 0
|
1038
1038
|
object_columns.all? do |cb|
|
1039
|
-
ca = non_virtual_columns[index] || Column.new({},
|
1039
|
+
ca = non_virtual_columns[index] || Column.new({}, **@options)
|
1040
1040
|
ta = ca.titles || {}
|
1041
1041
|
tb = cb.titles || {}
|
1042
1042
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1043
1043
|
true
|
1044
1044
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1045
|
-
raise Error, "
|
1045
|
+
raise Error, "Column #{index + 1} doesn't match on name: #{ca.name || 'no name'}, #{cb.name || 'no name'}" unless ca.name == cb.name
|
1046
1046
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1047
1047
|
# If validating, column compatibility requires strict match between titles
|
1048
1048
|
titles_match = case
|
@@ -1066,10 +1066,10 @@ module RDF::Tabular
|
|
1066
1066
|
true
|
1067
1067
|
elsif !@options[:validate]
|
1068
1068
|
# If not validating, columns don't match, but processing continues
|
1069
|
-
log_warn "
|
1069
|
+
log_warn "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1070
1070
|
true
|
1071
1071
|
else
|
1072
|
-
raise Error, "
|
1072
|
+
raise Error, "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1073
1073
|
end
|
1074
1074
|
end
|
1075
1075
|
index += 1
|
@@ -1235,13 +1235,13 @@ module RDF::Tabular
|
|
1235
1235
|
end
|
1236
1236
|
|
1237
1237
|
# General setter for array properties
|
1238
|
-
def set_array_value(key, value, klass, options
|
1238
|
+
def set_array_value(key, value, klass, **options)
|
1239
1239
|
object[key] = case value
|
1240
1240
|
when Array
|
1241
1241
|
value.map do |v|
|
1242
1242
|
case v
|
1243
1243
|
when Hash
|
1244
|
-
klass.new(v,
|
1244
|
+
klass.new(v, **@options.merge(options).merge(parent: self, context: nil))
|
1245
1245
|
else v
|
1246
1246
|
end
|
1247
1247
|
end
|
@@ -1282,11 +1282,11 @@ module RDF::Tabular
|
|
1282
1282
|
class DebugContext
|
1283
1283
|
include RDF::Util::Logger
|
1284
1284
|
end
|
1285
|
-
def self.log_debug(*args, &block)
|
1286
|
-
DebugContext.new.log_debug(*args, &block)
|
1285
|
+
def self.log_debug(*args, **options, &block)
|
1286
|
+
DebugContext.new.log_debug(*args, **options, &block)
|
1287
1287
|
end
|
1288
|
-
def self.log_warn(*args)
|
1289
|
-
DebugContext.new.log_warn(*args)
|
1288
|
+
def self.log_warn(*args, **options)
|
1289
|
+
DebugContext.new.log_warn(*args, **options)
|
1290
1290
|
end
|
1291
1291
|
end
|
1292
1292
|
|
@@ -1434,7 +1434,7 @@ module RDF::Tabular
|
|
1434
1434
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1435
1435
|
ctx = @context
|
1436
1436
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1437
|
-
tg = TableGroup.new(content,
|
1437
|
+
tg = TableGroup.new(content, **@options.merge(context: ctx, filenames: @filenames, base: base))
|
1438
1438
|
@parent = tg # Link from parent
|
1439
1439
|
tg
|
1440
1440
|
end
|
@@ -1489,7 +1489,7 @@ module RDF::Tabular
|
|
1489
1489
|
number += 1
|
1490
1490
|
case v
|
1491
1491
|
when Hash
|
1492
|
-
Column.new(v,
|
1492
|
+
Column.new(v, **@options.merge(
|
1493
1493
|
table: (parent if parent.is_a?(Table)),
|
1494
1494
|
parent: self,
|
1495
1495
|
context: nil,
|
@@ -1621,8 +1621,8 @@ module RDF::Tabular
|
|
1621
1621
|
def name
|
1622
1622
|
self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
|
1623
1623
|
n = Array(ts).first
|
1624
|
-
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1625
|
-
n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1624
|
+
n0 = RDF::URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1625
|
+
n1 = RDF::URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1626
1626
|
"#{n0}#{n1}"
|
1627
1627
|
end || "_col.#{number}"
|
1628
1628
|
end
|
@@ -1783,12 +1783,12 @@ module RDF::Tabular
|
|
1783
1783
|
# @option options [String] :lang, language to set in table, if any
|
1784
1784
|
# @return [Metadata] Tabular metadata
|
1785
1785
|
# @see http://w3c.github.io/csvw/syntax/#parsing
|
1786
|
-
def embedded_metadata(input, metadata, options
|
1786
|
+
def embedded_metadata(input, metadata, **options)
|
1787
1787
|
options = options.dup
|
1788
1788
|
options.delete(:context) # Don't accidentally use a passed context
|
1789
1789
|
# Normalize input to an IO object
|
1790
1790
|
if input.is_a?(String)
|
1791
|
-
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, options.merge(base: input.to_s))}
|
1791
|
+
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, **options.merge(base: input.to_s))}
|
1792
1792
|
end
|
1793
1793
|
|
1794
1794
|
table = {
|
@@ -1826,7 +1826,7 @@ module RDF::Tabular
|
|
1826
1826
|
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
1827
|
# Skip columns
|
1828
1828
|
skipCols = skipColumns.to_i
|
1829
|
-
next if index < skipCols
|
1829
|
+
next if index < skipCols || value.to_s.empty?
|
1830
1830
|
|
1831
1831
|
# Trim value
|
1832
1832
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1837,11 +1837,11 @@ module RDF::Tabular
|
|
1837
1837
|
column = columns[index - skipCols] ||= {
|
1838
1838
|
"titles" => {lang => []},
|
1839
1839
|
}
|
1840
|
-
column["titles"][lang] << value
|
1840
|
+
column["titles"][lang] << value if value
|
1841
1841
|
end
|
1842
1842
|
end
|
1843
1843
|
else
|
1844
|
-
csv = ::CSV.new(input, csv_options)
|
1844
|
+
csv = ::CSV.new(input, **csv_options)
|
1845
1845
|
(1..skipRows.to_i).each do
|
1846
1846
|
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1847
1847
|
# Trim value
|
@@ -1858,7 +1858,7 @@ module RDF::Tabular
|
|
1858
1858
|
Array(row_data).each_with_index do |value, index|
|
1859
1859
|
# Skip columns
|
1860
1860
|
skipCols = skipColumns.to_i
|
1861
|
-
next if index < skipCols
|
1861
|
+
next if index < skipCols || value.to_s.empty?
|
1862
1862
|
|
1863
1863
|
# Trim value
|
1864
1864
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1876,7 +1876,7 @@ module RDF::Tabular
|
|
1876
1876
|
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1877
1877
|
input.rewind if input.respond_to?(:rewind)
|
1878
1878
|
|
1879
|
-
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1879
|
+
Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1880
1880
|
end
|
1881
1881
|
end
|
1882
1882
|
|
@@ -2026,7 +2026,7 @@ module RDF::Tabular
|
|
2026
2026
|
# @param [Hash{Symbol => Object}] options ({})
|
2027
2027
|
# @option options [Boolean] :validate check for PK/FK consistency
|
2028
2028
|
# @return [Row]
|
2029
|
-
def initialize(row, metadata, number, source_number, options
|
2029
|
+
def initialize(row, metadata, number, source_number, **options)
|
2030
2030
|
@table = metadata
|
2031
2031
|
@number = number
|
2032
2032
|
@sourceNumber = source_number
|
@@ -2058,13 +2058,13 @@ module RDF::Tabular
|
|
2058
2058
|
|
2059
2059
|
# create column if necessary
|
2060
2060
|
columns[index - skipColumns] ||=
|
2061
|
-
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2061
|
+
Column.new({}, **options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2062
2062
|
|
2063
2063
|
column = columns[index - skipColumns]
|
2064
2064
|
|
2065
2065
|
@values << cell = Cell.new(metadata, column, self, value)
|
2066
2066
|
|
2067
|
-
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2067
|
+
datatype = column.datatype || Datatype.new({base: "string"}, **options.merge(parent: column))
|
2068
2068
|
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2069
2069
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2070
2070
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -2110,7 +2110,7 @@ module RDF::Tabular
|
|
2110
2110
|
# Map URLs for row
|
2111
2111
|
@values.each_with_index do |cell, index|
|
2112
2112
|
mapped_values = map_values.merge(
|
2113
|
-
"_name" =>
|
2113
|
+
"_name" => CGI.unescape(cell.column.name),
|
2114
2114
|
"_column" => cell.column.number,
|
2115
2115
|
"_sourceColumn" => cell.column.sourceNumber
|
2116
2116
|
)
|
@@ -2171,33 +2171,13 @@ module RDF::Tabular
|
|
2171
2171
|
decimalChar = format["decimalChar"] || '.'
|
2172
2172
|
pattern = format["pattern"]
|
2173
2173
|
|
2174
|
-
|
2174
|
+
begin
|
2175
|
+
value = datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
|
2176
|
+
rescue UAX35::ParseError
|
2175
2177
|
value_errors << "#{value} does not match numeric pattern #{pattern ? pattern.inspect : 'default'}"
|
2176
2178
|
end
|
2177
2179
|
|
2178
|
-
# pattern facet failed
|
2179
|
-
value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
|
2180
|
-
value = value.gsub(groupChar || ',', '')
|
2181
|
-
value = value.sub(decimalChar, '.')
|
2182
|
-
|
2183
|
-
# Extract percent or per-mille sign
|
2184
|
-
percent = permille = false
|
2185
|
-
case value
|
2186
|
-
when /%/
|
2187
|
-
value = value.sub('%', '')
|
2188
|
-
percent = true
|
2189
|
-
when /‰/
|
2190
|
-
value = value.sub('‰', '')
|
2191
|
-
permille = true
|
2192
|
-
end
|
2193
|
-
|
2194
2180
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
2195
|
-
if percent || permille
|
2196
|
-
o = lit.object
|
2197
|
-
o = o / 100 if percent
|
2198
|
-
o = o / 1000 if permille
|
2199
|
-
lit = RDF::Literal(o, datatype: expanded_dt)
|
2200
|
-
end
|
2201
2181
|
|
2202
2182
|
if !lit.plain? && datatype.minimum && lit < datatype.minimum
|
2203
2183
|
value_errors << "#{value} < minimum #{datatype.minimum}"
|
@@ -2238,10 +2218,11 @@ module RDF::Tabular
|
|
2238
2218
|
end
|
2239
2219
|
end
|
2240
2220
|
when :date, :time, :dateTime, :dateTimeStamp, :datetime
|
2241
|
-
|
2221
|
+
begin
|
2222
|
+
value = datatype.parse_uax35_date(format, value)
|
2242
2223
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
2243
|
-
|
2244
|
-
value_errors << "#{
|
2224
|
+
rescue UAX35::ParseError
|
2225
|
+
value_errors << "#{value} does not match format #{format}"
|
2245
2226
|
end
|
2246
2227
|
when :duration, :dayTimeDuration, :yearMonthDuration
|
2247
2228
|
# SPEC CONFUSION: surely format also includes that for other duration types?
|