rdf-tabular 1.0.0 → 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +62 -44
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/etc/csvw.jsonld +135 -50
- data/etc/doap.csv +1 -1
- data/etc/doap.csv-metadata.json +1 -1
- data/etc/doap.ttl +14 -13
- data/etc/earl.html +648 -648
- data/etc/earl.jsonld +691 -691
- data/etc/earl.ttl +846 -846
- data/lib/rdf/tabular.rb +3 -3
- data/lib/rdf/tabular/csvw.rb +626 -182
- data/lib/rdf/tabular/format.rb +8 -6
- data/lib/rdf/tabular/literal.rb +1 -1
- data/lib/rdf/tabular/metadata.rb +63 -58
- data/lib/rdf/tabular/reader.rb +32 -20
- data/lib/rdf/tabular/uax35.rb +1 -1
- data/spec/data/countries-minimal.json +38 -0
- data/spec/data/countries-minimal.ttl +36 -0
- data/spec/data/countries-standard.json +86 -0
- data/spec/data/countries-standard.ttl +75 -0
- data/spec/data/countries.csv +4 -0
- data/spec/data/countries.csv-minimal.json +16 -0
- data/spec/data/countries.csv-minimal.ttl +19 -0
- data/spec/data/countries.csv-standard.json +33 -0
- data/spec/data/countries.csv-standard.ttl +44 -0
- data/spec/data/countries.html +88 -0
- data/spec/data/countries.json +53 -0
- data/spec/data/countries_embed-minimal.json +38 -0
- data/spec/data/countries_embed-minimal.ttl +36 -0
- data/spec/data/countries_embed-standard.json +86 -0
- data/spec/data/countries_embed-standard.ttl +75 -0
- data/spec/data/countries_embed.html +88 -0
- data/spec/data/countries_html-minimal.json +38 -0
- data/spec/data/countries_html-minimal.ttl +36 -0
- data/spec/data/countries_html-standard.json +86 -0
- data/spec/data/countries_html-standard.ttl +75 -0
- data/spec/data/country-codes-and-names-minimal.json +19 -0
- data/spec/data/country-codes-and-names-minimal.ttl +22 -0
- data/spec/data/country-codes-and-names-standard.json +47 -0
- data/spec/data/country-codes-and-names-standard.ttl +45 -0
- data/spec/data/country-codes-and-names.csv +5 -0
- data/spec/data/country_slice.csv +4 -0
- data/spec/data/junior-roles.csv +3 -0
- data/spec/data/junior-roles.json +54 -0
- data/spec/data/roles-minimal.json +32 -0
- data/spec/data/roles-minimal.ttl +36 -0
- data/spec/data/roles-standard.json +56 -0
- data/spec/data/roles-standard.ttl +66 -0
- data/spec/data/roles.json +23 -0
- data/spec/data/senior-roles.csv +3 -0
- data/spec/data/senior-roles.json +52 -0
- data/spec/data/test232-metadata.json +10 -0
- data/spec/data/test232.csv +3 -0
- data/spec/data/tree-ops-atd.json +1 -0
- data/spec/data/tree-ops-ext-minimal.json +42 -0
- data/spec/data/tree-ops-ext-minimal.ttl +34 -0
- data/spec/data/tree-ops-ext-standard.json +93 -0
- data/spec/data/tree-ops-ext-standard.ttl +82 -0
- data/spec/data/tree-ops-ext.csv +4 -0
- data/spec/data/tree-ops-ext.json +81 -0
- data/spec/data/tree-ops-minimal.json +18 -0
- data/spec/data/tree-ops-minimal.ttl +14 -0
- data/spec/data/tree-ops-standard.json +44 -0
- data/spec/data/tree-ops-standard.ttl +44 -0
- data/spec/data/tree-ops-virtual-minimal.json +32 -0
- data/spec/data/tree-ops-virtual-minimal.ttl +25 -0
- data/spec/data/tree-ops-virtual-standard.json +49 -0
- data/spec/data/tree-ops-virtual-standard.ttl +49 -0
- data/spec/data/tree-ops-virtual.json +48 -0
- data/spec/data/tree-ops.csv +3 -0
- data/spec/data/tree-ops.csv-metadata.json +43 -0
- data/spec/data/tree-ops.html +54 -0
- data/spec/data/tree-ops.tsv +3 -0
- data/spec/format_spec.rb +1 -1
- data/spec/metadata_spec.rb +85 -8
- data/spec/reader_spec.rb +2 -2
- data/spec/suite_helper.rb +2 -2
- data/spec/suite_spec.rb +4 -5
- metadata +147 -37
- data/lib/rdf/tabular/json.rb +0 -0
- data/spec/w3c-csvw +0 -1
data/lib/rdf/tabular/format.rb
CHANGED
@@ -22,7 +22,7 @@ module RDF::Tabular
|
|
22
22
|
# @example Obtaining serialization format file extension mappings
|
23
23
|
# RDF::Format.file_extensions #=> {:csv => "text/csv"}
|
24
24
|
#
|
25
|
-
# @see
|
25
|
+
# @see https://www.w3.org/TR/rdf-testcases/#ntriples
|
26
26
|
class Format < RDF::Format
|
27
27
|
content_type 'text/csv;q=0.4',
|
28
28
|
extensions: [:csv, :tsv],
|
@@ -52,15 +52,17 @@ module RDF::Tabular
|
|
52
52
|
# @return [Hash{Symbol => Lambda(Array, Hash)}]
|
53
53
|
def self.cli_commands
|
54
54
|
{
|
55
|
-
|
56
|
-
description: "
|
57
|
-
|
58
|
-
|
55
|
+
"tabular-json": {
|
56
|
+
description: "Serialize using tabular JSON",
|
57
|
+
parse: false,
|
58
|
+
filter: {format: :tabular}, # Only shows output format set
|
59
|
+
option_use: {output_format: :disabled},
|
60
|
+
help: "tabular-json --input-format tabular files ...\nGenerate tabular JSON output, rather than RDF for Tabular data",
|
59
61
|
lambda: ->(argv, opts) do
|
60
62
|
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
61
63
|
out = opts[:output] || $stdout
|
62
64
|
out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
|
63
|
-
RDF::CLI.parse(argv, opts) do |reader|
|
65
|
+
RDF::CLI.parse(argv, **opts) do |reader|
|
64
66
|
out.puts reader.to_json
|
65
67
|
end
|
66
68
|
end
|
data/lib/rdf/tabular/literal.rb
CHANGED
@@ -13,7 +13,7 @@ module RDF::Tabular
|
|
13
13
|
##
|
14
14
|
# @param [Object] value
|
15
15
|
# @option options [String] :lexical (nil)
|
16
|
-
def initialize(value, options
|
16
|
+
def initialize(value, **options)
|
17
17
|
@datatype = options[:datatype] || DATATYPE
|
18
18
|
@string = options[:lexical] if options.has_key?(:lexical)
|
19
19
|
if value.is_a?(String)
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -16,7 +16,7 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
16
16
|
# * Return Column-level annotations
|
17
17
|
# * Return row iterator with column information
|
18
18
|
#
|
19
|
-
# @author [Gregg Kellogg](
|
19
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
20
20
|
module RDF::Tabular
|
21
21
|
class Metadata
|
22
22
|
include RDF::Util::Logger
|
@@ -136,15 +136,15 @@ module RDF::Tabular
|
|
136
136
|
# see `RDF::Util::File.open_file` in RDF.rb and {new}
|
137
137
|
# @yield [Metadata]
|
138
138
|
# @raise [IOError] if file not found
|
139
|
-
def self.open(path, options
|
139
|
+
def self.open(path, **options)
|
140
140
|
options = options.merge(
|
141
141
|
headers: {
|
142
142
|
'Accept' => 'application/ld+json, application/json'
|
143
143
|
}
|
144
144
|
)
|
145
145
|
path = "file:" + path unless path =~ /^\w+:/
|
146
|
-
RDF::Util::File.open_file(path, options) do |file|
|
147
|
-
self.new(file, options.merge(base: path, filenames: path))
|
146
|
+
RDF::Util::File.open_file(path, **options) do |file|
|
147
|
+
self.new(file, **options.merge(base: path, filenames: path))
|
148
148
|
end
|
149
149
|
end
|
150
150
|
|
@@ -173,16 +173,16 @@ module RDF::Tabular
|
|
173
173
|
# @option options [RDF::URI] :base
|
174
174
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
175
175
|
# @return [Metadata]
|
176
|
-
def self.for_input(input, options
|
176
|
+
def self.for_input(input, **options)
|
177
177
|
base = options[:base]
|
178
178
|
|
179
179
|
# Use user metadata, if provided
|
180
180
|
metadata = case options[:metadata]
|
181
181
|
when Metadata then options[:metadata]
|
182
182
|
when Hash
|
183
|
-
Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
183
|
+
Metadata.new(options[:metadata], **options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
184
184
|
when String, RDF::URI
|
185
|
-
Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
185
|
+
Metadata.open(options[:metadata], **options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
186
186
|
end
|
187
187
|
|
188
188
|
# Search for metadata until found
|
@@ -191,13 +191,13 @@ module RDF::Tabular
|
|
191
191
|
if !metadata && input.respond_to?(:links) &&
|
192
192
|
link = input.links.find_link(%w(rel describedby))
|
193
193
|
link_loc = RDF::URI(base).join(link.href).to_s
|
194
|
-
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
194
|
+
md = Metadata.open(link_loc, **options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
195
195
|
if md
|
196
196
|
# Metadata must describe file to be useful
|
197
197
|
if md.describes_file?(base)
|
198
198
|
metadata = md
|
199
199
|
else
|
200
|
-
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", **options)
|
201
201
|
end
|
202
202
|
end
|
203
203
|
end
|
@@ -206,28 +206,30 @@ module RDF::Tabular
|
|
206
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
207
207
|
if !metadata && base
|
208
208
|
templates = site_wide_config(base)
|
209
|
-
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
209
|
+
log_debug("for_input", **options) {"templates: #{templates.map(&:to_s).inspect}"}
|
210
210
|
locs = templates.map do |template|
|
211
211
|
t = Addressable::Template.new(template)
|
212
|
-
|
212
|
+
mapped = t.expand(url: base).to_s
|
213
|
+
mapped = RDF::URI.decode(mapped) if options[:decode_uri]
|
214
|
+
RDF::URI(base).join(mapped)
|
213
215
|
end
|
214
|
-
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
216
|
+
log_debug("for_input", **options) {"locs: #{locs.map(&:to_s).inspect}"}
|
215
217
|
|
216
218
|
locs.each do |loc|
|
217
219
|
metadata ||= begin
|
218
|
-
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
220
|
+
md = Metadata.open(loc, **options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
221
|
# Metadata must describe file to be useful
|
220
222
|
if md
|
221
223
|
# Metadata must describe file to be useful
|
222
224
|
if md.describes_file?(base)
|
223
225
|
md
|
224
226
|
else
|
225
|
-
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
227
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", **options)
|
226
228
|
nil
|
227
229
|
end
|
228
230
|
end
|
229
231
|
rescue IOError
|
230
|
-
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
232
|
+
log_debug("for_input", **options) {"failed to load found metadata #{loc}: #{$!}"}
|
231
233
|
nil
|
232
234
|
end
|
233
235
|
end
|
@@ -236,8 +238,8 @@ module RDF::Tabular
|
|
236
238
|
# Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
|
237
239
|
metadata = case
|
238
240
|
when metadata then metadata
|
239
|
-
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
|
240
|
-
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
|
241
|
+
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, **options)
|
242
|
+
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, **options)
|
241
243
|
end
|
242
244
|
|
243
245
|
# Make TableGroup, if not already
|
@@ -246,7 +248,7 @@ module RDF::Tabular
|
|
246
248
|
|
247
249
|
##
|
248
250
|
# @private
|
249
|
-
def self.new(input, options
|
251
|
+
def self.new(input, **options)
|
250
252
|
# Triveal case
|
251
253
|
return input if input.is_a?(Metadata)
|
252
254
|
|
@@ -297,7 +299,7 @@ module RDF::Tabular
|
|
297
299
|
end
|
298
300
|
|
299
301
|
md = klass.allocate
|
300
|
-
md.send(:initialize, object, options)
|
302
|
+
md.send(:initialize, object, **options)
|
301
303
|
md
|
302
304
|
rescue ::JSON::ParserError
|
303
305
|
raise Error, "Expected input to be a JSON Object"
|
@@ -314,11 +316,13 @@ module RDF::Tabular
|
|
314
316
|
# Context used for this metadata. Taken from input if not provided
|
315
317
|
# @option options [RDF::URI] :base
|
316
318
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
319
|
+
# @option options [Boolean] :decode_uri
|
320
|
+
# Decode %-encodings in the result of a URI Template operation.
|
317
321
|
# @option options [Boolean] :normalize normalize the object
|
318
322
|
# @option options [Boolean] :validate Strict metadata validation
|
319
323
|
# @raise [Error]
|
320
324
|
# @return [Metadata]
|
321
|
-
def initialize(input, options
|
325
|
+
def initialize(input, **options)
|
322
326
|
@options = options.dup
|
323
327
|
|
324
328
|
# Parent of this Metadata, if any
|
@@ -467,16 +471,16 @@ module RDF::Tabular
|
|
467
471
|
object[:tableSchema] = case value
|
468
472
|
when String
|
469
473
|
link = context.base.join(value).to_s
|
470
|
-
md = Schema.open(link,
|
474
|
+
md = Schema.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
471
475
|
md[:@id] ||= link
|
472
476
|
md
|
473
477
|
when Hash
|
474
|
-
Schema.new(value,
|
478
|
+
Schema.new(value, **@options.merge(parent: self, context: nil))
|
475
479
|
when Schema
|
476
480
|
value
|
477
481
|
else
|
478
482
|
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
479
|
-
Schema.new({},
|
483
|
+
Schema.new({}, **@options.merge(parent: self, context: nil))
|
480
484
|
end
|
481
485
|
end
|
482
486
|
|
@@ -491,7 +495,7 @@ module RDF::Tabular
|
|
491
495
|
when object[:dialect] then object[:dialect]
|
492
496
|
when parent then parent.dialect
|
493
497
|
when is_a?(Table) || is_a?(TableGroup)
|
494
|
-
d = Dialect.new({},
|
498
|
+
d = Dialect.new({}, **@options.merge(parent: self, context: nil))
|
495
499
|
self.dialect = d unless self.parent
|
496
500
|
d
|
497
501
|
else
|
@@ -514,11 +518,11 @@ module RDF::Tabular
|
|
514
518
|
@dialect = object[:dialect] = case value
|
515
519
|
when String
|
516
520
|
link = context.base.join(value).to_s
|
517
|
-
md = Metadata.open(link,
|
521
|
+
md = Metadata.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
518
522
|
md[:@id] ||= link
|
519
523
|
md
|
520
524
|
when Hash
|
521
|
-
Dialect.new(value,
|
525
|
+
Dialect.new(value, **@options.merge(parent: self, context: nil))
|
522
526
|
when Dialect
|
523
527
|
value
|
524
528
|
else
|
@@ -532,8 +536,8 @@ module RDF::Tabular
|
|
532
536
|
# @raise [Error] if datatype is not valid
|
533
537
|
def datatype=(value)
|
534
538
|
val = case value
|
535
|
-
when Hash then Datatype.new(value,
|
536
|
-
else Datatype.new({base: value},
|
539
|
+
when Hash then Datatype.new(value, **@options.merge(parent: self))
|
540
|
+
else Datatype.new({base: value}, **@options.merge(parent: self))
|
537
541
|
end
|
538
542
|
|
539
543
|
if val.valid? || value.is_a?(Hash)
|
@@ -564,7 +568,7 @@ module RDF::Tabular
|
|
564
568
|
end
|
565
569
|
|
566
570
|
##
|
567
|
-
# Validate metadata, raising an error containing all errors detected during validation
|
571
|
+
# Validate metadata and content, raising an error containing all errors detected during validation
|
568
572
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
569
573
|
# @return [self]
|
570
574
|
def validate
|
@@ -872,7 +876,7 @@ module RDF::Tabular
|
|
872
876
|
csv << data unless data.empty?
|
873
877
|
end
|
874
878
|
else
|
875
|
-
csv = ::CSV.new(input, csv_options)
|
879
|
+
csv = ::CSV.new(input, **csv_options)
|
876
880
|
# Skip skipRows and headerRowCount
|
877
881
|
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
882
|
(1..skipped).each {csv.shift}
|
@@ -891,7 +895,7 @@ module RDF::Tabular
|
|
891
895
|
next
|
892
896
|
end
|
893
897
|
number += 1
|
894
|
-
row = Row.new(data, self, number, number + skipped,
|
898
|
+
row = Row.new(data, self, number, number + skipped, **@options)
|
895
899
|
(self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
|
896
900
|
yield(row)
|
897
901
|
end
|
@@ -1036,13 +1040,13 @@ module RDF::Tabular
|
|
1036
1040
|
end
|
1037
1041
|
index = 0
|
1038
1042
|
object_columns.all? do |cb|
|
1039
|
-
ca = non_virtual_columns[index] || Column.new({},
|
1043
|
+
ca = non_virtual_columns[index] || Column.new({}, **@options)
|
1040
1044
|
ta = ca.titles || {}
|
1041
1045
|
tb = cb.titles || {}
|
1042
1046
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1043
1047
|
true
|
1044
1048
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1045
|
-
raise Error, "
|
1049
|
+
raise Error, "Column #{index + 1} doesn't match on name: #{ca.name || 'no name'}, #{cb.name || 'no name'}" unless ca.name == cb.name
|
1046
1050
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1047
1051
|
# If validating, column compatibility requires strict match between titles
|
1048
1052
|
titles_match = case
|
@@ -1066,10 +1070,10 @@ module RDF::Tabular
|
|
1066
1070
|
true
|
1067
1071
|
elsif !@options[:validate]
|
1068
1072
|
# If not validating, columns don't match, but processing continues
|
1069
|
-
log_warn "
|
1073
|
+
log_warn "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1070
1074
|
true
|
1071
1075
|
else
|
1072
|
-
raise Error, "
|
1076
|
+
raise Error, "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1073
1077
|
end
|
1074
1078
|
end
|
1075
1079
|
index += 1
|
@@ -1235,13 +1239,13 @@ module RDF::Tabular
|
|
1235
1239
|
end
|
1236
1240
|
|
1237
1241
|
# General setter for array properties
|
1238
|
-
def set_array_value(key, value, klass, options
|
1242
|
+
def set_array_value(key, value, klass, **options)
|
1239
1243
|
object[key] = case value
|
1240
1244
|
when Array
|
1241
1245
|
value.map do |v|
|
1242
1246
|
case v
|
1243
1247
|
when Hash
|
1244
|
-
klass.new(v,
|
1248
|
+
klass.new(v, **@options.merge(options).merge(parent: self, context: nil))
|
1245
1249
|
else v
|
1246
1250
|
end
|
1247
1251
|
end
|
@@ -1282,11 +1286,11 @@ module RDF::Tabular
|
|
1282
1286
|
class DebugContext
|
1283
1287
|
include RDF::Util::Logger
|
1284
1288
|
end
|
1285
|
-
def self.log_debug(*args, &block)
|
1286
|
-
DebugContext.new.log_debug(*args, &block)
|
1289
|
+
def self.log_debug(*args, **options, &block)
|
1290
|
+
DebugContext.new.log_debug(*args, **options, &block)
|
1287
1291
|
end
|
1288
|
-
def self.log_warn(*args)
|
1289
|
-
DebugContext.new.log_warn(*args)
|
1292
|
+
def self.log_warn(*args, **options)
|
1293
|
+
DebugContext.new.log_warn(*args, **options)
|
1290
1294
|
end
|
1291
1295
|
end
|
1292
1296
|
|
@@ -1434,7 +1438,7 @@ module RDF::Tabular
|
|
1434
1438
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1435
1439
|
ctx = @context
|
1436
1440
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1437
|
-
tg = TableGroup.new(content,
|
1441
|
+
tg = TableGroup.new(content, **@options.merge(context: ctx, filenames: @filenames, base: base))
|
1438
1442
|
@parent = tg # Link from parent
|
1439
1443
|
tg
|
1440
1444
|
end
|
@@ -1489,7 +1493,7 @@ module RDF::Tabular
|
|
1489
1493
|
number += 1
|
1490
1494
|
case v
|
1491
1495
|
when Hash
|
1492
|
-
Column.new(v,
|
1496
|
+
Column.new(v, **@options.merge(
|
1493
1497
|
table: (parent if parent.is_a?(Table)),
|
1494
1498
|
parent: self,
|
1495
1499
|
context: nil,
|
@@ -1621,8 +1625,8 @@ module RDF::Tabular
|
|
1621
1625
|
def name
|
1622
1626
|
self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
|
1623
1627
|
n = Array(ts).first
|
1624
|
-
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1625
|
-
n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1628
|
+
n0 = RDF::URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1629
|
+
n1 = RDF::URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1626
1630
|
"#{n0}#{n1}"
|
1627
1631
|
end || "_col.#{number}"
|
1628
1632
|
end
|
@@ -1783,12 +1787,12 @@ module RDF::Tabular
|
|
1783
1787
|
# @option options [String] :lang, language to set in table, if any
|
1784
1788
|
# @return [Metadata] Tabular metadata
|
1785
1789
|
# @see http://w3c.github.io/csvw/syntax/#parsing
|
1786
|
-
def embedded_metadata(input, metadata, options
|
1790
|
+
def embedded_metadata(input, metadata, **options)
|
1787
1791
|
options = options.dup
|
1788
1792
|
options.delete(:context) # Don't accidentally use a passed context
|
1789
1793
|
# Normalize input to an IO object
|
1790
1794
|
if input.is_a?(String)
|
1791
|
-
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, options.merge(base: input.to_s))}
|
1795
|
+
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, **options.merge(base: input.to_s))}
|
1792
1796
|
end
|
1793
1797
|
|
1794
1798
|
table = {
|
@@ -1826,7 +1830,7 @@ module RDF::Tabular
|
|
1826
1830
|
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
1831
|
# Skip columns
|
1828
1832
|
skipCols = skipColumns.to_i
|
1829
|
-
next if index < skipCols
|
1833
|
+
next if index < skipCols || value.to_s.empty?
|
1830
1834
|
|
1831
1835
|
# Trim value
|
1832
1836
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1837,11 +1841,11 @@ module RDF::Tabular
|
|
1837
1841
|
column = columns[index - skipCols] ||= {
|
1838
1842
|
"titles" => {lang => []},
|
1839
1843
|
}
|
1840
|
-
column["titles"][lang] << value
|
1844
|
+
column["titles"][lang] << value if value
|
1841
1845
|
end
|
1842
1846
|
end
|
1843
1847
|
else
|
1844
|
-
csv = ::CSV.new(input, csv_options)
|
1848
|
+
csv = ::CSV.new(input, **csv_options)
|
1845
1849
|
(1..skipRows.to_i).each do
|
1846
1850
|
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1847
1851
|
# Trim value
|
@@ -1858,7 +1862,7 @@ module RDF::Tabular
|
|
1858
1862
|
Array(row_data).each_with_index do |value, index|
|
1859
1863
|
# Skip columns
|
1860
1864
|
skipCols = skipColumns.to_i
|
1861
|
-
next if index < skipCols
|
1865
|
+
next if index < skipCols || value.to_s.empty?
|
1862
1866
|
|
1863
1867
|
# Trim value
|
1864
1868
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1876,7 +1880,7 @@ module RDF::Tabular
|
|
1876
1880
|
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1877
1881
|
input.rewind if input.respond_to?(:rewind)
|
1878
1882
|
|
1879
|
-
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1883
|
+
Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1880
1884
|
end
|
1881
1885
|
end
|
1882
1886
|
|
@@ -1947,13 +1951,14 @@ module RDF::Tabular
|
|
1947
1951
|
class Row
|
1948
1952
|
# Class for returning values
|
1949
1953
|
Cell = Struct.new(:table, :column, :row, :stringValue, :aboutUrl, :propertyUrl, :valueUrl, :value, :errors) do
|
1950
|
-
def set_urls(mapped_values)
|
1954
|
+
def set_urls(mapped_values, decode_uri)
|
1951
1955
|
%w(aboutUrl propertyUrl valueUrl).each do |prop|
|
1952
1956
|
# If the cell value is nil, and it is not a virtual column
|
1953
1957
|
next if prop == "valueUrl" && value.nil? && !column.virtual
|
1954
1958
|
if v = column.send(prop.to_sym)
|
1955
1959
|
t = Addressable::Template.new(v)
|
1956
1960
|
mapped = t.expand(mapped_values).to_s
|
1961
|
+
mapped = RDF::URI.decode(mapped) if decode_uri
|
1957
1962
|
# FIXME: don't expand here, do it in CSV2RDF
|
1958
1963
|
url = row.context.expand_iri(mapped, documentRelative: true)
|
1959
1964
|
self.send("#{prop}=".to_sym, url)
|
@@ -2026,7 +2031,7 @@ module RDF::Tabular
|
|
2026
2031
|
# @param [Hash{Symbol => Object}] options ({})
|
2027
2032
|
# @option options [Boolean] :validate check for PK/FK consistency
|
2028
2033
|
# @return [Row]
|
2029
|
-
def initialize(row, metadata, number, source_number, options
|
2034
|
+
def initialize(row, metadata, number, source_number, **options)
|
2030
2035
|
@table = metadata
|
2031
2036
|
@number = number
|
2032
2037
|
@sourceNumber = source_number
|
@@ -2058,13 +2063,13 @@ module RDF::Tabular
|
|
2058
2063
|
|
2059
2064
|
# create column if necessary
|
2060
2065
|
columns[index - skipColumns] ||=
|
2061
|
-
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2066
|
+
Column.new({}, **options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2062
2067
|
|
2063
2068
|
column = columns[index - skipColumns]
|
2064
2069
|
|
2065
2070
|
@values << cell = Cell.new(metadata, column, self, value)
|
2066
2071
|
|
2067
|
-
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2072
|
+
datatype = column.datatype || Datatype.new({base: "string"}, **options.merge(parent: column))
|
2068
2073
|
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2069
2074
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2070
2075
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -2110,11 +2115,11 @@ module RDF::Tabular
|
|
2110
2115
|
# Map URLs for row
|
2111
2116
|
@values.each_with_index do |cell, index|
|
2112
2117
|
mapped_values = map_values.merge(
|
2113
|
-
"_name" =>
|
2118
|
+
"_name" => CGI.unescape(cell.column.name),
|
2114
2119
|
"_column" => cell.column.number,
|
2115
2120
|
"_sourceColumn" => cell.column.sourceNumber
|
2116
2121
|
)
|
2117
|
-
cell.set_urls(mapped_values)
|
2122
|
+
cell.set_urls(mapped_values, options[:decode_uri])
|
2118
2123
|
end
|
2119
2124
|
end
|
2120
2125
|
|