rdf-tabular 2.2.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +54 -48
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/etc/doap.csv +1 -1
- data/etc/doap.csv-metadata.json +1 -1
- data/etc/doap.ttl +16 -18
- data/etc/earl.html +648 -648
- data/etc/earl.jsonld +691 -691
- data/etc/earl.ttl +846 -846
- data/lib/rdf/tabular/csvw.rb +500 -90
- data/lib/rdf/tabular/format.rb +2 -2
- data/lib/rdf/tabular/literal.rb +1 -1
- data/lib/rdf/tabular/metadata.rb +63 -58
- data/lib/rdf/tabular/reader.rb +30 -21
- data/lib/rdf/tabular/uax35.rb +1 -1
- data/lib/rdf/tabular.rb +3 -3
- data/spec/metadata_spec.rb +85 -8
- data/spec/reader_spec.rb +2 -2
- data/spec/spec_helper.rb +20 -8
- data/spec/suite_helper.rb +2 -2
- data/spec/suite_spec.rb +4 -5
- metadata +99 -149
- data/lib/rdf/tabular/json.rb +0 -0
data/lib/rdf/tabular/format.rb
CHANGED
@@ -22,7 +22,7 @@ module RDF::Tabular
|
|
22
22
|
# @example Obtaining serialization format file extension mappings
|
23
23
|
# RDF::Format.file_extensions #=> {:csv => "text/csv"}
|
24
24
|
#
|
25
|
-
# @see
|
25
|
+
# @see https://www.w3.org/TR/rdf-testcases/#ntriples
|
26
26
|
class Format < RDF::Format
|
27
27
|
content_type 'text/csv;q=0.4',
|
28
28
|
extensions: [:csv, :tsv],
|
@@ -62,7 +62,7 @@ module RDF::Tabular
|
|
62
62
|
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
63
63
|
out = opts[:output] || $stdout
|
64
64
|
out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
|
65
|
-
RDF::CLI.parse(argv, opts) do |reader|
|
65
|
+
RDF::CLI.parse(argv, **opts) do |reader|
|
66
66
|
out.puts reader.to_json
|
67
67
|
end
|
68
68
|
end
|
data/lib/rdf/tabular/literal.rb
CHANGED
@@ -13,7 +13,7 @@ module RDF::Tabular
|
|
13
13
|
##
|
14
14
|
# @param [Object] value
|
15
15
|
# @option options [String] :lexical (nil)
|
16
|
-
def initialize(value, options
|
16
|
+
def initialize(value, **options)
|
17
17
|
@datatype = options[:datatype] || DATATYPE
|
18
18
|
@string = options[:lexical] if options.has_key?(:lexical)
|
19
19
|
if value.is_a?(String)
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -16,7 +16,7 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
16
16
|
# * Return Column-level annotations
|
17
17
|
# * Return row iterator with column information
|
18
18
|
#
|
19
|
-
# @author [Gregg Kellogg](
|
19
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
20
20
|
module RDF::Tabular
|
21
21
|
class Metadata
|
22
22
|
include RDF::Util::Logger
|
@@ -136,15 +136,15 @@ module RDF::Tabular
|
|
136
136
|
# see `RDF::Util::File.open_file` in RDF.rb and {new}
|
137
137
|
# @yield [Metadata]
|
138
138
|
# @raise [IOError] if file not found
|
139
|
-
def self.open(path, options
|
139
|
+
def self.open(path, **options)
|
140
140
|
options = options.merge(
|
141
141
|
headers: {
|
142
142
|
'Accept' => 'application/ld+json, application/json'
|
143
143
|
}
|
144
144
|
)
|
145
145
|
path = "file:" + path unless path =~ /^\w+:/
|
146
|
-
RDF::Util::File.open_file(path, options) do |file|
|
147
|
-
self.new(file, options.merge(base: path, filenames: path))
|
146
|
+
RDF::Util::File.open_file(path, **options) do |file|
|
147
|
+
self.new(file, **options.merge(base: path, filenames: path))
|
148
148
|
end
|
149
149
|
end
|
150
150
|
|
@@ -173,16 +173,16 @@ module RDF::Tabular
|
|
173
173
|
# @option options [RDF::URI] :base
|
174
174
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
175
175
|
# @return [Metadata]
|
176
|
-
def self.for_input(input, options
|
176
|
+
def self.for_input(input, **options)
|
177
177
|
base = options[:base]
|
178
178
|
|
179
179
|
# Use user metadata, if provided
|
180
180
|
metadata = case options[:metadata]
|
181
181
|
when Metadata then options[:metadata]
|
182
182
|
when Hash
|
183
|
-
Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
183
|
+
Metadata.new(options[:metadata], **options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
184
184
|
when String, RDF::URI
|
185
|
-
Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
185
|
+
Metadata.open(options[:metadata], **options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
186
186
|
end
|
187
187
|
|
188
188
|
# Search for metadata until found
|
@@ -191,13 +191,13 @@ module RDF::Tabular
|
|
191
191
|
if !metadata && input.respond_to?(:links) &&
|
192
192
|
link = input.links.find_link(%w(rel describedby))
|
193
193
|
link_loc = RDF::URI(base).join(link.href).to_s
|
194
|
-
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
194
|
+
md = Metadata.open(link_loc, **options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
195
195
|
if md
|
196
196
|
# Metadata must describe file to be useful
|
197
197
|
if md.describes_file?(base)
|
198
198
|
metadata = md
|
199
199
|
else
|
200
|
-
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", **options)
|
201
201
|
end
|
202
202
|
end
|
203
203
|
end
|
@@ -206,28 +206,30 @@ module RDF::Tabular
|
|
206
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
207
207
|
if !metadata && base
|
208
208
|
templates = site_wide_config(base)
|
209
|
-
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
209
|
+
log_debug("for_input", **options) {"templates: #{templates.map(&:to_s).inspect}"}
|
210
210
|
locs = templates.map do |template|
|
211
211
|
t = Addressable::Template.new(template)
|
212
|
-
|
212
|
+
mapped = t.expand(url: base).to_s
|
213
|
+
mapped = RDF::URI.decode(mapped) if options[:decode_uri]
|
214
|
+
RDF::URI(base).join(mapped)
|
213
215
|
end
|
214
|
-
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
216
|
+
log_debug("for_input", **options) {"locs: #{locs.map(&:to_s).inspect}"}
|
215
217
|
|
216
218
|
locs.each do |loc|
|
217
219
|
metadata ||= begin
|
218
|
-
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
220
|
+
md = Metadata.open(loc, **options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
221
|
# Metadata must describe file to be useful
|
220
222
|
if md
|
221
223
|
# Metadata must describe file to be useful
|
222
224
|
if md.describes_file?(base)
|
223
225
|
md
|
224
226
|
else
|
225
|
-
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
227
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", **options)
|
226
228
|
nil
|
227
229
|
end
|
228
230
|
end
|
229
231
|
rescue IOError
|
230
|
-
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
232
|
+
log_debug("for_input", **options) {"failed to load found metadata #{loc}: #{$!}"}
|
231
233
|
nil
|
232
234
|
end
|
233
235
|
end
|
@@ -236,8 +238,8 @@ module RDF::Tabular
|
|
236
238
|
# Return either the merge of user- and found-metadata, any of these, or an empty TableGroup
|
237
239
|
metadata = case
|
238
240
|
when metadata then metadata
|
239
|
-
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
|
240
|
-
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
|
241
|
+
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, **options)
|
242
|
+
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, **options)
|
241
243
|
end
|
242
244
|
|
243
245
|
# Make TableGroup, if not already
|
@@ -246,7 +248,7 @@ module RDF::Tabular
|
|
246
248
|
|
247
249
|
##
|
248
250
|
# @private
|
249
|
-
def self.new(input, options
|
251
|
+
def self.new(input, **options)
|
250
252
|
# Trivial case
|
251
253
|
return input if input.is_a?(Metadata)
|
252
254
|
|
@@ -297,7 +299,7 @@ module RDF::Tabular
|
|
297
299
|
end
|
298
300
|
|
299
301
|
md = klass.allocate
|
300
|
-
md.send(:initialize, object, options)
|
302
|
+
md.send(:initialize, object, **options)
|
301
303
|
md
|
302
304
|
rescue ::JSON::ParserError
|
303
305
|
raise Error, "Expected input to be a JSON Object"
|
@@ -314,11 +316,13 @@ module RDF::Tabular
|
|
314
316
|
# Context used for this metadata. Taken from input if not provided
|
315
317
|
# @option options [RDF::URI] :base
|
316
318
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
319
|
+
# @option options [Boolean] :decode_uri
|
320
|
+
# Decode %-encodings in the result of a URI Template operation.
|
317
321
|
# @option options [Boolean] :normalize normalize the object
|
318
322
|
# @option options [Boolean] :validate Strict metadata validation
|
319
323
|
# @raise [Error]
|
320
324
|
# @return [Metadata]
|
321
|
-
def initialize(input, options
|
325
|
+
def initialize(input, **options)
|
322
326
|
@options = options.dup
|
323
327
|
|
324
328
|
# Parent of this Metadata, if any
|
@@ -467,16 +471,16 @@ module RDF::Tabular
|
|
467
471
|
object[:tableSchema] = case value
|
468
472
|
when String
|
469
473
|
link = context.base.join(value).to_s
|
470
|
-
md = Schema.open(link,
|
474
|
+
md = Schema.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
471
475
|
md[:@id] ||= link
|
472
476
|
md
|
473
477
|
when Hash
|
474
|
-
Schema.new(value,
|
478
|
+
Schema.new(value, **@options.merge(parent: self, context: nil))
|
475
479
|
when Schema
|
476
480
|
value
|
477
481
|
else
|
478
482
|
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
479
|
-
Schema.new({},
|
483
|
+
Schema.new({}, **@options.merge(parent: self, context: nil))
|
480
484
|
end
|
481
485
|
end
|
482
486
|
|
@@ -491,7 +495,7 @@ module RDF::Tabular
|
|
491
495
|
when object[:dialect] then object[:dialect]
|
492
496
|
when parent then parent.dialect
|
493
497
|
when is_a?(Table) || is_a?(TableGroup)
|
494
|
-
d = Dialect.new({},
|
498
|
+
d = Dialect.new({}, **@options.merge(parent: self, context: nil))
|
495
499
|
self.dialect = d unless self.parent
|
496
500
|
d
|
497
501
|
else
|
@@ -514,11 +518,11 @@ module RDF::Tabular
|
|
514
518
|
@dialect = object[:dialect] = case value
|
515
519
|
when String
|
516
520
|
link = context.base.join(value).to_s
|
517
|
-
md = Metadata.open(link,
|
521
|
+
md = Metadata.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
518
522
|
md[:@id] ||= link
|
519
523
|
md
|
520
524
|
when Hash
|
521
|
-
Dialect.new(value,
|
525
|
+
Dialect.new(value, **@options.merge(parent: self, context: nil))
|
522
526
|
when Dialect
|
523
527
|
value
|
524
528
|
else
|
@@ -532,8 +536,8 @@ module RDF::Tabular
|
|
532
536
|
# @raise [Error] if datatype is not valid
|
533
537
|
def datatype=(value)
|
534
538
|
val = case value
|
535
|
-
when Hash then Datatype.new(value,
|
536
|
-
else Datatype.new({base: value},
|
539
|
+
when Hash then Datatype.new(value, **@options.merge(parent: self))
|
540
|
+
else Datatype.new({base: value}, **@options.merge(parent: self))
|
537
541
|
end
|
538
542
|
|
539
543
|
if val.valid? || value.is_a?(Hash)
|
@@ -564,7 +568,7 @@ module RDF::Tabular
|
|
564
568
|
end
|
565
569
|
|
566
570
|
##
|
567
|
-
# Validate metadata, raising an error containing all errors detected during validation
|
571
|
+
# Validate metadata and content, raising an error containing all errors detected during validation
|
568
572
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
569
573
|
# @return [self]
|
570
574
|
def validate
|
@@ -872,7 +876,7 @@ module RDF::Tabular
|
|
872
876
|
csv << data unless data.empty?
|
873
877
|
end
|
874
878
|
else
|
875
|
-
csv = ::CSV.new(input, csv_options)
|
879
|
+
csv = ::CSV.new(input, **csv_options)
|
876
880
|
# Skip skipRows and headerRowCount
|
877
881
|
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
882
|
(1..skipped).each {csv.shift}
|
@@ -891,7 +895,7 @@ module RDF::Tabular
|
|
891
895
|
next
|
892
896
|
end
|
893
897
|
number += 1
|
894
|
-
row = Row.new(data, self, number, number + skipped,
|
898
|
+
row = Row.new(data, self, number, number + skipped, **@options)
|
895
899
|
(self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
|
896
900
|
yield(row)
|
897
901
|
end
|
@@ -1036,13 +1040,13 @@ module RDF::Tabular
|
|
1036
1040
|
end
|
1037
1041
|
index = 0
|
1038
1042
|
object_columns.all? do |cb|
|
1039
|
-
ca = non_virtual_columns[index] || Column.new({},
|
1043
|
+
ca = non_virtual_columns[index] || Column.new({}, **@options)
|
1040
1044
|
ta = ca.titles || {}
|
1041
1045
|
tb = cb.titles || {}
|
1042
1046
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1043
1047
|
true
|
1044
1048
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1045
|
-
raise Error, "
|
1049
|
+
raise Error, "Column #{index + 1} doesn't match on name: #{ca.name || 'no name'}, #{cb.name || 'no name'}" unless ca.name == cb.name
|
1046
1050
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1047
1051
|
# If validating, column compatibility requires strict match between titles
|
1048
1052
|
titles_match = case
|
@@ -1066,10 +1070,10 @@ module RDF::Tabular
|
|
1066
1070
|
true
|
1067
1071
|
elsif !@options[:validate]
|
1068
1072
|
# If not validating, columns don't match, but processing continues
|
1069
|
-
log_warn "
|
1073
|
+
log_warn "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1070
1074
|
true
|
1071
1075
|
else
|
1072
|
-
raise Error, "
|
1076
|
+
raise Error, "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1073
1077
|
end
|
1074
1078
|
end
|
1075
1079
|
index += 1
|
@@ -1235,13 +1239,13 @@ module RDF::Tabular
|
|
1235
1239
|
end
|
1236
1240
|
|
1237
1241
|
# General setter for array properties
|
1238
|
-
def set_array_value(key, value, klass, options
|
1242
|
+
def set_array_value(key, value, klass, **options)
|
1239
1243
|
object[key] = case value
|
1240
1244
|
when Array
|
1241
1245
|
value.map do |v|
|
1242
1246
|
case v
|
1243
1247
|
when Hash
|
1244
|
-
klass.new(v,
|
1248
|
+
klass.new(v, **@options.merge(options).merge(parent: self, context: nil))
|
1245
1249
|
else v
|
1246
1250
|
end
|
1247
1251
|
end
|
@@ -1282,11 +1286,11 @@ module RDF::Tabular
|
|
1282
1286
|
class DebugContext
|
1283
1287
|
include RDF::Util::Logger
|
1284
1288
|
end
|
1285
|
-
def self.log_debug(*args, &block)
|
1286
|
-
DebugContext.new.log_debug(*args, &block)
|
1289
|
+
def self.log_debug(*args, **options, &block)
|
1290
|
+
DebugContext.new.log_debug(*args, **options, &block)
|
1287
1291
|
end
|
1288
|
-
def self.log_warn(*args)
|
1289
|
-
DebugContext.new.log_warn(*args)
|
1292
|
+
def self.log_warn(*args, **options)
|
1293
|
+
DebugContext.new.log_warn(*args, **options)
|
1290
1294
|
end
|
1291
1295
|
end
|
1292
1296
|
|
@@ -1434,7 +1438,7 @@ module RDF::Tabular
|
|
1434
1438
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1435
1439
|
ctx = @context
|
1436
1440
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1437
|
-
tg = TableGroup.new(content,
|
1441
|
+
tg = TableGroup.new(content, **@options.merge(context: ctx, filenames: @filenames, base: base))
|
1438
1442
|
@parent = tg # Link from parent
|
1439
1443
|
tg
|
1440
1444
|
end
|
@@ -1489,7 +1493,7 @@ module RDF::Tabular
|
|
1489
1493
|
number += 1
|
1490
1494
|
case v
|
1491
1495
|
when Hash
|
1492
|
-
Column.new(v,
|
1496
|
+
Column.new(v, **@options.merge(
|
1493
1497
|
table: (parent if parent.is_a?(Table)),
|
1494
1498
|
parent: self,
|
1495
1499
|
context: nil,
|
@@ -1621,8 +1625,8 @@ module RDF::Tabular
|
|
1621
1625
|
def name
|
1622
1626
|
self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
|
1623
1627
|
n = Array(ts).first
|
1624
|
-
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1625
|
-
n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1628
|
+
n0 = RDF::URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1629
|
+
n1 = RDF::URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1626
1630
|
"#{n0}#{n1}"
|
1627
1631
|
end || "_col.#{number}"
|
1628
1632
|
end
|
@@ -1783,12 +1787,12 @@ module RDF::Tabular
|
|
1783
1787
|
# @option options [String] :lang, language to set in table, if any
|
1784
1788
|
# @return [Metadata] Tabular metadata
|
1785
1789
|
# @see http://w3c.github.io/csvw/syntax/#parsing
|
1786
|
-
def embedded_metadata(input, metadata, options
|
1790
|
+
def embedded_metadata(input, metadata, **options)
|
1787
1791
|
options = options.dup
|
1788
1792
|
options.delete(:context) # Don't accidentally use a passed context
|
1789
1793
|
# Normalize input to an IO object
|
1790
1794
|
if input.is_a?(String)
|
1791
|
-
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, options.merge(base: input.to_s))}
|
1795
|
+
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, **options.merge(base: input.to_s))}
|
1792
1796
|
end
|
1793
1797
|
|
1794
1798
|
table = {
|
@@ -1826,7 +1830,7 @@ module RDF::Tabular
|
|
1826
1830
|
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
1831
|
# Skip columns
|
1828
1832
|
skipCols = skipColumns.to_i
|
1829
|
-
next if index < skipCols
|
1833
|
+
next if index < skipCols || value.to_s.empty?
|
1830
1834
|
|
1831
1835
|
# Trim value
|
1832
1836
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1837,11 +1841,11 @@ module RDF::Tabular
|
|
1837
1841
|
column = columns[index - skipCols] ||= {
|
1838
1842
|
"titles" => {lang => []},
|
1839
1843
|
}
|
1840
|
-
column["titles"][lang] << value
|
1844
|
+
column["titles"][lang] << value if value
|
1841
1845
|
end
|
1842
1846
|
end
|
1843
1847
|
else
|
1844
|
-
csv = ::CSV.new(input, csv_options)
|
1848
|
+
csv = ::CSV.new(input, **csv_options)
|
1845
1849
|
(1..skipRows.to_i).each do
|
1846
1850
|
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1847
1851
|
# Trim value
|
@@ -1858,7 +1862,7 @@ module RDF::Tabular
|
|
1858
1862
|
Array(row_data).each_with_index do |value, index|
|
1859
1863
|
# Skip columns
|
1860
1864
|
skipCols = skipColumns.to_i
|
1861
|
-
next if index < skipCols
|
1865
|
+
next if index < skipCols || value.to_s.empty?
|
1862
1866
|
|
1863
1867
|
# Trim value
|
1864
1868
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1876,7 +1880,7 @@ module RDF::Tabular
|
|
1876
1880
|
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1877
1881
|
input.rewind if input.respond_to?(:rewind)
|
1878
1882
|
|
1879
|
-
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1883
|
+
Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1880
1884
|
end
|
1881
1885
|
end
|
1882
1886
|
|
@@ -1947,13 +1951,14 @@ module RDF::Tabular
|
|
1947
1951
|
class Row
|
1948
1952
|
# Class for returning values
|
1949
1953
|
Cell = Struct.new(:table, :column, :row, :stringValue, :aboutUrl, :propertyUrl, :valueUrl, :value, :errors) do
|
1950
|
-
def set_urls(mapped_values)
|
1954
|
+
def set_urls(mapped_values, decode_uri)
|
1951
1955
|
%w(aboutUrl propertyUrl valueUrl).each do |prop|
|
1952
1956
|
# If the cell value is nil, and it is not a virtual column
|
1953
1957
|
next if prop == "valueUrl" && value.nil? && !column.virtual
|
1954
1958
|
if v = column.send(prop.to_sym)
|
1955
1959
|
t = Addressable::Template.new(v)
|
1956
1960
|
mapped = t.expand(mapped_values).to_s
|
1961
|
+
mapped = RDF::URI.decode(mapped) if decode_uri
|
1957
1962
|
# FIXME: don't expand here, do it in CSV2RDF
|
1958
1963
|
url = row.context.expand_iri(mapped, documentRelative: true)
|
1959
1964
|
self.send("#{prop}=".to_sym, url)
|
@@ -2026,7 +2031,7 @@ module RDF::Tabular
|
|
2026
2031
|
# @param [Hash{Symbol => Object}] options ({})
|
2027
2032
|
# @option options [Boolean] :validate check for PK/FK consistency
|
2028
2033
|
# @return [Row]
|
2029
|
-
def initialize(row, metadata, number, source_number, options
|
2034
|
+
def initialize(row, metadata, number, source_number, **options)
|
2030
2035
|
@table = metadata
|
2031
2036
|
@number = number
|
2032
2037
|
@sourceNumber = source_number
|
@@ -2058,13 +2063,13 @@ module RDF::Tabular
|
|
2058
2063
|
|
2059
2064
|
# create column if necessary
|
2060
2065
|
columns[index - skipColumns] ||=
|
2061
|
-
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2066
|
+
Column.new({}, **options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2062
2067
|
|
2063
2068
|
column = columns[index - skipColumns]
|
2064
2069
|
|
2065
2070
|
@values << cell = Cell.new(metadata, column, self, value)
|
2066
2071
|
|
2067
|
-
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2072
|
+
datatype = column.datatype || Datatype.new({base: "string"}, **options.merge(parent: column))
|
2068
2073
|
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2069
2074
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2070
2075
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -2110,11 +2115,11 @@ module RDF::Tabular
|
|
2110
2115
|
# Map URLs for row
|
2111
2116
|
@values.each_with_index do |cell, index|
|
2112
2117
|
mapped_values = map_values.merge(
|
2113
|
-
"_name" =>
|
2118
|
+
"_name" => CGI.unescape(cell.column.name),
|
2114
2119
|
"_column" => cell.column.number,
|
2115
2120
|
"_sourceColumn" => cell.column.sourceNumber
|
2116
2121
|
)
|
2117
|
-
cell.set_urls(mapped_values)
|
2122
|
+
cell.set_urls(mapped_values, options[:decode_uri])
|
2118
2123
|
end
|
2119
2124
|
end
|
2120
2125
|
|
data/lib/rdf/tabular/reader.rb
CHANGED
@@ -5,7 +5,7 @@ module RDF::Tabular
|
|
5
5
|
##
|
6
6
|
# A Tabular Data to RDF parser in Ruby.
|
7
7
|
#
|
8
|
-
# @author [Gregg Kellogg](
|
8
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
9
9
|
class Reader < RDF::Reader
|
10
10
|
format Format
|
11
11
|
include RDF::Util::Logger
|
@@ -22,7 +22,7 @@ module RDF::Tabular
|
|
22
22
|
|
23
23
|
##
|
24
24
|
# Writer options
|
25
|
-
# @see
|
25
|
+
# @see https://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Writer#options-class_method
|
26
26
|
def self.options
|
27
27
|
super + [
|
28
28
|
RDF::CLI::Option.new(
|
@@ -43,6 +43,13 @@ module RDF::Tabular
|
|
43
43
|
control: :checkbox,
|
44
44
|
on: ["--no-prov"],
|
45
45
|
description: "do not output optional provenance information.") {true},
|
46
|
+
RDF::CLI::Option.new(
|
47
|
+
symbol: :decode_uri,
|
48
|
+
datatype: TrueClass,
|
49
|
+
control: :checkbox,
|
50
|
+
on: ["--decode-uri"],
|
51
|
+
description: "decode %-encodings in the result of a URI Template operation."
|
52
|
+
)
|
46
53
|
]
|
47
54
|
end
|
48
55
|
|
@@ -54,16 +61,18 @@ module RDF::Tabular
|
|
54
61
|
# or an Array used as an internalized array of arrays
|
55
62
|
# @param [Hash{Symbol => Object}] options
|
56
63
|
# any additional options (see `RDF::Reader#initialize`)
|
64
|
+
# @option options [Boolean] :decode_uri
|
65
|
+
# Decode %-encodings in the result of a URI Template operation.
|
66
|
+
# @option options [Array<Hash>] :fks_referencing_table
|
67
|
+
# When called with Table metadata, a list of the foreign keys referencing this table
|
57
68
|
# @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
|
58
69
|
# @option options [Boolean] :minimal includes only the information gleaned from the cells of the tabular data
|
59
70
|
# @option options [Boolean] :noProv do not output optional provenance information
|
60
|
-
# @option optinons [Array<Hash>] :fks_referencing_table
|
61
|
-
# When called with Table metadata, a list of the foreign keys referencing this table
|
62
71
|
# @yield [reader] `self`
|
63
72
|
# @yieldparam [RDF::Reader] reader
|
64
73
|
# @yieldreturn [void] ignored
|
65
74
|
# @raise [RDF::ReaderError] if the CSV document cannot be loaded
|
66
|
-
def initialize(input = $stdin, options
|
75
|
+
def initialize(input = $stdin, **options, &block)
|
67
76
|
super do
|
68
77
|
# Base would be how we are to take this
|
69
78
|
@options[:base] ||= base_uri.to_s if base_uri
|
@@ -71,7 +80,7 @@ module RDF::Tabular
|
|
71
80
|
@options[:base] ||= input.path if input.respond_to?(:path)
|
72
81
|
@options[:base] ||= input.filename if input.respond_to?(:filename)
|
73
82
|
if RDF::URI(@options[:base]).relative? && File.exist?(@options[:base].to_s)
|
74
|
-
@options[:base] = "file:/#{File.expand_path(@options[:base])}"
|
83
|
+
@options[:base] = RDF::URI("file:/#{File.expand_path(@options[:base])}").normalize
|
75
84
|
end
|
76
85
|
|
77
86
|
log_debug("Reader#initialize") {"input: #{input.inspect}, base: #{@options[:base]}"}
|
@@ -89,7 +98,7 @@ module RDF::Tabular
|
|
89
98
|
# If input is JSON, then the input is the metadata
|
90
99
|
content_type = @input.respond_to?(:content_type) ? @input.content_type : ""
|
91
100
|
if @options[:base] =~ /\.json(?:ld)?$/ || content_type =~ %r(application/(csvm\+|ld\+)?json)
|
92
|
-
@metadata = Metadata.new(@input,
|
101
|
+
@metadata = Metadata.new(@input, filenames: @options[:base], **@options)
|
93
102
|
# If @metadata is for a Table, turn it into a TableGroup
|
94
103
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
95
104
|
@metadata.normalize!
|
@@ -102,7 +111,7 @@ module RDF::Tabular
|
|
102
111
|
def script.content_type; "application/csvm+json"; end
|
103
112
|
log_debug("Reader#initialize") {"Process HTML script block"}
|
104
113
|
@input = script
|
105
|
-
@metadata = Metadata.new(@input,
|
114
|
+
@metadata = Metadata.new(@input, filenames: @options[:base], **@options)
|
106
115
|
# If @metadata is for a Table, turn it into a TableGroup
|
107
116
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
108
117
|
@metadata.normalize!
|
@@ -119,7 +128,7 @@ module RDF::Tabular
|
|
119
128
|
dialect.separator = "\t" if (input.content_type == "text/tsv" rescue nil)
|
120
129
|
embed_options = @options.dup
|
121
130
|
embed_options[:lang] = dialect_metadata.lang if dialect_metadata.lang
|
122
|
-
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], embed_options)
|
131
|
+
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], **embed_options)
|
123
132
|
|
124
133
|
if (@metadata = @options[:metadata]) && @metadata.tableSchema
|
125
134
|
@metadata.verify_compatible!(embedded_metadata)
|
@@ -136,7 +145,7 @@ module RDF::Tabular
|
|
136
145
|
else
|
137
146
|
# It's tabular data. Find metadata and proceed as if it was specified in the first place
|
138
147
|
@options[:original_input] = @input unless @options[:metadata]
|
139
|
-
@input = @metadata = Metadata.for_input(@input,
|
148
|
+
@input = @metadata = Metadata.for_input(@input, **@options).normalize!
|
140
149
|
end
|
141
150
|
|
142
151
|
log_debug("Reader#initialize") {"input: #{input}, metadata: #{metadata.inspect}"}
|
@@ -186,7 +195,7 @@ module RDF::Tabular
|
|
186
195
|
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
187
196
|
table_resource = RDF::Node.new
|
188
197
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
189
|
-
Reader.new(options[:original_input], options.merge(
|
198
|
+
Reader.new(options[:original_input], **options.merge(
|
190
199
|
metadata: input.tables.first,
|
191
200
|
base: input.tables.first.url,
|
192
201
|
no_found_metadata: true,
|
@@ -205,7 +214,7 @@ module RDF::Tabular
|
|
205
214
|
end.flatten.compact
|
206
215
|
table_resource = table.id || RDF::Node.new
|
207
216
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
208
|
-
Reader.open(table.url, options.merge(
|
217
|
+
Reader.open(table.url, **options.merge(
|
209
218
|
metadata: table,
|
210
219
|
base: table.url,
|
211
220
|
no_found_metadata: true,
|
@@ -225,7 +234,7 @@ module RDF::Tabular
|
|
225
234
|
activity = RDF::Node.new
|
226
235
|
add_statement(0, table_group, RDF::Vocab::PROV.wasGeneratedBy, activity)
|
227
236
|
add_statement(0, activity, RDF.type, RDF::Vocab::PROV.Activity)
|
228
|
-
add_statement(0, activity, RDF::Vocab::PROV.wasAssociatedWith, RDF::URI("
|
237
|
+
add_statement(0, activity, RDF::Vocab::PROV.wasAssociatedWith, RDF::URI("https://rubygems.org/gems/rdf-tabular"))
|
229
238
|
add_statement(0, activity, RDF::Vocab::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
|
230
239
|
add_statement(0, activity, RDF::Vocab::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
|
231
240
|
|
@@ -421,9 +430,9 @@ module RDF::Tabular
|
|
421
430
|
|
422
431
|
res = if io
|
423
432
|
::JSON::dump_default_options = json_state
|
424
|
-
::JSON.dump(self.send(hash_fn, options), io)
|
433
|
+
::JSON.dump(self.send(hash_fn, **options), io)
|
425
434
|
else
|
426
|
-
hash = self.send(hash_fn, options)
|
435
|
+
hash = self.send(hash_fn, **options)
|
427
436
|
::JSON.generate(hash, json_state)
|
428
437
|
end
|
429
438
|
|
@@ -443,7 +452,7 @@ module RDF::Tabular
|
|
443
452
|
#
|
444
453
|
# @param [Hash{Symbol => Object}] options
|
445
454
|
# @return [Hash, Array]
|
446
|
-
def to_hash(options
|
455
|
+
def to_hash(**options)
|
447
456
|
# Construct metadata from that passed from file open, along with information from the file.
|
448
457
|
if input.is_a?(Metadata)
|
449
458
|
log_debug("each_statement: metadata") {input.inspect}
|
@@ -467,13 +476,13 @@ module RDF::Tabular
|
|
467
476
|
table_group['tables'] = tables
|
468
477
|
|
469
478
|
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
470
|
-
Reader.new(options[:original_input], options.merge(
|
479
|
+
Reader.new(options[:original_input], **options.merge(
|
471
480
|
metadata: input.tables.first,
|
472
481
|
base: input.tables.first.url,
|
473
482
|
minimal: minimal?,
|
474
483
|
no_found_metadata: true,
|
475
484
|
)) do |r|
|
476
|
-
case t = r.to_hash(options)
|
485
|
+
case t = r.to_hash(**options)
|
477
486
|
when Array then tables += t unless input.tables.first.suppressOutput
|
478
487
|
when Hash then tables << t unless input.tables.first.suppressOutput
|
479
488
|
end
|
@@ -481,13 +490,13 @@ module RDF::Tabular
|
|
481
490
|
else
|
482
491
|
input.each_table do |table|
|
483
492
|
next if table.suppressOutput && !validate?
|
484
|
-
Reader.open(table.url, options.merge(
|
493
|
+
Reader.open(table.url, **options.merge(
|
485
494
|
metadata: table,
|
486
495
|
base: table.url,
|
487
496
|
minimal: minimal?,
|
488
497
|
no_found_metadata: true,
|
489
498
|
)) do |r|
|
490
|
-
case t = r.to_hash(options)
|
499
|
+
case t = r.to_hash(**options)
|
491
500
|
when Array then tables += t unless table.suppressOutput
|
492
501
|
when Hash then tables << t unless table.suppressOutput
|
493
502
|
end
|
@@ -560,7 +569,7 @@ module RDF::Tabular
|
|
560
569
|
co['@id'] = subject.to_s unless subject == 'null'
|
561
570
|
prop = case cell.propertyUrl
|
562
571
|
when RDF.type then '@type'
|
563
|
-
when nil then
|
572
|
+
when nil then CGI.unescape(column.name) # Use URI-decoded name
|
564
573
|
else
|
565
574
|
# Compact the property to a term or prefixed name
|
566
575
|
metadata.context.compact_iri(cell.propertyUrl, vocab: true)
|
data/lib/rdf/tabular/uax35.rb
CHANGED
data/lib/rdf/tabular.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
$:.unshift(File.expand_path("..", __FILE__))
|
2
|
-
require 'rdf' # @see
|
2
|
+
require 'rdf' # @see https://rubygems.org/gems/rdf
|
3
3
|
require 'csv'
|
4
4
|
|
5
5
|
module RDF
|
6
6
|
##
|
7
7
|
# **`RDF::Tabular`** is a Tabular/CSV extension for RDF.rb.
|
8
8
|
#
|
9
|
-
# @see
|
9
|
+
# @see https://w3c.github.io/csvw/
|
10
10
|
#
|
11
|
-
# @author [Gregg Kellogg](
|
11
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
12
12
|
module Tabular
|
13
13
|
require 'rdf/tabular/format'
|
14
14
|
autoload :Column, 'rdf/tabular/metadata'
|