rdf-tabular 2.2.1 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +54 -48
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/etc/doap.csv +1 -1
- data/etc/doap.csv-metadata.json +1 -1
- data/etc/doap.ttl +16 -18
- data/etc/earl.html +648 -648
- data/etc/earl.jsonld +691 -691
- data/etc/earl.ttl +846 -846
- data/lib/rdf/tabular/csvw.rb +500 -90
- data/lib/rdf/tabular/format.rb +2 -2
- data/lib/rdf/tabular/literal.rb +1 -1
- data/lib/rdf/tabular/metadata.rb +63 -58
- data/lib/rdf/tabular/reader.rb +30 -21
- data/lib/rdf/tabular/uax35.rb +1 -1
- data/lib/rdf/tabular.rb +3 -3
- data/spec/metadata_spec.rb +85 -8
- data/spec/reader_spec.rb +2 -2
- data/spec/spec_helper.rb +20 -8
- data/spec/suite_helper.rb +2 -2
- data/spec/suite_spec.rb +4 -5
- metadata +99 -149
- data/lib/rdf/tabular/json.rb +0 -0
data/lib/rdf/tabular/format.rb
CHANGED
@@ -22,7 +22,7 @@ module RDF::Tabular
|
|
22
22
|
# @example Obtaining serialization format file extension mappings
|
23
23
|
# RDF::Format.file_extensions #=> {:csv => "text/csv"}
|
24
24
|
#
|
25
|
-
# @see
|
25
|
+
# @see https://www.w3.org/TR/rdf-testcases/#ntriples
|
26
26
|
class Format < RDF::Format
|
27
27
|
content_type 'text/csv;q=0.4',
|
28
28
|
extensions: [:csv, :tsv],
|
@@ -62,7 +62,7 @@ module RDF::Tabular
|
|
62
62
|
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
63
63
|
out = opts[:output] || $stdout
|
64
64
|
out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
|
65
|
-
RDF::CLI.parse(argv, opts) do |reader|
|
65
|
+
RDF::CLI.parse(argv, **opts) do |reader|
|
66
66
|
out.puts reader.to_json
|
67
67
|
end
|
68
68
|
end
|
data/lib/rdf/tabular/literal.rb
CHANGED
@@ -13,7 +13,7 @@ module RDF::Tabular
|
|
13
13
|
##
|
14
14
|
# @param [Object] value
|
15
15
|
# @option options [String] :lexical (nil)
|
16
|
-
def initialize(value, options
|
16
|
+
def initialize(value, **options)
|
17
17
|
@datatype = options[:datatype] || DATATYPE
|
18
18
|
@string = options[:lexical] if options.has_key?(:lexical)
|
19
19
|
if value.is_a?(String)
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -16,7 +16,7 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
16
16
|
# * Return Column-level annotations
|
17
17
|
# * Return row iterator with column information
|
18
18
|
#
|
19
|
-
# @author [Gregg Kellogg](
|
19
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
20
20
|
module RDF::Tabular
|
21
21
|
class Metadata
|
22
22
|
include RDF::Util::Logger
|
@@ -136,15 +136,15 @@ module RDF::Tabular
|
|
136
136
|
# see `RDF::Util::File.open_file` in RDF.rb and {new}
|
137
137
|
# @yield [Metadata]
|
138
138
|
# @raise [IOError] if file not found
|
139
|
-
def self.open(path, options
|
139
|
+
def self.open(path, **options)
|
140
140
|
options = options.merge(
|
141
141
|
headers: {
|
142
142
|
'Accept' => 'application/ld+json, application/json'
|
143
143
|
}
|
144
144
|
)
|
145
145
|
path = "file:" + path unless path =~ /^\w+:/
|
146
|
-
RDF::Util::File.open_file(path, options) do |file|
|
147
|
-
self.new(file, options.merge(base: path, filenames: path))
|
146
|
+
RDF::Util::File.open_file(path, **options) do |file|
|
147
|
+
self.new(file, **options.merge(base: path, filenames: path))
|
148
148
|
end
|
149
149
|
end
|
150
150
|
|
@@ -173,16 +173,16 @@ module RDF::Tabular
|
|
173
173
|
# @option options [RDF::URI] :base
|
174
174
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
175
175
|
# @return [Metadata]
|
176
|
-
def self.for_input(input, options
|
176
|
+
def self.for_input(input, **options)
|
177
177
|
base = options[:base]
|
178
178
|
|
179
179
|
# Use user metadata, if provided
|
180
180
|
metadata = case options[:metadata]
|
181
181
|
when Metadata then options[:metadata]
|
182
182
|
when Hash
|
183
|
-
Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
183
|
+
Metadata.new(options[:metadata], **options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
184
184
|
when String, RDF::URI
|
185
|
-
Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
185
|
+
Metadata.open(options[:metadata], **options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
186
186
|
end
|
187
187
|
|
188
188
|
# Search for metadata until found
|
@@ -191,13 +191,13 @@ module RDF::Tabular
|
|
191
191
|
if !metadata && input.respond_to?(:links) &&
|
192
192
|
link = input.links.find_link(%w(rel describedby))
|
193
193
|
link_loc = RDF::URI(base).join(link.href).to_s
|
194
|
-
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
194
|
+
md = Metadata.open(link_loc, **options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
195
195
|
if md
|
196
196
|
# Metadata must describe file to be useful
|
197
197
|
if md.describes_file?(base)
|
198
198
|
metadata = md
|
199
199
|
else
|
200
|
-
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", **options)
|
201
201
|
end
|
202
202
|
end
|
203
203
|
end
|
@@ -206,28 +206,30 @@ module RDF::Tabular
|
|
206
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
207
207
|
if !metadata && base
|
208
208
|
templates = site_wide_config(base)
|
209
|
-
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
209
|
+
log_debug("for_input", **options) {"templates: #{templates.map(&:to_s).inspect}"}
|
210
210
|
locs = templates.map do |template|
|
211
211
|
t = Addressable::Template.new(template)
|
212
|
-
|
212
|
+
mapped = t.expand(url: base).to_s
|
213
|
+
mapped = RDF::URI.decode(mapped) if options[:decode_uri]
|
214
|
+
RDF::URI(base).join(mapped)
|
213
215
|
end
|
214
|
-
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
216
|
+
log_debug("for_input", **options) {"locs: #{locs.map(&:to_s).inspect}"}
|
215
217
|
|
216
218
|
locs.each do |loc|
|
217
219
|
metadata ||= begin
|
218
|
-
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
220
|
+
md = Metadata.open(loc, **options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
221
|
# Metadata must describe file to be useful
|
220
222
|
if md
|
221
223
|
# Metadata must describe file to be useful
|
222
224
|
if md.describes_file?(base)
|
223
225
|
md
|
224
226
|
else
|
225
|
-
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
227
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", **options)
|
226
228
|
nil
|
227
229
|
end
|
228
230
|
end
|
229
231
|
rescue IOError
|
230
|
-
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
232
|
+
log_debug("for_input", **options) {"failed to load found metadata #{loc}: #{$!}"}
|
231
233
|
nil
|
232
234
|
end
|
233
235
|
end
|
@@ -236,8 +238,8 @@ module RDF::Tabular
|
|
236
238
|
# Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
|
237
239
|
metadata = case
|
238
240
|
when metadata then metadata
|
239
|
-
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
|
240
|
-
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
|
241
|
+
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, **options)
|
242
|
+
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, **options)
|
241
243
|
end
|
242
244
|
|
243
245
|
# Make TableGroup, if not already
|
@@ -246,7 +248,7 @@ module RDF::Tabular
|
|
246
248
|
|
247
249
|
##
|
248
250
|
# @private
|
249
|
-
def self.new(input, options
|
251
|
+
def self.new(input, **options)
|
250
252
|
# Trivial case
|
251
253
|
return input if input.is_a?(Metadata)
|
252
254
|
|
@@ -297,7 +299,7 @@ module RDF::Tabular
|
|
297
299
|
end
|
298
300
|
|
299
301
|
md = klass.allocate
|
300
|
-
md.send(:initialize, object, options)
|
302
|
+
md.send(:initialize, object, **options)
|
301
303
|
md
|
302
304
|
rescue ::JSON::ParserError
|
303
305
|
raise Error, "Expected input to be a JSON Object"
|
@@ -314,11 +316,13 @@ module RDF::Tabular
|
|
314
316
|
# Context used for this metadata. Taken from input if not provided
|
315
317
|
# @option options [RDF::URI] :base
|
316
318
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
319
|
+
# @option options [Boolean] :decode_uri
|
320
|
+
# Decode %-encodings in the result of a URI Template operation.
|
317
321
|
# @option options [Boolean] :normalize normalize the object
|
318
322
|
# @option options [Boolean] :validate Strict metadata validation
|
319
323
|
# @raise [Error]
|
320
324
|
# @return [Metadata]
|
321
|
-
def initialize(input, options
|
325
|
+
def initialize(input, **options)
|
322
326
|
@options = options.dup
|
323
327
|
|
324
328
|
# Parent of this Metadata, if any
|
@@ -467,16 +471,16 @@ module RDF::Tabular
|
|
467
471
|
object[:tableSchema] = case value
|
468
472
|
when String
|
469
473
|
link = context.base.join(value).to_s
|
470
|
-
md = Schema.open(link,
|
474
|
+
md = Schema.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
471
475
|
md[:@id] ||= link
|
472
476
|
md
|
473
477
|
when Hash
|
474
|
-
Schema.new(value,
|
478
|
+
Schema.new(value, **@options.merge(parent: self, context: nil))
|
475
479
|
when Schema
|
476
480
|
value
|
477
481
|
else
|
478
482
|
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
479
|
-
Schema.new({},
|
483
|
+
Schema.new({}, **@options.merge(parent: self, context: nil))
|
480
484
|
end
|
481
485
|
end
|
482
486
|
|
@@ -491,7 +495,7 @@ module RDF::Tabular
|
|
491
495
|
when object[:dialect] then object[:dialect]
|
492
496
|
when parent then parent.dialect
|
493
497
|
when is_a?(Table) || is_a?(TableGroup)
|
494
|
-
d = Dialect.new({},
|
498
|
+
d = Dialect.new({}, **@options.merge(parent: self, context: nil))
|
495
499
|
self.dialect = d unless self.parent
|
496
500
|
d
|
497
501
|
else
|
@@ -514,11 +518,11 @@ module RDF::Tabular
|
|
514
518
|
@dialect = object[:dialect] = case value
|
515
519
|
when String
|
516
520
|
link = context.base.join(value).to_s
|
517
|
-
md = Metadata.open(link,
|
521
|
+
md = Metadata.open(link, **@options.merge(parent: self, context: nil, normalize: true))
|
518
522
|
md[:@id] ||= link
|
519
523
|
md
|
520
524
|
when Hash
|
521
|
-
Dialect.new(value,
|
525
|
+
Dialect.new(value, **@options.merge(parent: self, context: nil))
|
522
526
|
when Dialect
|
523
527
|
value
|
524
528
|
else
|
@@ -532,8 +536,8 @@ module RDF::Tabular
|
|
532
536
|
# @raise [Error] if datatype is not valid
|
533
537
|
def datatype=(value)
|
534
538
|
val = case value
|
535
|
-
when Hash then Datatype.new(value,
|
536
|
-
else Datatype.new({base: value},
|
539
|
+
when Hash then Datatype.new(value, **@options.merge(parent: self))
|
540
|
+
else Datatype.new({base: value}, **@options.merge(parent: self))
|
537
541
|
end
|
538
542
|
|
539
543
|
if val.valid? || value.is_a?(Hash)
|
@@ -564,7 +568,7 @@ module RDF::Tabular
|
|
564
568
|
end
|
565
569
|
|
566
570
|
##
|
567
|
-
# Validate metadata, raising an error containing all errors detected during validation
|
571
|
+
# Validate metadata and content, raising an error containing all errors detected during validation
|
568
572
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
569
573
|
# @return [self]
|
570
574
|
def validate
|
@@ -872,7 +876,7 @@ module RDF::Tabular
|
|
872
876
|
csv << data unless data.empty?
|
873
877
|
end
|
874
878
|
else
|
875
|
-
csv = ::CSV.new(input, csv_options)
|
879
|
+
csv = ::CSV.new(input, **csv_options)
|
876
880
|
# Skip skipRows and headerRowCount
|
877
881
|
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
882
|
(1..skipped).each {csv.shift}
|
@@ -891,7 +895,7 @@ module RDF::Tabular
|
|
891
895
|
next
|
892
896
|
end
|
893
897
|
number += 1
|
894
|
-
row = Row.new(data, self, number, number + skipped,
|
898
|
+
row = Row.new(data, self, number, number + skipped, **@options)
|
895
899
|
(self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
|
896
900
|
yield(row)
|
897
901
|
end
|
@@ -1036,13 +1040,13 @@ module RDF::Tabular
|
|
1036
1040
|
end
|
1037
1041
|
index = 0
|
1038
1042
|
object_columns.all? do |cb|
|
1039
|
-
ca = non_virtual_columns[index] || Column.new({},
|
1043
|
+
ca = non_virtual_columns[index] || Column.new({}, **@options)
|
1040
1044
|
ta = ca.titles || {}
|
1041
1045
|
tb = cb.titles || {}
|
1042
1046
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1043
1047
|
true
|
1044
1048
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1045
|
-
raise Error, "
|
1049
|
+
raise Error, "Column #{index + 1} doesn't match on name: #{ca.name || 'no name'}, #{cb.name || 'no name'}" unless ca.name == cb.name
|
1046
1050
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1047
1051
|
# If validating, column compatibility requires strict match between titles
|
1048
1052
|
titles_match = case
|
@@ -1066,10 +1070,10 @@ module RDF::Tabular
|
|
1066
1070
|
true
|
1067
1071
|
elsif !@options[:validate]
|
1068
1072
|
# If not validating, columns don't match, but processing continues
|
1069
|
-
log_warn "
|
1073
|
+
log_warn "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1070
1074
|
true
|
1071
1075
|
else
|
1072
|
-
raise Error, "
|
1076
|
+
raise Error, "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
|
1073
1077
|
end
|
1074
1078
|
end
|
1075
1079
|
index += 1
|
@@ -1235,13 +1239,13 @@ module RDF::Tabular
|
|
1235
1239
|
end
|
1236
1240
|
|
1237
1241
|
# General setter for array properties
|
1238
|
-
def set_array_value(key, value, klass, options
|
1242
|
+
def set_array_value(key, value, klass, **options)
|
1239
1243
|
object[key] = case value
|
1240
1244
|
when Array
|
1241
1245
|
value.map do |v|
|
1242
1246
|
case v
|
1243
1247
|
when Hash
|
1244
|
-
klass.new(v,
|
1248
|
+
klass.new(v, **@options.merge(options).merge(parent: self, context: nil))
|
1245
1249
|
else v
|
1246
1250
|
end
|
1247
1251
|
end
|
@@ -1282,11 +1286,11 @@ module RDF::Tabular
|
|
1282
1286
|
class DebugContext
|
1283
1287
|
include RDF::Util::Logger
|
1284
1288
|
end
|
1285
|
-
def self.log_debug(*args, &block)
|
1286
|
-
DebugContext.new.log_debug(*args, &block)
|
1289
|
+
def self.log_debug(*args, **options, &block)
|
1290
|
+
DebugContext.new.log_debug(*args, **options, &block)
|
1287
1291
|
end
|
1288
|
-
def self.log_warn(*args)
|
1289
|
-
DebugContext.new.log_warn(*args)
|
1292
|
+
def self.log_warn(*args, **options)
|
1293
|
+
DebugContext.new.log_warn(*args, **options)
|
1290
1294
|
end
|
1291
1295
|
end
|
1292
1296
|
|
@@ -1434,7 +1438,7 @@ module RDF::Tabular
|
|
1434
1438
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1435
1439
|
ctx = @context
|
1436
1440
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1437
|
-
tg = TableGroup.new(content,
|
1441
|
+
tg = TableGroup.new(content, **@options.merge(context: ctx, filenames: @filenames, base: base))
|
1438
1442
|
@parent = tg # Link from parent
|
1439
1443
|
tg
|
1440
1444
|
end
|
@@ -1489,7 +1493,7 @@ module RDF::Tabular
|
|
1489
1493
|
number += 1
|
1490
1494
|
case v
|
1491
1495
|
when Hash
|
1492
|
-
Column.new(v,
|
1496
|
+
Column.new(v, **@options.merge(
|
1493
1497
|
table: (parent if parent.is_a?(Table)),
|
1494
1498
|
parent: self,
|
1495
1499
|
context: nil,
|
@@ -1621,8 +1625,8 @@ module RDF::Tabular
|
|
1621
1625
|
def name
|
1622
1626
|
self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
|
1623
1627
|
n = Array(ts).first
|
1624
|
-
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1625
|
-
n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1628
|
+
n0 = RDF::URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
|
1629
|
+
n1 = RDF::URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
|
1626
1630
|
"#{n0}#{n1}"
|
1627
1631
|
end || "_col.#{number}"
|
1628
1632
|
end
|
@@ -1783,12 +1787,12 @@ module RDF::Tabular
|
|
1783
1787
|
# @option options [String] :lang, language to set in table, if any
|
1784
1788
|
# @return [Metadata] Tabular metadata
|
1785
1789
|
# @see http://w3c.github.io/csvw/syntax/#parsing
|
1786
|
-
def embedded_metadata(input, metadata, options
|
1790
|
+
def embedded_metadata(input, metadata, **options)
|
1787
1791
|
options = options.dup
|
1788
1792
|
options.delete(:context) # Don't accidentally use a passed context
|
1789
1793
|
# Normalize input to an IO object
|
1790
1794
|
if input.is_a?(String)
|
1791
|
-
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, options.merge(base: input.to_s))}
|
1795
|
+
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, **options.merge(base: input.to_s))}
|
1792
1796
|
end
|
1793
1797
|
|
1794
1798
|
table = {
|
@@ -1826,7 +1830,7 @@ module RDF::Tabular
|
|
1826
1830
|
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
1831
|
# Skip columns
|
1828
1832
|
skipCols = skipColumns.to_i
|
1829
|
-
next if index < skipCols
|
1833
|
+
next if index < skipCols || value.to_s.empty?
|
1830
1834
|
|
1831
1835
|
# Trim value
|
1832
1836
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1837,11 +1841,11 @@ module RDF::Tabular
|
|
1837
1841
|
column = columns[index - skipCols] ||= {
|
1838
1842
|
"titles" => {lang => []},
|
1839
1843
|
}
|
1840
|
-
column["titles"][lang] << value
|
1844
|
+
column["titles"][lang] << value if value
|
1841
1845
|
end
|
1842
1846
|
end
|
1843
1847
|
else
|
1844
|
-
csv = ::CSV.new(input, csv_options)
|
1848
|
+
csv = ::CSV.new(input, **csv_options)
|
1845
1849
|
(1..skipRows.to_i).each do
|
1846
1850
|
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1847
1851
|
# Trim value
|
@@ -1858,7 +1862,7 @@ module RDF::Tabular
|
|
1858
1862
|
Array(row_data).each_with_index do |value, index|
|
1859
1863
|
# Skip columns
|
1860
1864
|
skipCols = skipColumns.to_i
|
1861
|
-
next if index < skipCols
|
1865
|
+
next if index < skipCols || value.to_s.empty?
|
1862
1866
|
|
1863
1867
|
# Trim value
|
1864
1868
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
@@ -1876,7 +1880,7 @@ module RDF::Tabular
|
|
1876
1880
|
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1877
1881
|
input.rewind if input.respond_to?(:rewind)
|
1878
1882
|
|
1879
|
-
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1883
|
+
Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1880
1884
|
end
|
1881
1885
|
end
|
1882
1886
|
|
@@ -1947,13 +1951,14 @@ module RDF::Tabular
|
|
1947
1951
|
class Row
|
1948
1952
|
# Class for returning values
|
1949
1953
|
Cell = Struct.new(:table, :column, :row, :stringValue, :aboutUrl, :propertyUrl, :valueUrl, :value, :errors) do
|
1950
|
-
def set_urls(mapped_values)
|
1954
|
+
def set_urls(mapped_values, decode_uri)
|
1951
1955
|
%w(aboutUrl propertyUrl valueUrl).each do |prop|
|
1952
1956
|
# If the cell value is nil, and it is not a virtual column
|
1953
1957
|
next if prop == "valueUrl" && value.nil? && !column.virtual
|
1954
1958
|
if v = column.send(prop.to_sym)
|
1955
1959
|
t = Addressable::Template.new(v)
|
1956
1960
|
mapped = t.expand(mapped_values).to_s
|
1961
|
+
mapped = RDF::URI.decode(mapped) if decode_uri
|
1957
1962
|
# FIXME: don't expand here, do it in CSV2RDF
|
1958
1963
|
url = row.context.expand_iri(mapped, documentRelative: true)
|
1959
1964
|
self.send("#{prop}=".to_sym, url)
|
@@ -2026,7 +2031,7 @@ module RDF::Tabular
|
|
2026
2031
|
# @param [Hash{Symbol => Object}] options ({})
|
2027
2032
|
# @option options [Boolean] :validate check for PK/FK consistency
|
2028
2033
|
# @return [Row]
|
2029
|
-
def initialize(row, metadata, number, source_number, options
|
2034
|
+
def initialize(row, metadata, number, source_number, **options)
|
2030
2035
|
@table = metadata
|
2031
2036
|
@number = number
|
2032
2037
|
@sourceNumber = source_number
|
@@ -2058,13 +2063,13 @@ module RDF::Tabular
|
|
2058
2063
|
|
2059
2064
|
# create column if necessary
|
2060
2065
|
columns[index - skipColumns] ||=
|
2061
|
-
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2066
|
+
Column.new({}, **options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2062
2067
|
|
2063
2068
|
column = columns[index - skipColumns]
|
2064
2069
|
|
2065
2070
|
@values << cell = Cell.new(metadata, column, self, value)
|
2066
2071
|
|
2067
|
-
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2072
|
+
datatype = column.datatype || Datatype.new({base: "string"}, **options.merge(parent: column))
|
2068
2073
|
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2069
2074
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2070
2075
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -2110,11 +2115,11 @@ module RDF::Tabular
|
|
2110
2115
|
# Map URLs for row
|
2111
2116
|
@values.each_with_index do |cell, index|
|
2112
2117
|
mapped_values = map_values.merge(
|
2113
|
-
"_name" =>
|
2118
|
+
"_name" => CGI.unescape(cell.column.name),
|
2114
2119
|
"_column" => cell.column.number,
|
2115
2120
|
"_sourceColumn" => cell.column.sourceNumber
|
2116
2121
|
)
|
2117
|
-
cell.set_urls(mapped_values)
|
2122
|
+
cell.set_urls(mapped_values, options[:decode_uri])
|
2118
2123
|
end
|
2119
2124
|
end
|
2120
2125
|
|
data/lib/rdf/tabular/reader.rb
CHANGED
@@ -5,7 +5,7 @@ module RDF::Tabular
|
|
5
5
|
##
|
6
6
|
# A Tabular Data to RDF parser in Ruby.
|
7
7
|
#
|
8
|
-
# @author [Gregg Kellogg](
|
8
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
9
9
|
class Reader < RDF::Reader
|
10
10
|
format Format
|
11
11
|
include RDF::Util::Logger
|
@@ -22,7 +22,7 @@ module RDF::Tabular
|
|
22
22
|
|
23
23
|
##
|
24
24
|
# Writer options
|
25
|
-
# @see
|
25
|
+
# @see https://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Writer#options-class_method
|
26
26
|
def self.options
|
27
27
|
super + [
|
28
28
|
RDF::CLI::Option.new(
|
@@ -43,6 +43,13 @@ module RDF::Tabular
|
|
43
43
|
control: :checkbox,
|
44
44
|
on: ["--no-prov"],
|
45
45
|
description: "do not output optional provenance information.") {true},
|
46
|
+
RDF::CLI::Option.new(
|
47
|
+
symbol: :decode_uri,
|
48
|
+
datatype: TrueClass,
|
49
|
+
control: :checkbox,
|
50
|
+
on: ["--decode-uri"],
|
51
|
+
description: "decode %-encodings in the result of a URI Template operation."
|
52
|
+
)
|
46
53
|
]
|
47
54
|
end
|
48
55
|
|
@@ -54,16 +61,18 @@ module RDF::Tabular
|
|
54
61
|
# or an Array used as an internalized array of arrays
|
55
62
|
# @param [Hash{Symbol => Object}] options
|
56
63
|
# any additional options (see `RDF::Reader#initialize`)
|
64
|
+
# @option options [Boolean] :decode_uri
|
65
|
+
# Decode %-encodings in the result of a URI Template operation.
|
66
|
+
# @option options [Array<Hash>] :fks_referencing_table
|
67
|
+
# When called with Table metadata, a list of the foreign keys referencing this table
|
57
68
|
# @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
|
58
69
|
# @option options [Boolean] :minimal includes only the information gleaned from the cells of the tabular data
|
59
70
|
# @option options [Boolean] :noProv do not output optional provenance information
|
60
|
-
# @option optinons [Array<Hash>] :fks_referencing_table
|
61
|
-
# When called with Table metadata, a list of the foreign keys referencing this table
|
62
71
|
# @yield [reader] `self`
|
63
72
|
# @yieldparam [RDF::Reader] reader
|
64
73
|
# @yieldreturn [void] ignored
|
65
74
|
# @raise [RDF::ReaderError] if the CSV document cannot be loaded
|
66
|
-
def initialize(input = $stdin, options
|
75
|
+
def initialize(input = $stdin, **options, &block)
|
67
76
|
super do
|
68
77
|
# Base would be how we are to take this
|
69
78
|
@options[:base] ||= base_uri.to_s if base_uri
|
@@ -71,7 +80,7 @@ module RDF::Tabular
|
|
71
80
|
@options[:base] ||= input.path if input.respond_to?(:path)
|
72
81
|
@options[:base] ||= input.filename if input.respond_to?(:filename)
|
73
82
|
if RDF::URI(@options[:base]).relative? && File.exist?(@options[:base].to_s)
|
74
|
-
@options[:base] = "file:/#{File.expand_path(@options[:base])}"
|
83
|
+
@options[:base] = RDF::URI("file:/#{File.expand_path(@options[:base])}").normalize
|
75
84
|
end
|
76
85
|
|
77
86
|
log_debug("Reader#initialize") {"input: #{input.inspect}, base: #{@options[:base]}"}
|
@@ -89,7 +98,7 @@ module RDF::Tabular
|
|
89
98
|
# If input is JSON, then the input is the metadata
|
90
99
|
content_type = @input.respond_to?(:content_type) ? @input.content_type : ""
|
91
100
|
if @options[:base] =~ /\.json(?:ld)?$/ || content_type =~ %r(application/(csvm\+|ld\+)?json)
|
92
|
-
@metadata = Metadata.new(@input,
|
101
|
+
@metadata = Metadata.new(@input, filenames: @options[:base], **@options)
|
93
102
|
# If @metadata is for a Table, turn it into a TableGroup
|
94
103
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
95
104
|
@metadata.normalize!
|
@@ -102,7 +111,7 @@ module RDF::Tabular
|
|
102
111
|
def script.content_type; "application/csvm+json"; end
|
103
112
|
log_debug("Reader#initialize") {"Process HTML script block"}
|
104
113
|
@input = script
|
105
|
-
@metadata = Metadata.new(@input,
|
114
|
+
@metadata = Metadata.new(@input, filenames: @options[:base], **@options)
|
106
115
|
# If @metadata is for a Table, turn it into a TableGroup
|
107
116
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
108
117
|
@metadata.normalize!
|
@@ -119,7 +128,7 @@ module RDF::Tabular
|
|
119
128
|
dialect.separator = "\t" if (input.content_type == "text/tsv" rescue nil)
|
120
129
|
embed_options = @options.dup
|
121
130
|
embed_options[:lang] = dialect_metadata.lang if dialect_metadata.lang
|
122
|
-
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], embed_options)
|
131
|
+
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], **embed_options)
|
123
132
|
|
124
133
|
if (@metadata = @options[:metadata]) && @metadata.tableSchema
|
125
134
|
@metadata.verify_compatible!(embedded_metadata)
|
@@ -136,7 +145,7 @@ module RDF::Tabular
|
|
136
145
|
else
|
137
146
|
# It's tabular data. Find metadata and proceed as if it was specified in the first place
|
138
147
|
@options[:original_input] = @input unless @options[:metadata]
|
139
|
-
@input = @metadata = Metadata.for_input(@input,
|
148
|
+
@input = @metadata = Metadata.for_input(@input, **@options).normalize!
|
140
149
|
end
|
141
150
|
|
142
151
|
log_debug("Reader#initialize") {"input: #{input}, metadata: #{metadata.inspect}"}
|
@@ -186,7 +195,7 @@ module RDF::Tabular
|
|
186
195
|
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
187
196
|
table_resource = RDF::Node.new
|
188
197
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
189
|
-
Reader.new(options[:original_input], options.merge(
|
198
|
+
Reader.new(options[:original_input], **options.merge(
|
190
199
|
metadata: input.tables.first,
|
191
200
|
base: input.tables.first.url,
|
192
201
|
no_found_metadata: true,
|
@@ -205,7 +214,7 @@ module RDF::Tabular
|
|
205
214
|
end.flatten.compact
|
206
215
|
table_resource = table.id || RDF::Node.new
|
207
216
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
208
|
-
Reader.open(table.url, options.merge(
|
217
|
+
Reader.open(table.url, **options.merge(
|
209
218
|
metadata: table,
|
210
219
|
base: table.url,
|
211
220
|
no_found_metadata: true,
|
@@ -225,7 +234,7 @@ module RDF::Tabular
|
|
225
234
|
activity = RDF::Node.new
|
226
235
|
add_statement(0, table_group, RDF::Vocab::PROV.wasGeneratedBy, activity)
|
227
236
|
add_statement(0, activity, RDF.type, RDF::Vocab::PROV.Activity)
|
228
|
-
add_statement(0, activity, RDF::Vocab::PROV.wasAssociatedWith, RDF::URI("
|
237
|
+
add_statement(0, activity, RDF::Vocab::PROV.wasAssociatedWith, RDF::URI("https://rubygems.org/gems/rdf-tabular"))
|
229
238
|
add_statement(0, activity, RDF::Vocab::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
|
230
239
|
add_statement(0, activity, RDF::Vocab::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
|
231
240
|
|
@@ -421,9 +430,9 @@ module RDF::Tabular
|
|
421
430
|
|
422
431
|
res = if io
|
423
432
|
::JSON::dump_default_options = json_state
|
424
|
-
::JSON.dump(self.send(hash_fn, options), io)
|
433
|
+
::JSON.dump(self.send(hash_fn, **options), io)
|
425
434
|
else
|
426
|
-
hash = self.send(hash_fn, options)
|
435
|
+
hash = self.send(hash_fn, **options)
|
427
436
|
::JSON.generate(hash, json_state)
|
428
437
|
end
|
429
438
|
|
@@ -443,7 +452,7 @@ module RDF::Tabular
|
|
443
452
|
#
|
444
453
|
# @param [Hash{Symbol => Object}] options
|
445
454
|
# @return [Hash, Array]
|
446
|
-
def to_hash(options
|
455
|
+
def to_hash(**options)
|
447
456
|
# Construct metadata from that passed from file open, along with information from the file.
|
448
457
|
if input.is_a?(Metadata)
|
449
458
|
log_debug("each_statement: metadata") {input.inspect}
|
@@ -467,13 +476,13 @@ module RDF::Tabular
|
|
467
476
|
table_group['tables'] = tables
|
468
477
|
|
469
478
|
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
470
|
-
Reader.new(options[:original_input], options.merge(
|
479
|
+
Reader.new(options[:original_input], **options.merge(
|
471
480
|
metadata: input.tables.first,
|
472
481
|
base: input.tables.first.url,
|
473
482
|
minimal: minimal?,
|
474
483
|
no_found_metadata: true,
|
475
484
|
)) do |r|
|
476
|
-
case t = r.to_hash(options)
|
485
|
+
case t = r.to_hash(**options)
|
477
486
|
when Array then tables += t unless input.tables.first.suppressOutput
|
478
487
|
when Hash then tables << t unless input.tables.first.suppressOutput
|
479
488
|
end
|
@@ -481,13 +490,13 @@ module RDF::Tabular
|
|
481
490
|
else
|
482
491
|
input.each_table do |table|
|
483
492
|
next if table.suppressOutput && !validate?
|
484
|
-
Reader.open(table.url, options.merge(
|
493
|
+
Reader.open(table.url, **options.merge(
|
485
494
|
metadata: table,
|
486
495
|
base: table.url,
|
487
496
|
minimal: minimal?,
|
488
497
|
no_found_metadata: true,
|
489
498
|
)) do |r|
|
490
|
-
case t = r.to_hash(options)
|
499
|
+
case t = r.to_hash(**options)
|
491
500
|
when Array then tables += t unless table.suppressOutput
|
492
501
|
when Hash then tables << t unless table.suppressOutput
|
493
502
|
end
|
@@ -560,7 +569,7 @@ module RDF::Tabular
|
|
560
569
|
co['@id'] = subject.to_s unless subject == 'null'
|
561
570
|
prop = case cell.propertyUrl
|
562
571
|
when RDF.type then '@type'
|
563
|
-
when nil then
|
572
|
+
when nil then CGI.unescape(column.name) # Use URI-decoded name
|
564
573
|
else
|
565
574
|
# Compact the property to a term or prefixed name
|
566
575
|
metadata.context.compact_iri(cell.propertyUrl, vocab: true)
|
data/lib/rdf/tabular/uax35.rb
CHANGED
data/lib/rdf/tabular.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
$:.unshift(File.expand_path("..", __FILE__))
|
2
|
-
require 'rdf' # @see
|
2
|
+
require 'rdf' # @see https://rubygems.org/gems/rdf
|
3
3
|
require 'csv'
|
4
4
|
|
5
5
|
module RDF
|
6
6
|
##
|
7
7
|
# **`RDF::Tabular`** is a Tabular/CSV extension for RDF.rb.
|
8
8
|
#
|
9
|
-
# @see
|
9
|
+
# @see https://w3c.github.io/csvw/
|
10
10
|
#
|
11
|
-
# @author [Gregg Kellogg](
|
11
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
12
12
|
module Tabular
|
13
13
|
require 'rdf/tabular/format'
|
14
14
|
autoload :Column, 'rdf/tabular/metadata'
|