rdf-tabular 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/rdf/tabular/metadata.rb +611 -659
- data/lib/rdf/tabular/reader.rb +59 -54
- data/spec/metadata_spec.rb +191 -376
- data/spec/suite_helper.rb +5 -0
- data/spec/suite_spec.rb +53 -39
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88feb17a025351dbc965fb07a24e7f62a605dfae
|
4
|
+
data.tar.gz: 2f13430c7c419d8a14f1c55b3cca451264f1c36d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f17e7e8dea99269719beca3ee0f7789040938b269f18e1e752a5c01f8027734a410523723ab421085971e14e1bf231dae001b99996a350c6cd323bb369d1994
|
7
|
+
data.tar.gz: 58e67ddfa7330a379a9da6fef50abd386857702c553e024a30e63bf7aca28caa323bec4bd9e6968386a47f6914eb2daf99043d00d819296cf3e70ad7d48a60e6
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.1.3
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -3,6 +3,7 @@ require 'json/ld'
|
|
3
3
|
require 'bcp47'
|
4
4
|
require 'addressable/template'
|
5
5
|
require 'rdf/xsd'
|
6
|
+
require 'yaml' # used by BCP47, which should have required it.
|
6
7
|
|
7
8
|
##
|
8
9
|
# CSVM Metadata processor
|
@@ -20,21 +21,38 @@ module RDF::Tabular
|
|
20
21
|
include Utils
|
21
22
|
|
22
23
|
# Hash representation
|
24
|
+
# @return [Hash<Symbol,Object>]
|
23
25
|
attr_accessor :object
|
24
26
|
|
27
|
+
# Warnings detected on initialization or when setting properties
|
28
|
+
# @return [Array<String>]
|
29
|
+
attr_accessor :warnings
|
30
|
+
|
25
31
|
# Inherited properties, valid for all types
|
26
32
|
INHERITED_PROPERTIES = {
|
27
|
-
|
28
|
-
|
29
|
-
textDirection: :atomic,
|
30
|
-
separator: :atomic,
|
33
|
+
aboutUrl: :uri_template,
|
34
|
+
datatype: :atomic,
|
31
35
|
default: :atomic,
|
36
|
+
lang: :atomic,
|
37
|
+
null: :atomic,
|
32
38
|
ordered: :atomic,
|
33
|
-
datatype: :atomic,
|
34
|
-
aboutUrl: :uri_template,
|
35
39
|
propertyUrl: :uri_template,
|
40
|
+
required: :atomic,
|
41
|
+
separator: :atomic,
|
42
|
+
textDirection: :atomic,
|
36
43
|
valueUrl: :uri_template,
|
37
44
|
}.freeze
|
45
|
+
INHERITED_DEFAULTS = {
|
46
|
+
aboutUrl: "".freeze,
|
47
|
+
default: "".freeze,
|
48
|
+
lang: "und",
|
49
|
+
null: "".freeze,
|
50
|
+
ordered: false,
|
51
|
+
propertyUrl: "".freeze,
|
52
|
+
required: false,
|
53
|
+
textDirection: "ltr".freeze,
|
54
|
+
valueUrl: "".freeze,
|
55
|
+
}.freeze
|
38
56
|
|
39
57
|
# Valid datatypes
|
40
58
|
DATATYPES = {
|
@@ -143,8 +161,8 @@ module RDF::Tabular
|
|
143
161
|
def self.for_input(input, options = {})
|
144
162
|
base = options[:base]
|
145
163
|
|
146
|
-
# Use user metadata
|
147
|
-
|
164
|
+
# Use user metadata, if provided
|
165
|
+
metadata = case options[:metadata]
|
148
166
|
when Metadata then options[:metadata]
|
149
167
|
when Hash
|
150
168
|
Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
@@ -152,42 +170,37 @@ module RDF::Tabular
|
|
152
170
|
Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
153
171
|
end
|
154
172
|
|
155
|
-
|
173
|
+
# Search for metadata until found
|
156
174
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
locs << RDF::URI(base).join(link.href)
|
164
|
-
end
|
175
|
+
# load link metadata, if available
|
176
|
+
locs = []
|
177
|
+
if input.respond_to?(:links) &&
|
178
|
+
link = input.links.find_link(%w(rel describedby))
|
179
|
+
locs << RDF::URI(base).join(link.href)
|
180
|
+
end
|
165
181
|
|
166
|
-
|
167
|
-
|
168
|
-
|
182
|
+
if base
|
183
|
+
locs += [RDF::URI("#{base}-metadata.json"), RDF::URI(base).join("metadata.json")]
|
184
|
+
end
|
169
185
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
end
|
186
|
+
locs.each do |loc|
|
187
|
+
metadata ||= begin
|
188
|
+
Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
189
|
+
rescue
|
190
|
+
debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
191
|
+
nil
|
177
192
|
end
|
178
193
|
end
|
179
194
|
|
180
195
|
# Return either the merge of user- and found-metadata, any of these, or an empty TableGroup
|
181
196
|
metadata = case
|
182
|
-
when
|
183
|
-
when
|
184
|
-
|
185
|
-
when base then TableGroup.new({tables: [{url: base}]}, options)
|
186
|
-
else TableGroup.new({tables: []}, options)
|
197
|
+
when metadata then metadata
|
198
|
+
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
|
199
|
+
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: []}, options)
|
187
200
|
end
|
188
201
|
|
189
202
|
# Make TableGroup, if not already
|
190
|
-
metadata.is_a?(TableGroup) ? metadata : metadata.
|
203
|
+
metadata.is_a?(TableGroup) ? metadata : metadata.to_table_group
|
191
204
|
end
|
192
205
|
|
193
206
|
##
|
@@ -204,37 +217,32 @@ module RDF::Tabular
|
|
204
217
|
|
205
218
|
unless options[:parent]
|
206
219
|
# Add context, if not set (which it should be)
|
207
|
-
object['@context'] ||= options.delete(:@context) || options[:context]
|
220
|
+
object['@context'] ||= options.delete(:@context) || options[:context]
|
208
221
|
end
|
209
222
|
|
210
223
|
klass = case
|
211
224
|
when !self.equal?(RDF::Tabular::Metadata)
|
212
225
|
self # subclasses can be directly constructed without type dispatch
|
213
226
|
else
|
214
|
-
type = if options[:type]
|
215
|
-
type = options[:type].to_sym
|
216
|
-
raise Error, "If provided, type must be one of :TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect]" unless
|
217
|
-
[:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect].include?(type)
|
218
|
-
type
|
219
|
-
end
|
227
|
+
type = options[:type].to_sym if options[:type]
|
220
228
|
|
221
229
|
# Figure out type by @type
|
222
|
-
type ||= object['@type']
|
230
|
+
type ||= object['@type'].to_sym if object['@type']
|
223
231
|
|
224
|
-
# Figure out type by site
|
232
|
+
# Otherwise, Figure out type by site
|
225
233
|
object_keys = object.keys.map(&:to_s)
|
226
234
|
type ||= case
|
227
235
|
when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
|
228
236
|
when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
|
229
237
|
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
|
230
|
-
when %w(columns primaryKey foreignKeys
|
231
|
-
when %w(name
|
238
|
+
when %w(columns primaryKey foreignKeys).any? {|k| object_keys.include?(k)} then :Schema
|
239
|
+
when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
|
232
240
|
when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
|
233
241
|
when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
|
234
242
|
end
|
235
243
|
|
236
244
|
case type.to_s.to_sym
|
237
|
-
when :TableGroup then RDF::Tabular::TableGroup
|
245
|
+
when :TableGroup, :"" then RDF::Tabular::TableGroup
|
238
246
|
when :Table then RDF::Tabular::Table
|
239
247
|
when :Transformation then RDF::Tabular::Transformation
|
240
248
|
when :Schema then RDF::Tabular::Schema
|
@@ -265,14 +273,26 @@ module RDF::Tabular
|
|
265
273
|
# @return [Metadata]
|
266
274
|
def initialize(input, options = {})
|
267
275
|
@options = options.dup
|
276
|
+
@options[:depth] ||= 0
|
277
|
+
|
278
|
+
# Parent of this Metadata, if any
|
279
|
+
@parent = @options[:parent]
|
268
280
|
|
269
281
|
# Get context from input
|
270
282
|
# Optimize by using built-in version of context, and just extract @base, @lang
|
271
283
|
@context = case input['@context']
|
272
|
-
when Array
|
273
|
-
|
274
|
-
|
275
|
-
|
284
|
+
when Array
|
285
|
+
warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
286
|
+
LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
|
287
|
+
when Hash
|
288
|
+
warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
289
|
+
LOCAL_CONTEXT.parse(input['@context'])
|
290
|
+
when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT
|
291
|
+
else
|
292
|
+
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
293
|
+
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
294
|
+
LOCAL_CONTEXT
|
295
|
+
end
|
276
296
|
end
|
277
297
|
|
278
298
|
reason = @options.delete(:reason)
|
@@ -284,76 +304,23 @@ module RDF::Tabular
|
|
284
304
|
|
285
305
|
@context.base = @options[:base] if @context
|
286
306
|
|
287
|
-
@
|
307
|
+
if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
|
308
|
+
warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
|
309
|
+
@context.default_language = nil
|
310
|
+
end
|
311
|
+
|
288
312
|
@filenames = Array(@options[:filenames]).map {|fn| RDF::URI(fn)} if @options[:filenames]
|
289
313
|
@properties = self.class.const_get(:PROPERTIES)
|
290
314
|
@required = self.class.const_get(:REQUIRED)
|
291
315
|
|
292
316
|
@object = {}
|
293
317
|
|
294
|
-
# Parent of this Metadata, if any
|
295
|
-
@parent = @options[:parent]
|
296
|
-
|
297
318
|
depth do
|
298
319
|
# Input was parsed in .new
|
299
320
|
# Metadata is object with symbolic keys
|
300
321
|
input.each do |key, value|
|
301
322
|
key = key.to_sym
|
302
323
|
case key
|
303
|
-
when :columns
|
304
|
-
# An array of template specifications that provide mechanisms to transform the tabular data into other formats
|
305
|
-
object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
|
306
|
-
number = 0
|
307
|
-
value.map do |v|
|
308
|
-
number += 1
|
309
|
-
Column.new(v, @options.merge(table: (parent if parent.is_a?(Table)), parent: self, context: nil, number: number))
|
310
|
-
end
|
311
|
-
else
|
312
|
-
# Invalid, but preserve value
|
313
|
-
value
|
314
|
-
end
|
315
|
-
when :datatype
|
316
|
-
self.datatype = value
|
317
|
-
when :dialect
|
318
|
-
# If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
|
319
|
-
object[key] = case value
|
320
|
-
when String then Dialect.open(base.join(value), @options.merge(parent: self, context: nil))
|
321
|
-
when Hash then Dialect.new(value, @options.merge(parent: self, context: nil))
|
322
|
-
else
|
323
|
-
# Invalid, but preserve value
|
324
|
-
value
|
325
|
-
end
|
326
|
-
@type ||= :Table
|
327
|
-
when :tables
|
328
|
-
# An array of table descriptions for the tables in the group.
|
329
|
-
object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
|
330
|
-
value.map {|v| Table.new(v, @options.merge(parent: self, context: nil))}
|
331
|
-
else
|
332
|
-
# Invalid, but preserve value
|
333
|
-
value
|
334
|
-
end
|
335
|
-
when :tableSchema
|
336
|
-
# An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
|
337
|
-
# SPEC SUGGESTION: when loading a remote schema, assign @id from it's location if not already set
|
338
|
-
object[key] = case value
|
339
|
-
when String
|
340
|
-
link = base.join(value).to_s
|
341
|
-
s = Schema.open(link, @options.merge(parent: self, context: nil))
|
342
|
-
s[:@id] ||= link
|
343
|
-
s
|
344
|
-
when Hash then Schema.new(value, @options.merge(parent: self, context: nil))
|
345
|
-
else
|
346
|
-
# Invalid, but preserve value
|
347
|
-
value
|
348
|
-
end
|
349
|
-
when :transformations
|
350
|
-
# An array of template specifications that provide mechanisms to transform the tabular data into other formats
|
351
|
-
object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
|
352
|
-
value.map {|v| Transformation.new(v, @options.merge(parent: self, context: nil))}
|
353
|
-
else
|
354
|
-
# Invalid, but preserve value
|
355
|
-
value
|
356
|
-
end
|
357
324
|
when :url
|
358
325
|
# URL of CSV relative to metadata
|
359
326
|
object[:url] = value
|
@@ -361,10 +328,15 @@ module RDF::Tabular
|
|
361
328
|
@context.base = @url if @context # Use as base for expanding IRIs
|
362
329
|
when :@id
|
363
330
|
# metadata identifier
|
364
|
-
object[:@id] = value
|
365
|
-
|
331
|
+
object[:@id] = if value.is_a?(String)
|
332
|
+
value
|
333
|
+
else
|
334
|
+
warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
|
335
|
+
""
|
336
|
+
end
|
337
|
+
@id = base.join(object[:@id])
|
366
338
|
else
|
367
|
-
if @properties.has_key?(key)
|
339
|
+
if @properties.has_key?(key) || INHERITED_PROPERTIES.has_key?(key)
|
368
340
|
self.send("#{key}=".to_sym, value)
|
369
341
|
else
|
370
342
|
object[key] = value
|
@@ -383,9 +355,32 @@ module RDF::Tabular
|
|
383
355
|
end
|
384
356
|
|
385
357
|
# Setters
|
386
|
-
INHERITED_PROPERTIES.keys.each do |
|
387
|
-
define_method("#{
|
388
|
-
|
358
|
+
INHERITED_PROPERTIES.keys.each do |key|
|
359
|
+
define_method("#{key}=".to_sym) do |value|
|
360
|
+
invalid = case key
|
361
|
+
when :aboutUrl, :default, :propertyUrl, :valueUrl
|
362
|
+
"string" unless value.is_a?(String)
|
363
|
+
when :lang
|
364
|
+
"valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
|
365
|
+
when :null
|
366
|
+
# To be valid, it must be a string or array
|
367
|
+
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
368
|
+
when :ordered, :required
|
369
|
+
"boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
370
|
+
when :separator
|
371
|
+
"single character" unless value.nil? || value.is_a?(String) && value.length == 1
|
372
|
+
when :textDirection
|
373
|
+
"rtl or ltr" unless %(rtl ltr).include?(value)
|
374
|
+
when :datatype
|
375
|
+
# We handle this through a separate datatype= setter
|
376
|
+
end
|
377
|
+
|
378
|
+
if invalid
|
379
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
380
|
+
object[key] = default_value(key) unless default_value(key).nil?
|
381
|
+
else
|
382
|
+
object[key] = value
|
383
|
+
end
|
389
384
|
end
|
390
385
|
end
|
391
386
|
|
@@ -395,6 +390,32 @@ module RDF::Tabular
|
|
395
390
|
@context || (parent.context if parent)
|
396
391
|
end
|
397
392
|
|
393
|
+
def tables=(value)
|
394
|
+
set_array_value(:tables, value, Table)
|
395
|
+
end
|
396
|
+
|
397
|
+
# An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
|
398
|
+
# when loading a remote schema, assign @id from its location if not already set
|
399
|
+
def tableSchema=(value)
|
400
|
+
case value
|
401
|
+
when String
|
402
|
+
link = base.join(value).to_s
|
403
|
+
s = Schema.open(link, @options.merge(parent: self, context: nil))
|
404
|
+
s[:@id] ||= link
|
405
|
+
object[:tableSchema] = s
|
406
|
+
when Hash
|
407
|
+
object[:tableSchema] = Metadata.new(value, @options.merge(parent: self, context: nil))
|
408
|
+
when Schema
|
409
|
+
object[:tableSchema] = value
|
410
|
+
else
|
411
|
+
warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
412
|
+
end
|
413
|
+
end
|
414
|
+
|
415
|
+
def transformations=(value)
|
416
|
+
set_array_value(:transformations, value, Metadata)
|
417
|
+
end
|
418
|
+
|
398
419
|
# Treat `dialect` similar to an inherited property, but merge together values from Table and TableGroup
|
399
420
|
# @return [Dialect]
|
400
421
|
def dialect
|
@@ -421,23 +442,32 @@ module RDF::Tabular
|
|
421
442
|
end
|
422
443
|
end
|
423
444
|
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
445
|
+
# If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
|
446
|
+
@dialect = case value
|
447
|
+
when String
|
448
|
+
object[:dialect] = Metadata.open(base.join(value), @options.merge(parent: self, context: nil))
|
449
|
+
when Hash
|
450
|
+
object[:dialect] = Metadata.new(value, @options.merge(parent: self, context: nil))
|
451
|
+
when Dialect
|
428
452
|
object[:dialect] = value
|
429
453
|
else
|
430
|
-
|
431
|
-
|
454
|
+
warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
|
455
|
+
nil
|
432
456
|
end
|
433
457
|
end
|
434
458
|
|
435
459
|
# Set new datatype
|
436
460
|
# @return [Dialect]
|
437
461
|
def datatype=(value)
|
438
|
-
|
439
|
-
when Hash then Datatype.new(value)
|
440
|
-
else Datatype.new({base: value})
|
462
|
+
val = case value
|
463
|
+
when Hash then Datatype.new(value, parent: self)
|
464
|
+
else Datatype.new({base: value}, parent: self)
|
465
|
+
end
|
466
|
+
|
467
|
+
if val.valid?
|
468
|
+
object[:datatype] = val
|
469
|
+
else
|
470
|
+
warn "#{type} has invalid property 'datatype': expected a Datatype"
|
441
471
|
end
|
442
472
|
end
|
443
473
|
|
@@ -476,7 +506,7 @@ module RDF::Tabular
|
|
476
506
|
flatten.
|
477
507
|
select {|v| v.is_a?(Metadata)}.
|
478
508
|
map(&:warnings).
|
479
|
-
flatten).compact
|
509
|
+
flatten).compact.uniq
|
480
510
|
end
|
481
511
|
|
482
512
|
##
|
@@ -485,7 +515,7 @@ module RDF::Tabular
|
|
485
515
|
# @return [self]
|
486
516
|
def validate!
|
487
517
|
expected_props, required_props = @properties.keys, @required
|
488
|
-
errors
|
518
|
+
errors = []
|
489
519
|
|
490
520
|
unless is_a?(Dialect) || is_a?(Transformation)
|
491
521
|
expected_props = expected_props + INHERITED_PROPERTIES.keys
|
@@ -494,163 +524,88 @@ module RDF::Tabular
|
|
494
524
|
# It has only expected properties (exclude metadata)
|
495
525
|
check_keys = object.keys - [:"@id", :"@context"]
|
496
526
|
check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
|
497
|
-
|
527
|
+
warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
|
498
528
|
|
499
529
|
# It has required properties
|
500
|
-
errors << "#{type} missing required keys: #{(required_props
|
530
|
+
errors << "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
|
531
|
+
|
532
|
+
self.normalize!
|
501
533
|
|
502
534
|
# Every property is valid
|
503
535
|
object.keys.each do |key|
|
504
536
|
value = object[key]
|
505
537
|
case key
|
506
|
-
when :
|
507
|
-
|
508
|
-
@warnings << m
|
509
|
-
end
|
538
|
+
when :base
|
539
|
+
warn "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
|
510
540
|
when :columns
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
errors << e.message
|
517
|
-
end
|
541
|
+
value.each do |v|
|
542
|
+
begin
|
543
|
+
v.validate!
|
544
|
+
rescue Error => e
|
545
|
+
errors << e.message
|
518
546
|
end
|
519
|
-
column_names = value.map(&:name)
|
520
|
-
errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
521
|
-
else
|
522
|
-
errors << "#{type} has invalid property '#{key}': expected array of Columns"
|
523
|
-
end
|
524
|
-
when :commentPrefix, :delimiter, :quoteChar
|
525
|
-
unless value.is_a?(String) && value.length == 1
|
526
|
-
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
|
527
|
-
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
528
|
-
end
|
529
|
-
when :lineTerminators
|
530
|
-
unless value.is_a?(String)
|
531
|
-
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
|
532
|
-
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
533
547
|
end
|
534
|
-
|
535
|
-
|
548
|
+
column_names = value.map(&:name)
|
549
|
+
errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
550
|
+
when :dialect, :tables, :tableSchema, :transformations
|
551
|
+
Array(value).each do |t|
|
536
552
|
begin
|
537
|
-
|
553
|
+
t.validate!
|
538
554
|
rescue Error => e
|
539
555
|
errors << e.message
|
540
556
|
end
|
541
|
-
else
|
542
|
-
@warnings << "#{type} has invalid property '#{key}': expected a Datatype"
|
543
|
-
value = object[key] = nil
|
544
|
-
end
|
545
|
-
when :dialect
|
546
|
-
unless value.is_a?(Dialect)
|
547
|
-
errors << "#{type} has invalid property '#{key}': expected a Dialect Description"
|
548
|
-
end
|
549
|
-
begin
|
550
|
-
value.validate! if value
|
551
|
-
rescue Error => e
|
552
|
-
errors << e.message
|
553
|
-
end
|
554
|
-
when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
|
555
|
-
unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
556
|
-
@warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
|
557
|
-
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
558
|
-
end
|
559
|
-
when :required, :suppressOutput, :virtual
|
560
|
-
unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
561
|
-
@warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
|
562
|
-
object.delete(key)
|
563
|
-
end
|
564
|
-
when :encoding
|
565
|
-
unless (Encoding.find(value) rescue false)
|
566
|
-
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
|
567
557
|
end
|
568
558
|
when :foreignKeys
|
569
559
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
570
|
-
value.
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
560
|
+
value.each do |fk|
|
561
|
+
columnReference, reference = fk['columnReference'], fk['reference']
|
562
|
+
errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
|
563
|
+
errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
|
564
|
+
|
565
|
+
# Verify that columns exist in this schema
|
566
|
+
errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
|
567
|
+
Array(columnReference).each do |k|
|
568
|
+
errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
|
569
|
+
end
|
580
570
|
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
end
|
587
|
-
# resource is the URL of a Table in the TableGroup
|
588
|
-
ref = base.join(reference['resource']).to_s
|
589
|
-
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
590
|
-
errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
591
|
-
table.tableSchema if table
|
592
|
-
elsif reference.has_key?('schemaReference')
|
593
|
-
# resource is the @id of a Schema in the TableGroup
|
594
|
-
ref = base.join(reference['schemaReference']).to_s
|
595
|
-
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
596
|
-
case tables.length
|
597
|
-
when 0
|
598
|
-
errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
599
|
-
nil
|
600
|
-
when 1
|
601
|
-
tables.first.tableSchema
|
602
|
-
else
|
603
|
-
errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
|
604
|
-
nil
|
605
|
-
end
|
571
|
+
if reference.is_a?(Hash)
|
572
|
+
ref_cols = reference['columnReference']
|
573
|
+
schema = if reference.has_key?('resource')
|
574
|
+
if reference.has_key?('schemaReference')
|
575
|
+
errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
|
606
576
|
end
|
577
|
+
# resource is the URL of a Table in the TableGroup
|
578
|
+
ref = base.join(reference['resource']).to_s
|
579
|
+
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
580
|
+
errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
581
|
+
table.tableSchema if table
|
582
|
+
elsif reference.has_key?('schemaReference')
|
583
|
+
# resource is the @id of a Schema in the TableGroup
|
584
|
+
ref = base.join(reference['schemaReference']).to_s
|
585
|
+
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
586
|
+
case tables.length
|
587
|
+
when 0
|
588
|
+
errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
589
|
+
nil
|
590
|
+
when 1
|
591
|
+
tables.first.tableSchema
|
592
|
+
else
|
593
|
+
errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
|
594
|
+
nil
|
595
|
+
end
|
596
|
+
end
|
607
597
|
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
598
|
+
if schema
|
599
|
+
# ref_cols must exist in schema
|
600
|
+
errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
|
601
|
+
Array(ref_cols).each do |k|
|
602
|
+
errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
|
613
603
|
end
|
614
|
-
else
|
615
|
-
errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
616
604
|
end
|
617
605
|
else
|
618
|
-
errors << "#{type} has invalid property '#{key}': reference must be an object
|
606
|
+
errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
619
607
|
end
|
620
608
|
end
|
621
|
-
when :headerRowCount, :skipColumns, :skipRows
|
622
|
-
unless value.is_a?(Numeric) && value.integer? && value > 0
|
623
|
-
@warnings << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
|
624
|
-
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
625
|
-
end
|
626
|
-
when :base
|
627
|
-
@warnings << "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
|
628
|
-
when :format
|
629
|
-
unless value.is_a?(String)
|
630
|
-
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
|
631
|
-
object.delete(key)
|
632
|
-
end
|
633
|
-
when :length, :minLength, :maxLength
|
634
|
-
unless value.is_a?(Numeric) && value.integer? && value > 0
|
635
|
-
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
|
636
|
-
object.delete(key)
|
637
|
-
end
|
638
|
-
unless key == :length || value != object[:length]
|
639
|
-
# Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
|
640
|
-
errors << "#{type} has invalid property '#{key}': Use of both length and #{key} requires they be equal"
|
641
|
-
end
|
642
|
-
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
643
|
-
unless value.is_a?(Numeric) ||
|
644
|
-
RDF::Literal::Date.new(value.to_s).valid? ||
|
645
|
-
RDF::Literal::Time.new(value.to_s).valid? ||
|
646
|
-
RDF::Literal::DateTime.new(value.to_s).valid?
|
647
|
-
@warnings << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
|
648
|
-
object.delete(key)
|
649
|
-
end
|
650
|
-
when :name
|
651
|
-
unless value.is_a?(String) && name.match(NAME_SYNTAX)
|
652
|
-
errors << "#{type} has invalid property '#{key}': #{value}, expected proper name format"
|
653
|
-
end
|
654
609
|
when :notes
|
655
610
|
unless value.is_a?(Hash) || value.is_a?(Array)
|
656
611
|
errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
|
@@ -662,68 +617,17 @@ module RDF::Tabular
|
|
662
617
|
end
|
663
618
|
when :primaryKey
|
664
619
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
620
|
+
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
665
621
|
Array(value).each do |k|
|
666
|
-
errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c
|
667
|
-
end
|
668
|
-
when :tables
|
669
|
-
if value.is_a?(Array) && value.all? {|v| v.is_a?(Table)}
|
670
|
-
value.each do |t|
|
671
|
-
begin
|
672
|
-
t.validate!
|
673
|
-
rescue Error => e
|
674
|
-
errors << e.message
|
675
|
-
end
|
676
|
-
end
|
677
|
-
else
|
678
|
-
errors << "#{type} has invalid property '#{key}': expected array of Tables"
|
679
|
-
end
|
680
|
-
when :scriptFormat, :targetFormat
|
681
|
-
unless RDF::URI(value).valid?
|
682
|
-
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
|
683
|
-
end
|
684
|
-
when :source
|
685
|
-
unless %w(json rdf).include?(value) || value.nil?
|
686
|
-
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected json or rdf"
|
687
|
-
end
|
688
|
-
when :tableDirection
|
689
|
-
unless %w(rtl ltr default).include?(value)
|
690
|
-
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected rtl, ltr, or default"
|
622
|
+
errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
|
691
623
|
end
|
692
|
-
when
|
693
|
-
if value.is_a?(Schema)
|
694
|
-
begin
|
695
|
-
value.validate!
|
696
|
-
rescue Error => e
|
697
|
-
errors << e.message
|
698
|
-
end
|
699
|
-
else
|
700
|
-
errors << "#{type} has invalid property '#{key}': expected Schema"
|
701
|
-
end
|
702
|
-
when :transformations
|
703
|
-
if value.is_a?(Array) && value.all? {|v| v.is_a?(Transformation)}
|
704
|
-
value.each do |t|
|
705
|
-
begin
|
706
|
-
t.validate!
|
707
|
-
rescue Error => e
|
708
|
-
errors << e.message
|
709
|
-
end
|
710
|
-
end
|
711
|
-
else
|
712
|
-
errors << "#{type} has invalid property '#{key}': expected array of Transformations"
|
713
|
-
end
|
714
|
-
when :titles
|
715
|
-
valid_natural_language_property?(:titles, value) {|m| errors << m}
|
716
|
-
when :trim
|
717
|
-
unless %w(true false 1 0 start end).include?(value.to_s.downcase)
|
718
|
-
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected true, false, 1, 0, start or end"
|
719
|
-
end
|
720
|
-
when :url
|
721
|
-
# Only validate URL in validation mode; this allows for a nil URL
|
722
|
-
unless @url.valid?
|
723
|
-
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
|
724
|
-
end
|
725
|
-
when :@id, :@context
|
624
|
+
when :@context
|
726
625
|
# Skip these
|
626
|
+
when :@id
|
627
|
+
# Must not be a BNode
|
628
|
+
if value.to_s.start_with?("_:")
|
629
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:"
|
630
|
+
end
|
727
631
|
when :@type
|
728
632
|
unless value.to_sym == type
|
729
633
|
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
|
@@ -734,8 +638,6 @@ module RDF::Tabular
|
|
734
638
|
rescue Error => e
|
735
639
|
errors << "#{type} has invalid content '#{key}': #{e.message}"
|
736
640
|
end
|
737
|
-
else
|
738
|
-
warnings << "#{type} has invalid property '#{key}': unsupported property"
|
739
641
|
end
|
740
642
|
end
|
741
643
|
|
@@ -747,41 +649,18 @@ module RDF::Tabular
|
|
747
649
|
# Determine if a natural language property is valid
|
748
650
|
# @param [String, Array<String>, Hash{String => String}] value
|
749
651
|
# @yield message error message
|
750
|
-
# @return [
|
751
|
-
def valid_natural_language_property?(
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
# @param [String, Array<String>, Hash{String => String}] value
|
761
|
-
# @yield message error message
|
762
|
-
# @return [Boolean]
|
763
|
-
def valid_inherited_property?(key, value)
|
764
|
-
error = case key
|
765
|
-
when :aboutUrl, :default, :propertyUrl, :valueUrl
|
766
|
-
"string" unless value.is_a?(String)
|
767
|
-
when :lang
|
768
|
-
"valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
|
769
|
-
when :null
|
770
|
-
# To be valid, it must be a string or array
|
771
|
-
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
772
|
-
when :ordered
|
773
|
-
"boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
774
|
-
when :separator
|
775
|
-
"single character" unless value.nil? || value.is_a?(String) && value.length == 1
|
776
|
-
when :textDirection
|
777
|
-
"rtl or ltr" unless %(rtl ltr).include?(value)
|
778
|
-
end
|
779
|
-
|
780
|
-
if error
|
781
|
-
yield "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{error}"
|
782
|
-
false
|
652
|
+
# @return [String, nil]
|
653
|
+
def valid_natural_language_property?(value)
|
654
|
+
case value
|
655
|
+
when String
|
656
|
+
when Array
|
657
|
+
"a valid natural language property" unless value.all? {|v| v.is_a?(String)}
|
658
|
+
when Hash
|
659
|
+
"a valid natural language property" if
|
660
|
+
value.keys.any? {|k| k.to_s != "und" && !BCP47::Language.identify(k)} ||
|
661
|
+
value.values.any? {|v| valid_natural_language_property?(v).is_a?(String)}
|
783
662
|
else
|
784
|
-
|
663
|
+
"a valid natural language property"
|
785
664
|
end
|
786
665
|
end
|
787
666
|
|
@@ -801,7 +680,6 @@ module RDF::Tabular
|
|
801
680
|
v = data.join(' ')[1..-1].strip
|
802
681
|
unless v.empty?
|
803
682
|
(self["rdfs:comment"] ||= []) << v
|
804
|
-
yield RDF::Statement.new(nil, RDF::RDFS.comment, RDF::Literal(v))
|
805
683
|
end
|
806
684
|
skipped += 1
|
807
685
|
next
|
@@ -895,204 +773,50 @@ module RDF::Tabular
|
|
895
773
|
object.keys.any? {|k| k.to_s.include?(':')}
|
896
774
|
end
|
897
775
|
|
898
|
-
#
|
899
|
-
# @param [
|
900
|
-
# @
|
901
|
-
def
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
when TableGroup then self.dup
|
906
|
-
when Table
|
907
|
-
if self.is_a?(Table) && self.parent
|
908
|
-
self.parent
|
909
|
-
else
|
910
|
-
content = {"@type" => "TableGroup", "tables" => [self]}
|
911
|
-
content['@context'] = object.delete(:@context) if object[:@context]
|
912
|
-
ctx = @context
|
913
|
-
self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
|
914
|
-
tg = TableGroup.new(content, filenames: @filenames, base: base)
|
915
|
-
@parent = tg # Link from parent
|
916
|
-
tg
|
917
|
-
end
|
918
|
-
else self.dup
|
919
|
-
end
|
920
|
-
|
921
|
-
# Merge all passed metadata into this
|
922
|
-
merged = metadata.reduce(this) do |memo, md|
|
923
|
-
md = case md
|
924
|
-
when TableGroup then md
|
925
|
-
when Table
|
926
|
-
if md.parent
|
927
|
-
md.parent
|
928
|
-
else
|
929
|
-
content = {"@type" => "TableGroup", "tables" => [md]}
|
930
|
-
ctx = md.context
|
931
|
-
content['@context'] = md.object.delete(:@context) if md.object[:@context]
|
932
|
-
md.remove_instance_variable(:@context) if md.instance_variables.include?(:@context)
|
933
|
-
tg = TableGroup.new(content, filenames: md.filenames, base: md.base)
|
934
|
-
md.instance_variable_set(:@parent, tg) # Link from parent
|
935
|
-
tg
|
936
|
-
end
|
937
|
-
else
|
938
|
-
md
|
939
|
-
end
|
940
|
-
|
941
|
-
raise "Can't merge #{memo.class} with #{md.class}" unless memo.class == md.class
|
942
|
-
|
943
|
-
memo.merge!(md)
|
944
|
-
end
|
945
|
-
|
946
|
-
# Set @context of merged
|
947
|
-
merged[:@context] = 'http://www.w3.org/ns/csvw'
|
948
|
-
merged
|
949
|
-
end
|
950
|
-
|
951
|
-
# Merge metadata into self
|
952
|
-
def merge!(metadata)
|
953
|
-
raise "Merging non-equivalent metadata types: #{self.class} vs #{metadata.class}" unless self.class == metadata.class
|
954
|
-
|
955
|
-
depth do
|
956
|
-
# Merge filenames
|
957
|
-
if @filenames || metadata.filenames
|
958
|
-
@filenames = (Array(@filenames) | Array(metadata.filenames)).uniq
|
776
|
+
# Verify that the metadata we're using is compatible with embedded metadata
|
777
|
+
# @param [Table] other
|
778
|
+
# @raise [Error] if not compatible
|
779
|
+
def verify_compatible!(other)
|
780
|
+
if self.is_a?(TableGroup)
|
781
|
+
unless tables.any? {|t| t.url == other.url && t.verify_compatible!(other)}
|
782
|
+
raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
959
783
|
end
|
960
|
-
|
961
|
-
#
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
tb = tb.dup
|
994
|
-
tb.instance_variable_set(:@parent, self)
|
995
|
-
debug("merge!: tables") {"add TB: #{tb.inspect}"}
|
996
|
-
object[key] << tb
|
997
|
-
end
|
998
|
-
end
|
999
|
-
when :transformations
|
1000
|
-
# SPEC CONFUSION: differing transformations with same @id?
|
1001
|
-
# When an array of template specifications B is imported into an original array of template specifications A, each template specification within B is combined into the original array A by:
|
1002
|
-
value.each do |t|
|
1003
|
-
if ta = object[key].detect {|e| e.targetFormat == t.targetFormat && e.scriptFormat == t.scriptFormat}
|
1004
|
-
# if there is a template specification with the same targetFormat and scriptFormat in A, the template specification from B is imported into the matching template specification in A
|
1005
|
-
ta.merge!(t)
|
1006
|
-
else
|
1007
|
-
# otherwise, the template specification from B is appended to the array of template specifications A
|
1008
|
-
t = t.dup
|
1009
|
-
t.instance_variable_set(:@parent, self) if self
|
1010
|
-
object[key] << t
|
1011
|
-
end
|
1012
|
-
end
|
1013
|
-
when :columns
|
1014
|
-
# When an array of column descriptions B is imported into an original array of column descriptions A, each column description within B is combined into the original array A by:
|
1015
|
-
Array(value).each_with_index do |cb, index|
|
1016
|
-
ca = object[key][index] || {}
|
1017
|
-
va = ([ca[:name]] + (ca[:titles] || {}).values.flatten).compact.map(&:downcase)
|
1018
|
-
vb = ([cb[:name]] + (cb[:titles] || {}).values.flatten).compact.map(&:downcase)
|
1019
|
-
if !(va & vb).empty?
|
1020
|
-
debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
|
1021
|
-
# If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
|
1022
|
-
ca.merge!(cb)
|
1023
|
-
elsif ca.nil? && cb.virtual
|
1024
|
-
debug("merge!: columns") {"index: #{index}, virtual"}
|
1025
|
-
# otherwise, if at a given index there is no column description within A, but there is a column description within B.
|
1026
|
-
cb = cb.dup
|
1027
|
-
cb.instance_variable_set(:@parent, self) if self
|
1028
|
-
object[key][index] = cb
|
1029
|
-
else
|
1030
|
-
debug("merge!: columns") {"index: #{index}, ignore"}
|
1031
|
-
raise Error, "Columns at same index don't match: #{ca.to_json} vs. #{cb.to_json}"
|
1032
|
-
end
|
1033
|
-
end
|
1034
|
-
# The number of non-virtual columns in A and B MUST be the same
|
1035
|
-
nA = object[key].reject(&:virtual).length
|
1036
|
-
nB = Array(value).reject(&:virtual).length
|
1037
|
-
raise Error, "Columns must have the same number of non-virtual columns" unless nA == nB || nB == 0
|
1038
|
-
when :foreignKeys
|
1039
|
-
# When an array of foreign key definitions B is imported into an original array of foreign key definitions A, each foreign key definition within B which does not appear within A is appended to the original array A.
|
1040
|
-
# SPEC CONFUSION: If definitions vary only a little, they should probably be merged (e.g. common properties).
|
1041
|
-
object[key] = object[key] + (metadata[key] - object[key])
|
1042
|
-
end
|
1043
|
-
when :object
|
1044
|
-
case key
|
1045
|
-
when :notes
|
1046
|
-
# If the property accepts arrays, the result is an array of objects or strings: those from A followed by those from B that were not already a value in A.
|
1047
|
-
a = object[key] || []
|
1048
|
-
object[key] = (a + value).uniq
|
1049
|
-
else
|
1050
|
-
# if the property only accepts single objects
|
1051
|
-
if object[key].is_a?(String) || value.is_a?(String)
|
1052
|
-
# if the value of the property in A is a string or the value from B is a string then the value from A overrides that from B
|
1053
|
-
object[key] ||= value
|
1054
|
-
elsif object[key].is_a?(Metadata)
|
1055
|
-
# otherwise (if both values as objects) the objects are merged as described here
|
1056
|
-
object[key].merge!(value)
|
1057
|
-
elsif object[key].is_a?(Hash)
|
1058
|
-
# otherwise (if both values as objects) the objects are merged as described here
|
1059
|
-
object[key].merge!(value)
|
1060
|
-
else
|
1061
|
-
value = value.dup
|
1062
|
-
value.instance_variable_set(:@parent, self) if self
|
1063
|
-
object[key] = value
|
1064
|
-
end
|
1065
|
-
end
|
1066
|
-
when :natural_language
|
1067
|
-
# If the property is a natural language property, the result is an object whose properties are language codes and where the values of those properties are arrays. The suitable language code for the values is either explicit within the existing value or determined through the default language in the metadata document; if it can't be determined the language code und should be used. The arrays should provide the values from A followed by those from B that were not already a value in A.
|
1068
|
-
a = object[key] || {}
|
1069
|
-
b = value
|
1070
|
-
debug("merge!: natural_language") {
|
1071
|
-
"A: #{a.inspect}, B: #{b.inspect}"
|
1072
|
-
}
|
1073
|
-
b.each do |k, v|
|
1074
|
-
a[k] = Array(a[k]) + (Array(b[k]) - Array(a[k]))
|
1075
|
-
end
|
1076
|
-
# eliminate titles with no language where the same string exists with a language
|
1077
|
-
if a.has_key?("und")
|
1078
|
-
a["und"] = a["und"].reject do |v|
|
1079
|
-
a.any? {|lang, values| lang != 'und' && values.include?(v)}
|
1080
|
-
end
|
1081
|
-
a.delete("und") if a["und"].empty?
|
1082
|
-
end
|
1083
|
-
object[key] = a
|
1084
|
-
when ->(k) {key == :@id}
|
1085
|
-
object[key] ||= value
|
1086
|
-
@id ||= metadata.id
|
1087
|
-
else
|
1088
|
-
# Otherwise, the value from A overrides that from B
|
1089
|
-
object[key] ||= value
|
1090
|
-
end
|
784
|
+
else
|
785
|
+
# Tables must have the same url
|
786
|
+
raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}" unless
|
787
|
+
url == other.url
|
788
|
+
|
789
|
+
# Each column description within B MUST match the corresponding column description in A for non-virtual columns
|
790
|
+
non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
|
791
|
+
object_columns = Array(other.tableSchema.columns)
|
792
|
+
|
793
|
+
# Special case: if there is no header, then there are no column definitions; allow this as being compatible
|
794
|
+
raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}" if
|
795
|
+
non_virtual_columns.length != object_columns.length && !object_columns.empty?
|
796
|
+
index = 0
|
797
|
+
object_columns.all? do |cb|
|
798
|
+
ca = non_virtual_columns[index]
|
799
|
+
va = ([ca[:name]] + case ca[:titles]
|
800
|
+
when String then [ca[:titles]]
|
801
|
+
when Array then ca[:titles]
|
802
|
+
when Hash then ca[:titles].values.flatten
|
803
|
+
else []
|
804
|
+
end).compact.map(&:downcase)
|
805
|
+
|
806
|
+
vb = ([cb[:name]] + case cb[:titles]
|
807
|
+
when String then [cb[:titles]]
|
808
|
+
when Array then cb[:titles]
|
809
|
+
when Hash then cb[:titles].values.flatten
|
810
|
+
else []
|
811
|
+
end).compact.map(&:downcase)
|
812
|
+
|
813
|
+
# If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description in B is compatible with the matching column description in A
|
814
|
+
raise Error, "Columns don't match: va: #{va}, vb: #{vb}" if (va & vb).empty?
|
815
|
+
debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
|
816
|
+
index += 1
|
1091
817
|
end
|
1092
818
|
end
|
1093
|
-
|
1094
|
-
debug("merge!") {self.inspect}
|
1095
|
-
self
|
819
|
+
true
|
1096
820
|
end
|
1097
821
|
|
1098
822
|
def inspect
|
@@ -1176,7 +900,8 @@ module RDF::Tabular
|
|
1176
900
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
1177
901
|
raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
|
1178
902
|
elsif value['@language'] && !BCP47::Language.identify(value['@language'])
|
1179
|
-
|
903
|
+
warn "Value object with @language must use valid language: #{value.to_json}" if @warnings
|
904
|
+
value.delete('@language')
|
1180
905
|
elsif value['@type'] && !context.expand_iri(value['@type'], vocab: true).absolute?
|
1181
906
|
raise Error, "Value object with @type must defined type: #{value.to_json}"
|
1182
907
|
end
|
@@ -1209,15 +934,48 @@ module RDF::Tabular
|
|
1209
934
|
end
|
1210
935
|
protected
|
1211
936
|
|
937
|
+
# Add a warning on this object
|
938
|
+
def warn(string)
|
939
|
+
debug("warn: #{string}")
|
940
|
+
(@warnings ||= []) << string
|
941
|
+
end
|
942
|
+
|
1212
943
|
# When setting a natural language property, always put in language-map form
|
1213
|
-
# @param [Symbol] prop
|
1214
944
|
# @param [Hash{String => String, Array<String>}, Array<String>, String] value
|
1215
945
|
# @return [Hash{String => Array<String>}]
|
1216
|
-
def set_nl(
|
1217
|
-
|
1218
|
-
when String then
|
1219
|
-
when Array then
|
1220
|
-
|
946
|
+
def set_nl(value)
|
947
|
+
case value
|
948
|
+
when String then value
|
949
|
+
when Array then value.select {|v| v.is_a?(String)}
|
950
|
+
when Hash
|
951
|
+
value.delete_if {|k, v| !BCP47::Language.identify(k)}
|
952
|
+
value.each do |k, v|
|
953
|
+
value[k] = Array(v).select {|vv| vv.is_a?(String)}
|
954
|
+
end
|
955
|
+
else nil
|
956
|
+
end
|
957
|
+
end
|
958
|
+
|
959
|
+
# General setter for array properties
|
960
|
+
def set_array_value(key, value, klass, options={})
|
961
|
+
object[key] = case value
|
962
|
+
when Array
|
963
|
+
value.map do |v|
|
964
|
+
case v
|
965
|
+
when Hash
|
966
|
+
klass.new(v, @options.merge(options).merge(parent: self, context: nil))
|
967
|
+
else v
|
968
|
+
end
|
969
|
+
end
|
970
|
+
else
|
971
|
+
warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
972
|
+
[]
|
973
|
+
end
|
974
|
+
|
975
|
+
unless object[key].all? {|v| v.is_a?(klass)}
|
976
|
+
warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
977
|
+
# Remove elements that aren't of the right types
|
978
|
+
object[key] = object[key].select! {|v| v.is_a?(klass)}
|
1221
979
|
end
|
1222
980
|
end
|
1223
981
|
|
@@ -1228,6 +986,10 @@ module RDF::Tabular
|
|
1228
986
|
end
|
1229
987
|
end
|
1230
988
|
|
989
|
+
def default_value(prop)
|
990
|
+
self.class.const_get(:DEFAULTS).merge(INHERITED_DEFAULTS)[prop]
|
991
|
+
end
|
992
|
+
|
1231
993
|
##
|
1232
994
|
# Get the root metadata object
|
1233
995
|
# @return [TableGroup, Table]
|
@@ -1268,17 +1030,27 @@ module RDF::Tabular
|
|
1268
1030
|
dialect: :object,
|
1269
1031
|
transformations: :array,
|
1270
1032
|
}.freeze
|
1271
|
-
|
1033
|
+
DEFAULTS = {
|
1034
|
+
tableDirection: "default".freeze,
|
1035
|
+
}.freeze
|
1036
|
+
REQUIRED = [:tables].freeze
|
1272
1037
|
|
1273
1038
|
# Setters
|
1274
|
-
PROPERTIES.each do |
|
1275
|
-
next if
|
1276
|
-
define_method("#{
|
1277
|
-
case
|
1278
|
-
when :
|
1279
|
-
|
1039
|
+
PROPERTIES.each do |key, type|
|
1040
|
+
next if [:tables, :tableSchema, :dialect, :transformations].include?(key)
|
1041
|
+
define_method("#{key}=".to_sym) do |value|
|
1042
|
+
invalid = case key
|
1043
|
+
when :tableDirection
|
1044
|
+
"rtl, ltr, or default" unless %(rtl ltr default).include?(value)
|
1045
|
+
when :notes, :tables, :tableSchema, :dialect, :transformations
|
1046
|
+
# We handle these through separate setters
|
1047
|
+
end
|
1048
|
+
|
1049
|
+
if invalid
|
1050
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1051
|
+
object[key] = default_value(key) unless default_value(key).nil?
|
1280
1052
|
else
|
1281
|
-
object[
|
1053
|
+
object[key] = value
|
1282
1054
|
end
|
1283
1055
|
end
|
1284
1056
|
end
|
@@ -1325,11 +1097,14 @@ module RDF::Tabular
|
|
1325
1097
|
|
1326
1098
|
# Return Annotated Table Group representation
|
1327
1099
|
def to_atd
|
1328
|
-
{
|
1329
|
-
"@id" => id,
|
1100
|
+
object.inject({
|
1101
|
+
"@id" => (id.to_s if id),
|
1330
1102
|
"@type" => "AnnotatedTableGroup",
|
1331
|
-
"tables" =>
|
1332
|
-
}
|
1103
|
+
"tables" => []
|
1104
|
+
}) do |memo, (k, v)|
|
1105
|
+
memo[k.to_s] ||= v
|
1106
|
+
memo
|
1107
|
+
end.delete_if {|k,v| v.nil? || v.is_a?(Metadata) || k.to_s == "@context"}
|
1333
1108
|
end
|
1334
1109
|
end
|
1335
1110
|
|
@@ -1345,17 +1120,37 @@ module RDF::Tabular
|
|
1345
1120
|
transformations: :array,
|
1346
1121
|
url: :link,
|
1347
1122
|
}.freeze
|
1123
|
+
DEFAULTS = {
|
1124
|
+
suppressOutput: false,
|
1125
|
+
tableDirection: "default".freeze,
|
1126
|
+
}.freeze
|
1348
1127
|
REQUIRED = [:url].freeze
|
1349
1128
|
|
1350
1129
|
# Setters
|
1351
|
-
PROPERTIES.each do |
|
1352
|
-
next if
|
1353
|
-
define_method("#{
|
1354
|
-
case
|
1355
|
-
when :
|
1356
|
-
|
1130
|
+
PROPERTIES.each do |key, type|
|
1131
|
+
next if [:tableSchema, :dialect, :transformations].include?(key)
|
1132
|
+
define_method("#{key}=".to_sym) do |value|
|
1133
|
+
invalid = case key
|
1134
|
+
when :suppressOutput
|
1135
|
+
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
1136
|
+
when :tableDirection
|
1137
|
+
"rtl, ltr, or default" unless %(rtl ltr default).include?(value)
|
1138
|
+
when :url
|
1139
|
+
"valid URL" unless value.is_a?(String) && base.join(value).valid?
|
1140
|
+
when :notes, :tableSchema, :dialect, :transformations
|
1141
|
+
# We handle these through separate setters
|
1142
|
+
end
|
1143
|
+
|
1144
|
+
if invalid
|
1145
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1146
|
+
object[key] = default_value(key) unless default_value(key).nil?
|
1147
|
+
elsif key == :url
|
1148
|
+
# URL of CSV relative to metadata
|
1149
|
+
object[:url] = value
|
1150
|
+
@url = base.join(value)
|
1151
|
+
@context.base = @url if @context # Use as base for expanding IRIs
|
1357
1152
|
else
|
1358
|
-
object[
|
1153
|
+
object[key] = value
|
1359
1154
|
end
|
1360
1155
|
end
|
1361
1156
|
end
|
@@ -1366,15 +1161,29 @@ module RDF::Tabular
|
|
1366
1161
|
super || tableSchema && tableSchema.has_annotations?
|
1367
1162
|
end
|
1368
1163
|
|
1164
|
+
# Return a new TableGroup based on this Table
|
1165
|
+
def to_table_group
|
1166
|
+
content = {"@type" => "TableGroup", "tables" => [self]}
|
1167
|
+
content['@context'] = object.delete(:@context) if object[:@context]
|
1168
|
+
ctx = @context
|
1169
|
+
self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
|
1170
|
+
tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
|
1171
|
+
@parent = tg # Link from parent
|
1172
|
+
tg
|
1173
|
+
end
|
1174
|
+
|
1369
1175
|
# Return Annotated Table representation
|
1370
1176
|
def to_atd
|
1371
|
-
{
|
1372
|
-
"@id" => id,
|
1177
|
+
object.inject({
|
1178
|
+
"@id" => (id.to_s if id),
|
1373
1179
|
"@type" => "AnnotatedTable",
|
1180
|
+
"url" => self.url.to_s,
|
1374
1181
|
"columns" => tableSchema.columns.map(&:to_atd),
|
1375
|
-
"rows" => []
|
1376
|
-
|
1377
|
-
|
1182
|
+
"rows" => []
|
1183
|
+
}) do |memo, (k, v)|
|
1184
|
+
memo[k.to_s] ||= v
|
1185
|
+
memo
|
1186
|
+
end.delete_if {|k,v| v.nil? || v.is_a?(Metadata) || k.to_s == "@context"}
|
1378
1187
|
end
|
1379
1188
|
|
1380
1189
|
# Logic for accessing elements as accessors
|
@@ -1387,36 +1196,6 @@ module RDF::Tabular
|
|
1387
1196
|
end
|
1388
1197
|
end
|
1389
1198
|
|
1390
|
-
class Transformation < Metadata
|
1391
|
-
PROPERTIES = {
|
1392
|
-
:@id => :link,
|
1393
|
-
:@type => :atomic,
|
1394
|
-
source: :atomic,
|
1395
|
-
targetFormat: :link,
|
1396
|
-
scriptFormat: :link,
|
1397
|
-
titles: :natural_language,
|
1398
|
-
url: :link,
|
1399
|
-
}.freeze
|
1400
|
-
REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
|
1401
|
-
|
1402
|
-
# Setters
|
1403
|
-
PROPERTIES.each do |a, type|
|
1404
|
-
define_method("#{a}=".to_sym) do |value|
|
1405
|
-
case type
|
1406
|
-
when :natural_language
|
1407
|
-
set_nl(a, value)
|
1408
|
-
else
|
1409
|
-
object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
|
1410
|
-
end
|
1411
|
-
end
|
1412
|
-
end
|
1413
|
-
|
1414
|
-
# Logic for accessing elements as accessors
|
1415
|
-
def method_missing(method, *args)
|
1416
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1417
|
-
end
|
1418
|
-
end
|
1419
|
-
|
1420
1199
|
class Schema < Metadata
|
1421
1200
|
PROPERTIES = {
|
1422
1201
|
:@id => :link,
|
@@ -1425,17 +1204,67 @@ module RDF::Tabular
|
|
1425
1204
|
foreignKeys: :array,
|
1426
1205
|
primaryKey: :column_reference,
|
1427
1206
|
}.freeze
|
1207
|
+
DEFAULTS = {}.freeze
|
1428
1208
|
REQUIRED = [].freeze
|
1429
1209
|
|
1430
1210
|
# Setters
|
1431
|
-
PROPERTIES.each do |
|
1432
|
-
define_method("#{
|
1433
|
-
case
|
1434
|
-
when :
|
1435
|
-
|
1211
|
+
PROPERTIES.each do |key, type|
|
1212
|
+
define_method("#{key}=".to_sym) do |value|
|
1213
|
+
invalid = case key
|
1214
|
+
when :primaryKey
|
1215
|
+
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
1216
|
+
end
|
1217
|
+
|
1218
|
+
if invalid
|
1219
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1220
|
+
object[key] = default_value(key) unless default_value(key).nil?
|
1436
1221
|
else
|
1437
|
-
object[
|
1222
|
+
object[key] = value
|
1223
|
+
end
|
1224
|
+
end
|
1225
|
+
end
|
1226
|
+
|
1227
|
+
def columns=(value)
|
1228
|
+
object[:columns] = case value
|
1229
|
+
when Array
|
1230
|
+
number = 0
|
1231
|
+
value.map do |v|
|
1232
|
+
number += 1
|
1233
|
+
case v
|
1234
|
+
when Hash
|
1235
|
+
Column.new(v, @options.merge(
|
1236
|
+
table: (parent if parent.is_a?(Table)),
|
1237
|
+
parent: self,
|
1238
|
+
context: nil,
|
1239
|
+
number: number))
|
1240
|
+
else
|
1241
|
+
v
|
1242
|
+
end
|
1438
1243
|
end
|
1244
|
+
else
|
1245
|
+
warn "#{type} has invalid property 'columns': expected array of Column"
|
1246
|
+
[]
|
1247
|
+
end
|
1248
|
+
|
1249
|
+
unless object[:columns].all? {|v| v.is_a?(Column)}
|
1250
|
+
warn "#{type} has invalid property 'columns': expected array of Column"
|
1251
|
+
# Remove elements that aren't of the right types
|
1252
|
+
object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
|
1253
|
+
end
|
1254
|
+
end
|
1255
|
+
|
1256
|
+
def foreignKeys=(value)
|
1257
|
+
object[:foreignKeys] = case value
|
1258
|
+
when Array then value
|
1259
|
+
else
|
1260
|
+
warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1261
|
+
[]
|
1262
|
+
end
|
1263
|
+
|
1264
|
+
unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
|
1265
|
+
warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1266
|
+
# Remove elements that aren't of the right types
|
1267
|
+
object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
|
1439
1268
|
end
|
1440
1269
|
end
|
1441
1270
|
|
@@ -1456,9 +1285,12 @@ module RDF::Tabular
|
|
1456
1285
|
name: :atomic,
|
1457
1286
|
suppressOutput: :atomic,
|
1458
1287
|
titles: :natural_language,
|
1459
|
-
required: :atomic,
|
1460
1288
|
virtual: :atomic,
|
1461
1289
|
}.freeze
|
1290
|
+
DEFAULTS = {
|
1291
|
+
suppressOutput: false,
|
1292
|
+
virtual: false,
|
1293
|
+
}.freeze
|
1462
1294
|
REQUIRED = [].freeze
|
1463
1295
|
|
1464
1296
|
##
|
@@ -1488,20 +1320,33 @@ module RDF::Tabular
|
|
1488
1320
|
end
|
1489
1321
|
|
1490
1322
|
# Setters
|
1491
|
-
PROPERTIES.each do |
|
1492
|
-
define_method("#{
|
1493
|
-
case
|
1494
|
-
when :
|
1495
|
-
|
1323
|
+
PROPERTIES.each do |key, t|
|
1324
|
+
define_method("#{key}=".to_sym) do |value|
|
1325
|
+
invalid = case key
|
1326
|
+
when :name
|
1327
|
+
"proper name format" unless value.is_a?(String) && value.match(NAME_SYNTAX)
|
1328
|
+
when :suppressOutput, :virtual
|
1329
|
+
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
1330
|
+
when :titles
|
1331
|
+
valid_natural_language_property?(value)
|
1332
|
+
end
|
1333
|
+
|
1334
|
+
if invalid && key == :titles
|
1335
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1336
|
+
object[key] = set_nl(value)
|
1337
|
+
object.delete(key) if object[key].nil?
|
1338
|
+
elsif invalid
|
1339
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1340
|
+
object[key] = default_value(key) unless default_value(key).nil?
|
1496
1341
|
else
|
1497
|
-
object[
|
1342
|
+
object[key] = value
|
1498
1343
|
end
|
1499
1344
|
end
|
1500
1345
|
end
|
1501
1346
|
|
1502
1347
|
# Return or create a name for the column from titles, if it exists
|
1503
1348
|
def name
|
1504
|
-
|
1349
|
+
self[:name] || if titles && (ts = titles[context.default_language || 'und'])
|
1505
1350
|
n = Array(ts).first
|
1506
1351
|
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
|
1507
1352
|
n1 = URI.encode(n[1..-1], /[^\w\.]/)
|
@@ -1518,17 +1363,20 @@ module RDF::Tabular
|
|
1518
1363
|
|
1519
1364
|
# Return Annotated Column representation
|
1520
1365
|
def to_atd
|
1521
|
-
{
|
1522
|
-
"@id" => id,
|
1366
|
+
object.inject({
|
1367
|
+
"@id" => id.to_s,
|
1523
1368
|
"@type" => "Column",
|
1524
|
-
"table" => (table.id if table),
|
1369
|
+
"table" => (table.id.to_s if table.id),
|
1525
1370
|
"number" => self.number,
|
1526
1371
|
"sourceNumber" => self.sourceNumber,
|
1527
1372
|
"cells" => [],
|
1528
1373
|
"virtual" => self.virtual,
|
1529
1374
|
"name" => self.name,
|
1530
1375
|
"titles" => self.titles
|
1531
|
-
}
|
1376
|
+
}) do |memo, (k, v)|
|
1377
|
+
memo[k.to_s] ||= v
|
1378
|
+
memo
|
1379
|
+
end.delete_if {|k,v| v.nil?}
|
1532
1380
|
end
|
1533
1381
|
|
1534
1382
|
# Logic for accessing elements as accessors
|
@@ -1541,17 +1389,55 @@ module RDF::Tabular
|
|
1541
1389
|
end
|
1542
1390
|
end
|
1543
1391
|
|
1392
|
+
class Transformation < Metadata
|
1393
|
+
PROPERTIES = {
|
1394
|
+
:@id => :link,
|
1395
|
+
:@type => :atomic,
|
1396
|
+
source: :atomic,
|
1397
|
+
targetFormat: :link,
|
1398
|
+
scriptFormat: :link,
|
1399
|
+
titles: :natural_language,
|
1400
|
+
url: :link,
|
1401
|
+
}.freeze
|
1402
|
+
DEFAULTS = {}.freeze
|
1403
|
+
REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
|
1404
|
+
|
1405
|
+
# Setters
|
1406
|
+
PROPERTIES.each do |key, type|
|
1407
|
+
define_method("#{key}=".to_sym) do |value|
|
1408
|
+
invalid = case key
|
1409
|
+
when :scriptFormat, :targetFormat
|
1410
|
+
"valid absolute URL" unless RDF::URI(value).valid?
|
1411
|
+
when :source
|
1412
|
+
"json or rdf" unless %w(json rdf).include?(value) || value.nil?
|
1413
|
+
end
|
1414
|
+
|
1415
|
+
if invalid
|
1416
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1417
|
+
object[key] = default_value(key) unless default_value(key).nil?
|
1418
|
+
else
|
1419
|
+
object[key] = value
|
1420
|
+
end
|
1421
|
+
end
|
1422
|
+
end
|
1423
|
+
|
1424
|
+
# Logic for accessing elements as accessors
|
1425
|
+
def method_missing(method, *args)
|
1426
|
+
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1427
|
+
end
|
1428
|
+
end
|
1429
|
+
|
1544
1430
|
class Dialect < Metadata
|
1545
1431
|
# Defaults for dialects
|
1546
|
-
|
1547
|
-
commentPrefix:
|
1432
|
+
DEFAULTS = {
|
1433
|
+
commentPrefix: "#".freeze,
|
1548
1434
|
delimiter: ",".freeze,
|
1549
1435
|
doubleQuote: true,
|
1550
1436
|
encoding: "utf-8".freeze,
|
1551
1437
|
header: true,
|
1552
1438
|
headerRowCount: 1,
|
1553
1439
|
lineTerminators: :auto,
|
1554
|
-
quoteChar: '"',
|
1440
|
+
quoteChar: '"'.freeze,
|
1555
1441
|
skipBlankRows: false,
|
1556
1442
|
skipColumns: 0,
|
1557
1443
|
skipInitialSpace: false,
|
@@ -1580,9 +1466,35 @@ module RDF::Tabular
|
|
1580
1466
|
REQUIRED = [].freeze
|
1581
1467
|
|
1582
1468
|
# Setters
|
1583
|
-
PROPERTIES.keys.each do |
|
1584
|
-
define_method("#{
|
1585
|
-
|
1469
|
+
PROPERTIES.keys.each do |key|
|
1470
|
+
define_method("#{key}=".to_sym) do |value|
|
1471
|
+
invalid = case key
|
1472
|
+
when :commentPrefix, :delimiter, :quoteChar
|
1473
|
+
"a single character string" unless value.is_a?(String) && value.length == 1
|
1474
|
+
when :lineTerminators
|
1475
|
+
"a string" unless value.is_a?(String)
|
1476
|
+
when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
|
1477
|
+
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
1478
|
+
when :encoding
|
1479
|
+
"a valid encoding" unless (Encoding.find(value) rescue false)
|
1480
|
+
when :headerRowCount, :skipColumns, :skipRows
|
1481
|
+
"a non-negative integer" unless value.is_a?(Numeric) && value.integer? && value >= 0
|
1482
|
+
when :trim
|
1483
|
+
"true, false, start or end" unless %w(true false start end).include?(value.to_s.downcase)
|
1484
|
+
when :titles
|
1485
|
+
valid_natural_language_property?(value)
|
1486
|
+
end
|
1487
|
+
|
1488
|
+
if invalid && key == :titles
|
1489
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1490
|
+
object[key] = set_nl(value)
|
1491
|
+
object.delete(key) if object[key].nil?
|
1492
|
+
elsif invalid
|
1493
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1494
|
+
object[key] = default_value(key) unless default_value(key).nil?
|
1495
|
+
else
|
1496
|
+
object[key] = value
|
1497
|
+
end
|
1586
1498
|
end
|
1587
1499
|
end
|
1588
1500
|
|
@@ -1608,19 +1520,22 @@ module RDF::Tabular
|
|
1608
1520
|
# Extract a new Metadata document from the file or data provided
|
1609
1521
|
#
|
1610
1522
|
# @param [#read, #to_s] input IO, or file path or URL
|
1523
|
+
# @param [Table] metadata used for saving annotations created while extracting metadata
|
1611
1524
|
# @param [Hash{Symbol => Object}] options
|
1612
1525
|
# any additional options (see `RDF::Util::File.open_file`)
|
1526
|
+
# @option options [String] :lang, language to set in table, if any
|
1613
1527
|
# @return [Metadata] Tabular metadata
|
1614
1528
|
# @see http://w3c.github.io/csvw/syntax/#parsing
|
1615
|
-
def embedded_metadata(input, options = {})
|
1529
|
+
def embedded_metadata(input, metadata, options = {})
|
1616
1530
|
options = options.dup
|
1617
1531
|
options.delete(:context) # Don't accidentally use a passed context
|
1618
1532
|
# Normalize input to an IO object
|
1619
1533
|
if input.is_a?(String)
|
1620
|
-
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
|
1534
|
+
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, options.merge(base: input.to_s))}
|
1621
1535
|
end
|
1622
1536
|
|
1623
1537
|
table = {
|
1538
|
+
"@context" => "http://www.w3.org/ns/csvw",
|
1624
1539
|
"url" => (options.fetch(:base, "")),
|
1625
1540
|
"@type" => "Table",
|
1626
1541
|
"tableSchema" => {
|
@@ -1628,6 +1543,8 @@ module RDF::Tabular
|
|
1628
1543
|
"columns" => []
|
1629
1544
|
}
|
1630
1545
|
}
|
1546
|
+
metadata ||= table # In case the embedded metadata becomes the final metadata
|
1547
|
+
metadata["lang"] = options[:lang] if options[:lang]
|
1631
1548
|
|
1632
1549
|
# Set encoding on input
|
1633
1550
|
csv = ::CSV.new(input, csv_options)
|
@@ -1638,7 +1555,7 @@ module RDF::Tabular
|
|
1638
1555
|
value.rstrip! if %w(true end).include?(trim.to_s)
|
1639
1556
|
|
1640
1557
|
value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
|
1641
|
-
(
|
1558
|
+
(metadata["rdfs:comment"] ||= []) << value unless value.empty?
|
1642
1559
|
end
|
1643
1560
|
debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
|
1644
1561
|
|
@@ -1669,9 +1586,9 @@ module RDF::Tabular
|
|
1669
1586
|
|
1670
1587
|
# Logic for accessing elements as accessors
|
1671
1588
|
def method_missing(method, *args)
|
1672
|
-
if
|
1589
|
+
if DEFAULTS.has_key?(method.to_sym)
|
1673
1590
|
# As set, or with default
|
1674
|
-
object.fetch(method.to_sym,
|
1591
|
+
object.fetch(method.to_sym, DEFAULTS[method.to_sym])
|
1675
1592
|
else
|
1676
1593
|
super
|
1677
1594
|
end
|
@@ -1691,19 +1608,46 @@ module RDF::Tabular
|
|
1691
1608
|
maxInclusive: :atomic,
|
1692
1609
|
minExclusive: :atomic,
|
1693
1610
|
maxExclusive: :atomic,
|
1694
|
-
decimalChar: :atomic,
|
1695
|
-
groupChar: :atomic,
|
1696
|
-
pattern: :atomic,
|
1697
1611
|
}.freeze
|
1698
1612
|
REQUIRED = [].freeze
|
1613
|
+
DEFAULTS = {}.freeze
|
1699
1614
|
|
1700
1615
|
# Override `base` in Metadata
|
1701
1616
|
def base; object[:base]; end
|
1702
1617
|
|
1703
1618
|
# Setters
|
1704
|
-
PROPERTIES.each do |
|
1705
|
-
define_method("#{
|
1706
|
-
|
1619
|
+
PROPERTIES.each do |key, type|
|
1620
|
+
define_method("#{key}=".to_sym) do |value|
|
1621
|
+
invalid = case key
|
1622
|
+
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
1623
|
+
"numeric or valid date/time" unless value.is_a?(Numeric) ||
|
1624
|
+
RDF::Literal::Date.new(value.to_s).valid? ||
|
1625
|
+
RDF::Literal::Time.new(value.to_s).valid? ||
|
1626
|
+
RDF::Literal::DateTime.new(value.to_s).valid?
|
1627
|
+
when :format
|
1628
|
+
unless value.is_a?(String)
|
1629
|
+
warn "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
|
1630
|
+
if default_value(key).nil?
|
1631
|
+
object.delete(key)
|
1632
|
+
else
|
1633
|
+
object[key] = default_value(key)
|
1634
|
+
end
|
1635
|
+
end
|
1636
|
+
when :length, :minLength, :maxLength
|
1637
|
+
if !(value.is_a?(Numeric) && value.integer? && value >= 0)
|
1638
|
+
"a non-negative integer"
|
1639
|
+
elsif key != :length && object[:length] && value != object[:length]
|
1640
|
+
# Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
|
1641
|
+
"both length and #{key} requires they be equal"
|
1642
|
+
end
|
1643
|
+
end
|
1644
|
+
|
1645
|
+
if invalid
|
1646
|
+
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1647
|
+
object[key] = default_value(key) unless default_value(key).nil?
|
1648
|
+
else
|
1649
|
+
object[key] = value
|
1650
|
+
end
|
1707
1651
|
end
|
1708
1652
|
end
|
1709
1653
|
|
@@ -1741,14 +1685,15 @@ module RDF::Tabular
|
|
1741
1685
|
# Return Annotated Cell representation
|
1742
1686
|
def to_atd
|
1743
1687
|
{
|
1744
|
-
"@id" =>
|
1688
|
+
"@id" => id.to_s,
|
1745
1689
|
"@type" => "Cell",
|
1746
|
-
"column" => column.id,
|
1747
|
-
"row" => row.id,
|
1690
|
+
"column" => column.id.to_s,
|
1691
|
+
"row" => row.id.to_s,
|
1748
1692
|
"stringValue" => self.stringValue,
|
1749
|
-
"
|
1693
|
+
"table" => (table.id.to_s if table.id),
|
1694
|
+
"value" => table.context.expand_value(nil, self.value),
|
1750
1695
|
"errors" => self.errors
|
1751
|
-
}
|
1696
|
+
}.delete_if {|k,v| Array(v).empty?}
|
1752
1697
|
end
|
1753
1698
|
end
|
1754
1699
|
|
@@ -1794,9 +1739,15 @@ module RDF::Tabular
|
|
1794
1739
|
map_values = {"_row" => number, "_sourceRow" => source_number}
|
1795
1740
|
|
1796
1741
|
columns = metadata.tableSchema.columns ||= []
|
1742
|
+
non_virtual_columns = columns.reject(&:virtual)
|
1743
|
+
|
1744
|
+
if row.length < non_virtual_columns.length
|
1745
|
+
raise Error, "Row #{source_number} has #{row.length} columns, expected #{non_virtual_columns.length}"
|
1746
|
+
end
|
1797
1747
|
|
1798
1748
|
# Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
|
1799
1749
|
columns.each_with_index {|c, index| row[index] ||= (c.null || '')}
|
1750
|
+
|
1800
1751
|
row.each_with_index do |value, index|
|
1801
1752
|
|
1802
1753
|
next if index < skipColumns
|
@@ -1811,7 +1762,7 @@ module RDF::Tabular
|
|
1811
1762
|
|
1812
1763
|
@values << cell = Cell.new(metadata, column, self, value)
|
1813
1764
|
|
1814
|
-
datatype = column.datatype || Datatype.new(base: "string")
|
1765
|
+
datatype = column.datatype || Datatype.new(base: "string", parent: column)
|
1815
1766
|
value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
|
1816
1767
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
|
1817
1768
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -1848,7 +1799,7 @@ module RDF::Tabular
|
|
1848
1799
|
cell.errors = cell_errors
|
1849
1800
|
metadata.send(:debug, "#{self.number}: each_cell ##{self.sourceNumber},#{cell.column.sourceNumber}", cell.errors.join("\n")) unless cell_errors.empty?
|
1850
1801
|
|
1851
|
-
map_values[columns[index - skipColumns].name] =
|
1802
|
+
map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
|
1852
1803
|
end
|
1853
1804
|
|
1854
1805
|
# Map URLs for row
|
@@ -1869,13 +1820,13 @@ module RDF::Tabular
|
|
1869
1820
|
# Return Annotated Row representation
|
1870
1821
|
def to_atd
|
1871
1822
|
{
|
1872
|
-
"@id" =>
|
1823
|
+
"@id" => id.to_s,
|
1873
1824
|
"@type" => "Row",
|
1874
|
-
"table" => table.id,
|
1825
|
+
"table" => (table.id.to_s if table.id),
|
1875
1826
|
"number" => self.number,
|
1876
1827
|
"sourceNumber" => self.sourceNumber,
|
1877
1828
|
"cells" => @values.map(&:to_atd)
|
1878
|
-
}
|
1829
|
+
}.delete_if {|k,v| v.nil?}
|
1879
1830
|
end
|
1880
1831
|
|
1881
1832
|
private
|
@@ -1905,17 +1856,18 @@ module RDF::Tabular
|
|
1905
1856
|
:nonPositiveInteger, :negativeInteger,
|
1906
1857
|
:double, :float, :number
|
1907
1858
|
# Normalize representation based on numeric-specific facets
|
1908
|
-
|
1909
|
-
|
1859
|
+
format ||= {}
|
1860
|
+
groupChar = format[:groupChar] || ','
|
1861
|
+
if format[:pattern] && !value.match(Regexp.new(format[:pattern]))
|
1910
1862
|
# pattern facet failed
|
1911
|
-
value_errors << "#{value} does not match pattern #{
|
1863
|
+
value_errors << "#{value} does not match pattern #{format[:pattern]}"
|
1912
1864
|
end
|
1913
1865
|
if value.include?(groupChar*2)
|
1914
1866
|
# pattern facet failed
|
1915
1867
|
value_errors << "#{value} has repeating #{groupChar.inspect}"
|
1916
1868
|
end
|
1917
1869
|
value.gsub!(groupChar, '')
|
1918
|
-
value.sub!(
|
1870
|
+
value.sub!(format[:decimalChar], '.') if format[:decimalChar]
|
1919
1871
|
|
1920
1872
|
# Extract percent or per-mille sign
|
1921
1873
|
percent = permille = false
|
@@ -2021,8 +1973,8 @@ module RDF::Tabular
|
|
2021
1973
|
tz_part = value if tz
|
2022
1974
|
|
2023
1975
|
# Compose normalized value
|
2024
|
-
vd = ("%04d-%02d-%02d" % [date_part[:yr], date_part[:mo], date_part[:da]]) if date_part
|
2025
|
-
vt = ("%02d:%02d:%02d" % [time_part[:hr], time_part[:mi], time_part[:se].to_i]) if time_part
|
1976
|
+
vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
|
1977
|
+
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
2026
1978
|
value = [vd, vt].compact.join('T')
|
2027
1979
|
value += tz_part.to_s
|
2028
1980
|
end
|