rdf-tabular 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 540422ae3c9cb7f94c8c0c396237da7de46b8768
4
- data.tar.gz: eff96886626aa8c73b4ea3150c75441f27af02e4
3
+ metadata.gz: 88feb17a025351dbc965fb07a24e7f62a605dfae
4
+ data.tar.gz: 2f13430c7c419d8a14f1c55b3cca451264f1c36d
5
5
  SHA512:
6
- metadata.gz: cbe14ec96c52d655be56ad1079ff8cb3ce22e366aae7af60a8d43cffa5508ae6b5e528db0b59ac65e3a902fc3d127a660be8b569ad3743af93d992f276902b73
7
- data.tar.gz: 9831d7631d93eaeeb2739c0b58b22061e8485e5954b4687da108ee79c63e5220778652bcde1c8f5103db806b8bae4c74c871a1a684165abe7fef3e0bfef355e5
6
+ metadata.gz: 7f17e7e8dea99269719beca3ee0f7789040938b269f18e1e752a5c01f8027734a410523723ab421085971e14e1bf231dae001b99996a350c6cd323bb369d1994
7
+ data.tar.gz: 58e67ddfa7330a379a9da6fef50abd386857702c553e024a30e63bf7aca28caa323bec4bd9e6968386a47f6914eb2daf99043d00d819296cf3e70ad7d48a60e6
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
@@ -3,6 +3,7 @@ require 'json/ld'
3
3
  require 'bcp47'
4
4
  require 'addressable/template'
5
5
  require 'rdf/xsd'
6
+ require 'yaml' # used by BCP47, which should have required it.
6
7
 
7
8
  ##
8
9
  # CSVM Metadata processor
@@ -20,21 +21,38 @@ module RDF::Tabular
20
21
  include Utils
21
22
 
22
23
  # Hash representation
24
+ # @return [Hash<Symbol,Object>]
23
25
  attr_accessor :object
24
26
 
27
+ # Warnings detected on initialization or when setting properties
28
+ # @return [Array<String>]
29
+ attr_accessor :warnings
30
+
25
31
  # Inherited properties, valid for all types
26
32
  INHERITED_PROPERTIES = {
27
- null: :atomic,
28
- lang: :atomic,
29
- textDirection: :atomic,
30
- separator: :atomic,
33
+ aboutUrl: :uri_template,
34
+ datatype: :atomic,
31
35
  default: :atomic,
36
+ lang: :atomic,
37
+ null: :atomic,
32
38
  ordered: :atomic,
33
- datatype: :atomic,
34
- aboutUrl: :uri_template,
35
39
  propertyUrl: :uri_template,
40
+ required: :atomic,
41
+ separator: :atomic,
42
+ textDirection: :atomic,
36
43
  valueUrl: :uri_template,
37
44
  }.freeze
45
+ INHERITED_DEFAULTS = {
46
+ aboutUrl: "".freeze,
47
+ default: "".freeze,
48
+ lang: "und",
49
+ null: "".freeze,
50
+ ordered: false,
51
+ propertyUrl: "".freeze,
52
+ required: false,
53
+ textDirection: "ltr".freeze,
54
+ valueUrl: "".freeze,
55
+ }.freeze
38
56
 
39
57
  # Valid datatypes
40
58
  DATATYPES = {
@@ -143,8 +161,8 @@ module RDF::Tabular
143
161
  def self.for_input(input, options = {})
144
162
  base = options[:base]
145
163
 
146
- # Use user metadata
147
- user_metadata = case options[:metadata]
164
+ # Use user metadata, if provided
165
+ metadata = case options[:metadata]
148
166
  when Metadata then options[:metadata]
149
167
  when Hash
150
168
  Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
@@ -152,42 +170,37 @@ module RDF::Tabular
152
170
  Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
153
171
  end
154
172
 
155
- found_metadata = nil
173
+ # Search for metadata until found
156
174
 
157
- # If user_metadata does not describe input, get the first found from linked-, file-, and directory-specific metadata
158
- unless user_metadata.is_a?(Table) || user_metadata.is_a?(TableGroup) && user_metadata.for_table(base)
159
- # load link metadata, if available
160
- locs = []
161
- if input.respond_to?(:links) &&
162
- link = input.links.find_link(%w(rel describedby))
163
- locs << RDF::URI(base).join(link.href)
164
- end
175
+ # load link metadata, if available
176
+ locs = []
177
+ if input.respond_to?(:links) &&
178
+ link = input.links.find_link(%w(rel describedby))
179
+ locs << RDF::URI(base).join(link.href)
180
+ end
165
181
 
166
- if base
167
- locs += [RDF::URI("#{base}-metadata.json"), RDF::URI(base).join("metadata.json")]
168
- end
182
+ if base
183
+ locs += [RDF::URI("#{base}-metadata.json"), RDF::URI(base).join("metadata.json")]
184
+ end
169
185
 
170
- locs.each do |loc|
171
- found_metadata ||= begin
172
- Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
173
- rescue
174
- debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
175
- nil
176
- end
186
+ locs.each do |loc|
187
+ metadata ||= begin
188
+ Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
189
+ rescue
190
+ debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
191
+ nil
177
192
  end
178
193
  end
179
194
 
180
195
  # Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
181
196
  metadata = case
182
- when user_metadata && found_metadata then user_metadata.merge(found_metadata)
183
- when user_metadata then user_metadata
184
- when found_metadata then found_metadata
185
- when base then TableGroup.new({tables: [{url: base}]}, options)
186
- else TableGroup.new({tables: []}, options)
197
+ when metadata then metadata
198
+ when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
199
+ else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: []}, options)
187
200
  end
188
201
 
189
202
  # Make TableGroup, if not already
190
- metadata.is_a?(TableGroup) ? metadata : metadata.merge(TableGroup.new({}))
203
+ metadata.is_a?(TableGroup) ? metadata : metadata.to_table_group
191
204
  end
192
205
 
193
206
  ##
@@ -204,37 +217,32 @@ module RDF::Tabular
204
217
 
205
218
  unless options[:parent]
206
219
  # Add context, if not set (which it should be)
207
- object['@context'] ||= options.delete(:@context) || options[:context] || 'http://www.w3.org/ns/csvw'
220
+ object['@context'] ||= options.delete(:@context) || options[:context]
208
221
  end
209
222
 
210
223
  klass = case
211
224
  when !self.equal?(RDF::Tabular::Metadata)
212
225
  self # subclasses can be directly constructed without type dispatch
213
226
  else
214
- type = if options[:type]
215
- type = options[:type].to_sym
216
- raise Error, "If provided, type must be one of :TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect]" unless
217
- [:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect].include?(type)
218
- type
219
- end
227
+ type = options[:type].to_sym if options[:type]
220
228
 
221
229
  # Figure out type by @type
222
- type ||= object['@type']
230
+ type ||= object['@type'].to_sym if object['@type']
223
231
 
224
- # Figure out type by site
232
+ # Otherwise, Figure out type by site
225
233
  object_keys = object.keys.map(&:to_s)
226
234
  type ||= case
227
235
  when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
228
236
  when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
229
237
  when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
230
- when %w(columns primaryKey foreignKeys urlTemplate).any? {|k| object_keys.include?(k)} then :Schema
231
- when %w(name required).any? {|k| object_keys.include?(k)} then :Column
238
+ when %w(columns primaryKey foreignKeys).any? {|k| object_keys.include?(k)} then :Schema
239
+ when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
232
240
  when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
233
241
  when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
234
242
  end
235
243
 
236
244
  case type.to_s.to_sym
237
- when :TableGroup then RDF::Tabular::TableGroup
245
+ when :TableGroup, :"" then RDF::Tabular::TableGroup
238
246
  when :Table then RDF::Tabular::Table
239
247
  when :Transformation then RDF::Tabular::Transformation
240
248
  when :Schema then RDF::Tabular::Schema
@@ -265,14 +273,26 @@ module RDF::Tabular
265
273
  # @return [Metadata]
266
274
  def initialize(input, options = {})
267
275
  @options = options.dup
276
+ @options[:depth] ||= 0
277
+
278
+ # Parent of this Metadata, if any
279
+ @parent = @options[:parent]
268
280
 
269
281
  # Get context from input
270
282
  # Optimize by using built-in version of context, and just extract @base, @lang
271
283
  @context = case input['@context']
272
- when Array then LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
273
- when Hash then LOCAL_CONTEXT.parse(input['@context'])
274
- when nil then nil
275
- else LOCAL_CONTEXT
284
+ when Array
285
+ warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
286
+ LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
287
+ when Hash
288
+ warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
289
+ LOCAL_CONTEXT.parse(input['@context'])
290
+ when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT
291
+ else
292
+ if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
293
+ warn "Context missing required value 'http://www.w3.org/ns/csvw'"
294
+ LOCAL_CONTEXT
295
+ end
276
296
  end
277
297
 
278
298
  reason = @options.delete(:reason)
@@ -284,76 +304,23 @@ module RDF::Tabular
284
304
 
285
305
  @context.base = @options[:base] if @context
286
306
 
287
- @options[:depth] ||= 0
307
+ if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
308
+ warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
309
+ @context.default_language = nil
310
+ end
311
+
288
312
  @filenames = Array(@options[:filenames]).map {|fn| RDF::URI(fn)} if @options[:filenames]
289
313
  @properties = self.class.const_get(:PROPERTIES)
290
314
  @required = self.class.const_get(:REQUIRED)
291
315
 
292
316
  @object = {}
293
317
 
294
- # Parent of this Metadata, if any
295
- @parent = @options[:parent]
296
-
297
318
  depth do
298
319
  # Input was parsed in .new
299
320
  # Metadata is object with symbolic keys
300
321
  input.each do |key, value|
301
322
  key = key.to_sym
302
323
  case key
303
- when :columns
304
- # An array of template specifications that provide mechanisms to transform the tabular data into other formats
305
- object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
306
- number = 0
307
- value.map do |v|
308
- number += 1
309
- Column.new(v, @options.merge(table: (parent if parent.is_a?(Table)), parent: self, context: nil, number: number))
310
- end
311
- else
312
- # Invalid, but preserve value
313
- value
314
- end
315
- when :datatype
316
- self.datatype = value
317
- when :dialect
318
- # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
319
- object[key] = case value
320
- when String then Dialect.open(base.join(value), @options.merge(parent: self, context: nil))
321
- when Hash then Dialect.new(value, @options.merge(parent: self, context: nil))
322
- else
323
- # Invalid, but preserve value
324
- value
325
- end
326
- @type ||= :Table
327
- when :tables
328
- # An array of table descriptions for the tables in the group.
329
- object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
330
- value.map {|v| Table.new(v, @options.merge(parent: self, context: nil))}
331
- else
332
- # Invalid, but preserve value
333
- value
334
- end
335
- when :tableSchema
336
- # An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
337
- # SPEC SUGGESTION: when loading a remote schema, assign @id from it's location if not already set
338
- object[key] = case value
339
- when String
340
- link = base.join(value).to_s
341
- s = Schema.open(link, @options.merge(parent: self, context: nil))
342
- s[:@id] ||= link
343
- s
344
- when Hash then Schema.new(value, @options.merge(parent: self, context: nil))
345
- else
346
- # Invalid, but preserve value
347
- value
348
- end
349
- when :transformations
350
- # An array of template specifications that provide mechanisms to transform the tabular data into other formats
351
- object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
352
- value.map {|v| Transformation.new(v, @options.merge(parent: self, context: nil))}
353
- else
354
- # Invalid, but preserve value
355
- value
356
- end
357
324
  when :url
358
325
  # URL of CSV relative to metadata
359
326
  object[:url] = value
@@ -361,10 +328,15 @@ module RDF::Tabular
361
328
  @context.base = @url if @context # Use as base for expanding IRIs
362
329
  when :@id
363
330
  # metadata identifier
364
- object[:@id] = value
365
- @id = base.join(value)
331
+ object[:@id] = if value.is_a?(String)
332
+ value
333
+ else
334
+ warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
335
+ ""
336
+ end
337
+ @id = base.join(object[:@id])
366
338
  else
367
- if @properties.has_key?(key)
339
+ if @properties.has_key?(key) || INHERITED_PROPERTIES.has_key?(key)
368
340
  self.send("#{key}=".to_sym, value)
369
341
  else
370
342
  object[key] = value
@@ -383,9 +355,32 @@ module RDF::Tabular
383
355
  end
384
356
 
385
357
  # Setters
386
- INHERITED_PROPERTIES.keys.each do |a|
387
- define_method("#{a}=".to_sym) do |value|
388
- object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
358
+ INHERITED_PROPERTIES.keys.each do |key|
359
+ define_method("#{key}=".to_sym) do |value|
360
+ invalid = case key
361
+ when :aboutUrl, :default, :propertyUrl, :valueUrl
362
+ "string" unless value.is_a?(String)
363
+ when :lang
364
+ "valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
365
+ when :null
366
+ # To be valid, it must be a string or array
367
+ "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
368
+ when :ordered, :required
369
+ "boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
370
+ when :separator
371
+ "single character" unless value.nil? || value.is_a?(String) && value.length == 1
372
+ when :textDirection
373
+ "rtl or ltr" unless %(rtl ltr).include?(value)
374
+ when :datatype
375
+ # We handle this through a separate datatype= setter
376
+ end
377
+
378
+ if invalid
379
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
380
+ object[key] = default_value(key) unless default_value(key).nil?
381
+ else
382
+ object[key] = value
383
+ end
389
384
  end
390
385
  end
391
386
 
@@ -395,6 +390,32 @@ module RDF::Tabular
395
390
  @context || (parent.context if parent)
396
391
  end
397
392
 
393
+ def tables=(value)
394
+ set_array_value(:tables, value, Table)
395
+ end
396
+
397
+ # An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
398
+ # when loading a remote schema, assign @id from its location if not already set
399
+ def tableSchema=(value)
400
+ case value
401
+ when String
402
+ link = base.join(value).to_s
403
+ s = Schema.open(link, @options.merge(parent: self, context: nil))
404
+ s[:@id] ||= link
405
+ object[:tableSchema] = s
406
+ when Hash
407
+ object[:tableSchema] = Metadata.new(value, @options.merge(parent: self, context: nil))
408
+ when Schema
409
+ object[:tableSchema] = value
410
+ else
411
+ warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
412
+ end
413
+ end
414
+
415
+ def transformations=(value)
416
+ set_array_value(:transformations, value, Metadata)
417
+ end
418
+
398
419
  # Treat `dialect` similar to an inherited property, but merge together values from Table and TableGroup
399
420
  # @return [Dialect]
400
421
  def dialect
@@ -421,23 +442,32 @@ module RDF::Tabular
421
442
  end
422
443
  end
423
444
 
424
- if value.is_a?(Hash)
425
- @dialect = object[:dialect] = Dialect.new(value)
426
- elsif value
427
- # Remember invalid dialect for validation purposes
445
+ # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
446
+ @dialect = case value
447
+ when String
448
+ object[:dialect] = Metadata.open(base.join(value), @options.merge(parent: self, context: nil))
449
+ when Hash
450
+ object[:dialect] = Metadata.new(value, @options.merge(parent: self, context: nil))
451
+ when Dialect
428
452
  object[:dialect] = value
429
453
  else
430
- object.delete(:dialect)
431
- @dialect = nil
454
+ warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
455
+ nil
432
456
  end
433
457
  end
434
458
 
435
459
  # Set new datatype
436
460
  # @return [Dialect]
437
461
  def datatype=(value)
438
- object[:datatype] = case value
439
- when Hash then Datatype.new(value)
440
- else Datatype.new({base: value})
462
+ val = case value
463
+ when Hash then Datatype.new(value, parent: self)
464
+ else Datatype.new({base: value}, parent: self)
465
+ end
466
+
467
+ if val.valid?
468
+ object[:datatype] = val
469
+ else
470
+ warn "#{type} has invalid property 'datatype': expected a Datatype"
441
471
  end
442
472
  end
443
473
 
@@ -476,7 +506,7 @@ module RDF::Tabular
476
506
  flatten.
477
507
  select {|v| v.is_a?(Metadata)}.
478
508
  map(&:warnings).
479
- flatten).compact
509
+ flatten).compact.uniq
480
510
  end
481
511
 
482
512
  ##
@@ -485,7 +515,7 @@ module RDF::Tabular
485
515
  # @return [self]
486
516
  def validate!
487
517
  expected_props, required_props = @properties.keys, @required
488
- errors, @warnings = [], []
518
+ errors = []
489
519
 
490
520
  unless is_a?(Dialect) || is_a?(Transformation)
491
521
  expected_props = expected_props + INHERITED_PROPERTIES.keys
@@ -494,163 +524,88 @@ module RDF::Tabular
494
524
  # It has only expected properties (exclude metadata)
495
525
  check_keys = object.keys - [:"@id", :"@context"]
496
526
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
497
- @warnings << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
527
+ warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
498
528
 
499
529
  # It has required properties
500
- errors << "#{type} missing required keys: #{(required_props & check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
530
+ errors << "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
531
+
532
+ self.normalize!
501
533
 
502
534
  # Every property is valid
503
535
  object.keys.each do |key|
504
536
  value = object[key]
505
537
  case key
506
- when :aboutUrl, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
507
- valid_inherited_property?(key, value) do |m|
508
- @warnings << m
509
- end
538
+ when :base
539
+ warn "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
510
540
  when :columns
511
- if value.is_a?(Array) && value.all? {|v| v.is_a?(Column)}
512
- value.each do |v|
513
- begin
514
- v.validate!
515
- rescue Error => e
516
- errors << e.message
517
- end
541
+ value.each do |v|
542
+ begin
543
+ v.validate!
544
+ rescue Error => e
545
+ errors << e.message
518
546
  end
519
- column_names = value.map(&:name)
520
- errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
521
- else
522
- errors << "#{type} has invalid property '#{key}': expected array of Columns"
523
- end
524
- when :commentPrefix, :delimiter, :quoteChar
525
- unless value.is_a?(String) && value.length == 1
526
- @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
527
- object[key] = Dialect::DIALECT_DEFAULTS[key]
528
- end
529
- when :lineTerminators
530
- unless value.is_a?(String)
531
- @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
532
- object[key] = Dialect::DIALECT_DEFAULTS[key]
533
547
  end
534
- when :datatype
535
- if value.is_a?(Datatype)
548
+ column_names = value.map(&:name)
549
+ errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
550
+ when :dialect, :tables, :tableSchema, :transformations
551
+ Array(value).each do |t|
536
552
  begin
537
- value.validate!
553
+ t.validate!
538
554
  rescue Error => e
539
555
  errors << e.message
540
556
  end
541
- else
542
- @warnings << "#{type} has invalid property '#{key}': expected a Datatype"
543
- value = object[key] = nil
544
- end
545
- when :dialect
546
- unless value.is_a?(Dialect)
547
- errors << "#{type} has invalid property '#{key}': expected a Dialect Description"
548
- end
549
- begin
550
- value.validate! if value
551
- rescue Error => e
552
- errors << e.message
553
- end
554
- when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
555
- unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
556
- @warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
557
- object[key] = Dialect::DIALECT_DEFAULTS[key]
558
- end
559
- when :required, :suppressOutput, :virtual
560
- unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
561
- @warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
562
- object.delete(key)
563
- end
564
- when :encoding
565
- unless (Encoding.find(value) rescue false)
566
- @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
567
557
  end
568
558
  when :foreignKeys
569
559
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
570
- value.is_a?(Array) && value.each do |fk|
571
- if fk.is_a?(Hash)
572
- columnReference, reference = fk['columnReference'], fk['reference']
573
- errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
574
- errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
575
-
576
- # Verify that columns exist in this schema
577
- Array(columnReference).each do |k|
578
- errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c.name == k}
579
- end
560
+ value.each do |fk|
561
+ columnReference, reference = fk['columnReference'], fk['reference']
562
+ errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
563
+ errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
564
+
565
+ # Verify that columns exist in this schema
566
+ errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
567
+ Array(columnReference).each do |k|
568
+ errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
569
+ end
580
570
 
581
- if reference.is_a?(Hash)
582
- ref_cols = reference['columnReference']
583
- schema = if reference.has_key?('resource')
584
- if reference.has_key?('schemaReference')
585
- errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
586
- end
587
- # resource is the URL of a Table in the TableGroup
588
- ref = base.join(reference['resource']).to_s
589
- table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
590
- errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
591
- table.tableSchema if table
592
- elsif reference.has_key?('schemaReference')
593
- # resource is the @id of a Schema in the TableGroup
594
- ref = base.join(reference['schemaReference']).to_s
595
- tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
596
- case tables.length
597
- when 0
598
- errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
599
- nil
600
- when 1
601
- tables.first.tableSchema
602
- else
603
- errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
604
- nil
605
- end
571
+ if reference.is_a?(Hash)
572
+ ref_cols = reference['columnReference']
573
+ schema = if reference.has_key?('resource')
574
+ if reference.has_key?('schemaReference')
575
+ errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
606
576
  end
577
+ # resource is the URL of a Table in the TableGroup
578
+ ref = base.join(reference['resource']).to_s
579
+ table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
580
+ errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
581
+ table.tableSchema if table
582
+ elsif reference.has_key?('schemaReference')
583
+ # resource is the @id of a Schema in the TableGroup
584
+ ref = base.join(reference['schemaReference']).to_s
585
+ tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
586
+ case tables.length
587
+ when 0
588
+ errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
589
+ nil
590
+ when 1
591
+ tables.first.tableSchema
592
+ else
593
+ errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
594
+ nil
595
+ end
596
+ end
607
597
 
608
- if schema
609
- # ref_cols must exist in schema
610
- Array(ref_cols).each do |k|
611
- errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c.name == k}
612
- end
598
+ if schema
599
+ # ref_cols must exist in schema
600
+ errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
601
+ Array(ref_cols).each do |k|
602
+ errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
613
603
  end
614
- else
615
- errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
616
604
  end
617
605
  else
618
- errors << "#{type} has invalid property '#{key}': reference must be an object: #{reference.inspect}"
606
+ errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
619
607
  end
620
608
  end
621
- when :headerRowCount, :skipColumns, :skipRows
622
- unless value.is_a?(Numeric) && value.integer? && value > 0
623
- @warnings << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
624
- object[key] = Dialect::DIALECT_DEFAULTS[key]
625
- end
626
- when :base
627
- @warnings << "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
628
- when :format
629
- unless value.is_a?(String)
630
- @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
631
- object.delete(key)
632
- end
633
- when :length, :minLength, :maxLength
634
- unless value.is_a?(Numeric) && value.integer? && value > 0
635
- @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
636
- object.delete(key)
637
- end
638
- unless key == :length || value != object[:length]
639
- # Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
640
- errors << "#{type} has invalid property '#{key}': Use of both length and #{key} requires they be equal"
641
- end
642
- when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
643
- unless value.is_a?(Numeric) ||
644
- RDF::Literal::Date.new(value.to_s).valid? ||
645
- RDF::Literal::Time.new(value.to_s).valid? ||
646
- RDF::Literal::DateTime.new(value.to_s).valid?
647
- @warnings << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
648
- object.delete(key)
649
- end
650
- when :name
651
- unless value.is_a?(String) && name.match(NAME_SYNTAX)
652
- errors << "#{type} has invalid property '#{key}': #{value}, expected proper name format"
653
- end
654
609
  when :notes
655
610
  unless value.is_a?(Hash) || value.is_a?(Array)
656
611
  errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
@@ -662,68 +617,17 @@ module RDF::Tabular
662
617
  end
663
618
  when :primaryKey
664
619
  # A column reference property that holds either a single reference to a column description object or an array of references.
620
+ "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
665
621
  Array(value).each do |k|
666
- errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c.name == k}
667
- end
668
- when :tables
669
- if value.is_a?(Array) && value.all? {|v| v.is_a?(Table)}
670
- value.each do |t|
671
- begin
672
- t.validate!
673
- rescue Error => e
674
- errors << e.message
675
- end
676
- end
677
- else
678
- errors << "#{type} has invalid property '#{key}': expected array of Tables"
679
- end
680
- when :scriptFormat, :targetFormat
681
- unless RDF::URI(value).valid?
682
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
683
- end
684
- when :source
685
- unless %w(json rdf).include?(value) || value.nil?
686
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected json or rdf"
687
- end
688
- when :tableDirection
689
- unless %w(rtl ltr default).include?(value)
690
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected rtl, ltr, or default"
622
+ errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
691
623
  end
692
- when :tableSchema
693
- if value.is_a?(Schema)
694
- begin
695
- value.validate!
696
- rescue Error => e
697
- errors << e.message
698
- end
699
- else
700
- errors << "#{type} has invalid property '#{key}': expected Schema"
701
- end
702
- when :transformations
703
- if value.is_a?(Array) && value.all? {|v| v.is_a?(Transformation)}
704
- value.each do |t|
705
- begin
706
- t.validate!
707
- rescue Error => e
708
- errors << e.message
709
- end
710
- end
711
- else
712
- errors << "#{type} has invalid property '#{key}': expected array of Transformations"
713
- end
714
- when :titles
715
- valid_natural_language_property?(:titles, value) {|m| errors << m}
716
- when :trim
717
- unless %w(true false 1 0 start end).include?(value.to_s.downcase)
718
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected true, false, 1, 0, start or end"
719
- end
720
- when :url
721
- # Only validate URL in validation mode; this allows for a nil URL
722
- unless @url.valid?
723
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
724
- end
725
- when :@id, :@context
624
+ when :@context
726
625
  # Skip these
626
+ when :@id
627
+ # Must not be a BNode
628
+ if value.to_s.start_with?("_:")
629
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:"
630
+ end
727
631
  when :@type
728
632
  unless value.to_sym == type
729
633
  errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
@@ -734,8 +638,6 @@ module RDF::Tabular
734
638
  rescue Error => e
735
639
  errors << "#{type} has invalid content '#{key}': #{e.message}"
736
640
  end
737
- else
738
- warnings << "#{type} has invalid property '#{key}': unsupported property"
739
641
  end
740
642
  end
741
643
 
@@ -747,41 +649,18 @@ module RDF::Tabular
747
649
  # Determine if a natural language property is valid
748
650
  # @param [String, Array<String>, Hash{String => String}] value
749
651
  # @yield message error message
750
- # @return [Boolean]
751
- def valid_natural_language_property?(key, value)
752
- unless value.is_a?(Hash) && value.all? {|k, v| Array(v).all? {|vv| vv.is_a?(String)}}
753
- yield "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid natural language property" if block_given?
754
- false
755
- end
756
- end
757
-
758
- ##
759
- # Determine if an inherited property is valid
760
- # @param [String, Array<String>, Hash{String => String}] value
761
- # @yield message error message
762
- # @return [Boolean]
763
- def valid_inherited_property?(key, value)
764
- error = case key
765
- when :aboutUrl, :default, :propertyUrl, :valueUrl
766
- "string" unless value.is_a?(String)
767
- when :lang
768
- "valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
769
- when :null
770
- # To be valid, it must be a string or array
771
- "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
772
- when :ordered
773
- "boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
774
- when :separator
775
- "single character" unless value.nil? || value.is_a?(String) && value.length == 1
776
- when :textDirection
777
- "rtl or ltr" unless %(rtl ltr).include?(value)
778
- end
779
-
780
- if error
781
- yield "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{error}"
782
- false
652
+ # @return [String, nil]
653
+ def valid_natural_language_property?(value)
654
+ case value
655
+ when String
656
+ when Array
657
+ "a valid natural language property" unless value.all? {|v| v.is_a?(String)}
658
+ when Hash
659
+ "a valid natural language property" if
660
+ value.keys.any? {|k| k.to_s != "und" && !BCP47::Language.identify(k)} ||
661
+ value.values.any? {|v| valid_natural_language_property?(v).is_a?(String)}
783
662
  else
784
- true
663
+ "a valid natural language property"
785
664
  end
786
665
  end
787
666
 
@@ -801,7 +680,6 @@ module RDF::Tabular
801
680
  v = data.join(' ')[1..-1].strip
802
681
  unless v.empty?
803
682
  (self["rdfs:comment"] ||= []) << v
804
- yield RDF::Statement.new(nil, RDF::RDFS.comment, RDF::Literal(v))
805
683
  end
806
684
  skipped += 1
807
685
  next
@@ -895,204 +773,50 @@ module RDF::Tabular
895
773
  object.keys.any? {|k| k.to_s.include?(':')}
896
774
  end
897
775
 
898
- # Merge metadata into this a copy of this metadata
899
- # @param [Array<Metadata>] metadata
900
- # @return [Metadata]
901
- def merge(*metadata)
902
- return self if metadata.empty?
903
- # If the top-level object of any of the metadata files are table descriptions, these are treated as if they were table group descriptions containing a single table description (ie having a single resource property whose value is the same as the original table description).
904
- this = case self
905
- when TableGroup then self.dup
906
- when Table
907
- if self.is_a?(Table) && self.parent
908
- self.parent
909
- else
910
- content = {"@type" => "TableGroup", "tables" => [self]}
911
- content['@context'] = object.delete(:@context) if object[:@context]
912
- ctx = @context
913
- self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
914
- tg = TableGroup.new(content, filenames: @filenames, base: base)
915
- @parent = tg # Link from parent
916
- tg
917
- end
918
- else self.dup
919
- end
920
-
921
- # Merge all passed metadata into this
922
- merged = metadata.reduce(this) do |memo, md|
923
- md = case md
924
- when TableGroup then md
925
- when Table
926
- if md.parent
927
- md.parent
928
- else
929
- content = {"@type" => "TableGroup", "tables" => [md]}
930
- ctx = md.context
931
- content['@context'] = md.object.delete(:@context) if md.object[:@context]
932
- md.remove_instance_variable(:@context) if md.instance_variables.include?(:@context)
933
- tg = TableGroup.new(content, filenames: md.filenames, base: md.base)
934
- md.instance_variable_set(:@parent, tg) # Link from parent
935
- tg
936
- end
937
- else
938
- md
939
- end
940
-
941
- raise "Can't merge #{memo.class} with #{md.class}" unless memo.class == md.class
942
-
943
- memo.merge!(md)
944
- end
945
-
946
- # Set @context of merged
947
- merged[:@context] = 'http://www.w3.org/ns/csvw'
948
- merged
949
- end
950
-
951
- # Merge metadata into self
952
- def merge!(metadata)
953
- raise "Merging non-equivalent metadata types: #{self.class} vs #{metadata.class}" unless self.class == metadata.class
954
-
955
- depth do
956
- # Merge filenames
957
- if @filenames || metadata.filenames
958
- @filenames = (Array(@filenames) | Array(metadata.filenames)).uniq
776
+ # Verify that the metadata we're using is compatible with embedded metadata
777
+ # @param [Table] other
778
+ # @raise [Error] if not compatible
779
+ def verify_compatible!(other)
780
+ if self.is_a?(TableGroup)
781
+ unless tables.any? {|t| t.url == other.url && t.verify_compatible!(other)}
782
+ raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
959
783
  end
960
-
961
- # Normalize A (this) and B (metadata) values into normal form
962
- self.normalize!
963
- metadata = metadata.dup.normalize!
964
-
965
- @dialect = nil # So that it is re-built when needed
966
- # Merge each property from metadata into self
967
- metadata.each do |key, value|
968
- case @properties[key]
969
- when :array
970
- # If the property is an array property, the way in which values are merged depends on the property; see the relevant property for this definition.
971
- object[key] = case object[key]
972
- when nil then []
973
- when Hash then [object[key]] # Shouldn't happen if well formed
974
- else object[key]
975
- end
976
-
977
- value = [value] if value.is_a?(Hash)
978
- case key
979
- when :notes
980
- # If the property is notes, the result is an array containing values from A followed by values from B.
981
- a = object[key].is_a?(Array) ? object[key] : [object[key]].compact
982
- b = value.is_a?(Array) ? value : [value]
983
- object[key] = a + b
984
- when :tables
985
- # When an array of table descriptions B is imported into an original array of table descriptions A, each table description within B is combined into the original array A by:
986
- value.each do |tb|
987
- if ta = object[key].detect {|e| e.url == tb.url}
988
- # if there is a table description with the same url in A, the table description from B is imported into the matching table description in A
989
- debug("merge!: tables") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
990
- ta.merge!(tb)
991
- else
992
- # otherwise, the table description from B is appended to the array of table descriptions A
993
- tb = tb.dup
994
- tb.instance_variable_set(:@parent, self)
995
- debug("merge!: tables") {"add TB: #{tb.inspect}"}
996
- object[key] << tb
997
- end
998
- end
999
- when :transformations
1000
- # SPEC CONFUSION: differing transformations with same @id?
1001
- # When an array of template specifications B is imported into an original array of template specifications A, each template specification within B is combined into the original array A by:
1002
- value.each do |t|
1003
- if ta = object[key].detect {|e| e.targetFormat == t.targetFormat && e.scriptFormat == t.scriptFormat}
1004
- # if there is a template specification with the same targetFormat and scriptFormat in A, the template specification from B is imported into the matching template specification in A
1005
- ta.merge!(t)
1006
- else
1007
- # otherwise, the template specification from B is appended to the array of template specifications A
1008
- t = t.dup
1009
- t.instance_variable_set(:@parent, self) if self
1010
- object[key] << t
1011
- end
1012
- end
1013
- when :columns
1014
- # When an array of column descriptions B is imported into an original array of column descriptions A, each column description within B is combined into the original array A by:
1015
- Array(value).each_with_index do |cb, index|
1016
- ca = object[key][index] || {}
1017
- va = ([ca[:name]] + (ca[:titles] || {}).values.flatten).compact.map(&:downcase)
1018
- vb = ([cb[:name]] + (cb[:titles] || {}).values.flatten).compact.map(&:downcase)
1019
- if !(va & vb).empty?
1020
- debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
1021
- # If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
1022
- ca.merge!(cb)
1023
- elsif ca.nil? && cb.virtual
1024
- debug("merge!: columns") {"index: #{index}, virtual"}
1025
- # otherwise, if at a given index there is no column description within A, but there is a column description within B.
1026
- cb = cb.dup
1027
- cb.instance_variable_set(:@parent, self) if self
1028
- object[key][index] = cb
1029
- else
1030
- debug("merge!: columns") {"index: #{index}, ignore"}
1031
- raise Error, "Columns at same index don't match: #{ca.to_json} vs. #{cb.to_json}"
1032
- end
1033
- end
1034
- # The number of non-virtual columns in A and B MUST be the same
1035
- nA = object[key].reject(&:virtual).length
1036
- nB = Array(value).reject(&:virtual).length
1037
- raise Error, "Columns must have the same number of non-virtual columns" unless nA == nB || nB == 0
1038
- when :foreignKeys
1039
- # When an array of foreign key definitions B is imported into an original array of foreign key definitions A, each foreign key definition within B which does not appear within A is appended to the original array A.
1040
- # SPEC CONFUSION: If definitions vary only a little, they should probably be merged (e.g. common properties).
1041
- object[key] = object[key] + (metadata[key] - object[key])
1042
- end
1043
- when :object
1044
- case key
1045
- when :notes
1046
- # If the property accepts arrays, the result is an array of objects or strings: those from A followed by those from B that were not already a value in A.
1047
- a = object[key] || []
1048
- object[key] = (a + value).uniq
1049
- else
1050
- # if the property only accepts single objects
1051
- if object[key].is_a?(String) || value.is_a?(String)
1052
- # if the value of the property in A is a string or the value from B is a string then the value from A overrides that from B
1053
- object[key] ||= value
1054
- elsif object[key].is_a?(Metadata)
1055
- # otherwise (if both values as objects) the objects are merged as described here
1056
- object[key].merge!(value)
1057
- elsif object[key].is_a?(Hash)
1058
- # otherwise (if both values as objects) the objects are merged as described here
1059
- object[key].merge!(value)
1060
- else
1061
- value = value.dup
1062
- value.instance_variable_set(:@parent, self) if self
1063
- object[key] = value
1064
- end
1065
- end
1066
- when :natural_language
1067
- # If the property is a natural language property, the result is an object whose properties are language codes and where the values of those properties are arrays. The suitable language code for the values is either explicit within the existing value or determined through the default language in the metadata document; if it can't be determined the language code und should be used. The arrays should provide the values from A followed by those from B that were not already a value in A.
1068
- a = object[key] || {}
1069
- b = value
1070
- debug("merge!: natural_language") {
1071
- "A: #{a.inspect}, B: #{b.inspect}"
1072
- }
1073
- b.each do |k, v|
1074
- a[k] = Array(a[k]) + (Array(b[k]) - Array(a[k]))
1075
- end
1076
- # eliminate titles with no language where the same string exists with a language
1077
- if a.has_key?("und")
1078
- a["und"] = a["und"].reject do |v|
1079
- a.any? {|lang, values| lang != 'und' && values.include?(v)}
1080
- end
1081
- a.delete("und") if a["und"].empty?
1082
- end
1083
- object[key] = a
1084
- when ->(k) {key == :@id}
1085
- object[key] ||= value
1086
- @id ||= metadata.id
1087
- else
1088
- # Otherwise, the value from A overrides that from B
1089
- object[key] ||= value
1090
- end
784
+ else
785
+ # Tables must have the same url
786
+ raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}" unless
787
+ url == other.url
788
+
789
+ # Each column description within B MUST match the corresponding column description in A for non-virtual columns
790
+ non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
791
+ object_columns = Array(other.tableSchema.columns)
792
+
793
+ # Special case, if there is no header, then there are no column definitions, allow this as being compatile
794
+ raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}" if
795
+ non_virtual_columns.length != object_columns.length && !object_columns.empty?
796
+ index = 0
797
+ object_columns.all? do |cb|
798
+ ca = non_virtual_columns[index]
799
+ va = ([ca[:name]] + case ca[:titles]
800
+ when String then [ca[:titles]]
801
+ when Array then ca[:titles]
802
+ when Hash then ca[:titles].values.flatten
803
+ else []
804
+ end).compact.map(&:downcase)
805
+
806
+ vb = ([cb[:name]] + case cb[:titles]
807
+ when String then [cb[:titles]]
808
+ when Array then cb[:titles]
809
+ when Hash then cb[:titles].values.flatten
810
+ else []
811
+ end).compact.map(&:downcase)
812
+
813
+ # If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description in B is compatible with the matching column description in A
814
+ raise Error, "Columns don't match: va: #{va}, vb: #{vb}" if (va & vb).empty?
815
+ debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
816
+ index += 1
1091
817
  end
1092
818
  end
1093
-
1094
- debug("merge!") {self.inspect}
1095
- self
819
+ true
1096
820
  end
1097
821
 
1098
822
  def inspect
@@ -1176,7 +900,8 @@ module RDF::Tabular
1176
900
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
1177
901
  raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
1178
902
  elsif value['@language'] && !BCP47::Language.identify(value['@language'])
1179
- raise Error, "Value object with @language must use valid language: #{value.to_json}"
903
+ warn "Value object with @language must use valid language: #{value.to_json}" if @warnings
904
+ value.delete('@language')
1180
905
  elsif value['@type'] && !context.expand_iri(value['@type'], vocab: true).absolute?
1181
906
  raise Error, "Value object with @type must defined type: #{value.to_json}"
1182
907
  end
@@ -1209,15 +934,48 @@ module RDF::Tabular
1209
934
  end
1210
935
  protected
1211
936
 
937
+ # Add a warning on this object
938
+ def warn(string)
939
+ debug("warn: #{string}")
940
+ (@warnings ||= []) << string
941
+ end
942
+
1212
943
  # When setting a natural language property, always put in language-map form
1213
- # @param [Symbol] prop
1214
944
  # @param [Hash{String => String, Array<String>}, Array<String>, String] value
1215
945
  # @return [Hash{String => Array<String>}]
1216
- def set_nl(prop, value)
1217
- object[prop] = case value
1218
- when String then {(context.default_language || 'und') => [value]}
1219
- when Array then {(context.default_language || 'und') => value}
1220
- else value
946
+ def set_nl(value)
947
+ case value
948
+ when String then value
949
+ when Array then value.select {|v| v.is_a?(String)}
950
+ when Hash
951
+ value.delete_if {|k, v| !BCP47::Language.identify(k)}
952
+ value.each do |k, v|
953
+ value[k] = Array(v).select {|vv| vv.is_a?(String)}
954
+ end
955
+ else nil
956
+ end
957
+ end
958
+
959
+ # General setter for array properties
960
+ def set_array_value(key, value, klass, options={})
961
+ object[key] = case value
962
+ when Array
963
+ value.map do |v|
964
+ case v
965
+ when Hash
966
+ klass.new(v, @options.merge(options).merge(parent: self, context: nil))
967
+ else v
968
+ end
969
+ end
970
+ else
971
+ warn "#{type} has invalid property '#{key}': expected array of #{klass}"
972
+ []
973
+ end
974
+
975
+ unless object[key].all? {|v| v.is_a?(klass)}
976
+ warn "#{type} has invalid property '#{key}': expected array of #{klass}"
977
+ # Remove elements that aren't of the right types
978
+ object[key] = object[key].select! {|v| v.is_a?(klass)}
1221
979
  end
1222
980
  end
1223
981
 
@@ -1228,6 +986,10 @@ module RDF::Tabular
1228
986
  end
1229
987
  end
1230
988
 
989
+ def default_value(prop)
990
+ self.class.const_get(:DEFAULTS).merge(INHERITED_DEFAULTS)[prop]
991
+ end
992
+
1231
993
  ##
1232
994
  # Get the root metadata object
1233
995
  # @return [TableGroup, Table]
@@ -1268,17 +1030,27 @@ module RDF::Tabular
1268
1030
  dialect: :object,
1269
1031
  transformations: :array,
1270
1032
  }.freeze
1271
- REQUIRED = [].freeze
1033
+ DEFAULTS = {
1034
+ tableDirection: "default".freeze,
1035
+ }.freeze
1036
+ REQUIRED = [:tables].freeze
1272
1037
 
1273
1038
  # Setters
1274
- PROPERTIES.each do |a, type|
1275
- next if a == :dialect
1276
- define_method("#{a}=".to_sym) do |value|
1277
- case type
1278
- when :natural_language
1279
- set_nl(a, value)
1039
+ PROPERTIES.each do |key, type|
1040
+ next if [:tables, :tableSchema, :dialect, :transformations].include?(key)
1041
+ define_method("#{key}=".to_sym) do |value|
1042
+ invalid = case key
1043
+ when :tableDirection
1044
+ "rtl, ltr, or default" unless %(rtl ltr default).include?(value)
1045
+ when :notes, :tables, :tableSchema, :dialect, :transformations
1046
+ # We handle this through a separate setters
1047
+ end
1048
+
1049
+ if invalid
1050
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1051
+ object[key] = default_value(key) unless default_value(key).nil?
1280
1052
  else
1281
- object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1053
+ object[key] = value
1282
1054
  end
1283
1055
  end
1284
1056
  end
@@ -1325,11 +1097,14 @@ module RDF::Tabular
1325
1097
 
1326
1098
  # Return Annotated Table Group representation
1327
1099
  def to_atd
1328
- {
1329
- "@id" => id,
1100
+ object.inject({
1101
+ "@id" => (id.to_s if id),
1330
1102
  "@type" => "AnnotatedTableGroup",
1331
- "tables" => tables.map(&:to_atd)
1332
- }
1103
+ "tables" => []
1104
+ }) do |memo, (k, v)|
1105
+ memo[k.to_s] ||= v
1106
+ memo
1107
+ end.delete_if {|k,v| v.nil? || v.is_a?(Metadata) || k.to_s == "@context"}
1333
1108
  end
1334
1109
  end
1335
1110
 
@@ -1345,17 +1120,37 @@ module RDF::Tabular
1345
1120
  transformations: :array,
1346
1121
  url: :link,
1347
1122
  }.freeze
1123
+ DEFAULTS = {
1124
+ suppressOutput: false,
1125
+ tableDirection: "default".freeze,
1126
+ }.freeze
1348
1127
  REQUIRED = [:url].freeze
1349
1128
 
1350
1129
  # Setters
1351
- PROPERTIES.each do |a, type|
1352
- next if a == :dialect
1353
- define_method("#{a}=".to_sym) do |value|
1354
- case type
1355
- when :natural_language
1356
- set_nl(a, value)
1130
+ PROPERTIES.each do |key, type|
1131
+ next if [:tableSchema, :dialect, :transformations].include?(key)
1132
+ define_method("#{key}=".to_sym) do |value|
1133
+ invalid = case key
1134
+ when :suppressOutput
1135
+ "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
1136
+ when :tableDirection
1137
+ "rtl, ltr, or default" unless %(rtl ltr default).include?(value)
1138
+ when :url
1139
+ "valid URL" unless value.is_a?(String) && base.join(value).valid?
1140
+ when :notes, :tableSchema, :dialect, :transformations
1141
+ # We handle this through a separate setters
1142
+ end
1143
+
1144
+ if invalid
1145
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1146
+ object[key] = default_value(key) unless default_value(key).nil?
1147
+ elsif key == :url
1148
+ # URL of CSV relative to metadata
1149
+ object[:url] = value
1150
+ @url = base.join(value)
1151
+ @context.base = @url if @context # Use as base for expanding IRIs
1357
1152
  else
1358
- object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1153
+ object[key] = value
1359
1154
  end
1360
1155
  end
1361
1156
  end
@@ -1366,15 +1161,29 @@ module RDF::Tabular
1366
1161
  super || tableSchema && tableSchema.has_annotations?
1367
1162
  end
1368
1163
 
1164
+ # Return a new TableGroup based on this Table
1165
+ def to_table_group
1166
+ content = {"@type" => "TableGroup", "tables" => [self]}
1167
+ content['@context'] = object.delete(:@context) if object[:@context]
1168
+ ctx = @context
1169
+ self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
1170
+ tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
1171
+ @parent = tg # Link from parent
1172
+ tg
1173
+ end
1174
+
1369
1175
  # Return Annotated Table representation
1370
1176
  def to_atd
1371
- {
1372
- "@id" => id,
1177
+ object.inject({
1178
+ "@id" => (id.to_s if id),
1373
1179
  "@type" => "AnnotatedTable",
1180
+ "url" => self.url.to_s,
1374
1181
  "columns" => tableSchema.columns.map(&:to_atd),
1375
- "rows" => [],
1376
- "url" => self.url.to_s
1377
- }
1182
+ "rows" => []
1183
+ }) do |memo, (k, v)|
1184
+ memo[k.to_s] ||= v
1185
+ memo
1186
+ end.delete_if {|k,v| v.nil? || v.is_a?(Metadata) || k.to_s == "@context"}
1378
1187
  end
1379
1188
 
1380
1189
  # Logic for accessing elements as accessors
@@ -1387,36 +1196,6 @@ module RDF::Tabular
1387
1196
  end
1388
1197
  end
1389
1198
 
1390
- class Transformation < Metadata
1391
- PROPERTIES = {
1392
- :@id => :link,
1393
- :@type => :atomic,
1394
- source: :atomic,
1395
- targetFormat: :link,
1396
- scriptFormat: :link,
1397
- titles: :natural_language,
1398
- url: :link,
1399
- }.freeze
1400
- REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
1401
-
1402
- # Setters
1403
- PROPERTIES.each do |a, type|
1404
- define_method("#{a}=".to_sym) do |value|
1405
- case type
1406
- when :natural_language
1407
- set_nl(a, value)
1408
- else
1409
- object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1410
- end
1411
- end
1412
- end
1413
-
1414
- # Logic for accessing elements as accessors
1415
- def method_missing(method, *args)
1416
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1417
- end
1418
- end
1419
-
1420
1199
  class Schema < Metadata
1421
1200
  PROPERTIES = {
1422
1201
  :@id => :link,
@@ -1425,17 +1204,67 @@ module RDF::Tabular
1425
1204
  foreignKeys: :array,
1426
1205
  primaryKey: :column_reference,
1427
1206
  }.freeze
1207
+ DEFAULTS = {}.freeze
1428
1208
  REQUIRED = [].freeze
1429
1209
 
1430
1210
  # Setters
1431
- PROPERTIES.each do |a, type|
1432
- define_method("#{a}=".to_sym) do |value|
1433
- case type
1434
- when :natural_language
1435
- set_nl(a, value)
1211
+ PROPERTIES.each do |key, type|
1212
+ define_method("#{key}=".to_sym) do |value|
1213
+ invalid = case key
1214
+ when :primaryKey
1215
+ "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
1216
+ end
1217
+
1218
+ if invalid
1219
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1220
+ object[key] = default_value(key) unless default_value(key).nil?
1436
1221
  else
1437
- object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1222
+ object[key] = value
1223
+ end
1224
+ end
1225
+ end
1226
+
1227
+ def columns=(value)
1228
+ object[:columns] = case value
1229
+ when Array
1230
+ number = 0
1231
+ value.map do |v|
1232
+ number += 1
1233
+ case v
1234
+ when Hash
1235
+ Column.new(v, @options.merge(
1236
+ table: (parent if parent.is_a?(Table)),
1237
+ parent: self,
1238
+ context: nil,
1239
+ number: number))
1240
+ else
1241
+ v
1242
+ end
1438
1243
  end
1244
+ else
1245
+ warn "#{type} has invalid property 'columns': expected array of Column"
1246
+ []
1247
+ end
1248
+
1249
+ unless object[:columns].all? {|v| v.is_a?(Column)}
1250
+ warn "#{type} has invalid property 'columns': expected array of Column"
1251
+ # Remove elements that aren't of the right types
1252
+ object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
1253
+ end
1254
+ end
1255
+
1256
+ def foreignKeys=(value)
1257
+ object[:foreignKeys] = case value
1258
+ when Array then value
1259
+ else
1260
+ warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1261
+ []
1262
+ end
1263
+
1264
+ unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
1265
+ warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1266
+ # Remove elements that aren't of the right types
1267
+ object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
1439
1268
  end
1440
1269
  end
1441
1270
 
@@ -1456,9 +1285,12 @@ module RDF::Tabular
1456
1285
  name: :atomic,
1457
1286
  suppressOutput: :atomic,
1458
1287
  titles: :natural_language,
1459
- required: :atomic,
1460
1288
  virtual: :atomic,
1461
1289
  }.freeze
1290
+ DEFAULTS = {
1291
+ suppressOutput: false,
1292
+ virtual: false,
1293
+ }.freeze
1462
1294
  REQUIRED = [].freeze
1463
1295
 
1464
1296
  ##
@@ -1488,20 +1320,33 @@ module RDF::Tabular
1488
1320
  end
1489
1321
 
1490
1322
  # Setters
1491
- PROPERTIES.each do |a, type|
1492
- define_method("#{a}=".to_sym) do |value|
1493
- case type
1494
- when :natural_language
1495
- set_nl(a, value)
1323
+ PROPERTIES.each do |key, t|
1324
+ define_method("#{key}=".to_sym) do |value|
1325
+ invalid = case key
1326
+ when :name
1327
+ "proper name format" unless value.is_a?(String) && value.match(NAME_SYNTAX)
1328
+ when :suppressOutput, :virtual
1329
+ "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
1330
+ when :titles
1331
+ valid_natural_language_property?(value)
1332
+ end
1333
+
1334
+ if invalid && key == :titles
1335
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1336
+ object[key] = set_nl(value)
1337
+ object.delete(key) if object[key].nil?
1338
+ elsif invalid
1339
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1340
+ object[key] = default_value(key) unless default_value(key).nil?
1496
1341
  else
1497
- object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1342
+ object[key] = value
1498
1343
  end
1499
1344
  end
1500
1345
  end
1501
1346
 
1502
1347
  # Return or create a name for the column from titles, if it exists
1503
1348
  def name
1504
- object[:name] ||= if titles && (ts = titles[context.default_language || 'und'])
1349
+ self[:name] || if titles && (ts = titles[context.default_language || 'und'])
1505
1350
  n = Array(ts).first
1506
1351
  n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
1507
1352
  n1 = URI.encode(n[1..-1], /[^\w\.]/)
@@ -1518,17 +1363,20 @@ module RDF::Tabular
1518
1363
 
1519
1364
  # Return Annotated Column representation
1520
1365
  def to_atd
1521
- {
1522
- "@id" => id,
1366
+ object.inject({
1367
+ "@id" => id.to_s,
1523
1368
  "@type" => "Column",
1524
- "table" => (table.id if table),
1369
+ "table" => (table.id.to_s if table.id),
1525
1370
  "number" => self.number,
1526
1371
  "sourceNumber" => self.sourceNumber,
1527
1372
  "cells" => [],
1528
1373
  "virtual" => self.virtual,
1529
1374
  "name" => self.name,
1530
1375
  "titles" => self.titles
1531
- }
1376
+ }) do |memo, (k, v)|
1377
+ memo[k.to_s] ||= v
1378
+ memo
1379
+ end.delete_if {|k,v| v.nil?}
1532
1380
  end
1533
1381
 
1534
1382
  # Logic for accessing elements as accessors
@@ -1541,17 +1389,55 @@ module RDF::Tabular
1541
1389
  end
1542
1390
  end
1543
1391
 
1392
+ class Transformation < Metadata
1393
+ PROPERTIES = {
1394
+ :@id => :link,
1395
+ :@type => :atomic,
1396
+ source: :atomic,
1397
+ targetFormat: :link,
1398
+ scriptFormat: :link,
1399
+ titles: :natural_language,
1400
+ url: :link,
1401
+ }.freeze
1402
+ DEFAULTS = {}.freeze
1403
+ REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
1404
+
1405
+ # Setters
1406
+ PROPERTIES.each do |key, type|
1407
+ define_method("#{key}=".to_sym) do |value|
1408
+ invalid = case key
1409
+ when :scriptFormat, :targetFormat
1410
+ "valid absolute URL" unless RDF::URI(value).valid?
1411
+ when :source
1412
+ "json or rdf" unless %w(json rdf).include?(value) || value.nil?
1413
+ end
1414
+
1415
+ if invalid
1416
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1417
+ object[key] = default_value(key) unless default_value(key).nil?
1418
+ else
1419
+ object[key] = value
1420
+ end
1421
+ end
1422
+ end
1423
+
1424
+ # Logic for accessing elements as accessors
1425
+ def method_missing(method, *args)
1426
+ PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1427
+ end
1428
+ end
1429
+
1544
1430
  class Dialect < Metadata
1545
1431
  # Defaults for dialects
1546
- DIALECT_DEFAULTS = {
1547
- commentPrefix: nil,
1432
+ DEFAULTS = {
1433
+ commentPrefix: "#".freeze,
1548
1434
  delimiter: ",".freeze,
1549
1435
  doubleQuote: true,
1550
1436
  encoding: "utf-8".freeze,
1551
1437
  header: true,
1552
1438
  headerRowCount: 1,
1553
1439
  lineTerminators: :auto,
1554
- quoteChar: '"',
1440
+ quoteChar: '"'.freeze,
1555
1441
  skipBlankRows: false,
1556
1442
  skipColumns: 0,
1557
1443
  skipInitialSpace: false,
@@ -1580,9 +1466,35 @@ module RDF::Tabular
1580
1466
  REQUIRED = [].freeze
1581
1467
 
1582
1468
  # Setters
1583
- PROPERTIES.keys.each do |a|
1584
- define_method("#{a}=".to_sym) do |value|
1585
- object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1469
+ PROPERTIES.keys.each do |key|
1470
+ define_method("#{key}=".to_sym) do |value|
1471
+ invalid = case key
1472
+ when :commentPrefix, :delimiter, :quoteChar
1473
+ "a single character string" unless value.is_a?(String) && value.length == 1
1474
+ when :lineTerminators
1475
+ "a string" unless value.is_a?(String)
1476
+ when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
1477
+ "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
1478
+ when :encoding
1479
+ "a valid encoding" unless (Encoding.find(value) rescue false)
1480
+ when :headerRowCount, :skipColumns, :skipRows
1481
+ "a non-negative integer" unless value.is_a?(Numeric) && value.integer? && value >= 0
1482
+ when :trim
1483
+ "true, false, start or end" unless %w(true false start end).include?(value.to_s.downcase)
1484
+ when :titles
1485
+ valid_natural_language_property?(value)
1486
+ end
1487
+
1488
+ if invalid && key == :titles
1489
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1490
+ object[key] = set_nl(value)
1491
+ object.delete(key) if object[key].nil?
1492
+ elsif invalid
1493
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1494
+ object[key] = default_value(key) unless default_value(key).nil?
1495
+ else
1496
+ object[key] = value
1497
+ end
1586
1498
  end
1587
1499
  end
1588
1500
 
@@ -1608,19 +1520,22 @@ module RDF::Tabular
1608
1520
  # Extract a new Metadata document from the file or data provided
1609
1521
  #
1610
1522
  # @param [#read, #to_s] input IO, or file path or URL
1523
+ # @param [Table] metadata used for saving annotations created while extracting metadata
1611
1524
  # @param [Hash{Symbol => Object}] options
1612
1525
  # any additional options (see `RDF::Util::File.open_file`)
1526
+ # @option options [String] :lang language to set in table, if any
1613
1527
  # @return [Metadata] Tabular metadata
1614
1528
  # @see http://w3c.github.io/csvw/syntax/#parsing
1615
- def embedded_metadata(input, options = {})
1529
+ def embedded_metadata(input, metadata, options = {})
1616
1530
  options = options.dup
1617
1531
  options.delete(:context) # Don't accidentally use a passed context
1618
1532
  # Normalize input to an IO object
1619
1533
  if input.is_a?(String)
1620
- return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
1534
+ return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, options.merge(base: input.to_s))}
1621
1535
  end
1622
1536
 
1623
1537
  table = {
1538
+ "@context" => "http://www.w3.org/ns/csvw",
1624
1539
  "url" => (options.fetch(:base, "")),
1625
1540
  "@type" => "Table",
1626
1541
  "tableSchema" => {
@@ -1628,6 +1543,8 @@ module RDF::Tabular
1628
1543
  "columns" => []
1629
1544
  }
1630
1545
  }
1546
+ metadata ||= table # In case the embedded metadata becomes the final metadata
1547
+ metadata["lang"] = options[:lang] if options[:lang]
1631
1548
 
1632
1549
  # Set encoding on input
1633
1550
  csv = ::CSV.new(input, csv_options)
@@ -1638,7 +1555,7 @@ module RDF::Tabular
1638
1555
  value.rstrip! if %w(true end).include?(trim.to_s)
1639
1556
 
1640
1557
  value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1641
- (table["rdfs:comment"] ||= []) << value unless value.empty?
1558
+ (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1642
1559
  end
1643
1560
  debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1644
1561
 
@@ -1669,9 +1586,9 @@ module RDF::Tabular
1669
1586
 
1670
1587
  # Logic for accessing elements as accessors
1671
1588
  def method_missing(method, *args)
1672
- if DIALECT_DEFAULTS.has_key?(method.to_sym)
1589
+ if DEFAULTS.has_key?(method.to_sym)
1673
1590
  # As set, or with default
1674
- object.fetch(method.to_sym, DIALECT_DEFAULTS[method.to_sym])
1591
+ object.fetch(method.to_sym, DEFAULTS[method.to_sym])
1675
1592
  else
1676
1593
  super
1677
1594
  end
@@ -1691,19 +1608,46 @@ module RDF::Tabular
1691
1608
  maxInclusive: :atomic,
1692
1609
  minExclusive: :atomic,
1693
1610
  maxExclusive: :atomic,
1694
- decimalChar: :atomic,
1695
- groupChar: :atomic,
1696
- pattern: :atomic,
1697
1611
  }.freeze
1698
1612
  REQUIRED = [].freeze
1613
+ DEFAULTS = {}.freeze
1699
1614
 
1700
1615
  # Override `base` in Metadata
1701
1616
  def base; object[:base]; end
1702
1617
 
1703
1618
  # Setters
1704
- PROPERTIES.each do |a, type|
1705
- define_method("#{a}=".to_sym) do |value|
1706
- object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1619
+ PROPERTIES.each do |key, type|
1620
+ define_method("#{key}=".to_sym) do |value|
1621
+ invalid = case key
1622
+ when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
1623
+ "numeric or valid date/time" unless value.is_a?(Numeric) ||
1624
+ RDF::Literal::Date.new(value.to_s).valid? ||
1625
+ RDF::Literal::Time.new(value.to_s).valid? ||
1626
+ RDF::Literal::DateTime.new(value.to_s).valid?
1627
+ when :format
1628
+ unless value.is_a?(String)
1629
+ warn "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
1630
+ if default_value(key).nil?
1631
+ object.delete(key)
1632
+ else
1633
+ object[key] = default_value(key)
1634
+ end
1635
+ end
1636
+ when :length, :minLength, :maxLength
1637
+ if !(value.is_a?(Numeric) && value.integer? && value >= 0)
1638
+ "a non-negative integer"
1639
+ elsif key != :length && object[:length] && value != object[:length]
1640
+ # Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (i.e., separator is not specified), a string or one of its subtypes, or a binary value. This branch only checks that minLength/maxLength agree with an already-set length.
1641
+ "both length and #{key} requires they be equal"
1642
+ end
1643
+ end
1644
+
1645
+ if invalid
1646
+ warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1647
+ object[key] = default_value(key) unless default_value(key).nil?
1648
+ else
1649
+ object[key] = value
1650
+ end
1707
1651
  end
1708
1652
  end
1709
1653
 
@@ -1741,14 +1685,15 @@ module RDF::Tabular
1741
1685
  # Return Annotated Cell representation
1742
1686
  def to_atd
1743
1687
  {
1744
- "@id" => self.id,
1688
+ "@id" => id.to_s,
1745
1689
  "@type" => "Cell",
1746
- "column" => column.id,
1747
- "row" => row.id,
1690
+ "column" => column.id.to_s,
1691
+ "row" => row.id.to_s,
1748
1692
  "stringValue" => self.stringValue,
1749
- "value" => self.value,
1693
+ "table" => (table.id.to_s if table.id),
1694
+ "value" => table.context.expand_value(nil, self.value),
1750
1695
  "errors" => self.errors
1751
- }
1696
+ }.delete_if {|k,v| Array(v).empty?}
1752
1697
  end
1753
1698
  end
1754
1699
 
@@ -1794,9 +1739,15 @@ module RDF::Tabular
1794
1739
  map_values = {"_row" => number, "_sourceRow" => source_number}
1795
1740
 
1796
1741
  columns = metadata.tableSchema.columns ||= []
1742
+ non_virtual_columns = columns.reject(&:virtual)
1743
+
1744
+ if row.length < non_virtual_columns.length
1745
+ raise Error, "Row #{source_number} has #{row.length} columns, expected #{non_virtual_columns.length}"
1746
+ end
1797
1747
 
1798
1748
  # Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
1799
1749
  columns.each_with_index {|c, index| row[index] ||= (c.null || '')}
1750
+
1800
1751
  row.each_with_index do |value, index|
1801
1752
 
1802
1753
  next if index < skipColumns
@@ -1811,7 +1762,7 @@ module RDF::Tabular
1811
1762
 
1812
1763
  @values << cell = Cell.new(metadata, column, self, value)
1813
1764
 
1814
- datatype = column.datatype || Datatype.new(base: "string")
1765
+ datatype = column.datatype || Datatype.new(base: "string", parent: column)
1815
1766
  value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
1816
1767
  value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
1817
1768
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
@@ -1848,7 +1799,7 @@ module RDF::Tabular
1848
1799
  cell.errors = cell_errors
1849
1800
  metadata.send(:debug, "#{self.number}: each_cell ##{self.sourceNumber},#{cell.column.sourceNumber}", cell.errors.join("\n")) unless cell_errors.empty?
1850
1801
 
1851
- map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
1802
+ map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
1852
1803
  end
1853
1804
 
1854
1805
  # Map URLs for row
@@ -1869,13 +1820,13 @@ module RDF::Tabular
1869
1820
  # Return Annotated Row representation
1870
1821
  def to_atd
1871
1822
  {
1872
- "@id" => self.id,
1823
+ "@id" => id.to_s,
1873
1824
  "@type" => "Row",
1874
- "table" => table.id,
1825
+ "table" => (table.id.to_s if table.id),
1875
1826
  "number" => self.number,
1876
1827
  "sourceNumber" => self.sourceNumber,
1877
1828
  "cells" => @values.map(&:to_atd)
1878
- }
1829
+ }.delete_if {|k,v| v.nil?}
1879
1830
  end
1880
1831
 
1881
1832
  private
@@ -1905,17 +1856,18 @@ module RDF::Tabular
1905
1856
  :nonPositiveInteger, :negativeInteger,
1906
1857
  :double, :float, :number
1907
1858
  # Normalize representation based on numeric-specific facets
1908
- groupChar = datatype.groupChar || ','
1909
- if datatype.pattern && !value.match(Regexp.new(datatype.pattern))
1859
+ format ||= {}
1860
+ groupChar = format[:groupChar] || ','
1861
+ if format[:pattern] && !value.match(Regexp.new(format[:pattern]))
1910
1862
  # pattern facet failed
1911
- value_errors << "#{value} does not match pattern #{datatype.pattern}"
1863
+ value_errors << "#{value} does not match pattern #{format[:pattern]}"
1912
1864
  end
1913
1865
  if value.include?(groupChar*2)
1914
1866
  # consecutive group characters are not allowed
1915
1867
  value_errors << "#{value} has repeating #{groupChar.inspect}"
1916
1868
  end
1917
1869
  value.gsub!(groupChar, '')
1918
- value.sub!(datatype.decimalChar, '.') if datatype.decimalChar
1870
+ value.sub!(format[:decimalChar], '.') if format[:decimalChar]
1919
1871
 
1920
1872
  # Extract percent or per-mille sign
1921
1873
  percent = permille = false
@@ -2021,8 +1973,8 @@ module RDF::Tabular
2021
1973
  tz_part = value if tz
2022
1974
 
2023
1975
  # Compose normalized value
2024
- vd = ("%04d-%02d-%02d" % [date_part[:yr], date_part[:mo], date_part[:da]]) if date_part
2025
- vt = ("%02d:%02d:%02d" % [time_part[:hr], time_part[:mi], time_part[:se].to_i]) if time_part
1976
+ vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
1977
+ vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
2026
1978
  value = [vd, vt].compact.join('T')
2027
1979
  value += tz_part.to_s
2028
1980
  end