rdf-tabular 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -180,9 +180,10 @@ module RDF::Tabular
180
180
  # Return either the merge of user- and found-metadata, any of these, or an empty TableGroup
181
181
  metadata = case
182
182
  when user_metadata && found_metadata then user_metadata.merge(found_metadata)
183
- when user_metadata then user_metadata
184
- when found_metadata then found_metadata
185
- else TableGroup.new({resources: [{url: base}]}, options)
183
+ when user_metadata then user_metadata
184
+ when found_metadata then found_metadata
185
+ when base then TableGroup.new({tables: [{url: base}]}, options)
186
+ else TableGroup.new({tables: []}, options)
186
187
  end
187
188
 
188
189
  # Make TableGroup, if not already
@@ -223,13 +224,13 @@ module RDF::Tabular
223
224
  # Figure out type by site
224
225
  object_keys = object.keys.map(&:to_s)
225
226
  type ||= case
226
- when %w(resources).any? {|k| object_keys.include?(k)} then :TableGroup
227
+ when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
227
228
  when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
228
229
  when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
229
230
  when %w(columns primaryKey foreignKeys urlTemplate).any? {|k| object_keys.include?(k)} then :Schema
230
231
  when %w(name required).any? {|k| object_keys.include?(k)} then :Column
231
- when %w(commentPrefix delimiter doubleQuote encoding header headerColumnCount headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
232
- when %w(lineTerminator quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
232
+ when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
233
+ when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
233
234
  end
234
235
 
235
236
  case type.to_s.to_sym
@@ -312,13 +313,7 @@ module RDF::Tabular
312
313
  value
313
314
  end
314
315
  when :datatype
315
- # If in object form, normalize keys to symbols
316
- object[key] = case value
317
- when Hash
318
- value.inject({}) {|memo, (k,v)| memo[k.to_sym] = v; memo}
319
- else
320
- value
321
- end
316
+ self.datatype = value
322
317
  when :dialect
323
318
  # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
324
319
  object[key] = case value
@@ -329,7 +324,7 @@ module RDF::Tabular
329
324
  value
330
325
  end
331
326
  @type ||= :Table
332
- when :resources
327
+ when :tables
333
328
  # An array of table descriptions for the tables in the group.
334
329
  object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
335
330
  value.map {|v| Table.new(v, @options.merge(parent: self, context: nil))}
@@ -437,6 +432,15 @@ module RDF::Tabular
437
432
  end
438
433
  end
439
434
 
435
+ # Set new datatype, stored under object[:datatype]; a Hash is passed to Datatype.new as-is, any other value is wrapped as {base: value}
436
+ # @return [Datatype]
437
+ def datatype=(value)
438
+ object[:datatype] = case value
439
+ when Hash then Datatype.new(value)
440
+ else Datatype.new({base: value})
441
+ end
442
+ end
443
+
440
444
  # Type of this Metadata
441
445
  # @return [:TableGroup, :Table, :Transformation, :Schema, :Column]
442
446
  def type; self.class.name.split('::').last.to_sym; end
@@ -463,13 +467,25 @@ module RDF::Tabular
463
467
  e.message.split("\n")
464
468
  end
465
469
 
470
+ ##
471
+ # Validation warnings: this object's own @warnings (empty if unset, e.g. before validate! has run) followed by the warnings of every Metadata found among its property values, with nils removed
472
+ # @return [Array<String>]
473
+ def warnings
474
+ ((@warnings || []) + object.
475
+ values.
476
+ flatten.
477
+ select {|v| v.is_a?(Metadata)}.
478
+ map(&:warnings).
479
+ flatten).compact
480
+ end
481
+
466
482
  ##
467
483
  # Validate metadata, raising an error containing all errors detected during validation
468
484
  # @raise [Error] Raise error if metadata has any unexpected properties
469
485
  # @return [self]
470
486
  def validate!
471
487
  expected_props, required_props = @properties.keys, @required
472
- errors = []
488
+ errors, @warnings = [], []
473
489
 
474
490
  unless is_a?(Dialect) || is_a?(Transformation)
475
491
  expected_props = expected_props + INHERITED_PROPERTIES.keys
@@ -478,7 +494,7 @@ module RDF::Tabular
478
494
  # It has only expected properties (exclude metadata)
479
495
  check_keys = object.keys - [:"@id", :"@context"]
480
496
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
481
- errors << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
497
+ @warnings << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
482
498
 
483
499
  # It has required properties
484
500
  errors << "#{type} missing required keys: #{(required_props & check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
@@ -487,8 +503,10 @@ module RDF::Tabular
487
503
  object.keys.each do |key|
488
504
  value = object[key]
489
505
  case key
490
- when :aboutUrl, :datatype, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
491
- valid_inherited_property?(key, value) {|m| errors << m}
506
+ when :aboutUrl, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
507
+ valid_inherited_property?(key, value) do |m|
508
+ @warnings << m
509
+ end
492
510
  when :columns
493
511
  if value.is_a?(Array) && value.all? {|v| v.is_a?(Column)}
494
512
  value.each do |v|
@@ -505,11 +523,24 @@ module RDF::Tabular
505
523
  end
506
524
  when :commentPrefix, :delimiter, :quoteChar
507
525
  unless value.is_a?(String) && value.length == 1
508
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
526
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
527
+ object[key] = Dialect::DIALECT_DEFAULTS[key]
509
528
  end
510
- when :format, :lineTerminator, :uriTemplate
529
+ when :lineTerminators
511
530
  unless value.is_a?(String)
512
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
531
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
532
+ object[key] = Dialect::DIALECT_DEFAULTS[key]
533
+ end
534
+ when :datatype
535
+ if value.is_a?(Datatype)
536
+ begin
537
+ value.validate!
538
+ rescue Error => e
539
+ errors << e.message
540
+ end
541
+ else
542
+ @warnings << "#{type} has invalid property '#{key}': expected a Datatype"
543
+ value = object[key] = nil
513
544
  end
514
545
  when :dialect
515
546
  unless value.is_a?(Dialect)
@@ -520,13 +551,19 @@ module RDF::Tabular
520
551
  rescue Error => e
521
552
  errors << e.message
522
553
  end
523
- when :doubleQuote, :header, :required, :skipInitialSpace, :skipBlankRows, :suppressOutput, :virtual
554
+ when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
524
555
  unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
525
- errors << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
556
+ @warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
557
+ object[key] = Dialect::DIALECT_DEFAULTS[key]
558
+ end
559
+ when :required, :suppressOutput, :virtual
560
+ unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
561
+ @warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
562
+ object.delete(key)
526
563
  end
527
564
  when :encoding
528
565
  unless (Encoding.find(value) rescue false)
529
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
566
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
530
567
  end
531
568
  when :foreignKeys
532
569
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
@@ -549,13 +586,13 @@ module RDF::Tabular
549
586
  end
550
587
  # resource is the URL of a Table in the TableGroup
551
588
  ref = base.join(reference['resource']).to_s
552
- table = root.is_a?(TableGroup) && root.resources.detect {|t| t.url == ref}
589
+ table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
553
590
  errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
554
591
  table.tableSchema if table
555
592
  elsif reference.has_key?('schemaReference')
556
593
  # resource is the @id of a Schema in the TableGroup
557
594
  ref = base.join(reference['schemaReference']).to_s
558
- tables = root.is_a?(TableGroup) ? root.resources.select {|t| t.tableSchema[:@id] == ref} : []
595
+ tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
559
596
  case tables.length
560
597
  when 0
561
598
  errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
@@ -581,13 +618,22 @@ module RDF::Tabular
581
618
  errors << "#{type} has invalid property '#{key}': reference must be an object: #{reference.inspect}"
582
619
  end
583
620
  end
584
- when :headerColumnCount, :headerRowCount, :skipColumns, :skipRows
621
+ when :headerRowCount, :skipColumns, :skipRows
585
622
  unless value.is_a?(Numeric) && value.integer? && value > 0
586
- errors << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
623
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
624
+ object[key] = Dialect::DIALECT_DEFAULTS[key]
625
+ end
626
+ when :base
627
+ @warnings << "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
628
+ when :format
629
+ unless value.is_a?(String)
630
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
631
+ object.delete(key)
587
632
  end
588
633
  when :length, :minLength, :maxLength
589
634
  unless value.is_a?(Numeric) && value.integer? && value > 0
590
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
635
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
636
+ object.delete(key)
591
637
  end
592
638
  unless key == :length || value != object[:length]
593
639
  # Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
@@ -598,7 +644,8 @@ module RDF::Tabular
598
644
  RDF::Literal::Date.new(value.to_s).valid? ||
599
645
  RDF::Literal::Time.new(value.to_s).valid? ||
600
646
  RDF::Literal::DateTime.new(value.to_s).valid?
601
- errors << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
647
+ @warnings << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
648
+ object.delete(key)
602
649
  end
603
650
  when :name
604
651
  unless value.is_a?(String) && name.match(NAME_SYNTAX)
@@ -618,7 +665,7 @@ module RDF::Tabular
618
665
  Array(value).each do |k|
619
666
  errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c.name == k}
620
667
  end
621
- when :resources
668
+ when :tables
622
669
  if value.is_a?(Array) && value.all? {|v| v.is_a?(Table)}
623
670
  value.each do |t|
624
671
  begin
@@ -664,13 +711,14 @@ module RDF::Tabular
664
711
  else
665
712
  errors << "#{type} has invalid property '#{key}': expected array of Transformations"
666
713
  end
667
- when :title
668
- valid_natural_language_property?(:title, value) {|m| errors << m}
714
+ when :titles
715
+ valid_natural_language_property?(:titles, value) {|m| errors << m}
669
716
  when :trim
670
717
  unless %w(true false 1 0 start end).include?(value.to_s.downcase)
671
718
  errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected true, false, 1, 0, start or end"
672
719
  end
673
720
  when :url
721
+ # Only validate URL in validation mode; this allows for a nil URL
674
722
  unless @url.valid?
675
723
  errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
676
724
  end
@@ -687,7 +735,7 @@ module RDF::Tabular
687
735
  errors << "#{type} has invalid content '#{key}': #{e.message}"
688
736
  end
689
737
  else
690
- errors << "#{type} has invalid property '#{key}': unsupported property"
738
+ warnings << "#{type} has invalid property '#{key}': unsupported property"
691
739
  end
692
740
  end
693
741
 
@@ -713,53 +761,24 @@ module RDF::Tabular
713
761
  # @yield message error message
714
762
  # @return [Boolean]
715
763
  def valid_inherited_property?(key, value)
716
- pv = parent.send(key) if parent
717
764
  error = case key
718
765
  when :aboutUrl, :default, :propertyUrl, :valueUrl
719
766
  "string" unless value.is_a?(String)
720
- when :datatype
721
- # Normalization usually redundant
722
- dt = normalize_datatype(value)
723
- # FIXME: support arrays of datatypes?
724
- "valid datatype" unless DATATYPES.keys.map(&:to_s).include?(dt[:base]) || RDF::URI(dt[:base]).absolute?
725
767
  when :lang
726
768
  "valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
727
769
  when :null
728
- # To be valid, it must be a string or array, and must be compatible with any inherited value through being a subset
770
+ # To be valid, it must be a string or array
729
771
  "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
730
772
  when :ordered
731
773
  "boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
732
774
  when :separator
733
775
  "single character" unless value.nil? || value.is_a?(String) && value.length == 1
734
776
  when :textDirection
735
- # A value for this property is compatible with an inherited value only if they are identical.
736
777
  "rtl or ltr" unless %(rtl ltr).include?(value)
737
- end ||
738
-
739
- case key
740
- # Compatibility
741
- when :aboutUrl, :propertyUrl, :valueUrl
742
- # No restrictions
743
- when :default, :ordered, :separator, :textDirection
744
- "same as that defined on parent" if pv && pv != value
745
- when :datatype
746
- if pv
747
- # Normalization usually redundant
748
- dt = normalize_datatype(value)
749
- pvdt = normalize_datatype(pv)
750
- vl = RDF::Literal.new("", datatype: DATATYPES[dt[:base].to_sym])
751
- pvvl = RDF::Literal.new("", datatype: DATATYPES[pvdt[:base].to_sym])
752
- # must be a subclass of some type defined on parent
753
- "compatible datatype of that defined on parent" unless vl.is_a?(pvvl.class)
754
- end
755
- when :lang
756
- "lang expected to restrict #{pv}" if pv && !value.start_with?(pv)
757
- when :null
758
- "subset of that defined on parent" if pv && (Array(value) & Array(pv)) != Array(value)
759
778
  end
760
779
 
761
780
  if error
762
- yield "#{type} has invalid property '#{key}' ('#{value}'): expected #{error}"
781
+ yield "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{error}"
763
782
  false
764
783
  else
765
784
  true
@@ -888,7 +907,7 @@ module RDF::Tabular
888
907
  if self.is_a?(Table) && self.parent
889
908
  self.parent
890
909
  else
891
- content = {"@type" => "TableGroup", "resources" => [self]}
910
+ content = {"@type" => "TableGroup", "tables" => [self]}
892
911
  content['@context'] = object.delete(:@context) if object[:@context]
893
912
  ctx = @context
894
913
  self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
@@ -907,7 +926,7 @@ module RDF::Tabular
907
926
  if md.parent
908
927
  md.parent
909
928
  else
910
- content = {"@type" => "TableGroup", "resources" => [md]}
929
+ content = {"@type" => "TableGroup", "tables" => [md]}
911
930
  ctx = md.context
912
931
  content['@context'] = md.object.delete(:@context) if md.object[:@context]
913
932
  md.remove_instance_variable(:@context) if md.instance_variables.include?(:@context)
@@ -962,18 +981,18 @@ module RDF::Tabular
962
981
  a = object[key].is_a?(Array) ? object[key] : [object[key]].compact
963
982
  b = value.is_a?(Array) ? value : [value]
964
983
  object[key] = a + b
965
- when :resources
984
+ when :tables
966
985
  # When an array of table descriptions B is imported into an original array of table descriptions A, each table description within B is combined into the original array A by:
967
986
  value.each do |tb|
968
987
  if ta = object[key].detect {|e| e.url == tb.url}
969
988
  # if there is a table description with the same url in A, the table description from B is imported into the matching table description in A
970
- debug("merge!: resources") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
989
+ debug("merge!: tables") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
971
990
  ta.merge!(tb)
972
991
  else
973
992
  # otherwise, the table description from B is appended to the array of table descriptions A
974
993
  tb = tb.dup
975
994
  tb.instance_variable_set(:@parent, self)
976
- debug("merge!: resources") {"add TB: #{tb.inspect}"}
995
+ debug("merge!: tables") {"add TB: #{tb.inspect}"}
977
996
  object[key] << tb
978
997
  end
979
998
  end
@@ -995,11 +1014,11 @@ module RDF::Tabular
995
1014
  # When an array of column descriptions B is imported into an original array of column descriptions A, each column description within B is combined into the original array A by:
996
1015
  Array(value).each_with_index do |cb, index|
997
1016
  ca = object[key][index] || {}
998
- va = ([ca[:name]] + (ca[:title] || {}).values.flatten).compact.map(&:downcase)
999
- vb = ([cb[:name]] + (cb[:title] || {}).values.flatten).compact.map(&:downcase)
1017
+ va = ([ca[:name]] + (ca[:titles] || {}).values.flatten).compact.map(&:downcase)
1018
+ vb = ([cb[:name]] + (cb[:titles] || {}).values.flatten).compact.map(&:downcase)
1000
1019
  if !(va & vb).empty?
1001
1020
  debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
1002
- # If there's a non-empty case-insensitive intersection between the name and title values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
1021
+ # If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
1003
1022
  ca.merge!(cb)
1004
1023
  elsif ca.nil? && cb.virtual
1005
1024
  debug("merge!: columns") {"index: #{index}, virtual"}
@@ -1127,11 +1146,6 @@ module RDF::Tabular
1127
1146
  end
1128
1147
  when :natural_language
1129
1148
  value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
1130
- when :atomic
1131
- case key
1132
- when :datatype then normalize_datatype(value)
1133
- else value
1134
- end
1135
1149
  else
1136
1150
  value
1137
1151
  end
@@ -1139,27 +1153,6 @@ module RDF::Tabular
1139
1153
  self
1140
1154
  end
1141
1155
 
1142
- ##
1143
- # Normalize datatype to Object/Hash representation
1144
- # @param [String, Hash{Symbol => String}] value
1145
- # @return [Hash{Symbol => String}]
1146
- def normalize_datatype(value)
1147
- # Normalize datatype to array of object form
1148
- value = {base: value} unless value.is_a?(Hash)
1149
- # Create a new representation using symbols and transformed values
1150
- nv = {}
1151
- value.each do |kk, vv|
1152
- case kk.to_sym
1153
- when :base, :decimalChar, :format, :groupChar, :pattern then nv[kk.to_sym] = vv
1154
- when :length, :minLength, :maxLength, :minimum, :maximum,
1155
- :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
1156
- nv[kk.to_sym] = vv.to_i
1157
- end
1158
- end
1159
- nv[:base] ||= 'string'
1160
- nv
1161
- end
1162
-
1163
1156
  ##
1164
1157
  # Normalize JSON-LD
1165
1158
  #
@@ -1246,7 +1239,7 @@ module RDF::Tabular
1246
1239
  def csv_options
1247
1240
  {
1248
1241
  col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
1249
- row_sep: (is_a?(Dialect) ? self : dialect).lineTerminator,
1242
+ row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
1250
1243
  quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
1251
1244
  encoding: (is_a?(Dialect) ? self : dialect).encoding
1252
1245
  }
@@ -1269,7 +1262,7 @@ module RDF::Tabular
1269
1262
  :@id => :link,
1270
1263
  :@type => :atomic,
1271
1264
  notes: :array,
1272
- resources: :array,
1265
+ tables: :array,
1273
1266
  tableSchema: :object,
1274
1267
  tableDirection: :atomic,
1275
1268
  dialect: :object,
@@ -1293,7 +1286,7 @@ module RDF::Tabular
1293
1286
  # Does the Metadata or any descendant have any common properties
1294
1287
  # @return [Boolean]
1295
1288
  def has_annotations?
1296
- super || resources.any? {|t| t.has_annotations? }
1289
+ super || tables.any? {|t| t.has_annotations? }
1297
1290
  end
1298
1291
 
1299
1292
  # Logic for accessing elements as accessors
@@ -1306,10 +1299,10 @@ module RDF::Tabular
1306
1299
  end
1307
1300
 
1308
1301
  ##
1309
- # Iterate over all resources
1302
+ # Iterate over all tables
1310
1303
  # @yield [Table]
1311
- def each_resource
1312
- resources.map(&:url).each do |url|
1304
+ def each_table
1305
+ tables.map(&:url).each do |url|
1313
1306
  yield for_table(url)
1314
1307
  end
1315
1308
  end
@@ -1320,9 +1313,9 @@ module RDF::Tabular
1320
1313
  # @param [String] url of the table
1321
1314
  # @return [Table]
1322
1315
  def for_table(url)
1323
- # If there are no resources, assume there's one for this table
1324
- #self.resources ||= [Table.new(url: url)]
1325
- if table = Array(resources).detect {|t| t.url == url}
1316
+ # If there are no tables, assume there's one for this table
1317
+ #self.tables ||= [Table.new(url: url)]
1318
+ if table = Array(tables).detect {|t| t.url == url}
1326
1319
  # Set document base for this table for resolving URLs
1327
1320
  table.instance_variable_set(:@context, context.dup)
1328
1321
  table.context.base = url
@@ -1335,7 +1328,7 @@ module RDF::Tabular
1335
1328
  {
1336
1329
  "@id" => id,
1337
1330
  "@type" => "AnnotatedTableGroup",
1338
- "resources" => resources.map(&:to_atd)
1331
+ "tables" => tables.map(&:to_atd)
1339
1332
  }
1340
1333
  end
1341
1334
  end
@@ -1401,7 +1394,7 @@ module RDF::Tabular
1401
1394
  source: :atomic,
1402
1395
  targetFormat: :link,
1403
1396
  scriptFormat: :link,
1404
- title: :natural_language,
1397
+ titles: :natural_language,
1405
1398
  url: :link,
1406
1399
  }.freeze
1407
1400
  REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
@@ -1462,7 +1455,7 @@ module RDF::Tabular
1462
1455
  :@type => :atomic,
1463
1456
  name: :atomic,
1464
1457
  suppressOutput: :atomic,
1465
- title: :natural_language,
1458
+ titles: :natural_language,
1466
1459
  required: :atomic,
1467
1460
  virtual: :atomic,
1468
1461
  }.freeze
@@ -1484,7 +1477,7 @@ module RDF::Tabular
1484
1477
  # @note this is lazy evaluated to avoid dependencies on setting dialect vs. initializing columns
1485
1478
  # @return [Integer] 1-based column number
1486
1479
  def sourceNumber
1487
- skipColumns = table ? (dialect.skipColumns.to_i + dialect.headerColumnCount.to_i) : 0
1480
+ skipColumns = table ? dialect.skipColumns.to_i : 0
1488
1481
  number + skipColumns
1489
1482
  end
1490
1483
 
@@ -1506,9 +1499,9 @@ module RDF::Tabular
1506
1499
  end
1507
1500
  end
1508
1501
 
1509
- # Return or create a name for the column from title, if it exists
1502
+ # Return or create a name for the column from titles, if it exists
1510
1503
  def name
1511
- object[:name] ||= if title && (ts = title[context.default_language || 'und'])
1504
+ object[:name] ||= if titles && (ts = titles[context.default_language || 'und'])
1512
1505
  n = Array(ts).first
1513
1506
  n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
1514
1507
  n1 = URI.encode(n[1..-1], /[^\w\.]/)
@@ -1534,7 +1527,7 @@ module RDF::Tabular
1534
1527
  "cells" => [],
1535
1528
  "virtual" => self.virtual,
1536
1529
  "name" => self.name,
1537
- "title" => self.title
1530
+ "titles" => self.titles
1538
1531
  }
1539
1532
  end
1540
1533
 
@@ -1556,9 +1549,8 @@ module RDF::Tabular
1556
1549
  doubleQuote: true,
1557
1550
  encoding: "utf-8".freeze,
1558
1551
  header: true,
1559
- headerColumnCount: 0,
1560
1552
  headerRowCount: 1,
1561
- lineTerminator: :auto, # SPEC says "\r\n"
1553
+ lineTerminators: :auto,
1562
1554
  quoteChar: '"',
1563
1555
  skipBlankRows: false,
1564
1556
  skipColumns: 0,
@@ -1575,9 +1567,8 @@ module RDF::Tabular
1575
1567
  doubleQuote: :atomic,
1576
1568
  encoding: :atomic,
1577
1569
  header: :atomic,
1578
- headerColumnCount: :atomic,
1579
1570
  headerRowCount: :atomic,
1580
- lineTerminator: :atomic,
1571
+ lineTerminators: :atomic,
1581
1572
  quoteChar: :atomic,
1582
1573
  skipBlankRows: :atomic,
1583
1574
  skipColumns: :atomic,
@@ -1625,8 +1616,8 @@ module RDF::Tabular
1625
1616
  options = options.dup
1626
1617
  options.delete(:context) # Don't accidentally use a passed context
1627
1618
  # Normalize input to an IO object
1628
- if !input.respond_to?(:read)
1629
- return ::RDF::Util::File.open_file(input.to_s) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
1619
+ if input.is_a?(String)
1620
+ return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
1630
1621
  end
1631
1622
 
1632
1623
  table = {
@@ -1655,20 +1646,19 @@ module RDF::Tabular
1655
1646
  row_data = Array(csv.shift)
1656
1647
  Array(row_data).each_with_index do |value, index|
1657
1648
  # Skip columns
1658
- skipCols = skipColumns.to_i + headerColumnCount.to_i
1649
+ skipCols = skipColumns.to_i
1659
1650
  next if index < skipCols
1660
1651
 
1661
1652
  # Trim value
1662
1653
  value.lstrip! if %w(true start).include?(trim.to_s)
1663
1654
  value.rstrip! if %w(true end).include?(trim.to_s)
1664
1655
 
1665
- # Initialize title
1666
- # SPEC CONFUSION: does title get an array, or concatenated values?
1656
+ # Initialize titles
1667
1657
  columns = table["tableSchema"]["columns"] ||= []
1668
1658
  column = columns[index - skipCols] ||= {
1669
- "title" => {"und" => []},
1659
+ "titles" => {"und" => []},
1670
1660
  }
1671
- column["title"]["und"] << value
1661
+ column["titles"]["und"] << value
1672
1662
  end
1673
1663
  end
1674
1664
  debug("embedded_metadata") {"table: #{table.inspect}"}
@@ -1688,6 +1678,41 @@ module RDF::Tabular
1688
1678
  end
1689
1679
  end
1690
1680
 
1681
+ class Datatype < Metadata # Metadata subclass holding a datatype description
1682
+ PROPERTIES = { # every property a Datatype accepts; all are atomic values
1683
+ base: :atomic,
1684
+ format: :atomic,
1685
+ length: :atomic,
1686
+ minLength: :atomic,
1687
+ maxLength: :atomic,
1688
+ minimum: :atomic,
1689
+ maximum: :atomic,
1690
+ minInclusive: :atomic,
1691
+ maxInclusive: :atomic,
1692
+ minExclusive: :atomic,
1693
+ maxExclusive: :atomic,
1694
+ decimalChar: :atomic,
1695
+ groupChar: :atomic,
1696
+ pattern: :atomic,
1697
+ }.freeze
1698
+ REQUIRED = [].freeze # no property is mandatory
1699
+
1700
+ # Override `base` in Metadata: here it returns the value stored for the datatype's own `base` property
1701
+ def base; object[:base]; end
1702
+
1703
+ # Setters, one per PROPERTIES key: store the value, converting it with to_i when its string form starts with a digit. NOTE(review): this also truncates values such as 1.5 or "2015-01-01" to an Integer - confirm that is intended
1704
+ PROPERTIES.each do |a, type|
1705
+ define_method("#{a}=".to_sym) do |value|
1706
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1707
+ end
1708
+ end
1709
+
1710
+ # Logic for accessing elements as accessors: returns the stored value when the method name is a PROPERTIES key, otherwise defers to super
1711
+ def method_missing(method, *args)
1712
+ PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1713
+ end
1714
+ end
1715
+
1691
1716
  # Wraps each resulting row
1692
1717
  class Row
1693
1718
  # Class for returning values
@@ -1759,7 +1784,7 @@ module RDF::Tabular
1759
1784
  @number = number
1760
1785
  @sourceNumber = source_number
1761
1786
  @values = []
1762
- skipColumns = metadata.dialect.skipColumns.to_i + metadata.dialect.headerColumnCount.to_i
1787
+ skipColumns = metadata.dialect.skipColumns.to_i
1763
1788
 
1764
1789
  @context = table.context.dup
1765
1790
  @context.base = table.url
@@ -1786,22 +1811,22 @@ module RDF::Tabular
1786
1811
 
1787
1812
  @values << cell = Cell.new(metadata, column, self, value)
1788
1813
 
1789
- datatype = metadata.normalize_datatype(column.datatype || 'string')
1790
- value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype[:base])
1791
- value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype[:base])
1814
+ datatype = column.datatype || Datatype.new(base: "string")
1815
+ value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
1816
+ value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
1792
1817
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
1793
1818
  value = column.default || '' if value.empty?
1794
1819
 
1795
1820
  cell_values = column.separator ? value.split(column.separator) : [value]
1796
1821
 
1797
1822
  cell_values = cell_values.map do |v|
1798
- v = v.strip unless %w(string anyAtomicType any).include?(datatype[:base])
1823
+ v = v.strip unless %w(string anyAtomicType any).include?(datatype.base)
1799
1824
  v = column.default || '' if v.empty?
1800
1825
  if Array(column.null).include?(v)
1801
1826
  nil
1802
1827
  else
1803
1828
  # Trim value
1804
- if %w(string anyAtomicType any).include?(datatype[:base])
1829
+ if %w(string anyAtomicType any).include?(datatype.base)
1805
1830
  v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
1806
1831
  v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
1807
1832
  else
@@ -1809,7 +1834,7 @@ module RDF::Tabular
1809
1834
  v.strip!
1810
1835
  end
1811
1836
 
1812
- expanded_dt = metadata.context.expand_iri(datatype[:base], vocab: true)
1837
+ expanded_dt = metadata.context.expand_iri(datatype.base, vocab: true)
1813
1838
  if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
1814
1839
  lit_or_errors
1815
1840
  else
@@ -1861,36 +1886,36 @@ module RDF::Tabular
1861
1886
  value_errors = []
1862
1887
 
1863
1888
  # Check constraints
1864
- if datatype[:length] && value.length != datatype[:length]
1865
- value_errors << "#{value} does not have length #{datatype[:length]}"
1889
+ if datatype.length && value.length != datatype.length
1890
+ value_errors << "#{value} does not have length #{datatype.length}"
1866
1891
  end
1867
- if datatype[:minLength] && value.length < datatype[:minLength]
1868
- value_errors << "#{value} does not have length >= #{datatype[:minLength]}"
1892
+ if datatype.minLength && value.length < datatype.minLength
1893
+ value_errors << "#{value} does not have length >= #{datatype.minLength}"
1869
1894
  end
1870
- if datatype[:maxLength] && value.length > datatype[:maxLength]
1871
- value_errors << "#{value} does not have length <= #{datatype[:maxLength]}"
1895
+ if datatype.maxLength && value.length > datatype.maxLength
1896
+ value_errors << "#{value} does not have length <= #{datatype.maxLength}"
1872
1897
  end
1873
1898
 
1874
- format = datatype[:format]
1899
+ format = datatype.format
1875
1900
  # Datatype specific constraints and conversions
1876
- case datatype[:base].to_sym
1901
+ case datatype.base.to_sym
1877
1902
  when :decimal, :integer, :long, :int, :short, :byte,
1878
1903
  :nonNegativeInteger, :positiveInteger,
1879
1904
  :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
1880
1905
  :nonPositiveInteger, :negativeInteger,
1881
1906
  :double, :float, :number
1882
1907
  # Normalize representation based on numeric-specific facets
1883
- groupChar = datatype.fetch(:groupChar, ',')
1884
- if datatype[:pattern] && !value.match(Regexp.new(datatype[:pattern]))
1908
+ groupChar = datatype.groupChar || ','
1909
+ if datatype.pattern && !value.match(Regexp.new(datatype.pattern))
1885
1910
  # pattern facet failed
1886
- value_errors << "#{value} does not match pattern #{datatype[:pattern]}"
1911
+ value_errors << "#{value} does not match pattern #{datatype.pattern}"
1887
1912
  end
1888
1913
  if value.include?(groupChar*2)
1889
1914
  # pattern facet failed
1890
1915
  value_errors << "#{value} has repeating #{groupChar.inspect}"
1891
1916
  end
1892
1917
  value.gsub!(groupChar, '')
1893
- value.sub!(datatype.fetch(:decimalChar, '.'), '.')
1918
+ value.sub!(datatype.decimalChar, '.') if datatype.decimalChar
1894
1919
 
1895
1920
  # Extract percent or per-mille sign
1896
1921
  percent = permille = false
@@ -1941,7 +1966,7 @@ module RDF::Tabular
1941
1966
 
1942
1967
  if format
1943
1968
  date_format, time_format = format.split(' ')
1944
- if datatype[:base].to_sym == :time
1969
+ if datatype.base.to_sym == :time
1945
1970
  date_format, time_format = nil, date_format
1946
1971
  end
1947
1972
 
@@ -2008,7 +2033,7 @@ module RDF::Tabular
2008
2033
  lit = RDF::Literal(value, datatype: expanded_dt)
2009
2034
  when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
2010
2035
  :ENTITY, :ID, :IDREF, :NOTATION
2011
- value_errors << "#{value} uses unsupported datatype: #{datatype[:base]}"
2036
+ value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
2012
2037
  else
2013
2038
  # For other types, format is a regexp
2014
2039
  unless format.nil? || value.match(Regexp.new(format))
@@ -2025,7 +2050,7 @@ module RDF::Tabular
2025
2050
  end
2026
2051
 
2027
2052
  # Final value is a valid literal, or a plain literal otherwise
2028
- value_errors << "#{value} is not a valid #{datatype[:base]}" if lit && !lit.valid?
2053
+ value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?
2029
2054
 
2030
2055
  # FIXME Value constraints
2031
2056