rdf-tabular 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -180,9 +180,10 @@ module RDF::Tabular
180
180
  # Return either the merge of user- and found-metadata, any of these, or an empty TableGroup
181
181
  metadata = case
182
182
  when user_metadata && found_metadata then user_metadata.merge(found_metadata)
183
- when user_metadata then user_metadata
184
- when found_metadata then found_metadata
185
- else TableGroup.new({resources: [{url: base}]}, options)
183
+ when user_metadata then user_metadata
184
+ when found_metadata then found_metadata
185
+ when base then TableGroup.new({tables: [{url: base}]}, options)
186
+ else TableGroup.new({tables: []}, options)
186
187
  end
187
188
 
188
189
  # Make TableGroup, if not already
@@ -223,13 +224,13 @@ module RDF::Tabular
223
224
  # Figure out type by site
224
225
  object_keys = object.keys.map(&:to_s)
225
226
  type ||= case
226
- when %w(resources).any? {|k| object_keys.include?(k)} then :TableGroup
227
+ when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
227
228
  when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
228
229
  when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
229
230
  when %w(columns primaryKey foreignKeys urlTemplate).any? {|k| object_keys.include?(k)} then :Schema
230
231
  when %w(name required).any? {|k| object_keys.include?(k)} then :Column
231
- when %w(commentPrefix delimiter doubleQuote encoding header headerColumnCount headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
232
- when %w(lineTerminator quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
232
+ when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
233
+ when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
233
234
  end
234
235
 
235
236
  case type.to_s.to_sym
@@ -312,13 +313,7 @@ module RDF::Tabular
312
313
  value
313
314
  end
314
315
  when :datatype
315
- # If in object form, normalize keys to symbols
316
- object[key] = case value
317
- when Hash
318
- value.inject({}) {|memo, (k,v)| memo[k.to_sym] = v; memo}
319
- else
320
- value
321
- end
316
+ self.datatype = value
322
317
  when :dialect
323
318
  # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
324
319
  object[key] = case value
@@ -329,7 +324,7 @@ module RDF::Tabular
329
324
  value
330
325
  end
331
326
  @type ||= :Table
332
- when :resources
327
+ when :tables
333
328
  # An array of table descriptions for the tables in the group.
334
329
  object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
335
330
  value.map {|v| Table.new(v, @options.merge(parent: self, context: nil))}
@@ -437,6 +432,15 @@ module RDF::Tabular
437
432
  end
438
433
  end
439
434
 
435
+ # Set new datatype
436
+ # @return [Datatype]
437
+ def datatype=(value)
438
+ object[:datatype] = case value
439
+ when Hash then Datatype.new(value)
440
+ else Datatype.new({base: value})
441
+ end
442
+ end
443
+
440
444
  # Type of this Metadata
441
445
  # @return [:TableGroup, :Table, :Transformation, :Schema, :Column]
442
446
  def type; self.class.name.split('::').last.to_sym; end
@@ -463,13 +467,25 @@ module RDF::Tabular
463
467
  e.message.split("\n")
464
468
  end
465
469
 
470
+ ##
471
+ # Validation warnings, available only after validating or finding warnings
472
+ # @return [Array<String>]
473
+ def warnings
474
+ ((@warnings || []) + object.
475
+ values.
476
+ flatten.
477
+ select {|v| v.is_a?(Metadata)}.
478
+ map(&:warnings).
479
+ flatten).compact
480
+ end
481
+
466
482
  ##
467
483
  # Validate metadata, raising an error containing all errors detected during validation
468
484
  # @raise [Error] Raise error if metadata has any unexpected properties
469
485
  # @return [self]
470
486
  def validate!
471
487
  expected_props, required_props = @properties.keys, @required
472
- errors = []
488
+ errors, @warnings = [], []
473
489
 
474
490
  unless is_a?(Dialect) || is_a?(Transformation)
475
491
  expected_props = expected_props + INHERITED_PROPERTIES.keys
@@ -478,7 +494,7 @@ module RDF::Tabular
478
494
  # It has only expected properties (exclude metadata)
479
495
  check_keys = object.keys - [:"@id", :"@context"]
480
496
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
481
- errors << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
497
+ @warnings << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
482
498
 
483
499
  # It has required properties
484
500
  errors << "#{type} missing required keys: #{(required_props & check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
@@ -487,8 +503,10 @@ module RDF::Tabular
487
503
  object.keys.each do |key|
488
504
  value = object[key]
489
505
  case key
490
- when :aboutUrl, :datatype, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
491
- valid_inherited_property?(key, value) {|m| errors << m}
506
+ when :aboutUrl, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
507
+ valid_inherited_property?(key, value) do |m|
508
+ @warnings << m
509
+ end
492
510
  when :columns
493
511
  if value.is_a?(Array) && value.all? {|v| v.is_a?(Column)}
494
512
  value.each do |v|
@@ -505,11 +523,24 @@ module RDF::Tabular
505
523
  end
506
524
  when :commentPrefix, :delimiter, :quoteChar
507
525
  unless value.is_a?(String) && value.length == 1
508
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
526
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
527
+ object[key] = Dialect::DIALECT_DEFAULTS[key]
509
528
  end
510
- when :format, :lineTerminator, :uriTemplate
529
+ when :lineTerminators
511
530
  unless value.is_a?(String)
512
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
531
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
532
+ object[key] = Dialect::DIALECT_DEFAULTS[key]
533
+ end
534
+ when :datatype
535
+ if value.is_a?(Datatype)
536
+ begin
537
+ value.validate!
538
+ rescue Error => e
539
+ errors << e.message
540
+ end
541
+ else
542
+ @warnings << "#{type} has invalid property '#{key}': expected a Datatype"
543
+ value = object[key] = nil
513
544
  end
514
545
  when :dialect
515
546
  unless value.is_a?(Dialect)
@@ -520,13 +551,19 @@ module RDF::Tabular
520
551
  rescue Error => e
521
552
  errors << e.message
522
553
  end
523
- when :doubleQuote, :header, :required, :skipInitialSpace, :skipBlankRows, :suppressOutput, :virtual
554
+ when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
524
555
  unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
525
- errors << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
556
+ @warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
557
+ object[key] = Dialect::DIALECT_DEFAULTS[key]
558
+ end
559
+ when :required, :suppressOutput, :virtual
560
+ unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
561
+ @warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
562
+ object.delete(key)
526
563
  end
527
564
  when :encoding
528
565
  unless (Encoding.find(value) rescue false)
529
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
566
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
530
567
  end
531
568
  when :foreignKeys
532
569
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
@@ -549,13 +586,13 @@ module RDF::Tabular
549
586
  end
550
587
  # resource is the URL of a Table in the TableGroup
551
588
  ref = base.join(reference['resource']).to_s
552
- table = root.is_a?(TableGroup) && root.resources.detect {|t| t.url == ref}
589
+ table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
553
590
  errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
554
591
  table.tableSchema if table
555
592
  elsif reference.has_key?('schemaReference')
556
593
  # schemaReference is the @id of a Schema in the TableGroup
557
594
  ref = base.join(reference['schemaReference']).to_s
558
- tables = root.is_a?(TableGroup) ? root.resources.select {|t| t.tableSchema[:@id] == ref} : []
595
+ tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
559
596
  case tables.length
560
597
  when 0
561
598
  errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
@@ -581,13 +618,22 @@ module RDF::Tabular
581
618
  errors << "#{type} has invalid property '#{key}': reference must be an object: #{reference.inspect}"
582
619
  end
583
620
  end
584
- when :headerColumnCount, :headerRowCount, :skipColumns, :skipRows
621
+ when :headerRowCount, :skipColumns, :skipRows
585
622
  unless value.is_a?(Numeric) && value.integer? && value > 0
586
- errors << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
623
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
624
+ object[key] = Dialect::DIALECT_DEFAULTS[key]
625
+ end
626
+ when :base
627
+ @warnings << "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
628
+ when :format
629
+ unless value.is_a?(String)
630
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
631
+ object.delete(key)
587
632
  end
588
633
  when :length, :minLength, :maxLength
589
634
  unless value.is_a?(Numeric) && value.integer? && value > 0
590
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
635
+ @warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
636
+ object.delete(key)
591
637
  end
592
638
  unless key == :length || value != object[:length]
593
639
  # Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
@@ -598,7 +644,8 @@ module RDF::Tabular
598
644
  RDF::Literal::Date.new(value.to_s).valid? ||
599
645
  RDF::Literal::Time.new(value.to_s).valid? ||
600
646
  RDF::Literal::DateTime.new(value.to_s).valid?
601
- errors << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
647
+ @warnings << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
648
+ object.delete(key)
602
649
  end
603
650
  when :name
604
651
  unless value.is_a?(String) && name.match(NAME_SYNTAX)
@@ -618,7 +665,7 @@ module RDF::Tabular
618
665
  Array(value).each do |k|
619
666
  errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c.name == k}
620
667
  end
621
- when :resources
668
+ when :tables
622
669
  if value.is_a?(Array) && value.all? {|v| v.is_a?(Table)}
623
670
  value.each do |t|
624
671
  begin
@@ -664,13 +711,14 @@ module RDF::Tabular
664
711
  else
665
712
  errors << "#{type} has invalid property '#{key}': expected array of Transformations"
666
713
  end
667
- when :title
668
- valid_natural_language_property?(:title, value) {|m| errors << m}
714
+ when :titles
715
+ valid_natural_language_property?(:titles, value) {|m| errors << m}
669
716
  when :trim
670
717
  unless %w(true false 1 0 start end).include?(value.to_s.downcase)
671
718
  errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected true, false, 1, 0, start or end"
672
719
  end
673
720
  when :url
721
+ # Only validate URL in validation mode; this allows for a nil URL
674
722
  unless @url.valid?
675
723
  errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
676
724
  end
@@ -687,7 +735,7 @@ module RDF::Tabular
687
735
  errors << "#{type} has invalid content '#{key}': #{e.message}"
688
736
  end
689
737
  else
690
- errors << "#{type} has invalid property '#{key}': unsupported property"
738
+ warnings << "#{type} has invalid property '#{key}': unsupported property"
691
739
  end
692
740
  end
693
741
 
@@ -713,53 +761,24 @@ module RDF::Tabular
713
761
  # @yield message error message
714
762
  # @return [Boolean]
715
763
  def valid_inherited_property?(key, value)
716
- pv = parent.send(key) if parent
717
764
  error = case key
718
765
  when :aboutUrl, :default, :propertyUrl, :valueUrl
719
766
  "string" unless value.is_a?(String)
720
- when :datatype
721
- # Normalization usually redundant
722
- dt = normalize_datatype(value)
723
- # FIXME: support arrays of datatypes?
724
- "valid datatype" unless DATATYPES.keys.map(&:to_s).include?(dt[:base]) || RDF::URI(dt[:base]).absolute?
725
767
  when :lang
726
768
  "valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
727
769
  when :null
728
- # To be valid, it must be a string or array, and must be compatible with any inherited value through being a subset
770
+ # To be valid, it must be a string or array
729
771
  "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
730
772
  when :ordered
731
773
  "boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
732
774
  when :separator
733
775
  "single character" unless value.nil? || value.is_a?(String) && value.length == 1
734
776
  when :textDirection
735
- # A value for this property is compatible with an inherited value only if they are identical.
736
777
  "rtl or ltr" unless %(rtl ltr).include?(value)
737
- end ||
738
-
739
- case key
740
- # Compatibility
741
- when :aboutUrl, :propertyUrl, :valueUrl
742
- # No restrictions
743
- when :default, :ordered, :separator, :textDirection
744
- "same as that defined on parent" if pv && pv != value
745
- when :datatype
746
- if pv
747
- # Normalization usually redundant
748
- dt = normalize_datatype(value)
749
- pvdt = normalize_datatype(pv)
750
- vl = RDF::Literal.new("", datatype: DATATYPES[dt[:base].to_sym])
751
- pvvl = RDF::Literal.new("", datatype: DATATYPES[pvdt[:base].to_sym])
752
- # must be a subclass of some type defined on parent
753
- "compatible datatype of that defined on parent" unless vl.is_a?(pvvl.class)
754
- end
755
- when :lang
756
- "lang expected to restrict #{pv}" if pv && !value.start_with?(pv)
757
- when :null
758
- "subset of that defined on parent" if pv && (Array(value) & Array(pv)) != Array(value)
759
778
  end
760
779
 
761
780
  if error
762
- yield "#{type} has invalid property '#{key}' ('#{value}'): expected #{error}"
781
+ yield "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{error}"
763
782
  false
764
783
  else
765
784
  true
@@ -888,7 +907,7 @@ module RDF::Tabular
888
907
  if self.is_a?(Table) && self.parent
889
908
  self.parent
890
909
  else
891
- content = {"@type" => "TableGroup", "resources" => [self]}
910
+ content = {"@type" => "TableGroup", "tables" => [self]}
892
911
  content['@context'] = object.delete(:@context) if object[:@context]
893
912
  ctx = @context
894
913
  self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
@@ -907,7 +926,7 @@ module RDF::Tabular
907
926
  if md.parent
908
927
  md.parent
909
928
  else
910
- content = {"@type" => "TableGroup", "resources" => [md]}
929
+ content = {"@type" => "TableGroup", "tables" => [md]}
911
930
  ctx = md.context
912
931
  content['@context'] = md.object.delete(:@context) if md.object[:@context]
913
932
  md.remove_instance_variable(:@context) if md.instance_variables.include?(:@context)
@@ -962,18 +981,18 @@ module RDF::Tabular
962
981
  a = object[key].is_a?(Array) ? object[key] : [object[key]].compact
963
982
  b = value.is_a?(Array) ? value : [value]
964
983
  object[key] = a + b
965
- when :resources
984
+ when :tables
966
985
  # When an array of table descriptions B is imported into an original array of table descriptions A, each table description within B is combined into the original array A by:
967
986
  value.each do |tb|
968
987
  if ta = object[key].detect {|e| e.url == tb.url}
969
988
  # if there is a table description with the same url in A, the table description from B is imported into the matching table description in A
970
- debug("merge!: resources") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
989
+ debug("merge!: tables") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
971
990
  ta.merge!(tb)
972
991
  else
973
992
  # otherwise, the table description from B is appended to the array of table descriptions A
974
993
  tb = tb.dup
975
994
  tb.instance_variable_set(:@parent, self)
976
- debug("merge!: resources") {"add TB: #{tb.inspect}"}
995
+ debug("merge!: tables") {"add TB: #{tb.inspect}"}
977
996
  object[key] << tb
978
997
  end
979
998
  end
@@ -995,11 +1014,11 @@ module RDF::Tabular
995
1014
  # When an array of column descriptions B is imported into an original array of column descriptions A, each column description within B is combined into the original array A by:
996
1015
  Array(value).each_with_index do |cb, index|
997
1016
  ca = object[key][index] || {}
998
- va = ([ca[:name]] + (ca[:title] || {}).values.flatten).compact.map(&:downcase)
999
- vb = ([cb[:name]] + (cb[:title] || {}).values.flatten).compact.map(&:downcase)
1017
+ va = ([ca[:name]] + (ca[:titles] || {}).values.flatten).compact.map(&:downcase)
1018
+ vb = ([cb[:name]] + (cb[:titles] || {}).values.flatten).compact.map(&:downcase)
1000
1019
  if !(va & vb).empty?
1001
1020
  debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
1002
- # If there's a non-empty case-insensitive intersection between the name and title values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
1021
+ # If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
1003
1022
  ca.merge!(cb)
1004
1023
  elsif ca.nil? && cb.virtual
1005
1024
  debug("merge!: columns") {"index: #{index}, virtual"}
@@ -1127,11 +1146,6 @@ module RDF::Tabular
1127
1146
  end
1128
1147
  when :natural_language
1129
1148
  value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
1130
- when :atomic
1131
- case key
1132
- when :datatype then normalize_datatype(value)
1133
- else value
1134
- end
1135
1149
  else
1136
1150
  value
1137
1151
  end
@@ -1139,27 +1153,6 @@ module RDF::Tabular
1139
1153
  self
1140
1154
  end
1141
1155
 
1142
- ##
1143
- # Normalize datatype to Object/Hash representation
1144
- # @param [String, Hash{Symbol => String}] value
1145
- # @return [Hash{Symbol => String}]
1146
- def normalize_datatype(value)
1147
- # Normalize datatype to array of object form
1148
- value = {base: value} unless value.is_a?(Hash)
1149
- # Create a new representation using symbols and transformed values
1150
- nv = {}
1151
- value.each do |kk, vv|
1152
- case kk.to_sym
1153
- when :base, :decimalChar, :format, :groupChar, :pattern then nv[kk.to_sym] = vv
1154
- when :length, :minLength, :maxLength, :minimum, :maximum,
1155
- :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
1156
- nv[kk.to_sym] = vv.to_i
1157
- end
1158
- end
1159
- nv[:base] ||= 'string'
1160
- nv
1161
- end
1162
-
1163
1156
  ##
1164
1157
  # Normalize JSON-LD
1165
1158
  #
@@ -1246,7 +1239,7 @@ module RDF::Tabular
1246
1239
  def csv_options
1247
1240
  {
1248
1241
  col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
1249
- row_sep: (is_a?(Dialect) ? self : dialect).lineTerminator,
1242
+ row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
1250
1243
  quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
1251
1244
  encoding: (is_a?(Dialect) ? self : dialect).encoding
1252
1245
  }
@@ -1269,7 +1262,7 @@ module RDF::Tabular
1269
1262
  :@id => :link,
1270
1263
  :@type => :atomic,
1271
1264
  notes: :array,
1272
- resources: :array,
1265
+ tables: :array,
1273
1266
  tableSchema: :object,
1274
1267
  tableDirection: :atomic,
1275
1268
  dialect: :object,
@@ -1293,7 +1286,7 @@ module RDF::Tabular
1293
1286
  # Does the Metadata or any descendant have any common properties
1294
1287
  # @return [Boolean]
1295
1288
  def has_annotations?
1296
- super || resources.any? {|t| t.has_annotations? }
1289
+ super || tables.any? {|t| t.has_annotations? }
1297
1290
  end
1298
1291
 
1299
1292
  # Logic for accessing elements as accessors
@@ -1306,10 +1299,10 @@ module RDF::Tabular
1306
1299
  end
1307
1300
 
1308
1301
  ##
1309
- # Iterate over all resources
1302
+ # Iterate over all tables
1310
1303
  # @yield [Table]
1311
- def each_resource
1312
- resources.map(&:url).each do |url|
1304
+ def each_table
1305
+ tables.map(&:url).each do |url|
1313
1306
  yield for_table(url)
1314
1307
  end
1315
1308
  end
@@ -1320,9 +1313,9 @@ module RDF::Tabular
1320
1313
  # @param [String] url of the table
1321
1314
  # @return [Table]
1322
1315
  def for_table(url)
1323
- # If there are no resources, assume there's one for this table
1324
- #self.resources ||= [Table.new(url: url)]
1325
- if table = Array(resources).detect {|t| t.url == url}
1316
+ # If there are no tables, assume there's one for this table
1317
+ #self.tables ||= [Table.new(url: url)]
1318
+ if table = Array(tables).detect {|t| t.url == url}
1326
1319
  # Set document base for this table for resolving URLs
1327
1320
  table.instance_variable_set(:@context, context.dup)
1328
1321
  table.context.base = url
@@ -1335,7 +1328,7 @@ module RDF::Tabular
1335
1328
  {
1336
1329
  "@id" => id,
1337
1330
  "@type" => "AnnotatedTableGroup",
1338
- "resources" => resources.map(&:to_atd)
1331
+ "tables" => tables.map(&:to_atd)
1339
1332
  }
1340
1333
  end
1341
1334
  end
@@ -1401,7 +1394,7 @@ module RDF::Tabular
1401
1394
  source: :atomic,
1402
1395
  targetFormat: :link,
1403
1396
  scriptFormat: :link,
1404
- title: :natural_language,
1397
+ titles: :natural_language,
1405
1398
  url: :link,
1406
1399
  }.freeze
1407
1400
  REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
@@ -1462,7 +1455,7 @@ module RDF::Tabular
1462
1455
  :@type => :atomic,
1463
1456
  name: :atomic,
1464
1457
  suppressOutput: :atomic,
1465
- title: :natural_language,
1458
+ titles: :natural_language,
1466
1459
  required: :atomic,
1467
1460
  virtual: :atomic,
1468
1461
  }.freeze
@@ -1484,7 +1477,7 @@ module RDF::Tabular
1484
1477
  # @note this is lazy evaluated to avoid dependencies on setting dialect vs. initializing columns
1485
1478
  # @return [Integer] 1-based column number
1486
1479
  def sourceNumber
1487
- skipColumns = table ? (dialect.skipColumns.to_i + dialect.headerColumnCount.to_i) : 0
1480
+ skipColumns = table ? dialect.skipColumns.to_i : 0
1488
1481
  number + skipColumns
1489
1482
  end
1490
1483
 
@@ -1506,9 +1499,9 @@ module RDF::Tabular
1506
1499
  end
1507
1500
  end
1508
1501
 
1509
- # Return or create a name for the column from title, if it exists
1502
+ # Return or create a name for the column from titles, if it exists
1510
1503
  def name
1511
- object[:name] ||= if title && (ts = title[context.default_language || 'und'])
1504
+ object[:name] ||= if titles && (ts = titles[context.default_language || 'und'])
1512
1505
  n = Array(ts).first
1513
1506
  n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
1514
1507
  n1 = URI.encode(n[1..-1], /[^\w\.]/)
@@ -1534,7 +1527,7 @@ module RDF::Tabular
1534
1527
  "cells" => [],
1535
1528
  "virtual" => self.virtual,
1536
1529
  "name" => self.name,
1537
- "title" => self.title
1530
+ "titles" => self.titles
1538
1531
  }
1539
1532
  end
1540
1533
 
@@ -1556,9 +1549,8 @@ module RDF::Tabular
1556
1549
  doubleQuote: true,
1557
1550
  encoding: "utf-8".freeze,
1558
1551
  header: true,
1559
- headerColumnCount: 0,
1560
1552
  headerRowCount: 1,
1561
- lineTerminator: :auto, # SPEC says "\r\n"
1553
+ lineTerminators: :auto,
1562
1554
  quoteChar: '"',
1563
1555
  skipBlankRows: false,
1564
1556
  skipColumns: 0,
@@ -1575,9 +1567,8 @@ module RDF::Tabular
1575
1567
  doubleQuote: :atomic,
1576
1568
  encoding: :atomic,
1577
1569
  header: :atomic,
1578
- headerColumnCount: :atomic,
1579
1570
  headerRowCount: :atomic,
1580
- lineTerminator: :atomic,
1571
+ lineTerminators: :atomic,
1581
1572
  quoteChar: :atomic,
1582
1573
  skipBlankRows: :atomic,
1583
1574
  skipColumns: :atomic,
@@ -1625,8 +1616,8 @@ module RDF::Tabular
1625
1616
  options = options.dup
1626
1617
  options.delete(:context) # Don't accidentally use a passed context
1627
1618
  # Normalize input to an IO object
1628
- if !input.respond_to?(:read)
1629
- return ::RDF::Util::File.open_file(input.to_s) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
1619
+ if input.is_a?(String)
1620
+ return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
1630
1621
  end
1631
1622
 
1632
1623
  table = {
@@ -1655,20 +1646,19 @@ module RDF::Tabular
1655
1646
  row_data = Array(csv.shift)
1656
1647
  Array(row_data).each_with_index do |value, index|
1657
1648
  # Skip columns
1658
- skipCols = skipColumns.to_i + headerColumnCount.to_i
1649
+ skipCols = skipColumns.to_i
1659
1650
  next if index < skipCols
1660
1651
 
1661
1652
  # Trim value
1662
1653
  value.lstrip! if %w(true start).include?(trim.to_s)
1663
1654
  value.rstrip! if %w(true end).include?(trim.to_s)
1664
1655
 
1665
- # Initialize title
1666
- # SPEC CONFUSION: does title get an array, or concatenated values?
1656
+ # Initialize titles
1667
1657
  columns = table["tableSchema"]["columns"] ||= []
1668
1658
  column = columns[index - skipCols] ||= {
1669
- "title" => {"und" => []},
1659
+ "titles" => {"und" => []},
1670
1660
  }
1671
- column["title"]["und"] << value
1661
+ column["titles"]["und"] << value
1672
1662
  end
1673
1663
  end
1674
1664
  debug("embedded_metadata") {"table: #{table.inspect}"}
@@ -1688,6 +1678,41 @@ module RDF::Tabular
1688
1678
  end
1689
1679
  end
1690
1680
 
1681
+ class Datatype < Metadata
1682
+ PROPERTIES = {
1683
+ base: :atomic,
1684
+ format: :atomic,
1685
+ length: :atomic,
1686
+ minLength: :atomic,
1687
+ maxLength: :atomic,
1688
+ minimum: :atomic,
1689
+ maximum: :atomic,
1690
+ minInclusive: :atomic,
1691
+ maxInclusive: :atomic,
1692
+ minExclusive: :atomic,
1693
+ maxExclusive: :atomic,
1694
+ decimalChar: :atomic,
1695
+ groupChar: :atomic,
1696
+ pattern: :atomic,
1697
+ }.freeze
1698
+ REQUIRED = [].freeze
1699
+
1700
+ # Override `base` in Metadata
1701
+ def base; object[:base]; end
1702
+
1703
+ # Setters
1704
+ PROPERTIES.each do |a, type|
1705
+ define_method("#{a}=".to_sym) do |value|
1706
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1707
+ end
1708
+ end
1709
+
1710
+ # Logic for accessing elements as accessors
1711
+ def method_missing(method, *args)
1712
+ PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1713
+ end
1714
+ end
1715
+
1691
1716
  # Wraps each resulting row
1692
1717
  class Row
1693
1718
  # Class for returning values
@@ -1759,7 +1784,7 @@ module RDF::Tabular
1759
1784
  @number = number
1760
1785
  @sourceNumber = source_number
1761
1786
  @values = []
1762
- skipColumns = metadata.dialect.skipColumns.to_i + metadata.dialect.headerColumnCount.to_i
1787
+ skipColumns = metadata.dialect.skipColumns.to_i
1763
1788
 
1764
1789
  @context = table.context.dup
1765
1790
  @context.base = table.url
@@ -1786,22 +1811,22 @@ module RDF::Tabular
1786
1811
 
1787
1812
  @values << cell = Cell.new(metadata, column, self, value)
1788
1813
 
1789
- datatype = metadata.normalize_datatype(column.datatype || 'string')
1790
- value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype[:base])
1791
- value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype[:base])
1814
+ datatype = column.datatype || Datatype.new(base: "string")
1815
+ value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
1816
+ value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
1792
1817
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
1793
1818
  value = column.default || '' if value.empty?
1794
1819
 
1795
1820
  cell_values = column.separator ? value.split(column.separator) : [value]
1796
1821
 
1797
1822
  cell_values = cell_values.map do |v|
1798
- v = v.strip unless %w(string anyAtomicType any).include?(datatype[:base])
1823
+ v = v.strip unless %w(string anyAtomicType any).include?(datatype.base)
1799
1824
  v = column.default || '' if v.empty?
1800
1825
  if Array(column.null).include?(v)
1801
1826
  nil
1802
1827
  else
1803
1828
  # Trim value
1804
- if %w(string anyAtomicType any).include?(datatype[:base])
1829
+ if %w(string anyAtomicType any).include?(datatype.base)
1805
1830
  v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
1806
1831
  v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
1807
1832
  else
@@ -1809,7 +1834,7 @@ module RDF::Tabular
1809
1834
  v.strip!
1810
1835
  end
1811
1836
 
1812
- expanded_dt = metadata.context.expand_iri(datatype[:base], vocab: true)
1837
+ expanded_dt = metadata.context.expand_iri(datatype.base, vocab: true)
1813
1838
  if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
1814
1839
  lit_or_errors
1815
1840
  else
@@ -1861,36 +1886,36 @@ module RDF::Tabular
1861
1886
  value_errors = []
1862
1887
 
1863
1888
  # Check constraints
1864
- if datatype[:length] && value.length != datatype[:length]
1865
- value_errors << "#{value} does not have length #{datatype[:length]}"
1889
+ if datatype.length && value.length != datatype.length
1890
+ value_errors << "#{value} does not have length #{datatype.length}"
1866
1891
  end
1867
- if datatype[:minLength] && value.length < datatype[:minLength]
1868
- value_errors << "#{value} does not have length >= #{datatype[:minLength]}"
1892
+ if datatype.minLength && value.length < datatype.minLength
1893
+ value_errors << "#{value} does not have length >= #{datatype.minLength}"
1869
1894
  end
1870
- if datatype[:maxLength] && value.length > datatype[:maxLength]
1871
- value_errors << "#{value} does not have length <= #{datatype[:maxLength]}"
1895
+ if datatype.maxLength && value.length > datatype.maxLength
1896
+ value_errors << "#{value} does not have length <= #{datatype.maxLength}"
1872
1897
  end
1873
1898
 
1874
- format = datatype[:format]
1899
+ format = datatype.format
1875
1900
  # Datatype specific constraints and conversions
1876
- case datatype[:base].to_sym
1901
+ case datatype.base.to_sym
1877
1902
  when :decimal, :integer, :long, :int, :short, :byte,
1878
1903
  :nonNegativeInteger, :positiveInteger,
1879
1904
  :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
1880
1905
  :nonPositiveInteger, :negativeInteger,
1881
1906
  :double, :float, :number
1882
1907
  # Normalize representation based on numeric-specific facets
1883
- groupChar = datatype.fetch(:groupChar, ',')
1884
- if datatype[:pattern] && !value.match(Regexp.new(datatype[:pattern]))
1908
+ groupChar = datatype.groupChar || ','
1909
+ if datatype.pattern && !value.match(Regexp.new(datatype.pattern))
1885
1910
  # pattern facet failed
1886
- value_errors << "#{value} does not match pattern #{datatype[:pattern]}"
1911
+ value_errors << "#{value} does not match pattern #{datatype.pattern}"
1887
1912
  end
1888
1913
  if value.include?(groupChar*2)
1889
1914
  # repeated group characters are not allowed
1890
1915
  value_errors << "#{value} has repeating #{groupChar.inspect}"
1891
1916
  end
1892
1917
  value.gsub!(groupChar, '')
1893
- value.sub!(datatype.fetch(:decimalChar, '.'), '.')
1918
+ value.sub!(datatype.decimalChar, '.') if datatype.decimalChar
1894
1919
 
1895
1920
  # Extract percent or per-mille sign
1896
1921
  percent = permille = false
@@ -1941,7 +1966,7 @@ module RDF::Tabular
1941
1966
 
1942
1967
  if format
1943
1968
  date_format, time_format = format.split(' ')
1944
- if datatype[:base].to_sym == :time
1969
+ if datatype.base.to_sym == :time
1945
1970
  date_format, time_format = nil, date_format
1946
1971
  end
1947
1972
 
@@ -2008,7 +2033,7 @@ module RDF::Tabular
2008
2033
  lit = RDF::Literal(value, datatype: expanded_dt)
2009
2034
  when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
2010
2035
  :ENTITY, :ID, :IDREF, :NOTATION
2011
- value_errors << "#{value} uses unsupported datatype: #{datatype[:base]}"
2036
+ value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
2012
2037
  else
2013
2038
  # For other types, format is a regexp
2014
2039
  unless format.nil? || value.match(Regexp.new(format))
@@ -2025,7 +2050,7 @@ module RDF::Tabular
2025
2050
  end
2026
2051
 
2027
2052
  # Final value is a valid literal, or a plain literal otherwise
2028
- value_errors << "#{value} is not a valid #{datatype[:base]}" if lit && !lit.valid?
2053
+ value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?
2029
2054
 
2030
2055
  # FIXME Value constraints
2031
2056