rdf-tabular 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +209 -0
- data/VERSION +1 -1
- data/etc/csvw.jsonld +20 -53
- data/etc/doap.csv-metadata.json +23 -23
- data/lib/rdf/tabular/csvw.rb +121 -80
- data/lib/rdf/tabular/metadata.rb +173 -148
- data/lib/rdf/tabular/reader.rb +149 -99
- data/spec/metadata_spec.rb +110 -113
- data/spec/reader_spec.rb +102 -0
- data/spec/suite_helper.rb +2 -1
- metadata +90 -89
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -180,9 +180,10 @@ module RDF::Tabular
|
|
180
180
|
# Return either the merge of user- and found-metadata, whichever one of them is present, a TableGroup for the base URL, or an empty TableGroup
|
181
181
|
metadata = case
|
182
182
|
when user_metadata && found_metadata then user_metadata.merge(found_metadata)
|
183
|
-
when user_metadata
|
184
|
-
when found_metadata
|
185
|
-
|
183
|
+
when user_metadata then user_metadata
|
184
|
+
when found_metadata then found_metadata
|
185
|
+
when base then TableGroup.new({tables: [{url: base}]}, options)
|
186
|
+
else TableGroup.new({tables: []}, options)
|
186
187
|
end
|
187
188
|
|
188
189
|
# Make TableGroup, if not already
|
@@ -223,13 +224,13 @@ module RDF::Tabular
|
|
223
224
|
# Figure out the type from the keys present in the object
|
224
225
|
object_keys = object.keys.map(&:to_s)
|
225
226
|
type ||= case
|
226
|
-
when %w(
|
227
|
+
when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
|
227
228
|
when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
|
228
229
|
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
|
229
230
|
when %w(columns primaryKey foreignKeys urlTemplate).any? {|k| object_keys.include?(k)} then :Schema
|
230
231
|
when %w(name required).any? {|k| object_keys.include?(k)} then :Column
|
231
|
-
when %w(commentPrefix delimiter doubleQuote encoding header
|
232
|
-
when %w(
|
232
|
+
when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
|
233
|
+
when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
|
233
234
|
end
|
234
235
|
|
235
236
|
case type.to_s.to_sym
|
@@ -312,13 +313,7 @@ module RDF::Tabular
|
|
312
313
|
value
|
313
314
|
end
|
314
315
|
when :datatype
|
315
|
-
|
316
|
-
object[key] = case value
|
317
|
-
when Hash
|
318
|
-
value.inject({}) {|memo, (k,v)| memo[k.to_sym] = v; memo}
|
319
|
-
else
|
320
|
-
value
|
321
|
-
end
|
316
|
+
self.datatype = value
|
322
317
|
when :dialect
|
323
318
|
# If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
|
324
319
|
object[key] = case value
|
@@ -329,7 +324,7 @@ module RDF::Tabular
|
|
329
324
|
value
|
330
325
|
end
|
331
326
|
@type ||= :Table
|
332
|
-
when :
|
327
|
+
when :tables
|
333
328
|
# An array of table descriptions for the tables in the group.
|
334
329
|
object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
|
335
330
|
value.map {|v| Table.new(v, @options.merge(parent: self, context: nil))}
|
@@ -437,6 +432,15 @@ module RDF::Tabular
|
|
437
432
|
end
|
438
433
|
end
|
439
434
|
|
435
|
+
# Set new datatype
|
436
|
+
# @return [Datatype]
|
437
|
+
def datatype=(value)
|
438
|
+
object[:datatype] = case value
|
439
|
+
when Hash then Datatype.new(value)
|
440
|
+
else Datatype.new({base: value})
|
441
|
+
end
|
442
|
+
end
|
443
|
+
|
440
444
|
# Type of this Metadata
|
441
445
|
# @return [:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect, :Datatype]
|
442
446
|
def type; self.class.name.split('::').last.to_sym; end
|
@@ -463,13 +467,25 @@ module RDF::Tabular
|
|
463
467
|
e.message.split("\n")
|
464
468
|
end
|
465
469
|
|
470
|
+
##
|
471
|
+
# Validation warnings, available only after validating or finding warnings
|
472
|
+
# @return [Array<String>]
|
473
|
+
def warnings
|
474
|
+
((@warnings || []) + object.
|
475
|
+
values.
|
476
|
+
flatten.
|
477
|
+
select {|v| v.is_a?(Metadata)}.
|
478
|
+
map(&:warnings).
|
479
|
+
flatten).compact
|
480
|
+
end
|
481
|
+
|
466
482
|
##
|
467
483
|
# Validate metadata, raising an error containing all errors detected during validation
|
468
484
|
# @raise [Error] Raise error if metadata fails validation (e.g., missing required properties); unexpected properties only produce warnings
|
469
485
|
# @return [self]
|
470
486
|
def validate!
|
471
487
|
expected_props, required_props = @properties.keys, @required
|
472
|
-
errors = []
|
488
|
+
errors, @warnings = [], []
|
473
489
|
|
474
490
|
unless is_a?(Dialect) || is_a?(Transformation)
|
475
491
|
expected_props = expected_props + INHERITED_PROPERTIES.keys
|
@@ -478,7 +494,7 @@ module RDF::Tabular
|
|
478
494
|
# It has only expected properties (exclude metadata)
|
479
495
|
check_keys = object.keys - [:"@id", :"@context"]
|
480
496
|
check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
|
481
|
-
|
497
|
+
@warnings << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
|
482
498
|
|
483
499
|
# It has required properties
|
484
500
|
errors << "#{type} missing required keys: #{(required_props & check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
|
@@ -487,8 +503,10 @@ module RDF::Tabular
|
|
487
503
|
object.keys.each do |key|
|
488
504
|
value = object[key]
|
489
505
|
case key
|
490
|
-
when :aboutUrl, :
|
491
|
-
valid_inherited_property?(key, value)
|
506
|
+
when :aboutUrl, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
|
507
|
+
valid_inherited_property?(key, value) do |m|
|
508
|
+
@warnings << m
|
509
|
+
end
|
492
510
|
when :columns
|
493
511
|
if value.is_a?(Array) && value.all? {|v| v.is_a?(Column)}
|
494
512
|
value.each do |v|
|
@@ -505,11 +523,24 @@ module RDF::Tabular
|
|
505
523
|
end
|
506
524
|
when :commentPrefix, :delimiter, :quoteChar
|
507
525
|
unless value.is_a?(String) && value.length == 1
|
508
|
-
|
526
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
|
527
|
+
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
509
528
|
end
|
510
|
-
when :
|
529
|
+
when :lineTerminators
|
511
530
|
unless value.is_a?(String)
|
512
|
-
|
531
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
|
532
|
+
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
533
|
+
end
|
534
|
+
when :datatype
|
535
|
+
if value.is_a?(Datatype)
|
536
|
+
begin
|
537
|
+
value.validate!
|
538
|
+
rescue Error => e
|
539
|
+
errors << e.message
|
540
|
+
end
|
541
|
+
else
|
542
|
+
@warnings << "#{type} has invalid property '#{key}': expected a Datatype"
|
543
|
+
value = object[key] = nil
|
513
544
|
end
|
514
545
|
when :dialect
|
515
546
|
unless value.is_a?(Dialect)
|
@@ -520,13 +551,19 @@ module RDF::Tabular
|
|
520
551
|
rescue Error => e
|
521
552
|
errors << e.message
|
522
553
|
end
|
523
|
-
when :doubleQuote, :header, :
|
554
|
+
when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
|
524
555
|
unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
525
|
-
|
556
|
+
@warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
|
557
|
+
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
558
|
+
end
|
559
|
+
when :required, :suppressOutput, :virtual
|
560
|
+
unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
561
|
+
@warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
|
562
|
+
object.delete(key)
|
526
563
|
end
|
527
564
|
when :encoding
|
528
565
|
unless (Encoding.find(value) rescue false)
|
529
|
-
|
566
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
|
530
567
|
end
|
531
568
|
when :foreignKeys
|
532
569
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
@@ -549,13 +586,13 @@ module RDF::Tabular
|
|
549
586
|
end
|
550
587
|
# resource is the URL of a Table in the TableGroup
|
551
588
|
ref = base.join(reference['resource']).to_s
|
552
|
-
table = root.is_a?(TableGroup) && root.
|
589
|
+
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
553
590
|
errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
554
591
|
table.tableSchema if table
|
555
592
|
elsif reference.has_key?('schemaReference')
|
556
593
|
# resource is the @id of a Schema in the TableGroup
|
557
594
|
ref = base.join(reference['schemaReference']).to_s
|
558
|
-
tables = root.is_a?(TableGroup) ? root.
|
595
|
+
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
559
596
|
case tables.length
|
560
597
|
when 0
|
561
598
|
errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
@@ -581,13 +618,22 @@ module RDF::Tabular
|
|
581
618
|
errors << "#{type} has invalid property '#{key}': reference must be an object: #{reference.inspect}"
|
582
619
|
end
|
583
620
|
end
|
584
|
-
when :
|
621
|
+
when :headerRowCount, :skipColumns, :skipRows
|
585
622
|
unless value.is_a?(Numeric) && value.integer? && value > 0
|
586
|
-
|
623
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
|
624
|
+
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
625
|
+
end
|
626
|
+
when :base
|
627
|
+
@warnings << "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
|
628
|
+
when :format
|
629
|
+
unless value.is_a?(String)
|
630
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
|
631
|
+
object.delete(key)
|
587
632
|
end
|
588
633
|
when :length, :minLength, :maxLength
|
589
634
|
unless value.is_a?(Numeric) && value.integer? && value > 0
|
590
|
-
|
635
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
|
636
|
+
object.delete(key)
|
591
637
|
end
|
592
638
|
unless key == :length || value != object[:length]
|
593
639
|
# Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
|
@@ -598,7 +644,8 @@ module RDF::Tabular
|
|
598
644
|
RDF::Literal::Date.new(value.to_s).valid? ||
|
599
645
|
RDF::Literal::Time.new(value.to_s).valid? ||
|
600
646
|
RDF::Literal::DateTime.new(value.to_s).valid?
|
601
|
-
|
647
|
+
@warnings << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
|
648
|
+
object.delete(key)
|
602
649
|
end
|
603
650
|
when :name
|
604
651
|
unless value.is_a?(String) && name.match(NAME_SYNTAX)
|
@@ -618,7 +665,7 @@ module RDF::Tabular
|
|
618
665
|
Array(value).each do |k|
|
619
666
|
errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c.name == k}
|
620
667
|
end
|
621
|
-
when :
|
668
|
+
when :tables
|
622
669
|
if value.is_a?(Array) && value.all? {|v| v.is_a?(Table)}
|
623
670
|
value.each do |t|
|
624
671
|
begin
|
@@ -664,13 +711,14 @@ module RDF::Tabular
|
|
664
711
|
else
|
665
712
|
errors << "#{type} has invalid property '#{key}': expected array of Transformations"
|
666
713
|
end
|
667
|
-
when :
|
668
|
-
valid_natural_language_property?(:
|
714
|
+
when :titles
|
715
|
+
valid_natural_language_property?(:titles, value) {|m| errors << m}
|
669
716
|
when :trim
|
670
717
|
unless %w(true false 1 0 start end).include?(value.to_s.downcase)
|
671
718
|
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected true, false, 1, 0, start or end"
|
672
719
|
end
|
673
720
|
when :url
|
721
|
+
# Only validate URL in validation mode; this allows for a nil URL
|
674
722
|
unless @url.valid?
|
675
723
|
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
|
676
724
|
end
|
@@ -687,7 +735,7 @@ module RDF::Tabular
|
|
687
735
|
errors << "#{type} has invalid content '#{key}': #{e.message}"
|
688
736
|
end
|
689
737
|
else
|
690
|
-
|
738
|
+
warnings << "#{type} has invalid property '#{key}': unsupported property"
|
691
739
|
end
|
692
740
|
end
|
693
741
|
|
@@ -713,53 +761,24 @@ module RDF::Tabular
|
|
713
761
|
# @yield message error message
|
714
762
|
# @return [Boolean]
|
715
763
|
def valid_inherited_property?(key, value)
|
716
|
-
pv = parent.send(key) if parent
|
717
764
|
error = case key
|
718
765
|
when :aboutUrl, :default, :propertyUrl, :valueUrl
|
719
766
|
"string" unless value.is_a?(String)
|
720
|
-
when :datatype
|
721
|
-
# Normalization usually redundant
|
722
|
-
dt = normalize_datatype(value)
|
723
|
-
# FIXME: support arrays of datatypes?
|
724
|
-
"valid datatype" unless DATATYPES.keys.map(&:to_s).include?(dt[:base]) || RDF::URI(dt[:base]).absolute?
|
725
767
|
when :lang
|
726
768
|
"valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
|
727
769
|
when :null
|
728
|
-
# To be valid, it must be a string or array
|
770
|
+
# To be valid, it must be a string or array
|
729
771
|
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
730
772
|
when :ordered
|
731
773
|
"boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
732
774
|
when :separator
|
733
775
|
"single character" unless value.nil? || value.is_a?(String) && value.length == 1
|
734
776
|
when :textDirection
|
735
|
-
# A value for this property is compatible with an inherited value only if they are identical.
|
736
777
|
"rtl or ltr" unless %(rtl ltr).include?(value)
|
737
|
-
end ||
|
738
|
-
|
739
|
-
case key
|
740
|
-
# Compatibility
|
741
|
-
when :aboutUrl, :propertyUrl, :valueUrl
|
742
|
-
# No restrictions
|
743
|
-
when :default, :ordered, :separator, :textDirection
|
744
|
-
"same as that defined on parent" if pv && pv != value
|
745
|
-
when :datatype
|
746
|
-
if pv
|
747
|
-
# Normalization usually redundant
|
748
|
-
dt = normalize_datatype(value)
|
749
|
-
pvdt = normalize_datatype(pv)
|
750
|
-
vl = RDF::Literal.new("", datatype: DATATYPES[dt[:base].to_sym])
|
751
|
-
pvvl = RDF::Literal.new("", datatype: DATATYPES[pvdt[:base].to_sym])
|
752
|
-
# must be a subclass of some type defined on parent
|
753
|
-
"compatible datatype of that defined on parent" unless vl.is_a?(pvvl.class)
|
754
|
-
end
|
755
|
-
when :lang
|
756
|
-
"lang expected to restrict #{pv}" if pv && !value.start_with?(pv)
|
757
|
-
when :null
|
758
|
-
"subset of that defined on parent" if pv && (Array(value) & Array(pv)) != Array(value)
|
759
778
|
end
|
760
779
|
|
761
780
|
if error
|
762
|
-
yield "#{type} has invalid property '#{key}' (
|
781
|
+
yield "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{error}"
|
763
782
|
false
|
764
783
|
else
|
765
784
|
true
|
@@ -888,7 +907,7 @@ module RDF::Tabular
|
|
888
907
|
if self.is_a?(Table) && self.parent
|
889
908
|
self.parent
|
890
909
|
else
|
891
|
-
content = {"@type" => "TableGroup", "
|
910
|
+
content = {"@type" => "TableGroup", "tables" => [self]}
|
892
911
|
content['@context'] = object.delete(:@context) if object[:@context]
|
893
912
|
ctx = @context
|
894
913
|
self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
|
@@ -907,7 +926,7 @@ module RDF::Tabular
|
|
907
926
|
if md.parent
|
908
927
|
md.parent
|
909
928
|
else
|
910
|
-
content = {"@type" => "TableGroup", "
|
929
|
+
content = {"@type" => "TableGroup", "tables" => [md]}
|
911
930
|
ctx = md.context
|
912
931
|
content['@context'] = md.object.delete(:@context) if md.object[:@context]
|
913
932
|
md.remove_instance_variable(:@context) if md.instance_variables.include?(:@context)
|
@@ -962,18 +981,18 @@ module RDF::Tabular
|
|
962
981
|
a = object[key].is_a?(Array) ? object[key] : [object[key]].compact
|
963
982
|
b = value.is_a?(Array) ? value : [value]
|
964
983
|
object[key] = a + b
|
965
|
-
when :
|
984
|
+
when :tables
|
966
985
|
# When an array of table descriptions B is imported into an original array of table descriptions A, each table description within B is combined into the original array A by:
|
967
986
|
value.each do |tb|
|
968
987
|
if ta = object[key].detect {|e| e.url == tb.url}
|
969
988
|
# if there is a table description with the same url in A, the table description from B is imported into the matching table description in A
|
970
|
-
debug("merge!:
|
989
|
+
debug("merge!: tables") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
|
971
990
|
ta.merge!(tb)
|
972
991
|
else
|
973
992
|
# otherwise, the table description from B is appended to the array of table descriptions A
|
974
993
|
tb = tb.dup
|
975
994
|
tb.instance_variable_set(:@parent, self)
|
976
|
-
debug("merge!:
|
995
|
+
debug("merge!: tables") {"add TB: #{tb.inspect}"}
|
977
996
|
object[key] << tb
|
978
997
|
end
|
979
998
|
end
|
@@ -995,11 +1014,11 @@ module RDF::Tabular
|
|
995
1014
|
# When an array of column descriptions B is imported into an original array of column descriptions A, each column description within B is combined into the original array A by:
|
996
1015
|
Array(value).each_with_index do |cb, index|
|
997
1016
|
ca = object[key][index] || {}
|
998
|
-
va = ([ca[:name]] + (ca[:
|
999
|
-
vb = ([cb[:name]] + (cb[:
|
1017
|
+
va = ([ca[:name]] + (ca[:titles] || {}).values.flatten).compact.map(&:downcase)
|
1018
|
+
vb = ([cb[:name]] + (cb[:titles] || {}).values.flatten).compact.map(&:downcase)
|
1000
1019
|
if !(va & vb).empty?
|
1001
1020
|
debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
|
1002
|
-
# If there's a non-empty case-insensitive intersection between the name and
|
1021
|
+
# If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
|
1003
1022
|
ca.merge!(cb)
|
1004
1023
|
elsif ca.nil? && cb.virtual
|
1005
1024
|
debug("merge!: columns") {"index: #{index}, virtual"}
|
@@ -1127,11 +1146,6 @@ module RDF::Tabular
|
|
1127
1146
|
end
|
1128
1147
|
when :natural_language
|
1129
1148
|
value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
|
1130
|
-
when :atomic
|
1131
|
-
case key
|
1132
|
-
when :datatype then normalize_datatype(value)
|
1133
|
-
else value
|
1134
|
-
end
|
1135
1149
|
else
|
1136
1150
|
value
|
1137
1151
|
end
|
@@ -1139,27 +1153,6 @@ module RDF::Tabular
|
|
1139
1153
|
self
|
1140
1154
|
end
|
1141
1155
|
|
1142
|
-
##
|
1143
|
-
# Normalize datatype to Object/Hash representation
|
1144
|
-
# @param [String, Hash{Symbol => String}] value
|
1145
|
-
# @return [Hash{Symbol => String}]
|
1146
|
-
def normalize_datatype(value)
|
1147
|
-
# Normalize datatype to array of object form
|
1148
|
-
value = {base: value} unless value.is_a?(Hash)
|
1149
|
-
# Create a new representation using symbols and transformed values
|
1150
|
-
nv = {}
|
1151
|
-
value.each do |kk, vv|
|
1152
|
-
case kk.to_sym
|
1153
|
-
when :base, :decimalChar, :format, :groupChar, :pattern then nv[kk.to_sym] = vv
|
1154
|
-
when :length, :minLength, :maxLength, :minimum, :maximum,
|
1155
|
-
:minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
1156
|
-
nv[kk.to_sym] = vv.to_i
|
1157
|
-
end
|
1158
|
-
end
|
1159
|
-
nv[:base] ||= 'string'
|
1160
|
-
nv
|
1161
|
-
end
|
1162
|
-
|
1163
1156
|
##
|
1164
1157
|
# Normalize JSON-LD
|
1165
1158
|
#
|
@@ -1246,7 +1239,7 @@ module RDF::Tabular
|
|
1246
1239
|
def csv_options
|
1247
1240
|
{
|
1248
1241
|
col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
|
1249
|
-
row_sep: (is_a?(Dialect) ? self : dialect).
|
1242
|
+
row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
|
1250
1243
|
quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
|
1251
1244
|
encoding: (is_a?(Dialect) ? self : dialect).encoding
|
1252
1245
|
}
|
@@ -1269,7 +1262,7 @@ module RDF::Tabular
|
|
1269
1262
|
:@id => :link,
|
1270
1263
|
:@type => :atomic,
|
1271
1264
|
notes: :array,
|
1272
|
-
|
1265
|
+
tables: :array,
|
1273
1266
|
tableSchema: :object,
|
1274
1267
|
tableDirection: :atomic,
|
1275
1268
|
dialect: :object,
|
@@ -1293,7 +1286,7 @@ module RDF::Tabular
|
|
1293
1286
|
# Does the Metadata or any descendant have any common properties
|
1294
1287
|
# @return [Boolean]
|
1295
1288
|
def has_annotations?
|
1296
|
-
super ||
|
1289
|
+
super || tables.any? {|t| t.has_annotations? }
|
1297
1290
|
end
|
1298
1291
|
|
1299
1292
|
# Logic for accessing elements as accessors
|
@@ -1306,10 +1299,10 @@ module RDF::Tabular
|
|
1306
1299
|
end
|
1307
1300
|
|
1308
1301
|
##
|
1309
|
-
# Iterate over all
|
1302
|
+
# Iterate over all tables
|
1310
1303
|
# @yield [Table]
|
1311
|
-
def
|
1312
|
-
|
1304
|
+
def each_table
|
1305
|
+
tables.map(&:url).each do |url|
|
1313
1306
|
yield for_table(url)
|
1314
1307
|
end
|
1315
1308
|
end
|
@@ -1320,9 +1313,9 @@ module RDF::Tabular
|
|
1320
1313
|
# @param [String] url of the table
|
1321
1314
|
# @return [Table]
|
1322
1315
|
def for_table(url)
|
1323
|
-
# If there are no
|
1324
|
-
#self.
|
1325
|
-
if table = Array(
|
1316
|
+
# If there are no tables, assume there's one for this table
|
1317
|
+
#self.tables ||= [Table.new(url: url)]
|
1318
|
+
if table = Array(tables).detect {|t| t.url == url}
|
1326
1319
|
# Set document base for this table for resolving URLs
|
1327
1320
|
table.instance_variable_set(:@context, context.dup)
|
1328
1321
|
table.context.base = url
|
@@ -1335,7 +1328,7 @@ module RDF::Tabular
|
|
1335
1328
|
{
|
1336
1329
|
"@id" => id,
|
1337
1330
|
"@type" => "AnnotatedTableGroup",
|
1338
|
-
"
|
1331
|
+
"tables" => tables.map(&:to_atd)
|
1339
1332
|
}
|
1340
1333
|
end
|
1341
1334
|
end
|
@@ -1401,7 +1394,7 @@ module RDF::Tabular
|
|
1401
1394
|
source: :atomic,
|
1402
1395
|
targetFormat: :link,
|
1403
1396
|
scriptFormat: :link,
|
1404
|
-
|
1397
|
+
titles: :natural_language,
|
1405
1398
|
url: :link,
|
1406
1399
|
}.freeze
|
1407
1400
|
REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
|
@@ -1462,7 +1455,7 @@ module RDF::Tabular
|
|
1462
1455
|
:@type => :atomic,
|
1463
1456
|
name: :atomic,
|
1464
1457
|
suppressOutput: :atomic,
|
1465
|
-
|
1458
|
+
titles: :natural_language,
|
1466
1459
|
required: :atomic,
|
1467
1460
|
virtual: :atomic,
|
1468
1461
|
}.freeze
|
@@ -1484,7 +1477,7 @@ module RDF::Tabular
|
|
1484
1477
|
# @note this is lazy evaluated to avoid dependencies on setting dialect vs. initializing columns
|
1485
1478
|
# @return [Integer] 1-based column number
|
1486
1479
|
def sourceNumber
|
1487
|
-
skipColumns = table ?
|
1480
|
+
skipColumns = table ? dialect.skipColumns.to_i : 0
|
1488
1481
|
number + skipColumns
|
1489
1482
|
end
|
1490
1483
|
|
@@ -1506,9 +1499,9 @@ module RDF::Tabular
|
|
1506
1499
|
end
|
1507
1500
|
end
|
1508
1501
|
|
1509
|
-
# Return or create a name for the column from
|
1502
|
+
# Return or create a name for the column from titles, if it exists
|
1510
1503
|
def name
|
1511
|
-
object[:name] ||= if
|
1504
|
+
object[:name] ||= if titles && (ts = titles[context.default_language || 'und'])
|
1512
1505
|
n = Array(ts).first
|
1513
1506
|
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
|
1514
1507
|
n1 = URI.encode(n[1..-1], /[^\w\.]/)
|
@@ -1534,7 +1527,7 @@ module RDF::Tabular
|
|
1534
1527
|
"cells" => [],
|
1535
1528
|
"virtual" => self.virtual,
|
1536
1529
|
"name" => self.name,
|
1537
|
-
"
|
1530
|
+
"titles" => self.titles
|
1538
1531
|
}
|
1539
1532
|
end
|
1540
1533
|
|
@@ -1556,9 +1549,8 @@ module RDF::Tabular
|
|
1556
1549
|
doubleQuote: true,
|
1557
1550
|
encoding: "utf-8".freeze,
|
1558
1551
|
header: true,
|
1559
|
-
headerColumnCount: 0,
|
1560
1552
|
headerRowCount: 1,
|
1561
|
-
|
1553
|
+
lineTerminators: :auto,
|
1562
1554
|
quoteChar: '"',
|
1563
1555
|
skipBlankRows: false,
|
1564
1556
|
skipColumns: 0,
|
@@ -1575,9 +1567,8 @@ module RDF::Tabular
|
|
1575
1567
|
doubleQuote: :atomic,
|
1576
1568
|
encoding: :atomic,
|
1577
1569
|
header: :atomic,
|
1578
|
-
headerColumnCount: :atomic,
|
1579
1570
|
headerRowCount: :atomic,
|
1580
|
-
|
1571
|
+
lineTerminators: :atomic,
|
1581
1572
|
quoteChar: :atomic,
|
1582
1573
|
skipBlankRows: :atomic,
|
1583
1574
|
skipColumns: :atomic,
|
@@ -1625,8 +1616,8 @@ module RDF::Tabular
|
|
1625
1616
|
options = options.dup
|
1626
1617
|
options.delete(:context) # Don't accidentally use a passed context
|
1627
1618
|
# Normalize input to an IO object
|
1628
|
-
if
|
1629
|
-
return ::RDF::Util::File.open_file(input
|
1619
|
+
if input.is_a?(String)
|
1620
|
+
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
|
1630
1621
|
end
|
1631
1622
|
|
1632
1623
|
table = {
|
@@ -1655,20 +1646,19 @@ module RDF::Tabular
|
|
1655
1646
|
row_data = Array(csv.shift)
|
1656
1647
|
Array(row_data).each_with_index do |value, index|
|
1657
1648
|
# Skip columns
|
1658
|
-
skipCols = skipColumns.to_i
|
1649
|
+
skipCols = skipColumns.to_i
|
1659
1650
|
next if index < skipCols
|
1660
1651
|
|
1661
1652
|
# Trim value
|
1662
1653
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
1663
1654
|
value.rstrip! if %w(true end).include?(trim.to_s)
|
1664
1655
|
|
1665
|
-
# Initialize
|
1666
|
-
# SPEC CONFUSION: does title get an array, or concatenated values?
|
1656
|
+
# Initialize titles
|
1667
1657
|
columns = table["tableSchema"]["columns"] ||= []
|
1668
1658
|
column = columns[index - skipCols] ||= {
|
1669
|
-
"
|
1659
|
+
"titles" => {"und" => []},
|
1670
1660
|
}
|
1671
|
-
column["
|
1661
|
+
column["titles"]["und"] << value
|
1672
1662
|
end
|
1673
1663
|
end
|
1674
1664
|
debug("embedded_metadata") {"table: #{table.inspect}"}
|
@@ -1688,6 +1678,41 @@ module RDF::Tabular
|
|
1688
1678
|
end
|
1689
1679
|
end
|
1690
1680
|
|
1681
|
+
class Datatype < Metadata
|
1682
|
+
PROPERTIES = {
|
1683
|
+
base: :atomic,
|
1684
|
+
format: :atomic,
|
1685
|
+
length: :atomic,
|
1686
|
+
minLength: :atomic,
|
1687
|
+
maxLength: :atomic,
|
1688
|
+
minimum: :atomic,
|
1689
|
+
maximum: :atomic,
|
1690
|
+
minInclusive: :atomic,
|
1691
|
+
maxInclusive: :atomic,
|
1692
|
+
minExclusive: :atomic,
|
1693
|
+
maxExclusive: :atomic,
|
1694
|
+
decimalChar: :atomic,
|
1695
|
+
groupChar: :atomic,
|
1696
|
+
pattern: :atomic,
|
1697
|
+
}.freeze
|
1698
|
+
REQUIRED = [].freeze
|
1699
|
+
|
1700
|
+
# Override `base` in Metadata
|
1701
|
+
def base; object[:base]; end
|
1702
|
+
|
1703
|
+
# Setters
|
1704
|
+
PROPERTIES.each do |a, type|
|
1705
|
+
define_method("#{a}=".to_sym) do |value|
|
1706
|
+
object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
|
1707
|
+
end
|
1708
|
+
end
|
1709
|
+
|
1710
|
+
# Logic for accessing elements as accessors
|
1711
|
+
def method_missing(method, *args)
|
1712
|
+
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1713
|
+
end
|
1714
|
+
end
|
1715
|
+
|
1691
1716
|
# Wraps each resulting row
|
1692
1717
|
class Row
|
1693
1718
|
# Class for returning values
|
@@ -1759,7 +1784,7 @@ module RDF::Tabular
|
|
1759
1784
|
@number = number
|
1760
1785
|
@sourceNumber = source_number
|
1761
1786
|
@values = []
|
1762
|
-
skipColumns = metadata.dialect.skipColumns.to_i
|
1787
|
+
skipColumns = metadata.dialect.skipColumns.to_i
|
1763
1788
|
|
1764
1789
|
@context = table.context.dup
|
1765
1790
|
@context.base = table.url
|
@@ -1786,22 +1811,22 @@ module RDF::Tabular
|
|
1786
1811
|
|
1787
1812
|
@values << cell = Cell.new(metadata, column, self, value)
|
1788
1813
|
|
1789
|
-
datatype =
|
1790
|
-
value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype
|
1791
|
-
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype
|
1814
|
+
datatype = column.datatype || Datatype.new(base: "string")
|
1815
|
+
value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
|
1816
|
+
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
|
1792
1817
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
1793
1818
|
value = column.default || '' if value.empty?
|
1794
1819
|
|
1795
1820
|
cell_values = column.separator ? value.split(column.separator) : [value]
|
1796
1821
|
|
1797
1822
|
cell_values = cell_values.map do |v|
|
1798
|
-
v = v.strip unless %w(string anyAtomicType any).include?(datatype
|
1823
|
+
v = v.strip unless %w(string anyAtomicType any).include?(datatype.base)
|
1799
1824
|
v = column.default || '' if v.empty?
|
1800
1825
|
if Array(column.null).include?(v)
|
1801
1826
|
nil
|
1802
1827
|
else
|
1803
1828
|
# Trim value
|
1804
|
-
if %w(string anyAtomicType any).include?(datatype
|
1829
|
+
if %w(string anyAtomicType any).include?(datatype.base)
|
1805
1830
|
v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
|
1806
1831
|
v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
|
1807
1832
|
else
|
@@ -1809,7 +1834,7 @@ module RDF::Tabular
|
|
1809
1834
|
v.strip!
|
1810
1835
|
end
|
1811
1836
|
|
1812
|
-
expanded_dt = metadata.context.expand_iri(datatype
|
1837
|
+
expanded_dt = metadata.context.expand_iri(datatype.base, vocab: true)
|
1813
1838
|
if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
|
1814
1839
|
lit_or_errors
|
1815
1840
|
else
|
@@ -1861,36 +1886,36 @@ module RDF::Tabular
|
|
1861
1886
|
value_errors = []
|
1862
1887
|
|
1863
1888
|
# Check constraints
|
1864
|
-
if datatype
|
1865
|
-
value_errors << "#{value} does not have length #{datatype
|
1889
|
+
if datatype.length && value.length != datatype.length
|
1890
|
+
value_errors << "#{value} does not have length #{datatype.length}"
|
1866
1891
|
end
|
1867
|
-
if datatype
|
1868
|
-
value_errors << "#{value} does not have length >= #{datatype
|
1892
|
+
if datatype.minLength && value.length < datatype.minLength
|
1893
|
+
value_errors << "#{value} does not have length >= #{datatype.minLength}"
|
1869
1894
|
end
|
1870
|
-
if datatype
|
1871
|
-
value_errors << "#{value} does not have length <= #{datatype
|
1895
|
+
if datatype.maxLength && value.length > datatype.maxLength
|
1896
|
+
value_errors << "#{value} does not have length <= #{datatype.maxLength}"
|
1872
1897
|
end
|
1873
1898
|
|
1874
|
-
format = datatype
|
1899
|
+
format = datatype.format
|
1875
1900
|
# Datatype specific constraints and conversions
|
1876
|
-
case datatype
|
1901
|
+
case datatype.base.to_sym
|
1877
1902
|
when :decimal, :integer, :long, :int, :short, :byte,
|
1878
1903
|
:nonNegativeInteger, :positiveInteger,
|
1879
1904
|
:unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
|
1880
1905
|
:nonPositiveInteger, :negativeInteger,
|
1881
1906
|
:double, :float, :number
|
1882
1907
|
# Normalize representation based on numeric-specific facets
|
1883
|
-
groupChar = datatype.
|
1884
|
-
if datatype
|
1908
|
+
groupChar = datatype.groupChar || ','
|
1909
|
+
if datatype.pattern && !value.match(Regexp.new(datatype.pattern))
|
1885
1910
|
# pattern facet failed
|
1886
|
-
value_errors << "#{value} does not match pattern #{datatype
|
1911
|
+
value_errors << "#{value} does not match pattern #{datatype.pattern}"
|
1887
1912
|
end
|
1888
1913
|
if value.include?(groupChar*2)
|
1889
1914
|
# pattern facet failed
|
1890
1915
|
value_errors << "#{value} has repeating #{groupChar.inspect}"
|
1891
1916
|
end
|
1892
1917
|
value.gsub!(groupChar, '')
|
1893
|
-
value.sub!(datatype.
|
1918
|
+
value.sub!(datatype.decimalChar, '.') if datatype.decimalChar
|
1894
1919
|
|
1895
1920
|
# Extract percent or per-mille sign
|
1896
1921
|
percent = permille = false
|
@@ -1941,7 +1966,7 @@ module RDF::Tabular
|
|
1941
1966
|
|
1942
1967
|
if format
|
1943
1968
|
date_format, time_format = format.split(' ')
|
1944
|
-
if datatype
|
1969
|
+
if datatype.base.to_sym == :time
|
1945
1970
|
date_format, time_format = nil, date_format
|
1946
1971
|
end
|
1947
1972
|
|
@@ -2008,7 +2033,7 @@ module RDF::Tabular
|
|
2008
2033
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
2009
2034
|
when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
|
2010
2035
|
:ENTITY, :ID, :IDREF, :NOTATION
|
2011
|
-
value_errors << "#{value} uses unsupported datatype: #{datatype
|
2036
|
+
value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
|
2012
2037
|
else
|
2013
2038
|
# For other types, format is a regexp
|
2014
2039
|
unless format.nil? || value.match(Regexp.new(format))
|
@@ -2025,7 +2050,7 @@ module RDF::Tabular
|
|
2025
2050
|
end
|
2026
2051
|
|
2027
2052
|
# Final value is a valid literal, or a plain literal otherwise
|
2028
|
-
value_errors << "#{value} is not a valid #{datatype
|
2053
|
+
value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?
|
2029
2054
|
|
2030
2055
|
# FIXME Value constraints
|
2031
2056
|
|