rdf-tabular 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +209 -0
- data/VERSION +1 -1
- data/etc/csvw.jsonld +20 -53
- data/etc/doap.csv-metadata.json +23 -23
- data/lib/rdf/tabular/csvw.rb +121 -80
- data/lib/rdf/tabular/metadata.rb +173 -148
- data/lib/rdf/tabular/reader.rb +149 -99
- data/spec/metadata_spec.rb +110 -113
- data/spec/reader_spec.rb +102 -0
- data/spec/suite_helper.rb +2 -1
- metadata +90 -89
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -180,9 +180,10 @@ module RDF::Tabular
|
|
180
180
|
# Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
|
181
181
|
metadata = case
|
182
182
|
when user_metadata && found_metadata then user_metadata.merge(found_metadata)
|
183
|
-
when user_metadata
|
184
|
-
when found_metadata
|
185
|
-
|
183
|
+
when user_metadata then user_metadata
|
184
|
+
when found_metadata then found_metadata
|
185
|
+
when base then TableGroup.new({tables: [{url: base}]}, options)
|
186
|
+
else TableGroup.new({tables: []}, options)
|
186
187
|
end
|
187
188
|
|
188
189
|
# Make TableGroup, if not already
|
@@ -223,13 +224,13 @@ module RDF::Tabular
|
|
223
224
|
# Figure out type by site
|
224
225
|
object_keys = object.keys.map(&:to_s)
|
225
226
|
type ||= case
|
226
|
-
when %w(
|
227
|
+
when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
|
227
228
|
when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
|
228
229
|
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
|
229
230
|
when %w(columns primaryKey foreignKeys urlTemplate).any? {|k| object_keys.include?(k)} then :Schema
|
230
231
|
when %w(name required).any? {|k| object_keys.include?(k)} then :Column
|
231
|
-
when %w(commentPrefix delimiter doubleQuote encoding header
|
232
|
-
when %w(
|
232
|
+
when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
|
233
|
+
when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
|
233
234
|
end
|
234
235
|
|
235
236
|
case type.to_s.to_sym
|
@@ -312,13 +313,7 @@ module RDF::Tabular
|
|
312
313
|
value
|
313
314
|
end
|
314
315
|
when :datatype
|
315
|
-
|
316
|
-
object[key] = case value
|
317
|
-
when Hash
|
318
|
-
value.inject({}) {|memo, (k,v)| memo[k.to_sym] = v; memo}
|
319
|
-
else
|
320
|
-
value
|
321
|
-
end
|
316
|
+
self.datatype = value
|
322
317
|
when :dialect
|
323
318
|
# If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
|
324
319
|
object[key] = case value
|
@@ -329,7 +324,7 @@ module RDF::Tabular
|
|
329
324
|
value
|
330
325
|
end
|
331
326
|
@type ||= :Table
|
332
|
-
when :
|
327
|
+
when :tables
|
333
328
|
# An array of table descriptions for the tables in the group.
|
334
329
|
object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
|
335
330
|
value.map {|v| Table.new(v, @options.merge(parent: self, context: nil))}
|
@@ -437,6 +432,15 @@ module RDF::Tabular
|
|
437
432
|
end
|
438
433
|
end
|
439
434
|
|
435
|
+
# Set new datatype
|
436
|
+
# @return [Dialect]
|
437
|
+
def datatype=(value)
|
438
|
+
object[:datatype] = case value
|
439
|
+
when Hash then Datatype.new(value)
|
440
|
+
else Datatype.new({base: value})
|
441
|
+
end
|
442
|
+
end
|
443
|
+
|
440
444
|
# Type of this Metadata
|
441
445
|
# @return [:TableGroup, :Table, :Transformation, :Schema, :Column]
|
442
446
|
def type; self.class.name.split('::').last.to_sym; end
|
@@ -463,13 +467,25 @@ module RDF::Tabular
|
|
463
467
|
e.message.split("\n")
|
464
468
|
end
|
465
469
|
|
470
|
+
##
|
471
|
+
# Validation warnings, available only after validating or finding warnings
|
472
|
+
# @return [Array<String>]
|
473
|
+
def warnings
|
474
|
+
((@warnings || []) + object.
|
475
|
+
values.
|
476
|
+
flatten.
|
477
|
+
select {|v| v.is_a?(Metadata)}.
|
478
|
+
map(&:warnings).
|
479
|
+
flatten).compact
|
480
|
+
end
|
481
|
+
|
466
482
|
##
|
467
483
|
# Validate metadata, raising an error containing all errors detected during validation
|
468
484
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
469
485
|
# @return [self]
|
470
486
|
def validate!
|
471
487
|
expected_props, required_props = @properties.keys, @required
|
472
|
-
errors = []
|
488
|
+
errors, @warnings = [], []
|
473
489
|
|
474
490
|
unless is_a?(Dialect) || is_a?(Transformation)
|
475
491
|
expected_props = expected_props + INHERITED_PROPERTIES.keys
|
@@ -478,7 +494,7 @@ module RDF::Tabular
|
|
478
494
|
# It has only expected properties (exclude metadata)
|
479
495
|
check_keys = object.keys - [:"@id", :"@context"]
|
480
496
|
check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
|
481
|
-
|
497
|
+
@warnings << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
|
482
498
|
|
483
499
|
# It has required properties
|
484
500
|
errors << "#{type} missing required keys: #{(required_props & check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
|
@@ -487,8 +503,10 @@ module RDF::Tabular
|
|
487
503
|
object.keys.each do |key|
|
488
504
|
value = object[key]
|
489
505
|
case key
|
490
|
-
when :aboutUrl, :
|
491
|
-
valid_inherited_property?(key, value)
|
506
|
+
when :aboutUrl, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
|
507
|
+
valid_inherited_property?(key, value) do |m|
|
508
|
+
@warnings << m
|
509
|
+
end
|
492
510
|
when :columns
|
493
511
|
if value.is_a?(Array) && value.all? {|v| v.is_a?(Column)}
|
494
512
|
value.each do |v|
|
@@ -505,11 +523,24 @@ module RDF::Tabular
|
|
505
523
|
end
|
506
524
|
when :commentPrefix, :delimiter, :quoteChar
|
507
525
|
unless value.is_a?(String) && value.length == 1
|
508
|
-
|
526
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
|
527
|
+
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
509
528
|
end
|
510
|
-
when :
|
529
|
+
when :lineTerminators
|
511
530
|
unless value.is_a?(String)
|
512
|
-
|
531
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
|
532
|
+
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
533
|
+
end
|
534
|
+
when :datatype
|
535
|
+
if value.is_a?(Datatype)
|
536
|
+
begin
|
537
|
+
value.validate!
|
538
|
+
rescue Error => e
|
539
|
+
errors << e.message
|
540
|
+
end
|
541
|
+
else
|
542
|
+
@warnings << "#{type} has invalid property '#{key}': expected a Datatype"
|
543
|
+
value = object[key] = nil
|
513
544
|
end
|
514
545
|
when :dialect
|
515
546
|
unless value.is_a?(Dialect)
|
@@ -520,13 +551,19 @@ module RDF::Tabular
|
|
520
551
|
rescue Error => e
|
521
552
|
errors << e.message
|
522
553
|
end
|
523
|
-
when :doubleQuote, :header, :
|
554
|
+
when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
|
524
555
|
unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
525
|
-
|
556
|
+
@warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
|
557
|
+
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
558
|
+
end
|
559
|
+
when :required, :suppressOutput, :virtual
|
560
|
+
unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
561
|
+
@warnings << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
|
562
|
+
object.delete(key)
|
526
563
|
end
|
527
564
|
when :encoding
|
528
565
|
unless (Encoding.find(value) rescue false)
|
529
|
-
|
566
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
|
530
567
|
end
|
531
568
|
when :foreignKeys
|
532
569
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
@@ -549,13 +586,13 @@ module RDF::Tabular
|
|
549
586
|
end
|
550
587
|
# resource is the URL of a Table in the TableGroup
|
551
588
|
ref = base.join(reference['resource']).to_s
|
552
|
-
table = root.is_a?(TableGroup) && root.
|
589
|
+
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
553
590
|
errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
554
591
|
table.tableSchema if table
|
555
592
|
elsif reference.has_key?('schemaReference')
|
556
593
|
# resource is the @id of a Schema in the TableGroup
|
557
594
|
ref = base.join(reference['schemaReference']).to_s
|
558
|
-
tables = root.is_a?(TableGroup) ? root.
|
595
|
+
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
559
596
|
case tables.length
|
560
597
|
when 0
|
561
598
|
errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
@@ -581,13 +618,22 @@ module RDF::Tabular
|
|
581
618
|
errors << "#{type} has invalid property '#{key}': reference must be an object: #{reference.inspect}"
|
582
619
|
end
|
583
620
|
end
|
584
|
-
when :
|
621
|
+
when :headerRowCount, :skipColumns, :skipRows
|
585
622
|
unless value.is_a?(Numeric) && value.integer? && value > 0
|
586
|
-
|
623
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
|
624
|
+
object[key] = Dialect::DIALECT_DEFAULTS[key]
|
625
|
+
end
|
626
|
+
when :base
|
627
|
+
@warnings << "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
|
628
|
+
when :format
|
629
|
+
unless value.is_a?(String)
|
630
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
|
631
|
+
object.delete(key)
|
587
632
|
end
|
588
633
|
when :length, :minLength, :maxLength
|
589
634
|
unless value.is_a?(Numeric) && value.integer? && value > 0
|
590
|
-
|
635
|
+
@warnings << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
|
636
|
+
object.delete(key)
|
591
637
|
end
|
592
638
|
unless key == :length || value != object[:length]
|
593
639
|
# Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
|
@@ -598,7 +644,8 @@ module RDF::Tabular
|
|
598
644
|
RDF::Literal::Date.new(value.to_s).valid? ||
|
599
645
|
RDF::Literal::Time.new(value.to_s).valid? ||
|
600
646
|
RDF::Literal::DateTime.new(value.to_s).valid?
|
601
|
-
|
647
|
+
@warnings << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
|
648
|
+
object.delete(key)
|
602
649
|
end
|
603
650
|
when :name
|
604
651
|
unless value.is_a?(String) && name.match(NAME_SYNTAX)
|
@@ -618,7 +665,7 @@ module RDF::Tabular
|
|
618
665
|
Array(value).each do |k|
|
619
666
|
errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c.name == k}
|
620
667
|
end
|
621
|
-
when :
|
668
|
+
when :tables
|
622
669
|
if value.is_a?(Array) && value.all? {|v| v.is_a?(Table)}
|
623
670
|
value.each do |t|
|
624
671
|
begin
|
@@ -664,13 +711,14 @@ module RDF::Tabular
|
|
664
711
|
else
|
665
712
|
errors << "#{type} has invalid property '#{key}': expected array of Transformations"
|
666
713
|
end
|
667
|
-
when :
|
668
|
-
valid_natural_language_property?(:
|
714
|
+
when :titles
|
715
|
+
valid_natural_language_property?(:titles, value) {|m| errors << m}
|
669
716
|
when :trim
|
670
717
|
unless %w(true false 1 0 start end).include?(value.to_s.downcase)
|
671
718
|
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected true, false, 1, 0, start or end"
|
672
719
|
end
|
673
720
|
when :url
|
721
|
+
# Only validate URL in validation mode; this allows for a nil URL
|
674
722
|
unless @url.valid?
|
675
723
|
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
|
676
724
|
end
|
@@ -687,7 +735,7 @@ module RDF::Tabular
|
|
687
735
|
errors << "#{type} has invalid content '#{key}': #{e.message}"
|
688
736
|
end
|
689
737
|
else
|
690
|
-
|
738
|
+
warnings << "#{type} has invalid property '#{key}': unsupported property"
|
691
739
|
end
|
692
740
|
end
|
693
741
|
|
@@ -713,53 +761,24 @@ module RDF::Tabular
|
|
713
761
|
# @yield message error message
|
714
762
|
# @return [Boolean]
|
715
763
|
def valid_inherited_property?(key, value)
|
716
|
-
pv = parent.send(key) if parent
|
717
764
|
error = case key
|
718
765
|
when :aboutUrl, :default, :propertyUrl, :valueUrl
|
719
766
|
"string" unless value.is_a?(String)
|
720
|
-
when :datatype
|
721
|
-
# Normalization usually redundant
|
722
|
-
dt = normalize_datatype(value)
|
723
|
-
# FIXME: support arrays of datatypes?
|
724
|
-
"valid datatype" unless DATATYPES.keys.map(&:to_s).include?(dt[:base]) || RDF::URI(dt[:base]).absolute?
|
725
767
|
when :lang
|
726
768
|
"valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
|
727
769
|
when :null
|
728
|
-
# To be valid, it must be a string or array
|
770
|
+
# To be valid, it must be a string or array
|
729
771
|
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
730
772
|
when :ordered
|
731
773
|
"boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
732
774
|
when :separator
|
733
775
|
"single character" unless value.nil? || value.is_a?(String) && value.length == 1
|
734
776
|
when :textDirection
|
735
|
-
# A value for this property is compatible with an inherited value only if they are identical.
|
736
777
|
"rtl or ltr" unless %(rtl ltr).include?(value)
|
737
|
-
end ||
|
738
|
-
|
739
|
-
case key
|
740
|
-
# Compatibility
|
741
|
-
when :aboutUrl, :propertyUrl, :valueUrl
|
742
|
-
# No restrictions
|
743
|
-
when :default, :ordered, :separator, :textDirection
|
744
|
-
"same as that defined on parent" if pv && pv != value
|
745
|
-
when :datatype
|
746
|
-
if pv
|
747
|
-
# Normalization usually redundant
|
748
|
-
dt = normalize_datatype(value)
|
749
|
-
pvdt = normalize_datatype(pv)
|
750
|
-
vl = RDF::Literal.new("", datatype: DATATYPES[dt[:base].to_sym])
|
751
|
-
pvvl = RDF::Literal.new("", datatype: DATATYPES[pvdt[:base].to_sym])
|
752
|
-
# must be a subclass of some type defined on parent
|
753
|
-
"compatible datatype of that defined on parent" unless vl.is_a?(pvvl.class)
|
754
|
-
end
|
755
|
-
when :lang
|
756
|
-
"lang expected to restrict #{pv}" if pv && !value.start_with?(pv)
|
757
|
-
when :null
|
758
|
-
"subset of that defined on parent" if pv && (Array(value) & Array(pv)) != Array(value)
|
759
778
|
end
|
760
779
|
|
761
780
|
if error
|
762
|
-
yield "#{type} has invalid property '#{key}' (
|
781
|
+
yield "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{error}"
|
763
782
|
false
|
764
783
|
else
|
765
784
|
true
|
@@ -888,7 +907,7 @@ module RDF::Tabular
|
|
888
907
|
if self.is_a?(Table) && self.parent
|
889
908
|
self.parent
|
890
909
|
else
|
891
|
-
content = {"@type" => "TableGroup", "
|
910
|
+
content = {"@type" => "TableGroup", "tables" => [self]}
|
892
911
|
content['@context'] = object.delete(:@context) if object[:@context]
|
893
912
|
ctx = @context
|
894
913
|
self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
|
@@ -907,7 +926,7 @@ module RDF::Tabular
|
|
907
926
|
if md.parent
|
908
927
|
md.parent
|
909
928
|
else
|
910
|
-
content = {"@type" => "TableGroup", "
|
929
|
+
content = {"@type" => "TableGroup", "tables" => [md]}
|
911
930
|
ctx = md.context
|
912
931
|
content['@context'] = md.object.delete(:@context) if md.object[:@context]
|
913
932
|
md.remove_instance_variable(:@context) if md.instance_variables.include?(:@context)
|
@@ -962,18 +981,18 @@ module RDF::Tabular
|
|
962
981
|
a = object[key].is_a?(Array) ? object[key] : [object[key]].compact
|
963
982
|
b = value.is_a?(Array) ? value : [value]
|
964
983
|
object[key] = a + b
|
965
|
-
when :
|
984
|
+
when :tables
|
966
985
|
# When an array of table descriptions B is imported into an original array of table descriptions A, each table description within B is combined into the original array A by:
|
967
986
|
value.each do |tb|
|
968
987
|
if ta = object[key].detect {|e| e.url == tb.url}
|
969
988
|
# if there is a table description with the same url in A, the table description from B is imported into the matching table description in A
|
970
|
-
debug("merge!:
|
989
|
+
debug("merge!: tables") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
|
971
990
|
ta.merge!(tb)
|
972
991
|
else
|
973
992
|
# otherwise, the table description from B is appended to the array of table descriptions A
|
974
993
|
tb = tb.dup
|
975
994
|
tb.instance_variable_set(:@parent, self)
|
976
|
-
debug("merge!:
|
995
|
+
debug("merge!: tables") {"add TB: #{tb.inspect}"}
|
977
996
|
object[key] << tb
|
978
997
|
end
|
979
998
|
end
|
@@ -995,11 +1014,11 @@ module RDF::Tabular
|
|
995
1014
|
# When an array of column descriptions B is imported into an original array of column descriptions A, each column description within B is combined into the original array A by:
|
996
1015
|
Array(value).each_with_index do |cb, index|
|
997
1016
|
ca = object[key][index] || {}
|
998
|
-
va = ([ca[:name]] + (ca[:
|
999
|
-
vb = ([cb[:name]] + (cb[:
|
1017
|
+
va = ([ca[:name]] + (ca[:titles] || {}).values.flatten).compact.map(&:downcase)
|
1018
|
+
vb = ([cb[:name]] + (cb[:titles] || {}).values.flatten).compact.map(&:downcase)
|
1000
1019
|
if !(va & vb).empty?
|
1001
1020
|
debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
|
1002
|
-
# If there's a non-empty case-insensitive intersection between the name and
|
1021
|
+
# If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
|
1003
1022
|
ca.merge!(cb)
|
1004
1023
|
elsif ca.nil? && cb.virtual
|
1005
1024
|
debug("merge!: columns") {"index: #{index}, virtual"}
|
@@ -1127,11 +1146,6 @@ module RDF::Tabular
|
|
1127
1146
|
end
|
1128
1147
|
when :natural_language
|
1129
1148
|
value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
|
1130
|
-
when :atomic
|
1131
|
-
case key
|
1132
|
-
when :datatype then normalize_datatype(value)
|
1133
|
-
else value
|
1134
|
-
end
|
1135
1149
|
else
|
1136
1150
|
value
|
1137
1151
|
end
|
@@ -1139,27 +1153,6 @@ module RDF::Tabular
|
|
1139
1153
|
self
|
1140
1154
|
end
|
1141
1155
|
|
1142
|
-
##
|
1143
|
-
# Normalize datatype to Object/Hash representation
|
1144
|
-
# @param [String, Hash{Symbol => String}] value
|
1145
|
-
# @return [Hash{Symbol => String}]
|
1146
|
-
def normalize_datatype(value)
|
1147
|
-
# Normalize datatype to array of object form
|
1148
|
-
value = {base: value} unless value.is_a?(Hash)
|
1149
|
-
# Create a new representation using symbols and transformed values
|
1150
|
-
nv = {}
|
1151
|
-
value.each do |kk, vv|
|
1152
|
-
case kk.to_sym
|
1153
|
-
when :base, :decimalChar, :format, :groupChar, :pattern then nv[kk.to_sym] = vv
|
1154
|
-
when :length, :minLength, :maxLength, :minimum, :maximum,
|
1155
|
-
:minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
1156
|
-
nv[kk.to_sym] = vv.to_i
|
1157
|
-
end
|
1158
|
-
end
|
1159
|
-
nv[:base] ||= 'string'
|
1160
|
-
nv
|
1161
|
-
end
|
1162
|
-
|
1163
1156
|
##
|
1164
1157
|
# Normalize JSON-LD
|
1165
1158
|
#
|
@@ -1246,7 +1239,7 @@ module RDF::Tabular
|
|
1246
1239
|
def csv_options
|
1247
1240
|
{
|
1248
1241
|
col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
|
1249
|
-
row_sep: (is_a?(Dialect) ? self : dialect).
|
1242
|
+
row_sep: Array((is_a?(Dialect) ? self : dialect).lineTerminators).first,
|
1250
1243
|
quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
|
1251
1244
|
encoding: (is_a?(Dialect) ? self : dialect).encoding
|
1252
1245
|
}
|
@@ -1269,7 +1262,7 @@ module RDF::Tabular
|
|
1269
1262
|
:@id => :link,
|
1270
1263
|
:@type => :atomic,
|
1271
1264
|
notes: :array,
|
1272
|
-
|
1265
|
+
tables: :array,
|
1273
1266
|
tableSchema: :object,
|
1274
1267
|
tableDirection: :atomic,
|
1275
1268
|
dialect: :object,
|
@@ -1293,7 +1286,7 @@ module RDF::Tabular
|
|
1293
1286
|
# Does the Metadata or any descendant have any common properties
|
1294
1287
|
# @return [Boolean]
|
1295
1288
|
def has_annotations?
|
1296
|
-
super ||
|
1289
|
+
super || tables.any? {|t| t.has_annotations? }
|
1297
1290
|
end
|
1298
1291
|
|
1299
1292
|
# Logic for accessing elements as accessors
|
@@ -1306,10 +1299,10 @@ module RDF::Tabular
|
|
1306
1299
|
end
|
1307
1300
|
|
1308
1301
|
##
|
1309
|
-
# Iterate over all
|
1302
|
+
# Iterate over all tables
|
1310
1303
|
# @yield [Table]
|
1311
|
-
def
|
1312
|
-
|
1304
|
+
def each_table
|
1305
|
+
tables.map(&:url).each do |url|
|
1313
1306
|
yield for_table(url)
|
1314
1307
|
end
|
1315
1308
|
end
|
@@ -1320,9 +1313,9 @@ module RDF::Tabular
|
|
1320
1313
|
# @param [String] url of the table
|
1321
1314
|
# @return [Table]
|
1322
1315
|
def for_table(url)
|
1323
|
-
# If there are no
|
1324
|
-
#self.
|
1325
|
-
if table = Array(
|
1316
|
+
# If there are no tables, assume there's one for this table
|
1317
|
+
#self.tables ||= [Table.new(url: url)]
|
1318
|
+
if table = Array(tables).detect {|t| t.url == url}
|
1326
1319
|
# Set document base for this table for resolving URLs
|
1327
1320
|
table.instance_variable_set(:@context, context.dup)
|
1328
1321
|
table.context.base = url
|
@@ -1335,7 +1328,7 @@ module RDF::Tabular
|
|
1335
1328
|
{
|
1336
1329
|
"@id" => id,
|
1337
1330
|
"@type" => "AnnotatedTableGroup",
|
1338
|
-
"
|
1331
|
+
"tables" => tables.map(&:to_atd)
|
1339
1332
|
}
|
1340
1333
|
end
|
1341
1334
|
end
|
@@ -1401,7 +1394,7 @@ module RDF::Tabular
|
|
1401
1394
|
source: :atomic,
|
1402
1395
|
targetFormat: :link,
|
1403
1396
|
scriptFormat: :link,
|
1404
|
-
|
1397
|
+
titles: :natural_language,
|
1405
1398
|
url: :link,
|
1406
1399
|
}.freeze
|
1407
1400
|
REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
|
@@ -1462,7 +1455,7 @@ module RDF::Tabular
|
|
1462
1455
|
:@type => :atomic,
|
1463
1456
|
name: :atomic,
|
1464
1457
|
suppressOutput: :atomic,
|
1465
|
-
|
1458
|
+
titles: :natural_language,
|
1466
1459
|
required: :atomic,
|
1467
1460
|
virtual: :atomic,
|
1468
1461
|
}.freeze
|
@@ -1484,7 +1477,7 @@ module RDF::Tabular
|
|
1484
1477
|
# @note this is lazy evaluated to avoid dependencies on setting dialect vs. initializing columns
|
1485
1478
|
# @return [Integer] 1-based colnum number
|
1486
1479
|
def sourceNumber
|
1487
|
-
skipColumns = table ?
|
1480
|
+
skipColumns = table ? dialect.skipColumns.to_i : 0
|
1488
1481
|
number + skipColumns
|
1489
1482
|
end
|
1490
1483
|
|
@@ -1506,9 +1499,9 @@ module RDF::Tabular
|
|
1506
1499
|
end
|
1507
1500
|
end
|
1508
1501
|
|
1509
|
-
# Return or create a name for the column from
|
1502
|
+
# Return or create a name for the column from titles, if it exists
|
1510
1503
|
def name
|
1511
|
-
object[:name] ||= if
|
1504
|
+
object[:name] ||= if titles && (ts = titles[context.default_language || 'und'])
|
1512
1505
|
n = Array(ts).first
|
1513
1506
|
n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
|
1514
1507
|
n1 = URI.encode(n[1..-1], /[^\w\.]/)
|
@@ -1534,7 +1527,7 @@ module RDF::Tabular
|
|
1534
1527
|
"cells" => [],
|
1535
1528
|
"virtual" => self.virtual,
|
1536
1529
|
"name" => self.name,
|
1537
|
-
"
|
1530
|
+
"titles" => self.titles
|
1538
1531
|
}
|
1539
1532
|
end
|
1540
1533
|
|
@@ -1556,9 +1549,8 @@ module RDF::Tabular
|
|
1556
1549
|
doubleQuote: true,
|
1557
1550
|
encoding: "utf-8".freeze,
|
1558
1551
|
header: true,
|
1559
|
-
headerColumnCount: 0,
|
1560
1552
|
headerRowCount: 1,
|
1561
|
-
|
1553
|
+
lineTerminators: :auto,
|
1562
1554
|
quoteChar: '"',
|
1563
1555
|
skipBlankRows: false,
|
1564
1556
|
skipColumns: 0,
|
@@ -1575,9 +1567,8 @@ module RDF::Tabular
|
|
1575
1567
|
doubleQuote: :atomic,
|
1576
1568
|
encoding: :atomic,
|
1577
1569
|
header: :atomic,
|
1578
|
-
headerColumnCount: :atomic,
|
1579
1570
|
headerRowCount: :atomic,
|
1580
|
-
|
1571
|
+
lineTerminators: :atomic,
|
1581
1572
|
quoteChar: :atomic,
|
1582
1573
|
skipBlankRows: :atomic,
|
1583
1574
|
skipColumns: :atomic,
|
@@ -1625,8 +1616,8 @@ module RDF::Tabular
|
|
1625
1616
|
options = options.dup
|
1626
1617
|
options.delete(:context) # Don't accidentally use a passed context
|
1627
1618
|
# Normalize input to an IO object
|
1628
|
-
if
|
1629
|
-
return ::RDF::Util::File.open_file(input
|
1619
|
+
if input.is_a?(String)
|
1620
|
+
return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
|
1630
1621
|
end
|
1631
1622
|
|
1632
1623
|
table = {
|
@@ -1655,20 +1646,19 @@ module RDF::Tabular
|
|
1655
1646
|
row_data = Array(csv.shift)
|
1656
1647
|
Array(row_data).each_with_index do |value, index|
|
1657
1648
|
# Skip columns
|
1658
|
-
skipCols = skipColumns.to_i
|
1649
|
+
skipCols = skipColumns.to_i
|
1659
1650
|
next if index < skipCols
|
1660
1651
|
|
1661
1652
|
# Trim value
|
1662
1653
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
1663
1654
|
value.rstrip! if %w(true end).include?(trim.to_s)
|
1664
1655
|
|
1665
|
-
# Initialize
|
1666
|
-
# SPEC CONFUSION: does title get an array, or concatenated values?
|
1656
|
+
# Initialize titles
|
1667
1657
|
columns = table["tableSchema"]["columns"] ||= []
|
1668
1658
|
column = columns[index - skipCols] ||= {
|
1669
|
-
"
|
1659
|
+
"titles" => {"und" => []},
|
1670
1660
|
}
|
1671
|
-
column["
|
1661
|
+
column["titles"]["und"] << value
|
1672
1662
|
end
|
1673
1663
|
end
|
1674
1664
|
debug("embedded_metadata") {"table: #{table.inspect}"}
|
@@ -1688,6 +1678,41 @@ module RDF::Tabular
|
|
1688
1678
|
end
|
1689
1679
|
end
|
1690
1680
|
|
1681
|
+
class Datatype < Metadata
|
1682
|
+
PROPERTIES = {
|
1683
|
+
base: :atomic,
|
1684
|
+
format: :atomic,
|
1685
|
+
length: :atomic,
|
1686
|
+
minLength: :atomic,
|
1687
|
+
maxLength: :atomic,
|
1688
|
+
minimum: :atomic,
|
1689
|
+
maximum: :atomic,
|
1690
|
+
minInclusive: :atomic,
|
1691
|
+
maxInclusive: :atomic,
|
1692
|
+
minExclusive: :atomic,
|
1693
|
+
maxExclusive: :atomic,
|
1694
|
+
decimalChar: :atomic,
|
1695
|
+
groupChar: :atomic,
|
1696
|
+
pattern: :atomic,
|
1697
|
+
}.freeze
|
1698
|
+
REQUIRED = [].freeze
|
1699
|
+
|
1700
|
+
# Override `base` in Metadata
|
1701
|
+
def base; object[:base]; end
|
1702
|
+
|
1703
|
+
# Setters
|
1704
|
+
PROPERTIES.each do |a, type|
|
1705
|
+
define_method("#{a}=".to_sym) do |value|
|
1706
|
+
object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
|
1707
|
+
end
|
1708
|
+
end
|
1709
|
+
|
1710
|
+
# Logic for accessing elements as accessors
|
1711
|
+
def method_missing(method, *args)
|
1712
|
+
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1713
|
+
end
|
1714
|
+
end
|
1715
|
+
|
1691
1716
|
# Wraps each resulting row
|
1692
1717
|
class Row
|
1693
1718
|
# Class for returning values
|
@@ -1759,7 +1784,7 @@ module RDF::Tabular
|
|
1759
1784
|
@number = number
|
1760
1785
|
@sourceNumber = source_number
|
1761
1786
|
@values = []
|
1762
|
-
skipColumns = metadata.dialect.skipColumns.to_i
|
1787
|
+
skipColumns = metadata.dialect.skipColumns.to_i
|
1763
1788
|
|
1764
1789
|
@context = table.context.dup
|
1765
1790
|
@context.base = table.url
|
@@ -1786,22 +1811,22 @@ module RDF::Tabular
|
|
1786
1811
|
|
1787
1812
|
@values << cell = Cell.new(metadata, column, self, value)
|
1788
1813
|
|
1789
|
-
datatype =
|
1790
|
-
value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype
|
1791
|
-
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype
|
1814
|
+
datatype = column.datatype || Datatype.new(base: "string")
|
1815
|
+
value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
|
1816
|
+
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
|
1792
1817
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
1793
1818
|
value = column.default || '' if value.empty?
|
1794
1819
|
|
1795
1820
|
cell_values = column.separator ? value.split(column.separator) : [value]
|
1796
1821
|
|
1797
1822
|
cell_values = cell_values.map do |v|
|
1798
|
-
v = v.strip unless %w(string anyAtomicType any).include?(datatype
|
1823
|
+
v = v.strip unless %w(string anyAtomicType any).include?(datatype.base)
|
1799
1824
|
v = column.default || '' if v.empty?
|
1800
1825
|
if Array(column.null).include?(v)
|
1801
1826
|
nil
|
1802
1827
|
else
|
1803
1828
|
# Trim value
|
1804
|
-
if %w(string anyAtomicType any).include?(datatype
|
1829
|
+
if %w(string anyAtomicType any).include?(datatype.base)
|
1805
1830
|
v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
|
1806
1831
|
v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
|
1807
1832
|
else
|
@@ -1809,7 +1834,7 @@ module RDF::Tabular
|
|
1809
1834
|
v.strip!
|
1810
1835
|
end
|
1811
1836
|
|
1812
|
-
expanded_dt = metadata.context.expand_iri(datatype
|
1837
|
+
expanded_dt = metadata.context.expand_iri(datatype.base, vocab: true)
|
1813
1838
|
if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
|
1814
1839
|
lit_or_errors
|
1815
1840
|
else
|
@@ -1861,36 +1886,36 @@ module RDF::Tabular
|
|
1861
1886
|
value_errors = []
|
1862
1887
|
|
1863
1888
|
# Check constraints
|
1864
|
-
if datatype
|
1865
|
-
value_errors << "#{value} does not have length #{datatype
|
1889
|
+
if datatype.length && value.length != datatype.length
|
1890
|
+
value_errors << "#{value} does not have length #{datatype.length}"
|
1866
1891
|
end
|
1867
|
-
if datatype
|
1868
|
-
value_errors << "#{value} does not have length >= #{datatype
|
1892
|
+
if datatype.minLength && value.length < datatype.minLength
|
1893
|
+
value_errors << "#{value} does not have length >= #{datatype.minLength}"
|
1869
1894
|
end
|
1870
|
-
if datatype
|
1871
|
-
value_errors << "#{value} does not have length <= #{datatype
|
1895
|
+
if datatype.maxLength && value.length > datatype.maxLength
|
1896
|
+
value_errors << "#{value} does not have length <= #{datatype.maxLength}"
|
1872
1897
|
end
|
1873
1898
|
|
1874
|
-
format = datatype
|
1899
|
+
format = datatype.format
|
1875
1900
|
# Datatype specific constraints and conversions
|
1876
|
-
case datatype
|
1901
|
+
case datatype.base.to_sym
|
1877
1902
|
when :decimal, :integer, :long, :int, :short, :byte,
|
1878
1903
|
:nonNegativeInteger, :positiveInteger,
|
1879
1904
|
:unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
|
1880
1905
|
:nonPositiveInteger, :negativeInteger,
|
1881
1906
|
:double, :float, :number
|
1882
1907
|
# Normalize representation based on numeric-specific facets
|
1883
|
-
groupChar = datatype.
|
1884
|
-
if datatype
|
1908
|
+
groupChar = datatype.groupChar || ','
|
1909
|
+
if datatype.pattern && !value.match(Regexp.new(datatype.pattern))
|
1885
1910
|
# pattern facet failed
|
1886
|
-
value_errors << "#{value} does not match pattern #{datatype
|
1911
|
+
value_errors << "#{value} does not match pattern #{datatype.pattern}"
|
1887
1912
|
end
|
1888
1913
|
if value.include?(groupChar*2)
|
1889
1914
|
# pattern facet failed
|
1890
1915
|
value_errors << "#{value} has repeating #{groupChar.inspect}"
|
1891
1916
|
end
|
1892
1917
|
value.gsub!(groupChar, '')
|
1893
|
-
value.sub!(datatype.
|
1918
|
+
value.sub!(datatype.decimalChar, '.') if datatype.decimalChar
|
1894
1919
|
|
1895
1920
|
# Extract percent or per-mille sign
|
1896
1921
|
percent = permille = false
|
@@ -1941,7 +1966,7 @@ module RDF::Tabular
|
|
1941
1966
|
|
1942
1967
|
if format
|
1943
1968
|
date_format, time_format = format.split(' ')
|
1944
|
-
if datatype
|
1969
|
+
if datatype.base.to_sym == :time
|
1945
1970
|
date_format, time_format = nil, date_format
|
1946
1971
|
end
|
1947
1972
|
|
@@ -2008,7 +2033,7 @@ module RDF::Tabular
|
|
2008
2033
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
2009
2034
|
when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
|
2010
2035
|
:ENTITY, :ID, :IDREF, :NOTATION
|
2011
|
-
value_errors << "#{value} uses unsupported datatype: #{datatype
|
2036
|
+
value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
|
2012
2037
|
else
|
2013
2038
|
# For other types, format is a regexp
|
2014
2039
|
unless format.nil? || value.match(Regexp.new(format))
|
@@ -2025,7 +2050,7 @@ module RDF::Tabular
|
|
2025
2050
|
end
|
2026
2051
|
|
2027
2052
|
# Final value is a valid literal, or a plain literal otherwise
|
2028
|
-
value_errors << "#{value} is not a valid #{datatype
|
2053
|
+
value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?
|
2029
2054
|
|
2030
2055
|
# FIXME Value constraints
|
2031
2056
|
|