rdf-tabular 0.2.1 → 0.4.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/VERSION +1 -1
- data/etc/earl.ttl +1579 -799
- data/lib/rdf/tabular.rb +0 -1
- data/lib/rdf/tabular/format.rb +16 -0
- data/lib/rdf/tabular/metadata.rb +251 -254
- data/lib/rdf/tabular/reader.rb +98 -146
- data/lib/rdf/tabular/uax35.rb +4 -4
- data/spec/format_spec.rb +34 -0
- data/spec/matchers.rb +3 -78
- data/spec/metadata_spec.rb +172 -105
- data/spec/reader_spec.rb +28 -25
- data/spec/spec_helper.rb +5 -3
- data/spec/suite_helper.rb +1 -1
- data/spec/suite_spec.rb +8 -9
- metadata +118 -55
- data/lib/rdf/tabular/utils.rb +0 -33
data/lib/rdf/tabular.rb
CHANGED
@@ -11,7 +11,6 @@ module RDF
|
|
11
11
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
12
12
|
module Tabular
|
13
13
|
require 'rdf/tabular/format'
|
14
|
-
require 'rdf/tabular/utils'
|
15
14
|
autoload :Column, 'rdf/tabular/metadata'
|
16
15
|
autoload :CSVW, 'rdf/tabular/csvw'
|
17
16
|
autoload :Dialect, 'rdf/tabular/metadata'
|
data/lib/rdf/tabular/format.rb
CHANGED
@@ -46,5 +46,21 @@ module RDF::Tabular
|
|
46
46
|
def self.detect(sample)
|
47
47
|
!!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
|
48
48
|
end
|
49
|
+
|
50
|
+
##
|
51
|
+
# Hash of CLI commands appropriate for this format
|
52
|
+
# @return [Hash{Symbol => Lambda(Array, Hash)}]
|
53
|
+
def self.cli_commands
|
54
|
+
{
|
55
|
+
:"tabular-json" => ->(argv, opts) do
|
56
|
+
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
57
|
+
out = opts[:output] || $stdout
|
58
|
+
out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
|
59
|
+
RDF::CLI.parse(argv, opts) do |reader|
|
60
|
+
out.puts reader.to_json
|
61
|
+
end
|
62
|
+
end
|
63
|
+
}
|
64
|
+
end
|
49
65
|
end
|
50
66
|
end
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -19,16 +19,12 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
19
19
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
20
20
|
module RDF::Tabular
|
21
21
|
class Metadata
|
22
|
-
include
|
22
|
+
include RDF::Util::Logger
|
23
23
|
|
24
24
|
# Hash representation
|
25
25
|
# @return [Hash<Symbol,Object>]
|
26
26
|
attr_accessor :object
|
27
27
|
|
28
|
-
# Warnings detected on initialization or when setting properties
|
29
|
-
# @return [Array<String>]
|
30
|
-
attr_accessor :warnings
|
31
|
-
|
32
28
|
# Inheritect properties, valid for all types
|
33
29
|
INHERITED_PROPERTIES = {
|
34
30
|
aboutUrl: :uri_template,
|
@@ -137,7 +133,7 @@ module RDF::Tabular
|
|
137
133
|
#
|
138
134
|
# @param [String] path
|
139
135
|
# @param [Hash{Symbol => Object}] options
|
140
|
-
# see `RDF::Util::File.open_file` in RDF.rb and {
|
136
|
+
# see `RDF::Util::File.open_file` in RDF.rb and {new}
|
141
137
|
# @yield [Metadata]
|
142
138
|
# @raise [IOError] if file not found
|
143
139
|
def self.open(path, options = {})
|
@@ -153,7 +149,7 @@ module RDF::Tabular
|
|
153
149
|
end
|
154
150
|
|
155
151
|
# Return the well-known configuration for a file, and remember using a weak-reference cache to avoid uncessary retreivles.
|
156
|
-
# @param [String] base
|
152
|
+
# @param [String] base the URL used for finding the file
|
157
153
|
# @return [Array<String>, false]
|
158
154
|
def self.site_wide_config(base)
|
159
155
|
require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
|
@@ -179,7 +175,6 @@ module RDF::Tabular
|
|
179
175
|
# @return [Metadata]
|
180
176
|
def self.for_input(input, options = {})
|
181
177
|
base = options[:base]
|
182
|
-
warnings = options.fetch(:warnings, [])
|
183
178
|
|
184
179
|
# Use user metadata, if provided
|
185
180
|
metadata = case options[:metadata]
|
@@ -202,10 +197,7 @@ module RDF::Tabular
|
|
202
197
|
if md.describes_file?(base)
|
203
198
|
metadata = md
|
204
199
|
else
|
205
|
-
|
206
|
-
if options[:validate] && !options[:warnings]
|
207
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
208
|
-
end
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
209
201
|
end
|
210
202
|
end
|
211
203
|
end
|
@@ -214,12 +206,12 @@ module RDF::Tabular
|
|
214
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
215
207
|
if !metadata && base
|
216
208
|
templates = site_wide_config(base)
|
217
|
-
|
209
|
+
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
218
210
|
locs = templates.map do |template|
|
219
211
|
t = Addressable::Template.new(template)
|
220
212
|
RDF::URI(base).join(t.expand(url: base).to_s)
|
221
213
|
end
|
222
|
-
|
214
|
+
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
223
215
|
|
224
216
|
locs.each do |loc|
|
225
217
|
metadata ||= begin
|
@@ -230,15 +222,12 @@ module RDF::Tabular
|
|
230
222
|
if md.describes_file?(base)
|
231
223
|
md
|
232
224
|
else
|
233
|
-
|
234
|
-
if options[:validate] && !options[:warnings]
|
235
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
236
|
-
end
|
225
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
237
226
|
nil
|
238
227
|
end
|
239
228
|
end
|
240
229
|
rescue IOError
|
241
|
-
|
230
|
+
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
242
231
|
nil
|
243
232
|
end
|
244
233
|
end
|
@@ -331,7 +320,6 @@ module RDF::Tabular
|
|
331
320
|
# @return [Metadata]
|
332
321
|
def initialize(input, options = {})
|
333
322
|
@options = options.dup
|
334
|
-
@options[:depth] ||= 0
|
335
323
|
|
336
324
|
# Parent of this Metadata, if any
|
337
325
|
@parent = @options[:parent]
|
@@ -344,14 +332,14 @@ module RDF::Tabular
|
|
344
332
|
|
345
333
|
@context = case input['@context']
|
346
334
|
when Array
|
347
|
-
|
335
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
348
336
|
c = LOCAL_CONTEXT.dup
|
349
337
|
c.base = RDF::URI(opt_base)
|
350
338
|
obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
|
351
339
|
raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
|
352
340
|
c.parse(obj)
|
353
341
|
when Hash
|
354
|
-
|
342
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
355
343
|
c = LOCAL_CONTEXT.dup
|
356
344
|
c.base = RDF::URI(opt_base)
|
357
345
|
c.parse(input['@context'])
|
@@ -362,7 +350,7 @@ module RDF::Tabular
|
|
362
350
|
c
|
363
351
|
else
|
364
352
|
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
365
|
-
|
353
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
366
354
|
LOCAL_CONTEXT.dup
|
367
355
|
c = LOCAL_CONTEXT.dup
|
368
356
|
c.base = RDF::URI(opt_base)
|
@@ -375,7 +363,7 @@ module RDF::Tabular
|
|
375
363
|
@options[:base] = @context ? @context.base : RDF::URI(opt_base)
|
376
364
|
|
377
365
|
if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
|
378
|
-
|
366
|
+
log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
|
379
367
|
@context.default_language = nil
|
380
368
|
end
|
381
369
|
|
@@ -385,7 +373,7 @@ module RDF::Tabular
|
|
385
373
|
|
386
374
|
@object = {}
|
387
375
|
|
388
|
-
|
376
|
+
log_depth do
|
389
377
|
# Input was parsed in .new
|
390
378
|
# Metadata is object with symbolic keys
|
391
379
|
input.each do |key, value|
|
@@ -401,7 +389,7 @@ module RDF::Tabular
|
|
401
389
|
object[:@id] = if value.is_a?(String)
|
402
390
|
value
|
403
391
|
else
|
404
|
-
|
392
|
+
log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
|
405
393
|
"" # Default value
|
406
394
|
end
|
407
395
|
@id = @options[:base].join(object[:@id])
|
@@ -426,14 +414,14 @@ module RDF::Tabular
|
|
426
414
|
end
|
427
415
|
|
428
416
|
if reason
|
429
|
-
|
430
|
-
|
431
|
-
|
417
|
+
log_debug("md#initialize") {reason}
|
418
|
+
log_debug("md#initialize") {"filenames: #{filenames}"}
|
419
|
+
log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
|
432
420
|
end
|
433
421
|
end
|
434
422
|
|
435
423
|
# Getters and Setters
|
436
|
-
INHERITED_PROPERTIES.
|
424
|
+
INHERITED_PROPERTIES.each do |key, type|
|
437
425
|
define_method(key) do
|
438
426
|
object.fetch(key) do
|
439
427
|
parent ? parent.send(key) : default_value(key)
|
@@ -459,12 +447,7 @@ module RDF::Tabular
|
|
459
447
|
# We handle this through a separate datatype= setter
|
460
448
|
end
|
461
449
|
|
462
|
-
|
463
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
464
|
-
object.delete(key)
|
465
|
-
else
|
466
|
-
object[key] = value
|
467
|
-
end
|
450
|
+
set_property(key, type, value, invalid)
|
468
451
|
end
|
469
452
|
end
|
470
453
|
|
@@ -492,7 +475,7 @@ module RDF::Tabular
|
|
492
475
|
when Schema
|
493
476
|
value
|
494
477
|
else
|
495
|
-
|
478
|
+
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
496
479
|
Schema.new({}, @options.merge(parent: self, context: nil))
|
497
480
|
end
|
498
481
|
end
|
@@ -539,7 +522,7 @@ module RDF::Tabular
|
|
539
522
|
when Dialect
|
540
523
|
value
|
541
524
|
else
|
542
|
-
|
525
|
+
log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
|
543
526
|
nil
|
544
527
|
end
|
545
528
|
end
|
@@ -549,15 +532,15 @@ module RDF::Tabular
|
|
549
532
|
# @raise [Error] if datatype is not valid
|
550
533
|
def datatype=(value)
|
551
534
|
val = case value
|
552
|
-
when Hash then Datatype.new(value, parent: self)
|
553
|
-
else Datatype.new({base: value}, parent: self)
|
535
|
+
when Hash then Datatype.new(value, @options.merge(parent: self))
|
536
|
+
else Datatype.new({base: value}, @options.merge(parent: self))
|
554
537
|
end
|
555
538
|
|
556
539
|
if val.valid? || value.is_a?(Hash)
|
557
540
|
# Set it if it was specified as an object, which may cause validation errors later
|
558
541
|
object[:datatype] = val
|
559
542
|
else
|
560
|
-
|
543
|
+
log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
|
561
544
|
end
|
562
545
|
end
|
563
546
|
|
@@ -572,40 +555,20 @@ module RDF::Tabular
|
|
572
555
|
##
|
573
556
|
# Do we have valid metadata?
|
574
557
|
def valid?
|
575
|
-
validate
|
576
|
-
|
577
|
-
rescue
|
578
|
-
false
|
579
|
-
end
|
580
|
-
|
581
|
-
##
|
582
|
-
# Validation errors
|
583
|
-
# @return [Array<String>]
|
584
|
-
def errors
|
585
|
-
validate! && []
|
586
|
-
rescue Error => e
|
587
|
-
e.message.split("\n")
|
558
|
+
validate # Possibly re-validate
|
559
|
+
!log_statistics[:error]
|
588
560
|
end
|
589
561
|
|
590
|
-
|
591
|
-
|
592
|
-
# @return [Array<String>]
|
593
|
-
def warnings
|
594
|
-
((@warnings || []) + object.
|
595
|
-
values.
|
596
|
-
flatten.
|
597
|
-
select {|v| v.is_a?(Metadata)}.
|
598
|
-
map(&:warnings).
|
599
|
-
flatten).compact.uniq
|
562
|
+
def validate!
|
563
|
+
raise Error, "Metadata error" unless valid?
|
600
564
|
end
|
601
565
|
|
602
566
|
##
|
603
567
|
# Validate metadata, raising an error containing all errors detected during validation
|
604
568
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
605
569
|
# @return [self]
|
606
|
-
def validate
|
570
|
+
def validate
|
607
571
|
expected_props, required_props = @properties.keys, @required
|
608
|
-
errors = []
|
609
572
|
|
610
573
|
unless is_a?(Dialect) || is_a?(Transformation)
|
611
574
|
expected_props = expected_props + INHERITED_PROPERTIES.keys
|
@@ -614,10 +577,10 @@ module RDF::Tabular
|
|
614
577
|
# It has only expected properties (exclude metadata)
|
615
578
|
check_keys = object.keys - [:"@id", :"@context"]
|
616
579
|
check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
|
617
|
-
|
580
|
+
log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
|
618
581
|
|
619
582
|
# It has required properties
|
620
|
-
|
583
|
+
log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
|
621
584
|
|
622
585
|
self.normalize!
|
623
586
|
|
@@ -626,55 +589,49 @@ module RDF::Tabular
|
|
626
589
|
value = object[key]
|
627
590
|
case key
|
628
591
|
when :base
|
629
|
-
|
592
|
+
log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
|
630
593
|
when :columns
|
631
|
-
value.each do |
|
632
|
-
|
633
|
-
|
634
|
-
rescue Error => e
|
635
|
-
errors << e.message
|
636
|
-
end
|
594
|
+
value.each do |col|
|
595
|
+
col.validate
|
596
|
+
log_statistics.merge!(col.log_statistics)
|
637
597
|
end
|
638
598
|
column_names = value.map(&:name)
|
639
|
-
|
599
|
+
log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
640
600
|
when :datatype, :dialect, :tables, :tableSchema, :transformations
|
641
601
|
Array(value).each do |t|
|
642
602
|
# Make sure value is of appropriate class
|
643
603
|
if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
|
644
|
-
|
645
|
-
|
646
|
-
rescue Error => e
|
647
|
-
errors << e.message
|
648
|
-
end
|
604
|
+
t.validate
|
605
|
+
log_statistics.merge!(t.log_statistics)
|
649
606
|
else
|
650
|
-
|
607
|
+
log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
|
651
608
|
end
|
652
609
|
end
|
653
|
-
|
610
|
+
log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
|
654
611
|
when :foreignKeys
|
655
612
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
656
613
|
value.each do |fk|
|
657
614
|
columnReference, reference = fk['columnReference'], fk['reference']
|
658
|
-
|
659
|
-
|
615
|
+
log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
|
616
|
+
log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
|
660
617
|
|
661
618
|
# Verify that columns exist in this schema
|
662
|
-
|
619
|
+
log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
|
663
620
|
Array(columnReference).each do |k|
|
664
|
-
|
621
|
+
log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
|
665
622
|
end
|
666
623
|
|
667
624
|
if reference.is_a?(Hash)
|
668
|
-
|
625
|
+
log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
|
669
626
|
ref_cols = reference['columnReference']
|
670
627
|
schema = if reference.has_key?('resource')
|
671
628
|
if reference.has_key?('schemaReference')
|
672
|
-
|
629
|
+
log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
|
673
630
|
end
|
674
631
|
# resource is the URL of a Table in the TableGroup
|
675
632
|
ref = context.base.join(reference['resource']).to_s
|
676
|
-
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
677
|
-
|
633
|
+
table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
|
634
|
+
log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
678
635
|
table.tableSchema if table
|
679
636
|
elsif reference.has_key?('schemaReference')
|
680
637
|
# resource is the @id of a Schema in the TableGroup
|
@@ -682,25 +639,25 @@ module RDF::Tabular
|
|
682
639
|
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
683
640
|
case tables.length
|
684
641
|
when 0
|
685
|
-
|
642
|
+
log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
686
643
|
nil
|
687
644
|
when 1
|
688
645
|
tables.first.tableSchema
|
689
646
|
else
|
690
|
-
|
647
|
+
log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
|
691
648
|
nil
|
692
649
|
end
|
693
650
|
end
|
694
651
|
|
695
652
|
if schema
|
696
653
|
# ref_cols must exist in schema
|
697
|
-
|
654
|
+
log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
|
698
655
|
Array(ref_cols).each do |k|
|
699
|
-
|
656
|
+
log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
|
700
657
|
end
|
701
658
|
end
|
702
659
|
else
|
703
|
-
|
660
|
+
log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
704
661
|
end
|
705
662
|
end
|
706
663
|
when :format
|
@@ -712,7 +669,7 @@ module RDF::Tabular
|
|
712
669
|
nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
|
713
670
|
unsignedLong unsignedInt unsignedShort unsignedByte
|
714
671
|
).include?(self.base)
|
715
|
-
|
672
|
+
log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
|
716
673
|
object.delete(:format) # act as if not set
|
717
674
|
end
|
718
675
|
|
@@ -720,14 +677,14 @@ module RDF::Tabular
|
|
720
677
|
begin
|
721
678
|
parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
|
722
679
|
rescue ArgumentError => e
|
723
|
-
|
724
|
-
object[:format].delete("pattern") # act as if not set
|
680
|
+
log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
|
681
|
+
object[:format].delete("pattern") if object[:format] # act as if not set
|
725
682
|
end
|
726
683
|
else
|
727
684
|
case self.base
|
728
685
|
when 'boolean'
|
729
686
|
unless value.split("|").length == 2
|
730
|
-
|
687
|
+
log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
|
731
688
|
object.delete(:format) # act as if not set
|
732
689
|
end
|
733
690
|
when :decimal, :integer, :long, :int, :short, :byte,
|
@@ -738,7 +695,7 @@ module RDF::Tabular
|
|
738
695
|
begin
|
739
696
|
parse_uax35_number(value, nil)
|
740
697
|
rescue ArgumentError => e
|
741
|
-
|
698
|
+
log_warn "#{type} has invalid property '#{key}': #{e.message}"
|
742
699
|
object.delete(:format) # act as if not set
|
743
700
|
end
|
744
701
|
when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
|
@@ -746,7 +703,7 @@ module RDF::Tabular
|
|
746
703
|
begin
|
747
704
|
parse_uax35_date(value, nil)
|
748
705
|
rescue ArgumentError => e
|
749
|
-
|
706
|
+
log_warn "#{type} has invalid property '#{key}': #{e.message}"
|
750
707
|
object.delete(:format) # act as if not set
|
751
708
|
end
|
752
709
|
else
|
@@ -754,7 +711,7 @@ module RDF::Tabular
|
|
754
711
|
begin
|
755
712
|
Regexp.compile(value)
|
756
713
|
rescue
|
757
|
-
|
714
|
+
log_warn "#{type} has invalid property '#{key}': #{$!.message}"
|
758
715
|
object.delete(:format) # act as if not set
|
759
716
|
end
|
760
717
|
end
|
@@ -765,20 +722,20 @@ module RDF::Tabular
|
|
765
722
|
if object[:length]
|
766
723
|
case key
|
767
724
|
when :minLength
|
768
|
-
|
725
|
+
log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
|
769
726
|
when :maxLength
|
770
|
-
|
727
|
+
log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
|
771
728
|
end
|
772
729
|
end
|
773
730
|
|
774
731
|
# Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
|
775
732
|
if key == :maxLength && object[:minLength]
|
776
|
-
|
733
|
+
log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
|
777
734
|
end
|
778
735
|
|
779
736
|
# Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
|
780
737
|
unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
|
781
|
-
|
738
|
+
log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
|
782
739
|
end
|
783
740
|
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
784
741
|
case self.base
|
@@ -786,46 +743,46 @@ module RDF::Tabular
|
|
786
743
|
'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
|
787
744
|
'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
|
788
745
|
'duration', 'dayTimeDuration', 'yearMonthDuration'
|
789
|
-
|
746
|
+
log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
|
790
747
|
|
791
748
|
case key
|
792
749
|
when :minInclusive
|
793
750
|
# Applications MUST raise an error if both minInclusive and minExclusive are specified
|
794
|
-
|
751
|
+
log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
|
795
752
|
|
796
753
|
# Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
|
797
|
-
|
754
|
+
log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
|
798
755
|
|
799
756
|
# Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
|
800
|
-
|
757
|
+
log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
|
801
758
|
when :maxInclusive
|
802
759
|
# Applications MUST raise an error if both maxInclusive and maxExclusive are specified
|
803
|
-
|
760
|
+
log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
|
804
761
|
when :minExclusive
|
805
762
|
# Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
|
806
|
-
|
763
|
+
log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
|
807
764
|
|
808
765
|
# Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
|
809
|
-
|
766
|
+
log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
|
810
767
|
end
|
811
768
|
else
|
812
|
-
|
769
|
+
log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
|
813
770
|
end
|
814
771
|
when :notes
|
815
772
|
unless value.is_a?(Hash) || value.is_a?(Array)
|
816
|
-
|
773
|
+
log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
|
817
774
|
end
|
818
775
|
begin
|
819
776
|
normalize_jsonld(key, value)
|
820
777
|
rescue Error => e
|
821
|
-
|
778
|
+
log_error "#{type} has invalid content '#{key}': #{e.message}"
|
822
779
|
end
|
823
780
|
when :primaryKey, :rowTitles
|
824
781
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
825
782
|
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
826
783
|
Array(value).each do |k|
|
827
784
|
unless self.columns.any? {|c| c[:name] == k}
|
828
|
-
|
785
|
+
log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
|
829
786
|
object.delete(key)
|
830
787
|
end
|
831
788
|
end
|
@@ -834,34 +791,33 @@ module RDF::Tabular
|
|
834
791
|
when :@id
|
835
792
|
# Must not be a BNode
|
836
793
|
if value.to_s.start_with?("_:")
|
837
|
-
|
794
|
+
log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
|
838
795
|
end
|
839
796
|
|
840
797
|
# Datatype @id MUST NOT be the URL of a built-in type
|
841
798
|
if self.is_a?(Datatype) && DATATYPES.values.include?(value)
|
842
|
-
|
799
|
+
log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
|
843
800
|
end
|
844
801
|
when :@type
|
845
802
|
# Must not be a BNode
|
846
803
|
if value.to_s.start_with?("_:")
|
847
|
-
|
804
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
|
848
805
|
end
|
849
806
|
case type
|
850
807
|
when :Transformation
|
851
|
-
|
808
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
|
852
809
|
else
|
853
|
-
|
810
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
|
854
811
|
end
|
855
812
|
when ->(k) {key.to_s.include?(':')}
|
856
813
|
begin
|
857
814
|
normalize_jsonld(key, value)
|
858
815
|
rescue Error => e
|
859
|
-
|
816
|
+
log_error "#{type} has invalid content '#{key}': #{e.message}"
|
860
817
|
end
|
861
818
|
end
|
862
819
|
end
|
863
820
|
|
864
|
-
raise Error, errors.join("\n") unless errors.empty?
|
865
821
|
self
|
866
822
|
end
|
867
823
|
|
@@ -890,10 +846,37 @@ module RDF::Tabular
|
|
890
846
|
# @param [:read] input
|
891
847
|
# @yield [Row]
|
892
848
|
def each_row(input)
|
893
|
-
csv =
|
894
|
-
|
895
|
-
|
896
|
-
|
849
|
+
csv, number, skipped = nil, 0, 0
|
850
|
+
path = input.base_uri.path rescue ""
|
851
|
+
if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
|
852
|
+
# Input is HTML; use fragment identfier to find table.
|
853
|
+
fragment = RDF::URI(self.url).fragment rescue nil
|
854
|
+
tab = begin
|
855
|
+
# Extract with nokogiri
|
856
|
+
require 'nokogiri' unless defined?(:Nokogiri)
|
857
|
+
doc = Nokogiri::HTML.parse(input)
|
858
|
+
doc.search("##{fragment}").first if fragment
|
859
|
+
rescue LoadError
|
860
|
+
# Extract with REXML
|
861
|
+
# FIXME
|
862
|
+
end
|
863
|
+
|
864
|
+
raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
|
865
|
+
|
866
|
+
# Use rows with <td> to create column data
|
867
|
+
csv = []
|
868
|
+
number = 0
|
869
|
+
tab.xpath('.//tr').map do |row|
|
870
|
+
number += 1 if row.xpath('th')
|
871
|
+
data = row.xpath('td').map(&:content)
|
872
|
+
csv << data unless data.empty?
|
873
|
+
end
|
874
|
+
else
|
875
|
+
csv = ::CSV.new(input, csv_options)
|
876
|
+
# Skip skipRows and headerRowCount
|
877
|
+
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
|
+
(1..skipped).each {csv.shift}
|
879
|
+
end
|
897
880
|
csv.each do |data|
|
898
881
|
# Check for embedded comments
|
899
882
|
if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
|
@@ -939,17 +922,17 @@ module RDF::Tabular
|
|
939
922
|
if value['@value']
|
940
923
|
dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
|
941
924
|
lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
|
942
|
-
block.call(RDF::Statement
|
925
|
+
block.call(RDF::Statement(subject, property, lit))
|
943
926
|
else
|
944
927
|
# value MUST be a node object, establish a new subject from `@id`
|
945
928
|
s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
|
946
929
|
|
947
930
|
# Generate a triple
|
948
|
-
block.call(RDF::Statement
|
931
|
+
block.call(RDF::Statement(subject, property, s2))
|
949
932
|
|
950
933
|
# Generate types
|
951
934
|
Array(value['@type']).each do |t|
|
952
|
-
block.call(RDF::Statement
|
935
|
+
block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
|
953
936
|
end
|
954
937
|
|
955
938
|
# Generate triples for all other properties
|
@@ -961,7 +944,7 @@ module RDF::Tabular
|
|
961
944
|
else
|
962
945
|
# Value is a primitive JSON value
|
963
946
|
lit = RDF::Literal(value)
|
964
|
-
block.call(RDF::Statement
|
947
|
+
block.call(RDF::Statement(subject, property, RDF::Literal(value)))
|
965
948
|
end
|
966
949
|
else
|
967
950
|
case value
|
@@ -1016,7 +999,7 @@ module RDF::Tabular
|
|
1016
999
|
if @options[:validate]
|
1017
1000
|
raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
1018
1001
|
else
|
1019
|
-
|
1002
|
+
log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
1020
1003
|
end
|
1021
1004
|
end
|
1022
1005
|
else
|
@@ -1025,7 +1008,7 @@ module RDF::Tabular
|
|
1025
1008
|
if @options[:validate]
|
1026
1009
|
raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1027
1010
|
else
|
1028
|
-
|
1011
|
+
log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1029
1012
|
end
|
1030
1013
|
end
|
1031
1014
|
|
@@ -1038,7 +1021,7 @@ module RDF::Tabular
|
|
1038
1021
|
if @options[:validate]
|
1039
1022
|
raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1040
1023
|
else
|
1041
|
-
|
1024
|
+
log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1042
1025
|
|
1043
1026
|
# If present, a virtual column MUST appear after all other non-virtual column definitions
|
1044
1027
|
raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
|
@@ -1053,13 +1036,13 @@ module RDF::Tabular
|
|
1053
1036
|
end
|
1054
1037
|
index = 0
|
1055
1038
|
object_columns.all? do |cb|
|
1056
|
-
ca = non_virtual_columns[index] || Column.new({})
|
1039
|
+
ca = non_virtual_columns[index] || Column.new({}, @options)
|
1057
1040
|
ta = ca.titles || {}
|
1058
1041
|
tb = cb.titles || {}
|
1059
1042
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1060
1043
|
true
|
1061
1044
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1062
|
-
raise Error, "Columns don't match
|
1045
|
+
raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
|
1063
1046
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1064
1047
|
# If validating, column compatibility requires strict match between titles
|
1065
1048
|
titles_match = case
|
@@ -1083,10 +1066,10 @@ module RDF::Tabular
|
|
1083
1066
|
true
|
1084
1067
|
elsif !@options[:validate]
|
1085
1068
|
# If not validating, columns don't match, but processing continues
|
1086
|
-
|
1069
|
+
log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
|
1087
1070
|
true
|
1088
1071
|
else
|
1089
|
-
raise Error, "Columns don't match
|
1072
|
+
raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
|
1090
1073
|
end
|
1091
1074
|
end
|
1092
1075
|
index += 1
|
@@ -1180,13 +1163,13 @@ module RDF::Tabular
|
|
1180
1163
|
when Hash
|
1181
1164
|
if value['@value']
|
1182
1165
|
if !(value.keys.sort - %w(@value @type @language)).empty?
|
1183
|
-
|
1166
|
+
log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
|
1184
1167
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
1185
|
-
|
1168
|
+
log_error "Value object may not contain both @type and @language: #{value.to_json}"
|
1186
1169
|
elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
|
1187
|
-
|
1170
|
+
log_error "Value object with @language must use valid language: #{value.to_json}"
|
1188
1171
|
elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
|
1189
|
-
|
1172
|
+
log_error "Value object with @type must defined type: #{value.to_json}"
|
1190
1173
|
end
|
1191
1174
|
value
|
1192
1175
|
else
|
@@ -1195,16 +1178,16 @@ module RDF::Tabular
|
|
1195
1178
|
case k
|
1196
1179
|
when "@id"
|
1197
1180
|
nv[k] = context.expand_iri(v, documentRelative: true).to_s
|
1198
|
-
|
1181
|
+
log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
|
1199
1182
|
when "@type"
|
1200
1183
|
Array(v).each do |vv|
|
1201
1184
|
# Validate that all type values transform to absolute IRIs
|
1202
1185
|
resource = context.expand_iri(vv, vocab: true)
|
1203
|
-
|
1186
|
+
log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
|
1204
1187
|
end
|
1205
1188
|
nv[k] = v
|
1206
1189
|
when /^(@|_:)/
|
1207
|
-
|
1190
|
+
log_error "Invalid use of #{k} in JSON-LD content"
|
1208
1191
|
else
|
1209
1192
|
nv[k] = normalize_jsonld(k, v)
|
1210
1193
|
end
|
@@ -1217,10 +1200,22 @@ module RDF::Tabular
|
|
1217
1200
|
end
|
1218
1201
|
protected
|
1219
1202
|
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1203
|
+
def set_property(key, type, value, invalid)
|
1204
|
+
if invalid
|
1205
|
+
log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1206
|
+
case type
|
1207
|
+
when :link, :uri_template
|
1208
|
+
object[key] = ""
|
1209
|
+
when :object
|
1210
|
+
object[key] = {}
|
1211
|
+
when :natural_language
|
1212
|
+
object[key] = set_nl(value) || []
|
1213
|
+
else
|
1214
|
+
object.delete(key)
|
1215
|
+
end
|
1216
|
+
else
|
1217
|
+
object[key] = value
|
1218
|
+
end
|
1224
1219
|
end
|
1225
1220
|
|
1226
1221
|
# When setting a natural language property, always put in language-map form
|
@@ -1251,12 +1246,12 @@ module RDF::Tabular
|
|
1251
1246
|
end
|
1252
1247
|
end
|
1253
1248
|
else
|
1254
|
-
|
1249
|
+
log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
1255
1250
|
[]
|
1256
1251
|
end
|
1257
1252
|
|
1258
1253
|
unless object[key].all? {|v| v.is_a?(klass)}
|
1259
|
-
|
1254
|
+
log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
1260
1255
|
# Remove elements that aren't of the right types
|
1261
1256
|
object[key] = object[key].select! {|v| v.is_a?(klass)}
|
1262
1257
|
end
|
@@ -1285,14 +1280,13 @@ module RDF::Tabular
|
|
1285
1280
|
end
|
1286
1281
|
|
1287
1282
|
class DebugContext
|
1288
|
-
include
|
1289
|
-
|
1290
|
-
|
1291
|
-
|
1292
|
-
end
|
1283
|
+
include RDF::Util::Logger
|
1284
|
+
end
|
1285
|
+
def self.log_debug(*args, &block)
|
1286
|
+
DebugContext.new.log_debug(*args, &block)
|
1293
1287
|
end
|
1294
|
-
def self.
|
1295
|
-
DebugContext.new(*args
|
1288
|
+
def self.log_warn(*args)
|
1289
|
+
DebugContext.new.log_warn(*args)
|
1296
1290
|
end
|
1297
1291
|
end
|
1298
1292
|
|
@@ -1329,12 +1323,7 @@ module RDF::Tabular
|
|
1329
1323
|
# We handle this through separate setters
|
1330
1324
|
end
|
1331
1325
|
|
1332
|
-
|
1333
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1334
|
-
object.delete(key)
|
1335
|
-
else
|
1336
|
-
object[key] = value
|
1337
|
-
end
|
1326
|
+
set_property(key, type, value, invalid)
|
1338
1327
|
end
|
1339
1328
|
end
|
1340
1329
|
|
@@ -1421,8 +1410,7 @@ module RDF::Tabular
|
|
1421
1410
|
end
|
1422
1411
|
|
1423
1412
|
if invalid
|
1424
|
-
|
1425
|
-
object.delete(key)
|
1413
|
+
set_property(key, type, value, invalid)
|
1426
1414
|
elsif key == :url
|
1427
1415
|
# URL of CSV relative to metadata
|
1428
1416
|
object[:url] = value
|
@@ -1446,7 +1434,7 @@ module RDF::Tabular
|
|
1446
1434
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1447
1435
|
ctx = @context
|
1448
1436
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1449
|
-
tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
|
1437
|
+
tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
|
1450
1438
|
@parent = tg # Link from parent
|
1451
1439
|
tg
|
1452
1440
|
end
|
@@ -1457,8 +1445,7 @@ module RDF::Tabular
|
|
1457
1445
|
"@id" => (id.to_s if id),
|
1458
1446
|
"@type" => "AnnotatedTable",
|
1459
1447
|
"url" => self.url.to_s,
|
1460
|
-
"
|
1461
|
-
"rows" => []
|
1448
|
+
"tableSchema" => (tableSchema.to_atd if tableSchema),
|
1462
1449
|
}) do |memo, (k, v)|
|
1463
1450
|
memo[k.to_s] ||= v
|
1464
1451
|
memo
|
@@ -1490,12 +1477,7 @@ module RDF::Tabular
|
|
1490
1477
|
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
1491
1478
|
end
|
1492
1479
|
|
1493
|
-
|
1494
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1495
|
-
object.delete(key)
|
1496
|
-
else
|
1497
|
-
object[key] = value
|
1498
|
-
end
|
1480
|
+
set_property(key, type, value, invalid)
|
1499
1481
|
end
|
1500
1482
|
end
|
1501
1483
|
|
@@ -1517,12 +1499,12 @@ module RDF::Tabular
|
|
1517
1499
|
end
|
1518
1500
|
end
|
1519
1501
|
else
|
1520
|
-
|
1502
|
+
log_warn "#{type} has invalid property 'columns': expected array of Column"
|
1521
1503
|
[]
|
1522
1504
|
end
|
1523
1505
|
|
1524
1506
|
unless object[:columns].all? {|v| v.is_a?(Column)}
|
1525
|
-
|
1507
|
+
log_warn "#{type} has invalid property 'columns': expected array of Column"
|
1526
1508
|
# Remove elements that aren't of the right types
|
1527
1509
|
object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
|
1528
1510
|
end
|
@@ -1532,12 +1514,12 @@ module RDF::Tabular
|
|
1532
1514
|
object[:foreignKeys] = case value
|
1533
1515
|
when Array then value
|
1534
1516
|
else
|
1535
|
-
|
1517
|
+
log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1536
1518
|
[]
|
1537
1519
|
end
|
1538
1520
|
|
1539
1521
|
unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
|
1540
|
-
|
1522
|
+
log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1541
1523
|
# Remove elements that aren't of the right types
|
1542
1524
|
object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
|
1543
1525
|
end
|
@@ -1560,6 +1542,18 @@ module RDF::Tabular
|
|
1560
1542
|
end
|
1561
1543
|
end
|
1562
1544
|
end
|
1545
|
+
|
1546
|
+
# Return Annotated Table representation
|
1547
|
+
def to_atd
|
1548
|
+
object.inject({
|
1549
|
+
"@id" => (id.to_s if id),
|
1550
|
+
"@type" => "Schema",
|
1551
|
+
"columns" => Array(columns).map(&:to_atd),
|
1552
|
+
}) do |memo, (k, v)|
|
1553
|
+
memo[k.to_s] ||= v
|
1554
|
+
memo
|
1555
|
+
end.delete_if {|k,v| v.nil? || v.is_a?(Metadata)}
|
1556
|
+
end
|
1563
1557
|
end
|
1564
1558
|
|
1565
1559
|
class Column < Metadata
|
@@ -1619,16 +1613,7 @@ module RDF::Tabular
|
|
1619
1613
|
valid_natural_language_property?(value)
|
1620
1614
|
end
|
1621
1615
|
|
1622
|
-
|
1623
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1624
|
-
object[key] = set_nl(value)
|
1625
|
-
object.delete(key) if object[key].nil?
|
1626
|
-
elsif invalid
|
1627
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1628
|
-
object.delete(key)
|
1629
|
-
else
|
1630
|
-
object[key] = value
|
1631
|
-
end
|
1616
|
+
set_property(key, t, value, invalid)
|
1632
1617
|
end
|
1633
1618
|
end
|
1634
1619
|
|
@@ -1657,7 +1642,6 @@ module RDF::Tabular
|
|
1657
1642
|
"table" => (table.id.to_s if table.id),
|
1658
1643
|
"number" => self.number,
|
1659
1644
|
"sourceNumber" => self.sourceNumber,
|
1660
|
-
"cells" => [],
|
1661
1645
|
"virtual" => self.virtual,
|
1662
1646
|
"name" => self.name,
|
1663
1647
|
"titles" => self.titles
|
@@ -1700,12 +1684,7 @@ module RDF::Tabular
|
|
1700
1684
|
"json or rdf" unless %w(json rdf).include?(value) || value.nil?
|
1701
1685
|
end
|
1702
1686
|
|
1703
|
-
|
1704
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1705
|
-
object.delete(key)
|
1706
|
-
else
|
1707
|
-
object[key] = value
|
1708
|
-
end
|
1687
|
+
set_property(key, type, value, invalid)
|
1709
1688
|
end
|
1710
1689
|
end
|
1711
1690
|
end
|
@@ -1713,7 +1692,7 @@ module RDF::Tabular
|
|
1713
1692
|
class Dialect < Metadata
|
1714
1693
|
# Defaults for dialects
|
1715
1694
|
DEFAULTS = {
|
1716
|
-
commentPrefix:
|
1695
|
+
commentPrefix: false,
|
1717
1696
|
delimiter: ",".freeze,
|
1718
1697
|
doubleQuote: true,
|
1719
1698
|
encoding: "utf-8".freeze,
|
@@ -1749,7 +1728,7 @@ module RDF::Tabular
|
|
1749
1728
|
REQUIRED = [].freeze
|
1750
1729
|
|
1751
1730
|
# Getters and Setters
|
1752
|
-
PROPERTIES.
|
1731
|
+
PROPERTIES.each do |key, type|
|
1753
1732
|
define_method(key) do
|
1754
1733
|
object.fetch(key, DEFAULTS[key])
|
1755
1734
|
end
|
@@ -1772,16 +1751,7 @@ module RDF::Tabular
|
|
1772
1751
|
valid_natural_language_property?(value)
|
1773
1752
|
end
|
1774
1753
|
|
1775
|
-
|
1776
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1777
|
-
object[key] = set_nl(value)
|
1778
|
-
object.delete(key) if object[key].nil?
|
1779
|
-
elsif invalid
|
1780
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1781
|
-
object.delete(key)
|
1782
|
-
else
|
1783
|
-
object[key] = value
|
1784
|
-
end
|
1754
|
+
set_property(key, type, value, invalid)
|
1785
1755
|
end
|
1786
1756
|
end
|
1787
1757
|
|
@@ -1835,38 +1805,75 @@ module RDF::Tabular
|
|
1835
1805
|
lang ||= 'und'
|
1836
1806
|
|
1837
1807
|
# Set encoding on input
|
1838
|
-
|
1839
|
-
(
|
1840
|
-
|
1841
|
-
|
1842
|
-
|
1843
|
-
|
1808
|
+
path = input.base_uri.path rescue ""
|
1809
|
+
if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
|
1810
|
+
# Input is HTML; use fragment identifier to find table.
|
1811
|
+
fragment = RDF::URI(table["url"]).fragment rescue nil
|
1812
|
+
tab = begin
|
1813
|
+
# Extract with nokogiri
|
1814
|
+
require 'nokogiri' unless defined?(:Nokogiri)
|
1815
|
+
doc = Nokogiri::HTML.parse(input)
|
1816
|
+
doc.search("##{fragment}").first if fragment
|
1817
|
+
rescue LoadError
|
1818
|
+
# Extract with REXML
|
1819
|
+
# FIXME
|
1820
|
+
end
|
1844
1821
|
|
1845
|
-
|
1846
|
-
(metadata["rdfs:comment"] ||= []) << value unless value.empty?
|
1847
|
-
end
|
1848
|
-
debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
|
1822
|
+
raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
|
1849
1823
|
|
1850
|
-
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1824
|
+
# Use rows with <th> to create column titles
|
1825
|
+
tab.xpath('.//tr').each do |row|
|
1826
|
+
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
|
+
# Skip columns
|
1828
|
+
skipCols = skipColumns.to_i
|
1829
|
+
next if index < skipCols
|
1856
1830
|
|
1831
|
+
# Trim value
|
1832
|
+
value.lstrip! if %w(true start).include?(trim.to_s)
|
1833
|
+
value.rstrip! if %w(true end).include?(trim.to_s)
|
1834
|
+
|
1835
|
+
# Initialize titles
|
1836
|
+
columns = table["tableSchema"]["columns"] ||= []
|
1837
|
+
column = columns[index - skipCols] ||= {
|
1838
|
+
"titles" => {lang => []},
|
1839
|
+
}
|
1840
|
+
column["titles"][lang] << value
|
1841
|
+
end
|
1842
|
+
end
|
1843
|
+
else
|
1844
|
+
csv = ::CSV.new(input, csv_options)
|
1845
|
+
(1..skipRows.to_i).each do
|
1846
|
+
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1857
1847
|
# Trim value
|
1858
1848
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
1859
1849
|
value.rstrip! if %w(true end).include?(trim.to_s)
|
1860
1850
|
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
1866
|
-
|
1851
|
+
value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
|
1852
|
+
(metadata["rdfs:comment"] ||= []) << value unless value.empty?
|
1853
|
+
end
|
1854
|
+
log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
|
1855
|
+
|
1856
|
+
(1..headerRowCount).each do
|
1857
|
+
row_data = Array(csv.shift)
|
1858
|
+
Array(row_data).each_with_index do |value, index|
|
1859
|
+
# Skip columns
|
1860
|
+
skipCols = skipColumns.to_i
|
1861
|
+
next if index < skipCols
|
1862
|
+
|
1863
|
+
# Trim value
|
1864
|
+
value.lstrip! if %w(true start).include?(trim.to_s)
|
1865
|
+
value.rstrip! if %w(true end).include?(trim.to_s)
|
1866
|
+
|
1867
|
+
# Initialize titles
|
1868
|
+
columns = table["tableSchema"]["columns"] ||= []
|
1869
|
+
column = columns[index - skipCols] ||= {
|
1870
|
+
"titles" => {lang => []},
|
1871
|
+
}
|
1872
|
+
column["titles"][lang] << value
|
1873
|
+
end
|
1867
1874
|
end
|
1868
1875
|
end
|
1869
|
-
|
1876
|
+
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1870
1877
|
input.rewind if input.respond_to?(:rewind)
|
1871
1878
|
|
1872
1879
|
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
@@ -1931,12 +1938,7 @@ module RDF::Tabular
|
|
1931
1938
|
end
|
1932
1939
|
end
|
1933
1940
|
|
1934
|
-
|
1935
|
-
warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1936
|
-
object.delete(key)
|
1937
|
-
else
|
1938
|
-
object[key] = value
|
1939
|
-
end
|
1941
|
+
set_property(key, type, value, invalid)
|
1940
1942
|
end
|
1941
1943
|
end
|
1942
1944
|
end
|
@@ -2056,35 +2058,26 @@ module RDF::Tabular
|
|
2056
2058
|
|
2057
2059
|
# create column if necessary
|
2058
2060
|
columns[index - skipColumns] ||=
|
2059
|
-
Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
|
2061
|
+
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2060
2062
|
|
2061
2063
|
column = columns[index - skipColumns]
|
2062
2064
|
|
2063
2065
|
@values << cell = Cell.new(metadata, column, self, value)
|
2064
2066
|
|
2065
|
-
datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
|
2066
|
-
value = value.gsub(/\r\t
|
2067
|
-
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType
|
2067
|
+
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2068
|
+
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2069
|
+
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2068
2070
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
2069
2071
|
value = column.default || '' if value.empty?
|
2070
2072
|
|
2071
2073
|
cell_values = column.separator ? value.split(column.separator) : [value]
|
2072
2074
|
|
2073
2075
|
cell_values = cell_values.map do |v|
|
2074
|
-
v = v.strip unless %w(string anyAtomicType
|
2076
|
+
v = v.strip unless %w(string anyAtomicType).include?(datatype.base)
|
2075
2077
|
v = column.default || '' if v.empty?
|
2076
2078
|
if Array(column.null).include?(v)
|
2077
2079
|
nil
|
2078
2080
|
else
|
2079
|
-
# Trim value
|
2080
|
-
if %w(string anyAtomicType any).include?(datatype.base)
|
2081
|
-
v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
|
2082
|
-
v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
|
2083
|
-
else
|
2084
|
-
# unless the datatype is string or anyAtomicType or any, strip leading and trailing whitespace from the string value
|
2085
|
-
v.strip!
|
2086
|
-
end
|
2087
|
-
|
2088
2081
|
expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
|
2089
2082
|
if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
|
2090
2083
|
lit_or_errors
|
@@ -2127,7 +2120,11 @@ module RDF::Tabular
|
|
2127
2120
|
|
2128
2121
|
# Identifier for this row, as an RFC7111 fragment
|
2129
2122
|
# @return [RDF::URI]
|
2130
|
-
def id;
|
2123
|
+
def id;
|
2124
|
+
u = table.url.dup
|
2125
|
+
u.fragment = "row=#{self.sourceNumber}"
|
2126
|
+
u
|
2127
|
+
end
|
2131
2128
|
|
2132
2129
|
# Return Annotated Row representation
|
2133
2130
|
def to_atd
|