rdf-tabular 0.2.1 → 0.4.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/VERSION +1 -1
- data/etc/earl.ttl +1579 -799
- data/lib/rdf/tabular.rb +0 -1
- data/lib/rdf/tabular/format.rb +16 -0
- data/lib/rdf/tabular/metadata.rb +251 -254
- data/lib/rdf/tabular/reader.rb +98 -146
- data/lib/rdf/tabular/uax35.rb +4 -4
- data/spec/format_spec.rb +34 -0
- data/spec/matchers.rb +3 -78
- data/spec/metadata_spec.rb +172 -105
- data/spec/reader_spec.rb +28 -25
- data/spec/spec_helper.rb +5 -3
- data/spec/suite_helper.rb +1 -1
- data/spec/suite_spec.rb +8 -9
- metadata +118 -55
- data/lib/rdf/tabular/utils.rb +0 -33
data/lib/rdf/tabular.rb
CHANGED
@@ -11,7 +11,6 @@ module RDF
|
|
11
11
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
12
12
|
module Tabular
|
13
13
|
require 'rdf/tabular/format'
|
14
|
-
require 'rdf/tabular/utils'
|
15
14
|
autoload :Column, 'rdf/tabular/metadata'
|
16
15
|
autoload :CSVW, 'rdf/tabular/csvw'
|
17
16
|
autoload :Dialect, 'rdf/tabular/metadata'
|
data/lib/rdf/tabular/format.rb
CHANGED
@@ -46,5 +46,21 @@ module RDF::Tabular
|
|
46
46
|
def self.detect(sample)
|
47
47
|
!!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
|
48
48
|
end
|
49
|
+
|
50
|
+
##
|
51
|
+
# Hash of CLI commands appropriate for this format
|
52
|
+
# @return [Hash{Symbol => Lambda(Array, Hash)}]
|
53
|
+
def self.cli_commands
|
54
|
+
{
|
55
|
+
:"tabular-json" => ->(argv, opts) do
|
56
|
+
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
57
|
+
out = opts[:output] || $stdout
|
58
|
+
out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
|
59
|
+
RDF::CLI.parse(argv, opts) do |reader|
|
60
|
+
out.puts reader.to_json
|
61
|
+
end
|
62
|
+
end
|
63
|
+
}
|
64
|
+
end
|
49
65
|
end
|
50
66
|
end
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -19,16 +19,12 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
19
19
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
20
20
|
module RDF::Tabular
|
21
21
|
class Metadata
|
22
|
-
include
|
22
|
+
include RDF::Util::Logger
|
23
23
|
|
24
24
|
# Hash representation
|
25
25
|
# @return [Hash<Symbol,Object>]
|
26
26
|
attr_accessor :object
|
27
27
|
|
28
|
-
# Warnings detected on initialization or when setting properties
|
29
|
-
# @return [Array<String>]
|
30
|
-
attr_accessor :warnings
|
31
|
-
|
32
28
|
# Inherited properties, valid for all types
|
33
29
|
INHERITED_PROPERTIES = {
|
34
30
|
aboutUrl: :uri_template,
|
@@ -137,7 +133,7 @@ module RDF::Tabular
|
|
137
133
|
#
|
138
134
|
# @param [String] path
|
139
135
|
# @param [Hash{Symbol => Object}] options
|
140
|
-
# see `RDF::Util::File.open_file` in RDF.rb and {
|
136
|
+
# see `RDF::Util::File.open_file` in RDF.rb and {new}
|
141
137
|
# @yield [Metadata]
|
142
138
|
# @raise [IOError] if file not found
|
143
139
|
def self.open(path, options = {})
|
@@ -153,7 +149,7 @@ module RDF::Tabular
|
|
153
149
|
end
|
154
150
|
|
155
151
|
# Return the well-known configuration for a file, and remember using a weak-reference cache to avoid unnecessary retrievals.
|
156
|
-
# @param [String] base
|
152
|
+
# @param [String] base the URL used for finding the file
|
157
153
|
# @return [Array<String>, false]
|
158
154
|
def self.site_wide_config(base)
|
159
155
|
require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
|
@@ -179,7 +175,6 @@ module RDF::Tabular
|
|
179
175
|
# @return [Metadata]
|
180
176
|
def self.for_input(input, options = {})
|
181
177
|
base = options[:base]
|
182
|
-
warnings = options.fetch(:warnings, [])
|
183
178
|
|
184
179
|
# Use user metadata, if provided
|
185
180
|
metadata = case options[:metadata]
|
@@ -202,10 +197,7 @@ module RDF::Tabular
|
|
202
197
|
if md.describes_file?(base)
|
203
198
|
metadata = md
|
204
199
|
else
|
205
|
-
|
206
|
-
if options[:validate] && !options[:warnings]
|
207
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
208
|
-
end
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
209
201
|
end
|
210
202
|
end
|
211
203
|
end
|
@@ -214,12 +206,12 @@ module RDF::Tabular
|
|
214
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
215
207
|
if !metadata && base
|
216
208
|
templates = site_wide_config(base)
|
217
|
-
|
209
|
+
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
218
210
|
locs = templates.map do |template|
|
219
211
|
t = Addressable::Template.new(template)
|
220
212
|
RDF::URI(base).join(t.expand(url: base).to_s)
|
221
213
|
end
|
222
|
-
|
214
|
+
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
223
215
|
|
224
216
|
locs.each do |loc|
|
225
217
|
metadata ||= begin
|
@@ -230,15 +222,12 @@ module RDF::Tabular
|
|
230
222
|
if md.describes_file?(base)
|
231
223
|
md
|
232
224
|
else
|
233
|
-
|
234
|
-
if options[:validate] && !options[:warnings]
|
235
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
236
|
-
end
|
225
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
237
226
|
nil
|
238
227
|
end
|
239
228
|
end
|
240
229
|
rescue IOError
|
241
|
-
|
230
|
+
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
242
231
|
nil
|
243
232
|
end
|
244
233
|
end
|
@@ -331,7 +320,6 @@ module RDF::Tabular
|
|
331
320
|
# @return [Metadata]
|
332
321
|
def initialize(input, options = {})
|
333
322
|
@options = options.dup
|
334
|
-
@options[:depth] ||= 0
|
335
323
|
|
336
324
|
# Parent of this Metadata, if any
|
337
325
|
@parent = @options[:parent]
|
@@ -344,14 +332,14 @@ module RDF::Tabular
|
|
344
332
|
|
345
333
|
@context = case input['@context']
|
346
334
|
when Array
|
347
|
-
|
335
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
348
336
|
c = LOCAL_CONTEXT.dup
|
349
337
|
c.base = RDF::URI(opt_base)
|
350
338
|
obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
|
351
339
|
raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
|
352
340
|
c.parse(obj)
|
353
341
|
when Hash
|
354
|
-
|
342
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
355
343
|
c = LOCAL_CONTEXT.dup
|
356
344
|
c.base = RDF::URI(opt_base)
|
357
345
|
c.parse(input['@context'])
|
@@ -362,7 +350,7 @@ module RDF::Tabular
|
|
362
350
|
c
|
363
351
|
else
|
364
352
|
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
365
|
-
|
353
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
366
354
|
LOCAL_CONTEXT.dup
|
367
355
|
c = LOCAL_CONTEXT.dup
|
368
356
|
c.base = RDF::URI(opt_base)
|
@@ -375,7 +363,7 @@ module RDF::Tabular
|
|
375
363
|
@options[:base] = @context ? @context.base : RDF::URI(opt_base)
|
376
364
|
|
377
365
|
if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
|
378
|
-
|
366
|
+
log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
|
379
367
|
@context.default_language = nil
|
380
368
|
end
|
381
369
|
|
@@ -385,7 +373,7 @@ module RDF::Tabular
|
|
385
373
|
|
386
374
|
@object = {}
|
387
375
|
|
388
|
-
|
376
|
+
log_depth do
|
389
377
|
# Input was parsed in .new
|
390
378
|
# Metadata is object with symbolic keys
|
391
379
|
input.each do |key, value|
|
@@ -401,7 +389,7 @@ module RDF::Tabular
|
|
401
389
|
object[:@id] = if value.is_a?(String)
|
402
390
|
value
|
403
391
|
else
|
404
|
-
|
392
|
+
log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
|
405
393
|
"" # Default value
|
406
394
|
end
|
407
395
|
@id = @options[:base].join(object[:@id])
|
@@ -426,14 +414,14 @@ module RDF::Tabular
|
|
426
414
|
end
|
427
415
|
|
428
416
|
if reason
|
429
|
-
|
430
|
-
|
431
|
-
|
417
|
+
log_debug("md#initialize") {reason}
|
418
|
+
log_debug("md#initialize") {"filenames: #{filenames}"}
|
419
|
+
log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
|
432
420
|
end
|
433
421
|
end
|
434
422
|
|
435
423
|
# Getters and Setters
|
436
|
-
INHERITED_PROPERTIES.
|
424
|
+
INHERITED_PROPERTIES.each do |key, type|
|
437
425
|
define_method(key) do
|
438
426
|
object.fetch(key) do
|
439
427
|
parent ? parent.send(key) : default_value(key)
|
@@ -459,12 +447,7 @@ module RDF::Tabular
|
|
459
447
|
# We handle this through a separate datatype= setter
|
460
448
|
end
|
461
449
|
|
462
|
-
|
463
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
464
|
-
object.delete(key)
|
465
|
-
else
|
466
|
-
object[key] = value
|
467
|
-
end
|
450
|
+
set_property(key, type, value, invalid)
|
468
451
|
end
|
469
452
|
end
|
470
453
|
|
@@ -492,7 +475,7 @@ module RDF::Tabular
|
|
492
475
|
when Schema
|
493
476
|
value
|
494
477
|
else
|
495
|
-
|
478
|
+
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
496
479
|
Schema.new({}, @options.merge(parent: self, context: nil))
|
497
480
|
end
|
498
481
|
end
|
@@ -539,7 +522,7 @@ module RDF::Tabular
|
|
539
522
|
when Dialect
|
540
523
|
value
|
541
524
|
else
|
542
|
-
|
525
|
+
log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
|
543
526
|
nil
|
544
527
|
end
|
545
528
|
end
|
@@ -549,15 +532,15 @@ module RDF::Tabular
|
|
549
532
|
# @raise [Error] if datatype is not valid
|
550
533
|
def datatype=(value)
|
551
534
|
val = case value
|
552
|
-
when Hash then Datatype.new(value, parent: self)
|
553
|
-
else Datatype.new({base: value}, parent: self)
|
535
|
+
when Hash then Datatype.new(value, @options.merge(parent: self))
|
536
|
+
else Datatype.new({base: value}, @options.merge(parent: self))
|
554
537
|
end
|
555
538
|
|
556
539
|
if val.valid? || value.is_a?(Hash)
|
557
540
|
# Set it if it was specified as an object, which may cause validation errors later
|
558
541
|
object[:datatype] = val
|
559
542
|
else
|
560
|
-
|
543
|
+
log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
|
561
544
|
end
|
562
545
|
end
|
563
546
|
|
@@ -572,40 +555,20 @@ module RDF::Tabular
|
|
572
555
|
##
|
573
556
|
# Do we have valid metadata?
|
574
557
|
def valid?
|
575
|
-
validate
|
576
|
-
|
577
|
-
rescue
|
578
|
-
false
|
579
|
-
end
|
580
|
-
|
581
|
-
##
|
582
|
-
# Validation errors
|
583
|
-
# @return [Array<String>]
|
584
|
-
def errors
|
585
|
-
validate! && []
|
586
|
-
rescue Error => e
|
587
|
-
e.message.split("\n")
|
558
|
+
validate # Possibly re-validate
|
559
|
+
!log_statistics[:error]
|
588
560
|
end
|
589
561
|
|
590
|
-
|
591
|
-
|
592
|
-
# @return [Array<String>]
|
593
|
-
def warnings
|
594
|
-
((@warnings || []) + object.
|
595
|
-
values.
|
596
|
-
flatten.
|
597
|
-
select {|v| v.is_a?(Metadata)}.
|
598
|
-
map(&:warnings).
|
599
|
-
flatten).compact.uniq
|
562
|
+
def validate!
|
563
|
+
raise Error, "Metadata error" unless valid?
|
600
564
|
end
|
601
565
|
|
602
566
|
##
|
603
567
|
# Validate metadata, raising an error containing all errors detected during validation
|
604
568
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
605
569
|
# @return [self]
|
606
|
-
def validate
|
570
|
+
def validate
|
607
571
|
expected_props, required_props = @properties.keys, @required
|
608
|
-
errors = []
|
609
572
|
|
610
573
|
unless is_a?(Dialect) || is_a?(Transformation)
|
611
574
|
expected_props = expected_props + INHERITED_PROPERTIES.keys
|
@@ -614,10 +577,10 @@ module RDF::Tabular
|
|
614
577
|
# It has only expected properties (exclude metadata)
|
615
578
|
check_keys = object.keys - [:"@id", :"@context"]
|
616
579
|
check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
|
617
|
-
|
580
|
+
log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
|
618
581
|
|
619
582
|
# It has required properties
|
620
|
-
|
583
|
+
log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
|
621
584
|
|
622
585
|
self.normalize!
|
623
586
|
|
@@ -626,55 +589,49 @@ module RDF::Tabular
|
|
626
589
|
value = object[key]
|
627
590
|
case key
|
628
591
|
when :base
|
629
|
-
|
592
|
+
log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
|
630
593
|
when :columns
|
631
|
-
value.each do |
|
632
|
-
|
633
|
-
|
634
|
-
rescue Error => e
|
635
|
-
errors << e.message
|
636
|
-
end
|
594
|
+
value.each do |col|
|
595
|
+
col.validate
|
596
|
+
log_statistics.merge!(col.log_statistics)
|
637
597
|
end
|
638
598
|
column_names = value.map(&:name)
|
639
|
-
|
599
|
+
log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
640
600
|
when :datatype, :dialect, :tables, :tableSchema, :transformations
|
641
601
|
Array(value).each do |t|
|
642
602
|
# Make sure value is of appropriate class
|
643
603
|
if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
|
644
|
-
|
645
|
-
|
646
|
-
rescue Error => e
|
647
|
-
errors << e.message
|
648
|
-
end
|
604
|
+
t.validate
|
605
|
+
log_statistics.merge!(t.log_statistics)
|
649
606
|
else
|
650
|
-
|
607
|
+
log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
|
651
608
|
end
|
652
609
|
end
|
653
|
-
|
610
|
+
log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
|
654
611
|
when :foreignKeys
|
655
612
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
656
613
|
value.each do |fk|
|
657
614
|
columnReference, reference = fk['columnReference'], fk['reference']
|
658
|
-
|
659
|
-
|
615
|
+
log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
|
616
|
+
log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
|
660
617
|
|
661
618
|
# Verify that columns exist in this schema
|
662
|
-
|
619
|
+
log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
|
663
620
|
Array(columnReference).each do |k|
|
664
|
-
|
621
|
+
log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
|
665
622
|
end
|
666
623
|
|
667
624
|
if reference.is_a?(Hash)
|
668
|
-
|
625
|
+
log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
|
669
626
|
ref_cols = reference['columnReference']
|
670
627
|
schema = if reference.has_key?('resource')
|
671
628
|
if reference.has_key?('schemaReference')
|
672
|
-
|
629
|
+
log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
|
673
630
|
end
|
674
631
|
# resource is the URL of a Table in the TableGroup
|
675
632
|
ref = context.base.join(reference['resource']).to_s
|
676
|
-
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
677
|
-
|
633
|
+
table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
|
634
|
+
log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
678
635
|
table.tableSchema if table
|
679
636
|
elsif reference.has_key?('schemaReference')
|
680
637
|
# resource is the @id of a Schema in the TableGroup
|
@@ -682,25 +639,25 @@ module RDF::Tabular
|
|
682
639
|
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
683
640
|
case tables.length
|
684
641
|
when 0
|
685
|
-
|
642
|
+
log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
686
643
|
nil
|
687
644
|
when 1
|
688
645
|
tables.first.tableSchema
|
689
646
|
else
|
690
|
-
|
647
|
+
log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
|
691
648
|
nil
|
692
649
|
end
|
693
650
|
end
|
694
651
|
|
695
652
|
if schema
|
696
653
|
# ref_cols must exist in schema
|
697
|
-
|
654
|
+
log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
|
698
655
|
Array(ref_cols).each do |k|
|
699
|
-
|
656
|
+
log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
|
700
657
|
end
|
701
658
|
end
|
702
659
|
else
|
703
|
-
|
660
|
+
log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
704
661
|
end
|
705
662
|
end
|
706
663
|
when :format
|
@@ -712,7 +669,7 @@ module RDF::Tabular
|
|
712
669
|
nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
|
713
670
|
unsignedLong unsignedInt unsignedShort unsignedByte
|
714
671
|
).include?(self.base)
|
715
|
-
|
672
|
+
log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
|
716
673
|
object.delete(:format) # act as if not set
|
717
674
|
end
|
718
675
|
|
@@ -720,14 +677,14 @@ module RDF::Tabular
|
|
720
677
|
begin
|
721
678
|
parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
|
722
679
|
rescue ArgumentError => e
|
723
|
-
|
724
|
-
object[:format].delete("pattern") # act as if not set
|
680
|
+
log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
|
681
|
+
object[:format].delete("pattern") if object[:format] # act as if not set
|
725
682
|
end
|
726
683
|
else
|
727
684
|
case self.base
|
728
685
|
when 'boolean'
|
729
686
|
unless value.split("|").length == 2
|
730
|
-
|
687
|
+
log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
|
731
688
|
object.delete(:format) # act as if not set
|
732
689
|
end
|
733
690
|
when :decimal, :integer, :long, :int, :short, :byte,
|
@@ -738,7 +695,7 @@ module RDF::Tabular
|
|
738
695
|
begin
|
739
696
|
parse_uax35_number(value, nil)
|
740
697
|
rescue ArgumentError => e
|
741
|
-
|
698
|
+
log_warn "#{type} has invalid property '#{key}': #{e.message}"
|
742
699
|
object.delete(:format) # act as if not set
|
743
700
|
end
|
744
701
|
when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
|
@@ -746,7 +703,7 @@ module RDF::Tabular
|
|
746
703
|
begin
|
747
704
|
parse_uax35_date(value, nil)
|
748
705
|
rescue ArgumentError => e
|
749
|
-
|
706
|
+
log_warn "#{type} has invalid property '#{key}': #{e.message}"
|
750
707
|
object.delete(:format) # act as if not set
|
751
708
|
end
|
752
709
|
else
|
@@ -754,7 +711,7 @@ module RDF::Tabular
|
|
754
711
|
begin
|
755
712
|
Regexp.compile(value)
|
756
713
|
rescue
|
757
|
-
|
714
|
+
log_warn "#{type} has invalid property '#{key}': #{$!.message}"
|
758
715
|
object.delete(:format) # act as if not set
|
759
716
|
end
|
760
717
|
end
|
@@ -765,20 +722,20 @@ module RDF::Tabular
|
|
765
722
|
if object[:length]
|
766
723
|
case key
|
767
724
|
when :minLength
|
768
|
-
|
725
|
+
log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
|
769
726
|
when :maxLength
|
770
|
-
|
727
|
+
log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
|
771
728
|
end
|
772
729
|
end
|
773
730
|
|
774
731
|
# Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
|
775
732
|
if key == :maxLength && object[:minLength]
|
776
|
-
|
733
|
+
log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
|
777
734
|
end
|
778
735
|
|
779
736
|
# Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
|
780
737
|
unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
|
781
|
-
|
738
|
+
log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
|
782
739
|
end
|
783
740
|
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
784
741
|
case self.base
|
@@ -786,46 +743,46 @@ module RDF::Tabular
|
|
786
743
|
'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
|
787
744
|
'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
|
788
745
|
'duration', 'dayTimeDuration', 'yearMonthDuration'
|
789
|
-
|
746
|
+
log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
|
790
747
|
|
791
748
|
case key
|
792
749
|
when :minInclusive
|
793
750
|
# Applications MUST raise an error if both minInclusive and minExclusive are specified
|
794
|
-
|
751
|
+
log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
|
795
752
|
|
796
753
|
# Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
|
797
|
-
|
754
|
+
log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
|
798
755
|
|
799
756
|
# Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
|
800
|
-
|
757
|
+
log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
|
801
758
|
when :maxInclusive
|
802
759
|
# Applications MUST raise an error if both maxInclusive and maxExclusive are specified
|
803
|
-
|
760
|
+
log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
|
804
761
|
when :minExclusive
|
805
762
|
# Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
|
806
|
-
|
763
|
+
log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
|
807
764
|
|
808
765
|
# Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
|
809
|
-
|
766
|
+
log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
|
810
767
|
end
|
811
768
|
else
|
812
|
-
|
769
|
+
log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
|
813
770
|
end
|
814
771
|
when :notes
|
815
772
|
unless value.is_a?(Hash) || value.is_a?(Array)
|
816
|
-
|
773
|
+
log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
|
817
774
|
end
|
818
775
|
begin
|
819
776
|
normalize_jsonld(key, value)
|
820
777
|
rescue Error => e
|
821
|
-
|
778
|
+
log_error "#{type} has invalid content '#{key}': #{e.message}"
|
822
779
|
end
|
823
780
|
when :primaryKey, :rowTitles
|
824
781
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
825
782
|
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
826
783
|
Array(value).each do |k|
|
827
784
|
unless self.columns.any? {|c| c[:name] == k}
|
828
|
-
|
785
|
+
log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
|
829
786
|
object.delete(key)
|
830
787
|
end
|
831
788
|
end
|
@@ -834,34 +791,33 @@ module RDF::Tabular
|
|
834
791
|
when :@id
|
835
792
|
# Must not be a BNode
|
836
793
|
if value.to_s.start_with?("_:")
|
837
|
-
|
794
|
+
log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
|
838
795
|
end
|
839
796
|
|
840
797
|
# Datatype @id MUST NOT be the URL of a built-in type
|
841
798
|
if self.is_a?(Datatype) && DATATYPES.values.include?(value)
|
842
|
-
|
799
|
+
log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
|
843
800
|
end
|
844
801
|
when :@type
|
845
802
|
# Must not be a BNode
|
846
803
|
if value.to_s.start_with?("_:")
|
847
|
-
|
804
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
|
848
805
|
end
|
849
806
|
case type
|
850
807
|
when :Transformation
|
851
|
-
|
808
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
|
852
809
|
else
|
853
|
-
|
810
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
|
854
811
|
end
|
855
812
|
when ->(k) {key.to_s.include?(':')}
|
856
813
|
begin
|
857
814
|
normalize_jsonld(key, value)
|
858
815
|
rescue Error => e
|
859
|
-
|
816
|
+
log_error "#{type} has invalid content '#{key}': #{e.message}"
|
860
817
|
end
|
861
818
|
end
|
862
819
|
end
|
863
820
|
|
864
|
-
raise Error, errors.join("\n") unless errors.empty?
|
865
821
|
self
|
866
822
|
end
|
867
823
|
|
@@ -890,10 +846,37 @@ module RDF::Tabular
|
|
890
846
|
# @param [:read] input
|
891
847
|
# @yield [Row]
|
892
848
|
def each_row(input)
|
893
|
-
csv =
|
894
|
-
|
895
|
-
|
896
|
-
|
849
|
+
csv, number, skipped = nil, 0, 0
|
850
|
+
path = input.base_uri.path rescue ""
|
851
|
+
if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
|
852
|
+
# Input is HTML; use fragment identifier to find table.
|
853
|
+
fragment = RDF::URI(self.url).fragment rescue nil
|
854
|
+
tab = begin
|
855
|
+
# Extract with nokogiri
|
856
|
+
require 'nokogiri' unless defined?(:Nokogiri)
|
857
|
+
doc = Nokogiri::HTML.parse(input)
|
858
|
+
doc.search("##{fragment}").first if fragment
|
859
|
+
rescue LoadError
|
860
|
+
# Extract with REXML
|
861
|
+
# FIXME
|
862
|
+
end
|
863
|
+
|
864
|
+
raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
|
865
|
+
|
866
|
+
# Use rows with <td> to create column data
|
867
|
+
csv = []
|
868
|
+
number = 0
|
869
|
+
tab.xpath('.//tr').map do |row|
|
870
|
+
number += 1 if row.xpath('th')
|
871
|
+
data = row.xpath('td').map(&:content)
|
872
|
+
csv << data unless data.empty?
|
873
|
+
end
|
874
|
+
else
|
875
|
+
csv = ::CSV.new(input, csv_options)
|
876
|
+
# Skip skipRows and headerRowCount
|
877
|
+
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
|
+
(1..skipped).each {csv.shift}
|
879
|
+
end
|
897
880
|
csv.each do |data|
|
898
881
|
# Check for embedded comments
|
899
882
|
if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
|
@@ -939,17 +922,17 @@ module RDF::Tabular
|
|
939
922
|
if value['@value']
|
940
923
|
dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
|
941
924
|
lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
|
942
|
-
block.call(RDF::Statement
|
925
|
+
block.call(RDF::Statement(subject, property, lit))
|
943
926
|
else
|
944
927
|
# value MUST be a node object, establish a new subject from `@id`
|
945
928
|
s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
|
946
929
|
|
947
930
|
# Generate a triple
|
948
|
-
block.call(RDF::Statement
|
931
|
+
block.call(RDF::Statement(subject, property, s2))
|
949
932
|
|
950
933
|
# Generate types
|
951
934
|
Array(value['@type']).each do |t|
|
952
|
-
block.call(RDF::Statement
|
935
|
+
block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
|
953
936
|
end
|
954
937
|
|
955
938
|
# Generate triples for all other properties
|
@@ -961,7 +944,7 @@ module RDF::Tabular
|
|
961
944
|
else
|
962
945
|
# Value is a primitive JSON value
|
963
946
|
lit = RDF::Literal(value)
|
964
|
-
block.call(RDF::Statement
|
947
|
+
block.call(RDF::Statement(subject, property, RDF::Literal(value)))
|
965
948
|
end
|
966
949
|
else
|
967
950
|
case value
|
@@ -1016,7 +999,7 @@ module RDF::Tabular
|
|
1016
999
|
if @options[:validate]
|
1017
1000
|
raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
1018
1001
|
else
|
1019
|
-
|
1002
|
+
log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
1020
1003
|
end
|
1021
1004
|
end
|
1022
1005
|
else
|
@@ -1025,7 +1008,7 @@ module RDF::Tabular
|
|
1025
1008
|
if @options[:validate]
|
1026
1009
|
raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1027
1010
|
else
|
1028
|
-
|
1011
|
+
log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1029
1012
|
end
|
1030
1013
|
end
|
1031
1014
|
|
@@ -1038,7 +1021,7 @@ module RDF::Tabular
|
|
1038
1021
|
if @options[:validate]
|
1039
1022
|
raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1040
1023
|
else
|
1041
|
-
|
1024
|
+
log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1042
1025
|
|
1043
1026
|
# If present, a virtual column MUST appear after all other non-virtual column definitions
|
1044
1027
|
raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
|
@@ -1053,13 +1036,13 @@ module RDF::Tabular
|
|
1053
1036
|
end
|
1054
1037
|
index = 0
|
1055
1038
|
object_columns.all? do |cb|
|
1056
|
-
ca = non_virtual_columns[index] || Column.new({})
|
1039
|
+
ca = non_virtual_columns[index] || Column.new({}, @options)
|
1057
1040
|
ta = ca.titles || {}
|
1058
1041
|
tb = cb.titles || {}
|
1059
1042
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1060
1043
|
true
|
1061
1044
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1062
|
-
raise Error, "Columns don't match
|
1045
|
+
raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
|
1063
1046
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1064
1047
|
# If validating, column compatibility requires strict match between titles
|
1065
1048
|
titles_match = case
|
@@ -1083,10 +1066,10 @@ module RDF::Tabular
|
|
1083
1066
|
true
|
1084
1067
|
elsif !@options[:validate]
|
1085
1068
|
# If not validating, columns don't match, but processing continues
|
1086
|
-
|
1069
|
+
log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
|
1087
1070
|
true
|
1088
1071
|
else
|
1089
|
-
raise Error, "Columns don't match
|
1072
|
+
raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
|
1090
1073
|
end
|
1091
1074
|
end
|
1092
1075
|
index += 1
|
@@ -1180,13 +1163,13 @@ module RDF::Tabular
|
|
1180
1163
|
when Hash
|
1181
1164
|
if value['@value']
|
1182
1165
|
if !(value.keys.sort - %w(@value @type @language)).empty?
|
1183
|
-
|
1166
|
+
log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
|
1184
1167
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
1185
|
-
|
1168
|
+
log_error "Value object may not contain both @type and @language: #{value.to_json}"
|
1186
1169
|
elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
|
1187
|
-
|
1170
|
+
log_error "Value object with @language must use valid language: #{value.to_json}"
|
1188
1171
|
elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
|
1189
|
-
|
1172
|
+
log_error "Value object with @type must defined type: #{value.to_json}"
|
1190
1173
|
end
|
1191
1174
|
value
|
1192
1175
|
else
|
@@ -1195,16 +1178,16 @@ module RDF::Tabular
|
|
1195
1178
|
case k
|
1196
1179
|
when "@id"
|
1197
1180
|
nv[k] = context.expand_iri(v, documentRelative: true).to_s
|
1198
|
-
|
1181
|
+
log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
|
1199
1182
|
when "@type"
|
1200
1183
|
Array(v).each do |vv|
|
1201
1184
|
# Validate that all type values transform to absolute IRIs
|
1202
1185
|
resource = context.expand_iri(vv, vocab: true)
|
1203
|
-
|
1186
|
+
log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
|
1204
1187
|
end
|
1205
1188
|
nv[k] = v
|
1206
1189
|
when /^(@|_:)/
|
1207
|
-
|
1190
|
+
log_error "Invalid use of #{k} in JSON-LD content"
|
1208
1191
|
else
|
1209
1192
|
nv[k] = normalize_jsonld(k, v)
|
1210
1193
|
end
|
@@ -1217,10 +1200,22 @@ module RDF::Tabular
|
|
1217
1200
|
end
|
1218
1201
|
protected
|
1219
1202
|
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1203
|
+
# Store a property on the underlying `object` hash, or, when validation
# failed, log a warning and replace the value with a fallback that depends
# on the property type.
#
# Fallbacks used when `invalid` is truthy:
# * :link, :uri_template -> empty string
# * :object              -> empty hash
# * :natural_language    -> result of `set_nl(value)`, or an empty array
# * anything else        -> the key is removed from `object`
#
# @param key     property key used to index `object`
# @param type    property type; selects the fallback for invalid values
# @param value   the value supplied for the property
# @param invalid description of the expected value when validation failed;
#   falsy when `value` is acceptable
# @return the stored value, or the result of `object.delete(key)`
def set_property(key, type, value, invalid)
  # Valid values are stored untouched.
  return (object[key] = value) unless invalid

  log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
  case type
  when :link, :uri_template
    object[key] = ""
  when :object
    object[key] = {}
  when :natural_language
    # Keep whatever can be salvaged as a language map, otherwise an empty list.
    object[key] = set_nl(value) || []
  else
    object.delete(key)
  end
end
|
1225
1220
|
|
1226
1221
|
# When setting a natural language property, always put in language-map form
|
@@ -1251,12 +1246,12 @@ module RDF::Tabular
|
|
1251
1246
|
end
|
1252
1247
|
end
|
1253
1248
|
else
|
1254
|
-
|
1249
|
+
log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
1255
1250
|
[]
|
1256
1251
|
end
|
1257
1252
|
|
1258
1253
|
unless object[key].all? {|v| v.is_a?(klass)}
|
1259
|
-
|
1254
|
+
log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
1260
1255
|
# Remove elements that aren't of the right types
|
1261
1256
|
object[key] = object[key].select! {|v| v.is_a?(klass)}
|
1262
1257
|
end
|
@@ -1285,14 +1280,13 @@ module RDF::Tabular
|
|
1285
1280
|
end
|
1286
1281
|
|
1287
1282
|
# Minimal object that mixes in RDF::Util::Logger so the class-level
# logging helpers below have an instance to send log messages to.
class DebugContext
  include RDF::Util::Logger
end

# Class-level debug logging: forwards all arguments and the optional block
# to `log_debug` on a fresh DebugContext.
def self.log_debug(*args, &block)
  DebugContext.new.log_debug(*args, &block)
end

# Class-level warning logging: forwards all arguments (and, for parity with
# `log_debug`, an optional block) to `log_warn` on a fresh DebugContext.
def self.log_warn(*args, &block)
  DebugContext.new.log_warn(*args, &block)
end
|
1297
1291
|
end
|
1298
1292
|
|
@@ -1329,12 +1323,7 @@ module RDF::Tabular
|
|
1329
1323
|
# We handle this through separate setters
|
1330
1324
|
end
|
1331
1325
|
|
1332
|
-
|
1333
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1334
|
-
object.delete(key)
|
1335
|
-
else
|
1336
|
-
object[key] = value
|
1337
|
-
end
|
1326
|
+
set_property(key, type, value, invalid)
|
1338
1327
|
end
|
1339
1328
|
end
|
1340
1329
|
|
@@ -1421,8 +1410,7 @@ module RDF::Tabular
|
|
1421
1410
|
end
|
1422
1411
|
|
1423
1412
|
if invalid
|
1424
|
-
|
1425
|
-
object.delete(key)
|
1413
|
+
set_property(key, type, value, invalid)
|
1426
1414
|
elsif key == :url
|
1427
1415
|
# URL of CSV relative to metadata
|
1428
1416
|
object[:url] = value
|
@@ -1446,7 +1434,7 @@ module RDF::Tabular
|
|
1446
1434
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1447
1435
|
ctx = @context
|
1448
1436
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1449
|
-
tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
|
1437
|
+
tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
|
1450
1438
|
@parent = tg # Link from parent
|
1451
1439
|
tg
|
1452
1440
|
end
|
@@ -1457,8 +1445,7 @@ module RDF::Tabular
|
|
1457
1445
|
"@id" => (id.to_s if id),
|
1458
1446
|
"@type" => "AnnotatedTable",
|
1459
1447
|
"url" => self.url.to_s,
|
1460
|
-
"
|
1461
|
-
"rows" => []
|
1448
|
+
"tableSchema" => (tableSchema.to_atd if tableSchema),
|
1462
1449
|
}) do |memo, (k, v)|
|
1463
1450
|
memo[k.to_s] ||= v
|
1464
1451
|
memo
|
@@ -1490,12 +1477,7 @@ module RDF::Tabular
|
|
1490
1477
|
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
1491
1478
|
end
|
1492
1479
|
|
1493
|
-
|
1494
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1495
|
-
object.delete(key)
|
1496
|
-
else
|
1497
|
-
object[key] = value
|
1498
|
-
end
|
1480
|
+
set_property(key, type, value, invalid)
|
1499
1481
|
end
|
1500
1482
|
end
|
1501
1483
|
|
@@ -1517,12 +1499,12 @@ module RDF::Tabular
|
|
1517
1499
|
end
|
1518
1500
|
end
|
1519
1501
|
else
|
1520
|
-
|
1502
|
+
log_warn "#{type} has invalid property 'columns': expected array of Column"
|
1521
1503
|
[]
|
1522
1504
|
end
|
1523
1505
|
|
1524
1506
|
unless object[:columns].all? {|v| v.is_a?(Column)}
|
1525
|
-
|
1507
|
+
log_warn "#{type} has invalid property 'columns': expected array of Column"
|
1526
1508
|
# Remove elements that aren't of the right types
|
1527
1509
|
object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
|
1528
1510
|
end
|
@@ -1532,12 +1514,12 @@ module RDF::Tabular
|
|
1532
1514
|
object[:foreignKeys] = case value
|
1533
1515
|
when Array then value
|
1534
1516
|
else
|
1535
|
-
|
1517
|
+
log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1536
1518
|
[]
|
1537
1519
|
end
|
1538
1520
|
|
1539
1521
|
unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
|
1540
|
-
|
1522
|
+
log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1541
1523
|
# Remove elements that aren't of the right types
|
1542
1524
|
object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
|
1543
1525
|
end
|
@@ -1560,6 +1542,18 @@ module RDF::Tabular
|
|
1560
1542
|
end
|
1561
1543
|
end
|
1562
1544
|
end
|
1545
|
+
|
1546
|
+
# Return Annotated Table representation
|
1547
|
+
# Build the annotated representation of this schema as a Hash with string keys.
#
# The result starts from "@id" (only when `id` is set), "@type" => "Schema"
# and "columns" (each column's own `to_atd`). Every entry of `object` is then
# merged in under its stringified key without overriding a value that is
# already truthy. Finally, entries whose value is nil or a Metadata instance
# are dropped.
#
# @return [Hash{String => Object}]
def to_atd
  annotated = {
    "@id" => (id.to_s if id),
    "@type" => "Schema",
    "columns" => Array(columns).map(&:to_atd),
  }
  object.each_with_object(annotated) do |(k, v), memo|
    # `||=` keeps the values seeded above and fills in everything else.
    memo[k.to_s] ||= v
  end.delete_if {|_, v| v.nil? || v.is_a?(Metadata)}
end
|
1563
1557
|
end
|
1564
1558
|
|
1565
1559
|
class Column < Metadata
|
@@ -1619,16 +1613,7 @@ module RDF::Tabular
|
|
1619
1613
|
valid_natural_language_property?(value)
|
1620
1614
|
end
|
1621
1615
|
|
1622
|
-
|
1623
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1624
|
-
object[key] = set_nl(value)
|
1625
|
-
object.delete(key) if object[key].nil?
|
1626
|
-
elsif invalid
|
1627
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1628
|
-
object.delete(key)
|
1629
|
-
else
|
1630
|
-
object[key] = value
|
1631
|
-
end
|
1616
|
+
set_property(key, t, value, invalid)
|
1632
1617
|
end
|
1633
1618
|
end
|
1634
1619
|
|
@@ -1657,7 +1642,6 @@ module RDF::Tabular
|
|
1657
1642
|
"table" => (table.id.to_s if table.id),
|
1658
1643
|
"number" => self.number,
|
1659
1644
|
"sourceNumber" => self.sourceNumber,
|
1660
|
-
"cells" => [],
|
1661
1645
|
"virtual" => self.virtual,
|
1662
1646
|
"name" => self.name,
|
1663
1647
|
"titles" => self.titles
|
@@ -1700,12 +1684,7 @@ module RDF::Tabular
|
|
1700
1684
|
"json or rdf" unless %w(json rdf).include?(value) || value.nil?
|
1701
1685
|
end
|
1702
1686
|
|
1703
|
-
|
1704
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1705
|
-
object.delete(key)
|
1706
|
-
else
|
1707
|
-
object[key] = value
|
1708
|
-
end
|
1687
|
+
set_property(key, type, value, invalid)
|
1709
1688
|
end
|
1710
1689
|
end
|
1711
1690
|
end
|
@@ -1713,7 +1692,7 @@ module RDF::Tabular
|
|
1713
1692
|
class Dialect < Metadata
|
1714
1693
|
# Defaults for dialects
|
1715
1694
|
DEFAULTS = {
|
1716
|
-
commentPrefix:
|
1695
|
+
commentPrefix: false,
|
1717
1696
|
delimiter: ",".freeze,
|
1718
1697
|
doubleQuote: true,
|
1719
1698
|
encoding: "utf-8".freeze,
|
@@ -1749,7 +1728,7 @@ module RDF::Tabular
|
|
1749
1728
|
REQUIRED = [].freeze
|
1750
1729
|
|
1751
1730
|
# Getters and Setters
|
1752
|
-
PROPERTIES.
|
1731
|
+
PROPERTIES.each do |key, type|
|
1753
1732
|
define_method(key) do
|
1754
1733
|
object.fetch(key, DEFAULTS[key])
|
1755
1734
|
end
|
@@ -1772,16 +1751,7 @@ module RDF::Tabular
|
|
1772
1751
|
valid_natural_language_property?(value)
|
1773
1752
|
end
|
1774
1753
|
|
1775
|
-
|
1776
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1777
|
-
object[key] = set_nl(value)
|
1778
|
-
object.delete(key) if object[key].nil?
|
1779
|
-
elsif invalid
|
1780
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1781
|
-
object.delete(key)
|
1782
|
-
else
|
1783
|
-
object[key] = value
|
1784
|
-
end
|
1754
|
+
set_property(key, type, value, invalid)
|
1785
1755
|
end
|
1786
1756
|
end
|
1787
1757
|
|
@@ -1835,38 +1805,75 @@ module RDF::Tabular
|
|
1835
1805
|
lang ||= 'und'
|
1836
1806
|
|
1837
1807
|
# Set encoding on input
|
1838
|
-
|
1839
|
-
(
|
1840
|
-
|
1841
|
-
|
1842
|
-
|
1843
|
-
|
1808
|
+
path = input.base_uri.path rescue ""
|
1809
|
+
if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
|
1810
|
+
# Input is HTML; use fragment identifier to find table.
|
1811
|
+
fragment = RDF::URI(table["url"]).fragment rescue nil
|
1812
|
+
tab = begin
|
1813
|
+
# Extract with nokogiri
|
1814
|
+
require 'nokogiri' unless defined?(:Nokogiri)
|
1815
|
+
doc = Nokogiri::HTML.parse(input)
|
1816
|
+
doc.search("##{fragment}").first if fragment
|
1817
|
+
rescue LoadError
|
1818
|
+
# Extract with REXML
|
1819
|
+
# FIXME
|
1820
|
+
end
|
1844
1821
|
|
1845
|
-
|
1846
|
-
(metadata["rdfs:comment"] ||= []) << value unless value.empty?
|
1847
|
-
end
|
1848
|
-
debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
|
1822
|
+
raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
|
1849
1823
|
|
1850
|
-
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1824
|
+
# Use rows with <th> to create column titles
|
1825
|
+
tab.xpath('.//tr').each do |row|
|
1826
|
+
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
|
+
# Skip columns
|
1828
|
+
skipCols = skipColumns.to_i
|
1829
|
+
next if index < skipCols
|
1856
1830
|
|
1831
|
+
# Trim value
|
1832
|
+
value.lstrip! if %w(true start).include?(trim.to_s)
|
1833
|
+
value.rstrip! if %w(true end).include?(trim.to_s)
|
1834
|
+
|
1835
|
+
# Initialize titles
|
1836
|
+
columns = table["tableSchema"]["columns"] ||= []
|
1837
|
+
column = columns[index - skipCols] ||= {
|
1838
|
+
"titles" => {lang => []},
|
1839
|
+
}
|
1840
|
+
column["titles"][lang] << value
|
1841
|
+
end
|
1842
|
+
end
|
1843
|
+
else
|
1844
|
+
csv = ::CSV.new(input, csv_options)
|
1845
|
+
(1..skipRows.to_i).each do
|
1846
|
+
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1857
1847
|
# Trim value
|
1858
1848
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
1859
1849
|
value.rstrip! if %w(true end).include?(trim.to_s)
|
1860
1850
|
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
1866
|
-
|
1851
|
+
value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
|
1852
|
+
(metadata["rdfs:comment"] ||= []) << value unless value.empty?
|
1853
|
+
end
|
1854
|
+
log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
|
1855
|
+
|
1856
|
+
(1..headerRowCount).each do
|
1857
|
+
row_data = Array(csv.shift)
|
1858
|
+
Array(row_data).each_with_index do |value, index|
|
1859
|
+
# Skip columns
|
1860
|
+
skipCols = skipColumns.to_i
|
1861
|
+
next if index < skipCols
|
1862
|
+
|
1863
|
+
# Trim value
|
1864
|
+
value.lstrip! if %w(true start).include?(trim.to_s)
|
1865
|
+
value.rstrip! if %w(true end).include?(trim.to_s)
|
1866
|
+
|
1867
|
+
# Initialize titles
|
1868
|
+
columns = table["tableSchema"]["columns"] ||= []
|
1869
|
+
column = columns[index - skipCols] ||= {
|
1870
|
+
"titles" => {lang => []},
|
1871
|
+
}
|
1872
|
+
column["titles"][lang] << value
|
1873
|
+
end
|
1867
1874
|
end
|
1868
1875
|
end
|
1869
|
-
|
1876
|
+
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1870
1877
|
input.rewind if input.respond_to?(:rewind)
|
1871
1878
|
|
1872
1879
|
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
@@ -1931,12 +1938,7 @@ module RDF::Tabular
|
|
1931
1938
|
end
|
1932
1939
|
end
|
1933
1940
|
|
1934
|
-
|
1935
|
-
warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1936
|
-
object.delete(key)
|
1937
|
-
else
|
1938
|
-
object[key] = value
|
1939
|
-
end
|
1941
|
+
set_property(key, type, value, invalid)
|
1940
1942
|
end
|
1941
1943
|
end
|
1942
1944
|
end
|
@@ -2056,35 +2058,26 @@ module RDF::Tabular
|
|
2056
2058
|
|
2057
2059
|
# create column if necessary
|
2058
2060
|
columns[index - skipColumns] ||=
|
2059
|
-
Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
|
2061
|
+
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2060
2062
|
|
2061
2063
|
column = columns[index - skipColumns]
|
2062
2064
|
|
2063
2065
|
@values << cell = Cell.new(metadata, column, self, value)
|
2064
2066
|
|
2065
|
-
datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
|
2066
|
-
value = value.gsub(/\r\t
|
2067
|
-
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType
|
2067
|
+
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2068
|
+
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2069
|
+
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2068
2070
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
2069
2071
|
value = column.default || '' if value.empty?
|
2070
2072
|
|
2071
2073
|
cell_values = column.separator ? value.split(column.separator) : [value]
|
2072
2074
|
|
2073
2075
|
cell_values = cell_values.map do |v|
|
2074
|
-
v = v.strip unless %w(string anyAtomicType
|
2076
|
+
v = v.strip unless %w(string anyAtomicType).include?(datatype.base)
|
2075
2077
|
v = column.default || '' if v.empty?
|
2076
2078
|
if Array(column.null).include?(v)
|
2077
2079
|
nil
|
2078
2080
|
else
|
2079
|
-
# Trim value
|
2080
|
-
if %w(string anyAtomicType any).include?(datatype.base)
|
2081
|
-
v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
|
2082
|
-
v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
|
2083
|
-
else
|
2084
|
-
# unless the datatype is string or anyAtomicType or any, strip leading and trailing whitespace from the string value
|
2085
|
-
v.strip!
|
2086
|
-
end
|
2087
|
-
|
2088
2081
|
expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
|
2089
2082
|
if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
|
2090
2083
|
lit_or_errors
|
@@ -2127,7 +2120,11 @@ module RDF::Tabular
|
|
2127
2120
|
|
2128
2121
|
# Identifier for this row, as an RFC7111 fragment
|
2129
2122
|
# @return [RDF::URI]
|
2130
|
-
def id;
|
2123
|
+
# Identifier for this row: a copy of the table URL whose fragment is set to
# "row=<sourceNumber>".
#
# @return a duplicate of `table.url` carrying the row fragment
def id
  # Work on a duplicate so the table's own URL is left without a fragment.
  table.url.dup.tap {|u| u.fragment = "row=#{sourceNumber}"}
end
|
2131
2128
|
|
2132
2129
|
# Return Annotated Row representation
|
2133
2130
|
def to_atd
|