rdf-tabular 0.2.1 → 0.4.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,6 @@ module RDF
11
11
  # @author [Gregg Kellogg](http://greggkellogg.net/)
12
12
  module Tabular
13
13
  require 'rdf/tabular/format'
14
- require 'rdf/tabular/utils'
15
14
  autoload :Column, 'rdf/tabular/metadata'
16
15
  autoload :CSVW, 'rdf/tabular/csvw'
17
16
  autoload :Dialect, 'rdf/tabular/metadata'
@@ -46,5 +46,21 @@ module RDF::Tabular
46
46
  def self.detect(sample)
47
47
  !!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
48
48
  end
49
+
50
+ ##
51
+ # Hash of CLI commands appropriate for this format
52
+ # @return [Hash{Symbol => Lambda(Array, Hash)}]
53
+ def self.cli_commands
54
+ {
55
+ :"tabular-json" => ->(argv, opts) do
56
+ raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
57
+ out = opts[:output] || $stdout
58
+ out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
59
+ RDF::CLI.parse(argv, opts) do |reader|
60
+ out.puts reader.to_json
61
+ end
62
+ end
63
+ }
64
+ end
49
65
  end
50
66
  end
@@ -19,16 +19,12 @@ require 'yaml' # used by BCP47, which should have required it.
19
19
  # @author [Gregg Kellogg](http://greggkellogg.net/)
20
20
  module RDF::Tabular
21
21
  class Metadata
22
- include Utils
22
+ include RDF::Util::Logger
23
23
 
24
24
  # Hash representation
25
25
  # @return [Hash<Symbol,Object>]
26
26
  attr_accessor :object
27
27
 
28
- # Warnings detected on initialization or when setting properties
29
- # @return [Array<String>]
30
- attr_accessor :warnings
31
-
32
28
  # Inherited properties, valid for all types
33
29
  INHERITED_PROPERTIES = {
34
30
  aboutUrl: :uri_template,
@@ -137,7 +133,7 @@ module RDF::Tabular
137
133
  #
138
134
  # @param [String] path
139
135
  # @param [Hash{Symbol => Object}] options
140
- # see `RDF::Util::File.open_file` in RDF.rb and {#new}
136
+ # see `RDF::Util::File.open_file` in RDF.rb and {new}
141
137
  # @yield [Metadata]
142
138
  # @raise [IOError] if file not found
143
139
  def self.open(path, options = {})
@@ -153,7 +149,7 @@ module RDF::Tabular
153
149
  end
154
150
 
155
151
  # Return the well-known configuration for a file, and remember using a weak-reference cache to avoid unnecessary retrievals.
156
- # @param [String] base, the URL used for finding the file
152
+ # @param [String] base the URL used for finding the file
157
153
  # @return [Array<String>, false]
158
154
  def self.site_wide_config(base)
159
155
  require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
@@ -179,7 +175,6 @@ module RDF::Tabular
179
175
  # @return [Metadata]
180
176
  def self.for_input(input, options = {})
181
177
  base = options[:base]
182
- warnings = options.fetch(:warnings, [])
183
178
 
184
179
  # Use user metadata, if provided
185
180
  metadata = case options[:metadata]
@@ -202,10 +197,7 @@ module RDF::Tabular
202
197
  if md.describes_file?(base)
203
198
  metadata = md
204
199
  else
205
- warnings << "Found metadata at #{link_loc}, which does not describe #{base}, ignoring"
206
- if options[:validate] && !options[:warnings]
207
- $stderr.puts "Warnings: #{warnings.join("\n")}"
208
- end
200
+ log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
209
201
  end
210
202
  end
211
203
  end
@@ -214,12 +206,12 @@ module RDF::Tabular
214
206
  # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
215
207
  if !metadata && base
216
208
  templates = site_wide_config(base)
217
- debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
209
+ log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
218
210
  locs = templates.map do |template|
219
211
  t = Addressable::Template.new(template)
220
212
  RDF::URI(base).join(t.expand(url: base).to_s)
221
213
  end
222
- debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
214
+ log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
223
215
 
224
216
  locs.each do |loc|
225
217
  metadata ||= begin
@@ -230,15 +222,12 @@ module RDF::Tabular
230
222
  if md.describes_file?(base)
231
223
  md
232
224
  else
233
- warnings << "Found metadata at #{loc}, which does not describe #{base}, ignoring"
234
- if options[:validate] && !options[:warnings]
235
- $stderr.puts "Warnings: #{warnings.join("\n")}"
236
- end
225
+ log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
237
226
  nil
238
227
  end
239
228
  end
240
229
  rescue IOError
241
- debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
230
+ log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
242
231
  nil
243
232
  end
244
233
  end
@@ -331,7 +320,6 @@ module RDF::Tabular
331
320
  # @return [Metadata]
332
321
  def initialize(input, options = {})
333
322
  @options = options.dup
334
- @options[:depth] ||= 0
335
323
 
336
324
  # Parent of this Metadata, if any
337
325
  @parent = @options[:parent]
@@ -344,14 +332,14 @@ module RDF::Tabular
344
332
 
345
333
  @context = case input['@context']
346
334
  when Array
347
- warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
335
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
348
336
  c = LOCAL_CONTEXT.dup
349
337
  c.base = RDF::URI(opt_base)
350
338
  obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
351
339
  raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
352
340
  c.parse(obj)
353
341
  when Hash
354
- warn "Context missing required value 'http://www.w3.org/ns/csvw'"
342
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
355
343
  c = LOCAL_CONTEXT.dup
356
344
  c.base = RDF::URI(opt_base)
357
345
  c.parse(input['@context'])
@@ -362,7 +350,7 @@ module RDF::Tabular
362
350
  c
363
351
  else
364
352
  if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
365
- warn "Context missing required value 'http://www.w3.org/ns/csvw'"
353
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
366
354
  LOCAL_CONTEXT.dup
367
355
  c = LOCAL_CONTEXT.dup
368
356
  c.base = RDF::URI(opt_base)
@@ -375,7 +363,7 @@ module RDF::Tabular
375
363
  @options[:base] = @context ? @context.base : RDF::URI(opt_base)
376
364
 
377
365
  if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
378
- warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
366
+ log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
379
367
  @context.default_language = nil
380
368
  end
381
369
 
@@ -385,7 +373,7 @@ module RDF::Tabular
385
373
 
386
374
  @object = {}
387
375
 
388
- depth do
376
+ log_depth do
389
377
  # Input was parsed in .new
390
378
  # Metadata is object with symbolic keys
391
379
  input.each do |key, value|
@@ -401,7 +389,7 @@ module RDF::Tabular
401
389
  object[:@id] = if value.is_a?(String)
402
390
  value
403
391
  else
404
- warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
392
+ log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
405
393
  "" # Default value
406
394
  end
407
395
  @id = @options[:base].join(object[:@id])
@@ -426,14 +414,14 @@ module RDF::Tabular
426
414
  end
427
415
 
428
416
  if reason
429
- debug("md#initialize") {reason}
430
- debug("md#initialize") {"filenames: #{filenames}"}
431
- debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
417
+ log_debug("md#initialize") {reason}
418
+ log_debug("md#initialize") {"filenames: #{filenames}"}
419
+ log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
432
420
  end
433
421
  end
434
422
 
435
423
  # Getters and Setters
436
- INHERITED_PROPERTIES.keys.each do |key|
424
+ INHERITED_PROPERTIES.each do |key, type|
437
425
  define_method(key) do
438
426
  object.fetch(key) do
439
427
  parent ? parent.send(key) : default_value(key)
@@ -459,12 +447,7 @@ module RDF::Tabular
459
447
  # We handle this through a separate datatype= setter
460
448
  end
461
449
 
462
- if invalid
463
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
464
- object.delete(key)
465
- else
466
- object[key] = value
467
- end
450
+ set_property(key, type, value, invalid)
468
451
  end
469
452
  end
470
453
 
@@ -492,7 +475,7 @@ module RDF::Tabular
492
475
  when Schema
493
476
  value
494
477
  else
495
- warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
478
+ log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
496
479
  Schema.new({}, @options.merge(parent: self, context: nil))
497
480
  end
498
481
  end
@@ -539,7 +522,7 @@ module RDF::Tabular
539
522
  when Dialect
540
523
  value
541
524
  else
542
- warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
525
+ log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
543
526
  nil
544
527
  end
545
528
  end
@@ -549,15 +532,15 @@ module RDF::Tabular
549
532
  # @raise [Error] if datatype is not valid
550
533
  def datatype=(value)
551
534
  val = case value
552
- when Hash then Datatype.new(value, parent: self)
553
- else Datatype.new({base: value}, parent: self)
535
+ when Hash then Datatype.new(value, @options.merge(parent: self))
536
+ else Datatype.new({base: value}, @options.merge(parent: self))
554
537
  end
555
538
 
556
539
  if val.valid? || value.is_a?(Hash)
557
540
  # Set it if it was specified as an object, which may cause validation errors later
558
541
  object[:datatype] = val
559
542
  else
560
- warn "#{type} has invalid property 'datatype': expected a built-in or an object"
543
+ log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
561
544
  end
562
545
  end
563
546
 
@@ -572,40 +555,20 @@ module RDF::Tabular
572
555
  ##
573
556
  # Do we have valid metadata?
574
557
  def valid?
575
- validate!
576
- true
577
- rescue
578
- false
579
- end
580
-
581
- ##
582
- # Validation errors
583
- # @return [Array<String>]
584
- def errors
585
- validate! && []
586
- rescue Error => e
587
- e.message.split("\n")
558
+ validate # Possibly re-validate
559
+ !log_statistics[:error]
588
560
  end
589
561
 
590
- ##
591
- # Validation warnings, available only after validating or finding warnings
592
- # @return [Array<String>]
593
- def warnings
594
- ((@warnings || []) + object.
595
- values.
596
- flatten.
597
- select {|v| v.is_a?(Metadata)}.
598
- map(&:warnings).
599
- flatten).compact.uniq
562
+ def validate!
563
+ raise Error, "Metadata error" unless valid?
600
564
  end
601
565
 
602
566
  ##
603
567
  # Validate metadata, raising an error containing all errors detected during validation
604
568
  # @raise [Error] Raise error if metadata has any unexpected properties
605
569
  # @return [self]
606
- def validate!
570
+ def validate
607
571
  expected_props, required_props = @properties.keys, @required
608
- errors = []
609
572
 
610
573
  unless is_a?(Dialect) || is_a?(Transformation)
611
574
  expected_props = expected_props + INHERITED_PROPERTIES.keys
@@ -614,10 +577,10 @@ module RDF::Tabular
614
577
  # It has only expected properties (exclude metadata)
615
578
  check_keys = object.keys - [:"@id", :"@context"]
616
579
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
617
- warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
580
+ log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
618
581
 
619
582
  # It has required properties
620
- errors << "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
583
+ log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
621
584
 
622
585
  self.normalize!
623
586
 
@@ -626,55 +589,49 @@ module RDF::Tabular
626
589
  value = object[key]
627
590
  case key
628
591
  when :base
629
- errors << "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
592
+ log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
630
593
  when :columns
631
- value.each do |v|
632
- begin
633
- v.validate!
634
- rescue Error => e
635
- errors << e.message
636
- end
594
+ value.each do |col|
595
+ col.validate
596
+ log_statistics.merge!(col.log_statistics)
637
597
  end
638
598
  column_names = value.map(&:name)
639
- errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
599
+ log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
640
600
  when :datatype, :dialect, :tables, :tableSchema, :transformations
641
601
  Array(value).each do |t|
642
602
  # Make sure value is of appropriate class
643
603
  if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
644
- begin
645
- t.validate!
646
- rescue Error => e
647
- errors << e.message
648
- end
604
+ t.validate
605
+ log_statistics.merge!(t.log_statistics)
649
606
  else
650
- errors << "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
607
+ log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
651
608
  end
652
609
  end
653
- errors << "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
610
+ log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
654
611
  when :foreignKeys
655
612
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
656
613
  value.each do |fk|
657
614
  columnReference, reference = fk['columnReference'], fk['reference']
658
- errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
659
- errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
615
+ log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
616
+ log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
660
617
 
661
618
  # Verify that columns exist in this schema
662
- errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
619
+ log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
663
620
  Array(columnReference).each do |k|
664
- errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
621
+ log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
665
622
  end
666
623
 
667
624
  if reference.is_a?(Hash)
668
- errors << "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
625
+ log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
669
626
  ref_cols = reference['columnReference']
670
627
  schema = if reference.has_key?('resource')
671
628
  if reference.has_key?('schemaReference')
672
- errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
629
+ log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
673
630
  end
674
631
  # resource is the URL of a Table in the TableGroup
675
632
  ref = context.base.join(reference['resource']).to_s
676
- table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
677
- errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
633
+ table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
634
+ log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
678
635
  table.tableSchema if table
679
636
  elsif reference.has_key?('schemaReference')
680
637
  # resource is the @id of a Schema in the TableGroup
@@ -682,25 +639,25 @@ module RDF::Tabular
682
639
  tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
683
640
  case tables.length
684
641
  when 0
685
- errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
642
+ log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
686
643
  nil
687
644
  when 1
688
645
  tables.first.tableSchema
689
646
  else
690
- errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
647
+ log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
691
648
  nil
692
649
  end
693
650
  end
694
651
 
695
652
  if schema
696
653
  # ref_cols must exist in schema
697
- errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
654
+ log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
698
655
  Array(ref_cols).each do |k|
699
- errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
656
+ log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
700
657
  end
701
658
  end
702
659
  else
703
- errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
660
+ log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
704
661
  end
705
662
  end
706
663
  when :format
@@ -712,7 +669,7 @@ module RDF::Tabular
712
669
  nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
713
670
  unsignedLong unsignedInt unsignedShort unsignedByte
714
671
  ).include?(self.base)
715
- warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
672
+ log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
716
673
  object.delete(:format) # act as if not set
717
674
  end
718
675
 
@@ -720,14 +677,14 @@ module RDF::Tabular
720
677
  begin
721
678
  parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
722
679
  rescue ArgumentError => e
723
- warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
724
- object[:format].delete("pattern") # act as if not set
680
+ log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
681
+ object[:format].delete("pattern") if object[:format] # act as if not set
725
682
  end
726
683
  else
727
684
  case self.base
728
685
  when 'boolean'
729
686
  unless value.split("|").length == 2
730
- warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
687
+ log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
731
688
  object.delete(:format) # act as if not set
732
689
  end
733
690
  when :decimal, :integer, :long, :int, :short, :byte,
@@ -738,7 +695,7 @@ module RDF::Tabular
738
695
  begin
739
696
  parse_uax35_number(value, nil)
740
697
  rescue ArgumentError => e
741
- warn "#{type} has invalid property '#{key}': #{e.message}"
698
+ log_warn "#{type} has invalid property '#{key}': #{e.message}"
742
699
  object.delete(:format) # act as if not set
743
700
  end
744
701
  when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
@@ -746,7 +703,7 @@ module RDF::Tabular
746
703
  begin
747
704
  parse_uax35_date(value, nil)
748
705
  rescue ArgumentError => e
749
- warn "#{type} has invalid property '#{key}': #{e.message}"
706
+ log_warn "#{type} has invalid property '#{key}': #{e.message}"
750
707
  object.delete(:format) # act as if not set
751
708
  end
752
709
  else
@@ -754,7 +711,7 @@ module RDF::Tabular
754
711
  begin
755
712
  Regexp.compile(value)
756
713
  rescue
757
- warn "#{type} has invalid property '#{key}': #{$!.message}"
714
+ log_warn "#{type} has invalid property '#{key}': #{$!.message}"
758
715
  object.delete(:format) # act as if not set
759
716
  end
760
717
  end
@@ -765,20 +722,20 @@ module RDF::Tabular
765
722
  if object[:length]
766
723
  case key
767
724
  when :minLength
768
- errors << "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
725
+ log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
769
726
  when :maxLength
770
- errors << "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
727
+ log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
771
728
  end
772
729
  end
773
730
 
774
731
  # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
775
732
  if key == :maxLength && object[:minLength]
776
- errors << "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
733
+ log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
777
734
  end
778
735
 
779
736
  # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
780
737
  unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
781
- errors << "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
738
+ log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
782
739
  end
783
740
  when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
784
741
  case self.base
@@ -786,46 +743,46 @@ module RDF::Tabular
786
743
  'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
787
744
  'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
788
745
  'duration', 'dayTimeDuration', 'yearMonthDuration'
789
- errors << "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
746
+ log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
790
747
 
791
748
  case key
792
749
  when :minInclusive
793
750
  # Applications MUST raise an error if both minInclusive and minExclusive are specified
794
- errors << "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
751
+ log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
795
752
 
796
753
  # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
797
- errors << "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
754
+ log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
798
755
 
799
756
  # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
800
- errors << "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
757
+ log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
801
758
  when :maxInclusive
802
759
  # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
803
- errors << "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
760
+ log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
804
761
  when :minExclusive
805
762
  # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
806
- errors << "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
763
+ log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
807
764
 
808
765
  # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
809
- errors << "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
766
+ log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
810
767
  end
811
768
  else
812
- errors << "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
769
+ log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
813
770
  end
814
771
  when :notes
815
772
  unless value.is_a?(Hash) || value.is_a?(Array)
816
- errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
773
+ log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
817
774
  end
818
775
  begin
819
776
  normalize_jsonld(key, value)
820
777
  rescue Error => e
821
- errors << "#{type} has invalid content '#{key}': #{e.message}"
778
+ log_error "#{type} has invalid content '#{key}': #{e.message}"
822
779
  end
823
780
  when :primaryKey, :rowTitles
824
781
  # A column reference property that holds either a single reference to a column description object or an array of references.
825
782
  "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
826
783
  Array(value).each do |k|
827
784
  unless self.columns.any? {|c| c[:name] == k}
828
- warn "#{type} has invalid property '#{key}': column reference not found #{k}"
785
+ log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
829
786
  object.delete(key)
830
787
  end
831
788
  end
@@ -834,34 +791,33 @@ module RDF::Tabular
834
791
  when :@id
835
792
  # Must not be a BNode
836
793
  if value.to_s.start_with?("_:")
837
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
794
+ log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
838
795
  end
839
796
 
840
797
  # Datatype @id MUST NOT be the URL of a built-in type
841
798
  if self.is_a?(Datatype) && DATATYPES.values.include?(value)
842
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
799
+ log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
843
800
  end
844
801
  when :@type
845
802
  # Must not be a BNode
846
803
  if value.to_s.start_with?("_:")
847
- errors << "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
804
+ log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
848
805
  end
849
806
  case type
850
807
  when :Transformation
851
- errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
808
+ log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
852
809
  else
853
- errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
810
+ log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
854
811
  end
855
812
  when ->(k) {key.to_s.include?(':')}
856
813
  begin
857
814
  normalize_jsonld(key, value)
858
815
  rescue Error => e
859
- errors << "#{type} has invalid content '#{key}': #{e.message}"
816
+ log_error "#{type} has invalid content '#{key}': #{e.message}"
860
817
  end
861
818
  end
862
819
  end
863
820
 
864
- raise Error, errors.join("\n") unless errors.empty?
865
821
  self
866
822
  end
867
823
 
@@ -890,10 +846,37 @@ module RDF::Tabular
890
846
  # @param [:read] input
891
847
  # @yield [Row]
892
848
  def each_row(input)
893
- csv = ::CSV.new(input, csv_options)
894
- # Skip skipRows and headerRowCount
895
- number, skipped = 0, (dialect.skipRows.to_i + dialect.headerRowCount)
896
- (1..skipped).each {csv.shift}
849
+ csv, number, skipped = nil, 0, 0
850
+ path = input.base_uri.path rescue ""
851
+ if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
852
+ # Input is HTML; use fragment identifier to find table.
853
+ fragment = RDF::URI(self.url).fragment rescue nil
854
+ tab = begin
855
+ # Extract with nokogiri
856
+ require 'nokogiri' unless defined?(:Nokogiri)
857
+ doc = Nokogiri::HTML.parse(input)
858
+ doc.search("##{fragment}").first if fragment
859
+ rescue LoadError
860
+ # Extract with REXML
861
+ # FIXME
862
+ end
863
+
864
+ raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
865
+
866
+ # Use rows with <td> to create column data
867
+ csv = []
868
+ number = 0
869
+ tab.xpath('.//tr').map do |row|
870
+ number += 1 if row.xpath('th')
871
+ data = row.xpath('td').map(&:content)
872
+ csv << data unless data.empty?
873
+ end
874
+ else
875
+ csv = ::CSV.new(input, csv_options)
876
+ # Skip skipRows and headerRowCount
877
+ skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
878
+ (1..skipped).each {csv.shift}
879
+ end
897
880
  csv.each do |data|
898
881
  # Check for embedded comments
899
882
  if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
@@ -939,17 +922,17 @@ module RDF::Tabular
939
922
  if value['@value']
940
923
  dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
941
924
  lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
942
- block.call(RDF::Statement.new(subject, property, lit))
925
+ block.call(RDF::Statement(subject, property, lit))
943
926
  else
944
927
  # value MUST be a node object, establish a new subject from `@id`
945
928
  s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
946
929
 
947
930
  # Generate a triple
948
- block.call(RDF::Statement.new(subject, property, s2))
931
+ block.call(RDF::Statement(subject, property, s2))
949
932
 
950
933
  # Generate types
951
934
  Array(value['@type']).each do |t|
952
- block.call(RDF::Statement.new(s2, RDF.type, context.expand_iri(t, vocab: true)))
935
+ block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
953
936
  end
954
937
 
955
938
  # Generate triples for all other properties
@@ -961,7 +944,7 @@ module RDF::Tabular
961
944
  else
962
945
  # Value is a primitive JSON value
963
946
  lit = RDF::Literal(value)
964
- block.call(RDF::Statement.new(subject, property, RDF::Literal(value)))
947
+ block.call(RDF::Statement(subject, property, RDF::Literal(value)))
965
948
  end
966
949
  else
967
950
  case value
@@ -1016,7 +999,7 @@ module RDF::Tabular
1016
999
  if @options[:validate]
1017
1000
  raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1018
1001
  else
1019
- warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1002
+ log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1020
1003
  end
1021
1004
  end
1022
1005
  else
@@ -1025,7 +1008,7 @@ module RDF::Tabular
1025
1008
  if @options[:validate]
1026
1009
  raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1027
1010
  else
1028
- warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1011
+ log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1029
1012
  end
1030
1013
  end
1031
1014
 
@@ -1038,7 +1021,7 @@ module RDF::Tabular
1038
1021
  if @options[:validate]
1039
1022
  raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1040
1023
  else
1041
- warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1024
+ log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1042
1025
 
1043
1026
  # If present, a virtual column MUST appear after all other non-virtual column definitions
1044
1027
  raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
@@ -1053,13 +1036,13 @@ module RDF::Tabular
1053
1036
  end
1054
1037
  index = 0
1055
1038
  object_columns.all? do |cb|
1056
- ca = non_virtual_columns[index] || Column.new({})
1039
+ ca = non_virtual_columns[index] || Column.new({}, @options)
1057
1040
  ta = ca.titles || {}
1058
1041
  tb = cb.titles || {}
1059
1042
  if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
1060
1043
  true
1061
1044
  elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
1062
- raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}" unless ca.name == cb.name
1045
+ raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
1063
1046
  elsif @options[:validate] || !ta.empty? && !tb.empty?
1064
1047
  # If validating, column compatibility requires strict match between titles
1065
1048
  titles_match = case
@@ -1083,10 +1066,10 @@ module RDF::Tabular
1083
1066
  true
1084
1067
  elsif !@options[:validate]
1085
1068
  # If not validating, columns don't match, but processing continues
1086
- warn "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1069
+ log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1087
1070
  true
1088
1071
  else
1089
- raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1072
+ raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1090
1073
  end
1091
1074
  end
1092
1075
  index += 1
@@ -1180,13 +1163,13 @@ module RDF::Tabular
1180
1163
  when Hash
1181
1164
  if value['@value']
1182
1165
  if !(value.keys.sort - %w(@value @type @language)).empty?
1183
- raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1166
+ log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1184
1167
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
1185
- raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
1168
+ log_error "Value object may not contain both @type and @language: #{value.to_json}"
1186
1169
  elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
1187
- raise Error, "Value object with @language must use valid language: #{value.to_json}"
1170
+ log_error "Value object with @language must use valid language: #{value.to_json}"
1188
1171
  elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
1189
- raise Error, "Value object with @type must defined type: #{value.to_json}"
1172
+ log_error "Value object with @type must defined type: #{value.to_json}"
1190
1173
  end
1191
1174
  value
1192
1175
  else
@@ -1195,16 +1178,16 @@ module RDF::Tabular
1195
1178
  case k
1196
1179
  when "@id"
1197
1180
  nv[k] = context.expand_iri(v, documentRelative: true).to_s
1198
- raise Error, "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1181
+ log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1199
1182
  when "@type"
1200
1183
  Array(v).each do |vv|
1201
1184
  # Validate that all type values transform to absolute IRIs
1202
1185
  resource = context.expand_iri(vv, vocab: true)
1203
- raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
1186
+ log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
1204
1187
  end
1205
1188
  nv[k] = v
1206
1189
  when /^(@|_:)/
1207
- raise Error, "Invalid use of #{k} in JSON-LD content"
1190
+ log_error "Invalid use of #{k} in JSON-LD content"
1208
1191
  else
1209
1192
  nv[k] = normalize_jsonld(k, v)
1210
1193
  end
@@ -1217,10 +1200,22 @@ module RDF::Tabular
1217
1200
  end
1218
1201
  protected
1219
1202
 
1220
- # Add a warning on this object
1221
- def warn(string)
1222
- debug("warn: #{string}")
1223
- (@warnings ||= []) << string
1203
+ def set_property(key, type, value, invalid)
1204
+ if invalid
1205
+ log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1206
+ case type
1207
+ when :link, :uri_template
1208
+ object[key] = ""
1209
+ when :object
1210
+ object[key] = {}
1211
+ when :natural_language
1212
+ object[key] = set_nl(value) || []
1213
+ else
1214
+ object.delete(key)
1215
+ end
1216
+ else
1217
+ object[key] = value
1218
+ end
1224
1219
  end
1225
1220
 
1226
1221
  # When setting a natural language property, always put in language-map form
@@ -1251,12 +1246,12 @@ module RDF::Tabular
1251
1246
  end
1252
1247
  end
1253
1248
  else
1254
- warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1249
+ log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1255
1250
  []
1256
1251
  end
1257
1252
 
1258
1253
  unless object[key].all? {|v| v.is_a?(klass)}
1259
- warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1254
+ log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1260
1255
  # Remove elements that aren't of the right types
1261
1256
  object[key] = object[key].select! {|v| v.is_a?(klass)}
1262
1257
  end
@@ -1285,14 +1280,13 @@ module RDF::Tabular
1285
1280
  end
1286
1281
 
1287
1282
  class DebugContext
1288
- include Utils
1289
- def initialize(*args, &block)
1290
- @options = {}
1291
- debug(*args, &block)
1292
- end
1283
+ include RDF::Util::Logger
1284
+ end
1285
+ def self.log_debug(*args, &block)
1286
+ DebugContext.new.log_debug(*args, &block)
1293
1287
  end
1294
- def self.debug(*args, &block)
1295
- DebugContext.new(*args, &block)
1288
+ def self.log_warn(*args)
1289
+ DebugContext.new.log_warn(*args)
1296
1290
  end
1297
1291
  end
1298
1292
 
@@ -1329,12 +1323,7 @@ module RDF::Tabular
1329
1323
  # We handle this through a separate setters
1330
1324
  end
1331
1325
 
1332
- if invalid
1333
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1334
- object.delete(key)
1335
- else
1336
- object[key] = value
1337
- end
1326
+ set_property(key, type, value, invalid)
1338
1327
  end
1339
1328
  end
1340
1329
 
@@ -1421,8 +1410,7 @@ module RDF::Tabular
1421
1410
  end
1422
1411
 
1423
1412
  if invalid
1424
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1425
- object.delete(key)
1413
+ set_property(key, type, value, invalid)
1426
1414
  elsif key == :url
1427
1415
  # URL of CSV relative to metadata
1428
1416
  object[:url] = value
@@ -1446,7 +1434,7 @@ module RDF::Tabular
1446
1434
  content['@context'] = object.delete(:@context) if object[:@context]
1447
1435
  ctx = @context
1448
1436
  remove_instance_variable(:@context) if instance_variables.include?(:@context)
1449
- tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
1437
+ tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
1450
1438
  @parent = tg # Link from parent
1451
1439
  tg
1452
1440
  end
@@ -1457,8 +1445,7 @@ module RDF::Tabular
1457
1445
  "@id" => (id.to_s if id),
1458
1446
  "@type" => "AnnotatedTable",
1459
1447
  "url" => self.url.to_s,
1460
- "columns" => Array(tableSchema ? tableSchema.columns : []).map(&:to_atd),
1461
- "rows" => []
1448
+ "tableSchema" => (tableSchema.to_atd if tableSchema),
1462
1449
  }) do |memo, (k, v)|
1463
1450
  memo[k.to_s] ||= v
1464
1451
  memo
@@ -1490,12 +1477,7 @@ module RDF::Tabular
1490
1477
  "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
1491
1478
  end
1492
1479
 
1493
- if invalid
1494
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1495
- object.delete(key)
1496
- else
1497
- object[key] = value
1498
- end
1480
+ set_property(key, type, value, invalid)
1499
1481
  end
1500
1482
  end
1501
1483
 
@@ -1517,12 +1499,12 @@ module RDF::Tabular
1517
1499
  end
1518
1500
  end
1519
1501
  else
1520
- warn "#{type} has invalid property 'columns': expected array of Column"
1502
+ log_warn "#{type} has invalid property 'columns': expected array of Column"
1521
1503
  []
1522
1504
  end
1523
1505
 
1524
1506
  unless object[:columns].all? {|v| v.is_a?(Column)}
1525
- warn "#{type} has invalid property 'columns': expected array of Column"
1507
+ log_warn "#{type} has invalid property 'columns': expected array of Column"
1526
1508
  # Remove elements that aren't of the right types
1527
1509
  object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
1528
1510
  end
@@ -1532,12 +1514,12 @@ module RDF::Tabular
1532
1514
  object[:foreignKeys] = case value
1533
1515
  when Array then value
1534
1516
  else
1535
- warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1517
+ log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1536
1518
  []
1537
1519
  end
1538
1520
 
1539
1521
  unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
1540
- warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1522
+ log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1541
1523
  # Remove elements that aren't of the right types
1542
1524
  object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
1543
1525
  end
@@ -1560,6 +1542,18 @@ module RDF::Tabular
1560
1542
  end
1561
1543
  end
1562
1544
  end
1545
+
1546
+ # Return Annotated Table representation
1547
+ def to_atd
1548
+ object.inject({
1549
+ "@id" => (id.to_s if id),
1550
+ "@type" => "Schema",
1551
+ "columns" => Array(columns).map(&:to_atd),
1552
+ }) do |memo, (k, v)|
1553
+ memo[k.to_s] ||= v
1554
+ memo
1555
+ end.delete_if {|k,v| v.nil? || v.is_a?(Metadata)}
1556
+ end
1563
1557
  end
1564
1558
 
1565
1559
  class Column < Metadata
@@ -1619,16 +1613,7 @@ module RDF::Tabular
1619
1613
  valid_natural_language_property?(value)
1620
1614
  end
1621
1615
 
1622
- if invalid && key == :titles
1623
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1624
- object[key] = set_nl(value)
1625
- object.delete(key) if object[key].nil?
1626
- elsif invalid
1627
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1628
- object.delete(key)
1629
- else
1630
- object[key] = value
1631
- end
1616
+ set_property(key, t, value, invalid)
1632
1617
  end
1633
1618
  end
1634
1619
 
@@ -1657,7 +1642,6 @@ module RDF::Tabular
1657
1642
  "table" => (table.id.to_s if table.id),
1658
1643
  "number" => self.number,
1659
1644
  "sourceNumber" => self.sourceNumber,
1660
- "cells" => [],
1661
1645
  "virtual" => self.virtual,
1662
1646
  "name" => self.name,
1663
1647
  "titles" => self.titles
@@ -1700,12 +1684,7 @@ module RDF::Tabular
1700
1684
  "json or rdf" unless %w(json rdf).include?(value) || value.nil?
1701
1685
  end
1702
1686
 
1703
- if invalid
1704
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1705
- object.delete(key)
1706
- else
1707
- object[key] = value
1708
- end
1687
+ set_property(key, type, value, invalid)
1709
1688
  end
1710
1689
  end
1711
1690
  end
@@ -1713,7 +1692,7 @@ module RDF::Tabular
1713
1692
  class Dialect < Metadata
1714
1693
  # Defaults for dialects
1715
1694
  DEFAULTS = {
1716
- commentPrefix: "#".freeze,
1695
+ commentPrefix: false,
1717
1696
  delimiter: ",".freeze,
1718
1697
  doubleQuote: true,
1719
1698
  encoding: "utf-8".freeze,
@@ -1749,7 +1728,7 @@ module RDF::Tabular
1749
1728
  REQUIRED = [].freeze
1750
1729
 
1751
1730
  # Getters and Setters
1752
- PROPERTIES.keys.each do |key|
1731
+ PROPERTIES.each do |key, type|
1753
1732
  define_method(key) do
1754
1733
  object.fetch(key, DEFAULTS[key])
1755
1734
  end
@@ -1772,16 +1751,7 @@ module RDF::Tabular
1772
1751
  valid_natural_language_property?(value)
1773
1752
  end
1774
1753
 
1775
- if invalid && key == :titles
1776
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1777
- object[key] = set_nl(value)
1778
- object.delete(key) if object[key].nil?
1779
- elsif invalid
1780
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1781
- object.delete(key)
1782
- else
1783
- object[key] = value
1784
- end
1754
+ set_property(key, type, value, invalid)
1785
1755
  end
1786
1756
  end
1787
1757
 
@@ -1835,38 +1805,75 @@ module RDF::Tabular
1835
1805
  lang ||= 'und'
1836
1806
 
1837
1807
  # Set encoding on input
1838
- csv = ::CSV.new(input, csv_options)
1839
- (1..skipRows.to_i).each do
1840
- value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1841
- # Trim value
1842
- value.lstrip! if %w(true start).include?(trim.to_s)
1843
- value.rstrip! if %w(true end).include?(trim.to_s)
1808
+ path = input.base_uri.path rescue ""
1809
+ if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
1810
+ # Input is HTML; use fragment identfier to find table.
1811
+ fragment = RDF::URI(table["url"]).fragment rescue nil
1812
+ tab = begin
1813
+ # Extract with nokogiri
1814
+ require 'nokogiri' unless defined?(:Nokogiri)
1815
+ doc = Nokogiri::HTML.parse(input)
1816
+ doc.search("##{fragment}").first if fragment
1817
+ rescue LoadError
1818
+ # Extract with REXML
1819
+ # FIXME
1820
+ end
1844
1821
 
1845
- value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1846
- (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1847
- end
1848
- debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1822
+ raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
1849
1823
 
1850
- (1..headerRowCount).each do
1851
- row_data = Array(csv.shift)
1852
- Array(row_data).each_with_index do |value, index|
1853
- # Skip columns
1854
- skipCols = skipColumns.to_i
1855
- next if index < skipCols
1824
+ # Use rows with <th> to create column titles
1825
+ tab.xpath('.//tr').each do |row|
1826
+ row.xpath('th').map(&:content).each_with_index do |value, index|
1827
+ # Skip columns
1828
+ skipCols = skipColumns.to_i
1829
+ next if index < skipCols
1856
1830
 
1831
+ # Trim value
1832
+ value.lstrip! if %w(true start).include?(trim.to_s)
1833
+ value.rstrip! if %w(true end).include?(trim.to_s)
1834
+
1835
+ # Initialize titles
1836
+ columns = table["tableSchema"]["columns"] ||= []
1837
+ column = columns[index - skipCols] ||= {
1838
+ "titles" => {lang => []},
1839
+ }
1840
+ column["titles"][lang] << value
1841
+ end
1842
+ end
1843
+ else
1844
+ csv = ::CSV.new(input, csv_options)
1845
+ (1..skipRows.to_i).each do
1846
+ value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1857
1847
  # Trim value
1858
1848
  value.lstrip! if %w(true start).include?(trim.to_s)
1859
1849
  value.rstrip! if %w(true end).include?(trim.to_s)
1860
1850
 
1861
- # Initialize titles
1862
- columns = table["tableSchema"]["columns"] ||= []
1863
- column = columns[index - skipCols] ||= {
1864
- "titles" => {lang => []},
1865
- }
1866
- column["titles"][lang] << value
1851
+ value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1852
+ (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1853
+ end
1854
+ log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1855
+
1856
+ (1..headerRowCount).each do
1857
+ row_data = Array(csv.shift)
1858
+ Array(row_data).each_with_index do |value, index|
1859
+ # Skip columns
1860
+ skipCols = skipColumns.to_i
1861
+ next if index < skipCols
1862
+
1863
+ # Trim value
1864
+ value.lstrip! if %w(true start).include?(trim.to_s)
1865
+ value.rstrip! if %w(true end).include?(trim.to_s)
1866
+
1867
+ # Initialize titles
1868
+ columns = table["tableSchema"]["columns"] ||= []
1869
+ column = columns[index - skipCols] ||= {
1870
+ "titles" => {lang => []},
1871
+ }
1872
+ column["titles"][lang] << value
1873
+ end
1867
1874
  end
1868
1875
  end
1869
- debug("embedded_metadata") {"table: #{table.inspect}"}
1876
+ log_debug("embedded_metadata") {"table: #{table.inspect}"}
1870
1877
  input.rewind if input.respond_to?(:rewind)
1871
1878
 
1872
1879
  Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
@@ -1931,12 +1938,7 @@ module RDF::Tabular
1931
1938
  end
1932
1939
  end
1933
1940
 
1934
- if invalid
1935
- warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1936
- object.delete(key)
1937
- else
1938
- object[key] = value
1939
- end
1941
+ set_property(key, type, value, invalid)
1940
1942
  end
1941
1943
  end
1942
1944
  end
@@ -2056,35 +2058,26 @@ module RDF::Tabular
2056
2058
 
2057
2059
  # create column if necessary
2058
2060
  columns[index - skipColumns] ||=
2059
- Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
2061
+ Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
2060
2062
 
2061
2063
  column = columns[index - skipColumns]
2062
2064
 
2063
2065
  @values << cell = Cell.new(metadata, column, self, value)
2064
2066
 
2065
- datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
2066
- value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
2067
- value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
2067
+ datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
2068
+ value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
2069
+ value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
2068
2070
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
2069
2071
  value = column.default || '' if value.empty?
2070
2072
 
2071
2073
  cell_values = column.separator ? value.split(column.separator) : [value]
2072
2074
 
2073
2075
  cell_values = cell_values.map do |v|
2074
- v = v.strip unless %w(string anyAtomicType any).include?(datatype.base)
2076
+ v = v.strip unless %w(string anyAtomicType).include?(datatype.base)
2075
2077
  v = column.default || '' if v.empty?
2076
2078
  if Array(column.null).include?(v)
2077
2079
  nil
2078
2080
  else
2079
- # Trim value
2080
- if %w(string anyAtomicType any).include?(datatype.base)
2081
- v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
2082
- v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
2083
- else
2084
- # unless the datatype is string or anyAtomicType or any, strip leading and trailing whitespace from the string value
2085
- v.strip!
2086
- end
2087
-
2088
2081
  expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
2089
2082
  if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
2090
2083
  lit_or_errors
@@ -2127,7 +2120,11 @@ module RDF::Tabular
2127
2120
 
2128
2121
  # Identifier for this row, as an RFC7111 fragment
2129
2122
  # @return [RDF::URI]
2130
- def id; table.url + "#row=#{self.sourceNumber}"; end
2123
+ def id;
2124
+ u = table.url.dup
2125
+ u.fragment = "row=#{self.sourceNumber}"
2126
+ u
2127
+ end
2131
2128
 
2132
2129
  # Return Annotated Row representation
2133
2130
  def to_atd