rdf-tabular 0.2.1 → 0.4.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,7 +11,6 @@ module RDF
11
11
  # @author [Gregg Kellogg](http://greggkellogg.net/)
12
12
  module Tabular
13
13
  require 'rdf/tabular/format'
14
- require 'rdf/tabular/utils'
15
14
  autoload :Column, 'rdf/tabular/metadata'
16
15
  autoload :CSVW, 'rdf/tabular/csvw'
17
16
  autoload :Dialect, 'rdf/tabular/metadata'
@@ -46,5 +46,21 @@ module RDF::Tabular
46
46
  def self.detect(sample)
47
47
  !!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
48
48
  end
49
+
50
+ ##
51
+ # Hash of CLI commands appropriate for this format
52
+ # @return [Hash{Symbol => Lambda(Array, Hash)}]
53
+ def self.cli_commands
54
+ {
55
+ :"tabular-json" => ->(argv, opts) do
56
+ raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
57
+ out = opts[:output] || $stdout
58
+ out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
59
+ RDF::CLI.parse(argv, opts) do |reader|
60
+ out.puts reader.to_json
61
+ end
62
+ end
63
+ }
64
+ end
49
65
  end
50
66
  end
@@ -19,16 +19,12 @@ require 'yaml' # used by BCP47, which should have required it.
19
19
  # @author [Gregg Kellogg](http://greggkellogg.net/)
20
20
  module RDF::Tabular
21
21
  class Metadata
22
- include Utils
22
+ include RDF::Util::Logger
23
23
 
24
24
  # Hash representation
25
25
  # @return [Hash<Symbol,Object>]
26
26
  attr_accessor :object
27
27
 
28
- # Warnings detected on initialization or when setting properties
29
- # @return [Array<String>]
30
- attr_accessor :warnings
31
-
32
28
  # Inherited properties, valid for all types
33
29
  INHERITED_PROPERTIES = {
34
30
  aboutUrl: :uri_template,
@@ -137,7 +133,7 @@ module RDF::Tabular
137
133
  #
138
134
  # @param [String] path
139
135
  # @param [Hash{Symbol => Object}] options
140
- # see `RDF::Util::File.open_file` in RDF.rb and {#new}
136
+ # see `RDF::Util::File.open_file` in RDF.rb and {new}
141
137
  # @yield [Metadata]
142
138
  # @raise [IOError] if file not found
143
139
  def self.open(path, options = {})
@@ -153,7 +149,7 @@ module RDF::Tabular
153
149
  end
154
150
 
155
151
  # Return the well-known configuration for a file, and remember using a weak-reference cache to avoid unnecessary retrievals.
156
- # @param [String] base, the URL used for finding the file
152
+ # @param [String] base the URL used for finding the file
157
153
  # @return [Array<String>, false]
158
154
  def self.site_wide_config(base)
159
155
  require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
@@ -179,7 +175,6 @@ module RDF::Tabular
179
175
  # @return [Metadata]
180
176
  def self.for_input(input, options = {})
181
177
  base = options[:base]
182
- warnings = options.fetch(:warnings, [])
183
178
 
184
179
  # Use user metadata, if provided
185
180
  metadata = case options[:metadata]
@@ -202,10 +197,7 @@ module RDF::Tabular
202
197
  if md.describes_file?(base)
203
198
  metadata = md
204
199
  else
205
- warnings << "Found metadata at #{link_loc}, which does not describe #{base}, ignoring"
206
- if options[:validate] && !options[:warnings]
207
- $stderr.puts "Warnings: #{warnings.join("\n")}"
208
- end
200
+ log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
209
201
  end
210
202
  end
211
203
  end
@@ -214,12 +206,12 @@ module RDF::Tabular
214
206
  # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
215
207
  if !metadata && base
216
208
  templates = site_wide_config(base)
217
- debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
209
+ log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
218
210
  locs = templates.map do |template|
219
211
  t = Addressable::Template.new(template)
220
212
  RDF::URI(base).join(t.expand(url: base).to_s)
221
213
  end
222
- debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
214
+ log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
223
215
 
224
216
  locs.each do |loc|
225
217
  metadata ||= begin
@@ -230,15 +222,12 @@ module RDF::Tabular
230
222
  if md.describes_file?(base)
231
223
  md
232
224
  else
233
- warnings << "Found metadata at #{loc}, which does not describe #{base}, ignoring"
234
- if options[:validate] && !options[:warnings]
235
- $stderr.puts "Warnings: #{warnings.join("\n")}"
236
- end
225
+ log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
237
226
  nil
238
227
  end
239
228
  end
240
229
  rescue IOError
241
- debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
230
+ log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
242
231
  nil
243
232
  end
244
233
  end
@@ -331,7 +320,6 @@ module RDF::Tabular
331
320
  # @return [Metadata]
332
321
  def initialize(input, options = {})
333
322
  @options = options.dup
334
- @options[:depth] ||= 0
335
323
 
336
324
  # Parent of this Metadata, if any
337
325
  @parent = @options[:parent]
@@ -344,14 +332,14 @@ module RDF::Tabular
344
332
 
345
333
  @context = case input['@context']
346
334
  when Array
347
- warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
335
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
348
336
  c = LOCAL_CONTEXT.dup
349
337
  c.base = RDF::URI(opt_base)
350
338
  obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
351
339
  raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
352
340
  c.parse(obj)
353
341
  when Hash
354
- warn "Context missing required value 'http://www.w3.org/ns/csvw'"
342
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
355
343
  c = LOCAL_CONTEXT.dup
356
344
  c.base = RDF::URI(opt_base)
357
345
  c.parse(input['@context'])
@@ -362,7 +350,7 @@ module RDF::Tabular
362
350
  c
363
351
  else
364
352
  if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
365
- warn "Context missing required value 'http://www.w3.org/ns/csvw'"
353
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
366
354
  LOCAL_CONTEXT.dup
367
355
  c = LOCAL_CONTEXT.dup
368
356
  c.base = RDF::URI(opt_base)
@@ -375,7 +363,7 @@ module RDF::Tabular
375
363
  @options[:base] = @context ? @context.base : RDF::URI(opt_base)
376
364
 
377
365
  if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
378
- warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
366
+ log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
379
367
  @context.default_language = nil
380
368
  end
381
369
 
@@ -385,7 +373,7 @@ module RDF::Tabular
385
373
 
386
374
  @object = {}
387
375
 
388
- depth do
376
+ log_depth do
389
377
  # Input was parsed in .new
390
378
  # Metadata is object with symbolic keys
391
379
  input.each do |key, value|
@@ -401,7 +389,7 @@ module RDF::Tabular
401
389
  object[:@id] = if value.is_a?(String)
402
390
  value
403
391
  else
404
- warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
392
+ log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
405
393
  "" # Default value
406
394
  end
407
395
  @id = @options[:base].join(object[:@id])
@@ -426,14 +414,14 @@ module RDF::Tabular
426
414
  end
427
415
 
428
416
  if reason
429
- debug("md#initialize") {reason}
430
- debug("md#initialize") {"filenames: #{filenames}"}
431
- debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
417
+ log_debug("md#initialize") {reason}
418
+ log_debug("md#initialize") {"filenames: #{filenames}"}
419
+ log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
432
420
  end
433
421
  end
434
422
 
435
423
  # Getters and Setters
436
- INHERITED_PROPERTIES.keys.each do |key|
424
+ INHERITED_PROPERTIES.each do |key, type|
437
425
  define_method(key) do
438
426
  object.fetch(key) do
439
427
  parent ? parent.send(key) : default_value(key)
@@ -459,12 +447,7 @@ module RDF::Tabular
459
447
  # We handle this through a separate datatype= setter
460
448
  end
461
449
 
462
- if invalid
463
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
464
- object.delete(key)
465
- else
466
- object[key] = value
467
- end
450
+ set_property(key, type, value, invalid)
468
451
  end
469
452
  end
470
453
 
@@ -492,7 +475,7 @@ module RDF::Tabular
492
475
  when Schema
493
476
  value
494
477
  else
495
- warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
478
+ log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
496
479
  Schema.new({}, @options.merge(parent: self, context: nil))
497
480
  end
498
481
  end
@@ -539,7 +522,7 @@ module RDF::Tabular
539
522
  when Dialect
540
523
  value
541
524
  else
542
- warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
525
+ log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
543
526
  nil
544
527
  end
545
528
  end
@@ -549,15 +532,15 @@ module RDF::Tabular
549
532
  # @raise [Error] if datatype is not valid
550
533
  def datatype=(value)
551
534
  val = case value
552
- when Hash then Datatype.new(value, parent: self)
553
- else Datatype.new({base: value}, parent: self)
535
+ when Hash then Datatype.new(value, @options.merge(parent: self))
536
+ else Datatype.new({base: value}, @options.merge(parent: self))
554
537
  end
555
538
 
556
539
  if val.valid? || value.is_a?(Hash)
557
540
  # Set it if it was specified as an object, which may cause validation errors later
558
541
  object[:datatype] = val
559
542
  else
560
- warn "#{type} has invalid property 'datatype': expected a built-in or an object"
543
+ log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
561
544
  end
562
545
  end
563
546
 
@@ -572,40 +555,20 @@ module RDF::Tabular
572
555
  ##
573
556
  # Do we have valid metadata?
574
557
  def valid?
575
- validate!
576
- true
577
- rescue
578
- false
579
- end
580
-
581
- ##
582
- # Validation errors
583
- # @return [Array<String>]
584
- def errors
585
- validate! && []
586
- rescue Error => e
587
- e.message.split("\n")
558
+ validate # Possibly re-validate
559
+ !log_statistics[:error]
588
560
  end
589
561
 
590
- ##
591
- # Validation warnings, available only after validating or finding warnings
592
- # @return [Array<String>]
593
- def warnings
594
- ((@warnings || []) + object.
595
- values.
596
- flatten.
597
- select {|v| v.is_a?(Metadata)}.
598
- map(&:warnings).
599
- flatten).compact.uniq
562
+ def validate!
563
+ raise Error, "Metadata error" unless valid?
600
564
  end
601
565
 
602
566
  ##
603
567
  # Validate metadata, raising an error containing all errors detected during validation
604
568
  # @raise [Error] Raise error if metadata has any unexpected properties
605
569
  # @return [self]
606
- def validate!
570
+ def validate
607
571
  expected_props, required_props = @properties.keys, @required
608
- errors = []
609
572
 
610
573
  unless is_a?(Dialect) || is_a?(Transformation)
611
574
  expected_props = expected_props + INHERITED_PROPERTIES.keys
@@ -614,10 +577,10 @@ module RDF::Tabular
614
577
  # It has only expected properties (exclude metadata)
615
578
  check_keys = object.keys - [:"@id", :"@context"]
616
579
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
617
- warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
580
+ log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
618
581
 
619
582
  # It has required properties
620
- errors << "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
583
+ log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
621
584
 
622
585
  self.normalize!
623
586
 
@@ -626,55 +589,49 @@ module RDF::Tabular
626
589
  value = object[key]
627
590
  case key
628
591
  when :base
629
- errors << "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
592
+ log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
630
593
  when :columns
631
- value.each do |v|
632
- begin
633
- v.validate!
634
- rescue Error => e
635
- errors << e.message
636
- end
594
+ value.each do |col|
595
+ col.validate
596
+ log_statistics.merge!(col.log_statistics)
637
597
  end
638
598
  column_names = value.map(&:name)
639
- errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
599
+ log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
640
600
  when :datatype, :dialect, :tables, :tableSchema, :transformations
641
601
  Array(value).each do |t|
642
602
  # Make sure value is of appropriate class
643
603
  if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
644
- begin
645
- t.validate!
646
- rescue Error => e
647
- errors << e.message
648
- end
604
+ t.validate
605
+ log_statistics.merge!(t.log_statistics)
649
606
  else
650
- errors << "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
607
+ log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
651
608
  end
652
609
  end
653
- errors << "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
610
+ log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
654
611
  when :foreignKeys
655
612
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
656
613
  value.each do |fk|
657
614
  columnReference, reference = fk['columnReference'], fk['reference']
658
- errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
659
- errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
615
+ log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
616
+ log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
660
617
 
661
618
  # Verify that columns exist in this schema
662
- errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
619
+ log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
663
620
  Array(columnReference).each do |k|
664
- errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
621
+ log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
665
622
  end
666
623
 
667
624
  if reference.is_a?(Hash)
668
- errors << "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
625
+ log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
669
626
  ref_cols = reference['columnReference']
670
627
  schema = if reference.has_key?('resource')
671
628
  if reference.has_key?('schemaReference')
672
- errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
629
+ log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
673
630
  end
674
631
  # resource is the URL of a Table in the TableGroup
675
632
  ref = context.base.join(reference['resource']).to_s
676
- table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
677
- errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
633
+ table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
634
+ log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
678
635
  table.tableSchema if table
679
636
  elsif reference.has_key?('schemaReference')
680
637
  # resource is the @id of a Schema in the TableGroup
@@ -682,25 +639,25 @@ module RDF::Tabular
682
639
  tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
683
640
  case tables.length
684
641
  when 0
685
- errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
642
+ log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
686
643
  nil
687
644
  when 1
688
645
  tables.first.tableSchema
689
646
  else
690
- errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
647
+ log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
691
648
  nil
692
649
  end
693
650
  end
694
651
 
695
652
  if schema
696
653
  # ref_cols must exist in schema
697
- errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
654
+ log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
698
655
  Array(ref_cols).each do |k|
699
- errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
656
+ log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
700
657
  end
701
658
  end
702
659
  else
703
- errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
660
+ log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
704
661
  end
705
662
  end
706
663
  when :format
@@ -712,7 +669,7 @@ module RDF::Tabular
712
669
  nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
713
670
  unsignedLong unsignedInt unsignedShort unsignedByte
714
671
  ).include?(self.base)
715
- warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
672
+ log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
716
673
  object.delete(:format) # act as if not set
717
674
  end
718
675
 
@@ -720,14 +677,14 @@ module RDF::Tabular
720
677
  begin
721
678
  parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
722
679
  rescue ArgumentError => e
723
- warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
724
- object[:format].delete("pattern") # act as if not set
680
+ log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
681
+ object[:format].delete("pattern") if object[:format] # act as if not set
725
682
  end
726
683
  else
727
684
  case self.base
728
685
  when 'boolean'
729
686
  unless value.split("|").length == 2
730
- warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
687
+ log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
731
688
  object.delete(:format) # act as if not set
732
689
  end
733
690
  when :decimal, :integer, :long, :int, :short, :byte,
@@ -738,7 +695,7 @@ module RDF::Tabular
738
695
  begin
739
696
  parse_uax35_number(value, nil)
740
697
  rescue ArgumentError => e
741
- warn "#{type} has invalid property '#{key}': #{e.message}"
698
+ log_warn "#{type} has invalid property '#{key}': #{e.message}"
742
699
  object.delete(:format) # act as if not set
743
700
  end
744
701
  when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
@@ -746,7 +703,7 @@ module RDF::Tabular
746
703
  begin
747
704
  parse_uax35_date(value, nil)
748
705
  rescue ArgumentError => e
749
- warn "#{type} has invalid property '#{key}': #{e.message}"
706
+ log_warn "#{type} has invalid property '#{key}': #{e.message}"
750
707
  object.delete(:format) # act as if not set
751
708
  end
752
709
  else
@@ -754,7 +711,7 @@ module RDF::Tabular
754
711
  begin
755
712
  Regexp.compile(value)
756
713
  rescue
757
- warn "#{type} has invalid property '#{key}': #{$!.message}"
714
+ log_warn "#{type} has invalid property '#{key}': #{$!.message}"
758
715
  object.delete(:format) # act as if not set
759
716
  end
760
717
  end
@@ -765,20 +722,20 @@ module RDF::Tabular
765
722
  if object[:length]
766
723
  case key
767
724
  when :minLength
768
- errors << "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
725
+ log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
769
726
  when :maxLength
770
- errors << "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
727
+ log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
771
728
  end
772
729
  end
773
730
 
774
731
  # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
775
732
  if key == :maxLength && object[:minLength]
776
- errors << "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
733
+ log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
777
734
  end
778
735
 
779
736
  # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
780
737
  unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
781
- errors << "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
738
+ log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
782
739
  end
783
740
  when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
784
741
  case self.base
@@ -786,46 +743,46 @@ module RDF::Tabular
786
743
  'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
787
744
  'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
788
745
  'duration', 'dayTimeDuration', 'yearMonthDuration'
789
- errors << "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
746
+ log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
790
747
 
791
748
  case key
792
749
  when :minInclusive
793
750
  # Applications MUST raise an error if both minInclusive and minExclusive are specified
794
- errors << "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
751
+ log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
795
752
 
796
753
  # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
797
- errors << "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
754
+ log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
798
755
 
799
756
  # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
800
- errors << "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
757
+ log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
801
758
  when :maxInclusive
802
759
  # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
803
- errors << "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
760
+ log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
804
761
  when :minExclusive
805
762
  # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
806
- errors << "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
763
+ log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
807
764
 
808
765
  # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
809
- errors << "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
766
+ log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
810
767
  end
811
768
  else
812
- errors << "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
769
+ log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
813
770
  end
814
771
  when :notes
815
772
  unless value.is_a?(Hash) || value.is_a?(Array)
816
- errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
773
+ log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
817
774
  end
818
775
  begin
819
776
  normalize_jsonld(key, value)
820
777
  rescue Error => e
821
- errors << "#{type} has invalid content '#{key}': #{e.message}"
778
+ log_error "#{type} has invalid content '#{key}': #{e.message}"
822
779
  end
823
780
  when :primaryKey, :rowTitles
824
781
  # A column reference property that holds either a single reference to a column description object or an array of references.
825
782
  "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
826
783
  Array(value).each do |k|
827
784
  unless self.columns.any? {|c| c[:name] == k}
828
- warn "#{type} has invalid property '#{key}': column reference not found #{k}"
785
+ log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
829
786
  object.delete(key)
830
787
  end
831
788
  end
@@ -834,34 +791,33 @@ module RDF::Tabular
834
791
  when :@id
835
792
  # Must not be a BNode
836
793
  if value.to_s.start_with?("_:")
837
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
794
+ log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
838
795
  end
839
796
 
840
797
  # Datatype @id MUST NOT be the URL of a built-in type
841
798
  if self.is_a?(Datatype) && DATATYPES.values.include?(value)
842
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
799
+ log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
843
800
  end
844
801
  when :@type
845
802
  # Must not be a BNode
846
803
  if value.to_s.start_with?("_:")
847
- errors << "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
804
+ log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
848
805
  end
849
806
  case type
850
807
  when :Transformation
851
- errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
808
+ log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
852
809
  else
853
- errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
810
+ log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
854
811
  end
855
812
  when ->(k) {key.to_s.include?(':')}
856
813
  begin
857
814
  normalize_jsonld(key, value)
858
815
  rescue Error => e
859
- errors << "#{type} has invalid content '#{key}': #{e.message}"
816
+ log_error "#{type} has invalid content '#{key}': #{e.message}"
860
817
  end
861
818
  end
862
819
  end
863
820
 
864
- raise Error, errors.join("\n") unless errors.empty?
865
821
  self
866
822
  end
867
823
 
@@ -890,10 +846,37 @@ module RDF::Tabular
890
846
  # @param [:read] input
891
847
  # @yield [Row]
892
848
  def each_row(input)
893
- csv = ::CSV.new(input, csv_options)
894
- # Skip skipRows and headerRowCount
895
- number, skipped = 0, (dialect.skipRows.to_i + dialect.headerRowCount)
896
- (1..skipped).each {csv.shift}
849
+ csv, number, skipped = nil, 0, 0
850
+ path = input.base_uri.path rescue ""
851
+ if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
852
+ # Input is HTML; use fragment identifier to find table.
853
+ fragment = RDF::URI(self.url).fragment rescue nil
854
+ tab = begin
855
+ # Extract with nokogiri
856
+ require 'nokogiri' unless defined?(:Nokogiri)
857
+ doc = Nokogiri::HTML.parse(input)
858
+ doc.search("##{fragment}").first if fragment
859
+ rescue LoadError
860
+ # Extract with REXML
861
+ # FIXME
862
+ end
863
+
864
+ raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
865
+
866
+ # Use rows with <td> to create column data
867
+ csv = []
868
+ number = 0
869
+ tab.xpath('.//tr').map do |row|
870
+ number += 1 if row.xpath('th')
871
+ data = row.xpath('td').map(&:content)
872
+ csv << data unless data.empty?
873
+ end
874
+ else
875
+ csv = ::CSV.new(input, csv_options)
876
+ # Skip skipRows and headerRowCount
877
+ skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
878
+ (1..skipped).each {csv.shift}
879
+ end
897
880
  csv.each do |data|
898
881
  # Check for embedded comments
899
882
  if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
@@ -939,17 +922,17 @@ module RDF::Tabular
939
922
  if value['@value']
940
923
  dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
941
924
  lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
942
- block.call(RDF::Statement.new(subject, property, lit))
925
+ block.call(RDF::Statement(subject, property, lit))
943
926
  else
944
927
  # value MUST be a node object, establish a new subject from `@id`
945
928
  s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
946
929
 
947
930
  # Generate a triple
948
- block.call(RDF::Statement.new(subject, property, s2))
931
+ block.call(RDF::Statement(subject, property, s2))
949
932
 
950
933
  # Generate types
951
934
  Array(value['@type']).each do |t|
952
- block.call(RDF::Statement.new(s2, RDF.type, context.expand_iri(t, vocab: true)))
935
+ block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
953
936
  end
954
937
 
955
938
  # Generate triples for all other properties
@@ -961,7 +944,7 @@ module RDF::Tabular
961
944
  else
962
945
  # Value is a primitive JSON value
963
946
  lit = RDF::Literal(value)
964
- block.call(RDF::Statement.new(subject, property, RDF::Literal(value)))
947
+ block.call(RDF::Statement(subject, property, RDF::Literal(value)))
965
948
  end
966
949
  else
967
950
  case value
@@ -1016,7 +999,7 @@ module RDF::Tabular
1016
999
  if @options[:validate]
1017
1000
  raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1018
1001
  else
1019
- warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1002
+ log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1020
1003
  end
1021
1004
  end
1022
1005
  else
@@ -1025,7 +1008,7 @@ module RDF::Tabular
1025
1008
  if @options[:validate]
1026
1009
  raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1027
1010
  else
1028
- warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1011
+ log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1029
1012
  end
1030
1013
  end
1031
1014
 
@@ -1038,7 +1021,7 @@ module RDF::Tabular
1038
1021
  if @options[:validate]
1039
1022
  raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1040
1023
  else
1041
- warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1024
+ log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1042
1025
 
1043
1026
  # If present, a virtual column MUST appear after all other non-virtual column definitions
1044
1027
  raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
@@ -1053,13 +1036,13 @@ module RDF::Tabular
1053
1036
  end
1054
1037
  index = 0
1055
1038
  object_columns.all? do |cb|
1056
- ca = non_virtual_columns[index] || Column.new({})
1039
+ ca = non_virtual_columns[index] || Column.new({}, @options)
1057
1040
  ta = ca.titles || {}
1058
1041
  tb = cb.titles || {}
1059
1042
  if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
1060
1043
  true
1061
1044
  elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
1062
- raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}" unless ca.name == cb.name
1045
+ raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
1063
1046
  elsif @options[:validate] || !ta.empty? && !tb.empty?
1064
1047
  # If validating, column compatibility requires strict match between titles
1065
1048
  titles_match = case
@@ -1083,10 +1066,10 @@ module RDF::Tabular
1083
1066
  true
1084
1067
  elsif !@options[:validate]
1085
1068
  # If not validating, columns don't match, but processing continues
1086
- warn "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1069
+ log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1087
1070
  true
1088
1071
  else
1089
- raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1072
+ raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1090
1073
  end
1091
1074
  end
1092
1075
  index += 1
@@ -1180,13 +1163,13 @@ module RDF::Tabular
1180
1163
  when Hash
1181
1164
  if value['@value']
1182
1165
  if !(value.keys.sort - %w(@value @type @language)).empty?
1183
- raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1166
+ log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1184
1167
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
1185
- raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
1168
+ log_error "Value object may not contain both @type and @language: #{value.to_json}"
1186
1169
  elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
1187
- raise Error, "Value object with @language must use valid language: #{value.to_json}"
1170
+ log_error "Value object with @language must use valid language: #{value.to_json}"
1188
1171
  elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
1189
- raise Error, "Value object with @type must defined type: #{value.to_json}"
1172
+ log_error "Value object with @type must defined type: #{value.to_json}"
1190
1173
  end
1191
1174
  value
1192
1175
  else
@@ -1195,16 +1178,16 @@ module RDF::Tabular
1195
1178
  case k
1196
1179
  when "@id"
1197
1180
  nv[k] = context.expand_iri(v, documentRelative: true).to_s
1198
- raise Error, "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1181
+ log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1199
1182
  when "@type"
1200
1183
  Array(v).each do |vv|
1201
1184
  # Validate that all type values transform to absolute IRIs
1202
1185
  resource = context.expand_iri(vv, vocab: true)
1203
- raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
1186
+ log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
1204
1187
  end
1205
1188
  nv[k] = v
1206
1189
  when /^(@|_:)/
1207
- raise Error, "Invalid use of #{k} in JSON-LD content"
1190
+ log_error "Invalid use of #{k} in JSON-LD content"
1208
1191
  else
1209
1192
  nv[k] = normalize_jsonld(k, v)
1210
1193
  end
@@ -1217,10 +1200,22 @@ module RDF::Tabular
1217
1200
  end
1218
1201
  protected
1219
1202
 
1220
- # Add a warning on this object
1221
- def warn(string)
1222
- debug("warn: #{string}")
1223
- (@warnings ||= []) << string
1203
+ def set_property(key, type, value, invalid)
1204
+ if invalid
1205
+ log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1206
+ case type
1207
+ when :link, :uri_template
1208
+ object[key] = ""
1209
+ when :object
1210
+ object[key] = {}
1211
+ when :natural_language
1212
+ object[key] = set_nl(value) || []
1213
+ else
1214
+ object.delete(key)
1215
+ end
1216
+ else
1217
+ object[key] = value
1218
+ end
1224
1219
  end
1225
1220
 
1226
1221
  # When setting a natural language property, always put in language-map form
@@ -1251,12 +1246,12 @@ module RDF::Tabular
1251
1246
  end
1252
1247
  end
1253
1248
  else
1254
- warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1249
+ log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1255
1250
  []
1256
1251
  end
1257
1252
 
1258
1253
  unless object[key].all? {|v| v.is_a?(klass)}
1259
- warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1254
+ log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1260
1255
  # Remove elements that aren't of the right types
1261
1256
  object[key] = object[key].select! {|v| v.is_a?(klass)}
1262
1257
  end
@@ -1285,14 +1280,13 @@ module RDF::Tabular
1285
1280
  end
1286
1281
 
1287
1282
  class DebugContext
1288
- include Utils
1289
- def initialize(*args, &block)
1290
- @options = {}
1291
- debug(*args, &block)
1292
- end
1283
+ include RDF::Util::Logger
1284
+ end
1285
+ def self.log_debug(*args, &block)
1286
+ DebugContext.new.log_debug(*args, &block)
1293
1287
  end
1294
- def self.debug(*args, &block)
1295
- DebugContext.new(*args, &block)
1288
+ def self.log_warn(*args)
1289
+ DebugContext.new.log_warn(*args)
1296
1290
  end
1297
1291
  end
1298
1292
 
@@ -1329,12 +1323,7 @@ module RDF::Tabular
1329
1323
  # We handle this through a separate setters
1330
1324
  end
1331
1325
 
1332
- if invalid
1333
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1334
- object.delete(key)
1335
- else
1336
- object[key] = value
1337
- end
1326
+ set_property(key, type, value, invalid)
1338
1327
  end
1339
1328
  end
1340
1329
 
@@ -1421,8 +1410,7 @@ module RDF::Tabular
1421
1410
  end
1422
1411
 
1423
1412
  if invalid
1424
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1425
- object.delete(key)
1413
+ set_property(key, type, value, invalid)
1426
1414
  elsif key == :url
1427
1415
  # URL of CSV relative to metadata
1428
1416
  object[:url] = value
@@ -1446,7 +1434,7 @@ module RDF::Tabular
1446
1434
  content['@context'] = object.delete(:@context) if object[:@context]
1447
1435
  ctx = @context
1448
1436
  remove_instance_variable(:@context) if instance_variables.include?(:@context)
1449
- tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
1437
+ tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
1450
1438
  @parent = tg # Link from parent
1451
1439
  tg
1452
1440
  end
@@ -1457,8 +1445,7 @@ module RDF::Tabular
1457
1445
  "@id" => (id.to_s if id),
1458
1446
  "@type" => "AnnotatedTable",
1459
1447
  "url" => self.url.to_s,
1460
- "columns" => Array(tableSchema ? tableSchema.columns : []).map(&:to_atd),
1461
- "rows" => []
1448
+ "tableSchema" => (tableSchema.to_atd if tableSchema),
1462
1449
  }) do |memo, (k, v)|
1463
1450
  memo[k.to_s] ||= v
1464
1451
  memo
@@ -1490,12 +1477,7 @@ module RDF::Tabular
1490
1477
  "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
1491
1478
  end
1492
1479
 
1493
- if invalid
1494
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1495
- object.delete(key)
1496
- else
1497
- object[key] = value
1498
- end
1480
+ set_property(key, type, value, invalid)
1499
1481
  end
1500
1482
  end
1501
1483
 
@@ -1517,12 +1499,12 @@ module RDF::Tabular
1517
1499
  end
1518
1500
  end
1519
1501
  else
1520
- warn "#{type} has invalid property 'columns': expected array of Column"
1502
+ log_warn "#{type} has invalid property 'columns': expected array of Column"
1521
1503
  []
1522
1504
  end
1523
1505
 
1524
1506
  unless object[:columns].all? {|v| v.is_a?(Column)}
1525
- warn "#{type} has invalid property 'columns': expected array of Column"
1507
+ log_warn "#{type} has invalid property 'columns': expected array of Column"
1526
1508
  # Remove elements that aren't of the right types
1527
1509
  object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
1528
1510
  end
@@ -1532,12 +1514,12 @@ module RDF::Tabular
1532
1514
  object[:foreignKeys] = case value
1533
1515
  when Array then value
1534
1516
  else
1535
- warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1517
+ log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1536
1518
  []
1537
1519
  end
1538
1520
 
1539
1521
  unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
1540
- warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1522
+ log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1541
1523
  # Remove elements that aren't of the right types
1542
1524
  object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
1543
1525
  end
@@ -1560,6 +1542,18 @@ module RDF::Tabular
1560
1542
  end
1561
1543
  end
1562
1544
  end
1545
+
1546
+ # Return Annotated Table representation
1547
+ def to_atd
1548
+ object.inject({
1549
+ "@id" => (id.to_s if id),
1550
+ "@type" => "Schema",
1551
+ "columns" => Array(columns).map(&:to_atd),
1552
+ }) do |memo, (k, v)|
1553
+ memo[k.to_s] ||= v
1554
+ memo
1555
+ end.delete_if {|k,v| v.nil? || v.is_a?(Metadata)}
1556
+ end
1563
1557
  end
1564
1558
 
1565
1559
  class Column < Metadata
@@ -1619,16 +1613,7 @@ module RDF::Tabular
1619
1613
  valid_natural_language_property?(value)
1620
1614
  end
1621
1615
 
1622
- if invalid && key == :titles
1623
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1624
- object[key] = set_nl(value)
1625
- object.delete(key) if object[key].nil?
1626
- elsif invalid
1627
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1628
- object.delete(key)
1629
- else
1630
- object[key] = value
1631
- end
1616
+ set_property(key, t, value, invalid)
1632
1617
  end
1633
1618
  end
1634
1619
 
@@ -1657,7 +1642,6 @@ module RDF::Tabular
1657
1642
  "table" => (table.id.to_s if table.id),
1658
1643
  "number" => self.number,
1659
1644
  "sourceNumber" => self.sourceNumber,
1660
- "cells" => [],
1661
1645
  "virtual" => self.virtual,
1662
1646
  "name" => self.name,
1663
1647
  "titles" => self.titles
@@ -1700,12 +1684,7 @@ module RDF::Tabular
1700
1684
  "json or rdf" unless %w(json rdf).include?(value) || value.nil?
1701
1685
  end
1702
1686
 
1703
- if invalid
1704
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1705
- object.delete(key)
1706
- else
1707
- object[key] = value
1708
- end
1687
+ set_property(key, type, value, invalid)
1709
1688
  end
1710
1689
  end
1711
1690
  end
@@ -1713,7 +1692,7 @@ module RDF::Tabular
1713
1692
  class Dialect < Metadata
1714
1693
  # Defaults for dialects
1715
1694
  DEFAULTS = {
1716
- commentPrefix: "#".freeze,
1695
+ commentPrefix: false,
1717
1696
  delimiter: ",".freeze,
1718
1697
  doubleQuote: true,
1719
1698
  encoding: "utf-8".freeze,
@@ -1749,7 +1728,7 @@ module RDF::Tabular
1749
1728
  REQUIRED = [].freeze
1750
1729
 
1751
1730
  # Getters and Setters
1752
- PROPERTIES.keys.each do |key|
1731
+ PROPERTIES.each do |key, type|
1753
1732
  define_method(key) do
1754
1733
  object.fetch(key, DEFAULTS[key])
1755
1734
  end
@@ -1772,16 +1751,7 @@ module RDF::Tabular
1772
1751
  valid_natural_language_property?(value)
1773
1752
  end
1774
1753
 
1775
- if invalid && key == :titles
1776
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1777
- object[key] = set_nl(value)
1778
- object.delete(key) if object[key].nil?
1779
- elsif invalid
1780
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1781
- object.delete(key)
1782
- else
1783
- object[key] = value
1784
- end
1754
+ set_property(key, type, value, invalid)
1785
1755
  end
1786
1756
  end
1787
1757
 
@@ -1835,38 +1805,75 @@ module RDF::Tabular
1835
1805
  lang ||= 'und'
1836
1806
 
1837
1807
  # Set encoding on input
1838
- csv = ::CSV.new(input, csv_options)
1839
- (1..skipRows.to_i).each do
1840
- value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1841
- # Trim value
1842
- value.lstrip! if %w(true start).include?(trim.to_s)
1843
- value.rstrip! if %w(true end).include?(trim.to_s)
1808
+ path = input.base_uri.path rescue ""
1809
+ if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
1810
+ # Input is HTML; use fragment identfier to find table.
1811
+ fragment = RDF::URI(table["url"]).fragment rescue nil
1812
+ tab = begin
1813
+ # Extract with nokogiri
1814
+ require 'nokogiri' unless defined?(:Nokogiri)
1815
+ doc = Nokogiri::HTML.parse(input)
1816
+ doc.search("##{fragment}").first if fragment
1817
+ rescue LoadError
1818
+ # Extract with REXML
1819
+ # FIXME
1820
+ end
1844
1821
 
1845
- value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1846
- (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1847
- end
1848
- debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1822
+ raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
1849
1823
 
1850
- (1..headerRowCount).each do
1851
- row_data = Array(csv.shift)
1852
- Array(row_data).each_with_index do |value, index|
1853
- # Skip columns
1854
- skipCols = skipColumns.to_i
1855
- next if index < skipCols
1824
+ # Use rows with <th> to create column titles
1825
+ tab.xpath('.//tr').each do |row|
1826
+ row.xpath('th').map(&:content).each_with_index do |value, index|
1827
+ # Skip columns
1828
+ skipCols = skipColumns.to_i
1829
+ next if index < skipCols
1856
1830
 
1831
+ # Trim value
1832
+ value.lstrip! if %w(true start).include?(trim.to_s)
1833
+ value.rstrip! if %w(true end).include?(trim.to_s)
1834
+
1835
+ # Initialize titles
1836
+ columns = table["tableSchema"]["columns"] ||= []
1837
+ column = columns[index - skipCols] ||= {
1838
+ "titles" => {lang => []},
1839
+ }
1840
+ column["titles"][lang] << value
1841
+ end
1842
+ end
1843
+ else
1844
+ csv = ::CSV.new(input, csv_options)
1845
+ (1..skipRows.to_i).each do
1846
+ value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1857
1847
  # Trim value
1858
1848
  value.lstrip! if %w(true start).include?(trim.to_s)
1859
1849
  value.rstrip! if %w(true end).include?(trim.to_s)
1860
1850
 
1861
- # Initialize titles
1862
- columns = table["tableSchema"]["columns"] ||= []
1863
- column = columns[index - skipCols] ||= {
1864
- "titles" => {lang => []},
1865
- }
1866
- column["titles"][lang] << value
1851
+ value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1852
+ (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1853
+ end
1854
+ log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1855
+
1856
+ (1..headerRowCount).each do
1857
+ row_data = Array(csv.shift)
1858
+ Array(row_data).each_with_index do |value, index|
1859
+ # Skip columns
1860
+ skipCols = skipColumns.to_i
1861
+ next if index < skipCols
1862
+
1863
+ # Trim value
1864
+ value.lstrip! if %w(true start).include?(trim.to_s)
1865
+ value.rstrip! if %w(true end).include?(trim.to_s)
1866
+
1867
+ # Initialize titles
1868
+ columns = table["tableSchema"]["columns"] ||= []
1869
+ column = columns[index - skipCols] ||= {
1870
+ "titles" => {lang => []},
1871
+ }
1872
+ column["titles"][lang] << value
1873
+ end
1867
1874
  end
1868
1875
  end
1869
- debug("embedded_metadata") {"table: #{table.inspect}"}
1876
+ log_debug("embedded_metadata") {"table: #{table.inspect}"}
1870
1877
  input.rewind if input.respond_to?(:rewind)
1871
1878
 
1872
1879
  Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
@@ -1931,12 +1938,7 @@ module RDF::Tabular
1931
1938
  end
1932
1939
  end
1933
1940
 
1934
- if invalid
1935
- warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1936
- object.delete(key)
1937
- else
1938
- object[key] = value
1939
- end
1941
+ set_property(key, type, value, invalid)
1940
1942
  end
1941
1943
  end
1942
1944
  end
@@ -2056,35 +2058,26 @@ module RDF::Tabular
2056
2058
 
2057
2059
  # create column if necessary
2058
2060
  columns[index - skipColumns] ||=
2059
- Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
2061
+ Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
2060
2062
 
2061
2063
  column = columns[index - skipColumns]
2062
2064
 
2063
2065
  @values << cell = Cell.new(metadata, column, self, value)
2064
2066
 
2065
- datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
2066
- value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
2067
- value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
2067
+ datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
2068
+ value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
2069
+ value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
2068
2070
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
2069
2071
  value = column.default || '' if value.empty?
2070
2072
 
2071
2073
  cell_values = column.separator ? value.split(column.separator) : [value]
2072
2074
 
2073
2075
  cell_values = cell_values.map do |v|
2074
- v = v.strip unless %w(string anyAtomicType any).include?(datatype.base)
2076
+ v = v.strip unless %w(string anyAtomicType).include?(datatype.base)
2075
2077
  v = column.default || '' if v.empty?
2076
2078
  if Array(column.null).include?(v)
2077
2079
  nil
2078
2080
  else
2079
- # Trim value
2080
- if %w(string anyAtomicType any).include?(datatype.base)
2081
- v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
2082
- v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
2083
- else
2084
- # unless the datatype is string or anyAtomicType or any, strip leading and trailing whitespace from the string value
2085
- v.strip!
2086
- end
2087
-
2088
2081
  expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
2089
2082
  if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
2090
2083
  lit_or_errors
@@ -2127,7 +2120,11 @@ module RDF::Tabular
2127
2120
 
2128
2121
  # Identifier for this row, as an RFC7111 fragment
2129
2122
  # @return [RDF::URI]
2130
- def id; table.url + "#row=#{self.sourceNumber}"; end
2123
+ def id;
2124
+ u = table.url.dup
2125
+ u.fragment = "row=#{self.sourceNumber}"
2126
+ u
2127
+ end
2131
2128
 
2132
2129
  # Return Annotated Row representation
2133
2130
  def to_atd