rdf-tabular 0.3.0 → 0.4.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2feb2648ce7d91d183b562e401c5a95f97c6387d
4
- data.tar.gz: 54f150bbde26f030d759c79016c75369fd09f999
3
+ metadata.gz: b34ec5c872bbf6e8d8f13559b255283cd118cd46
4
+ data.tar.gz: 08ace967385cb72fdc48e48ad434f7e0bd35753d
5
5
  SHA512:
6
- metadata.gz: 484aba808b7d2a448fda9c12cbc14b7f3938b76c16bdde81117401def7e7903e78c23efb9018a0297d9707994678f3c5fe0973ccd9c14c51e5050df08560871c
7
- data.tar.gz: 04259b3a8e056af10efd8924ae7e6deaff7766275ca5b2ad14bb5d1ab27b7eff98f083dfa77cac68ac47ef7b7d0f819c6a13b9ab7dab582a82750f0db5b55e15
6
+ metadata.gz: 55a2305ce14c365631a1f7ad178e4b2c603ef2279c74537cb36f33a47ef81a19b250b1cad64719be5c1921536e06ee2ba4bd2fa2745dff81bfe652ed31ed823d
7
+ data.tar.gz: 9a3b83c57938b94ebf1ab86052a0bccd144c9380394150748a6e580bf480e1bf39f835a9b2ff2633b3d6ca09823b782c945f3a40fbeec653d6c0ae61218805af
data/README.md CHANGED
@@ -13,6 +13,7 @@ RDF::Tabular parses CSV or other Tabular Data into [RDF][] and JSON using the [W
13
13
 
14
14
  * Parses [number patterns](http://www.unicode.org/reports/tr35/tr35-39/tr35-numbers.html#Number_Patterns) from [UAX35][]
15
15
  * Parses [date formats](http://www.unicode.org/reports/tr35/tr35-39/tr35-dates.html#Contents) from [UAX35][]
16
+ * Returns detailed errors and warnings using optional `Logger`.
16
17
 
17
18
  ## Installation
18
19
  Install with `gem install rdf-tabular`
@@ -247,8 +248,8 @@ Full documentation available on [RubyDoc](http://rubydoc.info/gems/rdf-tabular/f
247
248
  * {RDF::Tabular::Reader}
248
249
 
249
250
  ## Dependencies
250
- * [Ruby](http://ruby-lang.org/) (>= 2.0.0)
251
- * [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.0)
251
+ * [Ruby](http://ruby-lang.org/) (>= 2.0)
252
+ * [RDF.rb](http://rubygems.org/gems/rdf) (>= 2.0)
252
253
  * [JSON](https://rubygems.org/gems/json) (>= 1.5)
253
254
 
254
255
  ## Installation
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.0
1
+ 0.4.0.beta2
data/lib/rdf/tabular.rb CHANGED
@@ -11,7 +11,6 @@ module RDF
11
11
  # @author [Gregg Kellogg](http://greggkellogg.net/)
12
12
  module Tabular
13
13
  require 'rdf/tabular/format'
14
- require 'rdf/tabular/utils'
15
14
  autoload :Column, 'rdf/tabular/metadata'
16
15
  autoload :CSVW, 'rdf/tabular/csvw'
17
16
  autoload :Dialect, 'rdf/tabular/metadata'
@@ -46,5 +46,21 @@ module RDF::Tabular
46
46
  def self.detect(sample)
47
47
  !!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
48
48
  end
49
+
50
+ ##
51
+ # Hash of CLI commands appropriate for this format
52
+ # @return [Hash{Symbol => Lambda(Array, Hash)}]
53
+ def self.cli_commands
54
+ {
55
+ :"tabular-json" => ->(argv, opts) do
56
+ raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
57
+ out = opts[:output] || $stdout
58
+ out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
59
+ RDF::CLI.parse(argv, opts) do |reader|
60
+ out.puts reader.to_json
61
+ end
62
+ end
63
+ }
64
+ end
49
65
  end
50
66
  end
@@ -19,16 +19,12 @@ require 'yaml' # used by BCP47, which should have required it.
19
19
  # @author [Gregg Kellogg](http://greggkellogg.net/)
20
20
  module RDF::Tabular
21
21
  class Metadata
22
- include Utils
22
+ include RDF::Util::Logger
23
23
 
24
24
  # Hash representation
25
25
  # @return [Hash<Symbol,Object>]
26
26
  attr_accessor :object
27
27
 
28
- # Warnings detected on initialization or when setting properties
29
- # @return [Array<String>]
30
- attr_accessor :warnings
31
-
32
28
  # Inherited properties, valid for all types
33
29
  INHERITED_PROPERTIES = {
34
30
  aboutUrl: :uri_template,
@@ -179,7 +175,6 @@ module RDF::Tabular
179
175
  # @return [Metadata]
180
176
  def self.for_input(input, options = {})
181
177
  base = options[:base]
182
- warnings = options.fetch(:warnings, [])
183
178
 
184
179
  # Use user metadata, if provided
185
180
  metadata = case options[:metadata]
@@ -202,10 +197,7 @@ module RDF::Tabular
202
197
  if md.describes_file?(base)
203
198
  metadata = md
204
199
  else
205
- warnings << "Found metadata at #{link_loc}, which does not describe #{base}, ignoring"
206
- if options[:validate] && !options[:warnings]
207
- $stderr.puts "Warnings: #{warnings.join("\n")}"
208
- end
200
+ log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
209
201
  end
210
202
  end
211
203
  end
@@ -214,12 +206,12 @@ module RDF::Tabular
214
206
  # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
215
207
  if !metadata && base
216
208
  templates = site_wide_config(base)
217
- debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
209
+ log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
218
210
  locs = templates.map do |template|
219
211
  t = Addressable::Template.new(template)
220
212
  RDF::URI(base).join(t.expand(url: base).to_s)
221
213
  end
222
- debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
214
+ log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
223
215
 
224
216
  locs.each do |loc|
225
217
  metadata ||= begin
@@ -230,15 +222,12 @@ module RDF::Tabular
230
222
  if md.describes_file?(base)
231
223
  md
232
224
  else
233
- warnings << "Found metadata at #{loc}, which does not describe #{base}, ignoring"
234
- if options[:validate] && !options[:warnings]
235
- $stderr.puts "Warnings: #{warnings.join("\n")}"
236
- end
225
+ log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
237
226
  nil
238
227
  end
239
228
  end
240
229
  rescue IOError
241
- debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
230
+ log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
242
231
  nil
243
232
  end
244
233
  end
@@ -331,7 +320,6 @@ module RDF::Tabular
331
320
  # @return [Metadata]
332
321
  def initialize(input, options = {})
333
322
  @options = options.dup
334
- @options[:depth] ||= 0
335
323
 
336
324
  # Parent of this Metadata, if any
337
325
  @parent = @options[:parent]
@@ -344,14 +332,14 @@ module RDF::Tabular
344
332
 
345
333
  @context = case input['@context']
346
334
  when Array
347
- warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
335
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
348
336
  c = LOCAL_CONTEXT.dup
349
337
  c.base = RDF::URI(opt_base)
350
338
  obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
351
339
  raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
352
340
  c.parse(obj)
353
341
  when Hash
354
- warn "Context missing required value 'http://www.w3.org/ns/csvw'"
342
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
355
343
  c = LOCAL_CONTEXT.dup
356
344
  c.base = RDF::URI(opt_base)
357
345
  c.parse(input['@context'])
@@ -362,7 +350,7 @@ module RDF::Tabular
362
350
  c
363
351
  else
364
352
  if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
365
- warn "Context missing required value 'http://www.w3.org/ns/csvw'"
353
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
366
354
  LOCAL_CONTEXT.dup
367
355
  c = LOCAL_CONTEXT.dup
368
356
  c.base = RDF::URI(opt_base)
@@ -375,7 +363,7 @@ module RDF::Tabular
375
363
  @options[:base] = @context ? @context.base : RDF::URI(opt_base)
376
364
 
377
365
  if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
378
- warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
366
+ log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
379
367
  @context.default_language = nil
380
368
  end
381
369
 
@@ -385,7 +373,7 @@ module RDF::Tabular
385
373
 
386
374
  @object = {}
387
375
 
388
- depth do
376
+ log_depth do
389
377
  # Input was parsed in .new
390
378
  # Metadata is object with symbolic keys
391
379
  input.each do |key, value|
@@ -401,7 +389,7 @@ module RDF::Tabular
401
389
  object[:@id] = if value.is_a?(String)
402
390
  value
403
391
  else
404
- warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
392
+ log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
405
393
  "" # Default value
406
394
  end
407
395
  @id = @options[:base].join(object[:@id])
@@ -426,9 +414,9 @@ module RDF::Tabular
426
414
  end
427
415
 
428
416
  if reason
429
- debug("md#initialize") {reason}
430
- debug("md#initialize") {"filenames: #{filenames}"}
431
- debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
417
+ log_debug("md#initialize") {reason}
418
+ log_debug("md#initialize") {"filenames: #{filenames}"}
419
+ log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
432
420
  end
433
421
  end
434
422
 
@@ -487,7 +475,7 @@ module RDF::Tabular
487
475
  when Schema
488
476
  value
489
477
  else
490
- warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
478
+ log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
491
479
  Schema.new({}, @options.merge(parent: self, context: nil))
492
480
  end
493
481
  end
@@ -534,7 +522,7 @@ module RDF::Tabular
534
522
  when Dialect
535
523
  value
536
524
  else
537
- warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
525
+ log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
538
526
  nil
539
527
  end
540
528
  end
@@ -544,15 +532,15 @@ module RDF::Tabular
544
532
  # @raise [Error] if datatype is not valid
545
533
  def datatype=(value)
546
534
  val = case value
547
- when Hash then Datatype.new(value, parent: self)
548
- else Datatype.new({base: value}, parent: self)
535
+ when Hash then Datatype.new(value, @options.merge(parent: self))
536
+ else Datatype.new({base: value}, @options.merge(parent: self))
549
537
  end
550
538
 
551
539
  if val.valid? || value.is_a?(Hash)
552
540
  # Set it if it was specified as an object, which may cause validation errors later
553
541
  object[:datatype] = val
554
542
  else
555
- warn "#{type} has invalid property 'datatype': expected a built-in or an object"
543
+ log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
556
544
  end
557
545
  end
558
546
 
@@ -567,40 +555,20 @@ module RDF::Tabular
567
555
  ##
568
556
  # Do we have valid metadata?
569
557
  def valid?
570
- validate!
571
- true
572
- rescue
573
- false
574
- end
575
-
576
- ##
577
- # Validation errors
578
- # @return [Array<String>]
579
- def errors
580
- validate! && []
581
- rescue Error => e
582
- e.message.split("\n")
558
+ validate # Possibly re-validate
559
+ !log_statistics[:error]
583
560
  end
584
561
 
585
- ##
586
- # Validation warnings, available only after validating or finding warnings
587
- # @return [Array<String>]
588
- def warnings
589
- ((@warnings || []) + object.
590
- values.
591
- flatten.
592
- select {|v| v.is_a?(Metadata)}.
593
- map(&:warnings).
594
- flatten).compact.uniq
562
+ def validate!
563
+ raise Error, "Metadata error" unless valid?
595
564
  end
596
565
 
597
566
  ##
598
567
  # Validate metadata, raising an error containing all errors detected during validation
599
568
  # @raise [Error] Raise error if metadata has any unexpected properties
600
569
  # @return [self]
601
- def validate!
570
+ def validate
602
571
  expected_props, required_props = @properties.keys, @required
603
- errors = []
604
572
 
605
573
  unless is_a?(Dialect) || is_a?(Transformation)
606
574
  expected_props = expected_props + INHERITED_PROPERTIES.keys
@@ -609,10 +577,10 @@ module RDF::Tabular
609
577
  # It has only expected properties (exclude metadata)
610
578
  check_keys = object.keys - [:"@id", :"@context"]
611
579
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
612
- warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
580
+ log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
613
581
 
614
582
  # It has required properties
615
- errors << "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
583
+ log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
616
584
 
617
585
  self.normalize!
618
586
 
@@ -621,55 +589,49 @@ module RDF::Tabular
621
589
  value = object[key]
622
590
  case key
623
591
  when :base
624
- errors << "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
592
+ log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
625
593
  when :columns
626
- value.each do |v|
627
- begin
628
- v.validate!
629
- rescue Error => e
630
- errors << e.message
631
- end
594
+ value.each do |col|
595
+ col.validate
596
+ log_statistics.merge!(col.log_statistics)
632
597
  end
633
598
  column_names = value.map(&:name)
634
- errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
599
+ log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
635
600
  when :datatype, :dialect, :tables, :tableSchema, :transformations
636
601
  Array(value).each do |t|
637
602
  # Make sure value is of appropriate class
638
603
  if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
639
- begin
640
- t.validate!
641
- rescue Error => e
642
- errors << e.message
643
- end
604
+ t.validate
605
+ log_statistics.merge!(t.log_statistics)
644
606
  else
645
- errors << "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
607
+ log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
646
608
  end
647
609
  end
648
- errors << "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
610
+ log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
649
611
  when :foreignKeys
650
612
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
651
613
  value.each do |fk|
652
614
  columnReference, reference = fk['columnReference'], fk['reference']
653
- errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
654
- errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
615
+ log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
616
+ log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
655
617
 
656
618
  # Verify that columns exist in this schema
657
- errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
619
+ log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
658
620
  Array(columnReference).each do |k|
659
- errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
621
+ log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
660
622
  end
661
623
 
662
624
  if reference.is_a?(Hash)
663
- errors << "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
625
+ log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
664
626
  ref_cols = reference['columnReference']
665
627
  schema = if reference.has_key?('resource')
666
628
  if reference.has_key?('schemaReference')
667
- errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
629
+ log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
668
630
  end
669
631
  # resource is the URL of a Table in the TableGroup
670
632
  ref = context.base.join(reference['resource']).to_s
671
- table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
672
- errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
633
+ table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
634
+ log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
673
635
  table.tableSchema if table
674
636
  elsif reference.has_key?('schemaReference')
675
637
  # resource is the @id of a Schema in the TableGroup
@@ -677,25 +639,25 @@ module RDF::Tabular
677
639
  tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
678
640
  case tables.length
679
641
  when 0
680
- errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
642
+ log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
681
643
  nil
682
644
  when 1
683
645
  tables.first.tableSchema
684
646
  else
685
- errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
647
+ log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
686
648
  nil
687
649
  end
688
650
  end
689
651
 
690
652
  if schema
691
653
  # ref_cols must exist in schema
692
- errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
654
+ log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
693
655
  Array(ref_cols).each do |k|
694
- errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
656
+ log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
695
657
  end
696
658
  end
697
659
  else
698
- errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
660
+ log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
699
661
  end
700
662
  end
701
663
  when :format
@@ -707,7 +669,7 @@ module RDF::Tabular
707
669
  nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
708
670
  unsignedLong unsignedInt unsignedShort unsignedByte
709
671
  ).include?(self.base)
710
- warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
672
+ log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
711
673
  object.delete(:format) # act as if not set
712
674
  end
713
675
 
@@ -715,14 +677,14 @@ module RDF::Tabular
715
677
  begin
716
678
  parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
717
679
  rescue ArgumentError => e
718
- warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
719
- object[:format].delete("pattern") # act as if not set
680
+ log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
681
+ object[:format].delete("pattern") if object[:format] # act as if not set
720
682
  end
721
683
  else
722
684
  case self.base
723
685
  when 'boolean'
724
686
  unless value.split("|").length == 2
725
- warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
687
+ log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
726
688
  object.delete(:format) # act as if not set
727
689
  end
728
690
  when :decimal, :integer, :long, :int, :short, :byte,
@@ -733,7 +695,7 @@ module RDF::Tabular
733
695
  begin
734
696
  parse_uax35_number(value, nil)
735
697
  rescue ArgumentError => e
736
- warn "#{type} has invalid property '#{key}': #{e.message}"
698
+ log_warn "#{type} has invalid property '#{key}': #{e.message}"
737
699
  object.delete(:format) # act as if not set
738
700
  end
739
701
  when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
@@ -741,7 +703,7 @@ module RDF::Tabular
741
703
  begin
742
704
  parse_uax35_date(value, nil)
743
705
  rescue ArgumentError => e
744
- warn "#{type} has invalid property '#{key}': #{e.message}"
706
+ log_warn "#{type} has invalid property '#{key}': #{e.message}"
745
707
  object.delete(:format) # act as if not set
746
708
  end
747
709
  else
@@ -749,7 +711,7 @@ module RDF::Tabular
749
711
  begin
750
712
  Regexp.compile(value)
751
713
  rescue
752
- warn "#{type} has invalid property '#{key}': #{$!.message}"
714
+ log_warn "#{type} has invalid property '#{key}': #{$!.message}"
753
715
  object.delete(:format) # act as if not set
754
716
  end
755
717
  end
@@ -760,20 +722,20 @@ module RDF::Tabular
760
722
  if object[:length]
761
723
  case key
762
724
  when :minLength
763
- errors << "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
725
+ log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
764
726
  when :maxLength
765
- errors << "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
727
+ log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
766
728
  end
767
729
  end
768
730
 
769
731
  # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
770
732
  if key == :maxLength && object[:minLength]
771
- errors << "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
733
+ log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
772
734
  end
773
735
 
774
736
  # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
775
737
  unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
776
- errors << "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
738
+ log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
777
739
  end
778
740
  when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
779
741
  case self.base
@@ -781,46 +743,46 @@ module RDF::Tabular
781
743
  'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
782
744
  'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
783
745
  'duration', 'dayTimeDuration', 'yearMonthDuration'
784
- errors << "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
746
+ log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
785
747
 
786
748
  case key
787
749
  when :minInclusive
788
750
  # Applications MUST raise an error if both minInclusive and minExclusive are specified
789
- errors << "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
751
+ log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
790
752
 
791
753
  # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
792
- errors << "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
754
+ log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
793
755
 
794
756
  # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
795
- errors << "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
757
+ log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
796
758
  when :maxInclusive
797
759
  # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
798
- errors << "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
760
+ log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
799
761
  when :minExclusive
800
762
  # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
801
- errors << "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
763
+ log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
802
764
 
803
765
  # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
804
- errors << "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
766
+ log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
805
767
  end
806
768
  else
807
- errors << "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
769
+ log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
808
770
  end
809
771
  when :notes
810
772
  unless value.is_a?(Hash) || value.is_a?(Array)
811
- errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
773
+ log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
812
774
  end
813
775
  begin
814
776
  normalize_jsonld(key, value)
815
777
  rescue Error => e
816
- errors << "#{type} has invalid content '#{key}': #{e.message}"
778
+ log_error "#{type} has invalid content '#{key}': #{e.message}"
817
779
  end
818
780
  when :primaryKey, :rowTitles
819
781
  # A column reference property that holds either a single reference to a column description object or an array of references.
820
782
  "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
821
783
  Array(value).each do |k|
822
784
  unless self.columns.any? {|c| c[:name] == k}
823
- warn "#{type} has invalid property '#{key}': column reference not found #{k}"
785
+ log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
824
786
  object.delete(key)
825
787
  end
826
788
  end
@@ -829,34 +791,33 @@ module RDF::Tabular
829
791
  when :@id
830
792
  # Must not be a BNode
831
793
  if value.to_s.start_with?("_:")
832
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
794
+ log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
833
795
  end
834
796
 
835
797
  # Datatype @id MUST NOT be the URL of a built-in type
836
798
  if self.is_a?(Datatype) && DATATYPES.values.include?(value)
837
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
799
+ log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
838
800
  end
839
801
  when :@type
840
802
  # Must not be a BNode
841
803
  if value.to_s.start_with?("_:")
842
- errors << "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
804
+ log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
843
805
  end
844
806
  case type
845
807
  when :Transformation
846
- errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
808
+ log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
847
809
  else
848
- errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
810
+ log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
849
811
  end
850
812
  when ->(k) {key.to_s.include?(':')}
851
813
  begin
852
814
  normalize_jsonld(key, value)
853
815
  rescue Error => e
854
- errors << "#{type} has invalid content '#{key}': #{e.message}"
816
+ log_error "#{type} has invalid content '#{key}': #{e.message}"
855
817
  end
856
818
  end
857
819
  end
858
820
 
859
- raise Error, errors.join("\n") unless errors.empty?
860
821
  self
861
822
  end
862
823
 
@@ -885,10 +846,37 @@ module RDF::Tabular
885
846
  # @param [:read] input
886
847
  # @yield [Row]
887
848
  def each_row(input)
888
- csv = ::CSV.new(input, csv_options)
889
- # Skip skipRows and headerRowCount
890
- number, skipped = 0, (dialect.skipRows.to_i + dialect.headerRowCount)
891
- (1..skipped).each {csv.shift}
849
+ csv, number, skipped = nil, 0, 0
850
+ path = input.base_uri.path rescue ""
851
+ if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
852
+ # Input is HTML; use fragment identifier to find table.
853
+ fragment = RDF::URI(self.url).fragment rescue nil
854
+ tab = begin
855
+ # Extract with nokogiri
856
+ require 'nokogiri' unless defined?(:Nokogiri)
857
+ doc = Nokogiri::HTML.parse(input)
858
+ doc.search("##{fragment}").first if fragment
859
+ rescue LoadError
860
+ # Extract with REXML
861
+ # FIXME
862
+ end
863
+
864
+ raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
865
+
866
+ # Use rows with <td> to create column data
867
+ csv = []
868
+ number = 0
869
+ tab.xpath('.//tr').map do |row|
870
+ number += 1 if row.xpath('th')
871
+ data = row.xpath('td').map(&:content)
872
+ csv << data unless data.empty?
873
+ end
874
+ else
875
+ csv = ::CSV.new(input, csv_options)
876
+ # Skip skipRows and headerRowCount
877
+ skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
878
+ (1..skipped).each {csv.shift}
879
+ end
892
880
  csv.each do |data|
893
881
  # Check for embedded comments
894
882
  if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
@@ -934,17 +922,17 @@ module RDF::Tabular
934
922
  if value['@value']
935
923
  dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
936
924
  lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
937
- block.call(RDF::Statement.new(subject, property, lit))
925
+ block.call(RDF::Statement(subject, property, lit))
938
926
  else
939
927
  # value MUST be a node object, establish a new subject from `@id`
940
928
  s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
941
929
 
942
930
  # Generate a triple
943
- block.call(RDF::Statement.new(subject, property, s2))
931
+ block.call(RDF::Statement(subject, property, s2))
944
932
 
945
933
  # Generate types
946
934
  Array(value['@type']).each do |t|
947
- block.call(RDF::Statement.new(s2, RDF.type, context.expand_iri(t, vocab: true)))
935
+ block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
948
936
  end
949
937
 
950
938
  # Generate triples for all other properties
@@ -956,7 +944,7 @@ module RDF::Tabular
956
944
  else
957
945
  # Value is a primitive JSON value
958
946
  lit = RDF::Literal(value)
959
- block.call(RDF::Statement.new(subject, property, RDF::Literal(value)))
947
+ block.call(RDF::Statement(subject, property, RDF::Literal(value)))
960
948
  end
961
949
  else
962
950
  case value
@@ -1011,7 +999,7 @@ module RDF::Tabular
1011
999
  if @options[:validate]
1012
1000
  raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1013
1001
  else
1014
- warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1002
+ log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1015
1003
  end
1016
1004
  end
1017
1005
  else
@@ -1020,7 +1008,7 @@ module RDF::Tabular
1020
1008
  if @options[:validate]
1021
1009
  raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1022
1010
  else
1023
- warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1011
+ log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1024
1012
  end
1025
1013
  end
1026
1014
 
@@ -1033,7 +1021,7 @@ module RDF::Tabular
1033
1021
  if @options[:validate]
1034
1022
  raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1035
1023
  else
1036
- warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1024
+ log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1037
1025
 
1038
1026
  # If present, a virtual column MUST appear after all other non-virtual column definitions
1039
1027
  raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
@@ -1048,13 +1036,13 @@ module RDF::Tabular
1048
1036
  end
1049
1037
  index = 0
1050
1038
  object_columns.all? do |cb|
1051
- ca = non_virtual_columns[index] || Column.new({})
1039
+ ca = non_virtual_columns[index] || Column.new({}, @options)
1052
1040
  ta = ca.titles || {}
1053
1041
  tb = cb.titles || {}
1054
1042
  if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
1055
1043
  true
1056
1044
  elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
1057
- raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}" unless ca.name == cb.name
1045
+ raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
1058
1046
  elsif @options[:validate] || !ta.empty? && !tb.empty?
1059
1047
  # If validating, column compatibility requires strict match between titles
1060
1048
  titles_match = case
@@ -1078,10 +1066,10 @@ module RDF::Tabular
1078
1066
  true
1079
1067
  elsif !@options[:validate]
1080
1068
  # If not validating, columns don't match, but processing continues
1081
- warn "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1069
+ log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1082
1070
  true
1083
1071
  else
1084
- raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1072
+ raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1085
1073
  end
1086
1074
  end
1087
1075
  index += 1
@@ -1175,13 +1163,13 @@ module RDF::Tabular
1175
1163
  when Hash
1176
1164
  if value['@value']
1177
1165
  if !(value.keys.sort - %w(@value @type @language)).empty?
1178
- raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1166
+ log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1179
1167
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
1180
- raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
1168
+ log_error "Value object may not contain both @type and @language: #{value.to_json}"
1181
1169
  elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
1182
- raise Error, "Value object with @language must use valid language: #{value.to_json}"
1170
+ log_error "Value object with @language must use valid language: #{value.to_json}"
1183
1171
  elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
1184
- raise Error, "Value object with @type must defined type: #{value.to_json}"
1172
+ log_error "Value object with @type must defined type: #{value.to_json}"
1185
1173
  end
1186
1174
  value
1187
1175
  else
@@ -1190,16 +1178,16 @@ module RDF::Tabular
1190
1178
  case k
1191
1179
  when "@id"
1192
1180
  nv[k] = context.expand_iri(v, documentRelative: true).to_s
1193
- raise Error, "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1181
+ log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1194
1182
  when "@type"
1195
1183
  Array(v).each do |vv|
1196
1184
  # Validate that all type values transform to absolute IRIs
1197
1185
  resource = context.expand_iri(vv, vocab: true)
1198
- raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
1186
+ log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
1199
1187
  end
1200
1188
  nv[k] = v
1201
1189
  when /^(@|_:)/
1202
- raise Error, "Invalid use of #{k} in JSON-LD content"
1190
+ log_error "Invalid use of #{k} in JSON-LD content"
1203
1191
  else
1204
1192
  nv[k] = normalize_jsonld(k, v)
1205
1193
  end
@@ -1212,15 +1200,9 @@ module RDF::Tabular
1212
1200
  end
1213
1201
  protected
1214
1202
 
1215
- # Add a warning on this object
1216
- def warn(string)
1217
- debug("warn: #{string}")
1218
- (@warnings ||= []) << string
1219
- end
1220
-
1221
1203
  def set_property(key, type, value, invalid)
1222
1204
  if invalid
1223
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1205
+ log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1224
1206
  case type
1225
1207
  when :link, :uri_template
1226
1208
  object[key] = ""
@@ -1264,12 +1246,12 @@ module RDF::Tabular
1264
1246
  end
1265
1247
  end
1266
1248
  else
1267
- warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1249
+ log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1268
1250
  []
1269
1251
  end
1270
1252
 
1271
1253
  unless object[key].all? {|v| v.is_a?(klass)}
1272
- warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1254
+ log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1273
1255
  # Remove elements that aren't of the right types
1274
1256
  object[key] = object[key].select! {|v| v.is_a?(klass)}
1275
1257
  end
@@ -1298,14 +1280,13 @@ module RDF::Tabular
1298
1280
  end
1299
1281
 
1300
1282
  class DebugContext
1301
- include Utils
1302
- def initialize(*args, &block)
1303
- @options = {}
1304
- debug(*args, &block)
1305
- end
1283
+ include RDF::Util::Logger
1284
+ end
1285
+ def self.log_debug(*args, &block)
1286
+ DebugContext.new.log_debug(*args, &block)
1306
1287
  end
1307
- def self.debug(*args, &block)
1308
- DebugContext.new(*args, &block)
1288
+ def self.log_warn(*args)
1289
+ DebugContext.new.log_warn(*args)
1309
1290
  end
1310
1291
  end
1311
1292
 
@@ -1453,7 +1434,7 @@ module RDF::Tabular
1453
1434
  content['@context'] = object.delete(:@context) if object[:@context]
1454
1435
  ctx = @context
1455
1436
  remove_instance_variable(:@context) if instance_variables.include?(:@context)
1456
- tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
1437
+ tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
1457
1438
  @parent = tg # Link from parent
1458
1439
  tg
1459
1440
  end
@@ -1464,8 +1445,7 @@ module RDF::Tabular
1464
1445
  "@id" => (id.to_s if id),
1465
1446
  "@type" => "AnnotatedTable",
1466
1447
  "url" => self.url.to_s,
1467
- "columns" => Array(tableSchema ? tableSchema.columns : []).map(&:to_atd),
1468
- "rows" => []
1448
+ "tableSchema" => (tableSchema.to_atd if tableSchema),
1469
1449
  }) do |memo, (k, v)|
1470
1450
  memo[k.to_s] ||= v
1471
1451
  memo
@@ -1519,12 +1499,12 @@ module RDF::Tabular
1519
1499
  end
1520
1500
  end
1521
1501
  else
1522
- warn "#{type} has invalid property 'columns': expected array of Column"
1502
+ log_warn "#{type} has invalid property 'columns': expected array of Column"
1523
1503
  []
1524
1504
  end
1525
1505
 
1526
1506
  unless object[:columns].all? {|v| v.is_a?(Column)}
1527
- warn "#{type} has invalid property 'columns': expected array of Column"
1507
+ log_warn "#{type} has invalid property 'columns': expected array of Column"
1528
1508
  # Remove elements that aren't of the right types
1529
1509
  object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
1530
1510
  end
@@ -1534,12 +1514,12 @@ module RDF::Tabular
1534
1514
  object[:foreignKeys] = case value
1535
1515
  when Array then value
1536
1516
  else
1537
- warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1517
+ log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1538
1518
  []
1539
1519
  end
1540
1520
 
1541
1521
  unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
1542
- warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1522
+ log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1543
1523
  # Remove elements that aren't of the right types
1544
1524
  object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
1545
1525
  end
@@ -1562,6 +1542,18 @@ module RDF::Tabular
1562
1542
  end
1563
1543
  end
1564
1544
  end
1545
+
1546
+ # Return Annotated Table representation
1547
+ def to_atd
1548
+ object.inject({
1549
+ "@id" => (id.to_s if id),
1550
+ "@type" => "Schema",
1551
+ "columns" => Array(columns).map(&:to_atd),
1552
+ }) do |memo, (k, v)|
1553
+ memo[k.to_s] ||= v
1554
+ memo
1555
+ end.delete_if {|k,v| v.nil? || v.is_a?(Metadata)}
1556
+ end
1565
1557
  end
1566
1558
 
1567
1559
  class Column < Metadata
@@ -1650,7 +1642,6 @@ module RDF::Tabular
1650
1642
  "table" => (table.id.to_s if table.id),
1651
1643
  "number" => self.number,
1652
1644
  "sourceNumber" => self.sourceNumber,
1653
- "cells" => [],
1654
1645
  "virtual" => self.virtual,
1655
1646
  "name" => self.name,
1656
1647
  "titles" => self.titles
@@ -1701,7 +1692,7 @@ module RDF::Tabular
1701
1692
  class Dialect < Metadata
1702
1693
  # Defaults for dialects
1703
1694
  DEFAULTS = {
1704
- commentPrefix: "#".freeze,
1695
+ commentPrefix: false,
1705
1696
  delimiter: ",".freeze,
1706
1697
  doubleQuote: true,
1707
1698
  encoding: "utf-8".freeze,
@@ -1814,38 +1805,75 @@ module RDF::Tabular
1814
1805
  lang ||= 'und'
1815
1806
 
1816
1807
  # Set encoding on input
1817
- csv = ::CSV.new(input, csv_options)
1818
- (1..skipRows.to_i).each do
1819
- value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1820
- # Trim value
1821
- value.lstrip! if %w(true start).include?(trim.to_s)
1822
- value.rstrip! if %w(true end).include?(trim.to_s)
1823
-
1824
- value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1825
- (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1826
- end
1827
- debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1828
-
1829
- (1..headerRowCount).each do
1830
- row_data = Array(csv.shift)
1831
- Array(row_data).each_with_index do |value, index|
1832
- # Skip columns
1833
- skipCols = skipColumns.to_i
1834
- next if index < skipCols
1808
+ path = input.base_uri.path rescue ""
1809
+ if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
1810
+ # Input is HTML; use fragment identfier to find table.
1811
+ fragment = RDF::URI(table["url"]).fragment rescue nil
1812
+ tab = begin
1813
+ # Extract with nokogiri
1814
+ require 'nokogiri' unless defined?(:Nokogiri)
1815
+ doc = Nokogiri::HTML.parse(input)
1816
+ doc.search("##{fragment}").first if fragment
1817
+ rescue LoadError
1818
+ # Extract with REXML
1819
+ # FIXME
1820
+ end
1835
1821
 
1822
+ raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
1823
+
1824
+ # Use rows with <th> to create column titles
1825
+ tab.xpath('.//tr').each do |row|
1826
+ row.xpath('th').map(&:content).each_with_index do |value, index|
1827
+ # Skip columns
1828
+ skipCols = skipColumns.to_i
1829
+ next if index < skipCols
1830
+
1831
+ # Trim value
1832
+ value.lstrip! if %w(true start).include?(trim.to_s)
1833
+ value.rstrip! if %w(true end).include?(trim.to_s)
1834
+
1835
+ # Initialize titles
1836
+ columns = table["tableSchema"]["columns"] ||= []
1837
+ column = columns[index - skipCols] ||= {
1838
+ "titles" => {lang => []},
1839
+ }
1840
+ column["titles"][lang] << value
1841
+ end
1842
+ end
1843
+ else
1844
+ csv = ::CSV.new(input, csv_options)
1845
+ (1..skipRows.to_i).each do
1846
+ value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1836
1847
  # Trim value
1837
1848
  value.lstrip! if %w(true start).include?(trim.to_s)
1838
1849
  value.rstrip! if %w(true end).include?(trim.to_s)
1839
1850
 
1840
- # Initialize titles
1841
- columns = table["tableSchema"]["columns"] ||= []
1842
- column = columns[index - skipCols] ||= {
1843
- "titles" => {lang => []},
1844
- }
1845
- column["titles"][lang] << value
1851
+ value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1852
+ (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1853
+ end
1854
+ log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1855
+
1856
+ (1..headerRowCount).each do
1857
+ row_data = Array(csv.shift)
1858
+ Array(row_data).each_with_index do |value, index|
1859
+ # Skip columns
1860
+ skipCols = skipColumns.to_i
1861
+ next if index < skipCols
1862
+
1863
+ # Trim value
1864
+ value.lstrip! if %w(true start).include?(trim.to_s)
1865
+ value.rstrip! if %w(true end).include?(trim.to_s)
1866
+
1867
+ # Initialize titles
1868
+ columns = table["tableSchema"]["columns"] ||= []
1869
+ column = columns[index - skipCols] ||= {
1870
+ "titles" => {lang => []},
1871
+ }
1872
+ column["titles"][lang] << value
1873
+ end
1846
1874
  end
1847
1875
  end
1848
- debug("embedded_metadata") {"table: #{table.inspect}"}
1876
+ log_debug("embedded_metadata") {"table: #{table.inspect}"}
1849
1877
  input.rewind if input.respond_to?(:rewind)
1850
1878
 
1851
1879
  Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
@@ -2030,13 +2058,13 @@ module RDF::Tabular
2030
2058
 
2031
2059
  # create column if necessary
2032
2060
  columns[index - skipColumns] ||=
2033
- Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
2061
+ Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
2034
2062
 
2035
2063
  column = columns[index - skipColumns]
2036
2064
 
2037
2065
  @values << cell = Cell.new(metadata, column, self, value)
2038
2066
 
2039
- datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
2067
+ datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
2040
2068
  value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
2041
2069
  value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
2042
2070
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
@@ -2092,7 +2120,11 @@ module RDF::Tabular
2092
2120
 
2093
2121
  # Identifier for this row, as an RFC7111 fragment
2094
2122
  # @return [RDF::URI]
2095
- def id; table.url + "#row=#{self.sourceNumber}"; end
2123
+ def id;
2124
+ u = table.url.dup
2125
+ u.fragment = "row=#{self.sourceNumber}"
2126
+ u
2127
+ end
2096
2128
 
2097
2129
  # Return Annotated Row representation
2098
2130
  def to_atd