rdf-tabular 0.3.0 → 0.4.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2feb2648ce7d91d183b562e401c5a95f97c6387d
4
- data.tar.gz: 54f150bbde26f030d759c79016c75369fd09f999
3
+ metadata.gz: b34ec5c872bbf6e8d8f13559b255283cd118cd46
4
+ data.tar.gz: 08ace967385cb72fdc48e48ad434f7e0bd35753d
5
5
  SHA512:
6
- metadata.gz: 484aba808b7d2a448fda9c12cbc14b7f3938b76c16bdde81117401def7e7903e78c23efb9018a0297d9707994678f3c5fe0973ccd9c14c51e5050df08560871c
7
- data.tar.gz: 04259b3a8e056af10efd8924ae7e6deaff7766275ca5b2ad14bb5d1ab27b7eff98f083dfa77cac68ac47ef7b7d0f819c6a13b9ab7dab582a82750f0db5b55e15
6
+ metadata.gz: 55a2305ce14c365631a1f7ad178e4b2c603ef2279c74537cb36f33a47ef81a19b250b1cad64719be5c1921536e06ee2ba4bd2fa2745dff81bfe652ed31ed823d
7
+ data.tar.gz: 9a3b83c57938b94ebf1ab86052a0bccd144c9380394150748a6e580bf480e1bf39f835a9b2ff2633b3d6ca09823b782c945f3a40fbeec653d6c0ae61218805af
data/README.md CHANGED
@@ -13,6 +13,7 @@ RDF::Tabular parses CSV or other Tabular Data into [RDF][] and JSON using the [W
13
13
 
14
14
  * Parses [number patterns](http://www.unicode.org/reports/tr35/tr35-39/tr35-numbers.html#Number_Patterns) from [UAX35][]
15
15
  * Parses [date formats](http://www.unicode.org/reports/tr35/tr35-39/tr35-dates.html#Contents) from [UAX35][]
16
+ * Returns detailed errors and warnings using optional `Logger`.
16
17
 
17
18
  ## Installation
18
19
  Install with `gem install rdf-tabular`
@@ -247,8 +248,8 @@ Full documentation available on [RubyDoc](http://rubydoc.info/gems/rdf-tabular/f
247
248
  * {RDF::Tabular::Reader}
248
249
 
249
250
  ## Dependencies
250
- * [Ruby](http://ruby-lang.org/) (>= 2.0.0)
251
- * [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.0)
251
+ * [Ruby](http://ruby-lang.org/) (>= 2.0)
252
+ * [RDF.rb](http://rubygems.org/gems/rdf) (>= 2.0)
252
253
  * [JSON](https://rubygems.org/gems/json) (>= 1.5)
253
254
 
254
255
  ## Installation
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.0
1
+ 0.4.0.beta2
data/lib/rdf/tabular.rb CHANGED
@@ -11,7 +11,6 @@ module RDF
11
11
  # @author [Gregg Kellogg](http://greggkellogg.net/)
12
12
  module Tabular
13
13
  require 'rdf/tabular/format'
14
- require 'rdf/tabular/utils'
15
14
  autoload :Column, 'rdf/tabular/metadata'
16
15
  autoload :CSVW, 'rdf/tabular/csvw'
17
16
  autoload :Dialect, 'rdf/tabular/metadata'
@@ -46,5 +46,21 @@ module RDF::Tabular
46
46
  def self.detect(sample)
47
47
  !!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
48
48
  end
49
+
50
+ ##
51
+ # Hash of CLI commands appropriate for this format
52
+ # @return [Hash{Symbol => Lambda(Array, Hash)}]
53
+ def self.cli_commands
54
+ {
55
+ :"tabular-json" => ->(argv, opts) do
56
+ raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
57
+ out = opts[:output] || $stdout
58
+ out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
59
+ RDF::CLI.parse(argv, opts) do |reader|
60
+ out.puts reader.to_json
61
+ end
62
+ end
63
+ }
64
+ end
49
65
  end
50
66
  end
@@ -19,16 +19,12 @@ require 'yaml' # used by BCP47, which should have required it.
19
19
  # @author [Gregg Kellogg](http://greggkellogg.net/)
20
20
  module RDF::Tabular
21
21
  class Metadata
22
- include Utils
22
+ include RDF::Util::Logger
23
23
 
24
24
  # Hash representation
25
25
  # @return [Hash<Symbol,Object>]
26
26
  attr_accessor :object
27
27
 
28
- # Warnings detected on initialization or when setting properties
29
- # @return [Array<String>]
30
- attr_accessor :warnings
31
-
32
28
  # Inheritect properties, valid for all types
33
29
  INHERITED_PROPERTIES = {
34
30
  aboutUrl: :uri_template,
@@ -179,7 +175,6 @@ module RDF::Tabular
179
175
  # @return [Metadata]
180
176
  def self.for_input(input, options = {})
181
177
  base = options[:base]
182
- warnings = options.fetch(:warnings, [])
183
178
 
184
179
  # Use user metadata, if provided
185
180
  metadata = case options[:metadata]
@@ -202,10 +197,7 @@ module RDF::Tabular
202
197
  if md.describes_file?(base)
203
198
  metadata = md
204
199
  else
205
- warnings << "Found metadata at #{link_loc}, which does not describe #{base}, ignoring"
206
- if options[:validate] && !options[:warnings]
207
- $stderr.puts "Warnings: #{warnings.join("\n")}"
208
- end
200
+ log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
209
201
  end
210
202
  end
211
203
  end
@@ -214,12 +206,12 @@ module RDF::Tabular
214
206
  # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
215
207
  if !metadata && base
216
208
  templates = site_wide_config(base)
217
- debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
209
+ log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
218
210
  locs = templates.map do |template|
219
211
  t = Addressable::Template.new(template)
220
212
  RDF::URI(base).join(t.expand(url: base).to_s)
221
213
  end
222
- debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
214
+ log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
223
215
 
224
216
  locs.each do |loc|
225
217
  metadata ||= begin
@@ -230,15 +222,12 @@ module RDF::Tabular
230
222
  if md.describes_file?(base)
231
223
  md
232
224
  else
233
- warnings << "Found metadata at #{loc}, which does not describe #{base}, ignoring"
234
- if options[:validate] && !options[:warnings]
235
- $stderr.puts "Warnings: #{warnings.join("\n")}"
236
- end
225
+ log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
237
226
  nil
238
227
  end
239
228
  end
240
229
  rescue IOError
241
- debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
230
+ log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
242
231
  nil
243
232
  end
244
233
  end
@@ -331,7 +320,6 @@ module RDF::Tabular
331
320
  # @return [Metadata]
332
321
  def initialize(input, options = {})
333
322
  @options = options.dup
334
- @options[:depth] ||= 0
335
323
 
336
324
  # Parent of this Metadata, if any
337
325
  @parent = @options[:parent]
@@ -344,14 +332,14 @@ module RDF::Tabular
344
332
 
345
333
  @context = case input['@context']
346
334
  when Array
347
- warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
335
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
348
336
  c = LOCAL_CONTEXT.dup
349
337
  c.base = RDF::URI(opt_base)
350
338
  obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
351
339
  raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
352
340
  c.parse(obj)
353
341
  when Hash
354
- warn "Context missing required value 'http://www.w3.org/ns/csvw'"
342
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
355
343
  c = LOCAL_CONTEXT.dup
356
344
  c.base = RDF::URI(opt_base)
357
345
  c.parse(input['@context'])
@@ -362,7 +350,7 @@ module RDF::Tabular
362
350
  c
363
351
  else
364
352
  if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
365
- warn "Context missing required value 'http://www.w3.org/ns/csvw'"
353
+ log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
366
354
  LOCAL_CONTEXT.dup
367
355
  c = LOCAL_CONTEXT.dup
368
356
  c.base = RDF::URI(opt_base)
@@ -375,7 +363,7 @@ module RDF::Tabular
375
363
  @options[:base] = @context ? @context.base : RDF::URI(opt_base)
376
364
 
377
365
  if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
378
- warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
366
+ log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
379
367
  @context.default_language = nil
380
368
  end
381
369
 
@@ -385,7 +373,7 @@ module RDF::Tabular
385
373
 
386
374
  @object = {}
387
375
 
388
- depth do
376
+ log_depth do
389
377
  # Input was parsed in .new
390
378
  # Metadata is object with symbolic keys
391
379
  input.each do |key, value|
@@ -401,7 +389,7 @@ module RDF::Tabular
401
389
  object[:@id] = if value.is_a?(String)
402
390
  value
403
391
  else
404
- warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
392
+ log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
405
393
  "" # Default value
406
394
  end
407
395
  @id = @options[:base].join(object[:@id])
@@ -426,9 +414,9 @@ module RDF::Tabular
426
414
  end
427
415
 
428
416
  if reason
429
- debug("md#initialize") {reason}
430
- debug("md#initialize") {"filenames: #{filenames}"}
431
- debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
417
+ log_debug("md#initialize") {reason}
418
+ log_debug("md#initialize") {"filenames: #{filenames}"}
419
+ log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
432
420
  end
433
421
  end
434
422
 
@@ -487,7 +475,7 @@ module RDF::Tabular
487
475
  when Schema
488
476
  value
489
477
  else
490
- warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
478
+ log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
491
479
  Schema.new({}, @options.merge(parent: self, context: nil))
492
480
  end
493
481
  end
@@ -534,7 +522,7 @@ module RDF::Tabular
534
522
  when Dialect
535
523
  value
536
524
  else
537
- warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
525
+ log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
538
526
  nil
539
527
  end
540
528
  end
@@ -544,15 +532,15 @@ module RDF::Tabular
544
532
  # @raise [Error] if datatype is not valid
545
533
  def datatype=(value)
546
534
  val = case value
547
- when Hash then Datatype.new(value, parent: self)
548
- else Datatype.new({base: value}, parent: self)
535
+ when Hash then Datatype.new(value, @options.merge(parent: self))
536
+ else Datatype.new({base: value}, @options.merge(parent: self))
549
537
  end
550
538
 
551
539
  if val.valid? || value.is_a?(Hash)
552
540
  # Set it if it was specified as an object, which may cause validation errors later
553
541
  object[:datatype] = val
554
542
  else
555
- warn "#{type} has invalid property 'datatype': expected a built-in or an object"
543
+ log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
556
544
  end
557
545
  end
558
546
 
@@ -567,40 +555,20 @@ module RDF::Tabular
567
555
  ##
568
556
  # Do we have valid metadata?
569
557
  def valid?
570
- validate!
571
- true
572
- rescue
573
- false
574
- end
575
-
576
- ##
577
- # Validation errors
578
- # @return [Array<String>]
579
- def errors
580
- validate! && []
581
- rescue Error => e
582
- e.message.split("\n")
558
+ validate # Possibly re-validate
559
+ !log_statistics[:error]
583
560
  end
584
561
 
585
- ##
586
- # Validation warnings, available only after validating or finding warnings
587
- # @return [Array<String>]
588
- def warnings
589
- ((@warnings || []) + object.
590
- values.
591
- flatten.
592
- select {|v| v.is_a?(Metadata)}.
593
- map(&:warnings).
594
- flatten).compact.uniq
562
+ def validate!
563
+ raise Error, "Metadata error" unless valid?
595
564
  end
596
565
 
597
566
  ##
598
567
  # Validate metadata, raising an error containing all errors detected during validation
599
568
  # @raise [Error] Raise error if metadata has any unexpected properties
600
569
  # @return [self]
601
- def validate!
570
+ def validate
602
571
  expected_props, required_props = @properties.keys, @required
603
- errors = []
604
572
 
605
573
  unless is_a?(Dialect) || is_a?(Transformation)
606
574
  expected_props = expected_props + INHERITED_PROPERTIES.keys
@@ -609,10 +577,10 @@ module RDF::Tabular
609
577
  # It has only expected properties (exclude metadata)
610
578
  check_keys = object.keys - [:"@id", :"@context"]
611
579
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
612
- warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
580
+ log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
613
581
 
614
582
  # It has required properties
615
- errors << "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
583
+ log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
616
584
 
617
585
  self.normalize!
618
586
 
@@ -621,55 +589,49 @@ module RDF::Tabular
621
589
  value = object[key]
622
590
  case key
623
591
  when :base
624
- errors << "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
592
+ log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
625
593
  when :columns
626
- value.each do |v|
627
- begin
628
- v.validate!
629
- rescue Error => e
630
- errors << e.message
631
- end
594
+ value.each do |col|
595
+ col.validate
596
+ log_statistics.merge!(col.log_statistics)
632
597
  end
633
598
  column_names = value.map(&:name)
634
- errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
599
+ log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
635
600
  when :datatype, :dialect, :tables, :tableSchema, :transformations
636
601
  Array(value).each do |t|
637
602
  # Make sure value is of appropriate class
638
603
  if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
639
- begin
640
- t.validate!
641
- rescue Error => e
642
- errors << e.message
643
- end
604
+ t.validate
605
+ log_statistics.merge!(t.log_statistics)
644
606
  else
645
- errors << "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
607
+ log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
646
608
  end
647
609
  end
648
- errors << "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
610
+ log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
649
611
  when :foreignKeys
650
612
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
651
613
  value.each do |fk|
652
614
  columnReference, reference = fk['columnReference'], fk['reference']
653
- errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
654
- errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
615
+ log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
616
+ log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
655
617
 
656
618
  # Verify that columns exist in this schema
657
- errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
619
+ log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
658
620
  Array(columnReference).each do |k|
659
- errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
621
+ log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
660
622
  end
661
623
 
662
624
  if reference.is_a?(Hash)
663
- errors << "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
625
+ log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
664
626
  ref_cols = reference['columnReference']
665
627
  schema = if reference.has_key?('resource')
666
628
  if reference.has_key?('schemaReference')
667
- errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
629
+ log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
668
630
  end
669
631
  # resource is the URL of a Table in the TableGroup
670
632
  ref = context.base.join(reference['resource']).to_s
671
- table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
672
- errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
633
+ table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
634
+ log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
673
635
  table.tableSchema if table
674
636
  elsif reference.has_key?('schemaReference')
675
637
  # resource is the @id of a Schema in the TableGroup
@@ -677,25 +639,25 @@ module RDF::Tabular
677
639
  tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
678
640
  case tables.length
679
641
  when 0
680
- errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
642
+ log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
681
643
  nil
682
644
  when 1
683
645
  tables.first.tableSchema
684
646
  else
685
- errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
647
+ log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
686
648
  nil
687
649
  end
688
650
  end
689
651
 
690
652
  if schema
691
653
  # ref_cols must exist in schema
692
- errors << "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
654
+ log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
693
655
  Array(ref_cols).each do |k|
694
- errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
656
+ log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
695
657
  end
696
658
  end
697
659
  else
698
- errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
660
+ log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
699
661
  end
700
662
  end
701
663
  when :format
@@ -707,7 +669,7 @@ module RDF::Tabular
707
669
  nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
708
670
  unsignedLong unsignedInt unsignedShort unsignedByte
709
671
  ).include?(self.base)
710
- warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
672
+ log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
711
673
  object.delete(:format) # act as if not set
712
674
  end
713
675
 
@@ -715,14 +677,14 @@ module RDF::Tabular
715
677
  begin
716
678
  parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
717
679
  rescue ArgumentError => e
718
- warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
719
- object[:format].delete("pattern") # act as if not set
680
+ log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
681
+ object[:format].delete("pattern") if object[:format] # act as if not set
720
682
  end
721
683
  else
722
684
  case self.base
723
685
  when 'boolean'
724
686
  unless value.split("|").length == 2
725
- warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
687
+ log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
726
688
  object.delete(:format) # act as if not set
727
689
  end
728
690
  when :decimal, :integer, :long, :int, :short, :byte,
@@ -733,7 +695,7 @@ module RDF::Tabular
733
695
  begin
734
696
  parse_uax35_number(value, nil)
735
697
  rescue ArgumentError => e
736
- warn "#{type} has invalid property '#{key}': #{e.message}"
698
+ log_warn "#{type} has invalid property '#{key}': #{e.message}"
737
699
  object.delete(:format) # act as if not set
738
700
  end
739
701
  when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
@@ -741,7 +703,7 @@ module RDF::Tabular
741
703
  begin
742
704
  parse_uax35_date(value, nil)
743
705
  rescue ArgumentError => e
744
- warn "#{type} has invalid property '#{key}': #{e.message}"
706
+ log_warn "#{type} has invalid property '#{key}': #{e.message}"
745
707
  object.delete(:format) # act as if not set
746
708
  end
747
709
  else
@@ -749,7 +711,7 @@ module RDF::Tabular
749
711
  begin
750
712
  Regexp.compile(value)
751
713
  rescue
752
- warn "#{type} has invalid property '#{key}': #{$!.message}"
714
+ log_warn "#{type} has invalid property '#{key}': #{$!.message}"
753
715
  object.delete(:format) # act as if not set
754
716
  end
755
717
  end
@@ -760,20 +722,20 @@ module RDF::Tabular
760
722
  if object[:length]
761
723
  case key
762
724
  when :minLength
763
- errors << "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
725
+ log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
764
726
  when :maxLength
765
- errors << "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
727
+ log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
766
728
  end
767
729
  end
768
730
 
769
731
  # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
770
732
  if key == :maxLength && object[:minLength]
771
- errors << "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
733
+ log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
772
734
  end
773
735
 
774
736
  # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
775
737
  unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
776
- errors << "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
738
+ log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
777
739
  end
778
740
  when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
779
741
  case self.base
@@ -781,46 +743,46 @@ module RDF::Tabular
781
743
  'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
782
744
  'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
783
745
  'duration', 'dayTimeDuration', 'yearMonthDuration'
784
- errors << "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
746
+ log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
785
747
 
786
748
  case key
787
749
  when :minInclusive
788
750
  # Applications MUST raise an error if both minInclusive and minExclusive are specified
789
- errors << "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
751
+ log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
790
752
 
791
753
  # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
792
- errors << "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
754
+ log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
793
755
 
794
756
  # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
795
- errors << "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
757
+ log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
796
758
  when :maxInclusive
797
759
  # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
798
- errors << "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
760
+ log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
799
761
  when :minExclusive
800
762
  # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
801
- errors << "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
763
+ log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
802
764
 
803
765
  # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
804
- errors << "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
766
+ log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
805
767
  end
806
768
  else
807
- errors << "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
769
+ log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
808
770
  end
809
771
  when :notes
810
772
  unless value.is_a?(Hash) || value.is_a?(Array)
811
- errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
773
+ log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
812
774
  end
813
775
  begin
814
776
  normalize_jsonld(key, value)
815
777
  rescue Error => e
816
- errors << "#{type} has invalid content '#{key}': #{e.message}"
778
+ log_error "#{type} has invalid content '#{key}': #{e.message}"
817
779
  end
818
780
  when :primaryKey, :rowTitles
819
781
  # A column reference property that holds either a single reference to a column description object or an array of references.
820
782
  "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
821
783
  Array(value).each do |k|
822
784
  unless self.columns.any? {|c| c[:name] == k}
823
- warn "#{type} has invalid property '#{key}': column reference not found #{k}"
785
+ log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
824
786
  object.delete(key)
825
787
  end
826
788
  end
@@ -829,34 +791,33 @@ module RDF::Tabular
829
791
  when :@id
830
792
  # Must not be a BNode
831
793
  if value.to_s.start_with?("_:")
832
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
794
+ log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
833
795
  end
834
796
 
835
797
  # Datatype @id MUST NOT be the URL of a built-in type
836
798
  if self.is_a?(Datatype) && DATATYPES.values.include?(value)
837
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
799
+ log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
838
800
  end
839
801
  when :@type
840
802
  # Must not be a BNode
841
803
  if value.to_s.start_with?("_:")
842
- errors << "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
804
+ log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
843
805
  end
844
806
  case type
845
807
  when :Transformation
846
- errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
808
+ log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
847
809
  else
848
- errors << "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
810
+ log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
849
811
  end
850
812
  when ->(k) {key.to_s.include?(':')}
851
813
  begin
852
814
  normalize_jsonld(key, value)
853
815
  rescue Error => e
854
- errors << "#{type} has invalid content '#{key}': #{e.message}"
816
+ log_error "#{type} has invalid content '#{key}': #{e.message}"
855
817
  end
856
818
  end
857
819
  end
858
820
 
859
- raise Error, errors.join("\n") unless errors.empty?
860
821
  self
861
822
  end
862
823
 
@@ -885,10 +846,37 @@ module RDF::Tabular
885
846
  # @param [:read] input
886
847
  # @yield [Row]
887
848
  def each_row(input)
888
- csv = ::CSV.new(input, csv_options)
889
- # Skip skipRows and headerRowCount
890
- number, skipped = 0, (dialect.skipRows.to_i + dialect.headerRowCount)
891
- (1..skipped).each {csv.shift}
849
+ csv, number, skipped = nil, 0, 0
850
+ path = input.base_uri.path rescue ""
851
+ if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
852
+ # Input is HTML; use fragment identfier to find table.
853
+ fragment = RDF::URI(self.url).fragment rescue nil
854
+ tab = begin
855
+ # Extract with nokogiri
856
+ require 'nokogiri' unless defined?(:Nokogiri)
857
+ doc = Nokogiri::HTML.parse(input)
858
+ doc.search("##{fragment}").first if fragment
859
+ rescue LoadError
860
+ # Extract with REXML
861
+ # FIXME
862
+ end
863
+
864
+ raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
865
+
866
+ # Use rows with <td> to create column data
867
+ csv = []
868
+ number = 0
869
+ tab.xpath('.//tr').map do |row|
870
+ number += 1 if row.xpath('th')
871
+ data = row.xpath('td').map(&:content)
872
+ csv << data unless data.empty?
873
+ end
874
+ else
875
+ csv = ::CSV.new(input, csv_options)
876
+ # Skip skipRows and headerRowCount
877
+ skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
878
+ (1..skipped).each {csv.shift}
879
+ end
892
880
  csv.each do |data|
893
881
  # Check for embedded comments
894
882
  if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
@@ -934,17 +922,17 @@ module RDF::Tabular
934
922
  if value['@value']
935
923
  dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
936
924
  lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
937
- block.call(RDF::Statement.new(subject, property, lit))
925
+ block.call(RDF::Statement(subject, property, lit))
938
926
  else
939
927
  # value MUST be a node object, establish a new subject from `@id`
940
928
  s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
941
929
 
942
930
  # Generate a triple
943
- block.call(RDF::Statement.new(subject, property, s2))
931
+ block.call(RDF::Statement(subject, property, s2))
944
932
 
945
933
  # Generate types
946
934
  Array(value['@type']).each do |t|
947
- block.call(RDF::Statement.new(s2, RDF.type, context.expand_iri(t, vocab: true)))
935
+ block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
948
936
  end
949
937
 
950
938
  # Generate triples for all other properties
@@ -956,7 +944,7 @@ module RDF::Tabular
956
944
  else
957
945
  # Value is a primitive JSON value
958
946
  lit = RDF::Literal(value)
959
- block.call(RDF::Statement.new(subject, property, RDF::Literal(value)))
947
+ block.call(RDF::Statement(subject, property, RDF::Literal(value)))
960
948
  end
961
949
  else
962
950
  case value
@@ -1011,7 +999,7 @@ module RDF::Tabular
1011
999
  if @options[:validate]
1012
1000
  raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1013
1001
  else
1014
- warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1002
+ log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
1015
1003
  end
1016
1004
  end
1017
1005
  else
@@ -1020,7 +1008,7 @@ module RDF::Tabular
1020
1008
  if @options[:validate]
1021
1009
  raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1022
1010
  else
1023
- warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1011
+ log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1024
1012
  end
1025
1013
  end
1026
1014
 
@@ -1033,7 +1021,7 @@ module RDF::Tabular
1033
1021
  if @options[:validate]
1034
1022
  raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1035
1023
  else
1036
- warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1024
+ log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
1037
1025
 
1038
1026
  # If present, a virtual column MUST appear after all other non-virtual column definitions
1039
1027
  raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
@@ -1048,13 +1036,13 @@ module RDF::Tabular
1048
1036
  end
1049
1037
  index = 0
1050
1038
  object_columns.all? do |cb|
1051
- ca = non_virtual_columns[index] || Column.new({})
1039
+ ca = non_virtual_columns[index] || Column.new({}, @options)
1052
1040
  ta = ca.titles || {}
1053
1041
  tb = cb.titles || {}
1054
1042
  if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
1055
1043
  true
1056
1044
  elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
1057
- raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}" unless ca.name == cb.name
1045
+ raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
1058
1046
  elsif @options[:validate] || !ta.empty? && !tb.empty?
1059
1047
  # If validating, column compatibility requires strict match between titles
1060
1048
  titles_match = case
@@ -1078,10 +1066,10 @@ module RDF::Tabular
1078
1066
  true
1079
1067
  elsif !@options[:validate]
1080
1068
  # If not validating, columns don't match, but processing continues
1081
- warn "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1069
+ log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1082
1070
  true
1083
1071
  else
1084
- raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1072
+ raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1085
1073
  end
1086
1074
  end
1087
1075
  index += 1
@@ -1175,13 +1163,13 @@ module RDF::Tabular
1175
1163
  when Hash
1176
1164
  if value['@value']
1177
1165
  if !(value.keys.sort - %w(@value @type @language)).empty?
1178
- raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1166
+ log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1179
1167
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
1180
- raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
1168
+ log_error "Value object may not contain both @type and @language: #{value.to_json}"
1181
1169
  elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
1182
- raise Error, "Value object with @language must use valid language: #{value.to_json}"
1170
+ log_error "Value object with @language must use valid language: #{value.to_json}"
1183
1171
  elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
1184
- raise Error, "Value object with @type must defined type: #{value.to_json}"
1172
+ log_error "Value object with @type must defined type: #{value.to_json}"
1185
1173
  end
1186
1174
  value
1187
1175
  else
@@ -1190,16 +1178,16 @@ module RDF::Tabular
1190
1178
  case k
1191
1179
  when "@id"
1192
1180
  nv[k] = context.expand_iri(v, documentRelative: true).to_s
1193
- raise Error, "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1181
+ log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1194
1182
  when "@type"
1195
1183
  Array(v).each do |vv|
1196
1184
  # Validate that all type values transform to absolute IRIs
1197
1185
  resource = context.expand_iri(vv, vocab: true)
1198
- raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
1186
+ log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
1199
1187
  end
1200
1188
  nv[k] = v
1201
1189
  when /^(@|_:)/
1202
- raise Error, "Invalid use of #{k} in JSON-LD content"
1190
+ log_error "Invalid use of #{k} in JSON-LD content"
1203
1191
  else
1204
1192
  nv[k] = normalize_jsonld(k, v)
1205
1193
  end
@@ -1212,15 +1200,9 @@ module RDF::Tabular
1212
1200
  end
1213
1201
  protected
1214
1202
 
1215
- # Add a warning on this object
1216
- def warn(string)
1217
- debug("warn: #{string}")
1218
- (@warnings ||= []) << string
1219
- end
1220
-
1221
1203
  def set_property(key, type, value, invalid)
1222
1204
  if invalid
1223
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1205
+ log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1224
1206
  case type
1225
1207
  when :link, :uri_template
1226
1208
  object[key] = ""
@@ -1264,12 +1246,12 @@ module RDF::Tabular
1264
1246
  end
1265
1247
  end
1266
1248
  else
1267
- warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1249
+ log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1268
1250
  []
1269
1251
  end
1270
1252
 
1271
1253
  unless object[key].all? {|v| v.is_a?(klass)}
1272
- warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1254
+ log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
1273
1255
  # Remove elements that aren't of the right types
1274
1256
  object[key] = object[key].select! {|v| v.is_a?(klass)}
1275
1257
  end
@@ -1298,14 +1280,13 @@ module RDF::Tabular
1298
1280
  end
1299
1281
 
1300
1282
  class DebugContext
1301
- include Utils
1302
- def initialize(*args, &block)
1303
- @options = {}
1304
- debug(*args, &block)
1305
- end
1283
+ include RDF::Util::Logger
1284
+ end
1285
+ def self.log_debug(*args, &block)
1286
+ DebugContext.new.log_debug(*args, &block)
1306
1287
  end
1307
- def self.debug(*args, &block)
1308
- DebugContext.new(*args, &block)
1288
+ def self.log_warn(*args)
1289
+ DebugContext.new.log_warn(*args)
1309
1290
  end
1310
1291
  end
1311
1292
 
@@ -1453,7 +1434,7 @@ module RDF::Tabular
1453
1434
  content['@context'] = object.delete(:@context) if object[:@context]
1454
1435
  ctx = @context
1455
1436
  remove_instance_variable(:@context) if instance_variables.include?(:@context)
1456
- tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
1437
+ tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
1457
1438
  @parent = tg # Link from parent
1458
1439
  tg
1459
1440
  end
@@ -1464,8 +1445,7 @@ module RDF::Tabular
1464
1445
  "@id" => (id.to_s if id),
1465
1446
  "@type" => "AnnotatedTable",
1466
1447
  "url" => self.url.to_s,
1467
- "columns" => Array(tableSchema ? tableSchema.columns : []).map(&:to_atd),
1468
- "rows" => []
1448
+ "tableSchema" => (tableSchema.to_atd if tableSchema),
1469
1449
  }) do |memo, (k, v)|
1470
1450
  memo[k.to_s] ||= v
1471
1451
  memo
@@ -1519,12 +1499,12 @@ module RDF::Tabular
1519
1499
  end
1520
1500
  end
1521
1501
  else
1522
- warn "#{type} has invalid property 'columns': expected array of Column"
1502
+ log_warn "#{type} has invalid property 'columns': expected array of Column"
1523
1503
  []
1524
1504
  end
1525
1505
 
1526
1506
  unless object[:columns].all? {|v| v.is_a?(Column)}
1527
- warn "#{type} has invalid property 'columns': expected array of Column"
1507
+ log_warn "#{type} has invalid property 'columns': expected array of Column"
1528
1508
  # Remove elements that aren't of the right types
1529
1509
  object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
1530
1510
  end
@@ -1534,12 +1514,12 @@ module RDF::Tabular
1534
1514
  object[:foreignKeys] = case value
1535
1515
  when Array then value
1536
1516
  else
1537
- warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1517
+ log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1538
1518
  []
1539
1519
  end
1540
1520
 
1541
1521
  unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
1542
- warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1522
+ log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
1543
1523
  # Remove elements that aren't of the right types
1544
1524
  object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
1545
1525
  end
@@ -1562,6 +1542,18 @@ module RDF::Tabular
1562
1542
  end
1563
1543
  end
1564
1544
  end
1545
+
1546
+ # Return Annotated Table representation
1547
+ def to_atd
1548
+ object.inject({
1549
+ "@id" => (id.to_s if id),
1550
+ "@type" => "Schema",
1551
+ "columns" => Array(columns).map(&:to_atd),
1552
+ }) do |memo, (k, v)|
1553
+ memo[k.to_s] ||= v
1554
+ memo
1555
+ end.delete_if {|k,v| v.nil? || v.is_a?(Metadata)}
1556
+ end
1565
1557
  end
1566
1558
 
1567
1559
  class Column < Metadata
@@ -1650,7 +1642,6 @@ module RDF::Tabular
1650
1642
  "table" => (table.id.to_s if table.id),
1651
1643
  "number" => self.number,
1652
1644
  "sourceNumber" => self.sourceNumber,
1653
- "cells" => [],
1654
1645
  "virtual" => self.virtual,
1655
1646
  "name" => self.name,
1656
1647
  "titles" => self.titles
@@ -1701,7 +1692,7 @@ module RDF::Tabular
1701
1692
  class Dialect < Metadata
1702
1693
  # Defaults for dialects
1703
1694
  DEFAULTS = {
1704
- commentPrefix: "#".freeze,
1695
+ commentPrefix: false,
1705
1696
  delimiter: ",".freeze,
1706
1697
  doubleQuote: true,
1707
1698
  encoding: "utf-8".freeze,
@@ -1814,38 +1805,75 @@ module RDF::Tabular
1814
1805
  lang ||= 'und'
1815
1806
 
1816
1807
  # Set encoding on input
1817
- csv = ::CSV.new(input, csv_options)
1818
- (1..skipRows.to_i).each do
1819
- value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1820
- # Trim value
1821
- value.lstrip! if %w(true start).include?(trim.to_s)
1822
- value.rstrip! if %w(true end).include?(trim.to_s)
1823
-
1824
- value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1825
- (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1826
- end
1827
- debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1828
-
1829
- (1..headerRowCount).each do
1830
- row_data = Array(csv.shift)
1831
- Array(row_data).each_with_index do |value, index|
1832
- # Skip columns
1833
- skipCols = skipColumns.to_i
1834
- next if index < skipCols
1808
+ path = input.base_uri.path rescue ""
1809
+ if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
1810
+ # Input is HTML; use fragment identifier to find table.
1811
+ fragment = RDF::URI(table["url"]).fragment rescue nil
1812
+ tab = begin
1813
+ # Extract with nokogiri
1814
+ require 'nokogiri' unless defined?(:Nokogiri)
1815
+ doc = Nokogiri::HTML.parse(input)
1816
+ doc.search("##{fragment}").first if fragment
1817
+ rescue LoadError
1818
+ # Extract with REXML
1819
+ # FIXME
1820
+ end
1835
1821
 
1822
+ raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
1823
+
1824
+ # Use rows with <th> to create column titles
1825
+ tab.xpath('.//tr').each do |row|
1826
+ row.xpath('th').map(&:content).each_with_index do |value, index|
1827
+ # Skip columns
1828
+ skipCols = skipColumns.to_i
1829
+ next if index < skipCols
1830
+
1831
+ # Trim value
1832
+ value.lstrip! if %w(true start).include?(trim.to_s)
1833
+ value.rstrip! if %w(true end).include?(trim.to_s)
1834
+
1835
+ # Initialize titles
1836
+ columns = table["tableSchema"]["columns"] ||= []
1837
+ column = columns[index - skipCols] ||= {
1838
+ "titles" => {lang => []},
1839
+ }
1840
+ column["titles"][lang] << value
1841
+ end
1842
+ end
1843
+ else
1844
+ csv = ::CSV.new(input, csv_options)
1845
+ (1..skipRows.to_i).each do
1846
+ value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1836
1847
  # Trim value
1837
1848
  value.lstrip! if %w(true start).include?(trim.to_s)
1838
1849
  value.rstrip! if %w(true end).include?(trim.to_s)
1839
1850
 
1840
- # Initialize titles
1841
- columns = table["tableSchema"]["columns"] ||= []
1842
- column = columns[index - skipCols] ||= {
1843
- "titles" => {lang => []},
1844
- }
1845
- column["titles"][lang] << value
1851
+ value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1852
+ (metadata["rdfs:comment"] ||= []) << value unless value.empty?
1853
+ end
1854
+ log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1855
+
1856
+ (1..headerRowCount).each do
1857
+ row_data = Array(csv.shift)
1858
+ Array(row_data).each_with_index do |value, index|
1859
+ # Skip columns
1860
+ skipCols = skipColumns.to_i
1861
+ next if index < skipCols
1862
+
1863
+ # Trim value
1864
+ value.lstrip! if %w(true start).include?(trim.to_s)
1865
+ value.rstrip! if %w(true end).include?(trim.to_s)
1866
+
1867
+ # Initialize titles
1868
+ columns = table["tableSchema"]["columns"] ||= []
1869
+ column = columns[index - skipCols] ||= {
1870
+ "titles" => {lang => []},
1871
+ }
1872
+ column["titles"][lang] << value
1873
+ end
1846
1874
  end
1847
1875
  end
1848
- debug("embedded_metadata") {"table: #{table.inspect}"}
1876
+ log_debug("embedded_metadata") {"table: #{table.inspect}"}
1849
1877
  input.rewind if input.respond_to?(:rewind)
1850
1878
 
1851
1879
  Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
@@ -2030,13 +2058,13 @@ module RDF::Tabular
2030
2058
 
2031
2059
  # create column if necessary
2032
2060
  columns[index - skipColumns] ||=
2033
- Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
2061
+ Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
2034
2062
 
2035
2063
  column = columns[index - skipColumns]
2036
2064
 
2037
2065
  @values << cell = Cell.new(metadata, column, self, value)
2038
2066
 
2039
- datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
2067
+ datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
2040
2068
  value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
2041
2069
  value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
2042
2070
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
@@ -2092,7 +2120,11 @@ module RDF::Tabular
2092
2120
 
2093
2121
  # Identifier for this row, as an RFC7111 fragment
2094
2122
  # @return [RDF::URI]
2095
- def id; table.url + "#row=#{self.sourceNumber}"; end
2123
+ def id;
2124
+ u = table.url.dup
2125
+ u.fragment = "row=#{self.sourceNumber}"
2126
+ u
2127
+ end
2096
2128
 
2097
2129
  # Return Annotated Row representation
2098
2130
  def to_atd