rdf-tabular 0.3.0 → 0.4.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/VERSION +1 -1
- data/lib/rdf/tabular.rb +0 -1
- data/lib/rdf/tabular/format.rb +16 -0
- data/lib/rdf/tabular/metadata.rb +223 -191
- data/lib/rdf/tabular/reader.rb +84 -133
- data/lib/rdf/tabular/uax35.rb +0 -2
- data/spec/format_spec.rb +34 -0
- data/spec/matchers.rb +3 -78
- data/spec/metadata_spec.rb +150 -80
- data/spec/reader_spec.rb +27 -24
- data/spec/spec_helper.rb +4 -3
- data/spec/suite_helper.rb +1 -1
- data/spec/suite_spec.rb +8 -9
- metadata +109 -60
- data/lib/rdf/tabular/utils.rb +0 -33
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b34ec5c872bbf6e8d8f13559b255283cd118cd46
         | 
| 4 | 
            +
              data.tar.gz: 08ace967385cb72fdc48e48ad434f7e0bd35753d
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 55a2305ce14c365631a1f7ad178e4b2c603ef2279c74537cb36f33a47ef81a19b250b1cad64719be5c1921536e06ee2ba4bd2fa2745dff81bfe652ed31ed823d
         | 
| 7 | 
            +
              data.tar.gz: 9a3b83c57938b94ebf1ab86052a0bccd144c9380394150748a6e580bf480e1bf39f835a9b2ff2633b3d6ca09823b782c945f3a40fbeec653d6c0ae61218805af
         | 
    
        data/README.md
    CHANGED
    
    | @@ -13,6 +13,7 @@ RDF::Tabular parses CSV or other Tabular Data into [RDF][] and JSON using the [W | |
| 13 13 |  | 
| 14 14 | 
             
            * Parses [number patterns](http://www.unicode.org/reports/tr35/tr35-39/tr35-numbers.html#Number_Patterns) from [UAX35][]
         | 
| 15 15 | 
             
            * Parses [date formats](http://www.unicode.org/reports/tr35/tr35-39/tr35-dates.html#Contents) from [UAX35][]
         | 
| 16 | 
            +
            * Returns detailed errors and warnings using optional `Logger`.
         | 
| 16 17 |  | 
| 17 18 | 
             
            ## Installation
         | 
| 18 19 | 
             
            Install with `gem install rdf-tabular`
         | 
| @@ -247,8 +248,8 @@ Full documentation available on [RubyDoc](http://rubydoc.info/gems/rdf-tabular/f | |
| 247 248 | 
             
              * {RDF::Tabular::Reader}
         | 
| 248 249 |  | 
| 249 250 | 
             
            ## Dependencies
         | 
| 250 | 
            -
            * [Ruby](http://ruby-lang.org/) (>= 2.0 | 
| 251 | 
            -
            * [RDF.rb](http://rubygems.org/gems/rdf) (>=  | 
| 251 | 
            +
            * [Ruby](http://ruby-lang.org/) (>= 2.0)
         | 
| 252 | 
            +
            * [RDF.rb](http://rubygems.org/gems/rdf) (>= 2.0)
         | 
| 252 253 | 
             
            * [JSON](https://rubygems.org/gems/json) (>= 1.5)
         | 
| 253 254 |  | 
| 254 255 | 
             
            ## Installation
         | 
    
        data/VERSION
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            0. | 
| 1 | 
            +
            0.4.0.beta2
         | 
    
        data/lib/rdf/tabular.rb
    CHANGED
    
    | @@ -11,7 +11,6 @@ module RDF | |
| 11 11 | 
             
              # @author [Gregg Kellogg](http://greggkellogg.net/)
         | 
| 12 12 | 
             
              module Tabular
         | 
| 13 13 | 
             
                require 'rdf/tabular/format'
         | 
| 14 | 
            -
                require 'rdf/tabular/utils'
         | 
| 15 14 | 
             
                autoload :Column,         'rdf/tabular/metadata'
         | 
| 16 15 | 
             
                autoload :CSVW,           'rdf/tabular/csvw'
         | 
| 17 16 | 
             
                autoload :Dialect,        'rdf/tabular/metadata'
         | 
    
        data/lib/rdf/tabular/format.rb
    CHANGED
    
    | @@ -46,5 +46,21 @@ module RDF::Tabular | |
| 46 46 | 
             
                def self.detect(sample)
         | 
| 47 47 | 
             
                  !!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
         | 
| 48 48 | 
             
                end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                ##
         | 
| 51 | 
            +
                # Hash of CLI commands appropriate for this format
         | 
| 52 | 
            +
                # @return [Hash{Symbol => Lambda(Array, Hash)}]
         | 
| 53 | 
            +
                def self.cli_commands
         | 
| 54 | 
            +
                  {
         | 
| 55 | 
            +
                    :"tabular-json" => ->(argv, opts) do
         | 
| 56 | 
            +
                      raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
         | 
| 57 | 
            +
                      out = opts[:output] || $stdout
         | 
| 58 | 
            +
                      out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
         | 
| 59 | 
            +
                      RDF::CLI.parse(argv, opts) do |reader|
         | 
| 60 | 
            +
                        out.puts reader.to_json
         | 
| 61 | 
            +
                      end
         | 
| 62 | 
            +
                    end
         | 
| 63 | 
            +
                  }
         | 
| 64 | 
            +
                end
         | 
| 49 65 | 
             
              end
         | 
| 50 66 | 
             
            end
         | 
    
        data/lib/rdf/tabular/metadata.rb
    CHANGED
    
    | @@ -19,16 +19,12 @@ require 'yaml'  # used by BCP47, which should have required it. | |
| 19 19 | 
             
            # @author [Gregg Kellogg](http://greggkellogg.net/)
         | 
| 20 20 | 
             
            module RDF::Tabular
         | 
| 21 21 | 
             
              class Metadata
         | 
| 22 | 
            -
                include  | 
| 22 | 
            +
                include RDF::Util::Logger
         | 
| 23 23 |  | 
| 24 24 | 
             
                # Hash representation
         | 
| 25 25 | 
             
                # @return [Hash<Symbol,Object>]
         | 
| 26 26 | 
             
                attr_accessor :object
         | 
| 27 27 |  | 
| 28 | 
            -
                # Warnings detected on initialization or when setting properties
         | 
| 29 | 
            -
                # @return [Array<String>]
         | 
| 30 | 
            -
                attr_accessor :warnings
         | 
| 31 | 
            -
             | 
| 32 28 | 
             
                # Inherited properties, valid for all types
         | 
| 33 29 | 
             
                INHERITED_PROPERTIES = {
         | 
| 34 30 | 
             
                  aboutUrl:           :uri_template,
         | 
| @@ -179,7 +175,6 @@ module RDF::Tabular | |
| 179 175 | 
             
                # @return [Metadata]
         | 
| 180 176 | 
             
                def self.for_input(input, options = {})
         | 
| 181 177 | 
             
                  base = options[:base]
         | 
| 182 | 
            -
                  warnings = options.fetch(:warnings, [])
         | 
| 183 178 |  | 
| 184 179 | 
             
                  # Use user metadata, if provided
         | 
| 185 180 | 
             
                  metadata = case options[:metadata]
         | 
| @@ -202,10 +197,7 @@ module RDF::Tabular | |
| 202 197 | 
             
                      if md.describes_file?(base)
         | 
| 203 198 | 
             
                        metadata = md
         | 
| 204 199 | 
             
                      else
         | 
| 205 | 
            -
                         | 
| 206 | 
            -
                        if options[:validate] && !options[:warnings]
         | 
| 207 | 
            -
                          $stderr.puts "Warnings: #{warnings.join("\n")}"
         | 
| 208 | 
            -
                        end
         | 
| 200 | 
            +
                        log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
         | 
| 209 201 | 
             
                      end
         | 
| 210 202 | 
             
                    end
         | 
| 211 203 | 
             
                  end
         | 
| @@ -214,12 +206,12 @@ module RDF::Tabular | |
| 214 206 | 
             
                  # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
         | 
| 215 207 | 
             
                  if !metadata && base
         | 
| 216 208 | 
             
                    templates = site_wide_config(base)
         | 
| 217 | 
            -
                     | 
| 209 | 
            +
                    log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
         | 
| 218 210 | 
             
                    locs = templates.map do |template|
         | 
| 219 211 | 
             
                      t = Addressable::Template.new(template)
         | 
| 220 212 | 
             
                      RDF::URI(base).join(t.expand(url: base).to_s)
         | 
| 221 213 | 
             
                    end
         | 
| 222 | 
            -
                     | 
| 214 | 
            +
                    log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
         | 
| 223 215 |  | 
| 224 216 | 
             
                    locs.each do |loc|
         | 
| 225 217 | 
             
                      metadata ||= begin
         | 
| @@ -230,15 +222,12 @@ module RDF::Tabular | |
| 230 222 | 
             
                          if md.describes_file?(base)
         | 
| 231 223 | 
             
                            md
         | 
| 232 224 | 
             
                          else
         | 
| 233 | 
            -
                             | 
| 234 | 
            -
                            if options[:validate] && !options[:warnings]
         | 
| 235 | 
            -
                              $stderr.puts "Warnings: #{warnings.join("\n")}"
         | 
| 236 | 
            -
                            end
         | 
| 225 | 
            +
                            log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
         | 
| 237 226 | 
             
                            nil
         | 
| 238 227 | 
             
                          end
         | 
| 239 228 | 
             
                        end
         | 
| 240 229 | 
             
                      rescue IOError
         | 
| 241 | 
            -
                         | 
| 230 | 
            +
                        log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
         | 
| 242 231 | 
             
                        nil
         | 
| 243 232 | 
             
                      end
         | 
| 244 233 | 
             
                    end
         | 
| @@ -331,7 +320,6 @@ module RDF::Tabular | |
| 331 320 | 
             
                # @return [Metadata]
         | 
| 332 321 | 
             
                def initialize(input, options = {})
         | 
| 333 322 | 
             
                  @options = options.dup
         | 
| 334 | 
            -
                  @options[:depth] ||= 0
         | 
| 335 323 |  | 
| 336 324 | 
             
                  # Parent of this Metadata, if any
         | 
| 337 325 | 
             
                  @parent = @options[:parent]
         | 
| @@ -344,14 +332,14 @@ module RDF::Tabular | |
| 344 332 |  | 
| 345 333 | 
             
                  @context = case input['@context']
         | 
| 346 334 | 
             
                  when Array
         | 
| 347 | 
            -
                     | 
| 335 | 
            +
                    log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
         | 
| 348 336 | 
             
                    c = LOCAL_CONTEXT.dup
         | 
| 349 337 | 
             
                    c.base = RDF::URI(opt_base)
         | 
| 350 338 | 
             
                    obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
         | 
| 351 339 | 
             
                    raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
         | 
| 352 340 | 
             
                    c.parse(obj)
         | 
| 353 341 | 
             
                  when Hash
         | 
| 354 | 
            -
                     | 
| 342 | 
            +
                    log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
         | 
| 355 343 | 
             
                    c = LOCAL_CONTEXT.dup
         | 
| 356 344 | 
             
                    c.base = RDF::URI(opt_base)
         | 
| 357 345 | 
             
                    c.parse(input['@context'])
         | 
| @@ -362,7 +350,7 @@ module RDF::Tabular | |
| 362 350 | 
             
                    c
         | 
| 363 351 | 
             
                  else
         | 
| 364 352 | 
             
                    if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
         | 
| 365 | 
            -
                       | 
| 353 | 
            +
                      log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
         | 
| 366 354 | 
             
                      LOCAL_CONTEXT.dup
         | 
| 367 355 | 
             
                      c = LOCAL_CONTEXT.dup
         | 
| 368 356 | 
             
                      c.base = RDF::URI(opt_base)
         | 
| @@ -375,7 +363,7 @@ module RDF::Tabular | |
| 375 363 | 
             
                  @options[:base] = @context ? @context.base : RDF::URI(opt_base)
         | 
| 376 364 |  | 
| 377 365 | 
             
                  if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
         | 
| 378 | 
            -
                     | 
| 366 | 
            +
                    log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
         | 
| 379 367 | 
             
                    @context.default_language = nil
         | 
| 380 368 | 
             
                  end
         | 
| 381 369 |  | 
| @@ -385,7 +373,7 @@ module RDF::Tabular | |
| 385 373 |  | 
| 386 374 | 
             
                  @object = {}
         | 
| 387 375 |  | 
| 388 | 
            -
                   | 
| 376 | 
            +
                  log_depth do
         | 
| 389 377 | 
             
                    # Input was parsed in .new
         | 
| 390 378 | 
             
                    # Metadata is object with symbolic keys
         | 
| 391 379 | 
             
                    input.each do |key, value|
         | 
| @@ -401,7 +389,7 @@ module RDF::Tabular | |
| 401 389 | 
             
                        object[:@id] = if value.is_a?(String)
         | 
| 402 390 | 
             
                          value
         | 
| 403 391 | 
             
                        else
         | 
| 404 | 
            -
                           | 
| 392 | 
            +
                          log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
         | 
| 405 393 | 
             
                          ""  # Default value
         | 
| 406 394 | 
             
                        end
         | 
| 407 395 | 
             
                        @id = @options[:base].join(object[:@id])
         | 
| @@ -426,9 +414,9 @@ module RDF::Tabular | |
| 426 414 | 
             
                  end
         | 
| 427 415 |  | 
| 428 416 | 
             
                  if reason
         | 
| 429 | 
            -
                     | 
| 430 | 
            -
                     | 
| 431 | 
            -
                     | 
| 417 | 
            +
                    log_debug("md#initialize") {reason}
         | 
| 418 | 
            +
                    log_debug("md#initialize") {"filenames: #{filenames}"}
         | 
| 419 | 
            +
                    log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
         | 
| 432 420 | 
             
                  end
         | 
| 433 421 | 
             
                end
         | 
| 434 422 |  | 
| @@ -487,7 +475,7 @@ module RDF::Tabular | |
| 487 475 | 
             
                  when Schema
         | 
| 488 476 | 
             
                    value
         | 
| 489 477 | 
             
                  else
         | 
| 490 | 
            -
                     | 
| 478 | 
            +
                    log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
         | 
| 491 479 | 
             
                    Schema.new({}, @options.merge(parent: self, context: nil))
         | 
| 492 480 | 
             
                  end
         | 
| 493 481 | 
             
                end
         | 
| @@ -534,7 +522,7 @@ module RDF::Tabular | |
| 534 522 | 
             
                  when Dialect
         | 
| 535 523 | 
             
                    value
         | 
| 536 524 | 
             
                  else
         | 
| 537 | 
            -
                     | 
| 525 | 
            +
                    log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
         | 
| 538 526 | 
             
                    nil
         | 
| 539 527 | 
             
                  end
         | 
| 540 528 | 
             
                end
         | 
| @@ -544,15 +532,15 @@ module RDF::Tabular | |
| 544 532 | 
             
                # @raise [Error] if datatype is not valid
         | 
| 545 533 | 
             
                def datatype=(value)
         | 
| 546 534 | 
             
                  val = case value
         | 
| 547 | 
            -
                  when Hash then Datatype.new(value, parent: self)
         | 
| 548 | 
            -
                  else           Datatype.new({base: value}, parent: self)
         | 
| 535 | 
            +
                  when Hash then Datatype.new(value, @options.merge(parent: self))
         | 
| 536 | 
            +
                  else           Datatype.new({base: value}, @options.merge(parent: self))
         | 
| 549 537 | 
             
                  end
         | 
| 550 538 |  | 
| 551 539 | 
             
                  if val.valid? || value.is_a?(Hash)
         | 
| 552 540 | 
             
                    # Set it if it was specified as an object, which may cause validation errors later
         | 
| 553 541 | 
             
                    object[:datatype] = val
         | 
| 554 542 | 
             
                  else
         | 
| 555 | 
            -
                     | 
| 543 | 
            +
                    log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
         | 
| 556 544 | 
             
                  end
         | 
| 557 545 | 
             
                end
         | 
| 558 546 |  | 
| @@ -567,40 +555,20 @@ module RDF::Tabular | |
| 567 555 | 
             
                ##
         | 
| 568 556 | 
             
                # Do we have valid metadata?
         | 
| 569 557 | 
             
                def valid?
         | 
| 570 | 
            -
                  validate | 
| 571 | 
            -
                   | 
| 572 | 
            -
                rescue
         | 
| 573 | 
            -
                  false
         | 
| 574 | 
            -
                end
         | 
| 575 | 
            -
             | 
| 576 | 
            -
                ##
         | 
| 577 | 
            -
                # Validation errors
         | 
| 578 | 
            -
                # @return [Array<String>]
         | 
| 579 | 
            -
                def errors
         | 
| 580 | 
            -
                  validate! && []
         | 
| 581 | 
            -
                rescue Error => e
         | 
| 582 | 
            -
                  e.message.split("\n")
         | 
| 558 | 
            +
                  validate # Possibly re-validate
         | 
| 559 | 
            +
                  !log_statistics[:error]
         | 
| 583 560 | 
             
                end
         | 
| 584 561 |  | 
| 585 | 
            -
                 | 
| 586 | 
            -
             | 
| 587 | 
            -
                # @return [Array<String>]
         | 
| 588 | 
            -
                def warnings
         | 
| 589 | 
            -
                  ((@warnings || []) + object.
         | 
| 590 | 
            -
                    values.
         | 
| 591 | 
            -
                    flatten.
         | 
| 592 | 
            -
                    select {|v| v.is_a?(Metadata)}.
         | 
| 593 | 
            -
                    map(&:warnings).
         | 
| 594 | 
            -
                    flatten).compact.uniq
         | 
| 562 | 
            +
                def validate!
         | 
| 563 | 
            +
                  raise Error, "Metadata error" unless valid?
         | 
| 595 564 | 
             
                end
         | 
| 596 565 |  | 
| 597 566 | 
             
                ##
         | 
| 598 567 | 
             
                # Validate metadata, raising an error containing all errors detected during validation
         | 
| 599 568 | 
             
                # @raise [Error] Raise error if metadata has any unexpected properties
         | 
| 600 569 | 
             
                # @return [self]
         | 
| 601 | 
            -
                def validate | 
| 570 | 
            +
                def validate
         | 
| 602 571 | 
             
                  expected_props, required_props = @properties.keys, @required
         | 
| 603 | 
            -
                  errors = []
         | 
| 604 572 |  | 
| 605 573 | 
             
                  unless is_a?(Dialect) || is_a?(Transformation)
         | 
| 606 574 | 
             
                    expected_props = expected_props + INHERITED_PROPERTIES.keys
         | 
| @@ -609,10 +577,10 @@ module RDF::Tabular | |
| 609 577 | 
             
                  # It has only expected properties (exclude metadata)
         | 
| 610 578 | 
             
                  check_keys = object.keys - [:"@id", :"@context"]
         | 
| 611 579 | 
             
                  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
         | 
| 612 | 
            -
                   | 
| 580 | 
            +
                  log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
         | 
| 613 581 |  | 
| 614 582 | 
             
                  # It has required properties
         | 
| 615 | 
            -
                   | 
| 583 | 
            +
                  log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}"  unless (required_props & check_keys) == required_props
         | 
| 616 584 |  | 
| 617 585 | 
             
                  self.normalize!
         | 
| 618 586 |  | 
| @@ -621,55 +589,49 @@ module RDF::Tabular | |
| 621 589 | 
             
                    value = object[key]
         | 
| 622 590 | 
             
                    case key
         | 
| 623 591 | 
             
                    when :base
         | 
| 624 | 
            -
                       | 
| 592 | 
            +
                      log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
         | 
| 625 593 | 
             
                    when :columns
         | 
| 626 | 
            -
                      value.each do | | 
| 627 | 
            -
                         | 
| 628 | 
            -
             | 
| 629 | 
            -
                        rescue Error => e
         | 
| 630 | 
            -
                          errors << e.message
         | 
| 631 | 
            -
                        end
         | 
| 594 | 
            +
                      value.each do |col|
         | 
| 595 | 
            +
                        col.validate
         | 
| 596 | 
            +
                        log_statistics.merge!(col.log_statistics)
         | 
| 632 597 | 
             
                      end
         | 
| 633 598 | 
             
                      column_names = value.map(&:name)
         | 
| 634 | 
            -
                       | 
| 599 | 
            +
                      log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
         | 
| 635 600 | 
             
                    when :datatype, :dialect, :tables, :tableSchema, :transformations
         | 
| 636 601 | 
             
                      Array(value).each do |t|
         | 
| 637 602 | 
             
                        # Make sure value is of appropriate class
         | 
| 638 603 | 
             
                        if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
         | 
| 639 | 
            -
                           | 
| 640 | 
            -
             | 
| 641 | 
            -
                          rescue Error => e
         | 
| 642 | 
            -
                            errors << e.message
         | 
| 643 | 
            -
                          end
         | 
| 604 | 
            +
                          t.validate
         | 
| 605 | 
            +
                          log_statistics.merge!(t.log_statistics)
         | 
| 644 606 | 
             
                        else
         | 
| 645 | 
            -
                           | 
| 607 | 
            +
                          log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
         | 
| 646 608 | 
             
                        end
         | 
| 647 609 | 
             
                      end
         | 
| 648 | 
            -
                       | 
| 610 | 
            +
                      log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
         | 
| 649 611 | 
             
                    when :foreignKeys
         | 
| 650 612 | 
             
                      # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
         | 
| 651 613 | 
             
                      value.each do |fk|
         | 
| 652 614 | 
             
                        columnReference, reference = fk['columnReference'], fk['reference']
         | 
| 653 | 
            -
                         | 
| 654 | 
            -
                         | 
| 615 | 
            +
                        log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
         | 
| 616 | 
            +
                        log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
         | 
| 655 617 |  | 
| 656 618 | 
             
                        # Verify that columns exist in this schema
         | 
| 657 | 
            -
                         | 
| 619 | 
            +
                        log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
         | 
| 658 620 | 
             
                        Array(columnReference).each do |k|
         | 
| 659 | 
            -
                           | 
| 621 | 
            +
                          log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
         | 
| 660 622 | 
             
                        end
         | 
| 661 623 |  | 
| 662 624 | 
             
                        if reference.is_a?(Hash)
         | 
| 663 | 
            -
                           | 
| 625 | 
            +
                          log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
         | 
| 664 626 | 
             
                          ref_cols = reference['columnReference']
         | 
| 665 627 | 
             
                          schema = if reference.has_key?('resource')
         | 
| 666 628 | 
             
                            if reference.has_key?('schemaReference')
         | 
| 667 | 
            -
                               | 
| 629 | 
            +
                              log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}" 
         | 
| 668 630 | 
             
                            end
         | 
| 669 631 | 
             
                            # resource is the URL of a Table in the TableGroup
         | 
| 670 632 | 
             
                            ref = context.base.join(reference['resource']).to_s
         | 
| 671 | 
            -
                            table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
         | 
| 672 | 
            -
                             | 
| 633 | 
            +
                            table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
         | 
| 634 | 
            +
                            log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
         | 
| 673 635 | 
             
                            table.tableSchema if table
         | 
| 674 636 | 
             
                          elsif reference.has_key?('schemaReference')
         | 
| 675 637 | 
             
                            # resource is the @id of a Schema in the TableGroup
         | 
| @@ -677,25 +639,25 @@ module RDF::Tabular | |
| 677 639 | 
             
                            tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
         | 
| 678 640 | 
             
                            case tables.length
         | 
| 679 641 | 
             
                            when 0
         | 
| 680 | 
            -
                               | 
| 642 | 
            +
                              log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
         | 
| 681 643 | 
             
                              nil
         | 
| 682 644 | 
             
                            when 1
         | 
| 683 645 | 
             
                              tables.first.tableSchema
         | 
| 684 646 | 
             
                            else
         | 
| 685 | 
            -
                               | 
| 647 | 
            +
                              log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
         | 
| 686 648 | 
             
                              nil
         | 
| 687 649 | 
             
                            end
         | 
| 688 650 | 
             
                          end
         | 
| 689 651 |  | 
| 690 652 | 
             
                          if schema
         | 
| 691 653 | 
             
                            # ref_cols must exist in schema
         | 
| 692 | 
            -
                             | 
| 654 | 
            +
                            log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
         | 
| 693 655 | 
             
                            Array(ref_cols).each do |k|
         | 
| 694 | 
            -
                               | 
| 656 | 
            +
                              log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
         | 
| 695 657 | 
             
                            end
         | 
| 696 658 | 
             
                          end
         | 
| 697 659 | 
             
                        else
         | 
| 698 | 
            -
                           | 
| 660 | 
            +
                          log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
         | 
| 699 661 | 
             
                        end
         | 
| 700 662 | 
             
                      end
         | 
| 701 663 | 
             
                    when :format
         | 
| @@ -707,7 +669,7 @@ module RDF::Tabular | |
| 707 669 | 
             
                          nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
         | 
| 708 670 | 
             
                          unsignedLong unsignedInt unsignedShort unsignedByte
         | 
| 709 671 | 
             
                        ).include?(self.base)
         | 
| 710 | 
            -
                           | 
| 672 | 
            +
                          log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
         | 
| 711 673 | 
             
                          object.delete(:format) # act as if not set
         | 
| 712 674 | 
             
                        end
         | 
| 713 675 |  | 
| @@ -715,14 +677,14 @@ module RDF::Tabular | |
| 715 677 | 
             
                        begin
         | 
| 716 678 | 
             
                          parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
         | 
| 717 679 | 
             
                        rescue ArgumentError => e
         | 
| 718 | 
            -
                           | 
| 719 | 
            -
                          object[:format].delete("pattern") # act as if not set
         | 
| 680 | 
            +
                          log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
         | 
| 681 | 
            +
                          object[:format].delete("pattern") if object[:format] # act as if not set
         | 
| 720 682 | 
             
                        end
         | 
| 721 683 | 
             
                      else
         | 
| 722 684 | 
             
                        case self.base
         | 
| 723 685 | 
             
                        when 'boolean'
         | 
| 724 686 | 
             
                          unless value.split("|").length == 2
         | 
| 725 | 
            -
                             | 
| 687 | 
            +
                            log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
         | 
| 726 688 | 
             
                            object.delete(:format) # act as if not set
         | 
| 727 689 | 
             
                          end
         | 
| 728 690 | 
             
                        when :decimal, :integer, :long, :int, :short, :byte,
         | 
| @@ -733,7 +695,7 @@ module RDF::Tabular | |
| 733 695 | 
             
                          begin
         | 
| 734 696 | 
             
                            parse_uax35_number(value, nil)
         | 
| 735 697 | 
             
                          rescue ArgumentError => e
         | 
| 736 | 
            -
                             | 
| 698 | 
            +
                            log_warn "#{type} has invalid property '#{key}': #{e.message}"
         | 
| 737 699 | 
             
                            object.delete(:format) # act as if not set
         | 
| 738 700 | 
             
                          end
         | 
| 739 701 | 
             
                        when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
         | 
| @@ -741,7 +703,7 @@ module RDF::Tabular | |
| 741 703 | 
             
                          begin
         | 
| 742 704 | 
             
                            parse_uax35_date(value, nil)
         | 
| 743 705 | 
             
                          rescue ArgumentError => e
         | 
| 744 | 
            -
                             | 
| 706 | 
            +
                            log_warn "#{type} has invalid property '#{key}': #{e.message}"
         | 
| 745 707 | 
             
                            object.delete(:format) # act as if not set
         | 
| 746 708 | 
             
                          end
         | 
| 747 709 | 
             
                        else
         | 
| @@ -749,7 +711,7 @@ module RDF::Tabular | |
| 749 711 | 
             
                          begin
         | 
| 750 712 | 
             
                            Regexp.compile(value)
         | 
| 751 713 | 
             
                          rescue
         | 
| 752 | 
            -
                             | 
| 714 | 
            +
                            log_warn "#{type} has invalid property '#{key}': #{$!.message}"
         | 
| 753 715 | 
             
                            object.delete(:format) # act as if not set
         | 
| 754 716 | 
             
                          end
         | 
| 755 717 | 
             
                        end
         | 
| @@ -760,20 +722,20 @@ module RDF::Tabular | |
| 760 722 | 
             
                      if object[:length]
         | 
| 761 723 | 
             
                        case key
         | 
| 762 724 | 
             
                        when :minLength
         | 
| 763 | 
            -
                           | 
| 725 | 
            +
                          log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
         | 
| 764 726 | 
             
                        when :maxLength
         | 
| 765 | 
            -
                           | 
| 727 | 
            +
                          log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
         | 
| 766 728 | 
             
                        end
         | 
| 767 729 | 
             
                      end
         | 
| 768 730 |  | 
| 769 731 | 
             
                      # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
         | 
| 770 732 | 
             
                      if key == :maxLength && object[:minLength]
         | 
| 771 | 
            -
                         | 
| 733 | 
            +
                        log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
         | 
| 772 734 | 
             
                      end
         | 
| 773 735 |  | 
| 774 736 | 
             
                      # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
         | 
| 775 737 | 
             
                      unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
         | 
| 776 | 
            -
                         | 
| 738 | 
            +
                        log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
         | 
| 777 739 | 
             
                      end
         | 
| 778 740 | 
             
                    when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
         | 
| 779 741 | 
             
                      case self.base
         | 
| @@ -781,46 +743,46 @@ module RDF::Tabular | |
| 781 743 | 
             
                           'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
         | 
| 782 744 | 
             
                           'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
         | 
| 783 745 | 
             
                           'duration', 'dayTimeDuration', 'yearMonthDuration'
         | 
| 784 | 
            -
                         | 
| 746 | 
            +
                        log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
         | 
| 785 747 |  | 
| 786 748 | 
             
                        case key
         | 
| 787 749 | 
             
                        when :minInclusive
         | 
| 788 750 | 
             
                          # Applications MUST raise an error if both minInclusive and minExclusive are specified
         | 
| 789 | 
            -
                           | 
| 751 | 
            +
                          log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
         | 
| 790 752 |  | 
| 791 753 | 
             
                          # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
         | 
| 792 | 
            -
                           | 
| 754 | 
            +
                          log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
         | 
| 793 755 |  | 
| 794 756 | 
             
                          # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
         | 
| 795 | 
            -
                           | 
| 757 | 
            +
                          log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
         | 
| 796 758 | 
             
                        when :maxInclusive
         | 
| 797 759 | 
             
                          # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
         | 
| 798 | 
            -
                           | 
| 760 | 
            +
                          log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
         | 
| 799 761 | 
             
                        when :minExclusive
         | 
| 800 762 | 
             
                          # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
         | 
| 801 | 
            -
                           | 
| 763 | 
            +
                          log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
         | 
| 802 764 |  | 
| 803 765 | 
             
                          # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
         | 
| 804 | 
            -
                           | 
| 766 | 
            +
                          log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
         | 
| 805 767 | 
             
                        end
         | 
| 806 768 | 
             
                      else
         | 
| 807 | 
            -
                         | 
| 769 | 
            +
                        log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
         | 
| 808 770 | 
             
                      end
         | 
| 809 771 | 
             
                    when :notes
         | 
| 810 772 | 
             
                      unless value.is_a?(Hash) || value.is_a?(Array)
         | 
| 811 | 
            -
                         | 
| 773 | 
            +
                        log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
         | 
| 812 774 | 
             
                      end
         | 
| 813 775 | 
             
                      begin
         | 
| 814 776 | 
             
                        normalize_jsonld(key, value)
         | 
| 815 777 | 
             
                      rescue Error => e
         | 
| 816 | 
            -
                         | 
| 778 | 
            +
                        log_error "#{type} has invalid content '#{key}': #{e.message}"
         | 
| 817 779 | 
             
                      end
         | 
| 818 780 | 
             
                    when :primaryKey, :rowTitles
         | 
| 819 781 | 
             
                      # A column reference property that holds either a single reference to a column description object or an array of references.
         | 
| 820 782 | 
             
                      "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
         | 
| 821 783 | 
             
                      Array(value).each do |k|
         | 
| 822 784 | 
             
                        unless self.columns.any? {|c| c[:name] == k}
         | 
| 823 | 
            -
                           | 
| 785 | 
            +
                          log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
         | 
| 824 786 | 
             
                          object.delete(key)
         | 
| 825 787 | 
             
                        end
         | 
| 826 788 | 
             
                      end
         | 
| @@ -829,34 +791,33 @@ module RDF::Tabular | |
| 829 791 | 
             
                    when :@id
         | 
| 830 792 | 
             
                      # Must not be a BNode
         | 
| 831 793 | 
             
                      if value.to_s.start_with?("_:")
         | 
| 832 | 
            -
                         | 
| 794 | 
            +
                        log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
         | 
| 833 795 | 
             
                      end
         | 
| 834 796 |  | 
| 835 797 | 
             
                      # Datatype @id MUST NOT be the URL of a built-in type
         | 
| 836 798 | 
             
                      if self.is_a?(Datatype) && DATATYPES.values.include?(value)
         | 
| 837 | 
            -
                         | 
| 799 | 
            +
                        log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
         | 
| 838 800 | 
             
                      end
         | 
| 839 801 | 
             
                    when :@type
         | 
| 840 802 | 
             
                      # Must not be a BNode
         | 
| 841 803 | 
             
                      if value.to_s.start_with?("_:")
         | 
| 842 | 
            -
                         | 
| 804 | 
            +
                        log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
         | 
| 843 805 | 
             
                      end
         | 
| 844 806 | 
             
                      case type
         | 
| 845 807 | 
             
                      when :Transformation
         | 
| 846 | 
            -
                         | 
| 808 | 
            +
                        log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
         | 
| 847 809 | 
             
                      else
         | 
| 848 | 
            -
                         | 
| 810 | 
            +
                        log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
         | 
| 849 811 | 
             
                      end
         | 
| 850 812 | 
             
                    when ->(k) {key.to_s.include?(':')}
         | 
| 851 813 | 
             
                      begin
         | 
| 852 814 | 
             
                        normalize_jsonld(key, value)
         | 
| 853 815 | 
             
                      rescue Error => e
         | 
| 854 | 
            -
                         | 
| 816 | 
            +
                        log_error "#{type} has invalid content '#{key}': #{e.message}"
         | 
| 855 817 | 
             
                      end
         | 
| 856 818 | 
             
                    end
         | 
| 857 819 | 
             
                  end
         | 
| 858 820 |  | 
| 859 | 
            -
                  raise Error, errors.join("\n") unless errors.empty?
         | 
| 860 821 | 
             
                  self
         | 
| 861 822 | 
             
                end
         | 
| 862 823 |  | 
| @@ -885,10 +846,37 @@ module RDF::Tabular | |
| 885 846 | 
             
                # @param [:read] input
         | 
| 886 847 | 
             
                # @yield [Row]
         | 
| 887 848 | 
             
                def each_row(input)
         | 
| 888 | 
            -
                  csv =  | 
| 889 | 
            -
                   | 
| 890 | 
            -
                   | 
| 891 | 
            -
             | 
| 849 | 
            +
                  csv, number, skipped = nil, 0, 0
         | 
| 850 | 
            +
                  path = input.base_uri.path rescue ""
         | 
| 851 | 
            +
                  if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
         | 
| 852 | 
            +
                    # Input is HTML; use fragment identfier to find table.
         | 
| 853 | 
            +
                    fragment = RDF::URI(self.url).fragment rescue nil
         | 
| 854 | 
            +
                    tab = begin
         | 
| 855 | 
            +
                      # Extract with nokogiri
         | 
| 856 | 
            +
                      require 'nokogiri' unless defined?(:Nokogiri)
         | 
| 857 | 
            +
                      doc = Nokogiri::HTML.parse(input)
         | 
| 858 | 
            +
                      doc.search("##{fragment}").first if fragment
         | 
| 859 | 
            +
                    rescue LoadError
         | 
| 860 | 
            +
                      # Extract with REXML
         | 
| 861 | 
            +
                      # FIXME
         | 
| 862 | 
            +
                    end
         | 
| 863 | 
            +
             | 
| 864 | 
            +
                    raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
         | 
| 865 | 
            +
             | 
| 866 | 
            +
                    # Use rows with <td> to create column data
         | 
| 867 | 
            +
                    csv = []
         | 
| 868 | 
            +
                    number = 0
         | 
| 869 | 
            +
                    tab.xpath('.//tr').map do |row|
         | 
| 870 | 
            +
                      number += 1 if row.xpath('th')
         | 
| 871 | 
            +
                      data = row.xpath('td').map(&:content)
         | 
| 872 | 
            +
                      csv << data unless data.empty?
         | 
| 873 | 
            +
                    end
         | 
| 874 | 
            +
                  else
         | 
| 875 | 
            +
                    csv = ::CSV.new(input, csv_options)
         | 
| 876 | 
            +
                    # Skip skipRows and headerRowCount
         | 
| 877 | 
            +
                    skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
         | 
| 878 | 
            +
                    (1..skipped).each {csv.shift}
         | 
| 879 | 
            +
                  end
         | 
| 892 880 | 
             
                  csv.each do |data|
         | 
| 893 881 | 
             
                    # Check for embedded comments
         | 
| 894 882 | 
             
                    if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
         | 
| @@ -934,17 +922,17 @@ module RDF::Tabular | |
| 934 922 | 
             
                      if value['@value']
         | 
| 935 923 | 
             
                        dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
         | 
| 936 924 | 
             
                        lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
         | 
| 937 | 
            -
                        block.call(RDF::Statement | 
| 925 | 
            +
                        block.call(RDF::Statement(subject, property, lit))
         | 
| 938 926 | 
             
                      else
         | 
| 939 927 | 
             
                        # value MUST be a node object, establish a new subject from `@id`
         | 
| 940 928 | 
             
                        s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
         | 
| 941 929 |  | 
| 942 930 | 
             
                        # Generate a triple
         | 
| 943 | 
            -
                        block.call(RDF::Statement | 
| 931 | 
            +
                        block.call(RDF::Statement(subject, property, s2))
         | 
| 944 932 |  | 
| 945 933 | 
             
                        # Generate types
         | 
| 946 934 | 
             
                        Array(value['@type']).each do |t|
         | 
| 947 | 
            -
                          block.call(RDF::Statement | 
| 935 | 
            +
                          block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
         | 
| 948 936 | 
             
                        end
         | 
| 949 937 |  | 
| 950 938 | 
             
                        # Generate triples for all other properties
         | 
| @@ -956,7 +944,7 @@ module RDF::Tabular | |
| 956 944 | 
             
                    else
         | 
| 957 945 | 
             
                      # Value is a primitive JSON value
         | 
| 958 946 | 
             
                      lit = RDF::Literal(value)
         | 
| 959 | 
            -
                      block.call(RDF::Statement | 
| 947 | 
            +
                      block.call(RDF::Statement(subject, property, RDF::Literal(value)))
         | 
| 960 948 | 
             
                    end
         | 
| 961 949 | 
             
                  else
         | 
| 962 950 | 
             
                    case value
         | 
| @@ -1011,7 +999,7 @@ module RDF::Tabular | |
| 1011 999 | 
             
                      if @options[:validate]
         | 
| 1012 1000 | 
             
                        raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
         | 
| 1013 1001 | 
             
                      else
         | 
| 1014 | 
            -
                         | 
| 1002 | 
            +
                        log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
         | 
| 1015 1003 | 
             
                      end
         | 
| 1016 1004 | 
             
                    end
         | 
| 1017 1005 | 
             
                  else
         | 
| @@ -1020,7 +1008,7 @@ module RDF::Tabular | |
| 1020 1008 | 
             
                      if @options[:validate]
         | 
| 1021 1009 | 
             
                        raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
         | 
| 1022 1010 | 
             
                      else
         | 
| 1023 | 
            -
                         | 
| 1011 | 
            +
                        log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
         | 
| 1024 1012 | 
             
                      end
         | 
| 1025 1013 | 
             
                    end
         | 
| 1026 1014 |  | 
| @@ -1033,7 +1021,7 @@ module RDF::Tabular | |
| 1033 1021 | 
             
                      if @options[:validate]
         | 
| 1034 1022 | 
             
                        raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
         | 
| 1035 1023 | 
             
                      else
         | 
| 1036 | 
            -
                         | 
| 1024 | 
            +
                        log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
         | 
| 1037 1025 |  | 
| 1038 1026 | 
             
                        # If present, a virtual column MUST appear after all other non-virtual column definitions
         | 
| 1039 1027 | 
             
                        raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
         | 
| @@ -1048,13 +1036,13 @@ module RDF::Tabular | |
| 1048 1036 | 
             
                    end
         | 
| 1049 1037 | 
             
                    index = 0
         | 
| 1050 1038 | 
             
                    object_columns.all? do |cb|
         | 
| 1051 | 
            -
                      ca = non_virtual_columns[index] || Column.new({})
         | 
| 1039 | 
            +
                      ca = non_virtual_columns[index] || Column.new({}, @options)
         | 
| 1052 1040 | 
             
                      ta = ca.titles || {}
         | 
| 1053 1041 | 
             
                      tb = cb.titles || {}
         | 
| 1054 1042 | 
             
                      if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
         | 
| 1055 1043 | 
             
                        true
         | 
| 1056 1044 | 
             
                      elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
         | 
| 1057 | 
            -
                        raise Error, "Columns don't match | 
| 1045 | 
            +
                        raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
         | 
| 1058 1046 | 
             
                      elsif @options[:validate] || !ta.empty? && !tb.empty?
         | 
| 1059 1047 | 
             
                        # If validating, column compatibility requires strict match between titles
         | 
| 1060 1048 | 
             
                        titles_match = case
         | 
| @@ -1078,10 +1066,10 @@ module RDF::Tabular | |
| 1078 1066 | 
             
                          true
         | 
| 1079 1067 | 
             
                        elsif !@options[:validate]
         | 
| 1080 1068 | 
             
                          # If not validating, columns don't match, but processing continues
         | 
| 1081 | 
            -
                           | 
| 1069 | 
            +
                          log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
         | 
| 1082 1070 | 
             
                          true
         | 
| 1083 1071 | 
             
                        else
         | 
| 1084 | 
            -
                          raise Error, "Columns don't match | 
| 1072 | 
            +
                          raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
         | 
| 1085 1073 | 
             
                        end
         | 
| 1086 1074 | 
             
                      end
         | 
| 1087 1075 | 
             
                      index += 1
         | 
| @@ -1175,13 +1163,13 @@ module RDF::Tabular | |
| 1175 1163 | 
             
                  when Hash
         | 
| 1176 1164 | 
             
                    if value['@value']
         | 
| 1177 1165 | 
             
                      if !(value.keys.sort - %w(@value @type @language)).empty?
         | 
| 1178 | 
            -
                         | 
| 1166 | 
            +
                        log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
         | 
| 1179 1167 | 
             
                      elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
         | 
| 1180 | 
            -
                         | 
| 1168 | 
            +
                        log_error "Value object may not contain both @type and @language: #{value.to_json}"
         | 
| 1181 1169 | 
             
                      elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
         | 
| 1182 | 
            -
                         | 
| 1170 | 
            +
                        log_error "Value object with @language must use valid language: #{value.to_json}"
         | 
| 1183 1171 | 
             
                      elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
         | 
| 1184 | 
            -
                         | 
| 1172 | 
            +
                        log_error "Value object with @type must defined type: #{value.to_json}"
         | 
| 1185 1173 | 
             
                      end
         | 
| 1186 1174 | 
             
                      value
         | 
| 1187 1175 | 
             
                    else
         | 
| @@ -1190,16 +1178,16 @@ module RDF::Tabular | |
| 1190 1178 | 
             
                        case k
         | 
| 1191 1179 | 
             
                        when "@id"
         | 
| 1192 1180 | 
             
                          nv[k] = context.expand_iri(v, documentRelative: true).to_s
         | 
| 1193 | 
            -
                           | 
| 1181 | 
            +
                          log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
         | 
| 1194 1182 | 
             
                        when "@type"
         | 
| 1195 1183 | 
             
                          Array(v).each do |vv|
         | 
| 1196 1184 | 
             
                            # Validate that all type values transform to absolute IRIs
         | 
| 1197 1185 | 
             
                            resource = context.expand_iri(vv, vocab: true)
         | 
| 1198 | 
            -
                             | 
| 1186 | 
            +
                            log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
         | 
| 1199 1187 | 
             
                          end
         | 
| 1200 1188 | 
             
                          nv[k] = v
         | 
| 1201 1189 | 
             
                        when /^(@|_:)/
         | 
| 1202 | 
            -
                           | 
| 1190 | 
            +
                          log_error "Invalid use of #{k} in JSON-LD content"
         | 
| 1203 1191 | 
             
                        else
         | 
| 1204 1192 | 
             
                          nv[k] = normalize_jsonld(k, v)
         | 
| 1205 1193 | 
             
                        end
         | 
| @@ -1212,15 +1200,9 @@ module RDF::Tabular | |
| 1212 1200 | 
             
                end
         | 
| 1213 1201 | 
             
              protected
         | 
| 1214 1202 |  | 
| 1215 | 
            -
                # Add a warning on this object
         | 
| 1216 | 
            -
                def warn(string)
         | 
| 1217 | 
            -
                  debug("warn: #{string}")
         | 
| 1218 | 
            -
                  (@warnings ||= []) << string
         | 
| 1219 | 
            -
                end
         | 
| 1220 | 
            -
             | 
| 1221 1203 | 
             
                def set_property(key, type, value, invalid)
         | 
| 1222 1204 | 
             
                  if invalid
         | 
| 1223 | 
            -
                     | 
| 1205 | 
            +
                    log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
         | 
| 1224 1206 | 
             
                    case type
         | 
| 1225 1207 | 
             
                    when :link, :uri_template
         | 
| 1226 1208 | 
             
                      object[key] = ""
         | 
| @@ -1264,12 +1246,12 @@ module RDF::Tabular | |
| 1264 1246 | 
             
                      end
         | 
| 1265 1247 | 
             
                    end
         | 
| 1266 1248 | 
             
                  else
         | 
| 1267 | 
            -
                     | 
| 1249 | 
            +
                    log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
         | 
| 1268 1250 | 
             
                    []
         | 
| 1269 1251 | 
             
                  end
         | 
| 1270 1252 |  | 
| 1271 1253 | 
             
                  unless object[key].all? {|v| v.is_a?(klass)}
         | 
| 1272 | 
            -
                     | 
| 1254 | 
            +
                    log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
         | 
| 1273 1255 | 
             
                    # Remove elements that aren't of the right types
         | 
| 1274 1256 | 
             
                    object[key] = object[key].select! {|v| v.is_a?(klass)}
         | 
| 1275 1257 | 
             
                  end
         | 
| @@ -1298,14 +1280,13 @@ module RDF::Tabular | |
| 1298 1280 | 
             
                end
         | 
| 1299 1281 |  | 
| 1300 1282 | 
             
                class DebugContext
         | 
| 1301 | 
            -
                  include  | 
| 1302 | 
            -
             | 
| 1303 | 
            -
             | 
| 1304 | 
            -
             | 
| 1305 | 
            -
                  end
         | 
| 1283 | 
            +
                  include RDF::Util::Logger
         | 
| 1284 | 
            +
                end
         | 
| 1285 | 
            +
                def self.log_debug(*args, &block)
         | 
| 1286 | 
            +
                  DebugContext.new.log_debug(*args, &block)
         | 
| 1306 1287 | 
             
                end
         | 
| 1307 | 
            -
                def self. | 
| 1308 | 
            -
                  DebugContext.new(*args | 
| 1288 | 
            +
                def self.log_warn(*args)
         | 
| 1289 | 
            +
                  DebugContext.new.log_warn(*args)
         | 
| 1309 1290 | 
             
                end
         | 
| 1310 1291 | 
             
              end
         | 
| 1311 1292 |  | 
| @@ -1453,7 +1434,7 @@ module RDF::Tabular | |
| 1453 1434 | 
             
                  content['@context'] = object.delete(:@context) if object[:@context]
         | 
| 1454 1435 | 
             
                  ctx = @context
         | 
| 1455 1436 | 
             
                  remove_instance_variable(:@context) if instance_variables.include?(:@context)
         | 
| 1456 | 
            -
                  tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
         | 
| 1437 | 
            +
                  tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
         | 
| 1457 1438 | 
             
                  @parent = tg  # Link from parent
         | 
| 1458 1439 | 
             
                  tg
         | 
| 1459 1440 | 
             
                end
         | 
| @@ -1464,8 +1445,7 @@ module RDF::Tabular | |
| 1464 1445 | 
             
                    "@id" => (id.to_s if id),
         | 
| 1465 1446 | 
             
                    "@type" => "AnnotatedTable",
         | 
| 1466 1447 | 
             
                    "url" => self.url.to_s,
         | 
| 1467 | 
            -
                    " | 
| 1468 | 
            -
                    "rows" => []
         | 
| 1448 | 
            +
                    "tableSchema" => (tableSchema.to_atd if tableSchema),
         | 
| 1469 1449 | 
             
                  }) do |memo, (k, v)|
         | 
| 1470 1450 | 
             
                    memo[k.to_s] ||= v
         | 
| 1471 1451 | 
             
                    memo
         | 
| @@ -1519,12 +1499,12 @@ module RDF::Tabular | |
| 1519 1499 | 
             
                      end
         | 
| 1520 1500 | 
             
                    end
         | 
| 1521 1501 | 
             
                  else
         | 
| 1522 | 
            -
                     | 
| 1502 | 
            +
                    log_warn "#{type} has invalid property 'columns': expected array of Column"
         | 
| 1523 1503 | 
             
                    []
         | 
| 1524 1504 | 
             
                  end
         | 
| 1525 1505 |  | 
| 1526 1506 | 
             
                  unless object[:columns].all? {|v| v.is_a?(Column)}
         | 
| 1527 | 
            -
                     | 
| 1507 | 
            +
                    log_warn "#{type} has invalid property 'columns': expected array of Column"
         | 
| 1528 1508 | 
             
                    # Remove elements that aren't of the right types
         | 
| 1529 1509 | 
             
                    object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
         | 
| 1530 1510 | 
             
                  end
         | 
| @@ -1534,12 +1514,12 @@ module RDF::Tabular | |
| 1534 1514 | 
             
                  object[:foreignKeys] = case value
         | 
| 1535 1515 | 
             
                  when Array then value
         | 
| 1536 1516 | 
             
                  else
         | 
| 1537 | 
            -
                     | 
| 1517 | 
            +
                    log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
         | 
| 1538 1518 | 
             
                    []
         | 
| 1539 1519 | 
             
                  end
         | 
| 1540 1520 |  | 
| 1541 1521 | 
             
                  unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
         | 
| 1542 | 
            -
                     | 
| 1522 | 
            +
                    log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
         | 
| 1543 1523 | 
             
                    # Remove elements that aren't of the right types
         | 
| 1544 1524 | 
             
                    object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
         | 
| 1545 1525 | 
             
                  end
         | 
| @@ -1562,6 +1542,18 @@ module RDF::Tabular | |
| 1562 1542 | 
             
                    end
         | 
| 1563 1543 | 
             
                  end
         | 
| 1564 1544 | 
             
                end
         | 
| 1545 | 
            +
             | 
| 1546 | 
            +
                # Return Annotated Table representation
         | 
| 1547 | 
            +
                def to_atd
         | 
| 1548 | 
            +
                  object.inject({
         | 
| 1549 | 
            +
                    "@id" => (id.to_s if id),
         | 
| 1550 | 
            +
                    "@type" => "Schema",
         | 
| 1551 | 
            +
                    "columns" => Array(columns).map(&:to_atd),
         | 
| 1552 | 
            +
                  }) do |memo, (k, v)|
         | 
| 1553 | 
            +
                    memo[k.to_s] ||= v
         | 
| 1554 | 
            +
                    memo
         | 
| 1555 | 
            +
                  end.delete_if {|k,v| v.nil? || v.is_a?(Metadata)}
         | 
| 1556 | 
            +
                end
         | 
| 1565 1557 | 
             
              end
         | 
| 1566 1558 |  | 
| 1567 1559 | 
             
              class Column < Metadata
         | 
| @@ -1650,7 +1642,6 @@ module RDF::Tabular | |
| 1650 1642 | 
             
                    "table" => (table.id.to_s if table.id),
         | 
| 1651 1643 | 
             
                    "number" => self.number,
         | 
| 1652 1644 | 
             
                    "sourceNumber" => self.sourceNumber,
         | 
| 1653 | 
            -
                    "cells" => [],
         | 
| 1654 1645 | 
             
                    "virtual" => self.virtual,
         | 
| 1655 1646 | 
             
                    "name" => self.name,
         | 
| 1656 1647 | 
             
                    "titles" => self.titles
         | 
| @@ -1701,7 +1692,7 @@ module RDF::Tabular | |
| 1701 1692 | 
             
              class Dialect < Metadata
         | 
| 1702 1693 | 
             
                # Defaults for dialects
         | 
| 1703 1694 | 
             
                DEFAULTS = {
         | 
| 1704 | 
            -
                  commentPrefix:       | 
| 1695 | 
            +
                  commentPrefix:      false,
         | 
| 1705 1696 | 
             
                  delimiter:          ",".freeze,
         | 
| 1706 1697 | 
             
                  doubleQuote:        true,
         | 
| 1707 1698 | 
             
                  encoding:           "utf-8".freeze,
         | 
| @@ -1814,38 +1805,75 @@ module RDF::Tabular | |
| 1814 1805 | 
             
                  lang ||= 'und'
         | 
| 1815 1806 |  | 
| 1816 1807 | 
             
                  # Set encoding on input
         | 
| 1817 | 
            -
                   | 
| 1818 | 
            -
                  ( | 
| 1819 | 
            -
                     | 
| 1820 | 
            -
                     | 
| 1821 | 
            -
                     | 
| 1822 | 
            -
             | 
| 1823 | 
            -
             | 
| 1824 | 
            -
             | 
| 1825 | 
            -
             | 
| 1826 | 
            -
             | 
| 1827 | 
            -
             | 
| 1828 | 
            -
             | 
| 1829 | 
            -
             | 
| 1830 | 
            -
                    row_data = Array(csv.shift)
         | 
| 1831 | 
            -
                    Array(row_data).each_with_index do |value, index|
         | 
| 1832 | 
            -
                      # Skip columns
         | 
| 1833 | 
            -
                      skipCols = skipColumns.to_i
         | 
| 1834 | 
            -
                      next if index < skipCols
         | 
| 1808 | 
            +
                  path = input.base_uri.path rescue ""
         | 
| 1809 | 
            +
                  if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
         | 
| 1810 | 
            +
                    # Input is HTML; use fragment identfier to find table.
         | 
| 1811 | 
            +
                    fragment = RDF::URI(table["url"]).fragment rescue nil
         | 
| 1812 | 
            +
                    tab = begin
         | 
| 1813 | 
            +
                      # Extract with nokogiri
         | 
| 1814 | 
            +
                      require 'nokogiri' unless defined?(:Nokogiri)
         | 
| 1815 | 
            +
                      doc = Nokogiri::HTML.parse(input)
         | 
| 1816 | 
            +
                      doc.search("##{fragment}").first if fragment
         | 
| 1817 | 
            +
                    rescue LoadError
         | 
| 1818 | 
            +
                      # Extract with REXML
         | 
| 1819 | 
            +
                      # FIXME
         | 
| 1820 | 
            +
                    end
         | 
| 1835 1821 |  | 
| 1822 | 
            +
                    raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
         | 
| 1823 | 
            +
             | 
| 1824 | 
            +
                    # Use rows with <th> to create column titles
         | 
| 1825 | 
            +
                    tab.xpath('.//tr').each do |row|
         | 
| 1826 | 
            +
                      row.xpath('th').map(&:content).each_with_index do |value, index|
         | 
| 1827 | 
            +
                        # Skip columns
         | 
| 1828 | 
            +
                        skipCols = skipColumns.to_i
         | 
| 1829 | 
            +
                        next if index < skipCols
         | 
| 1830 | 
            +
             | 
| 1831 | 
            +
                        # Trim value
         | 
| 1832 | 
            +
                        value.lstrip! if %w(true start).include?(trim.to_s)
         | 
| 1833 | 
            +
                        value.rstrip! if %w(true end).include?(trim.to_s)
         | 
| 1834 | 
            +
             | 
| 1835 | 
            +
                        # Initialize titles
         | 
| 1836 | 
            +
                        columns = table["tableSchema"]["columns"] ||= []
         | 
| 1837 | 
            +
                        column = columns[index - skipCols] ||= {
         | 
| 1838 | 
            +
                          "titles" => {lang => []},
         | 
| 1839 | 
            +
                        }
         | 
| 1840 | 
            +
                        column["titles"][lang] << value
         | 
| 1841 | 
            +
                      end
         | 
| 1842 | 
            +
                    end
         | 
| 1843 | 
            +
                  else
         | 
| 1844 | 
            +
                    csv = ::CSV.new(input, csv_options)
         | 
| 1845 | 
            +
                    (1..skipRows.to_i).each do
         | 
| 1846 | 
            +
                      value = csv.shift.join(delimiter)  # Skip initial lines, these form comment annotations
         | 
| 1836 1847 | 
             
                      # Trim value
         | 
| 1837 1848 | 
             
                      value.lstrip! if %w(true start).include?(trim.to_s)
         | 
| 1838 1849 | 
             
                      value.rstrip! if %w(true end).include?(trim.to_s)
         | 
| 1839 1850 |  | 
| 1840 | 
            -
                       | 
| 1841 | 
            -
                       | 
| 1842 | 
            -
             | 
| 1843 | 
            -
             | 
| 1844 | 
            -
             | 
| 1845 | 
            -
             | 
| 1851 | 
            +
                      value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
         | 
| 1852 | 
            +
                      (metadata["rdfs:comment"] ||= []) << value unless value.empty?
         | 
| 1853 | 
            +
                    end
         | 
| 1854 | 
            +
                    log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
         | 
| 1855 | 
            +
             | 
| 1856 | 
            +
                    (1..headerRowCount).each do
         | 
| 1857 | 
            +
                      row_data = Array(csv.shift)
         | 
| 1858 | 
            +
                      Array(row_data).each_with_index do |value, index|
         | 
| 1859 | 
            +
                        # Skip columns
         | 
| 1860 | 
            +
                        skipCols = skipColumns.to_i
         | 
| 1861 | 
            +
                        next if index < skipCols
         | 
| 1862 | 
            +
             | 
| 1863 | 
            +
                        # Trim value
         | 
| 1864 | 
            +
                        value.lstrip! if %w(true start).include?(trim.to_s)
         | 
| 1865 | 
            +
                        value.rstrip! if %w(true end).include?(trim.to_s)
         | 
| 1866 | 
            +
             | 
| 1867 | 
            +
                        # Initialize titles
         | 
| 1868 | 
            +
                        columns = table["tableSchema"]["columns"] ||= []
         | 
| 1869 | 
            +
                        column = columns[index - skipCols] ||= {
         | 
| 1870 | 
            +
                          "titles" => {lang => []},
         | 
| 1871 | 
            +
                        }
         | 
| 1872 | 
            +
                        column["titles"][lang] << value
         | 
| 1873 | 
            +
                      end
         | 
| 1846 1874 | 
             
                    end
         | 
| 1847 1875 | 
             
                  end
         | 
| 1848 | 
            -
                   | 
| 1876 | 
            +
                  log_debug("embedded_metadata") {"table: #{table.inspect}"}
         | 
| 1849 1877 | 
             
                  input.rewind if input.respond_to?(:rewind)
         | 
| 1850 1878 |  | 
| 1851 1879 | 
             
                  Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
         | 
| @@ -2030,13 +2058,13 @@ module RDF::Tabular | |
| 2030 2058 |  | 
| 2031 2059 | 
             
                    # create column if necessary
         | 
| 2032 2060 | 
             
                    columns[index - skipColumns] ||=
         | 
| 2033 | 
            -
                      Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
         | 
| 2061 | 
            +
                      Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
         | 
| 2034 2062 |  | 
| 2035 2063 | 
             
                    column = columns[index - skipColumns]
         | 
| 2036 2064 |  | 
| 2037 2065 | 
             
                    @values << cell = Cell.new(metadata, column, self, value)
         | 
| 2038 2066 |  | 
| 2039 | 
            -
                    datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
         | 
| 2067 | 
            +
                    datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
         | 
| 2040 2068 | 
             
                    value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
         | 
| 2041 2069 | 
             
                    value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
         | 
| 2042 2070 | 
             
                    # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
         | 
| @@ -2092,7 +2120,11 @@ module RDF::Tabular | |
| 2092 2120 |  | 
| 2093 2121 | 
             
                # Identifier for this row, as an RFC7111 fragment 
         | 
| 2094 2122 | 
             
                # @return [RDF::URI]
         | 
| 2095 | 
            -
                def id; | 
| 2123 | 
            +
                def id;
         | 
| 2124 | 
            +
                  u = table.url.dup
         | 
| 2125 | 
            +
                  u.fragment = "row=#{self.sourceNumber}"
         | 
| 2126 | 
            +
                  u
         | 
| 2127 | 
            +
                end
         | 
| 2096 2128 |  | 
| 2097 2129 | 
             
                # Return Annotated Row representation
         | 
| 2098 2130 | 
             
                def to_atd
         |