RubyGems - rdf-microdata - Versions diffs - 2.2.1 → 2.2.2 - Mend

rdf-microdata 2.2.1 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/README.md +15 -2
data/VERSION +1 -1
data/lib/rdf/microdata.rb +9 -5
data/lib/rdf/microdata/format.rb +112 -0
data/lib/rdf/microdata/jsonld_reader.rb +251 -0
data/lib/rdf/microdata/rdfa_reader.rb +132 -0
data/lib/rdf/microdata/reader.rb +75 -154
data/lib/rdf/microdata/reader/nokogiri.rb +6 -0
data/lib/rdf/microdata/registry.rb +109 -0
metadata +33 -10

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 7e58ac0a63c3b8878c3b4de2b3f6297244043ff5
-  data.tar.gz: 7f99b16d0c372a1ef0aab0629b9444f4f3c9a26d
+  metadata.gz: 6d40780aa7dd1ba5bac58af54ee1ed4a3f7d2905
+  data.tar.gz: aa2ee1835bad718bef436d97ea55967a1dafb86c
 SHA512:
-  metadata.gz: 8bfea227f774d19a94f1a4bd50e21fa232f023a47b1b89fd3dae53a3321ecaa2d14c99693be34b9976ab4544ee8256ed43c645fe65bbe9d0a94c395f8feca85a
-  data.tar.gz: e50ebb4ef9e0bf50fc2a13d96c7f6ac6ca6d08273dfb21b132d4e79850de8a730b7db8f7c90c4aafd532a2d448ec86a0133637547d62d48969b3d2686a701574
+  metadata.gz: 2272c36c3a46c21584ef9f11ef43e7a3c4d54e4ede2d5f6cf7da3022fb1981a3e27fa6abca99ef753570e107703d598ddd6f498656a53f3d650fc878df5a0d39
+  data.tar.gz: a7eadfe033b7f74fa2c7705f7db0af5b67f6c8d95619464420708f6eb873d0c72b71a8c17fd7813dc1e838e17be91bae9aed6c90a97bd318ab82f789bbba926b

data/README.md CHANGED

@@ -60,11 +60,24 @@ Full documentation available on [Rubydoc.info][Microdata doc]
 * {RDF::Microdata::Reader}
   * {RDF::Microdata::Reader::Nokogiri}
-### Additional vocabularies
+### RDFa-based Reader
+There is an experimental reader based on transforming Microdata to RDFa within the DOM. To invoke
+this, add the `rdfa: true` option to the {RDF::Microdata::Reader.new}, or
+use {RDF::Microdata::RdfaReader} directly.
+The reader exposes a `#rdfa` method, which can be used to retrieve the transformed HTML+RDFa
+### JSON-LD-based Reader
+There is an experimental reader based on transforming Microdata to JSON-LD. To invoke
+this, add the `jsonld: true` option to the {RDF::Microdata::Reader.new}, or
+use {RDF::Microdata::JsonLdReader} directly.
+The reader exposes a `#jsonld` method, which can be used to retrieve the generated JSON-LD.
 ## Resources
 * [RDF.rb][RDF.rb]
-* [Documentation](http://rdf.rubyforge.org/microdata)
+* [Documentation](http://www.rubydoc.info/github/ruby-rdf/rdf-microdata/)
 * [History](file:History.md)
 * [Microdata][]
 * [Microdata RDF][]

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 2.2.1
1	+ 2.2.2

data/lib/rdf/microdata.rb CHANGED

@@ -15,18 +15,22 @@ module RDF
   #     end
   #   end
   #
-  # @see http://rdf.rubyforge.org/
+  # @see http://www.rubydoc.info/github/ruby-rdf/rdf/
   # @see http://www.w3.org/TR/2011/WD-microdata-20110525/
   #
   # @author [Gregg Kellogg](http://greggkellogg.net/)
   module Microdata
     USES_VOCAB = RDF::URI("http://www.w3.org/ns/rdfa#usesVocabulary")
+    DEFAULT_REGISTRY = File.expand_path("../../../etc/registry.json", __FILE__)
     require 'rdf/microdata/format'
     require 'rdf/microdata/vocab'
-    autoload :Expansion,  'rdf/microdata/expansion'
-    autoload :Profile,    'rdf/microdata/profile'
-    autoload :Reader,     'rdf/microdata/reader'
-    autoload :VERSION,    'rdf/microdata/version'
+    autoload :Expansion,    'rdf/microdata/expansion'
+    autoload :JsonLdReader, 'rdf/microdata/jsonld_reader'
+    autoload :Profile,      'rdf/microdata/profile'
+    autoload :RdfaReader,   'rdf/microdata/rdfa_reader'
+    autoload :Reader,       'rdf/microdata/reader'
+    autoload :Registry,     'rdf/microdata/registry'
+    autoload :VERSION,      'rdf/microdata/version'
   end
 end

data/lib/rdf/microdata/format.rb CHANGED

@@ -41,5 +41,117 @@ module RDF::Microdata
     # Reports whether the sample contains a tag carrying any Microdata attribute.
     def self.detect(sample)
       microdata_tag = /<[^>]*(itemprop|itemtype|itemref|itemscope|itemid)[^>]*>/m
       sample.match(microdata_tag) ? true : false
     end
+    ##
+    # Hash of CLI commands appropriate for this format: "to-rdfa" and "to-jsonld", each with a description, help text, format filter and a lambda taking (files, options)
+    # @return [Hash{Symbol => Hash}]
+    def self.cli_commands
+      {
+        "to-rdfa": {
+          description: "Transform HTML+Microdata into HTML+RDFa",
+          parse: false,
+          help: "to-rdfa files ...\nTransform HTML+Microdata into HTML+RDFa",
+          filter: {
+            format: :microdata
+          },
+          option_use: {output_format: :disabled},
+          lambda: ->(files, options) do
+            out = options[:output] || $stdout
+            xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+              <xsl:param name="indent-increment" select="'  '"/>
+              <xsl:output method="html" doctype-system="about:legacy-compat"/>
+              <xsl:template name="newline">
+                <xsl:text disable-output-escaping="yes">
+            </xsl:text>
+              </xsl:template>
+              <xsl:template match="comment() | processing-instruction()">
+                <xsl:param name="indent" select="''"/>
+                <xsl:call-template name="newline"/>
+                <xsl:value-of select="$indent"/>
+                <xsl:copy />
+              </xsl:template>
+              <xsl:template match="text()">
+                <xsl:param name="indent" select="''"/>
+                <xsl:call-template name="newline"/>
+                <xsl:value-of select="$indent"/>
+                <xsl:value-of select="normalize-space(.)"/>
+              </xsl:template>
+              <xsl:template match="text()[normalize-space(.)='']"/>
+              <xsl:template match="*">
+                <xsl:param name="indent" select="''"/>
+                <xsl:call-template name="newline"/>
+                <xsl:value-of select="$indent"/>
+                  <xsl:choose>
+                   <xsl:when test="count(child::*) > 0">
+                    <xsl:copy>
+                     <xsl:copy-of select="@*"/>
+                     <xsl:apply-templates select="*|text()">
+                       <xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
+                     </xsl:apply-templates>
+                     <xsl:call-template name="newline"/>
+                     <xsl:value-of select="$indent"/>
+                    </xsl:copy>
+                   </xsl:when>
+                   <xsl:otherwise>
+                    <xsl:copy-of select="."/>
+                   </xsl:otherwise>
+                 </xsl:choose>
+              </xsl:template>
+            </xsl:stylesheet>).gsub(/^            /, ''))
+            if files.empty?
+              # If no files are given, read from options[:evaluate] when present, otherwise from STDIN
+              input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
+              input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
+              RDF::Microdata::Reader.new(input, options.merge(rdfa: true)) do |reader|
+                reader.rdfa.xpath("//text()").each do |txt|
+                  txt.content = txt.content.to_s.strip
+                end
+                out.puts xsl.apply_to(reader.rdfa).to_s
+              end
+            else
+              files.each do |file|
+                RDF::Microdata::Reader.open(file, options.merge(rdfa: true)) do |reader|
+                  reader.rdfa.xpath("//text()").each do |txt|
+                    txt.content = txt.content.to_s.strip
+                  end
+                  out.puts xsl.apply_to(reader.rdfa).to_s
+                end
+              end
+            end
+          end
+        },
+        "to-jsonld": {
+          description: "Transform HTML+Microdata into JSON-LD",
+          parse: false,
+          help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
+          filter: {
+            format: :microdata
+          },
+          option_use: {output_format: :disabled},
+          lambda: ->(files, options) do
+            out = options[:output] || $stdout
+            if files.empty?
+              # If no files are given, read from options[:evaluate] when present, otherwise from STDIN
+              input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
+              input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
+              RDF::Microdata::Reader.new(input, options.merge(jsonld: true)) do |reader|
+                out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
+              end
+            else
+              files.each do |file|
+                RDF::Microdata::Reader.open(file, options.merge(jsonld: true)) do |reader|
+                  out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
+                end
+              end
+            end
+          end
+        },
+      }
+    end
   end
 end

data/lib/rdf/microdata/jsonld_reader.rb ADDED

@@ -0,0 +1,251 @@
+require 'json/ld'
+require 'nokogumbo'
+module RDF::Microdata
+  ##
+  # Update DOM to turn Microdata into JSON-LD and parse using the JSON-LD Reader
+  class JsonLdReader < JSON::LD::Reader
+    # The resulting JSON-LD
+    # @return [Hash]
+    attr_reader :jsonld
+    def self.format(klass = nil)
+      if klass.nil?
+        RDF::Microdata::Format
+      else
+        super
+      end
+    end
+    ##
+    # Initializes the JsonLdReader instance.
+    #
+    # @param  [IO, File, String] input
+    #   the input stream to read
+    # @param  [Hash{Symbol => Object}] options
+    #   any additional options (see `RDF::Reader#initialize`)
+    # @return [reader]
+    # @yield  [reader] `self`
+    # @yieldparam  [RDF::Reader] reader
+    # @yieldreturn [void] ignored
+    # @raise [RDF::ReaderError] if _validate_
+    def initialize(input = $stdin, options = {}, &block)
+      @options = options
+      log_debug('', "using JSON-LD transformation reader")
+      input = case input
+      when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
+      else
+        # Try to detect charset from input
+        options[:encoding] ||= input.charset if input.respond_to?(:charset)
+        # Otherwise, default is utf-8
+        options[:encoding] ||= 'utf-8'
+        options[:encoding] = options[:encoding].to_s if options[:encoding]
+        input = input.read if input.respond_to?(:read)
+        ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
+      end
+      # Load registry
+      begin
+        registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
+        log_debug('', "registry = #{registry_uri.inspect}")
+        Registry.load_registry(registry_uri)
+      rescue JSON::ParserError => e
+        log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
+      end
+      @jsonld = {'@graph' => []}
+      # Start with all top-level items
+      input.css("[itemscope]").each do |item|
+        next if item['itemprop']  # Only top-level items
+        jsonld['@graph'] << get_object(item)
+      end
+      log_debug('', "Transformed document: #{jsonld.to_json(JSON::LD::JSON_STATE)}")
+      # Rely on RDFa reader
+      super(jsonld.to_json, options, &block)
+    end
+    private
+    # Return JSON-LD representation of an item
+    # @param [Nokogiri::XML::Element] item
+    # @param [Hash{Nokogiri::XML::Node => Hash}]
+    # @return [Hash]
+    def get_object(item, memory = {})
+      if result = memory[item]
+        # Result is a reference to that item; assign a blank-node identifier if necessary
+        result['@id'] ||= alloc_bnode
+        return result
+      end
+      result = {}
+      memory[item] = result
+      # If the item has a global identifier, add an entry to result called "@id" whose value is the global identifier of item.
+      result['@id'] = item['itemid'].to_s if item['itemid']
+      # If the item has any item types, add an entry to result called "@type" whose value is an array listing the item types of item, in the order they were specified on the itemtype attribute.
+      if item['itemtype']
+        # Only absolute URLs
+        types = item.attribute('itemtype').
+          remove.
+          to_s.
+          split(/\s+/).
+          select {|t| RDF::URI(t).absolute?}
+        if vocab = types.first
+          vocab = Registry.find(vocab) || begin
+            type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
+            Registry.new(type_vocab) if type_vocab
+          end
+          (result['@context'] = {})['@vocab'] = vocab.uri.to_s if vocab
+          result['@type'] = types unless types.empty?
+        end
+      end
+      # For each element element that has one or more property names and is one of the properties of the item item, in the order those elements are given by the algorithm that returns the properties of an item, run the following substeps
+      item_properties(item).each do |element|
+        value = if element['itemscope']
+          get_object(element, memory)
+        else
+          property_value(element)
+        end
+        element['itemprop'].to_s.split(/\s+/).each do |prop|
+          result[prop] ||= [] << value
+        end
+      end
+      result
+    end
+    ##
+    # Collect the property elements of an item by walking its child elements and
+    # any elements named by its itemref attribute, without descending into nested items.
+    #
+    # @param [Nokogiri::XML::Element] item
+    # @return [Array<Nokogiri::XML::Element>]
+    #   List of property elements for an item
+    def item_properties(item)
+      results, memory, pending = [], [item], item.children.select(&:element?)
+      log_debug(item, "item_properties")
+      # If root has an itemref attribute, split the value of that itemref attribute on spaces. For each resulting token ID, if there is an element in the document whose ID is ID, then add the first such element to pending.
+      # split(' ') ignores leading whitespace, so no empty token (and invalid "#" selector) is produced
+      item['itemref'].to_s.split(' ').each do |ref|
+        # Search the whole document: a referenced element need not be a descendant of item
+        if referenced = item.document.at_css("##{ref}")
+          pending << referenced
+        end
+      end
+      while !pending.empty?
+        current = pending.shift
+        # Error
+        break if memory.include?(current)
+        memory << current
+        # If current does not have an itemscope attribute, then: add all the child elements of current to pending.
+        pending += current.children.select(&:element?) unless current['itemscope']
+        # If current has an itemprop attribute specified and has one or more property names, then add current to results.
+        results << current unless current['itemprop'].to_s.split(/\s+/).empty?
+      end
+      results
+    end
+    ##
+    #
+    def property_value(element)
+      base = element.base || base_uri
+      log_debug(element) {"property_value(#{element.name}): base #{base.inspect}"}
+      value = case
+      when element.has_attribute?('itemscope')
+        {}
+      when element.has_attribute?('content')
+        if element.language
+          {"@value" => element['content'].to_s.strip, language: element.language}
+        else
+          element['content'].to_s.strip
+        end
+      when %w(data meter).include?(element.name) && element.attribute('value')
+        # XXX parse as number?
+        {"@value" => element['value'].to_s.strip}
+      when %w(audio embed iframe img source track video).include?(element.name)
+        {"@id" => uri(element.attribute('src'), base).to_s}
+      when %w(a area link).include?(element.name)
+        {"@id" => uri(element.attribute('href'), base).to_s}
+      when %w(object).include?(element.name)
+        {"@id" => uri(element.attribute('data'), base).to_s}
+      when %w(time).include?(element.name)
+        # use datatype?
+        (element.attribute('datetime') || element.text).to_s.strip
+      else
+        if element.language
+          {"@value" => element.inner_text.to_s.strip, language: element.language}
+        else
+          element.inner_text.to_s.strip
+        end
+      end
+      log_debug(element) {"  #{value.inspect}"}
+      value
+    end
+    # Allocate a new blank node identifier
+    # @return [String]
+    def alloc_bnode
+      @bnode_base ||= "_:a"
+      res = @bnode_base
+      @bnode_base = res.succ
+      res
+    end
+    # Fixme, what about xml:base relative to element?
+    def uri(value, base = nil)
+      value = if base
+        base = uri(base) unless base.is_a?(RDF::URI)
+        base.join(value.to_s)
+      else
+        RDF::URI(value.to_s)
+      end
+      value.validate! if validate?
+      value.canonicalize! if canonicalize?
+      value = RDF::URI.intern(value) if intern?
+      value
+    end
+  end
+end
+# Monkey Patch Nokogiri
+module Nokogiri::XML
+  class Element
+    ##
+    # Get any xml:base in effect for this element
+    def base
+      if @base.nil?
+        @base = attributes['xml:base'] ||
+        (parent && parent.element? && parent.base) ||
+        false
+      end
+      @base == false ? nil : @base
+    end
+    ##
+    # Get any xml:lang or lang in effect for this element
+    def language
+      if @language.nil?
+        language = case
+        when self["xml:lang"]
+          self["xml:lang"].to_s
+        when self["lang"]
+          self["lang"].to_s
+        else
+          parent && parent.element? && parent.language
+        end
+      end
+      @language == false ? nil : @language
+    end
+  end
+end

data/lib/rdf/microdata/rdfa_reader.rb ADDED

@@ -0,0 +1,132 @@
+require 'rdf/rdfa'
+require 'nokogumbo'
+module RDF::Microdata
+  ##
+  # Update DOM to turn Microdata into RDFa and parse using the RDFa Reader
+  class RdfaReader < RDF::RDFa::Reader
+    # The transformed DOM using RDFa
+    # @return [Nokogiri::XML::Document]
+    attr_reader :rdfa
+    def self.format(klass = nil)
+      if klass.nil?
+        RDF::Microdata::Format
+      else
+        super
+      end
+    end
+    ##
+    # Initializes the RdfaReader instance.
+    #
+    # @param  [IO, File, String] input
+    #   the input stream to read
+    # @param  [Hash{Symbol => Object}] options
+    #   any additional options (see `RDF::Reader#initialize`)
+    # @return [reader]
+    # @yield  [reader] `self`
+    # @yieldparam  [RDF::Reader] reader
+    # @yieldreturn [void] ignored
+    # @raise [RDF::ReaderError] if _validate_
+    def initialize(input = $stdin, options = {}, &block)
+      @options = options
+      log_debug('', "using RDFa transformation reader")
+      input = case input
+      when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
+      else
+        # Try to detect charset from input
+        options[:encoding] ||= input.charset if input.respond_to?(:charset)
+        # Otherwise, default is utf-8
+        options[:encoding] ||= 'utf-8'
+        options[:encoding] = options[:encoding].to_s if options[:encoding]
+        input = input.read if input.respond_to?(:read)
+        ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
+      end
+      # Load registry
+      begin
+        registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
+        log_debug('', "registry = #{registry_uri.inspect}")
+        Registry.load_registry(registry_uri)
+      rescue JSON::ParserError => e
+        log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
+      end
+      # For all members having @itemscope
+      input.css("[itemscope]").each do |item|
+        # Get @itemtypes to create @type and @vocab
+        item.attribute('itemscope').remove
+        if item['itemtype']
+          # Only absolute URLs
+          types = item.attribute('itemtype').
+            remove.
+            to_s.
+            split(/\s+/).
+            select {|t| RDF::URI(t).absolute?}
+          item['typeof'] = types.join(' ') unless types.empty?
+          if vocab = types.first
+            vocab = Registry.find(vocab) || begin
+              type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
+              Registry.new(type_vocab) if type_vocab
+            end
+            item['vocab'] = vocab.uri.to_s if vocab
+          end
+        end
+        # Change each itemid attribute to an resource attribute with the same value
+        if item['itemid']
+          id = item.attribute('itemid').remove
+          item[item['itemprop'] ? 'resource' : 'about'] = id
+        else
+          # Otherwise, ensure that @typeof has at least an empty value
+          item['typeof'] ||= ''
+        end
+      end
+      # Add @resource for all itemprop values of object based on a @data value
+      input.css("object[itemprop][data]").each do |item|
+        item['resource'] ||= item['data']
+      end
+      # Replace all @itemprop values with @property
+      input.css("[itemprop]").each {|item| item['property'] = item.attribute('itemprop').remove}
+      # Wrap all @itemref properties
+      input.css("[itemref]").each do |item|
+        item_vocab = item['vocab'] || item.ancestors.detect {|a| a.attribute('vocab')}
+        item_vocab = item_vocab.to_s if item_vocab
+        item.attribute('itemref').remove.to_s.split(/\s+/).each do |ref|
+          if referenced = input.css("##{ref}")
+            # Add @vocab to referenced using the closest ansestor having @vocab of item.
+            # If the element with id reference has no resource attribute, add a resource attribute whose value is a NUMBER SIGN U+0023 followed by reference to the element.
+            # If the element with id reference has no typeof attribute, add a typeof="rdfa:Pattern" attribute to the element.
+            referenced.wrap(%(<div vocab="#{item_vocab}" resource="##{ref}" typeof="rdfa:Pattern"))
+            # Add a link child element to the element that represents the item, with a rel="rdfa:copy" attribute and an href attribute whose value is a NUMBER SIGN U+0023 followed by reference
+            link = ::Nokogiri::XML::Node.new('link', input)
+            link['rel'] = 'rdfa:copy'
+            link['href'] = "##{ref}"
+            item << link
+          end
+        end
+      end
+      @rdfa = input
+      log_debug('', "Transformed document: #{input.to_html}")
+      options = options.merge(
+        library: :nokogiri,
+        reference_folding: true,
+        host_language: :html5,
+        version: :"rdfa1.1")
+      # Rely on RDFa reader
+      super(input, options, &block)
+    end
+  end
+end

data/lib/rdf/microdata/reader.rb CHANGED

@@ -15,15 +15,16 @@ module RDF::Microdata
     include Expansion
     include RDF::Util::Logger
     URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video)
-    DEFAULT_REGISTRY = File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "..", "etc", "registry.json"))
     # @private
     class CrawlFailure < StandardError; end
-    # @!attribute [r] implementation
     # @return [Module] Returns the HTML implementation module for this reader instance.
     attr_reader :implementation
+    # @return [Hash{Object => RDF::Resource}] maps RDF elements (items) to resources
+    attr_reader :memory
     ##
     # Returns the base URI determined by this reader.
     #
@@ -36,109 +37,46 @@ module RDF::Microdata
       @options[:base_uri]
     end
-    # Interface to registry
-    class Registry
-      # @return [RDF::URI] Prefix of vocabulary
-      attr_reader :uri
-      # @return [Hash] properties
-      attr_reader :properties
-      ##
-      # Initialize the registry from a URI or file path
-      #
-      # @param [String] registry_uri
-      def self.load_registry(registry_uri)
-        return if @registry_uri == registry_uri
-        json = RDF::Util::File.open_file(registry_uri) { |f| JSON.load(f) }
-        @prefixes = {}
-        json.each do |prefix, elements|
-          next unless elements.is_a?(Hash)
-          properties = elements.fetch("properties", {})
-          @prefixes[prefix] = Registry.new(prefix, properties)
-        end
-        @registry_uri = registry_uri
-      end
-      ##
-      # Initialize registry for a particular prefix URI
-      #
-      # @param [RDF::URI] prefixURI
-      # @param [Hash] properties ({})
-      def initialize(prefixURI, properties = {})
-        @uri = prefixURI
-        @properties = properties
-        @property_base = prefixURI.to_s
-        # Append a '#' for fragment if necessary
-        @property_base += '#' unless %w(/ #).include?(@property_base[-1,1])
-      end
+    ##
+    # Reader options
+    # @see http://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
+    def self.options
+      super + [
+        RDF::CLI::Option.new(
+          symbol: :rdfa,
+          datatype: TrueClass,
+          on: ["--rdfa"],
+          description: "Transform and parse as RDFa.") {true},
+      ]
+    end
-      ##
-      # Find a registry entry given a type URI
-      #
-      # @param [RDF::URI] type
-      # @return [Registry]
-      def self.find(type)
-        @prefixes ||= {}
-        k = @prefixes.keys.detect {|key| type.to_s.index(key) == 0 }
-        @prefixes[k] if k
-      end
-      ##
-      # Generate a predicateURI given a `name`
-      #
-      # @param [#to_s] name
-      # @param [Hash{}] ec Evaluation Context
-      # @return [RDF::URI]
-      def predicateURI(name, ec)
-        u = RDF::URI(name)
-        # 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
-        return u if u.absolute?
-        n = frag_escape(name)
-        if ec[:current_type].nil?
-          # 2) If current type from context is null, there can be no current vocabulary.
-          #    Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
-          u = RDF::URI(ec[:document_base].to_s)
-          u.fragment = frag_escape(name)
-          u
-        else
-          # 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
-          RDF::URI(@property_base + n)
+    ##
+    # Redirect for RDFa Reader given `:rdfa` option
+    #
+    # @private
+    def self.new(input = nil, options = {}, &block)
+      klass = if options[:rdfa]
+        # Requires rdf-rdfa gem to be loaded
+        begin
+          require 'rdf/rdfa'
+        rescue LoadError
+          raise ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem"
         end
-      end
-      ##
-      # Yield a equivalentProperty or subPropertyOf if appropriate
-      #
-      # @param [RDF::URI] predicateURI
-      # @yield equiv
-      # @yieldparam [RDF::URI] equiv
-      def expand(predicateURI)
-        tok = tokenize(predicateURI)
-        if @properties[tok].is_a?(Hash)
-          value = @properties[tok].fetch("subPropertyOf", nil)
-          value ||= @properties[tok].fetch("equivalentProperty", nil)
-          Array(value).each {|equiv| yield RDF::URI(equiv)}
+        RdfaReader
+      elsif options[:jsonld]
+        # Requires json-ld gem to be loaded
+        begin
+          require 'json/ld'
+        rescue LoadError
+          raise ReaderError, "Use of JSON-LD-based reader requires json-ld gem"
         end
+        JsonLdReader
+      else
+        self
       end
-      ##
-      # Turn a predicateURI into a simple token
-      # @param [RDF::URI] predicateURI
-      # @return [String]
-      def tokenize(predicateURI)
-        predicateURI.to_s.sub(@property_base, '')
-      end
-      ##
-      # Fragment escape a name
-      def frag_escape(name)
-        name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
-      end
+      reader = klass.allocate
+      reader.send(:initialize, input, options, &block)
+      reader
     end
     ##
@@ -178,12 +116,12 @@ module RDF::Microdata
         log_error("Empty document") if root.nil?
         log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty?
-        log_debug(@doc, "library = #{@library}")
+        log_debug('', "library = #{@library}")
         # Load registry
         begin
-          registry_uri = options[:registry] || DEFAULT_REGISTRY
-          log_debug(@doc, "registry = #{registry_uri.inspect}")
+          registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
+          log_debug('', "registry = #{registry_uri.inspect}")
           Registry.load_registry(registry_uri)
         rescue JSON::ParserError => e
           log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
@@ -270,6 +208,7 @@ module RDF::Microdata
     # Parsing a Microdata document (this is *not* the recursive method)
     def parse_whole_document(doc, base)
       base = doc_base(base)
+      @memory = {}
       options[:base_uri] = if (base)
         # Strip any fragment from base
         base = base.to_s.split('#').first
@@ -280,15 +219,9 @@ module RDF::Microdata
       log_info(nil) {"parse_whole_doc: base='#{base}'"}
-      ec = {
-        memory:             {},
-        current_type:       nil,
-        current_vocabulary: nil,
-        document_base:      base,
-      }
       # 1) For each element that is also a top-level item, Generate the triples for that item using the evaluation context.
       getItems.each do |el|
-        log_depth {generate_triples(el, ec)}
+        log_depth {generate_triples(el, Registry.new(nil))}
       end
       log_info(doc, "parse_whole_doc: traversal complete")
@@ -298,12 +231,11 @@ module RDF::Microdata
     # Generate triples for an item
     #
     # @param [RDF::Resource] item
-    # @param [Hash{Symbol => Object}] ec
+    # @param [Registry] vocab
     # @option ec [Hash{Nokogiri::XML::Element} => RDF::Resource] memory
-    # @option ec [RDF::Resource] :current_type
+    # @option ec [RDF::Resource] :current_vocabulary
     # @return [RDF::Resource]
-    def generate_triples(item, ec = {})
-      memory = ec[:memory]
+    def generate_triples(item, vocab)
       # 1) If there is an entry for item in memory, then let subject be the subject of that entry. Otherwise, if item has a global identifier and that global identifier is an absolute URL, let subject be that global identifier. Otherwise, let subject be a new blank node.
       subject = if memory.include?(item.node)
         memory[item.node][:subject]
@@ -312,12 +244,13 @@ module RDF::Microdata
       end || RDF::Node.new
       memory[item.node] ||= {}
-      log_debug(item) {"gentrips(2): subject=#{subject.inspect}, current_type: #{ec[:current_type]}"}
+      log_debug(item) {"gentrips(2): subject=#{subject.inspect}, vocab: #{vocab.inspect}"}
       # 2) Add a mapping from item to subject in memory, if there isn't one already.
       memory[item.node][:subject] ||= subject
       # 3) For each type returned from element.itemType of the element defining the item.
+      # 4) Set vocab to the first value returned from element.itemType of the element defining the item.
       type = nil
       item.attribute('itemtype').to_s.split(' ').map{|n| uri(n)}.select(&:absolute?).each do |t|
         #   3.1. If type is an absolute URL, generate the following triple:
@@ -325,36 +258,26 @@ module RDF::Microdata
         add_triple(item, subject, RDF.type, t)
       end
-      # 4) Set type to the first value returned from element.itemType of the element defining the item.
-      # 5) Otherwise, set type to current type from the Evaluation Context if not empty.
-      type ||= ec[:current_type]
-      log_debug(item)  {"gentrips(5): type=#{type.inspect}"}
-      # 6) If the registry contains a URI prefix that is a character for character match of type up to the length of the URI prefix, set vocab as that URI prefix.
-      vocab = Registry.find(type)
-      # 7) Otherwise, if type is not empty, construct vocab by removing everything following the last SOLIDUS U+002F ("/") or NUMBER SIGN U+0023 ("#") from the path component of type.
-      vocab ||= begin
-        type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
-        log_debug(item)  {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
-        Registry.new(type_vocab)
+      # 6) If the registry contains a URI prefix that is a character for character match of vocab up to the length of the URI prefix, set vocab as that URI prefix.
+      if type || vocab.nil?
+        vocab = Registry.find(type) || begin
+          type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless type.nil?
+          log_debug(item)  {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
+          Registry.new(type_vocab)
+        end
       end
-      # 8) Update evaluation context setting current vocabulary to vocab.
-      ec[:current_vocabulary] = vocab
+      # Otherwise, use vocab from evaluation context
+      log_debug(item) {"gentrips(8): vocab: #{vocab.inspect}"}
       # 9. For each element _element_ that has one or more property names and is one of the properties of the item _item_, run the following substep:
       props = item_properties(item)
       # 9.1. For each name name in element's property names, run the following substeps:
       props.each do |element|
         element.attribute('itemprop').to_s.split(' ').compact.each do |name|
-          log_debug(item) {"gentrips(9.1): name=#{name.inspect}, type=#{type}"}
-          # 9.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
-          ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
+          log_debug(item) {"gentrips(9.1): name=#{name.inspect}, vocab=#{vocab.inspect}"}
           # 9.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
-          predicate = vocab.predicateURI(name, ec_new)
+          predicate = vocab.predicateURI(name, base_uri)
           # 9.1.3) Let value be the property value of element.
           value = property_value(element)
@@ -362,7 +285,7 @@ module RDF::Microdata
           # 9.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
           if value.is_a?(Hash)
-            value = generate_triples(element, ec_new)
+            value = generate_triples(element, vocab)
             log_debug(item) {"gentrips(9.1.4): value=#{value.inspect}"}
           end
@@ -384,11 +307,9 @@ module RDF::Microdata
       props.each do |element|
         element.attribute('itemprop-reverse').to_s.split(' ').compact.each do |name|
           log_debug(item) {"gentrips(10.1): name=#{name.inspect}"}
-          # 10.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
-          ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
           # 10.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
-          predicate = vocab.predicateURI(name, ec_new)
+          predicate = vocab.predicateURI(name, base_uri)
           # 10.1.3) Let value be the property value of element.
           value = property_value(element)
@@ -396,7 +317,7 @@ module RDF::Microdata
           # 10.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
           if value.is_a?(Hash)
-            value = generate_triples(element, ec_new)
+            value = generate_triples(element, vocab)
             log_debug(item) {"gentrips(10.1.4): value=#{value.inspect}"}
           elsif value.is_a?(RDF::Literal)
             # 10.1.5) Otherwise, if value is a literal, ignore the value and continue to the next name; it is an error for the value of @itemprop-reverse to be a literal
@@ -432,13 +353,13 @@ module RDF::Microdata
     # To crawl the properties of an element root with a list memory, the user agent must run the following steps. These steps either fail or return a list with a count of errors. The count of errors is used as part of the authoring conformance criteria below.
     #
     # @param [Nokogiri::XML::Element] root
-    # @param [Array<Nokokogiri::XML::Element>] memory
+    # @param [Array<Nokogiri::XML::Element>] memo
     # @param [Boolean] reverse crawl reverse properties
     # @return [Array<Nokogiri::XML::Element>]
     #   Resultant elements
-    def crawl_properties(root, memory, reverse)
-      # 1. If root is in memory, then the algorithm fails; abort these steps.
-      raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if memory.include?(root)
+    def crawl_properties(root, memo, reverse)
+      # 1. If root is in memo, then the algorithm fails; abort these steps.
+      raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if memo.include?(root)
       # 2. Collect all the elements in the item root; let results be the resulting list of elements, and errors be the resulting count of errors.
       results = elements_in_item(root)
@@ -447,13 +368,13 @@ module RDF::Microdata
       # 3. Remove any elements from results that do not have an @itemprop (@itemprop-reverse) attribute specified.
       results = results.select {|e| e.has_attribute?(reverse ? 'itemprop-reverse' : 'itemprop')}
-      # 4. Let new memory be a new list consisting of the old list memory with the addition of root.
-      raise CrawlFailure, "itemref recursion" if memory.detect {|n| root.node.object_id == n.node.object_id}
-      new_memory = memory + [root]
+      # 4. Let new memo be a new list consisting of the old list memo with the addition of root.
+      raise CrawlFailure, "itemref recursion" if memo.detect {|n| root.node.object_id == n.node.object_id}
+      new_memo = memo + [root]
-      # 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new memory as the memory.
+      # 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new memo as the memo.
       results.select {|e| e.has_attribute?('itemscope')}.each do |element|
-        log_depth {crawl_properties(element, new_memory, reverse)}
+        log_depth {crawl_properties(element, new_memo, reverse)}
       end
       results
@@ -469,7 +390,7 @@ module RDF::Microdata
     def elements_in_item(root)
       # Let results and pending be empty lists of elements.
       # Let errors be zero.
-      results, memory, errors = [], [], 0
+      results, memo, errors = [], [], 0
       # Add all the children elements of root to pending.
       pending = root.elements
@@ -487,13 +408,13 @@ module RDF::Microdata
       # Loop: Remove an element from pending and let current be that element.
       while current = pending.shift
-        if memory.include?(current)
+        if memo.include?(current)
           raise CrawlFailure, "elements_in_item: results already includes #{current.inspect}"
         elsif !current.has_attribute?('itemscope')
           # If current is not already in results and current does not have an itemscope attribute, then: add all the child elements of current to pending.
           pending += current.elements
         end
-        memory << current
+        memo << current
         # If current is not already in results, then: add current to results.
         results << current unless results.include?(current)

data/lib/rdf/microdata/reader/nokogiri.rb CHANGED

@@ -103,6 +103,12 @@ module RDF::Microdata
           NodeSetProxy.new(@node.elements, self)
         end
+        ##
+        # Rational debug output: implicit string conversion yields the path of the wrapped node
+        def to_str
+          @node.path
+        end
         ##
         # Proxy for everything else to @node
         def method_missing(method, *args)

data/lib/rdf/microdata/registry.rb ADDED

@@ -0,0 +1,109 @@
+require 'json'
+module RDF::Microdata
+  # Registry entry for one vocabulary URI prefix; class-level methods load and search the set of known prefixes
+  class Registry
+    # @return [RDF::URI] Prefix of vocabulary, as given to the constructor (may be nil)
+    attr_reader :uri
+    # @return [Hash] property definitions for this prefix, looked up by property token
+    attr_reader :properties
+    ##
+    # Load registry entries from a JSON document at a URI or file path, replacing any entries already loaded.
+    # Does nothing if `registry_uri` is the one already loaded; top-level JSON values that are not Hashes are skipped.
+    # @param [String] registry_uri location of the registry JSON, opened with RDF::Util::File.open_file
+    def self.load_registry(registry_uri)
+      return if @registry_uri == registry_uri
+      json = RDF::Util::File.open_file(registry_uri) { |f| ::JSON.load(f) }
+      @prefixes = {}
+      json.each do |prefix, elements|
+        next unless elements.is_a?(Hash)
+        properties = elements.fetch("properties", {})
+        @prefixes[prefix] = Registry.new(prefix, properties)
+      end
+      @registry_uri = registry_uri
+    end
+    ##
+    # Initialize registry for a particular prefix URI
+    #
+    # @param [RDF::URI, nil] prefixURI vocabulary prefix; nil means there is no current vocabulary
+    # @param [Hash] properties ({}) per-property definitions, as found under "properties" in the registry JSON
+    def initialize(prefixURI, properties = {})
+      @uri = prefixURI
+      @properties = properties
+      @property_base = prefixURI.to_s
+      # Append a '#' for fragment unless the prefix already ends with '/' or '#'
+      @property_base += '#' unless %w(/ #).include?(@property_base[-1,1])
+    end
+    ##
+    # Find the registry entry whose prefix is a leading substring of a type URI
+    #
+    # @param [RDF::URI] type
+    # @return [Registry, nil] the first matching entry, or nil when no loaded prefix matches
+    def self.find(type)
+      @prefixes ||= {}
+      k = @prefixes.keys.detect {|key| type.to_s.index(key) == 0 }
+      @prefixes[k] if k
+    end
+    ##
+    # Generate a predicateURI given a `name`
+    #
+    # @param [#to_s] name
+    # @param [#to_s] base_uri document base, used only when this registry has no vocabulary URI
+    # @return [RDF::URI]
+    def predicateURI(name, base_uri)
+      u = RDF::URI(name)
+      # 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
+      return u if u.absolute?
+      n = frag_escape(name)
+      if uri.nil?
+        # 2) If current vocabulary from context is null, there can be no current vocabulary.
+        #    Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
+        u = RDF::URI(base_uri.to_s)
+        u.fragment = frag_escape(name)
+        u
+      else
+        # 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
+        RDF::URI(@property_base + n)
+      end
+    end
+    ##
+    # Yield each subPropertyOf value for a predicate, or each equivalentProperty value when there is no subPropertyOf
+    #
+    # @param [RDF::URI] predicateURI
+    # @yield equiv
+    # @yieldparam [RDF::URI] equiv
+    def expand(predicateURI)
+      tok = tokenize(predicateURI)
+      if @properties[tok].is_a?(Hash)
+        value = @properties[tok].fetch("subPropertyOf", nil)
+        value ||= @properties[tok].fetch("equivalentProperty", nil)
+        Array(value).each {|equiv| yield RDF::URI(equiv)}
+      end
+    end
+    ##
+    # Turn a predicateURI into a simple token by removing the first occurrence of this vocabulary's property base
+    # @param [RDF::URI] predicateURI
+    # @return [String]
+    def tokenize(predicateURI)
+      predicateURI.to_s.sub(@property_base, '')
+    end
+    ##
+    # Fragment escape a name: percent-encode each of " # % < > [ \ ] ^ { | } as upper-case hex bytes
+    def frag_escape(name)
+      name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
+    end
+  end
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rdf-microdata
 version: !ruby/object:Gem::Version
-  version: 2.2.1
+  version: 2.2.2
 platform: ruby
 authors:
 - Gregg
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-05-17 00:00:00.000000000 Z
+date: 2017-10-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rdf
@@ -18,6 +18,9 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '2.2'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.2.8
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -25,20 +28,23 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '2.2'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.2.8
 - !ruby/object:Gem::Dependency
   name: rdf-xsd
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '2.1'
+        version: '2.2'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '2.1'
+        version: '2.2'
 - !ruby/object:Gem::Dependency
   name: htmlentities
   requirement: !ruby/object:Gem::Requirement
@@ -59,14 +65,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.7'
+        version: '1.8'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.7'
+        version: '1.8'
 - !ruby/object:Gem::Dependency
   name: equivalent-xml
   requirement: !ruby/object:Gem::Requirement
@@ -101,14 +107,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.5'
+        version: '3.6'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.5'
+        version: '3.6'
 - !ruby/object:Gem::Dependency
   name: rspec-its
   requirement: !ruby/object:Gem::Requirement
@@ -123,6 +129,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '1.2'
+- !ruby/object:Gem::Dependency
+  name: json-ld
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.1'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.1'
 - !ruby/object:Gem::Dependency
   name: rdf-spec
   requirement: !ruby/object:Gem::Requirement
@@ -196,8 +216,11 @@ files:
 - lib/rdf/microdata.rb
 - lib/rdf/microdata/expansion.rb
 - lib/rdf/microdata/format.rb
+- lib/rdf/microdata/jsonld_reader.rb
+- lib/rdf/microdata/rdfa_reader.rb
 - lib/rdf/microdata/reader.rb
 - lib/rdf/microdata/reader/nokogiri.rb
+- lib/rdf/microdata/registry.rb
 - lib/rdf/microdata/version.rb
 - lib/rdf/microdata/vocab.rb
 homepage: http://ruby-rdf.github.com/rdf-microdata
@@ -219,8 +242,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project: rdf-microdata
-rubygems_version: 2.6.11
+rubyforge_project:
+rubygems_version: 2.6.12
 signing_key:
 specification_version: 4
 summary: Microdata reader for Ruby.