rdf-microdata 2.2.1 → 2.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +15 -2
- data/VERSION +1 -1
- data/lib/rdf/microdata.rb +9 -5
- data/lib/rdf/microdata/format.rb +112 -0
- data/lib/rdf/microdata/jsonld_reader.rb +251 -0
- data/lib/rdf/microdata/rdfa_reader.rb +132 -0
- data/lib/rdf/microdata/reader.rb +75 -154
- data/lib/rdf/microdata/reader/nokogiri.rb +6 -0
- data/lib/rdf/microdata/registry.rb +109 -0
- metadata +33 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d40780aa7dd1ba5bac58af54ee1ed4a3f7d2905
|
4
|
+
data.tar.gz: aa2ee1835bad718bef436d97ea55967a1dafb86c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2272c36c3a46c21584ef9f11ef43e7a3c4d54e4ede2d5f6cf7da3022fb1981a3e27fa6abca99ef753570e107703d598ddd6f498656a53f3d650fc878df5a0d39
|
7
|
+
data.tar.gz: a7eadfe033b7f74fa2c7705f7db0af5b67f6c8d95619464420708f6eb873d0c72b71a8c17fd7813dc1e838e17be91bae9aed6c90a97bd318ab82f789bbba926b
|
data/README.md
CHANGED
@@ -60,11 +60,24 @@ Full documentation available on [Rubydoc.info][Microdata doc]
|
|
60
60
|
* {RDF::Microdata::Reader}
|
61
61
|
* {RDF::Microdata::Reader::Nokogiri}
|
62
62
|
|
63
|
-
|
63
|
+
|
64
|
+
### RDFa-based Reader
|
65
|
+
There is an experimental reader based on transforming Microdata to RDFa within the DOM. To invoke
|
66
|
+
this, pass the `rdfa: true` option to {RDF::Microdata::Reader.new}, or
|
67
|
+
use {RDF::Microdata::RdfaReader} directly.
|
68
|
+
|
69
|
+
The reader exposes a `#rdfa` method, which can be used to retrieve the transformed HTML+RDFa.
|
70
|
+
|
71
|
+
### JSON-LD-based Reader
|
72
|
+
There is an experimental reader based on transforming Microdata to JSON-LD. To invoke
|
73
|
+
this, pass the `jsonld: true` option to {RDF::Microdata::Reader.new}, or
|
74
|
+
use {RDF::Microdata::JsonLdReader} directly.
|
75
|
+
|
76
|
+
The reader exposes a `#jsonld` method, which can be used to retrieve the generated JSON-LD.
|
64
77
|
|
65
78
|
## Resources
|
66
79
|
* [RDF.rb][RDF.rb]
|
67
|
-
* [Documentation](http://
|
80
|
+
* [Documentation](http://www.rubydoc.info/github/ruby-rdf/rdf-microdata/)
|
68
81
|
* [History](file:History.md)
|
69
82
|
* [Microdata][]
|
70
83
|
* [Microdata RDF][]
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.2.
|
1
|
+
2.2.2
|
data/lib/rdf/microdata.rb
CHANGED
@@ -15,18 +15,22 @@ module RDF
|
|
15
15
|
# end
|
16
16
|
# end
|
17
17
|
#
|
18
|
-
# @see http://
|
18
|
+
# @see http://www.rubydoc.info/github/ruby-rdf/rdf/
|
19
19
|
# @see http://www.w3.org/TR/2011/WD-microdata-20110525/
|
20
20
|
#
|
21
21
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
22
22
|
module Microdata
|
23
23
|
USES_VOCAB = RDF::URI("http://www.w3.org/ns/rdfa#usesVocabulary")
|
24
|
+
DEFAULT_REGISTRY = File.expand_path("../../../etc/registry.json", __FILE__)
|
24
25
|
|
25
26
|
require 'rdf/microdata/format'
|
26
27
|
require 'rdf/microdata/vocab'
|
27
|
-
autoload :Expansion,
|
28
|
-
autoload :
|
29
|
-
autoload :
|
30
|
-
autoload :
|
28
|
+
autoload :Expansion, 'rdf/microdata/expansion'
|
29
|
+
autoload :JsonLdReader, 'rdf/microdata/jsonld_reader'
|
30
|
+
autoload :Profile, 'rdf/microdata/profile'
|
31
|
+
autoload :RdfaReader, 'rdf/microdata/rdfa_reader'
|
32
|
+
autoload :Reader, 'rdf/microdata/reader'
|
33
|
+
autoload :Registry, 'rdf/microdata/registry'
|
34
|
+
autoload :VERSION, 'rdf/microdata/version'
|
31
35
|
end
|
32
36
|
end
|
data/lib/rdf/microdata/format.rb
CHANGED
@@ -41,5 +41,117 @@ module RDF::Microdata
|
|
41
41
|
def self.detect(sample)
|
42
42
|
!!sample.match(/<[^>]*(itemprop|itemtype|itemref|itemscope|itemid)[^>]*>/m)
|
43
43
|
end
|
44
|
+
|
45
|
+
##
|
46
|
+
# Hash of CLI commands appropriate for this format
|
47
|
+
# @return [Hash{Symbol => Hash}]
|
48
|
+
def self.cli_commands
|
49
|
+
{
|
50
|
+
"to-rdfa": {
|
51
|
+
description: "Transform HTML+Microdata into HTML+RDFa",
|
52
|
+
parse: false,
|
53
|
+
help: "to-rdfa files ...\nTransform HTML+Microdata into HTML+RDFa",
|
54
|
+
filter: {
|
55
|
+
format: :microdata
|
56
|
+
},
|
57
|
+
option_use: {output_format: :disabled},
|
58
|
+
lambda: ->(files, options) do
|
59
|
+
out = options[:output] || $stdout
|
60
|
+
xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
61
|
+
<xsl:param name="indent-increment" select="' '"/>
|
62
|
+
<xsl:output method="html" doctype-system="about:legacy-compat"/>
|
63
|
+
|
64
|
+
<xsl:template name="newline">
|
65
|
+
<xsl:text disable-output-escaping="yes">
|
66
|
+
</xsl:text>
|
67
|
+
</xsl:template>
|
68
|
+
|
69
|
+
<xsl:template match="comment() | processing-instruction()">
|
70
|
+
<xsl:param name="indent" select="''"/>
|
71
|
+
<xsl:call-template name="newline"/>
|
72
|
+
<xsl:value-of select="$indent"/>
|
73
|
+
<xsl:copy />
|
74
|
+
</xsl:template>
|
75
|
+
|
76
|
+
<xsl:template match="text()">
|
77
|
+
<xsl:param name="indent" select="''"/>
|
78
|
+
<xsl:call-template name="newline"/>
|
79
|
+
<xsl:value-of select="$indent"/>
|
80
|
+
<xsl:value-of select="normalize-space(.)"/>
|
81
|
+
</xsl:template>
|
82
|
+
|
83
|
+
<xsl:template match="text()[normalize-space(.)='']"/>
|
84
|
+
|
85
|
+
<xsl:template match="*">
|
86
|
+
<xsl:param name="indent" select="''"/>
|
87
|
+
<xsl:call-template name="newline"/>
|
88
|
+
<xsl:value-of select="$indent"/>
|
89
|
+
<xsl:choose>
|
90
|
+
<xsl:when test="count(child::*) > 0">
|
91
|
+
<xsl:copy>
|
92
|
+
<xsl:copy-of select="@*"/>
|
93
|
+
<xsl:apply-templates select="*|text()">
|
94
|
+
<xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
|
95
|
+
</xsl:apply-templates>
|
96
|
+
<xsl:call-template name="newline"/>
|
97
|
+
<xsl:value-of select="$indent"/>
|
98
|
+
</xsl:copy>
|
99
|
+
</xsl:when>
|
100
|
+
<xsl:otherwise>
|
101
|
+
<xsl:copy-of select="."/>
|
102
|
+
</xsl:otherwise>
|
103
|
+
</xsl:choose>
|
104
|
+
</xsl:template>
|
105
|
+
</xsl:stylesheet>).gsub(/^ /, ''))
|
106
|
+
if files.empty?
|
107
|
+
# If no files are given, read from options[:evaluate] if present, otherwise from STDIN
|
108
|
+
input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
|
109
|
+
input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
|
110
|
+
RDF::Microdata::Reader.new(input, options.merge(rdfa: true)) do |reader|
|
111
|
+
reader.rdfa.xpath("//text()").each do |txt|
|
112
|
+
txt.content = txt.content.to_s.strip
|
113
|
+
end
|
114
|
+
out.puts xsl.apply_to(reader.rdfa).to_s
|
115
|
+
end
|
116
|
+
else
|
117
|
+
files.each do |file|
|
118
|
+
RDF::Microdata::Reader.open(file, options.merge(rdfa: true)) do |reader|
|
119
|
+
reader.rdfa.xpath("//text()").each do |txt|
|
120
|
+
txt.content = txt.content.to_s.strip
|
121
|
+
end
|
122
|
+
out.puts xsl.apply_to(reader.rdfa).to_s
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
},
|
128
|
+
"to-jsonld": {
|
129
|
+
description: "Transform HTML+Microdata into JSON-LD",
|
130
|
+
parse: false,
|
131
|
+
help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
|
132
|
+
filter: {
|
133
|
+
format: :microdata
|
134
|
+
},
|
135
|
+
option_use: {output_format: :disabled},
|
136
|
+
lambda: ->(files, options) do
|
137
|
+
out = options[:output] || $stdout
|
138
|
+
if files.empty?
|
139
|
+
# If no files are given, read from options[:evaluate] if present, otherwise from STDIN
|
140
|
+
input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
|
141
|
+
input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
|
142
|
+
RDF::Microdata::Reader.new(input, options.merge(jsonld: true)) do |reader|
|
143
|
+
out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
|
144
|
+
end
|
145
|
+
else
|
146
|
+
files.each do |file|
|
147
|
+
RDF::Microdata::Reader.open(file, options.merge(jsonld: true)) do |reader|
|
148
|
+
out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
},
|
154
|
+
}
|
155
|
+
end
|
44
156
|
end
|
45
157
|
end
|
@@ -0,0 +1,251 @@
|
|
1
|
+
require 'json/ld'
|
2
|
+
require 'nokogumbo'
|
3
|
+
|
4
|
+
module RDF::Microdata
|
5
|
+
##
|
6
|
+
# Update DOM to turn Microdata into JSON-LD and parse using the JSON-LD Reader
|
7
|
+
class JsonLdReader < JSON::LD::Reader
|
8
|
+
# The resulting JSON-LD
|
9
|
+
# @return [Hash]
|
10
|
+
attr_reader :jsonld
|
11
|
+
|
12
|
+
def self.format(klass = nil)
|
13
|
+
if klass.nil?
|
14
|
+
RDF::Microdata::Format
|
15
|
+
else
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
##
|
21
|
+
# Initializes the JsonLdReader instance.
|
22
|
+
#
|
23
|
+
# @param [IO, File, String] input
|
24
|
+
# the input stream to read
|
25
|
+
# @param [Hash{Symbol => Object}] options
|
26
|
+
# any additional options (see `RDF::Reader#initialize`)
|
27
|
+
# @return [reader]
|
28
|
+
# @yield [reader] `self`
|
29
|
+
# @yieldparam [RDF::Reader] reader
|
30
|
+
# @yieldreturn [void] ignored
|
31
|
+
# @raise [RDF::ReaderError] if _validate_
|
32
|
+
def initialize(input = $stdin, options = {}, &block)
|
33
|
+
@options = options
|
34
|
+
log_debug('', "using JSON-LD transformation reader")
|
35
|
+
|
36
|
+
input = case input
|
37
|
+
when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
|
38
|
+
else
|
39
|
+
# Try to detect charset from input
|
40
|
+
options[:encoding] ||= input.charset if input.respond_to?(:charset)
|
41
|
+
|
42
|
+
# Otherwise, default is utf-8
|
43
|
+
options[:encoding] ||= 'utf-8'
|
44
|
+
options[:encoding] = options[:encoding].to_s if options[:encoding]
|
45
|
+
input = input.read if input.respond_to?(:read)
|
46
|
+
::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
|
47
|
+
end
|
48
|
+
|
49
|
+
# Load registry
|
50
|
+
begin
|
51
|
+
registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
|
52
|
+
log_debug('', "registry = #{registry_uri.inspect}")
|
53
|
+
Registry.load_registry(registry_uri)
|
54
|
+
rescue JSON::ParserError => e
|
55
|
+
log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
|
56
|
+
end
|
57
|
+
|
58
|
+
@jsonld = {'@graph' => []}
|
59
|
+
|
60
|
+
# Start with all top-level items
|
61
|
+
input.css("[itemscope]").each do |item|
|
62
|
+
next if item['itemprop'] # Only top-level items
|
63
|
+
jsonld['@graph'] << get_object(item)
|
64
|
+
end
|
65
|
+
|
66
|
+
log_debug('', "Transformed document: #{jsonld.to_json(JSON::LD::JSON_STATE)}")
|
67
|
+
|
68
|
+
# Rely on JSON-LD reader
|
69
|
+
super(jsonld.to_json, options, &block)
|
70
|
+
end
|
71
|
+
|
72
|
+
private
|
73
|
+
# Return JSON-LD representation of an item
|
74
|
+
# @param [Nokogiri::XML::Element] item
|
75
|
+
# @param [Hash{Nokogiri::XML::Node => Hash}] memory
|
76
|
+
# @return [Hash]
|
77
|
+
def get_object(item, memory = {})
|
78
|
+
if result = memory[item]
|
79
|
+
# Result is a reference to that item; assign a blank-node identifier if necessary
|
80
|
+
result['@id'] ||= alloc_bnode
|
81
|
+
return result
|
82
|
+
end
|
83
|
+
|
84
|
+
result = {}
|
85
|
+
memory[item] = result
|
86
|
+
|
87
|
+
# If the item has a global identifier, add an entry to result called "@id" whose value is the global identifier of item.
|
88
|
+
result['@id'] = item['itemid'].to_s if item['itemid']
|
89
|
+
|
90
|
+
# If the item has any item types, add an entry to result called "@type" whose value is an array listing the item types of item, in the order they were specified on the itemtype attribute.
|
91
|
+
if item['itemtype']
|
92
|
+
# Only absolute URLs
|
93
|
+
types = item.attribute('itemtype').
|
94
|
+
remove.
|
95
|
+
to_s.
|
96
|
+
split(/\s+/).
|
97
|
+
select {|t| RDF::URI(t).absolute?}
|
98
|
+
if vocab = types.first
|
99
|
+
vocab = Registry.find(vocab) || begin
|
100
|
+
type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
|
101
|
+
Registry.new(type_vocab) if type_vocab
|
102
|
+
end
|
103
|
+
(result['@context'] = {})['@vocab'] = vocab.uri.to_s if vocab
|
104
|
+
result['@type'] = types unless types.empty?
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# For each element element that has one or more property names and is one of the properties of the item item, in the order those elements are given by the algorithm that returns the properties of an item, run the following substeps
|
109
|
+
item_properties(item).each do |element|
|
110
|
+
value = if element['itemscope']
|
111
|
+
get_object(element, memory)
|
112
|
+
else
|
113
|
+
property_value(element)
|
114
|
+
end
|
115
|
+
element['itemprop'].to_s.split(/\s+/).each do |prop|
|
116
|
+
result[prop] ||= [] << value
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
result
|
121
|
+
end
|
122
|
+
|
123
|
+
##
|
124
|
+
#
|
125
|
+
# @param [Nokogiri::XML::Element] item
|
126
|
+
# @return [Array<Nokogiri::XML::Element>]
|
127
|
+
# List of property elements for an item
|
128
|
+
def item_properties(item)
|
129
|
+
results, memory, pending = [], [item], item.children.select(&:element?)
|
130
|
+
log_debug(item, "item_properties")
|
131
|
+
|
132
|
+
# If root has an itemref attribute, split the value of that itemref attribute on spaces. For each resulting token ID, if there is an element in the document whose ID is ID, then add the first such element to pending.
|
133
|
+
item['itemref'].to_s.split(/\s+/).each do |ref|
|
134
|
+
if referenced = referenced = item.at_css("##{ref}")
|
135
|
+
pending << referenced
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
while !pending.empty?
|
140
|
+
current = pending.shift
|
141
|
+
# Error
|
142
|
+
break if memory.include?(current)
|
143
|
+
memory << current
|
144
|
+
|
145
|
+
# If current does not have an itemscope attribute, then: add all the child elements of current to pending.
|
146
|
+
pending += current.children.select(&:element?) unless current['itemscope']
|
147
|
+
|
148
|
+
# If current has an itemprop attribute specified and has one or more property names, then add current to results.
|
149
|
+
results << current unless current['itemprop'].to_s.split(/\s+/).empty?
|
150
|
+
end
|
151
|
+
|
152
|
+
results
|
153
|
+
end
|
154
|
+
|
155
|
+
##
|
156
|
+
#
|
157
|
+
def property_value(element)
|
158
|
+
base = element.base || base_uri
|
159
|
+
log_debug(element) {"property_value(#{element.name}): base #{base.inspect}"}
|
160
|
+
value = case
|
161
|
+
when element.has_attribute?('itemscope')
|
162
|
+
{}
|
163
|
+
when element.has_attribute?('content')
|
164
|
+
if element.language
|
165
|
+
{"@value" => element['content'].to_s.strip, language: element.language}
|
166
|
+
else
|
167
|
+
element['content'].to_s.strip
|
168
|
+
end
|
169
|
+
when %w(data meter).include?(element.name) && element.attribute('value')
|
170
|
+
# XXX parse as number?
|
171
|
+
{"@value" => element['value'].to_s.strip}
|
172
|
+
when %w(audio embed iframe img source track video).include?(element.name)
|
173
|
+
{"@id" => uri(element.attribute('src'), base).to_s}
|
174
|
+
when %w(a area link).include?(element.name)
|
175
|
+
{"@id" => uri(element.attribute('href'), base).to_s}
|
176
|
+
when %w(object).include?(element.name)
|
177
|
+
{"@id" => uri(element.attribute('data'), base).to_s}
|
178
|
+
when %w(time).include?(element.name)
|
179
|
+
# use datatype?
|
180
|
+
(element.attribute('datetime') || element.text).to_s.strip
|
181
|
+
else
|
182
|
+
if element.language
|
183
|
+
{"@value" => element.inner_text.to_s.strip, language: element.language}
|
184
|
+
else
|
185
|
+
element.inner_text.to_s.strip
|
186
|
+
end
|
187
|
+
end
|
188
|
+
log_debug(element) {" #{value.inspect}"}
|
189
|
+
value
|
190
|
+
end
|
191
|
+
|
192
|
+
# Allocate a new blank node identifier
|
193
|
+
# @return [String]
|
194
|
+
def alloc_bnode
|
195
|
+
@bnode_base ||= "_:a"
|
196
|
+
res = @bnode_base
|
197
|
+
@bnode_base = res.succ
|
198
|
+
res
|
199
|
+
end
|
200
|
+
|
201
|
+
# Fixme, what about xml:base relative to element?
|
202
|
+
def uri(value, base = nil)
|
203
|
+
value = if base
|
204
|
+
base = uri(base) unless base.is_a?(RDF::URI)
|
205
|
+
base.join(value.to_s)
|
206
|
+
else
|
207
|
+
RDF::URI(value.to_s)
|
208
|
+
end
|
209
|
+
value.validate! if validate?
|
210
|
+
value.canonicalize! if canonicalize?
|
211
|
+
value = RDF::URI.intern(value) if intern?
|
212
|
+
value
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
# Monkey Patch Nokogiri
|
218
|
+
module Nokogiri::XML
|
219
|
+
class Element
|
220
|
+
|
221
|
+
##
|
222
|
+
# Get any xml:base in effect for this element
|
223
|
+
def base
|
224
|
+
if @base.nil?
|
225
|
+
@base = attributes['xml:base'] ||
|
226
|
+
(parent && parent.element? && parent.base) ||
|
227
|
+
false
|
228
|
+
end
|
229
|
+
|
230
|
+
@base == false ? nil : @base
|
231
|
+
end
|
232
|
+
|
233
|
+
|
234
|
+
##
|
235
|
+
# Get any xml:lang or lang in effect for this element
|
236
|
+
def language
|
237
|
+
if @language.nil?
|
238
|
+
language = case
|
239
|
+
when self["xml:lang"]
|
240
|
+
self["xml:lang"].to_s
|
241
|
+
when self["lang"]
|
242
|
+
self["lang"].to_s
|
243
|
+
else
|
244
|
+
parent && parent.element? && parent.language
|
245
|
+
end
|
246
|
+
end
|
247
|
+
@language == false ? nil : @language
|
248
|
+
end
|
249
|
+
|
250
|
+
end
|
251
|
+
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require 'rdf/rdfa'
|
2
|
+
require 'nokogumbo'
|
3
|
+
|
4
|
+
module RDF::Microdata
|
5
|
+
##
|
6
|
+
# Update DOM to turn Microdata into RDFa and parse using the RDFa Reader
|
7
|
+
class RdfaReader < RDF::RDFa::Reader
|
8
|
+
# The transformed DOM using RDFa
|
9
|
+
# @return [RDF::HTML::Document]
|
10
|
+
attr_reader :rdfa
|
11
|
+
|
12
|
+
def self.format(klass = nil)
|
13
|
+
if klass.nil?
|
14
|
+
RDF::Microdata::Format
|
15
|
+
else
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
##
|
21
|
+
# Initializes the RdfaReader instance.
|
22
|
+
#
|
23
|
+
# @param [IO, File, String] input
|
24
|
+
# the input stream to read
|
25
|
+
# @param [Hash{Symbol => Object}] options
|
26
|
+
# any additional options (see `RDF::Reader#initialize`)
|
27
|
+
# @return [reader]
|
28
|
+
# @yield [reader] `self`
|
29
|
+
# @yieldparam [RDF::Reader] reader
|
30
|
+
# @yieldreturn [void] ignored
|
31
|
+
# @raise [RDF::ReaderError] if _validate_
|
32
|
+
def initialize(input = $stdin, options = {}, &block)
|
33
|
+
@options = options
|
34
|
+
log_debug('', "using RDFa transformation reader")
|
35
|
+
|
36
|
+
input = case input
|
37
|
+
when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
|
38
|
+
else
|
39
|
+
# Try to detect charset from input
|
40
|
+
options[:encoding] ||= input.charset if input.respond_to?(:charset)
|
41
|
+
|
42
|
+
# Otherwise, default is utf-8
|
43
|
+
options[:encoding] ||= 'utf-8'
|
44
|
+
options[:encoding] = options[:encoding].to_s if options[:encoding]
|
45
|
+
input = input.read if input.respond_to?(:read)
|
46
|
+
::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
|
47
|
+
end
|
48
|
+
|
49
|
+
# Load registry
|
50
|
+
begin
|
51
|
+
registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
|
52
|
+
log_debug('', "registry = #{registry_uri.inspect}")
|
53
|
+
Registry.load_registry(registry_uri)
|
54
|
+
rescue JSON::ParserError => e
|
55
|
+
log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
|
56
|
+
end
|
57
|
+
|
58
|
+
# For all members having @itemscope
|
59
|
+
input.css("[itemscope]").each do |item|
|
60
|
+
# Get @itemtypes to create @type and @vocab
|
61
|
+
item.attribute('itemscope').remove
|
62
|
+
if item['itemtype']
|
63
|
+
# Only absolute URLs
|
64
|
+
types = item.attribute('itemtype').
|
65
|
+
remove.
|
66
|
+
to_s.
|
67
|
+
split(/\s+/).
|
68
|
+
select {|t| RDF::URI(t).absolute?}
|
69
|
+
|
70
|
+
item['typeof'] = types.join(' ') unless types.empty?
|
71
|
+
if vocab = types.first
|
72
|
+
vocab = Registry.find(vocab) || begin
|
73
|
+
type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
|
74
|
+
Registry.new(type_vocab) if type_vocab
|
75
|
+
end
|
76
|
+
item['vocab'] = vocab.uri.to_s if vocab
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
# Change each itemid attribute to a resource attribute with the same value
|
81
|
+
if item['itemid']
|
82
|
+
id = item.attribute('itemid').remove
|
83
|
+
item[item['itemprop'] ? 'resource' : 'about'] = id
|
84
|
+
else
|
85
|
+
# Otherwise, ensure that @typeof has at least an empty value
|
86
|
+
item['typeof'] ||= ''
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
# Add @resource for all itemprop values of object based on a @data value
|
91
|
+
input.css("object[itemprop][data]").each do |item|
|
92
|
+
item['resource'] ||= item['data']
|
93
|
+
end
|
94
|
+
|
95
|
+
# Replace all @itemprop values with @property
|
96
|
+
input.css("[itemprop]").each {|item| item['property'] = item.attribute('itemprop').remove}
|
97
|
+
|
98
|
+
# Wrap all @itemref properties
|
99
|
+
input.css("[itemref]").each do |item|
|
100
|
+
item_vocab = item['vocab'] || item.ancestors.detect {|a| a.attribute('vocab')}
|
101
|
+
item_vocab = item_vocab.to_s if item_vocab
|
102
|
+
|
103
|
+
item.attribute('itemref').remove.to_s.split(/\s+/).each do |ref|
|
104
|
+
if referenced = input.css("##{ref}")
|
105
|
+
# Add @vocab to referenced using the closest ancestor having @vocab of item.
|
106
|
+
# If the element with id reference has no resource attribute, add a resource attribute whose value is a NUMBER SIGN U+0023 followed by reference to the element.
|
107
|
+
# If the element with id reference has no typeof attribute, add a typeof="rdfa:Pattern" attribute to the element.
|
108
|
+
referenced.wrap(%(<div vocab="#{item_vocab}" resource="##{ref}" typeof="rdfa:Pattern"))
|
109
|
+
|
110
|
+
# Add a link child element to the element that represents the item, with a rel="rdfa:copy" attribute and an href attribute whose value is a NUMBER SIGN U+0023 followed by reference
|
111
|
+
link = ::Nokogiri::XML::Node.new('link', input)
|
112
|
+
link['rel'] = 'rdfa:copy'
|
113
|
+
link['href'] = "##{ref}"
|
114
|
+
item << link
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
@rdfa = input
|
120
|
+
log_debug('', "Transformed document: #{input.to_html}")
|
121
|
+
|
122
|
+
options = options.merge(
|
123
|
+
library: :nokogiri,
|
124
|
+
reference_folding: true,
|
125
|
+
host_language: :html5,
|
126
|
+
version: :"rdfa1.1")
|
127
|
+
|
128
|
+
# Rely on RDFa reader
|
129
|
+
super(input, options, &block)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
data/lib/rdf/microdata/reader.rb
CHANGED
@@ -15,15 +15,16 @@ module RDF::Microdata
|
|
15
15
|
include Expansion
|
16
16
|
include RDF::Util::Logger
|
17
17
|
URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video)
|
18
|
-
DEFAULT_REGISTRY = File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "..", "etc", "registry.json"))
|
19
18
|
|
20
19
|
# @private
|
21
20
|
class CrawlFailure < StandardError; end
|
22
21
|
|
23
|
-
# @!attribute [r] implementation
|
24
22
|
# @return [Module] Returns the HTML implementation module for this reader instance.
|
25
23
|
attr_reader :implementation
|
26
24
|
|
25
|
+
# @return [Hash{Object => RDF::Resource}] maps RDF elements (items) to resources
|
26
|
+
attr_reader :memory
|
27
|
+
|
27
28
|
##
|
28
29
|
# Returns the base URI determined by this reader.
|
29
30
|
#
|
@@ -36,109 +37,46 @@ module RDF::Microdata
|
|
36
37
|
@options[:base_uri]
|
37
38
|
end
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
def self.load_registry(registry_uri)
|
52
|
-
return if @registry_uri == registry_uri
|
53
|
-
|
54
|
-
json = RDF::Util::File.open_file(registry_uri) { |f| JSON.load(f) }
|
55
|
-
|
56
|
-
@prefixes = {}
|
57
|
-
json.each do |prefix, elements|
|
58
|
-
next unless elements.is_a?(Hash)
|
59
|
-
properties = elements.fetch("properties", {})
|
60
|
-
@prefixes[prefix] = Registry.new(prefix, properties)
|
61
|
-
end
|
62
|
-
@registry_uri = registry_uri
|
63
|
-
end
|
64
|
-
|
65
|
-
##
|
66
|
-
# Initialize registry for a particular prefix URI
|
67
|
-
#
|
68
|
-
# @param [RDF::URI] prefixURI
|
69
|
-
# @param [Hash] properties ({})
|
70
|
-
def initialize(prefixURI, properties = {})
|
71
|
-
@uri = prefixURI
|
72
|
-
@properties = properties
|
73
|
-
@property_base = prefixURI.to_s
|
74
|
-
# Append a '#' for fragment if necessary
|
75
|
-
@property_base += '#' unless %w(/ #).include?(@property_base[-1,1])
|
76
|
-
end
|
40
|
+
##
|
41
|
+
# Reader options
|
42
|
+
# @see http://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
|
43
|
+
def self.options
|
44
|
+
super + [
|
45
|
+
RDF::CLI::Option.new(
|
46
|
+
symbol: :rdfa,
|
47
|
+
datatype: TrueClass,
|
48
|
+
on: ["--rdfa"],
|
49
|
+
description: "Transform and parse as RDFa.") {true},
|
50
|
+
]
|
51
|
+
end
|
77
52
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
##
|
90
|
-
# Generate a predicateURI given a `name`
|
91
|
-
#
|
92
|
-
# @param [#to_s] name
|
93
|
-
# @param [Hash{}] ec Evaluation Context
|
94
|
-
# @return [RDF::URI]
|
95
|
-
def predicateURI(name, ec)
|
96
|
-
u = RDF::URI(name)
|
97
|
-
# 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
|
98
|
-
return u if u.absolute?
|
99
|
-
|
100
|
-
n = frag_escape(name)
|
101
|
-
if ec[:current_type].nil?
|
102
|
-
# 2) If current type from context is null, there can be no current vocabulary.
|
103
|
-
# Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
|
104
|
-
u = RDF::URI(ec[:document_base].to_s)
|
105
|
-
u.fragment = frag_escape(name)
|
106
|
-
u
|
107
|
-
else
|
108
|
-
# 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
|
109
|
-
RDF::URI(@property_base + n)
|
53
|
+
##
|
54
|
+
# Redirect for RDFa Reader given `:rdfa` option
|
55
|
+
#
|
56
|
+
# @private
|
57
|
+
def self.new(input = nil, options = {}, &block)
|
58
|
+
klass = if options[:rdfa]
|
59
|
+
# Requires rdf-rdfa gem to be loaded
|
60
|
+
begin
|
61
|
+
require 'rdf/rdfa'
|
62
|
+
rescue LoadError
|
63
|
+
raise ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem"
|
110
64
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
# @yieldparam [RDF::URI] equiv
|
119
|
-
def expand(predicateURI)
|
120
|
-
tok = tokenize(predicateURI)
|
121
|
-
if @properties[tok].is_a?(Hash)
|
122
|
-
value = @properties[tok].fetch("subPropertyOf", nil)
|
123
|
-
value ||= @properties[tok].fetch("equivalentProperty", nil)
|
124
|
-
|
125
|
-
Array(value).each {|equiv| yield RDF::URI(equiv)}
|
65
|
+
RdfaReader
|
66
|
+
elsif options[:jsonld]
|
67
|
+
# Requires json-ld gem to be loaded
|
68
|
+
begin
|
69
|
+
require 'json/ld'
|
70
|
+
rescue LoadError
|
71
|
+
raise ReaderError, "Use of JSON-LD-based reader requires json-ld gem"
|
126
72
|
end
|
73
|
+
JsonLdReader
|
74
|
+
else
|
75
|
+
self
|
127
76
|
end
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
# @param [RDF::URI] predicateURI
|
132
|
-
# @return [String]
|
133
|
-
def tokenize(predicateURI)
|
134
|
-
predicateURI.to_s.sub(@property_base, '')
|
135
|
-
end
|
136
|
-
|
137
|
-
##
|
138
|
-
# Fragment escape a name
|
139
|
-
def frag_escape(name)
|
140
|
-
name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
|
141
|
-
end
|
77
|
+
reader = klass.allocate
|
78
|
+
reader.send(:initialize, input, options, &block)
|
79
|
+
reader
|
142
80
|
end
|
143
81
|
|
144
82
|
##
|
@@ -178,12 +116,12 @@ module RDF::Microdata
|
|
178
116
|
log_error("Empty document") if root.nil?
|
179
117
|
log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty?
|
180
118
|
|
181
|
-
log_debug(
|
119
|
+
log_debug('', "library = #{@library}")
|
182
120
|
|
183
121
|
# Load registry
|
184
122
|
begin
|
185
|
-
registry_uri = options[:registry] || DEFAULT_REGISTRY
|
186
|
-
log_debug(
|
123
|
+
registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
|
124
|
+
log_debug('', "registry = #{registry_uri.inspect}")
|
187
125
|
Registry.load_registry(registry_uri)
|
188
126
|
rescue JSON::ParserError => e
|
189
127
|
log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
|
@@ -270,6 +208,7 @@ module RDF::Microdata
|
|
270
208
|
# Parsing a Microdata document (this is *not* the recursive method)
|
271
209
|
def parse_whole_document(doc, base)
|
272
210
|
base = doc_base(base)
|
211
|
+
@memory = {}
|
273
212
|
options[:base_uri] = if (base)
|
274
213
|
# Strip any fragment from base
|
275
214
|
base = base.to_s.split('#').first
|
@@ -280,15 +219,9 @@ module RDF::Microdata
|
|
280
219
|
|
281
220
|
log_info(nil) {"parse_whole_doc: base='#{base}'"}
|
282
221
|
|
283
|
-
ec = {
|
284
|
-
memory: {},
|
285
|
-
current_type: nil,
|
286
|
-
current_vocabulary: nil,
|
287
|
-
document_base: base,
|
288
|
-
}
|
289
222
|
# 1) For each element that is also a top-level item, Generate the triples for that item using the evaluation context.
|
290
223
|
getItems.each do |el|
|
291
|
-
log_depth {generate_triples(el,
|
224
|
+
log_depth {generate_triples(el, Registry.new(nil))}
|
292
225
|
end
|
293
226
|
|
294
227
|
log_info(doc, "parse_whole_doc: traversal complete")
|
@@ -298,12 +231,11 @@ module RDF::Microdata
|
|
298
231
|
# Generate triples for an item
|
299
232
|
#
|
300
233
|
# @param [RDF::Resource] item
|
301
|
-
# @param [
|
234
|
+
# @param [Registry] vocab
|
302
235
|
# @option ec [Hash{Nokogiri::XML::Element} => RDF::Resource] memory
|
303
|
-
# @option ec [RDF::Resource] :
|
236
|
+
# @option ec [RDF::Resource] :current_vocabulary
|
304
237
|
# @return [RDF::Resource]
|
305
|
-
def generate_triples(item,
|
306
|
-
memory = ec[:memory]
|
238
|
+
def generate_triples(item, vocab)
|
307
239
|
# 1) If there is an entry for item in memory, then let subject be the subject of that entry. Otherwise, if item has a global identifier and that global identifier is an absolute URL, let subject be that global identifier. Otherwise, let subject be a new blank node.
|
308
240
|
subject = if memory.include?(item.node)
|
309
241
|
memory[item.node][:subject]
|
@@ -312,12 +244,13 @@ module RDF::Microdata
|
|
312
244
|
end || RDF::Node.new
|
313
245
|
memory[item.node] ||= {}
|
314
246
|
|
315
|
-
log_debug(item) {"gentrips(2): subject=#{subject.inspect},
|
247
|
+
log_debug(item) {"gentrips(2): subject=#{subject.inspect}, vocab: #{vocab.inspect}"}
|
316
248
|
|
317
249
|
# 2) Add a mapping from item to subject in memory, if there isn't one already.
|
318
250
|
memory[item.node][:subject] ||= subject
|
319
251
|
|
320
252
|
# 3) For each type returned from element.itemType of the element defining the item.
|
253
|
+
# 4) Set vocab to the first value returned from element.itemType of the element defining the item.
|
321
254
|
type = nil
|
322
255
|
item.attribute('itemtype').to_s.split(' ').map{|n| uri(n)}.select(&:absolute?).each do |t|
|
323
256
|
# 3.1. If type is an absolute URL, generate the following triple:
|
@@ -325,36 +258,26 @@ module RDF::Microdata
|
|
325
258
|
add_triple(item, subject, RDF.type, t)
|
326
259
|
end
|
327
260
|
|
328
|
-
#
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
vocab = Registry.find(type)
|
336
|
-
|
337
|
-
# 7) Otherwise, if type is not empty, construct vocab by removing everything following the last SOLIDUS U+002F ("/") or NUMBER SIGN U+0023 ("#") from the path component of type.
|
338
|
-
vocab ||= begin
|
339
|
-
type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
|
340
|
-
log_debug(item) {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
|
341
|
-
Registry.new(type_vocab)
|
261
|
+
# 6) If the registry contains a URI prefix that is a character for character match of vocab up to the length of the URI prefix, set vocab as that URI prefix.
|
262
|
+
if type || vocab.nil?
|
263
|
+
vocab = Registry.find(type) || begin
|
264
|
+
type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless type.nil?
|
265
|
+
log_debug(item) {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
|
266
|
+
Registry.new(type_vocab)
|
267
|
+
end
|
342
268
|
end
|
343
269
|
|
344
|
-
#
|
345
|
-
|
270
|
+
# Otherwise, use vocab from evaluation context
|
271
|
+
log_debug(item) {"gentrips(8): vocab: #{vocab.inspect}"}
|
346
272
|
|
347
273
|
# 9. For each element _element_ that has one or more property names and is one of the properties of the item _item_, run the following substep:
|
348
274
|
props = item_properties(item)
|
349
275
|
# 9.1. For each name name in element's property names, run the following substeps:
|
350
276
|
props.each do |element|
|
351
277
|
element.attribute('itemprop').to_s.split(' ').compact.each do |name|
|
352
|
-
log_debug(item) {"gentrips(9.1): name=#{name.inspect},
|
353
|
-
# 9.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
|
354
|
-
ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
|
355
|
-
|
278
|
+
log_debug(item) {"gentrips(9.1): name=#{name.inspect}, vocab=#{vocab.inspect}"}
|
356
279
|
# 9.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
|
357
|
-
predicate = vocab.predicateURI(name,
|
280
|
+
predicate = vocab.predicateURI(name, base_uri)
|
358
281
|
|
359
282
|
# 9.1.3) Let value be the property value of element.
|
360
283
|
value = property_value(element)
|
@@ -362,7 +285,7 @@ module RDF::Microdata
|
|
362
285
|
|
363
286
|
# 9.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
|
364
287
|
if value.is_a?(Hash)
|
365
|
-
value = generate_triples(element,
|
288
|
+
value = generate_triples(element, vocab)
|
366
289
|
log_debug(item) {"gentrips(9.1.4): value=#{value.inspect}"}
|
367
290
|
end
|
368
291
|
|
@@ -384,11 +307,9 @@ module RDF::Microdata
|
|
384
307
|
props.each do |element|
|
385
308
|
element.attribute('itemprop-reverse').to_s.split(' ').compact.each do |name|
|
386
309
|
log_debug(item) {"gentrips(10.1): name=#{name.inspect}"}
|
387
|
-
# 10.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
|
388
|
-
ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
|
389
310
|
|
390
311
|
# 10.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
|
391
|
-
predicate = vocab.predicateURI(name,
|
312
|
+
predicate = vocab.predicateURI(name, base_uri)
|
392
313
|
|
393
314
|
# 10.1.3) Let value be the property value of element.
|
394
315
|
value = property_value(element)
|
@@ -396,7 +317,7 @@ module RDF::Microdata
|
|
396
317
|
|
397
318
|
# 10.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
|
398
319
|
if value.is_a?(Hash)
|
399
|
-
value = generate_triples(element,
|
320
|
+
value = generate_triples(element, vocab)
|
400
321
|
log_debug(item) {"gentrips(10.1.4): value=#{value.inspect}"}
|
401
322
|
elsif value.is_a?(RDF::Literal)
|
402
323
|
# 10.1.5) Otherwise, if value is a literal, ignore the value and continue to the next name; it is an error for the value of @itemprop-reverse to be a literal
|
@@ -432,13 +353,13 @@ module RDF::Microdata
|
|
432
353
|
# To crawl the properties of an element root with a list memory, the user agent must run the following steps. These steps either fail or return a list with a count of errors. The count of errors is used as part of the authoring conformance criteria below.
|
433
354
|
#
|
434
355
|
# @param [Nokogiri::XML::Element] root
|
435
|
-
# @param [Array<Nokokogiri::XML::Element>]
|
356
|
+
# @param [Array<Nokogiri::XML::Element>] memo
|
436
357
|
# @param [Boolean] reverse crawl reverse properties
|
437
358
|
# @return [Array<Nokogiri::XML::Element>]
|
438
359
|
# Resultant elements
|
439
|
-
def crawl_properties(root,
|
440
|
-
# 1. If root is in
|
441
|
-
raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if
|
360
|
+
def crawl_properties(root, memo, reverse)
|
361
|
+
# 1. If root is in memo, then the algorithm fails; abort these steps.
|
362
|
+
raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if memo.include?(root)
|
442
363
|
|
443
364
|
# 2. Collect all the elements in the item root; let results be the resulting list of elements, and errors be the resulting count of errors.
|
444
365
|
results = elements_in_item(root)
|
@@ -447,13 +368,13 @@ module RDF::Microdata
|
|
447
368
|
# 3. Remove any elements from results that do not have an @itemprop (@itemprop-reverse) attribute specified.
|
448
369
|
results = results.select {|e| e.has_attribute?(reverse ? 'itemprop-reverse' : 'itemprop')}
|
449
370
|
|
450
|
-
# 4. Let new
|
451
|
-
raise CrawlFailure, "itemref recursion" if
|
452
|
-
|
371
|
+
# 4. Let new memo be a new list consisting of the old list memo with the addition of root.
|
372
|
+
raise CrawlFailure, "itemref recursion" if memo.detect {|n| root.node.object_id == n.node.object_id}
|
373
|
+
new_memo = memo + [root]
|
453
374
|
|
454
|
-
# 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new
|
375
|
+
# 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new memo as the memo.
|
455
376
|
results.select {|e| e.has_attribute?('itemscope')}.each do |element|
|
456
|
-
log_depth {crawl_properties(element,
|
377
|
+
log_depth {crawl_properties(element, new_memo, reverse)}
|
457
378
|
end
|
458
379
|
|
459
380
|
results
|
@@ -469,7 +390,7 @@ module RDF::Microdata
|
|
469
390
|
def elements_in_item(root)
|
470
391
|
# Let results and pending be empty lists of elements.
|
471
392
|
# Let errors be zero.
|
472
|
-
results,
|
393
|
+
results, memo, errors = [], [], 0
|
473
394
|
|
474
395
|
# Add all the children elements of root to pending.
|
475
396
|
pending = root.elements
|
@@ -487,13 +408,13 @@ module RDF::Microdata
|
|
487
408
|
|
488
409
|
# Loop: Remove an element from pending and let current be that element.
|
489
410
|
while current = pending.shift
|
490
|
-
if
|
411
|
+
if memo.include?(current)
|
491
412
|
raise CrawlFailure, "elements_in_item: results already includes #{current.inspect}"
|
492
413
|
elsif !current.has_attribute?('itemscope')
|
493
414
|
# If current is not already in results and current does not have an itemscope attribute, then: add all the child elements of current to pending.
|
494
415
|
pending += current.elements
|
495
416
|
end
|
496
|
-
|
417
|
+
memo << current
|
497
418
|
|
498
419
|
# If current is not already in results, then: add current to results.
|
499
420
|
results << current unless results.include?(current)
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'json'
|
2
|
+
module RDF::Microdata
|
3
|
+
|
4
|
+
# Interface to registry
|
5
|
+
class Registry
  # @return [RDF::URI] Prefix of vocabulary
  attr_reader :uri

  # @return [Hash] properties
  attr_reader :properties

  ##
  # Initialize the registry from a URI or file path.
  #
  # Loading is skipped when `registry_uri` equals the one most recently
  # loaded. Every top-level JSON entry whose value is a Hash becomes a
  # {Registry} keyed by its prefix; other entries are ignored.
  #
  # @param [String] registry_uri
  # @return [void]
  def self.load_registry(registry_uri)
    return if @registry_uri == registry_uri

    # NOTE(review): JSON.load may honour `create_additions` in some versions
    # of the json library; this presumes the registry document is trusted --
    # confirm before pointing it at arbitrary remote content.
    json = RDF::Util::File.open_file(registry_uri) { |f| ::JSON.load(f) }

    @prefixes = {}
    json.each do |prefix, elements|
      next unless elements.is_a?(Hash)

      @prefixes[prefix] = Registry.new(prefix, elements.fetch("properties", {}))
    end
    # Recorded only after a successful load, so a failed load is retried.
    @registry_uri = registry_uri
  end

  ##
  # Initialize registry for a particular prefix URI
  #
  # @param [RDF::URI] prefixURI
  # @param [Hash] properties ({})
  def initialize(prefixURI, properties = {})
    @uri = prefixURI
    @properties = properties
    @property_base = prefixURI.to_s
    # Append a '#' for fragment if necessary
    @property_base += '#' unless @property_base.end_with?('/', '#')
  end

  ##
  # Find a registry entry given a type URI
  #
  # Returns the entry for the first loaded prefix that `type` starts with.
  #
  # @param [RDF::URI] type
  # @return [Registry, nil] `nil` when no loaded prefix matches
  def self.find(type)
    @prefixes ||= {}
    type_s = type.to_s
    key = @prefixes.keys.find { |prefix| type_s.start_with?(prefix) }
    @prefixes[key] if key
  end

  ##
  # Generate a predicateURI given a `name`
  #
  # @param [#to_s] name
  # @param [#to_s] base_uri
  #   Document base, used only when this registry has no vocabulary URI
  # @return [RDF::URI]
  def predicateURI(name, base_uri)
    u = RDF::URI(name)
    # 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
    return u if u.absolute?

    n = frag_escape(name)
    if uri.nil?
      # 2) If current vocabulary from context is null, there can be no current vocabulary.
      #    Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
      u = RDF::URI(base_uri.to_s)
      u.fragment = n
      u
    else
      # 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
      RDF::URI(@property_base + n)
    end
  end

  ##
  # Yield a equivalentProperty or subPropertyOf if appropriate
  #
  # `subPropertyOf` takes precedence; `equivalentProperty` is consulted only
  # when the property entry has no `subPropertyOf`.
  #
  # @param [RDF::URI] predicateURI
  # @yield equiv
  # @yieldparam [RDF::URI] equiv
  # @return [Enumerator] if no block is given
  def expand(predicateURI)
    return enum_for(:expand, predicateURI) unless block_given?

    entry = @properties[tokenize(predicateURI)]
    return unless entry.is_a?(Hash)

    value = entry.fetch("subPropertyOf", nil)
    value ||= entry.fetch("equivalentProperty", nil)

    Array(value).each { |equiv| yield RDF::URI(equiv) }
  end

  ##
  # Turn a predicateURI into a simple token
  #
  # Only a leading vocabulary prefix is stripped; a URI that merely contains
  # the prefix somewhere in the middle is returned unchanged.
  #
  # @param [RDF::URI] predicateURI
  # @return [String]
  def tokenize(predicateURI)
    str = predicateURI.to_s
    str.start_with?(@property_base) ? str[@property_base.length..-1] : str
  end

  ##
  # Fragment escape a name
  #
  # Percent-encodes (upper-case hex, per byte) each of the characters
  # `" # % < > [ \ ] ^ { | }`.
  #
  # @param [#to_s] name
  # @return [String]
  def frag_escape(name)
    name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
  end
end
|
108
|
+
|
109
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-microdata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-10-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdf
|
@@ -18,6 +18,9 @@ dependencies:
|
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: '2.2'
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 2.2.8
|
21
24
|
type: :runtime
|
22
25
|
prerelease: false
|
23
26
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -25,20 +28,23 @@ dependencies:
|
|
25
28
|
- - "~>"
|
26
29
|
- !ruby/object:Gem::Version
|
27
30
|
version: '2.2'
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.2.8
|
28
34
|
- !ruby/object:Gem::Dependency
|
29
35
|
name: rdf-xsd
|
30
36
|
requirement: !ruby/object:Gem::Requirement
|
31
37
|
requirements:
|
32
38
|
- - "~>"
|
33
39
|
- !ruby/object:Gem::Version
|
34
|
-
version: '2.
|
40
|
+
version: '2.2'
|
35
41
|
type: :runtime
|
36
42
|
prerelease: false
|
37
43
|
version_requirements: !ruby/object:Gem::Requirement
|
38
44
|
requirements:
|
39
45
|
- - "~>"
|
40
46
|
- !ruby/object:Gem::Version
|
41
|
-
version: '2.
|
47
|
+
version: '2.2'
|
42
48
|
- !ruby/object:Gem::Dependency
|
43
49
|
name: htmlentities
|
44
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,14 +65,14 @@ dependencies:
|
|
59
65
|
requirements:
|
60
66
|
- - "~>"
|
61
67
|
- !ruby/object:Gem::Version
|
62
|
-
version: '1.
|
68
|
+
version: '1.8'
|
63
69
|
type: :runtime
|
64
70
|
prerelease: false
|
65
71
|
version_requirements: !ruby/object:Gem::Requirement
|
66
72
|
requirements:
|
67
73
|
- - "~>"
|
68
74
|
- !ruby/object:Gem::Version
|
69
|
-
version: '1.
|
75
|
+
version: '1.8'
|
70
76
|
- !ruby/object:Gem::Dependency
|
71
77
|
name: equivalent-xml
|
72
78
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,14 +107,14 @@ dependencies:
|
|
101
107
|
requirements:
|
102
108
|
- - "~>"
|
103
109
|
- !ruby/object:Gem::Version
|
104
|
-
version: '3.
|
110
|
+
version: '3.6'
|
105
111
|
type: :development
|
106
112
|
prerelease: false
|
107
113
|
version_requirements: !ruby/object:Gem::Requirement
|
108
114
|
requirements:
|
109
115
|
- - "~>"
|
110
116
|
- !ruby/object:Gem::Version
|
111
|
-
version: '3.
|
117
|
+
version: '3.6'
|
112
118
|
- !ruby/object:Gem::Dependency
|
113
119
|
name: rspec-its
|
114
120
|
requirement: !ruby/object:Gem::Requirement
|
@@ -123,6 +129,20 @@ dependencies:
|
|
123
129
|
- - "~>"
|
124
130
|
- !ruby/object:Gem::Version
|
125
131
|
version: '1.2'
|
132
|
+
- !ruby/object:Gem::Dependency
|
133
|
+
name: json-ld
|
134
|
+
requirement: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '2.1'
|
139
|
+
type: :development
|
140
|
+
prerelease: false
|
141
|
+
version_requirements: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '2.1'
|
126
146
|
- !ruby/object:Gem::Dependency
|
127
147
|
name: rdf-spec
|
128
148
|
requirement: !ruby/object:Gem::Requirement
|
@@ -196,8 +216,11 @@ files:
|
|
196
216
|
- lib/rdf/microdata.rb
|
197
217
|
- lib/rdf/microdata/expansion.rb
|
198
218
|
- lib/rdf/microdata/format.rb
|
219
|
+
- lib/rdf/microdata/jsonld_reader.rb
|
220
|
+
- lib/rdf/microdata/rdfa_reader.rb
|
199
221
|
- lib/rdf/microdata/reader.rb
|
200
222
|
- lib/rdf/microdata/reader/nokogiri.rb
|
223
|
+
- lib/rdf/microdata/registry.rb
|
201
224
|
- lib/rdf/microdata/version.rb
|
202
225
|
- lib/rdf/microdata/vocab.rb
|
203
226
|
homepage: http://ruby-rdf.github.com/rdf-microdata
|
@@ -219,8 +242,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
219
242
|
- !ruby/object:Gem::Version
|
220
243
|
version: '0'
|
221
244
|
requirements: []
|
222
|
-
rubyforge_project:
|
223
|
-
rubygems_version: 2.6.
|
245
|
+
rubyforge_project:
|
246
|
+
rubygems_version: 2.6.12
|
224
247
|
signing_key:
|
225
248
|
specification_version: 4
|
226
249
|
summary: Microdata reader for Ruby.
|