rdf-microdata 2.2.1 → 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +32 -21
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/etc/doap.html +9 -9
- data/etc/doap.nt +19 -19
- data/etc/doap.ttl +20 -21
- data/lib/rdf/microdata.rb +10 -7
- data/lib/rdf/microdata/expansion.rb +2 -3
- data/lib/rdf/microdata/format.rb +87 -1
- data/lib/rdf/microdata/rdfa_reader.rb +121 -0
- data/lib/rdf/microdata/reader.rb +72 -159
- data/lib/rdf/microdata/reader/nokogiri.rb +13 -5
- data/lib/rdf/microdata/registry.rb +109 -0
- metadata +57 -30
data/lib/rdf/microdata/reader.rb
CHANGED
@@ -8,22 +8,23 @@ module RDF::Microdata
|
|
8
8
|
#
|
9
9
|
# Based on processing rules, amended with the following:
|
10
10
|
#
|
11
|
-
# @see
|
12
|
-
# @author [Gregg Kellogg](
|
11
|
+
# @see https://dvcs.w3.org/hg/htmldata/raw-file/0d6b89f5befb/microdata-rdf/index.html
|
12
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
13
13
|
class Reader < RDF::Reader
|
14
14
|
format Format
|
15
15
|
include Expansion
|
16
16
|
include RDF::Util::Logger
|
17
17
|
URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video)
|
18
|
-
DEFAULT_REGISTRY = File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "..", "etc", "registry.json"))
|
19
18
|
|
20
19
|
# @private
|
21
20
|
class CrawlFailure < StandardError; end
|
22
21
|
|
23
|
-
# @!attribute [r] implementation
|
24
22
|
# @return [Module] Returns the HTML implementation module for this reader instance.
|
25
23
|
attr_reader :implementation
|
26
24
|
|
25
|
+
# @return [Hash{Object => RDF::Resource}] maps RDF elements (items) to resources
|
26
|
+
attr_reader :memory
|
27
|
+
|
27
28
|
##
|
28
29
|
# Returns the base URI determined by this reader.
|
29
30
|
#
|
@@ -36,109 +37,38 @@ module RDF::Microdata
|
|
36
37
|
@options[:base_uri]
|
37
38
|
end
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
def self.load_registry(registry_uri)
|
52
|
-
return if @registry_uri == registry_uri
|
53
|
-
|
54
|
-
json = RDF::Util::File.open_file(registry_uri) { |f| JSON.load(f) }
|
55
|
-
|
56
|
-
@prefixes = {}
|
57
|
-
json.each do |prefix, elements|
|
58
|
-
next unless elements.is_a?(Hash)
|
59
|
-
properties = elements.fetch("properties", {})
|
60
|
-
@prefixes[prefix] = Registry.new(prefix, properties)
|
61
|
-
end
|
62
|
-
@registry_uri = registry_uri
|
63
|
-
end
|
64
|
-
|
65
|
-
##
|
66
|
-
# Initialize registry for a particular prefix URI
|
67
|
-
#
|
68
|
-
# @param [RDF::URI] prefixURI
|
69
|
-
# @param [Hash] properties ({})
|
70
|
-
def initialize(prefixURI, properties = {})
|
71
|
-
@uri = prefixURI
|
72
|
-
@properties = properties
|
73
|
-
@property_base = prefixURI.to_s
|
74
|
-
# Append a '#' for fragment if necessary
|
75
|
-
@property_base += '#' unless %w(/ #).include?(@property_base[-1,1])
|
76
|
-
end
|
77
|
-
|
78
|
-
##
|
79
|
-
# Find a registry entry given a type URI
|
80
|
-
#
|
81
|
-
# @param [RDF::URI] type
|
82
|
-
# @return [Registry]
|
83
|
-
def self.find(type)
|
84
|
-
@prefixes ||= {}
|
85
|
-
k = @prefixes.keys.detect {|key| type.to_s.index(key) == 0 }
|
86
|
-
@prefixes[k] if k
|
87
|
-
end
|
88
|
-
|
89
|
-
##
|
90
|
-
# Generate a predicateURI given a `name`
|
91
|
-
#
|
92
|
-
# @param [#to_s] name
|
93
|
-
# @param [Hash{}] ec Evaluation Context
|
94
|
-
# @return [RDF::URI]
|
95
|
-
def predicateURI(name, ec)
|
96
|
-
u = RDF::URI(name)
|
97
|
-
# 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
|
98
|
-
return u if u.absolute?
|
99
|
-
|
100
|
-
n = frag_escape(name)
|
101
|
-
if ec[:current_type].nil?
|
102
|
-
# 2) If current type from context is null, there can be no current vocabulary.
|
103
|
-
# Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
|
104
|
-
u = RDF::URI(ec[:document_base].to_s)
|
105
|
-
u.fragment = frag_escape(name)
|
106
|
-
u
|
107
|
-
else
|
108
|
-
# 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
|
109
|
-
RDF::URI(@property_base + n)
|
110
|
-
end
|
111
|
-
end
|
40
|
+
##
|
41
|
+
# Reader options
|
42
|
+
# @see https://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
|
43
|
+
def self.options
|
44
|
+
super + [
|
45
|
+
RDF::CLI::Option.new(
|
46
|
+
symbol: :rdfa,
|
47
|
+
datatype: TrueClass,
|
48
|
+
on: ["--rdfa"],
|
49
|
+
description: "Transform and parse as RDFa.") {true},
|
50
|
+
]
|
51
|
+
end
|
112
52
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
Array(value).each {|equiv| yield RDF::URI(equiv)}
|
53
|
+
##
|
54
|
+
# Redirect for RDFa Reader given `:rdfa` option
|
55
|
+
#
|
56
|
+
# @private
|
57
|
+
def self.new(input = nil, **options, &block)
|
58
|
+
klass = if options[:rdfa]
|
59
|
+
# Requires rdf-rdfa gem to be loaded
|
60
|
+
begin
|
61
|
+
require 'rdf/rdfa'
|
62
|
+
rescue LoadError
|
63
|
+
raise ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem"
|
126
64
|
end
|
65
|
+
RdfaReader
|
66
|
+
else
|
67
|
+
self
|
127
68
|
end
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
# @param [RDF::URI] predicateURI
|
132
|
-
# @return [String]
|
133
|
-
def tokenize(predicateURI)
|
134
|
-
predicateURI.to_s.sub(@property_base, '')
|
135
|
-
end
|
136
|
-
|
137
|
-
##
|
138
|
-
# Fragment escape a name
|
139
|
-
def frag_escape(name)
|
140
|
-
name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
|
141
|
-
end
|
69
|
+
reader = klass.allocate
|
70
|
+
reader.send(:initialize, input, **options, &block)
|
71
|
+
reader
|
142
72
|
end
|
143
73
|
|
144
74
|
##
|
@@ -164,7 +94,7 @@ module RDF::Microdata
|
|
164
94
|
# @yieldparam [RDF::Reader] reader
|
165
95
|
# @yieldreturn [void] ignored
|
166
96
|
# @raise [Error] Raises `RDF::ReaderError` when validating
|
167
|
-
def initialize(input = $stdin, options
|
97
|
+
def initialize(input = $stdin, **options, &block)
|
168
98
|
super do
|
169
99
|
@library = :nokogiri
|
170
100
|
|
@@ -173,17 +103,17 @@ module RDF::Microdata
|
|
173
103
|
self.extend(@implementation)
|
174
104
|
|
175
105
|
input.rewind if input.respond_to?(:rewind)
|
176
|
-
initialize_html(input, options) rescue log_fatal($!.message, exception: RDF::ReaderError)
|
106
|
+
initialize_html(input, **options) rescue log_fatal($!.message, exception: RDF::ReaderError)
|
177
107
|
|
178
108
|
log_error("Empty document") if root.nil?
|
179
109
|
log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty?
|
180
110
|
|
181
|
-
log_debug(
|
111
|
+
log_debug('', "library = #{@library}")
|
182
112
|
|
183
113
|
# Load registry
|
184
114
|
begin
|
185
|
-
registry_uri = options[:registry] || DEFAULT_REGISTRY
|
186
|
-
log_debug(
|
115
|
+
registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
|
116
|
+
log_debug('', "registry = #{registry_uri.inspect}")
|
187
117
|
Registry.load_registry(registry_uri)
|
188
118
|
rescue JSON::ParserError => e
|
189
119
|
log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
|
@@ -270,6 +200,7 @@ module RDF::Microdata
|
|
270
200
|
# Parsing a Microdata document (this is *not* the recursive method)
|
271
201
|
def parse_whole_document(doc, base)
|
272
202
|
base = doc_base(base)
|
203
|
+
@memory = {}
|
273
204
|
options[:base_uri] = if (base)
|
274
205
|
# Strip any fragment from base
|
275
206
|
base = base.to_s.split('#').first
|
@@ -280,15 +211,9 @@ module RDF::Microdata
|
|
280
211
|
|
281
212
|
log_info(nil) {"parse_whole_doc: base='#{base}'"}
|
282
213
|
|
283
|
-
ec = {
|
284
|
-
memory: {},
|
285
|
-
current_type: nil,
|
286
|
-
current_vocabulary: nil,
|
287
|
-
document_base: base,
|
288
|
-
}
|
289
214
|
# 1) For each element that is also a top-level item, Generate the triples for that item using the evaluation context.
|
290
215
|
getItems.each do |el|
|
291
|
-
log_depth {generate_triples(el,
|
216
|
+
log_depth {generate_triples(el, Registry.new(nil))}
|
292
217
|
end
|
293
218
|
|
294
219
|
log_info(doc, "parse_whole_doc: traversal complete")
|
@@ -298,12 +223,11 @@ module RDF::Microdata
|
|
298
223
|
# Generate triples for an item
|
299
224
|
#
|
300
225
|
# @param [RDF::Resource] item
|
301
|
-
# @param [
|
226
|
+
# @param [Registry] vocab
|
302
227
|
# @option ec [Hash{Nokogiri::XML::Element} => RDF::Resource] memory
|
303
|
-
# @option ec [RDF::Resource] :
|
228
|
+
# @option ec [RDF::Resource] :current_vocabulary
|
304
229
|
# @return [RDF::Resource]
|
305
|
-
def generate_triples(item,
|
306
|
-
memory = ec[:memory]
|
230
|
+
def generate_triples(item, vocab)
|
307
231
|
# 1) If there is an entry for item in memory, then let subject be the subject of that entry. Otherwise, if item has a global identifier and that global identifier is an absolute URL, let subject be that global identifier. Otherwise, let subject be a new blank node.
|
308
232
|
subject = if memory.include?(item.node)
|
309
233
|
memory[item.node][:subject]
|
@@ -312,12 +236,13 @@ module RDF::Microdata
|
|
312
236
|
end || RDF::Node.new
|
313
237
|
memory[item.node] ||= {}
|
314
238
|
|
315
|
-
log_debug(item) {"gentrips(2): subject=#{subject.inspect},
|
239
|
+
log_debug(item) {"gentrips(2): subject=#{subject.inspect}, vocab: #{vocab.inspect}"}
|
316
240
|
|
317
241
|
# 2) Add a mapping from item to subject in memory, if there isn't one already.
|
318
242
|
memory[item.node][:subject] ||= subject
|
319
243
|
|
320
244
|
# 3) For each type returned from element.itemType of the element defining the item.
|
245
|
+
# 4) Set vocab to the first value returned from element.itemType of the element defining the item.
|
321
246
|
type = nil
|
322
247
|
item.attribute('itemtype').to_s.split(' ').map{|n| uri(n)}.select(&:absolute?).each do |t|
|
323
248
|
# 3.1. If type is an absolute URL, generate the following triple:
|
@@ -325,36 +250,26 @@ module RDF::Microdata
|
|
325
250
|
add_triple(item, subject, RDF.type, t)
|
326
251
|
end
|
327
252
|
|
328
|
-
#
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
vocab = Registry.find(type)
|
336
|
-
|
337
|
-
# 7) Otherwise, if type is not empty, construct vocab by removing everything following the last SOLIDUS U+002F ("/") or NUMBER SIGN U+0023 ("#") from the path component of type.
|
338
|
-
vocab ||= begin
|
339
|
-
type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
|
340
|
-
log_debug(item) {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
|
341
|
-
Registry.new(type_vocab)
|
253
|
+
# 6) If the registry contains a URI prefix that is a character for character match of vocab up to the length of the URI prefix, set vocab as that URI prefix.
|
254
|
+
if type || vocab.nil?
|
255
|
+
vocab = Registry.find(type) || begin
|
256
|
+
type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless type.nil?
|
257
|
+
log_debug(item) {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
|
258
|
+
Registry.new(type_vocab)
|
259
|
+
end
|
342
260
|
end
|
343
261
|
|
344
|
-
#
|
345
|
-
|
262
|
+
# Otherwise, use vocab from evaluation context
|
263
|
+
log_debug(item) {"gentrips(8): vocab: #{vocab.inspect}"}
|
346
264
|
|
347
265
|
# 9. For each element _element_ that has one or more property names and is one of the properties of the item _item_, run the following substep:
|
348
266
|
props = item_properties(item)
|
349
267
|
# 9.1. For each name name in element's property names, run the following substeps:
|
350
268
|
props.each do |element|
|
351
269
|
element.attribute('itemprop').to_s.split(' ').compact.each do |name|
|
352
|
-
log_debug(item) {"gentrips(9.1): name=#{name.inspect},
|
353
|
-
# 9.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
|
354
|
-
ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
|
355
|
-
|
270
|
+
log_debug(item) {"gentrips(9.1): name=#{name.inspect}, vocab=#{vocab.inspect}"}
|
356
271
|
# 9.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
|
357
|
-
predicate = vocab.predicateURI(name,
|
272
|
+
predicate = vocab.predicateURI(name, base_uri)
|
358
273
|
|
359
274
|
# 9.1.3) Let value be the property value of element.
|
360
275
|
value = property_value(element)
|
@@ -362,7 +277,7 @@ module RDF::Microdata
|
|
362
277
|
|
363
278
|
# 9.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
|
364
279
|
if value.is_a?(Hash)
|
365
|
-
value = generate_triples(element,
|
280
|
+
value = generate_triples(element, vocab)
|
366
281
|
log_debug(item) {"gentrips(9.1.4): value=#{value.inspect}"}
|
367
282
|
end
|
368
283
|
|
@@ -384,11 +299,9 @@ module RDF::Microdata
|
|
384
299
|
props.each do |element|
|
385
300
|
element.attribute('itemprop-reverse').to_s.split(' ').compact.each do |name|
|
386
301
|
log_debug(item) {"gentrips(10.1): name=#{name.inspect}"}
|
387
|
-
# 10.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
|
388
|
-
ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
|
389
302
|
|
390
303
|
# 10.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
|
391
|
-
predicate = vocab.predicateURI(name,
|
304
|
+
predicate = vocab.predicateURI(name, base_uri)
|
392
305
|
|
393
306
|
# 10.1.3) Let value be the property value of element.
|
394
307
|
value = property_value(element)
|
@@ -396,7 +309,7 @@ module RDF::Microdata
|
|
396
309
|
|
397
310
|
# 10.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
|
398
311
|
if value.is_a?(Hash)
|
399
|
-
value = generate_triples(element,
|
312
|
+
value = generate_triples(element, vocab)
|
400
313
|
log_debug(item) {"gentrips(10.1.4): value=#{value.inspect}"}
|
401
314
|
elsif value.is_a?(RDF::Literal)
|
402
315
|
# 10.1.5) Otherwise, if value is a literal, ignore the value and continue to the next name; it is an error for the value of @itemprop-reverse to be a literal
|
@@ -432,13 +345,13 @@ module RDF::Microdata
|
|
432
345
|
# To crawl the properties of an element root with a list memory, the user agent must run the following steps. These steps either fail or return a list with a count of errors. The count of errors is used as part of the authoring conformance criteria below.
|
433
346
|
#
|
434
347
|
# @param [Nokogiri::XML::Element] root
|
435
|
-
# @param [Array<Nokokogiri::XML::Element>]
|
348
|
+
# @param [Array<Nokokogiri::XML::Element>] memo
|
436
349
|
# @param [Boolean] reverse crawl reverse properties
|
437
350
|
# @return [Array<Nokogiri::XML::Element>]
|
438
351
|
# Resultant elements
|
439
|
-
def crawl_properties(root,
|
440
|
-
# 1. If root is in
|
441
|
-
raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if
|
352
|
+
def crawl_properties(root, memo, reverse)
|
353
|
+
# 1. If root is in memo, then the algorithm fails; abort these steps.
|
354
|
+
raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if memo.include?(root)
|
442
355
|
|
443
356
|
# 2. Collect all the elements in the item root; let results be the resulting list of elements, and errors be the resulting count of errors.
|
444
357
|
results = elements_in_item(root)
|
@@ -447,13 +360,13 @@ module RDF::Microdata
|
|
447
360
|
# 3. Remove any elements from results that do not have an @itemprop (@itemprop-reverse) attribute specified.
|
448
361
|
results = results.select {|e| e.has_attribute?(reverse ? 'itemprop-reverse' : 'itemprop')}
|
449
362
|
|
450
|
-
# 4. Let new
|
451
|
-
raise CrawlFailure, "itemref recursion" if
|
452
|
-
|
363
|
+
# 4. Let new memo be a new list consisting of the old list memo with the addition of root.
|
364
|
+
raise CrawlFailure, "itemref recursion" if memo.detect {|n| root.node.object_id == n.node.object_id}
|
365
|
+
new_memo = memo + [root]
|
453
366
|
|
454
|
-
# 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new
|
367
|
+
# 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new memo as the memo.
|
455
368
|
results.select {|e| e.has_attribute?('itemscope')}.each do |element|
|
456
|
-
log_depth {crawl_properties(element,
|
369
|
+
log_depth {crawl_properties(element, new_memo, reverse)}
|
457
370
|
end
|
458
371
|
|
459
372
|
results
|
@@ -469,7 +382,7 @@ module RDF::Microdata
|
|
469
382
|
def elements_in_item(root)
|
470
383
|
# Let results and pending be empty lists of elements.
|
471
384
|
# Let errors be zero.
|
472
|
-
results,
|
385
|
+
results, memo, errors = [], [], 0
|
473
386
|
|
474
387
|
# Add all the children elements of root to pending.
|
475
388
|
pending = root.elements
|
@@ -487,13 +400,13 @@ module RDF::Microdata
|
|
487
400
|
|
488
401
|
# Loop: Remove an element from pending and let current be that element.
|
489
402
|
while current = pending.shift
|
490
|
-
if
|
403
|
+
if memo.include?(current)
|
491
404
|
raise CrawlFailure, "elements_in_item: results already includes #{current.inspect}"
|
492
405
|
elsif !current.has_attribute?('itemscope')
|
493
406
|
# If current is not already in results and current does not have an itemscope attribute, then: add all the child elements of current to pending.
|
494
407
|
pending += current.elements
|
495
408
|
end
|
496
|
-
|
409
|
+
memo << current
|
497
410
|
|
498
411
|
# If current is not already in results, then: add current to results.
|
499
412
|
results << current unless results.include?(current)
|
@@ -3,7 +3,7 @@ module RDF::Microdata
|
|
3
3
|
##
|
4
4
|
# Nokogiri implementation of an HTML parser.
|
5
5
|
#
|
6
|
-
# @see
|
6
|
+
# @see https://nokogiri.org/
|
7
7
|
module Nokogiri
|
8
8
|
##
|
9
9
|
# Returns the name of the underlying XML library.
|
@@ -103,6 +103,12 @@ module RDF::Microdata
|
|
103
103
|
NodeSetProxy.new(@node.elements, self)
|
104
104
|
end
|
105
105
|
|
106
|
+
##
|
107
|
+
# Rational debug output
|
108
|
+
def to_str
|
109
|
+
@node.path
|
110
|
+
end
|
111
|
+
|
106
112
|
##
|
107
113
|
# Proxy for everything else to @node
|
108
114
|
def method_missing(method, *args)
|
@@ -172,7 +178,7 @@ module RDF::Microdata
|
|
172
178
|
#
|
173
179
|
# @param [Hash{Symbol => Object}] options
|
174
180
|
# @return [void]
|
175
|
-
def initialize_html(input, options
|
181
|
+
def initialize_html(input, **options)
|
176
182
|
require 'nokogiri' unless defined?(::Nokogiri)
|
177
183
|
@doc = case input
|
178
184
|
when ::Nokogiri::XML::Document
|
@@ -188,7 +194,7 @@ module RDF::Microdata
|
|
188
194
|
begin
|
189
195
|
require 'nokogumbo' unless defined?(::Nokogumbo)
|
190
196
|
input = input.read if input.respond_to?(:read)
|
191
|
-
::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
|
197
|
+
::Nokogiri::HTML5(input.force_encoding(options[:encoding]), max_parse_errors: 1000)
|
192
198
|
rescue LoadError
|
193
199
|
::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding])
|
194
200
|
end
|
@@ -206,7 +212,9 @@ module RDF::Microdata
|
|
206
212
|
##
|
207
213
|
# Document errors
|
208
214
|
def doc_errors
|
209
|
-
@doc.errors.reject
|
215
|
+
@doc.errors.reject do |e|
|
216
|
+
e.to_s =~ %r{(The doctype must be the first token in the document)|(Expected a doctype token)|(Unexpected '\?' where start tag name is expected)}
|
217
|
+
end
|
210
218
|
end
|
211
219
|
|
212
220
|
##
|
@@ -224,7 +232,7 @@ module RDF::Microdata
|
|
224
232
|
##
|
225
233
|
# Based on Microdata element.getItems
|
226
234
|
#
|
227
|
-
# @see
|
235
|
+
# @see https://www.w3.org/TR/2011/WD-microdata-20110525/#top-level-microdata-items
|
228
236
|
def getItems
|
229
237
|
@doc.css('[itemscope]').select {|el| !el.has_attribute?('itemprop')}.map {|n| NodeProxy.new(n)}
|
230
238
|
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'json'
|
2
|
+
module RDF::Microdata
|
3
|
+
|
4
|
+
# Interface to registry
|
5
|
+
class Registry
|
6
|
+
# @return [RDF::URI] Prefix of vocabulary
|
7
|
+
attr_reader :uri
|
8
|
+
|
9
|
+
# @return [Hash] properties
|
10
|
+
attr_reader :properties
|
11
|
+
|
12
|
+
##
|
13
|
+
# Initialize the registry from a URI or file path
|
14
|
+
#
|
15
|
+
# @param [String] registry_uri
|
16
|
+
def self.load_registry(registry_uri)
|
17
|
+
return if @registry_uri == registry_uri
|
18
|
+
|
19
|
+
json = RDF::Util::File.open_file(registry_uri) { |f| ::JSON.load(f) }
|
20
|
+
|
21
|
+
@prefixes = {}
|
22
|
+
json.each do |prefix, elements|
|
23
|
+
next unless elements.is_a?(Hash)
|
24
|
+
properties = elements.fetch("properties", {})
|
25
|
+
@prefixes[prefix] = Registry.new(prefix, properties)
|
26
|
+
end
|
27
|
+
@registry_uri = registry_uri
|
28
|
+
end
|
29
|
+
|
30
|
+
##
|
31
|
+
# Initialize registry for a particular prefix URI
|
32
|
+
#
|
33
|
+
# @param [RDF::URI] prefixURI
|
34
|
+
# @param [Hash] properties ({})
|
35
|
+
def initialize(prefixURI, properties = {})
|
36
|
+
@uri = prefixURI
|
37
|
+
@properties = properties
|
38
|
+
@property_base = prefixURI.to_s
|
39
|
+
# Append a '#' for fragment if necessary
|
40
|
+
@property_base += '#' unless %w(/ #).include?(@property_base[-1,1])
|
41
|
+
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# Find a registry entry given a type URI
|
45
|
+
#
|
46
|
+
# @param [RDF::URI] type
|
47
|
+
# @return [Registry]
|
48
|
+
def self.find(type)
|
49
|
+
@prefixes ||= {}
|
50
|
+
k = @prefixes.keys.detect {|key| type.to_s.index(key) == 0 }
|
51
|
+
@prefixes[k] if k
|
52
|
+
end
|
53
|
+
|
54
|
+
##
|
55
|
+
# Generate a predicateURI given a `name`
|
56
|
+
#
|
57
|
+
# @param [#to_s] name
|
58
|
+
# @param [RDF::URI] base_uri base URI for resolving `name`.
|
59
|
+
# @return [RDF::URI]
|
60
|
+
def predicateURI(name, base_uri)
|
61
|
+
u = RDF::URI(name)
|
62
|
+
# 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
|
63
|
+
return u if u.absolute?
|
64
|
+
|
65
|
+
n = frag_escape(name)
|
66
|
+
if uri.nil?
|
67
|
+
# 2) If current vocabulary from context is null, there can be no current vocabulary.
|
68
|
+
# Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
|
69
|
+
u = RDF::URI(base_uri.to_s)
|
70
|
+
u.fragment = frag_escape(name)
|
71
|
+
u
|
72
|
+
else
|
73
|
+
# 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
|
74
|
+
RDF::URI(@property_base + n)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
##
|
79
|
+
# Yield a equivalentProperty or subPropertyOf if appropriate
|
80
|
+
#
|
81
|
+
# @param [RDF::URI] predicateURI
|
82
|
+
# @yield equiv
|
83
|
+
# @yieldparam [RDF::URI] equiv
|
84
|
+
def expand(predicateURI)
|
85
|
+
tok = tokenize(predicateURI)
|
86
|
+
if @properties[tok].is_a?(Hash)
|
87
|
+
value = @properties[tok].fetch("subPropertyOf", nil)
|
88
|
+
value ||= @properties[tok].fetch("equivalentProperty", nil)
|
89
|
+
|
90
|
+
Array(value).each {|equiv| yield RDF::URI(equiv)}
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
##
|
95
|
+
# Turn a predicateURI into a simple token
|
96
|
+
# @param [RDF::URI] predicateURI
|
97
|
+
# @return [String]
|
98
|
+
def tokenize(predicateURI)
|
99
|
+
predicateURI.to_s.sub(@property_base, '')
|
100
|
+
end
|
101
|
+
|
102
|
+
##
|
103
|
+
# Fragment escape a name
|
104
|
+
def frag_escape(name)
|
105
|
+
name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
end
|