rdf-microdata 2.2.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +25 -19
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/etc/doap.html +9 -9
- data/etc/doap.nt +19 -19
- data/etc/doap.ttl +17 -16
- data/lib/rdf/microdata.rb +10 -7
- data/lib/rdf/microdata/expansion.rb +2 -3
- data/lib/rdf/microdata/format.rb +87 -1
- data/lib/rdf/microdata/rdfa_reader.rb +121 -0
- data/lib/rdf/microdata/reader.rb +73 -160
- data/lib/rdf/microdata/reader/nokogiri.rb +13 -5
- data/lib/rdf/microdata/registry.rb +109 -0
- metadata +44 -29
data/lib/rdf/microdata/reader.rb
CHANGED
@@ -8,22 +8,23 @@ module RDF::Microdata
|
|
8
8
|
#
|
9
9
|
# Based on processing rules, amended with the following:
|
10
10
|
#
|
11
|
-
# @see
|
12
|
-
# @author [Gregg Kellogg](
|
11
|
+
# @see https://dvcs.w3.org/hg/htmldata/raw-file/0d6b89f5befb/microdata-rdf/index.html
|
12
|
+
# @author [Gregg Kellogg](https://greggkellogg.net/)
|
13
13
|
class Reader < RDF::Reader
|
14
14
|
format Format
|
15
15
|
include Expansion
|
16
16
|
include RDF::Util::Logger
|
17
17
|
URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video)
|
18
|
-
DEFAULT_REGISTRY = File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "..", "etc", "registry.json"))
|
19
18
|
|
20
19
|
# @private
|
21
20
|
class CrawlFailure < StandardError; end
|
22
21
|
|
23
|
-
# @!attribute [r] implementation
|
24
22
|
# @return [Module] Returns the HTML implementation module for this reader instance.
|
25
23
|
attr_reader :implementation
|
26
24
|
|
25
|
+
# @return [Hash{Object => RDF::Resource}] maps RDF elements (items) to resources
|
26
|
+
attr_reader :memory
|
27
|
+
|
27
28
|
##
|
28
29
|
# Returns the base URI determined by this reader.
|
29
30
|
#
|
@@ -36,109 +37,38 @@ module RDF::Microdata
|
|
36
37
|
@options[:base_uri]
|
37
38
|
end
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
def self.load_registry(registry_uri)
|
52
|
-
return if @registry_uri == registry_uri
|
53
|
-
|
54
|
-
json = RDF::Util::File.open_file(registry_uri) { |f| JSON.load(f) }
|
55
|
-
|
56
|
-
@prefixes = {}
|
57
|
-
json.each do |prefix, elements|
|
58
|
-
next unless elements.is_a?(Hash)
|
59
|
-
properties = elements.fetch("properties", {})
|
60
|
-
@prefixes[prefix] = Registry.new(prefix, properties)
|
61
|
-
end
|
62
|
-
@registry_uri = registry_uri
|
63
|
-
end
|
64
|
-
|
65
|
-
##
|
66
|
-
# Initialize registry for a particular prefix URI
|
67
|
-
#
|
68
|
-
# @param [RDF::URI] prefixURI
|
69
|
-
# @param [Hash] properties ({})
|
70
|
-
def initialize(prefixURI, properties = {})
|
71
|
-
@uri = prefixURI
|
72
|
-
@properties = properties
|
73
|
-
@property_base = prefixURI.to_s
|
74
|
-
# Append a '#' for fragment if necessary
|
75
|
-
@property_base += '#' unless %w(/ #).include?(@property_base[-1,1])
|
76
|
-
end
|
77
|
-
|
78
|
-
##
|
79
|
-
# Find a registry entry given a type URI
|
80
|
-
#
|
81
|
-
# @param [RDF::URI] type
|
82
|
-
# @return [Registry]
|
83
|
-
def self.find(type)
|
84
|
-
@prefixes ||= {}
|
85
|
-
k = @prefixes.keys.detect {|key| type.to_s.index(key) == 0 }
|
86
|
-
@prefixes[k] if k
|
87
|
-
end
|
88
|
-
|
89
|
-
##
|
90
|
-
# Generate a predicateURI given a `name`
|
91
|
-
#
|
92
|
-
# @param [#to_s] name
|
93
|
-
# @param [Hash{}] ec Evaluation Context
|
94
|
-
# @return [RDF::URI]
|
95
|
-
def predicateURI(name, ec)
|
96
|
-
u = RDF::URI(name)
|
97
|
-
# 1) If _name_ is an _absolute URL_, return _name_ as a _URI reference_
|
98
|
-
return u if u.absolute?
|
99
|
-
|
100
|
-
n = frag_escape(name)
|
101
|
-
if ec[:current_type].nil?
|
102
|
-
# 2) If current type from context is null, there can be no current vocabulary.
|
103
|
-
# Return the URI reference that is the document base with its fragment set to the fragment-escaped value of name
|
104
|
-
u = RDF::URI(ec[:document_base].to_s)
|
105
|
-
u.fragment = frag_escape(name)
|
106
|
-
u
|
107
|
-
else
|
108
|
-
# 4) If scheme is vocabulary return the URI reference constructed by appending the fragment escaped value of name to current vocabulary, separated by a U+0023 NUMBER SIGN character (#) unless the current vocabulary ends with either a U+0023 NUMBER SIGN character (#) or SOLIDUS U+002F (/).
|
109
|
-
RDF::URI(@property_base + n)
|
110
|
-
end
|
111
|
-
end
|
40
|
+
##
|
41
|
+
# Reader options
|
42
|
+
# @see https://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
|
43
|
+
def self.options
|
44
|
+
super + [
|
45
|
+
RDF::CLI::Option.new(
|
46
|
+
symbol: :rdfa,
|
47
|
+
datatype: TrueClass,
|
48
|
+
on: ["--rdfa"],
|
49
|
+
description: "Transform and parse as RDFa.") {true},
|
50
|
+
]
|
51
|
+
end
|
112
52
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
Array(value).each {|equiv| yield RDF::URI(equiv)}
|
53
|
+
##
|
54
|
+
# Redirect for RDFa Reader given `:rdfa` option
|
55
|
+
#
|
56
|
+
# @private
|
57
|
+
def self.new(input = nil, **options, &block)
|
58
|
+
klass = if options[:rdfa]
|
59
|
+
# Requires rdf-rdfa gem to be loaded
|
60
|
+
begin
|
61
|
+
require 'rdf/rdfa'
|
62
|
+
rescue LoadError
|
63
|
+
raise ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem"
|
126
64
|
end
|
65
|
+
RdfaReader
|
66
|
+
else
|
67
|
+
self
|
127
68
|
end
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
# @param [RDF::URI] predicateURI
|
132
|
-
# @return [String]
|
133
|
-
def tokenize(predicateURI)
|
134
|
-
predicateURI.to_s.sub(@property_base, '')
|
135
|
-
end
|
136
|
-
|
137
|
-
##
|
138
|
-
# Fragment escape a name
|
139
|
-
def frag_escape(name)
|
140
|
-
name.to_s.gsub(/["#%<>\[\\\]^{|}]/) {|c| '%' + c.unpack('H2' * c.bytesize).join('%').upcase}
|
141
|
-
end
|
69
|
+
reader = klass.allocate
|
70
|
+
reader.send(:initialize, input, **options, &block)
|
71
|
+
reader
|
142
72
|
end
|
143
73
|
|
144
74
|
##
|
@@ -164,7 +94,7 @@ module RDF::Microdata
|
|
164
94
|
# @yieldparam [RDF::Reader] reader
|
165
95
|
# @yieldreturn [void] ignored
|
166
96
|
# @raise [Error] Raises `RDF::ReaderError` when validating
|
167
|
-
def initialize(input = $stdin, options
|
97
|
+
def initialize(input = $stdin, **options, &block)
|
168
98
|
super do
|
169
99
|
@library = :nokogiri
|
170
100
|
|
@@ -173,17 +103,17 @@ module RDF::Microdata
|
|
173
103
|
self.extend(@implementation)
|
174
104
|
|
175
105
|
input.rewind if input.respond_to?(:rewind)
|
176
|
-
initialize_html(input, options) rescue log_fatal($!.message, exception: RDF::ReaderError)
|
106
|
+
initialize_html(input, **options) rescue log_fatal($!.message, exception: RDF::ReaderError)
|
177
107
|
|
178
108
|
log_error("Empty document") if root.nil?
|
179
109
|
log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty?
|
180
110
|
|
181
|
-
log_debug(
|
111
|
+
log_debug('', "library = #{@library}")
|
182
112
|
|
183
113
|
# Load registry
|
184
114
|
begin
|
185
|
-
registry_uri = options[:registry] || DEFAULT_REGISTRY
|
186
|
-
log_debug(
|
115
|
+
registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
|
116
|
+
log_debug('', "registry = #{registry_uri.inspect}")
|
187
117
|
Registry.load_registry(registry_uri)
|
188
118
|
rescue JSON::ParserError => e
|
189
119
|
log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
|
@@ -270,6 +200,7 @@ module RDF::Microdata
|
|
270
200
|
# Parsing a Microdata document (this is *not* the recursive method)
|
271
201
|
def parse_whole_document(doc, base)
|
272
202
|
base = doc_base(base)
|
203
|
+
@memory = {}
|
273
204
|
options[:base_uri] = if (base)
|
274
205
|
# Strip any fragment from base
|
275
206
|
base = base.to_s.split('#').first
|
@@ -280,15 +211,9 @@ module RDF::Microdata
|
|
280
211
|
|
281
212
|
log_info(nil) {"parse_whole_doc: base='#{base}'"}
|
282
213
|
|
283
|
-
ec = {
|
284
|
-
memory: {},
|
285
|
-
current_type: nil,
|
286
|
-
current_vocabulary: nil,
|
287
|
-
document_base: base,
|
288
|
-
}
|
289
214
|
# 1) For each element that is also a top-level item, Generate the triples for that item using the evaluation context.
|
290
215
|
getItems.each do |el|
|
291
|
-
log_depth {generate_triples(el,
|
216
|
+
log_depth {generate_triples(el, Registry.new(nil))}
|
292
217
|
end
|
293
218
|
|
294
219
|
log_info(doc, "parse_whole_doc: traversal complete")
|
@@ -298,12 +223,11 @@ module RDF::Microdata
|
|
298
223
|
# Generate triples for an item
|
299
224
|
#
|
300
225
|
# @param [RDF::Resource] item
|
301
|
-
# @param [
|
226
|
+
# @param [Registry] vocab
|
302
227
|
# @option ec [Hash{Nokogiri::XML::Element} => RDF::Resource] memory
|
303
|
-
# @option ec [RDF::Resource] :
|
228
|
+
# @option ec [RDF::Resource] :current_vocabulary
|
304
229
|
# @return [RDF::Resource]
|
305
|
-
def generate_triples(item,
|
306
|
-
memory = ec[:memory]
|
230
|
+
def generate_triples(item, vocab)
|
307
231
|
# 1) If there is an entry for item in memory, then let subject be the subject of that entry. Otherwise, if item has a global identifier and that global identifier is an absolute URL, let subject be that global identifier. Otherwise, let subject be a new blank node.
|
308
232
|
subject = if memory.include?(item.node)
|
309
233
|
memory[item.node][:subject]
|
@@ -312,12 +236,13 @@ module RDF::Microdata
|
|
312
236
|
end || RDF::Node.new
|
313
237
|
memory[item.node] ||= {}
|
314
238
|
|
315
|
-
log_debug(item) {"gentrips(2): subject=#{subject.inspect},
|
239
|
+
log_debug(item) {"gentrips(2): subject=#{subject.inspect}, vocab: #{vocab.inspect}"}
|
316
240
|
|
317
241
|
# 2) Add a mapping from item to subject in memory, if there isn't one already.
|
318
242
|
memory[item.node][:subject] ||= subject
|
319
243
|
|
320
244
|
# 3) For each type returned from element.itemType of the element defining the item.
|
245
|
+
# 4) Set vocab to the first value returned from element.itemType of the element defining the item.
|
321
246
|
type = nil
|
322
247
|
item.attribute('itemtype').to_s.split(' ').map{|n| uri(n)}.select(&:absolute?).each do |t|
|
323
248
|
# 3.1. If type is an absolute URL, generate the following triple:
|
@@ -325,36 +250,26 @@ module RDF::Microdata
|
|
325
250
|
add_triple(item, subject, RDF.type, t)
|
326
251
|
end
|
327
252
|
|
328
|
-
#
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
vocab = Registry.find(type)
|
336
|
-
|
337
|
-
# 7) Otherwise, if type is not empty, construct vocab by removing everything following the last SOLIDUS U+002F ("/") or NUMBER SIGN U+0023 ("#") from the path component of type.
|
338
|
-
vocab ||= begin
|
339
|
-
type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
|
340
|
-
log_debug(item) {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
|
341
|
-
Registry.new(type_vocab)
|
253
|
+
# 6) If the registry contains a URI prefix that is a character for character match of vocab up to the length of the URI prefix, set vocab as that URI prefix.
|
254
|
+
if type || vocab.nil?
|
255
|
+
vocab = Registry.find(type) || begin
|
256
|
+
type_vocab = type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless type.nil?
|
257
|
+
log_debug(item) {"gentrips(7): type_vocab=#{type_vocab.inspect}"}
|
258
|
+
Registry.new(type_vocab)
|
259
|
+
end
|
342
260
|
end
|
343
261
|
|
344
|
-
#
|
345
|
-
|
262
|
+
# Otherwise, use vocab from evaluation context
|
263
|
+
log_debug(item) {"gentrips(8): vocab: #{vocab.inspect}"}
|
346
264
|
|
347
265
|
# 9. For each element _element_ that has one or more property names and is one of the properties of the item _item_, run the following substep:
|
348
266
|
props = item_properties(item)
|
349
267
|
# 9.1. For each name name in element's property names, run the following substeps:
|
350
268
|
props.each do |element|
|
351
269
|
element.attribute('itemprop').to_s.split(' ').compact.each do |name|
|
352
|
-
log_debug(item) {"gentrips(9.1): name=#{name.inspect},
|
353
|
-
# 9.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
|
354
|
-
ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
|
355
|
-
|
270
|
+
log_debug(item) {"gentrips(9.1): name=#{name.inspect}, vocab=#{vocab.inspect}"}
|
356
271
|
# 9.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
|
357
|
-
predicate = vocab.predicateURI(name,
|
272
|
+
predicate = vocab.predicateURI(name, base_uri)
|
358
273
|
|
359
274
|
# 9.1.3) Let value be the property value of element.
|
360
275
|
value = property_value(element)
|
@@ -362,7 +277,7 @@ module RDF::Microdata
|
|
362
277
|
|
363
278
|
# 9.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
|
364
279
|
if value.is_a?(Hash)
|
365
|
-
value = generate_triples(element,
|
280
|
+
value = generate_triples(element, vocab)
|
366
281
|
log_debug(item) {"gentrips(9.1.4): value=#{value.inspect}"}
|
367
282
|
end
|
368
283
|
|
@@ -384,11 +299,9 @@ module RDF::Microdata
|
|
384
299
|
props.each do |element|
|
385
300
|
element.attribute('itemprop-reverse').to_s.split(' ').compact.each do |name|
|
386
301
|
log_debug(item) {"gentrips(10.1): name=#{name.inspect}"}
|
387
|
-
# 10.1.1) Let context be a copy of evaluation context with current type set to type and current vocabulary set to vocab.
|
388
|
-
ec_new = ec.merge({current_type: type, current_vocabulary: vocab})
|
389
302
|
|
390
303
|
# 10.1.2) Let predicate be the result of generate predicate URI using context and name. Update context by setting current name to predicate.
|
391
|
-
predicate = vocab.predicateURI(name,
|
304
|
+
predicate = vocab.predicateURI(name, base_uri)
|
392
305
|
|
393
306
|
# 10.1.3) Let value be the property value of element.
|
394
307
|
value = property_value(element)
|
@@ -396,7 +309,7 @@ module RDF::Microdata
|
|
396
309
|
|
397
310
|
# 10.1.4) If value is an item, then generate the triples for value context. Replace value by the subject returned from those steps.
|
398
311
|
if value.is_a?(Hash)
|
399
|
-
value = generate_triples(element,
|
312
|
+
value = generate_triples(element, vocab)
|
400
313
|
log_debug(item) {"gentrips(10.1.4): value=#{value.inspect}"}
|
401
314
|
elsif value.is_a?(RDF::Literal)
|
402
315
|
# 10.1.5) Otherwise, if value is a literal, ignore the value and continue to the next name; it is an error for the value of @itemprop-reverse to be a literal
|
@@ -432,13 +345,13 @@ module RDF::Microdata
|
|
432
345
|
# To crawl the properties of an element root with a list memory, the user agent must run the following steps. These steps either fail or return a list with a count of errors. The count of errors is used as part of the authoring conformance criteria below.
|
433
346
|
#
|
434
347
|
# @param [Nokogiri::XML::Element] root
|
435
|
-
# @param [Array<Nokokogiri::XML::Element>]
|
348
|
+
# @param [Array<Nokokogiri::XML::Element>] memo
|
436
349
|
# @param [Boolean] reverse crawl reverse properties
|
437
350
|
# @return [Array<Nokogiri::XML::Element>]
|
438
351
|
# Resultant elements
|
439
|
-
def crawl_properties(root,
|
440
|
-
# 1. If root is in
|
441
|
-
raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if
|
352
|
+
def crawl_properties(root, memo, reverse)
|
353
|
+
# 1. If root is in memo, then the algorithm fails; abort these steps.
|
354
|
+
raise CrawlFailure, "crawl_props mem already has #{root.inspect}" if memo.include?(root)
|
442
355
|
|
443
356
|
# 2. Collect all the elements in the item root; let results be the resulting list of elements, and errors be the resulting count of errors.
|
444
357
|
results = elements_in_item(root)
|
@@ -447,13 +360,13 @@ module RDF::Microdata
|
|
447
360
|
# 3. Remove any elements from results that do not have an @itemprop (@itemprop-reverse) attribute specified.
|
448
361
|
results = results.select {|e| e.has_attribute?(reverse ? 'itemprop-reverse' : 'itemprop')}
|
449
362
|
|
450
|
-
# 4. Let new
|
451
|
-
raise CrawlFailure, "itemref recursion" if
|
452
|
-
|
363
|
+
# 4. Let new memo be a new list consisting of the old list memo with the addition of root.
|
364
|
+
raise CrawlFailure, "itemref recursion" if memo.detect {|n| root.node.object_id == n.node.object_id}
|
365
|
+
new_memo = memo + [root]
|
453
366
|
|
454
|
-
# 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new
|
367
|
+
# 5. For each element in results that has an @itemscope attribute specified, crawl the properties of the element, with new memo as the memo.
|
455
368
|
results.select {|e| e.has_attribute?('itemscope')}.each do |element|
|
456
|
-
log_depth {crawl_properties(element,
|
369
|
+
log_depth {crawl_properties(element, new_memo, reverse)}
|
457
370
|
end
|
458
371
|
|
459
372
|
results
|
@@ -469,7 +382,7 @@ module RDF::Microdata
|
|
469
382
|
def elements_in_item(root)
|
470
383
|
# Let results and pending be empty lists of elements.
|
471
384
|
# Let errors be zero.
|
472
|
-
results,
|
385
|
+
results, memo, errors = [], [], 0
|
473
386
|
|
474
387
|
# Add all the children elements of root to pending.
|
475
388
|
pending = root.elements
|
@@ -487,13 +400,13 @@ module RDF::Microdata
|
|
487
400
|
|
488
401
|
# Loop: Remove an element from pending and let current be that element.
|
489
402
|
while current = pending.shift
|
490
|
-
if
|
403
|
+
if memo.include?(current)
|
491
404
|
raise CrawlFailure, "elements_in_item: results already includes #{current.inspect}"
|
492
405
|
elsif !current.has_attribute?('itemscope')
|
493
406
|
# If current is not already in results and current does not have an itemscope attribute, then: add all the child elements of current to pending.
|
494
407
|
pending += current.elements
|
495
408
|
end
|
496
|
-
|
409
|
+
memo << current
|
497
410
|
|
498
411
|
# If current is not already in results, then: add current to results.
|
499
412
|
results << current unless results.include?(current)
|
@@ -510,7 +423,7 @@ module RDF::Microdata
|
|
510
423
|
value = case
|
511
424
|
when element.has_attribute?('itemscope')
|
512
425
|
{}
|
513
|
-
when element.
|
426
|
+
when element.has_attribute?('content')
|
514
427
|
RDF::Literal.new(element.attribute('content').to_s, language: element.language)
|
515
428
|
when %w(data meter).include?(element.name) && element.attribute('value')
|
516
429
|
# Lexically scan value and assign appropriate type, otherwise, leave untyped
|
@@ -3,7 +3,7 @@ module RDF::Microdata
|
|
3
3
|
##
|
4
4
|
# Nokogiri implementation of an HTML parser.
|
5
5
|
#
|
6
|
-
# @see
|
6
|
+
# @see https://nokogiri.org/
|
7
7
|
module Nokogiri
|
8
8
|
##
|
9
9
|
# Returns the name of the underlying XML library.
|
@@ -103,6 +103,12 @@ module RDF::Microdata
|
|
103
103
|
NodeSetProxy.new(@node.elements, self)
|
104
104
|
end
|
105
105
|
|
106
|
+
##
|
107
|
+
# Rational debug output
|
108
|
+
def to_str
|
109
|
+
@node.path
|
110
|
+
end
|
111
|
+
|
106
112
|
##
|
107
113
|
# Proxy for everything else to @node
|
108
114
|
def method_missing(method, *args)
|
@@ -172,7 +178,7 @@ module RDF::Microdata
|
|
172
178
|
#
|
173
179
|
# @param [Hash{Symbol => Object}] options
|
174
180
|
# @return [void]
|
175
|
-
def initialize_html(input, options
|
181
|
+
def initialize_html(input, **options)
|
176
182
|
require 'nokogiri' unless defined?(::Nokogiri)
|
177
183
|
@doc = case input
|
178
184
|
when ::Nokogiri::XML::Document
|
@@ -188,7 +194,7 @@ module RDF::Microdata
|
|
188
194
|
begin
|
189
195
|
require 'nokogumbo' unless defined?(::Nokogumbo)
|
190
196
|
input = input.read if input.respond_to?(:read)
|
191
|
-
::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
|
197
|
+
::Nokogiri::HTML5(input.force_encoding(options[:encoding]), max_parse_errors: 1000)
|
192
198
|
rescue LoadError
|
193
199
|
::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding])
|
194
200
|
end
|
@@ -206,7 +212,9 @@ module RDF::Microdata
|
|
206
212
|
##
|
207
213
|
# Document errors
|
208
214
|
def doc_errors
|
209
|
-
@doc.errors.reject
|
215
|
+
@doc.errors.reject do |e|
|
216
|
+
e.to_s =~ %r{(The doctype must be the first token in the document)|(Expected a doctype token)|(Unexpected '\?' where start tag name is expected)}
|
217
|
+
end
|
210
218
|
end
|
211
219
|
|
212
220
|
##
|
@@ -224,7 +232,7 @@ module RDF::Microdata
|
|
224
232
|
##
|
225
233
|
# Based on Microdata element.getItems
|
226
234
|
#
|
227
|
-
# @see
|
235
|
+
# @see https://www.w3.org/TR/2011/WD-microdata-20110525/#top-level-microdata-items
|
228
236
|
def getItems
|
229
237
|
@doc.css('[itemscope]').select {|el| !el.has_attribute?('itemprop')}.map {|n| NodeProxy.new(n)}
|
230
238
|
end
|