rdf-rdfa 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,10 +4,27 @@ module RDF::RDFa
4
4
  ##
5
5
  # An RDFa parser in Ruby
6
6
  #
7
+ # Based on processing rules described here:
8
+ # @see http://www.w3.org/TR/rdfa-syntax/#s_model RDFa 1.0
9
+ # @see http://www.w3.org/2010/02/rdfa/drafts/2010/ED-rdfa-core-20100803/ RDFa 1.1
10
+ #
7
11
  # @author [Gregg Kellogg](http://kellogg-assoc.com/)
8
12
  class Reader < RDF::Reader
9
13
  format Format
10
14
 
15
+ SafeCURIEorCURIEorURI = {
16
+ :rdfa_1_0 => [:term, :safe_curie, :uri, :bnode],
17
+ :rdfa_1_1 => [:safe_curie, :curie, :term, :uri, :bnode],
18
+ }
19
+ TERMorCURIEorAbsURI = {
20
+ :rdfa_1_0 => [:term, :curie],
21
+ :rdfa_1_1 => [:term, :curie, :absuri],
22
+ }
23
+ TERMorCURIEorAbsURIprop = {
24
+ :rdfa_1_0 => [:curie],
25
+ :rdfa_1_1 => [:term, :curie, :absuri],
26
+ }
27
+
11
28
  NC_REGEXP = Regexp.new(
12
29
  %{^
13
30
  (?!\\\\u0301) # &#x301; is a non-spacing acute accent.
@@ -21,49 +38,78 @@ module RDF::RDFa
21
38
  $},
22
39
  Regexp::EXTENDED)
23
40
 
24
- # Host language, One of:
25
- # :xhtml_rdfa_1_0
26
- # :xhtml_rdfa_1_1
41
+ # Host language
42
+ # @return [:xhtml]
27
43
  attr_reader :host_language
28
44
 
29
45
  # The Recursive Baggage
46
+ # @private
30
47
  class EvaluationContext # :nodoc:
31
- # The base. This will usually be the URL of the document being processed,
48
+ # The base.
49
+ #
50
+ # This will usually be the URL of the document being processed,
32
51
  # but it could be some other URL, set by some other mechanism,
33
52
  # such as the (X)HTML base element. The important thing is that it establishes
34
53
  # a URL against which relative paths can be resolved.
54
+ #
55
+ # @return [URI]
35
56
  attr :base, true
36
57
  # The parent subject.
58
+ #
37
59
  # The initial value will be the same as the initial value of base,
38
60
  # but it will usually change during the course of processing.
61
+ #
62
+ # @return [URI]
39
63
  attr :parent_subject, true
40
64
  # The parent object.
65
+ #
41
66
  # In some situations the object of a statement becomes the subject of any nested statements,
42
67
  # and this property is used to convey this value.
43
68
  # Note that this value may be a bnode, since in some situations a number of nested statements
44
69
  # are grouped together on one bnode.
45
70
  # This means that the bnode must be set in the containing statement and passed down,
46
71
  # and this property is used to convey this value.
72
+ #
73
+ # @return [URI]
47
74
  attr :parent_object, true
48
75
  # A list of current, in-scope URI mappings.
76
+ #
77
+ # @return [Hash{Symbol => String}]
49
78
  attr :uri_mappings, true
50
- # A list of incomplete triples. A triple can be incomplete when no object resource
79
+ # A list of incomplete triples.
80
+ #
81
+ # A triple can be incomplete when no object resource
51
82
  # is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
52
83
  # The triples can be completed when a resource becomes available,
53
84
  # which will be when the next subject is specified (part of the process called chaining).
85
+ #
86
+ # @return [Array<Array<URI, Resource>>]
54
87
  attr :incomplete_triples, true
55
88
  # The language. Note that there is no default language.
89
+ #
90
+ # @return [Symbol]
56
91
  attr :language, true
57
92
  # The term mappings, a list of terms and their associated URIs.
93
+ #
58
94
  # This specification does not define an initial list.
59
95
  # Host Languages may define an initial list.
60
96
  # If a Host Language provides an initial list, it should do so via an RDFa Profile document.
97
+ #
98
+ # @return [Hash{Symbol => URI}]
61
99
  attr :term_mappings, true
62
- # The default vocabulary, a value to use as the prefix URI when a term is used.
100
+ # The default vocabulary
101
+ #
102
+ # A value to use as the prefix URI when a term is used.
63
103
  # This specification does not define an initial setting for the default vocabulary.
64
104
  # Host Languages may define an initial setting.
105
+ #
106
+ # @return [URI]
65
107
  attr :default_vocabulary, true
66
108
 
109
+ # @param [RDF::URI] base
110
+ # @param [Hash] host_defaults
111
+ # @option host_defaults [Hash{String => URI}] :term_mappings Hash of NCName => URI
112
+ # @option host_defaults [Hash{String => URI}] :vocabulary Hash of prefix => URI
67
113
  def initialize(base, host_defaults)
68
114
  # Initialize the evaluation context, [5.1]
69
115
  @base = base
@@ -73,10 +119,12 @@ module RDF::RDFa
73
119
  @language = nil
74
120
  @uri_mappings = host_defaults.fetch(:uri_mappings, {})
75
121
  @term_mappings = host_defaults.fetch(:term_mappings, {})
76
- @default_voabulary = host_defaults.fetch(:voabulary, nil)
122
+ @default_vocabulary = host_defaults.fetch(:vocabulary, nil)
77
123
  end
78
124
 
79
125
  # Copy this Evaluation Context
126
+ #
127
+ # @param [EvaluationContext] from
80
128
  def initialize_copy(from)
81
129
  # clone the evaluation context correctly
82
130
  @uri_mappings = from.uri_mappings.clone
@@ -95,13 +143,16 @@ module RDF::RDFa
95
143
  ##
96
144
  # Initializes the RDFa reader instance.
97
145
  #
98
- # @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, IO, File, String] input
146
+ # @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, #read, #to_s] input
99
147
  # @option options [Array] :debug (nil) Array to place debug messages
148
+ # @option options [Graph] :processor_graph (nil) Graph to record information, warnings and errors.
100
149
  # @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
101
150
  # @option options [Boolean] :base_uri (nil) Base URI to use for relative URIs.
151
+ # @option options [:rdfa_1_0, :rdfa_1_1] :version (:rdfa_1_1) Parser version information
152
+ # @option options [:xhtml] :host_language (:xhtml) Host Language
102
153
  # @return [reader]
103
154
  # @yield [reader]
104
- # @yieldparam [Reader] reader
155
+ # @yieldparam [RDF::Reader] reader
105
156
  # @raise [RDF::ReaderError]:: Raises RDF::ReaderError if _strict_
106
157
  def initialize(input = $stdin, options = {}, &block)
107
158
  super do
@@ -110,14 +161,18 @@ module RDF::RDFa
110
161
  @base_uri = RDF::URI.intern(options[:base_uri])
111
162
  @@vocabulary_cache ||= {}
112
163
 
164
+ @version = options[:version] ? options[:version].to_sym : :rdfa_1_1
165
+ @host_language = options[:host_language] || :xhtml
166
+
113
167
  @doc = case input
114
168
  when Nokogiri::HTML::Document then input
115
169
  when Nokogiri::XML::Document then input
116
170
  else Nokogiri::XML.parse(input, @base_uri.to_s)
117
171
  end
118
172
 
119
- raise RDF::ReaderError, "Synax errors:\n#{@doc.errors}" if !@doc.errors.empty? && @strict
120
- raise RDF::ReaderError, "Empty document" if (@doc.nil? || @doc.root.nil?) && @strict
173
+ add_error(nil, "Empty document", RDF::RDFA.HostLanguageMarkupError) if (@doc.nil? || @doc.root.nil?)
174
+ add_warning(nil, "Synax errors:\n#{@doc.errors}", RDF::RDFA.HostLanguageMarkupError) unless @doc.errors.empty?
175
+
121
176
  block.call(self) if block_given?
122
177
  end
123
178
  end
@@ -131,16 +186,11 @@ module RDF::RDFa
131
186
  def each_statement(&block)
132
187
  @callback = block
133
188
 
134
- # Determine host language
135
- # XXX - right now only XHTML defined
136
- @host_language = case @doc.root.attributes["version"].to_s
137
- when /XHTML+RDFa/ then :xhtml
138
- end
139
-
140
- # If none found, assume xhtml
141
- @host_language ||= :xhtml
142
-
143
- @host_defaults = {}
189
+ # Section 4.2 RDFa Host Language Conformance
190
+ #
191
+ # The Host Language may define a default RDFa Profile. If it does, the RDFa Profile triples that establish term or
192
+ # URI mappings associated with that profile must not change without changing the profile URI. RDFa Processors may
193
+ # embed, cache, or retrieve the RDFa Profile triples associated with that profile.
144
194
  @host_defaults = case @host_language
145
195
  when :xhtml
146
196
  {
@@ -156,6 +206,10 @@ module RDF::RDFa
156
206
  {}
157
207
  end
158
208
 
209
+ @host_defaults.delete(:vocabulary) if @version == :rdfa_1_0
210
+
211
+ add_debug(@doc, "version = #{@version}, host_language = #{@host_language}")
212
+
159
213
  # parse
160
214
  parse_whole_document(@doc, @base_uri)
161
215
  end
@@ -185,9 +239,8 @@ module RDF::RDFa
185
239
  # Figure out the document path, if it is a Nokogiri::XML::Element or Attribute
186
240
  def node_path(node)
187
241
  case node
188
- when Nokogiri::XML::Element, Nokogiri::XML::Attr then "#{node_path(node.parent)}/#{node.name}"
189
- when String then node
190
- else ""
242
+ when Nokogiri::XML::Node then node.display_path
243
+ else node.to_s
191
244
  end
192
245
  end
193
246
 
@@ -196,10 +249,36 @@ module RDF::RDFa
196
249
  # @param [XML Node, any] node:: XML Node or string for showing context
197
250
  # @param [String] message::
198
251
  def add_debug(node, message)
199
- puts "#{node_path(node)}: #{message}" if $DEBUG
200
- @debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array)
252
+ add_processor_message(node, message, RDF::RDFA.InformationalMessage)
201
253
  end
202
254
 
255
+ def add_info(node, message, process_class = RDF::RDFA.InformationalMessage)
256
+ add_processor_message(node, message, process_class)
257
+ end
258
+
259
+ def add_warning(node, message, process_class = RDF::RDFA.MiscellaneousWarning)
260
+ add_processor_message(node, message, process_class)
261
+ end
262
+
263
+ def add_error(node, message, process_class = RDF::RDFA.MiscellaneousError)
264
+ add_processor_message(node, message, process_class)
265
+ raise ParserException, message if @strict
266
+ end
267
+
268
+ def add_processor_message(node, message, process_class)
269
+ puts "#{node_path(node)}: #{message}" if ::RDF::RDFa::debug?
270
+ @debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array)
271
+ if @processor_graph
272
+ @processor_sequence ||= 0
273
+ n = RDF::Node.new
274
+ @processor_graph << RDF::Statement.new(n, RDF["type"], process_class)
275
+ @processor_graph << RDF::Statement.new(n, RDF::DC.description, message)
276
+ @processor_graph << RDF::Statement.new(n, RDF::DC.date, RDF::Literal::Date.new(DateTime.now.to_date))
277
+ @processor_graph << RDF::Statement.new(n, RDF::RDFA.sequence, RDF::Literal::Integer.new(@processor_sequence += 1))
278
+ @processor_graph << RDF::Statement.new(n, RDF::RDFA.source, node_path(node))
279
+ end
280
+ end
281
+
203
282
  # add a statement, object can be literal or URI or bnode
204
283
  #
205
284
  # @param [Nokogiri::XML::Node, any] node:: XML Node or string for showing context
@@ -210,7 +289,7 @@ module RDF::RDFa
210
289
  # @raise [ReaderError]:: Checks parameter types and raises if they are incorrect if parsing mode is _strict_.
211
290
  def add_triple(node, subject, predicate, object)
212
291
  statement = RDF::Statement.new(subject, predicate, object)
213
- add_debug(node, "statement: #{statement}")
292
+ add_debug(node, "statement: #{statement.to_ntriples}")
214
293
  @callback.call(statement)
215
294
  end
216
295
 
@@ -229,99 +308,96 @@ module RDF::RDFa
229
308
  end
230
309
 
231
310
  # initialize the evaluation context with the appropriate base
232
- evaluation_context = EvaluationContext.new(base, @host_defaults)
311
+ evaluation_context = EvaluationContext.new(@base_uri, @host_defaults)
233
312
 
234
313
  traverse(doc.root, evaluation_context)
235
314
  end
236
315
 
237
- # Extract the XMLNS mappings from an element
238
- def extract_mappings(element, uri_mappings, term_mappings)
239
- # Process @profile
240
- # Next the current element is parsed for any updates to the local term mappings and
241
- # local list of URI mappings via @profile.
242
- # If @profile is present, its value is processed as defined in RDFa Profiles.
243
- element.attributes['profile'].to_s.split(/\s/).each do |profile|
244
- if node_path(element) == "/html/head"
245
- # Don't try to open ourselves!
246
- add_debug(element, "extract_mappings: skip head profile <#{profile}>")
247
- next
248
- elsif @@vocabulary_cache[profile]
249
- add_debug(element, "extract_mappings: cached profile <#{profile}>")
250
- @@vocabulary_cache[profile]
251
- elsif @base_uri.to_s == profile
252
- # Don't try to open ourselves!
253
- add_debug(element, "extract_mappings: skip recursive profile <#{profile}>")
254
- next
316
+ # Parse and process URI mappings, Term mappings and a default vocabulary from @profile
317
+ #
318
+ # Yields each mapping
319
+ def process_profile(element)
320
+ element.attributes['profile'].to_s.split(/\s/).reverse.each do |profile|
321
+ # Don't try to open ourselves!
322
+ if @uri == profile
323
+ add_debug(element, "process_profile: skip recursive profile <#{profile}>")
255
324
  elsif @@vocabulary_cache.has_key?(profile)
256
- add_debug(element, "extract_mappings: skip previously parsed profile <#{profile}>")
325
+ add_debug(element, "process_profile: skip previously parsed profile <#{profile}>")
257
326
  else
258
327
  begin
259
- add_debug(element, "extract_mappings: parse profile <#{profile}>")
260
328
  @@vocabulary_cache[profile] = {
261
329
  :uri_mappings => {},
262
- :term_mappings => {}
330
+ :term_mappings => {},
331
+ :default_vocabulary => nil
263
332
  }
264
333
  um = @@vocabulary_cache[profile][:uri_mappings]
265
334
  tm = @@vocabulary_cache[profile][:term_mappings]
266
- add_debug(element, "extract_mappings: profile open <#{profile}>")
267
-
268
- old_debug, old_verbose, = $DEBUG, $verbose
269
- $DEBUG, $verbose = false, false
270
- # FIXME: format shouldn't need to be specified here
271
- p_graph = RDF::Graph.load(profile, :base_uri => profile, :format => RDF::Format.for(profile) || :rdfa)
272
- puts p_graph.inspect if old_debug
273
- $DEBUG, $verbose = old_debug, old_verbose
274
- p_graph.each_subject do |subject|
275
- # If one of the objects is not a Literal no mapping is created.
335
+ add_debug(element, "process_profile: parse profile <#{profile}>")
336
+
337
+ # Parse profile, and extract mappings from graph
338
+ old_debug, old_verbose, = ::RDF::RDFa::debug?, $verbose
339
+ ::RDF::RDFa::debug, $verbose = false, false
340
+ # FIXME: RDF isn't smart enough to figure this out from MIME-Type
341
+ load_opts = {:base_uri => profile}
342
+ load_opts[:format] = :rdfa unless RDF::Format.for(:file_name => profile)
343
+ p_graph = RDF::Graph.load(profile, load_opts)
344
+ ::RDF::RDFa::debug, $verbose = old_debug, old_verbose
345
+ p_graph.subjects.each do |subject|
346
+ # If one of the objects is not a Literal or if there are additional rdfa:uri or rdfa:term
347
+ # predicates sharing the same subject, no mapping is created.
276
348
  uri = p_graph.first_object([subject, RDF::RDFA['uri'], nil])
277
349
  term = p_graph.first_object([subject, RDF::RDFA['term'], nil])
278
350
  prefix = p_graph.first_object([subject, RDF::RDFA['prefix'], nil])
279
- add_debug(element, "extract_mappings: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}")
351
+ vocab = p_graph.first_object([subject, RDF::RDFA['vocabulary'], nil])
352
+ add_debug(element, "process_profile: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}, vocabulary=#{vocab.inspect}")
280
353
 
281
- next if !uri || (!term && !prefix)
282
- raise RDF::ReaderError, "rdf:uri must be a Literal" unless uri.is_a?(RDF::Literal)
283
- raise RDF::ReaderError, "rdf:term must be a Literal" unless term.nil? || term.is_a?(RDF::Literal)
284
- raise RDF::ReaderError, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(RDF::Literal)
285
-
354
+ raise RDF::ReaderError, "rdf:uri #{uri.inspect} must be a Literal" unless uri.nil? || uri.is_a?(RDF::Literal)
355
+ raise RDF::ReaderError, "rdf:term #{term.inspect} must be a Literal" unless term.nil? || term.is_a?(RDF::Literal)
356
+ raise RDF::ReaderError, "rdf:prefix #{prefix.inspect} must be a Literal" unless prefix.nil? || prefix.is_a?(RDF::Literal)
357
+ raise RDF::ReaderError, "rdf:vocabulary #{vocab.inspect} must be a Literal" unless vocab.nil? || vocab.is_a?(RDF::Literal)
358
+
359
+ @@vocabulary_cache[profile][:default_vocabulary] = vocab.value if vocab
360
+
286
361
  # For every extracted triple that is the common subject of an rdfa:prefix and an rdfa:uri
287
362
  # predicate, create a mapping from the object literal of the rdfa:prefix predicate to the
288
363
  # object literal of the rdfa:uri predicate. Add or update this mapping in the local list of
289
364
  # URI mappings after transforming the 'prefix' component to lower-case.
290
365
  # For every extracted
291
- um[prefix.value.downcase] = uri.value if prefix
366
+ um[prefix.value.downcase] = uri.value if prefix && prefix.value != "_"
292
367
 
293
368
  # triple that is the common subject of an rdfa:term and an rdfa:uri predicate, create a
294
369
  # mapping from the object literal of the rdfa:term predicate to the object literal of the
295
370
  # rdfa:uri predicate. Add or update this mapping in the local term mappings.
296
- tm[term.value] = RDF::URI.intern(uri.value) if term
371
+ tm[term.value.downcase] = RDF::URI.intern(uri.value) if term
297
372
  end
298
- # FIXME: subject isn't in scope here
299
- #rescue RDF::ReaderError
300
- # add_debug(element, "extract_mappings: profile subject #{subject.to_s}: #{e.message}")
301
- # raise if @strict
302
- rescue RuntimeError => e
303
- add_debug(element, "extract_mappings: profile: #{e.message}")
304
- raise if @strict
373
+ rescue RDF::ReaderError => e
374
+ add_error(element, e.message, RDF::RDFA.ProfileReferenceError)
375
+ raise # In case we're not in strict mode, we need to be sure processing stops
305
376
  end
306
377
  end
307
-
308
- # Merge mappings from this vocabulary
309
- uri_mappings.merge!(@@vocabulary_cache[profile][:uri_mappings])
310
- term_mappings.merge!(@@vocabulary_cache[profile][:term_mappings])
378
+ profile_mappings = @@vocabulary_cache[profile]
379
+ yield :uri_mappings, profile_mappings[:uri_mappings] unless profile_mappings[:uri_mappings].empty?
380
+ yield :term_mappings, profile_mappings[:term_mappings] unless profile_mappings[:term_mappings].empty?
381
+ yield :default_vocabulary, profile_mappings[:default_vocabulary] if profile_mappings[:default_vocabulary]
311
382
  end
312
-
383
+ end
384
+
385
+ # Extract the XMLNS mappings from an element
386
+ def extract_mappings(element, uri_mappings, term_mappings)
313
387
  # look for xmlns
314
388
  # (note, this may be dependent on @host_language)
315
389
  # Regardless of how the mapping is declared, the value to be mapped must be converted to lower case,
316
390
  # and the URI is not processed in any way; in particular if it is a relative path it is
317
391
  # not resolved against the current base.
318
- element.namespaces.each do |attr_name, attr_value|
319
- begin
320
- abbr, prefix = attr_name.split(":")
321
- uri_mappings[prefix.to_s.downcase] = attr_value.to_s if abbr.downcase == "xmlns" && prefix
322
- rescue ReaderError => e
323
- add_debug(element, "extract_mappings raised #{e.class}: #{e.message}")
324
- raise if @strict
392
+ element.namespace_definitions.each do |ns|
393
+ # A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
394
+ next if ns.prefix == "_"
395
+
396
+ # Downcase prefix for RDFa 1.1
397
+ pfx_lc = (@version == :rdfa_1_0 || ns.prefix.nil?) ? ns.prefix : ns.prefix.to_s.downcase
398
+ if ns.prefix
399
+ uri_mappings[pfx_lc] = ns.href
400
+ add_debug(element, "extract_mappings: xmlns:#{ns.prefix} => <#{ns.href}>")
325
401
  end
326
402
  end
327
403
 
@@ -335,11 +411,12 @@ module RDF::RDFa
335
411
  next unless prefix.match(/:$/)
336
412
  prefix.chop!
337
413
 
414
+ # A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
415
+ next if prefix == "_"
416
+
338
417
  uri_mappings[prefix] = uri
339
- end
340
-
341
- add_debug(element, "uri_mappings: #{uri_mappings.map{|k,v|"#{k}='#{v}'"}.join(", ")}")
342
- add_debug(element, "term_mappings: #{term_mappings.map{|k,v|"#{k}='#{v}'"}.join(", ")}")
418
+ add_debug(element, "extract_mappings: prefix #{prefix} => <#{uri}>")
419
+ end unless @version == :rdfa_1_0
343
420
  end
344
421
 
345
422
  # The recursive helper function
@@ -352,7 +429,7 @@ module RDF::RDFa
352
429
 
353
430
  add_debug(element, "traverse, ec: #{evaluation_context.inspect}")
354
431
 
355
- # local variables [5.5 Step 1]
432
+ # local variables [7.5 Step 1]
356
433
  recurse = true
357
434
  skip = false
358
435
  new_subject = nil
@@ -375,30 +452,53 @@ module RDF::RDFa
375
452
  vocab = attrs['vocab']
376
453
 
377
454
  # Pull out the attributes needed for the skip test.
378
- property = attrs['property'].to_s if attrs['property']
379
- typeof = attrs['typeof'].to_s if attrs['typeof']
455
+ property = attrs['property'].to_s.strip if attrs['property']
456
+ typeof = attrs['typeof'].to_s.strip if attrs['typeof']
380
457
  datatype = attrs['datatype'].to_s if attrs['datatype']
381
458
  content = attrs['content'].to_s if attrs['content']
382
- rel = attrs['rel'].to_s if attrs['rel']
383
- rev = attrs['rev'].to_s if attrs['rev']
459
+ rel = attrs['rel'].to_s.strip if attrs['rel']
460
+ rev = attrs['rev'].to_s.strip if attrs['rev']
461
+
462
+ # Local term mappings [7.5 Step 2]
463
+ # Next the current element is parsed for any updates to the local term mappings and local list of URI mappings via @profile.
464
+ # If @profile is present, its value is processed as defined in RDFa Profiles.
465
+ unless @version == :rdfa_1_0
466
+ begin
467
+ process_profile(element) do |which, value|
468
+ add_debug(element, "[Step 2] traverse, #{which}: #{value.inspect}")
469
+ case which
470
+ when :uri_mappings then uri_mappings.merge!(value)
471
+ when :term_mappings then term_mappings.merge!(value)
472
+ when :default_vocabulary then default_vocabulary = value
473
+ end
474
+ end
475
+ rescue
476
+ # Skip this element and all sub-elements
477
+ # If any referenced RDFa Profile is not available, then the current element and its children must not place any
478
+ # triples in the default graph.
479
+ raise if @strict
480
+ return
481
+ end
482
+ end
384
483
 
385
- # Default vocabulary [7.5 Step 2]
386
- # First the current element is examined for any change to the default vocabulary via @vocab.
484
+ # Default vocabulary [7.5 Step 3]
485
+ # Next the current element is examined for any change to the default vocabulary via @vocab.
387
486
  # If @vocab is present and contains a value, its value updates the local default vocabulary.
388
487
  # If the value is empty, then the local default vocabulary must be reset to the Host Language defined default.
389
488
  unless vocab.nil?
390
489
  default_vocabulary = if vocab.to_s.empty?
391
490
  # Set default_vocabulary to host language default
392
- @host_defaults.fetch(:voabulary, nil)
491
+ add_debug(element, "[Step 2] traverse, reset default_vocaulary to #{@host_defaults.fetch(:vocabulary, nil).inspect}")
492
+ @host_defaults.fetch(:vocabulary, nil)
393
493
  else
394
- vocab.to_s
494
+ RDF::URI.intern(vocab)
395
495
  end
396
496
  add_debug(element, "[Step 2] traverse, default_vocaulary: #{default_vocabulary.inspect}")
397
497
  end
398
498
 
399
- # Local term mappings [7.5 Steps 3 & 4]
400
- # Next the current element is parsed for any updates to the local term mappings and local list of URI mappings via @profile.
401
- # If @profile is present, its value is processed as defined in RDFa Profiles.
499
+ # Local term mappings [7.5 Step 4]
500
+ # Next, the current element is examined for URI mappings, and these are added to the local list of URI mappings.
501
+ # Note that a URI mapping will simply overwrite any current mapping in the list that has the same name
402
502
  extract_mappings(element, uri_mappings, term_mappings)
403
503
 
404
504
  # Language information [7.5 Step 5]
@@ -419,8 +519,16 @@ module RDF::RDFa
419
519
  add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language || 'nil'}") if attrs['lang']
420
520
 
421
521
  # rels and revs
422
- rels = process_uris(element, rel, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
423
- revs = process_uris(element, rev, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
522
+ rels = process_uris(element, rel, evaluation_context,
523
+ :uri_mappings => uri_mappings,
524
+ :term_mappings => term_mappings,
525
+ :vocab => default_vocabulary,
526
+ :restrictions => TERMorCURIEorAbsURI[@version])
527
+ revs = process_uris(element, rev, evaluation_context,
528
+ :uri_mappings => uri_mappings,
529
+ :term_mappings => term_mappings,
530
+ :vocab => default_vocabulary,
531
+ :restrictions => TERMorCURIEorAbsURI[@version])
424
532
 
425
533
  add_debug(element, "traverse, about: #{about.nil? ? 'nil' : about}, src: #{src.nil? ? 'nil' : src}, resource: #{resource.nil? ? 'nil' : resource}, href: #{href.nil? ? 'nil' : href}")
426
534
  add_debug(element, "traverse, property: #{property.nil? ? 'nil' : property}, typeof: #{typeof.nil? ? 'nil' : typeof}, datatype: #{datatype.nil? ? 'nil' : datatype}, content: #{content.nil? ? 'nil' : content}")
@@ -429,14 +537,18 @@ module RDF::RDFa
429
537
  if !(rel || rev)
430
538
  # Establishing a new subject if no rel/rev [7.5 Step 6]
431
539
  # May not be valid, but can exist
432
- if about
433
- new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
540
+ new_subject = if about
541
+ process_uri(element, about, evaluation_context,
542
+ :uri_mappings => uri_mappings,
543
+ :restrictions => SafeCURIEorCURIEorURI[@version])
434
544
  elsif src
435
- new_subject = process_uri(element, src, evaluation_context)
545
+ process_uri(element, src, evaluation_context, :restrictions => [:uri])
436
546
  elsif resource
437
- new_subject = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
547
+ process_uri(element, resource, evaluation_context,
548
+ :uri_mappings => uri_mappings,
549
+ :restrictions => SafeCURIEorCURIEorURI[@version])
438
550
  elsif href
439
- new_subject = process_uri(element, href, evaluation_context)
551
+ process_uri(element, href, evaluation_context, :restrictions => [:uri])
440
552
  end
441
553
 
442
554
  # If no URI is provided by a resource attribute, then the first match from the following rules
@@ -445,52 +557,52 @@ module RDF::RDFa
445
557
  # otherwise,
446
558
  # if parent object is present, new subject is set to the value of parent object.
447
559
  # Additionally, if @property is not present then the skip element flag is set to 'true';
448
- if new_subject.nil?
449
- if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
450
- # From XHTML+RDFa 1.1:
451
- # if no URI is provided, then first check to see if the element is the head or body element.
452
- # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
453
- new_subject = RDF::URI.intern(evaluation_context.base)
454
- elsif element.attributes['typeof']
455
- new_subject = RDF::Node.new
456
- else
457
- # if it's null, it's null and nothing changes
458
- new_subject = evaluation_context.parent_object
459
- skip = true unless property
460
- end
560
+ new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
561
+ # From XHTML+RDFa 1.1:
562
+ # if no URI is provided, then first check to see if the element is the head or body element.
563
+ # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
564
+ evaluation_context.base
565
+ elsif element.attributes['typeof']
566
+ RDF::Node.new
567
+ else
568
+ # if it's null, it's null and nothing changes
569
+ skip = true unless property
570
+ evaluation_context.parent_object
461
571
  end
462
572
  add_debug(element, "[Step 6] new_subject: #{new_subject}, skip = #{skip}")
463
573
  else
464
574
  # [7.5 Step 7]
465
575
  # If the current element does contain a @rel or @rev attribute, then the next step is to
466
576
  # establish both a value for new subject and a value for current object resource:
467
- if about
468
- new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
469
- elsif src
470
- new_subject = process_uri(element, src, evaluation_context, :uri_mappings => uri_mappings)
471
- end
577
+ new_subject = process_uri(element, about, evaluation_context,
578
+ :uri_mappings => uri_mappings,
579
+ :restrictions => SafeCURIEorCURIEorURI[@version]) ||
580
+ process_uri(element, src, evaluation_context,
581
+ :uri_mappings => uri_mappings,
582
+ :restrictions => [:uri])
472
583
 
473
584
  # If no URI is provided then the first match from the following rules will apply
474
- if new_subject.nil?
475
- if @host_language == :xhtml && element.name =~ /^(head|body)$/
476
- # From XHTML+RDFa 1.1:
477
- # if no URI is provided, then first check to see if the element is the head or body element.
478
- # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
479
- new_subject = RDF::URI.intern(evaluation_context.base)
480
- elsif element.attributes['typeof']
481
- new_subject = RDF::Node.new
482
- else
483
- # if it's null, it's null and nothing changes
484
- new_subject = evaluation_context.parent_object
485
- # no skip flag set this time
486
- end
585
+ new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/
586
+ # From XHTML+RDFa 1.1:
587
+ # if no URI is provided, then first check to see if the element is the head or body element.
588
+ # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
589
+ evaluation_context.base
590
+ elsif element.attributes['typeof']
591
+ RDF::Node.new
592
+ else
593
+ # if it's null, it's null and nothing changes
594
+ evaluation_context.parent_object
595
+ # no skip flag set this time
487
596
  end
488
597
 
489
598
  # Then the current object resource is set to the URI obtained from the first match from the following rules:
490
- if resource
491
- current_object_resource = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
599
+ current_object_resource = if resource
600
+ process_uri(element, resource, evaluation_context,
601
+ :uri_mappings => uri_mappings,
602
+ :restrictions => SafeCURIEorCURIEorURI[@version])
492
603
  elsif href
493
- current_object_resource = process_uri(element, href, evaluation_context)
604
+ process_uri(element, href, evaluation_context,
605
+ :restrictions => [:uri])
494
606
  end
495
607
 
496
608
  add_debug(element, "[Step 7] new_subject: #{new_subject}, current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource}")
@@ -498,11 +610,15 @@ module RDF::RDFa
498
610
 
499
611
  # Process @typeof if there is a subject [Step 8]
500
612
  if new_subject and typeof
501
- # Typeof is TERMorCURIEorURIs
502
- types = process_uris(element, typeof, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
613
+ # Typeof is TERMorCURIEorAbsURIs
614
+ types = process_uris(element, typeof, evaluation_context,
615
+ :uri_mappings => uri_mappings,
616
+ :term_mappings => term_mappings,
617
+ :vocab => default_vocabulary,
618
+ :restrictions => TERMorCURIEorAbsURI[@version])
503
619
  add_debug(element, "typeof: #{typeof}")
504
620
  types.each do |one_type|
505
- add_triple(element, new_subject, RDF.type, one_type)
621
+ add_triple(element, new_subject, RDF["type"], one_type)
506
622
  end
507
623
  end
508
624
 
@@ -531,30 +647,60 @@ module RDF::RDFa
531
647
 
532
648
  # Establish current object literal [Step 11]
533
649
  if property
534
- properties = process_uris(element, property, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
650
+ properties = process_uris(element, property, evaluation_context,
651
+ :uri_mappings => uri_mappings,
652
+ :term_mappings => term_mappings,
653
+ :vocab => default_vocabulary,
654
+ :restrictions => TERMorCURIEorAbsURIprop[@version])
655
+
656
+ properties.reject! do |p|
657
+ if p.is_a?(RDF::URI)
658
+ false
659
+ else
660
+ add_debug(element, "Illegal predicate: #{p.inspect}")
661
+ raise RDF::ReaderError, "predicate #{p.inspect} must be a URI" if @strict
662
+ true
663
+ end
664
+ end
535
665
 
536
666
  # get the literal datatype
537
- type = datatype
538
667
  children_node_types = element.children.collect{|c| c.class}.uniq
539
668
 
540
669
  # the following 3 IF clauses should be mutually exclusive. Written as is to prevent extensive indentation.
541
- type_resource = process_uri(element, type, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary) if type
542
- if type and !type.empty? and (type_resource.to_s != RDF.XMLLiteral.to_s)
670
+ datatype = process_uri(element, datatype, evaluation_context,
671
+ :uri_mappings => uri_mappings,
672
+ :term_mappings => term_mappings,
673
+ :vocab => default_vocabulary,
674
+ :restrictions => TERMorCURIEorAbsURI[@version]) unless datatype.to_s.empty?
675
+ current_object_literal = if !datatype.to_s.empty? && datatype.to_s != RDF.XMLLiteral.to_s
543
676
  # typed literal
544
- add_debug(element, "[Step 11] typed literal")
545
- current_object_literal = RDF::Literal.new(content || element.inner_text.to_s, :datatype => type_resource, :language => language)
546
- elsif content or (children_node_types == [Nokogiri::XML::Text]) or (element.children.length == 0) or (type == '')
547
- # plain literal
548
- add_debug(element, "[Step 11] plain literal")
549
- current_object_literal = RDF::Literal.new(content || element.inner_text.to_s, :language => language)
550
- elsif children_node_types != [Nokogiri::XML::Text] and (type == nil or type_resource.to_s == RDF.XMLLiteral.to_s)
551
- # XML Literal
552
- add_debug(element, "[Step 11] XML Literal: #{element.inner_html}")
553
- current_object_literal = RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
554
- recurse = false
677
+ add_debug(element, "[Step 11] typed literal (#{datatype})")
678
+ RDF::Literal.new(content || element.inner_text.to_s, :datatype => datatype, :language => language)
679
+ elsif @version == :rdfa_1_1
680
+ if datatype.to_s == RDF.XMLLiteral.to_s
681
+ # XML Literal
682
+ add_debug(element, "[Step 11(1.1)] XML Literal: #{element.inner_html}")
683
+ recurse = false
684
+ RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
685
+ else
686
+ # plain literal
687
+ add_debug(element, "[Step 11(1.1)] plain literal")
688
+ RDF::Literal.new(content || element.inner_text.to_s, :language => language)
689
+ end
690
+ else
691
+ if content || (children_node_types == [Nokogiri::XML::Text]) || (element.children.length == 0) || datatype == ""
692
+ # plain literal
693
+ add_debug(element, "[Step 11 (1.0)] plain literal")
694
+ RDF::Literal.new(content || element.inner_text.to_s, :language => language)
695
+ elsif children_node_types != [Nokogiri::XML::Text] and (datatype == nil or datatype.to_s == RDF.XMLLiteral.to_s)
696
+ # XML Literal
697
+ add_debug(element, "[Step 11 (1.0)] XML Literal: #{element.inner_html}")
698
+ recurse = false
699
+ RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
700
+ end
555
701
  end
556
-
557
- # add each property
702
+
703
+ # add each property
558
704
  properties.each do |p|
559
705
  add_triple(element, new_subject, p, current_object_literal)
560
706
  end
@@ -611,7 +757,7 @@ module RDF::RDFa
611
757
  end
612
758
  end
613
759
 
614
- # space-separated TERMorCURIEorURI
760
+ # space-separated TERMorCURIEorAbsURI or SafeCURIEorCURIEorURI
615
761
  def process_uris(element, value, evaluation_context, options)
616
762
  return [] if value.to_s.empty?
617
763
  add_debug(element, "process_uris: #{value}")
@@ -619,34 +765,56 @@ module RDF::RDFa
619
765
  end
620
766
 
621
767
  def process_uri(element, value, evaluation_context, options = {})
622
- #return if value.to_s.empty?
623
- #add_debug(element, "process_uri: #{value}")
768
+ return if value.nil?
769
+ restrictions = options[:restrictions]
770
+ add_debug(element, "process_uri: #{value}, restrictions = #{restrictions.inspect}")
624
771
  options = {:uri_mappings => {}}.merge(options)
625
- if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/)
772
+ if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/) && restrictions.include?(:safe_curie)
626
773
  # SafeCURIEorCURIEorURI
627
774
  # When the value is surrounded by square brackets, then the content within the brackets is
628
775
  # evaluated as a CURIE according to the CURIE Syntax definition. If it is not a valid CURIE, the
629
776
  # value must be ignored.
630
- uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject)
777
+ uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
631
778
  add_debug(element, "process_uri: #{value} => safeCURIE => <#{uri}>")
632
779
  uri
633
- elsif options[:term_mappings] && NC_REGEXP.match(value.to_s)
634
- # TERMorCURIEorURI
780
+ elsif options[:term_mappings] && NC_REGEXP.match(value.to_s) && restrictions.include?(:term)
781
+ # TERMorCURIEorAbsURI
635
782
  # If the value is an NCName, then it is evaluated as a term according to General Use of Terms in
636
783
  # Attributes. Note that this step may mean that the value is to be ignored.
637
- uri = process_term(value.to_s, options)
784
+ uri = process_term(element, value.to_s, options)
638
785
  add_debug(element, "process_uri: #{value} => term => <#{uri}>")
639
786
  uri
640
787
  else
641
- # SafeCURIEorCURIEorURI or TERMorCURIEorURI
788
+ # SafeCURIEorCURIEorURI or TERMorCURIEorAbsURI
642
789
  # Otherwise, the value is evaluated as a CURIE.
643
790
  # If it is a valid CURIE, the resulting URI is used; otherwise, the value will be processed as a URI.
644
- uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject)
791
+ uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
645
792
  if uri
646
793
  add_debug(element, "process_uri: #{value} => CURIE => <#{uri}>")
647
- else
648
- ## FIXME: throw exception if there is no base uri set?
649
- uri = RDF::URI.intern(RDF::URI.intern(evaluation_context.base).join(value))
794
+ elsif @version == :rdfa_1_0 && value.to_s.match(/^xml/i)
795
+ # Special case to not allow anything starting with XML to be treated as a URI
796
+ elsif restrictions.include?(:absuri) || restrictions.include?(:uri)
797
+ begin
798
+ # AbsURI does not use xml:base
799
+ if restrictions.include?(:absuri)
800
+ uri = RDF::URI.intern(value)
801
+ unless uri.absolute?
802
+ uri = nil
803
+ raise RDF::ReaderError, "Relative URI #{value}"
804
+ end
805
+ else
806
+ uri = evaluation_context.base.join(Addressable::URI.parse(value))
807
+ end
808
+ rescue Addressable::URI::InvalidURIError => e
809
+ add_warning(element, "Malformed prefix #{value}", RDF::RDFA.UndefinedPrefixError)
810
+ rescue RDF::ReaderError => e
811
+ add_debug(element, e.message)
812
+ if value.to_s =~ /^\(^\w\):/
813
+ add_warning(element, "Undefined prefix #{$1}", RDF::RDFA.UndefinedPrefixError)
814
+ else
815
+ add_warning(element, "Relative URI #{value}")
816
+ end
817
+ end
650
818
  add_debug(element, "process_uri: #{value} => URI => <#{uri}>")
651
819
  end
652
820
  uri
@@ -659,7 +827,7 @@ module RDF::RDFa
659
827
  # @param [Hash] options:: Parser options, one of
660
828
  # <em>options[:term_mappings]</em>:: Term mappings
661
829
  # <em>options[:vocab]</em>:: Default vocabulary
662
- def process_term(value, options)
830
+ def process_term(element, value, options)
663
831
  case
664
832
  when options[:term_mappings].is_a?(Hash) && options[:term_mappings].has_key?(value.to_s.downcase)
665
833
  # If the term is in the local term mappings, use the associated URI.
@@ -670,35 +838,43 @@ module RDF::RDFa
670
838
  RDF::URI.intern(options[:vocab] + value)
671
839
  else
672
840
  # Finally, if there is no local default vocabulary, the term has no associated URI and must be ignored.
841
+ add_warning(element, "Term #{value} is not defined", RDF::RDFA.UndefinedTermError)
673
842
  nil
674
843
  end
675
844
  end
676
845
 
677
846
  # From section 6. CURIE Syntax Definition
678
- def curie_to_resource_or_bnode(element, curie, uri_mappings, subject)
847
+ def curie_to_resource_or_bnode(element, curie, uri_mappings, subject, restrictions)
679
848
  # URI mappings for CURIEs default to XHV, rather than the default doc namespace
680
849
  prefix, reference = curie.to_s.split(":")
681
850
 
682
851
  # consider the bnode situation
683
- if prefix == "_"
852
+ if prefix == "_" && restrictions.include?(:bnode)
853
+ # we force a non-nil name, otherwise it generates a new name
854
+ # As a special case, _: is also a valid reference for one specific bnode.
684
855
  bnode(reference)
685
856
  elsif curie.to_s.match(/^:/)
857
+ add_debug(element, "curie_to_resource_or_bnode: default prefix: defined? #{!!uri_mappings[""]}, defaults: #{@host_defaults[:prefix]}")
686
858
  # Default prefix
687
859
  if uri_mappings[""]
688
860
  RDF::URI.intern(uri_mappings[""] + reference.to_s)
689
861
  elsif @host_defaults[:prefix]
690
862
  RDF::URI.intern(uri_mappings[@host_defaults[:prefix]] + reference.to_s)
863
+ else
864
+ #add_warning(element, "Default namespace prefix is not defined", RDF::RDFA.UndefinedPrefixError)
865
+ nil
691
866
  end
692
867
  elsif !curie.to_s.match(/:/)
693
868
  # No prefix, undefined (in this context, it is evaluated as a term elsewhere)
694
869
  nil
695
870
  else
696
871
  # Prefixes always downcased
697
- ns = uri_mappings[prefix.to_s.downcase]
872
+ prefix = prefix.to_s.downcase unless @version == :rdfa_1_0
873
+ ns = uri_mappings[prefix.to_s]
698
874
  if ns
699
875
  RDF::URI.intern(ns + reference.to_s)
700
876
  else
701
- add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix.downcase}")
877
+ #add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix}")
702
878
  nil
703
879
  end
704
880
  end