rdf-rdfa 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,10 +4,27 @@ module RDF::RDFa
4
4
  ##
5
5
  # An RDFa parser in Ruby
6
6
  #
7
+ # Based on processing rules described here:
8
+ # @see http://www.w3.org/TR/rdfa-syntax/#s_model RDFa 1.0
9
+ # @see http://www.w3.org/2010/02/rdfa/drafts/2010/ED-rdfa-core-20100803/ RDFa 1.1
10
+ #
7
11
  # @author [Gregg Kellogg](http://kellogg-assoc.com/)
8
12
  class Reader < RDF::Reader
9
13
  format Format
10
14
 
15
+ SafeCURIEorCURIEorURI = {
16
+ :rdfa_1_0 => [:term, :safe_curie, :uri, :bnode],
17
+ :rdfa_1_1 => [:safe_curie, :curie, :term, :uri, :bnode],
18
+ }
19
+ TERMorCURIEorAbsURI = {
20
+ :rdfa_1_0 => [:term, :curie],
21
+ :rdfa_1_1 => [:term, :curie, :absuri],
22
+ }
23
+ TERMorCURIEorAbsURIprop = {
24
+ :rdfa_1_0 => [:curie],
25
+ :rdfa_1_1 => [:term, :curie, :absuri],
26
+ }
27
+
11
28
  NC_REGEXP = Regexp.new(
12
29
  %{^
13
30
  (?!\\\\u0301) # &#x301; is a non-spacing acute accent.
@@ -21,49 +38,78 @@ module RDF::RDFa
21
38
  $},
22
39
  Regexp::EXTENDED)
23
40
 
24
- # Host language, One of:
25
- # :xhtml_rdfa_1_0
26
- # :xhtml_rdfa_1_1
41
+ # Host language
42
+ # @return [:xhtml]
27
43
  attr_reader :host_language
28
44
 
29
45
  # The Recursive Baggage
46
+ # @private
30
47
  class EvaluationContext # :nodoc:
31
- # The base. This will usually be the URL of the document being processed,
48
+ # The base.
49
+ #
50
+ # This will usually be the URL of the document being processed,
32
51
  # but it could be some other URL, set by some other mechanism,
33
52
  # such as the (X)HTML base element. The important thing is that it establishes
34
53
  # a URL against which relative paths can be resolved.
54
+ #
55
+ # @return [URI]
35
56
  attr :base, true
36
57
  # The parent subject.
58
+ #
37
59
  # The initial value will be the same as the initial value of base,
38
60
  # but it will usually change during the course of processing.
61
+ #
62
+ # @return [URI]
39
63
  attr :parent_subject, true
40
64
  # The parent object.
65
+ #
41
66
  # In some situations the object of a statement becomes the subject of any nested statements,
42
67
  # and this property is used to convey this value.
43
68
  # Note that this value may be a bnode, since in some situations a number of nested statements
44
69
  # are grouped together on one bnode.
45
70
  # This means that the bnode must be set in the containing statement and passed down,
46
71
  # and this property is used to convey this value.
72
+ #
73
+ # @return [URI]
47
74
  attr :parent_object, true
48
75
  # A list of current, in-scope URI mappings.
76
+ #
77
+ # @return [Hash{Symbol => String}]
49
78
  attr :uri_mappings, true
50
- # A list of incomplete triples. A triple can be incomplete when no object resource
79
+ # A list of incomplete triples.
80
+ #
81
+ # A triple can be incomplete when no object resource
51
82
  # is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
52
83
  # The triples can be completed when a resource becomes available,
53
84
  # which will be when the next subject is specified (part of the process called chaining).
85
+ #
86
+ # @return [Array<Array<URI, Resource>>]
54
87
  attr :incomplete_triples, true
55
88
  # The language. Note that there is no default language.
89
+ #
90
+ # @return [Symbol]
56
91
  attr :language, true
57
92
  # The term mappings, a list of terms and their associated URIs.
93
+ #
58
94
  # This specification does not define an initial list.
59
95
  # Host Languages may define an initial list.
60
96
  # If a Host Language provides an initial list, it should do so via an RDFa Profile document.
97
+ #
98
+ # @return [Hash{Symbol => URI}]
61
99
  attr :term_mappings, true
62
- # The default vocabulary, a value to use as the prefix URI when a term is used.
100
+ # The default vocabulary
101
+ #
102
+ # A value to use as the prefix URI when a term is used.
63
103
  # This specification does not define an initial setting for the default vocabulary.
64
104
  # Host Languages may define an initial setting.
105
+ #
106
+ # @return [URI]
65
107
  attr :default_vocabulary, true
66
108
 
109
+ # @param [RDF::URI] base
110
+ # @param [Hash] host_defaults
111
+ # @option host_defaults [Hash{String => URI}] :term_mappings Hash of NCName => URI
112
+ # @option host_defaults [Hash{String => URI}] :vocabulary Hash of prefix => URI
67
113
  def initialize(base, host_defaults)
68
114
  # Initialize the evaluation context, [5.1]
69
115
  @base = base
@@ -73,10 +119,12 @@ module RDF::RDFa
73
119
  @language = nil
74
120
  @uri_mappings = host_defaults.fetch(:uri_mappings, {})
75
121
  @term_mappings = host_defaults.fetch(:term_mappings, {})
76
- @default_voabulary = host_defaults.fetch(:voabulary, nil)
122
+ @default_vocabulary = host_defaults.fetch(:vocabulary, nil)
77
123
  end
78
124
 
79
125
  # Copy this Evaluation Context
126
+ #
127
+ # @param [EvaluationContext] from
80
128
  def initialize_copy(from)
81
129
  # clone the evaluation context correctly
82
130
  @uri_mappings = from.uri_mappings.clone
@@ -95,13 +143,16 @@ module RDF::RDFa
95
143
  ##
96
144
  # Initializes the RDFa reader instance.
97
145
  #
98
- # @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, IO, File, String] input
146
+ # @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, #read, #to_s] input
99
147
  # @option options [Array] :debug (nil) Array to place debug messages
148
+ # @option options [Graph] :processor_graph (nil) Graph to record information, warnings and errors.
100
149
  # @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
101
150
  # @option options [String, #to_s] :base_uri (nil) Base URI to use for relative URIs.
151
+ # @option options [:rdfa_1_0, :rdfa_1_1] :version (:rdfa_1_1) Parser version information
152
+ # @option options [:xhtml] :host_language (:xhtml) Host Language
102
153
  # @return [reader]
103
154
  # @yield [reader]
104
- # @yieldparam [Reader] reader
155
+ # @yieldparam [RDF::Reader] reader
105
156
  # @raise [RDF::ReaderError]:: Raises RDF::ReaderError if _strict_
106
157
  def initialize(input = $stdin, options = {}, &block)
107
158
  super do
@@ -110,14 +161,18 @@ module RDF::RDFa
110
161
  @base_uri = RDF::URI.intern(options[:base_uri])
111
162
  @@vocabulary_cache ||= {}
112
163
 
164
+ @version = options[:version] ? options[:version].to_sym : :rdfa_1_1
165
+ @host_language = options[:host_language] || :xhtml
166
+
113
167
  @doc = case input
114
168
  when Nokogiri::HTML::Document then input
115
169
  when Nokogiri::XML::Document then input
116
170
  else Nokogiri::XML.parse(input, @base_uri.to_s)
117
171
  end
118
172
 
119
- raise RDF::ReaderError, "Synax errors:\n#{@doc.errors}" if !@doc.errors.empty? && @strict
120
- raise RDF::ReaderError, "Empty document" if (@doc.nil? || @doc.root.nil?) && @strict
173
+ add_error(nil, "Empty document", RDF::RDFA.HostLanguageMarkupError) if (@doc.nil? || @doc.root.nil?)
174
+ add_warning(nil, "Synax errors:\n#{@doc.errors}", RDF::RDFA.HostLanguageMarkupError) unless @doc.errors.empty?
175
+
121
176
  block.call(self) if block_given?
122
177
  end
123
178
  end
@@ -131,16 +186,11 @@ module RDF::RDFa
131
186
  def each_statement(&block)
132
187
  @callback = block
133
188
 
134
- # Determine host language
135
- # XXX - right now only XHTML defined
136
- @host_language = case @doc.root.attributes["version"].to_s
137
- when /XHTML+RDFa/ then :xhtml
138
- end
139
-
140
- # If none found, assume xhtml
141
- @host_language ||= :xhtml
142
-
143
- @host_defaults = {}
189
+ # Section 4.2 RDFa Host Language Conformance
190
+ #
191
+ # The Host Language may define a default RDFa Profile. If it does, the RDFa Profile triples that establish term or
192
+ # URI mappings associated with that profile must not change without changing the profile URI. RDFa Processors may
193
+ # embed, cache, or retrieve the RDFa Profile triples associated with that profile.
144
194
  @host_defaults = case @host_language
145
195
  when :xhtml
146
196
  {
@@ -156,6 +206,10 @@ module RDF::RDFa
156
206
  {}
157
207
  end
158
208
 
209
+ @host_defaults.delete(:vocabulary) if @version == :rdfa_1_0
210
+
211
+ add_debug(@doc, "version = #{@version}, host_language = #{@host_language}")
212
+
159
213
  # parse
160
214
  parse_whole_document(@doc, @base_uri)
161
215
  end
@@ -185,9 +239,8 @@ module RDF::RDFa
185
239
  # Figure out the document path, if it is a Nokogiri::XML::Element or Attribute
186
240
  def node_path(node)
187
241
  case node
188
- when Nokogiri::XML::Element, Nokogiri::XML::Attr then "#{node_path(node.parent)}/#{node.name}"
189
- when String then node
190
- else ""
242
+ when Nokogiri::XML::Node then node.display_path
243
+ else node.to_s
191
244
  end
192
245
  end
193
246
 
@@ -196,10 +249,36 @@ module RDF::RDFa
196
249
  # @param [XML Node, any] node:: XML Node or string for showing context
197
250
  # @param [String] message::
198
251
  def add_debug(node, message)
199
- puts "#{node_path(node)}: #{message}" if $DEBUG
200
- @debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array)
252
+ add_processor_message(node, message, RDF::RDFA.InformationalMessage)
201
253
  end
202
254
 
255
+ def add_info(node, message, process_class = RDF::RDFA.InformationalMessage)
256
+ add_processor_message(node, message, process_class)
257
+ end
258
+
259
+ def add_warning(node, message, process_class = RDF::RDFA.MiscellaneousWarning)
260
+ add_processor_message(node, message, process_class)
261
+ end
262
+
263
+ def add_error(node, message, process_class = RDF::RDFA.MiscellaneousError)
264
+ add_processor_message(node, message, process_class)
265
+ raise ParserException, message if @strict
266
+ end
267
+
268
+ def add_processor_message(node, message, process_class)
269
+ puts "#{node_path(node)}: #{message}" if ::RDF::RDFa::debug?
270
+ @debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array)
271
+ if @processor_graph
272
+ @processor_sequence ||= 0
273
+ n = RDF::Node.new
274
+ @processor_graph << RDF::Statement.new(n, RDF["type"], process_class)
275
+ @processor_graph << RDF::Statement.new(n, RDF::DC.description, message)
276
+ @processor_graph << RDF::Statement.new(n, RDF::DC.date, RDF::Literal::Date.new(DateTime.now.to_date))
277
+ @processor_graph << RDF::Statement.new(n, RDF::RDFA.sequence, RDF::Literal::Integer.new(@processor_sequence += 1))
278
+ @processor_graph << RDF::Statement.new(n, RDF::RDFA.source, node_path(node))
279
+ end
280
+ end
281
+
203
282
  # add a statement, object can be literal or URI or bnode
204
283
  #
205
284
  # @param [Nokogiri::XML::Node, any] node:: XML Node or string for showing context
@@ -210,7 +289,7 @@ module RDF::RDFa
210
289
  # @raise [ReaderError]:: Checks parameter types and raises if they are incorrect if parsing mode is _strict_.
211
290
  def add_triple(node, subject, predicate, object)
212
291
  statement = RDF::Statement.new(subject, predicate, object)
213
- add_debug(node, "statement: #{statement}")
292
+ add_debug(node, "statement: #{statement.to_ntriples}")
214
293
  @callback.call(statement)
215
294
  end
216
295
 
@@ -229,99 +308,96 @@ module RDF::RDFa
229
308
  end
230
309
 
231
310
  # initialize the evaluation context with the appropriate base
232
- evaluation_context = EvaluationContext.new(base, @host_defaults)
311
+ evaluation_context = EvaluationContext.new(@base_uri, @host_defaults)
233
312
 
234
313
  traverse(doc.root, evaluation_context)
235
314
  end
236
315
 
237
- # Extract the XMLNS mappings from an element
238
- def extract_mappings(element, uri_mappings, term_mappings)
239
- # Process @profile
240
- # Next the current element is parsed for any updates to the local term mappings and
241
- # local list of URI mappings via @profile.
242
- # If @profile is present, its value is processed as defined in RDFa Profiles.
243
- element.attributes['profile'].to_s.split(/\s/).each do |profile|
244
- if node_path(element) == "/html/head"
245
- # Don't try to open ourselves!
246
- add_debug(element, "extract_mappings: skip head profile <#{profile}>")
247
- next
248
- elsif @@vocabulary_cache[profile]
249
- add_debug(element, "extract_mappings: cached profile <#{profile}>")
250
- @@vocabulary_cache[profile]
251
- elsif @base_uri.to_s == profile
252
- # Don't try to open ourselves!
253
- add_debug(element, "extract_mappings: skip recursive profile <#{profile}>")
254
- next
316
+ # Parse and process URI mappings, Term mappings and a default vocabulary from @profile
317
+ #
318
+ # Yields each mapping
319
+ def process_profile(element)
320
+ element.attributes['profile'].to_s.split(/\s/).reverse.each do |profile|
321
+ # Don't try to open ourselves!
322
+ if @uri == profile
323
+ add_debug(element, "process_profile: skip recursive profile <#{profile}>")
255
324
  elsif @@vocabulary_cache.has_key?(profile)
256
- add_debug(element, "extract_mappings: skip previously parsed profile <#{profile}>")
325
+ add_debug(element, "process_profile: skip previously parsed profile <#{profile}>")
257
326
  else
258
327
  begin
259
- add_debug(element, "extract_mappings: parse profile <#{profile}>")
260
328
  @@vocabulary_cache[profile] = {
261
329
  :uri_mappings => {},
262
- :term_mappings => {}
330
+ :term_mappings => {},
331
+ :default_vocabulary => nil
263
332
  }
264
333
  um = @@vocabulary_cache[profile][:uri_mappings]
265
334
  tm = @@vocabulary_cache[profile][:term_mappings]
266
- add_debug(element, "extract_mappings: profile open <#{profile}>")
267
-
268
- old_debug, old_verbose, = $DEBUG, $verbose
269
- $DEBUG, $verbose = false, false
270
- # FIXME: format shouldn't need to be specified here
271
- p_graph = RDF::Graph.load(profile, :base_uri => profile, :format => RDF::Format.for(profile) || :rdfa)
272
- puts p_graph.inspect if old_debug
273
- $DEBUG, $verbose = old_debug, old_verbose
274
- p_graph.each_subject do |subject|
275
- # If one of the objects is not a Literal no mapping is created.
335
+ add_debug(element, "process_profile: parse profile <#{profile}>")
336
+
337
+ # Parse profile, and extract mappings from graph
338
+ old_debug, old_verbose, = ::RDF::RDFa::debug?, $verbose
339
+ ::RDF::RDFa::debug, $verbose = false, false
340
+ # Fixme, RDF isn't smart enough to figure this out from MIME-Type
341
+ load_opts = {:base_uri => profile}
342
+ load_opts[:format] = :rdfa unless RDF::Format.for(:file_name => profile)
343
+ p_graph = RDF::Graph.load(profile, load_opts)
344
+ ::RDF::RDFa::debug, $verbose = old_debug, old_verbose
345
+ p_graph.subjects.each do |subject|
346
+ # If one of the objects is not a Literal or if there are additional rdfa:uri or rdfa:term
347
+ # predicates sharing the same subject, no mapping is created.
276
348
  uri = p_graph.first_object([subject, RDF::RDFA['uri'], nil])
277
349
  term = p_graph.first_object([subject, RDF::RDFA['term'], nil])
278
350
  prefix = p_graph.first_object([subject, RDF::RDFA['prefix'], nil])
279
- add_debug(element, "extract_mappings: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}")
351
+ vocab = p_graph.first_object([subject, RDF::RDFA['vocabulary'], nil])
352
+ add_debug(element, "process_profile: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}, vocabulary=#{vocab.inspect}")
280
353
 
281
- next if !uri || (!term && !prefix)
282
- raise RDF::ReaderError, "rdf:uri must be a Literal" unless uri.is_a?(RDF::Literal)
283
- raise RDF::ReaderError, "rdf:term must be a Literal" unless term.nil? || term.is_a?(RDF::Literal)
284
- raise RDF::ReaderError, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(RDF::Literal)
285
-
354
+ raise RDF::ReaderError, "rdf:uri #{uri.inspect} must be a Literal" unless uri.nil? || uri.is_a?(RDF::Literal)
355
+ raise RDF::ReaderError, "rdf:term #{term.inspect} must be a Literal" unless term.nil? || term.is_a?(RDF::Literal)
356
+ raise RDF::ReaderError, "rdf:prefix #{prefix.inspect} must be a Literal" unless prefix.nil? || prefix.is_a?(RDF::Literal)
357
+ raise RDF::ReaderError, "rdf:vocabulary #{vocab.inspect} must be a Literal" unless vocab.nil? || vocab.is_a?(RDF::Literal)
358
+
359
+ @@vocabulary_cache[profile][:default_vocabulary] = vocab.value if vocab
360
+
286
361
  # For every extracted triple that is the common subject of an rdfa:prefix and an rdfa:uri
287
362
  # predicate, create a mapping from the object literal of the rdfa:prefix predicate to the
288
363
  # object literal of the rdfa:uri predicate. Add or update this mapping in the local list of
289
364
  # URI mappings after transforming the 'prefix' component to lower-case.
290
365
  # For every extracted
291
- um[prefix.value.downcase] = uri.value if prefix
366
+ um[prefix.value.downcase] = uri.value if prefix && prefix.value != "_"
292
367
 
293
368
  # triple that is the common subject of an rdfa:term and an rdfa:uri predicate, create a
294
369
  # mapping from the object literal of the rdfa:term predicate to the object literal of the
295
370
  # rdfa:uri predicate. Add or update this mapping in the local term mappings.
296
- tm[term.value] = RDF::URI.intern(uri.value) if term
371
+ tm[term.value.downcase] = RDF::URI.intern(uri.value) if term
297
372
  end
298
- # FIXME: subject isn't in scope here
299
- #rescue RDF::ReaderError
300
- # add_debug(element, "extract_mappings: profile subject #{subject.to_s}: #{e.message}")
301
- # raise if @strict
302
- rescue RuntimeError => e
303
- add_debug(element, "extract_mappings: profile: #{e.message}")
304
- raise if @strict
373
+ rescue RDF::ReaderError => e
374
+ add_error(element, e.message, RDF::RDFA.ProfileReferenceError)
375
+ raise # In case we're not in strict mode, we need to be sure processing stops
305
376
  end
306
377
  end
307
-
308
- # Merge mappings from this vocabulary
309
- uri_mappings.merge!(@@vocabulary_cache[profile][:uri_mappings])
310
- term_mappings.merge!(@@vocabulary_cache[profile][:term_mappings])
378
+ profile_mappings = @@vocabulary_cache[profile]
379
+ yield :uri_mappings, profile_mappings[:uri_mappings] unless profile_mappings[:uri_mappings].empty?
380
+ yield :term_mappings, profile_mappings[:term_mappings] unless profile_mappings[:term_mappings].empty?
381
+ yield :default_vocabulary, profile_mappings[:default_vocabulary] if profile_mappings[:default_vocabulary]
311
382
  end
312
-
383
+ end
384
+
385
+ # Extract the XMLNS mappings from an element
386
+ def extract_mappings(element, uri_mappings, term_mappings)
313
387
  # look for xmlns
314
388
  # (note, this may be dependent on @host_language)
315
389
  # Regardless of how the mapping is declared, the value to be mapped must be converted to lower case,
316
390
  # and the URI is not processed in any way; in particular if it is a relative path it is
317
391
  # not resolved against the current base.
318
- element.namespaces.each do |attr_name, attr_value|
319
- begin
320
- abbr, prefix = attr_name.split(":")
321
- uri_mappings[prefix.to_s.downcase] = attr_value.to_s if abbr.downcase == "xmlns" && prefix
322
- rescue ReaderError => e
323
- add_debug(element, "extract_mappings raised #{e.class}: #{e.message}")
324
- raise if @strict
392
+ element.namespace_definitions.each do |ns|
393
+ # A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
394
+ next if ns.prefix == "_"
395
+
396
+ # Downcase prefix for RDFa 1.1
397
+ pfx_lc = (@version == :rdfa_1_0 || ns.prefix.nil?) ? ns.prefix : ns.prefix.to_s.downcase
398
+ if ns.prefix
399
+ uri_mappings[pfx_lc] = ns.href
400
+ add_debug(element, "extract_mappings: xmlns:#{ns.prefix} => <#{ns.href}>")
325
401
  end
326
402
  end
327
403
 
@@ -335,11 +411,12 @@ module RDF::RDFa
335
411
  next unless prefix.match(/:$/)
336
412
  prefix.chop!
337
413
 
414
+ # A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
415
+ next if prefix == "_"
416
+
338
417
  uri_mappings[prefix] = uri
339
- end
340
-
341
- add_debug(element, "uri_mappings: #{uri_mappings.map{|k,v|"#{k}='#{v}'"}.join(", ")}")
342
- add_debug(element, "term_mappings: #{term_mappings.map{|k,v|"#{k}='#{v}'"}.join(", ")}")
418
+ add_debug(element, "extract_mappings: prefix #{prefix} => <#{uri}>")
419
+ end unless @version == :rdfa_1_0
343
420
  end
344
421
 
345
422
  # The recursive helper function
@@ -352,7 +429,7 @@ module RDF::RDFa
352
429
 
353
430
  add_debug(element, "traverse, ec: #{evaluation_context.inspect}")
354
431
 
355
- # local variables [5.5 Step 1]
432
+ # local variables [7.5 Step 1]
356
433
  recurse = true
357
434
  skip = false
358
435
  new_subject = nil
@@ -375,30 +452,53 @@ module RDF::RDFa
375
452
  vocab = attrs['vocab']
376
453
 
377
454
  # Pull out the attributes needed for the skip test.
378
- property = attrs['property'].to_s if attrs['property']
379
- typeof = attrs['typeof'].to_s if attrs['typeof']
455
+ property = attrs['property'].to_s.strip if attrs['property']
456
+ typeof = attrs['typeof'].to_s.strip if attrs['typeof']
380
457
  datatype = attrs['datatype'].to_s if attrs['datatype']
381
458
  content = attrs['content'].to_s if attrs['content']
382
- rel = attrs['rel'].to_s if attrs['rel']
383
- rev = attrs['rev'].to_s if attrs['rev']
459
+ rel = attrs['rel'].to_s.strip if attrs['rel']
460
+ rev = attrs['rev'].to_s.strip if attrs['rev']
461
+
462
+ # Local term mappings [7.5 Step 2]
463
+ # Next the current element is parsed for any updates to the local term mappings and local list of URI mappings via @profile.
464
+ # If @profile is present, its value is processed as defined in RDFa Profiles.
465
+ unless @version == :rdfa_1_0
466
+ begin
467
+ process_profile(element) do |which, value|
468
+ add_debug(element, "[Step 2] traverse, #{which}: #{value.inspect}")
469
+ case which
470
+ when :uri_mappings then uri_mappings.merge!(value)
471
+ when :term_mappings then term_mappings.merge!(value)
472
+ when :default_vocabulary then default_vocabulary = value
473
+ end
474
+ end
475
+ rescue
476
+ # Skip this element and all sub-elements
477
+ # If any referenced RDFa Profile is not available, then the current element and its children must not place any
478
+ # triples in the default graph.
479
+ raise if @strict
480
+ return
481
+ end
482
+ end
384
483
 
385
- # Default vocabulary [7.5 Step 2]
386
- # First the current element is examined for any change to the default vocabulary via @vocab.
484
+ # Default vocabulary [7.5 Step 3]
485
+ # Next the current element is examined for any change to the default vocabulary via @vocab.
387
486
  # If @vocab is present and contains a value, its value updates the local default vocabulary.
388
487
  # If the value is empty, then the local default vocabulary must be reset to the Host Language defined default.
389
488
  unless vocab.nil?
390
489
  default_vocabulary = if vocab.to_s.empty?
391
490
  # Set default_vocabulary to host language default
392
- @host_defaults.fetch(:voabulary, nil)
491
+ add_debug(element, "[Step 2] traverse, reset default_vocaulary to #{@host_defaults.fetch(:vocabulary, nil).inspect}")
492
+ @host_defaults.fetch(:vocabulary, nil)
393
493
  else
394
- vocab.to_s
494
+ RDF::URI.intern(vocab)
395
495
  end
396
496
  add_debug(element, "[Step 2] traverse, default_vocaulary: #{default_vocabulary.inspect}")
397
497
  end
398
498
 
399
- # Local term mappings [7.5 Steps 3 & 4]
400
- # Next the current element is parsed for any updates to the local term mappings and local list of URI mappings via @profile.
401
- # If @profile is present, its value is processed as defined in RDFa Profiles.
499
+ # Local URI mappings [7.5 Step 4]
500
+ # Next, the current element is examined for URI mappings and these are added to the local list of URI mappings.
501
+ # Note that a URI mapping will simply overwrite any current mapping in the list that has the same name
402
502
  extract_mappings(element, uri_mappings, term_mappings)
403
503
 
404
504
  # Language information [7.5 Step 5]
@@ -419,8 +519,16 @@ module RDF::RDFa
419
519
  add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language || 'nil'}") if attrs['lang']
420
520
 
421
521
  # rels and revs
422
- rels = process_uris(element, rel, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
423
- revs = process_uris(element, rev, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
522
+ rels = process_uris(element, rel, evaluation_context,
523
+ :uri_mappings => uri_mappings,
524
+ :term_mappings => term_mappings,
525
+ :vocab => default_vocabulary,
526
+ :restrictions => TERMorCURIEorAbsURI[@version])
527
+ revs = process_uris(element, rev, evaluation_context,
528
+ :uri_mappings => uri_mappings,
529
+ :term_mappings => term_mappings,
530
+ :vocab => default_vocabulary,
531
+ :restrictions => TERMorCURIEorAbsURI[@version])
424
532
 
425
533
  add_debug(element, "traverse, about: #{about.nil? ? 'nil' : about}, src: #{src.nil? ? 'nil' : src}, resource: #{resource.nil? ? 'nil' : resource}, href: #{href.nil? ? 'nil' : href}")
426
534
  add_debug(element, "traverse, property: #{property.nil? ? 'nil' : property}, typeof: #{typeof.nil? ? 'nil' : typeof}, datatype: #{datatype.nil? ? 'nil' : datatype}, content: #{content.nil? ? 'nil' : content}")
@@ -429,14 +537,18 @@ module RDF::RDFa
429
537
  if !(rel || rev)
430
538
  # Establishing a new subject if no rel/rev [7.5 Step 6]
431
539
  # May not be valid, but can exist
432
- if about
433
- new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
540
+ new_subject = if about
541
+ process_uri(element, about, evaluation_context,
542
+ :uri_mappings => uri_mappings,
543
+ :restrictions => SafeCURIEorCURIEorURI[@version])
434
544
  elsif src
435
- new_subject = process_uri(element, src, evaluation_context)
545
+ process_uri(element, src, evaluation_context, :restrictions => [:uri])
436
546
  elsif resource
437
- new_subject = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
547
+ process_uri(element, resource, evaluation_context,
548
+ :uri_mappings => uri_mappings,
549
+ :restrictions => SafeCURIEorCURIEorURI[@version])
438
550
  elsif href
439
- new_subject = process_uri(element, href, evaluation_context)
551
+ process_uri(element, href, evaluation_context, :restrictions => [:uri])
440
552
  end
441
553
 
442
554
  # If no URI is provided by a resource attribute, then the first match from the following rules
@@ -445,52 +557,52 @@ module RDF::RDFa
445
557
  # otherwise,
446
558
  # if parent object is present, new subject is set to the value of parent object.
447
559
  # Additionally, if @property is not present then the skip element flag is set to 'true';
448
- if new_subject.nil?
449
- if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
450
- # From XHTML+RDFa 1.1:
451
- # if no URI is provided, then first check to see if the element is the head or body element.
452
- # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
453
- new_subject = RDF::URI.intern(evaluation_context.base)
454
- elsif element.attributes['typeof']
455
- new_subject = RDF::Node.new
456
- else
457
- # if it's null, it's null and nothing changes
458
- new_subject = evaluation_context.parent_object
459
- skip = true unless property
460
- end
560
+ new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
561
+ # From XHTML+RDFa 1.1:
562
+ # if no URI is provided, then first check to see if the element is the head or body element.
563
+ # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
564
+ evaluation_context.base
565
+ elsif element.attributes['typeof']
566
+ RDF::Node.new
567
+ else
568
+ # if it's null, it's null and nothing changes
569
+ skip = true unless property
570
+ evaluation_context.parent_object
461
571
  end
462
572
  add_debug(element, "[Step 6] new_subject: #{new_subject}, skip = #{skip}")
463
573
  else
464
574
  # [7.5 Step 7]
465
575
  # If the current element does contain a @rel or @rev attribute, then the next step is to
466
576
  # establish both a value for new subject and a value for current object resource:
467
- if about
468
- new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
469
- elsif src
470
- new_subject = process_uri(element, src, evaluation_context, :uri_mappings => uri_mappings)
471
- end
577
+ new_subject = process_uri(element, about, evaluation_context,
578
+ :uri_mappings => uri_mappings,
579
+ :restrictions => SafeCURIEorCURIEorURI[@version]) ||
580
+ process_uri(element, src, evaluation_context,
581
+ :uri_mappings => uri_mappings,
582
+ :restrictions => [:uri])
472
583
 
473
584
  # If no URI is provided then the first match from the following rules will apply
474
- if new_subject.nil?
475
- if @host_language == :xhtml && element.name =~ /^(head|body)$/
476
- # From XHTML+RDFa 1.1:
477
- # if no URI is provided, then first check to see if the element is the head or body element.
478
- # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
479
- new_subject = RDF::URI.intern(evaluation_context.base)
480
- elsif element.attributes['typeof']
481
- new_subject = RDF::Node.new
482
- else
483
- # if it's null, it's null and nothing changes
484
- new_subject = evaluation_context.parent_object
485
- # no skip flag set this time
486
- end
585
+ new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/
586
+ # From XHTML+RDFa 1.1:
587
+ # if no URI is provided, then first check to see if the element is the head or body element.
588
+ # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
589
+ evaluation_context.base
590
+ elsif element.attributes['typeof']
591
+ RDF::Node.new
592
+ else
593
+ # if it's null, it's null and nothing changes
594
+ evaluation_context.parent_object
595
+ # no skip flag set this time
487
596
  end
488
597
 
489
598
  # Then the current object resource is set to the URI obtained from the first match from the following rules:
490
- if resource
491
- current_object_resource = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
599
+ current_object_resource = if resource
600
+ process_uri(element, resource, evaluation_context,
601
+ :uri_mappings => uri_mappings,
602
+ :restrictions => SafeCURIEorCURIEorURI[@version])
492
603
  elsif href
493
- current_object_resource = process_uri(element, href, evaluation_context)
604
+ process_uri(element, href, evaluation_context,
605
+ :restrictions => [:uri])
494
606
  end
495
607
 
496
608
  add_debug(element, "[Step 7] new_subject: #{new_subject}, current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource}")
@@ -498,11 +610,15 @@ module RDF::RDFa
498
610
 
499
611
  # Process @typeof if there is a subject [Step 8]
500
612
  if new_subject and typeof
501
- # Typeof is TERMorCURIEorURIs
502
- types = process_uris(element, typeof, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
613
+ # Typeof is TERMorCURIEorAbsURIs
614
+ types = process_uris(element, typeof, evaluation_context,
615
+ :uri_mappings => uri_mappings,
616
+ :term_mappings => term_mappings,
617
+ :vocab => default_vocabulary,
618
+ :restrictions => TERMorCURIEorAbsURI[@version])
503
619
  add_debug(element, "typeof: #{typeof}")
504
620
  types.each do |one_type|
505
- add_triple(element, new_subject, RDF.type, one_type)
621
+ add_triple(element, new_subject, RDF["type"], one_type)
506
622
  end
507
623
  end
508
624
 
@@ -531,30 +647,60 @@ module RDF::RDFa
531
647
 
532
648
  # Establish current object literal [Step 11]
533
649
  if property
534
- properties = process_uris(element, property, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
650
+ properties = process_uris(element, property, evaluation_context,
651
+ :uri_mappings => uri_mappings,
652
+ :term_mappings => term_mappings,
653
+ :vocab => default_vocabulary,
654
+ :restrictions => TERMorCURIEorAbsURIprop[@version])
655
+
656
+ properties.reject! do |p|
657
+ if p.is_a?(RDF::URI)
658
+ false
659
+ else
660
+ add_debug(element, "Illegal predicate: #{p.inspect}")
661
+ raise RDF::ReaderError, "predicate #{p.inspect} must be a URI" if @strict
662
+ true
663
+ end
664
+ end
535
665
 
536
666
  # get the literal datatype
537
- type = datatype
538
667
  children_node_types = element.children.collect{|c| c.class}.uniq
539
668
 
540
669
  # Resolve @datatype first, then choose the literal form. The branches below are mutually exclusive: typed literal, otherwise the RDFa 1.1 rules, otherwise the RDFa 1.0 rules.
541
- type_resource = process_uri(element, type, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary) if type
542
- if type and !type.empty? and (type_resource.to_s != RDF.XMLLiteral.to_s)
670
+ datatype = process_uri(element, datatype, evaluation_context,
671
+ :uri_mappings => uri_mappings,
672
+ :term_mappings => term_mappings,
673
+ :vocab => default_vocabulary,
674
+ :restrictions => TERMorCURIEorAbsURI[@version]) unless datatype.to_s.empty?
675
+ current_object_literal = if !datatype.to_s.empty? && datatype.to_s != RDF.XMLLiteral.to_s
543
676
  # typed literal
544
- add_debug(element, "[Step 11] typed literal")
545
- current_object_literal = RDF::Literal.new(content || element.inner_text.to_s, :datatype => type_resource, :language => language)
546
- elsif content or (children_node_types == [Nokogiri::XML::Text]) or (element.children.length == 0) or (type == '')
547
- # plain literal
548
- add_debug(element, "[Step 11] plain literal")
549
- current_object_literal = RDF::Literal.new(content || element.inner_text.to_s, :language => language)
550
- elsif children_node_types != [Nokogiri::XML::Text] and (type == nil or type_resource.to_s == RDF.XMLLiteral.to_s)
551
- # XML Literal
552
- add_debug(element, "[Step 11] XML Literal: #{element.inner_html}")
553
- current_object_literal = RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
554
- recurse = false
677
+ add_debug(element, "[Step 11] typed literal (#{datatype})")
678
+ RDF::Literal.new(content || element.inner_text.to_s, :datatype => datatype, :language => language)
679
+ elsif @version == :rdfa_1_1
680
+ if datatype.to_s == RDF.XMLLiteral.to_s
681
+ # XML Literal
682
+ add_debug(element, "[Step 11(1.1)] XML Literal: #{element.inner_html}")
683
+ recurse = false
684
+ RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
685
+ else
686
+ # plain literal
687
+ add_debug(element, "[Step 11(1.1)] plain literal")
688
+ RDF::Literal.new(content || element.inner_text.to_s, :language => language)
689
+ end
690
+ else
691
+ if content || (children_node_types == [Nokogiri::XML::Text]) || (element.children.length == 0) || datatype == ""
692
+ # plain literal
693
+ add_debug(element, "[Step 11 (1.0)] plain literal")
694
+ RDF::Literal.new(content || element.inner_text.to_s, :language => language)
695
+ elsif children_node_types != [Nokogiri::XML::Text] and (datatype == nil or datatype.to_s == RDF.XMLLiteral.to_s)
696
+ # XML Literal
697
+ add_debug(element, "[Step 11 (1.0)] XML Literal: #{element.inner_html}")
698
+ recurse = false
699
+ RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
700
+ end
555
701
  end
556
-
557
- # add each property
702
+
703
+ # add each property
558
704
  properties.each do |p|
559
705
  add_triple(element, new_subject, p, current_object_literal)
560
706
  end
@@ -611,7 +757,7 @@ module RDF::RDFa
611
757
  end
612
758
  end
613
759
 
614
- # space-separated TERMorCURIEorURI
760
+ # space-separated TERMorCURIEorAbsURI or SafeCURIEorCURIEorURI
615
761
  def process_uris(element, value, evaluation_context, options)
616
762
  return [] if value.to_s.empty?
617
763
  add_debug(element, "process_uris: #{value}")
@@ -619,34 +765,56 @@ module RDF::RDFa
619
765
  end
620
766
 
621
767
  def process_uri(element, value, evaluation_context, options = {})
622
- #return if value.to_s.empty?
623
- #add_debug(element, "process_uri: #{value}")
768
+ return if value.nil?
769
+ restrictions = options[:restrictions]
770
+ add_debug(element, "process_uri: #{value}, restrictions = #{restrictions.inspect}")
624
771
  options = {:uri_mappings => {}}.merge(options)
625
- if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/)
772
+ if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/) && restrictions.include?(:safe_curie)
626
773
  # SafeCURIEorCURIEorURI
627
774
  # When the value is surrounded by square brackets, then the content within the brackets is
628
775
  # evaluated as a CURIE according to the CURIE Syntax definition. If it is not a valid CURIE, the
629
776
  # value must be ignored.
630
- uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject)
777
+ uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
631
778
  add_debug(element, "process_uri: #{value} => safeCURIE => <#{uri}>")
632
779
  uri
633
- elsif options[:term_mappings] && NC_REGEXP.match(value.to_s)
634
- # TERMorCURIEorURI
780
+ elsif options[:term_mappings] && NC_REGEXP.match(value.to_s) && restrictions.include?(:term)
781
+ # TERMorCURIEorAbsURI
635
782
  # If the value is an NCName, then it is evaluated as a term according to General Use of Terms in
636
783
  # Attributes. Note that this step may mean that the value is to be ignored.
637
- uri = process_term(value.to_s, options)
784
+ uri = process_term(element, value.to_s, options)
638
785
  add_debug(element, "process_uri: #{value} => term => <#{uri}>")
639
786
  uri
640
787
  else
641
- # SafeCURIEorCURIEorURI or TERMorCURIEorURI
788
+ # SafeCURIEorCURIEorURI or TERMorCURIEorAbsURI
642
789
  # Otherwise, the value is evaluated as a CURIE.
643
790
  # If it is a valid CURIE, the resulting URI is used; otherwise, the value will be processed as a URI.
644
- uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject)
791
+ uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
645
792
  if uri
646
793
  add_debug(element, "process_uri: #{value} => CURIE => <#{uri}>")
647
- else
648
- ## FIXME: throw exception if there is no base uri set?
649
- uri = RDF::URI.intern(RDF::URI.intern(evaluation_context.base).join(value))
794
+ elsif @version == :rdfa_1_0 && value.to_s.match(/^xml/i)
795
+ # Special case to not allow anything starting with XML to be treated as a URI
796
+ elsif restrictions.include?(:absuri) || restrictions.include?(:uri)
797
+ begin
798
+ # AbsURI does not use xml:base
799
+ if restrictions.include?(:absuri)
800
+ uri = RDF::URI.intern(value)
801
+ unless uri.absolute?
802
+ uri = nil
803
+ raise RDF::ReaderError, "Relative URI #{value}"
804
+ end
805
+ else
806
+ uri = evaluation_context.base.join(Addressable::URI.parse(value))
807
+ end
808
+ rescue Addressable::URI::InvalidURIError => e
809
+ add_warning(element, "Malformed prefix #{value}", RDF::RDFA.UndefinedPrefixError)
810
+ rescue RDF::ReaderError => e
811
+ add_debug(element, e.message)
812
+ if value.to_s =~ /^\(^\w\):/
813
+ add_warning(element, "Undefined prefix #{$1}", RDF::RDFA.UndefinedPrefixError)
814
+ else
815
+ add_warning(element, "Relative URI #{value}")
816
+ end
817
+ end
650
818
  add_debug(element, "process_uri: #{value} => URI => <#{uri}>")
651
819
  end
652
820
  uri
@@ -659,7 +827,7 @@ module RDF::RDFa
659
827
  # @param [Hash] options:: Parser options, one of
660
828
  # <em>options[:term_mappings]</em>:: Term mappings
661
829
  # <em>options[:vocab]</em>:: Default vocabulary
662
- def process_term(value, options)
830
+ def process_term(element, value, options)
663
831
  case
664
832
  when options[:term_mappings].is_a?(Hash) && options[:term_mappings].has_key?(value.to_s.downcase)
665
833
  # If the term is in the local term mappings, use the associated URI.
@@ -670,35 +838,43 @@ module RDF::RDFa
670
838
  RDF::URI.intern(options[:vocab] + value)
671
839
  else
672
840
  # Finally, if there is no local default vocabulary, the term has no associated URI and must be ignored.
841
+ add_warning(element, "Term #{value} is not defined", RDF::RDFA.UndefinedTermError)
673
842
  nil
674
843
  end
675
844
  end
676
845
 
677
846
  # From section 6. CURIE Syntax Definition
678
- def curie_to_resource_or_bnode(element, curie, uri_mappings, subject)
847
+ def curie_to_resource_or_bnode(element, curie, uri_mappings, subject, restrictions)
679
848
  # URI mappings for CURIEs default to XHV, rather than the default doc namespace
680
849
  prefix, reference = curie.to_s.split(":")
681
850
 
682
851
  # consider the bnode situation
683
- if prefix == "_"
852
+ if prefix == "_" && restrictions.include?(:bnode)
853
+ # we force a non-nil name, otherwise it generates a new name
854
+ # As a special case, _: is also a valid reference for one specific bnode.
684
855
  bnode(reference)
685
856
  elsif curie.to_s.match(/^:/)
857
+ add_debug(element, "curie_to_resource_or_bnode: default prefix: defined? #{!!uri_mappings[""]}, defaults: #{@host_defaults[:prefix]}")
686
858
  # Default prefix
687
859
  if uri_mappings[""]
688
860
  RDF::URI.intern(uri_mappings[""] + reference.to_s)
689
861
  elsif @host_defaults[:prefix]
690
862
  RDF::URI.intern(uri_mappings[@host_defaults[:prefix]] + reference.to_s)
863
+ else
864
+ #add_warning(element, "Default namespace prefix is not defined", RDF::RDFA.UndefinedPrefixError)
865
+ nil
691
866
  end
692
867
  elsif !curie.to_s.match(/:/)
693
868
  # No prefix, undefined (in this context, it is evaluated as a term elsewhere)
694
869
  nil
695
870
  else
696
871
  # Prefixes are downcased before lookup, except in RDFa 1.0 where the prefix is looked up as written
697
- ns = uri_mappings[prefix.to_s.downcase]
872
+ prefix = prefix.to_s.downcase unless @version == :rdfa_1_0
873
+ ns = uri_mappings[prefix.to_s]
698
874
  if ns
699
875
  RDF::URI.intern(ns + reference.to_s)
700
876
  else
701
- add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix.downcase}")
877
+ #add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix}")
702
878
  nil
703
879
  end
704
880
  end