rdf_context 0.5.6 → 0.5.7
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +10 -0
- data/{History.txt → History.rdoc} +8 -1
- data/Rakefile +9 -2
- data/VERSION +1 -1
- data/bin/rdf_context +5 -2
- data/lib/rdf_context/aggregate_graph.rb +31 -2
- data/lib/rdf_context/array_hacks.rb +3 -3
- data/lib/rdf_context/bnode.rb +3 -3
- data/lib/rdf_context/conjunctive_graph.rb +8 -8
- data/lib/rdf_context/duration.rb +17 -4
- data/lib/rdf_context/graph.rb +84 -46
- data/lib/rdf_context/literal.rb +36 -3
- data/lib/rdf_context/n3parser.rb +4 -4
- data/lib/rdf_context/namespace.rb +21 -8
- data/lib/rdf_context/parser.rb +31 -16
- data/lib/rdf_context/quoted_graph.rb +5 -4
- data/lib/rdf_context/rdfaparser.rb +176 -91
- data/lib/rdf_context/rdfxmlparser.rb +50 -13
- data/lib/rdf_context/serializer/abstract_serializer.rb +14 -4
- data/lib/rdf_context/serializer/nt_serializer.rb +5 -0
- data/lib/rdf_context/serializer/recursive_serializer.rb +4 -0
- data/lib/rdf_context/serializer/turtle_serializer.rb +28 -27
- data/lib/rdf_context/serializer/xml_serializer.rb +11 -9
- data/lib/rdf_context/store/abstract_sql_store.rb +47 -4
- data/lib/rdf_context/store/abstract_store.rb +73 -1
- data/lib/rdf_context/store/list_store.rb +25 -6
- data/lib/rdf_context/store/memory_store.rb +33 -1
- data/lib/rdf_context/store/sqlite3_store.rb +7 -4
- data/lib/rdf_context/term_utils.rb +6 -0
- data/lib/rdf_context/triple.rb +17 -6
- data/lib/rdf_context/uriref.rb +19 -3
- data/spec/html4-manifest.yml +176 -176
- data/spec/html5-manifest.yml +176 -176
- data/spec/rdfa_helper.rb +8 -2
- data/spec/rdfa_parser_spec.rb +1 -1
- data/spec/rdfcore/Manifest.yml +1561 -2626
- data/spec/swap_test/n3parser.yml +134 -279
- data/spec/swap_test/regression.yml +140 -305
- data/spec/turtle/manifest-bad.yml +155 -310
- data/spec/turtle/manifest.yml +155 -310
- data/spec/xhtml-manifest.yml +139 -587
- data/spec/xhtml11-manifest.yml +4405 -0
- metadata +21 -7
- data/.gitmodules +0 -3
data/lib/rdf_context/literal.rb
CHANGED
@@ -277,6 +277,7 @@ module RdfContext
|
|
277
277
|
end
|
278
278
|
end
|
279
279
|
|
280
|
+
# @private
|
280
281
|
class Language
|
281
282
|
attr_accessor :value
|
282
283
|
def initialize(string)
|
@@ -302,10 +303,25 @@ module RdfContext
|
|
302
303
|
def to_s; @value; end
|
303
304
|
end
|
304
305
|
|
305
|
-
|
306
|
+
# Contents of Literal
|
307
|
+
attr_accessor :contents
|
308
|
+
|
309
|
+
# Encoding defined for literal
|
310
|
+
# @return [Literal::Encoding]
|
311
|
+
attr_accessor :encoding
|
312
|
+
|
313
|
+
# Language associated with literal
|
314
|
+
# @return [String]
|
315
|
+
attr_accessor :lang
|
306
316
|
|
307
317
|
# Create a new Literal. Optionally pass a namespaces hash
|
308
318
|
# for use in applying to rdf::XMLLiteral values.
|
319
|
+
# @param [Object] contents
|
320
|
+
# @param [Encoding] encoding
|
321
|
+
# @option options [String] :language
|
322
|
+
# @option options [Hash{String => Namespace}] :namespaces
|
323
|
+
# @return [Literal]
|
324
|
+
# @raise [TypeError]
|
309
325
|
def initialize(contents, encoding, options = {})
|
310
326
|
unless encoding.is_a?(Encoding)
|
311
327
|
raise TypeError, "#{encoding.inspect} should be an instance of Encoding"
|
@@ -319,6 +335,11 @@ module RdfContext
|
|
319
335
|
end
|
320
336
|
|
321
337
|
# Create literal from a string that is already N3 encoded.
|
338
|
+
# @param [Object] contents
|
339
|
+
# @param [String] language
|
340
|
+
# @param [Encoding] encoding (nil)
|
341
|
+
# @return [Literal]
|
342
|
+
# @raise [TypeError]
|
322
343
|
def self.n3_encoded(contents, language, encoding = nil)
|
323
344
|
encoding = encoding.nil? ? Encoding.the_null_encoding : Encoding.coerce(encoding)
|
324
345
|
options = {}
|
@@ -330,6 +351,10 @@ module RdfContext
|
|
330
351
|
end
|
331
352
|
|
332
353
|
# Create an un-typed literal with a language
|
354
|
+
# @param [Object] contents
|
355
|
+
# @param [String] language (nil)
|
356
|
+
# @return [Literal]
|
357
|
+
# @raise [TypeError]
|
333
358
|
def self.untyped(contents, language = nil)
|
334
359
|
options = {}
|
335
360
|
options[:language] = language if language
|
@@ -337,19 +362,27 @@ module RdfContext
|
|
337
362
|
end
|
338
363
|
|
339
364
|
# Create a typed literal
|
340
|
-
#
|
341
|
-
#
|
365
|
+
# @param [Object] contents
|
366
|
+
# @param [Encoding] encoding (nil)
|
367
|
+
# @option options [Hash{String => Namespace}] :namespaces
|
368
|
+
# @return [Literal]
|
369
|
+
# @raise [TypeError]
|
342
370
|
def self.typed(contents, encoding, options = {})
|
343
371
|
encoding = Encoding.coerce(encoding)
|
344
372
|
new(contents, encoding, options)
|
345
373
|
end
|
346
374
|
|
347
375
|
# Create a literal appropriate for type of object by datatype introspection
|
376
|
+
# @param [Object] object
|
377
|
+
# @return [Literal]
|
378
|
+
# @raise [TypeError]
|
348
379
|
def self.build_from(object)
|
349
380
|
new(object.to_s, infer_encoding_for(object))
|
350
381
|
end
|
351
382
|
|
352
383
|
# Infer the proper XML datatype for the given object
|
384
|
+
# @param [Object] object
|
385
|
+
# @return [Encoding]
|
353
386
|
def self.infer_encoding_for(object)
|
354
387
|
case object
|
355
388
|
when TrueClass then Encoding.boolean
|
data/lib/rdf_context/n3parser.rb
CHANGED
@@ -15,13 +15,13 @@ module RdfContext
|
|
15
15
|
#
|
16
16
|
# @param [String] stream the Notation3/Turtle string
|
17
17
|
# @param [String] uri the URI of the document
|
18
|
-
# @
|
19
|
-
#
|
20
|
-
# <em>options[:strict]</em>:: Abort or proceed on error
|
18
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
19
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
21
20
|
# @return [Graph]
|
22
|
-
# @raise
|
21
|
+
# @raise RdfException or subclass
|
23
22
|
#
|
24
23
|
# @author Patrick Sinclair (metade)
|
24
|
+
# @author Gregg Kellogg
|
25
25
|
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
|
26
26
|
super
|
27
27
|
|
@@ -10,10 +10,10 @@ module RdfContext
|
|
10
10
|
# ==== Example
|
11
11
|
# Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
|
12
12
|
#
|
13
|
-
# @param [
|
14
|
-
# @param [
|
15
|
-
# @return [Namespace]
|
16
|
-
# @raise [
|
13
|
+
# @param [#to_s] uri the URI of the namespace
|
14
|
+
# @param [#to_s] prefix the prefix of the namespace
|
15
|
+
# @return [Namespace] The newly created namespace.
|
16
|
+
# @raise [ParserException] Checks validity of the desired prefix and raises if it is incorrect.
|
17
17
|
#
|
18
18
|
# @author Tom Morris, Pius Uzamere
|
19
19
|
def initialize(uri, prefix)
|
@@ -28,15 +28,18 @@ module RdfContext
|
|
28
28
|
##
|
29
29
|
# Allows the construction of arbitrary URIs on the namespace.
|
30
30
|
#
|
31
|
-
#
|
31
|
+
# @example
|
32
32
|
# foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
|
33
33
|
# foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
|
34
34
|
#
|
35
35
|
# To avoid naming problems, a suffix may have an appended '_', which will be removed when the URI is generated.
|
36
36
|
#
|
37
|
-
# @
|
38
|
-
# @
|
39
|
-
# @
|
37
|
+
# @param [#to_s] methodname to append to NS URI to create a new URI
|
38
|
+
# @param [Array] args Ignored arguments
|
39
|
+
# @return [URIRef] The newly created URI.
|
40
|
+
# @raise [Error] Checks validity of the desired prefix and raises if it is incorrect.
|
41
|
+
# @author Tom Morris
|
42
|
+
# @author Pius Uzamere
|
40
43
|
def method_missing(methodname, *args)
|
41
44
|
self + methodname
|
42
45
|
end
|
@@ -44,6 +47,8 @@ module RdfContext
|
|
44
47
|
# Construct a URIRef from a namespace as in method_missing, but without method collision issues.
|
45
48
|
# Rules are somewhat different than for normal URI unions, as the raw URI is used as the source,
|
46
49
|
# not a normalized URI, and the result is not normalized
|
50
|
+
# @param [#to_s] suffix to append to NS URI to create a new URI
|
51
|
+
# @return [URIRef] The newly created URI.
|
47
52
|
def +(suffix)
|
48
53
|
prefix = @uri
|
49
54
|
suffix = suffix.to_s.sub(/^\#/, "") if prefix.index("#")
|
@@ -52,31 +57,39 @@ module RdfContext
|
|
52
57
|
end
|
53
58
|
|
54
59
|
# Make sure to attach fragment
|
60
|
+
# @return [URIRef] The newly created URI.
|
55
61
|
def uri
|
56
62
|
self + ""
|
57
63
|
end
|
58
64
|
|
59
65
|
# Bind this namespace to a Graph
|
66
|
+
# @param [Graph] graph
|
67
|
+
# @return [Namespace] The bound namespace, as returned by Graph#bind.
|
60
68
|
def bind(graph)
|
61
69
|
graph.bind(self)
|
62
70
|
end
|
63
71
|
|
64
72
|
# Compare namespaces
|
73
|
+
# @param [Namespace] other
|
74
|
+
# @return [Boolean]
|
65
75
|
def eql?(other)
|
66
76
|
self.uri == other.uri
|
67
77
|
end
|
68
78
|
alias_method :==, :eql?
|
69
79
|
|
70
80
|
# Output xmlns attribute name
|
81
|
+
# @return [String]
|
71
82
|
def xmlns_attr
|
72
83
|
prefix.empty? ? "xmlns" : "xmlns:#{prefix}"
|
73
84
|
end
|
74
85
|
|
75
86
|
# Output namespace definition as a hash
|
87
|
+
# @return [Hash{String => String}]
|
76
88
|
def xmlns_hash
|
77
89
|
{xmlns_attr => @uri.to_s}
|
78
90
|
end
|
79
91
|
|
92
|
+
# @return [String]
|
80
93
|
def to_s
|
81
94
|
"#{prefix}: #{@uri}"
|
82
95
|
end
|
data/lib/rdf_context/parser.rb
CHANGED
@@ -9,11 +9,10 @@ module RdfContext
|
|
9
9
|
##
|
10
10
|
# Creates a new parser
|
11
11
|
#
|
12
|
-
# @
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
# <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
|
12
|
+
# @option options [Graph] :graph (nil) Graph to parse into, otherwise a new RdfContext::Graph instance is created
|
13
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
14
|
+
# @option options [:rdfxml, :html, :n3] :type (nil)
|
15
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
17
16
|
def initialize(options = {})
|
18
17
|
# initialize the triplestore
|
19
18
|
@graph = options[:graph]
|
@@ -24,12 +23,15 @@ module RdfContext
|
|
24
23
|
|
25
24
|
# Instantiate Parser and parse document
|
26
25
|
#
|
27
|
-
# @param [
|
28
|
-
# @param [String] uri
|
29
|
-
# @
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
26
|
+
# @param [#read, #to_s] stream the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
|
27
|
+
# @param [String] uri (nil) the URI of the document
|
28
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
29
|
+
# @option options [:rdfxml, :html, :n3] :type (nil)
|
30
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
31
|
+
# @return [Graph] Returns the graph containing parsed triples
|
32
|
+
# @yield [triple]
|
33
|
+
# @yieldparam [Triple] triple
|
34
|
+
# @raise [Error]:: Raises RdfError if _strict_
|
33
35
|
# @return [Graph]:: Returns the graph containing parsed triples
|
34
36
|
# @raise [Error]:: Raises RdfError if _strict_
|
35
37
|
def self.parse(stream, uri = nil, options = {}, &block) # :yields: triple
|
@@ -44,11 +46,15 @@ module RdfContext
|
|
44
46
|
#
|
45
47
|
# Virtual Class, prototype for Parser subclass.
|
46
48
|
#
|
47
|
-
# @param [
|
48
|
-
# @param [String] uri
|
49
|
-
# @
|
50
|
-
#
|
51
|
-
#
|
49
|
+
# @param [#read, #to_s] stream the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
|
50
|
+
# @param [String] uri (nil) the URI of the document
|
51
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
52
|
+
# @option options [:rdfxml, :html, :n3] :type (nil)
|
53
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
54
|
+
# @return [Graph] Returns the graph containing parsed triples
|
55
|
+
# @yield [triple]
|
56
|
+
# @yieldparam [Triple] triple
|
57
|
+
# @raise [Error]:: Raises RdfError if _strict_
|
52
58
|
# @return [Graph]:: Returns the graph containing parsed triples
|
53
59
|
# @raise [Error]:: Raises RdfError if _strict_
|
54
60
|
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
|
@@ -81,17 +87,26 @@ module RdfContext
|
|
81
87
|
end
|
82
88
|
|
83
89
|
|
90
|
+
# @return [Graph]
|
84
91
|
def graph; @delegate ? @delegate.graph : (@graph || Graph.new); end
|
92
|
+
|
93
|
+
# @return [Array<String>]
|
85
94
|
def debug; @delegate ? @delegate.debug : @debug; end
|
86
95
|
|
87
96
|
# Return N3 Parser instance
|
97
|
+
# @return [N3Parser]
|
88
98
|
def self.n3_parser(options = {}); N3Parser.new(options); end
|
89
99
|
# Return RDF/XML Parser instance
|
100
|
+
# @return [RdfXmlParser]
|
90
101
|
def self.rdfxml_parser(options = {}); RdfXmlParser.new(options); end
|
91
102
|
# Return Rdfa Parser instance
|
103
|
+
# @return [RdfaParser]
|
92
104
|
def self.rdfa_parser(options = {}); RdfaParser.new(options); end
|
93
105
|
|
94
106
|
# Heuristically detect the format of the uri
|
107
|
+
# @param [#read, #to_s] stream
|
108
|
+
# @param [#to_s] uri (nil)
|
109
|
+
# @return [:rdfxml, :rdfa, :n3]
|
95
110
|
def detect_format(stream, uri = nil)
|
96
111
|
uri ||= stream.path if stream.respond_to?(:path)
|
97
112
|
format = case uri.to_s
|
@@ -14,15 +14,15 @@ module RdfContext
|
|
14
14
|
##
|
15
15
|
# Adds one or more extant triples to a graph. Delegates to Store.
|
16
16
|
#
|
17
|
-
#
|
17
|
+
# @example
|
18
18
|
# g = Graph.new;
|
19
19
|
# t1 = Triple.new(BNode.new, URIRef.new("http://xmlns.com/foaf/0.1/knows"), BNode.new);
|
20
20
|
# t2 = Triple.new(BNode.new, URIRef.new("http://xmlns.com/foaf/0.1/knows"), BNode.new);
|
21
21
|
# g.add(t1, t2, ...)
|
22
22
|
#
|
23
|
-
# @param [Triple] triples
|
24
|
-
#
|
25
|
-
# @return [Graph]
|
23
|
+
# @param [Array<Triple>] triples one or more triples. Last element may be a hash for options
|
24
|
+
# @option [Resource] :context Graph context in which to deposit triples, defaults to default_context or self
|
25
|
+
# @return [Graph] Returns the graph
|
26
26
|
def add(*triples)
|
27
27
|
options = triples.last.is_a?(Hash) ? triples.pop : {}
|
28
28
|
ctx = options[:context] || @default_context || self
|
@@ -31,6 +31,7 @@ module RdfContext
|
|
31
31
|
end
|
32
32
|
|
33
33
|
# Return an n3 identifier for the Graph
|
34
|
+
# @return [String]
|
34
35
|
def n3
|
35
36
|
"{#{self.identifier.to_n3}}"
|
36
37
|
end
|
@@ -5,56 +5,88 @@ module RdfContext
|
|
5
5
|
# An RDFa parser in Ruby
|
6
6
|
#
|
7
7
|
# Based on processing rules described here:
|
8
|
-
#
|
8
|
+
# @see http://www.w3.org/TR/rdfa-syntax/#s_model RDFa 1.0
|
9
|
+
# @see http://www.w3.org/2010/02/rdfa/drafts/2010/ED-rdfa-core-20100705/ RDFa 1.1
|
9
10
|
#
|
10
|
-
# Ben Adida
|
11
|
-
#
|
12
|
-
# Gregg Kellogg
|
13
|
-
# 2009-08-04
|
11
|
+
# @author Ben Adida
|
12
|
+
# @author Gregg Kellogg
|
14
13
|
class RdfaParser < Parser
|
15
|
-
# Host language
|
16
|
-
#
|
17
|
-
# :xhtml_rdfa_1_1
|
14
|
+
# Host language
|
15
|
+
# @return [:xhtml]
|
18
16
|
attr_reader :host_language
|
17
|
+
|
18
|
+
# Version
|
19
|
+
# @return [:rdfa_1_0, :rdfa_1_1]
|
20
|
+
attr_reader :version
|
19
21
|
|
20
22
|
# The Recursive Baggage
|
23
|
+
# @private
|
21
24
|
class EvaluationContext # :nodoc:
|
22
|
-
# The base.
|
25
|
+
# The base.
|
26
|
+
#
|
27
|
+
# This will usually be the URL of the document being processed,
|
23
28
|
# but it could be some other URL, set by some other mechanism,
|
24
29
|
# such as the (X)HTML base element. The important thing is that it establishes
|
25
30
|
# a URL against which relative paths can be resolved.
|
31
|
+
#
|
32
|
+
# @return [URIRef]
|
26
33
|
attr :base, true
|
27
34
|
# The parent subject.
|
35
|
+
#
|
28
36
|
# The initial value will be the same as the initial value of base,
|
29
37
|
# but it will usually change during the course of processing.
|
38
|
+
#
|
39
|
+
# @return [URIRef]
|
30
40
|
attr :parent_subject, true
|
31
41
|
# The parent object.
|
42
|
+
#
|
32
43
|
# In some situations the object of a statement becomes the subject of any nested statements,
|
33
44
|
# and this property is used to convey this value.
|
34
45
|
# Note that this value may be a bnode, since in some situations a number of nested statements
|
35
46
|
# are grouped together on one bnode.
|
36
47
|
# This means that the bnode must be set in the containing statement and passed down,
|
37
48
|
# and this property is used to convey this value.
|
49
|
+
#
|
50
|
+
# @return [URIRef]
|
38
51
|
attr :parent_object, true
|
39
52
|
# A list of current, in-scope URI mappings.
|
53
|
+
#
|
54
|
+
# @return [Hash{String => Namespace}]
|
40
55
|
attr :uri_mappings, true
|
41
|
-
# A list of incomplete triples.
|
56
|
+
# A list of incomplete triples.
|
57
|
+
#
|
58
|
+
# A triple can be incomplete when no object resource
|
42
59
|
# is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
|
43
60
|
# The triples can be completed when a resource becomes available,
|
44
61
|
# which will be when the next subject is specified (part of the process called chaining).
|
62
|
+
#
|
63
|
+
# @return [Array<Array<URIRef, Resource>>]
|
45
64
|
attr :incomplete_triples, true
|
46
65
|
# The language. Note that there is no default language.
|
66
|
+
#
|
67
|
+
# @return [String]
|
47
68
|
attr :language, true
|
48
69
|
# The term mappings, a list of terms and their associated URIs.
|
70
|
+
#
|
49
71
|
# This specification does not define an initial list.
|
50
72
|
# Host Languages may define an initial list.
|
51
73
|
# If a Host Language provides an initial list, it should do so via an RDFa Profile document.
|
74
|
+
#
|
75
|
+
# @return [Hash{String => URIRef}]
|
52
76
|
attr :term_mappings, true
|
53
|
-
# The default vocabulary
|
77
|
+
# The default vocabulary
|
78
|
+
#
|
79
|
+
# A value to use as the prefix URI when a term is used.
|
54
80
|
# This specification does not define an initial setting for the default vocabulary.
|
55
81
|
# Host Languages may define an initial setting.
|
82
|
+
#
|
83
|
+
# @return [URIRef]
|
56
84
|
attr :default_vocabulary, true
|
57
85
|
|
86
|
+
# @param [String] base
|
87
|
+
# @param [Hash] host_defaults
|
88
|
+
# @option host_defaults [Hash{String => URIRef}] :term_mappings Hash of NCName => URIRef
|
89
|
+
# @option host_defaults [URIRef] :vocabulary Default vocabulary URI
|
58
90
|
def initialize(base, host_defaults)
|
59
91
|
# Initialize the evaluation context, [5.1]
|
60
92
|
@base = base
|
@@ -64,10 +96,12 @@ module RdfContext
|
|
64
96
|
@incomplete_triples = []
|
65
97
|
@language = nil
|
66
98
|
@term_mappings = host_defaults.fetch(:term_mappings, {})
|
67
|
-
@
|
99
|
+
@default_vocabulary = host_defaults.fetch(:vocabulary, nil)
|
68
100
|
end
|
69
101
|
|
70
102
|
# Copy this Evaluation Context
|
103
|
+
#
|
104
|
+
# @param [EvaluationContext] from
|
71
105
|
def initialize_copy(from)
|
72
106
|
# clone the evaluation context correctly
|
73
107
|
@uri_mappings = from.uri_mappings.clone
|
@@ -86,11 +120,10 @@ module RdfContext
|
|
86
120
|
##
|
87
121
|
# Creates a new parser for RDFa.
|
88
122
|
#
|
89
|
-
# @
|
90
|
-
#
|
91
|
-
#
|
92
|
-
#
|
93
|
-
# <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
|
123
|
+
# @option options [Graph] :graph (nil) Graph to parse into, otherwise a new RdfContext::Graph instance is created
|
124
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
125
|
+
# @option options [:rdfxml, :html, :n3] :type (nil)
|
126
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
94
127
|
def initialize(options = {})
|
95
128
|
super
|
96
129
|
@@vocabulary_cache ||= {}
|
@@ -104,12 +137,13 @@ module RdfContext
|
|
104
137
|
# Optionally, the stream may be a Nokogiri::HTML::Document or Nokogiri::XML::Document
|
105
138
|
# With a block, yields each statement with URIRef, BNode or Literal elements
|
106
139
|
#
|
107
|
-
# @param [
|
108
|
-
# @param [String] uri
|
109
|
-
# @
|
110
|
-
#
|
111
|
-
#
|
112
|
-
# @
|
140
|
+
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, #read, #to_s] stream the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
|
141
|
+
# @param [String] uri (nil) the URI of the document
|
142
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
143
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
144
|
+
# @return [Graph] Returns the graph containing parsed triples
|
145
|
+
# @yield [triple]
|
146
|
+
# @yieldparam [Triple] triple
|
113
147
|
# @raise [Error]:: Raises RdfError if _strict_
|
114
148
|
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
|
115
149
|
super
|
@@ -125,19 +159,24 @@ module RdfContext
|
|
125
159
|
|
126
160
|
# Determine host language
|
127
161
|
# XXX - right now only XHTML defined
|
128
|
-
|
162
|
+
version = @doc.root.attributes["version"].to_s if @doc.root
|
163
|
+
|
164
|
+
@host_language = case version.to_s
|
129
165
|
when /XHTML+RDFa/ then :xhtml
|
130
166
|
end
|
131
167
|
|
132
168
|
# If none found, assume xhtml
|
133
169
|
@host_language ||= :xhtml
|
134
170
|
|
171
|
+
@version = version.to_s.match(/RDFa 1.0/) ? :rdfa_1_0 : :rdfa_1_1
|
172
|
+
|
135
173
|
@host_defaults = case @host_language
|
136
174
|
when :xhtml
|
137
175
|
@graph.bind(XHV_NS)
|
138
176
|
{
|
139
177
|
:vocabulary => XHV_NS.uri,
|
140
178
|
:prefix => XHV_NS,
|
179
|
+
:uri_mappings => {"xhv" => XHV_NS}, # RDF::XHTML is wrong
|
141
180
|
:term_mappings => %w(
|
142
181
|
alternate appendix bookmark cite chapter contents copyright first glossary help icon index
|
143
182
|
last license meta next p3pv1 prev role section stylesheet subsection start top up
|
@@ -146,7 +185,10 @@ module RdfContext
|
|
146
185
|
else
|
147
186
|
{}
|
148
187
|
end
|
188
|
+
|
189
|
+
@host_defaults.delete(:vocabulary) if @version == :rdfa_1_0
|
149
190
|
|
191
|
+
add_debug(@doc, "version = #{@version}, host_language = #{@host_language}")
|
150
192
|
# parse
|
151
193
|
parse_whole_document(@doc, @uri)
|
152
194
|
|
@@ -232,13 +274,13 @@ module RdfContext
|
|
232
274
|
raise ParserException, "rdf:uri must be a Literal" unless uri.is_a?(Literal)
|
233
275
|
raise ParserException, "rdf:term must be a Literal" unless term.nil? || term.is_a?(Literal)
|
234
276
|
raise ParserException, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(Literal)
|
235
|
-
|
277
|
+
|
236
278
|
# For every extracted triple that is the common subject of an rdfa:prefix and an rdfa:uri
|
237
279
|
# predicate, create a mapping from the object literal of the rdfa:prefix predicate to the
|
238
280
|
# object literal of the rdfa:uri predicate. Add or update this mapping in the local list of
|
239
281
|
# URI mappings after transforming the 'prefix' component to lower-case.
|
240
282
|
# For every extracted
|
241
|
-
um[prefix.to_s.downcase] = @graph.bind(Namespace.new(uri.to_s, prefix.to_s.downcase)) if prefix
|
283
|
+
um[prefix.to_s.downcase] = @graph.bind(Namespace.new(uri.to_s, prefix.to_s.downcase)) if prefix && prefix.to_s != "_"
|
242
284
|
|
243
285
|
# triple that is the common subject of an rdfa:term and an rdfa:uri predicate, create a
|
244
286
|
# mapping from the object literal of the rdfa:term predicate to the object literal of the
|
@@ -257,7 +299,7 @@ module RdfContext
|
|
257
299
|
# Merge mappings from this vocabulary
|
258
300
|
uri_mappings.merge!(@@vocabulary_cache[profile][:uri_mappings])
|
259
301
|
term_mappings.merge!(@@vocabulary_cache[profile][:term_mappings])
|
260
|
-
end
|
302
|
+
end unless @version == :rdfa_1_0
|
261
303
|
|
262
304
|
# look for xmlns
|
263
305
|
# (note, this may be dependent on @host_language)
|
@@ -267,7 +309,11 @@ module RdfContext
|
|
267
309
|
element.namespaces.each do |attr_name, attr_value|
|
268
310
|
begin
|
269
311
|
abbr, prefix = attr_name.split(":")
|
270
|
-
|
312
|
+
# A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
|
313
|
+
next if prefix == "_"
|
314
|
+
|
315
|
+
pfx_lc = @version == :rdfa_1_0 || prefix.nil? ? prefix : prefix.to_s.downcase
|
316
|
+
uri_mappings[pfx_lc] = @graph.bind(Namespace.new(attr_value, prefix.to_s)) if abbr.downcase == "xmlns" && prefix
|
271
317
|
rescue RdfException => e
|
272
318
|
add_debug(element, "extract_mappings raised #{e.class}: #{e.message}")
|
273
319
|
raise if @strict
|
@@ -285,8 +331,11 @@ module RdfContext
|
|
285
331
|
next unless prefix.match(/:$/)
|
286
332
|
prefix.chop!
|
287
333
|
|
334
|
+
# A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
|
335
|
+
next if prefix == "_"
|
336
|
+
|
288
337
|
uri_mappings[prefix] = @graph.bind(Namespace.new(uri, prefix))
|
289
|
-
end
|
338
|
+
end unless @version == :rdfa_1_0
|
290
339
|
|
291
340
|
add_debug(element, "uri_mappings: #{uri_mappings.values.map{|ns|ns.to_s}.join(", ")}")
|
292
341
|
add_debug(element, "term_mappings: #{term_mappings.keys.join(", ")}")
|
@@ -339,9 +388,9 @@ module RdfContext
|
|
339
388
|
unless vocab.nil?
|
340
389
|
default_vocabulary = if vocab.to_s.empty?
|
341
390
|
# Set default_vocabulary to host language default
|
342
|
-
@host_defaults.fetch(:
|
391
|
+
@host_defaults.fetch(:vocabulary, nil)
|
343
392
|
else
|
344
|
-
vocab
|
393
|
+
URIRef.new(vocab)
|
345
394
|
end
|
346
395
|
add_debug(element, "[Step 2] traverse, default_vocaulary: #{default_vocabulary.inspect}")
|
347
396
|
end
|
@@ -368,8 +417,16 @@ module RdfContext
|
|
368
417
|
add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language}") if attrs['lang']
|
369
418
|
|
370
419
|
# rels and revs
|
371
|
-
rels = process_uris(element, rel, evaluation_context,
|
372
|
-
|
420
|
+
rels = process_uris(element, rel, evaluation_context,
|
421
|
+
:uri_mappings => uri_mappings,
|
422
|
+
:term_mappings => term_mappings,
|
423
|
+
:vocab => default_vocabulary,
|
424
|
+
:r_1_0_restrictions => [:uri, :bnode, :term])
|
425
|
+
revs = process_uris(element, rev, evaluation_context,
|
426
|
+
:uri_mappings => uri_mappings,
|
427
|
+
:term_mappings => term_mappings,
|
428
|
+
:vocab => default_vocabulary,
|
429
|
+
:r_1_0_restrictions => [:uri, :bnode, :term])
|
373
430
|
|
374
431
|
add_debug(element, "traverse, about: #{about.nil? ? 'nil' : about}, src: #{src.nil? ? 'nil' : src}, resource: #{resource.nil? ? 'nil' : resource}, href: #{href.nil? ? 'nil' : href}")
|
375
432
|
add_debug(element, "traverse, property: #{property.nil? ? 'nil' : property}, typeof: #{typeof.nil? ? 'nil' : typeof}, datatype: #{datatype.nil? ? 'nil' : datatype}, content: #{content.nil? ? 'nil' : content}")
|
@@ -378,14 +435,18 @@ module RdfContext
|
|
378
435
|
if !(rel || rev)
|
379
436
|
# Establishing a new subject if no rel/rev [7.5 Step 6]
|
380
437
|
# May not be valid, but can exist
|
381
|
-
if about
|
382
|
-
|
438
|
+
new_subject = if about
|
439
|
+
process_uri(element, about, evaluation_context,
|
440
|
+
:uri_mappings => uri_mappings,
|
441
|
+
:r_1_0_restrictions => [:uri, :safe_curie, :bnode])
|
383
442
|
elsif src
|
384
|
-
|
443
|
+
process_uri(element, src, evaluation_context, :r_1_0_restrictions => [:uri])
|
385
444
|
elsif resource
|
386
|
-
|
445
|
+
process_uri(element, resource, evaluation_context,
|
446
|
+
:uri_mappings => uri_mappings,
|
447
|
+
:r_1_0_restrictions => [:uri, :safe_curie, :bnode])
|
387
448
|
elsif href
|
388
|
-
|
449
|
+
process_uri(element, href, evaluation_context, :r_1_0_restrictions => [:uri])
|
389
450
|
end
|
390
451
|
|
391
452
|
# If no URI is provided by a resource attribute, then the first match from the following rules
|
@@ -394,52 +455,49 @@ module RdfContext
|
|
394
455
|
# otherwise,
|
395
456
|
# if parent object is present, new subject is set to the value of parent object.
|
396
457
|
# Additionally, if @property is not present then the skip element flag is set to 'true';
|
397
|
-
if
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
skip = true unless property
|
409
|
-
end
|
458
|
+
new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
|
459
|
+
# From XHTML+RDFa 1.1:
|
460
|
+
# if no URI is provided, then first check to see if the element is the head or body element.
|
461
|
+
# If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
|
462
|
+
URIRef.new(evaluation_context.base, :normalize => false)
|
463
|
+
elsif element.attributes['typeof']
|
464
|
+
BNode.new
|
465
|
+
else
|
466
|
+
skip = true unless property
|
467
|
+
# if it's null, it's null and nothing changes
|
468
|
+
evaluation_context.parent_object
|
410
469
|
end
|
411
470
|
add_debug(element, "[Step 6] new_subject: #{new_subject}, skip = #{skip}")
|
412
471
|
else
|
413
472
|
# [7.5 Step 7]
|
414
473
|
# If the current element does contain a @rel or @rev attribute, then the next step is to
|
415
474
|
# establish both a value for new subject and a value for current object resource:
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
new_subject = process_uri(element, src, evaluation_context, :uri_mappings => uri_mappings)
|
420
|
-
end
|
475
|
+
new_subject = process_uri(element, about || src, evaluation_context,
|
476
|
+
:uri_mappings => uri_mappings,
|
477
|
+
:r_1_0_restrictions => [:uri, :safe_curie, :bnode])
|
421
478
|
|
422
479
|
# If no URI is provided then the first match from the following rules will apply
|
423
|
-
if
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
# no skip flag set this time
|
435
|
-
end
|
480
|
+
new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/
|
481
|
+
# From XHTML+RDFa 1.1:
|
482
|
+
# if no URI is provided, then first check to see if the element is the head or body element.
|
483
|
+
# If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
|
484
|
+
URIRef.new(evaluation_context.base, :normalize => false)
|
485
|
+
elsif element.attributes['typeof']
|
486
|
+
BNode.new
|
487
|
+
else
|
488
|
+
# if it's null, it's null and nothing changes
|
489
|
+
evaluation_context.parent_object
|
490
|
+
# no skip flag set this time
|
436
491
|
end
|
437
492
|
|
438
493
|
# Then the current object resource is set to the URI obtained from the first match from the following rules:
|
439
|
-
if resource
|
440
|
-
|
494
|
+
current_object_resource = if resource
|
495
|
+
process_uri(element, resource, evaluation_context,
|
496
|
+
:uri_mappings => uri_mappings,
|
497
|
+
:r_1_0_restrictions => [:uri, :safe_curie, :bnode])
|
441
498
|
elsif href
|
442
|
-
|
499
|
+
process_uri(element, href, evaluation_context,
|
500
|
+
:r_1_0_restrictions => [:uri])
|
443
501
|
end
|
444
502
|
|
445
503
|
add_debug(element, "[Step 7] new_subject: #{new_subject}, current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource}")
|
@@ -448,7 +506,11 @@ module RdfContext
|
|
448
506
|
# Process @typeof if there is a subject [Step 8]
|
449
507
|
if new_subject and typeof
|
450
508
|
# Typeof is TERMorCURIEorURIs
|
451
|
-
types = process_uris(element, typeof, evaluation_context,
|
509
|
+
types = process_uris(element, typeof, evaluation_context,
|
510
|
+
:uri_mappings => uri_mappings,
|
511
|
+
:term_mappings => term_mappings,
|
512
|
+
:vocab => default_vocabulary,
|
513
|
+
:r_1_0_restrictions => [:curie, :bnode])
|
452
514
|
add_debug(element, "typeof: #{typeof}")
|
453
515
|
types.each do |one_type|
|
454
516
|
add_triple(element, new_subject, RDF_TYPE, one_type)
|
@@ -480,27 +542,45 @@ module RdfContext
|
|
480
542
|
|
481
543
|
# Establish current object literal [Step 11]
|
482
544
|
if property
|
483
|
-
properties = process_uris(element, property, evaluation_context,
|
545
|
+
properties = process_uris(element, property, evaluation_context,
|
546
|
+
:uri_mappings => uri_mappings,
|
547
|
+
:term_mappings => term_mappings,
|
548
|
+
:vocab => default_vocabulary,
|
549
|
+
:r_1_0_restrictions => [:curie, :bnode])
|
550
|
+
|
551
|
+
properties.reject! do |p|
|
552
|
+
if p.is_a?(URIRef)
|
553
|
+
false
|
554
|
+
else
|
555
|
+
add_debug(element, "Illegal predicate: #{p.inspect}")
|
556
|
+
raise InvalidPredicate, "predicate #{p.inspect} must be a URI" if @strict
|
557
|
+
true
|
558
|
+
end
|
559
|
+
end
|
484
560
|
|
485
561
|
# get the literal datatype
|
486
562
|
type = datatype
|
487
563
|
children_node_types = element.children.collect{|c| c.class}.uniq
|
488
564
|
|
489
565
|
# the following 3 IF clauses should be mutually exclusive. Written as is to prevent extensive indentation.
|
490
|
-
type_resource = process_uri(element, type, evaluation_context,
|
491
|
-
|
566
|
+
type_resource = process_uri(element, type, evaluation_context,
|
567
|
+
:uri_mappings => uri_mappings,
|
568
|
+
:term_mappings => term_mappings,
|
569
|
+
:vocab => default_vocabulary,
|
570
|
+
:r_1_0_restrictions => [:curie, :bnode]) if type
|
571
|
+
current_object_literal = if type and !type.empty? and (type_resource.to_s != XML_LITERAL.to_s)
|
492
572
|
# typed literal
|
493
573
|
add_debug(element, "[Step 11] typed literal")
|
494
|
-
|
574
|
+
Literal.typed(content || element.inner_text, type_resource, :language => language)
|
495
575
|
elsif content or (children_node_types == [Nokogiri::XML::Text]) or (element.children.length == 0) or (type == '')
|
496
576
|
# plain literal
|
497
577
|
add_debug(element, "[Step 11] plain literal")
|
498
|
-
|
578
|
+
Literal.untyped(content || element.inner_text, language)
|
499
579
|
elsif children_node_types != [Nokogiri::XML::Text] and (type == nil or type_resource.to_s == XML_LITERAL.to_s)
|
500
580
|
# XML Literal
|
501
581
|
add_debug(element, "[Step 11] XML Literal: #{element.inner_html}")
|
502
|
-
current_object_literal = Literal.typed(element.children, XML_LITERAL, :language => language, :namespaces => uri_mappings)
|
503
582
|
recurse = false
|
583
|
+
Literal.typed(element.children, XML_LITERAL, :language => language, :namespaces => uri_mappings)
|
504
584
|
end
|
505
585
|
|
506
586
|
# add each property
|
@@ -568,18 +648,19 @@ module RdfContext
|
|
568
648
|
end
|
569
649
|
|
570
650
|
def process_uri(element, value, evaluation_context, options = {})
|
571
|
-
|
572
|
-
|
651
|
+
return if value.nil?
|
652
|
+
restrictions = @version == :rdfa_1_0 ? options[:r_1_0_restrictions] : [:uri, :bnode, :curie, :safe_curie, :term]
|
653
|
+
add_debug(element, "process_uri: restrictions = #{restrictions.inspect}")
|
573
654
|
options = {:uri_mappings => {}}.merge(options)
|
574
|
-
if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/)
|
655
|
+
if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/) && restrictions.include?(:safe_curie)
|
575
656
|
# SafeCURIEorCURIEorURI
|
576
657
|
# When the value is surrounded by square brackets, then the content within the brackets is
|
577
658
|
# evaluated as a CURIE according to the CURIE Syntax definition. If it is not a valid CURIE, the
|
578
659
|
# value must be ignored.
|
579
|
-
uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject)
|
660
|
+
uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
|
580
661
|
add_debug(element, "process_uri: #{value} => safeCURIE => <#{uri}>")
|
581
662
|
uri
|
582
|
-
elsif options[:term_mappings] && NC_REGEXP.match(value.to_s)
|
663
|
+
elsif options[:term_mappings] && NC_REGEXP.match(value.to_s) && restrictions.include?(:term)
|
583
664
|
# TERMorCURIEorURI
|
584
665
|
# If the value is an NCName, then it is evaluated as a term according to General Use of Terms in
|
585
666
|
# Attributes. Note that this step may mean that the value is to be ignored.
|
@@ -590,9 +671,11 @@ module RdfContext
|
|
590
671
|
# SafeCURIEorCURIEorURI or TERMorCURIEorURI
|
591
672
|
# Otherwise, the value is evaluated as a CURIE.
|
592
673
|
# If it is a valid CURIE, the resulting URI is used; otherwise, the value will be processed as a URI.
|
593
|
-
uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject)
|
674
|
+
uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
|
594
675
|
if uri
|
595
676
|
add_debug(element, "process_uri: #{value} => CURIE => <#{uri}>")
|
677
|
+
elsif @version == :rdfa_1_0 && value.to_s.match(/^xml/)
|
678
|
+
# Special case to not allow anything starting with XML to be treated as a URI
|
596
679
|
else
|
597
680
|
uri = URIRef.new(value, evaluation_context.base, :normalize => false)
|
598
681
|
add_debug(element, "process_uri: #{value} => URI => <#{uri}>")
|
@@ -615,7 +698,7 @@ module RdfContext
|
|
615
698
|
options[:term_mappings][value.to_s.downcase]
|
616
699
|
when options[:vocab]
|
617
700
|
# Otherwise, if there is a local default vocabulary the URI is obtained by concatenating that value and the term.
|
618
|
-
options[:vocab] + value
|
701
|
+
URIRef.new(options[:vocab].to_s + value)
|
619
702
|
else
|
620
703
|
# Finally, if there is no local default vocabulary, the term has no associated URI and must be ignored.
|
621
704
|
nil
|
@@ -623,15 +706,16 @@ module RdfContext
|
|
623
706
|
end
|
624
707
|
|
625
708
|
# From section 6. CURIE Syntax Definition
|
626
|
-
def curie_to_resource_or_bnode(element, curie, uri_mappings, subject)
|
709
|
+
def curie_to_resource_or_bnode(element, curie, uri_mappings, subject, restrictions)
|
627
710
|
# URI mappings for CURIEs default to XH_MAPPING, rather than the default doc namespace
|
628
711
|
prefix, reference = curie.to_s.split(":")
|
629
712
|
|
630
713
|
# consider the bnode situation
|
631
|
-
if prefix == "_"
|
714
|
+
if prefix == "_" && restrictions.include?(:bnode)
|
632
715
|
# we force a non-nil name, otherwise it generates a new name
|
633
716
|
BNode.new(reference || "", @named_bnodes)
|
634
717
|
elsif curie.to_s.match(/^:/)
|
718
|
+
add_debug(element, "curie_to_resource_or_bnode: default prefix: defined? #{!!uri_mappings[""]}, defaults: #{@host_defaults[:prefix]}")
|
635
719
|
# Default prefix
|
636
720
|
if uri_mappings[""]
|
637
721
|
uri_mappings[""].send("#{reference}_")
|
@@ -642,12 +726,13 @@ module RdfContext
|
|
642
726
|
# No prefix, undefined (in this context, it is evaluated as a term elsewhere)
|
643
727
|
nil
|
644
728
|
else
|
645
|
-
#
|
646
|
-
|
729
|
+
# Prefixes always downcased
|
730
|
+
prefix = prefix.to_s.downcase unless @version == :rdfa_1_0
|
731
|
+
ns = uri_mappings[prefix.to_s]
|
647
732
|
if ns
|
648
733
|
ns + reference
|
649
734
|
else
|
650
|
-
add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix
|
735
|
+
add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix}")
|
651
736
|
nil
|
652
737
|
end
|
653
738
|
end
|