rdf_context 0.5.6 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +10 -0
- data/{History.txt → History.rdoc} +8 -1
- data/Rakefile +9 -2
- data/VERSION +1 -1
- data/bin/rdf_context +5 -2
- data/lib/rdf_context/aggregate_graph.rb +31 -2
- data/lib/rdf_context/array_hacks.rb +3 -3
- data/lib/rdf_context/bnode.rb +3 -3
- data/lib/rdf_context/conjunctive_graph.rb +8 -8
- data/lib/rdf_context/duration.rb +17 -4
- data/lib/rdf_context/graph.rb +84 -46
- data/lib/rdf_context/literal.rb +36 -3
- data/lib/rdf_context/n3parser.rb +4 -4
- data/lib/rdf_context/namespace.rb +21 -8
- data/lib/rdf_context/parser.rb +31 -16
- data/lib/rdf_context/quoted_graph.rb +5 -4
- data/lib/rdf_context/rdfaparser.rb +176 -91
- data/lib/rdf_context/rdfxmlparser.rb +50 -13
- data/lib/rdf_context/serializer/abstract_serializer.rb +14 -4
- data/lib/rdf_context/serializer/nt_serializer.rb +5 -0
- data/lib/rdf_context/serializer/recursive_serializer.rb +4 -0
- data/lib/rdf_context/serializer/turtle_serializer.rb +28 -27
- data/lib/rdf_context/serializer/xml_serializer.rb +11 -9
- data/lib/rdf_context/store/abstract_sql_store.rb +47 -4
- data/lib/rdf_context/store/abstract_store.rb +73 -1
- data/lib/rdf_context/store/list_store.rb +25 -6
- data/lib/rdf_context/store/memory_store.rb +33 -1
- data/lib/rdf_context/store/sqlite3_store.rb +7 -4
- data/lib/rdf_context/term_utils.rb +6 -0
- data/lib/rdf_context/triple.rb +17 -6
- data/lib/rdf_context/uriref.rb +19 -3
- data/spec/html4-manifest.yml +176 -176
- data/spec/html5-manifest.yml +176 -176
- data/spec/rdfa_helper.rb +8 -2
- data/spec/rdfa_parser_spec.rb +1 -1
- data/spec/rdfcore/Manifest.yml +1561 -2626
- data/spec/swap_test/n3parser.yml +134 -279
- data/spec/swap_test/regression.yml +140 -305
- data/spec/turtle/manifest-bad.yml +155 -310
- data/spec/turtle/manifest.yml +155 -310
- data/spec/xhtml-manifest.yml +139 -587
- data/spec/xhtml11-manifest.yml +4405 -0
- metadata +21 -7
- data/.gitmodules +0 -3
data/lib/rdf_context/literal.rb
CHANGED
@@ -277,6 +277,7 @@ module RdfContext
|
|
277
277
|
end
|
278
278
|
end
|
279
279
|
|
280
|
+
# @private
|
280
281
|
class Language
|
281
282
|
attr_accessor :value
|
282
283
|
def initialize(string)
|
@@ -302,10 +303,25 @@ module RdfContext
|
|
302
303
|
def to_s; @value; end
|
303
304
|
end
|
304
305
|
|
305
|
-
|
306
|
+
# Contents of Literal
|
307
|
+
attr_accessor :contents
|
308
|
+
|
309
|
+
# Encoding defined for literal
|
310
|
+
# @return [Literal::Encoding]
|
311
|
+
attr_accessor :encoding
|
312
|
+
|
313
|
+
# Language associated with literal
|
314
|
+
# @return [String]
|
315
|
+
attr_accessor :lang
|
306
316
|
|
307
317
|
# Create a new Literal. Optionally pass a namespaces hash
|
308
318
|
# for use in applying to rdf::XMLLiteral values.
|
319
|
+
# @param [Object] contents
|
320
|
+
# @param [Encoding] encoding
|
321
|
+
# @option options [String] :language
|
322
|
+
# @option options [Hash{String => Namespace}] :namespaces
|
323
|
+
# @return [Literal]
|
324
|
+
# @raise [TypeError]
|
309
325
|
def initialize(contents, encoding, options = {})
|
310
326
|
unless encoding.is_a?(Encoding)
|
311
327
|
raise TypeError, "#{encoding.inspect} should be an instance of Encoding"
|
@@ -319,6 +335,11 @@ module RdfContext
|
|
319
335
|
end
|
320
336
|
|
321
337
|
# Create literal from a string that is already N3 encoded.
|
338
|
+
# @param [Object] contents
|
339
|
+
# @param [String] language
|
340
|
+
# @param [Encoding] encoding (nil)
|
341
|
+
# @return [Literal]
|
342
|
+
# @raise [TypeError]
|
322
343
|
def self.n3_encoded(contents, language, encoding = nil)
|
323
344
|
encoding = encoding.nil? ? Encoding.the_null_encoding : Encoding.coerce(encoding)
|
324
345
|
options = {}
|
@@ -330,6 +351,10 @@ module RdfContext
|
|
330
351
|
end
|
331
352
|
|
332
353
|
# Create an un-typed literal with a language
|
354
|
+
# @param [Object] contents
|
355
|
+
# @param [String] language (nil)
|
356
|
+
# @return [Literal]
|
357
|
+
# @raise [TypeError]
|
333
358
|
def self.untyped(contents, language = nil)
|
334
359
|
options = {}
|
335
360
|
options[:language] = language if language
|
@@ -337,19 +362,27 @@ module RdfContext
|
|
337
362
|
end
|
338
363
|
|
339
364
|
# Create a typed literal
|
340
|
-
#
|
341
|
-
#
|
365
|
+
# @param [Object] contents
|
366
|
+
# @param [Encoding] encoding (nil)
|
367
|
+
# @option options [Hash{String => Namespace}] :namespaces
|
368
|
+
# @return [Literal]
|
369
|
+
# @raise [TypeError]
|
342
370
|
def self.typed(contents, encoding, options = {})
|
343
371
|
encoding = Encoding.coerce(encoding)
|
344
372
|
new(contents, encoding, options)
|
345
373
|
end
|
346
374
|
|
347
375
|
# Create a literal appropriate for type of object by datatype introspection
|
376
|
+
# @param [Object] object
|
377
|
+
# @return [Literal]
|
378
|
+
# @raise [TypeError]
|
348
379
|
def self.build_from(object)
|
349
380
|
new(object.to_s, infer_encoding_for(object))
|
350
381
|
end
|
351
382
|
|
352
383
|
# Infer the proper XML datatype for the given object
|
384
|
+
# @param [Object] object
|
385
|
+
# @return [Encoding]
|
353
386
|
def self.infer_encoding_for(object)
|
354
387
|
case object
|
355
388
|
when TrueClass then Encoding.boolean
|
data/lib/rdf_context/n3parser.rb
CHANGED
@@ -15,13 +15,13 @@ module RdfContext
|
|
15
15
|
#
|
16
16
|
# @param [String] stream the Notation3/Turtle string
|
17
17
|
# @param [String] uri the URI of the document
|
18
|
-
# @
|
19
|
-
#
|
20
|
-
# <em>options[:strict]</em>:: Abort or proceed on error
|
18
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
19
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
21
20
|
# @return [Graph]
|
22
|
-
# @raise
|
21
|
+
# @raise [RdfException] or subclass
|
23
22
|
#
|
24
23
|
# @author Patrick Sinclair (metade)
|
24
|
+
# @author Gregg Kellogg
|
25
25
|
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
|
26
26
|
super
|
27
27
|
|
@@ -10,10 +10,10 @@ module RdfContext
|
|
10
10
|
# ==== Example
|
11
11
|
# Namespace.new("http://xmlns.com/foaf/0.1/", "foaf") # => returns a new Foaf namespace
|
12
12
|
#
|
13
|
-
# @param [
|
14
|
-
# @param [
|
15
|
-
# @return [Namespace]
|
16
|
-
# @raise [
|
13
|
+
# @param [#to_s] uri the URI of the namespace
|
14
|
+
# @param [#to_s] prefix the prefix of the namespace
|
15
|
+
# @return [Namespace] The newly created namespace.
|
16
|
+
# @raise [ParserException] Checks validity of the desired prefix and raises if it is incorrect.
|
17
17
|
#
|
18
18
|
# @author Tom Morris, Pius Uzamere
|
19
19
|
def initialize(uri, prefix)
|
@@ -28,15 +28,18 @@ module RdfContext
|
|
28
28
|
##
|
29
29
|
# Allows the construction of arbitrary URIs on the namespace.
|
30
30
|
#
|
31
|
-
#
|
31
|
+
# @example
|
32
32
|
# foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf"); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/knows"
|
33
33
|
# foaf = Namespace.new("http://xmlns.com/foaf/0.1/", "foaf", true); foaf.knows # => returns a new URIRef with URI "http://xmlns.com/foaf/0.1/#knows"
|
34
34
|
#
|
35
35
|
# To avoid naming problems, a suffix may have an appended '_', which will be removed when the URI is generated.
|
36
36
|
#
|
37
|
-
# @
|
38
|
-
# @
|
39
|
-
# @
|
37
|
+
# @param [#to_s] methodname to append to NS URI to create a new URI
|
38
|
+
# @param [Array] args Ignored arguments
|
39
|
+
# @return [URIRef] The newly created URI.
|
40
|
+
# @raise [Error] Checks validity of the desired prefix and raises if it is incorrect.
|
41
|
+
# @author Tom Morris
|
42
|
+
# @author Pius Uzamere
|
40
43
|
def method_missing(methodname, *args)
|
41
44
|
self + methodname
|
42
45
|
end
|
@@ -44,6 +47,8 @@ module RdfContext
|
|
44
47
|
# Construct a URIRef from a namespace as in method_missing, but without method collision issues.
|
45
48
|
# Rules are somewhat different than for normal URI unions, as the raw URI is used as the source,
|
46
49
|
# not a normalized URI, and the result is not normalized
|
50
|
+
# @param [#to_s] suffix to append to NS URI to create a new URI
|
51
|
+
# @return [URIRef] The newly created URI.
|
47
52
|
def +(suffix)
|
48
53
|
prefix = @uri
|
49
54
|
suffix = suffix.to_s.sub(/^\#/, "") if prefix.index("#")
|
@@ -52,31 +57,39 @@ module RdfContext
|
|
52
57
|
end
|
53
58
|
|
54
59
|
# Make sure to attach fragment
|
60
|
+
# @return [URIRef] The newly created URI.
|
55
61
|
def uri
|
56
62
|
self + ""
|
57
63
|
end
|
58
64
|
|
59
65
|
# Bind this namespace to a Graph
|
66
|
+
# @param [Graph] graph
|
67
|
+
# @return [Namespace] The value returned by graph.bind
|
60
68
|
def bind(graph)
|
61
69
|
graph.bind(self)
|
62
70
|
end
|
63
71
|
|
64
72
|
# Compare namespaces
|
73
|
+
# @param [Namespace] other
|
74
|
+
# @return [Boolean]
|
65
75
|
def eql?(other)
|
66
76
|
self.uri == other.uri
|
67
77
|
end
|
68
78
|
alias_method :==, :eql?
|
69
79
|
|
70
80
|
# Output xmlns attribute name
|
81
|
+
# @return [String]
|
71
82
|
def xmlns_attr
|
72
83
|
prefix.empty? ? "xmlns" : "xmlns:#{prefix}"
|
73
84
|
end
|
74
85
|
|
75
86
|
# Output namespace definition as a hash
|
87
|
+
# @return [Hash{String => String}]
|
76
88
|
def xmlns_hash
|
77
89
|
{xmlns_attr => @uri.to_s}
|
78
90
|
end
|
79
91
|
|
92
|
+
# @return [String]
|
80
93
|
def to_s
|
81
94
|
"#{prefix}: #{@uri}"
|
82
95
|
end
|
data/lib/rdf_context/parser.rb
CHANGED
@@ -9,11 +9,10 @@ module RdfContext
|
|
9
9
|
##
|
10
10
|
# Creates a new parser
|
11
11
|
#
|
12
|
-
# @
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
# <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
|
12
|
+
# @option options [Graph] :graph (nil) Graph to parse into, otherwise a new RdfContext::Graph instance is created
|
13
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
14
|
+
# @option options [:rdfxml, :html, :n3] :type (nil)
|
15
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
17
16
|
def initialize(options = {})
|
18
17
|
# initialize the triplestore
|
19
18
|
@graph = options[:graph]
|
@@ -24,12 +23,15 @@ module RdfContext
|
|
24
23
|
|
25
24
|
# Instantiate Parser and parse document
|
26
25
|
#
|
27
|
-
# @param [
|
28
|
-
# @param [String] uri
|
29
|
-
# @
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
26
|
+
# @param [#read, #to_s] stream the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
|
27
|
+
# @param [String] uri (nil) the URI of the document
|
28
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
29
|
+
# @option options [:rdfxml, :html, :n3] :type (nil)
|
30
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
31
|
+
# @return [Graph] Returns the graph containing parsed triples
|
32
|
+
# @yield [triple]
|
33
|
+
# @yieldparam [Triple] triple
|
34
|
+
# @raise [Error]:: Raises RdfError if _strict_
|
33
35
|
# @return [Graph]:: Returns the graph containing parsed triples
|
34
36
|
# @raise [Error]:: Raises RdfError if _strict_
|
35
37
|
def self.parse(stream, uri = nil, options = {}, &block) # :yields: triple
|
@@ -44,11 +46,15 @@ module RdfContext
|
|
44
46
|
#
|
45
47
|
# Virtual Class, prototype for Parser subclass.
|
46
48
|
#
|
47
|
-
# @param [
|
48
|
-
# @param [String] uri
|
49
|
-
# @
|
50
|
-
#
|
51
|
-
#
|
49
|
+
# @param [#read, #to_s] stream the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
|
50
|
+
# @param [String] uri (nil) the URI of the document
|
51
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
52
|
+
# @option options [:rdfxml, :html, :n3] :type (nil)
|
53
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
54
|
+
# @return [Graph] Returns the graph containing parsed triples
|
55
|
+
# @yield [triple]
|
56
|
+
# @yieldparam [Triple] triple
|
57
|
+
# @raise [Error]:: Raises RdfError if _strict_
|
52
58
|
# @return [Graph]:: Returns the graph containing parsed triples
|
53
59
|
# @raise [Error]:: Raises RdfError if _strict_
|
54
60
|
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
|
@@ -81,17 +87,26 @@ module RdfContext
|
|
81
87
|
end
|
82
88
|
|
83
89
|
|
90
|
+
# @return [Graph]
|
84
91
|
def graph; @delegate ? @delegate.graph : (@graph || Graph.new); end
|
92
|
+
|
93
|
+
# @return [Array<String>]
|
85
94
|
def debug; @delegate ? @delegate.debug : @debug; end
|
86
95
|
|
87
96
|
# Return N3 Parser instance
|
97
|
+
# @return [N3Parser]
|
88
98
|
def self.n3_parser(options = {}); N3Parser.new(options); end
|
89
99
|
# Return RDF/XML Parser instance
|
100
|
+
# @return [RdfXmlParser]
|
90
101
|
def self.rdfxml_parser(options = {}); RdfXmlParser.new(options); end
|
91
102
|
# Return Rdfa Parser instance
|
103
|
+
# @return [RdfaParser]
|
92
104
|
def self.rdfa_parser(options = {}); RdfaParser.new(options); end
|
93
105
|
|
94
106
|
# Heuristically detect the format of the uri
|
107
|
+
# @param [#read, #to_s] stream
|
108
|
+
# @param [#to_s] uri (nil)
|
109
|
+
# @return [:rdfxml, :rdfa, :n3]
|
95
110
|
def detect_format(stream, uri = nil)
|
96
111
|
uri ||= stream.path if stream.respond_to?(:path)
|
97
112
|
format = case uri.to_s
|
@@ -14,15 +14,15 @@ module RdfContext
|
|
14
14
|
##
|
15
15
|
# Adds one or more extant triples to a graph. Delegates to Store.
|
16
16
|
#
|
17
|
-
#
|
17
|
+
# @example
|
18
18
|
# g = Graph.new;
|
19
19
|
# t1 = Triple.new(BNode.new, URIRef.new("http://xmlns.com/foaf/0.1/knows"), BNode.new);
|
20
20
|
# t2 = Triple.new(BNode.new, URIRef.new("http://xmlns.com/foaf/0.1/knows"), BNode.new);
|
21
21
|
# g.add(t1, t2, ...)
|
22
22
|
#
|
23
|
-
# @param [Triple] triples
|
24
|
-
#
|
25
|
-
# @return [Graph]
|
23
|
+
# @param [Array<Triple>] triples one or more triples. Last element may be a hash for options
|
24
|
+
# @option triples [Resource] :context Graph context in which to deposit triples, defaults to default_context or self
|
25
|
+
# @return [Graph] Returns the graph
|
26
26
|
def add(*triples)
|
27
27
|
options = triples.last.is_a?(Hash) ? triples.pop : {}
|
28
28
|
ctx = options[:context] || @default_context || self
|
@@ -31,6 +31,7 @@ module RdfContext
|
|
31
31
|
end
|
32
32
|
|
33
33
|
# Return an n3 identifier for the Graph
|
34
|
+
# @return [String]
|
34
35
|
def n3
|
35
36
|
"{#{self.identifier.to_n3}}"
|
36
37
|
end
|
@@ -5,56 +5,88 @@ module RdfContext
|
|
5
5
|
# An RDFa parser in Ruby
|
6
6
|
#
|
7
7
|
# Based on processing rules described here:
|
8
|
-
#
|
8
|
+
# @see http://www.w3.org/TR/rdfa-syntax/#s_model RDFa 1.0
|
9
|
+
# @see http://www.w3.org/2010/02/rdfa/drafts/2010/ED-rdfa-core-20100705/ RDFa 1.1
|
9
10
|
#
|
10
|
-
# Ben Adida
|
11
|
-
#
|
12
|
-
# Gregg Kellogg
|
13
|
-
# 2009-08-04
|
11
|
+
# @author Ben Adida
|
12
|
+
# @author Gregg Kellogg
|
14
13
|
class RdfaParser < Parser
|
15
|
-
# Host language
|
16
|
-
#
|
17
|
-
# :xhtml_rdfa_1_1
|
14
|
+
# Host language
|
15
|
+
# @return [:xhtml]
|
18
16
|
attr_reader :host_language
|
17
|
+
|
18
|
+
# Version
|
19
|
+
# @return [:rdfa_1_0, :rdfa_1_1]
|
20
|
+
attr_reader :version
|
19
21
|
|
20
22
|
# The Recursive Baggage
|
23
|
+
# @private
|
21
24
|
class EvaluationContext # :nodoc:
|
22
|
-
# The base.
|
25
|
+
# The base.
|
26
|
+
#
|
27
|
+
# This will usually be the URL of the document being processed,
|
23
28
|
# but it could be some other URL, set by some other mechanism,
|
24
29
|
# such as the (X)HTML base element. The important thing is that it establishes
|
25
30
|
# a URL against which relative paths can be resolved.
|
31
|
+
#
|
32
|
+
# @return [URIRef]
|
26
33
|
attr :base, true
|
27
34
|
# The parent subject.
|
35
|
+
#
|
28
36
|
# The initial value will be the same as the initial value of base,
|
29
37
|
# but it will usually change during the course of processing.
|
38
|
+
#
|
39
|
+
# @return [URIRef]
|
30
40
|
attr :parent_subject, true
|
31
41
|
# The parent object.
|
42
|
+
#
|
32
43
|
# In some situations the object of a statement becomes the subject of any nested statements,
|
33
44
|
# and this property is used to convey this value.
|
34
45
|
# Note that this value may be a bnode, since in some situations a number of nested statements
|
35
46
|
# are grouped together on one bnode.
|
36
47
|
# This means that the bnode must be set in the containing statement and passed down,
|
37
48
|
# and this property is used to convey this value.
|
49
|
+
#
|
50
|
+
# @return [URIRef]
|
38
51
|
attr :parent_object, true
|
39
52
|
# A list of current, in-scope URI mappings.
|
53
|
+
#
|
54
|
+
# @return [Hash{String => Namespace}]
|
40
55
|
attr :uri_mappings, true
|
41
|
-
# A list of incomplete triples.
|
56
|
+
# A list of incomplete triples.
|
57
|
+
#
|
58
|
+
# A triple can be incomplete when no object resource
|
42
59
|
# is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
|
43
60
|
# The triples can be completed when a resource becomes available,
|
44
61
|
# which will be when the next subject is specified (part of the process called chaining).
|
62
|
+
#
|
63
|
+
# @return [Array<Array<URIRef, Resource>>]
|
45
64
|
attr :incomplete_triples, true
|
46
65
|
# The language. Note that there is no default language.
|
66
|
+
#
|
67
|
+
# @return [String]
|
47
68
|
attr :language, true
|
48
69
|
# The term mappings, a list of terms and their associated URIs.
|
70
|
+
#
|
49
71
|
# This specification does not define an initial list.
|
50
72
|
# Host Languages may define an initial list.
|
51
73
|
# If a Host Language provides an initial list, it should do so via an RDFa Profile document.
|
74
|
+
#
|
75
|
+
# @return [Hash{String => URIRef}]
|
52
76
|
attr :term_mappings, true
|
53
|
-
# The default vocabulary
|
77
|
+
# The default vocabulary
|
78
|
+
#
|
79
|
+
# A value to use as the prefix URI when a term is used.
|
54
80
|
# This specification does not define an initial setting for the default vocabulary.
|
55
81
|
# Host Languages may define an initial setting.
|
82
|
+
#
|
83
|
+
# @return [URIRef]
|
56
84
|
attr :default_vocabulary, true
|
57
85
|
|
86
|
+
# @param [String] base
|
87
|
+
# @param [Hash] host_defaults
|
88
|
+
# @option host_defaults [Hash{String => URIRef}] :term_mappings Hash of NCName => URIRef
|
89
|
+
# @option host_defaults [URIRef] :vocabulary Default vocabulary URI
|
58
90
|
def initialize(base, host_defaults)
|
59
91
|
# Initialize the evaluation context, [5.1]
|
60
92
|
@base = base
|
@@ -64,10 +96,12 @@ module RdfContext
|
|
64
96
|
@incomplete_triples = []
|
65
97
|
@language = nil
|
66
98
|
@term_mappings = host_defaults.fetch(:term_mappings, {})
|
67
|
-
@
|
99
|
+
@default_vocabulary = host_defaults.fetch(:vocabulary, nil)
|
68
100
|
end
|
69
101
|
|
70
102
|
# Copy this Evaluation Context
|
103
|
+
#
|
104
|
+
# @param [EvaluationContext] from
|
71
105
|
def initialize_copy(from)
|
72
106
|
# clone the evaluation context correctly
|
73
107
|
@uri_mappings = from.uri_mappings.clone
|
@@ -86,11 +120,10 @@ module RdfContext
|
|
86
120
|
##
|
87
121
|
# Creates a new parser for RDFa.
|
88
122
|
#
|
89
|
-
# @
|
90
|
-
#
|
91
|
-
#
|
92
|
-
#
|
93
|
-
# <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
|
123
|
+
# @option options [Graph] :graph (nil) Graph to parse into, otherwise a new RdfContext::Graph instance is created
|
124
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
125
|
+
# @option options [:rdfxml, :html, :n3] :type (nil)
|
126
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
94
127
|
def initialize(options = {})
|
95
128
|
super
|
96
129
|
@@vocabulary_cache ||= {}
|
@@ -104,12 +137,13 @@ module RdfContext
|
|
104
137
|
# Optionally, the stream may be a Nokogiri::HTML::Document or Nokogiri::XML::Document
|
105
138
|
# With a block, yields each statement with URIRef, BNode or Literal elements
|
106
139
|
#
|
107
|
-
# @param [
|
108
|
-
# @param [String] uri
|
109
|
-
# @
|
110
|
-
#
|
111
|
-
#
|
112
|
-
# @
|
140
|
+
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, #read, #to_s] stream the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
|
141
|
+
# @param [String] uri (nil) the URI of the document
|
142
|
+
# @option options [Array] :debug (nil) Array to place debug messages
|
143
|
+
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
|
144
|
+
# @return [Graph] Returns the graph containing parsed triples
|
145
|
+
# @yield [triple]
|
146
|
+
# @yieldparam [Triple] triple
|
113
147
|
# @raise [Error] Raises RdfError if _strict_
|
114
148
|
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
|
115
149
|
super
|
@@ -125,19 +159,24 @@ module RdfContext
|
|
125
159
|
|
126
160
|
# Determine host language
|
127
161
|
# XXX - right now only XHTML defined
|
128
|
-
|
162
|
+
version = @doc.root.attributes["version"].to_s if @doc.root
|
163
|
+
|
164
|
+
@host_language = case version.to_s
|
129
165
|
when /XHTML+RDFa/ then :xhtml
|
130
166
|
end
|
131
167
|
|
132
168
|
# If none found, assume xhtml
|
133
169
|
@host_language ||= :xhtml
|
134
170
|
|
171
|
+
@version = version.to_s.match(/RDFa 1.0/) ? :rdfa_1_0 : :rdfa_1_1
|
172
|
+
|
135
173
|
@host_defaults = case @host_language
|
136
174
|
when :xhtml
|
137
175
|
@graph.bind(XHV_NS)
|
138
176
|
{
|
139
177
|
:vocabulary => XHV_NS.uri,
|
140
178
|
:prefix => XHV_NS,
|
179
|
+
:uri_mappings => {"xhv" => XHV_NS}, # RDF::XHTML is wrong
|
141
180
|
:term_mappings => %w(
|
142
181
|
alternate appendix bookmark cite chapter contents copyright first glossary help icon index
|
143
182
|
last license meta next p3pv1 prev role section stylesheet subsection start top up
|
@@ -146,7 +185,10 @@ module RdfContext
|
|
146
185
|
else
|
147
186
|
{}
|
148
187
|
end
|
188
|
+
|
189
|
+
@host_defaults.delete(:vocabulary) if @version == :rdfa_1_0
|
149
190
|
|
191
|
+
add_debug(@doc, "version = #{@version}, host_language = #{@host_language}")
|
150
192
|
# parse
|
151
193
|
parse_whole_document(@doc, @uri)
|
152
194
|
|
@@ -232,13 +274,13 @@ module RdfContext
|
|
232
274
|
raise ParserException, "rdf:uri must be a Literal" unless uri.is_a?(Literal)
|
233
275
|
raise ParserException, "rdf:term must be a Literal" unless term.nil? || term.is_a?(Literal)
|
234
276
|
raise ParserException, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(Literal)
|
235
|
-
|
277
|
+
|
236
278
|
# For every extracted triple that is the common subject of an rdfa:prefix and an rdfa:uri
|
237
279
|
# predicate, create a mapping from the object literal of the rdfa:prefix predicate to the
|
238
280
|
# object literal of the rdfa:uri predicate. Add or update this mapping in the local list of
|
239
281
|
# URI mappings after transforming the 'prefix' component to lower-case.
|
240
282
|
# For every extracted
|
241
|
-
um[prefix.to_s.downcase] = @graph.bind(Namespace.new(uri.to_s, prefix.to_s.downcase)) if prefix
|
283
|
+
um[prefix.to_s.downcase] = @graph.bind(Namespace.new(uri.to_s, prefix.to_s.downcase)) if prefix && prefix.to_s != "_"
|
242
284
|
|
243
285
|
# triple that is the common subject of an rdfa:term and an rdfa:uri predicate, create a
|
244
286
|
# mapping from the object literal of the rdfa:term predicate to the object literal of the
|
@@ -257,7 +299,7 @@ module RdfContext
|
|
257
299
|
# Merge mappings from this vocabulary
|
258
300
|
uri_mappings.merge!(@@vocabulary_cache[profile][:uri_mappings])
|
259
301
|
term_mappings.merge!(@@vocabulary_cache[profile][:term_mappings])
|
260
|
-
end
|
302
|
+
end unless @version == :rdfa_1_0
|
261
303
|
|
262
304
|
# look for xmlns
|
263
305
|
# (note, this may be dependent on @host_language)
|
@@ -267,7 +309,11 @@ module RdfContext
|
|
267
309
|
element.namespaces.each do |attr_name, attr_value|
|
268
310
|
begin
|
269
311
|
abbr, prefix = attr_name.split(":")
|
270
|
-
|
312
|
+
# A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
|
313
|
+
next if prefix == "_"
|
314
|
+
|
315
|
+
pfx_lc = @version == :rdfa_1_0 || prefix.nil? ? prefix : prefix.to_s.downcase
|
316
|
+
uri_mappings[pfx_lc] = @graph.bind(Namespace.new(attr_value, prefix.to_s)) if abbr.downcase == "xmlns" && prefix
|
271
317
|
rescue RdfException => e
|
272
318
|
add_debug(element, "extract_mappings raised #{e.class}: #{e.message}")
|
273
319
|
raise if @strict
|
@@ -285,8 +331,11 @@ module RdfContext
|
|
285
331
|
next unless prefix.match(/:$/)
|
286
332
|
prefix.chop!
|
287
333
|
|
334
|
+
# A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
|
335
|
+
next if prefix == "_"
|
336
|
+
|
288
337
|
uri_mappings[prefix] = @graph.bind(Namespace.new(uri, prefix))
|
289
|
-
end
|
338
|
+
end unless @version == :rdfa_1_0
|
290
339
|
|
291
340
|
add_debug(element, "uri_mappings: #{uri_mappings.values.map{|ns|ns.to_s}.join(", ")}")
|
292
341
|
add_debug(element, "term_mappings: #{term_mappings.keys.join(", ")}")
|
@@ -339,9 +388,9 @@ module RdfContext
|
|
339
388
|
unless vocab.nil?
|
340
389
|
default_vocabulary = if vocab.to_s.empty?
|
341
390
|
# Set default_vocabulary to host language default
|
342
|
-
@host_defaults.fetch(:
|
391
|
+
@host_defaults.fetch(:vocabulary, nil)
|
343
392
|
else
|
344
|
-
vocab
|
393
|
+
URIRef.new(vocab)
|
345
394
|
end
|
346
395
|
add_debug(element, "[Step 2] traverse, default_vocaulary: #{default_vocabulary.inspect}")
|
347
396
|
end
|
@@ -368,8 +417,16 @@ module RdfContext
|
|
368
417
|
add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language}") if attrs['lang']
|
369
418
|
|
370
419
|
# rels and revs
|
371
|
-
rels = process_uris(element, rel, evaluation_context,
|
372
|
-
|
420
|
+
rels = process_uris(element, rel, evaluation_context,
|
421
|
+
:uri_mappings => uri_mappings,
|
422
|
+
:term_mappings => term_mappings,
|
423
|
+
:vocab => default_vocabulary,
|
424
|
+
:r_1_0_restrictions => [:uri, :bnode, :term])
|
425
|
+
revs = process_uris(element, rev, evaluation_context,
|
426
|
+
:uri_mappings => uri_mappings,
|
427
|
+
:term_mappings => term_mappings,
|
428
|
+
:vocab => default_vocabulary,
|
429
|
+
:r_1_0_restrictions => [:uri, :bnode, :term])
|
373
430
|
|
374
431
|
add_debug(element, "traverse, about: #{about.nil? ? 'nil' : about}, src: #{src.nil? ? 'nil' : src}, resource: #{resource.nil? ? 'nil' : resource}, href: #{href.nil? ? 'nil' : href}")
|
375
432
|
add_debug(element, "traverse, property: #{property.nil? ? 'nil' : property}, typeof: #{typeof.nil? ? 'nil' : typeof}, datatype: #{datatype.nil? ? 'nil' : datatype}, content: #{content.nil? ? 'nil' : content}")
|
@@ -378,14 +435,18 @@ module RdfContext
|
|
378
435
|
if !(rel || rev)
|
379
436
|
# Establishing a new subject if no rel/rev [7.5 Step 6]
|
380
437
|
# May not be valid, but can exist
|
381
|
-
if about
|
382
|
-
|
438
|
+
new_subject = if about
|
439
|
+
process_uri(element, about, evaluation_context,
|
440
|
+
:uri_mappings => uri_mappings,
|
441
|
+
:r_1_0_restrictions => [:uri, :safe_curie, :bnode])
|
383
442
|
elsif src
|
384
|
-
|
443
|
+
process_uri(element, src, evaluation_context, :r_1_0_restrictions => [:uri])
|
385
444
|
elsif resource
|
386
|
-
|
445
|
+
process_uri(element, resource, evaluation_context,
|
446
|
+
:uri_mappings => uri_mappings,
|
447
|
+
:r_1_0_restrictions => [:uri, :safe_curie, :bnode])
|
387
448
|
elsif href
|
388
|
-
|
449
|
+
process_uri(element, href, evaluation_context, :r_1_0_restrictions => [:uri])
|
389
450
|
end
|
390
451
|
|
391
452
|
# If no URI is provided by a resource attribute, then the first match from the following rules
|
@@ -394,52 +455,49 @@ module RdfContext
|
|
394
455
|
# otherwise,
|
395
456
|
# if parent object is present, new subject is set to the value of parent object.
|
396
457
|
# Additionally, if @property is not present then the skip element flag is set to 'true';
|
397
|
-
if
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
skip = true unless property
|
409
|
-
end
|
458
|
+
new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
|
459
|
+
# From XHTML+RDFa 1.1:
|
460
|
+
# if no URI is provided, then first check to see if the element is the head or body element.
|
461
|
+
# If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
|
462
|
+
URIRef.new(evaluation_context.base, :normalize => false)
|
463
|
+
elsif element.attributes['typeof']
|
464
|
+
BNode.new
|
465
|
+
else
|
466
|
+
skip = true unless property
|
467
|
+
# if it's null, it's null and nothing changes
|
468
|
+
evaluation_context.parent_object
|
410
469
|
end
|
411
470
|
add_debug(element, "[Step 6] new_subject: #{new_subject}, skip = #{skip}")
|
412
471
|
else
|
413
472
|
# [7.5 Step 7]
|
414
473
|
# If the current element does contain a @rel or @rev attribute, then the next step is to
|
415
474
|
# establish both a value for new subject and a value for current object resource:
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
new_subject = process_uri(element, src, evaluation_context, :uri_mappings => uri_mappings)
|
420
|
-
end
|
475
|
+
new_subject = process_uri(element, about || src, evaluation_context,
|
476
|
+
:uri_mappings => uri_mappings,
|
477
|
+
:r_1_0_restrictions => [:uri, :safe_curie, :bnode])
|
421
478
|
|
422
479
|
# If no URI is provided then the first match from the following rules will apply
|
423
|
-
if
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
# no skip flag set this time
|
435
|
-
end
|
480
|
+
new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/
|
481
|
+
# From XHTML+RDFa 1.1:
|
482
|
+
# if no URI is provided, then first check to see if the element is the head or body element.
|
483
|
+
# If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
|
484
|
+
URIRef.new(evaluation_context.base, :normalize => false)
|
485
|
+
elsif element.attributes['typeof']
|
486
|
+
BNode.new
|
487
|
+
else
|
488
|
+
# if it's null, it's null and nothing changes
|
489
|
+
evaluation_context.parent_object
|
490
|
+
# no skip flag set this time
|
436
491
|
end
|
437
492
|
|
438
493
|
# Then the current object resource is set to the URI obtained from the first match from the following rules:
|
439
|
-
if resource
|
440
|
-
|
494
|
+
current_object_resource = if resource
|
495
|
+
process_uri(element, resource, evaluation_context,
|
496
|
+
:uri_mappings => uri_mappings,
|
497
|
+
:r_1_0_restrictions => [:uri, :safe_curie, :bnode])
|
441
498
|
elsif href
|
442
|
-
|
499
|
+
process_uri(element, href, evaluation_context,
|
500
|
+
:r_1_0_restrictions => [:uri])
|
443
501
|
end
|
444
502
|
|
445
503
|
add_debug(element, "[Step 7] new_subject: #{new_subject}, current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource}")
|
@@ -448,7 +506,11 @@ module RdfContext
|
|
448
506
|
# Process @typeof if there is a subject [Step 8]
|
449
507
|
if new_subject and typeof
|
450
508
|
# Typeof is TERMorCURIEorURIs
|
451
|
-
types = process_uris(element, typeof, evaluation_context,
|
509
|
+
types = process_uris(element, typeof, evaluation_context,
|
510
|
+
:uri_mappings => uri_mappings,
|
511
|
+
:term_mappings => term_mappings,
|
512
|
+
:vocab => default_vocabulary,
|
513
|
+
:r_1_0_restrictions => [:curie, :bnode])
|
452
514
|
add_debug(element, "typeof: #{typeof}")
|
453
515
|
types.each do |one_type|
|
454
516
|
add_triple(element, new_subject, RDF_TYPE, one_type)
|
@@ -480,27 +542,45 @@ module RdfContext
|
|
480
542
|
|
481
543
|
# Establish current object literal [Step 11]
|
482
544
|
if property
|
483
|
-
properties = process_uris(element, property, evaluation_context,
|
545
|
+
properties = process_uris(element, property, evaluation_context,
|
546
|
+
:uri_mappings => uri_mappings,
|
547
|
+
:term_mappings => term_mappings,
|
548
|
+
:vocab => default_vocabulary,
|
549
|
+
:r_1_0_restrictions => [:curie, :bnode])
|
550
|
+
|
551
|
+
properties.reject! do |p|
|
552
|
+
if p.is_a?(URIRef)
|
553
|
+
false
|
554
|
+
else
|
555
|
+
add_debug(element, "Illegal predicate: #{p.inspect}")
|
556
|
+
raise InvalidPredicate, "predicate #{p.inspect} must be a URI" if @strict
|
557
|
+
true
|
558
|
+
end
|
559
|
+
end
|
484
560
|
|
485
561
|
# get the literal datatype
|
486
562
|
type = datatype
|
487
563
|
children_node_types = element.children.collect{|c| c.class}.uniq
|
488
564
|
|
489
565
|
# the following 3 IF clauses should be mutually exclusive. Written as is to prevent extensive indentation.
|
490
|
-
type_resource = process_uri(element, type, evaluation_context,
|
491
|
-
|
566
|
+
type_resource = process_uri(element, type, evaluation_context,
|
567
|
+
:uri_mappings => uri_mappings,
|
568
|
+
:term_mappings => term_mappings,
|
569
|
+
:vocab => default_vocabulary,
|
570
|
+
:r_1_0_restrictions => [:curie, :bnode]) if type
|
571
|
+
current_object_literal = if type and !type.empty? and (type_resource.to_s != XML_LITERAL.to_s)
|
492
572
|
# typed literal
|
493
573
|
add_debug(element, "[Step 11] typed literal")
|
494
|
-
|
574
|
+
Literal.typed(content || element.inner_text, type_resource, :language => language)
|
495
575
|
elsif content or (children_node_types == [Nokogiri::XML::Text]) or (element.children.length == 0) or (type == '')
|
496
576
|
# plain literal
|
497
577
|
add_debug(element, "[Step 11] plain literal")
|
498
|
-
|
578
|
+
Literal.untyped(content || element.inner_text, language)
|
499
579
|
elsif children_node_types != [Nokogiri::XML::Text] and (type == nil or type_resource.to_s == XML_LITERAL.to_s)
|
500
580
|
# XML Literal
|
501
581
|
add_debug(element, "[Step 11] XML Literal: #{element.inner_html}")
|
502
|
-
current_object_literal = Literal.typed(element.children, XML_LITERAL, :language => language, :namespaces => uri_mappings)
|
503
582
|
recurse = false
|
583
|
+
Literal.typed(element.children, XML_LITERAL, :language => language, :namespaces => uri_mappings)
|
504
584
|
end
|
505
585
|
|
506
586
|
# add each property
|
@@ -568,18 +648,19 @@ module RdfContext
|
|
568
648
|
end
|
569
649
|
|
570
650
|
def process_uri(element, value, evaluation_context, options = {})
|
571
|
-
|
572
|
-
|
651
|
+
return if value.nil?
|
652
|
+
restrictions = @version == :rdfa_1_0 ? options[:r_1_0_restrictions] : [:uri, :bnode, :curie, :safe_curie, :term]
|
653
|
+
add_debug(element, "process_uri: restrictions = #{restrictions.inspect}")
|
573
654
|
options = {:uri_mappings => {}}.merge(options)
|
574
|
-
if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/)
|
655
|
+
if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/) && restrictions.include?(:safe_curie)
|
575
656
|
# SafeCURIEorCURIEorURI
|
576
657
|
# When the value is surrounded by square brackets, then the content within the brackets is
|
577
658
|
# evaluated as a CURIE according to the CURIE Syntax definition. If it is not a valid CURIE, the
|
578
659
|
# value must be ignored.
|
579
|
-
uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject)
|
660
|
+
uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
|
580
661
|
add_debug(element, "process_uri: #{value} => safeCURIE => <#{uri}>")
|
581
662
|
uri
|
582
|
-
elsif options[:term_mappings] && NC_REGEXP.match(value.to_s)
|
663
|
+
elsif options[:term_mappings] && NC_REGEXP.match(value.to_s) && restrictions.include?(:term)
|
583
664
|
# TERMorCURIEorURI
|
584
665
|
# If the value is an NCName, then it is evaluated as a term according to General Use of Terms in
|
585
666
|
# Attributes. Note that this step may mean that the value is to be ignored.
|
@@ -590,9 +671,11 @@ module RdfContext
|
|
590
671
|
# SafeCURIEorCURIEorURI or TERMorCURIEorURI
|
591
672
|
# Otherwise, the value is evaluated as a CURIE.
|
592
673
|
# If it is a valid CURIE, the resulting URI is used; otherwise, the value will be processed as a URI.
|
593
|
-
uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject)
|
674
|
+
uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
|
594
675
|
if uri
|
595
676
|
add_debug(element, "process_uri: #{value} => CURIE => <#{uri}>")
|
677
|
+
elsif @version == :rdfa_1_0 && value.to_s.match(/^xml/)
|
678
|
+
# Special case to not allow anything starting with XML to be treated as a URI
|
596
679
|
else
|
597
680
|
uri = URIRef.new(value, evaluation_context.base, :normalize => false)
|
598
681
|
add_debug(element, "process_uri: #{value} => URI => <#{uri}>")
|
@@ -615,7 +698,7 @@ module RdfContext
|
|
615
698
|
options[:term_mappings][value.to_s.downcase]
|
616
699
|
when options[:vocab]
|
617
700
|
# Otherwise, if there is a local default vocabulary the URI is obtained by concatenating that value and the term.
|
618
|
-
options[:vocab] + value
|
701
|
+
URIRef.new(options[:vocab].to_s + value)
|
619
702
|
else
|
620
703
|
# Finally, if there is no local default vocabulary, the term has no associated URI and must be ignored.
|
621
704
|
nil
|
@@ -623,15 +706,16 @@ module RdfContext
|
|
623
706
|
end
|
624
707
|
|
625
708
|
# From section 6. CURIE Syntax Definition
|
626
|
-
def curie_to_resource_or_bnode(element, curie, uri_mappings, subject)
|
709
|
+
def curie_to_resource_or_bnode(element, curie, uri_mappings, subject, restrictions)
|
627
710
|
# URI mappings for CURIEs default to XH_MAPPING, rather than the default doc namespace
|
628
711
|
prefix, reference = curie.to_s.split(":")
|
629
712
|
|
630
713
|
# consider the bnode situation
|
631
|
-
if prefix == "_"
|
714
|
+
if prefix == "_" && restrictions.include?(:bnode)
|
632
715
|
# we force a non-nil name, otherwise it generates a new name
|
633
716
|
BNode.new(reference || "", @named_bnodes)
|
634
717
|
elsif curie.to_s.match(/^:/)
|
718
|
+
add_debug(element, "curie_to_resource_or_bnode: default prefix: defined? #{!!uri_mappings[""]}, defaults: #{@host_defaults[:prefix]}")
|
635
719
|
# Default prefix
|
636
720
|
if uri_mappings[""]
|
637
721
|
uri_mappings[""].send("#{reference}_")
|
@@ -642,12 +726,13 @@ module RdfContext
|
|
642
726
|
# No prefix, undefined (in this context, it is evaluated as a term elsewhere)
|
643
727
|
nil
|
644
728
|
else
|
645
|
-
#
|
646
|
-
|
729
|
+
# Prefixes always downcased
|
730
|
+
prefix = prefix.to_s.downcase unless @version == :rdfa_1_0
|
731
|
+
ns = uri_mappings[prefix.to_s]
|
647
732
|
if ns
|
648
733
|
ns + reference
|
649
734
|
else
|
650
|
-
add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix
|
735
|
+
add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix}")
|
651
736
|
nil
|
652
737
|
end
|
653
738
|
end
|