rdf-rdfa 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +2 -0
- data/README.rdoc +59 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/example.rb +27 -0
- data/lib/rdfa/format.rb +19 -0
- data/lib/rdfa/reader.rb +798 -0
- data/lib/rdfa/reader/exceptions.rb +14 -0
- data/lib/rdfa/reader/namespace.rb +72 -0
- data/lib/rdfa/reader/version.rb +23 -0
- data/spec/namespaces_spec.rb +112 -0
- data/spec/rdfa-triples/0001.nt +1 -0
- data/spec/rdfa-triples/0006.nt +2 -0
- data/spec/rdfa-triples/0007.nt +3 -0
- data/spec/rdfa-triples/0008.nt +1 -0
- data/spec/rdfa-triples/0009.nt +1 -0
- data/spec/rdfa-triples/0010.nt +2 -0
- data/spec/rdfa-triples/0011.nt +3 -0
- data/spec/rdfa-triples/0012.nt +1 -0
- data/spec/rdfa-triples/0013.nt +1 -0
- data/spec/rdfa-triples/0014.nt +1 -0
- data/spec/rdfa-triples/0015.nt +2 -0
- data/spec/rdfa-triples/0017.nt +3 -0
- data/spec/rdfa-triples/0018.nt +1 -0
- data/spec/rdfa-triples/0019.nt +1 -0
- data/spec/rdfa-triples/0020.nt +1 -0
- data/spec/rdfa-triples/0021.nt +1 -0
- data/spec/rdfa-triples/0023.nt +1 -0
- data/spec/rdfa-triples/0025.nt +2 -0
- data/spec/rdfa-triples/0026.nt +1 -0
- data/spec/rdfa-triples/0027.nt +1 -0
- data/spec/rdfa-triples/0029.nt +1 -0
- data/spec/rdfa-triples/0030.nt +1 -0
- data/spec/rdfa-triples/0031.nt +1 -0
- data/spec/rdfa-triples/0032.nt +1 -0
- data/spec/rdfa-triples/0033.nt +2 -0
- data/spec/rdfa-triples/0034.nt +1 -0
- data/spec/rdfa-triples/0035.nt +1 -0
- data/spec/rdfa-triples/0036.nt +1 -0
- data/spec/rdfa-triples/0037.nt +1 -0
- data/spec/rdfa-triples/0038.nt +1 -0
- data/spec/rdfa-triples/0039.nt +1 -0
- data/spec/rdfa-triples/0040.nt +1 -0
- data/spec/rdfa-triples/0041.nt +1 -0
- data/spec/rdfa-triples/0042.nt +0 -0
- data/spec/rdfa-triples/0046.nt +3 -0
- data/spec/rdfa-triples/0047.nt +3 -0
- data/spec/rdfa-triples/0048.nt +3 -0
- data/spec/rdfa-triples/0049.nt +2 -0
- data/spec/rdfa-triples/0050.nt +2 -0
- data/spec/rdfa-triples/0051.nt +2 -0
- data/spec/rdfa-triples/0052.nt +1 -0
- data/spec/rdfa-triples/0053.nt +2 -0
- data/spec/rdfa-triples/0054.nt +2 -0
- data/spec/rdfa-triples/0055.nt +2 -0
- data/spec/rdfa-triples/0056.nt +3 -0
- data/spec/rdfa-triples/0057.nt +4 -0
- data/spec/rdfa-triples/0058.nt +6 -0
- data/spec/rdfa-triples/0059.nt +6 -0
- data/spec/rdfa-triples/0060.nt +2 -0
- data/spec/rdfa-triples/0061.nt +1 -0
- data/spec/rdfa-triples/0062.nt +1 -0
- data/spec/rdfa-triples/0063.nt +1 -0
- data/spec/rdfa-triples/0064.nt +1 -0
- data/spec/rdfa-triples/0065.nt +3 -0
- data/spec/rdfa-triples/0066.nt +1 -0
- data/spec/rdfa-triples/0067.nt +1 -0
- data/spec/rdfa-triples/0068.nt +1 -0
- data/spec/rdfa-triples/0069.nt +1 -0
- data/spec/rdfa-triples/0070.nt +1 -0
- data/spec/rdfa-triples/0071.nt +1 -0
- data/spec/rdfa-triples/0072.nt +1 -0
- data/spec/rdfa-triples/0073.nt +1 -0
- data/spec/rdfa-triples/0074.nt +1 -0
- data/spec/rdfa-triples/0075.nt +1 -0
- data/spec/rdfa-triples/0076.nt +23 -0
- data/spec/rdfa-triples/0077.nt +23 -0
- data/spec/rdfa-triples/0078.nt +6 -0
- data/spec/rdfa-triples/0079.nt +3 -0
- data/spec/rdfa-triples/0080.nt +1 -0
- data/spec/rdfa-triples/0081.nt +6 -0
- data/spec/rdfa-triples/0082.nt +8 -0
- data/spec/rdfa-triples/0083.nt +6 -0
- data/spec/rdfa-triples/0084.nt +8 -0
- data/spec/rdfa-triples/0085.nt +4 -0
- data/spec/rdfa-triples/0086.nt +0 -0
- data/spec/rdfa-triples/0087.nt +23 -0
- data/spec/rdfa-triples/0088.nt +3 -0
- data/spec/rdfa-triples/0089.nt +1 -0
- data/spec/rdfa-triples/0090.nt +1 -0
- data/spec/rdfa-triples/0091.nt +3 -0
- data/spec/rdfa-triples/0092.nt +3 -0
- data/spec/rdfa-triples/0093.nt +2 -0
- data/spec/rdfa-triples/0094.nt +3 -0
- data/spec/rdfa-triples/0099.nt +1 -0
- data/spec/rdfa-triples/0100.nt +3 -0
- data/spec/rdfa-triples/0101.nt +3 -0
- data/spec/rdfa-triples/0102.nt +1 -0
- data/spec/rdfa-triples/0103.nt +1 -0
- data/spec/rdfa-triples/0104.nt +3 -0
- data/spec/rdfa-triples/0105.nt +1 -0
- data/spec/rdfa-triples/0106.nt +1 -0
- data/spec/rdfa-triples/0107.nt +0 -0
- data/spec/rdfa-triples/0108.nt +1 -0
- data/spec/rdfa-triples/0109.nt +1 -0
- data/spec/rdfa-triples/0110.nt +1 -0
- data/spec/rdfa-triples/0111.nt +2 -0
- data/spec/rdfa-triples/0112.nt +1 -0
- data/spec/rdfa-triples/0113.nt +2 -0
- data/spec/rdfa-triples/0114.nt +3 -0
- data/spec/rdfa-triples/0115.nt +4 -0
- data/spec/rdfa-triples/0116.nt +2 -0
- data/spec/rdfa-triples/0117.nt +2 -0
- data/spec/rdfa-triples/0118.nt +1 -0
- data/spec/rdfa-triples/0119.nt +1 -0
- data/spec/rdfa-triples/0120.nt +1 -0
- data/spec/rdfa-triples/0121.nt +2 -0
- data/spec/rdfa-triples/0122.nt +1 -0
- data/spec/rdfa-triples/0123.nt +3 -0
- data/spec/rdfa-triples/0124.nt +4 -0
- data/spec/rdfa-triples/0125.nt +1 -0
- data/spec/rdfa-triples/0126.nt +3 -0
- data/spec/rdfa-triples/1001.nt +6 -0
- data/spec/rdfa_helper.rb +188 -0
- data/spec/rdfa_parser_spec.rb +146 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +8 -0
- metadata +246 -0
data/History.txt
ADDED
data/README.rdoc
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
= RDF::RDFa reader/writer
|
|
2
|
+
|
|
3
|
+
RDFa parser for RDF.rb.
|
|
4
|
+
|
|
5
|
+
== DESCRIPTION:
|
|
6
|
+
|
|
7
|
+
RDF::RDFa is an RDFa parser for Ruby using the RDF.rb library suite.
|
|
8
|
+
|
|
9
|
+
== FEATURES:
|
|
10
|
+
RDF::RDFa parses RDFa into a Graph object.
|
|
11
|
+
|
|
12
|
+
* Fully compliant XHTML/RDFa 1.0 parser.
|
|
13
|
+
|
|
14
|
+
Install with 'gem install rdf-rdfa'
|
|
15
|
+
|
|
16
|
+
== Usage:
|
|
17
|
+
Instantiate a parser and parse source, specifying type and base-URL
|
|
18
|
+
|
|
19
|
+
use RDF::RDFa
|
|
20
|
+
p = Parser.new
|
|
21
|
+
graph = p.parse(input, "http://example.com")
|
|
22
|
+
|
|
23
|
+
== Resources:
|
|
24
|
+
* Distiller[http://kellogg-assoc.com/distiller]
|
|
25
|
+
* RDoc[http://rdoc.info/projects/gkellogg/rdf-rdfa]
|
|
26
|
+
* History[http://github.com/gkellogg/rdf-rdfa/blob/master/History.txt]
|
|
27
|
+
|
|
28
|
+
== LICENSE:
|
|
29
|
+
|
|
30
|
+
(The MIT License)
|
|
31
|
+
|
|
32
|
+
Copyright (c) 2009-2010 Gregg Kellogg
|
|
33
|
+
|
|
34
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
35
|
+
a copy of this software and associated documentation files (the
|
|
36
|
+
'Software'), to deal in the Software without restriction, including
|
|
37
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
38
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
39
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
40
|
+
the following conditions:
|
|
41
|
+
|
|
42
|
+
The above copyright notice and this permission notice shall be
|
|
43
|
+
included in all copies or substantial portions of the Software.
|
|
44
|
+
|
|
45
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
46
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
47
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
48
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
49
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
50
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
51
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
52
|
+
|
|
53
|
+
== FEEDBACK
|
|
54
|
+
|
|
55
|
+
* gregg@kellogg-assoc.com
|
|
56
|
+
* rubygems.org/rdf-rdfa
|
|
57
|
+
* github.com/gkellogg/rdf-rdfa
|
|
58
|
+
* gkellogg.lighthouseapp.com for bug reports
|
|
59
|
+
* public-rdf-ruby mailing list on w3.org
|
data/Rakefile
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
|
|
3
|
+
begin
|
|
4
|
+
gem 'jeweler'
|
|
5
|
+
require 'jeweler'
|
|
6
|
+
Jeweler::Tasks.new do |gemspec|
|
|
7
|
+
gemspec.name = "rdf-rdfa"
|
|
8
|
+
gemspec.summary = "RDFa parser for RDF.rb."
|
|
9
|
+
gemspec.description = <<-DESCRIPTION
|
|
10
|
+
RDF::RDFa is an RDFa parser for Ruby using the RDF.rb library suite.
|
|
11
|
+
DESCRIPTION
|
|
12
|
+
gemspec.email = "gregg@kellogg-assoc.com"
|
|
13
|
+
gemspec.homepage = "http://github.com/gkellogg/rdf-rdfa"
|
|
14
|
+
gemspec.authors = ["Gregg Kellogg", "Nicholas Humfrey"]
|
|
15
|
+
gemspec.add_dependency('nokogiri', '>= 1.3.3')
|
|
16
|
+
gemspec.add_dependency('rdf', '>= 0.1.6')
|
|
17
|
+
gemspec.add_development_dependency('rspec')
|
|
18
|
+
gemspec.add_development_dependency('activesupport', '>= 2.3.0')
|
|
19
|
+
gemspec.extra_rdoc_files = %w(README.rdoc History.txt)
|
|
20
|
+
end
|
|
21
|
+
Jeweler::GemcutterTasks.new
|
|
22
|
+
rescue LoadError
|
|
23
|
+
puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
require 'spec/rake/spectask'
|
|
27
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
|
28
|
+
spec.libs << 'lib' << 'spec'
|
|
29
|
+
spec.spec_files = FileList['spec/*_spec.rb']
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
desc "Run specs through RCov"
|
|
33
|
+
Spec::Rake::SpecTask.new("spec:rcov") do |spec|
|
|
34
|
+
spec.libs << 'lib' << 'spec'
|
|
35
|
+
spec.pattern = 'spec/*_spec.rb'
|
|
36
|
+
spec.rcov = true
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
desc "Generate HTML report specs"
|
|
40
|
+
Spec::Rake::SpecTask.new("doc:spec") do |spec|
|
|
41
|
+
spec.libs << 'lib' << 'spec'
|
|
42
|
+
spec.spec_files = FileList['spec/*_spec.rb']
|
|
43
|
+
spec.spec_opts = ["--format", "html:doc/spec.html"]
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
task :default => :spec
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.0.1
|
data/example.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
$:.unshift(File.join(File.dirname(__FILE__), 'lib'))
|
|
4
|
+
|
|
5
|
+
require 'rubygems'
|
|
6
|
+
require 'rdfa/reader'
|
|
7
|
+
|
|
8
|
+
data = <<-EOF;
|
|
9
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
10
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">
|
|
11
|
+
<html xmlns="http://www.w3.org/1999/xhtml"
|
|
12
|
+
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
13
|
+
<head>
|
|
14
|
+
<title>Test 0001</title>
|
|
15
|
+
</head>
|
|
16
|
+
<body>
|
|
17
|
+
<p>This photo was taken by <span class="author" about="photo1.jpg" property="dc:creator">Mark Birbeck</span>.</p>
|
|
18
|
+
</body>
|
|
19
|
+
</html>
|
|
20
|
+
EOF
|
|
21
|
+
|
|
22
|
+
$DEBUG = false
|
|
23
|
+
|
|
24
|
+
reader = RDFa::Reader.new(data, :debug => false)
|
|
25
|
+
reader.each_statement do |statement|
|
|
26
|
+
statement.inspect!
|
|
27
|
+
end
|
data/lib/rdfa/format.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module RDF::RDFa
  ##
  # RDFa format specification.
  #
  # Registers the `text/html` content type (file extension `.html`) and names
  # the reader class that handles such input.
  #
  # @example Looking up the format by file name, extension or content type
  #   RDF::Format.for("etc/doap.html")
  #   RDF::Format.for(:file_name      => "etc/doap.html")
  #   RDF::Format.for(:file_extension => "html")
  #   RDF::Format.for(:content_type   => "text/html")
  #
  # @see http://www.w3.org/TR/rdfa-syntax/
  class Format < RDF::Format
    content_type     'text/html', :extension => :html
    content_encoding 'ascii'

    reader { RDFa::Reader }
  end
end
|
data/lib/rdfa/reader.rb
ADDED
|
@@ -0,0 +1,798 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'rdf'
|
|
3
|
+
|
|
4
|
+
module RDF::RDFa
|
|
5
|
+
##
|
|
6
|
+
# An RDFa parser in Ruby
|
|
7
|
+
#
|
|
8
|
+
# Based on processing rules described here:
|
|
9
|
+
# http://www.w3.org/TR/rdfa-core/#s_sequence
|
|
10
|
+
#
|
|
11
|
+
# Ben Adida
|
|
12
|
+
# 2008-05-07
|
|
13
|
+
# Gregg Kellogg
|
|
14
|
+
# 2009-08-04
|
|
15
|
+
class Reader < RDF::Reader
|
|
16
|
+
autoload :Namespace, 'rdfa/reader/namespace'
|
|
17
|
+
autoload :VERSION, 'rdfa/reader/version'
|
|
18
|
+
|
|
19
|
+
NC_REGEXP = Regexp.new(
|
|
20
|
+
%{^
|
|
21
|
+
(?!\\\\u0301) # ́ is a non-spacing acute accent.
|
|
22
|
+
# It is legal within an XML Name, but not as the first character.
|
|
23
|
+
( [a-zA-Z_]
|
|
24
|
+
| \\\\u[0-9a-fA-F]
|
|
25
|
+
)
|
|
26
|
+
( [0-9a-zA-Z_\.-]
|
|
27
|
+
| \\\\u([0-9a-fA-F]{4})
|
|
28
|
+
)*
|
|
29
|
+
$},
|
|
30
|
+
Regexp::EXTENDED)
|
|
31
|
+
|
|
32
|
+
#XML_LITERAL = Literal::Encoding.xmlliteral
|
|
33
|
+
XML_LITERAL = RDF['XMLLiteral']
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# FIXME: use RDF::URI.qname instead
|
|
37
|
+
RDF_NS = Namespace.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf")
|
|
38
|
+
RDFA_NS = Namespace.new("http://www.w3.org/ns/rdfa#", "rdfa")
|
|
39
|
+
RDFS_NS = Namespace.new("http://www.w3.org/2000/01/rdf-schema#", "rdfs")
|
|
40
|
+
XHV_NS = Namespace.new("http://www.w3.org/1999/xhtml/vocab#", "xhv")
|
|
41
|
+
XML_NS = Namespace.new("http://www.w3.org/XML/1998/namespace", "xml")
|
|
42
|
+
XSD_NS = Namespace.new("http://www.w3.org/2001/XMLSchema#", "xsd")
|
|
43
|
+
XSI_NS = Namespace.new("http://www.w3.org/2001/XMLSchema-instance", "xsi")
|
|
44
|
+
XH_MAPPING = {"" => Namespace.new("http://www.w3.org/1999/xhtml/vocab\#", nil)}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
require 'rdfa/format'
|
|
48
|
+
format RDFa::Format
|
|
49
|
+
|
|
50
|
+
attr_reader :debug
|
|
51
|
+
|
|
52
|
+
##
|
|
53
|
+
# @return [RDF::Graph]
|
|
54
|
+
attr_reader :graph
|
|
55
|
+
|
|
56
|
+
# Host language, One of:
|
|
57
|
+
# :xhtml_rdfa_1_0
|
|
58
|
+
# :xhtml_rdfa_1_1
|
|
59
|
+
attr_reader :host_language
|
|
60
|
+
|
|
61
|
+
# The Recursive Baggage
|
|
62
|
+
class EvaluationContext # :nodoc:
  # The base. This will usually be the URL of the document being processed,
  # but it could be some other URL, set by some other mechanism,
  # such as the (X)HTML base element. The important thing is that it establishes
  # a URL against which relative paths can be resolved.
  attr_accessor :base

  # The parent subject.
  # The initial value will be the same as the initial value of base,
  # but it will usually change during the course of processing.
  attr_accessor :parent_subject

  # The parent object.
  # In some situations the object of a statement becomes the subject of any nested statements,
  # and this property is used to convey this value.
  # Note that this value may be a bnode, since in some situations a number of nested statements
  # are grouped together on one bnode.
  # This means that the bnode must be set in the containing statement and passed down.
  attr_accessor :parent_object

  # A list of current, in-scope URI mappings.
  attr_accessor :uri_mappings

  # A list of incomplete triples. A triple can be incomplete when no object resource
  # is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
  # The triples can be completed when a resource becomes available,
  # which will be when the next subject is specified (part of the process called chaining).
  attr_accessor :incomplete_triples

  # The language. Note that there is no default language.
  attr_accessor :language

  # The term mappings, a list of terms and their associated URIs.
  # This specification does not define an initial list.
  # Host Languages may define an initial list.
  # If a Host Language provides an initial list, it should do so via an RDFa Profile document.
  attr_accessor :term_mappings

  # The default vocabulary, a value to use as the prefix URI when a term is used.
  # This specification does not define an initial setting for the default vocabulary.
  # Host Languages may define an initial setting.
  attr_accessor :default_vocabulary

  # Initialize the evaluation context, [5.1]
  #
  # @param [Object] base base against which relative paths are resolved;
  #   also used as the initial parent subject
  # @param [Hash{Symbol => Object}] host_defaults host-language defaults; the
  #   keys read here are :term_mappings (default {}) and :vocabulary (default nil)
  def initialize(base, host_defaults)
    @base = base
    @parent_subject = @base
    @parent_object = nil
    @uri_mappings = {}
    @incomplete_triples = []
    @language = nil
    @term_mappings = host_defaults.fetch(:term_mappings, {})
    # Previously misspelled (@default_voabulary / :voabulary), which left the
    # default vocabulary permanently nil.
    @default_vocabulary = host_defaults.fetch(:vocabulary, nil)
  end

  # Copy this Evaluation Context.
  #
  # The URI mappings and incomplete triples get their own (shallow) copies so
  # that changes made through the copy do not leak back into the original.
  def initialize_copy(from)
    @uri_mappings = from.uri_mappings.clone
    @incomplete_triples = from.incomplete_triples.clone
  end

  # @return [String] one-line summary of the scalar attributes plus the sizes
  #   of the mapping and incomplete-triple collections
  def inspect
    v = %w(base parent_subject parent_object language default_vocabulary).map do |a|
      value = send(a)
      "#{a}='#{value.nil? ? '<nil>' : value}'"
    end
    v << "uri_mappings[#{uri_mappings.keys.length}]"
    v << "incomplete_triples[#{incomplete_triples.length}]"
    v << "term_mappings[#{term_mappings.keys.length}]"
    v.join(",")
  end
end
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Parse XHTML+RDFa document from a string or input stream to closure or graph.
|
|
130
|
+
#
|
|
131
|
+
# If the parser is called with a block, triples are passed to the block rather
|
|
132
|
+
# than added to the graph.
|
|
133
|
+
#
|
|
134
|
+
# Optionally, the stream may be a Nokogiri::HTML::Document or Nokogiri::XML::Document
|
|
135
|
+
# With a block, yields each statement with URI, BNode or Literal elements
|
|
136
|
+
#
|
|
137
|
+
# @param [IO] stream:: the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
|
|
138
|
+
# @param [String] uri:: the URI of the document
|
|
139
|
+
# @param [Hash] options:: Parser options, one of
|
|
140
|
+
# <em>options[:debug]</em>:: Array to place debug messages
|
|
141
|
+
# <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
|
|
142
|
+
# @return [Graph]:: Returns the graph containing parsed triples
|
|
143
|
+
# @raise [Error]:: Raises RdfError if _strict_
|
|
144
|
+
|
|
145
|
+
##
# Initializes the RDFa reader instance and immediately parses the whole
# document; parsing is not deferred until iteration.
#
# NOTE(review): the block is used twice. It is stored as @callback, so every
# parsed triple is passed to it instead of being added to the graph, and it is
# also called once with the reader itself after parsing. Confirm which of the
# two contracts callers rely on.
#
# @param [IO, File, String, Nokogiri::XML::Document, Nokogiri::HTML::Document] input
#   anything that is not already a Nokogiri document is parsed with Nokogiri::XML
# @param [Hash{Symbol => Object}] options
#   :debug    - Array that receives debug messages
#   :strict   - raise on errors instead of continuing with lax parsing
#   :base_uri - base URI of the document (String or RDF::URI)
# @yield [reader]
# @yieldparam [Reader] reader
# @raise [ParserException] if the document is empty and :strict is set
def initialize(input = $stdin, options = {}, &block)
  super

  @graph = RDF::Graph.new
  @debug = options[:debug]
  @strict = options[:strict]
  @base_uri = options[:base_uri]
  @base_uri = RDF::URI.parse(@base_uri) if @base_uri.is_a?(String)
  @named_bnodes = {}
  @@vocabulary_cache ||= {}
  @nsbinding = {}
  @uri_binding = {}

  @doc = case input
  when Nokogiri::HTML::Document, Nokogiri::XML::Document then input
  else Nokogiri::XML.parse(input, @base_uri.to_s)
  end

  # An empty input yields a document without a root element; treat that the
  # same as a missing document rather than crashing on nil below.
  root = @doc.nil? ? nil : @doc.root
  raise ParserException, "Empty document" if root.nil? && @strict
  @callback = block

  # Determine host language
  # XXX - right now only XHTML defined
  version = root ? root.attributes["version"].to_s : ""
  @host_language = case version
  when /XHTML\+RDFa/ then :xhtml # "+" must be escaped to match it literally
  end

  # If none found, assume xhtml
  @host_language ||= :xhtml

  @host_defaults = case @host_language
  when :xhtml
    bind(XHV_NS)
    {
      :vocabulary => XHV_NS.uri,
      :prefix => XHV_NS,
      :term_mappings => %w(
        alternate appendix bookmark cite chapter contents copyright first glossary help icon index
        last license meta next p3pv1 prev role section stylesheet subsection start top up
      ).inject({}) { |hash, term| hash[term] = XHV_NS.send("#{term}_"); hash }
    }
  else
    {}
  end

  # parse
  parse_whole_document(@doc, @base_uri) unless @doc.nil?

  block.call(self) if block_given?
end
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
##
# Enumerates the statements held in the reader's graph, handing each one to
# the given block.
#
# @yield [statement]
# @yieldparam [RDF::Statement] statement
# @return [Object] whatever the graph's own each_statement returns
def each_statement(&block)
  @graph.each_statement(&block)
end
|
|
215
|
+
|
|
216
|
+
##
# Enumerates the triples held in the reader's graph, handing the subject,
# predicate and object of each one to the given block.
#
# @yield [subject, predicate, object]
# @yieldparam [RDF::Resource] subject
# @yieldparam [RDF::URI] predicate
# @yieldparam [RDF::Value] object
# @return [Object] whatever the graph's own each_triple returns
def each_triple(&block)
  @graph.each_triple(&block)
end
|
|
227
|
+
|
|
228
|
+
# Registers +namespace+ in both lookup tables: by URI (@uri_binding) and by
# prefix (@nsbinding). Any earlier entry for the same URI, or for an equal
# prefix key, is dropped first.
#
# @param [Namespace] namespace object responding to +uri+ and +prefix+
# @return [Namespace] the namespace that was just bound
def bind(namespace)
  ns_uri = namespace.uri.to_s
  ns_prefix = namespace.prefix

  # Remove stale bindings before inserting the fresh ones.
  @uri_binding.delete(ns_uri)
  @nsbinding.reject! { |bound_prefix, _bound_ns| ns_prefix == bound_prefix }

  @uri_binding[ns_uri] = namespace
  @nsbinding[ns_prefix.to_s] = namespace
end
|
|
238
|
+
|
|
239
|
+
# Looks up the namespace currently bound to +prefix+.
#
# @param [#to_s] prefix prefix to look up; converted with to_s first
# @return [Namespace, nil] the bound namespace, or nil when nothing is bound
def namespace(prefix)
  key = prefix.to_s
  @nsbinding[key]
end
|
|
243
|
+
|
|
244
|
+
# Returns the prefix under which a namespace is bound.
#
# @param [Namespace, Object] namespace a Namespace (looked up by its URI
#   string) or a key used as-is against the URI binding table
# @return [Object] the prefix of the bound namespace
# @raise [NoMethodError] if nothing is bound for the given namespace
def prefix(namespace)
  key = namespace.is_a?(Namespace) ? namespace.uri.to_s : namespace
  @uri_binding[key].prefix
end
|
|
248
|
+
|
|
249
|
+
private
|
|
250
|
+
|
|
251
|
+
# Builds a slash-separated path for a node by walking up through its parents.
#
# @param [Nokogiri::XML::Element, Nokogiri::XML::Attr, String, Object] node
# @return [String] the path for an element or attribute, the string itself
#   when given a String, and "" for anything else
def node_path(node)
  if node.is_a?(Nokogiri::XML::Element) || node.is_a?(Nokogiri::XML::Attr)
    parent_path = node_path(node.parent)
    "#{parent_path}/#{node.name}"
  elsif node.is_a?(String)
    node
  else
    ""
  end
end
|
|
259
|
+
|
|
260
|
+
# Records a debug message prefixed with the path of +node+.
#
# The line is printed when $DEBUG is set and appended to @debug when that is
# an Array; with neither enabled nothing happens.
#
# @param [XML Node, any] node:: XML Node or string for showing context
# @param [String] message::
# @return [Array, nil] the debug array when a message was appended, else nil
def add_debug(node, message)
  collecting = @debug.is_a?(Array)
  return unless $DEBUG || collecting

  line = "#{node_path(node)}: #{message}"
  puts line if $DEBUG
  @debug << line if collecting
end
|
|
268
|
+
|
|
269
|
+
# Add a triple; the object can be a literal, URI or bnode.
#
# If the parser was given a block, the triple is passed to that block rather
# than added to the graph.
#
# Errors raised while building or delivering the triple are logged through
# add_debug. In strict mode they are re-raised; otherwise they are dropped and
# nil is returned. Only StandardError is intercepted, so interrupts, exit
# requests and similar low-level exceptions always propagate.
#
# @param [Nokogiri::XML::Node, any] node:: XML Node or string for showing context
# @param [URI, BNode] subject:: the subject of the triple
# @param [URI] predicate:: the predicate of the triple
# @param [URI, BNode, Literal] object:: the object of the triple
# @return [RDF::Statement, nil]:: the statement that was added, or nil if an error was suppressed
# @raise [StandardError]:: re-raises the underlying error if parsing mode is _strict_
def add_triple(node, subject, predicate, object)
  triple = RDF::Statement.new(subject, predicate, object)
  add_debug(node, "triple: #{triple}")
  if @callback
    @callback.call(triple) # Perform yield to saved block
  else
    @graph << triple
  end
  triple
rescue StandardError => e
  add_debug(node, "add_triple raised #{e.class}: #{e.message}")
  puts e.backtrace if $DEBUG
  raise if @strict
end
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
# Parsing an RDFa document (this is *not* the recursive method).
#
# Looks for an (X)HTML <base> element; when it carries a non-empty href, the
# href (with any fragment removed) replaces both the base handed in and
# @base_uri. A <base> without a usable href is ignored so that the caller's
# base is kept. The document root is then traversed with a fresh evaluation
# context.
#
# @param [#css, #root] doc the parsed document
# @param [Object] base base to resolve against when the document has no <base>
# @return [Object] the result of traversing the root element
def parse_whole_document(doc, base)
  # find if the document has a base element
  # XXX - HTML specific
  base_el = doc.css('html>head>base').first
  # Strip any fragment from base
  href = base_el ? base_el.attributes['href'].to_s.split("#").first : nil
  if href && !href.empty?
    base = href
    @base_uri = RDF::URI.new(base)
    add_debug(base_el, "parse_whole_doc: base='#{base}'")
  end

  # initialize the evaluation context with the appropriate base
  evaluation_context = EvaluationContext.new(base, @host_defaults)

  traverse(doc.root, evaluation_context)
end
|
|
315
|
+
|
|
316
|
+
# Extract prefix and term mappings declared on an element.
#
# Three sources are consulted, in this order: @profile documents, xmlns:*
# namespace declarations, and the @prefix attribute. Results are written into
# the two hashes passed in.
#
# NOTE(review): the @profile branch calls Parser.parse, Graph#subjects,
# Graph#properties, Graph#serialize and an unqualified Literal, none of which
# are defined in the part of this file visible here. They look like another
# library's API; confirm they exist before relying on profile loading.
#
# @param [Nokogiri::XML::Element] element element whose attributes are read
# @param [Hash] uri_mappings prefix => Namespace, updated in place
# @param [Hash] term_mappings term => URI, updated in place
# @raise [ParserException, RuntimeError] profile errors, only when strict
def extract_mappings(element, uri_mappings, term_mappings)
  # Process @profile
  # Next the current element is parsed for any updates to the local term mappings and
  # local list of URI mappings via @profile.
  # If @profile is present, its value is processed as defined in RDFa Profiles.
  element.attributes['profile'].to_s.split(/\s/).each do |profile|
    # Don't try to open ourselves!
    if @base_uri == profile
      add_debug(element, "extract_mappings: skip recursive profile <#{profile}>")
    elsif @@vocabulary_cache.has_key?(profile)
      add_debug(element, "extract_mappings: skip previously parsed profile <#{profile}>")
    else
      # Tracked outside the loop below so the rescue clause can report it.
      current_subject = nil
      begin
        add_debug(element, "extract_mappings: parse profile <#{profile}>")
        # NOTE(review): the cache entry is created before the fetch, so a
        # profile that fails to load stays cached as empty.
        @@vocabulary_cache[profile] = {
          :uri_mappings => {},
          :term_mappings => {}
        }
        um = @@vocabulary_cache[profile][:uri_mappings]
        tm = @@vocabulary_cache[profile][:term_mappings]
        add_debug(element, "extract_mappings: profile open <#{profile}>")
        require 'patron' unless defined?(Patron)
        sess = Patron::Session.new
        sess.timeout = 10
        resp = sess.get(profile)
        raise RuntimeError, "HTTP returned status #{resp.status} when reading #{profile}" if resp.status >= 400

        # Parse profile, and extract mappings from graph.
        # Debug output is silenced while the profile is parsed and restored
        # afterwards even if parsing raises.
        # NOTE(review): $verbose is not Ruby's $VERBOSE; kept as written.
        old_debug, old_verbose = $DEBUG, $verbose
        ttl = nil
        begin
          $DEBUG, $verbose = false, false
          p_graph = Parser.parse(resp.body, profile)
          # Test the saved flag: $DEBUG itself is forced to false here.
          ttl = p_graph.serialize(:format => :ttl) if @debug || old_debug
        ensure
          $DEBUG, $verbose = old_debug, old_verbose
        end
        add_debug(element, ttl) if ttl

        p_graph.subjects.each do |subject|
          current_subject = subject
          props = p_graph.properties(subject)

          # If one of the objects is not a Literal or if there are additional rdfa:uri or rdfa:term
          # predicates sharing the same subject, no mapping is created.
          # NOTE(review): RDFA_NS.uri (no trailing underscore, unlike term_ and
          # prefix_) may be the namespace's own URI rather than the rdfa:uri
          # term; verify against Namespace.
          uri = props[RDFA_NS.uri.to_s]
          term = props[RDFA_NS.term_.to_s]
          prefix = props[RDFA_NS.prefix_.to_s]
          add_debug(element, "extract_mappings: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}")

          next if !uri || (!term && !prefix)
          raise ParserException, "multi-valued rdf:uri" if uri.length != 1
          raise ParserException, "multi-valued rdf:term." if term && term.length != 1
          raise ParserException, "multi-valued rdf:prefix" if prefix && prefix.length != 1

          uri = uri.first
          term = term.first if term
          prefix = prefix.first if prefix
          raise ParserException, "rdf:uri must be a Literal" unless uri.is_a?(Literal)
          raise ParserException, "rdf:term must be a Literal" unless term.nil? || term.is_a?(Literal)
          raise ParserException, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(Literal)

          # For every extracted triple that is the common subject of an rdfa:prefix and an rdfa:uri
          # predicate, create a mapping from the object literal of the rdfa:prefix predicate to the
          # object literal of the rdfa:uri predicate. Add or update this mapping in the local list of
          # URI mappings after transforming the 'prefix' component to lower-case.
          um[prefix.to_s.downcase] = bind(Namespace.new(uri.to_s, prefix.to_s.downcase)) if prefix

          # For every extracted triple that is the common subject of an rdfa:term and an rdfa:uri
          # predicate, create a mapping from the object literal of the rdfa:term predicate to the
          # object literal of the rdfa:uri predicate. Add or update this mapping in the local term mappings.
          tm[term.to_s] = RDF::URI.new(uri.to_s) if term
        end
      rescue ParserException => e
        add_debug(element, "extract_mappings: profile subject #{current_subject.to_s}: #{e.message}")
        raise if @strict
      rescue RuntimeError => e
        add_debug(element, "extract_mappings: profile: #{e.message}")
        raise if @strict
      end
    end

    # Merge mappings from this vocabulary. A skipped recursive profile may
    # have no cache entry at all, in which case there is nothing to merge.
    cached = @@vocabulary_cache[profile]
    next unless cached
    uri_mappings.merge!(cached[:uri_mappings])
    term_mappings.merge!(cached[:term_mappings])
  end

  # look for xmlns
  # (note, this may be dependent on @host_language)
  # Regardless of how the mapping is declared, the value to be mapped must be converted to lower case,
  # and the URI is not processed in any way; in particular if it is a relative path it is
  # not resolved against the current base.
  element.namespaces.each do |attr_name, attr_value|
    begin
      abbr, ns_prefix = attr_name.split(":")
      # Only prefixed declarations (xmlns:foo) create a mapping.
      next unless abbr.downcase == "xmlns" && ns_prefix
      lowered = ns_prefix.to_s.downcase
      uri_mappings[lowered] = bind(Namespace.new(attr_value, lowered))
    # FIXME: rescue RdfException => e
    rescue StandardError => e
      add_debug(element, "extract_mappings raised #{e.class}: #{e.message}")
      raise if @strict
    end
  end

  # Set mappings from @prefix
  # prefix is a whitespace separated list of prefix-name URI pairs of the form
  #   NCName ':' ' '+ xs:anyURI
  # SPEC Confusion: prefix is forced to lower-case in @profile, but not specified here.
  mappings = element.attributes["prefix"].to_s.split(/\s+/)
  while mappings.length > 0 do
    prefix, uri = mappings.shift.downcase, mappings.shift
    next unless prefix.match(/:$/)
    # A trailing "name:" without a URI cannot form a mapping.
    next if uri.nil?
    prefix.chop!

    uri_mappings[prefix] = bind(Namespace.new(uri, prefix))
  end

  add_debug(element, "uri_mappings: #{uri_mappings.values.map{|ns|ns.to_s}.join(", ")}")
  add_debug(element, "term_mappings: #{term_mappings.keys.join(", ")}")
end
|
|
434
|
+
|
|
435
|
+
# The recursive helper function
|
|
436
|
+
# Processes +element+ following the RDFa processing sequence referenced in
# the step comments below, emitting triples through add_triple, and then
# recurses into the child elements with a derived evaluation context.
#
# @param element [Nokogiri::XML::Element, nil] element to process
# @param evaluation_context [EvaluationContext] context inherited from the parent element
# @return [nil] when +element+ is nil; callers in this method ignore the return value otherwise
# @raise [ParserException] when +element+ is nil and @strict is set
def traverse(element, evaluation_context)
  if element.nil?
    add_debug(element, "traverse nil element")
    raise ParserException, "Can't parse nil element" if @strict
    return nil
  end

  add_debug(element, "traverse, ec: #{evaluation_context.inspect}")

  # local variables [5.5 Step 1]
  recurse = true
  skip = false
  new_subject = nil
  current_object_resource = nil
  # Mappings are cloned so changes made for this element do not leak back
  # into the parent's evaluation context.
  uri_mappings = evaluation_context.uri_mappings.clone
  incomplete_triples = []
  language = evaluation_context.language
  term_mappings = evaluation_context.term_mappings.clone
  default_vocabulary = evaluation_context.default_vocabulary

  current_object_literal = nil  # XXX Not explicit

  # shortcut
  attrs = element.attributes

  about = attrs['about']
  src = attrs['src']
  resource = attrs['resource']
  href = attrs['href']
  vocab = attrs['vocab']

  # Pull out the attributes needed for the skip test.
  property = attrs['property'].to_s if attrs['property']
  typeof = attrs['typeof'].to_s if attrs['typeof']
  datatype = attrs['datatype'].to_s if attrs['datatype']
  content = attrs['content'].to_s if attrs['content']
  rel = attrs['rel'].to_s if attrs['rel']
  rev = attrs['rev'].to_s if attrs['rev']

  # Default vocabulary [7.5 Step 2]
  # First the current element is examined for any change to the default vocabulary via @vocab.
  # If @vocab is present and contains a value, its value updates the local default vocabulary.
  # If the value is empty, then the local default vocabulary must be reset to the Host Language defined default.
  unless vocab.nil?
    default_vocabulary = if vocab.to_s.empty?
      # Set default_vocabulary to host language default.
      # (The key was previously misspelled, so the lookup always fell
      # through to the nil fallback.)
      @host_defaults.fetch(:vocabulary, nil)
    else
      vocab.to_s
    end
    add_debug(element, "[Step 2] traverse, default_vocaulary: #{default_vocabulary.inspect}")
  end

  # Local term mappings [7.5 Steps 3 & 4]
  # Next the current element is parsed for any updates to the local term mappings and local list of URI mappings via @profile.
  # If @profile is present, its value is processed as defined in RDFa Profiles.
  extract_mappings(element, uri_mappings, term_mappings)

  # Language information [7.5 Step 5]
  # From HTML5 [3.2.3.3]
  #   If both the lang attribute in no namespace and the lang attribute in the XML namespace are set
  #   on an element, user agents must use the lang attribute in the XML namespace, and the lang
  #   attribute in no namespace must be ignored for the purposes of determining the element's
  #   language.
  language = case
  when element.at_xpath("@xml:lang", "xml" => XML_NS.uri.to_s)
    element.at_xpath("@xml:lang", "xml" => XML_NS.uri.to_s).to_s
  when element.at_xpath("@lang")
    # "@lang" selects the attribute; a bare "lang" step would select a
    # child element named lang instead.
    element.at_xpath("@lang").to_s
  else
    language
  end
  add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language}") if attrs['lang']

  # rels and revs
  rels = process_uris(element, rel, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
  revs = process_uris(element, rev, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)

  add_debug(element, "traverse, about: #{about.nil? ? 'nil' : about}, src: #{src.nil? ? 'nil' : src}, resource: #{resource.nil? ? 'nil' : resource}, href: #{href.nil? ? 'nil' : href}")
  add_debug(element, "traverse, property: #{property.nil? ? 'nil' : property}, typeof: #{typeof.nil? ? 'nil' : typeof}, datatype: #{datatype.nil? ? 'nil' : datatype}, content: #{content.nil? ? 'nil' : content}")
  add_debug(element, "traverse, rels: #{rels.join(" ")}, revs: #{revs.join(" ")}")

  if !(rel || rev)
    # Establishing a new subject if no rel/rev [7.5 Step 6]
    # May not be valid, but can exist
    if about
      new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
    elsif src
      new_subject = process_uri(element, src, evaluation_context)
    elsif resource
      new_subject = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
    elsif href
      new_subject = process_uri(element, href, evaluation_context)
    end

    # If no URI is provided by a resource attribute, then the first match from the following rules
    # will apply:
    #   if @typeof is present, then new subject is set to be a newly created bnode.
    # otherwise,
    #   if parent object is present, new subject is set to the value of parent object.
    # Additionally, if @property is not present then the skip element flag is set to 'true';
    if new_subject.nil?
      if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
        # From XHTML+RDFa 1.1:
        # if no URI is provided, then first check to see if the element is the head or body element.
        # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
        new_subject = RDF::URI.new(evaluation_context.base)
      elsif element.attributes['typeof']
        new_subject = RDF::Node.new
      else
        # if it's null, it's null and nothing changes
        new_subject = evaluation_context.parent_object
        skip = true unless property
      end
    end
    add_debug(element, "[Step 6] new_subject: #{new_subject}, skip = #{skip}")
  else
    # [7.5 Step 7]
    # If the current element does contain a @rel or @rev attribute, then the next step is to
    # establish both a value for new subject and a value for current object resource:
    if about
      new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
    elsif src
      new_subject = process_uri(element, src, evaluation_context, :uri_mappings => uri_mappings)
    end

    # If no URI is provided then the first match from the following rules will apply
    if new_subject.nil?
      # NOTE(review): unlike Step 6 this branch does not check that
      # evaluation_context.base is set before using it -- confirm intent.
      if @host_language == :xhtml && element.name =~ /^(head|body)$/
        # From XHTML+RDFa 1.1:
        # if no URI is provided, then first check to see if the element is the head or body element.
        # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
        new_subject = RDF::URI.new(evaluation_context.base)
      elsif element.attributes['typeof']
        new_subject = RDF::Node.new
      else
        # if it's null, it's null and nothing changes
        new_subject = evaluation_context.parent_object
        # no skip flag set this time
      end
    end

    # Then the current object resource is set to the URI obtained from the first match from the following rules:
    if resource
      current_object_resource = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
    elsif href
      current_object_resource = process_uri(element, href, evaluation_context)
    end

    add_debug(element, "[Step 7] new_subject: #{new_subject}, current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource}")
  end

  # Process @typeof if there is a subject [Step 8]
  if new_subject and typeof
    # Typeof is TERMorCURIEorURIs
    types = process_uris(element, typeof, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
    add_debug(element, "typeof: #{typeof}")
    types.each do |one_type|
      add_triple(element, new_subject, RDF_TYPE, one_type)
    end
  end

  # Generate triples with given object [Step 9]
  if current_object_resource
    rels.each do |r|
      add_triple(element, new_subject, r, current_object_resource)
    end

    revs.each do |r|
      add_triple(element, current_object_resource, r, new_subject)
    end
  elsif rel || rev
    # Incomplete triples and bnode creation [Step 10]
    add_debug(element, "[Step 10] incompletes: rels: #{rels}, revs: #{revs}")
    current_object_resource = RDF::Node.new

    rels.each do |r|
      incomplete_triples << {:predicate => r, :direction => :forward}
    end

    revs.each do |r|
      incomplete_triples << {:predicate => r, :direction => :reverse}
    end
  end

  # Establish current object literal [Step 11]
  if property
    properties = process_uris(element, property, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)

    # get the literal datatype
    type = datatype
    children_node_types = element.children.collect{|c| c.class}.uniq

    # the following 3 IF clauses should be mutually exclusive. Written as is to prevent extensive indentation.
    type_resource = process_uri(element, type, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary) if type
    if type and !type.empty? and (type_resource.to_s != XML_LITERAL.to_s)
      # typed literal
      add_debug(element, "[Step 11] typed literal")
      current_object_literal = RDF::Literal.new(content || element.inner_text, :datatype => type_resource, :language => language)
    elsif content or (children_node_types == [Nokogiri::XML::Text]) or (element.children.length == 0) or (type == '')
      # plain literal
      add_debug(element, "[Step 11] plain literal")
      current_object_literal = RDF::Literal.new(content || element.inner_text, :language => language)
    elsif children_node_types != [Nokogiri::XML::Text] and (type == nil or type_resource.to_s == XML_LITERAL.to_s)
      # XML Literal
      add_debug(element, "[Step 11] XML Literal: #{element.inner_html}")
      current_object_literal = RDF::Literal.new(element.inner_html, :datatype => XML_LITERAL, :language => language, :namespaces => uri_mappings)
      # The children were consumed as the literal's content, so they are
      # not processed again as RDFa.
      recurse = false
    end

    # add each property
    properties.each do |p|
      add_triple(element, new_subject, p, current_object_literal)
    end
    # SPEC CONFUSION: "the triple has been created" ==> there may be more than one
    # set the recurse flag above in the IF about xmlliteral, as it is the only place that can happen
  end

  if not skip and new_subject && !evaluation_context.incomplete_triples.empty?
    # Complete the incomplete triples from the evaluation context [Step 12]
    add_debug(element, "[Step 12] complete incomplete triples: new_subject=#{new_subject}, completes=#{evaluation_context.incomplete_triples.inspect}")
    evaluation_context.incomplete_triples.each do |trip|
      if trip[:direction] == :forward
        add_triple(element, evaluation_context.parent_subject, trip[:predicate], new_subject)
      elsif trip[:direction] == :reverse
        add_triple(element, new_subject, trip[:predicate], evaluation_context.parent_subject)
      end
    end
  end

  # Create a new evaluation context and proceed recursively [Step 13]
  if recurse
    if skip
      # Reuse the parent's context when this element changed nothing that
      # children inherit; otherwise clone it and override the changed parts.
      if language == evaluation_context.language &&
          uri_mappings == evaluation_context.uri_mappings &&
          term_mappings == evaluation_context.term_mappings &&
          default_vocabulary == evaluation_context.default_vocabulary
        new_ec = evaluation_context
        add_debug(element, "[Step 13] skip: reused ec")
      else
        new_ec = evaluation_context.clone
        new_ec.language = language
        new_ec.uri_mappings = uri_mappings
        new_ec.term_mappings = term_mappings
        new_ec.default_vocabulary = default_vocabulary
        add_debug(element, "[Step 13] skip: cloned ec")
      end
    else
      # create a new evaluation context
      new_ec = EvaluationContext.new(evaluation_context.base, @host_defaults)
      new_ec.parent_subject = new_subject || evaluation_context.parent_subject
      new_ec.parent_object = current_object_resource || new_subject || evaluation_context.parent_subject
      new_ec.uri_mappings = uri_mappings
      new_ec.incomplete_triples = incomplete_triples
      new_ec.language = language
      new_ec.term_mappings = term_mappings
      new_ec.default_vocabulary = default_vocabulary
      add_debug(element, "[Step 13] new ec")
    end

    element.children.each do |child|
      # recurse only if it's an element
      traverse(child, new_ec) if child.class == Nokogiri::XML::Element
    end
  end
end
|
|
702
|
+
|
|
703
|
+
# space-separated TERMorCURIEorURI
|
|
704
|
+
# Resolves a whitespace-separated list of TERMorCURIEorURI tokens.
#
# @param element [Object] element being processed; only passed on to add_debug and process_uri
# @param value [#to_s, nil] raw attribute value holding zero or more tokens
# @param evaluation_context [Object] passed through unchanged to process_uri
# @param options [Hash] passed through unchanged to process_uri
# @return [Array] one process_uri result per token, with nil results removed;
#   an empty array when +value+ is nil or empty
def process_uris(element, value, evaluation_context, options)
  list = value.to_s
  return [] if list.empty?
  add_debug(element, "process_uris: #{value}")
  # Argument-less split ignores leading and trailing whitespace, so a value
  # such as " next" no longer produces an empty first token.
  list.split.map { |token| process_uri(element, token, evaluation_context, options) }.compact
end
|
|
709
|
+
|
|
710
|
+
# Resolves a single attribute token to a resource.
#
# The token is handled as a safe CURIE ("[prefix:ref]") when no term
# mappings are supplied, as a term when term mappings are supplied and the
# token matches NC_REGEXP, and otherwise as a CURIE with a fallback to a
# plain URI.
#
# @param element [Object] element being processed; only used for debug output
# @param value [#to_s] the token to resolve
# @param evaluation_context [#parent_subject] current evaluation context
# @param options [Hash] may contain :uri_mappings, :term_mappings and :vocab;
#   :uri_mappings defaults to an empty Hash
# @return [Object, nil] the resolved resource; nil when a safe CURIE or a
#   term cannot be resolved
def process_uri(element, value, evaluation_context, options = {})
  #return if value.to_s.empty?
  #add_debug(element, "process_uri: #{value}")
  options = {:uri_mappings => {}}.merge(options)
  text = value.to_s
  # \A and \z anchor the whole string; ^ and $ would also accept a value in
  # which only one line is bracketed.
  bracketed = text.match(/\A\[(.*)\]\z/)

  if !options[:term_mappings] && options[:uri_mappings] && bracketed
    # SafeCURIEorCURIEorURI
    # When the value is surrounded by square brackets, then the content within the brackets is
    # evaluated as a CURIE according to the CURIE Syntax definition. If it is not a valid CURIE, the
    # value must be ignored.
    uri = curie_to_resource_or_bnode(element, bracketed[1], options[:uri_mappings], evaluation_context.parent_subject)
    add_debug(element, "process_uri: #{value} => safeCURIE => <#{uri}>")
    uri
  elsif options[:term_mappings] && NC_REGEXP.match(text)
    # TERMorCURIEorURI
    # If the value is an NCName, then it is evaluated as a term according to General Use of Terms in
    # Attributes. Note that this step may mean that the value is to be ignored.
    uri = process_term(text, options)
    add_debug(element, "process_uri: #{value} => term => <#{uri}>")
    uri
  else
    # SafeCURIEorCURIEorURI or TERMorCURIEorURI
    # Otherwise, the value is evaluated as a CURIE.
    # If it is a valid CURIE, the resulting URI is used; otherwise, the value will be processed as a URI.
    uri = curie_to_resource_or_bnode(element, text, options[:uri_mappings], evaluation_context.parent_subject)
    if uri
      add_debug(element, "process_uri: #{value} => CURIE => <#{uri}>")
    else
      #FIXME: uri = URIRef.new(value, evaluation_context.base)
      # The value is used as-is; it is not resolved against the base here.
      uri = RDF::URI.new(text)
      add_debug(element, "process_uri: #{value} => URI => <#{uri}>")
    end
    uri
  end
end
|
|
744
|
+
|
|
745
|
+
# [7.4.3] General Use of Terms in Attributes
|
|
746
|
+
#
|
|
747
|
+
# @param [String] value:: the term to resolve
|
|
748
|
+
# @param [Hash] options:: Parser options, one of
|
|
749
|
+
# <em>options[:term_mappings]</em>:: Term mappings
|
|
750
|
+
# <em>options[:vocab]</em>:: Default vocabulary
|
|
751
|
+
def process_term(value, options)
  # Coerce once so that both the mapping lookup and the vocabulary
  # concatenation accept non-String input.
  term = value.to_s
  mappings = options[:term_mappings]
  # XXX Spec Confusion: are terms always downcased? Or only for XHTML Vocab?
  key = term.downcase

  if mappings.is_a?(Hash) && mappings.has_key?(key)
    # If the term is in the local term mappings, use the associated URI.
    mappings[key]
  elsif options[:vocab]
    # Otherwise, if there is a local default vocabulary the URI is obtained by concatenating that value and the term.
    options[:vocab] + term
  else
    # Finally, if there is no local default vocabulary, the term has no associated URI and must be ignored.
    nil
  end
end
|
|
765
|
+
|
|
766
|
+
# From section 6. CURIE Syntax Definition
|
|
767
|
+
# Resolves a CURIE to a blank node or to a resource built from a prefix
# mapping.
#
# @param element [Object] element being processed; only used for debug output
# @param curie [#to_s] CURIE text such as "prefix:reference", ":reference" or "_:name"
# @param uri_mappings [Hash] prefix mappings keyed by lower-cased prefix ("" for the default prefix)
# @param subject [Object] not used by this method
# @return [Object, nil] an RDF::Node for the "_" prefix, the mapped namespace
#   combined with the reference for a known prefix, or nil when the text has
#   no prefix or the prefix has no mapping
def curie_to_resource_or_bnode(element, curie, uri_mappings, subject)
  # URI mappings for CURIEs default to XH_MAPPING, rather than the default doc namespace
  text = curie.to_s
  # Split on the first colon only, so a reference that itself contains a
  # colon is kept whole.
  prefix, reference = text.split(":", 2)
  # With a limit, "prefix:" yields an empty reference; treat it as absent.
  reference = nil if reference && reference.empty?

  # consider the bnode situation
  if prefix == "_"
    # we force a non-nil name, otherwise it generates a new name
    # FIXME: BNode.new(reference || "", @named_bnodes)
    RDF::Node.new(reference)
  elsif text =~ /\A:/
    # Default prefix
    # NOTE(review): the method name passed to send is built from document
    # text; presumably the namespace object resolves it dynamically --
    # confirm it cannot reach an unintended method.
    if uri_mappings[""]
      uri_mappings[""].send("#{reference}_")
    elsif @host_defaults[:prefix]
      @host_defaults[:prefix].send("#{reference}_")
    end
  elsif !text.include?(":")
    # No prefix, undefined (in this context, it is evaluated as a term elsewhere)
    nil
  else
    # XXX Spec Confusion, are prefixes always downcased?
    ns = uri_mappings[prefix.to_s.downcase]
    if ns
      # An empty reference is appended as "" instead of nil.
      ns + reference.to_s
    else
      add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix.downcase}")
      nil
    end
  end
end
|
|
797
|
+
end
|
|
798
|
+
end
|