rdf-rdfa 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +2 -0
- data/README.rdoc +59 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/example.rb +27 -0
- data/lib/rdfa/format.rb +19 -0
- data/lib/rdfa/reader.rb +798 -0
- data/lib/rdfa/reader/exceptions.rb +14 -0
- data/lib/rdfa/reader/namespace.rb +72 -0
- data/lib/rdfa/reader/version.rb +23 -0
- data/spec/namespaces_spec.rb +112 -0
- data/spec/rdfa-triples/0001.nt +1 -0
- data/spec/rdfa-triples/0006.nt +2 -0
- data/spec/rdfa-triples/0007.nt +3 -0
- data/spec/rdfa-triples/0008.nt +1 -0
- data/spec/rdfa-triples/0009.nt +1 -0
- data/spec/rdfa-triples/0010.nt +2 -0
- data/spec/rdfa-triples/0011.nt +3 -0
- data/spec/rdfa-triples/0012.nt +1 -0
- data/spec/rdfa-triples/0013.nt +1 -0
- data/spec/rdfa-triples/0014.nt +1 -0
- data/spec/rdfa-triples/0015.nt +2 -0
- data/spec/rdfa-triples/0017.nt +3 -0
- data/spec/rdfa-triples/0018.nt +1 -0
- data/spec/rdfa-triples/0019.nt +1 -0
- data/spec/rdfa-triples/0020.nt +1 -0
- data/spec/rdfa-triples/0021.nt +1 -0
- data/spec/rdfa-triples/0023.nt +1 -0
- data/spec/rdfa-triples/0025.nt +2 -0
- data/spec/rdfa-triples/0026.nt +1 -0
- data/spec/rdfa-triples/0027.nt +1 -0
- data/spec/rdfa-triples/0029.nt +1 -0
- data/spec/rdfa-triples/0030.nt +1 -0
- data/spec/rdfa-triples/0031.nt +1 -0
- data/spec/rdfa-triples/0032.nt +1 -0
- data/spec/rdfa-triples/0033.nt +2 -0
- data/spec/rdfa-triples/0034.nt +1 -0
- data/spec/rdfa-triples/0035.nt +1 -0
- data/spec/rdfa-triples/0036.nt +1 -0
- data/spec/rdfa-triples/0037.nt +1 -0
- data/spec/rdfa-triples/0038.nt +1 -0
- data/spec/rdfa-triples/0039.nt +1 -0
- data/spec/rdfa-triples/0040.nt +1 -0
- data/spec/rdfa-triples/0041.nt +1 -0
- data/spec/rdfa-triples/0042.nt +0 -0
- data/spec/rdfa-triples/0046.nt +3 -0
- data/spec/rdfa-triples/0047.nt +3 -0
- data/spec/rdfa-triples/0048.nt +3 -0
- data/spec/rdfa-triples/0049.nt +2 -0
- data/spec/rdfa-triples/0050.nt +2 -0
- data/spec/rdfa-triples/0051.nt +2 -0
- data/spec/rdfa-triples/0052.nt +1 -0
- data/spec/rdfa-triples/0053.nt +2 -0
- data/spec/rdfa-triples/0054.nt +2 -0
- data/spec/rdfa-triples/0055.nt +2 -0
- data/spec/rdfa-triples/0056.nt +3 -0
- data/spec/rdfa-triples/0057.nt +4 -0
- data/spec/rdfa-triples/0058.nt +6 -0
- data/spec/rdfa-triples/0059.nt +6 -0
- data/spec/rdfa-triples/0060.nt +2 -0
- data/spec/rdfa-triples/0061.nt +1 -0
- data/spec/rdfa-triples/0062.nt +1 -0
- data/spec/rdfa-triples/0063.nt +1 -0
- data/spec/rdfa-triples/0064.nt +1 -0
- data/spec/rdfa-triples/0065.nt +3 -0
- data/spec/rdfa-triples/0066.nt +1 -0
- data/spec/rdfa-triples/0067.nt +1 -0
- data/spec/rdfa-triples/0068.nt +1 -0
- data/spec/rdfa-triples/0069.nt +1 -0
- data/spec/rdfa-triples/0070.nt +1 -0
- data/spec/rdfa-triples/0071.nt +1 -0
- data/spec/rdfa-triples/0072.nt +1 -0
- data/spec/rdfa-triples/0073.nt +1 -0
- data/spec/rdfa-triples/0074.nt +1 -0
- data/spec/rdfa-triples/0075.nt +1 -0
- data/spec/rdfa-triples/0076.nt +23 -0
- data/spec/rdfa-triples/0077.nt +23 -0
- data/spec/rdfa-triples/0078.nt +6 -0
- data/spec/rdfa-triples/0079.nt +3 -0
- data/spec/rdfa-triples/0080.nt +1 -0
- data/spec/rdfa-triples/0081.nt +6 -0
- data/spec/rdfa-triples/0082.nt +8 -0
- data/spec/rdfa-triples/0083.nt +6 -0
- data/spec/rdfa-triples/0084.nt +8 -0
- data/spec/rdfa-triples/0085.nt +4 -0
- data/spec/rdfa-triples/0086.nt +0 -0
- data/spec/rdfa-triples/0087.nt +23 -0
- data/spec/rdfa-triples/0088.nt +3 -0
- data/spec/rdfa-triples/0089.nt +1 -0
- data/spec/rdfa-triples/0090.nt +1 -0
- data/spec/rdfa-triples/0091.nt +3 -0
- data/spec/rdfa-triples/0092.nt +3 -0
- data/spec/rdfa-triples/0093.nt +2 -0
- data/spec/rdfa-triples/0094.nt +3 -0
- data/spec/rdfa-triples/0099.nt +1 -0
- data/spec/rdfa-triples/0100.nt +3 -0
- data/spec/rdfa-triples/0101.nt +3 -0
- data/spec/rdfa-triples/0102.nt +1 -0
- data/spec/rdfa-triples/0103.nt +1 -0
- data/spec/rdfa-triples/0104.nt +3 -0
- data/spec/rdfa-triples/0105.nt +1 -0
- data/spec/rdfa-triples/0106.nt +1 -0
- data/spec/rdfa-triples/0107.nt +0 -0
- data/spec/rdfa-triples/0108.nt +1 -0
- data/spec/rdfa-triples/0109.nt +1 -0
- data/spec/rdfa-triples/0110.nt +1 -0
- data/spec/rdfa-triples/0111.nt +2 -0
- data/spec/rdfa-triples/0112.nt +1 -0
- data/spec/rdfa-triples/0113.nt +2 -0
- data/spec/rdfa-triples/0114.nt +3 -0
- data/spec/rdfa-triples/0115.nt +4 -0
- data/spec/rdfa-triples/0116.nt +2 -0
- data/spec/rdfa-triples/0117.nt +2 -0
- data/spec/rdfa-triples/0118.nt +1 -0
- data/spec/rdfa-triples/0119.nt +1 -0
- data/spec/rdfa-triples/0120.nt +1 -0
- data/spec/rdfa-triples/0121.nt +2 -0
- data/spec/rdfa-triples/0122.nt +1 -0
- data/spec/rdfa-triples/0123.nt +3 -0
- data/spec/rdfa-triples/0124.nt +4 -0
- data/spec/rdfa-triples/0125.nt +1 -0
- data/spec/rdfa-triples/0126.nt +3 -0
- data/spec/rdfa-triples/1001.nt +6 -0
- data/spec/rdfa_helper.rb +188 -0
- data/spec/rdfa_parser_spec.rb +146 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +8 -0
- metadata +246 -0
data/History.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
= RDF::RDFa reader/writer
|
2
|
+
|
3
|
+
RDFa parser for RDF.rb.
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
RDF::RDFa is an RDFa parser for Ruby using the RDF.rb library suite.
|
8
|
+
|
9
|
+
== FEATURES:
|
10
|
+
RDF::RDFa parses RDFa into a Graph object.
|
11
|
+
|
12
|
+
* Fully compliant XHTML/RDFa 1.0 parser.
|
13
|
+
|
14
|
+
Install with 'gem install rdf-rdfa'
|
15
|
+
|
16
|
+
== Usage:
|
17
|
+
Instantiate a parser and parse source, specifying type and base-URL
|
18
|
+
|
19
|
+
require 'rdfa/reader'
|
20
|
+
reader = RDF::RDFa::Reader.new(input, :base_uri => "http://example.com")
|
21
|
+
reader.each_statement { |statement| puts statement.inspect }
|
22
|
+
|
23
|
+
== Resources:
|
24
|
+
* Distiller[http://kellogg-assoc/distiller]
|
25
|
+
* RDoc[http://rdoc.info/projects/gkellogg/rdf-rdfa]
|
26
|
+
* History[http://github.com/gkellogg/rdf-rdfa/blob/master/History.txt]
|
27
|
+
|
28
|
+
== LICENSE:
|
29
|
+
|
30
|
+
(The MIT License)
|
31
|
+
|
32
|
+
Copyright (c) 2009-2010 Gregg Kellogg
|
33
|
+
|
34
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
35
|
+
a copy of this software and associated documentation files (the
|
36
|
+
'Software'), to deal in the Software without restriction, including
|
37
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
38
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
39
|
+
permit persons to whom the Software is furnished to do so, subject to
|
40
|
+
the following conditions:
|
41
|
+
|
42
|
+
The above copyright notice and this permission notice shall be
|
43
|
+
included in all copies or substantial portions of the Software.
|
44
|
+
|
45
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
46
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
47
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
48
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
49
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
50
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
51
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
52
|
+
|
53
|
+
== FEEDBACK
|
54
|
+
|
55
|
+
* gregg@kellogg-assoc.com
|
56
|
+
* rubygems.org/rdf-rdfa
|
57
|
+
* github.com/gkellogg/rdf-rdfa
|
58
|
+
* gkellogg.lighthouseapp.com for bug reports
|
59
|
+
* public-rdf-ruby mailing list on w3.org
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'rubygems'

# Gem packaging tasks are provided by Jeweler. When Jeweler cannot be loaded
# the packaging tasks are simply not defined and an install hint is printed.
begin
  gem 'jeweler'
  require 'jeweler'

  Jeweler::Tasks.new do |s|
    s.name        = "rdf-rdfa"
    s.summary     = "RDFa parser for RDF.rb."
    s.description = <<-DESCRIPTION
RDF::RDFa is an RDFa parser for Ruby using the RDF.rb library suite.
    DESCRIPTION
    s.email       = "gregg@kellogg-assoc.com"
    s.homepage    = "http://github.com/gkellogg/rdf-rdfa"
    s.authors     = ["Gregg Kellogg", "Nicholas Humfrey"]

    # Runtime dependencies
    s.add_dependency('nokogiri', '>= 1.3.3')
    s.add_dependency('rdf', '>= 0.1.6')

    # Development-only dependencies
    s.add_development_dependency('rspec')
    s.add_development_dependency('activesupport', '>= 2.3.0')

    s.extra_rdoc_files = ["README.rdoc", "History.txt"]
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

require 'spec/rake/spectask'

# Plain spec run.
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs.push('lib', 'spec')
  t.spec_files = FileList['spec/*_spec.rb']
end

desc "Run specs through RCov"
Spec::Rake::SpecTask.new("spec:rcov") do |t|
  t.libs.push('lib', 'spec')
  t.pattern = 'spec/*_spec.rb'
  t.rcov    = true
end

desc "Generate HTML report specs"
Spec::Rake::SpecTask.new("doc:spec") do |t|
  t.libs.push('lib', 'spec')
  t.spec_files = FileList['spec/*_spec.rb']
  t.spec_opts  = ["--format", "html:doc/spec.html"]
end

task :default => :spec
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/example.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# Minimal usage example: parse an inline XHTML+RDFa document and dump every
# statement the reader extracts from it.

$:.unshift(File.join(File.dirname(__FILE__), 'lib'))

require 'rubygems'
require 'rdfa/reader'

# Sample document: a single dc:creator statement about photo1.jpg.
data = <<-EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<head>
<title>Test 0001</title>
</head>
<body>
<p>This photo was taken by <span class="author" about="photo1.jpg" property="dc:creator">Mark Birbeck</span>.</p>
</body>
</html>
EOF

$DEBUG = false

# The reader is declared inside `module RDF::RDFa` (lib/rdfa/reader.rb), so it
# has to be referenced through its full name; a bare `RDFa::Reader` is not
# defined by that file.
reader = RDF::RDFa::Reader.new(data, :debug => false)
reader.each_statement do |statement|
  statement.inspect!
end
|
data/lib/rdfa/format.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module RDF::RDFa
  ##
  # RDFa format specification.
  #
  # Declares the content type (`text/html`) and file extension (`.html`)
  # handled by this gem and names the reader class used to parse them.
  #
  # @example Obtaining the RDFa format class
  #     RDF::Format.for("etc/foaf.html")
  #     RDF::Format.for(:file_name      => "etc/foaf.html")
  #     RDF::Format.for(:file_extension => "html")
  #     RDF::Format.for(:content_type   => "text/html")
  #
  # @see http://www.w3.org/TR/rdfa-syntax/
  class Format < RDF::Format
    content_type     'text/html', :extension => :html
    # NOTE(review): 'ascii' is kept from the original declaration; XHTML
    # documents are commonly UTF-8 -- confirm the intended encoding.
    content_encoding 'ascii'

    # Fully qualified: inside `module RDF::RDFa` a bare `RDFa::Reader` is not
    # lexically resolvable (only RDF::RDFa itself is on the nesting path).
    reader { RDF::RDFa::Reader }
  end
end
|
data/lib/rdfa/reader.rb
ADDED
@@ -0,0 +1,798 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'rdf'
|
3
|
+
|
4
|
+
module RDF::RDFa
|
5
|
+
##
|
6
|
+
# An RDFa parser in Ruby
|
7
|
+
#
|
8
|
+
# Based on processing rules described here:
|
9
|
+
# http://www.w3.org/TR/rdfa-core/#s_sequence
|
10
|
+
#
|
11
|
+
# Ben Adida
|
12
|
+
# 2008-05-07
|
13
|
+
# Gregg Kellogg
|
14
|
+
# 2009-08-04
|
15
|
+
class Reader < RDF::Reader
|
16
|
+
autoload :Namespace, 'rdfa/reader/namespace'
|
17
|
+
autoload :VERSION, 'rdfa/reader/version'
|
18
|
+
|
19
|
+
NC_REGEXP = Regexp.new(
|
20
|
+
%{^
|
21
|
+
(?!\\\\u0301) # ́ is a non-spacing acute accent.
|
22
|
+
# It is legal within an XML Name, but not as the first character.
|
23
|
+
( [a-zA-Z_]
|
24
|
+
| \\\\u[0-9a-fA-F]
|
25
|
+
)
|
26
|
+
( [0-9a-zA-Z_\.-]
|
27
|
+
| \\\\u([0-9a-fA-F]{4})
|
28
|
+
)*
|
29
|
+
$},
|
30
|
+
Regexp::EXTENDED)
|
31
|
+
|
32
|
+
#XML_LITERAL = Literal::Encoding.xmlliteral
|
33
|
+
XML_LITERAL = RDF['XMLLiteral']
|
34
|
+
|
35
|
+
|
36
|
+
# FIXME: use RDF::URI.qname instead
|
37
|
+
RDF_NS = Namespace.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf")
|
38
|
+
RDFA_NS = Namespace.new("http://www.w3.org/ns/rdfa#", "rdfa")
|
39
|
+
RDFS_NS = Namespace.new("http://www.w3.org/2000/01/rdf-schema#", "rdfs")
|
40
|
+
XHV_NS = Namespace.new("http://www.w3.org/1999/xhtml/vocab#", "xhv")
|
41
|
+
XML_NS = Namespace.new("http://www.w3.org/XML/1998/namespace", "xml")
|
42
|
+
XSD_NS = Namespace.new("http://www.w3.org/2001/XMLSchema#", "xsd")
|
43
|
+
XSI_NS = Namespace.new("http://www.w3.org/2001/XMLSchema-instance", "xsi")
|
44
|
+
XH_MAPPING = {"" => Namespace.new("http://www.w3.org/1999/xhtml/vocab\#", nil)}
|
45
|
+
|
46
|
+
|
47
|
+
require 'rdfa/format'
|
48
|
+
format RDFa::Format
|
49
|
+
|
50
|
+
attr_reader :debug
|
51
|
+
|
52
|
+
##
|
53
|
+
# @return [RDF::Graph]
|
54
|
+
attr_reader :graph
|
55
|
+
|
56
|
+
# Host language, One of:
|
57
|
+
# :xhtml_rdfa_1_0
|
58
|
+
# :xhtml_rdfa_1_1
|
59
|
+
attr_reader :host_language
|
60
|
+
|
61
|
+
# The Recursive Baggage
|
62
|
+
class EvaluationContext # :nodoc:
  # The base. This will usually be the URL of the document being processed,
  # but it could be some other URL, set by some other mechanism,
  # such as the (X)HTML base element. The important thing is that it establishes
  # a URL against which relative paths can be resolved.
  attr_accessor :base
  # The parent subject.
  # The initial value will be the same as the initial value of base,
  # but it will usually change during the course of processing.
  attr_accessor :parent_subject
  # The parent object.
  # In some situations the object of a statement becomes the subject of any nested statements,
  # and this property is used to convey this value.
  # Note that this value may be a bnode, since in some situations a number of nested statements
  # are grouped together on one bnode.
  # This means that the bnode must be set in the containing statement and passed down,
  # and this property is used to convey this value.
  attr_accessor :parent_object
  # A list of current, in-scope URI mappings.
  attr_accessor :uri_mappings
  # A list of incomplete triples. A triple can be incomplete when no object resource
  # is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
  # The triples can be completed when a resource becomes available,
  # which will be when the next subject is specified (part of the process called chaining).
  attr_accessor :incomplete_triples
  # The language. Note that there is no default language.
  attr_accessor :language
  # The term mappings, a list of terms and their associated URIs.
  # This specification does not define an initial list.
  # Host Languages may define an initial list.
  # If a Host Language provides an initial list, it should do so via an RDFa Profile document.
  attr_accessor :term_mappings
  # The default vocabulary, a value to use as the prefix URI when a term is used.
  # This specification does not define an initial setting for the default vocabulary.
  # Host Languages may define an initial setting.
  attr_accessor :default_vocabulary

  # Initialize the evaluation context, [5.1]
  #
  # @param [Object] base:: base against which relative paths are resolved; also the initial parent subject
  # @param [Hash] host_defaults:: host-language defaults; the :term_mappings and :vocabulary keys are read
  def initialize(base, host_defaults)
    @base               = base
    @parent_subject     = @base
    @parent_object      = nil
    @uri_mappings       = {}
    @incomplete_triples = []
    @language           = nil
    @term_mappings      = host_defaults.fetch(:term_mappings, {})
    # The instance variable and the key used to be misspelled ("voabulary"),
    # so the host-language default vocabulary was silently dropped.
    @default_vocabulary = host_defaults.fetch(:vocabulary, nil)
  end

  # Copy this Evaluation Context.
  #
  # The mutable collections are duplicated so that changes made through the
  # copy do not leak back into the context it was copied from.
  def initialize_copy(from)
    @uri_mappings       = from.uri_mappings.clone
    @incomplete_triples = from.incomplete_triples.clone
    @term_mappings      = from.term_mappings.clone
  end

  # One-line summary of the context, used in debug output.
  #
  # @return [String]
  def inspect
    v = %w(base parent_subject parent_object language default_vocabulary).map do |name|
      value = send(name)
      "#{name}='#{value.nil? ? '<nil>' : value}'"
    end
    v << "uri_mappings[#{uri_mappings.keys.length}]"
    v << "incomplete_triples[#{incomplete_triples.length}]"
    v << "term_mappings[#{term_mappings.keys.length}]"
    v.join(",")
  end
end
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
# Parse XHTML+RDFa document from a string or input stream to closure or graph.
|
130
|
+
#
|
131
|
+
# If the parser is called with a block, triples are passed to the block rather
|
132
|
+
# than added to the graph.
|
133
|
+
#
|
134
|
+
# Optionally, the stream may be a Nokogiri::HTML::Document or Nokogiri::XML::Document
|
135
|
+
# With a block, yields each statement with URI, BNode or Literal elements
|
136
|
+
#
|
137
|
+
# @param [IO] stream:: the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
|
138
|
+
# @param [String] uri:: the URI of the document
|
139
|
+
# @param [Hash] options:: Parser options, one of
|
140
|
+
# <em>options[:debug]</em>:: Array to place debug messages
|
141
|
+
# <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
|
142
|
+
# @return [Graph]:: Returns the graph containing parsed triples
|
143
|
+
# @raise [Error]:: Raises RdfError if _strict_
|
144
|
+
|
145
|
+
##
|
146
|
+
# Initializes the RDFa reader instance.
|
147
|
+
#
|
148
|
+
# @param [IO, File, String] input
|
149
|
+
# @param [Hash{Symbol => Object}] options
|
150
|
+
# @yield [reader]
|
151
|
+
# @yieldparam [Reader] reader
|
152
|
+
def initialize(input = $stdin, options = {}, &block)
|
153
|
+
super
|
154
|
+
|
155
|
+
@graph = RDF::Graph.new
|
156
|
+
@debug = options[:debug]
|
157
|
+
@strict = options[:strict]
|
158
|
+
@base_uri = options[:base_uri]
|
159
|
+
@base_uri = RDF::URI.parse(@base_uri) if @base_uri.is_a?(String)
|
160
|
+
@named_bnodes = {}
|
161
|
+
@@vocabulary_cache ||= {}
|
162
|
+
@nsbinding = {}
|
163
|
+
@uri_binding = {}
|
164
|
+
|
165
|
+
@doc = case input
|
166
|
+
when Nokogiri::HTML::Document then input
|
167
|
+
when Nokogiri::XML::Document then input
|
168
|
+
else Nokogiri::XML.parse(input, @base_uri.to_s)
|
169
|
+
end
|
170
|
+
|
171
|
+
raise ParserException, "Empty document" if @doc.nil? && @strict
|
172
|
+
@callback = block
|
173
|
+
|
174
|
+
# Determine host language
|
175
|
+
# XXX - right now only XHTML defined
|
176
|
+
@host_language = case @doc.root.attributes["version"].to_s
|
177
|
+
when /XHTML+RDFa/ then :xhtml
|
178
|
+
end
|
179
|
+
|
180
|
+
# If none found, assume xhtml
|
181
|
+
@host_language ||= :xhtml
|
182
|
+
|
183
|
+
@host_defaults = {}
|
184
|
+
@host_defaults = case @host_language
|
185
|
+
when :xhtml
|
186
|
+
bind(XHV_NS)
|
187
|
+
{
|
188
|
+
:vocabulary => XHV_NS.uri,
|
189
|
+
:prefix => XHV_NS,
|
190
|
+
:term_mappings => %w(
|
191
|
+
alternate appendix bookmark cite chapter contents copyright first glossary help icon index
|
192
|
+
last license meta next p3pv1 prev role section stylesheet subsection start top up
|
193
|
+
).inject({}) { |hash, term| hash[term] = XHV_NS.send("#{term}_"); hash },
|
194
|
+
}
|
195
|
+
else
|
196
|
+
{}
|
197
|
+
end
|
198
|
+
|
199
|
+
# parse
|
200
|
+
parse_whole_document(@doc, @base_uri)
|
201
|
+
|
202
|
+
block.call(self) if block_given?
|
203
|
+
end
|
204
|
+
|
205
|
+
|
206
|
+
##
|
207
|
+
# Iterates the given block for each RDF statement in the input.
|
208
|
+
#
|
209
|
+
# @yield [statement]
|
210
|
+
# @yieldparam [RDF::Statement] statement
|
211
|
+
# @return [void]
|
212
|
+
def each_statement(&block)
|
213
|
+
@graph.each_statement(&block)
|
214
|
+
end
|
215
|
+
|
216
|
+
##
|
217
|
+
# Iterates the given block for each RDF triple in the input.
|
218
|
+
#
|
219
|
+
# @yield [subject, predicate, object]
|
220
|
+
# @yieldparam [RDF::Resource] subject
|
221
|
+
# @yieldparam [RDF::URI] predicate
|
222
|
+
# @yieldparam [RDF::Value] object
|
223
|
+
# @return [void]
|
224
|
+
def each_triple(&block)
|
225
|
+
@graph.each_triple(&block)
|
226
|
+
end
|
227
|
+
|
228
|
+
# Bind namespace to store, returns bound namespace
|
229
|
+
def bind(namespace)
|
230
|
+
# Over-write an empty prefix
|
231
|
+
uri = namespace.uri.to_s
|
232
|
+
@uri_binding.delete(uri)
|
233
|
+
@nsbinding.delete_if {|prefix, ns| namespace.prefix == prefix}
|
234
|
+
|
235
|
+
@uri_binding[uri] = namespace
|
236
|
+
@nsbinding[namespace.prefix.to_s] = namespace
|
237
|
+
end
|
238
|
+
|
239
|
+
# Namespace for prefix
|
240
|
+
def namespace(prefix)
|
241
|
+
@nsbinding[prefix.to_s]
|
242
|
+
end
|
243
|
+
|
244
|
+
# Prefix for namespace
|
245
|
+
def prefix(namespace)
|
246
|
+
namespace.is_a?(Namespace) ? @uri_binding[namespace.uri.to_s].prefix : @uri_binding[namespace].prefix
|
247
|
+
end
|
248
|
+
|
249
|
+
private
|
250
|
+
|
251
|
+
# Figure out the document path, if it is a Nokogiri::XML::Element or Attribute
|
252
|
+
def node_path(node)
|
253
|
+
case node
|
254
|
+
when Nokogiri::XML::Element, Nokogiri::XML::Attr then "#{node_path(node.parent)}/#{node.name}"
|
255
|
+
when String then node
|
256
|
+
else ""
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
# Add debug event to debug array, if specified
|
261
|
+
#
|
262
|
+
# @param [XML Node, any] node:: XML Node or string for showing context
|
263
|
+
# @param [String] message::
|
264
|
+
def add_debug(node, message)
|
265
|
+
puts "#{node_path(node)}: #{message}" if $DEBUG
|
266
|
+
@debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array)
|
267
|
+
end
|
268
|
+
|
269
|
+
# add a triple, object can be literal or URI or bnode
|
270
|
+
#
|
271
|
+
# If the parser is called with a block, triples are passed to the block rather
|
272
|
+
# than added to the graph.
|
273
|
+
#
|
274
|
+
# @param [Nokogiri::XML::Node, any] node:: XML Node or string for showing context
|
275
|
+
# @param [URI, BNode] subject:: the subject of the triple
|
276
|
+
# @param [URI] predicate:: the predicate of the triple
|
277
|
+
# @param [URI, BNode, Literal] object:: the object of the triple
|
278
|
+
# @return [Array]:: An array of the triples (leaky abstraction? consider returning the graph instead)
|
279
|
+
# @raise [Error]:: Checks parameter types and raises if they are incorrect if parsing mode is _strict_.
|
280
|
+
def add_triple(node, subject, predicate, object)
|
281
|
+
triple = RDF::Statement.new(subject, predicate, object)
|
282
|
+
add_debug(node, "triple: #{triple}")
|
283
|
+
if @callback
|
284
|
+
@callback.call(triple) # Perform yield to saved block
|
285
|
+
else
|
286
|
+
@graph << triple
|
287
|
+
end
|
288
|
+
triple
|
289
|
+
# FIXME: rescue RdfException => e
|
290
|
+
rescue Exception => e
|
291
|
+
add_debug(node, "add_triple raised #{e.class}: #{e.message}")
|
292
|
+
puts e.backtrace if $DEBUG
|
293
|
+
raise if @strict
|
294
|
+
end
|
295
|
+
|
296
|
+
|
297
|
+
# Parsing an RDFa document (this is *not* the recursive method)
|
298
|
+
def parse_whole_document(doc, base)
|
299
|
+
# find if the document has a base element
|
300
|
+
# XXX - HTML specific
|
301
|
+
base_el = doc.css('html>head>base').first
|
302
|
+
if (base_el)
|
303
|
+
base = base_el.attributes['href']
|
304
|
+
# Strip any fragment from base
|
305
|
+
base = base.to_s.split("#").first
|
306
|
+
@base_uri = RDF::URI.new(base)
|
307
|
+
add_debug(base_el, "parse_whole_doc: base='#{base}'")
|
308
|
+
end
|
309
|
+
|
310
|
+
# initialize the evaluation context with the appropriate base
|
311
|
+
evaluation_context = EvaluationContext.new(base, @host_defaults)
|
312
|
+
|
313
|
+
traverse(doc.root, evaluation_context)
|
314
|
+
end
|
315
|
+
|
316
|
+
# Extract the XMLNS mappings from an element
|
317
|
+
def extract_mappings(element, uri_mappings, term_mappings)
|
318
|
+
# Process @profile
|
319
|
+
# Next the current element is parsed for any updates to the local term mappings and
|
320
|
+
# local list of URI mappings via @profile.
|
321
|
+
# If @profile is present, its value is processed as defined in RDFa Profiles.
|
322
|
+
element.attributes['profile'].to_s.split(/\s/).each do |profile|
|
323
|
+
# Don't try to open ourselves!
|
324
|
+
if @base_uri == profile
|
325
|
+
add_debug(element, "extract_mappings: skip recursive profile <#{profile}>")
|
326
|
+
@@vocabulary_cache[profile]
|
327
|
+
elsif @@vocabulary_cache.has_key?(profile)
|
328
|
+
add_debug(element, "extract_mappings: skip previously parsed profile <#{profile}>")
|
329
|
+
else
|
330
|
+
begin
|
331
|
+
add_debug(element, "extract_mappings: parse profile <#{profile}>")
|
332
|
+
@@vocabulary_cache[profile] = {
|
333
|
+
:uri_mappings => {},
|
334
|
+
:term_mappings => {}
|
335
|
+
}
|
336
|
+
um = @@vocabulary_cache[profile][:uri_mappings]
|
337
|
+
tm = @@vocabulary_cache[profile][:term_mappings]
|
338
|
+
add_debug(element, "extract_mappings: profile open <#{profile}>")
|
339
|
+
require 'patron' unless defined?(Patron)
|
340
|
+
sess = Patron::Session.new
|
341
|
+
sess.timeout = 10
|
342
|
+
resp = sess.get(profile)
|
343
|
+
raise RuntimeError, "HTTP returned status #{resp.status} when reading #{profile}" if resp.status >= 400
|
344
|
+
|
345
|
+
# Parse profile, and extract mappings from graph
|
346
|
+
old_debug, old_verbose, = $DEBUG, $verbose
|
347
|
+
$DEBUG, $verbose = false, false
|
348
|
+
p_graph = Parser.parse(resp.body, profile)
|
349
|
+
ttl = p_graph.serialize(:format => :ttl) if @debug || $DEBUG
|
350
|
+
$DEBUG, $verbose = old_debug, old_verbose
|
351
|
+
add_debug(element, ttl) if ttl
|
352
|
+
p_graph.subjects.each do |subject|
|
353
|
+
props = p_graph.properties(subject)
|
354
|
+
#puts props.inspect
|
355
|
+
|
356
|
+
# If one of the objects is not a Literal or if there are additional rdfa:uri or rdfa:term
|
357
|
+
# predicates sharing the same subject, no mapping is created.
|
358
|
+
uri = props[RDFA_NS.uri.to_s]
|
359
|
+
term = props[RDFA_NS.term_.to_s]
|
360
|
+
prefix = props[RDFA_NS.prefix_.to_s]
|
361
|
+
add_debug(element, "extract_mappings: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}")
|
362
|
+
|
363
|
+
next if !uri || (!term && !prefix)
|
364
|
+
raise ParserException, "multi-valued rdf:uri" if uri.length != 1
|
365
|
+
raise ParserException, "multi-valued rdf:term." if term && term.length != 1
|
366
|
+
raise ParserException, "multi-valued rdf:prefix" if prefix && prefix.length != 1
|
367
|
+
|
368
|
+
uri = uri.first
|
369
|
+
term = term.first if term
|
370
|
+
prefix = prefix.first if prefix
|
371
|
+
raise ParserException, "rdf:uri must be a Literal" unless uri.is_a?(Literal)
|
372
|
+
raise ParserException, "rdf:term must be a Literal" unless term.nil? || term.is_a?(Literal)
|
373
|
+
raise ParserException, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(Literal)
|
374
|
+
|
375
|
+
# For every extracted triple that is the common subject of an rdfa:prefix and an rdfa:uri
|
376
|
+
# predicate, create a mapping from the object literal of the rdfa:prefix predicate to the
|
377
|
+
# object literal of the rdfa:uri predicate. Add or update this mapping in the local list of
|
378
|
+
# URI mappings after transforming the 'prefix' component to lower-case.
|
379
|
+
# For every extracted
|
380
|
+
um[prefix.to_s.downcase] = bind(Namespace.new(uri.to_s, prefix.to_s.downcase)) if prefix
|
381
|
+
|
382
|
+
# triple that is the common subject of an rdfa:term and an rdfa:uri predicate, create a
|
383
|
+
# mapping from the object literal of the rdfa:term predicate to the object literal of the
|
384
|
+
# rdfa:uri predicate. Add or update this mapping in the local term mappings.
|
385
|
+
tm[term.to_s] = RDF::URI.new(uri.to_s) if term
|
386
|
+
end
|
387
|
+
rescue ParserException
|
388
|
+
add_debug(element, "extract_mappings: profile subject #{subject.to_s}: #{e.message}")
|
389
|
+
raise if @strict
|
390
|
+
rescue RuntimeError => e
|
391
|
+
add_debug(element, "extract_mappings: profile: #{e.message}")
|
392
|
+
raise if @strict
|
393
|
+
end
|
394
|
+
end
|
395
|
+
|
396
|
+
# Merge mappings from this vocabulary
|
397
|
+
uri_mappings.merge!(@@vocabulary_cache[profile][:uri_mappings])
|
398
|
+
term_mappings.merge!(@@vocabulary_cache[profile][:term_mappings])
|
399
|
+
end
|
400
|
+
|
401
|
+
# look for xmlns
|
402
|
+
# (note, this may be dependent on @host_language)
|
403
|
+
# Regardless of how the mapping is declared, the value to be mapped must be converted to lower case,
|
404
|
+
# and the URI is not processed in any way; in particular if it is a relative path it is
|
405
|
+
# not resolved against the current base.
|
406
|
+
element.namespaces.each do |attr_name, attr_value|
|
407
|
+
begin
|
408
|
+
abbr, prefix = attr_name.split(":")
|
409
|
+
uri_mappings[prefix.to_s.downcase] = bind(Namespace.new(attr_value, prefix.to_s.downcase)) if abbr.downcase == "xmlns" && prefix
|
410
|
+
# FIXME: rescue RdfException => e
|
411
|
+
rescue Exception => e
|
412
|
+
add_debug(element, "extract_mappings raised #{e.class}: #{e.message}")
|
413
|
+
raise if @strict
|
414
|
+
end
|
415
|
+
end
|
416
|
+
|
417
|
+
# Set mappings from @prefix
|
418
|
+
# prefix is a whitespace separated list of prefix-name URI pairs of the form
|
419
|
+
# NCName ':' ' '+ xs:anyURI
|
420
|
+
# SPEC Confusion: prefix is forced to lower-case in @profile, but not specified here.
|
421
|
+
mappings = element.attributes["prefix"].to_s.split(/\s+/)
|
422
|
+
while mappings.length > 0 do
|
423
|
+
prefix, uri = mappings.shift.downcase, mappings.shift
|
424
|
+
#puts "uri_mappings prefix #{prefix} <#{uri}>"
|
425
|
+
next unless prefix.match(/:$/)
|
426
|
+
prefix.chop!
|
427
|
+
|
428
|
+
uri_mappings[prefix] = bind(Namespace.new(uri, prefix))
|
429
|
+
end
|
430
|
+
|
431
|
+
add_debug(element, "uri_mappings: #{uri_mappings.values.map{|ns|ns.to_s}.join(", ")}")
|
432
|
+
add_debug(element, "term_mappings: #{term_mappings.keys.join(", ")}")
|
433
|
+
end
|
434
|
+
|
435
|
+
# The recursive helper function
|
436
|
+
# Process one element following the RDFa processing sequence and then recurse
# into its child elements.
#
# Triples are emitted through add_triple; diagnostic messages go through
# add_debug. The incoming evaluation context is never modified: a derived
# context is built for the children.
#
# @param element the element to process (its attributes, children, name and
#   XPath lookups are used); may be nil
# @param evaluation_context the context inherited from the parent element
# @return [nil] when element is nil (other return values are incidental)
# @raise [ParserException] when element is nil and @strict is set
def traverse(element, evaluation_context)
  if element.nil?
    add_debug(element, "traverse nil element")
    raise ParserException, "Can't parse nil element" if @strict
    return nil
  end

  add_debug(element, "traverse, ec: #{evaluation_context.inspect}")

  # local variables [5.5 Step 1]
  recurse = true
  skip = false
  new_subject = nil
  current_object_resource = nil
  # Cloned so that mappings declared on this element do not leak to siblings.
  uri_mappings = evaluation_context.uri_mappings.clone
  incomplete_triples = []
  language = evaluation_context.language
  term_mappings = evaluation_context.term_mappings.clone
  default_vocabulary = evaluation_context.default_vocabulary

  current_object_literal = nil # XXX Not explicit

  # shortcut
  attrs = element.attributes

  about = attrs['about']
  src = attrs['src']
  resource = attrs['resource']
  href = attrs['href']
  vocab = attrs['vocab']

  # Pull out the attributes needed for the skip test.
  property = attrs['property'].to_s if attrs['property']
  typeof = attrs['typeof'].to_s if attrs['typeof']
  datatype = attrs['datatype'].to_s if attrs['datatype']
  content = attrs['content'].to_s if attrs['content']
  rel = attrs['rel'].to_s if attrs['rel']
  rev = attrs['rev'].to_s if attrs['rev']

  # Default vocabulary [7.5 Step 2]
  # First the current element is examined for any change to the default vocabulary via @vocab.
  # If @vocab is present and contains a value, its value updates the local default vocabulary.
  # If the value is empty, then the local default vocabulary must be reset to the Host Language defined default.
  unless vocab.nil?
    default_vocabulary = if vocab.to_s.empty?
      # Set default_vocabulary to host language default
      @host_defaults.fetch(:vocabulary, nil)
    else
      vocab.to_s
    end
    add_debug(element, "[Step 2] traverse, default_vocaulary: #{default_vocabulary.inspect}")
  end

  # Local term mappings [7.5 Steps 3 & 4]
  # Next the current element is parsed for any updates to the local term mappings and local list of URI mappings via @profile.
  # If @profile is present, its value is processed as defined in RDFa Profiles.
  extract_mappings(element, uri_mappings, term_mappings)

  # Language information [7.5 Step 5]
  # From HTML5 [3.2.3.3]
  #   If both the lang attribute in no namespace and the lang attribute in the XML namespace are set
  #   on an element, user agents must use the lang attribute in the XML namespace, and the lang
  #   attribute in no namespace must be ignored for the purposes of determining the element's
  #   language.
  # Both lookups are attribute XPaths; the inherited language is kept when
  # neither attribute is present.
  lang_attr = element.at_xpath("@xml:lang", "xml" => XML_NS.uri.to_s) || element.at_xpath("@lang")
  language = lang_attr.to_s if lang_attr
  add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language}") if attrs['lang']

  # rels and revs
  rels = process_uris(element, rel, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
  revs = process_uris(element, rev, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)

  add_debug(element, "traverse, about: #{about.nil? ? 'nil' : about}, src: #{src.nil? ? 'nil' : src}, resource: #{resource.nil? ? 'nil' : resource}, href: #{href.nil? ? 'nil' : href}")
  add_debug(element, "traverse, property: #{property.nil? ? 'nil' : property}, typeof: #{typeof.nil? ? 'nil' : typeof}, datatype: #{datatype.nil? ? 'nil' : datatype}, content: #{content.nil? ? 'nil' : content}")
  add_debug(element, "traverse, rels: #{rels.join(" ")}, revs: #{revs.join(" ")}")

  if !(rel || rev)
    # Establishing a new subject if no rel/rev [7.5 Step 6]
    # May not be valid, but can exist
    if about
      new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
    elsif src
      new_subject = process_uri(element, src, evaluation_context)
    elsif resource
      new_subject = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
    elsif href
      new_subject = process_uri(element, href, evaluation_context)
    end

    # If no URI is provided by a resource attribute, then the first match from the following rules
    # will apply:
    #   if @typeof is present, then new subject is set to be a newly created bnode.
    # otherwise,
    #   if parent object is present, new subject is set to the value of parent object.
    # Additionally, if @property is not present then the skip element flag is set to 'true';
    if new_subject.nil?
      if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
        # From XHTML+RDFa 1.1:
        # if no URI is provided, then first check to see if the element is the head or body element.
        # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
        new_subject = RDF::URI.new(evaluation_context.base)
      elsif element.attributes['typeof']
        new_subject = RDF::Node.new
      else
        # if it's null, it's null and nothing changes
        new_subject = evaluation_context.parent_object
        skip = true unless property
      end
    end
    add_debug(element, "[Step 6] new_subject: #{new_subject}, skip = #{skip}")
  else
    # [7.5 Step 7]
    # If the current element does contain a @rel or @rev attribute, then the next step is to
    # establish both a value for new subject and a value for current object resource:
    if about
      new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
    elsif src
      new_subject = process_uri(element, src, evaluation_context, :uri_mappings => uri_mappings)
    end

    # If no URI is provided then the first match from the following rules will apply
    if new_subject.nil?
      if @host_language == :xhtml && element.name =~ /^(head|body)$/
        # From XHTML+RDFa 1.1:
        # if no URI is provided, then first check to see if the element is the head or body element.
        # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
        new_subject = RDF::URI.new(evaluation_context.base)
      elsif element.attributes['typeof']
        new_subject = RDF::Node.new
      else
        # if it's null, it's null and nothing changes
        new_subject = evaluation_context.parent_object
        # no skip flag set this time
      end
    end

    # Then the current object resource is set to the URI obtained from the first match from the following rules:
    if resource
      current_object_resource = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
    elsif href
      current_object_resource = process_uri(element, href, evaluation_context)
    end

    add_debug(element, "[Step 7] new_subject: #{new_subject}, current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource}")
  end

  # Process @typeof if there is a subject [Step 8]
  if new_subject && typeof
    # Typeof is TERMorCURIEorURIs
    types = process_uris(element, typeof, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
    add_debug(element, "typeof: #{typeof}")
    types.each do |one_type|
      add_triple(element, new_subject, RDF_TYPE, one_type)
    end
  end

  # Generate triples with given object [Step 9]
  if current_object_resource
    rels.each do |r|
      add_triple(element, new_subject, r, current_object_resource)
    end

    revs.each do |r|
      add_triple(element, current_object_resource, r, new_subject)
    end
  elsif rel || rev
    # Incomplete triples and bnode creation [Step 10]
    add_debug(element, "[Step 10] incompletes: rels: #{rels}, revs: #{revs}")
    current_object_resource = RDF::Node.new

    rels.each do |r|
      incomplete_triples << {:predicate => r, :direction => :forward}
    end

    revs.each do |r|
      incomplete_triples << {:predicate => r, :direction => :reverse}
    end
  end

  # Establish current object literal [Step 11]
  if property
    properties = process_uris(element, property, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)

    # get the literal datatype
    type = datatype
    children_node_types = element.children.map { |c| c.class }.uniq

    # The three branches below are tested in order; the first match wins.
    type_resource = process_uri(element, type, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary) if type
    if type && !type.empty? && (type_resource.to_s != XML_LITERAL.to_s)
      # typed literal
      add_debug(element, "[Step 11] typed literal")
      current_object_literal = RDF::Literal.new(content || element.inner_text, :datatype => type_resource, :language => language)
    elsif content || (children_node_types == [Nokogiri::XML::Text]) || (element.children.length == 0) || (type == '')
      # plain literal
      add_debug(element, "[Step 11] plain literal")
      current_object_literal = RDF::Literal.new(content || element.inner_text, :language => language)
    elsif children_node_types != [Nokogiri::XML::Text] && (type.nil? || type_resource.to_s == XML_LITERAL.to_s)
      # XML Literal
      add_debug(element, "[Step 11] XML Literal: #{element.inner_html}")
      current_object_literal = RDF::Literal.new(element.inner_html, :datatype => XML_LITERAL, :language => language, :namespaces => uri_mappings)
      # The children are part of the literal, so they are not traversed.
      recurse = false
    end

    # add each property
    properties.each do |p|
      add_triple(element, new_subject, p, current_object_literal)
    end
    # SPEC CONFUSION: "the triple has been created" ==> there may be more than one
    # set the recurse flag above in the IF about xmlliteral, as it is the only place that can happen
  end

  if !skip && new_subject && !evaluation_context.incomplete_triples.empty?
    # Complete the incomplete triples from the evaluation context [Step 12]
    add_debug(element, "[Step 12] complete incomplete triples: new_subject=#{new_subject}, completes=#{evaluation_context.incomplete_triples.inspect}")
    evaluation_context.incomplete_triples.each do |trip|
      if trip[:direction] == :forward
        add_triple(element, evaluation_context.parent_subject, trip[:predicate], new_subject)
      elsif trip[:direction] == :reverse
        add_triple(element, new_subject, trip[:predicate], evaluation_context.parent_subject)
      end
    end
  end

  # Create a new evaluation context and proceed recursively [Step 13]
  if recurse
    if skip
      # A skipped element only passes on language and mapping changes; the
      # inherited context is reused untouched when nothing changed.
      if language == evaluation_context.language &&
          uri_mappings == evaluation_context.uri_mappings &&
          term_mappings == evaluation_context.term_mappings &&
          default_vocabulary == evaluation_context.default_vocabulary
        new_ec = evaluation_context
        add_debug(element, "[Step 13] skip: reused ec")
      else
        new_ec = evaluation_context.clone
        new_ec.language = language
        new_ec.uri_mappings = uri_mappings
        new_ec.term_mappings = term_mappings
        new_ec.default_vocabulary = default_vocabulary
        add_debug(element, "[Step 13] skip: cloned ec")
      end
    else
      # create a new evaluation context
      new_ec = EvaluationContext.new(evaluation_context.base, @host_defaults)
      new_ec.parent_subject = new_subject || evaluation_context.parent_subject
      new_ec.parent_object = current_object_resource || new_subject || evaluation_context.parent_subject
      new_ec.uri_mappings = uri_mappings
      new_ec.incomplete_triples = incomplete_triples
      new_ec.language = language
      new_ec.term_mappings = term_mappings
      new_ec.default_vocabulary = default_vocabulary
      add_debug(element, "[Step 13] new ec")
    end

    element.children.each do |child|
      # recurse only if it's an element
      traverse(child, new_ec) if child.class == Nokogiri::XML::Element
    end
  end
end
|
702
|
+
|
703
|
+
# space-separated TERMorCURIEorURI
|
704
|
+
# Resolve a whitespace-separated list of values, one process_uri call per token.
#
# @param element element the attribute came from (passed to add_debug and process_uri)
# @param value attribute value; anything responding to to_s, or nil
# @param evaluation_context passed through to process_uri
# @param [Hash] options passed through to process_uri
# @return [Array] the non-nil results of process_uri, in token order; empty
#   when value is nil or an empty string
def process_uris(element, value, evaluation_context, options)
  return [] if value.to_s.empty?
  add_debug(element, "process_uris: #{value}")
  # Bare String#split ignores leading whitespace, so a value such as " a b"
  # does not produce an empty first token.
  value.to_s.split.map { |v| process_uri(element, v, evaluation_context, options) }.compact
end
|
709
|
+
|
710
|
+
# Resolve a single attribute value to a resource.
#
# Three forms are tried in order:
# 1. a bracketed value ("[...]") when no :term_mappings option is given,
# 2. a term when :term_mappings is given and the value matches NC_REGEXP,
# 3. a CURIE, falling back to treating the value as a URI.
#
# @param element element the value came from (used for debug output)
# @param value the attribute value
# @param evaluation_context supplies parent_subject for CURIE resolution
# @param [Hash] options :uri_mappings (defaults to {}), :term_mappings, :vocab
# @return the resolved resource; may be nil for forms 1 and 2
def process_uri(element, value, evaluation_context, options = {})
  opts = {:uri_mappings => {}}.merge(options)
  text = value.to_s
  term_map = opts[:term_mappings]
  prefix_map = opts[:uri_mappings]

  # SafeCURIEorCURIEorURI: the text inside the brackets is evaluated as a
  # CURIE; when it does not resolve the result is nil and nothing else is tried.
  bracketed = text.match(/^\[(.*)\]$/) if !term_map && prefix_map
  if bracketed
    resolved = curie_to_resource_or_bnode(element, bracketed[1], prefix_map, evaluation_context.parent_subject)
    add_debug(element, "process_uri: #{value} => safeCURIE => <#{resolved}>")
    return resolved
  end

  # TERMorCURIEorURI: a value matching NC_REGEXP is evaluated as a term,
  # which may yield nil.
  if term_map && NC_REGEXP.match(text)
    resolved = process_term(text, opts)
    add_debug(element, "process_uri: #{value} => term => <#{resolved}>")
    return resolved
  end

  # Otherwise try the value as a CURIE.
  resolved = curie_to_resource_or_bnode(element, value, prefix_map, evaluation_context.parent_subject)
  if resolved
    add_debug(element, "process_uri: #{value} => CURIE => <#{resolved}>")
    return resolved
  end

  # Not a CURIE: use the value as a URI.
  #FIXME: uri = URIRef.new(value, evaluation_context.base)
  plain_uri = RDF::URI.new(value)
  add_debug(element, "process_uri: #{value} => URI => <#{plain_uri}>")
  plain_uri
end
|
744
|
+
|
745
|
+
# [7.4.3] General Use of Terms in Attributes
|
746
|
+
#
|
747
|
+
# @param [String] term:: term
|
748
|
+
# @param [Hash] options:: Parser options, one of
|
749
|
+
# <em>options[:term_mappings]</em>:: Term mappings
|
750
|
+
# <em>options[:vocab]</em>:: Default vocabulary
|
751
|
+
def process_term(value, options)
  known_terms = options[:term_mappings]
  lookup_key = value.to_s.downcase

  # A term found in the local term mappings wins; the lookup uses the
  # downcased term.
  # XXX Spec Confusion: are terms always downcased? Or only for XHTML Vocab?
  if known_terms.is_a?(Hash) && known_terms.has_key?(lookup_key)
    return known_terms[lookup_key]
  end

  # Without a local default vocabulary the term has no associated URI and
  # must be ignored.
  base_vocabulary = options[:vocab]
  return nil unless base_vocabulary

  # Otherwise the URI is the default vocabulary followed by the term as written.
  base_vocabulary + value
end
|
765
|
+
|
766
|
+
# From section 6. CURIE Syntax Definition
|
767
|
+
def curie_to_resource_or_bnode(element, curie, uri_mappings, subject)
  # Resolves a CURIE against uri_mappings. Returns a blank node for the "_"
  # prefix, the mapped namespace plus the reference for a known prefix, and
  # nil when the value has no colon or the prefix is not mapped. The subject
  # parameter is not used here.
  #
  # URI mappings for CURIEs default to XH_MAPPING, rather than the default doc namespace
  curie_text = curie.to_s

  # Split on the first colon only, so a reference may itself contain colons
  # ("ex:a:b") and "ex:" yields an empty reference rather than nil.
  prefix, reference = curie_text.split(":", 2)

  # consider the bnode situation
  if prefix == "_"
    # we force a non-nil name, otherwise it generates a new name
    # FIXME: BNode.new(reference || "", @named_bnodes)
    # An empty label is passed as nil.
    RDF::Node.new(reference.to_s.empty? ? nil : reference)
  elsif curie_text.match(/^:/)
    # Default prefix
    # NOTE(review): the method name passed to send is built from document
    # content; confirm the receiver only ever treats it as a URI suffix.
    if uri_mappings[""]
      uri_mappings[""].send("#{reference}_")
    elsif @host_defaults[:prefix]
      @host_defaults[:prefix].send("#{reference}_")
    end
  elsif !curie_text.match(/:/)
    # No prefix, undefined (in this context, it is evaluated as a term elsewhere)
    nil
  else
    # XXX Spec Confusion, are prefixes always downcased?
    ns = uri_mappings[prefix.to_s.downcase]
    if ns
      ns + reference
    else
      add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix.downcase}")
      nil
    end
  end
end
|
797
|
+
end
|
798
|
+
end
|