the-experimenters-rdf-rdfxml 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +13 -0
- data/AUTHORS +1 -0
- data/CONTRIBUTORS +1 -0
- data/History.rdoc +100 -0
- data/README +95 -0
- data/README.md +95 -0
- data/Rakefile +59 -0
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/etc/doap.nt +47 -0
- data/etc/doap.xml +73 -0
- data/example.rb +37 -0
- data/lib/rdf/rdfxml.rb +50 -0
- data/lib/rdf/rdfxml/format.rb +43 -0
- data/lib/rdf/rdfxml/patches/array_hacks.rb +53 -0
- data/lib/rdf/rdfxml/patches/graph_properties.rb +34 -0
- data/lib/rdf/rdfxml/patches/literal_hacks.rb +156 -0
- data/lib/rdf/rdfxml/patches/nokogiri_hacks.rb +16 -0
- data/lib/rdf/rdfxml/reader.rb +646 -0
- data/lib/rdf/rdfxml/version.rb +18 -0
- data/lib/rdf/rdfxml/vocab.rb +3 -0
- data/lib/rdf/rdfxml/writer.rb +559 -0
- data/rdf-rdfxml.gemspec +109 -0
- data/script/console +10 -0
- data/script/parse +55 -0
- data/script/tc +50 -0
- data/script/yard-to-rubyforge +2 -0
- data/spec/.gitignore +1 -0
- data/spec/format_spec.rb +28 -0
- data/spec/graph_spec.rb +59 -0
- data/spec/literal_spec.rb +244 -0
- data/spec/matchers.rb +79 -0
- data/spec/rdf_test.rb +69 -0
- data/spec/reader_spec.rb +361 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +53 -0
- data/spec/writer_spec.rb +714 -0
- metadata +190 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
# Extensions to Nokogiri nodes used by the RDF/XML reader.
class Nokogiri::XML::Node
  # Expanded name of the node: namespace href followed by the node name,
  # returned as an interned RDF::URI. Nodes without a namespace fall back
  # to RDF::XML as their namespace.
  def uri
    ns = namespace ? namespace.href : RDF::XML.to_s
    RDF::URI.intern(ns + node_name)
  end

  # Human-readable location of the node, built recursively from its parent
  # and memoized on first use. Documents yield "", elements append "/name",
  # attributes append "@name"; any other node kind yields nil.
  def display_path
    @display_path ||=
      if is_a?(Nokogiri::XML::Document)
        ""
      elsif is_a?(Nokogiri::XML::Element)
        parent ? "#{parent.display_path}/#{name}" : name
      elsif is_a?(Nokogiri::XML::Attr)
        "#{parent.display_path}@#{name}"
      end
  end
end
|
@@ -0,0 +1,646 @@
|
|
1
|
+
require 'nokogiri' # FIXME: Implement using different modules as in RDF::TriX
|
2
|
+
|
3
|
+
module RDF::RDFXML
|
4
|
+
##
|
5
|
+
# An RDF/XML parser in Ruby
|
6
|
+
#
|
7
|
+
# Based on RDF/XML Syntax Specification: http://www.w3.org/TR/REC-rdf-syntax/
|
8
|
+
#
|
9
|
+
# @author [Gregg Kellogg](http://kellogg-assoc.com/)
|
10
|
+
class Reader < RDF::Reader
|
11
|
+
format Format
|
12
|
+
|
13
|
+
# Full URIs of the rdf: core syntax terms. They are rejected as node or
# property element names and are never treated as property attributes.
# Frozen so the shared table cannot be mutated at runtime.
CORE_SYNTAX_TERMS = %w(RDF ID about parseType resource nodeID datatype).map { |n| "http://www.w3.org/1999/02/22-rdf-syntax-ns##{n}" }.freeze

# Full URIs of the obsolete rdf: terms; their use is reported (and raises
# when validating). Frozen for the same reason as above.
OLD_TERMS = %w(aboutEach aboutEachPrefix bagID).map { |n| "http://www.w3.org/1999/02/22-rdf-syntax-ns##{n}" }.freeze
|
15
|
+
|
16
|
+
# The Recursive Baggage
|
17
|
+
# The Recursive Baggage: state carried down the element tree while parsing
# (base URI, language, namespace mappings, preset subject, rdf:li counter).
class EvaluationContext # :nodoc:
  attr_reader :base
  attr_accessor :subject, :uri_mappings, :language, :graph, :li_counter

  # @param [#to_s] base base URI; any fragment is dropped by #base=
  # @param [Nokogiri::XML::Element, nil] element element to seed the context from, if any
  # @param graph stored as-is and handed on to cloned contexts
  # @yield [prefix, value] once for every xmlns mapping found on +element+
  def initialize(base, element, graph, &cb)
    # Initialize the evaluation context, [5.1]
    self.base = RDF::URI.intern(base)
    @uri_mappings = {}
    @language = nil
    @graph = graph
    @li_counter = 0

    extract_from_element(element, &cb) if element
  end

  # Clone existing evaluation context adding information from element.
  # The copy inherits base, graph, language and a copy of the namespace
  # mappings; subject and li_counter start fresh. Entries in +options+ are
  # applied through the matching writer (e.g. :subject => node).
  def clone(element, options = {}, &cb)
    new_ec = EvaluationContext.new(@base, nil, @graph)
    new_ec.uri_mappings = uri_mappings.clone
    new_ec.language = language

    new_ec.extract_from_element(element, &cb) if element

    options.each_pair { |k, v| new_ec.send("#{k}=", v) }
    new_ec
  end

  # Extract Evaluation Context from an element by looking at ancestors
  # recursively. Ancestors are visited from the end of the ancestor list
  # towards its start, then the element itself is applied.
  def extract_from_ancestors(el, &cb)
    el.ancestors.to_a.reverse_each do |ancestor|
      next unless ancestor.element?
      extract_from_element(ancestor, &cb)
    end
    extract_from_element(el, &cb)
  end

  # Extract Evaluation Context from an element: xml:base, xml:lang and the
  # namespace mappings it carries.
  def extract_from_element(el, &cb)
    b = el.attribute_with_ns("base", RDF::XML.to_s)
    b = nil if b.respond_to?(:null?) && b.null? # to ensure FFI Pointer compatibility

    lang = el.attribute_with_ns("lang", RDF::XML.to_s)
    lang = nil if lang.respond_to?(:null?) && lang.null? # to ensure FFI Pointer compatibility

    self.base = base.join(b) if b
    self.language = lang if lang
    uri_mappings.merge!(extract_mappings(el, &cb))
  end

  # Extract the XMLNS mappings from an element.
  #
  # @return [Hash] prefix => namespace URI (the default namespace is keyed by nil)
  # @yield [prefix, value] for every mapping found
  def extract_mappings(element, &cb)
    mappings = {}

    # look for xmlns
    element.namespaces.each do |attr_name, attr_value|
      abbr, prefix = attr_name.to_s.split(":")
      next unless abbr == "xmlns"

      # A fragment-only namespace is taken relative to the current base
      attr_value = base.to_s + attr_value if attr_value.start_with?("#")
      mappings[prefix] = attr_value
      cb.call(prefix, attr_value) if cb
    end
    mappings
  end

  # Produce the next list entry (rdf:_1, rdf:_2, ...) for this context
  def li_next
    @li_counter += 1
    RDF["_#{@li_counter}"]
  end

  # Set XML base. Ignore any fragment
  def base=(b)
    parsed = Addressable::URI.parse(b)
    parsed.fragment = nil
    @base = RDF::URI.intern(parsed)
  end

  # Compact summary used in debug output
  def inspect
    v = %w(base subject language).map do |a|
      current = send(a)
      "#{a}='#{current.nil? ? 'nil' : current}'"
    end
    v << "uri_mappings[#{uri_mappings.keys.length}]"
    v.join(",")
  end
end
|
107
|
+
|
108
|
+
##
|
109
|
+
# Initializes the RDF/XML reader instance.
|
110
|
+
#
|
111
|
+
# @param [Nokogiri::XML::Document, IO, File, String] input
|
112
|
+
# the input stream to read
|
113
|
+
# @param [Hash{Symbol => Object}] options
|
114
|
+
# any additional options
|
115
|
+
# @option options [Encoding] :encoding (Encoding::UTF_8)
|
116
|
+
# the encoding of the input stream (Ruby 1.9+)
|
117
|
+
# @option options [Boolean] :validate (false)
|
118
|
+
# whether to validate the parsed statements and values
|
119
|
+
# @option options [Boolean] :canonicalize (false)
|
120
|
+
# whether to canonicalize parsed literals
|
121
|
+
# @option options [Boolean] :intern (true)
|
122
|
+
# whether to intern all parsed URIs
|
123
|
+
# @option options [Hash] :prefixes (Hash.new)
|
124
|
+
# the prefix mappings to use (not supported by all readers)
|
125
|
+
# @option options [#to_s] :base_uri (nil)
|
126
|
+
# the base URI to use when resolving relative URIs
|
127
|
+
# @option options [Array] :debug
|
128
|
+
# Array to place debug messages
|
129
|
+
# @return [reader]
|
130
|
+
# @yield [reader] `self`
|
131
|
+
# @yieldparam [RDF::Reader] reader
|
132
|
+
# @yieldreturn [void] ignored
|
133
|
+
# @raise [Error]:: Raises RDF::ReaderError if _validate_
|
134
|
+
def initialize(input = $stdin, options = {}, &block)
  super do
    @debug = options[:debug]
    @base_uri = uri(options[:base_uri]) if options[:base_uri]

    # An already-parsed document is used as-is; anything else is parsed now
    @doc = case input
    when Nokogiri::XML::Document then input
    else Nokogiri::XML.parse(input, @base_uri.to_s)
    end

    if validate?
      # Check for a missing document before touching its error list
      raise RDF::ReaderError, "Empty document" if @doc.nil?
      raise RDF::ReaderError, "Syntax errors:\n#{@doc.errors}" unless @doc.errors.empty?
      raise RDF::ReaderError, "Empty document" if @doc.root.nil?
    end

    block.call(self) if block
  end
end
|
150
|
+
|
151
|
+
# No need to rewind, as parsing is done in initialize
|
152
|
+
# Deliberate no-op kept so callers can invoke it; always returns nil.
def rewind
end
|
153
|
+
|
154
|
+
# Document closed when read in initialize
|
155
|
+
# Deliberate no-op kept so callers can invoke it; always returns nil.
def close
end
|
156
|
+
|
157
|
+
##
|
158
|
+
# Iterates the given block for each RDF statement in the input.
|
159
|
+
#
|
160
|
+
# @yield [statement]
|
161
|
+
# @yieldparam [RDF::Statement] statement
|
162
|
+
# @return [void]
|
163
|
+
def each_statement(&block)
  # Block called from add_triple for every statement produced
  @callback = block

  root = @doc.root
  add_debug(root, "base_uri: #{@base_uri || 'nil'}")

  # A document without a root element (possible when not validating)
  # contains no statements.
  return if root.nil?

  # Every namespace mapping met while building contexts is recorded as a
  # reader prefix.
  register = proc { |pfx, ns| prefix(pfx, ns) }

  rdf_nodes = root.xpath("//rdf:RDF", "rdf" => RDF.to_uri.to_s)
  if rdf_nodes.length == 0
    # If none found, root element may be processed as an RDF Node
    ec = EvaluationContext.new(@base_uri, root, @graph, &register)
    nodeElement(root, ec)
  else
    rdf_nodes.each do |node|
      # XXX Skip this element if it's contained within another rdf:RDF element

      # Extract base, lang and namespaces from parents to create proper
      # evaluation context. The callback goes to extract_from_ancestors,
      # which is where the mappings are actually read.
      ec = EvaluationContext.new(@base_uri, nil, @graph)
      ec.extract_from_ancestors(node, &register)

      node.children.each do |el|
        next unless el.elem?
        nodeElement(el, ec.clone(el, &register))
      end
    end
  end
end
|
199
|
+
|
200
|
+
##
|
201
|
+
# Iterates the given block for each RDF triple in the input.
|
202
|
+
#
|
203
|
+
# @yield [subject, predicate, object]
|
204
|
+
# @yieldparam [RDF::Resource] subject
|
205
|
+
# @yieldparam [RDF::URI] predicate
|
206
|
+
# @yieldparam [RDF::Value] object
|
207
|
+
# @return [void]
|
208
|
+
# Walks every statement and hands its subject, predicate and object to the
# given block as three separate arguments.
def each_triple(&block)
  each_statement { |stmt| block.call(*stmt.to_triple) }
end
|
213
|
+
|
214
|
+
private
|
215
|
+
|
216
|
+
# Keep track of allocated BNodes
|
217
|
+
# Keep track of allocated BNodes.
#
# @param [#to_s, nil] value node identifier; the same identifier always
#   yields the same node
# @return [RDF::Node] cached node for +value+, or a fresh anonymous node
#   when no identifier is given
def bnode(value = nil)
  # Never cache anonymous nodes: keyed by "" they would all collapse into
  # one shared node.
  return RDF::Node.new if value.nil?

  @bnode_cache ||= {}
  @bnode_cache[value.to_s] ||= RDF::Node.new(value)
end
|
221
|
+
|
222
|
+
# Figure out the document path, if it is a Nokogiri::XML::Element or Attribute
|
223
|
+
# Context label for debug output: the display path for Nokogiri nodes, the
# string form of anything else.
def node_path(node)
  return node.display_path if node.is_a?(Nokogiri::XML::Node)
  node.to_s
end
|
229
|
+
|
230
|
+
# Add debug event to debug array, if specified
|
231
|
+
#
|
232
|
+
# @param [XML Node, any] node:: XML Node or string for showing context
|
233
|
+
# @param [String] message::
|
234
|
+
# Emits "<node path>: <message>" to stdout when module-level debugging is on
# and appends the same text to the :debug array when one was supplied.
def add_debug(node, message)
  to_stdout = ::RDF::RDFXML.debug?
  puts "#{node_path(node)}: #{message}" if to_stdout
  to_array = @debug.is_a?(Array)
  @debug << "#{node_path(node)}: #{message}" if to_array
end
|
238
|
+
|
239
|
+
# add a statement, object can be literal or URI or bnode
|
240
|
+
#
|
241
|
+
# @param [Nokogiri::XML::Node, any] node:: XML Node or string for showing context
|
242
|
+
# @param [URI, BNode] subject:: the subject of the statement
|
243
|
+
# @param [URI] predicate:: the predicate of the statement
|
244
|
+
# @param [URI, BNode, Literal] object:: the object of the statement
|
245
|
+
# @return [Statement]:: Added statement
|
246
|
+
# @raise [RDF::ReaderError]:: Checks parameter types and raises if they are incorrect if validating.
|
247
|
+
# Builds the statement, logs it against +node+ and passes it to the block
# captured by each_statement; returns whatever that block returns.
def add_triple(node, subject, predicate, object)
  triple = RDF::Statement.new(subject, predicate, object)
  add_debug(node, "statement: #{triple}")
  @callback.call(triple)
end
|
252
|
+
|
253
|
+
# XML nodeElement production
|
254
|
+
#
|
255
|
+
# @param [XML Element] el:: XML Element to parse
|
256
|
+
# @param [EvaluationContext] ec:: Evaluation context
|
257
|
+
# @return [RDF::URI] subject:: The subject found for the node
|
258
|
+
# @raise [RDF::ReaderError]:: Raises Exception if validating
|
259
|
+
def nodeElement(el, ec)
  # A subject preset on the context (done when expanding
  # parseType="Resource") wins over one derived from the element itself.
  subject = ec.subject || parse_subject(el, ec)

  add_debug(el, "nodeElement, ec: #{ec.inspect}")
  add_debug(el, "nodeElement, el: #{el.uri}")
  add_debug(el, "nodeElement, subject: #{subject.nil? ? 'nil' : subject.to_s}")

  # Any element other than rdf:Description is a typed node
  unless el.uri.to_s == RDF.Description.to_s
    add_triple(el, subject, RDF.type, el.uri)
  end

  # produce triples for attributes
  el.attribute_nodes.each do |attr|
    add_debug(el, "propertyAttr: #{attr.uri}='#{attr.value}'")
    if attr.uri.to_s == RDF.type.to_s
      # If there is an attribute a in propertyAttr with a.URI == rdf:type
      # then u:=uri(identifier:=resolve(a.string-value))
      # and the following triple is added to the graph:
      u = ec.base.join(attr.value)
      add_triple(attr, subject, RDF.type, u)
    elsif is_propertyAttr?(attr)
      # Attributes not RDF.type
      lit = RDF::Literal.new(attr.value, :language => ec.language, :validate => validate?, :canonicalize => canonicalize?)
      add_triple(attr, subject, attr.uri, lit)
    end
  end

  # Handle the propertyEltList children events in document order
  el.children.each do |child|
    next unless child.elem?

    # Context of the property element itself: inherits from ec and adds the
    # child's own xml:base, xml:lang and namespace declarations.
    child_ec = ec.clone(child) { |pfx, ns| prefix(pfx, ns) }
    predicate = child.uri
    add_debug(child, "propertyElt, predicate: #{predicate}")
    propertyElementURI_check(child)

    # Determine the content type of this property element
    text_nodes = child.children.select { |c| c.text? && !c.blank? }
    element_nodes = child.children.select { |c| c.element? }
    add_debug(child, "#{text_nodes.length} text nodes, #{element_nodes.length} element nodes")
    if element_nodes.length > 1
      element_nodes.each { |node| add_debug(child, " node: #{node.to_s}") }
    end

    # List expansion: the counter lives on the enclosing node element's context
    predicate = ec.li_next if predicate == RDF.li

    # Productions based on set of attributes

    # All remaining reserved XML Names (See Name in XML 1.0) are now removed from the set.
    # These are, all attribute information items in the set with property [prefix] beginning with xml
    # (case independent comparison) and all attribute information items with [prefix] property having
    # no value and which have [local name] beginning with xml (case independent comparison) are removed.
    # Note that the [base URI] accessor is computed by XML Base before any xml:base attribute information item
    # is deleted.
    attrs = {}
    id = datatype = parseType = resourceAttr = nodeID = nil

    child.attribute_nodes.each do |attr|
      if attr.namespace.to_s.empty?
        # The support for a limited set of non-namespaced names is REQUIRED and intended to allow
        # RDF/XML documents specified in [RDF-MS] to remain valid;
        # new documents SHOULD NOT use these unqualified attributes and applications
        # MAY choose to warn when the unqualified form is seen in a document.
        add_debug(el, "Unqualified attribute '#{attr}'")
        #attrs[attr.to_s] = attr.value unless attr.to_s.match?(/^xml/)
      elsif attr.namespace.href == RDF::XML.to_s
        # No production. Lang and base elements already extracted
      elsif attr.namespace.href == RDF.to_uri.to_s
        case attr.name
        when "ID" then id = attr.value
        when "datatype" then datatype = attr.value
        when "parseType" then parseType = attr.value
        when "resource" then resourceAttr = attr.value
        when "nodeID" then nodeID = attr.value
        else attrs[attr] = attr.value
        end
      else
        attrs[attr] = attr.value
      end
    end

    if nodeID && resourceAttr
      add_debug(el, "Cannot have rdf:nodeID and rdf:resource.")
      raise RDF::ReaderError.new("Cannot have rdf:nodeID and rdf:resource.") if validate?
    end

    # Apply character transformations
    id = id_check(el, RDF::NTriples.unescape(id), nil) if id
    resourceAttr = RDF::NTriples.unescape(resourceAttr) if resourceAttr
    nodeID = nodeID_check(el, RDF::NTriples.unescape(nodeID)) if nodeID

    add_debug(child, "attrs: #{attrs.inspect}")
    add_debug(child, "datatype: #{datatype}") if datatype
    add_debug(child, "parseType: #{parseType}") if parseType
    add_debug(child, "resource: #{resourceAttr}") if resourceAttr
    add_debug(child, "nodeID: #{nodeID}") if nodeID
    add_debug(child, "id: #{id}") if id

    if attrs.empty? && datatype.nil? && parseType.nil? && element_nodes.length == 1
      # Production resourcePropertyElt

      new_ec = child_ec.clone(nil) { |pfx, ns| prefix(pfx, ns) }
      new_node_element = element_nodes.first
      add_debug(child, "resourcePropertyElt: #{node_path(new_node_element)}")
      new_subject = nodeElement(new_node_element, new_ec)
      add_triple(child, subject, predicate, new_subject)
    elsif attrs.empty? && parseType.nil? && element_nodes.length == 0 && text_nodes.length > 0
      # Production literalPropertyElt
      add_debug(child, "literalPropertyElt")

      literal_opts = {:validate => validate?, :canonicalize => canonicalize?}
      if datatype
        literal_opts[:datatype] = uri(datatype)
      else
        literal_opts[:language] = child_ec.language
      end
      literal = RDF::Literal.new(child.inner_html, literal_opts)
      add_triple(child, subject, predicate, literal)

      # Reification: rdf:ID is resolved against the property element's own
      # base, as in every other production below
      reify(id, child, subject, predicate, literal, child_ec) if id
    elsif parseType == "Resource"
      # Production parseTypeResourcePropertyElt
      add_debug(child, "parseTypeResourcePropertyElt")

      unless attrs.empty?
        message = "Resource Property with extra attributes: '#{attrs.inspect}'"
        add_debug(child, message)
        raise RDF::ReaderError.new(message) if validate?
      end

      # For element e with possibly empty element content c.
      n = RDF::Node.new
      add_triple(child, subject, predicate, n)

      # Reification
      reify(id, child, subject, predicate, n, child_ec) if id

      # If the element content c is not empty, then use event n to create a new sequence of events as follows:
      #
      # start-element(URI := rdf:Description,
      #     subject := n,
      #     attributes := set())
      # c
      # end-element()
      add_debug(child, "compose new sequence with rdf:Description")
      node = child.clone
      pt_attr = node.attribute("parseType")
      node.namespace = pt_attr.namespace
      node.attributes.keys.each { |a| node.remove_attribute(a) }
      node.node_name = "Description"
      new_ec = child_ec.clone(nil, :subject => n) { |pfx, ns| prefix(pfx, ns) }
      nodeElement(node, new_ec)
    elsif parseType == "Collection"
      # Production parseTypeCollectionPropertyElt
      add_debug(child, "parseTypeCollectionPropertyElt")

      unless attrs.empty?
        message = "Resource Property with extra attributes: '#{attrs.inspect}'"
        add_debug(child, message)
        raise RDF::ReaderError.new(message) if validate?
      end

      # For element event e with possibly empty nodeElementList l. Set s:=list().
      # For each element event f in l, n := bnodeid(identifier := generated-blank-node-id()) and append n to s to give a sequence of events.
      s = element_nodes.map { RDF::Node.new }
      n = s.first || RDF["nil"]
      add_triple(child, subject, predicate, n)
      reify(id, child, subject, predicate, n, child_ec) if id

      # Add first/rest entries for all list elements
      s.each_with_index do |cell, i|
        following = s[i + 1]
        new_ec = child_ec.clone(nil) { |pfx, ns| prefix(pfx, ns) }
        object = nodeElement(element_nodes[i], new_ec)
        add_triple(child, cell, RDF.first, object)
        add_triple(child, cell, RDF.rest, following ? following : RDF.nil)
      end
    elsif parseType # Literal or Other
      # Production parseTypeLiteralPropertyElt / parseTypeOtherPropertyElt
      add_debug(child, parseType == "Literal" ? "parseTypeLiteralPropertyElt" : "parseTypeOtherPropertyElt (#{parseType})")

      unless attrs.empty?
        message = "Resource Property with extra attributes: '#{attrs.inspect}'"
        add_debug(child, message)
        raise RDF::ReaderError.new(message) if validate?
      end

      if resourceAttr
        message = "illegal rdf:resource"
        add_debug(child, message)
        raise RDF::ReaderError.new(message) if validate?
      end

      object = RDF::Literal.new(child.children, :datatype => RDF.XMLLiteral, :namespaces => child_ec.uri_mappings, :language => ec.language)
      add_triple(child, subject, predicate, object)
    elsif text_nodes.length == 0 && element_nodes.length == 0
      # Production emptyPropertyElt
      add_debug(child, "emptyPropertyElt")

      if attrs.empty? && resourceAttr.nil? && nodeID.nil?
        # The empty literal takes the language in force on the property
        # element, including an xml:lang set on that element itself
        literal = RDF::Literal.new("", :language => child_ec.language)
        add_triple(child, subject, predicate, literal)

        # Reification
        reify(id, child, subject, predicate, literal, child_ec) if id
      else
        if resourceAttr
          # Resolved against the property element's base (honours its xml:base)
          resource = child_ec.base.join(resourceAttr)
        elsif nodeID
          resource = bnode(nodeID)
        else
          resource = RDF::Node.new
        end

        # produce triples for attributes
        attrs.each_pair do |attr, val|
          add_debug(el, "attr: #{attr.name}='#{val}'")

          if attr.uri.to_s == RDF.type.to_s
            # rdf:type names a resource: resolve the value to a URI, as is
            # done for rdf:type attributes on node elements
            add_triple(child, resource, RDF.type, child_ec.base.join(val))
          else
            # Check for illegal attributes
            next unless is_propertyAttr?(attr)

            # Attributes not in RDF.type
            lit = RDF::Literal.new(val, :language => child_ec.language)
            add_triple(child, resource, attr.uri, lit)
          end
        end
        add_triple(child, subject, predicate, resource)

        # Reification
        reify(id, child, subject, predicate, resource, child_ec) if id
      end
    end
  end

  # Return subject
  subject
end
|
514
|
+
|
515
|
+
private
|
516
|
+
# Reify subject, predicate, and object given the EvaluationContext (ec) and current XML element (el)
|
517
|
+
# Emits the four reification statements about (subject, predicate, object).
# The reified statement is identified by "#id" joined onto the context base.
def reify(id, el, subject, predicate, object, ec)
  add_debug(el, "reify, id: #{id}")
  rsubject = ec.base.join("#" + id)

  parts = [[:subject, subject], [:predicate, predicate], [:object, object]]
  parts.each do |property, value|
    add_triple(el, rsubject, RDF.send(property), value)
  end
  add_triple(el, rsubject, RDF.type, RDF["Statement"])
end
|
525
|
+
|
526
|
+
# Figure out the subject from the element.
|
527
|
+
# Figure out the subject from the element.
#
# Precedence is rdf:ID, then rdf:nodeID, then rdf:about; with none of them
# present a fresh blank node is used. Combining rdf:nodeID with rdf:about or
# rdf:ID is reported, and raises RDF::ReaderError when validating.
def parse_subject(el, ec)
  old_property_check(el)
  nodeElementURI_check(el)

  about = el.attribute("about")
  id = el.attribute("ID")
  nodeID = el.attribute("nodeID")

  clash = if nodeID && about
    "rdf:about"
  elsif nodeID && id
    "rdf:ID"
  end
  if clash
    message = "Cannot have rdf:nodeID and #{clash}."
    add_debug(el, message)
    raise RDF::ReaderError.new(message) if validate?
  end

  if id
    id_value = RDF::NTriples.unescape(id.value)
    add_debug(el, "parse_subject, id: '#{id_value}'")
    id_check(el, id_value, ec.base) # Returns URI
  elsif nodeID
    # The value of rdf:nodeID must match the XML Name production
    nodeID = nodeID_check(el, RDF::NTriples.unescape(nodeID.value))
    add_debug(el, "parse_subject, nodeID: '#{nodeID}'")
    bnode(nodeID)
  elsif about
    about = RDF::NTriples.unescape(about.value)
    add_debug(el, "parse_subject, about: '#{about}'")
    ec.base.join(about)
  else
    add_debug(el, "parse_subject, BNode")
    RDF::Node.new
  end
end
|
561
|
+
|
562
|
+
# ID attribute must be an NCName
|
563
|
+
# ID attribute must be an NCName.
#
# @param el element used as context for debug output
# @param [String] id the rdf:ID value
# @param base base URI to resolve the ID against, or nil to only check the name
# @return the URI "base#id" when +base+ is given, otherwise +id+ unchanged
# @raise [RDF::ReaderError] when validating and the ID is not an NCName or
#   is defined a second time for the same URI
def id_check(el, id, base)
  unless NC_REGEXP.match(id)
    message = "ID attribute '#{id}' must be a NCName"
    add_debug(el, message)
    raise RDF::ReaderError.new(message) if validate?
  end
  return id unless base

  # ID may only be specified once for the same URI. Seen IDs are kept in a
  # dedicated table rather than in the namespace-prefix registry, so an ID
  # can never overwrite (or be mistaken for) a declared prefix.
  id_uri = uri(base, "##{id}")
  @id_mapping ||= {}
  if @id_mapping[id] == id_uri
    message = "ID attribute '#{id}' may only be defined once for the same URI"
    add_debug(el, message)
    raise RDF::ReaderError.new(message) if validate?
  end

  # Returns URI, in this case
  @id_mapping[id] = id_uri
end
|
584
|
+
|
585
|
+
# nodeID must be an XML Name
|
586
|
+
# nodeID must pass Production rdf-id
|
587
|
+
# Checks an rdf:nodeID value against NC_REGEXP.
#
# @param el element used as context for debug output
# @param [String] nodeID the rdf:nodeID value
# @return [String, nil] +nodeID+ when it matches, nil otherwise
# @raise [RDF::ReaderError] when validating and the value does not match
def nodeID_check(el, nodeID)
  return nodeID if NC_REGEXP.match(nodeID)

  # Same wording for the debug log and the raised error
  message = "nodeID attribute '#{nodeID}' must be a NCName"
  add_debug(el, message)
  raise RDF::ReaderError.new(message) if validate?
  nil
end
|
596
|
+
|
597
|
+
# Is this attribute a Property Attribute?
|
598
|
+
# Decides whether an attribute should produce a property triple.
# rdf:Description, rdf:li and the obsolete terms are reported (raising when
# validating) and rejected; core syntax terms, attributes without a
# namespace and attributes in the XML namespace are rejected silently.
def is_propertyAttr?(attr)
  attr_uri = attr.uri.to_s
  banned = [RDF.Description.to_s, RDF.li.to_s] + OLD_TERMS

  unless banned.include?(attr_uri)
    return !CORE_SYNTAX_TERMS.include?(attr_uri) && attr.namespace && attr.namespace.href != RDF::XML.to_s
  end

  problem = "Invalid use of rdf:#{attr.name}"
  add_debug(attr, problem)
  raise RDF::ReaderError.new(problem) if validate?
  false
end
|
607
|
+
|
608
|
+
# Check Node Element name
|
609
|
+
# Reports a node element whose name is a core syntax term, rdf:li or an
# obsolete term; raises RDF::ReaderError when validating.
def nodeElementURI_check(el)
  reserved = CORE_SYNTAX_TERMS + [RDF.li.to_s] + OLD_TERMS
  return unless reserved.include?(el.uri.to_s)

  problem = "Invalid use of rdf:#{el.name}"
  add_debug(el, problem)
  raise RDF::ReaderError.new(problem) if validate?
end
|
616
|
+
|
617
|
+
# Check Property Element name
|
618
|
+
# Reports a property element whose name is a core syntax term,
# rdf:Description or an obsolete term; raises RDF::ReaderError when validating.
def propertyElementURI_check(el)
  reserved = CORE_SYNTAX_TERMS + [RDF.Description.to_s] + OLD_TERMS
  return unless reserved.include?(el.uri.to_s)

  problem = "Invalid use of rdf:#{el.name}"
  add_debug(el, problem)
  raise RDF::ReaderError.new(problem) if validate?
end
|
625
|
+
|
626
|
+
# Check for the use of an obsolete RDF property
|
627
|
+
# Scans the element's attributes for obsolete RDF terms, logging each one
# found and raising RDF::ReaderError on the first when validating.
def old_property_check(el)
  el.attribute_nodes.each do |attr|
    next unless OLD_TERMS.include?(attr.uri.to_s)

    add_debug(el, "Obsolete attribute '#{attr.uri}'")
    raise RDF::ReaderError.new("Obsolete attribute '#{attr.uri}'") if validate?
  end
end
|
635
|
+
|
636
|
+
# Builds an RDF::URI from +value+, optionally joined with +append+, then
# applies the reader's validate, canonicalize and intern settings in that order.
def uri(value, append = nil)
  result = RDF::URI.new(value)
  result = result.join(append) if append
  result.validate! if validate?
  result.canonicalize! if canonicalize?
  intern? ? RDF::URI.intern(result) : result
end
|
644
|
+
|
645
|
+
end
|
646
|
+
end
|