rdf-rdfxml 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.markdown CHANGED
@@ -1,3 +1,8 @@
1
+ ### 0.3.6
2
+ * Attempt at JRuby and REXML support; not quite there yet.
3
+ * Element content uses `#inner_text`, rather than `#inner_html`; this performs entity decoding.
4
+ * Use rdf-xsd gem instead of internal support for XMLLiterals.
5
+
1
6
  ### 0.3.5
2
7
  * RDF.rb 0.3.4 compatibility.
3
8
  * Added format detection.
data/README.markdown CHANGED
@@ -24,7 +24,7 @@ Instantiate a parser and parse source, specifying type and base-URL
24
24
 
25
25
  ## Dependencies
26
26
  * [RDF.rb](http://rubygems.org/gems/rdf) (>= 0.3.4)
27
- * [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.4.4)
27
+ * Soft dependency on [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.5.0)
28
28
 
29
29
  ## Documentation
30
30
  Full documentation available on [RubyForge](http://rubydoc.info/github/gkellogg/rdf-rdfxml/master/frames)
@@ -41,7 +41,6 @@ Full documentation available on [RubyForge](http://rubydoc.info/github/gkellogg/
41
41
  ### Patches
42
42
  * {Array}
43
43
  * {RDF::Graph}
44
- * {RDF::Literal::XML}
45
44
  * {Nokogiri::XML::Node}
46
45
 
47
46
  ## TODO
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.5
1
+ 0.3.6
data/lib/rdf/rdfxml.rb CHANGED
@@ -23,8 +23,6 @@ module RDF
23
23
  require 'rdf/rdfxml/format'
24
24
  require 'rdf/rdfxml/vocab'
25
25
  require 'rdf/rdfxml/patches/array_hacks'
26
- require 'rdf/rdfxml/patches/literal_hacks'
27
- require 'rdf/rdfxml/patches/nokogiri_hacks'
28
26
  autoload :Reader, 'rdf/rdfxml/reader'
29
27
  autoload :Writer, 'rdf/rdfxml/writer'
30
28
  autoload :VERSION, 'rdf/rdfxml/version'
@@ -36,7 +34,7 @@ module RDF
36
34
  (?!\\\\u0301) # ́ is a non-spacing acute accent.
37
35
  # It is legal within an XML Name, but not as the first character.
38
36
  ( [a-zA-Z_]
39
- | \\\\u[0-9a-fA-F]
37
+ | \\\\u[0-9a-fA-F]{4}
40
38
  )
41
39
  ( [0-9a-zA-Z_\.-]
42
40
  | \\\\u([0-9a-fA-F]{4})
@@ -20,7 +20,6 @@ module RDF::RDFXML
20
20
  #
21
21
  # @see http://www.w3.org/TR/rdf-testcases/#ntriples
22
22
  class Format < RDF::Format
23
- content_type 'application/xml', :extension => :xml
24
23
  content_type 'application/rdf+xml', :extension => :rdf
25
24
  content_encoding 'utf-8'
26
25
 
@@ -40,17 +39,4 @@ module RDF::RDFXML
40
39
  sample.match(/<(\w+:)?(RDF)/)
41
40
  end
42
41
  end
43
-
44
- # Aliases for this format
45
- #
46
- # This allows the following:
47
- #
48
- # @example Obtaining an RDFXML format class
49
- # RDF::Format.for(:xml) # RDF::RDFXML::XML
50
- # RDF::Format.for(:xml).reader # RDF::RDFXML::Reader
51
- # RDF::Format.for(:xml).writer # RDF::RDFXML::Writer
52
- class XML < RDF::Format
53
- reader { RDF::RDFXML::Reader }
54
- writer { RDF::RDFXML::Writer }
55
- end
56
42
  end
@@ -10,7 +10,7 @@ class Nokogiri::XML::Node
10
10
  @display_path ||= case self
11
11
  when Nokogiri::XML::Document then ""
12
12
  when Nokogiri::XML::Element then parent ? "#{parent.display_path}/#{name}" : name
13
- when Nokogiri::XML::Attr then "#{parent.display_path}@#{name}"
13
+ when Nokogiri::XML::Attr then "#{parent && parent.display_path}@#{name}"
14
14
  end
15
15
  end
16
16
 
@@ -1,4 +1,9 @@
1
- require 'nokogiri' # FIXME: Implement using different modules as in RDF::TriX
1
+ begin
2
+ require 'nokogiri'
3
+ rescue LoadError => e
4
+ :rexml
5
+ end
6
+ require 'rdf/xsd'
2
7
 
3
8
  module RDF::RDFXML
4
9
  ##
@@ -58,8 +63,8 @@ module RDF::RDFXML
58
63
 
59
64
  # Extract Evaluation Context from an element
60
65
  def extract_from_element(el, &cb)
61
- b = el.attribute_with_ns("base", RDF::XML.to_s)
62
- lang = el.attribute_with_ns("lang", RDF::XML.to_s)
66
+ b = el.base
67
+ lang = el.language
63
68
  self.base = self.base.join(b) if b
64
69
  self.language = lang if lang
65
70
  self.uri_mappings.merge!(extract_mappings(el, &cb))
@@ -101,6 +106,11 @@ module RDF::RDFXML
101
106
  end
102
107
  end
103
108
 
109
+ # Returns the XML implementation module for this reader instance.
110
+ #
111
+ # @attr_reader [Module]
112
+ attr_reader :implementation
113
+
104
114
  ##
105
115
  # Initializes the RDF/XML reader instance.
106
116
  #
@@ -108,6 +118,8 @@ module RDF::RDFXML
108
118
  # the input stream to read
109
119
  # @param [Hash{Symbol => Object}] options
110
120
  # any additional options
121
+ # @option options [Symbol] :library
122
+ # One of :nokogiri or :rexml. If nil/unspecified uses :nokogiri if available, :rexml otherwise.
111
123
  # @option options [Encoding] :encoding (Encoding::UTF_8)
112
124
  # the encoding of the input stream (Ruby 1.9+)
113
125
  # @option options [Boolean] :validate (false)
@@ -132,16 +144,27 @@ module RDF::RDFXML
132
144
  @debug = options[:debug]
133
145
  @base_uri = uri(options[:base_uri]) if options[:base_uri]
134
146
 
135
- @doc = case input
136
- when Nokogiri::XML::Document then input
137
- else
138
- Nokogiri::XML.parse(input, @base_uri.to_s) do |config|
139
- config.noent
140
- end
147
+ @library = case options[:library]
148
+ when nil
149
+ # Use Nokogiri when available, and REXML otherwise:
150
+ defined?(::Nokogiri) ? :nokogiri : :rexml
151
+ when :nokogiri, :rexml
152
+ options[:library]
153
+ else
154
+ raise ArgumentError.new("expected :rexml or :nokogiri, but got #{options[:library].inspect}")
141
155
  end
142
-
143
- raise RDF::ReaderError, "Synax errors:\n#{@doc.errors}" if !@doc.errors.empty? && validate?
144
- raise RDF::ReaderError, "Empty document" if (@doc.nil? || @doc.root.nil?) && validate?
156
+
157
+ require "rdf/rdfxml/reader/#{@library}"
158
+ @implementation = case @library
159
+ when :nokogiri then Nokogiri
160
+ when :rexml then REXML
161
+ end
162
+ self.extend(@implementation)
163
+
164
+ initialize_xml(input, options) rescue raise RDF::ReaderError.new($!.message)
165
+
166
+ raise RDF::ReaderError, "Empty document" if root.nil? && validate?
167
+ raise RDF::ReaderError, "Synax errors:\n#{doc_errors}" if !doc_errors.empty? && validate?
145
168
 
146
169
  block.call(self) if block_given?
147
170
  end
@@ -163,7 +186,7 @@ module RDF::RDFXML
163
186
  # Block called from add_statement
164
187
  @callback = block
165
188
 
166
- root = @doc.root
189
+ raise "root must be a proxy not a #{root.class}" unless root.is_a?(@implementation::NodeProxy)
167
190
 
168
191
  add_debug(root, "base_uri: #{@base_uri || 'nil'}")
169
192
 
@@ -178,6 +201,7 @@ module RDF::RDFXML
178
201
  nodeElement(root, ec)
179
202
  else
180
203
  rdf_nodes.each do |node|
204
+ raise "node must be a proxy not a #{node.class}" unless node.is_a?(@implementation::NodeProxy)
181
205
  # XXX Skip this element if it's contained within another rdf:RDF element
182
206
 
183
207
  # Extract base, lang and namespaces from parents to create proper evaluation context
@@ -187,6 +211,7 @@ module RDF::RDFXML
187
211
  ec.extract_from_ancestors(node)
188
212
  node.children.each {|el|
189
213
  next unless el.elem?
214
+ raise "el must be a proxy not a #{el.class}" unless el.is_a?(@implementation::NodeProxy)
190
215
  new_ec = ec.clone(el) do |prefix, value|
191
216
  prefix(prefix, value)
192
217
  end
@@ -220,10 +245,7 @@ module RDF::RDFXML
220
245
 
221
246
  # Figure out the document path, if it is a Nokogiri::XML::Element or Attribute
222
247
  def node_path(node)
223
- case node
224
- when Nokogiri::XML::Node then node.display_path
225
- else node.to_s
226
- end
248
+ "<#{base_uri}>#{node.respond_to?(:display_path) ? node.display_path : node}"
227
249
  end
228
250
 
229
251
  # Add debug event to debug array, if specified
@@ -259,6 +281,7 @@ module RDF::RDFXML
259
281
  # @return [RDF::URI] subject:: The subject found for the node
260
282
  # @raise [RDF::ReaderError]:: Raises Exception if validating
261
283
  def nodeElement(el, ec)
284
+ raise "el must be a proxy not a #{el.class}" unless el.is_a?(@implementation::NodeProxy)
262
285
  # subject
263
286
  subject = ec.subject || parse_subject(el, ec)
264
287
 
@@ -290,6 +313,7 @@ module RDF::RDFXML
290
313
  # Handle the propertyEltList children events in document order
291
314
  li_counter = 0 # this will increase for each li we iterate through
292
315
  el.children.each do |child|
316
+ raise "child must be a proxy not a #{child.class}" unless child.is_a?(@implementation::NodeProxy)
293
317
  next unless child.elem?
294
318
  child_ec = ec.clone(child) do |prefix, value|
295
319
  prefix(prefix, value)
@@ -299,15 +323,25 @@ module RDF::RDFXML
299
323
  propertyElementURI_check(child)
300
324
 
301
325
  # Determine the content type of this property element
326
+ raise "child must be a proxy not a #{child.class}" unless child.is_a?(@implementation::NodeProxy)
327
+
302
328
  text_nodes = child.children.select {|e| e.text? && !e.blank?}
303
329
  element_nodes = child.children.select {|c| c.element? }
304
- add_debug(child) {"#{text_nodes.length} text nodes, #{element_nodes.length} element nodes"}
330
+ add_debug(child) {"#{text_nodes.to_a.length} text nodes, #{element_nodes.to_a.length} element nodes"}
331
+
332
+ text_nodes.each do |node|
333
+ raise "text node must be a proxy not a #{node.class}" unless node.is_a?(@implementation::NodeProxy)
334
+ end
335
+ element_nodes.each do |node|
336
+ raise "element node must be a proxy not a #{node.class}" unless node.is_a?(@implementation::NodeProxy)
337
+ end
338
+
305
339
  if element_nodes.length > 1
306
340
  element_nodes.each do |node|
307
341
  add_debug(child) {" node: #{node.to_s}"}
308
342
  end
309
343
  end
310
-
344
+
311
345
  # List expansion
312
346
  predicate = ec.li_next if predicate == RDF.li
313
347
 
@@ -370,6 +404,7 @@ module RDF::RDFXML
370
404
  prefix(prefix, value)
371
405
  end
372
406
  new_node_element = element_nodes.first
407
+ raise "new_node_element must be a proxy not a #{new_node_element.class}" unless new_node_element.is_a?(@implementation::NodeProxy)
373
408
  add_debug(child) {"resourcePropertyElt: #{node_path(new_node_element)}"}
374
409
  new_subject = nodeElement(new_node_element, new_ec)
375
410
  add_triple(child, subject, predicate, new_subject)
@@ -383,7 +418,7 @@ module RDF::RDFXML
383
418
  else
384
419
  literal_opts[:language] = child_ec.language
385
420
  end
386
- literal = RDF::Literal.new(child.inner_html, literal_opts)
421
+ literal = RDF::Literal.new(child.inner_text, literal_opts)
387
422
  add_triple(child, subject, predicate, literal)
388
423
  reify(id, child, subject, predicate, literal, ec) if id
389
424
  elsif parseType == "Resource"
@@ -412,10 +447,10 @@ module RDF::RDFXML
412
447
  # end-element()
413
448
  add_debug(child, "compose new sequence with rdf:Description")
414
449
  node = child.clone
415
- pt_attr = node.attribute("parseType")
416
- node.namespace = pt_attr.namespace
417
450
  node.attributes.keys.each {|a| node.remove_attribute(a)}
418
451
  node.node_name = "Description"
452
+ node.add_namespace(nil, RDF.to_uri.to_s)
453
+ add_debug(node) { "uri: #{node.uri}, namespace: #{node.namespace.inspect}"}
419
454
  new_ec = child_ec.clone(nil, :subject => n) do |prefix, value|
420
455
  prefix(prefix, value)
421
456
  end
@@ -466,13 +501,27 @@ module RDF::RDFXML
466
501
  raise RDF::ReaderError.new(warn) if validate?
467
502
  end
468
503
 
469
- object = RDF::Literal.new(child.children, :datatype => RDF.XMLLiteral, :namespaces => child_ec.uri_mappings, :language => ec.language)
470
- add_triple(child, subject, predicate, object)
504
+ begin
505
+ c14nxl = child.children.c14nxl(
506
+ :library => @library,
507
+ :language => ec.language,
508
+ :namespaces => child_ec.uri_mappings)
509
+ object = RDF::Literal.new(c14nxl,
510
+ :library => @library,
511
+ :datatype => RDF.XMLLiteral,
512
+ :validate => validate?,
513
+ :canonicalize => canonicalize?)
514
+
515
+ add_triple(child, subject, predicate, object)
516
+ rescue ArgumentError => e
517
+ add_error(child, e.message)
518
+ end
471
519
  elsif text_nodes.length == 0 && element_nodes.length == 0
472
520
  # Production emptyPropertyElt
473
521
  add_debug(child, "emptyPropertyElt")
474
522
 
475
523
  if attrs.empty? && resourceAttr.nil? && nodeID.nil?
524
+
476
525
  literal = RDF::Literal.new("", :language => ec.language)
477
526
  add_triple(child, subject, predicate, literal)
478
527
 
@@ -530,9 +579,9 @@ module RDF::RDFXML
530
579
  old_property_check(el)
531
580
 
532
581
  nodeElementURI_check(el)
533
- about = el.attribute("about")
534
- id = el.attribute("ID")
535
- nodeID = el.attribute("nodeID")
582
+ about = el.attribute_with_ns("about", RDF.to_uri.to_s)
583
+ id = el.attribute_with_ns("ID", RDF.to_uri.to_s)
584
+ nodeID = el.attribute_with_ns("nodeID", RDF.to_uri.to_s)
536
585
 
537
586
  if nodeID && about
538
587
  add_debug(el, "Cannot have rdf:nodeID and rdf:about.")
@@ -0,0 +1,226 @@
1
+ module RDF::RDFXML
2
+ class Reader < RDF::Reader
3
+ ##
4
+ # Nokogiri implementation of an XML parser.
5
+ #
6
+ # @see http://nokogiri.org/
7
+ module Nokogiri
8
+ ##
9
+ # Returns the name of the underlying XML library.
10
+ #
11
+ # @return [Symbol]
12
+ def self.library
13
+ :nokogiri
14
+ end
15
+
16
+ # Proxy class to implement uniform element accessors
17
+ class NodeProxy
18
+ attr_reader :node
19
+ attr_reader :parent
20
+
21
+ def initialize(node, parent = nil)
22
+ @node = node
23
+ @parent = parent
24
+ end
25
+
26
+ ##
27
+ # Element language
28
+ #
29
+ # From HTML5 [3.2.3.3]
30
+ # If both the lang attribute in no namespace and the lang attribute in the XML namespace are set
31
+ # on an element, user agents must use the lang attribute in the XML namespace, and the lang
32
+ # attribute in no namespace must be ignored for the purposes of determining the element's
33
+ # language.
34
+ #
35
+ # @return [String]
36
+ def language
37
+ attribute_with_ns("lang", RDF::XML.to_s)
38
+ end
39
+
40
+ ##
41
+ # Return xml:base on element, if defined
42
+ #
43
+ # @return [String]
44
+ def base
45
+ attribute_with_ns("base", RDF::XML.to_s)
46
+ end
47
+
48
+ ##
49
+ # Wrap the node's attribute_with_ns to ensure nil is returned when the result is #null?
50
+ #
51
+ # Get the attribute node with name and namespace
52
+ #
53
+ # @param [String] name
54
+ # @param [String] namespace
55
+ # @return [Nokogiri::XML::Attr]
56
+ def attribute_with_ns(name, namespace)
57
+ a = @node.attribute_with_ns(name, namespace)
58
+
59
+ (a.respond_to?(:null?) && a.null?) ? nil : a # to ensure FFI Pointer compatibility
60
+ end
61
+
62
+ def display_path
63
+ @display_path ||= begin
64
+ path = []
65
+ path << parent.display_path if parent
66
+ path << @node.name
67
+ case @node
68
+ when ::Nokogiri::XML::Element then path.join("/")
69
+ when ::Nokogiri::XML::Attr then path.join("@")
70
+ else path.join("?")
71
+ end
72
+ end
73
+ end
74
+
75
+ ##
76
+ # Return true if all child nodes are text
77
+ #
78
+ # @return [Boolean]
79
+ def text_content?
80
+ @text_content ||= @node.children.all? {|c| c.text?}
81
+ end
82
+
83
+ ##
84
+ # Retrieve XMLNS definitions for this element
85
+ #
86
+ # @return [Hash{String => String}]
87
+ def namespaces
88
+ @namespaces ||= @node.namespace_definitions.inject({}) {|memo, ns| memo[ns.prefix] = ns.href.to_s; memo }
89
+ end
90
+
91
+ ##
92
+ # Children of this node
93
+ #
94
+ # @return [NodeSetProxy]
95
+ def children
96
+ @children ||= NodeSetProxy.new(@node.children, self)
97
+ end
98
+
99
+ # Ancestors of this element, in order
100
+ def ancestors
101
+ @ancestors ||= parent ? parent.ancestors + [parent] : []
102
+ end
103
+
104
+ ##
105
+ # Inner text of an element. Decode Entities
106
+ #
107
+ # @return [String]
108
+ #def inner_text
109
+ # coder = HTMLEntities.new
110
+ # coder.decode(@node.inner_text)
111
+ #end
112
+
113
+ def attribute_nodes
114
+ @attribute_nodes ||= NodeSetProxy.new(@node.attribute_nodes, self)
115
+ end
116
+
117
+ def xpath(*args)
118
+ @node.xpath(*args).map {|n| NodeProxy.new(n)}
119
+ end
120
+
121
+ # For JRuby, there is a bug that prevents the namespace from being set on an element
122
+ if RUBY_PLATFORM == "java"
123
+ def add_namespace(prefix, href)
124
+ @def_namespace = href if prefix.nil?
125
+ @node.add_namespace(prefix, href)
126
+ end
127
+
128
+ def namespace
129
+ @def_namespace || @node.namespace
130
+ end
131
+ end
132
+
133
+ # URI of namespace + node_name
134
+ def uri
135
+ ns = namespace || RDF::XML.to_s
136
+ ns = ns.href if ns.respond_to?(:href)
137
+ RDF::URI.intern(ns + self.node_name)
138
+ end
139
+
140
+ ##
141
+ # Proxy for everything else to @node
142
+ def method_missing(method, *args)
143
+ @node.send(method, *args)
144
+ end
145
+ end
146
+
147
+ ##
148
+ # NodeSet proxy
149
+ class NodeSetProxy
150
+ attr_reader :node_set
151
+ attr_reader :parent
152
+
153
+ def initialize(node_set, parent)
154
+ @node_set = node_set
155
+ @parent = parent
156
+ end
157
+
158
+ ##
159
+ # Return a proxy for each child
160
+ #
161
+ # @yield(child)
162
+ # @yieldparam(NodeProxy)
163
+ def each
164
+ @node_set.each do |c|
165
+ yield NodeProxy.new(c, parent)
166
+ end
167
+ end
168
+
169
+ ##
170
+ # Return selected NodeProxies based on selection
171
+ #
172
+ # @yield(child)
173
+ # @yieldparam(NodeProxy)
174
+ # @return [Array<NodeProxy>]
175
+ def select
176
+ @node_set.to_a.map {|n| NodeProxy.new(n, parent)}.select do |c|
177
+ yield c
178
+ end
179
+ end
180
+
181
+ ##
182
+ # Proxy for everything else to @node_set
183
+ def method_missing(method, *args)
184
+ @node_set.send(method, *args)
185
+ end
186
+ end
187
+
188
+ ##
189
+ # Initializes the underlying XML library.
190
+ #
191
+ # @param [Hash{Symbol => Object}] options
192
+ # @return [void]
193
+ def initialize_xml(input, options = {})
194
+ require 'nokogiri' unless defined?(::Nokogiri)
195
+ @doc = case input
196
+ when ::Nokogiri::XML::Document
197
+ input
198
+ else
199
+ # Try to detect charset from input
200
+ options[:encoding] ||= input.charset if input.respond_to?(:charset)
201
+
202
+ # Otherwise, default is utf-8
203
+ options[:encoding] ||= 'utf-8'
204
+
205
+ ::Nokogiri::XML.parse(input, base_uri.to_s, options[:encoding]) do |config|
206
+ config.noent
207
+ end
208
+ end
209
+ end
210
+
211
+ # Accessor methods to mask native elements & attributes
212
+
213
+ ##
214
+ # Return proxy for document root
215
+ def root
216
+ @root ||= NodeProxy.new(@doc.root) if @doc && @doc.root
217
+ end
218
+
219
+ ##
220
+ # Document errors
221
+ def doc_errors
222
+ @doc.errors
223
+ end
224
+ end
225
+ end
226
+ end
@@ -0,0 +1,215 @@
1
+ module RDF::RDFXML
2
+ class Reader < RDF::Reader
3
+ ##
4
+ # REXML implementation of an XML parser.
5
+ #
6
+ # @see http://www.germane-software.com/software/rexml/
7
+ module REXML
8
+ ##
9
+ # Returns the name of the underlying XML library.
10
+ #
11
+ # @return [Symbol]
12
+ def self.library
13
+ :rexml
14
+ end
15
+
16
+ # Proxy class to implement uniform element accessors
17
+ class NodeProxy
18
+ attr_reader :node
19
+ attr_reader :parent
20
+
21
+ def initialize(node, parent = nil)
22
+ @node = node
23
+ @parent = parent
24
+ end
25
+
26
+ ##
27
+ # Element language
28
+ #
29
+ # From HTML5 [3.2.3.3]
30
+ # If both the lang attribute in no namespace and the lang attribute in the XML namespace are set
31
+ # on an element, user agents must use the lang attribute in the XML namespace, and the lang
32
+ # attribute in no namespace must be ignored for the purposes of determining the element's
33
+ # language.
34
+ #
35
+ # @return [String]
36
+ def language
37
+ @node.attribute("lang", RDF::XML.to_s)
38
+ end
39
+
40
+ ##
41
+ # Return xml:base on element, if defined
42
+ #
43
+ # @return [String]
44
+ def base
45
+ @node.attribute("base", RDF::XML.to_s)
46
+ end
47
+
48
+ def attribute_with_ns(name, namespace)
49
+ @node.attribute(name, namespace)
50
+ end
51
+
52
+ def display_path
53
+ @display_path ||= begin
54
+ path = []
55
+ path << parent.display_path if parent
56
+ path << @node.name
57
+ case @node
58
+ when ::REXML::Element then path.join("/")
59
+ when ::REXML::Attribute then path.join("@")
60
+ else path.join("?")
61
+ end
62
+ end
63
+ end
64
+
65
+ # URI of namespace + name
66
+ def uri
67
+ ns = namespace || RDF::XML.to_s
68
+ ns = ns.href if ns.respond_to?(:href)
69
+ RDF::URI.intern(ns + @node.name)
70
+ end
71
+
72
+ ##
73
+ # Return true if all child nodes are text
74
+ #
75
+ # @return [Boolean]
76
+ def text_content?
77
+ @node.children.all? {|c| c.is_a?(::REXML::Text)}
78
+ end
79
+
80
+ ##
81
+ # Retrieve XMLNS definitions for this element
82
+ #
83
+ # @return [Hash{String => String}]
84
+ def namespaces
85
+ ns_decls = {}
86
+ @node.attributes.each do |name, attr|
87
+ next unless name =~ /^xmlns(?:\:(.+))?/
88
+ ns_decls[$1] = attr
89
+ end
90
+ ns_decls
91
+ end
92
+
93
+ ##
94
+ # Children of this node
95
+ #
96
+ # @return [NodeSetProxy]
97
+ def children
98
+ NodeSetProxy.new(@node.children, self)
99
+ end
100
+
101
+ # Ancestors of this element, in order
102
+ def ancestors
103
+ @ancestors ||= parent ? parent.ancestors + [parent] : []
104
+ end
105
+
106
+ ##
107
+ # Inner text of an element
108
+ #
109
+ # @see http://apidock.com/ruby/REXML/Element/get_text#743-Get-all-inner-texts
110
+ # @return [String]
111
+ def inner_text
112
+ coder = HTMLEntities.new
113
+ ::REXML::XPath.match(@node,'.//text()').map { |e|
114
+ coder.decode(e)
115
+ }.join
116
+ end
117
+
118
+ ##
119
+ # Node type accessors
120
+ #
121
+ # @return [Boolean]
122
+ def element?
123
+ @node.is_a?(::REXML::Element)
124
+ end
125
+
126
+ def attribute_nodes
127
+ @attribute_nodes ||= NodeSetProxy.new(@node.children.select {|n| n.is_a?(::REXML::Attribute)}, self)
128
+ end
129
+
130
+ def xpath(*args)
131
+ #NodeSetProxy.new(::REXML::XPath.match(@node, path, namespaces), self)
132
+ ::REXML::XPath.match(@node, *args).map {|n| NodeProxy.new(n)}
133
+ end
134
+
135
+ def elem?
136
+ @node.is_a?(::REXML::Element)
137
+ end
138
+
139
+ ##
140
+ # Proxy for everything else to @node
141
+ def method_missing(method, *args)
142
+ @node.send(method, *args)
143
+ end
144
+ end
145
+
146
+ ##
147
+ # NodeSet proxy
148
+ class NodeSetProxy
149
+ attr_reader :node_set
150
+ attr_reader :parent
151
+
152
+ def initialize(node_set, parent)
153
+ @node_set = node_set
154
+ @parent = parent
155
+ end
156
+
157
+ ##
158
+ # Return a proxy for each child
159
+ #
160
+ # @yield(child)
161
+ # @yieldparam(NodeProxy)
162
+ def each
163
+ @node_set.each do |c|
164
+ yield NodeProxy.new(c, parent)
165
+ end
166
+ end
167
+
168
+ ##
169
+ # Proxy for everything else to @node_set
170
+ def method_missing(method, *args)
171
+ @node_set.send(method, *args)
172
+ end
173
+ end
174
+
175
+ ##
176
+ # Initializes the underlying XML library.
177
+ #
178
+ # @param [Hash{Symbol => Object}] options
179
+ # @return [void]
180
+ def initialize_xml(input, options = {})
181
+ require 'rexml/document' unless defined?(::REXML)
182
+ @doc = case input
183
+ when ::REXML::Document
184
+ input
185
+ else
186
+ # Try to detect charset from input
187
+ options[:encoding] ||= input.charset if input.respond_to?(:charset)
188
+
189
+ # Otherwise, default is utf-8
190
+ options[:encoding] ||= 'utf-8'
191
+
192
+ # Set xml:base for the document element, if defined
193
+ @base_uri = base_uri ? base_uri.to_s : nil
194
+
195
+ # Only parse as XML, no HTML mode
196
+ ::REXML::Document.new(input.respond_to?(:read) ? input.read : input.to_s)
197
+ end
198
+ end
199
+
200
+ # Accessor methods to mask native elements & attributes
201
+
202
+ ##
203
+ # Return proxy for document root
204
+ def root
205
+ @root ||= NodeProxy.new(@doc.root) if @doc && @doc.root
206
+ end
207
+
208
+ ##
209
+ # Document errors
210
+ def doc_errors
211
+ []
212
+ end
213
+ end
214
+ end
215
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdf-rdfxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.3.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,11 +10,11 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2011-09-14 00:00:00.000000000 Z
13
+ date: 2012-03-12 00:00:00.000000000Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rdf
17
- requirement: &2161650980 !ruby/object:Gem::Requirement
17
+ requirement: &70358239660100 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,32 +22,54 @@ dependencies:
22
22
  version: 0.3.4
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2161650980
25
+ version_requirements: *70358239660100
26
26
  - !ruby/object:Gem::Dependency
27
- name: nokogiri
28
- requirement: &2161650360 !ruby/object:Gem::Requirement
27
+ name: rdf-xsd
28
+ requirement: &70358239659640 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
32
32
  - !ruby/object:Gem::Version
33
- version: 1.4.4
33
+ version: 0.3.5
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *2161650360
36
+ version_requirements: *70358239659640
37
+ - !ruby/object:Gem::Dependency
38
+ name: nokogiri
39
+ requirement: &70358239688860 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: 1.5.0
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *70358239688860
48
+ - !ruby/object:Gem::Dependency
49
+ name: equivalent-xml
50
+ requirement: &70358239688400 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: 0.2.8
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *70358239688400
37
59
  - !ruby/object:Gem::Dependency
38
60
  name: open-uri-cached
39
- requirement: &2161649680 !ruby/object:Gem::Requirement
61
+ requirement: &70358239687940 !ruby/object:Gem::Requirement
40
62
  none: false
41
63
  requirements:
42
64
  - - ! '>='
43
65
  - !ruby/object:Gem::Version
44
- version: '0'
66
+ version: 0.0.4
45
67
  type: :development
46
68
  prerelease: false
47
- version_requirements: *2161649680
69
+ version_requirements: *70358239687940
48
70
  - !ruby/object:Gem::Dependency
49
71
  name: spira
50
- requirement: &2161648620 !ruby/object:Gem::Requirement
72
+ requirement: &70358239687480 !ruby/object:Gem::Requirement
51
73
  none: false
52
74
  requirements:
53
75
  - - ! '>='
@@ -55,21 +77,43 @@ dependencies:
55
77
  version: 0.0.12
56
78
  type: :development
57
79
  prerelease: false
58
- version_requirements: *2161648620
80
+ version_requirements: *70358239687480
59
81
  - !ruby/object:Gem::Dependency
60
82
  name: rspec
61
- requirement: &2161647120 !ruby/object:Gem::Requirement
83
+ requirement: &70358239687020 !ruby/object:Gem::Requirement
62
84
  none: false
63
85
  requirements:
64
86
  - - ! '>='
65
87
  - !ruby/object:Gem::Version
66
- version: 2.5.0
88
+ version: 2.8.0
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: *70358239687020
92
+ - !ruby/object:Gem::Dependency
93
+ name: rdf-isomorphic
94
+ requirement: &70358239686560 !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
99
+ version: 0.3.4
100
+ type: :development
101
+ prerelease: false
102
+ version_requirements: *70358239686560
103
+ - !ruby/object:Gem::Dependency
104
+ name: rdf-n3
105
+ requirement: &70358239686100 !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 0.3.4
67
111
  type: :development
68
112
  prerelease: false
69
- version_requirements: *2161647120
113
+ version_requirements: *70358239686100
70
114
  - !ruby/object:Gem::Dependency
71
115
  name: rdf-spec
72
- requirement: &2161627120 !ruby/object:Gem::Requirement
116
+ requirement: &70358239685640 !ruby/object:Gem::Requirement
73
117
  none: false
74
118
  requirements:
75
119
  - - ! '>='
@@ -77,10 +121,10 @@ dependencies:
77
121
  version: 0.3.4
78
122
  type: :development
79
123
  prerelease: false
80
- version_requirements: *2161627120
124
+ version_requirements: *70358239685640
81
125
  - !ruby/object:Gem::Dependency
82
126
  name: yard
83
- requirement: &2161626020 !ruby/object:Gem::Requirement
127
+ requirement: &70358239685180 !ruby/object:Gem::Requirement
84
128
  none: false
85
129
  requirements:
86
130
  - - ! '>='
@@ -88,7 +132,7 @@ dependencies:
88
132
  version: 0.6.0
89
133
  type: :development
90
134
  prerelease: false
91
- version_requirements: *2161626020
135
+ version_requirements: *70358239685180
92
136
  description: RDF::RDFXML is an RDF/XML reader and writer for the RDF.rb library suite.
93
137
  email: public-rdf-ruby@w3.org
94
138
  executables: []
@@ -104,8 +148,9 @@ files:
104
148
  - lib/rdf/rdfxml/format.rb
105
149
  - lib/rdf/rdfxml/patches/array_hacks.rb
106
150
  - lib/rdf/rdfxml/patches/graph_properties.rb
107
- - lib/rdf/rdfxml/patches/literal_hacks.rb
108
151
  - lib/rdf/rdfxml/patches/nokogiri_hacks.rb
152
+ - lib/rdf/rdfxml/reader/nokogiri.rb
153
+ - lib/rdf/rdfxml/reader/rexml.rb
109
154
  - lib/rdf/rdfxml/reader.rb
110
155
  - lib/rdf/rdfxml/version.rb
111
156
  - lib/rdf/rdfxml/vocab.rb
@@ -132,8 +177,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
177
  version: '0'
133
178
  requirements: []
134
179
  rubyforge_project: rdf-rdfxml
135
- rubygems_version: 1.8.10
180
+ rubygems_version: 1.8.17
136
181
  signing_key:
137
182
  specification_version: 3
138
183
  summary: RDF/XML reader/writer for RDF.rb.
139
184
  test_files: []
185
+ has_rdoc: false
@@ -1,156 +0,0 @@
1
- # Use Nokogiri or LibXML when available, and REXML otherwise:
2
- begin
3
- require 'nokogiri'
4
- rescue LoadError => e
5
- begin
6
- require 'libxml'
7
- rescue LoadError => e
8
- :rexml
9
- end
10
- end
11
-
12
- module RDF; class Literal
13
- ##
14
- # An XML literal.
15
- #
16
- # @see http://www.w3.org/TR/rdf-concepts/#section-XMLLiteral
17
- # @see http://www.w3.org/TR/rdfa-core/#s_xml_literals
18
- # @since 0.2.1
19
- class XML < Literal
20
- ##
21
- # @param [Object] value
22
- # @option options [String] :lexical (nil)
23
- # @option options [Hash] :namespaces ({}) Use "" to declare default namespace
24
- # @option options [Symbol] :language (nil)
25
- # @option options [:nokogiri, :libxml, or :rexml] :library
26
- def initialize(value, options = {})
27
- options[:namespaces] ||= {}
28
-
29
- @library = case options[:library]
30
- when nil
31
- case
32
- when defined?(::Nokogiri) then :nokogiri
33
- when defined?(::LibXML) then :libxml
34
- else :rexml
35
- end
36
- when :nokogiri, :libxml, :rexml
37
- options[:library]
38
- else
39
- raise ArgumentError.new("expected :rexml, :libxml or :nokogiri, but got #{options[:library].inspect}")
40
- end
41
-
42
- @datatype = options[:datatype] || DATATYPE
43
- @string = options[:lexical] if options.has_key?(:lexical)
44
- @object = parse_value(value, options)
45
- @string = serialize_nodeset(@object)
46
- end
47
-
48
- ##
49
- # Converts the literal into its canonical lexical representation.
50
- #
51
- # @return [Literal]
52
- # @see http://www.w3.org/TR/xml-exc-c14n/
53
- def canonicalize
54
- # This is the opportunity to use exclusive canonicalization library
55
- self
56
- end
57
-
58
- ##
59
- # Returns the value as a string.
60
- #
61
- # @return [String]
62
- def to_s
63
- @string
64
- end
65
-
66
- private
67
-
68
- def parse_value(value, options)
69
- ns_hash = {}
70
- options[:namespaces].each_pair do |prefix, uri|
71
- prefix = prefix.to_s
72
- attr = prefix.empty? ? "xmlns" : "xmlns:#{prefix}"
73
- ns_hash[attr] = uri.to_s
74
- end
75
-
76
- case @library
77
- when :nokogiri then parse_value_nokogiri(value, ns_hash, options)
78
- when :libxml then parse_value_libxml(value, ns_hash, options)
79
- when :rexml then parse_value_rexml(value, ns_hash, options)
80
- else value.to_s
81
- end
82
- end
83
-
84
- def serialize_nodeset(object)
85
- case @library
86
- when :nokogiri then serialize_nodeset_nokogiri(object)
87
- when :libxml then serialize_nodeset_libxml(object)
88
- when :rexml then serialize_nodeset_rexml(object)
89
- else object
90
- end
91
- end
92
-
93
- # Nokogiri implementations
94
- if defined?(::Nokogiri)
95
- # A somewhat half-hearted attempt at C14N.
96
- # Main problem is that it promotes all namespaces to top element, instead of demoting them
97
- # to the required element, and does not properly order either namespaces or attributes.
98
- #
99
- # An open-issue in Nokogiri is to add support for C14N from the underlying libxml2 libraries.
100
- def parse_value_nokogiri(value, ns_hash, options)
101
- elements = if value.is_a?(Nokogiri::XML::NodeSet)
102
- value
103
- else
104
- ns_strs = []
105
- ns_hash.each_pair {|a, u| ns_strs << "#{a}=\"#{u}\""}
106
- # Add inherited namespaces to created root element so that they're inherited to sub-elements
107
- Nokogiri::XML::Document.parse("<foo #{ns_strs.join(" ")}>#{value.to_s}</foo>").root.children
108
- end
109
-
110
- elements.map do |c|
111
- if c.is_a?(Nokogiri::XML::Element)
112
- c = Nokogiri::XML.parse(c.dup.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS)).root
113
-
114
- # Apply defined namespaces
115
- ns_hash.each_pair do |prefix, href|
116
- c[prefix] = href unless c.namespaces[prefix]
117
- end
118
-
119
- # Add language
120
- if options[:language] && c["lang"].to_s.empty?
121
- c["xml:lang"] = options[:language].to_s
122
- end
123
- end
124
- c
125
- end
126
- end
127
-
128
- def serialize_nodeset_nokogiri(object)
129
- object.map {|c| c.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS)}.join("")
130
- end
131
- end # Nokogiri
132
-
133
- if defined?(::LibXML)
134
- # This should use Document#canonicalize if as and when it is available in libxml-ruby
135
- def parse_value_libxml(value, ns_strs, language)
136
- # Fixme
137
- end
138
-
139
- def serialize_nodeset_libxml(object)
140
- # Fixme
141
- end
142
- end # LibXML
143
-
144
- # REXML
145
- # This could make use of the XMLCanonicalizer gem (http://rubygems.org/gems/XMLCanonicalizer)
146
- # But, it hasn't been touched since 2007 and relies on log4r.
147
- def parse_value_rexml(value, ns_strs, language)
148
- # Fixme
149
- end
150
-
151
- def serialize_nodeset_rexml(object)
152
- # Fixme
153
- end
154
-
155
- end # class XML
156
- end; end