rsxml 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,10 @@
1
1
  = rsxml
2
2
 
3
- A Ruby library to translate XML documents into an s-expression representation, and back again
3
+ A Ruby library to translate XML documents into an s-expression representation, and back again, in the style of SXML : http://en.wikipedia.org/wiki/SXML
4
+
5
+ Why would you want to do this? Well, s-expressions can be compared natively with <tt>==</tt> in Ruby, are easy to read
6
+ and editors indent them nicely when embedded in code. These features make them very suitable for writing readable
7
+ XML generation code and readable tests for XML generating code
4
8
 
5
9
  Rsxml represents XML documents as s-expressions thus :
6
10
 
@@ -17,14 +21,51 @@ It is easy to convert XML documents to Rsxml representation and back again :
17
21
  Rsxml.to_rsxml(xml)
18
22
  => ["Foo", {"foofoo"=>"10"}, ["Bar", "barbar"]]
19
23
 
20
- If present, namespaces and namespace prefixes are retained :
24
+ === Namespaces
21
25
 
22
- xml = Rsxml.to_xml(["foo:foofoo", {"xmlns"=>"http://bar.com/bar", "xmlns:foo"=>"http://foo.com/foo", "foo:bar"=>1, "foo:baz"=>"baz"}])
23
- => '<foo:foofoo foo:baz="baz" foo:bar="1" xmlns="http://bar.com/bar" xmlns:foo="http://foo.com/foo"></foo:foofoo>'
26
+ XML namespaces are dealt with straightforwardly. When an XML document is converted to Rsxml, namespaces are preserved, and you can specify namespaces in an Rsxml structure in two ways:
27
+ * using QName prefixes and declarative attributes, exactly as with XML
28
+ * using exploded QNames consisting of <tt>[local_name, prefix, uri]</tt> triples and <tt>[local_name, prefix]</tt> pairs
24
29
 
30
+ === Converting to Rsxml
25
31
 
26
- Rsxml.to_rsxml(xml)
27
- => ["foo:foofoo", {"foo:baz"=>"baz", "foo:bar"=>"1", "xmlns:foo"=>"http://foo.com/foo", "xmlns"=>"http://bar.com/bar"}]
32
+ When you convert an XML document to Rsxml you can choose either <tt>:xml</tt> or <tt>:exploded</tt> style
33
+
34
+ ==== <tt>:xml</tt> style
35
+
36
+ In <tt>:xml</tt> style namespaces are declared using attributes, and namespaces are referenced using
37
+ prefixed QNames, just as in XML
38
+
39
+ Rsxml.to_rsxml('<foo:foofoo xmlns:foo="http://foo.com/foo" foo:bar="barbar"/>', :style=>:xml)
40
+ => ["foo:foofoo", {"foo:bar"=>"barbar", "xmlns:foo"=>"http://foo.com/foo"}]
41
+
42
+ ==== <tt>:exploded</tt> style
43
+
44
+ In <tt>:exploded</tt> style namespaces are not declared using attributes, and QNames are specified
45
+ using <tt>[local_name, prefix, uri]</tt> triples
46
+
47
+ Rsxml.to_rsxml('<foo:foofoo xmlns:foo="http://foo.com/foo" foo:bar="barbar"/>', :style=>:exploded)
48
+ => [["foofoo", "foo", "http://foo.com/foo"], {["bar", "foo", "http://foo.com/foo"]=>"barbar"}]
49
+
50
+ === Converting to XML
51
+
52
+ Rsxml styles can be mixed, and unnecessary namespace references can be skipped for readability
53
+
54
+ Rsxml.to_xml([["foofoo", "foo", "http://foo.com/foo"], {"foo:bar"=>"1", ["baz", "foo"]=>"2"}])
55
+ => '<foo:foofoo foo:baz="2" foo:bar="1" xmlns:foo="http://foo.com/foo"></foo:foofoo>'
56
+
57
+ === Fragments
58
+
59
+ XML Fragments, without proper namespace declarations, can be parsed by passing a Hash of namespace
60
+ prefix bindings
61
+
62
+ Rsxml.to_rsxml('<foo:foofoo foo:bar="barbar"/>', :ns=>{"foo"=>"http://foo.com/foo"}, :style=>:xml)
63
+ => ["foo:foofoo", {"foo:bar"=>"barbar"}]
64
+
65
+ Fragments can be generated similarly :
66
+
67
+ Rsxml.to_xml(["foo:foofoo", {"foo:bar"=>"barbar"}], :ns=>{"foo"=>"http://foo.com/foo"})
68
+ => '<foo:foofoo foo:bar="barbar"></foo:foofoo>'
28
69
 
29
70
  == Install
30
71
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.4
1
+ 0.2.0
@@ -1,26 +1,36 @@
1
+ $: << File.expand_path('../../lib', __FILE__)
2
+
1
3
  require 'nokogiri'
2
4
  require 'builder'
5
+ require 'rsxml/util'
6
+ require 'rsxml/namespace'
7
+ require 'rsxml/visitor'
8
+ require 'rsxml/sexp'
9
+ require 'rsxml/xml'
3
10
 
4
11
  module Rsxml
12
+ class << self
13
+ include Util
14
+ attr_accessor :logger
15
+ end
16
+
5
17
  module_function
6
18
 
7
- def check_opts(constraints, opts)
8
- (opts||{}).each do |k,v|
9
- raise "opt not permitted: #{k}" if !constraints.has_key?(k)
10
- constraint = constraints[k]
11
- end
19
+ def log
20
+ yield(logger) if logger
12
21
  end
13
22
 
23
+ TO_XML_OPTS = {:ns=>nil}
24
+
14
25
  # convert an Rsxml s-expression representation of an XML document to XML
15
26
  # Rsxml.to_xml(["Foo", {"foofoo"=>"10"}, ["Bar", "barbar"], ["Baz"]])
16
27
  # => '<Foo foofoo="10"><Bar>barbar</Bar><Baz></Baz></Foo>'
17
- def to_xml(rsxml, &transformer)
18
- xml = Builder::XmlMarkup.new
19
- Sexp.write_xml(xml, rsxml, &transformer)
20
- xml.target!
28
+ def to_xml(rsxml, opts={})
29
+ opts = check_opts(TO_XML_OPTS, opts)
30
+ Sexp.traverse(rsxml, Visitor::WriteXmlVisitor.new, Visitor::Context.new(opts[:ns])).to_s
21
31
  end
22
32
 
23
- TO_RSXML_OPTS = {:ns=>nil}
33
+ TO_RSXML_OPTS = {:ns=>nil}.merge(Visitor::BuildRsxmlVisitor::OPTS)
24
34
 
25
35
  # convert an XML string to an Rsxml s-expression representation
26
36
  # Rsxml.to_rsxml('<Foo foofoo="10"><Bar>barbar</Bar><Baz></Baz></Foo>')
@@ -33,10 +43,10 @@ module Rsxml
33
43
  # Rsxml.to_rsxml(fragment, :ns=>{"foo"=>"http://foo.com/foo", ""=>"http://baz.com/baz"})
34
44
  # => ["foo:Foo", {"foo:foofoo"=>"10", "xmlns:foo"=>"http://foo.com/foo", "xmlns"=>"http://baz.com/baz"}, ["Bar", "barbar"], ["Baz"]]
35
45
  def to_rsxml(doc, opts={})
36
- check_opts(TO_RSXML_OPTS, opts)
37
- doc = Xml.wrap_fragment(doc, opts[:ns])
46
+ opts = check_opts(TO_RSXML_OPTS, opts)
47
+ doc = Xml.wrap_fragment(doc, opts.delete(:ns))
38
48
  root = Xml.unwrap_fragment(Nokogiri::XML(doc).children.first)
39
- Xml.read_xml(root, [])
49
+ Xml.traverse(root, Visitor::BuildRsxmlVisitor.new(opts)).sexp
40
50
  end
41
51
 
42
52
  # compare two documents in XML or Rsxml. returns +true+ if they are identical, and
@@ -47,140 +57,4 @@ module Rsxml
47
57
  Sexp.compare(sexp_a, sexp_b)
48
58
  end
49
59
 
50
- module Sexp
51
- module_function
52
-
53
- def write_xml(xml, sexp, path="", &transformer)
54
- tag, attrs, children = decompose_sexp(sexp)
55
-
56
- if transformer
57
- txtag, txattrs = transformer.call(tag, attrs, path)
58
- else
59
- txtag, txattrs = [tag, attrs]
60
- end
61
-
62
- cp = [path, tag].join("/")
63
- xml.__send__(txtag, txattrs) do
64
- children.each_with_index do |child, i|
65
- if child.is_a?(Array)
66
- write_xml(xml, child, "#{cp}[#{i}]", &transformer)
67
- else
68
- xml << child
69
- end
70
- end
71
- end
72
- end
73
-
74
- def decompose_sexp(sexp)
75
- raise "invalid rsxml: #{rsxml.inspect}" if sexp.length<1
76
- tag = sexp[0].to_s
77
- if sexp[1].is_a?(Hash)
78
- attrs = sexp[1]
79
- children = sexp[2..-1]
80
- else
81
- attrs = {}
82
- children = sexp[1..-1]
83
- end
84
- [tag, attrs, children]
85
- end
86
-
87
- class ComparisonError < RuntimeError
88
- attr_reader :path
89
- def initialize(msg, path)
90
- super("[#{path}]: #{msg}")
91
- @path = path
92
- end
93
- end
94
-
95
- def compare(sexpa, sexpb, path=nil)
96
- taga, attrsa, childrena = decompose_sexp(sexpa)
97
- tagb, attrsb, childrenb = decompose_sexp(sexpb)
98
-
99
- raise ComparisonError.new("element names differ: '#{taga}', '#{tagb}'", path) if taga != tagb
100
- raise ComparisonError.new("attributes differ", path) if attrsa != attrsb
101
- raise ComparisonError.new("child cound differes", path) if childrena.length != childrenb.length
102
-
103
- path = [path, taga].compact.join("/")
104
- (0...childrena.length).each do |i|
105
- if childrena[i].is_a?(Array) && childrenb[i].is_a?(Array)
106
- compare(childrena[i], childrenb[i], path)
107
- else
108
- raise ComparisonError.new("content differs: '#{childrena[i]}', '#{childrenb[i]}'", path) if childrena[i] != childrenb[i]
109
- end
110
- end
111
- true
112
- end
113
- end
114
-
115
- module Xml
116
- module_function
117
-
118
- WRAP_ELEMENT = "RsxmlXmlWrapper"
119
-
120
- def wrap_fragment(fragment, ns_prefixes)
121
- return fragment if !ns_prefixes
122
-
123
- ns_attrs = Hash[*ns_prefixes.map do |prefix,href|
124
- prefix = nil if prefix.to_s.length == 0
125
- [["xmlns", prefix].compact.join(":"), href]
126
- end.flatten]
127
- xml = Builder::XmlMarkup.new
128
- xml.__send__(WRAP_ELEMENT, ns_attrs) do
129
- xml << fragment
130
- end
131
- xml.target!
132
- end
133
-
134
- def unwrap_fragment(node)
135
- if node.name==WRAP_ELEMENT
136
- node.children.first
137
- else
138
- node
139
- end
140
- end
141
-
142
- def read_xml(node, ns_stack)
143
- prefix = node.namespace.prefix if node.namespace
144
- tag = node.name
145
- ns_tag = [prefix,tag].compact.join(":")
146
-
147
- attrs = read_attributes(node.attributes)
148
- attrs = attrs.merge(namespace_attributes(node.namespaces, ns_stack))
149
- attrs = nil if attrs.empty?
150
-
151
- children = node.children.map do |child|
152
- if child.text?
153
- child.text
154
- else
155
- begin
156
- ns_stack.push(node.namespaces)
157
- read_xml(child, ns_stack)
158
- ensure
159
- ns_stack.pop
160
- end
161
- end
162
- end
163
-
164
- [ns_tag, attrs, *children].compact
165
- end
166
-
167
- def read_attributes(attrs)
168
- Hash[*attrs.map do |n, attr|
169
- prefix = attr.namespace.prefix if attr.namespace
170
- name = attr.name
171
- ns_name = [prefix,name].compact.join(":")
172
- [ns_name, attr.value]
173
- end.flatten]
174
- end
175
-
176
- def namespace_attributes(namespaces, ns_stack)
177
- Hash[*namespaces.map do |prefix,href|
178
- [prefix, href] if !find_namespace(prefix, ns_stack)
179
- end.compact.flatten]
180
- end
181
-
182
- def find_namespace(prefix, ns_stack)
183
- ns_stack.reverse.find{ |nsh| nsh.has_key?(prefix)}
184
- end
185
- end
186
60
  end
@@ -0,0 +1,175 @@
1
module Rsxml
  # Namespace helpers shared by the Rsxml traversal code.
  #
  # A QName is handled in one of two spellings :
  # * compact : a String such as <tt>"foo:bar"</tt> or <tt>"bar"</tt>
  # * exploded : an Array <tt>[local_part, prefix, uri]</tt>, or a
  #   <tt>[local_part, prefix]</tt> pair
  #
  # +ns_stack+ arguments are Arrays of <tt>{prefix=>uri}</tt> Hashes, searched
  # from the last element backwards. The default namespace uses the prefix
  # <tt>""</tt>
  module Namespace
    module_function

    # compact all attribute QNames to Strings
    #
    # returns a new Hash with every key passed through +compact_qname+
    def compact_attr_qnames(ns_stack, attrs)
      Hash[attrs.map do |name, value|
        [compact_qname(ns_stack, name), value]
      end]
    end

    # explode attribute QNames to [LocalPart, prefix, URI] triples
    #
    # attribute names which end up with no prefix (or an empty prefix)
    # are keyed by their bare local name
    def explode_attr_qnames(ns_stack, attrs)
      Hash[attrs.map do |name, value|
        exploded = explode_qname(ns_stack, name, true)
        # destructuring a plain String leaves +prefix+ nil
        local_name, prefix = exploded
        if !prefix || prefix == ""
          [local_name, value]
        else
          [exploded, value]
        end
      end]
    end

    # produce a QName String from a [LocalPart, prefix, URI] triple
    #
    # Strings are returned unchanged. raises if a URI is given without a
    # prefix, or if a prefix other than "xmlns" is not bound in +ns_stack+
    # (or is bound to a different URI than the one given)
    def compact_qname(ns_stack, name)
      return name if name.is_a?(String)

      local_part, prefix, uri = name
      raise "invalid name: #{name}" if !prefix && uri
      return local_part unless prefix

      if prefix != "xmlns"
        ns = find_namespace_uri(ns_stack, prefix, uri)
        raise "namespace prefix not bound to a namespace: '#{prefix}'" if !ns
      end
      # an empty prefix (default namespace) contributes nothing to the String
      [prefix, local_part].map { |s| s.to_s unless s.to_s.empty? }.compact.join(':')
    end

    # split a QName into [LocalPart, prefix, URI] triple
    #
    # * an Array with a non-nil prefix is returned as given
    # * an Array with a nil prefix but a URI is invalid and raises
    # * any other Array yields just its local part
    # * a prefixed String is resolved against +ns_stack+, raising if the
    #   prefix is unbound. with +attr+ set, an "xmlns" prefix is returned
    #   as a <tt>[local_part, "xmlns"]</tt> pair without lookup
    # * an unprefixed String is returned as is for attributes, and for tags
    #   unless a default namespace is bound, in which case
    #   <tt>[local_part, "", default_uri]</tt> is returned
    def explode_qname(ns_stack, qname, attr=false)
      if qname.is_a?(Array)
        if qname.length > 1 && !qname[1].nil?
          return qname
        elsif qname.length > 1 && qname[1].nil? && !qname[2].nil?
          raise "invalid name: #{qname.inspect}"
        else
          return qname[0]
        end
      end

      local_part, prefix = split_qname(qname)
      if prefix
        return [local_part, prefix] if prefix == "xmlns" && attr

        uri = find_namespace_uri(ns_stack, prefix)
        raise "namespace prefix not bound: '#{prefix}'" if !uri
        [local_part, prefix, uri]
      elsif attr
        local_part
      else
        default_uri = find_namespace_uri(ns_stack, "")
        default_uri ? [local_part, "", default_uri] : local_part
      end
    end

    # split a qname String into a [local_part, prefix] pair
    #
    # Arrays, and Strings which are not of the form "prefix:local", are
    # returned unchanged
    def split_qname(qname)
      return qname if qname.is_a?(Array)

      # \A and \z anchor the whole String. ^ and $ only anchor a line, which
      # let a multi-line name with several colons through to the split
      if qname =~ /\A[^:]+:[^:]+\z/
        qname.split(':').reverse
      else
        qname
      end
    end

    # returns the namespace uri for a prefix, if declared in the stack
    #
    # the most recently pushed binding wins. if +uri_check+ is given and the
    # prefix is bound to a different uri, raises
    def find_namespace_uri(ns_stack, prefix, uri_check=nil)
      tns = ns_stack.reverse_each.find { |ns| ns.has_key?(prefix) }
      uri = tns[prefix] if tns
      raise "prefix: '#{prefix}' is bound to uri: '#{uri}', but should be '#{uri_check}'" if uri_check && uri && uri!=uri_check
      uri
    end

    # extract a Hash of {prefix=>uri} mappings declared in attributes
    #
    # "xmlns:foo" attributes bind prefix "foo", a bare "xmlns" attribute
    # binds the default prefix ""
    def extract_declared_namespace_bindings(attrs)
      Hash[attrs.map do |name, value|
        local_part, prefix = split_qname(name)
        if prefix == "xmlns"
          [local_part, value]
        elsif !prefix && local_part == "xmlns"
          ["", value]
        end
      end.compact]
    end

    # extract a Hash of {prefix=>uri} mappings from exploded QName tag and attrs
    #
    # only names carrying both a prefix and a uri contribute. raises if the
    # same prefix is given two different uris
    def extract_explicit_namespace_bindings(tag, attrs)
      _tag_local_part, tag_prefix, tag_uri = tag
      ns = {}
      ns[tag_prefix] = tag_uri if tag_prefix && tag_uri

      attrs.each do |name, _value|
        _attr_local_part, attr_prefix, attr_uri = name
        next unless attr_prefix && attr_uri

        raise "bindings clash: '#{attr_prefix}'=>'#{ns[attr_prefix]}' , '#{attr_prefix}'=>'#{attr_uri}'" if ns.has_key?(attr_prefix) && ns[attr_prefix]!=attr_uri
        ns[attr_prefix] = attr_uri
      end
      ns
    end

    # figure out which explicit namespaces need declaring
    #
    # +ns_stack+ is the stack of namespace bindings
    # +ns_explicit+ is the explicit refs for a tag
    def undeclared_namespace_bindings(ns_stack, ns_explicit)
      Hash[ns_explicit.map do |prefix, uri|
        [prefix, uri] if !find_namespace_uri(ns_stack, prefix, uri)
      end.compact]
    end

    # produce a Hash of namespace declaration attributes with exploded
    # QNames, from
    # a Hash of namespace prefix bindings
    def exploded_namespace_declarations(ns)
      Hash[ns.map do |prefix, uri|
        if prefix == ""
          ["xmlns", uri]
        else
          [[prefix, "xmlns"], uri]
        end
      end]
    end

    # merges two sets of namespace bindings, raising error on clash
    #
    # neither argument is modified
    def merge_namespace_bindings(ns1, ns2)
      m = ns1.clone
      ns2.each do |k, v|
        raise "bindings clash: '#{k}'=>'#{m[k]}' , '#{k}'=>'#{v}'" if m.has_key?(k) && m[k]!=v
        m[k] = v
      end
      m
    end

    # given the existing +ns_stack+ of ns bindings, a +tag+ and its +attributes+,
    # return a pair <tt>[ns_bindings, ns_additional_decls]</tt> containing
    # ns bindings for the stack, and additional required (exploded) namespace
    # declarations to be added to the attributes
    def namespace_bindings_declarations(ns_stack, tag, attrs)
      ns_declared = extract_declared_namespace_bindings(attrs)
      ns_explicit = extract_explicit_namespace_bindings(tag, attrs)
      ns_undeclared = undeclared_namespace_bindings(ns_stack + [ns_declared], ns_explicit)
      ns_bindings = merge_namespace_bindings(ns_declared, ns_undeclared)

      # and declarations for undeclared namespaces
      ns_additional_decls = exploded_namespace_declarations(ns_undeclared)

      [ns_bindings, ns_additional_decls]
    end

  end
end
@@ -0,0 +1,90 @@
1
module Rsxml
  # Operations on Rsxml s-expressions : Arrays of the form
  # <tt>[tag, attrs_hash (optional), *children]</tt> where each child is
  # either a nested s-expression Array or a text value
  module Sexp

    module_function

    # pre-order traversal of the sexp, calling methods on
    # the visitor with each node
    #
    # +visitor+ receives <tt>tag(context, etag, eattrs)</tt> with a block
    # which processes the children, and <tt>text(context, child)</tt> for
    # each non-Array child. +context+ supplies the +ns_stack+ and node
    # tracking used during the walk. returns the +visitor+
    def traverse(sexp, visitor, context=Visitor::Context.new)
      tag, attrs, children = decompose_sexp(sexp)

      ns_bindings, ns_additional_decls = Namespace::namespace_bindings_declarations(context.ns_stack, tag, attrs)

      context.ns_stack.push(ns_bindings)
      begin
        # exploding names can raise on an unbound prefix, so it happens
        # inside the begin/ensure which pops the bindings pushed above
        etag = Namespace::explode_qname(context.ns_stack, tag)
        eattrs = Namespace::explode_attr_qnames(context.ns_stack, attrs)

        eattrs = eattrs.merge(ns_additional_decls)

        visitor.tag(context, etag, eattrs) do
          context.push_node([etag, eattrs])
          begin
            children.each do |child|
              if child.is_a?(Array)
                traverse(child, visitor, context)
              else
                visitor.text(context, child)
                context.processed_node(child)
              end
            end
          ensure
            context.pop_node
          end
        end
      ensure
        context.ns_stack.pop
      end

      visitor
    end

    # decompose a sexp to a [tag, attrs, children] list
    #
    # an Array tag (exploded QName) is kept as is, anything else is
    # converted with +to_s+. +attrs+ is the second element if it is a Hash,
    # otherwise an empty Hash. raises if the sexp has no elements
    def decompose_sexp(sexp)
      raise "invalid rsxml: #{sexp.inspect}" if sexp.length<1
      if sexp[0].is_a?(Array)
        tag = sexp[0]
      else
        tag = sexp[0].to_s
      end
      if sexp[1].is_a?(Hash)
        attrs = sexp[1]
        children = sexp[2..-1]
      else
        attrs = {}
        children = sexp[1..-1]
      end
      [tag, attrs, children]
    end

    # raised by +compare+ when two documents differ. +path+ is the
    # "/" separated list of tags leading to the difference
    class ComparisonError < RuntimeError
      attr_reader :path
      def initialize(msg, path)
        super("[#{path}]: #{msg}")
        @path = path
      end
    end

    # recursively compare two sexps. returns +true+ if tags, attributes
    # and children all match, and raises ComparisonError describing the
    # first difference found otherwise
    def compare(sexpa, sexpb, path=nil)
      taga, attrsa, childrena = decompose_sexp(sexpa)
      tagb, attrsb, childrenb = decompose_sexp(sexpb)

      raise ComparisonError.new("element names differ: '#{taga}', '#{tagb}'", path) if taga != tagb
      raise ComparisonError.new("attributes differ", path) if attrsa != attrsb
      raise ComparisonError.new("child count differs", path) if childrena.length != childrenb.length

      path = [path, taga].compact.join("/")
      (0...childrena.length).each do |i|
        if childrena[i].is_a?(Array) && childrenb[i].is_a?(Array)
          compare(childrena[i], childrenb[i], path)
        else
          raise ComparisonError.new("content differs: '#{childrena[i]}', '#{childrenb[i]}'", path) if childrena[i] != childrenb[i]
        end
      end
      true
    end
  end

end