rsxml 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,10 @@
1
1
  = rsxml
2
2
 
3
- A Ruby library to translate XML documents into an s-expression representation, and back again
3
+ A Ruby library to translate XML documents into an s-expression representation, and back again, in the style of SXML : http://en.wikipedia.org/wiki/SXML
4
+
5
+ Why would you want to do this ? Well, s-expressions can be == compared natively in Ruby, are easy to read
6
+ and editors indent them nicely when embedded in code. These features make them very suitable for writing readable
7
+ XML generation code and readable tests for XML generating code
4
8
 
5
9
  Rsxml represents XML documents as s-expressions thus :
6
10
 
@@ -17,14 +21,51 @@ It is easy to convert XML documents to Rsxml representation and back again :
17
21
  Rsxml.to_rsxml(xml)
18
22
  => ["Foo", {"foofoo"=>"10"}, ["Bar", "barbar"]]
19
23
 
20
- If present, namespaces and namespace prefixes are retained :
24
+ === Namespaces
21
25
 
22
- xml = Rsxml.to_xml(["foo:foofoo", {"xmlns"=>"http://bar.com/bar", "xmlns:foo"=>"http://foo.com/foo", "foo:bar"=>1, "foo:baz"=>"baz"}])
23
- => '<foo:foofoo foo:baz="baz" foo:bar="1" xmlns="http://bar.com/bar" xmlns:foo="http://foo.com/foo"></foo:foofoo>'
26
+ XML namespaces are dealt with straightforwardly. When an XML document is converted to Rsxml, namespaces are preserved, and you can specify namespaces in an Rsxml structure in two ways :
27
+ * using QName prefixes and declarative attributes, exactly as with XML
28
+ * using exploded QNames consisting of <tt>[local_name, prefix, uri]</tt> triples and <tt>[local_name, prefix]</tt> pairs
24
29
 
30
+ === Converting to Rsxml
25
31
 
26
- Rsxml.to_rsxml(xml)
27
- => ["foo:foofoo", {"foo:baz"=>"baz", "foo:bar"=>"1", "xmlns:foo"=>"http://foo.com/foo", "xmlns"=>"http://bar.com/bar"}]
32
+ When you convert an XML document to Rsxml you can choose either <tt>:xml</tt> or <tt>:exploded</tt> style
33
+
34
+ ==== <tt>:xml</tt> style
35
+
36
+ In <tt>:xml</tt> style namespaces are declared using attributes, and namespaces are referenced using
37
+ prefixed QNames, just as in XML
38
+
39
+ Rsxml.to_rsxml('<foo:foofoo xmlns:foo="http://foo.com/foo" foo:bar="barbar"/>', :style=>:xml)
40
+ => ["foo:foofoo", {"foo:bar"=>"barbar", "xmlns:foo"=>"http://foo.com/foo"}]
41
+
42
+ ==== <tt>:exploded</tt> style
43
+
44
+ In <tt>:exploded</tt> style namespaces are not declared using attributes, and QNames are specified
45
+ using <tt>[local_name, prefix, uri]</tt> triples
46
+
47
+ Rsxml.to_rsxml('<foo:foofoo xmlns:foo="http://foo.com/foo" foo:bar="barbar"/>', :style=>:exploded)
48
+ => [["foofoo", "foo", "http://foo.com/foo"], {["bar", "foo", "http://foo.com/foo"]=>"barbar"}]
49
+
50
+ === Converting to XML
51
+
52
+ Rsxml styles can be mixed, and unnecessary namespace references can be skipped for readability
53
+
54
+ Rsxml.to_xml([["foofoo", "foo", "http://foo.com/foo"], {"foo:bar"=>"1", ["baz", "foo"]=>"2"}])
55
+ => '<foo:foofoo foo:baz="2" foo:bar="1" xmlns:foo="http://foo.com/foo"></foo:foofoo>'
56
+
57
+ === Fragments
58
+
59
+ XML Fragments, without proper namespace declarations, can be parsed by passing a Hash of namespace
60
+ prefix bindings
61
+
62
+ Rsxml.to_rsxml('<foo:foofoo foo:bar="barbar"/>', :ns=>{"foo"=>"http://foo.com/foo"}, :style=>:xml)
63
+ => ["foo:foofoo", {"foo:bar"=>"barbar"}]
64
+
65
+ Fragments can be generated similarly :
66
+
67
+ Rsxml.to_xml(["foo:foofoo", {"foo:bar"=>"barbar"}], :ns=>{"foo"=>"http://foo.com/foo"})
68
+ => '<foo:foofoo foo:bar="barbar"></foo:foofoo>'
28
69
 
29
70
  == Install
30
71
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.4
1
+ 0.2.0
@@ -1,26 +1,36 @@
1
+ $: << File.expand_path('../../lib', __FILE__)
2
+
1
3
  require 'nokogiri'
2
4
  require 'builder'
5
+ require 'rsxml/util'
6
+ require 'rsxml/namespace'
7
+ require 'rsxml/visitor'
8
+ require 'rsxml/sexp'
9
+ require 'rsxml/xml'
3
10
 
4
11
  module Rsxml
12
+ class << self
13
+ include Util
14
+ attr_accessor :logger
15
+ end
16
+
5
17
  module_function
6
18
 
7
- def check_opts(constraints, opts)
8
- (opts||{}).each do |k,v|
9
- raise "opt not permitted: #{k}" if !constraints.has_key?(k)
10
- constraint = constraints[k]
11
- end
19
+ def log
20
+ yield(logger) if logger
12
21
  end
13
22
 
23
+ TO_XML_OPTS = {:ns=>nil}
24
+
14
25
  # convert an Rsxml s-expression representation of an XML document to XML
15
26
  # Rsxml.to_xml(["Foo", {"foofoo"=>"10"}, ["Bar", "barbar"], ["Baz"]])
16
27
  # => '<Foo foofoo="10"><Bar>barbar</Bar><Baz></Baz></Foo>'
17
- def to_xml(rsxml, &transformer)
18
- xml = Builder::XmlMarkup.new
19
- Sexp.write_xml(xml, rsxml, &transformer)
20
- xml.target!
28
+ def to_xml(rsxml, opts={})
29
+ opts = check_opts(TO_XML_OPTS, opts)
30
+ Sexp.traverse(rsxml, Visitor::WriteXmlVisitor.new, Visitor::Context.new(opts[:ns])).to_s
21
31
  end
22
32
 
23
- TO_RSXML_OPTS = {:ns=>nil}
33
+ TO_RSXML_OPTS = {:ns=>nil}.merge(Visitor::BuildRsxmlVisitor::OPTS)
24
34
 
25
35
  # convert an XML string to an Rsxml s-expression representation
26
36
  # Rsxml.to_rsxml('<Foo foofoo="10"><Bar>barbar</Bar><Baz></Baz></Foo>')
@@ -33,10 +43,10 @@ module Rsxml
33
43
  # Rsxml.to_rsxml(fragment, :ns=>{"foo"=>"http://foo.com/foo", ""=>"http://baz.com/baz"})
34
44
  # => ["foo:Foo", {"foo:foofoo"=>"10", "xmlns:foo"=>"http://foo.com/foo", "xmlns"=>"http://baz.com/baz"}, ["Bar", "barbar"], ["Baz"]]
35
45
  def to_rsxml(doc, opts={})
36
- check_opts(TO_RSXML_OPTS, opts)
37
- doc = Xml.wrap_fragment(doc, opts[:ns])
46
+ opts = check_opts(TO_RSXML_OPTS, opts)
47
+ doc = Xml.wrap_fragment(doc, opts.delete(:ns))
38
48
  root = Xml.unwrap_fragment(Nokogiri::XML(doc).children.first)
39
- Xml.read_xml(root, [])
49
+ Xml.traverse(root, Visitor::BuildRsxmlVisitor.new(opts)).sexp
40
50
  end
41
51
 
42
52
  # compare two documents in XML or Rsxml. returns +true+ if they are identical, and
@@ -47,140 +57,4 @@ module Rsxml
47
57
  Sexp.compare(sexp_a, sexp_b)
48
58
  end
49
59
 
50
- module Sexp
51
- module_function
52
-
53
- def write_xml(xml, sexp, path="", &transformer)
54
- tag, attrs, children = decompose_sexp(sexp)
55
-
56
- if transformer
57
- txtag, txattrs = transformer.call(tag, attrs, path)
58
- else
59
- txtag, txattrs = [tag, attrs]
60
- end
61
-
62
- cp = [path, tag].join("/")
63
- xml.__send__(txtag, txattrs) do
64
- children.each_with_index do |child, i|
65
- if child.is_a?(Array)
66
- write_xml(xml, child, "#{cp}[#{i}]", &transformer)
67
- else
68
- xml << child
69
- end
70
- end
71
- end
72
- end
73
-
74
- def decompose_sexp(sexp)
75
- raise "invalid rsxml: #{rsxml.inspect}" if sexp.length<1
76
- tag = sexp[0].to_s
77
- if sexp[1].is_a?(Hash)
78
- attrs = sexp[1]
79
- children = sexp[2..-1]
80
- else
81
- attrs = {}
82
- children = sexp[1..-1]
83
- end
84
- [tag, attrs, children]
85
- end
86
-
87
- class ComparisonError < RuntimeError
88
- attr_reader :path
89
- def initialize(msg, path)
90
- super("[#{path}]: #{msg}")
91
- @path = path
92
- end
93
- end
94
-
95
- def compare(sexpa, sexpb, path=nil)
96
- taga, attrsa, childrena = decompose_sexp(sexpa)
97
- tagb, attrsb, childrenb = decompose_sexp(sexpb)
98
-
99
- raise ComparisonError.new("element names differ: '#{taga}', '#{tagb}'", path) if taga != tagb
100
- raise ComparisonError.new("attributes differ", path) if attrsa != attrsb
101
- raise ComparisonError.new("child cound differes", path) if childrena.length != childrenb.length
102
-
103
- path = [path, taga].compact.join("/")
104
- (0...childrena.length).each do |i|
105
- if childrena[i].is_a?(Array) && childrenb[i].is_a?(Array)
106
- compare(childrena[i], childrenb[i], path)
107
- else
108
- raise ComparisonError.new("content differs: '#{childrena[i]}', '#{childrenb[i]}'", path) if childrena[i] != childrenb[i]
109
- end
110
- end
111
- true
112
- end
113
- end
114
-
115
- module Xml
116
- module_function
117
-
118
- WRAP_ELEMENT = "RsxmlXmlWrapper"
119
-
120
- def wrap_fragment(fragment, ns_prefixes)
121
- return fragment if !ns_prefixes
122
-
123
- ns_attrs = Hash[*ns_prefixes.map do |prefix,href|
124
- prefix = nil if prefix.to_s.length == 0
125
- [["xmlns", prefix].compact.join(":"), href]
126
- end.flatten]
127
- xml = Builder::XmlMarkup.new
128
- xml.__send__(WRAP_ELEMENT, ns_attrs) do
129
- xml << fragment
130
- end
131
- xml.target!
132
- end
133
-
134
- def unwrap_fragment(node)
135
- if node.name==WRAP_ELEMENT
136
- node.children.first
137
- else
138
- node
139
- end
140
- end
141
-
142
- def read_xml(node, ns_stack)
143
- prefix = node.namespace.prefix if node.namespace
144
- tag = node.name
145
- ns_tag = [prefix,tag].compact.join(":")
146
-
147
- attrs = read_attributes(node.attributes)
148
- attrs = attrs.merge(namespace_attributes(node.namespaces, ns_stack))
149
- attrs = nil if attrs.empty?
150
-
151
- children = node.children.map do |child|
152
- if child.text?
153
- child.text
154
- else
155
- begin
156
- ns_stack.push(node.namespaces)
157
- read_xml(child, ns_stack)
158
- ensure
159
- ns_stack.pop
160
- end
161
- end
162
- end
163
-
164
- [ns_tag, attrs, *children].compact
165
- end
166
-
167
- def read_attributes(attrs)
168
- Hash[*attrs.map do |n, attr|
169
- prefix = attr.namespace.prefix if attr.namespace
170
- name = attr.name
171
- ns_name = [prefix,name].compact.join(":")
172
- [ns_name, attr.value]
173
- end.flatten]
174
- end
175
-
176
- def namespace_attributes(namespaces, ns_stack)
177
- Hash[*namespaces.map do |prefix,href|
178
- [prefix, href] if !find_namespace(prefix, ns_stack)
179
- end.compact.flatten]
180
- end
181
-
182
- def find_namespace(prefix, ns_stack)
183
- ns_stack.reverse.find{ |nsh| nsh.has_key?(prefix)}
184
- end
185
- end
186
60
  end
@@ -0,0 +1,175 @@
1
module Rsxml
  # Helpers for resolving XML namespaces in Rsxml structures.
  #
  # Names are handled in two forms :
  # * compact : a QName String such as <tt>"foo:bar"</tt>
  # * exploded : a <tt>[local_part, prefix, uri]</tt> triple or a
  #   <tt>[local_part, prefix]</tt> pair
  #
  # An +ns_stack+ is an Array of <tt>{prefix=>uri}</tt> Hashes. Later
  # entries take precedence over earlier ones, and the default namespace
  # is keyed by the empty String.
  module Namespace
    module_function

    # compact all attribute QNames to Strings
    #
    # returns a new Hash with the same values, and every key passed
    # through +compact_qname+
    def compact_attr_qnames(ns_stack, attrs)
      Hash[attrs.map do |name, value|
        [compact_qname(ns_stack, name), value]
      end]
    end

    # explode attribute QNames to [LocalPart, prefix, URI] triples
    #
    # attributes which end up with no prefix (or an empty prefix) are
    # keyed by their bare local name rather than by an Array
    def explode_attr_qnames(ns_stack, attrs)
      Hash[attrs.map do |name, value|
        uq_name = explode_qname(ns_stack, name, true)
        # uq_name may be a plain String, in which case prefix is nil
        local_name, prefix = uq_name
        if !prefix || prefix == ""
          [local_name, value]
        else
          [uq_name, value]
        end
      end]
    end

    # produce a QName String from a [LocalPart, prefix, URI] triple
    #
    # a String +name+ is returned unchanged. raises a RuntimeError if
    # a URI is given without a prefix, if the prefix is not bound in
    # +ns_stack+, or if it is bound to a different URI. the "xmlns"
    # prefix is never looked up in the stack
    def compact_qname(ns_stack, name)
      return name if name.is_a?(String)

      local_part, prefix, uri = name
      raise "invalid name: #{name}" if !prefix && uri
      if prefix
        if prefix != "xmlns"
          ns = find_namespace_uri(ns_stack, prefix, uri)
          raise "namespace prefix not bound to a namespace: '#{prefix}'" if !ns
        end
        # an empty prefix (default namespace) contributes nothing to the QName
        [prefix, local_part].map{|s| s.to_s unless s.to_s.empty?}.compact.join(':')
      else
        local_part
      end
    end

    # split a QName into [LocalPart, prefix, URI] triple
    #
    # * an Array with a non-nil prefix is returned unchanged
    # * an Array with a nil prefix but a URI raises a RuntimeError
    # * any other Array yields just its first element
    # * a prefixed String is resolved against +ns_stack+, raising a
    #   RuntimeError if the prefix is not bound. when +attr+ is true
    #   an "xmlns" prefix gives a <tt>[local_part, "xmlns"]</tt> pair instead
    # * an unprefixed String is returned as-is for attributes. for
    #   tags it picks up the default namespace, if one is bound
    def explode_qname(ns_stack, qname, attr=false)
      if qname.is_a?(Array)
        if qname.length > 1 && !qname[1].nil?
          return qname
        elsif qname.length > 1 && qname[1].nil? && !qname[2].nil?
          raise "invalid name: #{qname.inspect}"
        else
          return qname[0]
        end
      end

      local_part, prefix = split_qname(qname)
      if prefix
        if prefix == "xmlns" && attr
          [local_part, prefix]
        else
          uri = find_namespace_uri(ns_stack, prefix)
          raise "namespace prefix not bound: '#{prefix}'" if !uri
          [local_part, prefix, uri]
        end
      elsif attr
        local_part
      else
        default_uri = find_namespace_uri(ns_stack, "")
        if default_uri
          [local_part, "", default_uri]
        else
          local_part
        end
      end
    end

    # split a qname String into a [local_part, prefix] pair
    #
    # Arrays are returned untouched, and anything which is not exactly
    # of the form "prefix:local_part" is returned as-is
    def split_qname(qname)
      return qname if qname.is_a?(Array)

      # \A and \z anchor the whole String : ^ and $ would accept a single
      # matching line inside a multi-line String
      if qname =~ /\A[^:]+:[^:]+\z/
        qname.split(':').reverse
      else
        qname
      end
    end

    # returns the namespace uri for a prefix, if declared in the stack
    #
    # the stack is searched from its last entry backwards. if +uri_check+
    # is given and the prefix is bound to a different uri a RuntimeError
    # is raised. returns +nil+ if the prefix is not bound
    def find_namespace_uri(ns_stack, prefix, uri_check=nil)
      tns = ns_stack.reverse_each.find{|ns| ns.has_key?(prefix)}
      uri = tns[prefix] if tns
      raise "prefix: '#{prefix}' is bound to uri: '#{uri}', but should be '#{uri_check}'" if uri_check && uri && uri != uri_check
      uri
    end

    # extract a Hash of {prefix=>uri} mappings declared in attributes
    #
    # <tt>xmlns:foo</tt> attributes bind "foo", and a bare <tt>xmlns</tt>
    # attribute binds the empty prefix. all other attributes are ignored
    def extract_declared_namespace_bindings(attrs)
      Hash[attrs.map do |name, value|
        local_part, prefix = split_qname(name)
        if prefix == "xmlns"
          [local_part, value]
        elsif !prefix && local_part == "xmlns"
          ["", value]
        end
      end.compact]
    end

    # extract a Hash of {prefix=>uri} mappings from exploded QName tag and attrs
    #
    # only names carrying both a prefix and a uri contribute a binding.
    # raises a RuntimeError if the same prefix appears with two different uris
    def extract_explicit_namespace_bindings(tag, attrs)
      _, tag_prefix, tag_uri = tag
      ns = {}
      ns[tag_prefix] = tag_uri if tag_prefix && tag_uri

      attrs.each do |name, _value|
        _, attr_prefix, attr_uri = name
        if attr_prefix && attr_uri
          raise "bindings clash: '#{attr_prefix}'=>'#{ns[attr_prefix]}' , '#{attr_prefix}'=>'#{attr_uri}'" if ns.has_key?(attr_prefix) && ns[attr_prefix] != attr_uri
          ns[attr_prefix] = attr_uri
        end
      end
      ns
    end

    # figure out which explicit namespaces need declaring
    #
    # +ns_stack+ is the stack of namespace bindings
    # +ns_explicit+ is the explicit refs for a tag
    #
    # raises (via +find_namespace_uri+) if a prefix is already bound
    # to a different uri
    def undeclared_namespace_bindings(ns_stack, ns_explicit)
      Hash[ns_explicit.map do |prefix, uri|
        [prefix, uri] if !find_namespace_uri(ns_stack, prefix, uri)
      end.compact]
    end

    # produce a Hash of namespace declaration attributes with exploded
    # QNames, from
    # a Hash of namespace prefix bindings
    #
    # the empty prefix becomes a plain "xmlns" attribute, and any other
    # prefix becomes a <tt>[prefix, "xmlns"]</tt> pair
    def exploded_namespace_declarations(ns)
      Hash[ns.map do |prefix, uri|
        if prefix == ""
          ["xmlns", uri]
        else
          [[prefix, "xmlns"], uri]
        end
      end]
    end

    # merges two sets of namespace bindings, raising error on clash
    #
    # neither argument is modified : a merged copy is returned
    def merge_namespace_bindings(ns1, ns2)
      m = ns1.clone
      ns2.each do |k, v|
        raise "bindings clash: '#{k}'=>'#{m[k]}' , '#{k}'=>'#{v}'" if m.has_key?(k) && m[k] != v
        m[k] = v
      end
      m
    end

    # given the existing +ns_stack+ of ns bindings, a +tag+ and its +attributes+,
    # return a pair <tt>[ns_bindings, ns_additional_decls]</tt> containing
    # ns bindings for the stack, and additional required (exploded) namespace
    # declarations to be added to the attributes
    def namespace_bindings_declarations(ns_stack, tag, attrs)
      ns_declared = extract_declared_namespace_bindings(attrs)
      ns_explicit = extract_explicit_namespace_bindings(tag, attrs)
      ns_undeclared = undeclared_namespace_bindings(ns_stack + [ns_declared], ns_explicit)
      ns_bindings = merge_namespace_bindings(ns_declared, ns_undeclared)

      # and declarations for undeclared namespaces
      ns_additional_decls = exploded_namespace_declarations(ns_undeclared)

      [ns_bindings, ns_additional_decls]
    end

  end
end
@@ -0,0 +1,90 @@
1
module Rsxml
  # Traversal, decomposition and comparison of Rsxml s-expressions
  module Sexp

    module_function

    # pre-order traversal of the sexp, calling methods on
    # the visitor with each node
    #
    # +visitor+ receives <tt>tag(context, etag, eattrs)</tt> with a block
    # which processes the children, and <tt>text(context, child)</tt> for
    # each non-Array child. +etag+ and +eattrs+ are the tag and attributes
    # with their QNames exploded by the Namespace helpers.
    #
    # returns the +visitor+
    def traverse(sexp, visitor, context=Visitor::Context.new)
      tag, attrs, children = decompose_sexp(sexp)

      ns_bindings, ns_additional_decls = Namespace::namespace_bindings_declarations(context.ns_stack, tag, attrs)

      context.ns_stack.push(ns_bindings)

      begin
        # names are exploded inside the protected region, so that a
        # failure to resolve a prefix still pops the bindings pushed above
        etag = Namespace::explode_qname(context.ns_stack, tag)
        eattrs = Namespace::explode_attr_qnames(context.ns_stack, attrs)

        eattrs = eattrs.merge(ns_additional_decls)

        visitor.tag(context, etag, eattrs) do
          context.push_node([etag, eattrs])
          begin
            children.each do |child|
              if child.is_a?(Array)
                traverse(child, visitor, context)
              else
                visitor.text(context, child)
                context.processed_node(child)
              end
            end
          ensure
            context.pop_node
          end
        end

      ensure
        context.ns_stack.pop
      end

      visitor
    end

    # decompose a sexp to a [tag, attrs, children] list
    #
    # an Array tag (exploded QName) is kept as-is, and any other tag is
    # converted with +to_s+. if the second element is a Hash it is taken
    # as the attributes, otherwise the attributes are an empty Hash and
    # the children start at the second element.
    #
    # raises a RuntimeError if the sexp is empty
    def decompose_sexp(sexp)
      raise "invalid rsxml: #{sexp.inspect}" if sexp.length < 1
      if sexp[0].is_a?(Array)
        tag = sexp[0]
      else
        tag = sexp[0].to_s
      end
      if sexp[1].is_a?(Hash)
        attrs = sexp[1]
        children = sexp[2..-1]
      else
        attrs = {}
        children = sexp[1..-1]
      end
      [tag, attrs, children]
    end

    # raised by +compare+ on the first difference found. +path+ is the
    # slash-separated path of element names leading to the difference
    class ComparisonError < RuntimeError
      attr_reader :path
      def initialize(msg, path)
        super("[#{path}]: #{msg}")
        @path = path
      end
    end

    # compare two sexps, element by element
    #
    # returns +true+ if the tags, attributes and children of both sexps
    # are equal throughout, and raises a ComparisonError describing the
    # first difference otherwise
    def compare(sexpa, sexpb, path=nil)
      taga, attrsa, childrena = decompose_sexp(sexpa)
      tagb, attrsb, childrenb = decompose_sexp(sexpb)

      raise ComparisonError.new("element names differ: '#{taga}', '#{tagb}'", path) if taga != tagb
      raise ComparisonError.new("attributes differ", path) if attrsa != attrsb
      raise ComparisonError.new("child count differs", path) if childrena.length != childrenb.length

      path = [path, taga].compact.join("/")
      (0...childrena.length).each do |i|
        if childrena[i].is_a?(Array) && childrenb[i].is_a?(Array)
          compare(childrena[i], childrenb[i], path)
        else
          raise ComparisonError.new("content differs: '#{childrena[i]}', '#{childrenb[i]}'", path) if childrena[i] != childrenb[i]
        end
      end
      true
    end
  end

end