XMLCanonicalizer 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,61 @@
1
+ == C14N-r
2
+
3
+ Project home: www.rubyforge.org/projects/c14n-r
4
+ Author : Roland Schmitt, roland.schmitt@web.de
5
+ Date : 12.05.2007
6
+
7
+ Version : 0.1
8
+
9
+ = Contents
10
+ 1. What is it (and what is it not)
11
+ 2. Requirements
12
+ 3. Installation
13
+ 4. Usage
14
+ 5. Examples
15
+ 6. License
16
+ 7. Support
17
+ 8. URLs
18
+
19
+ = 1. What is it (and what is it not)
20
+ C14N-r stands for "xml canonicalizer for ruby". It implements the "Canonical XML
21
+ Version 1.0" recommendation from the w3c [1].
22
+ C14N-r is a sub-project of wss4r [2], an implementation of some web service security standards.
23
+
24
+ If you like it or hate it or want more features, drop me an email at Roland.Schmitt@web.de.
25
+
26
+ = 2. Requirements
27
+ log4r >= 1.0.4
28
+
29
+ = 3. Installation
30
+ Simply run:
31
+ gem install --local XMLCanonicalizer-0.1.gem
32
+
33
+ = 4. Usage
34
+ require "xmlcanonicalizer"
35
+ c = XML::Util::XmlCanonicalizer.new(true,true) #XmlCanonicalizer.new(WITH_COMMENTS, EXCLUSIVE_MODE)
36
+ c.canonicalize_element(element, false) #XmlCanonicalizer::canonicalize_element(element, LOGGING)
37
+
38
+ = 5. Examples
39
+ require "xmlcanonicalizer"
40
+ require "rexml/document"
41
+ include REXML
42
+ string = <<EOF
43
+ <mydoc>
44
+ <someelement attribute="nanoo">Text, text, text</someelement><anotherelement/>
45
+ </mydoc>
46
+ EOF
47
+ doc = Document.new(string)
48
+ c = XML::Util::XmlCanonicalizer.new(true,true)
49
+ doc = c.canonicalize(doc)
50
+ puts(doc.to_s())
51
+
52
+ = 6. License
53
+ C14N-r is licensed under GPL and Ruby's custom license. See GPL and RUBYS.
54
+
55
+ = 7. Support
56
+ The RubyForge mailing list is at www.rubyforge.org/projects/c14n-r.
57
+ Or, to contact the author, send mail to Roland.Schmitt@web.de
58
+
59
+ = 8. URLs
60
+ [1] - http://www.w3.org/TR/xml-c14n
61
+ [2] - http://www.rubyforge.org/projects/wss4r
@@ -0,0 +1,438 @@
1
+ #require "rexml/document"
2
+ #require "base64"
3
+ #require "log4r"
4
+
5
+ #include REXML
6
+ #include Log4r
7
+
8
+ module XML
9
+ module Util
10
+
11
# Reopens REXML::Instruction to serialise processing instructions in the
# compact "<?target content?>" form.
class REXML::Instruction
  # Writes this processing instruction to +writer+.
  #
  # writer     - any object responding to <<
  # indent     - indentation level handed to REXML's own indent helper
  # transitive - accepted for signature compatibility, not used here
  # ie_hack    - accepted for signature compatibility, not used here
  #
  # The separating space is only emitted when there is content to follow it,
  # so an instruction without content is written as "<?target?>".
  def write(writer, indent=-1, transitive=false, ie_hack=false)
    indent(writer, indent)
    # START/STOP may carry a regexp-style backslash; strip it before output.
    writer << START.sub(/\\/u, '')
    writer << @target
    unless @content.nil? || @content.to_s.empty?
      writer << ' '
      writer << @content
    end
    writer << STOP.sub(/\\/u, '')
  end
end
21
+
22
# Reopens REXML::Attribute to add an ordering between attributes.
class REXML::Attribute
  # Three-way comparison against another attribute.
  #
  # Returns 0 when the two attributes are equal according to ===, 1 when
  # +a2+ is nil, the comparison of the names when both share a prefix, and
  # otherwise the comparison of the namespaces, falling back to the
  # comparison of the prefixes when the namespaces compare equal.
  def <=>(a2)
    return 0 if self === a2
    return -1 if self.nil?
    return 1 if a2.nil?
    return name <=> a2.name if prefix == a2.prefix

    return -1 if prefix.nil?
    return 1 if a2.prefix.nil?

    by_namespace = namespace <=> a2.namespace
    return by_namespace unless by_namespace == 0
    prefix <=> a2.prefix
  end
end
45
+
46
# Reopens REXML::Element with helpers used while canonicalizing.
class REXML::Element
  # Marks (or unmarks) this element as already written.
  attr_writer :rendered

  # Looks up the namespace bound to +prefix+ on this element and, when the
  # lookup yields nil, repeats the search on the parent.
  # Returns nil when no parent is left to ask.
  def search_namespace(prefix)
    return namespace(prefix) unless namespace(prefix).nil?
    parent.search_namespace(prefix) unless parent.nil?
  end

  # Returns the value last stored through rendered= (nil if never set).
  def rendered?
    @rendered
  end

  # Collects the prefixes in use on this element: the element's own prefix
  # first, then the prefix of every attribute, plus the literal "xmlns" for
  # an attribute whose prefix is empty and whose local name is "xmlns".
  # Duplicates are kept.
  def node_namespaces
    prefixes = [prefix]
    attributes.each_attribute do |attr|
      prefixes.push(attr.prefix) unless attr.prefix.nil?
      prefixes.push("xmlns") if attr.prefix == "" && attr.local_name == "xmlns"
    end
    prefixes
  end
end
74
+
75
# Read-only pair of a namespace declaration name (stored as given, e.g.
# "xmlns:foo") and the URI it is bound to.
class NamespaceNode
  # prefix - declaration name
  # uri    - namespace URI
  def initialize(prefix, uri)
    @prefix, @uri = prefix, uri
  end

  attr_reader :prefix, :uri
end
82
+
83
+ class XmlCanonicalizer
84
+ attr_accessor :prefix_list, :logger
85
+
86
+ BEFORE_DOC_ELEMENT = 0
87
+ INSIDE_DOC_ELEMENT = 1
88
+ AFTER_DOC_ELEMENT = 2
89
+
90
+ NODE_TYPE_ATTRIBUTE = 3
91
+ NODE_TYPE_WHITESPACE = 4
92
+ NODE_TYPE_COMMENT = 5
93
+ NODE_TYPE_PI = 6
94
+ NODE_TYPE_TEXT = 7
95
+
96
+
97
# Sets up an empty canonicalizer.
#
# with_comments - stored in @with_comments
# excl_c14n     - stored in @exclusive; read by write_attribute_axis
#
# All bookkeeping starts empty: the output buffer, the node list consulted
# by is_node_visible (an empty list means every node is visible), the
# visible-namespace stack with its two window markers, the inclusive
# namespace list and the list of prefixes already written.
def initialize(with_comments, excl_c14n)
  @with_comments, @exclusive = with_comments, excl_c14n

  # Output buffer and document position.
  @res = ""
  @state = BEFORE_DOC_ELEMENT

  # Node-set restriction (none by default).
  @xnl = []

  # Namespace bookkeeping.
  @prevVisibleNamespacesStart = 0
  @prevVisibleNamespacesEnd = 0
  @visibleNamespaces = []
  @inclusive_namespaces = []
  @prefix_list = nil
  @rendered_prefixes = []
end
110
+
111
# Collects namespace declarations named in +prefix_list+ from +element+ and
# all of its ancestors.
#
# prefix_list        - collection queried with include? for each declared prefix
# element            - node to start from; must respond to attributes and parent
# visible_namespaces - array that receives one NamespaceNode per match
#
# Matches are appended nearest element first. Returns +visible_namespaces+.
def add_inclusive_namespaces(prefix_list, element, visible_namespaces)
  current = element
  while current
    current.attributes.each_attribute do |attr|
      next unless attr.prefix == "xmlns"
      next unless prefix_list.include?(attr.local_name)
      visible_namespaces.push(NamespaceNode.new("xmlns:" + attr.local_name, attr.value))
    end
    current = current.parent
  end
  visible_namespaces
end
124
+
125
# Canonicalizes +document+ by handing it to write_document_node.
#
# Returns the output buffer @res. The buffer is not cleared here, so the
# text is appended to whatever this instance has produced so far.
def canonicalize(document)
  write_document_node(document)
  return @res
end
129
+
130
# Canonicalizes a single element that is attached to a parent.
#
# element - element to canonicalize; it is removed from its parent and
#           replaced by a re-parsed copy with newlines, tabs and carriage
#           returns stripped
# logging - see the note on the first statement
#
# Returns the output buffer @res.
def canonicalize_element(element, logging = true)
  # NOTE(review): without an explicit receiver this is a local-variable
  # assignment, not a call to a `logging=` writer, so it has no effect.
  logging = true if logging
  @logger.debug("Canonicalize element:\n " + element.to_s) if @logger

  if @prefix_list
    @inclusive_namespaces = add_inclusive_namespaces(@prefix_list, element, @inclusive_namespaces)
  end

  @preserve_document = element.document
  owner = element.parent

  # Flatten the serialised element, re-parse it and swap it in for the original.
  flattened = element.to_s.gsub("\n", "").gsub("\t", "").gsub("\r", "")
  reparsed = Document.new(remove_whitespace(flattened))
  owner.delete_element(element)
  element = owner.add_element(reparsed.root)
  @preserve_element = element

  # Serialise from a standalone document whose root re-declares the
  # namespace the element resolved while still attached.
  standalone = Document.new(element.to_s)
  resolved_ns = element.namespace(element.prefix)
  standalone.root.add_namespace(element.prefix, resolved_ns)
  write_document_node(standalone)

  @logger.debug("Canonicalized result:\n " + @res.to_s) if @logger
  @res
end
148
+
149
# Resets the document state and writes +document+.
#
# An object whose class name is exactly "REXML::Element" is written as a
# single node; anything else has each of its children written in turn.
# Returns the output buffer @res.
def write_document_node(document)
  @state = BEFORE_DOC_ELEMENT
  if document.class.to_s == "REXML::Element"
    write_node(document)
  else
    document.each_child { |child| write_node(child) }
  end
  @res
end
160
+
161
# Writes one node to the output buffer, dispatching on its node type.
#
# * whitespace-only text is appended with carriage returns removed
#   (regardless of visibility)
# * other text goes through write_text_node
# * elements are written once; every element is flagged as rendered
# * processing instructions and comments are not written
#
# Returns nil.
def write_node(node)
  visible = is_node_visible(node)
  case node.node_type
  when :text
    if white_text?(node.value)
      # String#delete returns a new string; the result has to be used.
      # Removing every "\r" also turns "\r\n" into "\n".
      @res = @res + node.value.delete("\r")
    else
      write_text_node(node, visible)
    end
  when :element
    write_element_node(node, visible) if !node.rendered?
    node.rendered = true
  when :processing_instruction, :comment
    # Intentionally not serialised.
  end
  nil
end
185
+
186
# Writes an element: start tag, namespace declarations, attributes,
# children and end tag.
#
# node    - element to write
# visible - when false only the children are written, no tags
#
# The namespace window markers and the visible-namespace stack are restored
# to their entry values afterwards, so declarations pushed while writing
# this element do not leak to its following siblings.
def write_element_node(node, visible)
  saved_start = @prevVisibleNamespacesStart
  saved_end = @prevVisibleNamespacesEnd
  saved_size = @visibleNamespaces.size

  # The state transition must be stored in @state (not in a local copy);
  # the entry value is kept to recognise the outermost element on the way out.
  entering_doc_element = visible && @state == BEFORE_DOC_ELEMENT
  @state = INSIDE_DOC_ELEMENT if entering_doc_element

  @res = @res + "<" + node.expanded_name if visible
  write_namespace_axis(node, visible)
  write_attribute_axis(node)
  @res = @res + ">" if visible
  node.each_child { |child| write_node(child) }
  @res = @res + "</" + node.expanded_name + ">" if visible

  @state = AFTER_DOC_ELEMENT if entering_doc_element
  @prevVisibleNamespacesStart = saved_start
  @prevVisibleNamespacesEnd = saved_end
  if @visibleNamespaces.size > saved_size
    @visibleNamespaces.slice!(saved_size, @visibleNamespaces.size - saved_size)
  end
end
205
+
206
# Writes the namespace declarations of +node+ to the output buffer.
#
# node    - element whose prefixes (from node_namespaces) are examined
# visible - when true, every examined namespace is pushed on the
#           visible-namespace stack and the window markers are advanced
#
# A prefix is written at most once per canonicalizer instance (tracked in
# @rendered_prefixes). The default declaration ("xmlns") is written before
# any prefixed declaration; prefixed declarations follow in lexicographic
# order, as Canonical XML requires.
def write_namespace_axis(node, visible)
  has_empty_namespace = false
  pending = []

  node.node_namespaces.each do |prefix|
    uri = node.namespace(prefix)
    next if prefix == "xmlns" && uri == ""
    next if is_namespace_node(uri)
    next if prefix == "xml" && uri == "http://www.w3.org/XML/1998/namespace"
    next if !is_node_visible(node)
    rendered = is_namespace_rendered(prefix, uri)
    @visibleNamespaces.push(NamespaceNode.new("xmlns:" + prefix, uri)) if visible
    pending.push(prefix) if !rendered && !pending.include?(prefix)
    has_empty_namespace = true if prefix.nil?
  end

  if visible && !has_empty_namespace && !is_namespace_rendered(nil, nil)
    @res = @res + ' xmlns=""'
  end

  #TODO: ns of inclusive_list
  # One-shot: the inclusive prefixes are merged in for the first element
  # whose serialisation equals that of its parent, then the list is cleared.
  if @prefix_list && (node.to_s == node.parent.to_s)
    @inclusive_namespaces.each do |ns|
      prefix = ns.prefix.split(":")[1]
      if !pending.include?(prefix) && !node.attributes.prefixes.include?(prefix)
        pending.push(prefix)
      end
    end
    @prefix_list = nil
  end

  # The default namespace has no local name and therefore sorts first.
  pending.sort_by! { |prefix| prefix == "xmlns" ? "" : prefix }
  pending.each do |prefix|
    next if prefix == ""
    next if @rendered_prefixes.include?(prefix)
    @rendered_prefixes.push(prefix)
    uri = node.namespace(prefix)
    # Fall back to the element remembered by canonicalize_element.
    uri = @preserve_element.namespace(prefix) if uri.nil?
    if prefix == "xmlns"
      @res = @res + normalize_string(" " + prefix + '="' + uri + '"', NODE_TYPE_TEXT)
    elsif !prefix.nil?
      @res = @res + normalize_string(" xmlns:" + prefix + '="' + uri + '"', NODE_TYPE_TEXT)
    end
  end

  if visible
    @prevVisibleNamespacesStart = @prevVisibleNamespacesEnd
    @prevVisibleNamespacesEnd = @visibleNamespaces.size
  end
end
256
+
257
# Writes the attributes of +node+ to the output buffer.
#
# Namespace declarations are skipped here (write_namespace_axis handles
# them). Attributes are emitted in the order the attribute collection
# yields them; no sorting is applied in this method.
#
# In inclusive mode (@exclusive false), when the parent of +node+ is not
# visible, the nearest xml:* attribute of each name found on the ancestors
# is added unless +node+ already carries an xml:* attribute of that name.
def write_attribute_axis(node)
  list = []
  node.attributes.each_attribute do |attr|
    list.push(attr) if !is_namespace_node(attr.value) && !is_namespace_decl(attr)
  end

  parent = node.parent
  if !@exclusive && !parent.nil? && !parent.parent.nil? && !is_node_visible(parent)
    ancestor = parent
    while ancestor
      ancestor.attributes.each_attribute do |attribute|
        next if attribute.prefix != "xml"
        # Nearest occurrence wins: skip names already collected.
        next if list.any? { |known| known.prefix == "xml" && known.name == attribute.name }
        list.push(attribute)
      end
      ancestor = ancestor.parent
    end
  end

  list.each do |attribute|
    next if attribute.nil?
    next if attribute.name == "xmlns"
    @res = @res + " " + normalize_string(attribute.to_string, NODE_TYPE_ATTRIBUTE).gsub("'", '"')
  end
end
293
+
294
# True when +namespace_uri+ equals the URI "http://www.w3.org/2000/xmlns/".
def is_namespace_node(namespace_uri)
  namespace_uri == "http://www.w3.org/2000/xmlns/"
end
297
+
298
# Tells whether a declaration for +prefix+ bound to +uri+ is currently on
# the visible-namespace stack.
#
# The whole stack is scanned. When nothing matches, the result is true only
# for the "empty namespace" query, i.e. when both arguments are nil.
def is_namespace_rendered(prefix, uri)
  wanted = "xmlns:" + prefix.to_s
  return true if @visibleNamespaces.any? { |ns| ns.prefix == wanted && ns.uri == uri }
  prefix.nil? && uri.nil?
end
319
+
320
# True when +node+ should be written: either the node list @xnl is empty
# (no restriction), or one of its entries compares equal to +node+.
def is_node_visible(node)
  return true if @xnl.empty?
  @xnl.any? { |element| element == node }
end
327
+
328
# Returns +input+ unchanged; +type+ is accepted but not consulted.
def normalize_string(input, type)
  input
end
332
+ #input.each_byte{|b|
333
+ # if (b ==60 && (type == NODE_TYPE_ATTRIBUTE || is_text_node(type)))
334
+ # sb = sb + "&lt;"
335
+ # elsif (b == 62 && is_text_node(type))
336
+ # sb = sb + "&gt;"
337
+ # elsif (b == 38 && (is_text_node(type) || is_text_node(type))) #Ampersand
338
+ # sb = sb + "&amp;"
339
+ # elsif (b == 34 && is_text_node(type)) #Quote
340
+ # sb = sb + "&quot;"
341
+ # elsif (b == 9 && is_text_node(type)) #Tabulator
342
+ # sb = sb + "&#x9;"
343
+ # elsif (b == 11 && is_text_node(type)) #CR
344
+ # sb = sb + "&#xA;"
345
+ # elsif (b == 13 && (type == NODE_TYPE_ATTRIBUTE || (is_text_node(type) && type != NODE_TYPE_WHITESPACE) || type == NODE_TYPE_COMMENT || type == NODE_TYPE_PI))
346
+ # sb = sb + "&#xD;"
347
+ # elsif (b == 13)
348
+ # next
349
+ # else
350
+ # sb = sb.concat(b)
351
+ # end
352
+ #}
353
+ #sb
354
+ #end
355
+
356
# Appends the value of a text node to the output buffer, passed through
# normalize_string together with the node's type. Nothing is written when
# +visible+ is false.
def write_text_node(node, visible)
  return unless visible
  @res = @res + normalize_string(node.value, node.node_type)
end
361
+
362
# True when +text+ is empty once leading and trailing whitespace is stripped.
def white_text?(text)
  text.strip.empty?
end
366
+
367
# True when the prefix of +attribute+ begins with "xmlns".
def is_namespace_decl(attribute)
  attribute.prefix.start_with?("xmlns")
end
372
+
373
# True when +type+ is one of the text-like node type constants.
#
# NOTE(review): NODE_TYPE_CDATA does not appear among the constants defined
# in the visible part of this class; confirm it exists before calling this
# with a type other than NODE_TYPE_TEXT.
def is_text_node(type)
  type == NODE_TYPE_TEXT || type == NODE_TYPE_CDATA || type == NODE_TYPE_WHITESPACE
end
377
+
378
# Removes runs of spaces that sit directly between two tags, i.e. between a
# ">" and the next "<" with nothing else in between.
#
# Spaces inside text that contains other characters are left alone, so
# "<a>Text, text</a>" keeps its inner space. Only the space character is
# affected; the result keeps the encoding of +string+.
#
# Returns a new string.
def remove_whitespace(string)
  string.gsub(/> +(?=<)/, ">")
end
396
+ end
397
+
398
# The class body is closed before this point, so the setter is added by
# reopening XmlCanonicalizer; otherwise it would be defined on the
# surrounding module and be unreachable from canonicalizer instances.
class XmlCanonicalizer
  # Switches debug logging on or off.
  #
  # state - truthy: install a logger named "xmlcanonicalizer" at DEBUG
  #         level that writes to the file "xmlcanonicalizer.log";
  #         falsy: drop the current logger
  def logging=(state)
    if state
      @logger = Logger.new("xmlcanonicalizer")
      @logger.level = DEBUG
      @logger.trace = false
      @logger.add(FileOutputter.new("wss4r", {:filename => "xmlcanonicalizer.log", :formatter => BasicFormatter}))
    else
      @logger = nil
    end
  end
end
409
+ end #Util
410
+ end #XML
411
+
412
+
413
# Command-line driver: ruby <this file> INPUT OUTPUT [true]
#
# Canonicalizes INPUT and writes the result to OUTPUT. When the third
# argument is the string "true", only the SignedInfo element of a SOAP
# security header is canonicalized and diagnostics are printed.
if __FILE__ == $0
  if ARGV.size < 2
    warn("usage: ruby #{$0} INPUT OUTPUT [true]")
  else
    # Block form closes the input file once parsing is done.
    document = File.open(ARGV[0]) { |input| Document.new(input) }
    c = XML::Util::XmlCanonicalizer.new(false, true)

    if ARGV.size == 3 && ARGV[2] == "true"
      element = XPath.first(document, "/soap:Envelope/soap:Header/wsse:Security/Signature/SignedInfo")
      result = c.canonicalize_element(element)
      puts("-----")
      puts(result)
      puts("-----")
      puts(result.size())
      puts("-----")
      # CryptHash is not defined in this file; print the digest only when
      # the class has been loaded from elsewhere.
      puts(CryptHash.new().digest_b64(result)) if defined?(CryptHash)
    else
      result = c.canonicalize(document)
    end

    File.open(ARGV[1], "wb") { |file| file.write(result) }
  end
end
@@ -0,0 +1,9 @@
1
+ require "rexml/document"
2
+ require "base64"
3
+ require "log4r"
4
+
5
+ include REXML
6
+ include Log4r
7
+
8
+ require "xml/util/xmlcanonicalizer"
9
+
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: XMLCanonicalizer
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.1"
7
+ date: 2007-05-11 00:00:00 +02:00
8
+ summary: Implementation of the W3C XML canonicalization standard.
9
+ require_paths:
10
+ - lib
11
+ email: Roland.Schmitt@web.de
12
+ homepage: http://www.rubyforge.org/projects/c14n-r
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: xmlcanonicalizer
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Roland Schmitt
31
+ files:
32
+ - lib/xml
33
+ - lib/xmlcanonicalizer.rb
34
+ - lib/xml/util
35
+ - lib/xml/util/xmlcanonicalizer.rb
36
+ - README
37
+ test_files: []
38
+
39
+ rdoc_options: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ executables: []
44
+
45
+ extensions: []
46
+
47
+ requirements: []
48
+
49
+ dependencies:
50
+ - !ruby/object:Gem::Dependency
51
+ name: log4r
52
+ version_requirement:
53
+ version_requirements: !ruby/object:Gem::Version::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: 1.0.4
58
+ version: