XMLCanonicalizer 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,61 @@
1
+ == C14N-r
2
+
3
+ Project home: www.rubyforge.org/projects/c14n-r
4
+ Author : Roland Schmitt, roland.schmitt@web.de
5
+ Date : 12.05.2007
6
+
7
+ Version : 0.1
8
+
9
+ = Contents
10
+ 1. What is it (and what is it not)
11
+ 2. Requirements
12
+ 3. Installation
13
+ 4. Usage
14
+ 5. Examples
15
+ 6. License
16
+ 7. Support
17
+ 8. URLs
18
+
19
+ = 1. What is it (and what is it not)
20
+ C14N-r stands for "xml canonicalizer for ruby". It implements the "Canonical XML
21
+ Version 1.0" recommendation from the w3c [1].
22
+ c14n-r is a sub-project from wss4r [2], an implementation of some web service security standards.
23
+
24
+ If you like it or hate it or want more features, drop me an email at Roland.Schmitt@web.de.
25
+
26
+ = 2. Requirements
27
+ log4r >= 1.0.4
28
+
29
+ = 3. Installation
30
+ Simply do:
31
+ gem install --local XMLCanonicalizer-0.1.gem
32
+
33
+ = 4. Usage
34
+ require "xmlcanonicalizer"
35
+ c = XML::Util::XmlCanonicalizer.new(true,true) #XmlCanonicalizer.new(WITH_COMMENTS, EXCLUSIVE_MODE)
36
+ c.canonicalize_element(element, false) #XmlCanonicalizer::canonicalize_element(element, LOGGING)
37
+
38
+ = 5. Examples
39
+ require "xmlcanonicalizer"
40
+ require "rexml/document"
41
+ include REXML
42
+ string = <<EOF
43
+ <mydoc>
44
+ <someelement attribute="nanoo">Text, text, text</someelement><anotherelement/>
45
+ </mydoc>
46
+ EOF
47
+ doc = Document.new(string)
48
+ c = XML::Util::XmlCanonicalizer.new(true,true)
49
+ doc = c.canonicalize(doc)
50
+ puts(doc.to_s())
51
+
52
+ = 6. License
53
+ C14N-r is licensed under GPL and Ruby's custom license. See GPL and RUBYS.
54
+
55
+ = 7. Support
56
+ The RubyForge mailing list is at www.rubyforge.org/projects/c14n-r.
57
+ Or, to contact the author, send mail to Roland.Schmitt@web.de
58
+
59
+ = 8. URLs
60
+ [1] - http://www.w3.org/TR/xml-c14n
61
+ [2] - http://www.rubyforge.org/projects/wss4r
@@ -0,0 +1,438 @@
1
+ #require "rexml/document"
2
+ #require "base64"
3
+ #require "log4r"
4
+
5
+ #include REXML
6
+ #include Log4r
7
+
8
+ module XML
9
+ module Util
10
+
11
class REXML::Instruction
  # Serializes this processing instruction onto +writer+ as
  # "<?target content?>". START/STOP may carry a regexp-style backslash
  # (e.g. '<\?'), which is removed before writing. A single space is always
  # written after the target, even when there is no content.
  #
  # writer     - any object responding to <<
  # indent     - indentation level handed to the inherited #indent helper
  # transitive - unused, kept for signature compatibility
  # ie_hack    - unused, kept for signature compatibility
  def write(writer, indent=-1, transitive=false, ie_hack=false)
    indent(writer, indent)
    opening = START.sub(/\\/u, '')
    writer << opening
    writer << @target
    writer << ' '
    writer << @content unless @content == nil
    closing = STOP.sub(/\\/u, '')
    writer << closing
  end
end
21
+
22
class REXML::Attribute
  # Ordering used when comparing two attributes.
  #
  # Equal attributes compare as 0 and a nil argument sorts first. Attributes
  # sharing a prefix are ordered by name. Otherwise the namespace URIs are
  # compared, with the prefix as tie-breaker.
  def <=>(a2)
    return 0 if self === a2
    return -1 if self == nil
    return 1 if a2 == nil
    return self.name() <=> a2.name() if self.prefix() == a2.prefix()

    return -1 if self.prefix() == nil
    return 1 if a2.prefix() == nil

    by_namespace = self.namespace() <=> a2.namespace()
    return by_namespace unless by_namespace == 0
    self.prefix() <=> a2.prefix()
  end
end
45
+
46
class REXML::Element
  # Looks up the namespace URI bound to +prefix+ on this element, falling
  # back to the parent chain. Returns nil when nothing is found.
  def search_namespace(prefix)
    uri = self.namespace(prefix)
    return uri unless uri == nil
    owner = self.parent()
    return owner.search_namespace(prefix) if owner != nil
  end

  # Marks whether this element has already been written by the canonicalizer.
  def rendered=(rendered)
    @rendered = rendered
  end

  # Returns the flag stored by #rendered= (nil when never set).
  def rendered?()
    @rendered
  end

  # Collects the prefixes used on this node: the element's own prefix first,
  # then every non-nil attribute prefix, plus "xmlns" for an attribute whose
  # prefix is empty and whose local name is "xmlns".
  def node_namespaces()
    prefixes = [self.prefix()]
    self.attributes().each_attribute do |attr|
      prefixes.push(attr.prefix()) unless attr.prefix() == nil
      prefixes.push("xmlns") if attr.prefix() == "" && attr.local_name() == "xmlns"
    end
    prefixes
  end
end
74
+
75
# Simple read-only pair of a namespace declaration name (e.g. "xmlns:foo")
# and the URI it is bound to.
class NamespaceNode
  attr_reader :prefix
  attr_reader :uri

  def initialize(prefix, uri)
    @prefix, @uri = prefix, uri
  end
end
82
+
83
+ class XmlCanonicalizer
84
+ attr_accessor :prefix_list, :logger
85
+
86
+ BEFORE_DOC_ELEMENT = 0
87
+ INSIDE_DOC_ELEMENT = 1
88
+ AFTER_DOC_ELEMENT = 2
89
+
90
+ NODE_TYPE_ATTRIBUTE = 3
91
+ NODE_TYPE_WHITESPACE = 4
92
+ NODE_TYPE_COMMENT = 5
93
+ NODE_TYPE_PI = 6
94
+ NODE_TYPE_TEXT = 7
95
+
96
+
97
# Creates a canonicalizer.
#
# with_comments - stored in @with_comments
# excl_c14n     - stored in @exclusive (exclusive canonicalization mode)
def initialize(with_comments, excl_c14n)
  @with_comments, @exclusive = with_comments, excl_c14n
  # Output buffer and document position.
  @res = ""
  @state = BEFORE_DOC_ELEMENT
  # Node-set filter; while empty, every node counts as visible.
  @xnl = []
  # Bookkeeping for namespaces that have been made visible so far.
  @prevVisibleNamespacesStart = @prevVisibleNamespacesEnd = 0
  @visibleNamespaces = []
  @inclusive_namespaces = []
  @prefix_list = nil
  @rendered_prefixes = []
end
110
+
111
# Walks from +element+ up through its ancestors and appends a NamespaceNode
# to +visible_namespaces+ for every "xmlns:<name>" declaration whose <name>
# appears in +prefix_list+. Returns +visible_namespaces+.
def add_inclusive_namespaces(prefix_list, element, visible_namespaces)
  current = element
  while current
    current.attributes().each_attribute do |decl|
      next unless decl.prefix == "xmlns"
      next unless prefix_list.include?(decl.local_name())
      visible_namespaces.push(NamespaceNode.new("xmlns:" + decl.local_name(), decl.value()))
    end
    current = current.parent()
  end
  visible_namespaces
end
124
+
125
# Canonicalizes +document+ by handing it to write_document_node and
# returns the accumulated output buffer.
def canonicalize(document)
  write_document_node(document)
  return @res
end
129
+
130
# Canonicalizes a single element and returns the result string.
#
# element - the element to canonicalize; it must have a parent, because it
#           is removed from that parent and replaced by a re-parsed copy
#           whose markup has had \n, \t, \r removed and has been passed
#           through remove_whitespace.
# logging - when true, turns on file logging through the logging= writer.
#
# NOTE: this mutates the tree that +element+ belongs to.
def canonicalize_element(element, logging = true)
  # The previous `logging=(true) if logging` was parsed by Ruby as an
  # assignment to the local variable `logging`, so the writer was never
  # invoked. An explicit receiver is required for attribute-style writers.
  # The respond_to? guard keeps this safe when no public logging= exists.
  self.logging = true if logging && respond_to?(:logging=)
  @logger.debug("Canonicalize element:\n " + element.to_s()) if @logger
  @inclusive_namespaces = add_inclusive_namespaces(@prefix_list, element, @inclusive_namespaces) if (@prefix_list)
  @preserve_document = element.document()
  tmp_parent = element.parent()
  body_string = remove_whitespace(element.to_s().gsub("\n","").gsub("\t","").gsub("\r",""))
  document = REXML::Document.new(body_string)
  # Swap the original element for the whitespace-stripped copy.
  tmp_parent.delete_element(element)
  element = tmp_parent.add_element(document.root())
  @preserve_element = element
  # Re-parse the attached copy and re-declare its own namespace on the new
  # root so the standalone document carries the binding.
  document = REXML::Document.new(element.to_s())
  ns = element.namespace(element.prefix())
  document.root().add_namespace(element.prefix(), ns)
  write_document_node(document)
  @logger.debug("Canonicalized result:\n " + @res.to_s()) if @logger
  @res
end
148
+
149
# Resets the state machine and writes +document+. An object whose class is
# exactly REXML::Element is written as a single node; anything else has each
# of its children written in order. Returns the output buffer.
def write_document_node(document)
  @state = BEFORE_DOC_ELEMENT
  single_element = (document.class().to_s() == "REXML::Element")
  if single_element
    write_node(document)
  else
    document.each_child do |child|
      write_node(child)
    end
  end
  return @res
end
160
+
161
# Dispatches on the node type and appends the node's canonical form to @res.
#
# * whitespace-only text: appended with "\r\n" collapsed to "\n" and any
#   remaining "\r" removed
# * other text: delegated to write_text_node
# * element: delegated to write_element_node once, then flagged as rendered
# * processing instructions and comments: not emitted (never implemented)
#
# Always returns nil.
def write_node(node)
  visible = is_node_visible(node)
  kind = node.node_type()
  if kind == :text
    text = node.value()
    if white_text?(text)
      # String#gsub and String#delete return new strings. Their results were
      # previously discarded, so the line-ending normalisation never happened.
      @res = @res + text.gsub("\r\n", "\n").delete("\r")
    else
      write_text_node(node, visible)
    end
  elsif kind == :element
    write_element_node(node, visible) if (!node.rendered?())
    node.rendered = true
  end
  nil
end
185
+
186
# Writes one element: start tag, namespace axis, attribute axis, children
# and end tag. Empty elements are written as a start/end pair. The visible
# namespace bookkeeping is restored afterwards so declarations made by this
# element do not leak to its siblings.
#
# node    - element to write
# visible - whether the element's own tags are emitted
def write_element_node(node, visible)
  saved_start = @prevVisibleNamespacesStart
  saved_end = @prevVisibleNamespacesEnd
  saved_size = @visibleNamespaces.size()

  # Previously a local copy of @state was overwritten with
  # INSIDE_DOC_ELEMENT before being compared against BEFORE_DOC_ELEMENT
  # again, so @state never changed. Remember the transition once and apply
  # it on entry and on exit.
  entering_doc_element = visible && @state == BEFORE_DOC_ELEMENT
  @state = INSIDE_DOC_ELEMENT if entering_doc_element

  @res = @res + "<" + node.expanded_name() if visible
  write_namespace_axis(node, visible)
  write_attribute_axis(node)
  @res = @res + ">" if visible
  node.each_child { |child| write_node(child) }
  @res = @res + "</" + node.expanded_name() + ">" if visible

  @state = AFTER_DOC_ELEMENT if entering_doc_element
  @prevVisibleNamespacesStart = saved_start
  @prevVisibleNamespacesEnd = saved_end
  # Drop namespace nodes pushed while this element was being processed.
  added = @visibleNamespaces.size() - saved_size
  @visibleNamespaces.slice!(saved_size, added) if added > 0
end
205
+
206
# Emits the namespace declarations for +node+.
#
# Steps:
# 1. Gather the prefixes reported by node.node_namespaces, skipping the
#    xmlns namespace itself, the built-in xml prefix and an empty default
#    namespace. Each remaining one is recorded in @visibleNamespaces (when
#    +visible+) and queued unless it was already rendered.
# 2. When a prefix list is pending and the node's markup equals its parent's
#    markup, queue the collected inclusive namespaces too, then clear the
#    prefix list.
# 3. Write the queued prefixes in sorted order, each at most once per
#    canonicalizer instance (@rendered_prefixes).
# 4. Advance the visible-namespace window markers when +visible+.
def write_namespace_axis(node, visible)
  declared_default = false
  pending = []
  scope = node
  scope.node_namespaces().each do |prefix|
    next if prefix == "xmlns" && node.namespace(prefix) == ""
    uri = scope.namespace(prefix)
    next if is_namespace_node(uri)
    next if node.namespace(prefix) != scope.namespace(prefix)
    next if prefix == "xml" && uri == "http://www.w3.org/XML/1998/namespace"
    next unless is_node_visible(scope)
    # Must be evaluated before the namespace is recorded as visible below.
    already_rendered = is_namespace_rendered(prefix, uri)
    @visibleNamespaces.push(NamespaceNode.new("xmlns:" + prefix, uri)) if visible
    pending.push(prefix) unless already_rendered || pending.include?(prefix)
    declared_default = true if prefix == nil
  end

  if visible && !declared_default && !is_namespace_rendered(nil, nil)
    @res = @res + ' xmlns=""'
  end

  #TODO: ns of inclusive_list
  if @prefix_list && node.to_s() == node.parent().to_s()
    @inclusive_namespaces.each do |ns|
      prefix = ns.prefix().split(":")[1]
      next if pending.include?(prefix) || node.attributes.prefixes.include?(prefix)
      pending.push(prefix)
    end
    @prefix_list = nil
  end

  pending.sort!()
  pending.each do |prefix|
    next if prefix == "" || @rendered_prefixes.include?(prefix)
    @rendered_prefixes.push(prefix)
    uri = node.namespace(prefix)
    uri = @preserve_element.namespace(prefix) if uri == nil
    if prefix == "xmlns"
      @res = @res + normalize_string(" " + prefix + '="' + uri + '"', NODE_TYPE_TEXT)
    elsif prefix != nil
      @res = @res + normalize_string(" xmlns:" + prefix + '="' + uri + '"', NODE_TYPE_TEXT)
    end
  end

  if visible
    @prevVisibleNamespacesStart = @prevVisibleNamespacesEnd
    @prevVisibleNamespacesEnd = @visibleNamespaces.size()
  end
end
256
+
257
# Appends the attributes of +node+ to @res as ` name="value"` pairs.
#
# Namespace declarations are skipped here because write_namespace_axis
# handles them. In inclusive mode, xml:* attributes (xml:lang, xml:space,
# ...) are inherited from the ancestors when the node's parent is not part
# of the output. The nearest ancestor wins and an attribute of the same name
# already on the node is never overridden.
#
# Fixes over the previous implementation:
# * the ancestor loop never advanced `cur`, so it could not terminate
# * `found` was initialised to true, so no attribute was ever inherited
# * `attritbute` was a misspelling that raised NameError
# * attributes were read via `cur.each_attribute`, which elements do not
#   provide; they are now read via `cur.attributes().each_attribute`
# * duplicates are detected by attribute name, not by value
# * inheritance only happens when the parent is not visible, so whole
#   documents do not repeat xml:* attributes on every descendant
#
# Returns the list of attributes that were considered for output.
def write_attribute_axis(node)
  list = []
  node.attributes().each_attribute { |attr|
    list.push(attr) if (!is_namespace_node(attr.value()) && !is_namespace_decl(attr))
  }

  parent = node.parent()
  if (!@exclusive && parent != nil && parent.parent() != nil && !is_node_visible(parent))
    cur = parent
    while (cur != nil)
      if cur.respond_to?(:attributes)
        cur.attributes().each_attribute { |attribute|
          next if (attribute.prefix() != "xml")
          next if (node.namespace(attribute.prefix()) == attribute.value())
          # Already present on the node or taken from a nearer ancestor.
          next if list.any? { |known| known.prefix() == "xml" && known.name() == attribute.name() }
          list.push(attribute)
        }
      end
      cur = cur.parent()
    end
  end

  list.each { |attribute|
    next if attribute == nil
    next if attribute.name() == "xmlns"
    @res = @res + " " + normalize_string(attribute.to_string(), NODE_TYPE_ATTRIBUTE).gsub("'", '"')
  }
end
293
+
294
# True when +namespace_uri+ is the reserved URI of the xmlns namespace.
def is_namespace_node(namespace_uri)
  xmlns_uri = "http://www.w3.org/2000/xmlns/"
  namespace_uri == xmlns_uri
end
297
+
298
# Reports whether a declaration binding +prefix+ to +uri+ is already among
# the visible namespaces. The empty namespace (both arguments nil) always
# counts as rendered.
def is_namespace_rendered(prefix, uri)
  declaration = "xmlns:" + prefix.to_s()
  known = @visibleNamespaces.any? { |ns| ns.prefix() == declaration && ns.uri() == uri }
  return true if known
  prefix == nil && uri == nil
end
319
+
320
# A node is visible when no node-set filter (@xnl) is configured, or when
# the filter contains an entry equal to the node.
def is_node_visible(node)
  @xnl.size() == 0 || @xnl.any? { |element| element == node }
end
327
+
328
# Returns +input+ unchanged. Character escaping is currently disabled, so
# +type+ is accepted but not used.
def normalize_string(input, type)
  input
end
332
+ #input.each_byte{|b|
333
+ # if (b ==60 && (type == NODE_TYPE_ATTRIBUTE || is_text_node(type)))
334
+ # sb = sb + "&lt;"
335
+ # elsif (b == 62 && is_text_node(type))
336
+ # sb = sb + "&gt;"
337
+ # elsif (b == 38 && (is_text_node(type) || is_text_node(type))) #Ampersand
338
+ # sb = sb + "&amp;"
339
+ # elsif (b == 34 && is_text_node(type)) #Quote
340
+ # sb = sb + "&quot;"
341
+ # elsif (b == 9 && is_text_node(type)) #Tabulator
342
+ # sb = sb + "&#x9;"
343
+ # elsif (b == 11 && is_text_node(type)) #CR
344
+ # sb = sb + "&#xA;"
345
+ # elsif (b == 13 && (type == NODE_TYPE_ATTRIBUTE || (is_text_node(type) && type != NODE_TYPE_WHITESPACE) || type == NODE_TYPE_COMMENT || type == NODE_TYPE_PI))
346
+ # sb = sb + "&#xD;"
347
+ # elsif (b == 13)
348
+ # next
349
+ # else
350
+ # sb = sb.concat(b)
351
+ # end
352
+ #}
353
+ #sb
354
+ #end
355
+
356
# Appends the normalized value of a text node to the output buffer;
# does nothing for invisible nodes.
def write_text_node(node, visible)
  return unless visible
  @res = @res + normalize_string(node.value(), node.node_type())
end
361
+
362
# True when +text+ is empty once leading and trailing whitespace is stripped.
def white_text?(text)
  stripped = text.strip()
  stripped == "" || stripped == nil
end
366
+
367
# True when the attribute's prefix begins with "xmlns", i.e. the attribute
# is a prefixed namespace declaration such as xmlns:foo="...".
def is_namespace_decl(attribute)
  position = attribute.prefix().index("xmlns")
  position == 0
end
372
+
373
# True when +type+ is one of the textual node type codes
# (NODE_TYPE_TEXT or NODE_TYPE_WHITESPACE).
#
# The previous version also compared against NODE_TYPE_CDATA, a constant
# that is defined nowhere, so every call with a type other than
# NODE_TYPE_TEXT raised NameError.
def is_text_node(type)
  type == NODE_TYPE_TEXT || type == NODE_TYPE_WHITESPACE
end
377
+
378
# Returns a copy of +string+ without the space characters (U+0020) that
# occur after a '>' and before the next '<'.
#
# The scan now works on characters instead of bytes. Rebuilding the string
# from single bytes via Integer#chr produced a binary (ASCII-8BIT) string
# for any non-ASCII input, which no longer compared equal to, or
# concatenated cleanly with, the original UTF-8 text.
#
# NOTE(review): spaces inside text content are removed as well
# ("<a>x y</a>" becomes "<a>xy</a>"). This is kept unchanged because the
# output of canonicalize_element depends on it - confirm whether only
# inter-tag whitespace was intended.
def remove_whitespace(string)
  kept = []
  in_white = false
  string.each_char { |ch|
    kept.push(ch) unless in_white && ch == " "
    in_white = true if ch == ">"
    in_white = false if ch == "<"
  }
  kept.join
end
396
+ end
397
+
398
# This writer was defined after the closing `end` of XmlCanonicalizer, which
# made it an instance method of the enclosing module rather than of the
# canonicalizer, so canonicalizer instances could never call it. The class
# is reopened here so the method lands where it belongs.
class XmlCanonicalizer
  # Turns file logging on or off.
  #
  # state - truthy: create a Log4r logger named "xmlcanonicalizer" at DEBUG
  #         level that writes to xmlcanonicalizer.log; falsy: drop the logger.
  #
  # Log4r names are fully qualified so that they resolve without a global
  # `include Log4r` and cannot be confused with Ruby's standard ::Logger.
  def logging=(state)
    if (state)
      @logger = Log4r::Logger.new("xmlcanonicalizer")
      @logger.level = Log4r::DEBUG
      @logger.trace = false
      @logger.add(Log4r::FileOutputter.new("wss4r", {:filename => "xmlcanonicalizer.log", :formatter => Log4r::BasicFormatter}))
    else
      @logger = nil
    end
  end
end
409
+ end #Util
410
+ end #XML
411
+
412
+
413
# Command line entry point.
#
#   ruby xmlcanonicalizer.rb INPUT OUTPUT [true]
#
# Canonicalizes INPUT (exclusive mode, without comments) and writes the
# result to OUTPUT. With a third argument of "true" only the SOAP
# SignedInfo element is canonicalized and the result is also printed.
#
# Changes over the previous script:
# * the canonicalizer lives in XML::Util, not WSS4R::Security::Util
# * the lookup of soap:Body was immediately overwritten and has been removed
# * a third argument other than "true" used to leave `result` nil (an empty
#   output file); the whole document is canonicalized in that case now
# * CryptHash is not part of this library; the digest line is only printed
#   when that class has been loaded
# * files are opened in block form so they are closed even on errors
if __FILE__ == $0
  abort("usage: #{$0} INPUT OUTPUT [true]") if ARGV.size() < 2

  document = File.open(ARGV[0]) { |io| REXML::Document.new(io) }
  c = XML::Util::XmlCanonicalizer.new(false, true)

  if (ARGV.size() == 3 && ARGV[2] == "true")
    element = REXML::XPath.first(document, "/soap:Envelope/soap:Header/wsse:Security/Signature/SignedInfo")
    abort("SignedInfo element not found in #{ARGV[0]}") if element.nil?
    result = c.canonicalize_element(element)
    puts("-----")
    puts(result)
    puts("-----")
    puts(result.size())
    puts("-----")
    puts(CryptHash.new().digest_b64(result)) if defined?(CryptHash)
  else
    result = c.canonicalize(document)
  end

  File.open(ARGV[1], "wb") { |file| file.write(result) }
end
@@ -0,0 +1,9 @@
1
+ require "rexml/document"
2
+ require "base64"
3
+ require "log4r"
4
+
5
+ include REXML
6
+ include Log4r
7
+
8
+ require "xml/util/xmlcanonicalizer"
9
+
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: XMLCanonicalizer
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.1"
7
+ date: 2007-05-11 00:00:00 +02:00
8
+ summary: Implementation of w3c xml canonicalizer standard.
9
+ require_paths:
10
+ - lib
11
+ email: Roland.Schmitt@web.de
12
+ homepage: http://www.rubyforge.org/projects/c14n-r
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: xmlcanonicalizer
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Roland Schmitt
31
+ files:
32
+ - lib/xml
33
+ - lib/xmlcanonicalizer.rb
34
+ - lib/xml/util
35
+ - lib/xml/util/xmlcanonicalizer.rb
36
+ - README
37
+ test_files: []
38
+
39
+ rdoc_options: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ executables: []
44
+
45
+ extensions: []
46
+
47
+ requirements: []
48
+
49
+ dependencies:
50
+ - !ruby/object:Gem::Dependency
51
+ name: log4r
52
+ version_requirement:
53
+ version_requirements: !ruby/object:Gem::Version::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: 1.0.4
58
+ version: