nokogiri 1.5.0-java → 1.5.1-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (81) hide show
  1. data/CHANGELOG.ja.rdoc +56 -12
  2. data/CHANGELOG.rdoc +45 -0
  3. data/C_CODING_STYLE.rdoc +27 -0
  4. data/Manifest.txt +4 -0
  5. data/README.rdoc +11 -7
  6. data/Rakefile +44 -26
  7. data/bin/nokogiri +10 -2
  8. data/ext/java/nokogiri/HtmlDocument.java +37 -2
  9. data/ext/java/nokogiri/NokogiriService.java +10 -2
  10. data/ext/java/nokogiri/XmlAttr.java +1 -1
  11. data/ext/java/nokogiri/XmlDocument.java +68 -11
  12. data/ext/java/nokogiri/XmlDocumentFragment.java +16 -5
  13. data/ext/java/nokogiri/XmlElement.java +0 -40
  14. data/ext/java/nokogiri/XmlNamespace.java +8 -1
  15. data/ext/java/nokogiri/XmlNode.java +131 -27
  16. data/ext/java/nokogiri/XmlNodeSet.java +4 -1
  17. data/ext/java/nokogiri/XmlSaxParserContext.java +2 -13
  18. data/ext/java/nokogiri/XmlXpathContext.java +4 -1
  19. data/ext/java/nokogiri/XsltStylesheet.java +198 -37
  20. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +40 -2
  21. data/ext/java/nokogiri/internals/NokogiriHelpers.java +82 -9
  22. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +4 -3
  23. data/ext/java/nokogiri/internals/ParserContext.java +33 -3
  24. data/ext/java/nokogiri/internals/SaveContextVisitor.java +203 -12
  25. data/ext/java/nokogiri/internals/XmlDomParser.java +33 -2
  26. data/ext/java/nokogiri/internals/XmlDomParserContext.java +32 -12
  27. data/ext/nokogiri/extconf.rb +11 -3
  28. data/ext/nokogiri/html_document.c +16 -0
  29. data/ext/nokogiri/html_sax_parser_context.c +59 -37
  30. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  31. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  32. data/ext/nokogiri/nokogiri.c +6 -8
  33. data/ext/nokogiri/nokogiri.h +3 -0
  34. data/ext/nokogiri/xml_document.c +101 -3
  35. data/ext/nokogiri/xml_document.h +3 -3
  36. data/ext/nokogiri/xml_node.c +150 -58
  37. data/ext/nokogiri/xml_node_set.c +169 -120
  38. data/ext/nokogiri/xml_node_set.h +5 -0
  39. data/ext/nokogiri/xml_sax_parser_context.c +64 -41
  40. data/ext/nokogiri/xml_text.c +2 -0
  41. data/ext/nokogiri/xml_xpath_context.c +30 -24
  42. data/ext/nokogiri/xslt_stylesheet.c +62 -16
  43. data/ext/nokogiri/xslt_stylesheet.h +5 -0
  44. data/lib/nokogiri/css/parser.rb +163 -157
  45. data/lib/nokogiri/css/parser.y +6 -3
  46. data/lib/nokogiri/css/tokenizer.rb +1 -1
  47. data/lib/nokogiri/css/tokenizer.rex +1 -1
  48. data/lib/nokogiri/html.rb +1 -0
  49. data/lib/nokogiri/html/document.rb +82 -42
  50. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  51. data/lib/nokogiri/nokogiri.jar +0 -0
  52. data/lib/nokogiri/version.rb +1 -1
  53. data/lib/nokogiri/xml.rb +6 -0
  54. data/lib/nokogiri/xml/builder.rb +7 -1
  55. data/lib/nokogiri/xml/document.rb +32 -17
  56. data/lib/nokogiri/xml/document_fragment.rb +6 -1
  57. data/lib/nokogiri/xml/node.rb +40 -9
  58. data/lib/nokogiri/xslt.rb +5 -1
  59. data/tasks/cross_compile.rb +1 -0
  60. data/tasks/nokogiri.org.rb +6 -0
  61. data/tasks/test.rb +1 -0
  62. data/test/css/test_xpath_visitor.rb +6 -0
  63. data/test/helper.rb +1 -0
  64. data/test/html/test_document.rb +26 -0
  65. data/test/html/test_document_fragment.rb +1 -2
  66. data/test/test_memory_leak.rb +81 -1
  67. data/test/test_xslt_transforms.rb +152 -123
  68. data/test/xml/test_builder.rb +24 -2
  69. data/test/xml/test_c14n.rb +151 -0
  70. data/test/xml/test_document.rb +48 -0
  71. data/test/xml/test_namespace.rb +5 -0
  72. data/test/xml/test_node.rb +82 -1
  73. data/test/xml/test_node_attributes.rb +19 -0
  74. data/test/xml/test_node_inheritance.rb +32 -0
  75. data/test/xml/test_node_reparenting.rb +32 -0
  76. data/test/xml/test_node_set.rb +16 -8
  77. data/test/xml/test_reader_encoding.rb +16 -0
  78. data/test/xml/test_unparented_node.rb +32 -0
  79. data/test/xml/test_xinclude.rb +83 -0
  80. data/test/xml/test_xpath.rb +22 -0
  81. metadata +147 -123
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * (The MIT License)
3
3
  *
4
- * Copyright (c) 2008 - 2011:
4
+ * Copyright (c) 2008 - 2012:
5
5
  *
6
6
  * * {Aaron Patterson}[http://tenderlovemaking.com]
7
7
  * * {Mike Dalessio}[http://mike.daless.io]
@@ -35,10 +35,17 @@ package nokogiri.internals;
35
35
  import java.io.File;
36
36
  import java.io.UnsupportedEncodingException;
37
37
  import java.nio.ByteBuffer;
38
+ import java.nio.CharBuffer;
39
+ import java.nio.charset.CharacterCodingException;
38
40
  import java.nio.charset.Charset;
41
+ import java.nio.charset.CharsetDecoder;
42
+ import java.nio.charset.CharsetEncoder;
43
+ import java.util.ArrayList;
44
+ import java.util.List;
39
45
  import java.util.regex.Matcher;
40
46
  import java.util.regex.Pattern;
41
47
 
48
+ import nokogiri.HtmlDocument;
42
49
  import nokogiri.NokogiriService;
43
50
  import nokogiri.XmlAttr;
44
51
  import nokogiri.XmlCdata;
@@ -57,6 +64,7 @@ import org.jruby.RubyArray;
57
64
  import org.jruby.RubyClass;
58
65
  import org.jruby.RubyEncoding;
59
66
  import org.jruby.RubyString;
67
+ import org.jruby.runtime.ThreadContext;
60
68
  import org.jruby.runtime.builtin.IRubyObject;
61
69
  import org.jruby.util.ByteList;
62
70
  import org.w3c.dom.Attr;
@@ -74,6 +82,7 @@ import org.w3c.dom.NodeList;
74
82
  public class NokogiriHelpers {
75
83
  public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
76
84
  public static final String VALID_ROOT_NODE = "NOKOGIRI_VALIDE_ROOT_NODE";
85
+ public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
77
86
 
78
87
  public static XmlNode getCachedNode(Node node) {
79
88
  return (XmlNode) node.getUserData(CACHED_NODE);
@@ -89,12 +98,14 @@ public class NokogiriHelpers {
89
98
  if(node == null) return ruby.getNil();
90
99
  if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
91
100
  XmlDocument xmlDocument = (XmlDocument)node.getOwnerDocument().getUserData(CACHED_NODE);
101
+ if (!(xmlDocument instanceof HtmlDocument)) {
92
102
  String prefix = getLocalNameForNamespace(((Attr)node).getName());
93
103
  prefix = prefix != null ? prefix : "";
94
104
  String href = ((Attr)node).getValue();
95
105
  XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
96
106
  if (xmlNamespace != null) return xmlNamespace;
97
107
  else return XmlNamespace.createFromAttr(ruby, (Attr)node);
108
+ }
98
109
  }
99
110
  XmlNode xmlNode = getCachedNode(node);
100
111
  if(xmlNode == null) {
@@ -145,7 +156,7 @@ public class NokogiriHelpers {
145
156
  return xmlCdata;
146
157
  case Node.DOCUMENT_NODE:
147
158
  XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
148
- xmlDocument.setNode(runtime.getCurrentContext(), node);
159
+ xmlDocument.setDocumentNode(runtime.getCurrentContext(), node);
149
160
  return xmlDocument;
150
161
  case Node.DOCUMENT_TYPE_NODE:
151
162
  XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
@@ -167,6 +178,11 @@ public class NokogiriHelpers {
167
178
  return RubyString.newString(runtime, s);
168
179
  }
169
180
 
181
+ public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) {
182
+ if (bytes == null) return runtime.getNil();
183
+ return RubyString.newString(runtime, bytes);
184
+ }
185
+
170
186
  public static IRubyObject stringOrBlank(Ruby runtime, String s) {
171
187
  if (s == null) return runtime.newString();
172
188
  return RubyString.newString(runtime, s);
@@ -263,6 +279,15 @@ public class NokogiriHelpers {
263
279
  ByteBuffer buf = ByteBuffer.wrap(data, offset, len);
264
280
  return getCharsetUTF8().decode(buf).toString();
265
281
  }
282
+
283
+ public static List<String> rubyStringArrayToJavaList(RubyArray ary) {
284
+ List<String> list = new ArrayList<String>();
285
+ for (int i=0; i < ary.getLength(); i++) {
286
+ Object obj = ary.get(i);
287
+ if (obj != null) list.add(obj.toString());
288
+ }
289
+ return list;
290
+ }
266
291
 
267
292
  public static String getNodeCompletePath(Node node) {
268
293
 
@@ -531,13 +556,6 @@ public class NokogiriHelpers {
531
556
  return convert(encoded_pattern, s, encoded, decoded);
532
557
  }
533
558
 
534
- private static Pattern not_escaped_pattern = Pattern.compile("\\&(?!(amp;|gt;|lt;))|<|>");
535
- public static boolean isNotXmlEscaped(String s) {
536
- if (s == null) return false;
537
- Matcher matcher = not_escaped_pattern.matcher(s);
538
- return (matcher.find());
539
- }
540
-
541
559
  public static String getNodeName(Node node) {
542
560
  if(node == null) { System.out.println("node is null"); return ""; }
543
561
  String name = node.getNodeName();
@@ -569,6 +587,38 @@ public class NokogiriHelpers {
569
587
  public static boolean isXmlBase(String attrName) {
570
588
  return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
571
589
  }
590
+
591
+ public static boolean isWhitespaceText(ThreadContext context, IRubyObject obj) {
592
+ if (obj == null || obj.isNil()) return false;
593
+
594
+ XmlNode node = (XmlNode) obj;
595
+ if (!(node instanceof XmlText))
596
+ return false;
597
+
598
+ String content = rubyStringToString(node.content(context));
599
+ return content.trim().length() == 0;
600
+ }
601
+
602
+ public static boolean isWhitespaceText(String s) {
603
+ return s.trim().length() == 0;
604
+ }
605
+
606
+ public static String canonicalizeWhitespce(String s) {
607
+ StringBuilder sb = new StringBuilder();
608
+ char[] chars = s.toCharArray();
609
+ boolean newline_added = false;
610
+ for (int i=0; i<chars.length; i++) {
611
+ if (chars[i] == '\n') {
612
+ if (!newline_added) {
613
+ sb.append(chars[i]);
614
+ newline_added = true;
615
+ }
616
+ } else {
617
+ sb.append(chars[i]);
618
+ }
619
+ }
620
+ return sb.toString();
621
+ }
572
622
 
573
623
  public static String newQName(String newPrefix, Node node) {
574
624
  if(newPrefix == null) {
@@ -636,4 +686,27 @@ public class NokogiriHelpers {
636
686
  if (dtdFile.exists()) return dtdFile.getPath();
637
687
  return null;
638
688
  }
689
+
690
+ public static boolean isUTF8(String encoding) {
691
+ if (encoding == null) return true; // no need to convert encoding
692
+ int ret = Charset.forName(encoding).compareTo(Charset.forName("UTF-8"));
693
+ return ret == 0;
694
+ }
695
+
696
+ public static byte[] convertEncoding(Charset output_charset, String input_string) throws CharacterCodingException {
697
+ Charset input = Charset.forName("UTF-8");
698
+ CharsetDecoder decoder = input.newDecoder();
699
+ CharsetEncoder encoder = output_charset.newEncoder();
700
+ decoder.reset();
701
+ encoder.reset();
702
+ ByteBuffer bbuf = ByteBuffer.wrap(input_string.getBytes());
703
+ CharBuffer cbuf = decoder.decode(bbuf);
704
+ bbuf.clear();
705
+ encoder.encode(cbuf, bbuf, true);
706
+ int length = bbuf.position();
707
+ byte[] bytes = new byte[length];
708
+ System.arraycopy(bbuf.array(), 0, bytes, 0, length);
709
+ return bytes;
710
+ }
711
+
639
712
  }
@@ -42,14 +42,15 @@ import javax.xml.transform.TransformerException;
42
42
  */
43
43
  public class NokogiriXsltErrorListener implements ErrorListener {
44
44
  public enum ErrorType {
45
+ SUCCESS,
45
46
  WARNING,
46
47
  ERROR,
47
48
  FATAL
48
49
  }
49
50
 
50
- private ErrorType type;
51
- private String errorMessage;
52
- private Exception exception;
51
+ private ErrorType type = ErrorType.SUCCESS;
52
+ private String errorMessage = null;
53
+ private Exception exception = null;
53
54
 
54
55
  public void warning(TransformerException ex) throws TransformerException {
55
56
  type = ErrorType.WARNING;
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * (The MIT License)
3
3
  *
4
- * Copyright (c) 2008 - 2011:
4
+ * Copyright (c) 2008 - 2012:
5
5
  *
6
6
  * * {Aaron Patterson}[http://tenderlovemaking.com]
7
7
  * * {Mike Dalessio}[http://mike.daless.io]
@@ -43,6 +43,7 @@ import java.io.InputStream;
43
43
 
44
44
  import org.jruby.Ruby;
45
45
  import org.jruby.RubyClass;
46
+ import org.jruby.RubyFixnum;
46
47
  import org.jruby.RubyIO;
47
48
  import org.jruby.RubyObject;
48
49
  import org.jruby.RubyString;
@@ -65,6 +66,8 @@ import org.xml.sax.ext.EntityResolver2;
65
66
  */
66
67
  public class ParserContext extends RubyObject {
67
68
  protected InputSource source = null;
69
+ protected IRubyObject detected_encoding = null;
70
+ protected int stringDataSize = -1;
68
71
 
69
72
  /**
70
73
  * Create a file base input source taking into account the current
@@ -102,11 +105,28 @@ public class ParserContext extends RubyObject {
102
105
  public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
103
106
  Ruby ruby = context.getRuntime();
104
107
  String path = (String) url.toJava(String.class);
108
+ if (data.getType().respondsTo("detect_encoding")) {
109
+ // data is EnocodingReader
110
+ try {
111
+ data.callMethod(context, "read", RubyFixnum.newFixnum(context.getRuntime(), 1024));
112
+ } catch (RaiseException e) {
113
+ detected_encoding = e.getException().getInstanceVariable("@found_encoding");
114
+ }
115
+ }
116
+
105
117
  if (isAbsolutePath(path)) {
106
- source = new InputSource();
107
- source.setSystemId(path);
118
+ returnWithSystemId(path);
108
119
  return;
109
120
  }
121
+ // Dir.chdir might be called at some point before this.
122
+ String currentDir = context.getRuntime().getCurrentDirectory();
123
+ if (path != null && currentDir != null && currentDir.length() != 0) {
124
+ String absPath = currentDir + "/" + path;
125
+ if (isAbsolutePath(absPath)) {
126
+ returnWithSystemId(absPath);
127
+ return;
128
+ }
129
+ }
110
130
  RubyString stringData = null;
111
131
  if (invoke(context, data, "respond_to?",
112
132
  ruby.newSymbol("to_io").to_sym()).isTrue()) {
@@ -140,6 +160,7 @@ public class ParserContext extends RubyObject {
140
160
  }
141
161
  if (stringData != null) {
142
162
  ByteList bytes = stringData.getByteList();
163
+ stringDataSize = bytes.length() - bytes.begin();
143
164
  source = new InputSource(new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()));
144
165
  }
145
166
  }
@@ -148,6 +169,15 @@ public class ParserContext extends RubyObject {
148
169
  if (url == null) return false;
149
170
  return (new File(url)).isAbsolute();
150
171
  }
172
+
173
+ private void returnWithSystemId(String url) {
174
+ source = new InputSource();
175
+ if (detected_encoding != null) {
176
+ source.setEncoding((String) detected_encoding.toJava(String.class));
177
+ }
178
+ source.setSystemId(url);
179
+ return;
180
+ }
151
181
 
152
182
  /**
153
183
  * Set the InputSource to read from <code>file</code>, a String filename.
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * (The MIT License)
3
3
  *
4
- * Copyright (c) 2008 - 2011:
4
+ * Copyright (c) 2008 - 2012:
5
5
  *
6
6
  * * {Aaron Patterson}[http://tenderlovemaking.com]
7
7
  * * {Mike Dalessio}[http://mike.daless.io]
@@ -32,9 +32,18 @@
32
32
 
33
33
  package nokogiri.internals;
34
34
 
35
+ import static nokogiri.internals.NokogiriHelpers.canonicalizeWhitespce;
35
36
  import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
36
- import static nokogiri.internals.NokogiriHelpers.isNotXmlEscaped;
37
+ import static nokogiri.internals.NokogiriHelpers.isNamespace;
38
+ import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;
37
39
 
40
+ import java.util.ArrayDeque;
41
+ import java.util.ArrayList;
42
+ import java.util.Arrays;
43
+ import java.util.Comparator;
44
+ import java.util.Deque;
45
+ import java.util.Iterator;
46
+ import java.util.List;
38
47
  import java.util.Stack;
39
48
 
40
49
  import org.cyberneko.html.HTMLElements;
@@ -65,7 +74,11 @@ public class SaveContextVisitor {
65
74
  private Stack<String> indentation;
66
75
  private String encoding, indentString;
67
76
  private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;
68
-
77
+ private boolean canonical, incl_ns, with_comments, subsets, exclusive;
78
+ private List<Node> c14nNodeList;
79
+ private Deque<Attr[]> c14nNamespaceStack;
80
+ private Deque<Attr[]> c14nAttrStack;
81
+ private List<String> c14nExclusiveInclusivePrefixes = null;
69
82
  /*
70
83
  * U can't touch this.
71
84
  * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
@@ -81,13 +94,22 @@ public class SaveContextVisitor {
81
94
  public static final int AS_XML = 32;
82
95
  public static final int AS_HTML = 64;
83
96
  public static final int AS_BUILDER = 128;
97
+
98
+ public static final int CANONICAL = 1;
99
+ public static final int INCL_NS = 2;
100
+ public static final int WITH_COMMENTS = 4;
101
+ public static final int SUBSETS = 8;
102
+ public static final int EXCLUSIVE = 16;
84
103
 
85
- public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment) {
104
+ public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment, int canonicalOpts) {
86
105
  buffer = new StringBuffer();
87
106
  this.encoding = encoding;
88
107
  indentation = new Stack<String>(); indentation.push("");
89
108
  this.htmlDoc = htmlDoc;
90
109
  this.fragment = fragment;
110
+ c14nNodeList = new ArrayList<Node>();
111
+ c14nNamespaceStack = new ArrayDeque<Attr[]>();
112
+ c14nAttrStack = new ArrayDeque<Attr[]>();
91
113
  format = (options & FORMAT) == FORMAT;
92
114
 
93
115
  noDecl = (options & NO_DECL) == NO_DECL;
@@ -97,6 +119,12 @@ public class SaveContextVisitor {
97
119
  asXml = (options & AS_XML) == AS_XML;
98
120
  asHtml = (options & AS_HTML) == AS_HTML;
99
121
  asBuilder = (options & AS_BUILDER) == AS_BUILDER;
122
+
123
+ canonical = (canonicalOpts & CANONICAL) == CANONICAL;
124
+ incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
125
+ with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
126
+ subsets = (canonicalOpts & SUBSETS) == SUBSETS;
127
+
100
128
  if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
101
129
  if ((!format && indent != null) && indent.length() > 0) format = true;
102
130
  if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
@@ -117,6 +145,14 @@ public class SaveContextVisitor {
117
145
  this.encoding = encoding;
118
146
  }
119
147
 
148
+ public List<Node> getC14nNodeList() {
149
+ return c14nNodeList;
150
+ }
151
+
152
+ public void setC14nExclusiveInclusivePrefixes(List<String> prefixes) {
153
+ c14nExclusiveInclusivePrefixes = prefixes;
154
+ }
155
+
120
156
  public boolean enter(Node node) {
121
157
  if (node instanceof Document) {
122
158
  return enter((Document)node);
@@ -275,6 +311,10 @@ public class SaveContextVisitor {
275
311
  }
276
312
 
277
313
  public boolean enter(Comment comment) {
314
+ if (canonical) {
315
+ c14nNodeList.add(comment);
316
+ if (!with_comments) return true;
317
+ }
278
318
  buffer.append("<!--");
279
319
  buffer.append(comment.getData());
280
320
  buffer.append("-->");
@@ -293,7 +333,7 @@ public class SaveContextVisitor {
293
333
 
294
334
  if (encoding != null) {
295
335
  buffer.append(" encoding=\"");
296
- buffer.append(encoding.toUpperCase());
336
+ buffer.append(encoding);
297
337
  buffer.append("\"");
298
338
  }
299
339
  buffer.append("?>\n");
@@ -306,6 +346,10 @@ public class SaveContextVisitor {
306
346
  }
307
347
 
308
348
  public boolean enter(DocumentType docType) {
349
+ if (canonical) {
350
+ c14nNodeList.add(docType);
351
+ return true;
352
+ }
309
353
  String name = docType.getName();
310
354
  String pubId = docType.getPublicId();
311
355
  String sysId = docType.getSystemId();
@@ -334,6 +378,12 @@ public class SaveContextVisitor {
334
378
  }
335
379
 
336
380
  public boolean enter(Element element) {
381
+ if (canonical) {
382
+ c14nNodeList.add(element);
383
+ if (element == element.getOwnerDocument().getDocumentElement()) {
384
+ c14nNodeList.add(element.getOwnerDocument());
385
+ }
386
+ }
337
387
  String current = indentation.peek();
338
388
  buffer.append(current);
339
389
  if (needIndent()) {
@@ -341,9 +391,8 @@ public class SaveContextVisitor {
341
391
  }
342
392
  String name = element.getTagName();
343
393
  buffer.append("<" + name);
344
- NamedNodeMap attrs = element.getAttributes();
345
- for (int i=0; i<attrs.getLength(); i++) {
346
- Attr attr = (Attr) attrs.item(i);
394
+ Attr[] attrs = getAttrsAndNamespaces(element);
395
+ for (Attr attr : attrs) {
347
396
  if (attr.getSpecified()) {
348
397
  buffer.append(" ");
349
398
  enter(attr);
@@ -356,10 +405,8 @@ public class SaveContextVisitor {
356
405
  return true;
357
406
  }
358
407
  // no child
359
- if (asHtml) {
408
+ if (asHtml || asXhtml) {
360
409
  buffer.append(">");
361
- } else if (asXhtml) {
362
- buffer.append(" />");
363
410
  } else if (asXml && noEmpty) {
364
411
  buffer.append(">");
365
412
  } else {
@@ -390,7 +437,142 @@ public class SaveContextVisitor {
390
437
  return element.isEmpty();
391
438
  }
392
439
 
440
+ private Attr[] getAttrsAndNamespaces(Element element) {
441
+ NamedNodeMap attrs = element.getAttributes();
442
+ if (!canonical) {
443
+ if (attrs == null || attrs.getLength() == 0) return new Attr[0];
444
+ Attr[] attrsAndNamespaces = new Attr[attrs.getLength()];
445
+ for (int i=0; i<attrs.getLength(); i++) {
446
+ attrsAndNamespaces[i] = (Attr) attrs.item(i);
447
+ }
448
+ return attrsAndNamespaces;
449
+ } else {
450
+ List<Attr> namespaces = new ArrayList<Attr>();
451
+ List<Attr> attributes = new ArrayList<Attr>();
452
+ if (subsets) {
453
+ getAttrsOfAncestors(element.getParentNode(), namespaces, attributes);
454
+ Attr[] namespaceOfAncestors = getSortedArray(namespaces);
455
+ Attr[] attributeOfAncestors = getSortedArray(attributes);
456
+ c14nNamespaceStack.push(namespaceOfAncestors);
457
+ c14nAttrStack.push(attributeOfAncestors);
458
+ subsets = false; // namespace propagation should be done only once on top level node.
459
+ }
460
+
461
+ getNamespacesAndAttrs(element, namespaces, attributes);
462
+
463
+ Attr[] namespaceArray = getSortedArray(namespaces);
464
+ Attr[] attributeArray = getSortedArray(attributes);
465
+ Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length];
466
+ for (int i=0; i<allAttrs.length; i++) {
467
+ if (i < namespaceArray.length) {
468
+ allAttrs[i] = namespaceArray[i];
469
+ } else {
470
+ allAttrs[i] = attributeArray[i-namespaceArray.length];
471
+ }
472
+ }
473
+ c14nNamespaceStack.push(namespaceArray);
474
+ c14nAttrStack.push(attributeArray);
475
+ return allAttrs;
476
+ }
477
+
478
+ }
479
+
480
+ private void getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes) {
481
+ if (parent == null) return;
482
+ NamedNodeMap attrs = parent.getAttributes();
483
+ if (attrs == null || attrs.getLength() == 0) return;
484
+ for (int i=0; i < attrs.getLength(); i++) {
485
+ Attr attr = (Attr)attrs.item(i);
486
+ if (isNamespace(attr.getNodeName())) namespaces.add(attr);
487
+ else attributes.add(attr);
488
+ }
489
+ getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
490
+ }
491
+
492
+ private void getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes) {
493
+ NamedNodeMap attrs = current.getAttributes();
494
+ for (int i=0; i<attrs.getLength(); i++) {
495
+ Attr attr = (Attr)attrs.item(i);
496
+ if (isNamespace(attr.getNodeName())) {
497
+ getNamespacesWithPropagated(namespaces, attr);
498
+ } else {
499
+ getAttributesWithPropagated(attributes, attr);
500
+ }
501
+ if (exclusive) {
502
+ verifyXmlSpace(attributes, attrs);
503
+ }
504
+ }
505
+ }
506
+
507
+ private void getNamespacesWithPropagated(List<Attr> namespaces, Attr attr) {
508
+ boolean newNamespace = true;
509
+ Iterator<Attr[]> iter = c14nNamespaceStack.iterator();
510
+ while (iter.hasNext()) {
511
+ Attr[] parentNamespaces = iter.next();
512
+ for (int n=0; n < parentNamespaces.length; n++) {
513
+ if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) {
514
+ if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
515
+ // exactly the same namespace should not be added
516
+ newNamespace = false;
517
+ } else {
518
+ // if the namespace URL changes, the propagated namespace is overridden
519
+ namespaces.remove(parentNamespaces[n]);
520
+ }
521
+ }
522
+ }
523
+ if (newNamespace && !namespaces.contains(attr)) namespaces.add(attr);
524
+ }
525
+ }
526
+
527
+ private void getAttributesWithPropagated(List<Attr> attributes, Attr attr) {
528
+ boolean newAttribute = true;
529
+ Iterator<Attr[]> iter = c14nAttrStack.iterator();
530
+ while (iter.hasNext()) {
531
+ Attr[] parentAttr = iter.next();
532
+ for (int n=0; n < parentAttr.length; n++) {
533
+ if (!parentAttr[n].getNodeName().startsWith("xml:")) continue;
534
+ if (parentAttr[n].getNodeName().equals(attr.getNodeName())) {
535
+ if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
536
+ // exactly the same attribute should not be added
537
+ newAttribute = false;
538
+ } else {
539
+ // if the attribute value changes, the propagated attribute is overridden
540
+ attributes.remove(parentAttr[n]);
541
+ }
542
+ }
543
+ }
544
+ if (newAttribute) attributes.add(attr);
545
+ }
546
+ }
547
+
548
+ private void verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs) {
549
+ Attr attr = (Attr) attrs.getNamedItem("xml:space");
550
+ if (attr == null) {
551
+ for (int i=0; i < attributes.size(); i++) {
552
+ if (attributes.get(i).getNodeName().equals("xml:space")) {
553
+ attributes.remove(i);
554
+ break;
555
+ }
556
+ }
557
+ }
558
+ }
559
+
560
+ private Attr[] getSortedArray(List<Attr> attrList) {
561
+ Attr[] attrArray = attrList.toArray(new Attr[0]);
562
+ Arrays.sort(attrArray, new Comparator<Attr>() {
563
+ @Override
564
+ public int compare(Attr attr0, Attr attr1) {
565
+ return attr0.getNodeName().compareTo(attr1.getNodeName());
566
+ }
567
+ });
568
+ return attrArray;
569
+ }
570
+
393
571
  public void leave(Element element) {
572
+ if (canonical) {
573
+ c14nNamespaceStack.poll();
574
+ c14nAttrStack.poll();
575
+ }
394
576
  String name = element.getTagName();
395
577
  if (element.hasChildNodes()) {
396
578
  if (needIndentInClosing(element)) {
@@ -504,6 +686,7 @@ public class SaveContextVisitor {
504
686
  if (asHtml) buffer.append(">");
505
687
  else buffer.append("?>");
506
688
  buffer.append("\n");
689
+ if (canonical) c14nNodeList.add(pi);
507
690
  return true;
508
691
  }
509
692
 
@@ -514,6 +697,13 @@ public class SaveContextVisitor {
514
697
  private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
515
698
  public boolean enter(Text text) {
516
699
  String textContent = text.getNodeValue();
700
+ if (canonical) {
701
+ c14nNodeList.add(text);
702
+ if (isWhitespaceText(textContent)) {
703
+ buffer.append(canonicalizeWhitespce(textContent));
704
+ return true;
705
+ }
706
+ }
517
707
  if (needIndentText() && "".equals(textContent.trim())) return true;
518
708
  if (needIndentText()) {
519
709
  String current = indentation.peek();
@@ -521,9 +711,10 @@ public class SaveContextVisitor {
521
711
  indentation.push(current + indentString);
522
712
  if (textContent.charAt(0) == lineSeparator) textContent = textContent.substring(1);
523
713
  }
524
- if (isNotXmlEscaped(textContent)) {
714
+ if (text.getUserData(NokogiriHelpers.ENCODED_STRING) == null || !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING))) {
525
715
  textContent = encodeJavaString(textContent);
526
716
  }
717
+
527
718
  if (getEncoding(text) == null) {
528
719
  textContent = encodeStringToHtmlEntity(textContent);
529
720
  }