nokogiri 1.11.0.rc4-java → 1.11.5-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (144) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +12 -12
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/dependencies.yml +12 -12
  7. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  8. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  9. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  10. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  11. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  12. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  13. data/ext/java/nokogiri/NokogiriService.java +595 -556
  14. data/ext/java/nokogiri/XmlAttr.java +118 -126
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  16. data/ext/java/nokogiri/XmlCdata.java +35 -58
  17. data/ext/java/nokogiri/XmlComment.java +46 -67
  18. data/ext/java/nokogiri/XmlDocument.java +645 -572
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  20. data/ext/java/nokogiri/XmlDtd.java +448 -414
  21. data/ext/java/nokogiri/XmlElement.java +23 -48
  22. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  23. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  24. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  25. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  26. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  27. data/ext/java/nokogiri/XmlNode.java +1843 -1620
  28. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  30. data/ext/java/nokogiri/XmlReader.java +513 -450
  31. data/ext/java/nokogiri/XmlRelaxng.java +85 -104
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -315
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  34. data/ext/java/nokogiri/XmlSchema.java +328 -295
  35. data/ext/java/nokogiri/XmlSyntaxError.java +113 -115
  36. data/ext/java/nokogiri/XmlText.java +55 -76
  37. data/ext/java/nokogiri/XmlXpathContext.java +240 -238
  38. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -202
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -98
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +116 -131
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -56
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  57. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  58. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  94. data/ext/nokogiri/depend +34 -474
  95. data/ext/nokogiri/extconf.rb +270 -183
  96. data/ext/nokogiri/html_document.c +10 -15
  97. data/ext/nokogiri/html_element_description.c +84 -71
  98. data/ext/nokogiri/html_entity_lookup.c +21 -16
  99. data/ext/nokogiri/html_sax_parser_context.c +67 -64
  100. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  101. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  102. data/ext/nokogiri/nokogiri.c +190 -60
  103. data/ext/nokogiri/test_global_handlers.c +40 -0
  104. data/ext/nokogiri/xml_attr.c +15 -15
  105. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  106. data/ext/nokogiri/xml_cdata.c +13 -18
  107. data/ext/nokogiri/xml_comment.c +19 -26
  108. data/ext/nokogiri/xml_document.c +246 -188
  109. data/ext/nokogiri/xml_document_fragment.c +13 -15
  110. data/ext/nokogiri/xml_dtd.c +54 -48
  111. data/ext/nokogiri/xml_element_content.c +30 -27
  112. data/ext/nokogiri/xml_element_decl.c +22 -22
  113. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  114. data/ext/nokogiri/xml_entity_decl.c +32 -30
  115. data/ext/nokogiri/xml_entity_reference.c +16 -18
  116. data/ext/nokogiri/xml_namespace.c +56 -49
  117. data/ext/nokogiri/xml_node.c +371 -320
  118. data/ext/nokogiri/xml_node_set.c +168 -156
  119. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  120. data/ext/nokogiri/xml_reader.c +191 -157
  121. data/ext/nokogiri/xml_relax_ng.c +29 -23
  122. data/ext/nokogiri/xml_sax_parser.c +117 -112
  123. data/ext/nokogiri/xml_sax_parser_context.c +101 -84
  124. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  125. data/ext/nokogiri/xml_schema.c +48 -42
  126. data/ext/nokogiri/xml_syntax_error.c +42 -21
  127. data/ext/nokogiri/xml_text.c +13 -17
  128. data/ext/nokogiri/xml_xpath_context.c +134 -127
  129. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  130. data/lib/nokogiri.rb +2 -6
  131. data/lib/nokogiri/css/parser.rb +1 -1
  132. data/lib/nokogiri/extension.rb +26 -0
  133. data/lib/nokogiri/html/document_fragment.rb +15 -15
  134. data/lib/nokogiri/nokogiri.jar +0 -0
  135. data/lib/nokogiri/version/constant.rb +1 -1
  136. data/lib/nokogiri/version/info.rb +32 -8
  137. data/lib/nokogiri/xml/document.rb +74 -28
  138. data/lib/nokogiri/xml/node.rb +39 -42
  139. data/lib/nokogiri/xml/reader.rb +2 -9
  140. data/lib/nokogiri/xml/xpath.rb +1 -3
  141. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  142. metadata +62 -127
  143. data/ext/nokogiri/xml_io.c +0 -63
  144. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri.internals;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.canonicalizeWhitespace;
@@ -66,713 +34,828 @@ import org.w3c.dom.Text;
66
34
  * @author Patrick Mahoney <pat@polycrystal.org>
67
35
  * @author Yoko Harada <yokolet@gmail.com>
68
36
  */
69
- public class SaveContextVisitor {
70
-
71
- private final StringBuilder buffer;
72
- private final Stack<String> indentation;
73
- private String encoding;
74
- private final CharSequence indentString;
75
- private boolean format;
76
- private final boolean noDecl;
77
- private final boolean noEmpty;
78
- private final boolean noXhtml;
79
- private final boolean asXhtml;
80
- private boolean asXml;
81
- private final boolean asHtml;
82
- private final boolean asBuilder;
83
- private boolean htmlDoc;
84
- private final boolean fragment;
85
- private final boolean canonical, incl_ns, with_comments;
86
- private boolean subsets;
87
- private boolean exclusive;
88
- private final List<Node> c14nNodeList;
89
- private final Deque<Attr[]> c14nNamespaceStack;
90
- private final Deque<Attr[]> c14nAttrStack;
91
- //private List<String> c14nExclusiveInclusivePrefixes = null;
92
-
93
- /*
94
- * U can't touch this.
95
- * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
96
- *
97
- * Taken from libxml save options.
98
- */
99
-
100
- public static final int FORMAT = 1;
101
- public static final int NO_DECL = 2;
102
- public static final int NO_EMPTY = 4;
103
- public static final int NO_XHTML = 8;
104
- public static final int AS_XHTML = 16;
105
- public static final int AS_XML = 32;
106
- public static final int AS_HTML = 64;
107
- public static final int AS_BUILDER = 128;
108
-
109
- public static final int CANONICAL = 1;
110
- public static final int INCL_NS = 2;
111
- public static final int WITH_COMMENTS = 4;
112
- public static final int SUBSETS = 8;
113
- public static final int EXCLUSIVE = 16;
114
-
115
- public SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment, int canonicalOpts) {
116
- buffer = new StringBuilder();
117
- this.encoding = encoding;
118
- indentation = new Stack<String>(); indentation.push("");
119
- this.htmlDoc = htmlDoc;
120
- this.fragment = fragment;
121
- c14nNodeList = new ArrayList<Node>();
122
- c14nNamespaceStack = new ArrayDeque<Attr[]>();
123
- c14nAttrStack = new ArrayDeque<Attr[]>();
124
- format = (options & FORMAT) == FORMAT;
125
-
126
- noDecl = (options & NO_DECL) == NO_DECL;
127
- noEmpty = (options & NO_EMPTY) == NO_EMPTY;
128
- noXhtml = (options & NO_XHTML) == NO_XHTML;
129
- asXhtml = (options & AS_XHTML) == AS_XHTML;
130
- asXml = (options & AS_XML) == AS_XML;
131
- asHtml = (options & AS_HTML) == AS_HTML;
132
- asBuilder = (options & AS_BUILDER) == AS_BUILDER;
133
-
134
- canonical = (canonicalOpts & CANONICAL) == CANONICAL;
135
- incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
136
- with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
137
- subsets = (canonicalOpts & SUBSETS) == SUBSETS;
138
-
139
- if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
140
- if ((!format && indent != null) && indent.length() > 0) format = true;
141
- if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
142
- indentString = indent;
143
- if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = true;
144
- }
145
-
146
- @Override
147
- public String toString() {
148
- return buffer.toString();
149
- }
150
-
151
- public StringBuilder getInternalBuffer() { return buffer; }
152
-
153
- public void setHtmlDoc(boolean htmlDoc) {
154
- this.htmlDoc = htmlDoc;
155
- }
156
-
157
- public void setEncoding(String encoding) {
158
- this.encoding = encoding;
159
- }
160
-
161
- public boolean enter(Node node) {
162
- if (node instanceof Document) {
163
- return enter((Document)node);
164
- }
165
- if (node instanceof Element) {
166
- return enter((Element)node);
167
- }
168
- if (node instanceof Attr) {
169
- return enter((Attr)node);
170
- }
171
- if (node instanceof Text) {
172
- return enter((Text)node);
173
- }
174
- if (node instanceof CDATASection) {
175
- return enter((CDATASection)node);
176
- }
177
- if (node instanceof Comment) {
178
- return enter((Comment)node);
179
- }
180
- if (node instanceof DocumentType) {
181
- return enter((DocumentType)node);
182
- }
183
- if (node instanceof Entity) {
184
- return enter((Entity)node);
185
- }
186
- if (node instanceof EntityReference) {
187
- return enter((EntityReference) node);
188
- }
189
- if (node instanceof Notation) {
190
- return enter((Notation)node);
191
- }
192
- if (node instanceof ProcessingInstruction) {
193
- return enter((ProcessingInstruction)node);
194
- }
195
- return false;
196
- }
197
-
198
- public void leave(Node node) {
199
- if (node instanceof Document) {
200
- leave((Document)node);
201
- return;
202
- }
203
- if (node instanceof Element) {
204
- leave((Element)node);
205
- return;
206
- }
207
- if (node instanceof Attr) {
208
- leave((Attr)node);
209
- return;
210
- }
211
- if (node instanceof Text) {
212
- return;
213
- }
214
- if (node instanceof CDATASection) {
215
- leave((CDATASection)node);
216
- return;
217
- }
218
- if (node instanceof Comment) {
219
- leave((Comment)node);
220
- return;
221
- }
222
- if (node instanceof DocumentType) {
223
- leave((DocumentType)node);
224
- return;
225
- }
226
- if (node instanceof Entity) {
227
- leave((Entity)node);
228
- return;
229
- }
230
- if (node instanceof EntityReference) {
231
- leave((EntityReference) node);
232
- return;
233
- }
234
- if (node instanceof Notation) {
235
- leave((Notation)node);
236
- return;
237
- }
238
- if (node instanceof ProcessingInstruction) {
239
- leave((ProcessingInstruction)node);
240
- return;
241
- }
242
- }
243
-
244
- public boolean enter(String string) {
245
- buffer.append(string);
246
- return true;
247
- }
248
-
249
- public void leave(String string) {
250
- // no-op
251
- }
252
-
253
- public boolean enter(Attr attr) {
254
- String name = attr.getName();
255
- buffer.append(name);
256
- if (!asHtml || !isHtmlBooleanAttr(name)) {
257
- buffer.append('=');
258
- buffer.append('"');
259
- String value = replaceCharsetIfNecessary(attr);
260
- buffer.append(serializeAttrTextContent(value, htmlDoc));
261
- buffer.append('"');
262
- }
263
- return true;
264
- }
265
-
266
- private static final Pattern CHARSET =
267
- Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
268
-
269
- private String replaceCharsetIfNecessary(Attr attr) {
270
- String value = attr.getValue();
271
- if (encoding == null) return value; // unable to replace in any case
272
- if (!"content".equals(attr.getName().toLowerCase())) return value; // must be content attr
273
- if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value;
274
- Matcher m = CHARSET.matcher(value);
275
- if (!m.find()) return value;
276
- if (value.contains(encoding)) return value; // no need to replace
277
- return value.replace(m.group(), "charset=" + encoding);
278
- }
279
-
280
- static final Set<String> HTML_BOOLEAN_ATTRS;
281
- static {
282
- final String[] _HTML_BOOLEAN_ATTRS = {
283
- "checked", "compact", "declare", "defer", "disabled", "ismap",
284
- "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
285
- "selected"
286
- };
287
- HTML_BOOLEAN_ATTRS = new HashSet<String>(Arrays.asList(_HTML_BOOLEAN_ATTRS));
288
- }
289
-
290
- private static boolean isHtmlBooleanAttr(String name) {
291
- return HTML_BOOLEAN_ATTRS.contains(name);
292
- }
293
-
294
- private static CharSequence serializeAttrTextContent(String str, boolean htmlDoc) {
295
- if (str == null || str.length() == 0) return "";
296
-
297
- StringBuilder buffer = new StringBuilder(str.length() + 16);
298
-
299
- for (int i = 0; i < str.length(); i++) {
300
- char c; switch (c = str.charAt(i)) {
301
- case '\n': buffer.append("&#10;"); break;
302
- case '\r': buffer.append("&#13;"); break;
303
- case '\t': buffer.append("&#9;"); break;
304
- case '"': if (htmlDoc) buffer.append("%22");
305
- else buffer.append("&quot;");
306
- break;
307
- case '<': buffer.append("&lt;"); break;
308
- case '>': buffer.append("&gt;"); break;
309
- case '&': buffer.append("&amp;"); break;
310
- default: buffer.append(c);
311
- }
312
- }
313
-
314
- return buffer;
315
- }
316
-
317
- public void leave(Attr attr) {
318
- // no-op
319
- }
320
-
321
- public boolean enter(CDATASection cdata) {
322
- buffer.append("<![CDATA[");
323
- buffer.append(cdata.getData());
324
- buffer.append("]]>");
325
- return true;
326
- }
327
-
328
- public void leave(CDATASection cdata) {
329
- // no-op
330
- }
331
-
332
- public boolean enter(Comment comment) {
333
- if (canonical) {
334
- c14nNodeList.add(comment);
335
- if (!with_comments) return true;
336
- }
337
- buffer.append("<!--");
338
- buffer.append(comment.getData());
339
- buffer.append("-->");
340
- return true;
341
- }
342
-
343
- public void leave(Comment comment) {
344
- // no-op
345
- }
346
-
347
- public boolean enter(Document document) {
348
- if (!noDecl) {
349
- buffer.append("<?xml version=\"");
350
- buffer.append(document.getXmlVersion());
351
- buffer.append("\"");
352
-
353
- if (encoding != null) {
354
- buffer.append(" encoding=\"");
355
- buffer.append(encoding);
356
- buffer.append("\"");
357
- }
358
- buffer.append("?>\n");
359
- }
360
- return true;
361
- }
362
-
363
- public void leave(Document document) {
364
- // no-op
365
- }
366
-
367
- public boolean enter(DocumentType docType) {
368
- if (canonical) {
369
- c14nNodeList.add(docType);
370
- return true;
371
- }
372
- String name = docType.getName();
373
- String pubId = docType.getPublicId();
374
- String sysId = docType.getSystemId();
375
- String internalSubset = docType.getInternalSubset();
376
- if (docType.getPreviousSibling() != null) {
377
- buffer.append('\n');
378
- }
379
- buffer.append("<!DOCTYPE ").append(name).append(' ');
380
- if (pubId != null) {
381
- buffer.append("PUBLIC \"").append(pubId).append('"');
382
- if (sysId != null) buffer.append(" \"").append(sysId).append('"');
383
- } else if (sysId != null) {
384
- buffer.append("SYSTEM \"").append(sysId).append('"');
385
- }
386
- if (internalSubset != null) {
387
- buffer.append(' ').append('[');
388
- buffer.append(internalSubset);
389
- buffer.append(']');
390
- }
391
- buffer.append(">\n");
392
- return true;
393
- }
394
-
395
- public void leave(DocumentType docType) {
396
- // no-op
397
- }
398
-
399
- public boolean enter(Element element) {
400
- if (canonical) {
401
- c14nNodeList.add(element);
402
- if (element == element.getOwnerDocument().getDocumentElement()) {
403
- c14nNodeList.add(element.getOwnerDocument());
404
- }
405
- }
406
- String current = indentation.peek();
407
- buffer.append(current);
408
- if (needIndent(element)) {
409
- indentation.push(current + indentString);
410
- }
411
- String name = element.getTagName();
412
- buffer.append('<').append(name);
413
- Attr[] attrs = getAttrsAndNamespaces(element);
414
- for (Attr attr : attrs) {
415
- if (attr.getSpecified()) {
416
- buffer.append(' ');
417
- enter(attr);
418
- leave(attr);
419
- }
420
- }
421
- if (element.hasChildNodes()) {
422
- buffer.append('>');
423
- if (needBreakInOpening(element)) buffer.append('\n');
424
- return true;
425
- }
426
- // no child
427
- if (asHtml) {
428
- buffer.append('>');
429
- } else if (asXml && noEmpty) {
430
- buffer.append('>');
431
- } else if (asXhtml) {
432
- if (isEmpty(name)) {
433
- buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2
434
- } else {
435
- buffer.append('>');
436
- }
437
- } else {
438
- buffer.append("/>");
439
- }
440
- if (needBreakInOpening(element)) {
441
- buffer.append('\n');
442
- }
443
- return true;
444
- }
445
-
446
- private boolean needIndent(Element element) {
447
- if (containsText(element)) return false;
448
- if (fragment) return false; // a given option might be fragment and format. fragment matters
449
- if (format || asBuilder) return true;
450
- return false;
451
- }
452
-
453
- private boolean needBreakInOpening(Element element) {
454
- if (containsText(element)) return false;
455
- if (fragment) return false;
456
- if (format) return true;
457
- if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
458
- if (format && element.getNextSibling() == null && element.hasChildNodes()) return true;
459
- return false;
460
- }
461
-
462
- private boolean isEmpty(String name) {
463
- HTMLElements.Element element = HTMLElements.getElement(name);
464
- return element.isEmpty();
465
- }
466
-
467
- private Attr[] getAttrsAndNamespaces(Element element) {
468
- NamedNodeMap attrs = element.getAttributes();
469
- if (!canonical) {
470
- if (attrs == null || attrs.getLength() == 0) return new Attr[0];
471
- Attr[] attrsAndNamespaces = new Attr[attrs.getLength()];
472
- for (int i=0; i<attrs.getLength(); i++) {
473
- attrsAndNamespaces[i] = (Attr) attrs.item(i);
474
- }
475
- return attrsAndNamespaces;
37
+ public class SaveContextVisitor
38
+ {
39
+
40
+ private final StringBuilder buffer;
41
+ private final Stack<String> indentation;
42
+ private String encoding;
43
+ private final CharSequence indentString;
44
+ private boolean format;
45
+ private final boolean noDecl;
46
+ private final boolean noEmpty;
47
+ private final boolean noXhtml;
48
+ private final boolean asXhtml;
49
+ private boolean asXml;
50
+ private final boolean asHtml;
51
+ private final boolean asBuilder;
52
+ private boolean htmlDoc;
53
+ private final boolean fragment;
54
+ private final boolean canonical, incl_ns, with_comments;
55
+ private boolean subsets;
56
+ private boolean exclusive;
57
+ private final List<Node> c14nNodeList;
58
+ private final Deque<Attr[]> c14nNamespaceStack;
59
+ private final Deque<Attr[]> c14nAttrStack;
60
+ //private List<String> c14nExclusiveInclusivePrefixes = null;
61
+
62
+ /*
63
+ * U can't touch this.
64
+ * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
65
+ *
66
+ * Taken from libxml save options.
67
+ */
68
+
69
+ public static final int FORMAT = 1;
70
+ public static final int NO_DECL = 2;
71
+ public static final int NO_EMPTY = 4;
72
+ public static final int NO_XHTML = 8;
73
+ public static final int AS_XHTML = 16;
74
+ public static final int AS_XML = 32;
75
+ public static final int AS_HTML = 64;
76
+ public static final int AS_BUILDER = 128;
77
+
78
+ public static final int CANONICAL = 1;
79
+ public static final int INCL_NS = 2;
80
+ public static final int WITH_COMMENTS = 4;
81
+ public static final int SUBSETS = 8;
82
+ public static final int EXCLUSIVE = 16;
83
+
84
+ public
85
+ SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment,
86
+ int canonicalOpts)
87
+ {
88
+ buffer = new StringBuilder();
89
+ this.encoding = encoding;
90
+ indentation = new Stack<String>();
91
+ indentation.push("");
92
+ this.htmlDoc = htmlDoc;
93
+ this.fragment = fragment;
94
+ c14nNodeList = new ArrayList<Node>();
95
+ c14nNamespaceStack = new ArrayDeque<Attr[]>();
96
+ c14nAttrStack = new ArrayDeque<Attr[]>();
97
+ format = (options & FORMAT) == FORMAT;
98
+
99
+ noDecl = (options & NO_DECL) == NO_DECL;
100
+ noEmpty = (options & NO_EMPTY) == NO_EMPTY;
101
+ noXhtml = (options & NO_XHTML) == NO_XHTML;
102
+ asXhtml = (options & AS_XHTML) == AS_XHTML;
103
+ asXml = (options & AS_XML) == AS_XML;
104
+ asHtml = (options & AS_HTML) == AS_HTML;
105
+ asBuilder = (options & AS_BUILDER) == AS_BUILDER;
106
+
107
+ canonical = (canonicalOpts & CANONICAL) == CANONICAL;
108
+ incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
109
+ with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
110
+ subsets = (canonicalOpts & SUBSETS) == SUBSETS;
111
+
112
+ if ((format && indent == null) || (format && indent.length() == 0)) { indent = " "; } // default, two spaces
113
+ if ((!format && indent != null) && indent.length() > 0) { format = true; }
114
+ if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) { indent = " "; } // default, two spaces
115
+ indentString = indent;
116
+ if (!asXml && !asHtml && !asXhtml && !asBuilder) { asXml = true; }
117
+ }
118
+
119
+ @Override
120
+ public String
121
+ toString()
122
+ {
123
+ return buffer.toString();
124
+ }
125
+
126
+ public StringBuilder
127
+ getInternalBuffer() { return buffer; }
128
+
129
+ public void
130
+ setHtmlDoc(boolean htmlDoc)
131
+ {
132
+ this.htmlDoc = htmlDoc;
133
+ }
134
+
135
+ public void
136
+ setEncoding(String encoding)
137
+ {
138
+ this.encoding = encoding;
139
+ }
140
+
141
+ public boolean
142
+ enter(Node node)
143
+ {
144
+ if (node instanceof Document) {
145
+ return enter((Document)node);
146
+ }
147
+ if (node instanceof Element) {
148
+ return enter((Element)node);
149
+ }
150
+ if (node instanceof Attr) {
151
+ return enter((Attr)node);
152
+ }
153
+ if (node instanceof Text) {
154
+ return enter((Text)node);
155
+ }
156
+ if (node instanceof CDATASection) {
157
+ return enter((CDATASection)node);
158
+ }
159
+ if (node instanceof Comment) {
160
+ return enter((Comment)node);
161
+ }
162
+ if (node instanceof DocumentType) {
163
+ return enter((DocumentType)node);
164
+ }
165
+ if (node instanceof Entity) {
166
+ return enter((Entity)node);
167
+ }
168
+ if (node instanceof EntityReference) {
169
+ return enter((EntityReference) node);
170
+ }
171
+ if (node instanceof Notation) {
172
+ return enter((Notation)node);
173
+ }
174
+ if (node instanceof ProcessingInstruction) {
175
+ return enter((ProcessingInstruction)node);
176
+ }
177
+ return false;
178
+ }
179
+
180
+ public void
181
+ leave(Node node)
182
+ {
183
+ if (node instanceof Document) {
184
+ leave((Document)node);
185
+ return;
186
+ }
187
+ if (node instanceof Element) {
188
+ leave((Element)node);
189
+ return;
190
+ }
191
+ if (node instanceof Attr) {
192
+ leave((Attr)node);
193
+ return;
194
+ }
195
+ if (node instanceof Text) {
196
+ return;
197
+ }
198
+ if (node instanceof CDATASection) {
199
+ leave((CDATASection)node);
200
+ return;
201
+ }
202
+ if (node instanceof Comment) {
203
+ leave((Comment)node);
204
+ return;
205
+ }
206
+ if (node instanceof DocumentType) {
207
+ leave((DocumentType)node);
208
+ return;
209
+ }
210
+ if (node instanceof Entity) {
211
+ leave((Entity)node);
212
+ return;
213
+ }
214
+ if (node instanceof EntityReference) {
215
+ leave((EntityReference) node);
216
+ return;
217
+ }
218
+ if (node instanceof Notation) {
219
+ leave((Notation)node);
220
+ return;
221
+ }
222
+ if (node instanceof ProcessingInstruction) {
223
+ leave((ProcessingInstruction)node);
224
+ return;
225
+ }
226
+ }
227
+
228
+ public boolean
229
+ enter(String string)
230
+ {
231
+ buffer.append(string);
232
+ return true;
233
+ }
234
+
235
+ public void
236
+ leave(String string)
237
+ {
238
+ // no-op
239
+ }
240
+
241
+ public boolean
242
+ enter(Attr attr)
243
+ {
244
+ String name = attr.getName();
245
+ buffer.append(name);
246
+ if (!asHtml || !isHtmlBooleanAttr(name)) {
247
+ buffer.append('=');
248
+ buffer.append('"');
249
+ String value = replaceCharsetIfNecessary(attr);
250
+ buffer.append(serializeAttrTextContent(value, htmlDoc));
251
+ buffer.append('"');
252
+ }
253
+ return true;
254
+ }
255
+
256
+ private static final Pattern CHARSET =
257
+ Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
258
+
259
+ private String
260
+ replaceCharsetIfNecessary(Attr attr)
261
+ {
262
+ String value = attr.getValue();
263
+ if (encoding == null) { return value; } // unable to replace in any case
264
+ if (!"content".equals(attr.getName().toLowerCase())) { return value; } // must be content attr
265
+ if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) { return value; }
266
+ Matcher m = CHARSET.matcher(value);
267
+ if (!m.find()) { return value; }
268
+ if (value.contains(encoding)) { return value; } // no need to replace
269
+ return value.replace(m.group(), "charset=" + encoding);
270
+ }
271
+
272
+ static final Set<String> HTML_BOOLEAN_ATTRS;
273
+ static
274
+ {
275
+ final String[] _HTML_BOOLEAN_ATTRS = {
276
+ "checked", "compact", "declare", "defer", "disabled", "ismap",
277
+ "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
278
+ "selected"
279
+ };
280
+ HTML_BOOLEAN_ATTRS = new HashSet<String>(Arrays.asList(_HTML_BOOLEAN_ATTRS));
281
+ }
282
+
283
+ private static boolean
284
+ isHtmlBooleanAttr(String name)
285
+ {
286
+ return HTML_BOOLEAN_ATTRS.contains(name);
287
+ }
288
+
289
+ private static CharSequence
290
+ serializeAttrTextContent(String str, boolean htmlDoc)
291
+ {
292
+ if (str == null || str.length() == 0) { return ""; }
293
+
294
+ StringBuilder buffer = new StringBuilder(str.length() + 16);
295
+
296
+ for (int i = 0; i < str.length(); i++) {
297
+ char c;
298
+ switch (c = str.charAt(i)) {
299
+ case '\n':
300
+ buffer.append("&#10;");
301
+ break;
302
+ case '\r':
303
+ buffer.append("&#13;");
304
+ break;
305
+ case '\t':
306
+ buffer.append("&#9;");
307
+ break;
308
+ case '"':
309
+ if (htmlDoc) { buffer.append("%22"); }
310
+ else { buffer.append("&quot;"); }
311
+ break;
312
+ case '<':
313
+ buffer.append("&lt;");
314
+ break;
315
+ case '>':
316
+ buffer.append("&gt;");
317
+ break;
318
+ case '&':
319
+ buffer.append("&amp;");
320
+ break;
321
+ default:
322
+ buffer.append(c);
323
+ }
324
+ }
325
+
326
+ return buffer;
327
+ }
328
+
329
+ public void
330
+ leave(Attr attr)
331
+ {
332
+ // no-op
333
+ }
334
+
335
+ public boolean
336
+ enter(CDATASection cdata)
337
+ {
338
+ buffer.append("<![CDATA[");
339
+ buffer.append(cdata.getData());
340
+ buffer.append("]]>");
341
+ return true;
342
+ }
343
+
344
+ public void
345
+ leave(CDATASection cdata)
346
+ {
347
+ // no-op
348
+ }
349
+
350
+ public boolean
351
+ enter(Comment comment)
352
+ {
353
+ if (canonical) {
354
+ c14nNodeList.add(comment);
355
+ if (!with_comments) { return true; }
356
+ }
357
+ buffer.append("<!--");
358
+ buffer.append(comment.getData());
359
+ buffer.append("-->");
360
+ return true;
361
+ }
362
+
363
+ public void
364
+ leave(Comment comment)
365
+ {
366
+ // no-op
367
+ }
368
+
369
+ public boolean
370
+ enter(Document document)
371
+ {
372
+ if (!noDecl) {
373
+ buffer.append("<?xml version=\"");
374
+ buffer.append(document.getXmlVersion());
375
+ buffer.append("\"");
376
+
377
+ if (encoding != null) {
378
+ buffer.append(" encoding=\"");
379
+ buffer.append(encoding);
380
+ buffer.append("\"");
381
+ }
382
+ buffer.append("?>\n");
383
+ }
384
+ return true;
385
+ }
386
+
387
+ public void
388
+ leave(Document document)
389
+ {
390
+ // no-op
391
+ }
392
+
393
+ public boolean
394
+ enter(DocumentType docType)
395
+ {
396
+ if (canonical) {
397
+ c14nNodeList.add(docType);
398
+ return true;
399
+ }
400
+ String name = docType.getName();
401
+ String pubId = docType.getPublicId();
402
+ String sysId = docType.getSystemId();
403
+ String internalSubset = docType.getInternalSubset();
404
+ if (docType.getPreviousSibling() != null) {
405
+ buffer.append('\n');
406
+ }
407
+ buffer.append("<!DOCTYPE ").append(name).append(' ');
408
+ if (pubId != null) {
409
+ buffer.append("PUBLIC \"").append(pubId).append('"');
410
+ if (sysId != null) { buffer.append(" \"").append(sysId).append('"'); }
411
+ } else if (sysId != null) {
412
+ buffer.append("SYSTEM \"").append(sysId).append('"');
413
+ }
414
+ if (internalSubset != null) {
415
+ buffer.append(' ').append('[');
416
+ buffer.append(internalSubset);
417
+ buffer.append(']');
418
+ }
419
+ buffer.append(">\n");
420
+ return true;
421
+ }
422
+
423
+ public void
424
+ leave(DocumentType docType)
425
+ {
426
+ // no-op
427
+ }
428
+
429
+ public boolean
430
+ enter(Element element)
431
+ {
432
+ if (canonical) {
433
+ c14nNodeList.add(element);
434
+ if (element == element.getOwnerDocument().getDocumentElement()) {
435
+ c14nNodeList.add(element.getOwnerDocument());
436
+ }
437
+ }
438
+ String current = indentation.peek();
439
+ buffer.append(current);
440
+ if (needIndent(element)) {
441
+ indentation.push(current + indentString);
442
+ }
443
+ String name = element.getTagName();
444
+ buffer.append('<').append(name);
445
+ Attr[] attrs = getAttrsAndNamespaces(element);
446
+ for (Attr attr : attrs) {
447
+ if (attr.getSpecified()) {
448
+ buffer.append(' ');
449
+ enter(attr);
450
+ leave(attr);
451
+ }
452
+ }
453
+ if (element.hasChildNodes()) {
454
+ buffer.append('>');
455
+ if (needBreakInOpening(element)) { buffer.append('\n'); }
456
+ return true;
457
+ }
458
+ // no child
459
+ if (asHtml) {
460
+ buffer.append('>');
461
+ } else if (asXml && noEmpty) {
462
+ buffer.append('>');
463
+ } else if (asXhtml) {
464
+ if (isEmpty(name)) {
465
+ buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2
466
+ } else {
467
+ buffer.append('>');
468
+ }
469
+ } else {
470
+ buffer.append("/>");
471
+ }
472
+ if (needBreakInOpening(element)) {
473
+ buffer.append('\n');
474
+ }
475
+ return true;
476
+ }
477
+
478
+ private boolean
479
+ needIndent(Element element)
480
+ {
481
+ if (containsText(element)) { return false; }
482
+ if (fragment) { return false; } // a given option might be fragment and format. fragment matters
483
+ if (format || asBuilder) { return true; }
484
+ return false;
485
+ }
486
+
487
+ private boolean
488
+ needBreakInOpening(Element element)
489
+ {
490
+ if (containsText(element)) { return false; }
491
+ if (fragment) { return false; }
492
+ if (format) { return true; }
493
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) { return true; }
494
+ if (format && element.getNextSibling() == null && element.hasChildNodes()) { return true; }
495
+ return false;
496
+ }
497
+
498
+ private boolean
499
+ isEmpty(String name)
500
+ {
501
+ HTMLElements.Element element = HTMLElements.getElement(name);
502
+ return element.isEmpty();
503
+ }
504
+
505
+ private Attr[]
506
+ getAttrsAndNamespaces(Element element)
507
+ {
508
+ NamedNodeMap attrs = element.getAttributes();
509
+ if (!canonical) {
510
+ if (attrs == null || attrs.getLength() == 0) { return new Attr[0]; }
511
+ Attr[] attrsAndNamespaces = new Attr[attrs.getLength()];
512
+ for (int i = 0; i < attrs.getLength(); i++) {
513
+ attrsAndNamespaces[i] = (Attr) attrs.item(i);
514
+ }
515
+ return attrsAndNamespaces;
516
+ } else {
517
+ List<Attr> namespaces = new ArrayList<Attr>();
518
+ List<Attr> attributes = new ArrayList<Attr>();
519
+ if (subsets) {
520
+ getAttrsOfAncestors(element.getParentNode(), namespaces, attributes);
521
+ Attr[] namespaceOfAncestors = getSortedArray(namespaces);
522
+ Attr[] attributeOfAncestors = getSortedArray(attributes);
523
+ c14nNamespaceStack.push(namespaceOfAncestors);
524
+ c14nAttrStack.push(attributeOfAncestors);
525
+ subsets = false; // namespace propagation should be done only once on top level node.
526
+ }
527
+
528
+ getNamespacesAndAttrs(element, namespaces, attributes);
529
+
530
+ Attr[] namespaceArray = getSortedArray(namespaces);
531
+ Attr[] attributeArray = getSortedArray(attributes);
532
+ Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length];
533
+ for (int i = 0; i < allAttrs.length; i++) {
534
+ if (i < namespaceArray.length) {
535
+ allAttrs[i] = namespaceArray[i];
476
536
  } else {
477
- List<Attr> namespaces = new ArrayList<Attr>();
478
- List<Attr> attributes = new ArrayList<Attr>();
479
- if (subsets) {
480
- getAttrsOfAncestors(element.getParentNode(), namespaces, attributes);
481
- Attr[] namespaceOfAncestors = getSortedArray(namespaces);
482
- Attr[] attributeOfAncestors = getSortedArray(attributes);
483
- c14nNamespaceStack.push(namespaceOfAncestors);
484
- c14nAttrStack.push(attributeOfAncestors);
485
- subsets = false; // namespace propagation should be done only once on top level node.
486
- }
487
-
488
- getNamespacesAndAttrs(element, namespaces, attributes);
489
-
490
- Attr[] namespaceArray = getSortedArray(namespaces);
491
- Attr[] attributeArray = getSortedArray(attributes);
492
- Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length];
493
- for (int i=0; i<allAttrs.length; i++) {
494
- if (i < namespaceArray.length) {
495
- allAttrs[i] = namespaceArray[i];
496
- } else {
497
- allAttrs[i] = attributeArray[i-namespaceArray.length];
498
- }
499
- }
500
- c14nNamespaceStack.push(namespaceArray);
501
- c14nAttrStack.push(attributeArray);
502
- return allAttrs;
503
- }
504
-
505
- }
506
-
507
- private void getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes) {
508
- if (parent == null) return;
509
- NamedNodeMap attrs = parent.getAttributes();
510
- if (attrs == null || attrs.getLength() == 0) return;
511
- for (int i=0; i < attrs.getLength(); i++) {
512
- Attr attr = (Attr)attrs.item(i);
513
- if (isNamespace(attr.getNodeName())) namespaces.add(attr);
514
- else attributes.add(attr);
515
- }
516
- getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
517
- }
518
-
519
- private void getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes) {
520
- NamedNodeMap attrs = current.getAttributes();
521
- for (int i=0; i<attrs.getLength(); i++) {
522
- Attr attr = (Attr)attrs.item(i);
523
- if (isNamespace(attr.getNodeName())) {
524
- getNamespacesWithPropagated(namespaces, attr);
525
- } else {
526
- getAttributesWithPropagated(attributes, attr);
527
- }
528
- if (exclusive) {
529
- verifyXmlSpace(attributes, attrs);
530
- }
531
- }
532
- }
533
-
534
- private void getNamespacesWithPropagated(List<Attr> namespaces, Attr attr) {
535
- boolean newNamespace = true;
536
- Iterator<Attr[]> iter = c14nNamespaceStack.iterator();
537
- while (iter.hasNext()) {
538
- Attr[] parentNamespaces = iter.next();
539
- for (int n=0; n < parentNamespaces.length; n++) {
540
- if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) {
541
- if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
542
- // exactly the same namespace should not be added
543
- newNamespace = false;
544
- } else {
545
- // in case of namespace url change, propagated namespace will be override
546
- namespaces.remove(parentNamespaces[n]);
547
- }
548
- }
549
- }
550
- if (newNamespace && !namespaces.contains(attr)) namespaces.add(attr);
551
- }
552
- }
553
-
554
- private void getAttributesWithPropagated(List<Attr> attributes, Attr attr) {
555
- boolean newAttribute = true;
556
- Iterator<Attr[]> iter = c14nAttrStack.iterator();
557
- while (iter.hasNext()) {
558
- Attr[] parentAttr = iter.next();
559
- for (int n=0; n < parentAttr.length; n++) {
560
- if (!parentAttr[n].getNodeName().startsWith("xml:")) continue;
561
- if (parentAttr[n].getNodeName().equals(attr.getNodeName())) {
562
- if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
563
- // exactly the same attribute should not be added
564
- newAttribute = false;
565
- } else {
566
- // in case of attribute value change, propagated attribute will be override
567
- attributes.remove(parentAttr[n]);
568
- }
569
- }
570
- }
571
- if (newAttribute) attributes.add(attr);
572
- }
573
- }
574
-
575
- private void verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs) {
576
- Attr attr = (Attr) attrs.getNamedItem("xml:space");
577
- if (attr == null) {
578
- for (int i=0; i < attributes.size(); i++) {
579
- if (attributes.get(i).getNodeName().equals("xml:space")) {
580
- attributes.remove(i);
581
- break;
582
- }
583
- }
584
- }
585
- }
586
-
587
- private Attr[] getSortedArray(List<Attr> attrList) {
588
- Attr[] attrArray = attrList.toArray(new Attr[0]);
589
- Arrays.sort(attrArray, new Comparator<Attr>() {
590
- @Override
591
- public int compare(Attr attr0, Attr attr1) {
592
- return attr0.getNodeName().compareTo(attr1.getNodeName());
593
- }
594
- });
595
- return attrArray;
596
- }
597
-
598
- public void leave(Element element) {
599
- if (canonical) {
600
- c14nNamespaceStack.poll();
601
- c14nAttrStack.poll();
602
- }
603
- String name = element.getTagName();
604
- if (element.hasChildNodes()) {
605
- if (needIndentInClosing(element)) {
606
- indentation.pop();
607
- buffer.append(indentation.peek());
608
- } else if (asBuilder) {
609
- if (!containsText(element)) indentation.pop();
610
- }
611
- buffer.append("</").append(name).append('>');
612
- if (needBreakInClosing(element)) {
613
- buffer.append('\n');
614
- }
615
- return;
616
- }
617
- // no child, but HTML might need a closing tag.
618
- if (asHtml || noEmpty) {
619
- if (!isEmpty(name) && noEmpty) {
620
- buffer.append("</").append(name).append('>');
621
- }
622
- }
623
- if (needBreakInClosing(element)) {
624
- if (!containsText(element)) indentation.pop();
625
- buffer.append('\n');
626
- }
627
- }
628
-
629
- private boolean needIndentInClosing(Element element) {
630
- if (containsText(element)) return false;
631
-
632
- if (fragment) return false; // a given option might be fragment and format. fragment matters
633
- if (format) return true;
634
- if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
635
- return false;
636
- }
637
-
638
- private boolean needBreakInClosing(Element element) {
639
- if (fragment) return false;
640
- if (format || asBuilder) return true;
641
- return false;
642
- }
643
-
644
- private boolean containsText(Element element) {
645
- return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE);
646
- }
647
-
648
- public boolean enter(Entity entity) {
649
- String name = entity.getNodeName();
650
- String pubId = entity.getPublicId();
651
- String sysId = entity.getSystemId();
652
- String notation = entity.getNotationName();
653
- buffer.append("<!ENTITY ");
654
- buffer.append(name);
655
- if (pubId != null) {
656
- buffer.append(" PUBLIC \"");
657
- buffer.append(pubId);
658
- buffer.append("\"");
659
- }
660
- if (sysId != null) {
661
- buffer.append(" SYSTEM \"");
662
- buffer.append(sysId);
663
- buffer.append("\"");
664
- }
665
- if (notation != null) {
666
- buffer.append(" NDATA ");
667
- buffer.append(notation);
668
- }
669
- buffer.append(">");
670
- return true;
671
- }
672
-
673
- public void leave(Entity entity) {
674
- // no-op
675
- }
676
-
677
- public boolean enter(EntityReference entityRef) {
678
- buffer.append('&').append(entityRef.getNodeName()).append(';');
679
- return true;
680
- }
681
- public void leave(EntityReference entityRef) {
682
- // no-op
683
- }
684
-
685
- public boolean enter(Notation notation) {
686
- String name = notation.getNodeName();
687
- String pubId = notation.getPublicId();
688
- String sysId = notation.getSystemId();
689
- buffer.append("<!NOTATION ");
690
- buffer.append(name);
691
- if (pubId != null) {
692
- buffer.append(" PUBLIC \"");
693
- buffer.append(pubId);
694
- buffer.append("\"");
695
- if (sysId != null) {
696
- buffer.append(" \"");
697
- buffer.append(sysId);
698
- buffer.append("\"");
699
- }
700
- } else if (sysId != null) {
701
- buffer.append(" SYSTEM \"");
702
- buffer.append(sysId);
703
- buffer.append("\"");
704
- }
705
- buffer.append(">");
537
+ allAttrs[i] = attributeArray[i - namespaceArray.length];
538
+ }
539
+ }
540
+ c14nNamespaceStack.push(namespaceArray);
541
+ c14nAttrStack.push(attributeArray);
542
+ return allAttrs;
543
+ }
544
+
545
+ }
546
+
547
+ private void
548
+ getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes)
549
+ {
550
+ if (parent == null) { return; }
551
+ NamedNodeMap attrs = parent.getAttributes();
552
+ if (attrs == null || attrs.getLength() == 0) { return; }
553
+ for (int i = 0; i < attrs.getLength(); i++) {
554
+ Attr attr = (Attr)attrs.item(i);
555
+ if (isNamespace(attr.getNodeName())) { namespaces.add(attr); }
556
+ else { attributes.add(attr); }
557
+ }
558
+ getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
559
+ }
560
+
561
+ private void
562
+ getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes)
563
+ {
564
+ NamedNodeMap attrs = current.getAttributes();
565
+ for (int i = 0; i < attrs.getLength(); i++) {
566
+ Attr attr = (Attr)attrs.item(i);
567
+ if (isNamespace(attr.getNodeName())) {
568
+ getNamespacesWithPropagated(namespaces, attr);
569
+ } else {
570
+ getAttributesWithPropagated(attributes, attr);
571
+ }
572
+ if (exclusive) {
573
+ verifyXmlSpace(attributes, attrs);
574
+ }
575
+ }
576
+ }
577
+
578
+ private void
579
+ getNamespacesWithPropagated(List<Attr> namespaces, Attr attr)
580
+ {
581
+ boolean newNamespace = true;
582
+ Iterator<Attr[]> iter = c14nNamespaceStack.iterator();
583
+ while (iter.hasNext()) {
584
+ Attr[] parentNamespaces = iter.next();
585
+ for (int n = 0; n < parentNamespaces.length; n++) {
586
+ if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) {
587
+ if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
588
+ // exactly the same namespace should not be added
589
+ newNamespace = false;
590
+ } else {
591
+ // in case of namespace url change, propagated namespace will be override
592
+ namespaces.remove(parentNamespaces[n]);
593
+ }
594
+ }
595
+ }
596
+ if (newNamespace && !namespaces.contains(attr)) { namespaces.add(attr); }
597
+ }
598
+ }
599
+
600
+ private void
601
+ getAttributesWithPropagated(List<Attr> attributes, Attr attr)
602
+ {
603
+ boolean newAttribute = true;
604
+ Iterator<Attr[]> iter = c14nAttrStack.iterator();
605
+ while (iter.hasNext()) {
606
+ Attr[] parentAttr = iter.next();
607
+ for (int n = 0; n < parentAttr.length; n++) {
608
+ if (!parentAttr[n].getNodeName().startsWith("xml:")) { continue; }
609
+ if (parentAttr[n].getNodeName().equals(attr.getNodeName())) {
610
+ if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
611
+ // exactly the same attribute should not be added
612
+ newAttribute = false;
613
+ } else {
614
+ // in case of attribute value change, propagated attribute will be override
615
+ attributes.remove(parentAttr[n]);
616
+ }
617
+ }
618
+ }
619
+ if (newAttribute) { attributes.add(attr); }
620
+ }
621
+ }
622
+
623
+ private void
624
+ verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs)
625
+ {
626
+ Attr attr = (Attr) attrs.getNamedItem("xml:space");
627
+ if (attr == null) {
628
+ for (int i = 0; i < attributes.size(); i++) {
629
+ if (attributes.get(i).getNodeName().equals("xml:space")) {
630
+ attributes.remove(i);
631
+ break;
632
+ }
633
+ }
634
+ }
635
+ }
636
+
637
+ private Attr[]
638
+ getSortedArray(List<Attr> attrList)
639
+ {
640
+ Attr[] attrArray = attrList.toArray(new Attr[0]);
641
+ Arrays.sort(attrArray, new Comparator<Attr>() {
642
+ @Override
643
+ public int compare(Attr attr0, Attr attr1) {
644
+ return attr0.getNodeName().compareTo(attr1.getNodeName());
645
+ }
646
+ });
647
+ return attrArray;
648
+ }
649
+
650
+ public void
651
+ leave(Element element)
652
+ {
653
+ if (canonical) {
654
+ c14nNamespaceStack.poll();
655
+ c14nAttrStack.poll();
656
+ }
657
+ String name = element.getTagName();
658
+ if (element.hasChildNodes()) {
659
+ if (needIndentInClosing(element)) {
660
+ indentation.pop();
661
+ buffer.append(indentation.peek());
662
+ } else if (asBuilder) {
663
+ if (!containsText(element)) { indentation.pop(); }
664
+ }
665
+ buffer.append("</").append(name).append('>');
666
+ if (needBreakInClosing(element)) {
667
+ buffer.append('\n');
668
+ }
669
+ return;
670
+ }
671
+ // no child, but HTML might need a closing tag.
672
+ if (asHtml || noEmpty) {
673
+ if (!isEmpty(name) && noEmpty) {
674
+ buffer.append("</").append(name).append('>');
675
+ }
676
+ }
677
+ if (needBreakInClosing(element)) {
678
+ if (!containsText(element)) { indentation.pop(); }
679
+ buffer.append('\n');
680
+ }
681
+ }
682
+
683
+ private boolean
684
+ needIndentInClosing(Element element)
685
+ {
686
+ if (containsText(element)) { return false; }
687
+
688
+ if (fragment) { return false; } // a given option might be fragment and format. fragment matters
689
+ if (format) { return true; }
690
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) { return true; }
691
+ return false;
692
+ }
693
+
694
+ private boolean
695
+ needBreakInClosing(Element element)
696
+ {
697
+ if (fragment) { return false; }
698
+ if (format || asBuilder) { return true; }
699
+ return false;
700
+ }
701
+
702
+ private boolean
703
+ containsText(Element element)
704
+ {
705
+ return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE);
706
+ }
707
+
708
+ public boolean
709
+ enter(Entity entity)
710
+ {
711
+ String name = entity.getNodeName();
712
+ String pubId = entity.getPublicId();
713
+ String sysId = entity.getSystemId();
714
+ String notation = entity.getNotationName();
715
+ buffer.append("<!ENTITY ");
716
+ buffer.append(name);
717
+ if (pubId != null) {
718
+ buffer.append(" PUBLIC \"");
719
+ buffer.append(pubId);
720
+ buffer.append("\"");
721
+ }
722
+ if (sysId != null) {
723
+ buffer.append(" SYSTEM \"");
724
+ buffer.append(sysId);
725
+ buffer.append("\"");
726
+ }
727
+ if (notation != null) {
728
+ buffer.append(" NDATA ");
729
+ buffer.append(notation);
730
+ }
731
+ buffer.append(">");
732
+ return true;
733
+ }
734
+
735
+ public void
736
+ leave(Entity entity)
737
+ {
738
+ // no-op
739
+ }
740
+
741
+ public boolean
742
+ enter(EntityReference entityRef)
743
+ {
744
+ buffer.append('&').append(entityRef.getNodeName()).append(';');
745
+ return true;
746
+ }
747
+ public void
748
+ leave(EntityReference entityRef)
749
+ {
750
+ // no-op
751
+ }
752
+
753
+ public boolean
754
+ enter(Notation notation)
755
+ {
756
+ String name = notation.getNodeName();
757
+ String pubId = notation.getPublicId();
758
+ String sysId = notation.getSystemId();
759
+ buffer.append("<!NOTATION ");
760
+ buffer.append(name);
761
+ if (pubId != null) {
762
+ buffer.append(" PUBLIC \"");
763
+ buffer.append(pubId);
764
+ buffer.append("\"");
765
+ if (sysId != null) {
766
+ buffer.append(" \"");
767
+ buffer.append(sysId);
768
+ buffer.append("\"");
769
+ }
770
+ } else if (sysId != null) {
771
+ buffer.append(" SYSTEM \"");
772
+ buffer.append(sysId);
773
+ buffer.append("\"");
774
+ }
775
+ buffer.append(">");
776
+ return true;
777
+ }
778
+
779
+ public void
780
+ leave(Notation notation)
781
+ {
782
+ // no-op
783
+ }
784
+
785
+ public boolean
786
+ enter(ProcessingInstruction pi)
787
+ {
788
+ buffer.append("<?");
789
+ buffer.append(pi.getTarget());
790
+ buffer.append(" ");
791
+ buffer.append(pi.getData());
792
+ if (asHtml) { buffer.append(">"); }
793
+ else { buffer.append("?>"); }
794
+ buffer.append("\n");
795
+ if (canonical) { c14nNodeList.add(pi); }
796
+ return true;
797
+ }
798
+
799
+ public void
800
+ leave(ProcessingInstruction pi)
801
+ {
802
+ // no-op
803
+ }
804
+
805
+ private boolean
806
+ isHtmlScript(Text text)
807
+ {
808
+ return htmlDoc && text.getParentNode().getNodeName().equals("script");
809
+ }
810
+
811
+ private boolean
812
+ isHtmlStyle(Text text)
813
+ {
814
+ return htmlDoc && text.getParentNode().getNodeName().equals("style");
815
+ }
816
+
817
+ public boolean
818
+ enter(Text text)
819
+ {
820
+ CharSequence textContent = text.getNodeValue();
821
+ if (canonical) {
822
+ c14nNodeList.add(text);
823
+ if (isBlank(textContent)) {
824
+ buffer.append(canonicalizeWhitespace(textContent));
706
825
  return true;
707
- }
708
-
709
- public void leave(Notation notation) {
710
- // no-op
711
- }
712
-
713
- public boolean enter(ProcessingInstruction pi) {
714
- buffer.append("<?");
715
- buffer.append(pi.getTarget());
716
- buffer.append(" ");
717
- buffer.append(pi.getData());
718
- if (asHtml) buffer.append(">");
719
- else buffer.append("?>");
720
- buffer.append("\n");
721
- if (canonical) c14nNodeList.add(pi);
722
- return true;
723
- }
724
-
725
- public void leave(ProcessingInstruction pi) {
726
- // no-op
727
- }
728
-
729
- private boolean isHtmlScript(Text text) {
730
- return htmlDoc && text.getParentNode().getNodeName().equals("script");
731
- }
732
-
733
- private boolean isHtmlStyle(Text text) {
734
- return htmlDoc && text.getParentNode().getNodeName().equals("style");
735
- }
736
-
737
- public boolean enter(Text text) {
738
- CharSequence textContent = text.getNodeValue();
739
- if (canonical) {
740
- c14nNodeList.add(text);
741
- if (isBlank(textContent)) {
742
- buffer.append(canonicalizeWhitespace(textContent));
743
- return true;
744
- }
745
- }
746
-
747
- if (shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) {
748
- textContent = encodeJavaString(textContent);
749
- }
750
-
751
- textContent = encodeStringToHtmlEntity(textContent);
752
- buffer.append(textContent);
753
- return true;
754
- }
755
-
756
- private CharSequence encodeStringToHtmlEntity(CharSequence text) {
757
- if (encoding == null) return text;
758
-
759
- CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
760
- StringBuilder sb = new StringBuilder(text.length() + 16);
761
- // make sure we can handle code points that are higher than 2 bytes
762
- for ( int i = 0; i < text.length(); ) {
763
- int code = Character.codePointAt(text, i);
764
- // TODO not sure about bigger offset then 2 ?!
765
- int offset = code > 65535 ? 2 : 1;
766
- CharSequence substr = text.subSequence(i, i + offset);
767
- boolean canEncode = encoder.canEncode(substr);
768
- if (canEncode) {
769
- sb.append(substr);
770
- }
771
- else {
772
- sb.append("&#x").append(Integer.toHexString(code)).append(';');
773
- }
774
- i += offset;
775
- }
776
- return sb;
777
- }
826
+ }
827
+ }
828
+
829
+ if (shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) {
830
+ textContent = encodeJavaString(textContent);
831
+ }
832
+
833
+ textContent = encodeStringToHtmlEntity(textContent);
834
+ buffer.append(textContent);
835
+ return true;
836
+ }
837
+
838
+ private CharSequence
839
+ encodeStringToHtmlEntity(CharSequence text)
840
+ {
841
+ if (encoding == null) { return text; }
842
+
843
+ CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
844
+ StringBuilder sb = new StringBuilder(text.length() + 16);
845
+ // make sure we can handle code points that are higher than 2 bytes
846
+ for (int i = 0; i < text.length();) {
847
+ int code = Character.codePointAt(text, i);
848
+ // TODO not sure about bigger offset then 2 ?!
849
+ int offset = code > 65535 ? 2 : 1;
850
+ CharSequence substr = text.subSequence(i, i + offset);
851
+ boolean canEncode = encoder.canEncode(substr);
852
+ if (canEncode) {
853
+ sb.append(substr);
854
+ } else {
855
+ sb.append("&#x").append(Integer.toHexString(code)).append(';');
856
+ }
857
+ i += offset;
858
+ }
859
+ return sb;
860
+ }
778
861
  }