nokogiri 1.11.0.rc1-java → 1.11.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  7. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  8. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  12. data/ext/java/nokogiri/NokogiriService.java +597 -526
  13. data/ext/java/nokogiri/XmlAttr.java +120 -96
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  15. data/ext/java/nokogiri/XmlCdata.java +35 -26
  16. data/ext/java/nokogiri/XmlComment.java +48 -37
  17. data/ext/java/nokogiri/XmlDocument.java +642 -540
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  19. data/ext/java/nokogiri/XmlDtd.java +450 -384
  20. data/ext/java/nokogiri/XmlElement.java +25 -18
  21. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  22. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  23. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  24. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  25. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1590
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  29. data/ext/java/nokogiri/XmlReader.java +513 -418
  30. data/ext/java/nokogiri/XmlRelaxng.java +92 -72
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -280
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  33. data/ext/java/nokogiri/XmlSchema.java +335 -210
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -87
  35. data/ext/java/nokogiri/XmlText.java +57 -46
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -178
  37. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +203 -160
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -59
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +121 -48
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -22
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +585 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +225 -163
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +338 -286
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +195 -172
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +111 -34
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/parser_extras.rb +38 -36
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -148
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/builder.rb +2 -2
  142. data/lib/nokogiri/xml/document.rb +48 -18
  143. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  144. data/lib/nokogiri/xml/node.rb +599 -279
  145. data/lib/nokogiri/xml/parse_options.rb +6 -0
  146. data/lib/nokogiri/xml/reader.rb +2 -9
  147. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  148. data/lib/nokogiri/xml/schema.rb +12 -4
  149. data/lib/nokogiri/xml/searchable.rb +24 -16
  150. data/lib/nokogiri/xml/xpath.rb +1 -3
  151. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  152. metadata +87 -158
  153. data/ext/nokogiri/html_document.h +0 -10
  154. data/ext/nokogiri/html_element_description.h +0 -10
  155. data/ext/nokogiri/html_entity_lookup.h +0 -8
  156. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  157. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  158. data/ext/nokogiri/nokogiri.h +0 -122
  159. data/ext/nokogiri/xml_attr.h +0 -9
  160. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  161. data/ext/nokogiri/xml_cdata.h +0 -9
  162. data/ext/nokogiri/xml_comment.h +0 -9
  163. data/ext/nokogiri/xml_document.h +0 -23
  164. data/ext/nokogiri/xml_document_fragment.h +0 -10
  165. data/ext/nokogiri/xml_dtd.h +0 -10
  166. data/ext/nokogiri/xml_element_content.h +0 -10
  167. data/ext/nokogiri/xml_element_decl.h +0 -9
  168. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  169. data/ext/nokogiri/xml_entity_decl.h +0 -10
  170. data/ext/nokogiri/xml_entity_reference.h +0 -9
  171. data/ext/nokogiri/xml_io.c +0 -61
  172. data/ext/nokogiri/xml_io.h +0 -11
  173. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  174. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  175. data/ext/nokogiri/xml_namespace.h +0 -14
  176. data/ext/nokogiri/xml_node.h +0 -13
  177. data/ext/nokogiri/xml_node_set.h +0 -12
  178. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  179. data/ext/nokogiri/xml_reader.h +0 -10
  180. data/ext/nokogiri/xml_relax_ng.h +0 -9
  181. data/ext/nokogiri/xml_sax_parser.h +0 -39
  182. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  183. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_schema.h +0 -9
  185. data/ext/nokogiri/xml_syntax_error.h +0 -13
  186. data/ext/nokogiri/xml_text.h +0 -9
  187. data/ext/nokogiri/xml_xpath_context.h +0 -10
  188. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -66,713 +66,828 @@ import org.w3c.dom.Text;
66
66
  * @author Patrick Mahoney <pat@polycrystal.org>
67
67
  * @author Yoko Harada <yokolet@gmail.com>
68
68
  */
69
- public class SaveContextVisitor {
70
-
71
- private final StringBuilder buffer;
72
- private final Stack<String> indentation;
73
- private String encoding;
74
- private final CharSequence indentString;
75
- private boolean format;
76
- private final boolean noDecl;
77
- private final boolean noEmpty;
78
- private final boolean noXhtml;
79
- private final boolean asXhtml;
80
- private boolean asXml;
81
- private final boolean asHtml;
82
- private final boolean asBuilder;
83
- private boolean htmlDoc;
84
- private final boolean fragment;
85
- private final boolean canonical, incl_ns, with_comments;
86
- private boolean subsets;
87
- private boolean exclusive;
88
- private final List<Node> c14nNodeList;
89
- private final Deque<Attr[]> c14nNamespaceStack;
90
- private final Deque<Attr[]> c14nAttrStack;
91
- //private List<String> c14nExclusiveInclusivePrefixes = null;
92
-
93
- /*
94
- * U can't touch this.
95
- * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
96
- *
97
- * Taken from libxml save options.
98
- */
99
-
100
- public static final int FORMAT = 1;
101
- public static final int NO_DECL = 2;
102
- public static final int NO_EMPTY = 4;
103
- public static final int NO_XHTML = 8;
104
- public static final int AS_XHTML = 16;
105
- public static final int AS_XML = 32;
106
- public static final int AS_HTML = 64;
107
- public static final int AS_BUILDER = 128;
108
-
109
- public static final int CANONICAL = 1;
110
- public static final int INCL_NS = 2;
111
- public static final int WITH_COMMENTS = 4;
112
- public static final int SUBSETS = 8;
113
- public static final int EXCLUSIVE = 16;
114
-
115
- public SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment, int canonicalOpts) {
116
- buffer = new StringBuilder();
117
- this.encoding = encoding;
118
- indentation = new Stack<String>(); indentation.push("");
119
- this.htmlDoc = htmlDoc;
120
- this.fragment = fragment;
121
- c14nNodeList = new ArrayList<Node>();
122
- c14nNamespaceStack = new ArrayDeque<Attr[]>();
123
- c14nAttrStack = new ArrayDeque<Attr[]>();
124
- format = (options & FORMAT) == FORMAT;
125
-
126
- noDecl = (options & NO_DECL) == NO_DECL;
127
- noEmpty = (options & NO_EMPTY) == NO_EMPTY;
128
- noXhtml = (options & NO_XHTML) == NO_XHTML;
129
- asXhtml = (options & AS_XHTML) == AS_XHTML;
130
- asXml = (options & AS_XML) == AS_XML;
131
- asHtml = (options & AS_HTML) == AS_HTML;
132
- asBuilder = (options & AS_BUILDER) == AS_BUILDER;
133
-
134
- canonical = (canonicalOpts & CANONICAL) == CANONICAL;
135
- incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
136
- with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
137
- subsets = (canonicalOpts & SUBSETS) == SUBSETS;
138
-
139
- if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
140
- if ((!format && indent != null) && indent.length() > 0) format = true;
141
- if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
142
- indentString = indent;
143
- if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = true;
144
- }
145
-
146
- @Override
147
- public String toString() {
148
- return buffer.toString();
149
- }
150
-
151
- public StringBuilder getInternalBuffer() { return buffer; }
152
-
153
- public void setHtmlDoc(boolean htmlDoc) {
154
- this.htmlDoc = htmlDoc;
155
- }
156
-
157
- public void setEncoding(String encoding) {
158
- this.encoding = encoding;
159
- }
160
-
161
- public boolean enter(Node node) {
162
- if (node instanceof Document) {
163
- return enter((Document)node);
164
- }
165
- if (node instanceof Element) {
166
- return enter((Element)node);
167
- }
168
- if (node instanceof Attr) {
169
- return enter((Attr)node);
170
- }
171
- if (node instanceof Text) {
172
- return enter((Text)node);
173
- }
174
- if (node instanceof CDATASection) {
175
- return enter((CDATASection)node);
176
- }
177
- if (node instanceof Comment) {
178
- return enter((Comment)node);
179
- }
180
- if (node instanceof DocumentType) {
181
- return enter((DocumentType)node);
182
- }
183
- if (node instanceof Entity) {
184
- return enter((Entity)node);
185
- }
186
- if (node instanceof EntityReference) {
187
- return enter((EntityReference) node);
188
- }
189
- if (node instanceof Notation) {
190
- return enter((Notation)node);
191
- }
192
- if (node instanceof ProcessingInstruction) {
193
- return enter((ProcessingInstruction)node);
194
- }
195
- return false;
196
- }
197
-
198
- public void leave(Node node) {
199
- if (node instanceof Document) {
200
- leave((Document)node);
201
- return;
202
- }
203
- if (node instanceof Element) {
204
- leave((Element)node);
205
- return;
206
- }
207
- if (node instanceof Attr) {
208
- leave((Attr)node);
209
- return;
210
- }
211
- if (node instanceof Text) {
212
- return;
213
- }
214
- if (node instanceof CDATASection) {
215
- leave((CDATASection)node);
216
- return;
217
- }
218
- if (node instanceof Comment) {
219
- leave((Comment)node);
220
- return;
221
- }
222
- if (node instanceof DocumentType) {
223
- leave((DocumentType)node);
224
- return;
225
- }
226
- if (node instanceof Entity) {
227
- leave((Entity)node);
228
- return;
229
- }
230
- if (node instanceof EntityReference) {
231
- leave((EntityReference) node);
232
- return;
233
- }
234
- if (node instanceof Notation) {
235
- leave((Notation)node);
236
- return;
237
- }
238
- if (node instanceof ProcessingInstruction) {
239
- leave((ProcessingInstruction)node);
240
- return;
241
- }
242
- }
243
-
244
- public boolean enter(String string) {
245
- buffer.append(string);
246
- return true;
247
- }
248
-
249
- public void leave(String string) {
250
- // no-op
251
- }
252
-
253
- public boolean enter(Attr attr) {
254
- String name = attr.getName();
255
- buffer.append(name);
256
- if (!asHtml || !isHtmlBooleanAttr(name)) {
257
- buffer.append('=');
258
- buffer.append('"');
259
- String value = replaceCharsetIfNecessary(attr);
260
- buffer.append(serializeAttrTextContent(value, htmlDoc));
261
- buffer.append('"');
262
- }
263
- return true;
264
- }
265
-
266
- private static final Pattern CHARSET =
267
- Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
268
-
269
- private String replaceCharsetIfNecessary(Attr attr) {
270
- String value = attr.getValue();
271
- if (encoding == null) return value; // unable to replace in any case
272
- if (!"content".equals(attr.getName().toLowerCase())) return value; // must be content attr
273
- if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value;
274
- Matcher m = CHARSET.matcher(value);
275
- if (!m.find()) return value;
276
- if (value.contains(encoding)) return value; // no need to replace
277
- return value.replace(m.group(), "charset=" + encoding);
278
- }
279
-
280
- static final Set<String> HTML_BOOLEAN_ATTRS;
281
- static {
282
- final String[] _HTML_BOOLEAN_ATTRS = {
283
- "checked", "compact", "declare", "defer", "disabled", "ismap",
284
- "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
285
- "selected"
286
- };
287
- HTML_BOOLEAN_ATTRS = new HashSet<String>(Arrays.asList(_HTML_BOOLEAN_ATTRS));
288
- }
289
-
290
- private static boolean isHtmlBooleanAttr(String name) {
291
- return HTML_BOOLEAN_ATTRS.contains(name);
292
- }
293
-
294
- private static CharSequence serializeAttrTextContent(String str, boolean htmlDoc) {
295
- if (str == null || str.length() == 0) return "";
296
-
297
- StringBuilder buffer = new StringBuilder(str.length() + 16);
298
-
299
- for (int i = 0; i < str.length(); i++) {
300
- char c; switch (c = str.charAt(i)) {
301
- case '\n': buffer.append("&#10;"); break;
302
- case '\r': buffer.append("&#13;"); break;
303
- case '\t': buffer.append("&#9;"); break;
304
- case '"': if (htmlDoc) buffer.append("%22");
305
- else buffer.append("&quot;");
306
- break;
307
- case '<': buffer.append("&lt;"); break;
308
- case '>': buffer.append("&gt;"); break;
309
- case '&': buffer.append("&amp;"); break;
310
- default: buffer.append(c);
311
- }
312
- }
313
-
314
- return buffer;
315
- }
316
-
317
- public void leave(Attr attr) {
318
- // no-op
319
- }
320
-
321
- public boolean enter(CDATASection cdata) {
322
- buffer.append("<![CDATA[");
323
- buffer.append(cdata.getData());
324
- buffer.append("]]>");
325
- return true;
326
- }
327
-
328
- public void leave(CDATASection cdata) {
329
- // no-op
330
- }
331
-
332
- public boolean enter(Comment comment) {
333
- if (canonical) {
334
- c14nNodeList.add(comment);
335
- if (!with_comments) return true;
336
- }
337
- buffer.append("<!--");
338
- buffer.append(comment.getData());
339
- buffer.append("-->");
340
- return true;
341
- }
342
-
343
- public void leave(Comment comment) {
344
- // no-op
345
- }
346
-
347
- public boolean enter(Document document) {
348
- if (!noDecl) {
349
- buffer.append("<?xml version=\"");
350
- buffer.append(document.getXmlVersion());
351
- buffer.append("\"");
352
-
353
- if (encoding != null) {
354
- buffer.append(" encoding=\"");
355
- buffer.append(encoding);
356
- buffer.append("\"");
357
- }
358
- buffer.append("?>\n");
359
- }
360
- return true;
361
- }
362
-
363
- public void leave(Document document) {
364
- // no-op
365
- }
366
-
367
- public boolean enter(DocumentType docType) {
368
- if (canonical) {
369
- c14nNodeList.add(docType);
370
- return true;
371
- }
372
- String name = docType.getName();
373
- String pubId = docType.getPublicId();
374
- String sysId = docType.getSystemId();
375
- String internalSubset = docType.getInternalSubset();
376
- if (docType.getPreviousSibling() != null) {
377
- buffer.append('\n');
378
- }
379
- buffer.append("<!DOCTYPE ").append(name).append(' ');
380
- if (pubId != null) {
381
- buffer.append("PUBLIC \"").append(pubId).append('"');
382
- if (sysId != null) buffer.append(" \"").append(sysId).append('"');
383
- } else if (sysId != null) {
384
- buffer.append("SYSTEM \"").append(sysId).append('"');
385
- }
386
- if (internalSubset != null) {
387
- buffer.append(' ').append('[');
388
- buffer.append(internalSubset);
389
- buffer.append(']');
390
- }
391
- buffer.append(">\n");
392
- return true;
393
- }
394
-
395
- public void leave(DocumentType docType) {
396
- // no-op
397
- }
398
-
399
- public boolean enter(Element element) {
400
- if (canonical) {
401
- c14nNodeList.add(element);
402
- if (element == element.getOwnerDocument().getDocumentElement()) {
403
- c14nNodeList.add(element.getOwnerDocument());
404
- }
405
- }
406
- String current = indentation.peek();
407
- buffer.append(current);
408
- if (needIndent(element)) {
409
- indentation.push(current + indentString);
410
- }
411
- String name = element.getTagName();
412
- buffer.append('<').append(name);
413
- Attr[] attrs = getAttrsAndNamespaces(element);
414
- for (Attr attr : attrs) {
415
- if (attr.getSpecified()) {
416
- buffer.append(' ');
417
- enter(attr);
418
- leave(attr);
419
- }
420
- }
421
- if (element.hasChildNodes()) {
422
- buffer.append('>');
423
- if (needBreakInOpening(element)) buffer.append('\n');
424
- return true;
425
- }
426
- // no child
427
- if (asHtml) {
428
- buffer.append('>');
429
- } else if (asXml && noEmpty) {
430
- buffer.append('>');
431
- } else if (asXhtml) {
432
- if (isEmpty(name)) {
433
- buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2
434
- } else {
435
- buffer.append('>');
436
- }
69
+ public class SaveContextVisitor
70
+ {
71
+
72
+ private final StringBuilder buffer;
73
+ private final Stack<String> indentation;
74
+ private String encoding;
75
+ private final CharSequence indentString;
76
+ private boolean format;
77
+ private final boolean noDecl;
78
+ private final boolean noEmpty;
79
+ private final boolean noXhtml;
80
+ private final boolean asXhtml;
81
+ private boolean asXml;
82
+ private final boolean asHtml;
83
+ private final boolean asBuilder;
84
+ private boolean htmlDoc;
85
+ private final boolean fragment;
86
+ private final boolean canonical, incl_ns, with_comments;
87
+ private boolean subsets;
88
+ private boolean exclusive;
89
+ private final List<Node> c14nNodeList;
90
+ private final Deque<Attr[]> c14nNamespaceStack;
91
+ private final Deque<Attr[]> c14nAttrStack;
92
+ //private List<String> c14nExclusiveInclusivePrefixes = null;
93
+
94
+ /*
95
+ * U can't touch this.
96
+ * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
97
+ *
98
+ * Taken from libxml save options.
99
+ */
100
+
101
+ public static final int FORMAT = 1;
102
+ public static final int NO_DECL = 2;
103
+ public static final int NO_EMPTY = 4;
104
+ public static final int NO_XHTML = 8;
105
+ public static final int AS_XHTML = 16;
106
+ public static final int AS_XML = 32;
107
+ public static final int AS_HTML = 64;
108
+ public static final int AS_BUILDER = 128;
109
+
110
+ public static final int CANONICAL = 1;
111
+ public static final int INCL_NS = 2;
112
+ public static final int WITH_COMMENTS = 4;
113
+ public static final int SUBSETS = 8;
114
+ public static final int EXCLUSIVE = 16;
115
+
116
+ public
117
+ SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc, boolean fragment,
118
+ int canonicalOpts)
119
+ {
120
+ buffer = new StringBuilder();
121
+ this.encoding = encoding;
122
+ indentation = new Stack<String>();
123
+ indentation.push("");
124
+ this.htmlDoc = htmlDoc;
125
+ this.fragment = fragment;
126
+ c14nNodeList = new ArrayList<Node>();
127
+ c14nNamespaceStack = new ArrayDeque<Attr[]>();
128
+ c14nAttrStack = new ArrayDeque<Attr[]>();
129
+ format = (options & FORMAT) == FORMAT;
130
+
131
+ noDecl = (options & NO_DECL) == NO_DECL;
132
+ noEmpty = (options & NO_EMPTY) == NO_EMPTY;
133
+ noXhtml = (options & NO_XHTML) == NO_XHTML;
134
+ asXhtml = (options & AS_XHTML) == AS_XHTML;
135
+ asXml = (options & AS_XML) == AS_XML;
136
+ asHtml = (options & AS_HTML) == AS_HTML;
137
+ asBuilder = (options & AS_BUILDER) == AS_BUILDER;
138
+
139
+ canonical = (canonicalOpts & CANONICAL) == CANONICAL;
140
+ incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
141
+ with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
142
+ subsets = (canonicalOpts & SUBSETS) == SUBSETS;
143
+
144
+ if ((format && indent == null) || (format && indent.length() == 0)) { indent = " "; } // default, two spaces
145
+ if ((!format && indent != null) && indent.length() > 0) { format = true; }
146
+ if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) { indent = " "; } // default, two spaces
147
+ indentString = indent;
148
+ if (!asXml && !asHtml && !asXhtml && !asBuilder) { asXml = true; }
149
+ }
150
+
151
+ @Override
152
+ public String
153
+ toString()
154
+ {
155
+ return buffer.toString();
156
+ }
157
+
158
+ public StringBuilder
159
+ getInternalBuffer() { return buffer; }
160
+
161
+ public void
162
+ setHtmlDoc(boolean htmlDoc)
163
+ {
164
+ this.htmlDoc = htmlDoc;
165
+ }
166
+
167
+ public void
168
+ setEncoding(String encoding)
169
+ {
170
+ this.encoding = encoding;
171
+ }
172
+
173
+ public boolean
174
+ enter(Node node)
175
+ {
176
+ if (node instanceof Document) {
177
+ return enter((Document)node);
178
+ }
179
+ if (node instanceof Element) {
180
+ return enter((Element)node);
181
+ }
182
+ if (node instanceof Attr) {
183
+ return enter((Attr)node);
184
+ }
185
+ if (node instanceof Text) {
186
+ return enter((Text)node);
187
+ }
188
+ if (node instanceof CDATASection) {
189
+ return enter((CDATASection)node);
190
+ }
191
+ if (node instanceof Comment) {
192
+ return enter((Comment)node);
193
+ }
194
+ if (node instanceof DocumentType) {
195
+ return enter((DocumentType)node);
196
+ }
197
+ if (node instanceof Entity) {
198
+ return enter((Entity)node);
199
+ }
200
+ if (node instanceof EntityReference) {
201
+ return enter((EntityReference) node);
202
+ }
203
+ if (node instanceof Notation) {
204
+ return enter((Notation)node);
205
+ }
206
+ if (node instanceof ProcessingInstruction) {
207
+ return enter((ProcessingInstruction)node);
208
+ }
209
+ return false;
210
+ }
211
+
212
+ public void
213
+ leave(Node node)
214
+ {
215
+ if (node instanceof Document) {
216
+ leave((Document)node);
217
+ return;
218
+ }
219
+ if (node instanceof Element) {
220
+ leave((Element)node);
221
+ return;
222
+ }
223
+ if (node instanceof Attr) {
224
+ leave((Attr)node);
225
+ return;
226
+ }
227
+ if (node instanceof Text) {
228
+ return;
229
+ }
230
+ if (node instanceof CDATASection) {
231
+ leave((CDATASection)node);
232
+ return;
233
+ }
234
+ if (node instanceof Comment) {
235
+ leave((Comment)node);
236
+ return;
237
+ }
238
+ if (node instanceof DocumentType) {
239
+ leave((DocumentType)node);
240
+ return;
241
+ }
242
+ if (node instanceof Entity) {
243
+ leave((Entity)node);
244
+ return;
245
+ }
246
+ if (node instanceof EntityReference) {
247
+ leave((EntityReference) node);
248
+ return;
249
+ }
250
+ if (node instanceof Notation) {
251
+ leave((Notation)node);
252
+ return;
253
+ }
254
+ if (node instanceof ProcessingInstruction) {
255
+ leave((ProcessingInstruction)node);
256
+ return;
257
+ }
258
+ }
259
+
260
+ public boolean
261
+ enter(String string)
262
+ {
263
+ buffer.append(string);
264
+ return true;
265
+ }
266
+
267
+ public void
268
+ leave(String string)
269
+ {
270
+ // no-op
271
+ }
272
+
273
+ public boolean
274
+ enter(Attr attr)
275
+ {
276
+ String name = attr.getName();
277
+ buffer.append(name);
278
+ if (!asHtml || !isHtmlBooleanAttr(name)) {
279
+ buffer.append('=');
280
+ buffer.append('"');
281
+ String value = replaceCharsetIfNecessary(attr);
282
+ buffer.append(serializeAttrTextContent(value, htmlDoc));
283
+ buffer.append('"');
284
+ }
285
+ return true;
286
+ }
287
+
288
+ private static final Pattern CHARSET =
289
+ Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
290
+
291
+ private String
292
+ replaceCharsetIfNecessary(Attr attr)
293
+ {
294
+ String value = attr.getValue();
295
+ if (encoding == null) { return value; } // unable to replace in any case
296
+ if (!"content".equals(attr.getName().toLowerCase())) { return value; } // must be content attr
297
+ if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) { return value; }
298
+ Matcher m = CHARSET.matcher(value);
299
+ if (!m.find()) { return value; }
300
+ if (value.contains(encoding)) { return value; } // no need to replace
301
+ return value.replace(m.group(), "charset=" + encoding);
302
+ }
303
+
304
+ static final Set<String> HTML_BOOLEAN_ATTRS;
305
+ static
306
+ {
307
+ final String[] _HTML_BOOLEAN_ATTRS = {
308
+ "checked", "compact", "declare", "defer", "disabled", "ismap",
309
+ "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
310
+ "selected"
311
+ };
312
+ HTML_BOOLEAN_ATTRS = new HashSet<String>(Arrays.asList(_HTML_BOOLEAN_ATTRS));
313
+ }
314
+
315
+ private static boolean
316
+ isHtmlBooleanAttr(String name)
317
+ {
318
+ return HTML_BOOLEAN_ATTRS.contains(name);
319
+ }
320
+
321
+ private static CharSequence
322
+ serializeAttrTextContent(String str, boolean htmlDoc)
323
+ {
324
+ if (str == null || str.length() == 0) { return ""; }
325
+
326
+ StringBuilder buffer = new StringBuilder(str.length() + 16);
327
+
328
+ for (int i = 0; i < str.length(); i++) {
329
+ char c;
330
+ switch (c = str.charAt(i)) {
331
+ case '\n':
332
+ buffer.append("&#10;");
333
+ break;
334
+ case '\r':
335
+ buffer.append("&#13;");
336
+ break;
337
+ case '\t':
338
+ buffer.append("&#9;");
339
+ break;
340
+ case '"':
341
+ if (htmlDoc) { buffer.append("%22"); }
342
+ else { buffer.append("&quot;"); }
343
+ break;
344
+ case '<':
345
+ buffer.append("&lt;");
346
+ break;
347
+ case '>':
348
+ buffer.append("&gt;");
349
+ break;
350
+ case '&':
351
+ buffer.append("&amp;");
352
+ break;
353
+ default:
354
+ buffer.append(c);
355
+ }
356
+ }
357
+
358
+ return buffer;
359
+ }
360
+
361
+ public void
362
+ leave(Attr attr)
363
+ {
364
+ // no-op
365
+ }
366
+
367
+ public boolean
368
+ enter(CDATASection cdata)
369
+ {
370
+ buffer.append("<![CDATA[");
371
+ buffer.append(cdata.getData());
372
+ buffer.append("]]>");
373
+ return true;
374
+ }
375
+
376
+ public void
377
+ leave(CDATASection cdata)
378
+ {
379
+ // no-op
380
+ }
381
+
382
+ public boolean
383
+ enter(Comment comment)
384
+ {
385
+ if (canonical) {
386
+ c14nNodeList.add(comment);
387
+ if (!with_comments) { return true; }
388
+ }
389
+ buffer.append("<!--");
390
+ buffer.append(comment.getData());
391
+ buffer.append("-->");
392
+ return true;
393
+ }
394
+
395
+ public void
396
+ leave(Comment comment)
397
+ {
398
+ // no-op
399
+ }
400
+
401
+ public boolean
402
+ enter(Document document)
403
+ {
404
+ if (!noDecl) {
405
+ buffer.append("<?xml version=\"");
406
+ buffer.append(document.getXmlVersion());
407
+ buffer.append("\"");
408
+
409
+ if (encoding != null) {
410
+ buffer.append(" encoding=\"");
411
+ buffer.append(encoding);
412
+ buffer.append("\"");
413
+ }
414
+ buffer.append("?>\n");
415
+ }
416
+ return true;
417
+ }
418
+
419
+ public void
420
+ leave(Document document)
421
+ {
422
+ // no-op
423
+ }
424
+
425
+ public boolean
426
+ enter(DocumentType docType)
427
+ {
428
+ if (canonical) {
429
+ c14nNodeList.add(docType);
430
+ return true;
431
+ }
432
+ String name = docType.getName();
433
+ String pubId = docType.getPublicId();
434
+ String sysId = docType.getSystemId();
435
+ String internalSubset = docType.getInternalSubset();
436
+ if (docType.getPreviousSibling() != null) {
437
+ buffer.append('\n');
438
+ }
439
+ buffer.append("<!DOCTYPE ").append(name).append(' ');
440
+ if (pubId != null) {
441
+ buffer.append("PUBLIC \"").append(pubId).append('"');
442
+ if (sysId != null) { buffer.append(" \"").append(sysId).append('"'); }
443
+ } else if (sysId != null) {
444
+ buffer.append("SYSTEM \"").append(sysId).append('"');
445
+ }
446
+ if (internalSubset != null) {
447
+ buffer.append(' ').append('[');
448
+ buffer.append(internalSubset);
449
+ buffer.append(']');
450
+ }
451
+ buffer.append(">\n");
452
+ return true;
453
+ }
454
+
455
+ public void
456
+ leave(DocumentType docType)
457
+ {
458
+ // no-op
459
+ }
460
+
461
+ public boolean
462
+ enter(Element element)
463
+ {
464
+ if (canonical) {
465
+ c14nNodeList.add(element);
466
+ if (element == element.getOwnerDocument().getDocumentElement()) {
467
+ c14nNodeList.add(element.getOwnerDocument());
468
+ }
469
+ }
470
+ String current = indentation.peek();
471
+ buffer.append(current);
472
+ if (needIndent(element)) {
473
+ indentation.push(current + indentString);
474
+ }
475
+ String name = element.getTagName();
476
+ buffer.append('<').append(name);
477
+ Attr[] attrs = getAttrsAndNamespaces(element);
478
+ for (Attr attr : attrs) {
479
+ if (attr.getSpecified()) {
480
+ buffer.append(' ');
481
+ enter(attr);
482
+ leave(attr);
483
+ }
484
+ }
485
+ if (element.hasChildNodes()) {
486
+ buffer.append('>');
487
+ if (needBreakInOpening(element)) { buffer.append('\n'); }
488
+ return true;
489
+ }
490
+ // no child
491
+ if (asHtml) {
492
+ buffer.append('>');
493
+ } else if (asXml && noEmpty) {
494
+ buffer.append('>');
495
+ } else if (asXhtml) {
496
+ if (isEmpty(name)) {
497
+ buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2
498
+ } else {
499
+ buffer.append('>');
500
+ }
501
+ } else {
502
+ buffer.append("/>");
503
+ }
504
+ if (needBreakInOpening(element)) {
505
+ buffer.append('\n');
506
+ }
507
+ return true;
508
+ }
509
+
510
+ private boolean
511
+ needIndent(Element element)
512
+ {
513
+ if (containsText(element)) { return false; }
514
+ if (fragment) { return false; } // a given option might be fragment and format. fragment matters
515
+ if (format || asBuilder) { return true; }
516
+ return false;
517
+ }
518
+
519
+ private boolean
520
+ needBreakInOpening(Element element)
521
+ {
522
+ if (containsText(element)) { return false; }
523
+ if (fragment) { return false; }
524
+ if (format) { return true; }
525
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) { return true; }
526
+ if (format && element.getNextSibling() == null && element.hasChildNodes()) { return true; }
527
+ return false;
528
+ }
529
+
530
+ private boolean
531
+ isEmpty(String name)
532
+ {
533
+ HTMLElements.Element element = HTMLElements.getElement(name);
534
+ return element.isEmpty();
535
+ }
536
+
537
+ private Attr[]
538
+ getAttrsAndNamespaces(Element element)
539
+ {
540
+ NamedNodeMap attrs = element.getAttributes();
541
+ if (!canonical) {
542
+ if (attrs == null || attrs.getLength() == 0) { return new Attr[0]; }
543
+ Attr[] attrsAndNamespaces = new Attr[attrs.getLength()];
544
+ for (int i = 0; i < attrs.getLength(); i++) {
545
+ attrsAndNamespaces[i] = (Attr) attrs.item(i);
546
+ }
547
+ return attrsAndNamespaces;
548
+ } else {
549
+ List<Attr> namespaces = new ArrayList<Attr>();
550
+ List<Attr> attributes = new ArrayList<Attr>();
551
+ if (subsets) {
552
+ getAttrsOfAncestors(element.getParentNode(), namespaces, attributes);
553
+ Attr[] namespaceOfAncestors = getSortedArray(namespaces);
554
+ Attr[] attributeOfAncestors = getSortedArray(attributes);
555
+ c14nNamespaceStack.push(namespaceOfAncestors);
556
+ c14nAttrStack.push(attributeOfAncestors);
557
+ subsets = false; // namespace propagation should be done only once on top level node.
558
+ }
559
+
560
+ getNamespacesAndAttrs(element, namespaces, attributes);
561
+
562
+ Attr[] namespaceArray = getSortedArray(namespaces);
563
+ Attr[] attributeArray = getSortedArray(attributes);
564
+ Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length];
565
+ for (int i = 0; i < allAttrs.length; i++) {
566
+ if (i < namespaceArray.length) {
567
+ allAttrs[i] = namespaceArray[i];
437
568
  } else {
438
- buffer.append("/>");
439
- }
440
- if (needBreakInOpening(element)) {
441
- buffer.append('\n');
442
- }
569
+ allAttrs[i] = attributeArray[i - namespaceArray.length];
570
+ }
571
+ }
572
+ c14nNamespaceStack.push(namespaceArray);
573
+ c14nAttrStack.push(attributeArray);
574
+ return allAttrs;
575
+ }
576
+
577
+ }
578
+
579
+ private void
580
+ getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes)
581
+ {
582
+ if (parent == null) { return; }
583
+ NamedNodeMap attrs = parent.getAttributes();
584
+ if (attrs == null || attrs.getLength() == 0) { return; }
585
+ for (int i = 0; i < attrs.getLength(); i++) {
586
+ Attr attr = (Attr)attrs.item(i);
587
+ if (isNamespace(attr.getNodeName())) { namespaces.add(attr); }
588
+ else { attributes.add(attr); }
589
+ }
590
+ getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
591
+ }
592
+
593
+ private void
594
+ getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes)
595
+ {
596
+ NamedNodeMap attrs = current.getAttributes();
597
+ for (int i = 0; i < attrs.getLength(); i++) {
598
+ Attr attr = (Attr)attrs.item(i);
599
+ if (isNamespace(attr.getNodeName())) {
600
+ getNamespacesWithPropagated(namespaces, attr);
601
+ } else {
602
+ getAttributesWithPropagated(attributes, attr);
603
+ }
604
+ if (exclusive) {
605
+ verifyXmlSpace(attributes, attrs);
606
+ }
607
+ }
608
+ }
609
+
610
+ private void
611
+ getNamespacesWithPropagated(List<Attr> namespaces, Attr attr)
612
+ {
613
+ boolean newNamespace = true;
614
+ Iterator<Attr[]> iter = c14nNamespaceStack.iterator();
615
+ while (iter.hasNext()) {
616
+ Attr[] parentNamespaces = iter.next();
617
+ for (int n = 0; n < parentNamespaces.length; n++) {
618
+ if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) {
619
+ if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
620
+ // exactly the same namespace should not be added
621
+ newNamespace = false;
622
+ } else {
623
+ // in case of namespace url change, propagated namespace will be override
624
+ namespaces.remove(parentNamespaces[n]);
625
+ }
626
+ }
627
+ }
628
+ if (newNamespace && !namespaces.contains(attr)) { namespaces.add(attr); }
629
+ }
630
+ }
631
+
632
+ private void
633
+ getAttributesWithPropagated(List<Attr> attributes, Attr attr)
634
+ {
635
+ boolean newAttribute = true;
636
+ Iterator<Attr[]> iter = c14nAttrStack.iterator();
637
+ while (iter.hasNext()) {
638
+ Attr[] parentAttr = iter.next();
639
+ for (int n = 0; n < parentAttr.length; n++) {
640
+ if (!parentAttr[n].getNodeName().startsWith("xml:")) { continue; }
641
+ if (parentAttr[n].getNodeName().equals(attr.getNodeName())) {
642
+ if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
643
+ // exactly the same attribute should not be added
644
+ newAttribute = false;
645
+ } else {
646
+ // in case of attribute value change, propagated attribute will be override
647
+ attributes.remove(parentAttr[n]);
648
+ }
649
+ }
650
+ }
651
+ if (newAttribute) { attributes.add(attr); }
652
+ }
653
+ }
654
+
655
+ private void
656
+ verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs)
657
+ {
658
+ Attr attr = (Attr) attrs.getNamedItem("xml:space");
659
+ if (attr == null) {
660
+ for (int i = 0; i < attributes.size(); i++) {
661
+ if (attributes.get(i).getNodeName().equals("xml:space")) {
662
+ attributes.remove(i);
663
+ break;
664
+ }
665
+ }
666
+ }
667
+ }
668
+
669
+ private Attr[]
670
+ getSortedArray(List<Attr> attrList)
671
+ {
672
+ Attr[] attrArray = attrList.toArray(new Attr[0]);
673
+ Arrays.sort(attrArray, new Comparator<Attr>() {
674
+ @Override
675
+ public int compare(Attr attr0, Attr attr1) {
676
+ return attr0.getNodeName().compareTo(attr1.getNodeName());
677
+ }
678
+ });
679
+ return attrArray;
680
+ }
681
+
682
+ public void
683
+ leave(Element element)
684
+ {
685
+ if (canonical) {
686
+ c14nNamespaceStack.poll();
687
+ c14nAttrStack.poll();
688
+ }
689
+ String name = element.getTagName();
690
+ if (element.hasChildNodes()) {
691
+ if (needIndentInClosing(element)) {
692
+ indentation.pop();
693
+ buffer.append(indentation.peek());
694
+ } else if (asBuilder) {
695
+ if (!containsText(element)) { indentation.pop(); }
696
+ }
697
+ buffer.append("</").append(name).append('>');
698
+ if (needBreakInClosing(element)) {
699
+ buffer.append('\n');
700
+ }
701
+ return;
702
+ }
703
+ // no child, but HTML might need a closing tag.
704
+ if (asHtml || noEmpty) {
705
+ if (!isEmpty(name) && noEmpty) {
706
+ buffer.append("</").append(name).append('>');
707
+ }
708
+ }
709
+ if (needBreakInClosing(element)) {
710
+ if (!containsText(element)) { indentation.pop(); }
711
+ buffer.append('\n');
712
+ }
713
+ }
714
+
715
+ private boolean
716
+ needIndentInClosing(Element element)
717
+ {
718
+ if (containsText(element)) { return false; }
719
+
720
+ if (fragment) { return false; } // a given option might be fragment and format. fragment matters
721
+ if (format) { return true; }
722
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) { return true; }
723
+ return false;
724
+ }
725
+
726
+ private boolean
727
+ needBreakInClosing(Element element)
728
+ {
729
+ if (fragment) { return false; }
730
+ if (format || asBuilder) { return true; }
731
+ return false;
732
+ }
733
+
734
+ private boolean
735
+ containsText(Element element)
736
+ {
737
+ return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE);
738
+ }
739
+
740
+ public boolean
741
+ enter(Entity entity)
742
+ {
743
+ String name = entity.getNodeName();
744
+ String pubId = entity.getPublicId();
745
+ String sysId = entity.getSystemId();
746
+ String notation = entity.getNotationName();
747
+ buffer.append("<!ENTITY ");
748
+ buffer.append(name);
749
+ if (pubId != null) {
750
+ buffer.append(" PUBLIC \"");
751
+ buffer.append(pubId);
752
+ buffer.append("\"");
753
+ }
754
+ if (sysId != null) {
755
+ buffer.append(" SYSTEM \"");
756
+ buffer.append(sysId);
757
+ buffer.append("\"");
758
+ }
759
+ if (notation != null) {
760
+ buffer.append(" NDATA ");
761
+ buffer.append(notation);
762
+ }
763
+ buffer.append(">");
764
+ return true;
765
+ }
766
+
767
+ public void
768
+ leave(Entity entity)
769
+ {
770
+ // no-op
771
+ }
772
+
773
+ public boolean
774
+ enter(EntityReference entityRef)
775
+ {
776
+ buffer.append('&').append(entityRef.getNodeName()).append(';');
777
+ return true;
778
+ }
779
+ public void
780
+ leave(EntityReference entityRef)
781
+ {
782
+ // no-op
783
+ }
784
+
785
+ public boolean
786
+ enter(Notation notation)
787
+ {
788
+ String name = notation.getNodeName();
789
+ String pubId = notation.getPublicId();
790
+ String sysId = notation.getSystemId();
791
+ buffer.append("<!NOTATION ");
792
+ buffer.append(name);
793
+ if (pubId != null) {
794
+ buffer.append(" PUBLIC \"");
795
+ buffer.append(pubId);
796
+ buffer.append("\"");
797
+ if (sysId != null) {
798
+ buffer.append(" \"");
799
+ buffer.append(sysId);
800
+ buffer.append("\"");
801
+ }
802
+ } else if (sysId != null) {
803
+ buffer.append(" SYSTEM \"");
804
+ buffer.append(sysId);
805
+ buffer.append("\"");
806
+ }
807
+ buffer.append(">");
808
+ return true;
809
+ }
810
+
811
+ public void
812
+ leave(Notation notation)
813
+ {
814
+ // no-op
815
+ }
816
+
817
+ public boolean
818
+ enter(ProcessingInstruction pi)
819
+ {
820
+ buffer.append("<?");
821
+ buffer.append(pi.getTarget());
822
+ buffer.append(" ");
823
+ buffer.append(pi.getData());
824
+ if (asHtml) { buffer.append(">"); }
825
+ else { buffer.append("?>"); }
826
+ buffer.append("\n");
827
+ if (canonical) { c14nNodeList.add(pi); }
828
+ return true;
829
+ }
830
+
831
+ public void
832
+ leave(ProcessingInstruction pi)
833
+ {
834
+ // no-op
835
+ }
836
+
837
+ private boolean
838
+ isHtmlScript(Text text)
839
+ {
840
+ return htmlDoc && text.getParentNode().getNodeName().equals("script");
841
+ }
842
+
843
+ private boolean
844
+ isHtmlStyle(Text text)
845
+ {
846
+ return htmlDoc && text.getParentNode().getNodeName().equals("style");
847
+ }
848
+
849
+ public boolean
850
+ enter(Text text)
851
+ {
852
+ CharSequence textContent = text.getNodeValue();
853
+ if (canonical) {
854
+ c14nNodeList.add(text);
855
+ if (isBlank(textContent)) {
856
+ buffer.append(canonicalizeWhitespace(textContent));
443
857
  return true;
444
- }
445
-
446
- private boolean needIndent(Element element) {
447
- if (containsText(element)) return false;
448
- if (fragment) return false; // a given option might be fragment and format. fragment matters
449
- if (format || asBuilder) return true;
450
- return false;
451
- }
452
-
453
- private boolean needBreakInOpening(Element element) {
454
- if (containsText(element)) return false;
455
- if (fragment) return false;
456
- if (format) return true;
457
- if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
458
- if (format && element.getNextSibling() == null && element.hasChildNodes()) return true;
459
- return false;
460
- }
461
-
462
- private boolean isEmpty(String name) {
463
- HTMLElements.Element element = HTMLElements.getElement(name);
464
- return element.isEmpty();
465
- }
466
-
467
- private Attr[] getAttrsAndNamespaces(Element element) {
468
- NamedNodeMap attrs = element.getAttributes();
469
- if (!canonical) {
470
- if (attrs == null || attrs.getLength() == 0) return new Attr[0];
471
- Attr[] attrsAndNamespaces = new Attr[attrs.getLength()];
472
- for (int i=0; i<attrs.getLength(); i++) {
473
- attrsAndNamespaces[i] = (Attr) attrs.item(i);
474
- }
475
- return attrsAndNamespaces;
476
- } else {
477
- List<Attr> namespaces = new ArrayList<Attr>();
478
- List<Attr> attributes = new ArrayList<Attr>();
479
- if (subsets) {
480
- getAttrsOfAncestors(element.getParentNode(), namespaces, attributes);
481
- Attr[] namespaceOfAncestors = getSortedArray(namespaces);
482
- Attr[] attributeOfAncestors = getSortedArray(attributes);
483
- c14nNamespaceStack.push(namespaceOfAncestors);
484
- c14nAttrStack.push(attributeOfAncestors);
485
- subsets = false; // namespace propagation should be done only once on top level node.
486
- }
487
-
488
- getNamespacesAndAttrs(element, namespaces, attributes);
489
-
490
- Attr[] namespaceArray = getSortedArray(namespaces);
491
- Attr[] attributeArray = getSortedArray(attributes);
492
- Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length];
493
- for (int i=0; i<allAttrs.length; i++) {
494
- if (i < namespaceArray.length) {
495
- allAttrs[i] = namespaceArray[i];
496
- } else {
497
- allAttrs[i] = attributeArray[i-namespaceArray.length];
498
- }
499
- }
500
- c14nNamespaceStack.push(namespaceArray);
501
- c14nAttrStack.push(attributeArray);
502
- return allAttrs;
503
- }
504
-
505
- }
506
-
507
- private void getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes) {
508
- if (parent == null) return;
509
- NamedNodeMap attrs = parent.getAttributes();
510
- if (attrs == null || attrs.getLength() == 0) return;
511
- for (int i=0; i < attrs.getLength(); i++) {
512
- Attr attr = (Attr)attrs.item(i);
513
- if (isNamespace(attr.getNodeName())) namespaces.add(attr);
514
- else attributes.add(attr);
515
- }
516
- getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
517
- }
518
-
519
- private void getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes) {
520
- NamedNodeMap attrs = current.getAttributes();
521
- for (int i=0; i<attrs.getLength(); i++) {
522
- Attr attr = (Attr)attrs.item(i);
523
- if (isNamespace(attr.getNodeName())) {
524
- getNamespacesWithPropagated(namespaces, attr);
525
- } else {
526
- getAttributesWithPropagated(attributes, attr);
527
- }
528
- if (exclusive) {
529
- verifyXmlSpace(attributes, attrs);
530
- }
531
- }
532
- }
533
-
534
- private void getNamespacesWithPropagated(List<Attr> namespaces, Attr attr) {
535
- boolean newNamespace = true;
536
- Iterator<Attr[]> iter = c14nNamespaceStack.iterator();
537
- while (iter.hasNext()) {
538
- Attr[] parentNamespaces = iter.next();
539
- for (int n=0; n < parentNamespaces.length; n++) {
540
- if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) {
541
- if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
542
- // exactly the same namespace should not be added
543
- newNamespace = false;
544
- } else {
545
- // in case of namespace url change, propagated namespace will be override
546
- namespaces.remove(parentNamespaces[n]);
547
- }
548
- }
549
- }
550
- if (newNamespace && !namespaces.contains(attr)) namespaces.add(attr);
551
- }
552
- }
553
-
554
- private void getAttributesWithPropagated(List<Attr> attributes, Attr attr) {
555
- boolean newAttribute = true;
556
- Iterator<Attr[]> iter = c14nAttrStack.iterator();
557
- while (iter.hasNext()) {
558
- Attr[] parentAttr = iter.next();
559
- for (int n=0; n < parentAttr.length; n++) {
560
- if (!parentAttr[n].getNodeName().startsWith("xml:")) continue;
561
- if (parentAttr[n].getNodeName().equals(attr.getNodeName())) {
562
- if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
563
- // exactly the same attribute should not be added
564
- newAttribute = false;
565
- } else {
566
- // in case of attribute value change, propagated attribute will be override
567
- attributes.remove(parentAttr[n]);
568
- }
569
- }
570
- }
571
- if (newAttribute) attributes.add(attr);
572
- }
573
- }
574
-
575
- private void verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs) {
576
- Attr attr = (Attr) attrs.getNamedItem("xml:space");
577
- if (attr == null) {
578
- for (int i=0; i < attributes.size(); i++) {
579
- if (attributes.get(i).getNodeName().equals("xml:space")) {
580
- attributes.remove(i);
581
- break;
582
- }
583
- }
584
- }
585
- }
586
-
587
- private Attr[] getSortedArray(List<Attr> attrList) {
588
- Attr[] attrArray = attrList.toArray(new Attr[0]);
589
- Arrays.sort(attrArray, new Comparator<Attr>() {
590
- @Override
591
- public int compare(Attr attr0, Attr attr1) {
592
- return attr0.getNodeName().compareTo(attr1.getNodeName());
593
- }
594
- });
595
- return attrArray;
596
- }
597
-
598
- public void leave(Element element) {
599
- if (canonical) {
600
- c14nNamespaceStack.poll();
601
- c14nAttrStack.poll();
602
- }
603
- String name = element.getTagName();
604
- if (element.hasChildNodes()) {
605
- if (needIndentInClosing(element)) {
606
- indentation.pop();
607
- buffer.append(indentation.peek());
608
- } else if (asBuilder) {
609
- if (!containsText(element)) indentation.pop();
610
- }
611
- buffer.append("</").append(name).append('>');
612
- if (needBreakInClosing(element)) {
613
- buffer.append('\n');
614
- }
615
- return;
616
- }
617
- // no child, but HTML might need a closing tag.
618
- if (asHtml || noEmpty) {
619
- if (!isEmpty(name) && noEmpty) {
620
- buffer.append("</").append(name).append('>');
621
- }
622
- }
623
- if (needBreakInClosing(element)) {
624
- if (!containsText(element)) indentation.pop();
625
- buffer.append('\n');
626
- }
627
- }
628
-
629
- private boolean needIndentInClosing(Element element) {
630
- if (containsText(element)) return false;
631
-
632
- if (fragment) return false; // a given option might be fragment and format. fragment matters
633
- if (format) return true;
634
- if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
635
- return false;
636
- }
637
-
638
- private boolean needBreakInClosing(Element element) {
639
- if (fragment) return false;
640
- if (format || asBuilder) return true;
641
- return false;
642
- }
643
-
644
- private boolean containsText(Element element) {
645
- return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE);
646
- }
647
-
648
- public boolean enter(Entity entity) {
649
- String name = entity.getNodeName();
650
- String pubId = entity.getPublicId();
651
- String sysId = entity.getSystemId();
652
- String notation = entity.getNotationName();
653
- buffer.append("<!ENTITY ");
654
- buffer.append(name);
655
- if (pubId != null) {
656
- buffer.append(" PUBLIC \"");
657
- buffer.append(pubId);
658
- buffer.append("\"");
659
- }
660
- if (sysId != null) {
661
- buffer.append(" SYSTEM \"");
662
- buffer.append(sysId);
663
- buffer.append("\"");
664
- }
665
- if (notation != null) {
666
- buffer.append(" NDATA ");
667
- buffer.append(notation);
668
- }
669
- buffer.append(">");
670
- return true;
671
- }
672
-
673
- public void leave(Entity entity) {
674
- // no-op
675
- }
676
-
677
- public boolean enter(EntityReference entityRef) {
678
- buffer.append('&').append(entityRef.getNodeName()).append(';');
679
- return true;
680
- }
681
- public void leave(EntityReference entityRef) {
682
- // no-op
683
- }
684
-
685
- public boolean enter(Notation notation) {
686
- String name = notation.getNodeName();
687
- String pubId = notation.getPublicId();
688
- String sysId = notation.getSystemId();
689
- buffer.append("<!NOTATION ");
690
- buffer.append(name);
691
- if (pubId != null) {
692
- buffer.append(" PUBLIC \"");
693
- buffer.append(pubId);
694
- buffer.append("\"");
695
- if (sysId != null) {
696
- buffer.append(" \"");
697
- buffer.append(sysId);
698
- buffer.append("\"");
699
- }
700
- } else if (sysId != null) {
701
- buffer.append(" SYSTEM \"");
702
- buffer.append(sysId);
703
- buffer.append("\"");
704
- }
705
- buffer.append(">");
706
- return true;
707
- }
708
-
709
- public void leave(Notation notation) {
710
- // no-op
711
- }
712
-
713
- public boolean enter(ProcessingInstruction pi) {
714
- buffer.append("<?");
715
- buffer.append(pi.getTarget());
716
- buffer.append(" ");
717
- buffer.append(pi.getData());
718
- if (asHtml) buffer.append(">");
719
- else buffer.append("?>");
720
- buffer.append("\n");
721
- if (canonical) c14nNodeList.add(pi);
722
- return true;
723
- }
724
-
725
- public void leave(ProcessingInstruction pi) {
726
- // no-op
727
- }
728
-
729
- private boolean isHtmlScript(Text text) {
730
- return htmlDoc && text.getParentNode().getNodeName().equals("script");
731
- }
732
-
733
- private boolean isHtmlStyle(Text text) {
734
- return htmlDoc && text.getParentNode().getNodeName().equals("style");
735
- }
736
-
737
- public boolean enter(Text text) {
738
- CharSequence textContent = text.getNodeValue();
739
- if (canonical) {
740
- c14nNodeList.add(text);
741
- if (isBlank(textContent)) {
742
- buffer.append(canonicalizeWhitespace(textContent));
743
- return true;
744
- }
745
- }
746
-
747
- if (shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) {
748
- textContent = encodeJavaString(textContent);
749
- }
750
-
751
- textContent = encodeStringToHtmlEntity(textContent);
752
- buffer.append(textContent);
753
- return true;
754
- }
755
-
756
- private CharSequence encodeStringToHtmlEntity(CharSequence text) {
757
- if (encoding == null) return text;
758
-
759
- CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
760
- StringBuilder sb = new StringBuilder(text.length() + 16);
761
- // make sure we can handle code points that are higher than 2 bytes
762
- for ( int i = 0; i < text.length(); ) {
763
- int code = Character.codePointAt(text, i);
764
- // TODO not sure about bigger offset then 2 ?!
765
- int offset = code > 65535 ? 2 : 1;
766
- CharSequence substr = text.subSequence(i, i + offset);
767
- boolean canEncode = encoder.canEncode(substr);
768
- if (canEncode) {
769
- sb.append(substr);
770
- }
771
- else {
772
- sb.append("&#x").append(Integer.toHexString(code)).append(';');
773
- }
774
- i += offset;
775
- }
776
- return sb;
777
- }
858
+ }
859
+ }
860
+
861
+ if (shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) {
862
+ textContent = encodeJavaString(textContent);
863
+ }
864
+
865
+ textContent = encodeStringToHtmlEntity(textContent);
866
+ buffer.append(textContent);
867
+ return true;
868
+ }
869
+
870
+ private CharSequence
871
+ encodeStringToHtmlEntity(CharSequence text)
872
+ {
873
+ if (encoding == null) { return text; }
874
+
875
+ CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
876
+ StringBuilder sb = new StringBuilder(text.length() + 16);
877
+ // make sure we can handle code points that are higher than 2 bytes
878
+ for (int i = 0; i < text.length();) {
879
+ int code = Character.codePointAt(text, i);
880
+ // TODO not sure about bigger offset then 2 ?!
881
+ int offset = code > 65535 ? 2 : 1;
882
+ CharSequence substr = text.subSequence(i, i + offset);
883
+ boolean canEncode = encoder.canEncode(substr);
884
+ if (canEncode) {
885
+ sb.append(substr);
886
+ } else {
887
+ sb.append("&#x").append(Integer.toHexString(code)).append(';');
888
+ }
889
+ i += offset;
890
+ }
891
+ return sb;
892
+ }
778
893
  }