nokogiri 1.11.1-java → 1.11.2-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +20 -15
- data/ext/java/nokogiri/EncodingHandler.java +78 -59
- data/ext/java/nokogiri/HtmlDocument.java +137 -114
- data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
- data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
- data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
- data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
- data/ext/java/nokogiri/NokogiriService.java +597 -526
- data/ext/java/nokogiri/XmlAttr.java +120 -96
- data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
- data/ext/java/nokogiri/XmlCdata.java +35 -26
- data/ext/java/nokogiri/XmlComment.java +48 -37
- data/ext/java/nokogiri/XmlDocument.java +642 -540
- data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
- data/ext/java/nokogiri/XmlDtd.java +450 -384
- data/ext/java/nokogiri/XmlElement.java +25 -18
- data/ext/java/nokogiri/XmlElementContent.java +345 -286
- data/ext/java/nokogiri/XmlElementDecl.java +126 -95
- data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
- data/ext/java/nokogiri/XmlEntityReference.java +51 -42
- data/ext/java/nokogiri/XmlNamespace.java +177 -145
- data/ext/java/nokogiri/XmlNode.java +1843 -1588
- data/ext/java/nokogiri/XmlNodeSet.java +361 -299
- data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
- data/ext/java/nokogiri/XmlReader.java +513 -418
- data/ext/java/nokogiri/XmlRelaxng.java +91 -78
- data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
- data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
- data/ext/java/nokogiri/XmlSchema.java +328 -263
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
- data/ext/java/nokogiri/XmlText.java +57 -46
- data/ext/java/nokogiri/XmlXpathContext.java +240 -206
- data/ext/java/nokogiri/XsltStylesheet.java +282 -239
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
- data/ext/java/nokogiri/internals/ParserContext.java +206 -179
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
- data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +253 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +66 -65
- data/ext/nokogiri/html_sax_push_parser.c +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +171 -63
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +221 -164
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +338 -286
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +100 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +1 -22
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +31 -8
- data/lib/nokogiri/xml/document.rb +31 -11
- data/lib/nokogiri/xml/node.rb +38 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +7 -8
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -66,713 +66,828 @@ import org.w3c.dom.Text;
|
|
66
66
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
67
67
|
* @author Yoko Harada <yokolet@gmail.com>
|
68
68
|
*/
|
69
|
-
public class SaveContextVisitor
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
}
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
}
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
buffer.append(
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
buffer.append(
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
69
|
+
public class SaveContextVisitor
|
70
|
+
{
|
71
|
+
|
72
|
+
private final StringBuilder buffer;
|
73
|
+
private final Stack<String> indentation;
|
74
|
+
private String encoding;
|
75
|
+
private final CharSequence indentString;
|
76
|
+
private boolean format;
|
77
|
+
private final boolean noDecl;
|
78
|
+
private final boolean noEmpty;
|
79
|
+
private final boolean noXhtml;
|
80
|
+
private final boolean asXhtml;
|
81
|
+
private boolean asXml;
|
82
|
+
private final boolean asHtml;
|
83
|
+
private final boolean asBuilder;
|
84
|
+
private boolean htmlDoc;
|
85
|
+
private final boolean fragment;
|
86
|
+
private final boolean canonical, incl_ns, with_comments;
|
87
|
+
private boolean subsets;
|
88
|
+
private boolean exclusive;
|
89
|
+
private final List<Node> c14nNodeList;
|
90
|
+
private final Deque<Attr[]> c14nNamespaceStack;
|
91
|
+
private final Deque<Attr[]> c14nAttrStack;
|
92
|
+
//private List<String> c14nExclusiveInclusivePrefixes = null;
|
93
|
+
|
94
|
+
/*
|
95
|
+
* U can't touch this.
|
96
|
+
* http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
|
97
|
+
*
|
98
|
+
* Taken from libxml save options.
|
99
|
+
*/
|
100
|
+
|
101
|
+
public static final int FORMAT = 1;
|
102
|
+
public static final int NO_DECL = 2;
|
103
|
+
public static final int NO_EMPTY = 4;
|
104
|
+
public static final int NO_XHTML = 8;
|
105
|
+
public static final int AS_XHTML = 16;
|
106
|
+
public static final int AS_XML = 32;
|
107
|
+
public static final int AS_HTML = 64;
|
108
|
+
public static final int AS_BUILDER = 128;
|
109
|
+
|
110
|
+
public static final int CANONICAL = 1;
|
111
|
+
public static final int INCL_NS = 2;
|
112
|
+
public static final int WITH_COMMENTS = 4;
|
113
|
+
public static final int SUBSETS = 8;
|
114
|
+
public static final int EXCLUSIVE = 16;
|
115
|
+
|
116
|
+
/**
 * Creates a visitor that serializes DOM nodes into an internal buffer.
 *
 * @param options bit mask built from FORMAT, NO_DECL, NO_EMPTY, NO_XHTML, AS_XHTML, AS_XML,
 *        AS_HTML and AS_BUILDER
 * @param indent string appended once per nesting level; may be null or empty
 * @param encoding encoding name written into the XML declaration and meta charset; may be null
 * @param htmlDoc whether the serialized document is an HTML document
 * @param fragment whether a fragment is serialized (this disables indentation and line breaks)
 * @param canonicalOpts bit mask built from CANONICAL, INCL_NS, WITH_COMMENTS and SUBSETS
 */
public SaveContextVisitor(int options, CharSequence indent, String encoding, boolean htmlDoc,
    boolean fragment, int canonicalOpts) {
  buffer = new StringBuilder();
  this.encoding = encoding;
  indentation = new Stack<String>();
  indentation.push("");
  this.htmlDoc = htmlDoc;
  this.fragment = fragment;
  c14nNodeList = new ArrayList<Node>();
  c14nNamespaceStack = new ArrayDeque<Attr[]>();
  c14nAttrStack = new ArrayDeque<Attr[]>();

  format = (options & FORMAT) == FORMAT;
  noDecl = (options & NO_DECL) == NO_DECL;
  noEmpty = (options & NO_EMPTY) == NO_EMPTY;
  noXhtml = (options & NO_XHTML) == NO_XHTML;
  asXhtml = (options & AS_XHTML) == AS_XHTML;
  asXml = (options & AS_XML) == AS_XML;
  asHtml = (options & AS_HTML) == AS_HTML;
  asBuilder = (options & AS_BUILDER) == AS_BUILDER;

  canonical = (canonicalOpts & CANONICAL) == CANONICAL;
  incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
  with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
  subsets = (canonicalOpts & SUBSETS) == SUBSETS;
  // NOTE(review): the EXCLUSIVE bit of canonicalOpts is not read here, so "exclusive" keeps
  // its default value unless it is assigned elsewhere -- confirm this is intended.

  if (indent == null || indent.length() == 0) {
    // Formatting or builder output without an explicit indent falls back to two spaces.
    if (format || asBuilder) {
      indent = "  ";
    }
  } else if (!format) {
    // An explicit, non-empty indent implies formatted output.
    format = true;
  }
  indentString = indent;

  // XML is the output flavour when none was requested.
  if (!asXml && !asHtml && !asXhtml && !asBuilder) {
    asXml = true;
  }
}
|
150
|
+
|
151
|
+
@Override
|
152
|
+
public String
|
153
|
+
toString()
|
154
|
+
{
|
155
|
+
return buffer.toString();
|
156
|
+
}
|
157
|
+
|
158
|
+
public StringBuilder
|
159
|
+
getInternalBuffer() { return buffer; }
|
160
|
+
|
161
|
+
public void
|
162
|
+
setHtmlDoc(boolean htmlDoc)
|
163
|
+
{
|
164
|
+
this.htmlDoc = htmlDoc;
|
165
|
+
}
|
166
|
+
|
167
|
+
public void
|
168
|
+
setEncoding(String encoding)
|
169
|
+
{
|
170
|
+
this.encoding = encoding;
|
171
|
+
}
|
172
|
+
|
173
|
+
public boolean
|
174
|
+
enter(Node node)
|
175
|
+
{
|
176
|
+
if (node instanceof Document) {
|
177
|
+
return enter((Document)node);
|
178
|
+
}
|
179
|
+
if (node instanceof Element) {
|
180
|
+
return enter((Element)node);
|
181
|
+
}
|
182
|
+
if (node instanceof Attr) {
|
183
|
+
return enter((Attr)node);
|
184
|
+
}
|
185
|
+
if (node instanceof Text) {
|
186
|
+
return enter((Text)node);
|
187
|
+
}
|
188
|
+
if (node instanceof CDATASection) {
|
189
|
+
return enter((CDATASection)node);
|
190
|
+
}
|
191
|
+
if (node instanceof Comment) {
|
192
|
+
return enter((Comment)node);
|
193
|
+
}
|
194
|
+
if (node instanceof DocumentType) {
|
195
|
+
return enter((DocumentType)node);
|
196
|
+
}
|
197
|
+
if (node instanceof Entity) {
|
198
|
+
return enter((Entity)node);
|
199
|
+
}
|
200
|
+
if (node instanceof EntityReference) {
|
201
|
+
return enter((EntityReference) node);
|
202
|
+
}
|
203
|
+
if (node instanceof Notation) {
|
204
|
+
return enter((Notation)node);
|
205
|
+
}
|
206
|
+
if (node instanceof ProcessingInstruction) {
|
207
|
+
return enter((ProcessingInstruction)node);
|
208
|
+
}
|
209
|
+
return false;
|
210
|
+
}
|
211
|
+
|
212
|
+
public void
|
213
|
+
leave(Node node)
|
214
|
+
{
|
215
|
+
if (node instanceof Document) {
|
216
|
+
leave((Document)node);
|
217
|
+
return;
|
218
|
+
}
|
219
|
+
if (node instanceof Element) {
|
220
|
+
leave((Element)node);
|
221
|
+
return;
|
222
|
+
}
|
223
|
+
if (node instanceof Attr) {
|
224
|
+
leave((Attr)node);
|
225
|
+
return;
|
226
|
+
}
|
227
|
+
if (node instanceof Text) {
|
228
|
+
return;
|
229
|
+
}
|
230
|
+
if (node instanceof CDATASection) {
|
231
|
+
leave((CDATASection)node);
|
232
|
+
return;
|
233
|
+
}
|
234
|
+
if (node instanceof Comment) {
|
235
|
+
leave((Comment)node);
|
236
|
+
return;
|
237
|
+
}
|
238
|
+
if (node instanceof DocumentType) {
|
239
|
+
leave((DocumentType)node);
|
240
|
+
return;
|
241
|
+
}
|
242
|
+
if (node instanceof Entity) {
|
243
|
+
leave((Entity)node);
|
244
|
+
return;
|
245
|
+
}
|
246
|
+
if (node instanceof EntityReference) {
|
247
|
+
leave((EntityReference) node);
|
248
|
+
return;
|
249
|
+
}
|
250
|
+
if (node instanceof Notation) {
|
251
|
+
leave((Notation)node);
|
252
|
+
return;
|
253
|
+
}
|
254
|
+
if (node instanceof ProcessingInstruction) {
|
255
|
+
leave((ProcessingInstruction)node);
|
256
|
+
return;
|
257
|
+
}
|
258
|
+
}
|
259
|
+
|
260
|
+
public boolean
|
261
|
+
enter(String string)
|
262
|
+
{
|
263
|
+
buffer.append(string);
|
264
|
+
return true;
|
265
|
+
}
|
266
|
+
|
267
|
+
public void
|
268
|
+
leave(String string)
|
269
|
+
{
|
270
|
+
// no-op
|
271
|
+
}
|
272
|
+
|
273
|
+
public boolean
|
274
|
+
enter(Attr attr)
|
275
|
+
{
|
276
|
+
String name = attr.getName();
|
277
|
+
buffer.append(name);
|
278
|
+
if (!asHtml || !isHtmlBooleanAttr(name)) {
|
279
|
+
buffer.append('=');
|
280
|
+
buffer.append('"');
|
281
|
+
String value = replaceCharsetIfNecessary(attr);
|
282
|
+
buffer.append(serializeAttrTextContent(value, htmlDoc));
|
283
|
+
buffer.append('"');
|
284
|
+
}
|
285
|
+
return true;
|
286
|
+
}
|
287
|
+
|
288
|
+
private static final Pattern CHARSET =
|
289
|
+
Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
|
290
|
+
|
291
|
+
private String
|
292
|
+
replaceCharsetIfNecessary(Attr attr)
|
293
|
+
{
|
294
|
+
String value = attr.getValue();
|
295
|
+
if (encoding == null) { return value; } // unable to replace in any case
|
296
|
+
if (!"content".equals(attr.getName().toLowerCase())) { return value; } // must be content attr
|
297
|
+
if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) { return value; }
|
298
|
+
Matcher m = CHARSET.matcher(value);
|
299
|
+
if (!m.find()) { return value; }
|
300
|
+
if (value.contains(encoding)) { return value; } // no need to replace
|
301
|
+
return value.replace(m.group(), "charset=" + encoding);
|
302
|
+
}
|
303
|
+
|
304
|
+
static final Set<String> HTML_BOOLEAN_ATTRS;
|
305
|
+
static
|
306
|
+
{
|
307
|
+
final String[] _HTML_BOOLEAN_ATTRS = {
|
308
|
+
"checked", "compact", "declare", "defer", "disabled", "ismap",
|
309
|
+
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
|
310
|
+
"selected"
|
311
|
+
};
|
312
|
+
HTML_BOOLEAN_ATTRS = new HashSet<String>(Arrays.asList(_HTML_BOOLEAN_ATTRS));
|
313
|
+
}
|
314
|
+
|
315
|
+
private static boolean
|
316
|
+
isHtmlBooleanAttr(String name)
|
317
|
+
{
|
318
|
+
return HTML_BOOLEAN_ATTRS.contains(name);
|
319
|
+
}
|
320
|
+
|
321
|
+
private static CharSequence
|
322
|
+
serializeAttrTextContent(String str, boolean htmlDoc)
|
323
|
+
{
|
324
|
+
if (str == null || str.length() == 0) { return ""; }
|
325
|
+
|
326
|
+
StringBuilder buffer = new StringBuilder(str.length() + 16);
|
327
|
+
|
328
|
+
for (int i = 0; i < str.length(); i++) {
|
329
|
+
char c;
|
330
|
+
switch (c = str.charAt(i)) {
|
331
|
+
case '\n':
|
332
|
+
buffer.append(" ");
|
333
|
+
break;
|
334
|
+
case '\r':
|
335
|
+
buffer.append(" ");
|
336
|
+
break;
|
337
|
+
case '\t':
|
338
|
+
buffer.append("	");
|
339
|
+
break;
|
340
|
+
case '"':
|
341
|
+
if (htmlDoc) { buffer.append("%22"); }
|
342
|
+
else { buffer.append("""); }
|
343
|
+
break;
|
344
|
+
case '<':
|
345
|
+
buffer.append("<");
|
346
|
+
break;
|
347
|
+
case '>':
|
348
|
+
buffer.append(">");
|
349
|
+
break;
|
350
|
+
case '&':
|
351
|
+
buffer.append("&");
|
352
|
+
break;
|
353
|
+
default:
|
354
|
+
buffer.append(c);
|
355
|
+
}
|
356
|
+
}
|
357
|
+
|
358
|
+
return buffer;
|
359
|
+
}
|
360
|
+
|
361
|
+
public void
|
362
|
+
leave(Attr attr)
|
363
|
+
{
|
364
|
+
// no-op
|
365
|
+
}
|
366
|
+
|
367
|
+
public boolean
|
368
|
+
enter(CDATASection cdata)
|
369
|
+
{
|
370
|
+
buffer.append("<![CDATA[");
|
371
|
+
buffer.append(cdata.getData());
|
372
|
+
buffer.append("]]>");
|
373
|
+
return true;
|
374
|
+
}
|
375
|
+
|
376
|
+
public void
|
377
|
+
leave(CDATASection cdata)
|
378
|
+
{
|
379
|
+
// no-op
|
380
|
+
}
|
381
|
+
|
382
|
+
public boolean
|
383
|
+
enter(Comment comment)
|
384
|
+
{
|
385
|
+
if (canonical) {
|
386
|
+
c14nNodeList.add(comment);
|
387
|
+
if (!with_comments) { return true; }
|
388
|
+
}
|
389
|
+
buffer.append("<!--");
|
390
|
+
buffer.append(comment.getData());
|
391
|
+
buffer.append("-->");
|
392
|
+
return true;
|
393
|
+
}
|
394
|
+
|
395
|
+
public void
|
396
|
+
leave(Comment comment)
|
397
|
+
{
|
398
|
+
// no-op
|
399
|
+
}
|
400
|
+
|
401
|
+
public boolean
|
402
|
+
enter(Document document)
|
403
|
+
{
|
404
|
+
if (!noDecl) {
|
405
|
+
buffer.append("<?xml version=\"");
|
406
|
+
buffer.append(document.getXmlVersion());
|
407
|
+
buffer.append("\"");
|
408
|
+
|
409
|
+
if (encoding != null) {
|
410
|
+
buffer.append(" encoding=\"");
|
411
|
+
buffer.append(encoding);
|
412
|
+
buffer.append("\"");
|
413
|
+
}
|
414
|
+
buffer.append("?>\n");
|
415
|
+
}
|
416
|
+
return true;
|
417
|
+
}
|
418
|
+
|
419
|
+
public void
|
420
|
+
leave(Document document)
|
421
|
+
{
|
422
|
+
// no-op
|
423
|
+
}
|
424
|
+
|
425
|
+
public boolean
|
426
|
+
enter(DocumentType docType)
|
427
|
+
{
|
428
|
+
if (canonical) {
|
429
|
+
c14nNodeList.add(docType);
|
430
|
+
return true;
|
431
|
+
}
|
432
|
+
String name = docType.getName();
|
433
|
+
String pubId = docType.getPublicId();
|
434
|
+
String sysId = docType.getSystemId();
|
435
|
+
String internalSubset = docType.getInternalSubset();
|
436
|
+
if (docType.getPreviousSibling() != null) {
|
437
|
+
buffer.append('\n');
|
438
|
+
}
|
439
|
+
buffer.append("<!DOCTYPE ").append(name).append(' ');
|
440
|
+
if (pubId != null) {
|
441
|
+
buffer.append("PUBLIC \"").append(pubId).append('"');
|
442
|
+
if (sysId != null) { buffer.append(" \"").append(sysId).append('"'); }
|
443
|
+
} else if (sysId != null) {
|
444
|
+
buffer.append("SYSTEM \"").append(sysId).append('"');
|
445
|
+
}
|
446
|
+
if (internalSubset != null) {
|
447
|
+
buffer.append(' ').append('[');
|
448
|
+
buffer.append(internalSubset);
|
449
|
+
buffer.append(']');
|
450
|
+
}
|
451
|
+
buffer.append(">\n");
|
452
|
+
return true;
|
453
|
+
}
|
454
|
+
|
455
|
+
public void
|
456
|
+
leave(DocumentType docType)
|
457
|
+
{
|
458
|
+
// no-op
|
459
|
+
}
|
460
|
+
|
461
|
+
public boolean
|
462
|
+
enter(Element element)
|
463
|
+
{
|
464
|
+
if (canonical) {
|
465
|
+
c14nNodeList.add(element);
|
466
|
+
if (element == element.getOwnerDocument().getDocumentElement()) {
|
467
|
+
c14nNodeList.add(element.getOwnerDocument());
|
468
|
+
}
|
469
|
+
}
|
470
|
+
String current = indentation.peek();
|
471
|
+
buffer.append(current);
|
472
|
+
if (needIndent(element)) {
|
473
|
+
indentation.push(current + indentString);
|
474
|
+
}
|
475
|
+
String name = element.getTagName();
|
476
|
+
buffer.append('<').append(name);
|
477
|
+
Attr[] attrs = getAttrsAndNamespaces(element);
|
478
|
+
for (Attr attr : attrs) {
|
479
|
+
if (attr.getSpecified()) {
|
480
|
+
buffer.append(' ');
|
481
|
+
enter(attr);
|
482
|
+
leave(attr);
|
483
|
+
}
|
484
|
+
}
|
485
|
+
if (element.hasChildNodes()) {
|
486
|
+
buffer.append('>');
|
487
|
+
if (needBreakInOpening(element)) { buffer.append('\n'); }
|
488
|
+
return true;
|
489
|
+
}
|
490
|
+
// no child
|
491
|
+
if (asHtml) {
|
492
|
+
buffer.append('>');
|
493
|
+
} else if (asXml && noEmpty) {
|
494
|
+
buffer.append('>');
|
495
|
+
} else if (asXhtml) {
|
496
|
+
if (isEmpty(name)) {
|
497
|
+
buffer.append(" />"); // see http://www.w3.org/TR/xhtml1/#C_2
|
498
|
+
} else {
|
499
|
+
buffer.append('>');
|
500
|
+
}
|
501
|
+
} else {
|
502
|
+
buffer.append("/>");
|
503
|
+
}
|
504
|
+
if (needBreakInOpening(element)) {
|
505
|
+
buffer.append('\n');
|
506
|
+
}
|
507
|
+
return true;
|
508
|
+
}
|
509
|
+
|
510
|
+
private boolean
|
511
|
+
needIndent(Element element)
|
512
|
+
{
|
513
|
+
if (containsText(element)) { return false; }
|
514
|
+
if (fragment) { return false; } // a given option might be fragment and format. fragment matters
|
515
|
+
if (format || asBuilder) { return true; }
|
516
|
+
return false;
|
517
|
+
}
|
518
|
+
|
519
|
+
private boolean
|
520
|
+
needBreakInOpening(Element element)
|
521
|
+
{
|
522
|
+
if (containsText(element)) { return false; }
|
523
|
+
if (fragment) { return false; }
|
524
|
+
if (format) { return true; }
|
525
|
+
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) { return true; }
|
526
|
+
if (format && element.getNextSibling() == null && element.hasChildNodes()) { return true; }
|
527
|
+
return false;
|
528
|
+
}
|
529
|
+
|
530
|
+
private boolean
|
531
|
+
isEmpty(String name)
|
532
|
+
{
|
533
|
+
HTMLElements.Element element = HTMLElements.getElement(name);
|
534
|
+
return element.isEmpty();
|
535
|
+
}
|
536
|
+
|
537
|
+
private Attr[]
|
538
|
+
getAttrsAndNamespaces(Element element)
|
539
|
+
{
|
540
|
+
NamedNodeMap attrs = element.getAttributes();
|
541
|
+
if (!canonical) {
|
542
|
+
if (attrs == null || attrs.getLength() == 0) { return new Attr[0]; }
|
543
|
+
Attr[] attrsAndNamespaces = new Attr[attrs.getLength()];
|
544
|
+
for (int i = 0; i < attrs.getLength(); i++) {
|
545
|
+
attrsAndNamespaces[i] = (Attr) attrs.item(i);
|
546
|
+
}
|
547
|
+
return attrsAndNamespaces;
|
548
|
+
} else {
|
549
|
+
List<Attr> namespaces = new ArrayList<Attr>();
|
550
|
+
List<Attr> attributes = new ArrayList<Attr>();
|
551
|
+
if (subsets) {
|
552
|
+
getAttrsOfAncestors(element.getParentNode(), namespaces, attributes);
|
553
|
+
Attr[] namespaceOfAncestors = getSortedArray(namespaces);
|
554
|
+
Attr[] attributeOfAncestors = getSortedArray(attributes);
|
555
|
+
c14nNamespaceStack.push(namespaceOfAncestors);
|
556
|
+
c14nAttrStack.push(attributeOfAncestors);
|
557
|
+
subsets = false; // namespace propagation should be done only once on top level node.
|
558
|
+
}
|
559
|
+
|
560
|
+
getNamespacesAndAttrs(element, namespaces, attributes);
|
561
|
+
|
562
|
+
Attr[] namespaceArray = getSortedArray(namespaces);
|
563
|
+
Attr[] attributeArray = getSortedArray(attributes);
|
564
|
+
Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length];
|
565
|
+
for (int i = 0; i < allAttrs.length; i++) {
|
566
|
+
if (i < namespaceArray.length) {
|
567
|
+
allAttrs[i] = namespaceArray[i];
|
437
568
|
} else {
|
438
|
-
|
439
|
-
}
|
440
|
-
|
441
|
-
|
442
|
-
|
569
|
+
allAttrs[i] = attributeArray[i - namespaceArray.length];
|
570
|
+
}
|
571
|
+
}
|
572
|
+
c14nNamespaceStack.push(namespaceArray);
|
573
|
+
c14nAttrStack.push(attributeArray);
|
574
|
+
return allAttrs;
|
575
|
+
}
|
576
|
+
|
577
|
+
}
|
578
|
+
|
579
|
+
private void
|
580
|
+
getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes)
|
581
|
+
{
|
582
|
+
if (parent == null) { return; }
|
583
|
+
NamedNodeMap attrs = parent.getAttributes();
|
584
|
+
if (attrs == null || attrs.getLength() == 0) { return; }
|
585
|
+
for (int i = 0; i < attrs.getLength(); i++) {
|
586
|
+
Attr attr = (Attr)attrs.item(i);
|
587
|
+
if (isNamespace(attr.getNodeName())) { namespaces.add(attr); }
|
588
|
+
else { attributes.add(attr); }
|
589
|
+
}
|
590
|
+
getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
|
591
|
+
}
|
592
|
+
|
593
|
+
private void
|
594
|
+
getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes)
|
595
|
+
{
|
596
|
+
NamedNodeMap attrs = current.getAttributes();
|
597
|
+
for (int i = 0; i < attrs.getLength(); i++) {
|
598
|
+
Attr attr = (Attr)attrs.item(i);
|
599
|
+
if (isNamespace(attr.getNodeName())) {
|
600
|
+
getNamespacesWithPropagated(namespaces, attr);
|
601
|
+
} else {
|
602
|
+
getAttributesWithPropagated(attributes, attr);
|
603
|
+
}
|
604
|
+
if (exclusive) {
|
605
|
+
verifyXmlSpace(attributes, attrs);
|
606
|
+
}
|
607
|
+
}
|
608
|
+
}
|
609
|
+
|
610
|
+
private void
|
611
|
+
getNamespacesWithPropagated(List<Attr> namespaces, Attr attr)
|
612
|
+
{
|
613
|
+
boolean newNamespace = true;
|
614
|
+
Iterator<Attr[]> iter = c14nNamespaceStack.iterator();
|
615
|
+
while (iter.hasNext()) {
|
616
|
+
Attr[] parentNamespaces = iter.next();
|
617
|
+
for (int n = 0; n < parentNamespaces.length; n++) {
|
618
|
+
if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) {
|
619
|
+
if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
|
620
|
+
// exactly the same namespace should not be added
|
621
|
+
newNamespace = false;
|
622
|
+
} else {
|
623
|
+
// in case of namespace url change, propagated namespace will be override
|
624
|
+
namespaces.remove(parentNamespaces[n]);
|
625
|
+
}
|
626
|
+
}
|
627
|
+
}
|
628
|
+
if (newNamespace && !namespaces.contains(attr)) { namespaces.add(attr); }
|
629
|
+
}
|
630
|
+
}
|
631
|
+
|
632
|
+
private void
|
633
|
+
getAttributesWithPropagated(List<Attr> attributes, Attr attr)
|
634
|
+
{
|
635
|
+
boolean newAttribute = true;
|
636
|
+
Iterator<Attr[]> iter = c14nAttrStack.iterator();
|
637
|
+
while (iter.hasNext()) {
|
638
|
+
Attr[] parentAttr = iter.next();
|
639
|
+
for (int n = 0; n < parentAttr.length; n++) {
|
640
|
+
if (!parentAttr[n].getNodeName().startsWith("xml:")) { continue; }
|
641
|
+
if (parentAttr[n].getNodeName().equals(attr.getNodeName())) {
|
642
|
+
if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
|
643
|
+
// exactly the same attribute should not be added
|
644
|
+
newAttribute = false;
|
645
|
+
} else {
|
646
|
+
// in case of attribute value change, propagated attribute will be override
|
647
|
+
attributes.remove(parentAttr[n]);
|
648
|
+
}
|
649
|
+
}
|
650
|
+
}
|
651
|
+
if (newAttribute) { attributes.add(attr); }
|
652
|
+
}
|
653
|
+
}
|
654
|
+
|
655
|
+
private void
|
656
|
+
verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs)
|
657
|
+
{
|
658
|
+
Attr attr = (Attr) attrs.getNamedItem("xml:space");
|
659
|
+
if (attr == null) {
|
660
|
+
for (int i = 0; i < attributes.size(); i++) {
|
661
|
+
if (attributes.get(i).getNodeName().equals("xml:space")) {
|
662
|
+
attributes.remove(i);
|
663
|
+
break;
|
664
|
+
}
|
665
|
+
}
|
666
|
+
}
|
667
|
+
}
|
668
|
+
|
669
|
+
private Attr[]
|
670
|
+
getSortedArray(List<Attr> attrList)
|
671
|
+
{
|
672
|
+
Attr[] attrArray = attrList.toArray(new Attr[0]);
|
673
|
+
Arrays.sort(attrArray, new Comparator<Attr>() {
|
674
|
+
@Override
|
675
|
+
public int compare(Attr attr0, Attr attr1) {
|
676
|
+
return attr0.getNodeName().compareTo(attr1.getNodeName());
|
677
|
+
}
|
678
|
+
});
|
679
|
+
return attrArray;
|
680
|
+
}
|
681
|
+
|
682
|
+
public void
|
683
|
+
leave(Element element)
|
684
|
+
{
|
685
|
+
if (canonical) {
|
686
|
+
c14nNamespaceStack.poll();
|
687
|
+
c14nAttrStack.poll();
|
688
|
+
}
|
689
|
+
String name = element.getTagName();
|
690
|
+
if (element.hasChildNodes()) {
|
691
|
+
if (needIndentInClosing(element)) {
|
692
|
+
indentation.pop();
|
693
|
+
buffer.append(indentation.peek());
|
694
|
+
} else if (asBuilder) {
|
695
|
+
if (!containsText(element)) { indentation.pop(); }
|
696
|
+
}
|
697
|
+
buffer.append("</").append(name).append('>');
|
698
|
+
if (needBreakInClosing(element)) {
|
699
|
+
buffer.append('\n');
|
700
|
+
}
|
701
|
+
return;
|
702
|
+
}
|
703
|
+
// no child, but HTML might need a closing tag.
|
704
|
+
if (asHtml || noEmpty) {
|
705
|
+
if (!isEmpty(name) && noEmpty) {
|
706
|
+
buffer.append("</").append(name).append('>');
|
707
|
+
}
|
708
|
+
}
|
709
|
+
if (needBreakInClosing(element)) {
|
710
|
+
if (!containsText(element)) { indentation.pop(); }
|
711
|
+
buffer.append('\n');
|
712
|
+
}
|
713
|
+
}
|
714
|
+
|
715
|
+
private boolean
|
716
|
+
needIndentInClosing(Element element)
|
717
|
+
{
|
718
|
+
if (containsText(element)) { return false; }
|
719
|
+
|
720
|
+
if (fragment) { return false; } // a given option might be fragment and format. fragment matters
|
721
|
+
if (format) { return true; }
|
722
|
+
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) { return true; }
|
723
|
+
return false;
|
724
|
+
}
|
725
|
+
|
726
|
+
private boolean
|
727
|
+
needBreakInClosing(Element element)
|
728
|
+
{
|
729
|
+
if (fragment) { return false; }
|
730
|
+
if (format || asBuilder) { return true; }
|
731
|
+
return false;
|
732
|
+
}
|
733
|
+
|
734
|
+
private boolean
|
735
|
+
containsText(Element element)
|
736
|
+
{
|
737
|
+
return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE);
|
738
|
+
}
|
739
|
+
|
740
|
+
public boolean
|
741
|
+
enter(Entity entity)
|
742
|
+
{
|
743
|
+
String name = entity.getNodeName();
|
744
|
+
String pubId = entity.getPublicId();
|
745
|
+
String sysId = entity.getSystemId();
|
746
|
+
String notation = entity.getNotationName();
|
747
|
+
buffer.append("<!ENTITY ");
|
748
|
+
buffer.append(name);
|
749
|
+
if (pubId != null) {
|
750
|
+
buffer.append(" PUBLIC \"");
|
751
|
+
buffer.append(pubId);
|
752
|
+
buffer.append("\"");
|
753
|
+
}
|
754
|
+
if (sysId != null) {
|
755
|
+
buffer.append(" SYSTEM \"");
|
756
|
+
buffer.append(sysId);
|
757
|
+
buffer.append("\"");
|
758
|
+
}
|
759
|
+
if (notation != null) {
|
760
|
+
buffer.append(" NDATA ");
|
761
|
+
buffer.append(notation);
|
762
|
+
}
|
763
|
+
buffer.append(">");
|
764
|
+
return true;
|
765
|
+
}
|
766
|
+
|
767
|
+
public void
|
768
|
+
leave(Entity entity)
|
769
|
+
{
|
770
|
+
// no-op
|
771
|
+
}
|
772
|
+
|
773
|
+
public boolean
|
774
|
+
enter(EntityReference entityRef)
|
775
|
+
{
|
776
|
+
buffer.append('&').append(entityRef.getNodeName()).append(';');
|
777
|
+
return true;
|
778
|
+
}
|
779
|
+
public void
|
780
|
+
leave(EntityReference entityRef)
|
781
|
+
{
|
782
|
+
// no-op
|
783
|
+
}
|
784
|
+
|
785
|
+
public boolean
|
786
|
+
enter(Notation notation)
|
787
|
+
{
|
788
|
+
String name = notation.getNodeName();
|
789
|
+
String pubId = notation.getPublicId();
|
790
|
+
String sysId = notation.getSystemId();
|
791
|
+
buffer.append("<!NOTATION ");
|
792
|
+
buffer.append(name);
|
793
|
+
if (pubId != null) {
|
794
|
+
buffer.append(" PUBLIC \"");
|
795
|
+
buffer.append(pubId);
|
796
|
+
buffer.append("\"");
|
797
|
+
if (sysId != null) {
|
798
|
+
buffer.append(" \"");
|
799
|
+
buffer.append(sysId);
|
800
|
+
buffer.append("\"");
|
801
|
+
}
|
802
|
+
} else if (sysId != null) {
|
803
|
+
buffer.append(" SYSTEM \"");
|
804
|
+
buffer.append(sysId);
|
805
|
+
buffer.append("\"");
|
806
|
+
}
|
807
|
+
buffer.append(">");
|
808
|
+
return true;
|
809
|
+
}
|
810
|
+
|
811
|
+
public void
|
812
|
+
leave(Notation notation)
|
813
|
+
{
|
814
|
+
// no-op
|
815
|
+
}
|
816
|
+
|
817
|
+
public boolean
|
818
|
+
enter(ProcessingInstruction pi)
|
819
|
+
{
|
820
|
+
buffer.append("<?");
|
821
|
+
buffer.append(pi.getTarget());
|
822
|
+
buffer.append(" ");
|
823
|
+
buffer.append(pi.getData());
|
824
|
+
if (asHtml) { buffer.append(">"); }
|
825
|
+
else { buffer.append("?>"); }
|
826
|
+
buffer.append("\n");
|
827
|
+
if (canonical) { c14nNodeList.add(pi); }
|
828
|
+
return true;
|
829
|
+
}
|
830
|
+
|
831
|
+
public void
|
832
|
+
leave(ProcessingInstruction pi)
|
833
|
+
{
|
834
|
+
// no-op
|
835
|
+
}
|
836
|
+
|
837
|
+
private boolean
|
838
|
+
isHtmlScript(Text text)
|
839
|
+
{
|
840
|
+
return htmlDoc && text.getParentNode().getNodeName().equals("script");
|
841
|
+
}
|
842
|
+
|
843
|
+
private boolean
|
844
|
+
isHtmlStyle(Text text)
|
845
|
+
{
|
846
|
+
return htmlDoc && text.getParentNode().getNodeName().equals("style");
|
847
|
+
}
|
848
|
+
|
849
|
+
public boolean
|
850
|
+
enter(Text text)
|
851
|
+
{
|
852
|
+
CharSequence textContent = text.getNodeValue();
|
853
|
+
if (canonical) {
|
854
|
+
c14nNodeList.add(text);
|
855
|
+
if (isBlank(textContent)) {
|
856
|
+
buffer.append(canonicalizeWhitespace(textContent));
|
443
857
|
return true;
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
if (subsets) {
|
480
|
-
getAttrsOfAncestors(element.getParentNode(), namespaces, attributes);
|
481
|
-
Attr[] namespaceOfAncestors = getSortedArray(namespaces);
|
482
|
-
Attr[] attributeOfAncestors = getSortedArray(attributes);
|
483
|
-
c14nNamespaceStack.push(namespaceOfAncestors);
|
484
|
-
c14nAttrStack.push(attributeOfAncestors);
|
485
|
-
subsets = false; // namespace propagation should be done only once on top level node.
|
486
|
-
}
|
487
|
-
|
488
|
-
getNamespacesAndAttrs(element, namespaces, attributes);
|
489
|
-
|
490
|
-
Attr[] namespaceArray = getSortedArray(namespaces);
|
491
|
-
Attr[] attributeArray = getSortedArray(attributes);
|
492
|
-
Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length];
|
493
|
-
for (int i=0; i<allAttrs.length; i++) {
|
494
|
-
if (i < namespaceArray.length) {
|
495
|
-
allAttrs[i] = namespaceArray[i];
|
496
|
-
} else {
|
497
|
-
allAttrs[i] = attributeArray[i-namespaceArray.length];
|
498
|
-
}
|
499
|
-
}
|
500
|
-
c14nNamespaceStack.push(namespaceArray);
|
501
|
-
c14nAttrStack.push(attributeArray);
|
502
|
-
return allAttrs;
|
503
|
-
}
|
504
|
-
|
505
|
-
}
|
506
|
-
|
507
|
-
private void getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes) {
|
508
|
-
if (parent == null) return;
|
509
|
-
NamedNodeMap attrs = parent.getAttributes();
|
510
|
-
if (attrs == null || attrs.getLength() == 0) return;
|
511
|
-
for (int i=0; i < attrs.getLength(); i++) {
|
512
|
-
Attr attr = (Attr)attrs.item(i);
|
513
|
-
if (isNamespace(attr.getNodeName())) namespaces.add(attr);
|
514
|
-
else attributes.add(attr);
|
515
|
-
}
|
516
|
-
getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
|
517
|
-
}
|
518
|
-
|
519
|
-
private void getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes) {
|
520
|
-
NamedNodeMap attrs = current.getAttributes();
|
521
|
-
for (int i=0; i<attrs.getLength(); i++) {
|
522
|
-
Attr attr = (Attr)attrs.item(i);
|
523
|
-
if (isNamespace(attr.getNodeName())) {
|
524
|
-
getNamespacesWithPropagated(namespaces, attr);
|
525
|
-
} else {
|
526
|
-
getAttributesWithPropagated(attributes, attr);
|
527
|
-
}
|
528
|
-
if (exclusive) {
|
529
|
-
verifyXmlSpace(attributes, attrs);
|
530
|
-
}
|
531
|
-
}
|
532
|
-
}
|
533
|
-
|
534
|
-
private void getNamespacesWithPropagated(List<Attr> namespaces, Attr attr) {
|
535
|
-
boolean newNamespace = true;
|
536
|
-
Iterator<Attr[]> iter = c14nNamespaceStack.iterator();
|
537
|
-
while (iter.hasNext()) {
|
538
|
-
Attr[] parentNamespaces = iter.next();
|
539
|
-
for (int n=0; n < parentNamespaces.length; n++) {
|
540
|
-
if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) {
|
541
|
-
if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
|
542
|
-
// exactly the same namespace should not be added
|
543
|
-
newNamespace = false;
|
544
|
-
} else {
|
545
|
-
// in case of namespace url change, propagated namespace will be override
|
546
|
-
namespaces.remove(parentNamespaces[n]);
|
547
|
-
}
|
548
|
-
}
|
549
|
-
}
|
550
|
-
if (newNamespace && !namespaces.contains(attr)) namespaces.add(attr);
|
551
|
-
}
|
552
|
-
}
|
553
|
-
|
554
|
-
private void getAttributesWithPropagated(List<Attr> attributes, Attr attr) {
|
555
|
-
boolean newAttribute = true;
|
556
|
-
Iterator<Attr[]> iter = c14nAttrStack.iterator();
|
557
|
-
while (iter.hasNext()) {
|
558
|
-
Attr[] parentAttr = iter.next();
|
559
|
-
for (int n=0; n < parentAttr.length; n++) {
|
560
|
-
if (!parentAttr[n].getNodeName().startsWith("xml:")) continue;
|
561
|
-
if (parentAttr[n].getNodeName().equals(attr.getNodeName())) {
|
562
|
-
if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
|
563
|
-
// exactly the same attribute should not be added
|
564
|
-
newAttribute = false;
|
565
|
-
} else {
|
566
|
-
// in case of attribute value change, propagated attribute will be override
|
567
|
-
attributes.remove(parentAttr[n]);
|
568
|
-
}
|
569
|
-
}
|
570
|
-
}
|
571
|
-
if (newAttribute) attributes.add(attr);
|
572
|
-
}
|
573
|
-
}
|
574
|
-
|
575
|
-
private void verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs) {
|
576
|
-
Attr attr = (Attr) attrs.getNamedItem("xml:space");
|
577
|
-
if (attr == null) {
|
578
|
-
for (int i=0; i < attributes.size(); i++) {
|
579
|
-
if (attributes.get(i).getNodeName().equals("xml:space")) {
|
580
|
-
attributes.remove(i);
|
581
|
-
break;
|
582
|
-
}
|
583
|
-
}
|
584
|
-
}
|
585
|
-
}
|
586
|
-
|
587
|
-
private Attr[] getSortedArray(List<Attr> attrList) {
|
588
|
-
Attr[] attrArray = attrList.toArray(new Attr[0]);
|
589
|
-
Arrays.sort(attrArray, new Comparator<Attr>() {
|
590
|
-
@Override
|
591
|
-
public int compare(Attr attr0, Attr attr1) {
|
592
|
-
return attr0.getNodeName().compareTo(attr1.getNodeName());
|
593
|
-
}
|
594
|
-
});
|
595
|
-
return attrArray;
|
596
|
-
}
|
597
|
-
|
598
|
-
public void leave(Element element) {
|
599
|
-
if (canonical) {
|
600
|
-
c14nNamespaceStack.poll();
|
601
|
-
c14nAttrStack.poll();
|
602
|
-
}
|
603
|
-
String name = element.getTagName();
|
604
|
-
if (element.hasChildNodes()) {
|
605
|
-
if (needIndentInClosing(element)) {
|
606
|
-
indentation.pop();
|
607
|
-
buffer.append(indentation.peek());
|
608
|
-
} else if (asBuilder) {
|
609
|
-
if (!containsText(element)) indentation.pop();
|
610
|
-
}
|
611
|
-
buffer.append("</").append(name).append('>');
|
612
|
-
if (needBreakInClosing(element)) {
|
613
|
-
buffer.append('\n');
|
614
|
-
}
|
615
|
-
return;
|
616
|
-
}
|
617
|
-
// no child, but HTML might need a closing tag.
|
618
|
-
if (asHtml || noEmpty) {
|
619
|
-
if (!isEmpty(name) && noEmpty) {
|
620
|
-
buffer.append("</").append(name).append('>');
|
621
|
-
}
|
622
|
-
}
|
623
|
-
if (needBreakInClosing(element)) {
|
624
|
-
if (!containsText(element)) indentation.pop();
|
625
|
-
buffer.append('\n');
|
626
|
-
}
|
627
|
-
}
|
628
|
-
|
629
|
-
private boolean needIndentInClosing(Element element) {
|
630
|
-
if (containsText(element)) return false;
|
631
|
-
|
632
|
-
if (fragment) return false; // a given option might be fragment and format. fragment matters
|
633
|
-
if (format) return true;
|
634
|
-
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
|
635
|
-
return false;
|
636
|
-
}
|
637
|
-
|
638
|
-
private boolean needBreakInClosing(Element element) {
|
639
|
-
if (fragment) return false;
|
640
|
-
if (format || asBuilder) return true;
|
641
|
-
return false;
|
642
|
-
}
|
643
|
-
|
644
|
-
private boolean containsText(Element element) {
|
645
|
-
return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE);
|
646
|
-
}
|
647
|
-
|
648
|
-
public boolean enter(Entity entity) {
|
649
|
-
String name = entity.getNodeName();
|
650
|
-
String pubId = entity.getPublicId();
|
651
|
-
String sysId = entity.getSystemId();
|
652
|
-
String notation = entity.getNotationName();
|
653
|
-
buffer.append("<!ENTITY ");
|
654
|
-
buffer.append(name);
|
655
|
-
if (pubId != null) {
|
656
|
-
buffer.append(" PUBLIC \"");
|
657
|
-
buffer.append(pubId);
|
658
|
-
buffer.append("\"");
|
659
|
-
}
|
660
|
-
if (sysId != null) {
|
661
|
-
buffer.append(" SYSTEM \"");
|
662
|
-
buffer.append(sysId);
|
663
|
-
buffer.append("\"");
|
664
|
-
}
|
665
|
-
if (notation != null) {
|
666
|
-
buffer.append(" NDATA ");
|
667
|
-
buffer.append(notation);
|
668
|
-
}
|
669
|
-
buffer.append(">");
|
670
|
-
return true;
|
671
|
-
}
|
672
|
-
|
673
|
-
public void leave(Entity entity) {
|
674
|
-
// no-op
|
675
|
-
}
|
676
|
-
|
677
|
-
public boolean enter(EntityReference entityRef) {
|
678
|
-
buffer.append('&').append(entityRef.getNodeName()).append(';');
|
679
|
-
return true;
|
680
|
-
}
|
681
|
-
public void leave(EntityReference entityRef) {
|
682
|
-
// no-op
|
683
|
-
}
|
684
|
-
|
685
|
-
public boolean enter(Notation notation) {
|
686
|
-
String name = notation.getNodeName();
|
687
|
-
String pubId = notation.getPublicId();
|
688
|
-
String sysId = notation.getSystemId();
|
689
|
-
buffer.append("<!NOTATION ");
|
690
|
-
buffer.append(name);
|
691
|
-
if (pubId != null) {
|
692
|
-
buffer.append(" PUBLIC \"");
|
693
|
-
buffer.append(pubId);
|
694
|
-
buffer.append("\"");
|
695
|
-
if (sysId != null) {
|
696
|
-
buffer.append(" \"");
|
697
|
-
buffer.append(sysId);
|
698
|
-
buffer.append("\"");
|
699
|
-
}
|
700
|
-
} else if (sysId != null) {
|
701
|
-
buffer.append(" SYSTEM \"");
|
702
|
-
buffer.append(sysId);
|
703
|
-
buffer.append("\"");
|
704
|
-
}
|
705
|
-
buffer.append(">");
|
706
|
-
return true;
|
707
|
-
}
|
708
|
-
|
709
|
-
public void leave(Notation notation) {
|
710
|
-
// no-op
|
711
|
-
}
|
712
|
-
|
713
|
-
public boolean enter(ProcessingInstruction pi) {
|
714
|
-
buffer.append("<?");
|
715
|
-
buffer.append(pi.getTarget());
|
716
|
-
buffer.append(" ");
|
717
|
-
buffer.append(pi.getData());
|
718
|
-
if (asHtml) buffer.append(">");
|
719
|
-
else buffer.append("?>");
|
720
|
-
buffer.append("\n");
|
721
|
-
if (canonical) c14nNodeList.add(pi);
|
722
|
-
return true;
|
723
|
-
}
|
724
|
-
|
725
|
-
public void leave(ProcessingInstruction pi) {
|
726
|
-
// no-op
|
727
|
-
}
|
728
|
-
|
729
|
-
private boolean isHtmlScript(Text text) {
|
730
|
-
return htmlDoc && text.getParentNode().getNodeName().equals("script");
|
731
|
-
}
|
732
|
-
|
733
|
-
private boolean isHtmlStyle(Text text) {
|
734
|
-
return htmlDoc && text.getParentNode().getNodeName().equals("style");
|
735
|
-
}
|
736
|
-
|
737
|
-
public boolean enter(Text text) {
|
738
|
-
CharSequence textContent = text.getNodeValue();
|
739
|
-
if (canonical) {
|
740
|
-
c14nNodeList.add(text);
|
741
|
-
if (isBlank(textContent)) {
|
742
|
-
buffer.append(canonicalizeWhitespace(textContent));
|
743
|
-
return true;
|
744
|
-
}
|
745
|
-
}
|
746
|
-
|
747
|
-
if (shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) {
|
748
|
-
textContent = encodeJavaString(textContent);
|
749
|
-
}
|
750
|
-
|
751
|
-
textContent = encodeStringToHtmlEntity(textContent);
|
752
|
-
buffer.append(textContent);
|
753
|
-
return true;
|
754
|
-
}
|
755
|
-
|
756
|
-
private CharSequence encodeStringToHtmlEntity(CharSequence text) {
|
757
|
-
if (encoding == null) return text;
|
758
|
-
|
759
|
-
CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
|
760
|
-
StringBuilder sb = new StringBuilder(text.length() + 16);
|
761
|
-
// make sure we can handle code points that are higher than 2 bytes
|
762
|
-
for ( int i = 0; i < text.length(); ) {
|
763
|
-
int code = Character.codePointAt(text, i);
|
764
|
-
// TODO not sure about bigger offset then 2 ?!
|
765
|
-
int offset = code > 65535 ? 2 : 1;
|
766
|
-
CharSequence substr = text.subSequence(i, i + offset);
|
767
|
-
boolean canEncode = encoder.canEncode(substr);
|
768
|
-
if (canEncode) {
|
769
|
-
sb.append(substr);
|
770
|
-
}
|
771
|
-
else {
|
772
|
-
sb.append("&#x").append(Integer.toHexString(code)).append(';');
|
773
|
-
}
|
774
|
-
i += offset;
|
775
|
-
}
|
776
|
-
return sb;
|
777
|
-
}
|
858
|
+
}
|
859
|
+
}
|
860
|
+
|
861
|
+
if (shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) {
|
862
|
+
textContent = encodeJavaString(textContent);
|
863
|
+
}
|
864
|
+
|
865
|
+
textContent = encodeStringToHtmlEntity(textContent);
|
866
|
+
buffer.append(textContent);
|
867
|
+
return true;
|
868
|
+
}
|
869
|
+
|
870
|
+
private CharSequence
|
871
|
+
encodeStringToHtmlEntity(CharSequence text)
|
872
|
+
{
|
873
|
+
if (encoding == null) { return text; }
|
874
|
+
|
875
|
+
CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
|
876
|
+
StringBuilder sb = new StringBuilder(text.length() + 16);
|
877
|
+
// make sure we can handle code points that are higher than 2 bytes
|
878
|
+
for (int i = 0; i < text.length();) {
|
879
|
+
int code = Character.codePointAt(text, i);
|
880
|
+
// TODO not sure about bigger offset then 2 ?!
|
881
|
+
int offset = code > 65535 ? 2 : 1;
|
882
|
+
CharSequence substr = text.subSequence(i, i + offset);
|
883
|
+
boolean canEncode = encoder.canEncode(substr);
|
884
|
+
if (canEncode) {
|
885
|
+
sb.append(substr);
|
886
|
+
} else {
|
887
|
+
sb.append("&#x").append(Integer.toHexString(code)).append(';');
|
888
|
+
}
|
889
|
+
i += offset;
|
890
|
+
}
|
891
|
+
return sb;
|
892
|
+
}
|
778
893
|
}
|