nokogiri 1.5.6.rc2-java → 1.5.6.rc3-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +23 -6
- data/CHANGELOG.rdoc +20 -4
- data/README.rdoc +3 -6
- data/ROADMAP.md +3 -2
- data/Rakefile +4 -2
- data/bin/nokogiri +19 -4
- data/build_all +5 -1
- data/ext/java/nokogiri/XmlDocument.java +1 -0
- data/ext/java/nokogiri/XmlNamespace.java +0 -1
- data/ext/java/nokogiri/XmlNode.java +71 -19
- data/ext/java/nokogiri/XmlText.java +2 -14
- data/ext/java/nokogiri/internals/NokogiriHandler.java +10 -4
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +12 -4
- data/ext/java/nokogiri/internals/ParserContext.java +2 -1
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +96 -99
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +9 -3
- data/ext/nokogiri/xml_document.c +1 -1
- data/ext/nokogiri/xml_node.c +31 -14
- data/ext/nokogiri/xml_sax_parser.c +16 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/document.rb +8 -6
- data/lib/nokogiri/xml/document_fragment.rb +10 -1
- data/lib/nokogiri/xml/node.rb +15 -11
- data/lib/nokogiri/xml/sax/document.rb +7 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -1
- data/test/helper.rb +6 -0
- data/test/html/test_document_fragment.rb +5 -0
- data/test/xml/sax/test_parser.rb +15 -1
- data/test/xml/test_builder.rb +19 -0
- data/test/xml/test_document.rb +42 -9
- data/test/xml/test_document_fragment.rb +7 -0
- data/test/xml/test_node.rb +62 -0
- data/test/xml/test_node_attributes.rb +22 -2
- data/test/xml/test_unparented_node.rb +9 -0
- data/test_all +6 -2
- metadata +742 -587
@@ -45,7 +45,6 @@ import java.nio.charset.CharsetEncoder;
|
|
45
45
|
import java.util.ArrayList;
|
46
46
|
import java.util.List;
|
47
47
|
import java.util.Set;
|
48
|
-
import java.util.SortedMap;
|
49
48
|
import java.util.regex.Matcher;
|
50
49
|
import java.util.regex.Pattern;
|
51
50
|
|
@@ -685,9 +684,9 @@ public class NokogiriHelpers {
|
|
685
684
|
if (name == null) name = "UTF-8";
|
686
685
|
return name;
|
687
686
|
}
|
688
|
-
|
689
|
-
private static Set<String> charsetNames =
|
690
|
-
|
687
|
+
|
688
|
+
private static Set<String> charsetNames = Charset.availableCharsets().keySet();
|
689
|
+
|
691
690
|
private static String ignoreInvalidEncoding(Ruby runtime, IRubyObject encoding) {
|
692
691
|
String givenEncoding = rubyStringToString(encoding);
|
693
692
|
if (charsetNames.contains(givenEncoding)) return givenEncoding;
|
@@ -807,4 +806,13 @@ public class NokogiriHelpers {
|
|
807
806
|
private static Charset shift_jis = Charset.forName("Shift_JIS");
|
808
807
|
private static Charset jis = Charset.forName("ISO-2022-JP");
|
809
808
|
private static Charset euc_jp = Charset.forName("EUC-JP");
|
809
|
+
|
810
|
+
public static boolean shouldEncode(Node text) {
|
811
|
+
return text.getUserData(NokogiriHelpers.ENCODED_STRING) == null ||
|
812
|
+
!((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING));
|
813
|
+
}
|
814
|
+
|
815
|
+
public static boolean shouldDecode(Node text) {
|
816
|
+
return !shouldEncode(text);
|
817
|
+
}
|
810
818
|
}
|
@@ -108,7 +108,8 @@ public class ParserContext extends RubyObject {
|
|
108
108
|
(RubyIO) TypeConverter.convertToType(data,
|
109
109
|
ruby.getIO(),
|
110
110
|
"to_io");
|
111
|
-
|
111
|
+
// use an uncloseable input stream to fix #495
|
112
|
+
source.setByteStream(new UncloseableInputStream(io.getInStream()));
|
112
113
|
} else {
|
113
114
|
if (invoke(context, data, "respond_to?",
|
114
115
|
ruby.newSymbol("string").to_sym()).isTrue()) {
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -65,21 +65,33 @@ import org.w3c.dom.Text;
|
|
65
65
|
|
66
66
|
/**
|
67
67
|
* A class for serializing a document.
|
68
|
-
*
|
68
|
+
*
|
69
69
|
* @author sergio
|
70
70
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
71
71
|
* @author Yoko Harada <yokolet@gmail.com>
|
72
72
|
*/
|
73
73
|
public class SaveContextVisitor {
|
74
74
|
|
75
|
-
private StringBuffer buffer;
|
76
|
-
private Stack<String> indentation;
|
77
|
-
private String encoding
|
78
|
-
private
|
79
|
-
private boolean
|
80
|
-
private
|
81
|
-
private
|
82
|
-
private
|
75
|
+
private final StringBuffer buffer;
|
76
|
+
private final Stack<String> indentation;
|
77
|
+
private String encoding;
|
78
|
+
private final String indentString;
|
79
|
+
private boolean format;
|
80
|
+
private final boolean noDecl;
|
81
|
+
private final boolean noEmpty;
|
82
|
+
private final boolean noXhtml;
|
83
|
+
private final boolean asXhtml;
|
84
|
+
private boolean asXml;
|
85
|
+
private final boolean asHtml;
|
86
|
+
private final boolean asBuilder;
|
87
|
+
private boolean htmlDoc;
|
88
|
+
private final boolean fragment;
|
89
|
+
private final boolean canonical, incl_ns, with_comments;
|
90
|
+
private boolean subsets;
|
91
|
+
private boolean exclusive;
|
92
|
+
private final List<Node> c14nNodeList;
|
93
|
+
private final Deque<Attr[]> c14nNamespaceStack;
|
94
|
+
private final Deque<Attr[]> c14nAttrStack;
|
83
95
|
private List<String> c14nExclusiveInclusivePrefixes = null;
|
84
96
|
/*
|
85
97
|
* U can't touch this.
|
@@ -96,7 +108,7 @@ public class SaveContextVisitor {
|
|
96
108
|
public static final int AS_XML = 32;
|
97
109
|
public static final int AS_HTML = 64;
|
98
110
|
public static final int AS_BUILDER = 128;
|
99
|
-
|
111
|
+
|
100
112
|
public static final int CANONICAL = 1;
|
101
113
|
public static final int INCL_NS = 2;
|
102
114
|
public static final int WITH_COMMENTS = 4;
|
@@ -113,7 +125,7 @@ public class SaveContextVisitor {
|
|
113
125
|
c14nNamespaceStack = new ArrayDeque<Attr[]>();
|
114
126
|
c14nAttrStack = new ArrayDeque<Attr[]>();
|
115
127
|
format = (options & FORMAT) == FORMAT;
|
116
|
-
|
128
|
+
|
117
129
|
noDecl = (options & NO_DECL) == NO_DECL;
|
118
130
|
noEmpty = (options & NO_EMPTY) == NO_EMPTY;
|
119
131
|
noXhtml = (options & NO_XHTML) == NO_XHTML;
|
@@ -121,40 +133,40 @@ public class SaveContextVisitor {
|
|
121
133
|
asXml = (options & AS_XML) == AS_XML;
|
122
134
|
asHtml = (options & AS_HTML) == AS_HTML;
|
123
135
|
asBuilder = (options & AS_BUILDER) == AS_BUILDER;
|
124
|
-
|
136
|
+
|
125
137
|
canonical = (canonicalOpts & CANONICAL) == CANONICAL;
|
126
138
|
incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
|
127
139
|
with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
|
128
140
|
subsets = (canonicalOpts & SUBSETS) == SUBSETS;
|
129
|
-
|
141
|
+
|
130
142
|
if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
|
131
143
|
if ((!format && indent != null) && indent.length() > 0) format = true;
|
132
144
|
if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
|
133
145
|
indentString = indent;
|
134
146
|
if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = true;
|
135
147
|
}
|
136
|
-
|
148
|
+
|
137
149
|
@Override
|
138
150
|
public String toString() {
|
139
151
|
return (new String(buffer));
|
140
152
|
}
|
141
|
-
|
153
|
+
|
142
154
|
public void setHtmlDoc(boolean htmlDoc) {
|
143
155
|
this.htmlDoc = htmlDoc;
|
144
156
|
}
|
145
|
-
|
157
|
+
|
146
158
|
public void setEncoding(String encoding) {
|
147
159
|
this.encoding = encoding;
|
148
160
|
}
|
149
|
-
|
161
|
+
|
150
162
|
public List<Node> getC14nNodeList() {
|
151
163
|
return c14nNodeList;
|
152
164
|
}
|
153
|
-
|
165
|
+
|
154
166
|
public void setC14nExclusiveInclusivePrefixes(List<String> prefixes) {
|
155
167
|
c14nExclusiveInclusivePrefixes = prefixes;
|
156
168
|
}
|
157
|
-
|
169
|
+
|
158
170
|
public boolean enter(Node node) {
|
159
171
|
if (node instanceof Document) {
|
160
172
|
return enter((Document)node);
|
@@ -181,7 +193,7 @@ public class SaveContextVisitor {
|
|
181
193
|
return enter((Entity)node);
|
182
194
|
}
|
183
195
|
if (node instanceof EntityReference) {
|
184
|
-
return enter(
|
196
|
+
return enter(node);
|
185
197
|
}
|
186
198
|
if (node instanceof Notation) {
|
187
199
|
return enter((Notation)node);
|
@@ -191,7 +203,7 @@ public class SaveContextVisitor {
|
|
191
203
|
}
|
192
204
|
return false;
|
193
205
|
}
|
194
|
-
|
206
|
+
|
195
207
|
public void leave(Node node) {
|
196
208
|
if (node instanceof Document) {
|
197
209
|
leave((Document)node);
|
@@ -206,7 +218,6 @@ public class SaveContextVisitor {
|
|
206
218
|
return;
|
207
219
|
}
|
208
220
|
if (node instanceof Text) {
|
209
|
-
leave((Text)node);
|
210
221
|
return;
|
211
222
|
}
|
212
223
|
if (node instanceof CDATASection) {
|
@@ -226,7 +237,7 @@ public class SaveContextVisitor {
|
|
226
237
|
return;
|
227
238
|
}
|
228
239
|
if (node instanceof EntityReference) {
|
229
|
-
leave(
|
240
|
+
leave(node);
|
230
241
|
return;
|
231
242
|
}
|
232
243
|
if (node instanceof Notation) {
|
@@ -238,16 +249,16 @@ public class SaveContextVisitor {
|
|
238
249
|
return;
|
239
250
|
}
|
240
251
|
}
|
241
|
-
|
252
|
+
|
242
253
|
public boolean enter(String string) {
|
243
254
|
buffer.append(string);
|
244
255
|
return true;
|
245
256
|
}
|
246
|
-
|
257
|
+
|
247
258
|
public void leave(String string) {
|
248
259
|
// no-op
|
249
260
|
}
|
250
|
-
|
261
|
+
|
251
262
|
public boolean enter(Attr attr) {
|
252
263
|
String name = attr.getName();
|
253
264
|
buffer.append(name);
|
@@ -260,34 +271,34 @@ public class SaveContextVisitor {
|
|
260
271
|
}
|
261
272
|
return true;
|
262
273
|
}
|
263
|
-
|
264
|
-
private static Pattern p =
|
274
|
+
|
275
|
+
private static Pattern p =
|
265
276
|
Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
|
266
|
-
|
277
|
+
|
267
278
|
private String replaceCharsetIfNecessary(Attr attr) {
|
268
279
|
String value = attr.getValue();
|
269
280
|
if (encoding == null) return value; // unable to replace in any case
|
270
281
|
if (!"content".equals(attr.getName().toLowerCase())) return value; // must be content attr
|
271
|
-
if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value;
|
282
|
+
if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value;
|
272
283
|
Matcher m = p.matcher(value);
|
273
284
|
if (!m.find()) return value;
|
274
285
|
if (value.contains(encoding)) return value; // no need to replace
|
275
286
|
return value.replace(m.group(), "charset=" + encoding);
|
276
287
|
}
|
277
|
-
|
288
|
+
|
278
289
|
public static final String[] HTML_BOOLEAN_ATTRS = {
|
279
290
|
"checked", "compact", "declare", "defer", "disabled", "ismap",
|
280
291
|
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
|
281
292
|
"selected"
|
282
293
|
};
|
283
|
-
|
294
|
+
|
284
295
|
private boolean isHtmlBooleanAttr(String name) {
|
285
296
|
for (String s : HTML_BOOLEAN_ATTRS) {
|
286
297
|
if (s.equals(name)) return true;
|
287
298
|
}
|
288
299
|
return false;
|
289
300
|
}
|
290
|
-
|
301
|
+
|
291
302
|
private String serializeAttrTextContent(String s, boolean htmlDoc) {
|
292
303
|
if (s == null) return "";
|
293
304
|
|
@@ -299,7 +310,7 @@ public class SaveContextVisitor {
|
|
299
310
|
case '\n': buffer.append(" "); break;
|
300
311
|
case '\r': buffer.append(" "); break;
|
301
312
|
case '\t': buffer.append("	"); break;
|
302
|
-
case '"': if (htmlDoc) buffer.append("%22");
|
313
|
+
case '"': if (htmlDoc) buffer.append("%22");
|
303
314
|
else buffer.append(""");
|
304
315
|
break;
|
305
316
|
case '<': buffer.append("<"); break;
|
@@ -315,14 +326,14 @@ public class SaveContextVisitor {
|
|
315
326
|
public void leave(Attr attr) {
|
316
327
|
// no-op
|
317
328
|
}
|
318
|
-
|
329
|
+
|
319
330
|
public boolean enter(CDATASection cdata) {
|
320
331
|
buffer.append("<![CDATA[");
|
321
332
|
buffer.append(cdata.getData());
|
322
333
|
buffer.append("]]>");
|
323
334
|
return true;
|
324
335
|
}
|
325
|
-
|
336
|
+
|
326
337
|
public void leave(CDATASection cdata) {
|
327
338
|
// no-op
|
328
339
|
}
|
@@ -337,11 +348,11 @@ public class SaveContextVisitor {
|
|
337
348
|
buffer.append("-->");
|
338
349
|
return true;
|
339
350
|
}
|
340
|
-
|
351
|
+
|
341
352
|
public void leave(Comment comment) {
|
342
353
|
// no-op
|
343
354
|
}
|
344
|
-
|
355
|
+
|
345
356
|
public boolean enter(Document document) {
|
346
357
|
if (!noDecl) {
|
347
358
|
buffer.append("<?xml version=\"");
|
@@ -357,11 +368,11 @@ public class SaveContextVisitor {
|
|
357
368
|
}
|
358
369
|
return true;
|
359
370
|
}
|
360
|
-
|
371
|
+
|
361
372
|
public void leave(Document document) {
|
362
373
|
// no-op
|
363
374
|
}
|
364
|
-
|
375
|
+
|
365
376
|
public boolean enter(DocumentType docType) {
|
366
377
|
if (canonical) {
|
367
378
|
c14nNodeList.add(docType);
|
@@ -389,7 +400,7 @@ public class SaveContextVisitor {
|
|
389
400
|
buffer.append(">\n");
|
390
401
|
return true;
|
391
402
|
}
|
392
|
-
|
403
|
+
|
393
404
|
public void leave(DocumentType docType) {
|
394
405
|
// no-op
|
395
406
|
}
|
@@ -403,12 +414,12 @@ public class SaveContextVisitor {
|
|
403
414
|
}
|
404
415
|
String current = indentation.peek();
|
405
416
|
buffer.append(current);
|
406
|
-
if (needIndent()) {
|
417
|
+
if (needIndent(element)) {
|
407
418
|
indentation.push(current + indentString);
|
408
419
|
}
|
409
420
|
String name = element.getTagName();
|
410
421
|
buffer.append("<" + name);
|
411
|
-
Attr[] attrs = getAttrsAndNamespaces(element);
|
422
|
+
Attr[] attrs = getAttrsAndNamespaces(element);
|
412
423
|
for (Attr attr : attrs) {
|
413
424
|
if (attr.getSpecified()) {
|
414
425
|
buffer.append(" ");
|
@@ -423,7 +434,7 @@ public class SaveContextVisitor {
|
|
423
434
|
}
|
424
435
|
// no child
|
425
436
|
if (asHtml || asXhtml) {
|
426
|
-
buffer.append(">");
|
437
|
+
buffer.append(">");
|
427
438
|
} else if (asXml && noEmpty) {
|
428
439
|
buffer.append(">");
|
429
440
|
} else {
|
@@ -434,26 +445,28 @@ public class SaveContextVisitor {
|
|
434
445
|
}
|
435
446
|
return true;
|
436
447
|
}
|
437
|
-
|
438
|
-
private boolean needIndent() {
|
448
|
+
|
449
|
+
private boolean needIndent(Element element) {
|
450
|
+
if (containsText(element)) return false;
|
439
451
|
if (fragment) return false; // a given option might be fragment and format. fragment matters
|
440
452
|
if (format || asBuilder) return true;
|
441
453
|
return false;
|
442
454
|
}
|
443
|
-
|
455
|
+
|
444
456
|
private boolean needBreakInOpening(Element element) {
|
457
|
+
if (containsText(element)) return false;
|
445
458
|
if (fragment) return false;
|
446
459
|
if (format) return true;
|
447
460
|
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
|
448
461
|
if (format && element.getNextSibling() == null && element.hasChildNodes()) return true;
|
449
462
|
return false;
|
450
463
|
}
|
451
|
-
|
464
|
+
|
452
465
|
private boolean isEmpty(String name) {
|
453
466
|
HTMLElements.Element element = HTMLElements.getElement(name);
|
454
467
|
return element.isEmpty();
|
455
468
|
}
|
456
|
-
|
469
|
+
|
457
470
|
private Attr[] getAttrsAndNamespaces(Element element) {
|
458
471
|
NamedNodeMap attrs = element.getAttributes();
|
459
472
|
if (!canonical) {
|
@@ -474,7 +487,7 @@ public class SaveContextVisitor {
|
|
474
487
|
c14nAttrStack.push(attributeOfAncestors);
|
475
488
|
subsets = false; // namespace propagation should be done only once on top level node.
|
476
489
|
}
|
477
|
-
|
490
|
+
|
478
491
|
getNamespacesAndAttrs(element, namespaces, attributes);
|
479
492
|
|
480
493
|
Attr[] namespaceArray = getSortedArray(namespaces);
|
@@ -491,9 +504,9 @@ public class SaveContextVisitor {
|
|
491
504
|
c14nAttrStack.push(attributeArray);
|
492
505
|
return allAttrs;
|
493
506
|
}
|
494
|
-
|
507
|
+
|
495
508
|
}
|
496
|
-
|
509
|
+
|
497
510
|
private void getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes) {
|
498
511
|
if (parent == null) return;
|
499
512
|
NamedNodeMap attrs = parent.getAttributes();
|
@@ -505,7 +518,7 @@ public class SaveContextVisitor {
|
|
505
518
|
}
|
506
519
|
getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
|
507
520
|
}
|
508
|
-
|
521
|
+
|
509
522
|
private void getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes) {
|
510
523
|
NamedNodeMap attrs = current.getAttributes();
|
511
524
|
for (int i=0; i<attrs.getLength(); i++) {
|
@@ -531,7 +544,7 @@ public class SaveContextVisitor {
|
|
531
544
|
if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
|
532
545
|
// exactly the same namespace should not be added
|
533
546
|
newNamespace = false;
|
534
|
-
} else {
|
547
|
+
} else {
|
535
548
|
// in case of a namespace URL change, the propagated namespace will be overridden
|
536
549
|
namespaces.remove(parentNamespaces[n]);
|
537
550
|
}
|
@@ -540,7 +553,7 @@ public class SaveContextVisitor {
|
|
540
553
|
if (newNamespace && !namespaces.contains(attr)) namespaces.add(attr);
|
541
554
|
}
|
542
555
|
}
|
543
|
-
|
556
|
+
|
544
557
|
private void getAttributesWithPropagated(List<Attr> attributes, Attr attr) {
|
545
558
|
boolean newAttribute = true;
|
546
559
|
Iterator<Attr[]> iter = c14nAttrStack.iterator();
|
@@ -552,7 +565,7 @@ public class SaveContextVisitor {
|
|
552
565
|
if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
|
553
566
|
// exactly the same attribute should not be added
|
554
567
|
newAttribute = false;
|
555
|
-
} else {
|
568
|
+
} else {
|
556
569
|
// in case of an attribute value change, the propagated attribute will be overridden
|
557
570
|
attributes.remove(parentAttr[n]);
|
558
571
|
}
|
@@ -561,7 +574,7 @@ public class SaveContextVisitor {
|
|
561
574
|
if (newAttribute) attributes.add(attr);
|
562
575
|
}
|
563
576
|
}
|
564
|
-
|
577
|
+
|
565
578
|
private void verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs) {
|
566
579
|
Attr attr = (Attr) attrs.getNamedItem("xml:space");
|
567
580
|
if (attr == null) {
|
@@ -573,7 +586,7 @@ public class SaveContextVisitor {
|
|
573
586
|
}
|
574
587
|
}
|
575
588
|
}
|
576
|
-
|
589
|
+
|
577
590
|
private Attr[] getSortedArray(List<Attr> attrList) {
|
578
591
|
Attr[] attrArray = attrList.toArray(new Attr[0]);
|
579
592
|
Arrays.sort(attrArray, new Comparator<Attr>() {
|
@@ -584,7 +597,7 @@ public class SaveContextVisitor {
|
|
584
597
|
});
|
585
598
|
return attrArray;
|
586
599
|
}
|
587
|
-
|
600
|
+
|
588
601
|
public void leave(Element element) {
|
589
602
|
if (canonical) {
|
590
603
|
c14nNamespaceStack.poll();
|
@@ -596,10 +609,10 @@ public class SaveContextVisitor {
|
|
596
609
|
indentation.pop();
|
597
610
|
buffer.append(indentation.peek());
|
598
611
|
} else if (asBuilder) {
|
599
|
-
indentation.pop();
|
612
|
+
if (!containsText(element)) indentation.pop();
|
600
613
|
}
|
601
614
|
buffer.append("</" + name + ">");
|
602
|
-
if (needBreakInClosing()) {
|
615
|
+
if (needBreakInClosing(element)) {
|
603
616
|
buffer.append("\n");
|
604
617
|
}
|
605
618
|
return;
|
@@ -610,25 +623,31 @@ public class SaveContextVisitor {
|
|
610
623
|
buffer.append("</" + name + ">");
|
611
624
|
}
|
612
625
|
}
|
613
|
-
if (needBreakInClosing()) {
|
614
|
-
indentation.pop();
|
626
|
+
if (needBreakInClosing(element)) {
|
627
|
+
if (!containsText(element)) indentation.pop();
|
615
628
|
buffer.append("\n");
|
616
629
|
}
|
617
630
|
}
|
618
|
-
|
631
|
+
|
619
632
|
private boolean needIndentInClosing(Element element) {
|
633
|
+
if (containsText(element)) return false;
|
634
|
+
|
620
635
|
if (fragment) return false; // a given option might be fragment and format. fragment matters
|
621
636
|
if (format) return true;
|
622
637
|
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
|
623
638
|
return false;
|
624
639
|
}
|
625
|
-
|
626
|
-
private boolean needBreakInClosing() {
|
640
|
+
|
641
|
+
private boolean needBreakInClosing(Element element) {
|
627
642
|
if (fragment) return false;
|
628
643
|
if (format || asBuilder) return true;
|
629
644
|
return false;
|
630
645
|
}
|
631
646
|
|
647
|
+
private boolean containsText(Element element) {
|
648
|
+
return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE);
|
649
|
+
}
|
650
|
+
|
632
651
|
public boolean enter(Entity entity) {
|
633
652
|
String name = entity.getNodeName();
|
634
653
|
String pubId = entity.getPublicId();
|
@@ -653,7 +672,7 @@ public class SaveContextVisitor {
|
|
653
672
|
buffer.append(">");
|
654
673
|
return true;
|
655
674
|
}
|
656
|
-
|
675
|
+
|
657
676
|
public void leave(Entity entity) {
|
658
677
|
// no-op
|
659
678
|
}
|
@@ -663,11 +682,11 @@ public class SaveContextVisitor {
|
|
663
682
|
buffer.append("&" + name + ";");
|
664
683
|
return true;
|
665
684
|
}
|
666
|
-
|
685
|
+
|
667
686
|
public void leaveEntityReference(Text entityRef) {
|
668
687
|
// no-op
|
669
688
|
}
|
670
|
-
|
689
|
+
|
671
690
|
public boolean enter(Notation notation) {
|
672
691
|
String name = notation.getNodeName();
|
673
692
|
String pubId = notation.getPublicId();
|
@@ -691,7 +710,7 @@ public class SaveContextVisitor {
|
|
691
710
|
buffer.append(">");
|
692
711
|
return true;
|
693
712
|
}
|
694
|
-
|
713
|
+
|
695
714
|
public void leave(Notation notation) {
|
696
715
|
// no-op
|
697
716
|
}
|
@@ -707,7 +726,7 @@ public class SaveContextVisitor {
|
|
707
726
|
if (canonical) c14nNodeList.add(pi);
|
708
727
|
return true;
|
709
728
|
}
|
710
|
-
|
729
|
+
|
711
730
|
public void leave(ProcessingInstruction pi) {
|
712
731
|
// no-op
|
713
732
|
}
|
@@ -722,14 +741,8 @@ public class SaveContextVisitor {
|
|
722
741
|
return true;
|
723
742
|
}
|
724
743
|
}
|
725
|
-
|
726
|
-
if (
|
727
|
-
String current = indentation.peek();
|
728
|
-
buffer.append(current);
|
729
|
-
indentation.push(current + indentString);
|
730
|
-
if (textContent.charAt(0) == lineSeparator) textContent = textContent.substring(1);
|
731
|
-
}
|
732
|
-
if (text.getUserData(NokogiriHelpers.ENCODED_STRING) == null || !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING))) {
|
744
|
+
|
745
|
+
if (NokogiriHelpers.shouldEncode(text)) {
|
733
746
|
textContent = encodeJavaString(textContent);
|
734
747
|
}
|
735
748
|
|
@@ -739,29 +752,13 @@ public class SaveContextVisitor {
|
|
739
752
|
buffer.append(textContent);
|
740
753
|
return true;
|
741
754
|
}
|
742
|
-
|
743
|
-
private boolean needIndentText() {
|
744
|
-
if (fragment) return false;
|
745
|
-
if (format) return true;
|
746
|
-
return false;
|
747
|
-
}
|
748
|
-
|
749
|
-
public void leave(Text text) {
|
750
|
-
String textContent = text.getNodeValue();
|
751
|
-
if (needIndentText() && !"".equals(textContent.trim())) {
|
752
|
-
indentation.pop();
|
753
|
-
if (textContent.charAt(textContent.length()-1) != lineSeparator) {
|
754
|
-
buffer.append("\n");
|
755
|
-
}
|
756
|
-
}
|
757
|
-
}
|
758
|
-
|
755
|
+
|
759
756
|
private String getEncoding(Text text) {
|
760
757
|
if (encoding != null) return encoding;
|
761
758
|
encoding = text.getOwnerDocument().getInputEncoding();
|
762
759
|
return encoding;
|
763
760
|
}
|
764
|
-
|
761
|
+
|
765
762
|
private String encodeStringToHtmlEntity(String text) {
|
766
763
|
int last = 126; // = U+007E. No need to encode under U+007E.
|
767
764
|
StringBuffer sb = new StringBuffer();
|