nokogiri 1.5.0.beta.4-java → 1.5.0-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +34 -0
- data/CHANGELOG.rdoc +40 -1
- data/Manifest.txt +11 -2
- data/README.rdoc +1 -1
- data/Rakefile +96 -105
- data/bin/nokogiri +1 -2
- data/ext/java/nokogiri/HtmlDocument.java +1 -31
- data/ext/java/nokogiri/HtmlSaxParserContext.java +1 -1
- data/ext/java/nokogiri/NokogiriService.java +77 -22
- data/ext/java/nokogiri/XmlAttr.java +5 -16
- data/ext/java/nokogiri/XmlCdata.java +4 -11
- data/ext/java/nokogiri/XmlComment.java +5 -5
- data/ext/java/nokogiri/XmlDocument.java +49 -59
- data/ext/java/nokogiri/XmlDocumentFragment.java +14 -8
- data/ext/java/nokogiri/XmlDtd.java +45 -43
- data/ext/java/nokogiri/XmlElement.java +19 -46
- data/ext/java/nokogiri/XmlElementDecl.java +9 -5
- data/ext/java/nokogiri/XmlEntityReference.java +24 -2
- data/ext/java/nokogiri/XmlNamespace.java +89 -34
- data/ext/java/nokogiri/XmlNode.java +31 -52
- data/ext/java/nokogiri/XmlNodeSet.java +42 -86
- data/ext/java/nokogiri/XmlProcessingInstruction.java +15 -19
- data/ext/java/nokogiri/XmlReader.java +40 -43
- data/ext/java/nokogiri/XmlSaxParserContext.java +2 -2
- data/ext/java/nokogiri/XmlSchema.java +14 -9
- data/ext/java/nokogiri/XmlText.java +18 -35
- data/ext/java/nokogiri/XmlXpathContext.java +43 -23
- data/ext/java/nokogiri/XsltStylesheet.java +17 -3
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -4
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +77 -20
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +13 -17
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +13 -1
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +23 -8
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +20 -3
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +67 -0
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +86 -0
- data/ext/java/nokogiri/internals/ParserContext.java +25 -27
- data/ext/java/nokogiri/internals/ReaderNode.java +58 -1
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +567 -0
- data/ext/java/nokogiri/internals/XmlDomParser.java +1 -2
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +6 -0
- data/ext/nokogiri/nokogiri.c +24 -1
- data/ext/nokogiri/xml_io.c +32 -7
- data/ext/nokogiri/xml_node.c +14 -13
- data/ext/nokogiri/xml_sax_parser.c +9 -4
- data/ext/nokogiri/xslt_stylesheet.c +7 -1
- data/lib/nokogiri.rb +3 -22
- data/lib/nokogiri/css.rb +4 -0
- data/lib/nokogiri/html/document.rb +10 -14
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +76 -23
- data/lib/nokogiri/xml/builder.rb +7 -0
- data/lib/nokogiri/xml/document.rb +17 -1
- data/lib/nokogiri/xml/document_fragment.rb +14 -0
- data/lib/nokogiri/xml/node.rb +36 -28
- data/lib/nokogiri/xml/node/save_options.rb +17 -1
- data/lib/nokogiri/xml/node_set.rb +7 -0
- data/lib/nokogiri/xml/parse_options.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +6 -6
- data/lib/nokogiri/xml/schema.rb +7 -1
- data/lib/xercesImpl.jar +0 -0
- data/nokogiri_help_responses.md +40 -0
- data/tasks/cross_compile.rb +134 -159
- data/tasks/nokogiri.org.rb +18 -0
- data/tasks/test.rb +1 -1
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/noencoding.html +47 -0
- data/test/helper.rb +2 -0
- data/test/html/test_document.rb +15 -0
- data/test/html/test_document_encoding.rb +13 -0
- data/test/test_memory_leak.rb +20 -0
- data/test/test_reader.rb +22 -0
- data/test/test_xslt_transforms.rb +6 -2
- data/test/xml/node/test_save_options.rb +10 -2
- data/test/xml/test_builder.rb +17 -0
- data/test/xml/test_document.rb +22 -0
- data/test/xml/test_node.rb +19 -1
- data/test/xml/test_node_reparenting.rb +16 -3
- data/test/xml/test_node_set.rb +34 -0
- data/test/xml/test_schema.rb +5 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- metadata +141 -107
- data/deps.rip +0 -5
- data/ext/java/nokogiri/internals/SaveContext.java +0 -288
@@ -33,11 +33,11 @@
|
|
33
33
|
package nokogiri.internals;
|
34
34
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
36
|
+
import static nokogiri.internals.NokogiriHelpers.adjustSystemIdIfNecessary;
|
36
37
|
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
|
37
38
|
|
38
39
|
import java.io.ByteArrayInputStream;
|
39
40
|
import java.io.File;
|
40
|
-
import java.io.FileInputStream;
|
41
41
|
import java.io.IOException;
|
42
42
|
import java.io.InputStream;
|
43
43
|
|
@@ -61,6 +61,7 @@ import org.xml.sax.ext.EntityResolver2;
|
|
61
61
|
* Ruby objects to InputSource objects.
|
62
62
|
*
|
63
63
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
64
|
+
* @author Yoko Harada <yokolet@gmail.com>
|
64
65
|
*/
|
65
66
|
public class ParserContext extends RubyObject {
|
66
67
|
protected InputSource source = null;
|
@@ -69,24 +70,14 @@ public class ParserContext extends RubyObject {
|
|
69
70
|
* Create a file base input source taking into account the current
|
70
71
|
* directory of <code>runtime</code>.
|
71
72
|
*/
|
72
|
-
public static InputSource resolveEntity(Ruby runtime,
|
73
|
-
String publicId,
|
74
|
-
String baseURI,
|
75
|
-
String systemId)
|
73
|
+
public static InputSource resolveEntity(Ruby runtime, String publicId, String baseURI, String systemId)
|
76
74
|
throws IOException {
|
77
|
-
|
78
|
-
|
79
|
-
if (
|
80
|
-
|
81
|
-
} else if (baseURI != null) {
|
82
|
-
path = (new File(baseURI, systemId)).getAbsolutePath();
|
83
|
-
} else {
|
84
|
-
String rubyDir = runtime.getCurrentDirectory();
|
85
|
-
path = (new File(rubyDir, systemId)).getAbsolutePath();
|
75
|
+
InputSource s = new InputSource();
|
76
|
+
String adjusted = adjustSystemIdIfNecessary(runtime.getCurrentDirectory(), runtime.getInstanceConfig().getScriptFileName(), baseURI, systemId);
|
77
|
+
if (adjusted == null && publicId == null) {
|
78
|
+
throw runtime.newRuntimeError("SystemId \"" + systemId + "\" is not correct.");
|
86
79
|
}
|
87
|
-
|
88
|
-
InputSource s = new InputSource(new FileInputStream(path));
|
89
|
-
s.setSystemId(systemId);
|
80
|
+
s.setSystemId(adjusted);
|
90
81
|
s.setPublicId(publicId);
|
91
82
|
return s;
|
92
83
|
}
|
@@ -105,12 +96,17 @@ public class ParserContext extends RubyObject {
|
|
105
96
|
}
|
106
97
|
|
107
98
|
/**
|
108
|
-
* Set the InputSource from <code>
|
109
|
-
* object, a String, or a StringIO.
|
99
|
+
* Set the InputSource from <code>url</code> or <code>data</code>,
|
100
|
+
* which may be an IO object, a String, or a StringIO.
|
110
101
|
*/
|
111
|
-
public void setInputSource(ThreadContext context,
|
112
|
-
IRubyObject data) {
|
102
|
+
public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
|
113
103
|
Ruby ruby = context.getRuntime();
|
104
|
+
String path = (String) url.toJava(String.class);
|
105
|
+
if (isAbsolutePath(path)) {
|
106
|
+
source = new InputSource();
|
107
|
+
source.setSystemId(path);
|
108
|
+
return;
|
109
|
+
}
|
114
110
|
RubyString stringData = null;
|
115
111
|
if (invoke(context, data, "respond_to?",
|
116
112
|
ruby.newSymbol("to_io").to_sym()).isTrue()) {
|
@@ -147,6 +143,11 @@ public class ParserContext extends RubyObject {
|
|
147
143
|
source = new InputSource(new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()));
|
148
144
|
}
|
149
145
|
}
|
146
|
+
|
147
|
+
private boolean isAbsolutePath(String url) {
|
148
|
+
if (url == null) return false;
|
149
|
+
return (new File(url)).isAbsolute();
|
150
|
+
}
|
150
151
|
|
151
152
|
/**
|
152
153
|
* Set the InputSource to read from <code>file</code>, a String filename.
|
@@ -155,11 +156,9 @@ public class ParserContext extends RubyObject {
|
|
155
156
|
String filename = rubyStringToString(file);
|
156
157
|
|
157
158
|
try{
|
158
|
-
source = resolveEntity(context.getRuntime(),
|
159
|
-
null, null, filename);
|
159
|
+
source = resolveEntity(context.getRuntime(), null, null, filename);
|
160
160
|
} catch (Exception e) {
|
161
|
-
throw RaiseException
|
162
|
-
.createNativeRaiseException(context.getRuntime(), e);
|
161
|
+
throw RaiseException.createNativeRaiseException(context.getRuntime(), e);
|
163
162
|
}
|
164
163
|
|
165
164
|
}
|
@@ -269,8 +268,7 @@ public class ParserContext extends RubyObject {
|
|
269
268
|
String baseURI,
|
270
269
|
String systemId)
|
271
270
|
throws SAXException, IOException {
|
272
|
-
return ParserContext
|
273
|
-
.resolveEntity(runtime, publicId, baseURI, systemId);
|
271
|
+
return ParserContext.resolveEntity(runtime, publicId, baseURI, systemId);
|
274
272
|
}
|
275
273
|
|
276
274
|
}
|
@@ -78,7 +78,11 @@ public abstract class ReaderNode {
|
|
78
78
|
public boolean hasChildren = false;
|
79
79
|
public abstract String getString();
|
80
80
|
private Document document = null;
|
81
|
-
|
81
|
+
|
82
|
+
private static ElementNode elementNode = null;
|
83
|
+
private static ClosingNode closingNode = null;
|
84
|
+
private static TextNode textNode = null;
|
85
|
+
|
82
86
|
public IRubyObject getAttributeByIndex(IRubyObject index){
|
83
87
|
if(index.isNil()) return index;
|
84
88
|
|
@@ -246,10 +250,28 @@ public abstract class ReaderNode {
|
|
246
250
|
return value;
|
247
251
|
}
|
248
252
|
}
|
253
|
+
|
254
|
+
public static ClosingNode createClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
255
|
+
if (closingNode == null) closingNode = new ClosingNode();
|
256
|
+
ClosingNode clone;
|
257
|
+
try {
|
258
|
+
clone = (ClosingNode) closingNode.clone();
|
259
|
+
} catch (CloneNotSupportedException e) {
|
260
|
+
clone = new ClosingNode();
|
261
|
+
}
|
262
|
+
clone.init(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
|
263
|
+
return clone;
|
264
|
+
}
|
249
265
|
|
250
266
|
public static class ClosingNode extends ReaderNode {
|
267
|
+
|
268
|
+
public ClosingNode() {}
|
251
269
|
|
252
270
|
public ClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
271
|
+
init(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
|
272
|
+
}
|
273
|
+
|
274
|
+
public void init(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
253
275
|
this.ruby = ruby;
|
254
276
|
nodeType = ReaderNodeType.END_ELEMENT.getValue();
|
255
277
|
this.uri = "".equals(uri) ? null : uri;
|
@@ -279,10 +301,28 @@ public abstract class ReaderNode {
|
|
279
301
|
}
|
280
302
|
}
|
281
303
|
|
304
|
+
public static ElementNode createElementNode(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
305
|
+
if (elementNode == null) elementNode = new ElementNode();
|
306
|
+
ElementNode clone;
|
307
|
+
try {
|
308
|
+
clone = (ElementNode) elementNode.clone();
|
309
|
+
} catch (CloneNotSupportedException e) {
|
310
|
+
clone = new ElementNode();
|
311
|
+
}
|
312
|
+
clone.init(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
|
313
|
+
return clone;
|
314
|
+
}
|
315
|
+
|
282
316
|
public static class ElementNode extends ReaderNode {
|
283
317
|
private List<String> attributeStrings = new ArrayList<String>();
|
284
318
|
|
319
|
+
public ElementNode() {}
|
320
|
+
|
285
321
|
public ElementNode(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
322
|
+
init(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
|
323
|
+
}
|
324
|
+
|
325
|
+
public void init(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
286
326
|
this.ruby = ruby;
|
287
327
|
this.nodeType = ReaderNodeType.ELEMENT.getValue();
|
288
328
|
this.uri = "".equals(uri) ? null : uri;
|
@@ -446,10 +486,27 @@ public abstract class ReaderNode {
|
|
446
486
|
return this.exception;
|
447
487
|
}
|
448
488
|
}
|
489
|
+
|
490
|
+
public static TextNode createTextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
491
|
+
if (textNode == null) textNode = new TextNode();
|
492
|
+
TextNode clone;
|
493
|
+
try {
|
494
|
+
clone = (TextNode) textNode.clone();
|
495
|
+
} catch (CloneNotSupportedException e) {
|
496
|
+
clone = new TextNode();
|
497
|
+
}
|
498
|
+
clone.init(ruby, content, depth, langStack, xmlBaseStack);
|
499
|
+
return clone;
|
500
|
+
}
|
449
501
|
|
450
502
|
public static class TextNode extends ReaderNode {
|
503
|
+
public TextNode() {}
|
451
504
|
|
452
505
|
public TextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
506
|
+
init(ruby, content, depth, langStack, xmlBaseStack);
|
507
|
+
}
|
508
|
+
|
509
|
+
public void init(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
453
510
|
this.ruby = ruby;
|
454
511
|
this.value = content;
|
455
512
|
this.localName = "#text";
|
@@ -0,0 +1,567 @@
|
|
1
|
+
/**
|
2
|
+
* (The MIT License)
|
3
|
+
*
|
4
|
+
* Copyright (c) 2008 - 2011:
|
5
|
+
*
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
+
*
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
* a copy of this software and associated documentation files (the
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
* the following conditions:
|
20
|
+
*
|
21
|
+
* The above copyright notice and this permission notice shall be
|
22
|
+
* included in all copies or substantial portions of the Software.
|
23
|
+
*
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
*/
|
32
|
+
|
33
|
+
package nokogiri.internals;
|
34
|
+
|
35
|
+
import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
|
36
|
+
import static nokogiri.internals.NokogiriHelpers.isNotXmlEscaped;
|
37
|
+
|
38
|
+
import java.util.Stack;
|
39
|
+
|
40
|
+
import org.cyberneko.html.HTMLElements;
|
41
|
+
import org.w3c.dom.Attr;
|
42
|
+
import org.w3c.dom.CDATASection;
|
43
|
+
import org.w3c.dom.Comment;
|
44
|
+
import org.w3c.dom.Document;
|
45
|
+
import org.w3c.dom.DocumentType;
|
46
|
+
import org.w3c.dom.Element;
|
47
|
+
import org.w3c.dom.Entity;
|
48
|
+
import org.w3c.dom.EntityReference;
|
49
|
+
import org.w3c.dom.NamedNodeMap;
|
50
|
+
import org.w3c.dom.Node;
|
51
|
+
import org.w3c.dom.Notation;
|
52
|
+
import org.w3c.dom.ProcessingInstruction;
|
53
|
+
import org.w3c.dom.Text;
|
54
|
+
|
55
|
+
/**
|
56
|
+
* A class for serializing a document.
|
57
|
+
*
|
58
|
+
* @author sergio
|
59
|
+
* @author Patrick Mahoney <pat@polycrystal.org>
|
60
|
+
* @author Yoko Harada <yokolet@gmail.com>
|
61
|
+
*/
|
62
|
+
public class SaveContextVisitor {
|
63
|
+
|
64
|
+
private StringBuffer buffer;
|
65
|
+
private Stack<String> indentation;
|
66
|
+
private String encoding, indentString;
|
67
|
+
private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;
|
68
|
+
|
69
|
+
/*
|
70
|
+
* U can't touch this.
|
71
|
+
* http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
|
72
|
+
*
|
73
|
+
* Taken from libxml save options.
|
74
|
+
*/
|
75
|
+
|
76
|
+
public static final int FORMAT = 1;
|
77
|
+
public static final int NO_DECL = 2;
|
78
|
+
public static final int NO_EMPTY = 4;
|
79
|
+
public static final int NO_XHTML = 8;
|
80
|
+
public static final int AS_XHTML = 16;
|
81
|
+
public static final int AS_XML = 32;
|
82
|
+
public static final int AS_HTML = 64;
|
83
|
+
public static final int AS_BUILDER = 128;
|
84
|
+
|
85
|
+
public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment) {
|
86
|
+
buffer = new StringBuffer();
|
87
|
+
this.encoding = encoding;
|
88
|
+
indentation = new Stack<String>(); indentation.push("");
|
89
|
+
this.htmlDoc = htmlDoc;
|
90
|
+
this.fragment = fragment;
|
91
|
+
format = (options & FORMAT) == FORMAT;
|
92
|
+
|
93
|
+
noDecl = (options & NO_DECL) == NO_DECL;
|
94
|
+
noEmpty = (options & NO_EMPTY) == NO_EMPTY;
|
95
|
+
noXhtml = (options & NO_XHTML) == NO_XHTML;
|
96
|
+
asXhtml = (options & AS_XHTML) == AS_XHTML;
|
97
|
+
asXml = (options & AS_XML) == AS_XML;
|
98
|
+
asHtml = (options & AS_HTML) == AS_HTML;
|
99
|
+
asBuilder = (options & AS_BUILDER) == AS_BUILDER;
|
100
|
+
if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
|
101
|
+
if ((!format && indent != null) && indent.length() > 0) format = true;
|
102
|
+
if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
|
103
|
+
indentString = indent;
|
104
|
+
if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = true;
|
105
|
+
}
|
106
|
+
|
107
|
+
@Override
|
108
|
+
public String toString() {
|
109
|
+
return (new String(buffer));
|
110
|
+
}
|
111
|
+
|
112
|
+
public void setHtmlDoc(boolean htmlDoc) {
|
113
|
+
this.htmlDoc = htmlDoc;
|
114
|
+
}
|
115
|
+
|
116
|
+
public void setEncoding(String encoding) {
|
117
|
+
this.encoding = encoding;
|
118
|
+
}
|
119
|
+
|
120
|
+
public boolean enter(Node node) {
|
121
|
+
if (node instanceof Document) {
|
122
|
+
return enter((Document)node);
|
123
|
+
}
|
124
|
+
if (node instanceof Element) {
|
125
|
+
return enter((Element)node);
|
126
|
+
}
|
127
|
+
if (node instanceof Attr) {
|
128
|
+
return enter((Attr)node);
|
129
|
+
}
|
130
|
+
if (node instanceof Text) {
|
131
|
+
return enter((Text)node);
|
132
|
+
}
|
133
|
+
if (node instanceof CDATASection) {
|
134
|
+
return enter((CDATASection)node);
|
135
|
+
}
|
136
|
+
if (node instanceof Comment) {
|
137
|
+
return enter((Comment)node);
|
138
|
+
}
|
139
|
+
if (node instanceof DocumentType) {
|
140
|
+
return enter((DocumentType)node);
|
141
|
+
}
|
142
|
+
if (node instanceof Entity) {
|
143
|
+
return enter((Entity)node);
|
144
|
+
}
|
145
|
+
if (node instanceof EntityReference) {
|
146
|
+
return enter((EntityReference)node);
|
147
|
+
}
|
148
|
+
if (node instanceof Notation) {
|
149
|
+
return enter((Notation)node);
|
150
|
+
}
|
151
|
+
if (node instanceof ProcessingInstruction) {
|
152
|
+
return enter((ProcessingInstruction)node);
|
153
|
+
}
|
154
|
+
return false;
|
155
|
+
}
|
156
|
+
|
157
|
+
public void leave(Node node) {
|
158
|
+
if (node instanceof Document) {
|
159
|
+
leave((Document)node);
|
160
|
+
return;
|
161
|
+
}
|
162
|
+
if (node instanceof Element) {
|
163
|
+
leave((Element)node);
|
164
|
+
return;
|
165
|
+
}
|
166
|
+
if (node instanceof Attr) {
|
167
|
+
leave((Attr)node);
|
168
|
+
return;
|
169
|
+
}
|
170
|
+
if (node instanceof Text) {
|
171
|
+
leave((Text)node);
|
172
|
+
return;
|
173
|
+
}
|
174
|
+
if (node instanceof CDATASection) {
|
175
|
+
leave((CDATASection)node);
|
176
|
+
return;
|
177
|
+
}
|
178
|
+
if (node instanceof Comment) {
|
179
|
+
leave((Comment)node);
|
180
|
+
return;
|
181
|
+
}
|
182
|
+
if (node instanceof DocumentType) {
|
183
|
+
leave((DocumentType)node);
|
184
|
+
return;
|
185
|
+
}
|
186
|
+
if (node instanceof Entity) {
|
187
|
+
leave((Entity)node);
|
188
|
+
return;
|
189
|
+
}
|
190
|
+
if (node instanceof EntityReference) {
|
191
|
+
leave((EntityReference)node);
|
192
|
+
return;
|
193
|
+
}
|
194
|
+
if (node instanceof Notation) {
|
195
|
+
leave((Notation)node);
|
196
|
+
return;
|
197
|
+
}
|
198
|
+
if (node instanceof ProcessingInstruction) {
|
199
|
+
leave((ProcessingInstruction)node);
|
200
|
+
return;
|
201
|
+
}
|
202
|
+
}
|
203
|
+
|
204
|
+
public boolean enter(String string) {
|
205
|
+
buffer.append(string);
|
206
|
+
return true;
|
207
|
+
}
|
208
|
+
|
209
|
+
public void leave(String string) {
|
210
|
+
// no-op
|
211
|
+
}
|
212
|
+
|
213
|
+
public boolean enter(Attr attr) {
|
214
|
+
String name = attr.getName();
|
215
|
+
buffer.append(name);
|
216
|
+
if (!asHtml || !isHtmlBooleanAttr(name)) {
|
217
|
+
buffer.append("=");
|
218
|
+
buffer.append("\"");
|
219
|
+
buffer.append(serializeAttrTextContent(attr.getValue(), htmlDoc));
|
220
|
+
buffer.append("\"");
|
221
|
+
}
|
222
|
+
return true;
|
223
|
+
}
|
224
|
+
|
225
|
+
public static final String[] HTML_BOOLEAN_ATTRS = {
|
226
|
+
"checked", "compact", "declare", "defer", "disabled", "ismap",
|
227
|
+
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
|
228
|
+
"selected"
|
229
|
+
};
|
230
|
+
|
231
|
+
private boolean isHtmlBooleanAttr(String name) {
|
232
|
+
for (String s : HTML_BOOLEAN_ATTRS) {
|
233
|
+
if (s.equals(name)) return true;
|
234
|
+
}
|
235
|
+
return false;
|
236
|
+
}
|
237
|
+
|
238
|
+
private String serializeAttrTextContent(String s, boolean htmlDoc) {
|
239
|
+
if (s == null) return "";
|
240
|
+
|
241
|
+
char[] c = s.toCharArray();
|
242
|
+
StringBuffer buffer = new StringBuffer(c.length);
|
243
|
+
|
244
|
+
for(int i = 0; i < c.length; i++) {
|
245
|
+
switch(c[i]){
|
246
|
+
case '\n': buffer.append(" "); break;
|
247
|
+
case '\r': buffer.append(" "); break;
|
248
|
+
case '\t': buffer.append("	"); break;
|
249
|
+
case '"': if (htmlDoc) buffer.append("%22");
|
250
|
+
else buffer.append(""");
|
251
|
+
break;
|
252
|
+
case '<': buffer.append("<"); break;
|
253
|
+
case '>': buffer.append(">"); break;
|
254
|
+
case '&': buffer.append("&"); break;
|
255
|
+
default: buffer.append(c[i]);
|
256
|
+
}
|
257
|
+
}
|
258
|
+
|
259
|
+
return buffer.toString();
|
260
|
+
}
|
261
|
+
|
262
|
+
public void leave(Attr attr) {
|
263
|
+
// no-op
|
264
|
+
}
|
265
|
+
|
266
|
+
public boolean enter(CDATASection cdata) {
|
267
|
+
buffer.append("<![CDATA[");
|
268
|
+
buffer.append(cdata.getData());
|
269
|
+
buffer.append("]]>");
|
270
|
+
return true;
|
271
|
+
}
|
272
|
+
|
273
|
+
public void leave(CDATASection cdata) {
|
274
|
+
// no-op
|
275
|
+
}
|
276
|
+
|
277
|
+
public boolean enter(Comment comment) {
|
278
|
+
buffer.append("<!--");
|
279
|
+
buffer.append(comment.getData());
|
280
|
+
buffer.append("-->");
|
281
|
+
return true;
|
282
|
+
}
|
283
|
+
|
284
|
+
public void leave(Comment comment) {
|
285
|
+
// no-op
|
286
|
+
}
|
287
|
+
|
288
|
+
public boolean enter(Document document) {
|
289
|
+
if (!noDecl) {
|
290
|
+
buffer.append("<?xml version=\"");
|
291
|
+
buffer.append(document.getXmlVersion());
|
292
|
+
buffer.append("\"");
|
293
|
+
|
294
|
+
if (encoding != null) {
|
295
|
+
buffer.append(" encoding=\"");
|
296
|
+
buffer.append(encoding.toUpperCase());
|
297
|
+
buffer.append("\"");
|
298
|
+
}
|
299
|
+
buffer.append("?>\n");
|
300
|
+
}
|
301
|
+
return true;
|
302
|
+
}
|
303
|
+
|
304
|
+
public void leave(Document document) {
|
305
|
+
// no-op
|
306
|
+
}
|
307
|
+
|
308
|
+
public boolean enter(DocumentType docType) {
|
309
|
+
String name = docType.getName();
|
310
|
+
String pubId = docType.getPublicId();
|
311
|
+
String sysId = docType.getSystemId();
|
312
|
+
String internalSubset = docType.getInternalSubset();
|
313
|
+
if (docType.getPreviousSibling() != null) {
|
314
|
+
buffer.append("\n");
|
315
|
+
}
|
316
|
+
buffer.append("<!DOCTYPE " + name + " ");
|
317
|
+
if (pubId != null) {
|
318
|
+
buffer.append("PUBLIC \"" + pubId + "\"");
|
319
|
+
if (sysId != null) buffer.append(" \"" + sysId + "\"");
|
320
|
+
} else if (sysId != null) {
|
321
|
+
buffer.append("SYSTEM \"" + sysId + "\"");
|
322
|
+
}
|
323
|
+
if (internalSubset != null) {
|
324
|
+
buffer.append(" [");
|
325
|
+
buffer.append(internalSubset);
|
326
|
+
buffer.append("]");
|
327
|
+
}
|
328
|
+
buffer.append(">\n");
|
329
|
+
return true;
|
330
|
+
}
|
331
|
+
|
332
|
+
public void leave(DocumentType docType) {
|
333
|
+
// no-op
|
334
|
+
}
|
335
|
+
|
336
|
+
public boolean enter(Element element) {
|
337
|
+
String current = indentation.peek();
|
338
|
+
buffer.append(current);
|
339
|
+
if (needIndent()) {
|
340
|
+
indentation.push(current + indentString);
|
341
|
+
}
|
342
|
+
String name = element.getTagName();
|
343
|
+
buffer.append("<" + name);
|
344
|
+
NamedNodeMap attrs = element.getAttributes();
|
345
|
+
for (int i=0; i<attrs.getLength(); i++) {
|
346
|
+
Attr attr = (Attr) attrs.item(i);
|
347
|
+
if (attr.getSpecified()) {
|
348
|
+
buffer.append(" ");
|
349
|
+
enter(attr);
|
350
|
+
leave(attr);
|
351
|
+
}
|
352
|
+
}
|
353
|
+
if (element.hasChildNodes()) {
|
354
|
+
buffer.append(">");
|
355
|
+
if (needBreakInOpening(element)) buffer.append("\n");
|
356
|
+
return true;
|
357
|
+
}
|
358
|
+
// no child
|
359
|
+
if (asHtml) {
|
360
|
+
buffer.append(">");
|
361
|
+
} else if (asXhtml) {
|
362
|
+
buffer.append(" />");
|
363
|
+
} else if (asXml && noEmpty) {
|
364
|
+
buffer.append(">");
|
365
|
+
} else {
|
366
|
+
buffer.append("/>");
|
367
|
+
}
|
368
|
+
if (needBreakInOpening(element)) {
|
369
|
+
buffer.append("\n");
|
370
|
+
}
|
371
|
+
return true;
|
372
|
+
}
|
373
|
+
|
374
|
+
private boolean needIndent() {
|
375
|
+
if (fragment) return false; // a given option might be fragment and format. fragment matters
|
376
|
+
if (format || asBuilder) return true;
|
377
|
+
return false;
|
378
|
+
}
|
379
|
+
|
380
|
+
private boolean needBreakInOpening(Element element) {
|
381
|
+
if (fragment) return false;
|
382
|
+
if (format) return true;
|
383
|
+
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
|
384
|
+
if (format && element.getNextSibling() == null && element.hasChildNodes()) return true;
|
385
|
+
return false;
|
386
|
+
}
|
387
|
+
|
388
|
+
private boolean isEmpty(String name) {
|
389
|
+
HTMLElements.Element element = HTMLElements.getElement(name);
|
390
|
+
return element.isEmpty();
|
391
|
+
}
|
392
|
+
|
393
|
+
public void leave(Element element) {
|
394
|
+
String name = element.getTagName();
|
395
|
+
if (element.hasChildNodes()) {
|
396
|
+
if (needIndentInClosing(element)) {
|
397
|
+
indentation.pop();
|
398
|
+
buffer.append(indentation.peek());
|
399
|
+
} else if (asBuilder) {
|
400
|
+
indentation.pop();
|
401
|
+
}
|
402
|
+
buffer.append("</" + name + ">");
|
403
|
+
if (needBreakInClosing()) {
|
404
|
+
buffer.append("\n");
|
405
|
+
}
|
406
|
+
return;
|
407
|
+
}
|
408
|
+
// no child, but HTML might need a closing tag.
|
409
|
+
if (asHtml || noEmpty) {
|
410
|
+
if (!isEmpty(name) && noEmpty) {
|
411
|
+
buffer.append("</" + name + ">");
|
412
|
+
}
|
413
|
+
}
|
414
|
+
if (needBreakInClosing()) {
|
415
|
+
indentation.pop();
|
416
|
+
buffer.append("\n");
|
417
|
+
}
|
418
|
+
}
|
419
|
+
|
420
|
+
private boolean needIndentInClosing(Element element) {
|
421
|
+
if (fragment) return false; // a given option might be fragment and format. fragment matters
|
422
|
+
if (format) return true;
|
423
|
+
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
|
424
|
+
return false;
|
425
|
+
}
|
426
|
+
|
427
|
+
private boolean needBreakInClosing() {
|
428
|
+
if (fragment) return false;
|
429
|
+
if (format || asBuilder) return true;
|
430
|
+
return false;
|
431
|
+
}
|
432
|
+
|
433
|
+
public boolean enter(Entity entity) {
|
434
|
+
String name = entity.getNodeName();
|
435
|
+
String pubId = entity.getPublicId();
|
436
|
+
String sysId = entity.getSystemId();
|
437
|
+
String notation = entity.getNotationName();
|
438
|
+
buffer.append("<!ENTITY ");
|
439
|
+
buffer.append(name);
|
440
|
+
if (pubId != null) {
|
441
|
+
buffer.append(" PUBLIC \"");
|
442
|
+
buffer.append(pubId);
|
443
|
+
buffer.append("\"");
|
444
|
+
}
|
445
|
+
if (sysId != null) {
|
446
|
+
buffer.append(" SYSTEM \"");
|
447
|
+
buffer.append(sysId);
|
448
|
+
buffer.append("\"");
|
449
|
+
}
|
450
|
+
if (notation != null) {
|
451
|
+
buffer.append(" NDATA ");
|
452
|
+
buffer.append(notation);
|
453
|
+
}
|
454
|
+
buffer.append(">");
|
455
|
+
return true;
|
456
|
+
}
|
457
|
+
|
458
|
+
public void leave(Entity entity) {
|
459
|
+
// no-op
|
460
|
+
}
|
461
|
+
|
462
|
+
public boolean enter(EntityReference entityRef) {
|
463
|
+
// no-op?
|
464
|
+
return true;
|
465
|
+
}
|
466
|
+
|
467
|
+
public void leave(EntityReference entityRef) {
|
468
|
+
// no-op
|
469
|
+
}
|
470
|
+
|
471
|
+
public boolean enter(Notation notation) {
|
472
|
+
String name = notation.getNodeName();
|
473
|
+
String pubId = notation.getPublicId();
|
474
|
+
String sysId = notation.getSystemId();
|
475
|
+
buffer.append("<!NOTATION ");
|
476
|
+
buffer.append(name);
|
477
|
+
if (pubId != null) {
|
478
|
+
buffer.append(" PUBLIC \"");
|
479
|
+
buffer.append(pubId);
|
480
|
+
buffer.append("\"");
|
481
|
+
if (sysId != null) {
|
482
|
+
buffer.append(" \"");
|
483
|
+
buffer.append(sysId);
|
484
|
+
buffer.append("\"");
|
485
|
+
}
|
486
|
+
} else if (sysId != null) {
|
487
|
+
buffer.append(" SYSTEM \"");
|
488
|
+
buffer.append(sysId);
|
489
|
+
buffer.append("\"");
|
490
|
+
}
|
491
|
+
buffer.append(">");
|
492
|
+
return true;
|
493
|
+
}
|
494
|
+
|
495
|
+
public void leave(Notation notation) {
|
496
|
+
// no-op
|
497
|
+
}
|
498
|
+
|
499
|
+
public boolean enter(ProcessingInstruction pi) {
|
500
|
+
buffer.append("<?");
|
501
|
+
buffer.append(pi.getTarget());
|
502
|
+
buffer.append(" ");
|
503
|
+
buffer.append(pi.getData());
|
504
|
+
if (asHtml) buffer.append(">");
|
505
|
+
else buffer.append("?>");
|
506
|
+
buffer.append("\n");
|
507
|
+
return true;
|
508
|
+
}
|
509
|
+
|
510
|
+
public void leave(ProcessingInstruction pi) {
|
511
|
+
// no-op
|
512
|
+
}
|
513
|
+
|
514
|
+
private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
|
515
|
+
public boolean enter(Text text) {
|
516
|
+
String textContent = text.getNodeValue();
|
517
|
+
if (needIndentText() && "".equals(textContent.trim())) return true;
|
518
|
+
if (needIndentText()) {
|
519
|
+
String current = indentation.peek();
|
520
|
+
buffer.append(current);
|
521
|
+
indentation.push(current + indentString);
|
522
|
+
if (textContent.charAt(0) == lineSeparator) textContent = textContent.substring(1);
|
523
|
+
}
|
524
|
+
if (isNotXmlEscaped(textContent)) {
|
525
|
+
textContent = encodeJavaString(textContent);
|
526
|
+
}
|
527
|
+
if (getEncoding(text) == null) {
|
528
|
+
textContent = encodeStringToHtmlEntity(textContent);
|
529
|
+
}
|
530
|
+
buffer.append(textContent);
|
531
|
+
return true;
|
532
|
+
}
|
533
|
+
|
534
|
+
private boolean needIndentText() {
|
535
|
+
if (fragment) return false;
|
536
|
+
if (format) return true;
|
537
|
+
return false;
|
538
|
+
}
|
539
|
+
|
540
|
+
public void leave(Text text) {
|
541
|
+
String textContent = text.getNodeValue();
|
542
|
+
if (needIndentText() && !"".equals(textContent.trim())) {
|
543
|
+
indentation.pop();
|
544
|
+
if (textContent.charAt(textContent.length()-1) != lineSeparator) {
|
545
|
+
buffer.append("\n");
|
546
|
+
}
|
547
|
+
}
|
548
|
+
}
|
549
|
+
|
550
|
+
private String getEncoding(Text text) {
|
551
|
+
if (encoding != null) return encoding;
|
552
|
+
encoding = text.getOwnerDocument().getInputEncoding();
|
553
|
+
return encoding;
|
554
|
+
}
|
555
|
+
|
556
|
+
private String encodeStringToHtmlEntity(String text) {
|
557
|
+
int last = 126; // = U+007E. No need to encode under U+007E.
|
558
|
+
StringBuffer sb = new StringBuffer();
|
559
|
+
for (int i=0; i<text.length(); i++) {
|
560
|
+
int codePoint = text.codePointAt(i);
|
561
|
+
if (codePoint > last) sb.append("&#x" + Integer.toHexString(codePoint) + ";");
|
562
|
+
else sb.append(text.charAt(i));
|
563
|
+
}
|
564
|
+
return new String(sb);
|
565
|
+
}
|
566
|
+
|
567
|
+
}
|