nokogiri 1.5.0.beta.4-java → 1.5.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (87) hide show
  1. data/.gemtest +0 -0
  2. data/CHANGELOG.ja.rdoc +34 -0
  3. data/CHANGELOG.rdoc +40 -1
  4. data/Manifest.txt +11 -2
  5. data/README.rdoc +1 -1
  6. data/Rakefile +96 -105
  7. data/bin/nokogiri +1 -2
  8. data/ext/java/nokogiri/HtmlDocument.java +1 -31
  9. data/ext/java/nokogiri/HtmlSaxParserContext.java +1 -1
  10. data/ext/java/nokogiri/NokogiriService.java +77 -22
  11. data/ext/java/nokogiri/XmlAttr.java +5 -16
  12. data/ext/java/nokogiri/XmlCdata.java +4 -11
  13. data/ext/java/nokogiri/XmlComment.java +5 -5
  14. data/ext/java/nokogiri/XmlDocument.java +49 -59
  15. data/ext/java/nokogiri/XmlDocumentFragment.java +14 -8
  16. data/ext/java/nokogiri/XmlDtd.java +45 -43
  17. data/ext/java/nokogiri/XmlElement.java +19 -46
  18. data/ext/java/nokogiri/XmlElementDecl.java +9 -5
  19. data/ext/java/nokogiri/XmlEntityReference.java +24 -2
  20. data/ext/java/nokogiri/XmlNamespace.java +89 -34
  21. data/ext/java/nokogiri/XmlNode.java +31 -52
  22. data/ext/java/nokogiri/XmlNodeSet.java +42 -86
  23. data/ext/java/nokogiri/XmlProcessingInstruction.java +15 -19
  24. data/ext/java/nokogiri/XmlReader.java +40 -43
  25. data/ext/java/nokogiri/XmlSaxParserContext.java +2 -2
  26. data/ext/java/nokogiri/XmlSchema.java +14 -9
  27. data/ext/java/nokogiri/XmlText.java +18 -35
  28. data/ext/java/nokogiri/XmlXpathContext.java +43 -23
  29. data/ext/java/nokogiri/XsltStylesheet.java +17 -3
  30. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -4
  31. data/ext/java/nokogiri/internals/NokogiriHelpers.java +77 -20
  32. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +13 -17
  33. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +13 -1
  34. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +23 -8
  35. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +20 -3
  36. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +67 -0
  37. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +86 -0
  38. data/ext/java/nokogiri/internals/ParserContext.java +25 -27
  39. data/ext/java/nokogiri/internals/ReaderNode.java +58 -1
  40. data/ext/java/nokogiri/internals/SaveContextVisitor.java +567 -0
  41. data/ext/java/nokogiri/internals/XmlDomParser.java +1 -2
  42. data/ext/java/nokogiri/internals/XmlDomParserContext.java +6 -0
  43. data/ext/nokogiri/nokogiri.c +24 -1
  44. data/ext/nokogiri/xml_io.c +32 -7
  45. data/ext/nokogiri/xml_node.c +14 -13
  46. data/ext/nokogiri/xml_sax_parser.c +9 -4
  47. data/ext/nokogiri/xslt_stylesheet.c +7 -1
  48. data/lib/nokogiri.rb +3 -22
  49. data/lib/nokogiri/css.rb +4 -0
  50. data/lib/nokogiri/html/document.rb +10 -14
  51. data/lib/nokogiri/nokogiri.jar +0 -0
  52. data/lib/nokogiri/version.rb +76 -23
  53. data/lib/nokogiri/xml/builder.rb +7 -0
  54. data/lib/nokogiri/xml/document.rb +17 -1
  55. data/lib/nokogiri/xml/document_fragment.rb +14 -0
  56. data/lib/nokogiri/xml/node.rb +36 -28
  57. data/lib/nokogiri/xml/node/save_options.rb +17 -1
  58. data/lib/nokogiri/xml/node_set.rb +7 -0
  59. data/lib/nokogiri/xml/parse_options.rb +8 -0
  60. data/lib/nokogiri/xml/reader.rb +6 -6
  61. data/lib/nokogiri/xml/schema.rb +7 -1
  62. data/lib/xercesImpl.jar +0 -0
  63. data/nokogiri_help_responses.md +40 -0
  64. data/tasks/cross_compile.rb +134 -159
  65. data/tasks/nokogiri.org.rb +18 -0
  66. data/tasks/test.rb +1 -1
  67. data/test/files/encoding.html +82 -0
  68. data/test/files/encoding.xhtml +84 -0
  69. data/test/files/metacharset.html +10 -0
  70. data/test/files/noencoding.html +47 -0
  71. data/test/helper.rb +2 -0
  72. data/test/html/test_document.rb +15 -0
  73. data/test/html/test_document_encoding.rb +13 -0
  74. data/test/test_memory_leak.rb +20 -0
  75. data/test/test_reader.rb +22 -0
  76. data/test/test_xslt_transforms.rb +6 -2
  77. data/test/xml/node/test_save_options.rb +10 -2
  78. data/test/xml/test_builder.rb +17 -0
  79. data/test/xml/test_document.rb +22 -0
  80. data/test/xml/test_node.rb +19 -1
  81. data/test/xml/test_node_reparenting.rb +16 -3
  82. data/test/xml/test_node_set.rb +34 -0
  83. data/test/xml/test_schema.rb +5 -0
  84. data/test/xslt/test_exception_handling.rb +37 -0
  85. metadata +141 -107
  86. data/deps.rip +0 -5
  87. data/ext/java/nokogiri/internals/SaveContext.java +0 -288
@@ -33,11 +33,11 @@
33
33
  package nokogiri.internals;
34
34
 
35
35
  import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
36
+ import static nokogiri.internals.NokogiriHelpers.adjustSystemIdIfNecessary;
36
37
  import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
37
38
 
38
39
  import java.io.ByteArrayInputStream;
39
40
  import java.io.File;
40
- import java.io.FileInputStream;
41
41
  import java.io.IOException;
42
42
  import java.io.InputStream;
43
43
 
@@ -61,6 +61,7 @@ import org.xml.sax.ext.EntityResolver2;
61
61
  * Ruby objects to InputSource objects.
62
62
  *
63
63
  * @author Patrick Mahoney <pat@polycrystal.org>
64
+ * @author Yoko Harada <yokolet@gmail.com>
64
65
  */
65
66
  public class ParserContext extends RubyObject {
66
67
  protected InputSource source = null;
@@ -69,24 +70,14 @@ public class ParserContext extends RubyObject {
69
70
  * Create a file base input source taking into account the current
70
71
  * directory of <code>runtime</code>.
71
72
  */
72
- public static InputSource resolveEntity(Ruby runtime,
73
- String publicId,
74
- String baseURI,
75
- String systemId)
73
+ public static InputSource resolveEntity(Ruby runtime, String publicId, String baseURI, String systemId)
76
74
  throws IOException {
77
- String path;
78
-
79
- if ((new File(systemId)).isAbsolute()) {
80
- path = systemId;
81
- } else if (baseURI != null) {
82
- path = (new File(baseURI, systemId)).getAbsolutePath();
83
- } else {
84
- String rubyDir = runtime.getCurrentDirectory();
85
- path = (new File(rubyDir, systemId)).getAbsolutePath();
75
+ InputSource s = new InputSource();
76
+ String adjusted = adjustSystemIdIfNecessary(runtime.getCurrentDirectory(), runtime.getInstanceConfig().getScriptFileName(), baseURI, systemId);
77
+ if (adjusted == null && publicId == null) {
78
+ throw runtime.newRuntimeError("SystemId \"" + systemId + "\" is not correct.");
86
79
  }
87
-
88
- InputSource s = new InputSource(new FileInputStream(path));
89
- s.setSystemId(systemId);
80
+ s.setSystemId(adjusted);
90
81
  s.setPublicId(publicId);
91
82
  return s;
92
83
  }
@@ -105,12 +96,17 @@ public class ParserContext extends RubyObject {
105
96
  }
106
97
 
107
98
  /**
108
- * Set the InputSource from <code>data</code> which may be an IO
109
- * object, a String, or a StringIO.
99
+ * Set the InputSource from <code>url</code> or <code>data</code>,
100
+ * which may be an IO object, a String, or a StringIO.
110
101
  */
111
- public void setInputSource(ThreadContext context,
112
- IRubyObject data) {
102
+ public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
113
103
  Ruby ruby = context.getRuntime();
104
+ String path = (String) url.toJava(String.class);
105
+ if (isAbsolutePath(path)) {
106
+ source = new InputSource();
107
+ source.setSystemId(path);
108
+ return;
109
+ }
114
110
  RubyString stringData = null;
115
111
  if (invoke(context, data, "respond_to?",
116
112
  ruby.newSymbol("to_io").to_sym()).isTrue()) {
@@ -147,6 +143,11 @@ public class ParserContext extends RubyObject {
147
143
  source = new InputSource(new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()));
148
144
  }
149
145
  }
146
+
147
+ private boolean isAbsolutePath(String url) {
148
+ if (url == null) return false;
149
+ return (new File(url)).isAbsolute();
150
+ }
150
151
 
151
152
  /**
152
153
  * Set the InputSource to read from <code>file</code>, a String filename.
@@ -155,11 +156,9 @@ public class ParserContext extends RubyObject {
155
156
  String filename = rubyStringToString(file);
156
157
 
157
158
  try{
158
- source = resolveEntity(context.getRuntime(),
159
- null, null, filename);
159
+ source = resolveEntity(context.getRuntime(), null, null, filename);
160
160
  } catch (Exception e) {
161
- throw RaiseException
162
- .createNativeRaiseException(context.getRuntime(), e);
161
+ throw RaiseException.createNativeRaiseException(context.getRuntime(), e);
163
162
  }
164
163
 
165
164
  }
@@ -269,8 +268,7 @@ public class ParserContext extends RubyObject {
269
268
  String baseURI,
270
269
  String systemId)
271
270
  throws SAXException, IOException {
272
- return ParserContext
273
- .resolveEntity(runtime, publicId, baseURI, systemId);
271
+ return ParserContext.resolveEntity(runtime, publicId, baseURI, systemId);
274
272
  }
275
273
 
276
274
  }
@@ -78,7 +78,11 @@ public abstract class ReaderNode {
78
78
  public boolean hasChildren = false;
79
79
  public abstract String getString();
80
80
  private Document document = null;
81
-
81
+
82
+ private static ElementNode elementNode = null;
83
+ private static ClosingNode closingNode = null;
84
+ private static TextNode textNode = null;
85
+
82
86
  public IRubyObject getAttributeByIndex(IRubyObject index){
83
87
  if(index.isNil()) return index;
84
88
 
@@ -246,10 +250,28 @@ public abstract class ReaderNode {
246
250
  return value;
247
251
  }
248
252
  }
253
+
254
+ public static ClosingNode createClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
255
+ if (closingNode == null) closingNode = new ClosingNode();
256
+ ClosingNode clone;
257
+ try {
258
+ clone = (ClosingNode) closingNode.clone();
259
+ } catch (CloneNotSupportedException e) {
260
+ clone = new ClosingNode();
261
+ }
262
+ clone.init(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
263
+ return clone;
264
+ }
249
265
 
250
266
  public static class ClosingNode extends ReaderNode {
267
+
268
+ public ClosingNode() {}
251
269
 
252
270
  public ClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
271
+ init(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
272
+ }
273
+
274
+ public void init(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
253
275
  this.ruby = ruby;
254
276
  nodeType = ReaderNodeType.END_ELEMENT.getValue();
255
277
  this.uri = "".equals(uri) ? null : uri;
@@ -279,10 +301,28 @@ public abstract class ReaderNode {
279
301
  }
280
302
  }
281
303
 
304
+ public static ElementNode createElementNode(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
305
+ if (elementNode == null) elementNode = new ElementNode();
306
+ ElementNode clone;
307
+ try {
308
+ clone = (ElementNode) elementNode.clone();
309
+ } catch (CloneNotSupportedException e) {
310
+ clone = new ElementNode();
311
+ }
312
+ clone.init(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
313
+ return clone;
314
+ }
315
+
282
316
  public static class ElementNode extends ReaderNode {
283
317
  private List<String> attributeStrings = new ArrayList<String>();
284
318
 
319
+ public ElementNode() {}
320
+
285
321
  public ElementNode(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
322
+ init(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
323
+ }
324
+
325
+ public void init(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
286
326
  this.ruby = ruby;
287
327
  this.nodeType = ReaderNodeType.ELEMENT.getValue();
288
328
  this.uri = "".equals(uri) ? null : uri;
@@ -446,10 +486,27 @@ public abstract class ReaderNode {
446
486
  return this.exception;
447
487
  }
448
488
  }
489
+
490
+ public static TextNode createTextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
491
+ if (textNode == null) textNode = new TextNode();
492
+ TextNode clone;
493
+ try {
494
+ clone = (TextNode) textNode.clone();
495
+ } catch (CloneNotSupportedException e) {
496
+ clone = new TextNode();
497
+ }
498
+ clone.init(ruby, content, depth, langStack, xmlBaseStack);
499
+ return clone;
500
+ }
449
501
 
450
502
  public static class TextNode extends ReaderNode {
503
+ public TextNode() {}
451
504
 
452
505
  public TextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
506
+ init(ruby, content, depth, langStack, xmlBaseStack);
507
+ }
508
+
509
+ public void init(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
453
510
  this.ruby = ruby;
454
511
  this.value = content;
455
512
  this.localName = "#text";
@@ -0,0 +1,567 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2011:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri.internals;
34
+
35
+ import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
36
+ import static nokogiri.internals.NokogiriHelpers.isNotXmlEscaped;
37
+
38
+ import java.util.Stack;
39
+
40
+ import org.cyberneko.html.HTMLElements;
41
+ import org.w3c.dom.Attr;
42
+ import org.w3c.dom.CDATASection;
43
+ import org.w3c.dom.Comment;
44
+ import org.w3c.dom.Document;
45
+ import org.w3c.dom.DocumentType;
46
+ import org.w3c.dom.Element;
47
+ import org.w3c.dom.Entity;
48
+ import org.w3c.dom.EntityReference;
49
+ import org.w3c.dom.NamedNodeMap;
50
+ import org.w3c.dom.Node;
51
+ import org.w3c.dom.Notation;
52
+ import org.w3c.dom.ProcessingInstruction;
53
+ import org.w3c.dom.Text;
54
+
55
+ /**
56
+ * A class for serializing a document.
57
+ *
58
+ * @author sergio
59
+ * @author Patrick Mahoney <pat@polycrystal.org>
60
+ * @author Yoko Harada <yokolet@gmail.com>
61
+ */
62
+ public class SaveContextVisitor {
63
+
64
+ private StringBuffer buffer;
65
+ private Stack<String> indentation;
66
+ private String encoding, indentString;
67
+ private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;
68
+
69
+ /*
70
+ * U can't touch this.
71
+ * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
72
+ *
73
+ * Taken from libxml save options.
74
+ */
75
+
76
+ public static final int FORMAT = 1;
77
+ public static final int NO_DECL = 2;
78
+ public static final int NO_EMPTY = 4;
79
+ public static final int NO_XHTML = 8;
80
+ public static final int AS_XHTML = 16;
81
+ public static final int AS_XML = 32;
82
+ public static final int AS_HTML = 64;
83
+ public static final int AS_BUILDER = 128;
84
+
85
+ public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment) {
86
+ buffer = new StringBuffer();
87
+ this.encoding = encoding;
88
+ indentation = new Stack<String>(); indentation.push("");
89
+ this.htmlDoc = htmlDoc;
90
+ this.fragment = fragment;
91
+ format = (options & FORMAT) == FORMAT;
92
+
93
+ noDecl = (options & NO_DECL) == NO_DECL;
94
+ noEmpty = (options & NO_EMPTY) == NO_EMPTY;
95
+ noXhtml = (options & NO_XHTML) == NO_XHTML;
96
+ asXhtml = (options & AS_XHTML) == AS_XHTML;
97
+ asXml = (options & AS_XML) == AS_XML;
98
+ asHtml = (options & AS_HTML) == AS_HTML;
99
+ asBuilder = (options & AS_BUILDER) == AS_BUILDER;
100
+ if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
101
+ if ((!format && indent != null) && indent.length() > 0) format = true;
102
+ if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
103
+ indentString = indent;
104
+ if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = true;
105
+ }
106
+
107
+ @Override
108
+ public String toString() {
109
+ return (new String(buffer));
110
+ }
111
+
112
+ public void setHtmlDoc(boolean htmlDoc) {
113
+ this.htmlDoc = htmlDoc;
114
+ }
115
+
116
+ public void setEncoding(String encoding) {
117
+ this.encoding = encoding;
118
+ }
119
+
120
+ public boolean enter(Node node) {
121
+ if (node instanceof Document) {
122
+ return enter((Document)node);
123
+ }
124
+ if (node instanceof Element) {
125
+ return enter((Element)node);
126
+ }
127
+ if (node instanceof Attr) {
128
+ return enter((Attr)node);
129
+ }
130
+ if (node instanceof Text) {
131
+ return enter((Text)node);
132
+ }
133
+ if (node instanceof CDATASection) {
134
+ return enter((CDATASection)node);
135
+ }
136
+ if (node instanceof Comment) {
137
+ return enter((Comment)node);
138
+ }
139
+ if (node instanceof DocumentType) {
140
+ return enter((DocumentType)node);
141
+ }
142
+ if (node instanceof Entity) {
143
+ return enter((Entity)node);
144
+ }
145
+ if (node instanceof EntityReference) {
146
+ return enter((EntityReference)node);
147
+ }
148
+ if (node instanceof Notation) {
149
+ return enter((Notation)node);
150
+ }
151
+ if (node instanceof ProcessingInstruction) {
152
+ return enter((ProcessingInstruction)node);
153
+ }
154
+ return false;
155
+ }
156
+
157
+ public void leave(Node node) {
158
+ if (node instanceof Document) {
159
+ leave((Document)node);
160
+ return;
161
+ }
162
+ if (node instanceof Element) {
163
+ leave((Element)node);
164
+ return;
165
+ }
166
+ if (node instanceof Attr) {
167
+ leave((Attr)node);
168
+ return;
169
+ }
170
+ if (node instanceof Text) {
171
+ leave((Text)node);
172
+ return;
173
+ }
174
+ if (node instanceof CDATASection) {
175
+ leave((CDATASection)node);
176
+ return;
177
+ }
178
+ if (node instanceof Comment) {
179
+ leave((Comment)node);
180
+ return;
181
+ }
182
+ if (node instanceof DocumentType) {
183
+ leave((DocumentType)node);
184
+ return;
185
+ }
186
+ if (node instanceof Entity) {
187
+ leave((Entity)node);
188
+ return;
189
+ }
190
+ if (node instanceof EntityReference) {
191
+ leave((EntityReference)node);
192
+ return;
193
+ }
194
+ if (node instanceof Notation) {
195
+ leave((Notation)node);
196
+ return;
197
+ }
198
+ if (node instanceof ProcessingInstruction) {
199
+ leave((ProcessingInstruction)node);
200
+ return;
201
+ }
202
+ }
203
+
204
+ public boolean enter(String string) {
205
+ buffer.append(string);
206
+ return true;
207
+ }
208
+
209
+ public void leave(String string) {
210
+ // no-op
211
+ }
212
+
213
+ public boolean enter(Attr attr) {
214
+ String name = attr.getName();
215
+ buffer.append(name);
216
+ if (!asHtml || !isHtmlBooleanAttr(name)) {
217
+ buffer.append("=");
218
+ buffer.append("\"");
219
+ buffer.append(serializeAttrTextContent(attr.getValue(), htmlDoc));
220
+ buffer.append("\"");
221
+ }
222
+ return true;
223
+ }
224
+
225
+ public static final String[] HTML_BOOLEAN_ATTRS = {
226
+ "checked", "compact", "declare", "defer", "disabled", "ismap",
227
+ "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
228
+ "selected"
229
+ };
230
+
231
+ private boolean isHtmlBooleanAttr(String name) {
232
+ for (String s : HTML_BOOLEAN_ATTRS) {
233
+ if (s.equals(name)) return true;
234
+ }
235
+ return false;
236
+ }
237
+
238
+ private String serializeAttrTextContent(String s, boolean htmlDoc) {
239
+ if (s == null) return "";
240
+
241
+ char[] c = s.toCharArray();
242
+ StringBuffer buffer = new StringBuffer(c.length);
243
+
244
+ for(int i = 0; i < c.length; i++) {
245
+ switch(c[i]){
246
+ case '\n': buffer.append("&#10;"); break;
247
+ case '\r': buffer.append("&#13;"); break;
248
+ case '\t': buffer.append("&#9;"); break;
249
+ case '"': if (htmlDoc) buffer.append("%22");
250
+ else buffer.append("&quot;");
251
+ break;
252
+ case '<': buffer.append("&lt;"); break;
253
+ case '>': buffer.append("&gt;"); break;
254
+ case '&': buffer.append("&amp;"); break;
255
+ default: buffer.append(c[i]);
256
+ }
257
+ }
258
+
259
+ return buffer.toString();
260
+ }
261
+
262
+ public void leave(Attr attr) {
263
+ // no-op
264
+ }
265
+
266
+ public boolean enter(CDATASection cdata) {
267
+ buffer.append("<![CDATA[");
268
+ buffer.append(cdata.getData());
269
+ buffer.append("]]>");
270
+ return true;
271
+ }
272
+
273
+ public void leave(CDATASection cdata) {
274
+ // no-op
275
+ }
276
+
277
+ public boolean enter(Comment comment) {
278
+ buffer.append("<!--");
279
+ buffer.append(comment.getData());
280
+ buffer.append("-->");
281
+ return true;
282
+ }
283
+
284
+ public void leave(Comment comment) {
285
+ // no-op
286
+ }
287
+
288
+ public boolean enter(Document document) {
289
+ if (!noDecl) {
290
+ buffer.append("<?xml version=\"");
291
+ buffer.append(document.getXmlVersion());
292
+ buffer.append("\"");
293
+
294
+ if (encoding != null) {
295
+ buffer.append(" encoding=\"");
296
+ buffer.append(encoding.toUpperCase());
297
+ buffer.append("\"");
298
+ }
299
+ buffer.append("?>\n");
300
+ }
301
+ return true;
302
+ }
303
+
304
+ public void leave(Document document) {
305
+ // no-op
306
+ }
307
+
308
+ public boolean enter(DocumentType docType) {
309
+ String name = docType.getName();
310
+ String pubId = docType.getPublicId();
311
+ String sysId = docType.getSystemId();
312
+ String internalSubset = docType.getInternalSubset();
313
+ if (docType.getPreviousSibling() != null) {
314
+ buffer.append("\n");
315
+ }
316
+ buffer.append("<!DOCTYPE " + name + " ");
317
+ if (pubId != null) {
318
+ buffer.append("PUBLIC \"" + pubId + "\"");
319
+ if (sysId != null) buffer.append(" \"" + sysId + "\"");
320
+ } else if (sysId != null) {
321
+ buffer.append("SYSTEM \"" + sysId + "\"");
322
+ }
323
+ if (internalSubset != null) {
324
+ buffer.append(" [");
325
+ buffer.append(internalSubset);
326
+ buffer.append("]");
327
+ }
328
+ buffer.append(">\n");
329
+ return true;
330
+ }
331
+
332
+ public void leave(DocumentType docType) {
333
+ // no-op
334
+ }
335
+
336
+ public boolean enter(Element element) {
337
+ String current = indentation.peek();
338
+ buffer.append(current);
339
+ if (needIndent()) {
340
+ indentation.push(current + indentString);
341
+ }
342
+ String name = element.getTagName();
343
+ buffer.append("<" + name);
344
+ NamedNodeMap attrs = element.getAttributes();
345
+ for (int i=0; i<attrs.getLength(); i++) {
346
+ Attr attr = (Attr) attrs.item(i);
347
+ if (attr.getSpecified()) {
348
+ buffer.append(" ");
349
+ enter(attr);
350
+ leave(attr);
351
+ }
352
+ }
353
+ if (element.hasChildNodes()) {
354
+ buffer.append(">");
355
+ if (needBreakInOpening(element)) buffer.append("\n");
356
+ return true;
357
+ }
358
+ // no child
359
+ if (asHtml) {
360
+ buffer.append(">");
361
+ } else if (asXhtml) {
362
+ buffer.append(" />");
363
+ } else if (asXml && noEmpty) {
364
+ buffer.append(">");
365
+ } else {
366
+ buffer.append("/>");
367
+ }
368
+ if (needBreakInOpening(element)) {
369
+ buffer.append("\n");
370
+ }
371
+ return true;
372
+ }
373
+
374
+ private boolean needIndent() {
375
+ if (fragment) return false; // a given option might be fragment and format. fragment matters
376
+ if (format || asBuilder) return true;
377
+ return false;
378
+ }
379
+
380
+ private boolean needBreakInOpening(Element element) {
381
+ if (fragment) return false;
382
+ if (format) return true;
383
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
384
+ if (format && element.getNextSibling() == null && element.hasChildNodes()) return true;
385
+ return false;
386
+ }
387
+
388
+ private boolean isEmpty(String name) {
389
+ HTMLElements.Element element = HTMLElements.getElement(name);
390
+ return element.isEmpty();
391
+ }
392
+
393
+ public void leave(Element element) {
394
+ String name = element.getTagName();
395
+ if (element.hasChildNodes()) {
396
+ if (needIndentInClosing(element)) {
397
+ indentation.pop();
398
+ buffer.append(indentation.peek());
399
+ } else if (asBuilder) {
400
+ indentation.pop();
401
+ }
402
+ buffer.append("</" + name + ">");
403
+ if (needBreakInClosing()) {
404
+ buffer.append("\n");
405
+ }
406
+ return;
407
+ }
408
+ // no child, but HTML might need a closing tag.
409
+ if (asHtml || noEmpty) {
410
+ if (!isEmpty(name) && noEmpty) {
411
+ buffer.append("</" + name + ">");
412
+ }
413
+ }
414
+ if (needBreakInClosing()) {
415
+ indentation.pop();
416
+ buffer.append("\n");
417
+ }
418
+ }
419
+
420
+ private boolean needIndentInClosing(Element element) {
421
+ if (fragment) return false; // a given option might be fragment and format. fragment matters
422
+ if (format) return true;
423
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
424
+ return false;
425
+ }
426
+
427
+ private boolean needBreakInClosing() {
428
+ if (fragment) return false;
429
+ if (format || asBuilder) return true;
430
+ return false;
431
+ }
432
+
433
+ public boolean enter(Entity entity) {
434
+ String name = entity.getNodeName();
435
+ String pubId = entity.getPublicId();
436
+ String sysId = entity.getSystemId();
437
+ String notation = entity.getNotationName();
438
+ buffer.append("<!ENTITY ");
439
+ buffer.append(name);
440
+ if (pubId != null) {
441
+ buffer.append(" PUBLIC \"");
442
+ buffer.append(pubId);
443
+ buffer.append("\"");
444
+ }
445
+ if (sysId != null) {
446
+ buffer.append(" SYSTEM \"");
447
+ buffer.append(sysId);
448
+ buffer.append("\"");
449
+ }
450
+ if (notation != null) {
451
+ buffer.append(" NDATA ");
452
+ buffer.append(notation);
453
+ }
454
+ buffer.append(">");
455
+ return true;
456
+ }
457
+
458
+ public void leave(Entity entity) {
459
+ // no-op
460
+ }
461
+
462
+ public boolean enter(EntityReference entityRef) {
463
+ // no-op?
464
+ return true;
465
+ }
466
+
467
+ public void leave(EntityReference entityRef) {
468
+ // no-op
469
+ }
470
+
471
+ public boolean enter(Notation notation) {
472
+ String name = notation.getNodeName();
473
+ String pubId = notation.getPublicId();
474
+ String sysId = notation.getSystemId();
475
+ buffer.append("<!NOTATION ");
476
+ buffer.append(name);
477
+ if (pubId != null) {
478
+ buffer.append(" PUBLIC \"");
479
+ buffer.append(pubId);
480
+ buffer.append("\"");
481
+ if (sysId != null) {
482
+ buffer.append(" \"");
483
+ buffer.append(sysId);
484
+ buffer.append("\"");
485
+ }
486
+ } else if (sysId != null) {
487
+ buffer.append(" SYSTEM \"");
488
+ buffer.append(sysId);
489
+ buffer.append("\"");
490
+ }
491
+ buffer.append(">");
492
+ return true;
493
+ }
494
+
495
+ public void leave(Notation notation) {
496
+ // no-op
497
+ }
498
+
499
+ public boolean enter(ProcessingInstruction pi) {
500
+ buffer.append("<?");
501
+ buffer.append(pi.getTarget());
502
+ buffer.append(" ");
503
+ buffer.append(pi.getData());
504
+ if (asHtml) buffer.append(">");
505
+ else buffer.append("?>");
506
+ buffer.append("\n");
507
+ return true;
508
+ }
509
+
510
+ public void leave(ProcessingInstruction pi) {
511
+ // no-op
512
+ }
513
+
514
+ private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
515
+ public boolean enter(Text text) {
516
+ String textContent = text.getNodeValue();
517
+ if (needIndentText() && "".equals(textContent.trim())) return true;
518
+ if (needIndentText()) {
519
+ String current = indentation.peek();
520
+ buffer.append(current);
521
+ indentation.push(current + indentString);
522
+ if (textContent.charAt(0) == lineSeparator) textContent = textContent.substring(1);
523
+ }
524
+ if (isNotXmlEscaped(textContent)) {
525
+ textContent = encodeJavaString(textContent);
526
+ }
527
+ if (getEncoding(text) == null) {
528
+ textContent = encodeStringToHtmlEntity(textContent);
529
+ }
530
+ buffer.append(textContent);
531
+ return true;
532
+ }
533
+
534
+ private boolean needIndentText() {
535
+ if (fragment) return false;
536
+ if (format) return true;
537
+ return false;
538
+ }
539
+
540
+ public void leave(Text text) {
541
+ String textContent = text.getNodeValue();
542
+ if (needIndentText() && !"".equals(textContent.trim())) {
543
+ indentation.pop();
544
+ if (textContent.charAt(textContent.length()-1) != lineSeparator) {
545
+ buffer.append("\n");
546
+ }
547
+ }
548
+ }
549
+
550
+ private String getEncoding(Text text) {
551
+ if (encoding != null) return encoding;
552
+ encoding = text.getOwnerDocument().getInputEncoding();
553
+ return encoding;
554
+ }
555
+
556
+ private String encodeStringToHtmlEntity(String text) {
557
+ int last = 126; // = U+007E. No need to encode under U+007E.
558
+ StringBuffer sb = new StringBuffer();
559
+ for (int i=0; i<text.length(); i++) {
560
+ int codePoint = text.codePointAt(i);
561
+ if (codePoint > last) sb.append("&#x" + Integer.toHexString(codePoint) + ";");
562
+ else sb.append(text.charAt(i));
563
+ }
564
+ return new String(sb);
565
+ }
566
+
567
+ }