nokogiri-maven 1.5.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (278) hide show
  1. data/CHANGELOG.ja.rdoc +544 -0
  2. data/CHANGELOG.rdoc +532 -0
  3. data/Manifest.txt +283 -0
  4. data/README.ja.rdoc +106 -0
  5. data/README.rdoc +174 -0
  6. data/Rakefile +164 -0
  7. data/bin/nokogiri +53 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +119 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +259 -0
  13. data/ext/java/nokogiri/NokogiriService.java +590 -0
  14. data/ext/java/nokogiri/XmlAttr.java +180 -0
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
  16. data/ext/java/nokogiri/XmlCdata.java +84 -0
  17. data/ext/java/nokogiri/XmlComment.java +86 -0
  18. data/ext/java/nokogiri/XmlDocument.java +519 -0
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +223 -0
  20. data/ext/java/nokogiri/XmlDtd.java +469 -0
  21. data/ext/java/nokogiri/XmlElement.java +195 -0
  22. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  23. data/ext/java/nokogiri/XmlElementDecl.java +152 -0
  24. data/ext/java/nokogiri/XmlEntityDecl.java +162 -0
  25. data/ext/java/nokogiri/XmlEntityReference.java +97 -0
  26. data/ext/java/nokogiri/XmlNamespace.java +183 -0
  27. data/ext/java/nokogiri/XmlNode.java +1378 -0
  28. data/ext/java/nokogiri/XmlNodeSet.java +267 -0
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +99 -0
  30. data/ext/java/nokogiri/XmlReader.java +408 -0
  31. data/ext/java/nokogiri/XmlRelaxng.java +144 -0
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +367 -0
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +184 -0
  34. data/ext/java/nokogiri/XmlSchema.java +324 -0
  35. data/ext/java/nokogiri/XmlSyntaxError.java +119 -0
  36. data/ext/java/nokogiri/XmlText.java +119 -0
  37. data/ext/java/nokogiri/XmlXpathContext.java +199 -0
  38. data/ext/java/nokogiri/XsltStylesheet.java +197 -0
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +204 -0
  40. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
  41. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +86 -0
  42. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  43. data/ext/java/nokogiri/internals/NokogiriHelpers.java +639 -0
  44. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +167 -0
  45. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +130 -0
  46. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +74 -0
  47. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  48. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +79 -0
  49. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +141 -0
  50. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +73 -0
  51. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +67 -0
  52. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +86 -0
  53. data/ext/java/nokogiri/internals/ParserContext.java +276 -0
  54. data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
  55. data/ext/java/nokogiri/internals/ReaderNode.java +531 -0
  56. data/ext/java/nokogiri/internals/SaveContextVisitor.java +567 -0
  57. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +76 -0
  58. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  59. data/ext/java/nokogiri/internals/XmlDomParser.java +76 -0
  60. data/ext/java/nokogiri/internals/XmlDomParserContext.java +244 -0
  61. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  62. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  63. data/ext/nokogiri/depend +358 -0
  64. data/ext/nokogiri/extconf.rb +124 -0
  65. data/ext/nokogiri/html_document.c +154 -0
  66. data/ext/nokogiri/html_document.h +10 -0
  67. data/ext/nokogiri/html_element_description.c +276 -0
  68. data/ext/nokogiri/html_element_description.h +10 -0
  69. data/ext/nokogiri/html_entity_lookup.c +32 -0
  70. data/ext/nokogiri/html_entity_lookup.h +8 -0
  71. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  72. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  73. data/ext/nokogiri/nokogiri.c +115 -0
  74. data/ext/nokogiri/nokogiri.h +160 -0
  75. data/ext/nokogiri/xml_attr.c +94 -0
  76. data/ext/nokogiri/xml_attr.h +9 -0
  77. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  78. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  79. data/ext/nokogiri/xml_cdata.c +56 -0
  80. data/ext/nokogiri/xml_cdata.h +9 -0
  81. data/ext/nokogiri/xml_comment.c +54 -0
  82. data/ext/nokogiri/xml_comment.h +9 -0
  83. data/ext/nokogiri/xml_document.c +478 -0
  84. data/ext/nokogiri/xml_document.h +23 -0
  85. data/ext/nokogiri/xml_document_fragment.c +48 -0
  86. data/ext/nokogiri/xml_document_fragment.h +10 -0
  87. data/ext/nokogiri/xml_dtd.c +202 -0
  88. data/ext/nokogiri/xml_dtd.h +10 -0
  89. data/ext/nokogiri/xml_element_content.c +123 -0
  90. data/ext/nokogiri/xml_element_content.h +10 -0
  91. data/ext/nokogiri/xml_element_decl.c +69 -0
  92. data/ext/nokogiri/xml_element_decl.h +9 -0
  93. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  94. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  95. data/ext/nokogiri/xml_entity_decl.c +110 -0
  96. data/ext/nokogiri/xml_entity_decl.h +10 -0
  97. data/ext/nokogiri/xml_entity_reference.c +52 -0
  98. data/ext/nokogiri/xml_entity_reference.h +9 -0
  99. data/ext/nokogiri/xml_io.c +56 -0
  100. data/ext/nokogiri/xml_io.h +11 -0
  101. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  102. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  103. data/ext/nokogiri/xml_namespace.c +84 -0
  104. data/ext/nokogiri/xml_namespace.h +13 -0
  105. data/ext/nokogiri/xml_node.c +1385 -0
  106. data/ext/nokogiri/xml_node.h +13 -0
  107. data/ext/nokogiri/xml_node_set.c +418 -0
  108. data/ext/nokogiri/xml_node_set.h +9 -0
  109. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  110. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  111. data/ext/nokogiri/xml_reader.c +684 -0
  112. data/ext/nokogiri/xml_reader.h +10 -0
  113. data/ext/nokogiri/xml_relax_ng.c +161 -0
  114. data/ext/nokogiri/xml_relax_ng.h +9 -0
  115. data/ext/nokogiri/xml_sax_parser.c +293 -0
  116. data/ext/nokogiri/xml_sax_parser.h +39 -0
  117. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  118. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  119. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  120. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  121. data/ext/nokogiri/xml_schema.c +205 -0
  122. data/ext/nokogiri/xml_schema.h +9 -0
  123. data/ext/nokogiri/xml_syntax_error.c +58 -0
  124. data/ext/nokogiri/xml_syntax_error.h +13 -0
  125. data/ext/nokogiri/xml_text.c +50 -0
  126. data/ext/nokogiri/xml_text.h +9 -0
  127. data/ext/nokogiri/xml_xpath_context.c +309 -0
  128. data/ext/nokogiri/xml_xpath_context.h +9 -0
  129. data/ext/nokogiri/xslt_stylesheet.c +264 -0
  130. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  131. data/lib/nokogiri.rb +127 -0
  132. data/lib/nokogiri/css.rb +27 -0
  133. data/lib/nokogiri/css/node.rb +99 -0
  134. data/lib/nokogiri/css/parser.rb +677 -0
  135. data/lib/nokogiri/css/parser.y +237 -0
  136. data/lib/nokogiri/css/parser_extras.rb +91 -0
  137. data/lib/nokogiri/css/syntax_error.rb +7 -0
  138. data/lib/nokogiri/css/tokenizer.rb +152 -0
  139. data/lib/nokogiri/css/tokenizer.rex +55 -0
  140. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  141. data/lib/nokogiri/decorators/slop.rb +35 -0
  142. data/lib/nokogiri/html.rb +36 -0
  143. data/lib/nokogiri/html/builder.rb +35 -0
  144. data/lib/nokogiri/html/document.rb +213 -0
  145. data/lib/nokogiri/html/document_fragment.rb +41 -0
  146. data/lib/nokogiri/html/element_description.rb +23 -0
  147. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  148. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  149. data/lib/nokogiri/html/sax/parser.rb +52 -0
  150. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  151. data/lib/nokogiri/nokogiri.jar +0 -0
  152. data/lib/nokogiri/syntax_error.rb +4 -0
  153. data/lib/nokogiri/version.rb +88 -0
  154. data/lib/nokogiri/xml.rb +67 -0
  155. data/lib/nokogiri/xml/attr.rb +14 -0
  156. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  157. data/lib/nokogiri/xml/builder.rb +425 -0
  158. data/lib/nokogiri/xml/cdata.rb +11 -0
  159. data/lib/nokogiri/xml/character_data.rb +7 -0
  160. data/lib/nokogiri/xml/document.rb +234 -0
  161. data/lib/nokogiri/xml/document_fragment.rb +98 -0
  162. data/lib/nokogiri/xml/dtd.rb +22 -0
  163. data/lib/nokogiri/xml/element_content.rb +36 -0
  164. data/lib/nokogiri/xml/element_decl.rb +13 -0
  165. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  166. data/lib/nokogiri/xml/namespace.rb +13 -0
  167. data/lib/nokogiri/xml/node.rb +915 -0
  168. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  169. data/lib/nokogiri/xml/node_set.rb +357 -0
  170. data/lib/nokogiri/xml/notation.rb +6 -0
  171. data/lib/nokogiri/xml/parse_options.rb +93 -0
  172. data/lib/nokogiri/xml/pp.rb +2 -0
  173. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  174. data/lib/nokogiri/xml/pp/node.rb +56 -0
  175. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  176. data/lib/nokogiri/xml/reader.rb +112 -0
  177. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  178. data/lib/nokogiri/xml/sax.rb +4 -0
  179. data/lib/nokogiri/xml/sax/document.rb +164 -0
  180. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  181. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  182. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  183. data/lib/nokogiri/xml/schema.rb +63 -0
  184. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  185. data/lib/nokogiri/xml/text.rb +9 -0
  186. data/lib/nokogiri/xml/xpath.rb +10 -0
  187. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  188. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  189. data/lib/nokogiri/xslt.rb +52 -0
  190. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  191. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  192. data/nokogiri_help_responses.md +40 -0
  193. data/tasks/cross_compile.rb +152 -0
  194. data/tasks/nokogiri.org.rb +18 -0
  195. data/tasks/test.rb +94 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +303 -0
  198. data/test/css/test_tokenizer.rb +198 -0
  199. data/test/css/test_xpath_visitor.rb +85 -0
  200. data/test/decorators/test_slop.rb +16 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/encoding.html +82 -0
  207. data/test/files/encoding.xhtml +84 -0
  208. data/test/files/exslt.xml +8 -0
  209. data/test/files/exslt.xslt +35 -0
  210. data/test/files/foo/foo.xsd +4 -0
  211. data/test/files/metacharset.html +10 -0
  212. data/test/files/noencoding.html +47 -0
  213. data/test/files/po.xml +32 -0
  214. data/test/files/po.xsd +66 -0
  215. data/test/files/shift_jis.html +10 -0
  216. data/test/files/shift_jis.xml +5 -0
  217. data/test/files/snuggles.xml +3 -0
  218. data/test/files/staff.dtd +10 -0
  219. data/test/files/staff.xml +59 -0
  220. data/test/files/staff.xslt +32 -0
  221. data/test/files/tlm.html +850 -0
  222. data/test/files/valid_bar.xml +2 -0
  223. data/test/helper.rb +173 -0
  224. data/test/html/sax/test_parser.rb +136 -0
  225. data/test/html/sax/test_parser_context.rb +48 -0
  226. data/test/html/test_builder.rb +164 -0
  227. data/test/html/test_document.rb +472 -0
  228. data/test/html/test_document_encoding.rb +138 -0
  229. data/test/html/test_document_fragment.rb +255 -0
  230. data/test/html/test_element_description.rb +100 -0
  231. data/test/html/test_named_characters.rb +14 -0
  232. data/test/html/test_node.rb +190 -0
  233. data/test/html/test_node_encoding.rb +27 -0
  234. data/test/test_convert_xpath.rb +135 -0
  235. data/test/test_css_cache.rb +45 -0
  236. data/test/test_encoding_handler.rb +46 -0
  237. data/test/test_memory_leak.rb +72 -0
  238. data/test/test_nokogiri.rb +132 -0
  239. data/test/test_reader.rb +425 -0
  240. data/test/test_soap4r_sax.rb +52 -0
  241. data/test/test_xslt_transforms.rb +193 -0
  242. data/test/xml/node/test_save_options.rb +28 -0
  243. data/test/xml/node/test_subclass.rb +44 -0
  244. data/test/xml/sax/test_parser.rb +338 -0
  245. data/test/xml/sax/test_parser_context.rb +113 -0
  246. data/test/xml/sax/test_push_parser.rb +156 -0
  247. data/test/xml/test_attr.rb +65 -0
  248. data/test/xml/test_attribute_decl.rb +86 -0
  249. data/test/xml/test_builder.rb +227 -0
  250. data/test/xml/test_cdata.rb +50 -0
  251. data/test/xml/test_comment.rb +29 -0
  252. data/test/xml/test_document.rb +697 -0
  253. data/test/xml/test_document_encoding.rb +26 -0
  254. data/test/xml/test_document_fragment.rb +192 -0
  255. data/test/xml/test_dtd.rb +107 -0
  256. data/test/xml/test_dtd_encoding.rb +33 -0
  257. data/test/xml/test_element_content.rb +56 -0
  258. data/test/xml/test_element_decl.rb +73 -0
  259. data/test/xml/test_entity_decl.rb +122 -0
  260. data/test/xml/test_entity_reference.rb +21 -0
  261. data/test/xml/test_namespace.rb +70 -0
  262. data/test/xml/test_node.rb +917 -0
  263. data/test/xml/test_node_attributes.rb +34 -0
  264. data/test/xml/test_node_encoding.rb +107 -0
  265. data/test/xml/test_node_reparenting.rb +334 -0
  266. data/test/xml/test_node_set.rb +742 -0
  267. data/test/xml/test_parse_options.rb +52 -0
  268. data/test/xml/test_processing_instruction.rb +30 -0
  269. data/test/xml/test_reader_encoding.rb +126 -0
  270. data/test/xml/test_relax_ng.rb +60 -0
  271. data/test/xml/test_schema.rb +94 -0
  272. data/test/xml/test_syntax_error.rb +12 -0
  273. data/test/xml/test_text.rb +47 -0
  274. data/test/xml/test_unparented_node.rb +381 -0
  275. data/test/xml/test_xpath.rb +237 -0
  276. data/test/xslt/test_custom_functions.rb +94 -0
  277. data/test/xslt/test_exception_handling.rb +37 -0
  278. metadata +552 -0
@@ -0,0 +1,567 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2011:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri.internals;
34
+
35
+ import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
36
+ import static nokogiri.internals.NokogiriHelpers.isNotXmlEscaped;
37
+
38
+ import java.util.Stack;
39
+
40
+ import org.cyberneko.html.HTMLElements;
41
+ import org.w3c.dom.Attr;
42
+ import org.w3c.dom.CDATASection;
43
+ import org.w3c.dom.Comment;
44
+ import org.w3c.dom.Document;
45
+ import org.w3c.dom.DocumentType;
46
+ import org.w3c.dom.Element;
47
+ import org.w3c.dom.Entity;
48
+ import org.w3c.dom.EntityReference;
49
+ import org.w3c.dom.NamedNodeMap;
50
+ import org.w3c.dom.Node;
51
+ import org.w3c.dom.Notation;
52
+ import org.w3c.dom.ProcessingInstruction;
53
+ import org.w3c.dom.Text;
54
+
55
+ /**
56
+ * A class for serializing a document.
57
+ *
58
+ * @author sergio
59
+ * @author Patrick Mahoney <pat@polycrystal.org>
60
+ * @author Yoko Harada <yokolet@gmail.com>
61
+ */
62
+ public class SaveContextVisitor {
63
+
64
+ private StringBuffer buffer;
65
+ private Stack<String> indentation;
66
+ private String encoding, indentString;
67
+ private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;
68
+
69
+ /*
70
+ * U can't touch this.
71
+ * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
72
+ *
73
+ * Taken from libxml save options.
74
+ */
75
+
76
+ public static final int FORMAT = 1;
77
+ public static final int NO_DECL = 2;
78
+ public static final int NO_EMPTY = 4;
79
+ public static final int NO_XHTML = 8;
80
+ public static final int AS_XHTML = 16;
81
+ public static final int AS_XML = 32;
82
+ public static final int AS_HTML = 64;
83
+ public static final int AS_BUILDER = 128;
84
+
85
+ public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment) {
86
+ buffer = new StringBuffer();
87
+ this.encoding = encoding;
88
+ indentation = new Stack<String>(); indentation.push("");
89
+ this.htmlDoc = htmlDoc;
90
+ this.fragment = fragment;
91
+ format = (options & FORMAT) == FORMAT;
92
+
93
+ noDecl = (options & NO_DECL) == NO_DECL;
94
+ noEmpty = (options & NO_EMPTY) == NO_EMPTY;
95
+ noXhtml = (options & NO_XHTML) == NO_XHTML;
96
+ asXhtml = (options & AS_XHTML) == AS_XHTML;
97
+ asXml = (options & AS_XML) == AS_XML;
98
+ asHtml = (options & AS_HTML) == AS_HTML;
99
+ asBuilder = (options & AS_BUILDER) == AS_BUILDER;
100
+ if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
101
+ if ((!format && indent != null) && indent.length() > 0) format = true;
102
+ if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
103
+ indentString = indent;
104
+ if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = true;
105
+ }
106
+
107
+ @Override
108
+ public String toString() {
109
+ return (new String(buffer));
110
+ }
111
+
112
+ public void setHtmlDoc(boolean htmlDoc) {
113
+ this.htmlDoc = htmlDoc;
114
+ }
115
+
116
+ public void setEncoding(String encoding) {
117
+ this.encoding = encoding;
118
+ }
119
+
120
+ public boolean enter(Node node) {
121
+ if (node instanceof Document) {
122
+ return enter((Document)node);
123
+ }
124
+ if (node instanceof Element) {
125
+ return enter((Element)node);
126
+ }
127
+ if (node instanceof Attr) {
128
+ return enter((Attr)node);
129
+ }
130
+ if (node instanceof Text) {
131
+ return enter((Text)node);
132
+ }
133
+ if (node instanceof CDATASection) {
134
+ return enter((CDATASection)node);
135
+ }
136
+ if (node instanceof Comment) {
137
+ return enter((Comment)node);
138
+ }
139
+ if (node instanceof DocumentType) {
140
+ return enter((DocumentType)node);
141
+ }
142
+ if (node instanceof Entity) {
143
+ return enter((Entity)node);
144
+ }
145
+ if (node instanceof EntityReference) {
146
+ return enter((EntityReference)node);
147
+ }
148
+ if (node instanceof Notation) {
149
+ return enter((Notation)node);
150
+ }
151
+ if (node instanceof ProcessingInstruction) {
152
+ return enter((ProcessingInstruction)node);
153
+ }
154
+ return false;
155
+ }
156
+
157
+ public void leave(Node node) {
158
+ if (node instanceof Document) {
159
+ leave((Document)node);
160
+ return;
161
+ }
162
+ if (node instanceof Element) {
163
+ leave((Element)node);
164
+ return;
165
+ }
166
+ if (node instanceof Attr) {
167
+ leave((Attr)node);
168
+ return;
169
+ }
170
+ if (node instanceof Text) {
171
+ leave((Text)node);
172
+ return;
173
+ }
174
+ if (node instanceof CDATASection) {
175
+ leave((CDATASection)node);
176
+ return;
177
+ }
178
+ if (node instanceof Comment) {
179
+ leave((Comment)node);
180
+ return;
181
+ }
182
+ if (node instanceof DocumentType) {
183
+ leave((DocumentType)node);
184
+ return;
185
+ }
186
+ if (node instanceof Entity) {
187
+ leave((Entity)node);
188
+ return;
189
+ }
190
+ if (node instanceof EntityReference) {
191
+ leave((EntityReference)node);
192
+ return;
193
+ }
194
+ if (node instanceof Notation) {
195
+ leave((Notation)node);
196
+ return;
197
+ }
198
+ if (node instanceof ProcessingInstruction) {
199
+ leave((ProcessingInstruction)node);
200
+ return;
201
+ }
202
+ }
203
+
204
+ public boolean enter(String string) {
205
+ buffer.append(string);
206
+ return true;
207
+ }
208
+
209
+ public void leave(String string) {
210
+ // no-op
211
+ }
212
+
213
+ public boolean enter(Attr attr) {
214
+ String name = attr.getName();
215
+ buffer.append(name);
216
+ if (!asHtml || !isHtmlBooleanAttr(name)) {
217
+ buffer.append("=");
218
+ buffer.append("\"");
219
+ buffer.append(serializeAttrTextContent(attr.getValue(), htmlDoc));
220
+ buffer.append("\"");
221
+ }
222
+ return true;
223
+ }
224
+
225
+ public static final String[] HTML_BOOLEAN_ATTRS = {
226
+ "checked", "compact", "declare", "defer", "disabled", "ismap",
227
+ "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
228
+ "selected"
229
+ };
230
+
231
+ private boolean isHtmlBooleanAttr(String name) {
232
+ for (String s : HTML_BOOLEAN_ATTRS) {
233
+ if (s.equals(name)) return true;
234
+ }
235
+ return false;
236
+ }
237
+
238
+ private String serializeAttrTextContent(String s, boolean htmlDoc) {
239
+ if (s == null) return "";
240
+
241
+ char[] c = s.toCharArray();
242
+ StringBuffer buffer = new StringBuffer(c.length);
243
+
244
+ for(int i = 0; i < c.length; i++) {
245
+ switch(c[i]){
246
+ case '\n': buffer.append("&#10;"); break;
247
+ case '\r': buffer.append("&#13;"); break;
248
+ case '\t': buffer.append("&#9;"); break;
249
+ case '"': if (htmlDoc) buffer.append("%22");
250
+ else buffer.append("&quot;");
251
+ break;
252
+ case '<': buffer.append("&lt;"); break;
253
+ case '>': buffer.append("&gt;"); break;
254
+ case '&': buffer.append("&amp;"); break;
255
+ default: buffer.append(c[i]);
256
+ }
257
+ }
258
+
259
+ return buffer.toString();
260
+ }
261
+
262
+ public void leave(Attr attr) {
263
+ // no-op
264
+ }
265
+
266
+ public boolean enter(CDATASection cdata) {
267
+ buffer.append("<![CDATA[");
268
+ buffer.append(cdata.getData());
269
+ buffer.append("]]>");
270
+ return true;
271
+ }
272
+
273
+ public void leave(CDATASection cdata) {
274
+ // no-op
275
+ }
276
+
277
+ public boolean enter(Comment comment) {
278
+ buffer.append("<!--");
279
+ buffer.append(comment.getData());
280
+ buffer.append("-->");
281
+ return true;
282
+ }
283
+
284
+ public void leave(Comment comment) {
285
+ // no-op
286
+ }
287
+
288
+ public boolean enter(Document document) {
289
+ if (!noDecl) {
290
+ buffer.append("<?xml version=\"");
291
+ buffer.append(document.getXmlVersion());
292
+ buffer.append("\"");
293
+
294
+ if (encoding != null) {
295
+ buffer.append(" encoding=\"");
296
+ buffer.append(encoding.toUpperCase());
297
+ buffer.append("\"");
298
+ }
299
+ buffer.append("?>\n");
300
+ }
301
+ return true;
302
+ }
303
+
304
+ public void leave(Document document) {
305
+ // no-op
306
+ }
307
+
308
+ public boolean enter(DocumentType docType) {
309
+ String name = docType.getName();
310
+ String pubId = docType.getPublicId();
311
+ String sysId = docType.getSystemId();
312
+ String internalSubset = docType.getInternalSubset();
313
+ if (docType.getPreviousSibling() != null) {
314
+ buffer.append("\n");
315
+ }
316
+ buffer.append("<!DOCTYPE " + name + " ");
317
+ if (pubId != null) {
318
+ buffer.append("PUBLIC \"" + pubId + "\"");
319
+ if (sysId != null) buffer.append(" \"" + sysId + "\"");
320
+ } else if (sysId != null) {
321
+ buffer.append("SYSTEM \"" + sysId + "\"");
322
+ }
323
+ if (internalSubset != null) {
324
+ buffer.append(" [");
325
+ buffer.append(internalSubset);
326
+ buffer.append("]");
327
+ }
328
+ buffer.append(">\n");
329
+ return true;
330
+ }
331
+
332
+ public void leave(DocumentType docType) {
333
+ // no-op
334
+ }
335
+
336
+ public boolean enter(Element element) {
337
+ String current = indentation.peek();
338
+ buffer.append(current);
339
+ if (needIndent()) {
340
+ indentation.push(current + indentString);
341
+ }
342
+ String name = element.getTagName();
343
+ buffer.append("<" + name);
344
+ NamedNodeMap attrs = element.getAttributes();
345
+ for (int i=0; i<attrs.getLength(); i++) {
346
+ Attr attr = (Attr) attrs.item(i);
347
+ if (attr.getSpecified()) {
348
+ buffer.append(" ");
349
+ enter(attr);
350
+ leave(attr);
351
+ }
352
+ }
353
+ if (element.hasChildNodes()) {
354
+ buffer.append(">");
355
+ if (needBreakInOpening(element)) buffer.append("\n");
356
+ return true;
357
+ }
358
+ // no child
359
+ if (asHtml) {
360
+ buffer.append(">");
361
+ } else if (asXhtml) {
362
+ buffer.append(" />");
363
+ } else if (asXml && noEmpty) {
364
+ buffer.append(">");
365
+ } else {
366
+ buffer.append("/>");
367
+ }
368
+ if (needBreakInOpening(element)) {
369
+ buffer.append("\n");
370
+ }
371
+ return true;
372
+ }
373
+
374
+ private boolean needIndent() {
375
+ if (fragment) return false; // a given option might be fragment and format. fragment matters
376
+ if (format || asBuilder) return true;
377
+ return false;
378
+ }
379
+
380
+ private boolean needBreakInOpening(Element element) {
381
+ if (fragment) return false;
382
+ if (format) return true;
383
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
384
+ if (format && element.getNextSibling() == null && element.hasChildNodes()) return true;
385
+ return false;
386
+ }
387
+
388
+ private boolean isEmpty(String name) {
389
+ HTMLElements.Element element = HTMLElements.getElement(name);
390
+ return element.isEmpty();
391
+ }
392
+
393
+ public void leave(Element element) {
394
+ String name = element.getTagName();
395
+ if (element.hasChildNodes()) {
396
+ if (needIndentInClosing(element)) {
397
+ indentation.pop();
398
+ buffer.append(indentation.peek());
399
+ } else if (asBuilder) {
400
+ indentation.pop();
401
+ }
402
+ buffer.append("</" + name + ">");
403
+ if (needBreakInClosing()) {
404
+ buffer.append("\n");
405
+ }
406
+ return;
407
+ }
408
+ // no child, but HTML might need a closing tag.
409
+ if (asHtml || noEmpty) {
410
+ if (!isEmpty(name) && noEmpty) {
411
+ buffer.append("</" + name + ">");
412
+ }
413
+ }
414
+ if (needBreakInClosing()) {
415
+ indentation.pop();
416
+ buffer.append("\n");
417
+ }
418
+ }
419
+
420
+ private boolean needIndentInClosing(Element element) {
421
+ if (fragment) return false; // a given option might be fragment and format. fragment matters
422
+ if (format) return true;
423
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
424
+ return false;
425
+ }
426
+
427
+ private boolean needBreakInClosing() {
428
+ if (fragment) return false;
429
+ if (format || asBuilder) return true;
430
+ return false;
431
+ }
432
+
433
+ public boolean enter(Entity entity) {
434
+ String name = entity.getNodeName();
435
+ String pubId = entity.getPublicId();
436
+ String sysId = entity.getSystemId();
437
+ String notation = entity.getNotationName();
438
+ buffer.append("<!ENTITY ");
439
+ buffer.append(name);
440
+ if (pubId != null) {
441
+ buffer.append(" PUBLIC \"");
442
+ buffer.append(pubId);
443
+ buffer.append("\"");
444
+ }
445
+ if (sysId != null) {
446
+ buffer.append(" SYSTEM \"");
447
+ buffer.append(sysId);
448
+ buffer.append("\"");
449
+ }
450
+ if (notation != null) {
451
+ buffer.append(" NDATA ");
452
+ buffer.append(notation);
453
+ }
454
+ buffer.append(">");
455
+ return true;
456
+ }
457
+
458
+ public void leave(Entity entity) {
459
+ // no-op
460
+ }
461
+
462
+ public boolean enter(EntityReference entityRef) {
463
+ // no-op?
464
+ return true;
465
+ }
466
+
467
+ public void leave(EntityReference entityRef) {
468
+ // no-op
469
+ }
470
+
471
+ public boolean enter(Notation notation) {
472
+ String name = notation.getNodeName();
473
+ String pubId = notation.getPublicId();
474
+ String sysId = notation.getSystemId();
475
+ buffer.append("<!NOTATION ");
476
+ buffer.append(name);
477
+ if (pubId != null) {
478
+ buffer.append(" PUBLIC \"");
479
+ buffer.append(pubId);
480
+ buffer.append("\"");
481
+ if (sysId != null) {
482
+ buffer.append(" \"");
483
+ buffer.append(sysId);
484
+ buffer.append("\"");
485
+ }
486
+ } else if (sysId != null) {
487
+ buffer.append(" SYSTEM \"");
488
+ buffer.append(sysId);
489
+ buffer.append("\"");
490
+ }
491
+ buffer.append(">");
492
+ return true;
493
+ }
494
+
495
+ public void leave(Notation notation) {
496
+ // no-op
497
+ }
498
+
499
+ public boolean enter(ProcessingInstruction pi) {
500
+ buffer.append("<?");
501
+ buffer.append(pi.getTarget());
502
+ buffer.append(" ");
503
+ buffer.append(pi.getData());
504
+ if (asHtml) buffer.append(">");
505
+ else buffer.append("?>");
506
+ buffer.append("\n");
507
+ return true;
508
+ }
509
+
510
+ public void leave(ProcessingInstruction pi) {
511
+ // no-op
512
+ }
513
+
514
+ private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
515
+ public boolean enter(Text text) {
516
+ String textContent = text.getNodeValue();
517
+ if (needIndentText() && "".equals(textContent.trim())) return true;
518
+ if (needIndentText()) {
519
+ String current = indentation.peek();
520
+ buffer.append(current);
521
+ indentation.push(current + indentString);
522
+ if (textContent.charAt(0) == lineSeparator) textContent = textContent.substring(1);
523
+ }
524
+ if (isNotXmlEscaped(textContent)) {
525
+ textContent = encodeJavaString(textContent);
526
+ }
527
+ if (getEncoding(text) == null) {
528
+ textContent = encodeStringToHtmlEntity(textContent);
529
+ }
530
+ buffer.append(textContent);
531
+ return true;
532
+ }
533
+
534
+ private boolean needIndentText() {
535
+ if (fragment) return false;
536
+ if (format) return true;
537
+ return false;
538
+ }
539
+
540
+ public void leave(Text text) {
541
+ String textContent = text.getNodeValue();
542
+ if (needIndentText() && !"".equals(textContent.trim())) {
543
+ indentation.pop();
544
+ if (textContent.charAt(textContent.length()-1) != lineSeparator) {
545
+ buffer.append("\n");
546
+ }
547
+ }
548
+ }
549
+
550
+ private String getEncoding(Text text) {
551
+ if (encoding != null) return encoding;
552
+ encoding = text.getOwnerDocument().getInputEncoding();
553
+ return encoding;
554
+ }
555
+
556
+ private String encodeStringToHtmlEntity(String text) {
557
+ int last = 126; // = U+007E. No need to encode under U+007E.
558
+ StringBuffer sb = new StringBuffer();
559
+ for (int i=0; i<text.length(); i++) {
560
+ int codePoint = text.codePointAt(i);
561
+ if (codePoint > last) sb.append("&#x" + Integer.toHexString(codePoint) + ";");
562
+ else sb.append(text.charAt(i));
563
+ }
564
+ return new String(sb);
565
+ }
566
+
567
+ }