nokogiri-maven 1.5.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. data/CHANGELOG.ja.rdoc +544 -0
  2. data/CHANGELOG.rdoc +532 -0
  3. data/Manifest.txt +283 -0
  4. data/README.ja.rdoc +106 -0
  5. data/README.rdoc +174 -0
  6. data/Rakefile +164 -0
  7. data/bin/nokogiri +53 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +119 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +259 -0
  13. data/ext/java/nokogiri/NokogiriService.java +590 -0
  14. data/ext/java/nokogiri/XmlAttr.java +180 -0
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
  16. data/ext/java/nokogiri/XmlCdata.java +84 -0
  17. data/ext/java/nokogiri/XmlComment.java +86 -0
  18. data/ext/java/nokogiri/XmlDocument.java +519 -0
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +223 -0
  20. data/ext/java/nokogiri/XmlDtd.java +469 -0
  21. data/ext/java/nokogiri/XmlElement.java +195 -0
  22. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  23. data/ext/java/nokogiri/XmlElementDecl.java +152 -0
  24. data/ext/java/nokogiri/XmlEntityDecl.java +162 -0
  25. data/ext/java/nokogiri/XmlEntityReference.java +97 -0
  26. data/ext/java/nokogiri/XmlNamespace.java +183 -0
  27. data/ext/java/nokogiri/XmlNode.java +1378 -0
  28. data/ext/java/nokogiri/XmlNodeSet.java +267 -0
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +99 -0
  30. data/ext/java/nokogiri/XmlReader.java +408 -0
  31. data/ext/java/nokogiri/XmlRelaxng.java +144 -0
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +367 -0
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +184 -0
  34. data/ext/java/nokogiri/XmlSchema.java +324 -0
  35. data/ext/java/nokogiri/XmlSyntaxError.java +119 -0
  36. data/ext/java/nokogiri/XmlText.java +119 -0
  37. data/ext/java/nokogiri/XmlXpathContext.java +199 -0
  38. data/ext/java/nokogiri/XsltStylesheet.java +197 -0
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +204 -0
  40. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
  41. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +86 -0
  42. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  43. data/ext/java/nokogiri/internals/NokogiriHelpers.java +639 -0
  44. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +167 -0
  45. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +130 -0
  46. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +74 -0
  47. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  48. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +79 -0
  49. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +141 -0
  50. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +73 -0
  51. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +67 -0
  52. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +86 -0
  53. data/ext/java/nokogiri/internals/ParserContext.java +276 -0
  54. data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
  55. data/ext/java/nokogiri/internals/ReaderNode.java +531 -0
  56. data/ext/java/nokogiri/internals/SaveContextVisitor.java +567 -0
  57. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +76 -0
  58. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  59. data/ext/java/nokogiri/internals/XmlDomParser.java +76 -0
  60. data/ext/java/nokogiri/internals/XmlDomParserContext.java +244 -0
  61. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  62. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  63. data/ext/nokogiri/depend +358 -0
  64. data/ext/nokogiri/extconf.rb +124 -0
  65. data/ext/nokogiri/html_document.c +154 -0
  66. data/ext/nokogiri/html_document.h +10 -0
  67. data/ext/nokogiri/html_element_description.c +276 -0
  68. data/ext/nokogiri/html_element_description.h +10 -0
  69. data/ext/nokogiri/html_entity_lookup.c +32 -0
  70. data/ext/nokogiri/html_entity_lookup.h +8 -0
  71. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  72. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  73. data/ext/nokogiri/nokogiri.c +115 -0
  74. data/ext/nokogiri/nokogiri.h +160 -0
  75. data/ext/nokogiri/xml_attr.c +94 -0
  76. data/ext/nokogiri/xml_attr.h +9 -0
  77. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  78. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  79. data/ext/nokogiri/xml_cdata.c +56 -0
  80. data/ext/nokogiri/xml_cdata.h +9 -0
  81. data/ext/nokogiri/xml_comment.c +54 -0
  82. data/ext/nokogiri/xml_comment.h +9 -0
  83. data/ext/nokogiri/xml_document.c +478 -0
  84. data/ext/nokogiri/xml_document.h +23 -0
  85. data/ext/nokogiri/xml_document_fragment.c +48 -0
  86. data/ext/nokogiri/xml_document_fragment.h +10 -0
  87. data/ext/nokogiri/xml_dtd.c +202 -0
  88. data/ext/nokogiri/xml_dtd.h +10 -0
  89. data/ext/nokogiri/xml_element_content.c +123 -0
  90. data/ext/nokogiri/xml_element_content.h +10 -0
  91. data/ext/nokogiri/xml_element_decl.c +69 -0
  92. data/ext/nokogiri/xml_element_decl.h +9 -0
  93. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  94. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  95. data/ext/nokogiri/xml_entity_decl.c +110 -0
  96. data/ext/nokogiri/xml_entity_decl.h +10 -0
  97. data/ext/nokogiri/xml_entity_reference.c +52 -0
  98. data/ext/nokogiri/xml_entity_reference.h +9 -0
  99. data/ext/nokogiri/xml_io.c +56 -0
  100. data/ext/nokogiri/xml_io.h +11 -0
  101. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  102. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  103. data/ext/nokogiri/xml_namespace.c +84 -0
  104. data/ext/nokogiri/xml_namespace.h +13 -0
  105. data/ext/nokogiri/xml_node.c +1385 -0
  106. data/ext/nokogiri/xml_node.h +13 -0
  107. data/ext/nokogiri/xml_node_set.c +418 -0
  108. data/ext/nokogiri/xml_node_set.h +9 -0
  109. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  110. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  111. data/ext/nokogiri/xml_reader.c +684 -0
  112. data/ext/nokogiri/xml_reader.h +10 -0
  113. data/ext/nokogiri/xml_relax_ng.c +161 -0
  114. data/ext/nokogiri/xml_relax_ng.h +9 -0
  115. data/ext/nokogiri/xml_sax_parser.c +293 -0
  116. data/ext/nokogiri/xml_sax_parser.h +39 -0
  117. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  118. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  119. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  120. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  121. data/ext/nokogiri/xml_schema.c +205 -0
  122. data/ext/nokogiri/xml_schema.h +9 -0
  123. data/ext/nokogiri/xml_syntax_error.c +58 -0
  124. data/ext/nokogiri/xml_syntax_error.h +13 -0
  125. data/ext/nokogiri/xml_text.c +50 -0
  126. data/ext/nokogiri/xml_text.h +9 -0
  127. data/ext/nokogiri/xml_xpath_context.c +309 -0
  128. data/ext/nokogiri/xml_xpath_context.h +9 -0
  129. data/ext/nokogiri/xslt_stylesheet.c +264 -0
  130. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  131. data/lib/nokogiri.rb +127 -0
  132. data/lib/nokogiri/css.rb +27 -0
  133. data/lib/nokogiri/css/node.rb +99 -0
  134. data/lib/nokogiri/css/parser.rb +677 -0
  135. data/lib/nokogiri/css/parser.y +237 -0
  136. data/lib/nokogiri/css/parser_extras.rb +91 -0
  137. data/lib/nokogiri/css/syntax_error.rb +7 -0
  138. data/lib/nokogiri/css/tokenizer.rb +152 -0
  139. data/lib/nokogiri/css/tokenizer.rex +55 -0
  140. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  141. data/lib/nokogiri/decorators/slop.rb +35 -0
  142. data/lib/nokogiri/html.rb +36 -0
  143. data/lib/nokogiri/html/builder.rb +35 -0
  144. data/lib/nokogiri/html/document.rb +213 -0
  145. data/lib/nokogiri/html/document_fragment.rb +41 -0
  146. data/lib/nokogiri/html/element_description.rb +23 -0
  147. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  148. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  149. data/lib/nokogiri/html/sax/parser.rb +52 -0
  150. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  151. data/lib/nokogiri/nokogiri.jar +0 -0
  152. data/lib/nokogiri/syntax_error.rb +4 -0
  153. data/lib/nokogiri/version.rb +88 -0
  154. data/lib/nokogiri/xml.rb +67 -0
  155. data/lib/nokogiri/xml/attr.rb +14 -0
  156. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  157. data/lib/nokogiri/xml/builder.rb +425 -0
  158. data/lib/nokogiri/xml/cdata.rb +11 -0
  159. data/lib/nokogiri/xml/character_data.rb +7 -0
  160. data/lib/nokogiri/xml/document.rb +234 -0
  161. data/lib/nokogiri/xml/document_fragment.rb +98 -0
  162. data/lib/nokogiri/xml/dtd.rb +22 -0
  163. data/lib/nokogiri/xml/element_content.rb +36 -0
  164. data/lib/nokogiri/xml/element_decl.rb +13 -0
  165. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  166. data/lib/nokogiri/xml/namespace.rb +13 -0
  167. data/lib/nokogiri/xml/node.rb +915 -0
  168. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  169. data/lib/nokogiri/xml/node_set.rb +357 -0
  170. data/lib/nokogiri/xml/notation.rb +6 -0
  171. data/lib/nokogiri/xml/parse_options.rb +93 -0
  172. data/lib/nokogiri/xml/pp.rb +2 -0
  173. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  174. data/lib/nokogiri/xml/pp/node.rb +56 -0
  175. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  176. data/lib/nokogiri/xml/reader.rb +112 -0
  177. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  178. data/lib/nokogiri/xml/sax.rb +4 -0
  179. data/lib/nokogiri/xml/sax/document.rb +164 -0
  180. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  181. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  182. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  183. data/lib/nokogiri/xml/schema.rb +63 -0
  184. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  185. data/lib/nokogiri/xml/text.rb +9 -0
  186. data/lib/nokogiri/xml/xpath.rb +10 -0
  187. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  188. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  189. data/lib/nokogiri/xslt.rb +52 -0
  190. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  191. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  192. data/nokogiri_help_responses.md +40 -0
  193. data/tasks/cross_compile.rb +152 -0
  194. data/tasks/nokogiri.org.rb +18 -0
  195. data/tasks/test.rb +94 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +303 -0
  198. data/test/css/test_tokenizer.rb +198 -0
  199. data/test/css/test_xpath_visitor.rb +85 -0
  200. data/test/decorators/test_slop.rb +16 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/encoding.html +82 -0
  207. data/test/files/encoding.xhtml +84 -0
  208. data/test/files/exslt.xml +8 -0
  209. data/test/files/exslt.xslt +35 -0
  210. data/test/files/foo/foo.xsd +4 -0
  211. data/test/files/metacharset.html +10 -0
  212. data/test/files/noencoding.html +47 -0
  213. data/test/files/po.xml +32 -0
  214. data/test/files/po.xsd +66 -0
  215. data/test/files/shift_jis.html +10 -0
  216. data/test/files/shift_jis.xml +5 -0
  217. data/test/files/snuggles.xml +3 -0
  218. data/test/files/staff.dtd +10 -0
  219. data/test/files/staff.xml +59 -0
  220. data/test/files/staff.xslt +32 -0
  221. data/test/files/tlm.html +850 -0
  222. data/test/files/valid_bar.xml +2 -0
  223. data/test/helper.rb +173 -0
  224. data/test/html/sax/test_parser.rb +136 -0
  225. data/test/html/sax/test_parser_context.rb +48 -0
  226. data/test/html/test_builder.rb +164 -0
  227. data/test/html/test_document.rb +472 -0
  228. data/test/html/test_document_encoding.rb +138 -0
  229. data/test/html/test_document_fragment.rb +255 -0
  230. data/test/html/test_element_description.rb +100 -0
  231. data/test/html/test_named_characters.rb +14 -0
  232. data/test/html/test_node.rb +190 -0
  233. data/test/html/test_node_encoding.rb +27 -0
  234. data/test/test_convert_xpath.rb +135 -0
  235. data/test/test_css_cache.rb +45 -0
  236. data/test/test_encoding_handler.rb +46 -0
  237. data/test/test_memory_leak.rb +72 -0
  238. data/test/test_nokogiri.rb +132 -0
  239. data/test/test_reader.rb +425 -0
  240. data/test/test_soap4r_sax.rb +52 -0
  241. data/test/test_xslt_transforms.rb +193 -0
  242. data/test/xml/node/test_save_options.rb +28 -0
  243. data/test/xml/node/test_subclass.rb +44 -0
  244. data/test/xml/sax/test_parser.rb +338 -0
  245. data/test/xml/sax/test_parser_context.rb +113 -0
  246. data/test/xml/sax/test_push_parser.rb +156 -0
  247. data/test/xml/test_attr.rb +65 -0
  248. data/test/xml/test_attribute_decl.rb +86 -0
  249. data/test/xml/test_builder.rb +227 -0
  250. data/test/xml/test_cdata.rb +50 -0
  251. data/test/xml/test_comment.rb +29 -0
  252. data/test/xml/test_document.rb +697 -0
  253. data/test/xml/test_document_encoding.rb +26 -0
  254. data/test/xml/test_document_fragment.rb +192 -0
  255. data/test/xml/test_dtd.rb +107 -0
  256. data/test/xml/test_dtd_encoding.rb +33 -0
  257. data/test/xml/test_element_content.rb +56 -0
  258. data/test/xml/test_element_decl.rb +73 -0
  259. data/test/xml/test_entity_decl.rb +122 -0
  260. data/test/xml/test_entity_reference.rb +21 -0
  261. data/test/xml/test_namespace.rb +70 -0
  262. data/test/xml/test_node.rb +917 -0
  263. data/test/xml/test_node_attributes.rb +34 -0
  264. data/test/xml/test_node_encoding.rb +107 -0
  265. data/test/xml/test_node_reparenting.rb +334 -0
  266. data/test/xml/test_node_set.rb +742 -0
  267. data/test/xml/test_parse_options.rb +52 -0
  268. data/test/xml/test_processing_instruction.rb +30 -0
  269. data/test/xml/test_reader_encoding.rb +126 -0
  270. data/test/xml/test_relax_ng.rb +60 -0
  271. data/test/xml/test_schema.rb +94 -0
  272. data/test/xml/test_syntax_error.rb +12 -0
  273. data/test/xml/test_text.rb +47 -0
  274. data/test/xml/test_unparented_node.rb +381 -0
  275. data/test/xml/test_xpath.rb +237 -0
  276. data/test/xslt/test_custom_functions.rb +94 -0
  277. data/test/xslt/test_exception_handling.rb +37 -0
  278. metadata +552 -0
@@ -0,0 +1,567 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2011:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri.internals;
34
+
35
+ import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
36
+ import static nokogiri.internals.NokogiriHelpers.isNotXmlEscaped;
37
+
38
+ import java.util.Stack;
39
+
40
+ import org.cyberneko.html.HTMLElements;
41
+ import org.w3c.dom.Attr;
42
+ import org.w3c.dom.CDATASection;
43
+ import org.w3c.dom.Comment;
44
+ import org.w3c.dom.Document;
45
+ import org.w3c.dom.DocumentType;
46
+ import org.w3c.dom.Element;
47
+ import org.w3c.dom.Entity;
48
+ import org.w3c.dom.EntityReference;
49
+ import org.w3c.dom.NamedNodeMap;
50
+ import org.w3c.dom.Node;
51
+ import org.w3c.dom.Notation;
52
+ import org.w3c.dom.ProcessingInstruction;
53
+ import org.w3c.dom.Text;
54
+
55
+ /**
56
+ * A class for serializing a document.
57
+ *
58
+ * @author sergio
59
+ * @author Patrick Mahoney <pat@polycrystal.org>
60
+ * @author Yoko Harada <yokolet@gmail.com>
61
+ */
62
+ public class SaveContextVisitor {
63
+
64
+ private StringBuffer buffer;
65
+ private Stack<String> indentation;
66
+ private String encoding, indentString;
67
+ private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;
68
+
69
+ /*
70
+ * U can't touch this.
71
+ * http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
72
+ *
73
+ * Taken from libxml save options.
74
+ */
75
+
76
+ public static final int FORMAT = 1;
77
+ public static final int NO_DECL = 2;
78
+ public static final int NO_EMPTY = 4;
79
+ public static final int NO_XHTML = 8;
80
+ public static final int AS_XHTML = 16;
81
+ public static final int AS_XML = 32;
82
+ public static final int AS_HTML = 64;
83
+ public static final int AS_BUILDER = 128;
84
+
85
+ public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment) {
86
+ buffer = new StringBuffer();
87
+ this.encoding = encoding;
88
+ indentation = new Stack<String>(); indentation.push("");
89
+ this.htmlDoc = htmlDoc;
90
+ this.fragment = fragment;
91
+ format = (options & FORMAT) == FORMAT;
92
+
93
+ noDecl = (options & NO_DECL) == NO_DECL;
94
+ noEmpty = (options & NO_EMPTY) == NO_EMPTY;
95
+ noXhtml = (options & NO_XHTML) == NO_XHTML;
96
+ asXhtml = (options & AS_XHTML) == AS_XHTML;
97
+ asXml = (options & AS_XML) == AS_XML;
98
+ asHtml = (options & AS_HTML) == AS_HTML;
99
+ asBuilder = (options & AS_BUILDER) == AS_BUILDER;
100
+ if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
101
+ if ((!format && indent != null) && indent.length() > 0) format = true;
102
+ if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
103
+ indentString = indent;
104
+ if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = true;
105
+ }
106
+
107
+ @Override
108
+ public String toString() {
109
+ return (new String(buffer));
110
+ }
111
+
112
+ public void setHtmlDoc(boolean htmlDoc) {
113
+ this.htmlDoc = htmlDoc;
114
+ }
115
+
116
+ public void setEncoding(String encoding) {
117
+ this.encoding = encoding;
118
+ }
119
+
120
+ public boolean enter(Node node) {
121
+ if (node instanceof Document) {
122
+ return enter((Document)node);
123
+ }
124
+ if (node instanceof Element) {
125
+ return enter((Element)node);
126
+ }
127
+ if (node instanceof Attr) {
128
+ return enter((Attr)node);
129
+ }
130
+ if (node instanceof Text) {
131
+ return enter((Text)node);
132
+ }
133
+ if (node instanceof CDATASection) {
134
+ return enter((CDATASection)node);
135
+ }
136
+ if (node instanceof Comment) {
137
+ return enter((Comment)node);
138
+ }
139
+ if (node instanceof DocumentType) {
140
+ return enter((DocumentType)node);
141
+ }
142
+ if (node instanceof Entity) {
143
+ return enter((Entity)node);
144
+ }
145
+ if (node instanceof EntityReference) {
146
+ return enter((EntityReference)node);
147
+ }
148
+ if (node instanceof Notation) {
149
+ return enter((Notation)node);
150
+ }
151
+ if (node instanceof ProcessingInstruction) {
152
+ return enter((ProcessingInstruction)node);
153
+ }
154
+ return false;
155
+ }
156
+
157
+ public void leave(Node node) {
158
+ if (node instanceof Document) {
159
+ leave((Document)node);
160
+ return;
161
+ }
162
+ if (node instanceof Element) {
163
+ leave((Element)node);
164
+ return;
165
+ }
166
+ if (node instanceof Attr) {
167
+ leave((Attr)node);
168
+ return;
169
+ }
170
+ if (node instanceof Text) {
171
+ leave((Text)node);
172
+ return;
173
+ }
174
+ if (node instanceof CDATASection) {
175
+ leave((CDATASection)node);
176
+ return;
177
+ }
178
+ if (node instanceof Comment) {
179
+ leave((Comment)node);
180
+ return;
181
+ }
182
+ if (node instanceof DocumentType) {
183
+ leave((DocumentType)node);
184
+ return;
185
+ }
186
+ if (node instanceof Entity) {
187
+ leave((Entity)node);
188
+ return;
189
+ }
190
+ if (node instanceof EntityReference) {
191
+ leave((EntityReference)node);
192
+ return;
193
+ }
194
+ if (node instanceof Notation) {
195
+ leave((Notation)node);
196
+ return;
197
+ }
198
+ if (node instanceof ProcessingInstruction) {
199
+ leave((ProcessingInstruction)node);
200
+ return;
201
+ }
202
+ }
203
+
204
+ public boolean enter(String string) {
205
+ buffer.append(string);
206
+ return true;
207
+ }
208
+
209
+ public void leave(String string) {
210
+ // no-op
211
+ }
212
+
213
+ public boolean enter(Attr attr) {
214
+ String name = attr.getName();
215
+ buffer.append(name);
216
+ if (!asHtml || !isHtmlBooleanAttr(name)) {
217
+ buffer.append("=");
218
+ buffer.append("\"");
219
+ buffer.append(serializeAttrTextContent(attr.getValue(), htmlDoc));
220
+ buffer.append("\"");
221
+ }
222
+ return true;
223
+ }
224
+
225
+ public static final String[] HTML_BOOLEAN_ATTRS = {
226
+ "checked", "compact", "declare", "defer", "disabled", "ismap",
227
+ "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
228
+ "selected"
229
+ };
230
+
231
+ private boolean isHtmlBooleanAttr(String name) {
232
+ for (String s : HTML_BOOLEAN_ATTRS) {
233
+ if (s.equals(name)) return true;
234
+ }
235
+ return false;
236
+ }
237
+
238
+ private String serializeAttrTextContent(String s, boolean htmlDoc) {
239
+ if (s == null) return "";
240
+
241
+ char[] c = s.toCharArray();
242
+ StringBuffer buffer = new StringBuffer(c.length);
243
+
244
+ for(int i = 0; i < c.length; i++) {
245
+ switch(c[i]){
246
+ case '\n': buffer.append("&#10;"); break;
247
+ case '\r': buffer.append("&#13;"); break;
248
+ case '\t': buffer.append("&#9;"); break;
249
+ case '"': if (htmlDoc) buffer.append("%22");
250
+ else buffer.append("&quot;");
251
+ break;
252
+ case '<': buffer.append("&lt;"); break;
253
+ case '>': buffer.append("&gt;"); break;
254
+ case '&': buffer.append("&amp;"); break;
255
+ default: buffer.append(c[i]);
256
+ }
257
+ }
258
+
259
+ return buffer.toString();
260
+ }
261
+
262
+ public void leave(Attr attr) {
263
+ // no-op
264
+ }
265
+
266
+ public boolean enter(CDATASection cdata) {
267
+ buffer.append("<![CDATA[");
268
+ buffer.append(cdata.getData());
269
+ buffer.append("]]>");
270
+ return true;
271
+ }
272
+
273
+ public void leave(CDATASection cdata) {
274
+ // no-op
275
+ }
276
+
277
+ public boolean enter(Comment comment) {
278
+ buffer.append("<!--");
279
+ buffer.append(comment.getData());
280
+ buffer.append("-->");
281
+ return true;
282
+ }
283
+
284
+ public void leave(Comment comment) {
285
+ // no-op
286
+ }
287
+
288
+ public boolean enter(Document document) {
289
+ if (!noDecl) {
290
+ buffer.append("<?xml version=\"");
291
+ buffer.append(document.getXmlVersion());
292
+ buffer.append("\"");
293
+
294
+ if (encoding != null) {
295
+ buffer.append(" encoding=\"");
296
+ buffer.append(encoding.toUpperCase());
297
+ buffer.append("\"");
298
+ }
299
+ buffer.append("?>\n");
300
+ }
301
+ return true;
302
+ }
303
+
304
+ public void leave(Document document) {
305
+ // no-op
306
+ }
307
+
308
+ public boolean enter(DocumentType docType) {
309
+ String name = docType.getName();
310
+ String pubId = docType.getPublicId();
311
+ String sysId = docType.getSystemId();
312
+ String internalSubset = docType.getInternalSubset();
313
+ if (docType.getPreviousSibling() != null) {
314
+ buffer.append("\n");
315
+ }
316
+ buffer.append("<!DOCTYPE " + name + " ");
317
+ if (pubId != null) {
318
+ buffer.append("PUBLIC \"" + pubId + "\"");
319
+ if (sysId != null) buffer.append(" \"" + sysId + "\"");
320
+ } else if (sysId != null) {
321
+ buffer.append("SYSTEM \"" + sysId + "\"");
322
+ }
323
+ if (internalSubset != null) {
324
+ buffer.append(" [");
325
+ buffer.append(internalSubset);
326
+ buffer.append("]");
327
+ }
328
+ buffer.append(">\n");
329
+ return true;
330
+ }
331
+
332
+ public void leave(DocumentType docType) {
333
+ // no-op
334
+ }
335
+
336
+ public boolean enter(Element element) {
337
+ String current = indentation.peek();
338
+ buffer.append(current);
339
+ if (needIndent()) {
340
+ indentation.push(current + indentString);
341
+ }
342
+ String name = element.getTagName();
343
+ buffer.append("<" + name);
344
+ NamedNodeMap attrs = element.getAttributes();
345
+ for (int i=0; i<attrs.getLength(); i++) {
346
+ Attr attr = (Attr) attrs.item(i);
347
+ if (attr.getSpecified()) {
348
+ buffer.append(" ");
349
+ enter(attr);
350
+ leave(attr);
351
+ }
352
+ }
353
+ if (element.hasChildNodes()) {
354
+ buffer.append(">");
355
+ if (needBreakInOpening(element)) buffer.append("\n");
356
+ return true;
357
+ }
358
+ // no child
359
+ if (asHtml) {
360
+ buffer.append(">");
361
+ } else if (asXhtml) {
362
+ buffer.append(" />");
363
+ } else if (asXml && noEmpty) {
364
+ buffer.append(">");
365
+ } else {
366
+ buffer.append("/>");
367
+ }
368
+ if (needBreakInOpening(element)) {
369
+ buffer.append("\n");
370
+ }
371
+ return true;
372
+ }
373
+
374
+ private boolean needIndent() {
375
+ if (fragment) return false; // a given option might be fragment and format. fragment matters
376
+ if (format || asBuilder) return true;
377
+ return false;
378
+ }
379
+
380
+ private boolean needBreakInOpening(Element element) {
381
+ if (fragment) return false;
382
+ if (format) return true;
383
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
384
+ if (format && element.getNextSibling() == null && element.hasChildNodes()) return true;
385
+ return false;
386
+ }
387
+
388
+ private boolean isEmpty(String name) {
389
+ HTMLElements.Element element = HTMLElements.getElement(name);
390
+ return element.isEmpty();
391
+ }
392
+
393
+ public void leave(Element element) {
394
+ String name = element.getTagName();
395
+ if (element.hasChildNodes()) {
396
+ if (needIndentInClosing(element)) {
397
+ indentation.pop();
398
+ buffer.append(indentation.peek());
399
+ } else if (asBuilder) {
400
+ indentation.pop();
401
+ }
402
+ buffer.append("</" + name + ">");
403
+ if (needBreakInClosing()) {
404
+ buffer.append("\n");
405
+ }
406
+ return;
407
+ }
408
+ // no child, but HTML might need a closing tag.
409
+ if (asHtml || noEmpty) {
410
+ if (!isEmpty(name) && noEmpty) {
411
+ buffer.append("</" + name + ">");
412
+ }
413
+ }
414
+ if (needBreakInClosing()) {
415
+ indentation.pop();
416
+ buffer.append("\n");
417
+ }
418
+ }
419
+
420
+ private boolean needIndentInClosing(Element element) {
421
+ if (fragment) return false; // a given option might be fragment and format. fragment matters
422
+ if (format) return true;
423
+ if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
424
+ return false;
425
+ }
426
+
427
+ private boolean needBreakInClosing() {
428
+ if (fragment) return false;
429
+ if (format || asBuilder) return true;
430
+ return false;
431
+ }
432
+
433
+ public boolean enter(Entity entity) {
434
+ String name = entity.getNodeName();
435
+ String pubId = entity.getPublicId();
436
+ String sysId = entity.getSystemId();
437
+ String notation = entity.getNotationName();
438
+ buffer.append("<!ENTITY ");
439
+ buffer.append(name);
440
+ if (pubId != null) {
441
+ buffer.append(" PUBLIC \"");
442
+ buffer.append(pubId);
443
+ buffer.append("\"");
444
+ }
445
+ if (sysId != null) {
446
+ buffer.append(" SYSTEM \"");
447
+ buffer.append(sysId);
448
+ buffer.append("\"");
449
+ }
450
+ if (notation != null) {
451
+ buffer.append(" NDATA ");
452
+ buffer.append(notation);
453
+ }
454
+ buffer.append(">");
455
+ return true;
456
+ }
457
+
458
+ public void leave(Entity entity) {
459
+ // no-op
460
+ }
461
+
462
+ public boolean enter(EntityReference entityRef) {
463
+ // no-op?
464
+ return true;
465
+ }
466
+
467
+ public void leave(EntityReference entityRef) {
468
+ // no-op
469
+ }
470
+
471
+ public boolean enter(Notation notation) {
472
+ String name = notation.getNodeName();
473
+ String pubId = notation.getPublicId();
474
+ String sysId = notation.getSystemId();
475
+ buffer.append("<!NOTATION ");
476
+ buffer.append(name);
477
+ if (pubId != null) {
478
+ buffer.append(" PUBLIC \"");
479
+ buffer.append(pubId);
480
+ buffer.append("\"");
481
+ if (sysId != null) {
482
+ buffer.append(" \"");
483
+ buffer.append(sysId);
484
+ buffer.append("\"");
485
+ }
486
+ } else if (sysId != null) {
487
+ buffer.append(" SYSTEM \"");
488
+ buffer.append(sysId);
489
+ buffer.append("\"");
490
+ }
491
+ buffer.append(">");
492
+ return true;
493
+ }
494
+
495
+ public void leave(Notation notation) {
496
+ // no-op
497
+ }
498
+
499
+ public boolean enter(ProcessingInstruction pi) {
500
+ buffer.append("<?");
501
+ buffer.append(pi.getTarget());
502
+ buffer.append(" ");
503
+ buffer.append(pi.getData());
504
+ if (asHtml) buffer.append(">");
505
+ else buffer.append("?>");
506
+ buffer.append("\n");
507
+ return true;
508
+ }
509
+
510
+ public void leave(ProcessingInstruction pi) {
511
+ // no-op
512
+ }
513
+
514
+ private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
515
+ public boolean enter(Text text) {
516
+ String textContent = text.getNodeValue();
517
+ if (needIndentText() && "".equals(textContent.trim())) return true;
518
+ if (needIndentText()) {
519
+ String current = indentation.peek();
520
+ buffer.append(current);
521
+ indentation.push(current + indentString);
522
+ if (textContent.charAt(0) == lineSeparator) textContent = textContent.substring(1);
523
+ }
524
+ if (isNotXmlEscaped(textContent)) {
525
+ textContent = encodeJavaString(textContent);
526
+ }
527
+ if (getEncoding(text) == null) {
528
+ textContent = encodeStringToHtmlEntity(textContent);
529
+ }
530
+ buffer.append(textContent);
531
+ return true;
532
+ }
533
+
534
+ private boolean needIndentText() {
535
+ if (fragment) return false;
536
+ if (format) return true;
537
+ return false;
538
+ }
539
+
540
+ public void leave(Text text) {
541
+ String textContent = text.getNodeValue();
542
+ if (needIndentText() && !"".equals(textContent.trim())) {
543
+ indentation.pop();
544
+ if (textContent.charAt(textContent.length()-1) != lineSeparator) {
545
+ buffer.append("\n");
546
+ }
547
+ }
548
+ }
549
+
550
+ private String getEncoding(Text text) {
551
+ if (encoding != null) return encoding;
552
+ encoding = text.getOwnerDocument().getInputEncoding();
553
+ return encoding;
554
+ }
555
+
556
+ private String encodeStringToHtmlEntity(String text) {
557
+ int last = 126; // = U+007E. No need to encode under U+007E.
558
+ StringBuffer sb = new StringBuffer();
559
+ for (int i=0; i<text.length(); i++) {
560
+ int codePoint = text.codePointAt(i);
561
+ if (codePoint > last) sb.append("&#x" + Integer.toHexString(codePoint) + ";");
562
+ else sb.append(text.charAt(i));
563
+ }
564
+ return new String(sb);
565
+ }
566
+
567
+ }