nokogiri-backupify 1.5.0.beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. data/.autotest +26 -0
  2. data/CHANGELOG.ja.rdoc +509 -0
  3. data/CHANGELOG.rdoc +490 -0
  4. data/Manifest.txt +274 -0
  5. data/README.ja.rdoc +106 -0
  6. data/README.rdoc +150 -0
  7. data/Rakefile +217 -0
  8. data/bin/nokogiri +54 -0
  9. data/deps.rip +5 -0
  10. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  11. data/ext/java/nokogiri/HtmlDocument.java +146 -0
  12. data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
  13. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  14. data/ext/java/nokogiri/HtmlSaxParserContext.java +256 -0
  15. data/ext/java/nokogiri/NokogiriService.java +466 -0
  16. data/ext/java/nokogiri/XmlAttr.java +183 -0
  17. data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
  18. data/ext/java/nokogiri/XmlCdata.java +89 -0
  19. data/ext/java/nokogiri/XmlComment.java +84 -0
  20. data/ext/java/nokogiri/XmlDocument.java +514 -0
  21. data/ext/java/nokogiri/XmlDocumentFragment.java +216 -0
  22. data/ext/java/nokogiri/XmlDtd.java +464 -0
  23. data/ext/java/nokogiri/XmlElement.java +221 -0
  24. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  25. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  26. data/ext/java/nokogiri/XmlEntityDecl.java +161 -0
  27. data/ext/java/nokogiri/XmlEntityReference.java +75 -0
  28. data/ext/java/nokogiri/XmlNamespace.java +127 -0
  29. data/ext/java/nokogiri/XmlNode.java +1392 -0
  30. data/ext/java/nokogiri/XmlNodeSet.java +284 -0
  31. data/ext/java/nokogiri/XmlProcessingInstruction.java +103 -0
  32. data/ext/java/nokogiri/XmlReader.java +409 -0
  33. data/ext/java/nokogiri/XmlRelaxng.java +199 -0
  34. data/ext/java/nokogiri/XmlSaxParserContext.java +353 -0
  35. data/ext/java/nokogiri/XmlSaxPushParser.java +182 -0
  36. data/ext/java/nokogiri/XmlSchema.java +175 -0
  37. data/ext/java/nokogiri/XmlSyntaxError.java +114 -0
  38. data/ext/java/nokogiri/XmlText.java +135 -0
  39. data/ext/java/nokogiri/XmlXpathContext.java +175 -0
  40. data/ext/java/nokogiri/XsltStylesheet.java +181 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +205 -0
  42. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
  43. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +80 -0
  44. data/ext/java/nokogiri/internals/NokogiriHandler.java +326 -0
  45. data/ext/java/nokogiri/internals/NokogiriHelpers.java +583 -0
  46. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +170 -0
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +118 -0
  48. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +73 -0
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  50. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  51. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +120 -0
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +56 -0
  53. data/ext/java/nokogiri/internals/ParserContext.java +278 -0
  54. data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
  55. data/ext/java/nokogiri/internals/ReaderNode.java +473 -0
  56. data/ext/java/nokogiri/internals/SaveContext.java +282 -0
  57. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +68 -0
  58. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  59. data/ext/java/nokogiri/internals/XmlDomParser.java +77 -0
  60. data/ext/java/nokogiri/internals/XmlDomParserContext.java +233 -0
  61. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  62. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  63. data/ext/nokogiri/depend +358 -0
  64. data/ext/nokogiri/extconf.rb +124 -0
  65. data/ext/nokogiri/html_document.c +154 -0
  66. data/ext/nokogiri/html_document.h +10 -0
  67. data/ext/nokogiri/html_element_description.c +276 -0
  68. data/ext/nokogiri/html_element_description.h +10 -0
  69. data/ext/nokogiri/html_entity_lookup.c +32 -0
  70. data/ext/nokogiri/html_entity_lookup.h +8 -0
  71. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  72. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  73. data/ext/nokogiri/nokogiri.c +92 -0
  74. data/ext/nokogiri/nokogiri.h +160 -0
  75. data/ext/nokogiri/xml_attr.c +94 -0
  76. data/ext/nokogiri/xml_attr.h +9 -0
  77. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  78. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  79. data/ext/nokogiri/xml_cdata.c +56 -0
  80. data/ext/nokogiri/xml_cdata.h +9 -0
  81. data/ext/nokogiri/xml_comment.c +54 -0
  82. data/ext/nokogiri/xml_comment.h +9 -0
  83. data/ext/nokogiri/xml_document.c +478 -0
  84. data/ext/nokogiri/xml_document.h +23 -0
  85. data/ext/nokogiri/xml_document_fragment.c +48 -0
  86. data/ext/nokogiri/xml_document_fragment.h +10 -0
  87. data/ext/nokogiri/xml_dtd.c +202 -0
  88. data/ext/nokogiri/xml_dtd.h +10 -0
  89. data/ext/nokogiri/xml_element_content.c +123 -0
  90. data/ext/nokogiri/xml_element_content.h +10 -0
  91. data/ext/nokogiri/xml_element_decl.c +69 -0
  92. data/ext/nokogiri/xml_element_decl.h +9 -0
  93. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  94. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  95. data/ext/nokogiri/xml_entity_decl.c +110 -0
  96. data/ext/nokogiri/xml_entity_decl.h +10 -0
  97. data/ext/nokogiri/xml_entity_reference.c +52 -0
  98. data/ext/nokogiri/xml_entity_reference.h +9 -0
  99. data/ext/nokogiri/xml_io.c +31 -0
  100. data/ext/nokogiri/xml_io.h +11 -0
  101. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  102. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  103. data/ext/nokogiri/xml_namespace.c +84 -0
  104. data/ext/nokogiri/xml_namespace.h +13 -0
  105. data/ext/nokogiri/xml_node.c +1384 -0
  106. data/ext/nokogiri/xml_node.h +13 -0
  107. data/ext/nokogiri/xml_node_set.c +418 -0
  108. data/ext/nokogiri/xml_node_set.h +9 -0
  109. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  110. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  111. data/ext/nokogiri/xml_reader.c +684 -0
  112. data/ext/nokogiri/xml_reader.h +10 -0
  113. data/ext/nokogiri/xml_relax_ng.c +161 -0
  114. data/ext/nokogiri/xml_relax_ng.h +9 -0
  115. data/ext/nokogiri/xml_sax_parser.c +288 -0
  116. data/ext/nokogiri/xml_sax_parser.h +39 -0
  117. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  118. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  119. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  120. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  121. data/ext/nokogiri/xml_schema.c +205 -0
  122. data/ext/nokogiri/xml_schema.h +9 -0
  123. data/ext/nokogiri/xml_syntax_error.c +58 -0
  124. data/ext/nokogiri/xml_syntax_error.h +13 -0
  125. data/ext/nokogiri/xml_text.c +50 -0
  126. data/ext/nokogiri/xml_text.h +9 -0
  127. data/ext/nokogiri/xml_xpath_context.c +309 -0
  128. data/ext/nokogiri/xml_xpath_context.h +9 -0
  129. data/ext/nokogiri/xslt_stylesheet.c +258 -0
  130. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  131. data/lib/isorelax.jar +0 -0
  132. data/lib/jing.jar +0 -0
  133. data/lib/nekodtd.jar +0 -0
  134. data/lib/nekohtml.jar +0 -0
  135. data/lib/nokogiri.rb +143 -0
  136. data/lib/nokogiri/css.rb +23 -0
  137. data/lib/nokogiri/css/node.rb +99 -0
  138. data/lib/nokogiri/css/parser.rb +677 -0
  139. data/lib/nokogiri/css/parser.y +237 -0
  140. data/lib/nokogiri/css/parser_extras.rb +91 -0
  141. data/lib/nokogiri/css/syntax_error.rb +7 -0
  142. data/lib/nokogiri/css/tokenizer.rb +152 -0
  143. data/lib/nokogiri/css/tokenizer.rex +55 -0
  144. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  145. data/lib/nokogiri/decorators/slop.rb +35 -0
  146. data/lib/nokogiri/html.rb +36 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +221 -0
  149. data/lib/nokogiri/html/document_fragment.rb +41 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  152. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  153. data/lib/nokogiri/html/sax/parser.rb +52 -0
  154. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  155. data/lib/nokogiri/syntax_error.rb +4 -0
  156. data/lib/nokogiri/version.rb +35 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +418 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +218 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +84 -0
  165. data/lib/nokogiri/xml/dtd.rb +22 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  169. data/lib/nokogiri/xml/namespace.rb +13 -0
  170. data/lib/nokogiri/xml/node.rb +907 -0
  171. data/lib/nokogiri/xml/node/save_options.rb +45 -0
  172. data/lib/nokogiri/xml/node_set.rb +350 -0
  173. data/lib/nokogiri/xml/notation.rb +6 -0
  174. data/lib/nokogiri/xml/parse_options.rb +85 -0
  175. data/lib/nokogiri/xml/pp.rb +2 -0
  176. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  177. data/lib/nokogiri/xml/pp/node.rb +56 -0
  178. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  179. data/lib/nokogiri/xml/reader.rb +112 -0
  180. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  181. data/lib/nokogiri/xml/sax.rb +4 -0
  182. data/lib/nokogiri/xml/sax/document.rb +164 -0
  183. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  184. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  185. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  186. data/lib/nokogiri/xml/schema.rb +57 -0
  187. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  188. data/lib/nokogiri/xml/text.rb +9 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +52 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xercesImpl.jar +0 -0
  195. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  196. data/tasks/cross_compile.rb +177 -0
  197. data/tasks/test.rb +94 -0
  198. data/test/css/test_nthiness.rb +159 -0
  199. data/test/css/test_parser.rb +303 -0
  200. data/test/css/test_tokenizer.rb +198 -0
  201. data/test/css/test_xpath_visitor.rb +85 -0
  202. data/test/decorators/test_slop.rb +16 -0
  203. data/test/files/2ch.html +108 -0
  204. data/test/files/address_book.rlx +12 -0
  205. data/test/files/address_book.xml +10 -0
  206. data/test/files/bar/bar.xsd +4 -0
  207. data/test/files/dont_hurt_em_why.xml +422 -0
  208. data/test/files/exslt.xml +8 -0
  209. data/test/files/exslt.xslt +35 -0
  210. data/test/files/foo/foo.xsd +4 -0
  211. data/test/files/po.xml +32 -0
  212. data/test/files/po.xsd +66 -0
  213. data/test/files/shift_jis.html +10 -0
  214. data/test/files/shift_jis.xml +5 -0
  215. data/test/files/snuggles.xml +3 -0
  216. data/test/files/staff.dtd +10 -0
  217. data/test/files/staff.xml +59 -0
  218. data/test/files/staff.xslt +32 -0
  219. data/test/files/tlm.html +850 -0
  220. data/test/files/valid_bar.xml +2 -0
  221. data/test/helper.rb +171 -0
  222. data/test/html/sax/test_parser.rb +136 -0
  223. data/test/html/sax/test_parser_context.rb +48 -0
  224. data/test/html/test_builder.rb +164 -0
  225. data/test/html/test_document.rb +457 -0
  226. data/test/html/test_document_encoding.rb +123 -0
  227. data/test/html/test_document_fragment.rb +255 -0
  228. data/test/html/test_element_description.rb +100 -0
  229. data/test/html/test_named_characters.rb +14 -0
  230. data/test/html/test_node.rb +190 -0
  231. data/test/html/test_node_encoding.rb +27 -0
  232. data/test/test_convert_xpath.rb +135 -0
  233. data/test/test_css_cache.rb +45 -0
  234. data/test/test_encoding_handler.rb +46 -0
  235. data/test/test_memory_leak.rb +52 -0
  236. data/test/test_nokogiri.rb +132 -0
  237. data/test/test_reader.rb +403 -0
  238. data/test/test_soap4r_sax.rb +52 -0
  239. data/test/test_xslt_transforms.rb +189 -0
  240. data/test/xml/node/test_save_options.rb +20 -0
  241. data/test/xml/node/test_subclass.rb +44 -0
  242. data/test/xml/sax/test_parser.rb +338 -0
  243. data/test/xml/sax/test_parser_context.rb +113 -0
  244. data/test/xml/sax/test_push_parser.rb +156 -0
  245. data/test/xml/test_attr.rb +65 -0
  246. data/test/xml/test_attribute_decl.rb +86 -0
  247. data/test/xml/test_builder.rb +210 -0
  248. data/test/xml/test_cdata.rb +50 -0
  249. data/test/xml/test_comment.rb +29 -0
  250. data/test/xml/test_document.rb +675 -0
  251. data/test/xml/test_document_encoding.rb +26 -0
  252. data/test/xml/test_document_fragment.rb +192 -0
  253. data/test/xml/test_dtd.rb +107 -0
  254. data/test/xml/test_dtd_encoding.rb +33 -0
  255. data/test/xml/test_element_content.rb +56 -0
  256. data/test/xml/test_element_decl.rb +73 -0
  257. data/test/xml/test_entity_decl.rb +122 -0
  258. data/test/xml/test_entity_reference.rb +21 -0
  259. data/test/xml/test_namespace.rb +70 -0
  260. data/test/xml/test_node.rb +899 -0
  261. data/test/xml/test_node_attributes.rb +34 -0
  262. data/test/xml/test_node_encoding.rb +107 -0
  263. data/test/xml/test_node_reparenting.rb +321 -0
  264. data/test/xml/test_node_set.rb +708 -0
  265. data/test/xml/test_parse_options.rb +52 -0
  266. data/test/xml/test_processing_instruction.rb +30 -0
  267. data/test/xml/test_reader_encoding.rb +126 -0
  268. data/test/xml/test_relax_ng.rb +60 -0
  269. data/test/xml/test_schema.rb +89 -0
  270. data/test/xml/test_syntax_error.rb +12 -0
  271. data/test/xml/test_text.rb +47 -0
  272. data/test/xml/test_unparented_node.rb +381 -0
  273. data/test/xml/test_xpath.rb +237 -0
  274. data/test/xslt/test_custom_functions.rb +94 -0
  275. metadata +525 -0
@@ -0,0 +1,583 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2010:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri.internals;
34
+
35
+ import java.io.UnsupportedEncodingException;
36
+ import java.nio.ByteBuffer;
37
+ import java.nio.charset.Charset;
38
+
39
+ import nokogiri.NokogiriService;
40
+ import nokogiri.XmlAttr;
41
+ import nokogiri.XmlCdata;
42
+ import nokogiri.XmlComment;
43
+ import nokogiri.XmlDocument;
44
+ import nokogiri.XmlElement;
45
+ import nokogiri.XmlNamespace;
46
+ import nokogiri.XmlNode;
47
+ import nokogiri.XmlText;
48
+
49
+ import org.jruby.Ruby;
50
+ import org.jruby.RubyArray;
51
+ import org.jruby.RubyClass;
52
+ import org.jruby.RubyEncoding;
53
+ import org.jruby.RubyHash;
54
+ import org.jruby.RubyString;
55
+ import org.jruby.runtime.builtin.IRubyObject;
56
+ import org.jruby.util.ByteList;
57
+ import org.w3c.dom.Attr;
58
+ import org.w3c.dom.NamedNodeMap;
59
+ import org.w3c.dom.Node;
60
+ import org.w3c.dom.NodeList;
61
+
62
+ /**
63
+ * A class for various utility methods.
64
+ *
65
+ * @author serabe
66
+ */
67
+ public class NokogiriHelpers {
68
+ public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
69
+ public static final String VALID_ROOT_NODE = "NOKOGIRI_VALIDE_ROOT_NODE";
70
+
71
+ public static XmlNode getCachedNode(Node node) {
72
+ return (XmlNode) node.getUserData(CACHED_NODE);
73
+ }
74
+
75
+ /**
76
+ * Get the XmlNode associated with the underlying
77
+ * <code>node</code>. Creates a new XmlNode (or appropriate subclass)
78
+ * or XmlNamespace wrapping <code>node</code> if there is no cached
79
+ * value.
80
+ */
81
+ public static IRubyObject getCachedNodeOrCreate(Ruby ruby, Node node) {
82
+ if(node == null) return ruby.getNil();
83
+ if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
84
+ XmlDocument xmlDocument = (XmlDocument)node.getOwnerDocument().getUserData(CACHED_NODE);
85
+ String prefix = getLocalNameForNamespace(((Attr)node).getName());
86
+ prefix = prefix != null ? prefix : "";
87
+ String href = ((Attr)node).getValue();
88
+ XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
89
+ if (xmlNamespace == null) {
90
+ return xmlDocument.getNamespaceCache().put(ruby, prefix, ((Attr)node).getValue(), node, xmlDocument);
91
+ }
92
+ }
93
+ XmlNode xmlNode = getCachedNode(node);
94
+ if(xmlNode == null) {
95
+ xmlNode = (XmlNode)constructNode(ruby, node);
96
+ node.setUserData(CACHED_NODE, xmlNode, null);
97
+ }
98
+ return xmlNode;
99
+ }
100
+
101
+ /**
102
+ * Construct a new XmlNode wrapping <code>node</code>. The proper
103
+ * subclass of XmlNode is chosen based on the type of
104
+ * <code>node</code>.
105
+ */
106
+ public static IRubyObject constructNode(Ruby ruby, Node node) {
107
+ if (node == null) return ruby.getNil();
108
+ // this is slow; need a way to cache nokogiri classes/modules somewhere
109
+ switch (node.getNodeType()) {
110
+ case Node.ELEMENT_NODE:
111
+ XmlElement xmlElement = (XmlElement) getNokogiriClass(ruby, "Nokogiri::XML::Element").allocate();
112
+ xmlElement.setNode(ruby.getCurrentContext(), node);
113
+ return xmlElement;
114
+ case Node.ATTRIBUTE_NODE:
115
+ XmlAttr xmlAttr = (XmlAttr) getNokogiriClass(ruby, "Nokogiri::XML::Attr").allocate();
116
+ xmlAttr.setNode(ruby.getCurrentContext(), node);
117
+ return xmlAttr;
118
+ case Node.TEXT_NODE:
119
+ XmlText xmlText = (XmlText) getNokogiriClass(ruby, "Nokogiri::XML::Text").allocate();
120
+ xmlText.setNode(ruby.getCurrentContext(), node);
121
+ return xmlText;
122
+ case Node.COMMENT_NODE:
123
+ XmlComment xmlComment = (XmlComment) getNokogiriClass(ruby, "Nokogiri::XML::Comment").allocate();
124
+ xmlComment.setNode(ruby.getCurrentContext(), node);
125
+ return xmlComment;
126
+ case Node.ENTITY_NODE:
127
+ return new XmlNode(ruby, getNokogiriClass(ruby, "Nokogiri::XML::EntityDecl"), node);
128
+ case Node.CDATA_SECTION_NODE:
129
+ XmlCdata xmlCdata = (XmlCdata) getNokogiriClass(ruby, "Nokogiri::XML::CDATA").allocate();
130
+ xmlCdata.setNode(ruby.getCurrentContext(), node);
131
+ return xmlCdata;
132
+ case Node.DOCUMENT_NODE:
133
+ XmlDocument xmlDocument = (XmlDocument) getNokogiriClass(ruby, "Nokogiri::XML::Document").allocate();
134
+ xmlDocument.setNode(ruby.getCurrentContext(), node);
135
+ return xmlDocument;
136
+ default:
137
+ XmlNode xmlNode = (XmlNode) getNokogiriClass(ruby, "Nokogiri::XML::Node").allocate();
138
+ xmlNode.setNode(ruby.getCurrentContext(), node);
139
+ return xmlNode;
140
+ }
141
+ }
142
+
143
+ public static RubyClass getNokogiriClass(Ruby ruby, String name) {
144
+ RubyHash classCache = (RubyHash) ruby.getGlobalVariables().get(NokogiriService.nokogiriClassCacheGvarName);
145
+ IRubyObject rubyName = RubyString.newString(ruby, name);
146
+ return (RubyClass)classCache.fastARef(rubyName);
147
+ }
148
+
149
+ public static IRubyObject stringOrNil(Ruby runtime, String s) {
150
+ if (s == null) return runtime.getNil();
151
+ return RubyString.newString(runtime, s);
152
+ }
153
+
154
+ public static IRubyObject stringOrBlank(Ruby runtime, String s) {
155
+ if (s == null) return runtime.newString();
156
+ return RubyString.newString(runtime, s);
157
+ }
158
+
159
+ /**
160
+ * Convert <code>s</code> to a RubyString, or if s is null or
161
+ * empty return RubyNil.
162
+ */
163
+ public static IRubyObject nonEmptyStringOrNil(Ruby runtime, String s) {
164
+ if (s == null || s.length() == 0) return runtime.getNil();
165
+ return RubyString.newString(runtime, s);
166
+ }
167
+
168
+ /**
169
+ * Return the prefix of a qualified name like "prefix:local".
170
+ * Returns null if there is no prefix.
171
+ */
172
+ public static String getPrefix(String qName) {
173
+ if (qName == null) return null;
174
+
175
+ int pos = qName.indexOf(':');
176
+ if (pos > 0)
177
+ return qName.substring(0, pos);
178
+ else
179
+ return null;
180
+ }
181
+
182
+ /**
183
+ * Return the local part of a qualified name like "prefix:local".
184
+ * Returns <code>qName</code> if there is no prefix.
185
+ */
186
+ public static String getLocalPart(String qName) {
187
+ if (qName == null) return null;
188
+
189
+ int pos = qName.indexOf(':');
190
+ if (pos > 0)
191
+ return qName.substring(pos + 1);
192
+ else
193
+ return qName;
194
+ }
195
+
196
+ public static String getLocalNameForNamespace(String name) {
197
+ String localName = getLocalPart(name);
198
+ return ("xmlns".equals(localName)) ? null : localName;
199
+ }
200
+
201
+ private static Charset utf8 = null;
202
+
203
+ private static Charset getCharsetUTF8() {
204
+ if (utf8 == null) utf8 = Charset.forName("UTF-8");
205
+ return utf8;
206
+ }
207
+
208
+ /**
209
+ * Converts a RubyString in to a Java String. Assumes the
210
+ * RubyString is encoded as UTF-8. This is generally the case for
211
+ * RubyStrings created with getRuntime().newString("java string").
212
+ * It also seems to be the case for strings created within Ruby
213
+ * where $KCODE has not been set.
214
+ *
215
+ * Note that RubyString#toString() decodes the string data as
216
+ * ISO-8859-1 (See org.jruby.util.ByteList.java). This is not
217
+ * what you want if you have any multibyte characters in your
218
+ * UTF-8 string.
219
+ *
220
+ * FIXME: This really needs to be more robust in terms of
221
+ * detecting the encoding and properly converting to a Java
222
+ * String. It's unfortunate that RubyString#toString() doesn't do
223
+ * this for us.
224
+ */
225
+ public static String rubyStringToString(IRubyObject str) {
226
+ //return rubyStringToString(str.convertToString());
227
+ return toJavaString(str.convertToString());
228
+ }
229
+
230
+ private static String toJavaString(RubyString str) {
231
+ ByteList value = str.getByteList();
232
+ try {
233
+ if (str.getRuntime().is1_9()) {
234
+ return new String(value.getUnsafeBytes(), value.begin(), value.length(), str.getEncoding().toString());
235
+ }
236
+ return RubyEncoding.decodeUTF8(value.getUnsafeBytes(), value.begin(), value.length());
237
+ } catch (UnsupportedEncodingException uee) {
238
+ return str.toString();
239
+ }
240
+ }
241
+
242
+ public static String rubyStringToString(RubyString str) {
243
+ ByteList byteList = str.getByteList();
244
+ byte[] data = byteList.unsafeBytes();
245
+ int offset = byteList.begin();
246
+ int len = byteList.length();
247
+ ByteBuffer buf = ByteBuffer.wrap(data, offset, len);
248
+ return getCharsetUTF8().decode(buf).toString();
249
+ }
250
+
251
+ public static String getNodeCompletePath(Node node) {
252
+
253
+ Node cur, tmp, next;
254
+
255
+ // TODO: Rename buffer to path.
256
+ String buffer = "";
257
+ String sep;
258
+ String name;
259
+
260
+ int occur = 0;
261
+ boolean generic;
262
+
263
+ cur = node;
264
+
265
+ do {
266
+ name = "";
267
+ sep = "?";
268
+ occur = 0;
269
+ generic = false;
270
+
271
+ if(cur.getNodeType() == Node.DOCUMENT_NODE) {
272
+ if(buffer.startsWith("/")) break;
273
+
274
+ sep = "/";
275
+ next = null;
276
+ } else if(cur.getNodeType() == Node.ELEMENT_NODE) {
277
+ generic = false;
278
+ sep = "/";
279
+
280
+ name = cur.getLocalName();
281
+ if (name == null) name = cur.getNodeName();
282
+ if(cur.getNamespaceURI() != null) {
283
+ if(cur.getPrefix() != null) {
284
+ name = cur.getPrefix() + ":" + name;
285
+ } else {
286
+ generic = true;
287
+ name = "*";
288
+ }
289
+ }
290
+
291
+ next = cur.getParentNode();
292
+
293
+ /*
294
+ * Thumbler index computation
295
+ */
296
+
297
+ tmp = cur.getPreviousSibling();
298
+
299
+ while(tmp != null) {
300
+ if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
301
+ (generic || fullNamesMatch(tmp, cur))) {
302
+ occur++;
303
+ }
304
+ tmp = tmp.getPreviousSibling();
305
+ }
306
+
307
+ if(occur == 0) {
308
+ tmp = cur.getNextSibling();
309
+
310
+ while(tmp != null && occur == 0) {
311
+ if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
312
+ (generic || fullNamesMatch(tmp,cur))) {
313
+ occur++;
314
+ }
315
+ tmp = tmp.getNextSibling();
316
+ }
317
+
318
+ if(occur != 0) occur = 1;
319
+
320
+ } else {
321
+ occur++;
322
+ }
323
+ } else if(cur.getNodeType() == Node.COMMENT_NODE) {
324
+ sep = "/";
325
+ name = "comment()";
326
+ next = cur.getParentNode();
327
+
328
+ /*
329
+ * Thumbler index computation.
330
+ */
331
+
332
+ tmp = cur.getPreviousSibling();
333
+
334
+ while(tmp != null) {
335
+ if(tmp.getNodeType() == Node.COMMENT_NODE) {
336
+ occur++;
337
+ }
338
+ tmp = tmp.getPreviousSibling();
339
+ }
340
+
341
+ if(occur == 0) {
342
+ tmp = cur.getNextSibling();
343
+ while(tmp != null && occur == 0) {
344
+ if(tmp.getNodeType() == Node.COMMENT_NODE) {
345
+ occur++;
346
+ }
347
+ tmp = tmp.getNextSibling();
348
+ }
349
+ if(occur != 0) occur = 1;
350
+ } else {
351
+ occur = 1;
352
+ }
353
+
354
+ } else if(cur.getNodeType() == Node.TEXT_NODE ||
355
+ cur.getNodeType() == Node.CDATA_SECTION_NODE) {
356
+ // I'm here. gist:129
357
+ // http://gist.github.com/144923
358
+
359
+ sep = "/";
360
+ name = "text()";
361
+ next = cur.getParentNode();
362
+
363
+ /*
364
+ * Thumbler index computation.
365
+ */
366
+
367
+ tmp = cur.getPreviousSibling();
368
+ while(tmp != null) {
369
+ if(tmp.getNodeType() == Node.TEXT_NODE ||
370
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
371
+ occur++;
372
+ }
373
+ tmp = tmp.getPreviousSibling();
374
+ }
375
+
376
+ if(occur == 0) {
377
+ tmp = cur.getNextSibling();
378
+
379
+ while(tmp != null && occur == 0) {
380
+ if(tmp.getNodeType() == Node.TEXT_NODE ||
381
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
382
+ occur++;
383
+ }
384
+ tmp = tmp.getNextSibling();
385
+ }
386
+ } else {
387
+ occur++;
388
+ }
389
+
390
+ } else if(cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
391
+ sep = "/";
392
+ name = "processing-instruction('"+cur.getLocalName()+"')";
393
+ next = cur.getParentNode();
394
+
395
+ /*
396
+ * Thumbler index computation.
397
+ */
398
+
399
+ tmp = cur.getParentNode();
400
+
401
+ while(tmp != null) {
402
+ if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
403
+ tmp.getLocalName().equals(cur.getLocalName())) {
404
+ occur++;
405
+ }
406
+ tmp = tmp.getPreviousSibling();
407
+ }
408
+
409
+ if(occur == 0) {
410
+ tmp = cur.getNextSibling();
411
+
412
+ while(tmp != null && occur == 0) {
413
+ if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
414
+ tmp.getLocalName().equals(cur.getLocalName())){
415
+ occur++;
416
+ }
417
+ tmp = tmp.getNextSibling();
418
+ }
419
+
420
+ if(occur != 0) {
421
+ occur = 1;
422
+ }
423
+
424
+ } else {
425
+ occur++;
426
+ }
427
+
428
+ } else if(cur.getNodeType() == Node.ATTRIBUTE_NODE) {
429
+ sep = "/@";
430
+ name = cur.getLocalName();
431
+
432
+ if(cur.getNamespaceURI() != null) {
433
+ if(cur.getPrefix() != null) {
434
+ name = cur.getPrefix() + ":" + name;
435
+ }
436
+ }
437
+
438
+ next = ((Attr) cur).getOwnerElement();
439
+
440
+ } else {
441
+ next = cur.getParentNode();
442
+ }
443
+
444
+ if(occur == 0){
445
+ buffer = sep+name+buffer;
446
+ } else {
447
+ buffer = sep+name+"["+occur+"]"+buffer;
448
+ }
449
+
450
+ cur = next;
451
+
452
+ } while(cur != null);
453
+
454
+ return buffer;
455
+ }
456
+
457
+ protected static boolean compareTwoNodes(Node m, Node n) {
458
+ return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
459
+ nodesAreEqual(m.getPrefix(), n.getPrefix());
460
+ }
461
+
462
+ protected static boolean fullNamesMatch(Node a, Node b) {
463
+ return a.getNodeName().equals(b.getNodeName());
464
+ }
465
+
466
+ protected static String getFullName(Node n) {
467
+ String lname = n.getLocalName();
468
+ String prefix = n.getPrefix();
469
+ if (lname != null) {
470
+ if (prefix != null)
471
+ return prefix + ":" + lname;
472
+ else
473
+ return lname;
474
+ } else {
475
+ return n.getNodeName();
476
+ }
477
+ }
478
+
479
+ private static boolean nodesAreEqual(Object a, Object b) {
480
+ return (((a == null) && (a == null)) ||
481
+ (a != null) && (b != null) &&
482
+ (b.equals(a)));
483
+ }
484
+
485
+ public static String encodeJavaString(String s) {
486
+
487
+ // From entities.c
488
+ s = s.replaceAll("&", "&amp;");
489
+ s = s.replaceAll("<", "&lt;");
490
+ s = s.replaceAll(">", "&gt;");
491
+ // s = s.replaceAll("\"", "&quot;");
492
+ return s.replaceAll("\r", "&#13;");
493
+ }
494
+
495
+ public static String decodeJavaString(String s) {
496
+ s = s.replaceAll("&amp;", "&");
497
+ s = s.replaceAll("&lt;", "<");
498
+ s = s.replaceAll("&gt;", ">");
499
+ return s.replaceAll("&#13;", "\r");
500
+ }
501
+
502
+ public static boolean isXmlEscaped(String s) {
503
+ if (s == null) return true;
504
+ if (s.contains("<") || s.contains(">") || s.contains("\r")) return false;
505
+ if (s.contains("&") && !s.contains("&amp;")) return false;
506
+ return true;
507
+ }
508
+
509
+ public static String getNodeName(Node node) {
510
+ if(node == null) { System.out.println("node is null"); return ""; }
511
+ String name = node.getNodeName();
512
+ if(name == null) { System.out.println("name is null"); return ""; }
513
+ if(name.equals("#document")) {
514
+ return "document";
515
+ } else if(name.equals("#text")) {
516
+ return "text";
517
+ } else {
518
+ name = getLocalPart(name);
519
+ return (name == null) ? "" : name;
520
+ }
521
+ }
522
+
523
+ public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
524
+ public static boolean isNamespace(Node node) {
525
+ return (XMLNS_URI.equals(node.getNamespaceURI()) ||
526
+ isNamespace(node.getNodeName()));
527
+ }
528
+
529
+ public static boolean isNamespace(String nodeName) {
530
+ return (nodeName.equals("xmlns") || nodeName.startsWith("xmlns:"));
531
+ }
532
+
533
+ public static boolean isNonDefaultNamespace(Node node) {
534
+ return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
535
+ }
536
+
537
+ public static boolean isXmlBase(String attrName) {
538
+ return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
539
+ }
540
+
541
+ public static String newQName(String newPrefix, Node node) {
542
+ if(newPrefix == null) {
543
+ return node.getLocalName();
544
+ } else {
545
+ return newPrefix + ":" + node.getLocalName();
546
+ }
547
+ }
548
+
549
+ public static RubyArray nodeListToRubyArray(Ruby ruby, NodeList nodes) {
550
+ RubyArray array = RubyArray.newArray(ruby, nodes.getLength());
551
+ return nodeListToRubyArray(ruby, nodes, array);
552
+ }
553
+
554
+ public static RubyArray nodeListToRubyArray(Ruby ruby, NodeList nodes, RubyArray array) {
555
+ for(int i = 0; i < nodes.getLength(); i++) {
556
+ array.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.item(i)));
557
+ }
558
+ return array;
559
+ }
560
+
561
+ public static RubyArray nodeArrayToRubyArray(Ruby ruby, Node[] nodes) {
562
+ RubyArray n = RubyArray.newArray(ruby, nodes.length);
563
+ for(int i = 0; i < nodes.length; i++) {
564
+ n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
565
+ }
566
+ return n;
567
+ }
568
+
569
+ public static RubyArray namedNodeMapToRubyArray(Ruby ruby, NamedNodeMap map) {
570
+ RubyArray n = RubyArray.newArray(ruby, map.getLength());
571
+ for(int i = 0; i < map.getLength(); i++) {
572
+ n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, map.item(i)));
573
+ }
574
+ return n;
575
+ }
576
+
577
+ public static String guessEncoding(Ruby ruby) {
578
+ String name = null;
579
+ if (name == null) name = System.getProperty("file.encoding");
580
+ if (name == null) name = "UTF-8";
581
+ return name;
582
+ }
583
+ }