Nokogiri_precompiled_aarch64_dedshit 1.14.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +44 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/java/nokogiri/Html4Document.java +157 -0
  9. data/ext/java/nokogiri/Html4ElementDescription.java +133 -0
  10. data/ext/java/nokogiri/Html4EntityLookup.java +63 -0
  11. data/ext/java/nokogiri/Html4SaxParserContext.java +289 -0
  12. data/ext/java/nokogiri/Html4SaxPushParser.java +213 -0
  13. data/ext/java/nokogiri/NokogiriService.java +613 -0
  14. data/ext/java/nokogiri/XmlAttr.java +154 -0
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +119 -0
  16. data/ext/java/nokogiri/XmlCdata.java +60 -0
  17. data/ext/java/nokogiri/XmlComment.java +77 -0
  18. data/ext/java/nokogiri/XmlDocument.java +705 -0
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +163 -0
  20. data/ext/java/nokogiri/XmlDtd.java +516 -0
  21. data/ext/java/nokogiri/XmlElement.java +44 -0
  22. data/ext/java/nokogiri/XmlElementContent.java +412 -0
  23. data/ext/java/nokogiri/XmlElementDecl.java +148 -0
  24. data/ext/java/nokogiri/XmlEntityDecl.java +151 -0
  25. data/ext/java/nokogiri/XmlEntityReference.java +79 -0
  26. data/ext/java/nokogiri/XmlNamespace.java +193 -0
  27. data/ext/java/nokogiri/XmlNode.java +1938 -0
  28. data/ext/java/nokogiri/XmlNodeSet.java +463 -0
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +79 -0
  30. data/ext/java/nokogiri/XmlReader.java +615 -0
  31. data/ext/java/nokogiri/XmlRelaxng.java +133 -0
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +329 -0
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +288 -0
  34. data/ext/java/nokogiri/XmlSchema.java +423 -0
  35. data/ext/java/nokogiri/XmlSyntaxError.java +137 -0
  36. data/ext/java/nokogiri/XmlText.java +90 -0
  37. data/ext/java/nokogiri/XmlXpathContext.java +305 -0
  38. data/ext/java/nokogiri/XsltStylesheet.java +368 -0
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +13 -0
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +27 -0
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +178 -0
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +99 -0
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +140 -0
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +65 -0
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +339 -0
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +817 -0
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +228 -0
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +110 -0
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +86 -0
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +107 -0
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +62 -0
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +165 -0
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +50 -0
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +37 -0
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +70 -0
  57. data/ext/java/nokogiri/internals/ParserContext.java +262 -0
  58. data/ext/java/nokogiri/internals/ReaderNode.java +564 -0
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +865 -0
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +50 -0
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +174 -0
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +11 -0
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +265 -0
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +40 -0
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +122 -0
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +178 -0
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +43 -0
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +106 -0
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +278 -0
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +664 -0
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +45 -0
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +45 -0
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +388 -0
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +308 -0
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +47 -0
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +51 -0
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +51 -0
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +50 -0
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +660 -0
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +194 -0
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +77 -0
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +45 -0
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +325 -0
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +106 -0
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +86 -0
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +181 -0
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +87 -0
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +452 -0
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +52 -0
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +190 -0
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +540 -0
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1712 -0
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +737 -0
  94. data/ext/nokogiri/depend +38 -0
  95. data/ext/nokogiri/extconf.rb +1086 -0
  96. data/ext/nokogiri/gumbo.c +594 -0
  97. data/ext/nokogiri/html4_document.c +167 -0
  98. data/ext/nokogiri/html4_element_description.c +294 -0
  99. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  100. data/ext/nokogiri/html4_sax_parser_context.c +116 -0
  101. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  102. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  103. data/ext/nokogiri/nokogiri.c +265 -0
  104. data/ext/nokogiri/nokogiri.h +235 -0
  105. data/ext/nokogiri/test_global_handlers.c +42 -0
  106. data/ext/nokogiri/xml_attr.c +103 -0
  107. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  108. data/ext/nokogiri/xml_cdata.c +57 -0
  109. data/ext/nokogiri/xml_comment.c +62 -0
  110. data/ext/nokogiri/xml_document.c +689 -0
  111. data/ext/nokogiri/xml_document_fragment.c +44 -0
  112. data/ext/nokogiri/xml_dtd.c +210 -0
  113. data/ext/nokogiri/xml_element_content.c +128 -0
  114. data/ext/nokogiri/xml_element_decl.c +69 -0
  115. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  116. data/ext/nokogiri/xml_entity_decl.c +112 -0
  117. data/ext/nokogiri/xml_entity_reference.c +50 -0
  118. data/ext/nokogiri/xml_namespace.c +186 -0
  119. data/ext/nokogiri/xml_node.c +2426 -0
  120. data/ext/nokogiri/xml_node_set.c +496 -0
  121. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  122. data/ext/nokogiri/xml_reader.c +794 -0
  123. data/ext/nokogiri/xml_relax_ng.c +164 -0
  124. data/ext/nokogiri/xml_sax_parser.c +316 -0
  125. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  126. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  127. data/ext/nokogiri/xml_schema.c +260 -0
  128. data/ext/nokogiri/xml_syntax_error.c +85 -0
  129. data/ext/nokogiri/xml_text.c +48 -0
  130. data/ext/nokogiri/xml_xpath_context.c +415 -0
  131. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  132. data/gumbo-parser/CHANGES.md +63 -0
  133. data/gumbo-parser/Makefile +111 -0
  134. data/gumbo-parser/THANKS +27 -0
  135. data/gumbo-parser/src/Makefile +34 -0
  136. data/gumbo-parser/src/README.md +41 -0
  137. data/gumbo-parser/src/ascii.c +75 -0
  138. data/gumbo-parser/src/ascii.h +115 -0
  139. data/gumbo-parser/src/attribute.c +42 -0
  140. data/gumbo-parser/src/attribute.h +17 -0
  141. data/gumbo-parser/src/char_ref.c +22225 -0
  142. data/gumbo-parser/src/char_ref.h +29 -0
  143. data/gumbo-parser/src/char_ref.rl +2154 -0
  144. data/gumbo-parser/src/error.c +626 -0
  145. data/gumbo-parser/src/error.h +148 -0
  146. data/gumbo-parser/src/foreign_attrs.c +104 -0
  147. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  148. data/gumbo-parser/src/insertion_mode.h +33 -0
  149. data/gumbo-parser/src/macros.h +91 -0
  150. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  151. data/gumbo-parser/src/parser.c +4878 -0
  152. data/gumbo-parser/src/parser.h +41 -0
  153. data/gumbo-parser/src/replacement.h +33 -0
  154. data/gumbo-parser/src/string_buffer.c +103 -0
  155. data/gumbo-parser/src/string_buffer.h +68 -0
  156. data/gumbo-parser/src/string_piece.c +48 -0
  157. data/gumbo-parser/src/svg_attrs.c +174 -0
  158. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  159. data/gumbo-parser/src/svg_tags.c +137 -0
  160. data/gumbo-parser/src/svg_tags.gperf +55 -0
  161. data/gumbo-parser/src/tag.c +223 -0
  162. data/gumbo-parser/src/tag_lookup.c +382 -0
  163. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  164. data/gumbo-parser/src/tag_lookup.h +13 -0
  165. data/gumbo-parser/src/token_buffer.c +79 -0
  166. data/gumbo-parser/src/token_buffer.h +71 -0
  167. data/gumbo-parser/src/token_type.h +17 -0
  168. data/gumbo-parser/src/tokenizer.c +3463 -0
  169. data/gumbo-parser/src/tokenizer.h +112 -0
  170. data/gumbo-parser/src/tokenizer_states.h +339 -0
  171. data/gumbo-parser/src/utf8.c +245 -0
  172. data/gumbo-parser/src/utf8.h +164 -0
  173. data/gumbo-parser/src/util.c +66 -0
  174. data/gumbo-parser/src/util.h +34 -0
  175. data/gumbo-parser/src/vector.c +111 -0
  176. data/gumbo-parser/src/vector.h +45 -0
  177. data/lib/nokogiri/class_resolver.rb +67 -0
  178. data/lib/nokogiri/css/node.rb +54 -0
  179. data/lib/nokogiri/css/parser.rb +770 -0
  180. data/lib/nokogiri/css/parser.y +277 -0
  181. data/lib/nokogiri/css/parser_extras.rb +96 -0
  182. data/lib/nokogiri/css/syntax_error.rb +9 -0
  183. data/lib/nokogiri/css/tokenizer.rb +155 -0
  184. data/lib/nokogiri/css/tokenizer.rex +56 -0
  185. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  186. data/lib/nokogiri/css.rb +66 -0
  187. data/lib/nokogiri/decorators/slop.rb +44 -0
  188. data/lib/nokogiri/encoding_handler.rb +57 -0
  189. data/lib/nokogiri/extension.rb +32 -0
  190. data/lib/nokogiri/gumbo.rb +15 -0
  191. data/lib/nokogiri/html.rb +48 -0
  192. data/lib/nokogiri/html4/builder.rb +37 -0
  193. data/lib/nokogiri/html4/document.rb +214 -0
  194. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  195. data/lib/nokogiri/html4/element_description.rb +25 -0
  196. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  197. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  198. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  199. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  200. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  201. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  202. data/lib/nokogiri/html4.rb +47 -0
  203. data/lib/nokogiri/html5/document.rb +168 -0
  204. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  205. data/lib/nokogiri/html5/node.rb +98 -0
  206. data/lib/nokogiri/html5.rb +389 -0
  207. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  208. data/lib/nokogiri/jruby/isorelax/isorelax/20030108/isorelax-20030108.jar +0 -0
  209. data/lib/nokogiri/jruby/net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar +0 -0
  210. data/lib/nokogiri/jruby/net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar +0 -0
  211. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  212. data/lib/nokogiri/jruby/nu/validator/jing/20200702VNU/jing-20200702VNU.jar +0 -0
  213. data/lib/nokogiri/jruby/org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar +0 -0
  214. data/lib/nokogiri/jruby/xalan/serializer/2.7.3/serializer-2.7.3.jar +0 -0
  215. data/lib/nokogiri/jruby/xalan/xalan/2.7.3/xalan-2.7.3.jar +0 -0
  216. data/lib/nokogiri/jruby/xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar +0 -0
  217. data/lib/nokogiri/jruby/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar +0 -0
  218. data/lib/nokogiri/syntax_error.rb +6 -0
  219. data/lib/nokogiri/version/constant.rb +6 -0
  220. data/lib/nokogiri/version/info.rb +223 -0
  221. data/lib/nokogiri/version.rb +4 -0
  222. data/lib/nokogiri/xml/attr.rb +66 -0
  223. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  224. data/lib/nokogiri/xml/builder.rb +487 -0
  225. data/lib/nokogiri/xml/cdata.rb +13 -0
  226. data/lib/nokogiri/xml/character_data.rb +9 -0
  227. data/lib/nokogiri/xml/document.rb +471 -0
  228. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  229. data/lib/nokogiri/xml/dtd.rb +34 -0
  230. data/lib/nokogiri/xml/element_content.rb +38 -0
  231. data/lib/nokogiri/xml/element_decl.rb +15 -0
  232. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  233. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  234. data/lib/nokogiri/xml/namespace.rb +58 -0
  235. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  236. data/lib/nokogiri/xml/node.rb +1563 -0
  237. data/lib/nokogiri/xml/node_set.rb +447 -0
  238. data/lib/nokogiri/xml/notation.rb +19 -0
  239. data/lib/nokogiri/xml/parse_options.rb +213 -0
  240. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  241. data/lib/nokogiri/xml/pp/node.rb +57 -0
  242. data/lib/nokogiri/xml/pp.rb +4 -0
  243. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  244. data/lib/nokogiri/xml/reader.rb +105 -0
  245. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  246. data/lib/nokogiri/xml/sax/document.rb +167 -0
  247. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  248. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  249. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  250. data/lib/nokogiri/xml/sax.rb +6 -0
  251. data/lib/nokogiri/xml/schema.rb +73 -0
  252. data/lib/nokogiri/xml/searchable.rb +270 -0
  253. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  254. data/lib/nokogiri/xml/text.rb +11 -0
  255. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  256. data/lib/nokogiri/xml/xpath.rb +21 -0
  257. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  258. data/lib/nokogiri/xml.rb +76 -0
  259. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  260. data/lib/nokogiri/xslt.rb +65 -0
  261. data/lib/nokogiri.rb +120 -0
  262. data/lib/xsd/xmlparser/nokogiri.rb +106 -0
  263. metadata +391 -0
@@ -0,0 +1,817 @@
1
+ package nokogiri.internals;
2
+
3
+ import java.io.ByteArrayInputStream;
4
+ import java.io.File;
5
+ import java.lang.reflect.InvocationTargetException;
6
+ import java.lang.reflect.Method;
7
+ import java.nio.ByteBuffer;
8
+ import java.nio.CharBuffer;
9
+ import java.nio.charset.Charset;
10
+ import java.util.List;
11
+ import java.util.Set;
12
+ import java.util.regex.Matcher;
13
+ import java.util.regex.Pattern;
14
+
15
+ import org.jruby.Ruby;
16
+ import org.jruby.RubyArray;
17
+ import org.jruby.RubyClass;
18
+ import org.jruby.RubyString;
19
+ import org.jruby.runtime.ThreadContext;
20
+ import org.jruby.runtime.builtin.IRubyObject;
21
+ import org.jruby.util.ByteList;
22
+ import org.w3c.dom.Attr;
23
+ import org.w3c.dom.DOMException;
24
+ import org.w3c.dom.Document;
25
+ import org.w3c.dom.Node;
26
+ import org.w3c.dom.NodeList;
27
+
28
+ import nokogiri.Html4Document;
29
+ import nokogiri.NokogiriService;
30
+ import nokogiri.XmlAttr;
31
+ import nokogiri.XmlCdata;
32
+ import nokogiri.XmlComment;
33
+ import nokogiri.XmlDocument;
34
+ import nokogiri.XmlDtd;
35
+ import nokogiri.XmlElement;
36
+ import nokogiri.XmlEntityReference;
37
+ import nokogiri.XmlNamespace;
38
+ import nokogiri.XmlNode;
39
+ import nokogiri.XmlProcessingInstruction;
40
+ import nokogiri.XmlText;
41
+ import nokogiri.XmlXpathContext;
42
+
43
+ /**
44
+ * A class for various utility methods.
45
+ *
46
+ * @author serabe
47
+ * @author Patrick Mahoney <pat@polycrystal.org>
48
+ * @author Yoko Harada <yokolet@gmail.com>
49
+ */
50
+ public class NokogiriHelpers
51
+ {
52
+ public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
53
+ public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
54
+ public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
55
+
56
+ public static XmlNode
57
+ getCachedNode(Node node)
58
+ {
59
+ return (XmlNode) node.getUserData(CACHED_NODE);
60
+ }
61
+
62
+ public static void
63
+ clearCachedNode(Node node)
64
+ {
65
+ node.setUserData(CACHED_NODE, null, null);
66
+ }
67
+
68
+ public static void
69
+ clearXpathContext(Node node)
70
+ {
71
+ if (node == null) { return; }
72
+
73
+ Node ownerDocument = node.getOwnerDocument();
74
+ if (ownerDocument == null) {
75
+ ownerDocument = node;
76
+ }
77
+ ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
78
+ }
79
+
80
+ /**
81
+ * Get the XmlNode associated with the underlying
82
+ * <code>node</code>. Creates a new XmlNode (or appropriate subclass)
83
+ * or XmlNamespace wrapping <code>node</code> if there is no cached
84
+ * value.
85
+ */
86
+ public static IRubyObject
87
+ getCachedNodeOrCreate(Ruby runtime, Node node)
88
+ {
89
+ if (node == null) { return runtime.getNil(); }
90
+ if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
91
+ XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
92
+ if (!(xmlDocument instanceof Html4Document)) {
93
+ String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
94
+ String href = ((Attr) node).getValue();
95
+ XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
96
+ if (xmlNamespace != null) { return xmlNamespace; }
97
+ return XmlNamespace.createFromAttr(runtime, (Attr) node);
98
+ }
99
+ }
100
+ XmlNode xmlNode = getCachedNode(node);
101
+ if (xmlNode == null) {
102
+ xmlNode = (XmlNode) constructNode(runtime, node);
103
+ node.setUserData(CACHED_NODE, xmlNode, null);
104
+ }
105
+ return xmlNode;
106
+ }
107
+
108
+ /**
109
+ * Construct a new XmlNode wrapping <code>node</code>. The proper
110
+ * subclass of XmlNode is chosen based on the type of
111
+ * <code>node</code>.
112
+ */
113
+ public static IRubyObject
114
+ constructNode(Ruby runtime, Node node)
115
+ {
116
+ if (node == null) { return runtime.getNil(); }
117
+ // this is slow; need a way to cache nokogiri classes/modules somewhere
118
+ switch (node.getNodeType()) {
119
+ case Node.ELEMENT_NODE:
120
+ XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
121
+ "Nokogiri::XML::Element"));
122
+ xmlElement.setNode(runtime, node);
123
+ return xmlElement;
124
+ case Node.ATTRIBUTE_NODE:
125
+ XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
126
+ "Nokogiri::XML::Attr"));
127
+ xmlAttr.setNode(runtime, node);
128
+ return xmlAttr;
129
+ case Node.TEXT_NODE:
130
+ XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
131
+ "Nokogiri::XML::Text"));
132
+ xmlText.setNode(runtime, node);
133
+ return xmlText;
134
+ case Node.COMMENT_NODE:
135
+ XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
136
+ "Nokogiri::XML::Comment"));
137
+ xmlComment.setNode(runtime, node);
138
+ return xmlComment;
139
+ case Node.ENTITY_NODE:
140
+ return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
141
+ case Node.ENTITY_REFERENCE_NODE:
142
+ XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime,
143
+ getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
144
+ xmlEntityRef.setNode(runtime, node);
145
+ return xmlEntityRef;
146
+ case Node.PROCESSING_INSTRUCTION_NODE:
147
+ XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction)
148
+ NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
149
+ "Nokogiri::XML::ProcessingInstruction"));
150
+ xmlProcessingInstruction.setNode(runtime, node);
151
+ return xmlProcessingInstruction;
152
+ case Node.CDATA_SECTION_NODE:
153
+ XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
154
+ "Nokogiri::XML::CDATA"));
155
+ xmlCdata.setNode(runtime, node);
156
+ return xmlCdata;
157
+ case Node.DOCUMENT_NODE:
158
+ XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime,
159
+ getNokogiriClass(runtime, "Nokogiri::XML::Document"));
160
+ xmlDocument.setDocumentNode(runtime, (Document) node);
161
+ return xmlDocument;
162
+ case Node.DOCUMENT_TYPE_NODE:
163
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
164
+ "Nokogiri::XML::DTD"));
165
+ xmlDtd.setNode(runtime, node);
166
+ return xmlDtd;
167
+ default:
168
+ XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
169
+ "Nokogiri::XML::Node"));
170
+ xmlNode.setNode(runtime, node);
171
+ return xmlNode;
172
+ }
173
+ }
174
+
175
+ public static RubyClass
176
+ getNokogiriClass(Ruby ruby, String name)
177
+ {
178
+ return NokogiriService.getNokogiriClassCache(ruby).get(name);
179
+ }
180
+
181
+ public static IRubyObject
182
+ stringOrNil(Ruby runtime, String str)
183
+ {
184
+ return str == null ? runtime.getNil() : convertString(runtime, str);
185
+ }
186
+
187
+ public static IRubyObject
188
+ stringOrNil(Ruby runtime, CharSequence str)
189
+ {
190
+ return str == null ? runtime.getNil() : convertString(runtime, str);
191
+ }
192
+
193
+ public static IRubyObject
194
+ stringOrNil(Ruby runtime, byte[] bytes)
195
+ {
196
+ return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
197
+ }
198
+
199
+ public static IRubyObject
200
+ stringOrBlank(Ruby runtime, String str)
201
+ {
202
+ return str == null ? runtime.newString() : convertString(runtime, str);
203
+ }
204
+
205
+ public static RubyString
206
+ convertString(Ruby runtime, String str)
207
+ {
208
+ return RubyString.newUTF8String(runtime, str);
209
+ }
210
+
211
+ public static RubyString
212
+ convertString(Ruby runtime, CharSequence str)
213
+ {
214
+ return RubyString.newUTF8String(runtime, str);
215
+ }
216
+
217
+ /**
218
+ * Convert <code>s</code> to a RubyString, or if s is null or
219
+ * empty return RubyNil.
220
+ */
221
+ public static IRubyObject
222
+ nonEmptyStringOrNil(Ruby runtime, String s)
223
+ {
224
+ if (s == null || s.length() == 0) { return runtime.getNil(); }
225
+ return RubyString.newString(runtime, s);
226
+ }
227
+
228
+ /**
229
+ * Return the prefix of a qualified name like "prefix:local".
230
+ * Returns null if there is no prefix.
231
+ */
232
+ public static String
233
+ getPrefix(String qName)
234
+ {
235
+ if (qName == null) { return null; }
236
+
237
+ final int pos = qName.indexOf(':');
238
+ return pos > 0 ? qName.substring(0, pos) : null;
239
+ }
240
+
241
+ /**
242
+ * Return the local part of a qualified name like "prefix:local".
243
+ * Returns <code>qName</code> if there is no prefix.
244
+ */
245
+ public static String
246
+ getLocalPart(String qName)
247
+ {
248
+ if (qName == null) { return null; }
249
+
250
+ final int pos = qName.indexOf(':');
251
+ return pos > 0 ? qName.substring(pos + 1) : qName;
252
+ }
253
+
254
+ public static String
255
+ getLocalNameForNamespace(String name, String defValue)
256
+ {
257
+ String localName = getLocalPart(name);
258
+ return ("xmlns".equals(localName)) ? defValue : localName;
259
+ }
260
+
261
+ public static String
262
+ rubyStringToString(IRubyObject str)
263
+ {
264
+ if (str.isNil()) { return null; }
265
+ return str.convertToString().decodeString();
266
+ }
267
+
268
+ public static String
269
+ rubyStringToString(RubyString str)
270
+ {
271
+ return str.decodeString(); // if encoding UTF-8 will decode UTF-8
272
+ }
273
+
274
+ public static ByteArrayInputStream
275
+ stringBytesToStream(final IRubyObject str)
276
+ {
277
+ if (str instanceof RubyString || str.respondsTo("to_str")) {
278
+ final ByteList bytes = str.convertToString().getByteList();
279
+ return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
280
+ }
281
+ return null;
282
+ }
283
+
284
+ public static String
285
+ getNodeCompletePath(Node node)
286
+ {
287
+
288
+ Node cur, tmp, next;
289
+
290
+ String buffer = "";
291
+
292
+ cur = node;
293
+
294
+ do {
295
+ String name = "";
296
+ String sep = "?";
297
+ int occur = 0;
298
+ boolean generic = false;
299
+
300
+ if (cur.getNodeType() == Node.DOCUMENT_NODE) {
301
+ if (buffer.startsWith("/")) { break; }
302
+
303
+ sep = "/";
304
+ next = null;
305
+ } else if (cur.getNodeType() == Node.ELEMENT_NODE) {
306
+ generic = false;
307
+ sep = "/";
308
+
309
+ name = cur.getLocalName();
310
+ if (name == null) { name = cur.getNodeName(); }
311
+ if (cur.getNamespaceURI() != null) {
312
+ if (cur.getPrefix() != null) {
313
+ name = cur.getPrefix() + ":" + name;
314
+ } else {
315
+ generic = true;
316
+ name = "*";
317
+ }
318
+ }
319
+
320
+ next = cur.getParentNode();
321
+
322
+ /*
323
+ * Thumbler index computation
324
+ */
325
+
326
+ tmp = cur.getPreviousSibling();
327
+
328
+ while (tmp != null) {
329
+ if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
330
+ (generic || fullNamesMatch(tmp, cur))) {
331
+ occur++;
332
+ }
333
+ tmp = tmp.getPreviousSibling();
334
+ }
335
+
336
+ if (occur == 0) {
337
+ tmp = cur.getNextSibling();
338
+
339
+ while (tmp != null && occur == 0) {
340
+ if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
341
+ (generic || fullNamesMatch(tmp, cur))) {
342
+ occur++;
343
+ }
344
+ tmp = tmp.getNextSibling();
345
+ }
346
+
347
+ if (occur != 0) { occur = 1; }
348
+
349
+ } else {
350
+ occur++;
351
+ }
352
+ } else if (cur.getNodeType() == Node.COMMENT_NODE) {
353
+ sep = "/";
354
+ name = "comment()";
355
+ next = cur.getParentNode();
356
+
357
+ /*
358
+ * Thumbler index computation.
359
+ */
360
+
361
+ tmp = cur.getPreviousSibling();
362
+
363
+ while (tmp != null) {
364
+ if (tmp.getNodeType() == Node.COMMENT_NODE) {
365
+ occur++;
366
+ }
367
+ tmp = tmp.getPreviousSibling();
368
+ }
369
+
370
+ if (occur == 0) {
371
+ tmp = cur.getNextSibling();
372
+ while (tmp != null && occur == 0) {
373
+ if (tmp.getNodeType() == Node.COMMENT_NODE) {
374
+ occur++;
375
+ }
376
+ tmp = tmp.getNextSibling();
377
+ }
378
+ if (occur != 0) { occur = 1; }
379
+ } else {
380
+ occur = 1;
381
+ }
382
+
383
+ } else if (cur.getNodeType() == Node.TEXT_NODE ||
384
+ cur.getNodeType() == Node.CDATA_SECTION_NODE) {
385
+ // I'm here. gist:129
386
+ // http://gist.github.com/144923
387
+
388
+ sep = "/";
389
+ name = "text()";
390
+ next = cur.getParentNode();
391
+
392
+ /*
393
+ * Thumbler index computation.
394
+ */
395
+
396
+ tmp = cur.getPreviousSibling();
397
+ while (tmp != null) {
398
+ if (tmp.getNodeType() == Node.TEXT_NODE ||
399
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
400
+ occur++;
401
+ }
402
+ tmp = tmp.getPreviousSibling();
403
+ }
404
+
405
+ if (occur == 0) {
406
+ tmp = cur.getNextSibling();
407
+
408
+ while (tmp != null && occur == 0) {
409
+ if (tmp.getNodeType() == Node.TEXT_NODE ||
410
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
411
+ occur++;
412
+ }
413
+ tmp = tmp.getNextSibling();
414
+ }
415
+ } else {
416
+ occur++;
417
+ }
418
+
419
+ } else if (cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
420
+ sep = "/";
421
+ name = "processing-instruction('" + cur.getLocalName() + "')";
422
+ next = cur.getParentNode();
423
+
424
+ /*
425
+ * Thumbler index computation.
426
+ */
427
+
428
+ tmp = cur.getParentNode();
429
+
430
+ while (tmp != null) {
431
+ if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
432
+ tmp.getLocalName().equals(cur.getLocalName())) {
433
+ occur++;
434
+ }
435
+ tmp = tmp.getPreviousSibling();
436
+ }
437
+
438
+ if (occur == 0) {
439
+ tmp = cur.getNextSibling();
440
+
441
+ while (tmp != null && occur == 0) {
442
+ if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
443
+ tmp.getLocalName().equals(cur.getLocalName())) {
444
+ occur++;
445
+ }
446
+ tmp = tmp.getNextSibling();
447
+ }
448
+
449
+ if (occur != 0) {
450
+ occur = 1;
451
+ }
452
+
453
+ } else {
454
+ occur++;
455
+ }
456
+
457
+ } else if (cur.getNodeType() == Node.ATTRIBUTE_NODE) {
458
+ sep = "/@";
459
+ name = cur.getLocalName();
460
+
461
+ if (cur.getNamespaceURI() != null) {
462
+ if (cur.getPrefix() != null) {
463
+ name = cur.getPrefix() + ":" + name;
464
+ }
465
+ }
466
+
467
+ next = ((Attr) cur).getOwnerElement();
468
+
469
+ } else {
470
+ next = cur.getParentNode();
471
+ }
472
+
473
+ if (occur == 0) {
474
+ buffer = sep + name + buffer;
475
+ } else {
476
+ buffer = sep + name + "[" + occur + "]" + buffer;
477
+ }
478
+
479
+ cur = next;
480
+
481
+ } while (cur != null);
482
+
483
+ return buffer;
484
+ }
485
+
486
+ static boolean
487
+ compareTwoNodes(Node m, Node n)
488
+ {
489
+ return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
490
+ nodesAreEqual(m.getPrefix(), n.getPrefix());
491
+ }
492
+
493
+ private static boolean
494
+ nodesAreEqual(Object a, Object b)
495
+ {
496
+ return (((a == null) && (b == null)) ||
497
+ ((a != null) && (b != null) && (b.equals(a))));
498
+ }
499
+
500
+ private static boolean
501
+ fullNamesMatch(Node a, Node b)
502
+ {
503
+ return a.getNodeName().equals(b.getNodeName());
504
+ }
505
+
506
+ private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
507
+ private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
508
+ private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
509
+ private static final String[] decoded = {"&", ">", "<", "\r"};
510
+
511
+ private static StringBuffer
512
+ convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars)
513
+ {
514
+ Matcher matcher = ptn.matcher(input);
515
+ boolean result = matcher.find();
516
+ StringBuffer sb = new StringBuffer(input.length() + 8);
517
+ while (result) {
518
+ String matched = matcher.group();
519
+ String replacement = "";
520
+ for (int i = 0; i < oldChars.length; i++) {
521
+ if (matched.contains(oldChars[i])) {
522
+ replacement = matched.replace(oldChars[i], newChars[i]);
523
+ break;
524
+ }
525
+ }
526
+ matcher.appendReplacement(sb, replacement);
527
+ result = matcher.find();
528
+ }
529
+ matcher.appendTail(sb);
530
+ return sb;
531
+ }
532
+
533
+ public static CharSequence
534
+ encodeJavaString(CharSequence str)
535
+ {
536
+ return convert(decoded_pattern, str, decoded, encoded);
537
+ }
538
+
539
+ public static CharSequence
540
+ decodeJavaString(CharSequence str)
541
+ {
542
+ return convert(encoded_pattern, str, encoded, decoded);
543
+ }
544
+
545
+ public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
546
+ public static boolean
547
+ isNamespace(Node node)
548
+ {
549
+ return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
550
+ }
551
+
552
+ public static boolean
553
+ isNamespace(String nodeName)
554
+ {
555
+ return (nodeName.startsWith("xmlns"));
556
+ }
557
+
558
+ public static boolean
559
+ isNonDefaultNamespace(Node node)
560
+ {
561
+ return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
562
+ }
563
+
564
+ public static boolean
565
+ isXmlBase(String attrName)
566
+ {
567
+ return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
568
+ }
569
+
570
+ public static boolean
571
+ isBlank(IRubyObject obj)
572
+ {
573
+ if (!(obj instanceof XmlText)) { return false; }
574
+
575
+ CharSequence content = ((XmlNode) obj).getContentImpl();
576
+ return content == null || isBlank(content);
577
+ }
578
+
579
+ public static boolean
580
+ isBlank(CharSequence str)
581
+ {
582
+ int len = str.length();
583
+ int beg = 0;
584
+ while ((beg < len) && (str.charAt(beg) <= ' ')) { beg++; }
585
+ return beg == len;
586
+ }
587
+
588
+ public static boolean
589
+ isBlank(String str)
590
+ {
591
+ return str.isEmpty() || isBlank((CharSequence) str);
592
+ }
593
+
594
+ public static boolean
595
+ isNullOrEmpty(String str)
596
+ {
597
+ return str == null || str.isEmpty();
598
+ }
599
+
600
+ public static CharSequence
601
+ canonicalizeWhitespace(CharSequence str)
602
+ {
603
+ final int len = str.length();
604
+ StringBuilder sb = new StringBuilder(len);
605
+ boolean newline_added = false;
606
+ for (int i = 0; i < len; i++) {
607
+ char c = str.charAt(i);
608
+ if (c == '\n') {
609
+ if (! newline_added) {
610
+ sb.append(c);
611
+ newline_added = true;
612
+ }
613
+ } else {
614
+ sb.append(c);
615
+ }
616
+ }
617
+ return sb;
618
+ }
619
+
620
+ public static String
621
+ newQName(String newPrefix, Node node)
622
+ {
623
+ String tagName = getLocalPart(node.getNodeName());
624
+ if (newPrefix == null) { return tagName; }
625
+ return newPrefix + ':' + tagName;
626
+ }
627
+
628
+ public static IRubyObject[]
629
+ nodeListToRubyArray(Ruby runtime, NodeList nodes)
630
+ {
631
+ IRubyObject[] array = new IRubyObject[nodes.getLength()];
632
+ for (int i = 0; i < nodes.getLength(); i++) {
633
+ array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
634
+ }
635
+ return array;
636
+ }
637
+
638
+ public static IRubyObject[]
639
+ nodeListToArray(Ruby ruby, List<Node> nodes)
640
+ {
641
+ IRubyObject[] result = new IRubyObject[nodes.size()];
642
+ for (int i = 0; i < result.length; i++) {
643
+ result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
644
+ }
645
+ return result;
646
+ }
647
+
648
+ public static RubyArray<?>
649
+ nodeArrayToRubyArray(Ruby ruby, Node[] nodes)
650
+ {
651
+ RubyArray<?> n = RubyArray.newArray(ruby, nodes.length);
652
+ for (int i = 0; i < nodes.length; i++) {
653
+ n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
654
+ }
655
+ return n;
656
+ }
657
+
658
+ public static String
659
+ getValidEncodingOrNull(IRubyObject encoding)
660
+ {
661
+ if (encoding.isNil()) { return null; } // charsetNames does not like contains(null)
662
+ String enc = rubyStringToString(encoding.convertToString());
663
+ if (CharsetNames.contains(enc)) { return enc; }
664
+ return null;
665
+ }
666
+
667
+ public static String
668
+ getValidEncoding(IRubyObject encoding)
669
+ {
670
+ String validEncoding = getValidEncodingOrNull(encoding);
671
+ if (validEncoding != null) { return validEncoding; }
672
+ return Charset.defaultCharset().name();
673
+ }
674
+
675
+ private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
676
+
677
+ public static String
678
+ adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId)
679
+ {
680
+ if (systemId == null) { return systemId; }
681
+ File file = new File(systemId);
682
+ if (file.isAbsolute()) { return systemId; }
683
+ String path = resolveSystemId(baseURI, systemId);
684
+ if (path != null) { return path; }
685
+ path = resolveSystemId(currentDir, systemId);
686
+ if (path != null) { return path; }
687
+ return resolveSystemId(scriptFileName, systemId);
688
+ }
689
+
690
+ private static String
691
+ resolveSystemId(String baseName, String systemId)
692
+ {
693
+ if (baseName == null || baseName.length() < 1) { return null; }
694
+ String parentName;
695
+ baseName = baseName.replace("%20", " ");
696
+ File base = new File(baseName);
697
+ if (base.isDirectory()) { parentName = baseName; }
698
+ else { parentName = base.getParent(); }
699
+ if (parentName == null) { return null; }
700
+ if (parentName.toLowerCase().startsWith("file:")) { parentName = parentName.substring("file:".length()); }
701
+ File dtdFile = new File(parentName + "/" + systemId);
702
+ if (dtdFile.exists()) { return dtdFile.getPath(); }
703
+ return null;
704
+ }
705
+
706
+ private static final Charset UTF8 = Charset.forName("UTF-8");
707
+
708
+ public static boolean
709
+ isUTF8(String encoding)
710
+ {
711
+ if (encoding == null) { return true; } // no need to convert encoding
712
+
713
+ if ("UTF-8".equals(encoding)) { return true; }
714
+ return UTF8.aliases().contains(encoding);
715
+ }
716
+
717
+ public static ByteBuffer
718
+ convertEncoding(Charset output_charset, CharSequence input_string)
719
+ {
720
+ return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
721
+ }
722
+
723
+ public static CharSequence
724
+ convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
725
+ {
726
+ if (!(doc instanceof Html4Document)) { return str; }
727
+ String parsed_encoding = ((Html4Document)doc).getPraedEncoding();
728
+ if (parsed_encoding == null) { return str; }
729
+ String ruby_encoding = rubyStringToString(doc.getEncoding());
730
+ if (ruby_encoding == null) { return str; }
731
+ Charset encoding = Charset.forName(ruby_encoding);
732
+ if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) { return str; }
733
+ if (str.length() == 0) { return str; } // no need to convert
734
+ return NokogiriHelpers.nkf(context, encoding, str);
735
+ }
736
+
737
+ private static final ByteList _Sw = new ByteList(new byte[] { '-', 'S', 'w' }, false);
738
+ private static final ByteList _Jw = new ByteList(new byte[] { '-', 'J', 'w' }, false);
739
+ private static final ByteList _Ew = new ByteList(new byte[] { '-', 'E', 'w' }, false);
740
+ private static final ByteList _Ww = new ByteList(new byte[] { '-', 'W', 'w' }, false);
741
+
742
+ // This method is used from HTML documents. HTML meta tag with encoding specification
743
+ // might appear after non-ascii characters are used. For example, a title tag before
744
+ // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
745
+ // Nokogiri uses NKF library to convert characters correct encoding. This means the method
746
+ // works only for JIS/Shift_JIS/EUC-JP.
747
+ private static CharSequence
748
+ nkf(ThreadContext context, Charset encoding, CharSequence str)
749
+ {
750
+ final Ruby runtime = context.getRuntime();
751
+ final ByteList opt;
752
+ if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) { opt = _Sw; }
753
+ else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) { opt = _Jw; }
754
+ else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) { opt = _Ew; }
755
+ else { opt = _Ww; } // should not come here. should be treated before this method.
756
+
757
+ Class<?> nkfClass;
758
+ try {
759
+ nkfClass = Ruby.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
760
+ } catch (ClassNotFoundException e1) {
761
+ return str;
762
+ }
763
+ Method nkf_method;
764
+ try {
765
+ nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
766
+ RubyString r_str =
767
+ (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
768
+ return NokogiriHelpers.rubyStringToString(r_str);
769
+ } catch (SecurityException e) {
770
+ return str;
771
+ } catch (NoSuchMethodException e) {
772
+ return str;
773
+ } catch (IllegalArgumentException e) {
774
+ return str;
775
+ } catch (IllegalAccessException e) {
776
+ return str;
777
+ } catch (InvocationTargetException e) {
778
+ return str;
779
+ }
780
+ }
781
+
782
+ private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
783
+ private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
784
+ private static final Charset EUC_JP = Charset.forName("EUC-JP");
785
+
786
+ public static boolean
787
+ shouldEncode(Node text)
788
+ {
789
+ final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
790
+ return encoded == null || ! encoded;
791
+ }
792
+
793
+ public static boolean
794
+ shouldDecode(Node text)
795
+ {
796
+ return !shouldEncode(text);
797
+ }
798
+
799
+ public static NokogiriNamespaceCache
800
+ getNamespaceCache(Node node)
801
+ {
802
+ XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
803
+ return xmlDoc.getNamespaceCache();
804
+ }
805
+
806
+ public static Node
807
+ renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException
808
+ {
809
+ Document doc = node.getOwnerDocument();
810
+ NokogiriNamespaceCache nsCache = getNamespaceCache(node);
811
+ Node result = doc.renameNode(node, namespaceURI, qualifiedName);
812
+ if (result != node) {
813
+ nsCache.replaceNode(node, result);
814
+ }
815
+ return result;
816
+ }
817
+ }