nokogiri 1.11.1-java → 1.11.6-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (143)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +12 -12
  3. data/LICENSE.md +1 -1
  4. data/README.md +21 -16
  5. data/dependencies.yml +12 -12
  6. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  7. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  8. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  12. data/ext/java/nokogiri/NokogiriService.java +595 -556
  13. data/ext/java/nokogiri/XmlAttr.java +118 -126
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  15. data/ext/java/nokogiri/XmlCdata.java +35 -58
  16. data/ext/java/nokogiri/XmlComment.java +46 -67
  17. data/ext/java/nokogiri/XmlDocument.java +645 -572
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  19. data/ext/java/nokogiri/XmlDtd.java +448 -414
  20. data/ext/java/nokogiri/XmlElement.java +23 -48
  21. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  22. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  23. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  24. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  25. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1620
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  29. data/ext/java/nokogiri/XmlReader.java +513 -450
  30. data/ext/java/nokogiri/XmlRelaxng.java +85 -104
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -315
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  33. data/ext/java/nokogiri/XmlSchema.java +328 -295
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -115
  35. data/ext/java/nokogiri/XmlText.java +55 -76
  36. data/ext/java/nokogiri/XmlXpathContext.java +240 -238
  37. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -202
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -98
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +116 -131
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -56
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  93. data/ext/nokogiri/depend +34 -474
  94. data/ext/nokogiri/extconf.rb +253 -183
  95. data/ext/nokogiri/html_document.c +10 -15
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +66 -65
  99. data/ext/nokogiri/html_sax_push_parser.c +29 -27
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +190 -63
  102. data/ext/nokogiri/test_global_handlers.c +3 -4
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +246 -188
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +385 -326
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +191 -157
  120. data/ext/nokogiri/xml_relax_ng.c +29 -23
  121. data/ext/nokogiri/xml_sax_parser.c +117 -112
  122. data/ext/nokogiri/xml_sax_parser_context.c +100 -85
  123. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  124. data/ext/nokogiri/xml_schema.c +48 -42
  125. data/ext/nokogiri/xml_syntax_error.c +21 -23
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +134 -127
  128. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  129. data/lib/nokogiri.rb +1 -22
  130. data/lib/nokogiri/css/parser.rb +1 -1
  131. data/lib/nokogiri/extension.rb +26 -0
  132. data/lib/nokogiri/html/document_fragment.rb +15 -15
  133. data/lib/nokogiri/nokogiri.jar +0 -0
  134. data/lib/nokogiri/version/constant.rb +1 -1
  135. data/lib/nokogiri/version/info.rb +32 -8
  136. data/lib/nokogiri/xml/document.rb +74 -28
  137. data/lib/nokogiri/xml/node.rb +39 -42
  138. data/lib/nokogiri/xml/reader.rb +2 -9
  139. data/lib/nokogiri/xml/xpath.rb +1 -3
  140. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  141. metadata +7 -8
  142. data/ext/nokogiri/xml_io.c +0 -63
  143. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2014:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri.internals;
34
2
 
35
3
  import java.io.ByteArrayInputStream;
@@ -74,661 +42,777 @@ import nokogiri.XmlXpathContext;
74
42
 
75
43
  /**
76
44
  * A class for various utility methods.
77
- *
45
+ *
78
46
  * @author serabe
79
47
  * @author Patrick Mahoney <pat@polycrystal.org>
80
48
  * @author Yoko Harada <yokolet@gmail.com>
81
49
  */
82
- public class NokogiriHelpers {
83
- public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
84
- public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
85
- public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
86
-
87
- public static XmlNode getCachedNode(Node node) {
88
- return (XmlNode) node.getUserData(CACHED_NODE);
89
- }
90
-
91
- public static void clearCachedNode(Node node) {
92
- node.setUserData(CACHED_NODE, null, null);
93
- }
94
-
95
- public static void clearXpathContext(Node node) {
96
- if (node == null) return;
97
-
98
- Node ownerDocument = node.getOwnerDocument();
99
- if (ownerDocument == null) {
100
- ownerDocument = node;
101
- }
102
- ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
103
- }
104
-
105
- /**
106
- * Get the XmlNode associated with the underlying
107
- * <code>node</code>. Creates a new XmlNode (or appropriate subclass)
108
- * or XmlNamespace wrapping <code>node</code> if there is no cached
109
- * value.
110
- */
111
- public static IRubyObject getCachedNodeOrCreate(Ruby runtime, Node node) {
112
- if (node == null) return runtime.getNil();
113
- if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
114
- XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
115
- if (!(xmlDocument instanceof HtmlDocument)) {
116
- String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
117
- String href = ((Attr) node).getValue();
118
- XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
119
- if (xmlNamespace != null) return xmlNamespace;
120
- return XmlNamespace.createFromAttr(runtime, (Attr) node);
121
- }
122
- }
123
- XmlNode xmlNode = getCachedNode(node);
124
- if (xmlNode == null) {
125
- xmlNode = (XmlNode) constructNode(runtime, node);
126
- node.setUserData(CACHED_NODE, xmlNode, null);
127
- }
128
- return xmlNode;
129
- }
130
-
131
- /**
132
- * Construct a new XmlNode wrapping <code>node</code>. The proper
133
- * subclass of XmlNode is chosen based on the type of
134
- * <code>node</code>.
135
- */
136
- public static IRubyObject constructNode(Ruby runtime, Node node) {
137
- if (node == null) return runtime.getNil();
138
- // this is slow; need a way to cache nokogiri classes/modules somewhere
139
- switch (node.getNodeType()) {
140
- case Node.ELEMENT_NODE:
141
- XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Element"));
142
- xmlElement.setNode(runtime, node);
143
- return xmlElement;
144
- case Node.ATTRIBUTE_NODE:
145
- XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Attr"));
146
- xmlAttr.setNode(runtime, node);
147
- return xmlAttr;
148
- case Node.TEXT_NODE:
149
- XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Text"));
150
- xmlText.setNode(runtime, node);
151
- return xmlText;
152
- case Node.COMMENT_NODE:
153
- XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Comment"));
154
- xmlComment.setNode(runtime, node);
155
- return xmlComment;
156
- case Node.ENTITY_NODE:
157
- return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
158
- case Node.ENTITY_REFERENCE_NODE:
159
- XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
160
- xmlEntityRef.setNode(runtime, node);
161
- return xmlEntityRef;
162
- case Node.PROCESSING_INSTRUCTION_NODE:
163
- XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ProcessingInstruction"));
164
- xmlProcessingInstruction.setNode(runtime, node);
165
- return xmlProcessingInstruction;
166
- case Node.CDATA_SECTION_NODE:
167
- XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::CDATA"));
168
- xmlCdata.setNode(runtime, node);
169
- return xmlCdata;
170
- case Node.DOCUMENT_NODE:
171
- XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
172
- xmlDocument.setDocumentNode(runtime, (Document) node);
173
- return xmlDocument;
174
- case Node.DOCUMENT_TYPE_NODE:
175
- XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
176
- xmlDtd.setNode(runtime, node);
177
- return xmlDtd;
178
- default:
179
- XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Node"));
180
- xmlNode.setNode(runtime, node);
181
- return xmlNode;
50
+ public class NokogiriHelpers
51
+ {
52
+ public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
53
+ public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
54
+ public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
55
+
56
+ public static XmlNode
57
+ getCachedNode(Node node)
58
+ {
59
+ return (XmlNode) node.getUserData(CACHED_NODE);
60
+ }
61
+
62
+ public static void
63
+ clearCachedNode(Node node)
64
+ {
65
+ node.setUserData(CACHED_NODE, null, null);
66
+ }
67
+
68
+ public static void
69
+ clearXpathContext(Node node)
70
+ {
71
+ if (node == null) { return; }
72
+
73
+ Node ownerDocument = node.getOwnerDocument();
74
+ if (ownerDocument == null) {
75
+ ownerDocument = node;
76
+ }
77
+ ownerDocument.setUserData(XmlXpathContext.XPATH_CONTEXT, null, null);
78
+ }
79
+
80
+ /**
81
+ * Get the XmlNode associated with the underlying
82
+ * <code>node</code>. Creates a new XmlNode (or appropriate subclass)
83
+ * or XmlNamespace wrapping <code>node</code> if there is no cached
84
+ * value.
85
+ */
86
+ public static IRubyObject
87
+ getCachedNodeOrCreate(Ruby runtime, Node node)
88
+ {
89
+ if (node == null) { return runtime.getNil(); }
90
+ if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
91
+ XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
92
+ if (!(xmlDocument instanceof HtmlDocument)) {
93
+ String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
94
+ String href = ((Attr) node).getValue();
95
+ XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
96
+ if (xmlNamespace != null) { return xmlNamespace; }
97
+ return XmlNamespace.createFromAttr(runtime, (Attr) node);
98
+ }
99
+ }
100
+ XmlNode xmlNode = getCachedNode(node);
101
+ if (xmlNode == null) {
102
+ xmlNode = (XmlNode) constructNode(runtime, node);
103
+ node.setUserData(CACHED_NODE, xmlNode, null);
104
+ }
105
+ return xmlNode;
106
+ }
107
+
108
+ /**
109
+ * Construct a new XmlNode wrapping <code>node</code>. The proper
110
+ * subclass of XmlNode is chosen based on the type of
111
+ * <code>node</code>.
112
+ */
113
+ public static IRubyObject
114
+ constructNode(Ruby runtime, Node node)
115
+ {
116
+ if (node == null) { return runtime.getNil(); }
117
+ // this is slow; need a way to cache nokogiri classes/modules somewhere
118
+ switch (node.getNodeType()) {
119
+ case Node.ELEMENT_NODE:
120
+ XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
121
+ "Nokogiri::XML::Element"));
122
+ xmlElement.setNode(runtime, node);
123
+ return xmlElement;
124
+ case Node.ATTRIBUTE_NODE:
125
+ XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
126
+ "Nokogiri::XML::Attr"));
127
+ xmlAttr.setNode(runtime, node);
128
+ return xmlAttr;
129
+ case Node.TEXT_NODE:
130
+ XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
131
+ "Nokogiri::XML::Text"));
132
+ xmlText.setNode(runtime, node);
133
+ return xmlText;
134
+ case Node.COMMENT_NODE:
135
+ XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
136
+ "Nokogiri::XML::Comment"));
137
+ xmlComment.setNode(runtime, node);
138
+ return xmlComment;
139
+ case Node.ENTITY_NODE:
140
+ return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
141
+ case Node.ENTITY_REFERENCE_NODE:
142
+ XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime,
143
+ getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
144
+ xmlEntityRef.setNode(runtime, node);
145
+ return xmlEntityRef;
146
+ case Node.PROCESSING_INSTRUCTION_NODE:
147
+ XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction)
148
+ NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
149
+ "Nokogiri::XML::ProcessingInstruction"));
150
+ xmlProcessingInstruction.setNode(runtime, node);
151
+ return xmlProcessingInstruction;
152
+ case Node.CDATA_SECTION_NODE:
153
+ XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
154
+ "Nokogiri::XML::CDATA"));
155
+ xmlCdata.setNode(runtime, node);
156
+ return xmlCdata;
157
+ case Node.DOCUMENT_NODE:
158
+ XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime,
159
+ getNokogiriClass(runtime, "Nokogiri::XML::Document"));
160
+ xmlDocument.setDocumentNode(runtime, (Document) node);
161
+ return xmlDocument;
162
+ case Node.DOCUMENT_TYPE_NODE:
163
+ XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
164
+ "Nokogiri::XML::DTD"));
165
+ xmlDtd.setNode(runtime, node);
166
+ return xmlDtd;
167
+ default:
168
+ XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime,
169
+ "Nokogiri::XML::Node"));
170
+ xmlNode.setNode(runtime, node);
171
+ return xmlNode;
172
+ }
173
+ }
174
+
175
+ public static RubyClass
176
+ getNokogiriClass(Ruby ruby, String name)
177
+ {
178
+ return NokogiriService.getNokogiriClassCache(ruby).get(name);
179
+ }
180
+
181
+ public static IRubyObject
182
+ stringOrNil(Ruby runtime, String str)
183
+ {
184
+ return str == null ? runtime.getNil() : convertString(runtime, str);
185
+ }
186
+
187
+ public static IRubyObject
188
+ stringOrNil(Ruby runtime, CharSequence str)
189
+ {
190
+ return str == null ? runtime.getNil() : convertString(runtime, str);
191
+ }
192
+
193
+ public static IRubyObject
194
+ stringOrNil(Ruby runtime, byte[] bytes)
195
+ {
196
+ return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
197
+ }
198
+
199
+ public static IRubyObject
200
+ stringOrBlank(Ruby runtime, String str)
201
+ {
202
+ return str == null ? runtime.newString() : convertString(runtime, str);
203
+ }
204
+
205
+ public static RubyString
206
+ convertString(Ruby runtime, String str)
207
+ {
208
+ return RubyString.newUTF8String(runtime, str);
209
+ }
210
+
211
+ public static RubyString
212
+ convertString(Ruby runtime, CharSequence str)
213
+ {
214
+ return RubyString.newUTF8String(runtime, str);
215
+ }
216
+
217
+ /**
218
+ * Convert <code>s</code> to a RubyString, or if s is null or
219
+ * empty return RubyNil.
220
+ */
221
+ public static IRubyObject
222
+ nonEmptyStringOrNil(Ruby runtime, String s)
223
+ {
224
+ if (s == null || s.length() == 0) { return runtime.getNil(); }
225
+ return RubyString.newString(runtime, s);
226
+ }
227
+
228
+ /**
229
+ * Return the prefix of a qualified name like "prefix:local".
230
+ * Returns null if there is no prefix.
231
+ */
232
+ public static String
233
+ getPrefix(String qName)
234
+ {
235
+ if (qName == null) { return null; }
236
+
237
+ final int pos = qName.indexOf(':');
238
+ return pos > 0 ? qName.substring(0, pos) : null;
239
+ }
240
+
241
+ /**
242
+ * Return the local part of a qualified name like "prefix:local".
243
+ * Returns <code>qName</code> if there is no prefix.
244
+ */
245
+ public static String
246
+ getLocalPart(String qName)
247
+ {
248
+ if (qName == null) { return null; }
249
+
250
+ final int pos = qName.indexOf(':');
251
+ return pos > 0 ? qName.substring(pos + 1) : qName;
252
+ }
253
+
254
+ public static String
255
+ getLocalNameForNamespace(String name, String defValue)
256
+ {
257
+ String localName = getLocalPart(name);
258
+ return ("xmlns".equals(localName)) ? defValue : localName;
259
+ }
260
+
261
+ public static String
262
+ rubyStringToString(IRubyObject str)
263
+ {
264
+ if (str.isNil()) { return null; }
265
+ return str.convertToString().decodeString();
266
+ }
267
+
268
+ public static String
269
+ rubyStringToString(RubyString str)
270
+ {
271
+ return str.decodeString(); // if encoding UTF-8 will decode UTF-8
272
+ }
273
+
274
+ public static ByteArrayInputStream
275
+ stringBytesToStream(final IRubyObject str)
276
+ {
277
+ if (str instanceof RubyString || str.respondsTo("to_str")) {
278
+ final ByteList bytes = str.convertToString().getByteList();
279
+ return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
280
+ }
281
+ return null;
282
+ }
283
+
284
+ public static String
285
+ getNodeCompletePath(Node node)
286
+ {
287
+
288
+ Node cur, tmp, next;
289
+
290
+ String buffer = "";
291
+
292
+ cur = node;
293
+
294
+ do {
295
+ String name = "";
296
+ String sep = "?";
297
+ int occur = 0;
298
+ boolean generic = false;
299
+
300
+ if (cur.getNodeType() == Node.DOCUMENT_NODE) {
301
+ if (buffer.startsWith("/")) { break; }
302
+
303
+ sep = "/";
304
+ next = null;
305
+ } else if (cur.getNodeType() == Node.ELEMENT_NODE) {
306
+ generic = false;
307
+ sep = "/";
308
+
309
+ name = cur.getLocalName();
310
+ if (name == null) { name = cur.getNodeName(); }
311
+ if (cur.getNamespaceURI() != null) {
312
+ if (cur.getPrefix() != null) {
313
+ name = cur.getPrefix() + ":" + name;
314
+ } else {
315
+ generic = true;
316
+ name = "*";
317
+ }
182
318
  }
183
- }
184
-
185
- public static RubyClass getNokogiriClass(Ruby ruby, String name) {
186
- return NokogiriService.getNokogiriClassCache(ruby).get(name);
187
- }
188
319
 
189
- public static IRubyObject stringOrNil(Ruby runtime, String str) {
190
- return str == null ? runtime.getNil() : convertString(runtime, str);
191
- }
320
+ next = cur.getParentNode();
192
321
 
193
- public static IRubyObject stringOrNil(Ruby runtime, CharSequence str) {
194
- return str == null ? runtime.getNil() : convertString(runtime, str);
195
- }
322
+ /*
323
+ * Thumbler index computation
324
+ */
196
325
 
197
- public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) {
198
- return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
199
- }
326
+ tmp = cur.getPreviousSibling();
200
327
 
201
- public static IRubyObject stringOrBlank(Ruby runtime, String str) {
202
- return str == null ? runtime.newString() : convertString(runtime, str);
203
- }
204
-
205
- public static RubyString convertString(Ruby runtime, String str) {
206
- return RubyString.newUTF8String(runtime, str);
207
- }
208
-
209
- public static RubyString convertString(Ruby runtime, CharSequence str) {
210
- return RubyString.newUTF8String(runtime, str);
211
- }
212
-
213
- /**
214
- * Convert <code>s</code> to a RubyString, or if s is null or
215
- * empty return RubyNil.
216
- */
217
- public static IRubyObject nonEmptyStringOrNil(Ruby runtime, String s) {
218
- if (s == null || s.length() == 0) return runtime.getNil();
219
- return RubyString.newString(runtime, s);
220
- }
221
-
222
- /**
223
- * Return the prefix of a qualified name like "prefix:local".
224
- * Returns null if there is no prefix.
225
- */
226
- public static String getPrefix(String qName) {
227
- if (qName == null) return null;
228
-
229
- final int pos = qName.indexOf(':');
230
- return pos > 0 ? qName.substring(0, pos) : null;
231
- }
232
-
233
- /**
234
- * Return the local part of a qualified name like "prefix:local".
235
- * Returns <code>qName</code> if there is no prefix.
236
- */
237
- public static String getLocalPart(String qName) {
238
- if (qName == null) return null;
239
-
240
- final int pos = qName.indexOf(':');
241
- return pos > 0 ? qName.substring(pos + 1) : qName;
242
- }
243
-
244
- public static String getLocalNameForNamespace(String name, String defValue) {
245
- String localName = getLocalPart(name);
246
- return ("xmlns".equals(localName)) ? defValue : localName;
247
- }
248
-
249
- public static String rubyStringToString(IRubyObject str) {
250
- if (str.isNil()) return null;
251
- return str.convertToString().decodeString();
252
- }
253
-
254
- public static String rubyStringToString(RubyString str) {
255
- return str.decodeString(); // if encoding UTF-8 will decode UTF-8
256
- }
257
-
258
- public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) {
259
- if (str instanceof RubyString || str.respondsTo("to_str")) {
260
- final ByteList bytes = str.convertToString().getByteList();
261
- return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
328
+ while (tmp != null) {
329
+ if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
330
+ (generic || fullNamesMatch(tmp, cur))) {
331
+ occur++;
332
+ }
333
+ tmp = tmp.getPreviousSibling();
262
334
  }
263
- return null;
264
- }
265
335
 
266
- public static String getNodeCompletePath(Node node) {
267
-
268
- Node cur, tmp, next;
269
-
270
- String buffer = "";
271
-
272
- cur = node;
273
-
274
- do {
275
- String name = "";
276
- String sep = "?";
277
- int occur = 0;
278
- boolean generic = false;
279
-
280
- if(cur.getNodeType() == Node.DOCUMENT_NODE) {
281
- if(buffer.startsWith("/")) break;
282
-
283
- sep = "/";
284
- next = null;
285
- } else if(cur.getNodeType() == Node.ELEMENT_NODE) {
286
- generic = false;
287
- sep = "/";
288
-
289
- name = cur.getLocalName();
290
- if (name == null) name = cur.getNodeName();
291
- if(cur.getNamespaceURI() != null) {
292
- if(cur.getPrefix() != null) {
293
- name = cur.getPrefix() + ":" + name;
294
- } else {
295
- generic = true;
296
- name = "*";
297
- }
298
- }
299
-
300
- next = cur.getParentNode();
301
-
302
- /*
303
- * Thumbler index computation
304
- */
305
-
306
- tmp = cur.getPreviousSibling();
307
-
308
- while(tmp != null) {
309
- if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
310
- (generic || fullNamesMatch(tmp, cur))) {
311
- occur++;
312
- }
313
- tmp = tmp.getPreviousSibling();
314
- }
315
-
316
- if(occur == 0) {
317
- tmp = cur.getNextSibling();
318
-
319
- while(tmp != null && occur == 0) {
320
- if((tmp.getNodeType() == Node.ELEMENT_NODE) &&
321
- (generic || fullNamesMatch(tmp,cur))) {
322
- occur++;
323
- }
324
- tmp = tmp.getNextSibling();
325
- }
326
-
327
- if(occur != 0) occur = 1;
328
-
329
- } else {
330
- occur++;
331
- }
332
- } else if(cur.getNodeType() == Node.COMMENT_NODE) {
333
- sep = "/";
334
- name = "comment()";
335
- next = cur.getParentNode();
336
-
337
- /*
338
- * Thumbler index computation.
339
- */
340
-
341
- tmp = cur.getPreviousSibling();
342
-
343
- while(tmp != null) {
344
- if(tmp.getNodeType() == Node.COMMENT_NODE) {
345
- occur++;
346
- }
347
- tmp = tmp.getPreviousSibling();
348
- }
349
-
350
- if(occur == 0) {
351
- tmp = cur.getNextSibling();
352
- while(tmp != null && occur == 0) {
353
- if(tmp.getNodeType() == Node.COMMENT_NODE) {
354
- occur++;
355
- }
356
- tmp = tmp.getNextSibling();
357
- }
358
- if(occur != 0) occur = 1;
359
- } else {
360
- occur = 1;
361
- }
362
-
363
- } else if(cur.getNodeType() == Node.TEXT_NODE ||
364
- cur.getNodeType() == Node.CDATA_SECTION_NODE) {
365
- // I'm here. gist:129
366
- // http://gist.github.com/144923
367
-
368
- sep = "/";
369
- name = "text()";
370
- next = cur.getParentNode();
371
-
372
- /*
373
- * Thumbler index computation.
374
- */
375
-
376
- tmp = cur.getPreviousSibling();
377
- while(tmp != null) {
378
- if(tmp.getNodeType() == Node.TEXT_NODE ||
379
- tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
380
- occur++;
381
- }
382
- tmp = tmp.getPreviousSibling();
383
- }
384
-
385
- if(occur == 0) {
386
- tmp = cur.getNextSibling();
387
-
388
- while(tmp != null && occur == 0) {
389
- if(tmp.getNodeType() == Node.TEXT_NODE ||
390
- tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
391
- occur++;
392
- }
393
- tmp = tmp.getNextSibling();
394
- }
395
- } else {
396
- occur++;
397
- }
398
-
399
- } else if(cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
400
- sep = "/";
401
- name = "processing-instruction('"+cur.getLocalName()+"')";
402
- next = cur.getParentNode();
403
-
404
- /*
405
- * Thumbler index computation.
406
- */
407
-
408
- tmp = cur.getParentNode();
409
-
410
- while(tmp != null) {
411
- if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
412
- tmp.getLocalName().equals(cur.getLocalName())) {
413
- occur++;
414
- }
415
- tmp = tmp.getPreviousSibling();
416
- }
417
-
418
- if(occur == 0) {
419
- tmp = cur.getNextSibling();
420
-
421
- while(tmp != null && occur == 0) {
422
- if(tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
423
- tmp.getLocalName().equals(cur.getLocalName())){
424
- occur++;
425
- }
426
- tmp = tmp.getNextSibling();
427
- }
428
-
429
- if(occur != 0) {
430
- occur = 1;
431
- }
432
-
433
- } else {
434
- occur++;
435
- }
436
-
437
- } else if(cur.getNodeType() == Node.ATTRIBUTE_NODE) {
438
- sep = "/@";
439
- name = cur.getLocalName();
440
-
441
- if(cur.getNamespaceURI() != null) {
442
- if(cur.getPrefix() != null) {
443
- name = cur.getPrefix() + ":" + name;
444
- }
445
- }
446
-
447
- next = ((Attr) cur).getOwnerElement();
448
-
449
- } else {
450
- next = cur.getParentNode();
451
- }
336
+ if (occur == 0) {
337
+ tmp = cur.getNextSibling();
452
338
 
453
- if(occur == 0){
454
- buffer = sep+name+buffer;
455
- } else {
456
- buffer = sep+name+"["+occur+"]"+buffer;
339
+ while (tmp != null && occur == 0) {
340
+ if ((tmp.getNodeType() == Node.ELEMENT_NODE) &&
341
+ (generic || fullNamesMatch(tmp, cur))) {
342
+ occur++;
457
343
  }
344
+ tmp = tmp.getNextSibling();
345
+ }
458
346
 
459
- cur = next;
460
-
461
- } while(cur != null);
347
+ if (occur != 0) { occur = 1; }
462
348
 
463
- return buffer;
464
- }
465
-
466
- static boolean compareTwoNodes(Node m, Node n) {
467
- return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
468
- nodesAreEqual(m.getPrefix(), n.getPrefix());
469
- }
470
-
471
- private static boolean nodesAreEqual(Object a, Object b) {
472
- return (((a == null) && (b == null)) ||
473
- ((a != null) && (b != null) && (b.equals(a))));
474
- }
475
-
476
- private static boolean fullNamesMatch(Node a, Node b) {
477
- return a.getNodeName().equals(b.getNodeName());
478
- }
479
-
480
- private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
481
- private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
482
- private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
483
- private static final String[] decoded = {"&", ">", "<", "\r"};
484
-
485
- private static StringBuffer convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars) {
486
- Matcher matcher = ptn.matcher(input);
487
- boolean result = matcher.find();
488
- StringBuffer sb = new StringBuffer(input.length() + 8);
489
- while (result) {
490
- String matched = matcher.group();
491
- String replacement = "";
492
- for (int i=0; i<oldChars.length; i++) {
493
- if (matched.contains(oldChars[i])) {
494
- replacement = matched.replace(oldChars[i], newChars[i]);
495
- break;
496
- }
497
- }
498
- matcher.appendReplacement(sb, replacement);
499
- result = matcher.find();
349
+ } else {
350
+ occur++;
351
+ }
352
+ } else if (cur.getNodeType() == Node.COMMENT_NODE) {
353
+ sep = "/";
354
+ name = "comment()";
355
+ next = cur.getParentNode();
356
+
357
+ /*
358
+ * Thumbler index computation.
359
+ */
360
+
361
+ tmp = cur.getPreviousSibling();
362
+
363
+ while (tmp != null) {
364
+ if (tmp.getNodeType() == Node.COMMENT_NODE) {
365
+ occur++;
366
+ }
367
+ tmp = tmp.getPreviousSibling();
500
368
  }
501
- matcher.appendTail(sb);
502
- return sb;
503
- }
504
-
505
- public static CharSequence encodeJavaString(CharSequence str) {
506
- return convert(decoded_pattern, str, decoded, encoded);
507
- }
508
-
509
- public static CharSequence decodeJavaString(CharSequence str) {
510
- return convert(encoded_pattern, str, encoded, decoded);
511
- }
512
-
513
- public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
514
- public static boolean isNamespace(Node node) {
515
- return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
516
- }
517
-
518
- public static boolean isNamespace(String nodeName) {
519
- return (nodeName.startsWith("xmlns"));
520
- }
521
-
522
- public static boolean isNonDefaultNamespace(Node node) {
523
- return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
524
- }
525
-
526
- public static boolean isXmlBase(String attrName) {
527
- return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
528
- }
529
-
530
- public static boolean isBlank(IRubyObject obj) {
531
- if ( !(obj instanceof XmlText) ) return false;
532
-
533
- CharSequence content = ((XmlNode) obj).getContentImpl();
534
- return content == null || isBlank(content);
535
- }
536
-
537
- public static boolean isBlank(CharSequence str) {
538
- int len = str.length(); int beg = 0;
539
- while ((beg < len) && (str.charAt(beg) <= ' ')) beg++;
540
- return beg == len;
541
- }
542
-
543
- public static boolean isBlank(String str) {
544
- return str.isEmpty() || isBlank((CharSequence) str);
545
- }
546
-
547
- public static boolean isNullOrEmpty(String str) {
548
- return str == null || str.isEmpty();
549
- }
550
369
 
551
- public static CharSequence canonicalizeWhitespace(CharSequence str) {
552
- final int len = str.length();
553
- StringBuilder sb = new StringBuilder(len);
554
- boolean newline_added = false;
555
- for ( int i = 0; i < len; i++ ) {
556
- char c = str.charAt(i);
557
- if ( c == '\n' ) {
558
- if ( ! newline_added ) {
559
- sb.append(c); newline_added = true;
560
- }
561
- } else {
562
- sb.append(c);
370
+ if (occur == 0) {
371
+ tmp = cur.getNextSibling();
372
+ while (tmp != null && occur == 0) {
373
+ if (tmp.getNodeType() == Node.COMMENT_NODE) {
374
+ occur++;
563
375
  }
376
+ tmp = tmp.getNextSibling();
377
+ }
378
+ if (occur != 0) { occur = 1; }
379
+ } else {
380
+ occur = 1;
564
381
  }
565
- return sb;
566
- }
567
-
568
- public static String newQName(String newPrefix, Node node) {
569
- String tagName = getLocalPart(node.getNodeName());
570
- if (newPrefix == null) return tagName;
571
- return newPrefix + ':' + tagName;
572
- }
573
382
 
574
- public static IRubyObject[] nodeListToRubyArray(Ruby runtime, NodeList nodes) {
575
- IRubyObject[] array = new IRubyObject[nodes.getLength()];
576
- for (int i = 0; i < nodes.getLength(); i++) {
577
- array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
383
+ } else if (cur.getNodeType() == Node.TEXT_NODE ||
384
+ cur.getNodeType() == Node.CDATA_SECTION_NODE) {
385
+ // I'm here. gist:129
386
+ // http://gist.github.com/144923
387
+
388
+ sep = "/";
389
+ name = "text()";
390
+ next = cur.getParentNode();
391
+
392
+ /*
393
+ * Thumbler index computation.
394
+ */
395
+
396
+ tmp = cur.getPreviousSibling();
397
+ while (tmp != null) {
398
+ if (tmp.getNodeType() == Node.TEXT_NODE ||
399
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
400
+ occur++;
401
+ }
402
+ tmp = tmp.getPreviousSibling();
578
403
  }
579
- return array;
580
- }
581
404
 
582
- public static IRubyObject[] nodeListToArray(Ruby ruby, List<Node> nodes) {
583
- IRubyObject[] result = new IRubyObject[nodes.size()];
584
- for (int i = 0; i < result.length; i++) {
585
- result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
586
- }
587
- return result;
588
- }
405
+ if (occur == 0) {
406
+ tmp = cur.getNextSibling();
589
407
 
590
- public static RubyArray nodeArrayToRubyArray(Ruby ruby, Node[] nodes) {
591
- RubyArray n = RubyArray.newArray(ruby, nodes.length);
592
- for (int i = 0; i < nodes.length; i++) {
593
- n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
408
+ while (tmp != null && occur == 0) {
409
+ if (tmp.getNodeType() == Node.TEXT_NODE ||
410
+ tmp.getNodeType() == Node.CDATA_SECTION_NODE) {
411
+ occur++;
412
+ }
413
+ tmp = tmp.getNextSibling();
414
+ }
415
+ } else {
416
+ occur++;
594
417
  }
595
- return n;
596
- }
597
-
598
- public static String getValidEncodingOrNull(IRubyObject encoding) {
599
- if (encoding.isNil()) return null; // charsetNames does not like contains(null)
600
- String enc = rubyStringToString(encoding.convertToString());
601
- if (CharsetNames.contains(enc)) return enc;
602
- return null;
603
- }
604
418
 
605
- public static String getValidEncoding(IRubyObject encoding) {
606
- String validEncoding = getValidEncodingOrNull(encoding);
607
- if (validEncoding != null) return validEncoding;
608
- return Charset.defaultCharset().name();
609
- }
419
+ } else if (cur.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
420
+ sep = "/";
421
+ name = "processing-instruction('" + cur.getLocalName() + "')";
422
+ next = cur.getParentNode();
610
423
 
611
- private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
612
-
613
- public static String adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId) {
614
- if (systemId == null) return systemId;
615
- File file = new File(systemId);
616
- if (file.isAbsolute()) return systemId;
617
- String path = resolveSystemId(baseURI, systemId);
618
- if (path != null) return path;
619
- path = resolveSystemId(currentDir, systemId);
620
- if (path != null) return path;
621
- return resolveSystemId(scriptFileName, systemId);
622
- }
424
+ /*
425
+ * Thumbler index computation.
426
+ */
623
427
 
624
- private static String resolveSystemId(String baseName, String systemId) {
625
- if (baseName == null || baseName.length() < 1) return null;
626
- String parentName;
627
- baseName = baseName.replace("%20", " ");
628
- File base = new File(baseName);
629
- if (base.isDirectory()) parentName = baseName;
630
- else parentName = base.getParent();
631
- if (parentName == null) return null;
632
- if (parentName.toLowerCase().startsWith("file:")) parentName = parentName.substring("file:".length());
633
- File dtdFile = new File(parentName + "/" + systemId);
634
- if (dtdFile.exists()) return dtdFile.getPath();
635
- return null;
636
- }
428
+ tmp = cur.getParentNode();
637
429
 
638
- private static final Charset UTF8 = Charset.forName("UTF-8");
639
-
640
- public static boolean isUTF8(String encoding) {
641
- if (encoding == null) return true; // no need to convert encoding
430
+ while (tmp != null) {
431
+ if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
432
+ tmp.getLocalName().equals(cur.getLocalName())) {
433
+ occur++;
434
+ }
435
+ tmp = tmp.getPreviousSibling();
436
+ }
642
437
 
643
- if ("UTF-8".equals(encoding)) return true;
644
- return UTF8.aliases().contains(encoding);
645
- }
438
+ if (occur == 0) {
439
+ tmp = cur.getNextSibling();
646
440
 
647
- public static ByteBuffer convertEncoding(Charset output_charset, CharSequence input_string) {
648
- return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
649
- }
441
+ while (tmp != null && occur == 0) {
442
+ if (tmp.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE &&
443
+ tmp.getLocalName().equals(cur.getLocalName())) {
444
+ occur++;
445
+ }
446
+ tmp = tmp.getNextSibling();
447
+ }
650
448
 
651
- public static CharSequence convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str) {
652
- if (!(doc instanceof HtmlDocument)) return str;
653
- String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
654
- if (parsed_encoding == null) return str;
655
- String ruby_encoding = rubyStringToString(doc.getEncoding());
656
- if (ruby_encoding == null) return str;
657
- Charset encoding = Charset.forName(ruby_encoding);
658
- if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) return str;
659
- if (str.length() == 0) return str; // no need to convert
660
- return NokogiriHelpers.nkf(context, encoding, str);
661
- }
449
+ if (occur != 0) {
450
+ occur = 1;
451
+ }
662
452
 
663
- private static final ByteList _Sw = new ByteList(new byte[] { '-','S','w' }, false);
664
- private static final ByteList _Jw = new ByteList(new byte[] { '-','J','w' }, false);
665
- private static final ByteList _Ew = new ByteList(new byte[] { '-','E','w' }, false);
666
- private static final ByteList _Ww = new ByteList(new byte[] { '-','W','w' }, false);
667
-
668
- // This method is used from HTML documents. HTML meta tag with encoding specification
669
- // might appear after non-ascii characters are used. For example, a title tag before
670
- // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
671
- // Nokogiri uses NKF library to convert characters correct encoding. This means the method
672
- // works only for JIS/Shift_JIS/EUC-JP.
673
- private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) {
674
- final Ruby runtime = context.getRuntime();
675
- final ByteList opt;
676
- if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) opt = _Sw;
677
- else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) opt = _Jw;
678
- else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) opt = _Ew;
679
- else opt = _Ww; // should not come here. should be treated before this method.
680
-
681
- Class nkfClass;
682
- try {
683
- // JRuby 1.7 and later
684
- nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
685
- } catch (ClassNotFoundException e1) {
686
- return str;
687
- }
688
- Method nkf_method;
689
- try {
690
- nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
691
- RubyString r_str =
692
- (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
693
- return NokogiriHelpers.rubyStringToString(r_str);
694
- } catch (SecurityException e) {
695
- return str;
696
- } catch (NoSuchMethodException e) {
697
- return str;
698
- } catch (IllegalArgumentException e) {
699
- return str;
700
- } catch (IllegalAccessException e) {
701
- return str;
702
- } catch (InvocationTargetException e) {
703
- return str;
453
+ } else {
454
+ occur++;
704
455
  }
705
- }
706
-
707
- private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
708
- private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
709
- private static final Charset EUC_JP = Charset.forName("EUC-JP");
710
456
 
711
- public static boolean shouldEncode(Node text) {
712
- final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
713
- return encoded == null || ! encoded;
714
- }
457
+ } else if (cur.getNodeType() == Node.ATTRIBUTE_NODE) {
458
+ sep = "/@";
459
+ name = cur.getLocalName();
715
460
 
716
- public static boolean shouldDecode(Node text) {
717
- return !shouldEncode(text);
718
- }
719
-
720
- public static NokogiriNamespaceCache getNamespaceCache(Node node) {
721
- XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
722
- return xmlDoc.getNamespaceCache();
723
- }
461
+ if (cur.getNamespaceURI() != null) {
462
+ if (cur.getPrefix() != null) {
463
+ name = cur.getPrefix() + ":" + name;
464
+ }
465
+ }
724
466
 
725
- public static Node renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException {
726
- Document doc = node.getOwnerDocument();
727
- NokogiriNamespaceCache nsCache = getNamespaceCache(node);
728
- Node result = doc.renameNode(node, namespaceURI, qualifiedName);
729
- if (result != node) {
730
- nsCache.replaceNode(node, result);
467
+ next = ((Attr) cur).getOwnerElement();
468
+
469
+ } else {
470
+ next = cur.getParentNode();
471
+ }
472
+
473
+ if (occur == 0) {
474
+ buffer = sep + name + buffer;
475
+ } else {
476
+ buffer = sep + name + "[" + occur + "]" + buffer;
477
+ }
478
+
479
+ cur = next;
480
+
481
+ } while (cur != null);
482
+
483
+ return buffer;
484
+ }
485
+
486
+ static boolean
487
+ compareTwoNodes(Node m, Node n)
488
+ {
489
+ return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
490
+ nodesAreEqual(m.getPrefix(), n.getPrefix());
491
+ }
492
+
493
+ private static boolean
494
+ nodesAreEqual(Object a, Object b)
495
+ {
496
+ return (((a == null) && (b == null)) ||
497
+ ((a != null) && (b != null) && (b.equals(a))));
498
+ }
499
+
500
+ private static boolean
501
+ fullNamesMatch(Node a, Node b)
502
+ {
503
+ return a.getNodeName().equals(b.getNodeName());
504
+ }
505
+
506
+ private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
507
+ private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
508
+ private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
509
+ private static final String[] decoded = {"&", ">", "<", "\r"};
510
+
511
+ private static StringBuffer
512
+ convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars)
513
+ {
514
+ Matcher matcher = ptn.matcher(input);
515
+ boolean result = matcher.find();
516
+ StringBuffer sb = new StringBuffer(input.length() + 8);
517
+ while (result) {
518
+ String matched = matcher.group();
519
+ String replacement = "";
520
+ for (int i = 0; i < oldChars.length; i++) {
521
+ if (matched.contains(oldChars[i])) {
522
+ replacement = matched.replace(oldChars[i], newChars[i]);
523
+ break;
731
524
  }
732
- return result;
733
- }
525
+ }
526
+ matcher.appendReplacement(sb, replacement);
527
+ result = matcher.find();
528
+ }
529
+ matcher.appendTail(sb);
530
+ return sb;
531
+ }
532
+
533
+ public static CharSequence
534
+ encodeJavaString(CharSequence str)
535
+ {
536
+ return convert(decoded_pattern, str, decoded, encoded);
537
+ }
538
+
539
+ public static CharSequence
540
+ decodeJavaString(CharSequence str)
541
+ {
542
+ return convert(encoded_pattern, str, encoded, decoded);
543
+ }
544
+
545
+ public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
546
+ public static boolean
547
+ isNamespace(Node node)
548
+ {
549
+ return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
550
+ }
551
+
552
+ public static boolean
553
+ isNamespace(String nodeName)
554
+ {
555
+ return (nodeName.startsWith("xmlns"));
556
+ }
557
+
558
+ public static boolean
559
+ isNonDefaultNamespace(Node node)
560
+ {
561
+ return (isNamespace(node) && ! "xmlns".equals(node.getNodeName()));
562
+ }
563
+
564
+ public static boolean
565
+ isXmlBase(String attrName)
566
+ {
567
+ return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
568
+ }
569
+
570
+ public static boolean
571
+ isBlank(IRubyObject obj)
572
+ {
573
+ if (!(obj instanceof XmlText)) { return false; }
574
+
575
+ CharSequence content = ((XmlNode) obj).getContentImpl();
576
+ return content == null || isBlank(content);
577
+ }
578
+
579
+ public static boolean
580
+ isBlank(CharSequence str)
581
+ {
582
+ int len = str.length();
583
+ int beg = 0;
584
+ while ((beg < len) && (str.charAt(beg) <= ' ')) { beg++; }
585
+ return beg == len;
586
+ }
587
+
588
+ public static boolean
589
+ isBlank(String str)
590
+ {
591
+ return str.isEmpty() || isBlank((CharSequence) str);
592
+ }
593
+
594
+ public static boolean
595
+ isNullOrEmpty(String str)
596
+ {
597
+ return str == null || str.isEmpty();
598
+ }
599
+
600
+ public static CharSequence
601
+ canonicalizeWhitespace(CharSequence str)
602
+ {
603
+ final int len = str.length();
604
+ StringBuilder sb = new StringBuilder(len);
605
+ boolean newline_added = false;
606
+ for (int i = 0; i < len; i++) {
607
+ char c = str.charAt(i);
608
+ if (c == '\n') {
609
+ if (! newline_added) {
610
+ sb.append(c);
611
+ newline_added = true;
612
+ }
613
+ } else {
614
+ sb.append(c);
615
+ }
616
+ }
617
+ return sb;
618
+ }
619
+
620
+ public static String
621
+ newQName(String newPrefix, Node node)
622
+ {
623
+ String tagName = getLocalPart(node.getNodeName());
624
+ if (newPrefix == null) { return tagName; }
625
+ return newPrefix + ':' + tagName;
626
+ }
627
+
628
+ public static IRubyObject[]
629
+ nodeListToRubyArray(Ruby runtime, NodeList nodes)
630
+ {
631
+ IRubyObject[] array = new IRubyObject[nodes.getLength()];
632
+ for (int i = 0; i < nodes.getLength(); i++) {
633
+ array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
634
+ }
635
+ return array;
636
+ }
637
+
638
+ public static IRubyObject[]
639
+ nodeListToArray(Ruby ruby, List<Node> nodes)
640
+ {
641
+ IRubyObject[] result = new IRubyObject[nodes.size()];
642
+ for (int i = 0; i < result.length; i++) {
643
+ result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
644
+ }
645
+ return result;
646
+ }
647
+
648
+ public static RubyArray
649
+ nodeArrayToRubyArray(Ruby ruby, Node[] nodes)
650
+ {
651
+ RubyArray n = RubyArray.newArray(ruby, nodes.length);
652
+ for (int i = 0; i < nodes.length; i++) {
653
+ n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
654
+ }
655
+ return n;
656
+ }
657
+
658
+ public static String
659
+ getValidEncodingOrNull(IRubyObject encoding)
660
+ {
661
+ if (encoding.isNil()) { return null; } // charsetNames does not like contains(null)
662
+ String enc = rubyStringToString(encoding.convertToString());
663
+ if (CharsetNames.contains(enc)) { return enc; }
664
+ return null;
665
+ }
666
+
667
+ public static String
668
+ getValidEncoding(IRubyObject encoding)
669
+ {
670
+ String validEncoding = getValidEncodingOrNull(encoding);
671
+ if (validEncoding != null) { return validEncoding; }
672
+ return Charset.defaultCharset().name();
673
+ }
674
+
675
+ private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
676
+
677
+ public static String
678
+ adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId)
679
+ {
680
+ if (systemId == null) { return systemId; }
681
+ File file = new File(systemId);
682
+ if (file.isAbsolute()) { return systemId; }
683
+ String path = resolveSystemId(baseURI, systemId);
684
+ if (path != null) { return path; }
685
+ path = resolveSystemId(currentDir, systemId);
686
+ if (path != null) { return path; }
687
+ return resolveSystemId(scriptFileName, systemId);
688
+ }
689
+
690
+ private static String
691
+ resolveSystemId(String baseName, String systemId)
692
+ {
693
+ if (baseName == null || baseName.length() < 1) { return null; }
694
+ String parentName;
695
+ baseName = baseName.replace("%20", " ");
696
+ File base = new File(baseName);
697
+ if (base.isDirectory()) { parentName = baseName; }
698
+ else { parentName = base.getParent(); }
699
+ if (parentName == null) { return null; }
700
+ if (parentName.toLowerCase().startsWith("file:")) { parentName = parentName.substring("file:".length()); }
701
+ File dtdFile = new File(parentName + "/" + systemId);
702
+ if (dtdFile.exists()) { return dtdFile.getPath(); }
703
+ return null;
704
+ }
705
+
706
+ private static final Charset UTF8 = Charset.forName("UTF-8");
707
+
708
+ public static boolean
709
+ isUTF8(String encoding)
710
+ {
711
+ if (encoding == null) { return true; } // no need to convert encoding
712
+
713
+ if ("UTF-8".equals(encoding)) { return true; }
714
+ return UTF8.aliases().contains(encoding);
715
+ }
716
+
717
+ public static ByteBuffer
718
+ convertEncoding(Charset output_charset, CharSequence input_string)
719
+ {
720
+ return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
721
+ }
722
+
723
+ public static CharSequence
724
+ convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
725
+ {
726
+ if (!(doc instanceof HtmlDocument)) { return str; }
727
+ String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
728
+ if (parsed_encoding == null) { return str; }
729
+ String ruby_encoding = rubyStringToString(doc.getEncoding());
730
+ if (ruby_encoding == null) { return str; }
731
+ Charset encoding = Charset.forName(ruby_encoding);
732
+ if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) { return str; }
733
+ if (str.length() == 0) { return str; } // no need to convert
734
+ return NokogiriHelpers.nkf(context, encoding, str);
735
+ }
736
+
737
+ private static final ByteList _Sw = new ByteList(new byte[] { '-', 'S', 'w' }, false);
738
+ private static final ByteList _Jw = new ByteList(new byte[] { '-', 'J', 'w' }, false);
739
+ private static final ByteList _Ew = new ByteList(new byte[] { '-', 'E', 'w' }, false);
740
+ private static final ByteList _Ww = new ByteList(new byte[] { '-', 'W', 'w' }, false);
741
+
742
+ // This method is used from HTML documents. HTML meta tag with encoding specification
743
+ // might appear after non-ascii characters are used. For example, a title tag before
744
+ // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
745
+ // Nokogiri uses NKF library to convert characters correct encoding. This means the method
746
+ // works only for JIS/Shift_JIS/EUC-JP.
747
+ private static CharSequence
748
+ nkf(ThreadContext context, Charset encoding, CharSequence str)
749
+ {
750
+ final Ruby runtime = context.getRuntime();
751
+ final ByteList opt;
752
+ if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) { opt = _Sw; }
753
+ else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) { opt = _Jw; }
754
+ else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) { opt = _Ew; }
755
+ else { opt = _Ww; } // should not come here. should be treated before this method.
756
+
757
+ Class nkfClass;
758
+ try {
759
+ // JRuby 1.7 and later
760
+ nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
761
+ } catch (ClassNotFoundException e1) {
762
+ return str;
763
+ }
764
+ Method nkf_method;
765
+ try {
766
+ nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
767
+ RubyString r_str =
768
+ (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
769
+ return NokogiriHelpers.rubyStringToString(r_str);
770
+ } catch (SecurityException e) {
771
+ return str;
772
+ } catch (NoSuchMethodException e) {
773
+ return str;
774
+ } catch (IllegalArgumentException e) {
775
+ return str;
776
+ } catch (IllegalAccessException e) {
777
+ return str;
778
+ } catch (InvocationTargetException e) {
779
+ return str;
780
+ }
781
+ }
782
+
783
+ private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
784
+ private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
785
+ private static final Charset EUC_JP = Charset.forName("EUC-JP");
786
+
787
+ public static boolean
788
+ shouldEncode(Node text)
789
+ {
790
+ final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
791
+ return encoded == null || ! encoded;
792
+ }
793
+
794
+ public static boolean
795
+ shouldDecode(Node text)
796
+ {
797
+ return !shouldEncode(text);
798
+ }
799
+
800
+ public static NokogiriNamespaceCache
801
+ getNamespaceCache(Node node)
802
+ {
803
+ XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
804
+ return xmlDoc.getNamespaceCache();
805
+ }
806
+
807
+ public static Node
808
+ renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException
809
+ {
810
+ Document doc = node.getOwnerDocument();
811
+ NokogiriNamespaceCache nsCache = getNamespaceCache(node);
812
+ Node result = doc.renameNode(node, namespaceURI, qualifiedName);
813
+ if (result != node) {
814
+ nsCache.replaceNode(node, result);
815
+ }
816
+ return result;
817
+ }
734
818
  }