nokogiri 1.10.10-java → 1.11.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (159) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/README.md +165 -91
  5. data/ext/java/nokogiri/HtmlDocument.java +34 -46
  6. data/ext/java/nokogiri/HtmlSaxParserContext.java +88 -58
  7. data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
  8. data/ext/java/nokogiri/NokogiriService.java +1 -1
  9. data/ext/java/nokogiri/XmlAttr.java +13 -20
  10. data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
  11. data/ext/java/nokogiri/XmlCdata.java +3 -4
  12. data/ext/java/nokogiri/XmlComment.java +1 -1
  13. data/ext/java/nokogiri/XmlDocument.java +148 -175
  14. data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
  15. data/ext/java/nokogiri/XmlDtd.java +5 -8
  16. data/ext/java/nokogiri/XmlElement.java +1 -20
  17. data/ext/java/nokogiri/XmlElementDecl.java +23 -28
  18. data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
  19. data/ext/java/nokogiri/XmlEntityReference.java +2 -2
  20. data/ext/java/nokogiri/XmlNamespace.java +72 -89
  21. data/ext/java/nokogiri/XmlNode.java +303 -406
  22. data/ext/java/nokogiri/XmlNodeSet.java +70 -76
  23. data/ext/java/nokogiri/XmlReader.java +12 -13
  24. data/ext/java/nokogiri/XmlRelaxng.java +10 -3
  25. data/ext/java/nokogiri/XmlSaxParserContext.java +15 -10
  26. data/ext/java/nokogiri/XmlSchema.java +87 -27
  27. data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
  28. data/ext/java/nokogiri/XmlText.java +12 -9
  29. data/ext/java/nokogiri/XmlXpathContext.java +55 -25
  30. data/ext/java/nokogiri/XsltStylesheet.java +7 -15
  31. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +52 -46
  32. data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
  33. data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
  34. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
  35. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
  36. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +67 -10
  37. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
  38. data/ext/java/nokogiri/internals/ParserContext.java +27 -73
  39. data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
  40. data/ext/java/nokogiri/internals/XmlDomParserContext.java +18 -33
  41. data/ext/nokogiri/depend +476 -357
  42. data/ext/nokogiri/extconf.rb +507 -357
  43. data/ext/nokogiri/html_document.c +79 -78
  44. data/ext/nokogiri/html_sax_parser_context.c +2 -2
  45. data/ext/nokogiri/nokogiri.c +34 -40
  46. data/ext/nokogiri/xml_document.c +18 -4
  47. data/ext/nokogiri/xml_io.c +8 -6
  48. data/ext/nokogiri/xml_node.c +21 -1
  49. data/ext/nokogiri/xml_node_set.c +1 -1
  50. data/ext/nokogiri/xml_reader.c +6 -17
  51. data/ext/nokogiri/xml_relax_ng.c +29 -11
  52. data/ext/nokogiri/xml_sax_parser.c +2 -7
  53. data/ext/nokogiri/xml_sax_parser_context.c +2 -2
  54. data/ext/nokogiri/xml_schema.c +55 -13
  55. data/ext/nokogiri/xml_xpath_context.c +80 -4
  56. data/ext/nokogiri/xslt_stylesheet.c +1 -8
  57. data/lib/nokogiri.rb +22 -22
  58. data/lib/nokogiri/css.rb +1 -0
  59. data/lib/nokogiri/css/node.rb +1 -0
  60. data/lib/nokogiri/css/parser.rb +63 -62
  61. data/lib/nokogiri/css/parser.y +2 -2
  62. data/lib/nokogiri/css/parser_extras.rb +39 -36
  63. data/lib/nokogiri/css/syntax_error.rb +1 -0
  64. data/lib/nokogiri/css/tokenizer.rb +1 -0
  65. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  66. data/lib/nokogiri/decorators/slop.rb +1 -0
  67. data/lib/nokogiri/html.rb +1 -0
  68. data/lib/nokogiri/html/builder.rb +1 -0
  69. data/lib/nokogiri/html/document.rb +13 -26
  70. data/lib/nokogiri/html/document_fragment.rb +1 -0
  71. data/lib/nokogiri/html/element_description.rb +1 -0
  72. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  73. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  74. data/lib/nokogiri/html/sax/parser.rb +1 -0
  75. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  76. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  77. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  78. data/lib/nokogiri/nokogiri.jar +0 -0
  79. data/lib/nokogiri/syntax_error.rb +1 -0
  80. data/lib/nokogiri/version.rb +3 -109
  81. data/lib/nokogiri/version/constant.rb +5 -0
  82. data/lib/nokogiri/version/info.rb +182 -0
  83. data/lib/nokogiri/xml.rb +1 -0
  84. data/lib/nokogiri/xml/attr.rb +1 -0
  85. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  86. data/lib/nokogiri/xml/builder.rb +3 -2
  87. data/lib/nokogiri/xml/cdata.rb +1 -0
  88. data/lib/nokogiri/xml/character_data.rb +1 -0
  89. data/lib/nokogiri/xml/document.rb +20 -15
  90. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  91. data/lib/nokogiri/xml/dtd.rb +1 -0
  92. data/lib/nokogiri/xml/element_content.rb +1 -0
  93. data/lib/nokogiri/xml/element_decl.rb +1 -0
  94. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  95. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  96. data/lib/nokogiri/xml/namespace.rb +1 -0
  97. data/lib/nokogiri/xml/node.rb +587 -249
  98. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  99. data/lib/nokogiri/xml/node_set.rb +1 -0
  100. data/lib/nokogiri/xml/notation.rb +1 -0
  101. data/lib/nokogiri/xml/parse_options.rb +10 -3
  102. data/lib/nokogiri/xml/pp.rb +1 -0
  103. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  104. data/lib/nokogiri/xml/pp/node.rb +1 -0
  105. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  106. data/lib/nokogiri/xml/reader.rb +7 -3
  107. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  108. data/lib/nokogiri/xml/sax.rb +1 -0
  109. data/lib/nokogiri/xml/sax/document.rb +1 -0
  110. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  111. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  112. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  113. data/lib/nokogiri/xml/schema.rb +13 -4
  114. data/lib/nokogiri/xml/searchable.rb +25 -16
  115. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  116. data/lib/nokogiri/xml/text.rb +1 -0
  117. data/lib/nokogiri/xml/xpath.rb +1 -0
  118. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
  119. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  120. data/lib/nokogiri/xslt.rb +1 -0
  121. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  122. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  123. metadata +86 -159
  124. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
  125. data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
  126. data/ext/nokogiri/html_document.h +0 -10
  127. data/ext/nokogiri/html_element_description.h +0 -10
  128. data/ext/nokogiri/html_entity_lookup.h +0 -8
  129. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  130. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  131. data/ext/nokogiri/nokogiri.h +0 -121
  132. data/ext/nokogiri/xml_attr.h +0 -9
  133. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  134. data/ext/nokogiri/xml_cdata.h +0 -9
  135. data/ext/nokogiri/xml_comment.h +0 -9
  136. data/ext/nokogiri/xml_document.h +0 -23
  137. data/ext/nokogiri/xml_document_fragment.h +0 -10
  138. data/ext/nokogiri/xml_dtd.h +0 -10
  139. data/ext/nokogiri/xml_element_content.h +0 -10
  140. data/ext/nokogiri/xml_element_decl.h +0 -9
  141. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  142. data/ext/nokogiri/xml_entity_decl.h +0 -10
  143. data/ext/nokogiri/xml_entity_reference.h +0 -9
  144. data/ext/nokogiri/xml_io.h +0 -11
  145. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  146. data/ext/nokogiri/xml_namespace.h +0 -14
  147. data/ext/nokogiri/xml_node.h +0 -13
  148. data/ext/nokogiri/xml_node_set.h +0 -12
  149. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  150. data/ext/nokogiri/xml_reader.h +0 -10
  151. data/ext/nokogiri/xml_relax_ng.h +0 -9
  152. data/ext/nokogiri/xml_sax_parser.h +0 -39
  153. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  154. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  155. data/ext/nokogiri/xml_schema.h +0 -9
  156. data/ext/nokogiri/xml_syntax_error.h +0 -13
  157. data/ext/nokogiri/xml_text.h +0 -9
  158. data/ext/nokogiri/xml_xpath_context.h +0 -10
  159. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -35,9 +35,11 @@ package nokogiri.internals;
35
35
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
36
  import static nokogiri.internals.NokogiriHelpers.isNamespace;
37
37
  import static nokogiri.internals.NokogiriHelpers.stringOrNil;
38
+
38
39
  import nokogiri.HtmlDocument;
39
40
  import nokogiri.NokogiriService;
40
41
  import nokogiri.XmlDocument;
42
+ import nokogiri.XmlSyntaxError;
41
43
 
42
44
  import org.apache.xerces.xni.Augmentations;
43
45
  import org.apache.xerces.xni.QName;
@@ -65,21 +67,13 @@ import org.w3c.dom.NodeList;
65
67
  */
66
68
  public class HtmlDomParserContext extends XmlDomParserContext {
67
69
 
68
- public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
69
- super(runtime, options);
70
+ public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
71
+ this(runtime, runtime.getNil(), options);
70
72
  }
71
-
73
+
72
74
  public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
73
75
  super(runtime, encoding, options);
74
- }
75
-
76
- @Override
77
- protected void initErrorHandler() {
78
- if (options.strict) {
79
- errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
80
- } else {
81
- errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning);
82
- }
76
+ java_encoding = NokogiriHelpers.getValidEncoding(encoding);
83
77
  }
84
78
 
85
79
  @Override
@@ -105,7 +99,7 @@ public class HtmlDomParserContext extends XmlDomParserContext {
105
99
 
106
100
  @Override
107
101
  public void setEncoding(String encoding) {
108
- super.setEncoding(encoding);
102
+ super.setEncoding(encoding);
109
103
  }
110
104
 
111
105
  /**
@@ -118,15 +112,27 @@ public class HtmlDomParserContext extends XmlDomParserContext {
118
112
  }
119
113
 
120
114
  @Override
121
- protected XmlDocument getNewEmptyDocument(ThreadContext context) {
122
- IRubyObject[] args = IRubyObject.NULL_ARRAY;
123
- return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"), args);
115
+ public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url) {
116
+ XmlDocument xmlDoc = super.parse(context, klass, url);
117
+
118
+ // let's be consistent in how we handle RECOVER and NORECOVER (a.k.a. STRICT)
119
+ // https://github.com/sparklemotion/nokogiri/issues/2130
120
+ if (!options.recover && errorHandler.getErrors().size() > 0) {
121
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
122
+ String exceptionMsg = String.format("%s: '%s'",
123
+ "Parser without recover option encountered error or warning",
124
+ errorHandler.getErrors().get(0));
125
+ xmlSyntaxError.setException(new Exception(exceptionMsg));
126
+ throw xmlSyntaxError.toThrowable();
127
+ }
128
+
129
+ return xmlDoc;
124
130
  }
125
131
 
126
132
  @Override
127
- protected XmlDocument wrapDocument(ThreadContext context, RubyClass klazz, Document document) {
128
- HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
129
- htmlDocument.setDocumentNode(context, document);
133
+ protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) {
134
+ HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
135
+ htmlDocument.setDocumentNode(context.runtime, document);
130
136
  if (ruby_encoding.isNil()) {
131
137
  // ruby_encoding might have detected by HtmlDocument::EncodingReader
132
138
  if (detected_encoding != null && !detected_encoding.isNil()) {
@@ -134,7 +140,7 @@ public class HtmlDomParserContext extends XmlDomParserContext {
134
140
  } else {
135
141
  // no encoding given & no encoding detected, then try to get it
136
142
  String charset = tryGetCharsetFromHtml5MetaTag(document);
137
- ruby_encoding = stringOrNil(context.getRuntime(), charset);
143
+ ruby_encoding = stringOrNil(context.runtime, charset);
138
144
  }
139
145
  }
140
146
  htmlDocument.setEncoding(ruby_encoding);
@@ -195,32 +201,32 @@ public class HtmlDomParserContext extends XmlDomParserContext {
195
201
 
196
202
  // element names from xhtml1-strict.dtd
197
203
  private static String[][] element_names = {
198
- {"a", "abbr", "acronym", "address", "area"},
199
- {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
200
- {"caption", "cite", "code", "col", "colgroup"},
201
- {"dd", "del", "dfn", "div", "dl", "dt"},
202
- {"em"},
203
- {"fieldset", "font", "form", "frame", "frameset"},
204
- {}, // g
205
- {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
206
- {"i", "iframe", "img", "input", "ins"},
207
- {}, // j
208
- {"kbd"},
209
- {"label", "legend", "li", "link"},
210
- {"map", "meta"},
211
- {"noframes", "noscript"},
212
- {"object", "ol", "optgroup", "option"},
213
- {"p", "param", "pre"},
214
- {"q"},
215
- {}, // r
216
- {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
217
- {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
218
- {"u", "ul"},
219
- {"var"},
220
- {}, // w
221
- {}, // x
222
- {}, // y
223
- {} // z
204
+ {"a", "abbr", "acronym", "address", "area"},
205
+ {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
206
+ {"caption", "cite", "code", "col", "colgroup"},
207
+ {"dd", "del", "dfn", "div", "dl", "dt"},
208
+ {"em"},
209
+ {"fieldset", "font", "form", "frame", "frameset"},
210
+ {}, // g
211
+ {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
212
+ {"i", "iframe", "img", "input", "ins"},
213
+ {}, // j
214
+ {"kbd"},
215
+ {"label", "legend", "li", "link"},
216
+ {"map", "meta"},
217
+ {"noframes", "noscript"},
218
+ {"object", "ol", "optgroup", "option"},
219
+ {"p", "param", "pre"},
220
+ {"q"},
221
+ {}, // r
222
+ {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
223
+ {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
224
+ {"u", "ul"},
225
+ {"var"},
226
+ {}, // w
227
+ {}, // x
228
+ {}, // y
229
+ {} // z
224
230
  };
225
231
 
226
232
  private static boolean isValid(final String name) {
@@ -249,7 +249,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
249
249
  try {
250
250
  final String msg = ex.getMessage();
251
251
  call("error", runtime.newString(msg == null ? "" : msg));
252
- addError(new RaiseException(XmlSyntaxError.createError(runtime, ex), true));
252
+ addError(XmlSyntaxError.createError(runtime, ex).toThrowable());
253
253
  } catch( RaiseException e) {
254
254
  addError(e);
255
255
  throw e;
@@ -39,6 +39,7 @@ import java.lang.reflect.Method;
39
39
  import java.nio.ByteBuffer;
40
40
  import java.nio.CharBuffer;
41
41
  import java.nio.charset.Charset;
42
+ import java.util.List;
42
43
  import java.util.Set;
43
44
  import java.util.regex.Matcher;
44
45
  import java.util.regex.Pattern;
@@ -53,7 +54,6 @@ import org.jruby.util.ByteList;
53
54
  import org.w3c.dom.Attr;
54
55
  import org.w3c.dom.DOMException;
55
56
  import org.w3c.dom.Document;
56
- import org.w3c.dom.NamedNodeMap;
57
57
  import org.w3c.dom.Node;
58
58
  import org.w3c.dom.NodeList;
59
59
 
@@ -81,7 +81,7 @@ import nokogiri.XmlXpathContext;
81
81
  */
82
82
  public class NokogiriHelpers {
83
83
  public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
84
- public static final String VALID_ROOT_NODE = "NOKOGIRI_VALIDE_ROOT_NODE";
84
+ public static final String ROOT_NODE_INVALID = "NOKOGIRI_ROOT_NODE_INVALID";
85
85
  public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
86
86
 
87
87
  public static XmlNode getCachedNode(Node node) {
@@ -108,22 +108,21 @@ public class NokogiriHelpers {
108
108
  * or XmlNamespace wrapping <code>node</code> if there is no cached
109
109
  * value.
110
110
  */
111
- public static IRubyObject getCachedNodeOrCreate(Ruby ruby, Node node) {
112
- if(node == null) return ruby.getNil();
111
+ public static IRubyObject getCachedNodeOrCreate(Ruby runtime, Node node) {
112
+ if (node == null) return runtime.getNil();
113
113
  if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
114
- XmlDocument xmlDocument = (XmlDocument)node.getOwnerDocument().getUserData(CACHED_NODE);
114
+ XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE);
115
115
  if (!(xmlDocument instanceof HtmlDocument)) {
116
- String prefix = getLocalNameForNamespace(((Attr)node).getName());
117
- prefix = prefix != null ? prefix : "";
118
- String href = ((Attr)node).getValue();
116
+ String prefix = getLocalNameForNamespace(((Attr) node).getName(), null);
117
+ String href = ((Attr) node).getValue();
119
118
  XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
120
119
  if (xmlNamespace != null) return xmlNamespace;
121
- else return XmlNamespace.createFromAttr(ruby, (Attr)node);
120
+ return XmlNamespace.createFromAttr(runtime, (Attr) node);
122
121
  }
123
122
  }
124
123
  XmlNode xmlNode = getCachedNode(node);
125
- if(xmlNode == null) {
126
- xmlNode = (XmlNode)constructNode(ruby, node);
124
+ if (xmlNode == null) {
125
+ xmlNode = (XmlNode) constructNode(runtime, node);
127
126
  node.setUserData(CACHED_NODE, xmlNode, null);
128
127
  }
129
128
  return xmlNode;
@@ -140,37 +139,37 @@ public class NokogiriHelpers {
140
139
  switch (node.getNodeType()) {
141
140
  case Node.ELEMENT_NODE:
142
141
  XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Element"));
143
- xmlElement.setNode(runtime.getCurrentContext(), node);
142
+ xmlElement.setNode(runtime, node);
144
143
  return xmlElement;
145
144
  case Node.ATTRIBUTE_NODE:
146
145
  XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Attr"));
147
- xmlAttr.setNode(runtime.getCurrentContext(), node);
146
+ xmlAttr.setNode(runtime, node);
148
147
  return xmlAttr;
149
148
  case Node.TEXT_NODE:
150
149
  XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Text"));
151
- xmlText.setNode(runtime.getCurrentContext(), node);
150
+ xmlText.setNode(runtime, node);
152
151
  return xmlText;
153
152
  case Node.COMMENT_NODE:
154
153
  XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Comment"));
155
- xmlComment.setNode(runtime.getCurrentContext(), node);
154
+ xmlComment.setNode(runtime, node);
156
155
  return xmlComment;
157
156
  case Node.ENTITY_NODE:
158
157
  return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node);
159
158
  case Node.ENTITY_REFERENCE_NODE:
160
159
  XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityReference"));
161
- xmlEntityRef.setNode(runtime.getCurrentContext(), node);
160
+ xmlEntityRef.setNode(runtime, node);
162
161
  return xmlEntityRef;
163
162
  case Node.PROCESSING_INSTRUCTION_NODE:
164
163
  XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::ProcessingInstruction"));
165
- xmlProcessingInstruction.setNode(runtime.getCurrentContext(), node);
164
+ xmlProcessingInstruction.setNode(runtime, node);
166
165
  return xmlProcessingInstruction;
167
166
  case Node.CDATA_SECTION_NODE:
168
167
  XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::CDATA"));
169
- xmlCdata.setNode(runtime.getCurrentContext(), node);
168
+ xmlCdata.setNode(runtime, node);
170
169
  return xmlCdata;
171
170
  case Node.DOCUMENT_NODE:
172
171
  XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
173
- xmlDocument.setDocumentNode(runtime.getCurrentContext(), node);
172
+ xmlDocument.setDocumentNode(runtime, (Document) node);
174
173
  return xmlDocument;
175
174
  case Node.DOCUMENT_TYPE_NODE:
176
175
  XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
@@ -178,7 +177,7 @@ public class NokogiriHelpers {
178
177
  return xmlDtd;
179
178
  default:
180
179
  XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Node"));
181
- xmlNode.setNode(runtime.getCurrentContext(), node);
180
+ xmlNode.setNode(runtime, node);
182
181
  return xmlNode;
183
182
  }
184
183
  }
@@ -242,47 +241,18 @@ public class NokogiriHelpers {
242
241
  return pos > 0 ? qName.substring(pos + 1) : qName;
243
242
  }
244
243
 
245
- public static String getLocalNameForNamespace(String name) {
244
+ public static String getLocalNameForNamespace(String name, String defValue) {
246
245
  String localName = getLocalPart(name);
247
- return ("xmlns".equals(localName)) ? null : localName;
246
+ return ("xmlns".equals(localName)) ? defValue : localName;
248
247
  }
249
248
 
250
- private static final Charset UTF8 = Charset.forName("UTF-8");
251
-
252
- /**
253
- * Converts a RubyString in to a Java String. Assumes the
254
- * RubyString is encoded as UTF-8. This is generally the case for
255
- * RubyStrings created with getRuntime().newString("java string").
256
- * It also seems to be the case for strings created within Ruby
257
- * where $KCODE has not been set.
258
- *
259
- * Note that RubyString#toString() decodes the string data as
260
- * ISO-8859-1 (See org.jruby.util.ByteList.java). This is not
261
- * what you want if you have any multibyte characters in your
262
- * UTF-8 string.
263
- *
264
- * FIXME: This really needs to be more robust in terms of
265
- * detecting the encoding and properly converting to a Java
266
- * String. It's unfortunate that RubyString#toString() doesn't do
267
- * this for us.
268
- */
269
249
  public static String rubyStringToString(IRubyObject str) {
270
250
  if (str.isNil()) return null;
271
- //return rubyStringToString(str.convertToString());
272
- return toJavaString(str.convertToString());
273
- }
274
-
275
- private static String toJavaString(RubyString str) {
276
- return str.decodeString(); // toString()
251
+ return str.convertToString().decodeString();
277
252
  }
278
253
 
279
254
  public static String rubyStringToString(RubyString str) {
280
- ByteList byteList = str.getByteList();
281
- byte[] data = byteList.unsafeBytes();
282
- int offset = byteList.begin();
283
- int len = byteList.length();
284
- ByteBuffer buf = ByteBuffer.wrap(data, offset, len);
285
- return UTF8.decode(buf).toString();
255
+ return str.decodeString(); // if encoding UTF-8 will decode UTF-8
286
256
  }
287
257
 
288
258
  public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) {
@@ -297,7 +267,6 @@ public class NokogiriHelpers {
297
267
 
298
268
  Node cur, tmp, next;
299
269
 
300
- // TODO: Rename buffer to path.
301
270
  String buffer = "";
302
271
 
303
272
  cur = node;
@@ -494,33 +463,20 @@ public class NokogiriHelpers {
494
463
  return buffer;
495
464
  }
496
465
 
497
- protected static boolean compareTwoNodes(Node m, Node n) {
466
+ static boolean compareTwoNodes(Node m, Node n) {
498
467
  return nodesAreEqual(m.getLocalName(), n.getLocalName()) &&
499
468
  nodesAreEqual(m.getPrefix(), n.getPrefix());
500
469
  }
501
470
 
502
- protected static boolean fullNamesMatch(Node a, Node b) {
503
- return a.getNodeName().equals(b.getNodeName());
504
- }
505
-
506
- protected static String getFullName(Node n) {
507
- String lname = n.getLocalName();
508
- String prefix = n.getPrefix();
509
- if (lname != null) {
510
- if (prefix != null)
511
- return prefix + ":" + lname;
512
- else
513
- return lname;
514
- } else {
515
- return n.getNodeName();
516
- }
517
- }
518
-
519
471
  private static boolean nodesAreEqual(Object a, Object b) {
520
472
  return (((a == null) && (b == null)) ||
521
473
  ((a != null) && (b != null) && (b.equals(a))));
522
474
  }
523
475
 
476
+ private static boolean fullNamesMatch(Node a, Node b) {
477
+ return a.getNodeName().equals(b.getNodeName());
478
+ }
479
+
524
480
  private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
525
481
  private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
526
482
  private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
@@ -554,20 +510,6 @@ public class NokogiriHelpers {
554
510
  return convert(encoded_pattern, str, encoded, decoded);
555
511
  }
556
512
 
557
- public static String getNodeName(Node node) {
558
- if(node == null) { System.out.println("node is null"); return ""; }
559
- String name = node.getNodeName();
560
- if(name == null) { System.out.println("name is null"); return ""; }
561
- if(name.equals("#document")) {
562
- return "document";
563
- } else if(name.equals("#text")) {
564
- return "text";
565
- } else {
566
- name = getLocalPart(name);
567
- return (name == null) ? "" : name;
568
- }
569
- }
570
-
571
513
  public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
572
514
  public static boolean isNamespace(Node node) {
573
515
  return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
@@ -602,6 +544,10 @@ public class NokogiriHelpers {
602
544
  return str.isEmpty() || isBlank((CharSequence) str);
603
545
  }
604
546
 
547
+ public static boolean isNullOrEmpty(String str) {
548
+ return str == null || str.isEmpty();
549
+ }
550
+
605
551
  public static CharSequence canonicalizeWhitespace(CharSequence str) {
606
552
  final int len = str.length();
607
553
  StringBuilder sb = new StringBuilder(len);
@@ -625,59 +571,44 @@ public class NokogiriHelpers {
625
571
  return newPrefix + ':' + tagName;
626
572
  }
627
573
 
628
- public static IRubyObject[] nodeListToRubyArray(Ruby ruby, NodeList nodes) {
574
+ public static IRubyObject[] nodeListToRubyArray(Ruby runtime, NodeList nodes) {
629
575
  IRubyObject[] array = new IRubyObject[nodes.getLength()];
630
576
  for (int i = 0; i < nodes.getLength(); i++) {
631
- array[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.item(i));
577
+ array[i] = NokogiriHelpers.getCachedNodeOrCreate(runtime, nodes.item(i));
632
578
  }
633
579
  return array;
634
580
  }
635
581
 
636
- public static IRubyObject[] nodeArrayToArray(Ruby ruby, Node[] nodes) {
637
- IRubyObject[] result = new IRubyObject[nodes.length];
638
- for(int i = 0; i < nodes.length; i++) {
639
- result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]);
582
+ public static IRubyObject[] nodeListToArray(Ruby ruby, List<Node> nodes) {
583
+ IRubyObject[] result = new IRubyObject[nodes.size()];
584
+ for (int i = 0; i < result.length; i++) {
585
+ result[i] = NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes.get(i));
640
586
  }
641
587
  return result;
642
588
  }
643
589
 
644
590
  public static RubyArray nodeArrayToRubyArray(Ruby ruby, Node[] nodes) {
645
591
  RubyArray n = RubyArray.newArray(ruby, nodes.length);
646
- for(int i = 0; i < nodes.length; i++) {
592
+ for (int i = 0; i < nodes.length; i++) {
647
593
  n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodes[i]));
648
594
  }
649
595
  return n;
650
596
  }
651
597
 
652
- public static RubyArray namedNodeMapToRubyArray(Ruby ruby, NamedNodeMap map) {
653
- RubyArray n = RubyArray.newArray(ruby, map.getLength());
654
- for(int i = 0; i < map.getLength(); i++) {
655
- n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, map.item(i)));
656
- }
657
- return n;
658
- }
659
-
660
- public static String getValidEncoding(Ruby runtime, IRubyObject encoding) {
661
- if (encoding.isNil()) {
662
- return guessEncoding();
663
- } else {
664
- return ignoreInvalidEncoding(runtime, encoding);
665
- }
598
+ public static String getValidEncodingOrNull(IRubyObject encoding) {
599
+ if (encoding.isNil()) return null; // charsetNames does not like contains(null)
600
+ String enc = rubyStringToString(encoding.convertToString());
601
+ if (CharsetNames.contains(enc)) return enc;
602
+ return null;
666
603
  }
667
604
 
668
- private static String guessEncoding() {
669
- String name = System.getProperty("file.encoding");
670
- if (name == null) name = "UTF-8";
671
- return name;
605
+ public static String getValidEncoding(IRubyObject encoding) {
606
+ String validEncoding = getValidEncodingOrNull(encoding);
607
+ if (validEncoding != null) return validEncoding;
608
+ return Charset.defaultCharset().name();
672
609
  }
673
610
 
674
- private static Set<String> charsetNames = Charset.availableCharsets().keySet();
675
-
676
- private static String ignoreInvalidEncoding(Ruby runtime, IRubyObject encoding) {
677
- String givenEncoding = rubyStringToString(encoding);
678
- if (charsetNames.contains(givenEncoding)) return givenEncoding;
679
- else return guessEncoding();
680
- }
611
+ private static final Set<String> CharsetNames = Charset.availableCharsets().keySet();
681
612
 
682
613
  public static String adjustSystemIdIfNecessary(String currentDir, String scriptFileName, String baseURI, String systemId) {
683
614
  if (systemId == null) return systemId;
@@ -704,9 +635,13 @@ public class NokogiriHelpers {
704
635
  return null;
705
636
  }
706
637
 
638
+ private static final Charset UTF8 = Charset.forName("UTF-8");
639
+
707
640
  public static boolean isUTF8(String encoding) {
708
641
  if (encoding == null) return true; // no need to convert encoding
709
- return Charset.forName(encoding).compareTo(UTF8) == 0;
642
+
643
+ if ("UTF-8".equals(encoding)) return true;
644
+ return UTF8.aliases().contains(encoding);
710
645
  }
711
646
 
712
647
  public static ByteBuffer convertEncoding(Charset output_charset, CharSequence input_string) {
@@ -738,15 +673,16 @@ public class NokogiriHelpers {
738
673
  private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) {
739
674
  final Ruby runtime = context.getRuntime();
740
675
  final ByteList opt;
741
- if (NokogiriHelpers.shift_jis.compareTo(encoding) == 0) opt = _Sw;
742
- else if (NokogiriHelpers.jis.compareTo(encoding) == 0) opt = _Jw;
743
- else if (NokogiriHelpers.euc_jp.compareTo(encoding) == 0) opt = _Ew;
676
+ if (NokogiriHelpers.Shift_JIS.compareTo(encoding) == 0) opt = _Sw;
677
+ else if (NokogiriHelpers.ISO_2022_JP.compareTo(encoding) == 0) opt = _Jw;
678
+ else if (NokogiriHelpers.EUC_JP.compareTo(encoding) == 0) opt = _Ew;
744
679
  else opt = _Ww; // should not come here. should be treated before this method.
745
680
 
746
681
  Class nkfClass;
747
682
  try {
748
- nkfClass = runtime.getClassLoader().loadClass("org.jruby.RubyNKF");
749
- } catch (ClassNotFoundException e2) {
683
+ // JRuby 1.7 and later
684
+ nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
685
+ } catch (ClassNotFoundException e1) {
750
686
  return str;
751
687
  }
752
688
  Method nkf_method;
@@ -768,9 +704,9 @@ public class NokogiriHelpers {
768
704
  }
769
705
  }
770
706
 
771
- private static final Charset shift_jis = Charset.forName("Shift_JIS");
772
- private static final Charset jis = Charset.forName("ISO-2022-JP");
773
- private static final Charset euc_jp = Charset.forName("EUC-JP");
707
+ private static final Charset Shift_JIS = Charset.forName("Shift_JIS");
708
+ private static final Charset ISO_2022_JP = Charset.forName("ISO-2022-JP"); // JIS
709
+ private static final Charset EUC_JP = Charset.forName("EUC-JP");
774
710
 
775
711
  public static boolean shouldEncode(Node text) {
776
712
  final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
@@ -781,17 +717,17 @@ public class NokogiriHelpers {
781
717
  return !shouldEncode(text);
782
718
  }
783
719
 
784
- public static NokogiriNamespaceCache getNamespaceCacheFormNode(Node n) {
785
- XmlDocument xmlDoc = (XmlDocument)getCachedNode(n.getOwnerDocument());
720
+ public static NokogiriNamespaceCache getNamespaceCache(Node node) {
721
+ XmlDocument xmlDoc = (XmlDocument) getCachedNode(node.getOwnerDocument());
786
722
  return xmlDoc.getNamespaceCache();
787
723
  }
788
724
 
789
- public static Node renameNode(Node n, String namespaceURI, String qualifiedName) throws DOMException {
790
- Document doc = n.getOwnerDocument();
791
- NokogiriNamespaceCache nsCache = getNamespaceCacheFormNode(n);
792
- Node result = doc.renameNode(n, namespaceURI, qualifiedName);
793
- if (result != n) {
794
- nsCache.replaceNode(n, result);
725
+ public static Node renameNode(Node node, String namespaceURI, String qualifiedName) throws DOMException {
726
+ Document doc = node.getOwnerDocument();
727
+ NokogiriNamespaceCache nsCache = getNamespaceCache(node);
728
+ Node result = doc.renameNode(node, namespaceURI, qualifiedName);
729
+ if (result != node) {
730
+ nsCache.replaceNode(node, result);
795
731
  }
796
732
  return result;
797
733
  }