nokogiri 1.7.2-java → 1.8.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.cross_rubies +4 -4
  3. data/.travis.yml +43 -24
  4. data/CHANGELOG.md +54 -6
  5. data/Gemfile +8 -7
  6. data/Gemfile-libxml-ruby +3 -0
  7. data/LICENSE-DEPENDENCIES.md +1612 -0
  8. data/{LICENSE.txt → LICENSE.md} +1 -1
  9. data/Manifest.txt +5 -8
  10. data/README.md +8 -5
  11. data/Rakefile +15 -31
  12. data/appveyor.yml +2 -0
  13. data/dependencies.yml +12 -7
  14. data/ext/java/nokogiri/HtmlDocument.java +2 -2
  15. data/ext/java/nokogiri/HtmlSaxParserContext.java +20 -21
  16. data/ext/java/nokogiri/HtmlSaxPushParser.java +6 -10
  17. data/ext/java/nokogiri/NokogiriService.java +10 -31
  18. data/ext/java/nokogiri/XmlAttr.java +1 -26
  19. data/ext/java/nokogiri/XmlCdata.java +0 -1
  20. data/ext/java/nokogiri/XmlComment.java +1 -1
  21. data/ext/java/nokogiri/XmlDocument.java +4 -5
  22. data/ext/java/nokogiri/XmlDocumentFragment.java +29 -21
  23. data/ext/java/nokogiri/XmlDtd.java +1 -1
  24. data/ext/java/nokogiri/XmlElement.java +9 -10
  25. data/ext/java/nokogiri/XmlEntityDecl.java +4 -5
  26. data/ext/java/nokogiri/XmlNode.java +105 -103
  27. data/ext/java/nokogiri/XmlNodeSet.java +64 -76
  28. data/ext/java/nokogiri/XmlReader.java +48 -48
  29. data/ext/java/nokogiri/XmlRelaxng.java +1 -1
  30. data/ext/java/nokogiri/XmlSaxPushParser.java +37 -17
  31. data/ext/java/nokogiri/XmlSchema.java +7 -5
  32. data/ext/java/nokogiri/XmlSyntaxError.java +47 -35
  33. data/ext/java/nokogiri/XmlXpathContext.java +160 -132
  34. data/ext/java/nokogiri/XsltStylesheet.java +15 -24
  35. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +19 -23
  36. data/ext/java/nokogiri/internals/NokogiriDomParser.java +1 -1
  37. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +1 -1
  38. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +11 -13
  39. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +5 -21
  40. data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
  41. data/ext/java/nokogiri/internals/NokogiriHelpers.java +105 -142
  42. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +16 -26
  43. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +32 -50
  44. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +10 -13
  45. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +3 -10
  46. data/ext/java/nokogiri/internals/ParserContext.java +4 -8
  47. data/ext/java/nokogiri/internals/ReaderNode.java +53 -93
  48. data/ext/java/nokogiri/internals/SaveContextVisitor.java +77 -89
  49. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +6 -9
  50. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +167 -0
  51. data/ext/java/nokogiri/internals/XmlDomParserContext.java +17 -6
  52. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +1 -1
  53. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +28 -28
  54. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +3 -4
  55. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +2 -2
  56. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -10
  57. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +5 -5
  58. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +2 -2
  59. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +1 -1
  60. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +2 -2
  61. data/ext/java/org/apache/xml/dtm/ref/dom2dtm/DOM2DTMExt.java +1749 -0
  62. data/ext/nokogiri/extconf.rb +12 -17
  63. data/ext/nokogiri/nokogiri.h +0 -10
  64. data/ext/nokogiri/xml_attr.c +12 -8
  65. data/ext/nokogiri/xml_node.c +17 -14
  66. data/ext/nokogiri/xml_sax_push_parser.c +56 -12
  67. data/lib/nokogiri/html/sax/parser.rb +10 -0
  68. data/lib/nokogiri/nokogiri.jar +0 -0
  69. data/lib/nokogiri/version.rb +5 -4
  70. data/lib/nokogiri/xml/document.rb +9 -9
  71. data/lib/nokogiri/xml/node.rb +7 -7
  72. data/lib/nokogiri/xml/node_set.rb +12 -7
  73. data/lib/nokogiri/xml/sax/parser.rb +6 -7
  74. data/lib/nokogiri/xml/searchable.rb +34 -25
  75. data/lib/nokogiri/xml/syntax_error.rb +24 -1
  76. data/test/decorators/test_slop.rb +4 -1
  77. data/test/helper.rb +10 -0
  78. data/test/html/sax/test_parser.rb +27 -0
  79. data/test/html/test_document.rb +12 -1
  80. data/test/html/test_document_encoding.rb +1 -3
  81. data/test/html/test_document_fragment.rb +3 -0
  82. data/test/xml/sax/test_push_parser.rb +48 -0
  83. data/test/xml/test_attr.rb +7 -0
  84. data/test/xml/test_document.rb +1 -1
  85. data/test/xml/test_document_fragment.rb +27 -0
  86. data/test/xml/test_entity_reference.rb +2 -2
  87. data/test/xml/test_node.rb +12 -15
  88. data/test/xml/test_node_reparenting.rb +14 -0
  89. data/test/xml/test_node_set.rb +8 -6
  90. data/test/xml/test_reader.rb +19 -0
  91. data/test/xml/test_syntax_error.rb +21 -15
  92. data/test/xml/test_unparented_node.rb +54 -11
  93. data/test/xml/test_xpath.rb +23 -6
  94. metadata +32 -20
  95. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +0 -73
  96. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +0 -72
  97. data/suppressions/nokogiri_ree-1.8.7.358.supp +0 -61
  98. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  99. data/suppressions/nokogiri_ruby-1.9.2.320.supp +0 -28
  100. data/suppressions/nokogiri_ruby-1.9.3.327.supp +0 -28
  101. data/test_all +0 -105
@@ -82,16 +82,12 @@ import org.w3c.dom.Document;
82
82
  */
83
83
  @JRubyClass(name="Nokogiri::XSLT::Stylesheet")
84
84
  public class XsltStylesheet extends RubyObject {
85
- private static Map<String, Object> registry = new HashMap<String, Object>();
85
+
86
86
  private TransformerFactory factory = null;
87
87
  private Templates sheet = null;
88
88
  private IRubyObject stylesheet = null;
89
89
  private boolean htmlish = false;
90
90
 
91
- public static Map<String, Object> getRegistry() {
92
- return registry;
93
- }
94
-
95
91
  public XsltStylesheet(Ruby ruby, RubyClass rubyClass) {
96
92
  super(ruby, rubyClass);
97
93
  }
@@ -215,8 +211,7 @@ public class XsltStylesheet extends RubyObject {
215
211
 
216
212
  NokogiriXsltErrorListener elistener = new NokogiriXsltErrorListener();
217
213
  DOMSource domSource = new DOMSource(((XmlDocument) args[0]).getDocument());
218
- DOMResult result = null;
219
- String stringResult = null;
214
+ final DOMResult result; String stringResult = null;
220
215
  try{
221
216
  result = tryXsltTransformation(context, args, domSource, elistener); // DOMResult
222
217
  if (result.getNode().getFirstChild() == null) {
@@ -275,16 +270,17 @@ public class XsltStylesheet extends RubyObject {
275
270
  pwriter.connect(preader);
276
271
  StreamResult result = new StreamResult(pwriter);
277
272
  transf.transform(domSource, result);
273
+
278
274
  char[] cbuf = new char[1024];
279
275
  int len = preader.read(cbuf, 0, 1024);
280
- StringBuilder builder = new StringBuilder();
281
- builder.append(CharBuffer.wrap(cbuf, 0, len));
282
- htmlish = isHtml(builder.toString()); // judge from the first chunk
276
+ StringBuilder builder = new StringBuilder(len);
277
+ builder.append(cbuf, 0, len);
278
+ htmlish = isHtml(builder); // judge from the first chunk
283
279
 
284
280
  while (len == 1024) {
285
281
  len = preader.read(cbuf, 0, 1024);
286
282
  if (len > 0) {
287
- builder.append(CharBuffer.wrap(cbuf, 0, len));
283
+ builder.append(cbuf, 0, len);
288
284
  }
289
285
  }
290
286
 
@@ -308,20 +304,18 @@ public class XsltStylesheet extends RubyObject {
308
304
 
309
305
  private Templates getTemplatesFromStreamSource() throws TransformerConfigurationException {
310
306
  if (stylesheet instanceof RubyString) {
311
- StringReader reader = new StringReader((String)stylesheet.toJava(String.class));
307
+ StringReader reader = new StringReader(stylesheet.asJavaString());
312
308
  StreamSource xsltStreamSource = new StreamSource(reader);
313
309
  return factory.newTemplates(xsltStreamSource);
314
310
  }
315
311
  return null;
316
312
  }
317
313
 
318
- private static Pattern html_tag =
319
- Pattern.compile("<(%s)*html", Pattern.CASE_INSENSITIVE);
314
+ private static final Pattern HTML_TAG = Pattern.compile("<(%s)*html", Pattern.CASE_INSENSITIVE);
320
315
 
321
- private boolean isHtml(String chunk) {
322
- Matcher m = XsltStylesheet.html_tag.matcher(chunk);
323
- if (m.find()) return true;
324
- else return false;
316
+ private static boolean isHtml(CharSequence chunk) {
317
+ Matcher match = HTML_TAG.matcher(chunk);
318
+ return match.find();
325
319
  }
326
320
 
327
321
  private IRubyObject createDocumentFromString(ThreadContext context, Ruby runtime, String stringResult) {
@@ -346,12 +340,9 @@ public class XsltStylesheet extends RubyObject {
346
340
  }
347
341
  }
348
342
 
349
- private void argumentTypeCheck(Ruby runtime, IRubyObject arg) {
350
- if (arg instanceof XmlDocument) {
351
- return;
352
- } else {
353
- throw runtime.newArgumentError("argument must be a Nokogiri::XML::Document");
354
- }
343
+ private static void argumentTypeCheck(Ruby runtime, IRubyObject arg) {
344
+ if (arg instanceof XmlDocument) return;
345
+ throw runtime.newArgumentError("argument must be a Nokogiri::XML::Document");
355
346
  }
356
347
 
357
348
  @JRubyMethod(name = {"registr", "register"}, meta = true)
@@ -39,7 +39,6 @@ import nokogiri.HtmlDocument;
39
39
  import nokogiri.NokogiriService;
40
40
  import nokogiri.XmlDocument;
41
41
 
42
- import org.apache.xerces.parsers.DOMParser;
43
42
  import org.apache.xerces.xni.Augmentations;
44
43
  import org.apache.xerces.xni.QName;
45
44
  import org.apache.xerces.xni.XMLAttributes;
@@ -54,6 +53,7 @@ import org.jruby.runtime.ThreadContext;
54
53
  import org.jruby.runtime.builtin.IRubyObject;
55
54
  import org.w3c.dom.Document;
56
55
  import org.w3c.dom.NamedNodeMap;
56
+ import org.w3c.dom.Node;
57
57
  import org.w3c.dom.NodeList;
58
58
 
59
59
  /**
@@ -65,8 +65,6 @@ import org.w3c.dom.NodeList;
65
65
  */
66
66
  public class HtmlDomParserContext extends XmlDomParserContext {
67
67
 
68
- private String encoding;
69
-
70
68
  public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
71
69
  super(runtime, options);
72
70
  }
@@ -87,7 +85,7 @@ public class HtmlDomParserContext extends XmlDomParserContext {
87
85
  @Override
88
86
  protected void initParser(Ruby runtime) {
89
87
  XMLParserConfiguration config = new HTMLConfiguration();
90
- XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
88
+ //XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
91
89
  XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
92
90
  //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
93
91
  XMLDocumentFilter[] filters = { elementValidityCheckFilter};
@@ -121,14 +119,12 @@ public class HtmlDomParserContext extends XmlDomParserContext {
121
119
 
122
120
  @Override
123
121
  protected XmlDocument getNewEmptyDocument(ThreadContext context) {
124
- IRubyObject[] args = new IRubyObject[0];
122
+ IRubyObject[] args = IRubyObject.NULL_ARRAY;
125
123
  return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"), args);
126
124
  }
127
125
 
128
126
  @Override
129
- protected XmlDocument wrapDocument(ThreadContext context,
130
- RubyClass klazz,
131
- Document document) {
127
+ protected XmlDocument wrapDocument(ThreadContext context, RubyClass klazz, Document document) {
132
128
  HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
133
129
  htmlDocument.setDocumentNode(context, document);
134
130
  if (ruby_encoding.isNil()) {
@@ -149,18 +145,18 @@ public class HtmlDomParserContext extends XmlDomParserContext {
149
145
  // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
150
146
  // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
151
147
  // so, this method attempts to find the charset.
152
- private String tryGetCharsetFromHtml5MetaTag(Document document) {
148
+ private static String tryGetCharsetFromHtml5MetaTag(Document document) {
153
149
  if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
154
- NodeList list = document.getDocumentElement().getChildNodes();
150
+ NodeList list = document.getDocumentElement().getChildNodes(); Node item;
155
151
  for (int i = 0; i < list.getLength(); i++) {
156
- if ("head".equalsIgnoreCase(list.item(i).getNodeName())) {
157
- NodeList headers = list.item(i).getChildNodes();
152
+ if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
153
+ NodeList headers = item.getChildNodes();
158
154
  for (int j = 0; j < headers.getLength(); j++) {
159
- if ("meta".equalsIgnoreCase(headers.item(j).getNodeName())) {
160
- NamedNodeMap nodeMap = headers.item(j).getAttributes();
155
+ if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
156
+ NamedNodeMap nodeMap = item.getAttributes();
161
157
  for (int k = 0; k < nodeMap.getLength(); k++) {
162
- if ("charset".equalsIgnoreCase(nodeMap.item(k).getNodeName())) {
163
- return nodeMap.item(k).getNodeValue();
158
+ if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
159
+ return item.getNodeValue();
164
160
  }
165
161
  }
166
162
  }
@@ -227,12 +223,12 @@ public class HtmlDomParserContext extends XmlDomParserContext {
227
223
  {} // z
228
224
  };
229
225
 
230
- private boolean isValid(String testee) {
231
- char[] c = testee.toCharArray();
232
- int index = new Integer(c[0]) - 97;
233
- if (index > 25) return false;
234
- for (int i=0; i<element_names[index].length; i++) {
235
- if (testee.equals(element_names[index][i])) {
226
+ private static boolean isValid(final String name) {
227
+ int index = name.charAt(0) - 97;
228
+ if (index >= element_names.length) return false;
229
+ String[] elementNames = element_names[index];
230
+ for (int i=0; i<elementNames.length; i++) {
231
+ if (name.equals(elementNames[i])) {
236
232
  return true;
237
233
  }
238
234
  }
@@ -242,7 +238,7 @@ public class HtmlDomParserContext extends XmlDomParserContext {
242
238
  @Override
243
239
  public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
244
240
  if (!isValid(name.rawname)) {
245
- errorHandler.getErrors().add(new Exception("Tag " + name.rawname + " invalid"));
241
+ errorHandler.addError(new Exception("Tag " + name.rawname + " invalid"));
246
242
  }
247
243
  super.startElement(name, attrs, augs);
248
244
  }
@@ -99,7 +99,7 @@ public class NokogiriDomParser extends DOMParser {
99
99
  doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
100
100
  }
101
101
 
102
- private class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver {
102
+ private static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver {
103
103
  InputSource source;
104
104
  private NokogiriXInlcudeEntityResolver(InputSource source) {
105
105
  this.source = source;
@@ -41,7 +41,7 @@ public class NokogiriEncodingReaderWrapper extends InputStream {
41
41
  this.encodingReader = encodingReader;
42
42
  this.ruby = context.getRuntime();
43
43
 
44
- if (!RuntimeHelpers.invoke(context, encodingReader, "respond_to?", ruby.newSymbol("read").to_sym()).isTrue()
44
+ if (!RuntimeHelpers.invoke(context, encodingReader, "respond_to?", ruby.newSymbol("read")).isTrue()
45
45
  || encodingReader.getInstanceVariable("@io") == null) {
46
46
  throw ruby.newArgumentError("Argument doesn't respond to read or doesn't have instance variable @io");
47
47
  }
@@ -19,7 +19,7 @@ import org.xml.sax.ext.EntityResolver2;
19
19
  * to be relative to the current directory of the Ruby runtime.
20
20
  */
21
21
  public class NokogiriEntityResolver implements EntityResolver2 {
22
- protected Ruby runtime;
22
+ protected final Ruby runtime;
23
23
  private final NokogiriErrorHandler handler;
24
24
  private final Options options;
25
25
 
@@ -33,7 +33,7 @@ public class NokogiriEntityResolver implements EntityResolver2 {
33
33
  @Override
34
34
  public InputSource getExternalSubset(String name, String baseURI)
35
35
  throws SAXException, IOException {
36
- return null;
36
+ return null;
37
37
  }
38
38
 
39
39
  @Override
@@ -51,17 +51,16 @@ public class NokogiriEntityResolver implements EntityResolver2 {
51
51
  return resolveEntity(runtime, name, publicId, baseURI, systemId);
52
52
  }
53
53
 
54
- private File join(String parent, String child) {
55
- if (new File(parent).isFile()) {
56
- parent = new File(parent).getParent();
57
- }
58
-
59
- return new File(parent, child);
54
+ private static File join(String parent, String child) {
55
+ if (new File(parent).isFile()) {
56
+ parent = new File(parent).getParent();
57
+ }
58
+ return new File(parent, child);
60
59
  }
61
60
 
62
- private InputSource emptyInputSource(InputSource source) {
63
- source.setByteStream(new ByteArrayInputStream(new byte[0]));
64
- return source;
61
+ private static InputSource emptyInputSource(InputSource source) {
62
+ source.setByteStream(new ByteArrayInputStream(new byte[0]));
63
+ return source;
65
64
  }
66
65
 
67
66
  private boolean shouldLoadDtd() {
@@ -69,8 +68,7 @@ public class NokogiriEntityResolver implements EntityResolver2 {
69
68
  }
70
69
 
71
70
  private void addError(String errorMessage) {
72
- if (handler != null)
73
- handler.errors.add(new Exception(errorMessage));
71
+ if (handler != null) handler.errors.add(new Exception(errorMessage));
74
72
  }
75
73
 
76
74
  /**
@@ -32,18 +32,10 @@
32
32
 
33
33
  package nokogiri.internals;
34
34
 
35
- import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
-
37
35
  import java.util.ArrayList;
38
36
  import java.util.List;
39
37
 
40
- import nokogiri.NokogiriService;
41
- import nokogiri.XmlSyntaxError;
42
-
43
38
  import org.apache.xerces.xni.parser.XMLErrorHandler;
44
- import org.jruby.Ruby;
45
- import org.jruby.runtime.ThreadContext;
46
- import org.jruby.runtime.builtin.IRubyObject;
47
39
  import org.xml.sax.ErrorHandler;
48
40
 
49
41
  /**
@@ -56,30 +48,22 @@ import org.xml.sax.ErrorHandler;
56
48
  * @author Yoko Harada <yokolet@gmail.com>
57
49
  */
58
50
  public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler {
59
- protected List<Exception> errors;
51
+ protected final List<Exception> errors;
60
52
  protected boolean noerror;
61
53
  protected boolean nowarning;
62
54
 
63
55
  public NokogiriErrorHandler(boolean noerror, boolean nowarning) {
64
- errors = new ArrayList<Exception>();
56
+ this.errors = new ArrayList<Exception>(4);
65
57
  this.noerror = noerror;
66
58
  this.nowarning = nowarning;
67
59
  }
68
60
 
69
- public List<Exception> getErrors() { return errors; }
61
+ List<Exception> getErrors() { return errors; }
70
62
 
71
- public List<IRubyObject> getErrorsReadyForRuby(ThreadContext context) {
72
- Ruby runtime = context.getRuntime();
73
- List<IRubyObject> res = new ArrayList<IRubyObject>();
74
- for (int i = 0; i < errors.size(); i++) {
75
- XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::SyntaxError"));
76
- xmlSyntaxError.setException(errors.get(i));
77
- res.add(xmlSyntaxError);
78
- }
79
- return res;
80
- }
63
+ public void addError(Exception ex) { errors.add(ex); }
81
64
 
82
65
  protected boolean usesNekoHtml(String domain) {
83
66
  return "http://cyberneko.org/html".equals(domain);
84
67
  }
68
+
85
69
  }
@@ -242,7 +242,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
242
242
  @Override
243
243
  public void characters(char[] ch, int start, int length) throws SAXException {
244
244
  StringBuffer sb = characterStack.peek();
245
- sb.append(new String(ch, start, length));
245
+ sb.append(ch, start, length);
246
246
  }
247
247
 
248
248
  @Override
@@ -32,17 +32,14 @@
32
32
 
33
33
  package nokogiri.internals;
34
34
 
35
+ import java.io.ByteArrayInputStream;
35
36
  import java.io.File;
36
37
  import java.io.UnsupportedEncodingException;
37
38
  import java.lang.reflect.InvocationTargetException;
38
39
  import java.lang.reflect.Method;
39
40
  import java.nio.ByteBuffer;
40
41
  import java.nio.CharBuffer;
41
- import java.nio.charset.CharacterCodingException;
42
42
  import java.nio.charset.Charset;
43
- import java.nio.charset.CharsetEncoder;
44
- import java.util.ArrayList;
45
- import java.util.List;
46
43
  import java.util.Set;
47
44
  import java.util.regex.Matcher;
48
45
  import java.util.regex.Pattern;
@@ -62,7 +59,6 @@ import nokogiri.XmlProcessingInstruction;
62
59
  import nokogiri.XmlText;
63
60
  import nokogiri.XmlXpathContext;
64
61
 
65
- import org.jcodings.specific.UTF8Encoding;
66
62
  import org.jruby.Ruby;
67
63
  import org.jruby.RubyArray;
68
64
  import org.jruby.RubyClass;
@@ -193,28 +189,28 @@ public class NokogiriHelpers {
193
189
  return NokogiriService.getNokogiriClassCache(ruby).get(name);
194
190
  }
195
191
 
196
- public static IRubyObject stringOrNil(Ruby runtime, String s) {
197
- if (s == null) return runtime.getNil();
198
- return convertJavaStringToRuby(runtime, s);
192
+ public static IRubyObject stringOrNil(Ruby runtime, String str) {
193
+ return str == null ? runtime.getNil() : convertString(runtime, str);
199
194
  }
200
-
195
+
196
+ public static IRubyObject stringOrNil(Ruby runtime, CharSequence str) {
197
+ return str == null ? runtime.getNil() : convertString(runtime, str);
198
+ }
199
+
201
200
  public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) {
202
- if (bytes == null) return runtime.getNil();
203
- return RubyString.newString(runtime, bytes);
201
+ return bytes == null ? runtime.getNil() : RubyString.newString(runtime, bytes);
204
202
  }
205
-
206
- public static IRubyObject stringOrBlank(Ruby runtime, String s) {
207
- if (s == null) return runtime.newString();
208
- return convertJavaStringToRuby(runtime, s);
203
+
204
+ public static IRubyObject stringOrBlank(Ruby runtime, String str) {
205
+ return str == null ? runtime.newString() : convertString(runtime, str);
209
206
  }
210
207
 
211
- private static IRubyObject convertJavaStringToRuby(Ruby runtime, String str) {
212
- if (runtime.is1_9()) {
213
- ByteList bytes = new ByteList(str.getBytes(RubyEncoding.UTF8), UTF8Encoding.INSTANCE);
214
- return RubyString.newString(runtime, bytes);
215
- } else {
216
- return RubyString.newString(runtime, str);
217
- }
208
+ public static RubyString convertString(Ruby runtime, String str) {
209
+ return RubyString.newUTF8String(runtime, str);
210
+ }
211
+
212
+ public static RubyString convertString(Ruby runtime, CharSequence str) {
213
+ return RubyString.newUTF8String(runtime, str);
218
214
  }
219
215
 
220
216
  /**
@@ -259,12 +255,7 @@ public class NokogiriHelpers {
259
255
  return ("xmlns".equals(localName)) ? null : localName;
260
256
  }
261
257
 
262
- private static Charset utf8 = null;
263
-
264
- private static Charset getCharsetUTF8() {
265
- if (utf8 == null) utf8 = Charset.forName("UTF-8");
266
- return utf8;
267
- }
258
+ private static final Charset UTF8 = Charset.forName("UTF-8");
268
259
 
269
260
  /**
270
261
  * Converts a RubyString in to a Java String. Assumes the
@@ -290,15 +281,7 @@ public class NokogiriHelpers {
290
281
  }
291
282
 
292
283
  private static String toJavaString(RubyString str) {
293
- ByteList value = str.getByteList();
294
- try {
295
- if (str.getRuntime().is1_9()) {
296
- return new String(value.getUnsafeBytes(), value.begin(), value.length(), str.getEncoding().toString());
297
- }
298
- return RubyEncoding.decodeUTF8(value.getUnsafeBytes(), value.begin(), value.length());
299
- } catch (UnsupportedEncodingException uee) {
300
- return str.toString();
301
- }
284
+ return str.decodeString(); // toString()
302
285
  }
303
286
 
304
287
  public static String rubyStringToString(RubyString str) {
@@ -307,16 +290,15 @@ public class NokogiriHelpers {
307
290
  int offset = byteList.begin();
308
291
  int len = byteList.length();
309
292
  ByteBuffer buf = ByteBuffer.wrap(data, offset, len);
310
- return getCharsetUTF8().decode(buf).toString();
293
+ return UTF8.decode(buf).toString();
311
294
  }
312
-
313
- public static List<String> rubyStringArrayToJavaList(RubyArray ary) {
314
- List<String> list = new ArrayList<String>();
315
- for (int i=0; i < ary.getLength(); i++) {
316
- Object obj = ary.get(i);
317
- if (obj != null) list.add(obj.toString());
295
+
296
+ public static ByteArrayInputStream stringBytesToStream(final IRubyObject str) {
297
+ if (str instanceof RubyString || str.respondsTo("to_str")) {
298
+ final ByteList bytes = str.convertToString().getByteList();
299
+ return new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length());
318
300
  }
319
- return list;
301
+ return null;
320
302
  }
321
303
 
322
304
  public static String getNodeCompletePath(Node node) {
@@ -325,19 +307,14 @@ public class NokogiriHelpers {
325
307
 
326
308
  // TODO: Rename buffer to path.
327
309
  String buffer = "";
328
- String sep;
329
- String name;
330
-
331
- int occur = 0;
332
- boolean generic;
333
310
 
334
311
  cur = node;
335
312
 
336
313
  do {
337
- name = "";
338
- sep = "?";
339
- occur = 0;
340
- generic = false;
314
+ String name = "";
315
+ String sep = "?";
316
+ int occur = 0;
317
+ boolean generic = false;
341
318
 
342
319
  if(cur.getNodeType() == Node.DOCUMENT_NODE) {
343
320
  if(buffer.startsWith("/")) break;
@@ -552,16 +529,16 @@ public class NokogiriHelpers {
552
529
  ((a != null) && (b != null) && (b.equals(a))));
553
530
  }
554
531
 
555
- private static Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
556
- private static Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
557
- private static String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
558
- private static String[] decoded = {"&", ">", "<", "\r"};
532
+ private static final Pattern encoded_pattern = Pattern.compile("&amp;|&gt;|&lt;|&#13;");
533
+ private static final String[] encoded = {"&amp;", "&gt;", "&lt;", "&#13;"};
534
+ private static final Pattern decoded_pattern = Pattern.compile("&|>|<|\r");
535
+ private static final String[] decoded = {"&", ">", "<", "\r"};
559
536
 
560
- private static String convert(Pattern ptn, String input, String[] oldChars, String[] newChars) {
537
+ private static StringBuffer convert(Pattern ptn, CharSequence input, String[] oldChars, String[] newChars) {
561
538
  Matcher matcher = ptn.matcher(input);
562
539
  boolean result = matcher.find();
563
- StringBuffer sb = new StringBuffer();
564
- while(result) {
540
+ StringBuffer sb = new StringBuffer(input.length() + 8);
541
+ while (result) {
565
542
  String matched = matcher.group();
566
543
  String replacement = "";
567
544
  for (int i=0; i<oldChars.length; i++) {
@@ -574,15 +551,15 @@ public class NokogiriHelpers {
574
551
  result = matcher.find();
575
552
  }
576
553
  matcher.appendTail(sb);
577
- return sb.toString();
554
+ return sb;
578
555
  }
579
556
 
580
- public static String encodeJavaString(String s) {
581
- return convert(decoded_pattern, s, decoded, encoded);
557
+ public static CharSequence encodeJavaString(CharSequence str) {
558
+ return convert(decoded_pattern, str, decoded, encoded);
582
559
  }
583
560
 
584
- public static String decodeJavaString(String s) {
585
- return convert(encoded_pattern, s, encoded, decoded);
561
+ public static CharSequence decodeJavaString(CharSequence str) {
562
+ return convert(encoded_pattern, str, encoded, decoded);
586
563
  }
587
564
 
588
565
  public static String getNodeName(Node node) {
@@ -601,8 +578,7 @@ public class NokogiriHelpers {
601
578
 
602
579
  public static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
603
580
  public static boolean isNamespace(Node node) {
604
- return (XMLNS_URI.equals(node.getNamespaceURI()) ||
605
- isNamespace(node.getNodeName()));
581
+ return (XMLNS_URI.equals(node.getNamespaceURI()) || isNamespace(node.getNodeName()));
606
582
  }
607
583
 
608
584
  public static boolean isNamespace(String nodeName) {
@@ -618,44 +594,40 @@ public class NokogiriHelpers {
618
594
  }
619
595
 
620
596
  public static boolean isWhitespaceText(ThreadContext context, IRubyObject obj) {
621
- if (obj == null || obj.isNil()) return false;
622
-
623
- XmlNode node = (XmlNode) obj;
624
- if (!(node instanceof XmlText))
625
- return false;
597
+ //if (obj == null || obj.isNil()) return false;
598
+ if ( !(obj instanceof XmlText) ) return false;
626
599
 
627
- String content = rubyStringToString(node.content(context));
628
- return content.trim().length() == 0;
600
+ CharSequence content = ((XmlNode) obj).getContentImpl();
601
+ return content == null || isWhitespaceText(content);
629
602
  }
630
603
 
631
- public static boolean isWhitespaceText(String s) {
632
- return s.trim().length() == 0;
604
+ public static boolean isWhitespaceText(CharSequence str) {
605
+ int len = str.length(); int beg = 0;
606
+ while ((beg < len) && (str.charAt(beg) <= ' ')) beg++;
607
+ return beg == len;
633
608
  }
634
609
 
635
- public static String canonicalizeWhitespce(String s) {
636
- StringBuilder sb = new StringBuilder();
637
- char[] chars = s.toCharArray();
610
+ public static CharSequence canonicalizeWhitespace(CharSequence str) {
611
+ final int len = str.length();
612
+ StringBuilder sb = new StringBuilder(len);
638
613
  boolean newline_added = false;
639
- for (int i=0; i<chars.length; i++) {
640
- if (chars[i] == '\n') {
641
- if (!newline_added) {
642
- sb.append(chars[i]);
643
- newline_added = true;
614
+ for ( int i = 0; i < len; i++ ) {
615
+ char c = str.charAt(i);
616
+ if ( c == '\n' ) {
617
+ if ( ! newline_added ) {
618
+ sb.append(c); newline_added = true;
644
619
  }
645
620
  } else {
646
- sb.append(chars[i]);
621
+ sb.append(c);
647
622
  }
648
623
  }
649
- return sb.toString();
624
+ return sb;
650
625
  }
651
626
 
652
627
  public static String newQName(String newPrefix, Node node) {
653
628
  String tagName = getLocalPart(node.getNodeName());
654
- if(newPrefix == null) {
655
- return tagName;
656
- } else {
657
- return newPrefix + ":" + tagName;
658
- }
629
+ if (newPrefix == null) return tagName;
630
+ return newPrefix + ':' + tagName;
659
631
  }
660
632
 
661
633
  public static RubyArray nodeListToRubyArray(Ruby ruby, NodeList nodes) {
@@ -695,8 +667,7 @@ public class NokogiriHelpers {
695
667
  }
696
668
 
697
669
  private static String guessEncoding() {
698
- String name = null;
699
- if (name == null) name = System.getProperty("file.encoding");
670
+ String name = System.getProperty("file.encoding");
700
671
  if (name == null) name = "UTF-8";
701
672
  return name;
702
673
  }
@@ -722,8 +693,8 @@ public class NokogiriHelpers {
722
693
 
723
694
  private static String resolveSystemId(String baseName, String systemId) {
724
695
  if (baseName == null || baseName.length() < 1) return null;
725
- String parentName = null;
726
- baseName = baseName.replaceAll("%20", " ");
696
+ String parentName;
697
+ baseName = baseName.replace("%20", " ");
727
698
  File base = new File(baseName);
728
699
  if (base.isDirectory()) parentName = baseName;
729
700
  else parentName = base.getParent();
@@ -735,53 +706,45 @@ public class NokogiriHelpers {
735
706
  }
736
707
 
737
708
  public static boolean isUTF8(String encoding) {
738
- if (encoding == null) return true; // no need to convert encoding
739
- int ret = Charset.forName(encoding).compareTo(Charset.forName("UTF-8"));
740
- return ret == 0;
709
+ if (encoding == null) return true; // no need to convert encoding
710
+ return Charset.forName(encoding).compareTo(UTF8) == 0;
741
711
  }
742
712
 
743
- public static byte[] convertEncoding(Charset output_charset, String input_string) throws CharacterCodingException {
744
- CharsetEncoder encoder = output_charset.newEncoder();
745
- CharBuffer charBuffer = CharBuffer.wrap(input_string);
746
- ByteBuffer byteBuffer = encoder.encode(charBuffer);
747
- byte[] buffer = new byte[byteBuffer.remaining()];
748
- byteBuffer.get(buffer);
749
- return buffer;
713
+ public static ByteBuffer convertEncoding(Charset output_charset, CharSequence input_string) {
714
+ return output_charset.encode(CharBuffer.wrap(input_string)); // does replace implicitly on un-mappable characters
750
715
  }
751
716
 
752
- public static String convertEncodingByNKFIfNecessary(Ruby runtime, XmlDocument doc, String thing) {
753
- if (!(doc instanceof HtmlDocument)) return thing;
717
+ public static CharSequence convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str) {
718
+ if (!(doc instanceof HtmlDocument)) return str;
754
719
  String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
755
- if (parsed_encoding == null) return thing;
720
+ if (parsed_encoding == null) return str;
756
721
  String ruby_encoding = rubyStringToString(doc.getEncoding());
757
- if (ruby_encoding == null) return thing;
758
- if (Charset.forName(parsed_encoding).compareTo(Charset.forName(ruby_encoding)) == 0) {
759
- return thing;
760
- } else {
761
- return NokogiriHelpers.nkf(runtime, ruby_encoding, thing);
762
- }
722
+ if (ruby_encoding == null) return str;
723
+ Charset encoding = Charset.forName(ruby_encoding);
724
+ if (Charset.forName(parsed_encoding).compareTo(encoding) == 0) return str;
725
+ if (str.length() == 0) return str; // no need to convert
726
+ return NokogiriHelpers.nkf(context, encoding, str);
763
727
  }
764
728
 
729
+ private static final ByteList _Sw = new ByteList(new byte[] { '-','S','w' }, false);
730
+ private static final ByteList _Jw = new ByteList(new byte[] { '-','J','w' }, false);
731
+ private static final ByteList _Ew = new ByteList(new byte[] { '-','E','w' }, false);
732
+ private static final ByteList _Ww = new ByteList(new byte[] { '-','W','w' }, false);
733
+
765
734
  // This method is used from HTML documents. HTML meta tag with encoding specification
766
735
  // might appear after non-ascii characters are used. For example, a title tag before
767
736
  // a meta tag. In such a case, Xerces encodes characters in UTF-8 without seeing meta tag.
768
737
  // Nokogiri uses NKF library to convert characters correct encoding. This means the method
769
738
  // works only for JIS/Shift_JIS/EUC-JP.
770
- public static String nkf(Ruby runtime, String ruby_encoding, String thing) {
771
- StringBuffer sb = new StringBuffer("-");
772
- Charset that = Charset.forName(ruby_encoding);
773
- if (NokogiriHelpers.shift_jis.compareTo(that) == 0) {
774
- sb.append("S");
775
- } else if (NokogiriHelpers.jis.compareTo(that) == 0) {
776
- sb.append("J");
777
- } else if (NokogiriHelpers.euc_jp.compareTo(that) == 0) {
778
- sb.append("E");
779
- } else {
780
- // should not come here. should be treated before this method.
781
- sb.append("W");
782
- }
783
- sb.append("w");
784
- Class nkfClass = null;
739
+ private static CharSequence nkf(ThreadContext context, Charset encoding, CharSequence str) {
740
+ final Ruby runtime = context.getRuntime();
741
+ final ByteList opt;
742
+ if (NokogiriHelpers.shift_jis.compareTo(encoding) == 0) opt = _Sw;
743
+ else if (NokogiriHelpers.jis.compareTo(encoding) == 0) opt = _Jw;
744
+ else if (NokogiriHelpers.euc_jp.compareTo(encoding) == 0) opt = _Ew;
745
+ else opt = _Ww; // should not come here. should be treated before this method.
746
+
747
+ Class nkfClass;
785
748
  try {
786
749
  // JRuby 1.7 and later
787
750
  nkfClass = runtime.getClassLoader().loadClass("org.jruby.ext.nkf.RubyNKF");
@@ -790,35 +753,35 @@ public class NokogiriHelpers {
790
753
  // Before JRuby 1.7
791
754
  nkfClass = runtime.getClassLoader().loadClass("org.jruby.RubyNKF");
792
755
  } catch (ClassNotFoundException e2) {
793
- return thing;
756
+ return str;
794
757
  }
795
758
  }
796
759
  Method nkf_method;
797
760
  try {
798
761
  nkf_method = nkfClass.getMethod("nkf", ThreadContext.class, IRubyObject.class, IRubyObject.class, IRubyObject.class);
799
762
  RubyString r_str =
800
- (RubyString)nkf_method.invoke(null, runtime.getCurrentContext(), null, runtime.newString(new String(sb)), runtime.newString(thing));
763
+ (RubyString)nkf_method.invoke(null, context, null, runtime.newString(opt), runtime.newString(str.toString()));
801
764
  return NokogiriHelpers.rubyStringToString(r_str);
802
765
  } catch (SecurityException e) {
803
- return thing;
766
+ return str;
804
767
  } catch (NoSuchMethodException e) {
805
- return thing;
768
+ return str;
806
769
  } catch (IllegalArgumentException e) {
807
- return thing;
770
+ return str;
808
771
  } catch (IllegalAccessException e) {
809
- return thing;
772
+ return str;
810
773
  } catch (InvocationTargetException e) {
811
- return thing;
774
+ return str;
812
775
  }
813
776
  }
814
777
 
815
- private static Charset shift_jis = Charset.forName("Shift_JIS");
816
- private static Charset jis = Charset.forName("ISO-2022-JP");
817
- private static Charset euc_jp = Charset.forName("EUC-JP");
778
+ private static final Charset shift_jis = Charset.forName("Shift_JIS");
779
+ private static final Charset jis = Charset.forName("ISO-2022-JP");
780
+ private static final Charset euc_jp = Charset.forName("EUC-JP");
818
781
 
819
782
  public static boolean shouldEncode(Node text) {
820
- return text.getUserData(NokogiriHelpers.ENCODED_STRING) == null ||
821
- !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING));
783
+ final Boolean encoded = (Boolean) text.getUserData(NokogiriHelpers.ENCODED_STRING);
784
+ return encoded == null || ! encoded;
822
785
  }
823
786
 
824
787
  public static boolean shouldDecode(Node text) {