nokogiri 1.10.10-java → 1.11.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (159) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/README.md +165 -91
  5. data/ext/java/nokogiri/HtmlDocument.java +34 -46
  6. data/ext/java/nokogiri/HtmlSaxParserContext.java +88 -58
  7. data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
  8. data/ext/java/nokogiri/NokogiriService.java +1 -1
  9. data/ext/java/nokogiri/XmlAttr.java +13 -20
  10. data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
  11. data/ext/java/nokogiri/XmlCdata.java +3 -4
  12. data/ext/java/nokogiri/XmlComment.java +1 -1
  13. data/ext/java/nokogiri/XmlDocument.java +148 -175
  14. data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
  15. data/ext/java/nokogiri/XmlDtd.java +5 -8
  16. data/ext/java/nokogiri/XmlElement.java +1 -20
  17. data/ext/java/nokogiri/XmlElementDecl.java +23 -28
  18. data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
  19. data/ext/java/nokogiri/XmlEntityReference.java +2 -2
  20. data/ext/java/nokogiri/XmlNamespace.java +72 -89
  21. data/ext/java/nokogiri/XmlNode.java +303 -406
  22. data/ext/java/nokogiri/XmlNodeSet.java +70 -76
  23. data/ext/java/nokogiri/XmlReader.java +12 -13
  24. data/ext/java/nokogiri/XmlRelaxng.java +10 -3
  25. data/ext/java/nokogiri/XmlSaxParserContext.java +15 -10
  26. data/ext/java/nokogiri/XmlSchema.java +87 -27
  27. data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
  28. data/ext/java/nokogiri/XmlText.java +12 -9
  29. data/ext/java/nokogiri/XmlXpathContext.java +55 -25
  30. data/ext/java/nokogiri/XsltStylesheet.java +7 -15
  31. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +52 -46
  32. data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
  33. data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
  34. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
  35. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
  36. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +67 -10
  37. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
  38. data/ext/java/nokogiri/internals/ParserContext.java +27 -73
  39. data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
  40. data/ext/java/nokogiri/internals/XmlDomParserContext.java +18 -33
  41. data/ext/nokogiri/depend +476 -357
  42. data/ext/nokogiri/extconf.rb +507 -357
  43. data/ext/nokogiri/html_document.c +79 -78
  44. data/ext/nokogiri/html_sax_parser_context.c +2 -2
  45. data/ext/nokogiri/nokogiri.c +34 -40
  46. data/ext/nokogiri/xml_document.c +18 -4
  47. data/ext/nokogiri/xml_io.c +8 -6
  48. data/ext/nokogiri/xml_node.c +21 -1
  49. data/ext/nokogiri/xml_node_set.c +1 -1
  50. data/ext/nokogiri/xml_reader.c +6 -17
  51. data/ext/nokogiri/xml_relax_ng.c +29 -11
  52. data/ext/nokogiri/xml_sax_parser.c +2 -7
  53. data/ext/nokogiri/xml_sax_parser_context.c +2 -2
  54. data/ext/nokogiri/xml_schema.c +55 -13
  55. data/ext/nokogiri/xml_xpath_context.c +80 -4
  56. data/ext/nokogiri/xslt_stylesheet.c +1 -8
  57. data/lib/nokogiri.rb +22 -22
  58. data/lib/nokogiri/css.rb +1 -0
  59. data/lib/nokogiri/css/node.rb +1 -0
  60. data/lib/nokogiri/css/parser.rb +63 -62
  61. data/lib/nokogiri/css/parser.y +2 -2
  62. data/lib/nokogiri/css/parser_extras.rb +39 -36
  63. data/lib/nokogiri/css/syntax_error.rb +1 -0
  64. data/lib/nokogiri/css/tokenizer.rb +1 -0
  65. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  66. data/lib/nokogiri/decorators/slop.rb +1 -0
  67. data/lib/nokogiri/html.rb +1 -0
  68. data/lib/nokogiri/html/builder.rb +1 -0
  69. data/lib/nokogiri/html/document.rb +13 -26
  70. data/lib/nokogiri/html/document_fragment.rb +1 -0
  71. data/lib/nokogiri/html/element_description.rb +1 -0
  72. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  73. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  74. data/lib/nokogiri/html/sax/parser.rb +1 -0
  75. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  76. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  77. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  78. data/lib/nokogiri/nokogiri.jar +0 -0
  79. data/lib/nokogiri/syntax_error.rb +1 -0
  80. data/lib/nokogiri/version.rb +3 -109
  81. data/lib/nokogiri/version/constant.rb +5 -0
  82. data/lib/nokogiri/version/info.rb +182 -0
  83. data/lib/nokogiri/xml.rb +1 -0
  84. data/lib/nokogiri/xml/attr.rb +1 -0
  85. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  86. data/lib/nokogiri/xml/builder.rb +3 -2
  87. data/lib/nokogiri/xml/cdata.rb +1 -0
  88. data/lib/nokogiri/xml/character_data.rb +1 -0
  89. data/lib/nokogiri/xml/document.rb +20 -15
  90. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  91. data/lib/nokogiri/xml/dtd.rb +1 -0
  92. data/lib/nokogiri/xml/element_content.rb +1 -0
  93. data/lib/nokogiri/xml/element_decl.rb +1 -0
  94. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  95. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  96. data/lib/nokogiri/xml/namespace.rb +1 -0
  97. data/lib/nokogiri/xml/node.rb +587 -249
  98. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  99. data/lib/nokogiri/xml/node_set.rb +1 -0
  100. data/lib/nokogiri/xml/notation.rb +1 -0
  101. data/lib/nokogiri/xml/parse_options.rb +10 -3
  102. data/lib/nokogiri/xml/pp.rb +1 -0
  103. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  104. data/lib/nokogiri/xml/pp/node.rb +1 -0
  105. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  106. data/lib/nokogiri/xml/reader.rb +7 -3
  107. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  108. data/lib/nokogiri/xml/sax.rb +1 -0
  109. data/lib/nokogiri/xml/sax/document.rb +1 -0
  110. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  111. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  112. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  113. data/lib/nokogiri/xml/schema.rb +13 -4
  114. data/lib/nokogiri/xml/searchable.rb +25 -16
  115. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  116. data/lib/nokogiri/xml/text.rb +1 -0
  117. data/lib/nokogiri/xml/xpath.rb +1 -0
  118. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
  119. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  120. data/lib/nokogiri/xslt.rb +1 -0
  121. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  122. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  123. metadata +86 -159
  124. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
  125. data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
  126. data/ext/nokogiri/html_document.h +0 -10
  127. data/ext/nokogiri/html_element_description.h +0 -10
  128. data/ext/nokogiri/html_entity_lookup.h +0 -8
  129. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  130. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  131. data/ext/nokogiri/nokogiri.h +0 -121
  132. data/ext/nokogiri/xml_attr.h +0 -9
  133. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  134. data/ext/nokogiri/xml_cdata.h +0 -9
  135. data/ext/nokogiri/xml_comment.h +0 -9
  136. data/ext/nokogiri/xml_document.h +0 -23
  137. data/ext/nokogiri/xml_document_fragment.h +0 -10
  138. data/ext/nokogiri/xml_dtd.h +0 -10
  139. data/ext/nokogiri/xml_element_content.h +0 -10
  140. data/ext/nokogiri/xml_element_decl.h +0 -9
  141. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  142. data/ext/nokogiri/xml_entity_decl.h +0 -10
  143. data/ext/nokogiri/xml_entity_reference.h +0 -9
  144. data/ext/nokogiri/xml_io.h +0 -11
  145. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  146. data/ext/nokogiri/xml_namespace.h +0 -14
  147. data/ext/nokogiri/xml_node.h +0 -13
  148. data/ext/nokogiri/xml_node_set.h +0 -12
  149. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  150. data/ext/nokogiri/xml_reader.h +0 -10
  151. data/ext/nokogiri/xml_relax_ng.h +0 -9
  152. data/ext/nokogiri/xml_sax_parser.h +0 -39
  153. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  154. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  155. data/ext/nokogiri/xml_schema.h +0 -9
  156. data/ext/nokogiri/xml_syntax_error.h +0 -13
  157. data/ext/nokogiri/xml_text.h +0 -9
  158. data/ext/nokogiri/xml_xpath_context.h +0 -10
  159. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -32,28 +32,29 @@
32
32
 
33
33
  package nokogiri;
34
34
 
35
- import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
36
-
37
35
  import java.io.ByteArrayInputStream;
38
36
  import java.io.InputStream;
39
37
  import java.nio.charset.Charset;
40
38
  import java.nio.charset.IllegalCharsetNameException;
41
39
  import java.nio.charset.UnsupportedCharsetException;
42
- import java.util.EnumSet;
43
40
  import java.util.regex.Matcher;
44
41
  import java.util.regex.Pattern;
45
42
 
46
- import nokogiri.internals.NokogiriHandler;
47
-
48
43
  import org.apache.xerces.parsers.AbstractSAXParser;
49
44
  import org.cyberneko.html.parsers.SAXParser;
50
- import org.jruby.*;
45
+ import org.jruby.Ruby;
46
+ import org.jruby.RubyClass;
47
+ import org.jruby.RubyFixnum;
48
+ import org.jruby.RubyString;
51
49
  import org.jruby.anno.JRubyClass;
52
50
  import org.jruby.anno.JRubyMethod;
53
51
  import org.jruby.runtime.ThreadContext;
54
52
  import org.jruby.runtime.builtin.IRubyObject;
55
53
  import org.xml.sax.SAXException;
56
54
 
55
+ import nokogiri.internals.NokogiriHandler;
56
+ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
57
+
57
58
  /**
58
59
  * Class for Nokogiri::HTML::SAX::ParserContext.
59
60
  *
@@ -65,10 +66,16 @@ import org.xml.sax.SAXException;
65
66
  @JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
66
67
  public class HtmlSaxParserContext extends XmlSaxParserContext {
67
68
 
69
+ static HtmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) {
70
+ HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
71
+ instance.initialize(runtime);
72
+ return instance;
73
+ }
74
+
68
75
  public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
69
76
  super(ruby, rubyClass);
70
77
  }
71
-
78
+
72
79
  @Override
73
80
  protected AbstractSAXParser createParser() throws SAXException {
74
81
  SAXParser parser = new SAXParser();
@@ -78,6 +85,11 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
78
85
  "http://cyberneko.org/html/properties/names/elems", "lower");
79
86
  parser.setProperty(
80
87
  "http://cyberneko.org/html/properties/names/attrs", "lower");
88
+
89
+ // NekoHTML should not try to guess the encoding based on the meta
90
+ // tags or other information in the document. This is already
91
+ // handled by the EncodingReader.
92
+ parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
81
93
  return parser;
82
94
  } catch(SAXException ex) {
83
95
  throw new SAXException(
@@ -90,18 +102,17 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
90
102
  IRubyObject klazz,
91
103
  IRubyObject data,
92
104
  IRubyObject encoding) {
93
- HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
94
- ctx.initialize(context.getRuntime());
95
- String javaEncoding = findEncoding(context, encoding);
105
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
106
+ String javaEncoding = findEncodingName(context, encoding);
96
107
  if (javaEncoding != null) {
97
- String input = applyEncoding(rubyStringToString(data), javaEncoding);
98
- ByteArrayInputStream istream = new ByteArrayInputStream(input.getBytes());
108
+ CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
109
+ ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
99
110
  ctx.setInputSource(istream);
100
111
  ctx.getInputSource().setEncoding(javaEncoding);
101
112
  }
102
113
  return ctx;
103
114
  }
104
-
115
+
105
116
  public enum EncodingType {
106
117
  NONE(0, "NONE"),
107
118
  UTF_8(1, "UTF-8"),
@@ -142,23 +153,38 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
142
153
  public String toString() {
143
154
  return name;
144
155
  }
145
- }
146
-
147
- private static String findName(final int value) {
148
- for (EncodingType type : EncodingType.values()) {
149
- if (type.getValue() == value) return type.toString();
156
+
157
+ private static transient EncodingType[] values;
158
+
159
+ // NOTE: assuming ordinal == value
160
+ static EncodingType get(final int ordinal) {
161
+ EncodingType[] values = EncodingType.values;
162
+ if (values == null) {
163
+ values = EncodingType.values();
164
+ EncodingType.values = values;
165
+ }
166
+ if (ordinal >= 0 && ordinal < values.length) {
167
+ return values[ordinal];
168
+ }
169
+ return null;
150
170
  }
151
- return null;
171
+
152
172
  }
153
-
154
- private static String findEncoding(ThreadContext context, IRubyObject encoding) {
173
+
174
+ private static String findEncodingName(final int value) {
175
+ EncodingType type = EncodingType.get(value);
176
+ if (type == null) return null;
177
+ assert type.value == value;
178
+ return type.name;
179
+ }
180
+
181
+ private static String findEncodingName(ThreadContext context, IRubyObject encoding) {
155
182
  String rubyEncoding = null;
156
183
  if (encoding instanceof RubyString) {
157
- rubyEncoding = rubyStringToString(encoding);
184
+ rubyEncoding = rubyStringToString((RubyString) encoding);
158
185
  }
159
186
  else if (encoding instanceof RubyFixnum) {
160
- int value = RubyFixnum.fix2int((RubyFixnum) encoding);
161
- rubyEncoding = findName(value);
187
+ rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
162
188
  }
163
189
  if (rubyEncoding == null) return null;
164
190
  try {
@@ -168,39 +194,56 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
168
194
  throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
169
195
  }
170
196
  catch (IllegalCharsetNameException e) {
171
- throw context.getRuntime().newInvalidEncoding(e.getMessage());
197
+ throw context.getRuntime().newEncodingError(e.getMessage());
172
198
  }
173
199
  }
174
200
 
175
- private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+");
201
+ private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+", Pattern.CASE_INSENSITIVE);
176
202
 
177
- private static String applyEncoding(String input, String enc) {
178
- String str = input.toLowerCase();
179
- int start_pos = 0;
180
- int end_pos = 0;
181
- if (input.contains("meta") && input.contains("charset")) {
182
- Matcher m = CHARSET_PATTERN.matcher(str);
203
+ private static CharSequence applyEncoding(final String input, final String enc) {
204
+ int start_pos = 0; int end_pos = 0;
205
+ if (containsIgnoreCase(input, "charset")) {
206
+ Matcher m = CHARSET_PATTERN.matcher(input);
183
207
  while (m.find()) {
184
208
  start_pos = m.start();
185
209
  end_pos = m.end();
186
210
  }
187
211
  }
188
212
  if (start_pos != end_pos) {
189
- String substr = input.substring(start_pos, end_pos);
190
- input = input.replace(substr, "charset=" + enc);
213
+ return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
191
214
  }
192
215
  return input;
193
216
  }
194
217
 
218
+ private static boolean containsIgnoreCase(final String str, final String sub) {
219
+ final int len = sub.length();
220
+ final int max = str.length() - len;
221
+
222
+ if (len == 0) return true;
223
+ final char c0Lower = Character.toLowerCase(sub.charAt(0));
224
+ final char c0Upper = Character.toUpperCase(sub.charAt(0));
225
+
226
+ for (int i = 0; i <= max; i++) {
227
+ final char ch = str.charAt(i);
228
+ if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
229
+ continue; // first char doesn't match
230
+ }
231
+
232
+ if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
233
+ return true;
234
+ }
235
+ }
236
+ return false;
237
+ }
238
+
195
239
  @JRubyMethod(name="file", meta=true)
196
240
  public static IRubyObject parse_file(ThreadContext context,
197
- IRubyObject klazz,
241
+ IRubyObject klass,
198
242
  IRubyObject data,
199
243
  IRubyObject encoding) {
200
- HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
201
- ctx.initialize(context.getRuntime());
244
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
202
245
  ctx.setInputSourceFile(context, data);
203
- String javaEncoding = findEncoding(context, encoding);
246
+ String javaEncoding = findEncodingName(context, encoding);
204
247
  if (javaEncoding != null) {
205
248
  ctx.getInputSource().setEncoding(javaEncoding);
206
249
  }
@@ -209,13 +252,12 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
209
252
 
210
253
  @JRubyMethod(name="io", meta=true)
211
254
  public static IRubyObject parse_io(ThreadContext context,
212
- IRubyObject klazz,
255
+ IRubyObject klass,
213
256
  IRubyObject data,
214
257
  IRubyObject encoding) {
215
- HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
216
- ctx.initialize(context.getRuntime());
217
- ctx.setInputSource(context, data, context.getRuntime().getNil());
218
- String javaEncoding = findEncoding(context, encoding);
258
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
259
+ ctx.setIOInputSource(context, data, context.nil);
260
+ String javaEncoding = findEncodingName(context, encoding);
219
261
  if (javaEncoding != null) {
220
262
  ctx.getInputSource().setEncoding(javaEncoding);
221
263
  }
@@ -226,27 +268,15 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
226
268
  * Create a new parser context that will read from a raw input stream.
227
269
  * Meant to be run in a separate thread by HtmlSaxPushParser.
228
270
  */
229
- static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream) {
230
- HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz);
231
- ctx.initialize(runtime);
271
+ static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) {
272
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
232
273
  ctx.setInputSource(stream);
233
274
  return ctx;
234
275
  }
235
276
 
236
277
  @Override
237
278
  protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
238
- // final String path = "Nokogiri::XML::FragmentHandler";
239
- // final String docFrag =
240
- // "http://cyberneko.org/html/features/balance-tags/document-fragment";
241
- // RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
242
- // IRubyObject doc = adapter.getInstanceVariable(handlerRuby, "@document");
243
- // RubyModule mod = runtime.getClassFromPath(path);
244
- // try {
245
- // if (doc != null && !doc.isNil() && adapter.isKindOf(doc, mod))
246
- // parser.setFeature(docFrag, true);
247
- // } catch (Exception e) {
248
- // // ignore
249
- // }
279
+ // this function is meant to be empty. It overrides the one in XmlSaxParserContext
250
280
  }
251
281
 
252
282
  }
@@ -129,7 +129,7 @@ public class HtmlSaxPushParser extends RubyObject {
129
129
  final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk);
130
130
  if (data == null) {
131
131
  terminateTask(context.runtime);
132
- throw new RaiseException(XmlSyntaxError.createHTMLSyntaxError(context.runtime)); // Nokogiri::HTML::SyntaxError
132
+ throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::HTML::SyntaxError
133
133
  }
134
134
 
135
135
  int errorCount0 = parserTask.getErrorCount();
@@ -258,7 +258,7 @@ public class NokogiriService implements BasicLibraryService {
258
258
  }
259
259
  };
260
260
 
261
- public static final ObjectAllocator HTML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() {
261
+ private static final ObjectAllocator HTML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() {
262
262
  private HtmlSaxParserContext htmlSaxParserContext = null;
263
263
  public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
264
264
  if (htmlSaxParserContext == null) htmlSaxParserContext = new HtmlSaxParserContext(runtime, klazz);
@@ -79,22 +79,20 @@ public class XmlAttr extends XmlNode {
79
79
  @Override
80
80
  protected void init(ThreadContext context, IRubyObject[] args) {
81
81
  if (args.length < 2) {
82
- throw getRuntime().newArgumentError(args.length, 2);
82
+ throw context.runtime.newArgumentError(args.length, 2);
83
83
  }
84
84
 
85
85
  IRubyObject doc = args[0];
86
86
  IRubyObject content = args[1];
87
87
 
88
- if(!(doc instanceof XmlDocument)) {
89
- final String msg =
90
- "document must be an instance of Nokogiri::XML::Document";
91
- throw getRuntime().newArgumentError(msg);
88
+ if (!(doc instanceof XmlDocument)) {
89
+ throw context.runtime.newArgumentError("document must be an instance of Nokogiri::XML::Document");
92
90
  }
93
91
 
94
92
  XmlDocument xmlDoc = (XmlDocument)doc;
95
93
  String str = rubyStringToString(content);
96
94
  Node attr = xmlDoc.getDocument().createAttribute(str);
97
- setNode(context, attr);
95
+ setNode(context.runtime, attr);
98
96
  }
99
97
 
100
98
 
@@ -103,20 +101,10 @@ public class XmlAttr extends XmlNode {
103
101
  // the default namespace should be registered for this attribute
104
102
  void setNamespaceIfNecessary(Ruby runtime) {
105
103
  if ("xml".equals(node.getPrefix())) {
106
- XmlNamespace.createDefaultNamespace(runtime, node);
104
+ XmlNamespace.createDefaultNamespace(runtime, node);
107
105
  }
108
106
  }
109
107
 
110
- private boolean isHtmlBooleanAttr() {
111
- String name = node.getNodeName().toLowerCase();
112
-
113
- for(String s : HTML_BOOLEAN_ATTRS) {
114
- if(s.equals(name)) return true;
115
- }
116
-
117
- return false;
118
- }
119
-
120
108
  @Override
121
109
  @JRubyMethod(name = {"content", "value", "to_s"})
122
110
  public IRubyObject content(ThreadContext context) {
@@ -140,11 +128,16 @@ public class XmlAttr extends XmlNode {
140
128
  @Override
141
129
  protected IRubyObject getNodeName(ThreadContext context) {
142
130
  if (name != null) return name;
143
- String attrName = ((Attr)node).getName();
144
- if (!(doc instanceof HtmlDocument) && node.getNamespaceURI() != null) {
131
+
132
+ String attrName = ((Attr) node).getName();
133
+ if (attrName == null) return context.nil;
134
+
135
+ if (node.getNamespaceURI() != null && !(document(context.runtime) instanceof HtmlDocument)) {
145
136
  attrName = NokogiriHelpers.getLocalPart(attrName);
137
+ if (attrName == null) return context.nil;
146
138
  }
147
- return attrName == null ? context.getRuntime().getNil() : RubyString.newString(context.getRuntime(), attrName);
139
+
140
+ return name = RubyString.newString(context.runtime, attrName);
148
141
  }
149
142
 
150
143
  @Override
@@ -67,12 +67,11 @@ public class XmlAttributeDecl extends XmlNode {
67
67
  super(ruby, klass, attrDeclNode);
68
68
  }
69
69
 
70
- public static IRubyObject create(ThreadContext context, Node attrDeclNode) {
71
- XmlAttributeDecl self =
72
- new XmlAttributeDecl(context.getRuntime(),
73
- getNokogiriClass(context.getRuntime(), "Nokogiri::XML::AttributeDecl"),
74
- attrDeclNode);
75
- return self;
70
+ static XmlAttributeDecl create(ThreadContext context, Node attrDeclNode) {
71
+ return new XmlAttributeDecl(context.runtime,
72
+ getNokogiriClass(context.runtime, "Nokogiri::XML::AttributeDecl"),
73
+ attrDeclNode
74
+ );
76
75
  }
77
76
 
78
77
  @Override
@@ -84,8 +83,7 @@ public class XmlAttributeDecl extends XmlNode {
84
83
  @Override
85
84
  @JRubyMethod(name = "node_name=")
86
85
  public IRubyObject node_name_set(ThreadContext context, IRubyObject name) {
87
- throw context.getRuntime()
88
- .newRuntimeError("cannot change name of DTD decl");
86
+ throw context.runtime.newRuntimeError("cannot change name of DTD decl");
89
87
  }
90
88
 
91
89
  public IRubyObject element_name(ThreadContext context) {
@@ -112,19 +110,20 @@ public class XmlAttributeDecl extends XmlNode {
112
110
  */
113
111
  @JRubyMethod
114
112
  public IRubyObject enumeration(ThreadContext context) {
115
- RubyArray enumVals = RubyArray.newArray(context.getRuntime());
116
- String atype = ((Element)node).getAttribute("atype");
113
+ final String atype = ((Element) node).getAttribute("atype");
117
114
 
118
115
  if (atype != null && atype.length() != 0 && atype.charAt(0) == '(') {
119
116
  // removed enclosing parens
120
117
  String valueStr = atype.substring(1, atype.length() - 1);
121
118
  String[] values = valueStr.split("\\|");
119
+ RubyArray enumVals = RubyArray.newArray(context.runtime, values.length);
122
120
  for (int i = 0; i < values.length; i++) {
123
- enumVals.append(context.getRuntime().newString(values[i]));
121
+ enumVals.append(context.runtime.newString(values[i]));
124
122
  }
123
+ return enumVals;
125
124
  }
126
125
 
127
- return enumVals;
126
+ return context.runtime.newEmptyArray();
128
127
  }
129
128
 
130
129
  }
@@ -69,10 +69,9 @@ public class XmlCdata extends XmlText {
69
69
  }
70
70
  IRubyObject doc = args[0];
71
71
  content = args[1];
72
- XmlDocument xmlDoc =(XmlDocument) ((XmlNode) doc).document(context);
73
- Document document = xmlDoc.getDocument();
74
- Node node = document.createCDATASection((content.isNil()) ? null : rubyStringToString(content));
75
- setNode(context, node);
72
+ Document document = ((XmlNode) doc).getOwnerDocument();
73
+ Node node = document.createCDATASection(rubyStringToString(content));
74
+ setNode(context.runtime, node);
76
75
  }
77
76
 
78
77
  @Override
@@ -82,7 +82,7 @@ public class XmlComment extends XmlNode {
82
82
  if (xmlDoc != null) {
83
83
  Document document = xmlDoc.getDocument();
84
84
  Node node = document.createComment(rubyStringToString(text));
85
- setNode(context, node);
85
+ setNode(context.runtime, node);
86
86
  }
87
87
  }
88
88
 
@@ -39,25 +39,28 @@ import static nokogiri.internals.NokogiriHelpers.isNamespace;
39
39
  import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
40
40
  import static nokogiri.internals.NokogiriHelpers.stringOrNil;
41
41
 
42
- import java.io.UnsupportedEncodingException;
43
42
  import java.util.List;
44
43
 
45
44
  import javax.xml.parsers.DocumentBuilderFactory;
46
45
  import javax.xml.parsers.ParserConfigurationException;
47
46
 
47
+ import org.jcodings.specific.USASCIIEncoding;
48
+ import org.jcodings.specific.UTF8Encoding;
48
49
  import org.jruby.Ruby;
49
50
  import org.jruby.RubyArray;
50
51
  import org.jruby.RubyClass;
51
52
  import org.jruby.RubyFixnum;
52
- import org.jruby.RubyNil;
53
+ import org.jruby.RubyString;
53
54
  import org.jruby.anno.JRubyClass;
54
55
  import org.jruby.anno.JRubyMethod;
56
+ import org.jruby.exceptions.RaiseException;
55
57
  import org.jruby.javasupport.JavaUtil;
56
- import org.jruby.runtime.Arity;
57
58
  import org.jruby.runtime.Block;
58
59
  import org.jruby.runtime.Helpers;
59
60
  import org.jruby.runtime.ThreadContext;
61
+ import org.jruby.runtime.Visibility;
60
62
  import org.jruby.runtime.builtin.IRubyObject;
63
+ import org.jruby.util.ByteList;
61
64
  import org.w3c.dom.Attr;
62
65
  import org.w3c.dom.Document;
63
66
  import org.w3c.dom.DocumentType;
@@ -95,41 +98,43 @@ public class XmlDocument extends XmlNode {
95
98
  */
96
99
  private static final String DOCUMENTBUILDERFACTORY_IMPLE_NAME = "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl";
97
100
 
101
+ private static final ByteList DOCUMENT = ByteList.create("document");
102
+ static { DOCUMENT.setEncoding(USASCIIEncoding.INSTANCE); }
103
+
98
104
  private static boolean substituteEntities = false;
99
105
  private static boolean loadExternalSubset = false; // TODO: Verify this.
100
106
 
101
107
  /** cache variables */
102
- protected IRubyObject encoding = null;
103
- protected IRubyObject url = null;
108
+ protected IRubyObject encoding;
109
+ protected IRubyObject url;
104
110
 
105
- public XmlDocument(Ruby ruby, RubyClass klazz) {
106
- super(ruby, klazz, createNewDocument());
111
+ public XmlDocument(Ruby runtime, RubyClass klazz) {
112
+ super(runtime, klazz, createNewDocument(runtime));
107
113
  }
108
114
 
109
- public XmlDocument(Ruby ruby, Document document) {
110
- this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::Document"), document);
115
+ public XmlDocument(Ruby runtime, Document document) {
116
+ this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document);
111
117
  }
112
118
 
113
- public XmlDocument(Ruby ruby, RubyClass klass, Document document) {
114
- super(ruby, klass, document);
115
- initializeNamespaceCacheIfNecessary();
116
- createAndCacheNamespaces(ruby, document.getDocumentElement());
117
- stabilizeTextContent(document);
118
- setInstanceVariable("@decorators", ruby.getNil());
119
+ public XmlDocument(Ruby runtime, RubyClass klass, Document document) {
120
+ super(runtime, klass, document);
121
+ init(runtime, document);
119
122
  }
120
123
 
121
- public void setDocumentNode(ThreadContext context, Node node) {
122
- super.setNode(context, node);
123
- initializeNamespaceCacheIfNecessary();
124
- Ruby runtime = context.getRuntime();
125
- if (node != null) {
126
- Document document = (Document)node;
127
- stabilizeTextContent(document);
124
+ void init(Ruby runtime, Document document) {
125
+ stabilizeTextContent(document);
126
+ if (document.getDocumentElement() != null) {
128
127
  createAndCacheNamespaces(runtime, document.getDocumentElement());
129
128
  }
130
129
  setInstanceVariable("@decorators", runtime.getNil());
131
130
  }
132
131
 
132
+ public final void setDocumentNode(Ruby runtime, Document node) {
133
+ super.setNode(runtime, node);
134
+ if (node != null) init(runtime, node);
135
+ else setInstanceVariable("@decorators", runtime.getNil());
136
+ }
137
+
133
138
  public void setEncoding(IRubyObject encoding) {
134
139
  this.encoding = encoding;
135
140
  }
@@ -141,72 +146,64 @@ public class XmlDocument extends XmlNode {
141
146
  // not sure, but like attribute values, text value will be lost
142
147
  // unless it is referred once before this document is used.
143
148
  // this seems to happen only when the fragment is parsed from Node#in_context.
144
- protected void stabilizeTextContent(Document document) {
149
+ protected static void stabilizeTextContent(Document document) {
145
150
  if (document.getDocumentElement() != null) document.getDocumentElement().getTextContent();
146
151
  }
147
152
 
148
- private void createAndCacheNamespaces(Ruby ruby, Node node) {
149
- if (node == null) return;
153
+ private static void createAndCacheNamespaces(Ruby runtime, Node node) {
150
154
  if (node.hasAttributes()) {
151
155
  NamedNodeMap nodeMap = node.getAttributes();
152
156
  for (int i=0; i<nodeMap.getLength(); i++) {
153
157
  Node n = nodeMap.item(i);
154
158
  if (n instanceof Attr) {
155
- Attr attr = (Attr)n;
156
- String attrName = attr.getName();
157
- // not sure, but need to get value always before document is referred.
158
- // or lose attribute value
159
- String attrValue = attr.getValue(); // don't delete this line
160
- if (isNamespace(attrName)) {
159
+ Attr attr = (Attr) n;
160
+ stabilizeAttr(attr);
161
+ if (isNamespace(attr.getName())) {
161
162
  // create and cache
162
- XmlNamespace.createFromAttr(ruby, attr);
163
+ XmlNamespace.createFromAttr(runtime, attr);
163
164
  }
164
165
  }
165
166
  }
166
167
  }
167
168
  NodeList children = node.getChildNodes();
168
169
  for (int i=0; i<children.getLength(); i++) {
169
- createAndCacheNamespaces(ruby, children.item(i));
170
+ createAndCacheNamespaces(runtime, children.item(i));
170
171
  }
171
172
  }
172
173
 
174
+ static void stabilizeAttr(final Attr attr) {
175
+ // TODO not sure, but need to get value always before document is referred or lose attribute value
176
+ attr.getValue(); // don't delete this line
177
+ }
178
+
173
179
  // When a document is created from fragment with a context (reference) document,
174
180
  // namespace should be resolved based on the context document.
175
181
  public XmlDocument(Ruby ruby, RubyClass klass, Document document, XmlDocument contextDoc) {
176
182
  super(ruby, klass, document);
177
183
  nsCache = contextDoc.getNamespaceCache();
178
- XmlNamespace default_ns = nsCache.getDefault();
179
- String default_href = rubyStringToString(default_ns.href(ruby.getCurrentContext()));
180
- resolveNamespaceIfNecessary(ruby.getCurrentContext(), document.getDocumentElement(), default_href);
184
+ String default_href = nsCache.getDefault().getHref();
185
+ resolveNamespaceIfNecessary(document.getDocumentElement(), default_href);
181
186
  }
182
187
 
183
- private void resolveNamespaceIfNecessary(ThreadContext context, Node node, String default_href) {
188
+ private void resolveNamespaceIfNecessary(Node node, String default_href) {
184
189
  if (node == null) return;
185
190
  String nodePrefix = node.getPrefix();
186
191
  if (nodePrefix == null) { // default namespace
187
192
  NokogiriHelpers.renameNode(node, default_href, node.getNodeName());
188
193
  } else {
189
- XmlNamespace xmlNamespace = nsCache.get(node, nodePrefix);
190
- String href = rubyStringToString(xmlNamespace.href(context));
194
+ String href = getNamespaceCache().get(node, nodePrefix).getHref();
191
195
  NokogiriHelpers.renameNode(node, href, node.getNodeName());
192
196
  }
193
- resolveNamespaceIfNecessary(context, node.getNextSibling(), default_href);
197
+ resolveNamespaceIfNecessary(node.getNextSibling(), default_href);
194
198
  NodeList children = node.getChildNodes();
195
199
  for (int i=0; i<children.getLength(); i++) {
196
- resolveNamespaceIfNecessary(context, children.item(i), default_href);
200
+ resolveNamespaceIfNecessary(children.item(i), default_href);
197
201
  }
198
202
  }
199
203
 
200
204
  public NokogiriNamespaceCache getNamespaceCache() {
201
- return nsCache;
202
- }
203
-
204
- public void initializeNamespaceCacheIfNecessary() {
205
205
  if (nsCache == null) nsCache = new NokogiriNamespaceCache();
206
- }
207
-
208
- public void setNamespaceCache(NokogiriNamespaceCache nsCache) {
209
- this.nsCache = nsCache;
206
+ return nsCache;
210
207
  }
211
208
 
212
209
  public Document getDocument() {
@@ -215,7 +212,7 @@ public class XmlDocument extends XmlNode {
215
212
 
216
213
  @Override
217
214
  protected IRubyObject getNodeName(ThreadContext context) {
218
- if (name == null) name = context.getRuntime().newString("document");
215
+ if (name == null) name = RubyString.newStringShared(context.runtime, DOCUMENT);
219
216
  return name;
220
217
  }
221
218
 
@@ -232,15 +229,31 @@ public class XmlDocument extends XmlNode {
232
229
  return getUrl();
233
230
  }
234
231
 
235
- public static Document createNewDocument() {
232
+ public static Document createNewDocument(final Ruby runtime) {
236
233
  try {
237
- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(DOCUMENTBUILDERFACTORY_IMPLE_NAME, NokogiriService.class.getClassLoader());
238
- return factory.newDocumentBuilder().newDocument();
234
+ return DocumentBuilderFactoryHolder.INSTANCE.newDocumentBuilder().newDocument();
239
235
  } catch (ParserConfigurationException e) {
240
- return null; // this will end is disaster...
236
+ throw asRuntimeError(runtime, null, e);
241
237
  }
242
238
  }
243
239
 
240
+ private static class DocumentBuilderFactoryHolder {
241
+ static final DocumentBuilderFactory INSTANCE;
242
+ static {
243
+ INSTANCE = DocumentBuilderFactory.newInstance(DOCUMENTBUILDERFACTORY_IMPLE_NAME, NokogiriService.class.getClassLoader());
244
+ }
245
+ }
246
+
247
+ static RaiseException asRuntimeError(Ruby runtime, String message, Exception cause) {
248
+ if (cause instanceof RaiseException) return (RaiseException) cause;
249
+
250
+ if (message == null) message = cause.toString();
251
+ else message = message + '(' + cause.toString() + ')';
252
+ RaiseException ex = runtime.newRuntimeError(message);
253
+ ex.initCause(cause);
254
+ return ex;
255
+ }
256
+
244
257
  /*
245
258
  * call-seq:
246
259
  * new(version = default)
@@ -249,19 +262,17 @@ public class XmlDocument extends XmlNode {
249
262
  */
250
263
  @JRubyMethod(name="new", meta = true, rest = true, required=0)
251
264
  public static IRubyObject rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args) {
265
+ final Ruby runtime = context.runtime;
252
266
  XmlDocument xmlDocument;
253
267
  try {
254
- Document docNode = createNewDocument();
268
+ Document docNode = createNewDocument(runtime);
255
269
  if ("Nokogiri::HTML::Document".equals(((RubyClass)klazz).getName())) {
256
- xmlDocument = (XmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass) klazz);
257
- xmlDocument.setDocumentNode(context, docNode);
270
+ xmlDocument = new HtmlDocument(context.runtime, (RubyClass) klazz, docNode);
258
271
  } else {
259
- // XML::Document and sublass
260
- xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass) klazz);
261
- xmlDocument.setDocumentNode(context, docNode);
272
+ xmlDocument = new XmlDocument(context.runtime, (RubyClass) klazz, docNode);
262
273
  }
263
274
  } catch (Exception ex) {
264
- throw context.getRuntime().newRuntimeError("couldn't create document: "+ex.toString());
275
+ throw asRuntimeError(runtime, "couldn't create document: ", ex);
265
276
  }
266
277
 
267
278
  Helpers.invoke(context, xmlDocument, "initialize", args);
@@ -274,19 +285,19 @@ public class XmlDocument extends XmlNode {
274
285
  // FIXME: Entity node should be create by some right way.
275
286
  // this impl passes tests, but entity doesn't exists in DTD, which
276
287
  // would cause validation failure.
277
- if (argv.length == 0) throw context.getRuntime().newRuntimeError("Could not create entity");
288
+ if (argv.length == 0) throw context.runtime.newRuntimeError("Could not create entity");
278
289
  String tagName = rubyStringToString(argv[0]);
279
- Node n = this.getOwnerDocument().createElement(tagName);
280
- return XmlEntityDecl.create(context, n, argv);
290
+ Node node = getOwnerDocument().createElement(tagName);
291
+ return XmlEntityDecl.create(context, node, argv);
281
292
  }
282
293
 
283
294
  @Override
284
- IRubyObject document(Ruby runtime) {
295
+ XmlDocument document(Ruby runtime) {
285
296
  return this;
286
297
  }
287
298
 
288
299
  @JRubyMethod(name="encoding=")
289
- public IRubyObject encoding_set(ThreadContext context, IRubyObject encoding) {
300
+ public IRubyObject encoding_set(IRubyObject encoding) {
290
301
  this.encoding = encoding;
291
302
  return this;
292
303
  }
@@ -294,10 +305,11 @@ public class XmlDocument extends XmlNode {
294
305
  @JRubyMethod
295
306
  public IRubyObject encoding(ThreadContext context) {
296
307
  if (this.encoding == null || this.encoding.isNil()) {
297
- if (getDocument().getXmlEncoding() == null) {
298
- this.encoding = context.getRuntime().getNil();
308
+ final String enc = getDocument().getXmlEncoding();
309
+ if (enc == null) {
310
+ this.encoding = context.nil;
299
311
  } else {
300
- this.encoding = context.getRuntime().newString(getDocument().getXmlEncoding());
312
+ this.encoding = context.runtime.newString(enc);
301
313
  }
302
314
  }
303
315
 
@@ -307,59 +319,32 @@ public class XmlDocument extends XmlNode {
307
319
  @JRubyMethod(meta = true)
308
320
  public static IRubyObject load_external_subsets_set(ThreadContext context, IRubyObject cls, IRubyObject value) {
309
321
  XmlDocument.loadExternalSubset = value.isTrue();
310
- return context.getRuntime().getNil();
311
- }
312
-
313
- /**
314
- * TODO: handle encoding?
315
- *
316
- * @param args[0] a Ruby IO or StringIO
317
- * @param args[1] url or nil
318
- * @param args[2] encoding
319
- * @param args[3] bitset of parser options
320
- */
321
- public static IRubyObject newFromData(ThreadContext context,
322
- IRubyObject klass,
323
- IRubyObject[] args) {
324
- Ruby ruby = context.getRuntime();
325
- Arity.checkArgumentCount(ruby, args, 4, 4);
326
- XmlDomParserContext ctx =
327
- new XmlDomParserContext(ruby, args[2], args[3]);
328
- ctx.setInputSource(context, args[0], args[1]);
329
- return ctx.parse(context, klass, args[1]);
330
- }
331
-
332
- @JRubyMethod(meta = true, rest = true)
333
- public static IRubyObject read_io(ThreadContext context,
334
- IRubyObject klass,
335
- IRubyObject[] args) {
336
- return newFromData(context, klass, args);
322
+ return context.nil;
337
323
  }
338
324
 
339
- @JRubyMethod(meta = true, rest = true)
340
- public static IRubyObject read_memory(ThreadContext context,
341
- IRubyObject klass,
342
- IRubyObject[] args) {
343
- return newFromData(context, klass, args);
325
+ @JRubyMethod(meta = true, required = 4)
326
+ public static IRubyObject read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args) {
327
+ XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[2], args[3]);
328
+ ctx.setIOInputSource(context, args[0], args[1]);
329
+ return ctx.parse(context, (RubyClass) klass, args[1]);
344
330
  }
345
331
 
346
- /** not a JRubyMethod */
347
- public static IRubyObject read_memory(ThreadContext context,
348
- IRubyObject[] args) {
349
- return read_memory(context,
350
- getNokogiriClass(context.getRuntime(), "Nokogiri::XML::Document"),
351
- args);
332
+ @JRubyMethod(meta = true, required = 4)
333
+ public static IRubyObject read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args) {
334
+ XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[2], args[3]);
335
+ ctx.setStringInputSource(context, args[0], args[1]);
336
+ return ctx.parse(context, (RubyClass) klass, args[1]);
352
337
  }
353
338
 
354
339
  @JRubyMethod(name="remove_namespaces!")
355
340
  public IRubyObject remove_namespaces(ThreadContext context) {
356
- removeNamespceRecursively(context, this);
357
- nsCache.clear();
341
+ removeNamespaceRecursively(this);
342
+ if (nsCache != null) nsCache.clear();
358
343
  clearXpathContext(getNode());
359
344
  return this;
360
345
  }
361
346
 
362
- private void removeNamespceRecursively(ThreadContext context, XmlNode xmlNode) {
347
+ private void removeNamespaceRecursively(XmlNode xmlNode) {
363
348
  Node node = xmlNode.node;
364
349
  if (node.getNodeType() == Node.ELEMENT_NODE) {
365
350
  node.setPrefix(null);
@@ -368,33 +353,29 @@ public class XmlDocument extends XmlNode {
368
353
  for (int i=0; i<attrs.getLength(); i++) {
369
354
  Attr attr = (Attr) attrs.item(i);
370
355
  if (isNamespace(attr.getNodeName())) {
371
- ((org.w3c.dom.Element)node).removeAttributeNode(attr);
356
+ ((org.w3c.dom.Element) node).removeAttributeNode(attr);
372
357
  } else {
373
358
  attr.setPrefix(null);
374
359
  NokogiriHelpers.renameNode(attr, null, attr.getLocalName());
375
360
  }
376
361
  }
377
362
  }
378
- XmlNodeSet nodeSet = (XmlNodeSet) xmlNode.children(context);
379
- for (long i=0; i < nodeSet.length(); i++) {
380
- XmlNode childNode = (XmlNode)nodeSet.slice(context, RubyFixnum.newFixnum(context.getRuntime(), i));
381
- removeNamespceRecursively(context, childNode);
363
+ IRubyObject[] nodes = xmlNode.getChildren();
364
+ for (int i=0; i < nodes.length; i++) {
365
+ XmlNode childNode = (XmlNode) nodes[i];
366
+ removeNamespaceRecursively(childNode);
382
367
  }
383
368
  }
384
369
 
385
370
  @JRubyMethod
386
371
  public IRubyObject root(ThreadContext context) {
387
372
  Node rootNode = getDocument().getDocumentElement();
388
- try {
389
- Boolean isValid = (Boolean)rootNode.getUserData(NokogiriHelpers.VALID_ROOT_NODE);
390
- if (!isValid) return context.getRuntime().getNil();
391
- } catch (NullPointerException e) {
392
- // does nothing since nil wasn't set to the root node before.
393
- }
394
- if (rootNode == null)
395
- return context.getRuntime().getNil();
396
- else
397
- return getCachedNodeOrCreate(context.getRuntime(), rootNode);
373
+ if (rootNode == null) return context.nil;
374
+
375
+ Object invalid = rootNode.getUserData(NokogiriHelpers.ROOT_NODE_INVALID);
376
+ if (invalid != null && ((Boolean) invalid)) return context.nil;
377
+
378
+ return getCachedNodeOrCreate(context.runtime, rootNode);
398
379
  }
399
380
 
400
381
  protected IRubyObject dup_implementation(Ruby runtime, boolean deep) {
@@ -412,17 +393,17 @@ public class XmlDocument extends XmlNode {
412
393
  }
413
394
 
414
395
  @JRubyMethod(name="root=")
415
- public IRubyObject root_set(ThreadContext context, IRubyObject newRoot_) {
396
+ public IRubyObject root_set(ThreadContext context, IRubyObject new_root) {
416
397
  // in case of document fragment, temporary root node should be deleted.
417
398
 
418
399
  // Java can't have a root whose value is null. Instead of setting null,
419
400
  // the method sets user data so that other methods are able to know the root
420
401
  // should be nil.
421
- if (newRoot_ instanceof RubyNil) {
422
- getDocument().getDocumentElement().setUserData(NokogiriHelpers.VALID_ROOT_NODE, false, null);
423
- return newRoot_;
402
+ if (new_root == context.nil) {
403
+ getDocument().getDocumentElement().setUserData(NokogiriHelpers.ROOT_NODE_INVALID, Boolean.TRUE, null);
404
+ return new_root;
424
405
  }
425
- XmlNode newRoot = asXmlNode(context, newRoot_);
406
+ XmlNode newRoot = asXmlNode(context, new_root);
426
407
 
427
408
  IRubyObject root = root(context);
428
409
  if (root.isNil()) {
@@ -434,10 +415,10 @@ public class XmlDocument extends XmlNode {
434
415
  // with different owner document.
435
416
  newRootNode = getDocument().importNode(newRoot.node, true);
436
417
  }
437
- add_child_node(context, getCachedNodeOrCreate(context.getRuntime(), newRootNode));
418
+ add_child_node(context, getCachedNodeOrCreate(context.runtime, newRootNode));
438
419
  } else {
439
420
  Node rootNode = asXmlNode(context, root).node;
440
- ((XmlNode)getCachedNodeOrCreate(context.getRuntime(), rootNode)).replace_node(context, newRoot);
421
+ ((XmlNode) getCachedNodeOrCreate(context.runtime, rootNode)).replace_node(context, newRoot);
441
422
  }
442
423
 
443
424
  return newRoot;
@@ -445,13 +426,13 @@ public class XmlDocument extends XmlNode {
445
426
 
446
427
  @JRubyMethod
447
428
  public IRubyObject version(ThreadContext context) {
448
- return stringOrNil(context.getRuntime(), getDocument().getXmlVersion());
429
+ return stringOrNil(context.runtime, getDocument().getXmlVersion());
449
430
  }
450
431
 
451
432
  @JRubyMethod(meta = true)
452
433
  public static IRubyObject substitute_entities_set(ThreadContext context, IRubyObject cls, IRubyObject value) {
453
434
  XmlDocument.substituteEntities = value.isTrue();
454
- return context.getRuntime().getNil();
435
+ return context.nil;
455
436
  }
456
437
 
457
438
  public IRubyObject getInternalSubset(ThreadContext context) {
@@ -460,27 +441,23 @@ public class XmlDocument extends XmlNode {
460
441
  if (dtd == null) {
461
442
  Document document = getDocument();
462
443
  if (document.getUserData(XmlDocument.DTD_RAW_DOCUMENT) != null) {
463
- dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), document);
444
+ dtd = XmlDtd.newFromInternalSubset(context.runtime, document);
464
445
  } else if (document.getDoctype() != null) {
465
446
  DocumentType docType = document.getDoctype();
466
447
  IRubyObject name, publicId, systemId;
467
- name = publicId = systemId = context.getRuntime().getNil();
448
+ name = publicId = systemId = context.nil;
468
449
  if (docType.getName() != null) {
469
- name = context.getRuntime().newString(docType.getName());
450
+ name = context.runtime.newString(docType.getName());
470
451
  }
471
452
  if (docType.getPublicId() != null) {
472
- publicId = context.getRuntime().newString(docType.getPublicId());
453
+ publicId = context.runtime.newString(docType.getPublicId());
473
454
  }
474
455
  if (docType.getSystemId() != null) {
475
- systemId = context.getRuntime().newString(docType.getSystemId());
456
+ systemId = context.runtime.newString(docType.getSystemId());
476
457
  }
477
- dtd = XmlDtd.newEmpty(context.getRuntime(),
478
- document,
479
- name,
480
- publicId,
481
- systemId);
458
+ dtd = XmlDtd.newEmpty(context.runtime, document, name, publicId, systemId);
482
459
  } else {
483
- dtd = context.getRuntime().getNil();
460
+ dtd = context.nil;
484
461
  }
485
462
 
486
463
  setInternalSubset(dtd);
@@ -497,9 +474,7 @@ public class XmlDocument extends XmlNode {
497
474
  IRubyObject name,
498
475
  IRubyObject external_id,
499
476
  IRubyObject system_id) {
500
- XmlDtd dtd = XmlDtd.newEmpty(context.getRuntime(),
501
- this.getDocument(),
502
- name, external_id, system_id);
477
+ XmlDtd dtd = XmlDtd.newEmpty(context.runtime, getDocument(), name, external_id, system_id);
503
478
  setInternalSubset(dtd);
504
479
  return dtd;
505
480
  }
@@ -511,7 +486,7 @@ public class XmlDocument extends XmlNode {
511
486
  public IRubyObject getExternalSubset(ThreadContext context) {
512
487
  IRubyObject dtd = (IRubyObject) node.getUserData(DTD_EXTERNAL_SUBSET);
513
488
 
514
- if (dtd == null) return context.getRuntime().getNil();
489
+ if (dtd == null) return context.nil;
515
490
  return dtd;
516
491
  }
517
492
 
@@ -523,9 +498,7 @@ public class XmlDocument extends XmlNode {
523
498
  IRubyObject name,
524
499
  IRubyObject external_id,
525
500
  IRubyObject system_id) {
526
- XmlDtd dtd = XmlDtd.newEmpty(context.getRuntime(),
527
- this.getDocument(),
528
- name, external_id, system_id);
501
+ XmlDtd dtd = XmlDtd.newEmpty(context.runtime, getDocument(), name, external_id, system_id);
529
502
  setExternalSubset(dtd);
530
503
  return dtd;
531
504
  }
@@ -543,35 +516,40 @@ public class XmlDocument extends XmlNode {
543
516
  Node child = children.item(i);
544
517
  short type = child.getNodeType();
545
518
  if (type == Node.COMMENT_NODE) {
546
- XmlComment xmlComment = (XmlComment) getCachedNodeOrCreate(context.getRuntime(), child);
519
+ XmlComment xmlComment = (XmlComment) getCachedNodeOrCreate(context.runtime, child);
547
520
  xmlComment.accept(context, visitor);
548
521
  } else if (type == Node.DOCUMENT_TYPE_NODE) {
549
- XmlDtd xmlDtd = (XmlDtd) getCachedNodeOrCreate(context.getRuntime(), child);
522
+ XmlDtd xmlDtd = (XmlDtd) getCachedNodeOrCreate(context.runtime, child);
550
523
  xmlDtd.accept(context, visitor);
551
524
  } else if (type == Node.PROCESSING_INSTRUCTION_NODE) {
552
- XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) getCachedNodeOrCreate(context.getRuntime(), child);
525
+ XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) getCachedNodeOrCreate(context.runtime, child);
553
526
  xmlProcessingInstruction.accept(context, visitor);
554
527
  } else if (type == Node.TEXT_NODE) {
555
- XmlText xmlText = (XmlText) getCachedNodeOrCreate(context.getRuntime(), child);
528
+ XmlText xmlText = (XmlText) getCachedNodeOrCreate(context.runtime, child);
556
529
  xmlText.accept(context, visitor);
557
530
  } else if (type == Node.ELEMENT_NODE) {
558
- XmlElement xmlElement = (XmlElement) getCachedNodeOrCreate(context.getRuntime(), child);
531
+ XmlElement xmlElement = (XmlElement) getCachedNodeOrCreate(context.runtime, child);
559
532
  xmlElement.accept(context, visitor);
560
533
  }
561
534
  }
562
535
  visitor.leave(document);
563
536
  }
564
537
 
565
- @JRubyMethod(meta=true)
566
- public static IRubyObject wrapJavaDocument(ThreadContext context, IRubyObject klazz, IRubyObject arg) {
567
- XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::Document"));
538
+ @JRubyMethod(meta = true)
539
+ public static IRubyObject wrap(ThreadContext context, IRubyObject klass, IRubyObject arg) {
540
+ XmlDocument xmlDocument = new XmlDocument(context.runtime, (RubyClass) klass, (Document) arg.toJava(Document.class));
568
541
  Helpers.invoke(context, xmlDocument, "initialize");
569
- Document document = (Document)arg.toJava(Document.class);
570
- xmlDocument.setDocumentNode(context, document);
571
542
  return xmlDocument;
572
543
  }
573
544
 
574
- @JRubyMethod
545
+ @Deprecated
546
+ @JRubyMethod(meta = true, visibility = Visibility.PRIVATE)
547
+ public static IRubyObject wrapJavaDocument(ThreadContext context, IRubyObject klass, IRubyObject arg) {
548
+ return wrap(context, klass, arg);
549
+ }
550
+
551
+ @Deprecated // default to_java works (due inherited from XmlNode#toJava)
552
+ @JRubyMethod(visibility = Visibility.PRIVATE)
575
553
  public IRubyObject toJavaDocument(ThreadContext context) {
576
554
  return JavaUtil.convertJavaToUsableRubyObject(context.getRuntime(), node);
577
555
  }
@@ -596,11 +574,11 @@ public class XmlDocument extends XmlNode {
596
574
  }
597
575
  if (args.length > 1 ) {
598
576
  if (!args[1].isNil() && !(args[1] instanceof List)) {
599
- throw context.getRuntime().newTypeError("Expected array");
577
+ throw context.runtime.newTypeError("Expected array");
600
578
  }
601
579
  if (!args[1].isNil()) {
602
580
  inclusive_namespace = ((RubyArray)args[1])
603
- .join(context, context.getRuntime().newString(" "))
581
+ .join(context, context.runtime.newString(" "))
604
582
  .asString()
605
583
  .asJavaString(); // OMG I wish I knew JRuby better, this is ugly
606
584
  }
@@ -632,29 +610,24 @@ public class XmlDocument extends XmlNode {
632
610
  } else {
633
611
  result = canonicalizer.canonicalizeSubtree(startingNode.getNode(), inclusive_namespace, filter);
634
612
  }
635
- String resultString = new String(result, "UTF-8");
636
- return stringOrNil(context.getRuntime(), resultString);
613
+ return RubyString.newString(context.runtime, new ByteList(result, UTF8Encoding.INSTANCE));
637
614
  } catch (CanonicalizationException e) {
638
615
  // TODO Auto-generated catch block
639
616
  e.printStackTrace();
640
- } catch (UnsupportedEncodingException e) {
641
- // TODO Auto-generated catch block
642
- e.printStackTrace();
643
617
  }
644
- return context.getRuntime().getNil();
618
+ return context.nil;
645
619
  }
646
620
 
647
621
  private XmlNode getStartingNode(Block block) {
648
622
  if (block.isGiven()) {
649
- if (block.getBinding().getSelf() instanceof XmlNode) {
650
- return (XmlNode)block.getBinding().getSelf();
651
- }
623
+ IRubyObject boundSelf = block.getBinding().getSelf();
624
+ if (boundSelf instanceof XmlNode) return (XmlNode) boundSelf;
652
625
  }
653
626
  return this;
654
627
  }
655
628
 
656
629
  public void resetNamespaceCache(ThreadContext context) {
657
630
  nsCache = new NokogiriNamespaceCache();
658
- createAndCacheNamespaces(context.getRuntime(), node);
631
+ createAndCacheNamespaces(context.runtime, node);
659
632
  }
660
633
  }