nokogiri 1.16.8-java → 1.17.0-java

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (108)
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/java/nokogiri/Html4Document.java +3 -3
  6. data/ext/java/nokogiri/Html4SaxParserContext.java +47 -175
  7. data/ext/java/nokogiri/NokogiriService.java +2 -2
  8. data/ext/java/nokogiri/XmlCdata.java +3 -0
  9. data/ext/java/nokogiri/XmlDocument.java +7 -14
  10. data/ext/java/nokogiri/XmlDocumentFragment.java +4 -92
  11. data/ext/java/nokogiri/XmlDtd.java +2 -2
  12. data/ext/java/nokogiri/XmlEntityReference.java +16 -12
  13. data/ext/java/nokogiri/XmlNode.java +26 -47
  14. data/ext/java/nokogiri/XmlNodeSet.java +10 -1
  15. data/ext/java/nokogiri/XmlSaxParserContext.java +73 -36
  16. data/ext/java/nokogiri/XmlSchema.java +15 -16
  17. data/ext/java/nokogiri/XsltStylesheet.java +1 -1
  18. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +1 -1
  19. data/ext/java/nokogiri/internals/NokogiriDomParser.java +3 -3
  20. data/ext/java/nokogiri/internals/NokogiriHandler.java +59 -15
  21. data/ext/java/nokogiri/internals/NokogiriHelpers.java +1 -1
  22. data/ext/java/nokogiri/internals/ParserContext.java +51 -21
  23. data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
  24. data/ext/java/nokogiri/internals/XmlDomParserContext.java +8 -19
  25. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +1 -1
  26. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +1 -1
  27. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -11
  28. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +5 -5
  29. data/ext/java/nokogiri/internals/c14n/{UtfHelpper.java → UtfHelper.java} +2 -2
  30. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +8 -8
  31. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +2 -2
  32. data/ext/nokogiri/extconf.rb +191 -137
  33. data/ext/nokogiri/gumbo.c +69 -53
  34. data/ext/nokogiri/html4_document.c +10 -4
  35. data/ext/nokogiri/html4_element_description.c +18 -18
  36. data/ext/nokogiri/html4_sax_parser.c +40 -0
  37. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  38. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  39. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  40. data/ext/nokogiri/nokogiri.c +9 -2
  41. data/ext/nokogiri/xml_attr.c +1 -1
  42. data/ext/nokogiri/xml_cdata.c +2 -10
  43. data/ext/nokogiri/xml_comment.c +3 -8
  44. data/ext/nokogiri/xml_document.c +163 -156
  45. data/ext/nokogiri/xml_document_fragment.c +10 -25
  46. data/ext/nokogiri/xml_dtd.c +1 -1
  47. data/ext/nokogiri/xml_element_content.c +9 -9
  48. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  49. data/ext/nokogiri/xml_namespace.c +6 -6
  50. data/ext/nokogiri/xml_node.c +130 -104
  51. data/ext/nokogiri/xml_node_set.c +46 -44
  52. data/ext/nokogiri/xml_reader.c +54 -58
  53. data/ext/nokogiri/xml_relax_ng.c +35 -56
  54. data/ext/nokogiri/xml_sax_parser.c +156 -88
  55. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  56. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  57. data/ext/nokogiri/xml_schema.c +50 -85
  58. data/ext/nokogiri/xml_syntax_error.c +19 -11
  59. data/ext/nokogiri/xml_text.c +2 -4
  60. data/ext/nokogiri/xml_xpath_context.c +2 -2
  61. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  62. data/lib/nokogiri/class_resolver.rb +1 -1
  63. data/lib/nokogiri/css/node.rb +6 -2
  64. data/lib/nokogiri/css/parser.rb +6 -4
  65. data/lib/nokogiri/css/parser.y +2 -2
  66. data/lib/nokogiri/css/parser_extras.rb +6 -66
  67. data/lib/nokogiri/css/selector_cache.rb +38 -0
  68. data/lib/nokogiri/css/tokenizer.rb +4 -4
  69. data/lib/nokogiri/css/tokenizer.rex +9 -8
  70. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  71. data/lib/nokogiri/css.rb +86 -20
  72. data/lib/nokogiri/decorators/slop.rb +3 -5
  73. data/lib/nokogiri/encoding_handler.rb +2 -2
  74. data/lib/nokogiri/html4/document.rb +44 -23
  75. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  76. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  77. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  78. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  79. data/lib/nokogiri/html4.rb +9 -14
  80. data/lib/nokogiri/html5/builder.rb +40 -0
  81. data/lib/nokogiri/html5/document.rb +61 -30
  82. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  83. data/lib/nokogiri/html5/node.rb +4 -4
  84. data/lib/nokogiri/html5.rb +114 -72
  85. data/lib/nokogiri/nokogiri.jar +0 -0
  86. data/lib/nokogiri/version/constant.rb +1 -1
  87. data/lib/nokogiri/xml/builder.rb +8 -1
  88. data/lib/nokogiri/xml/document.rb +70 -26
  89. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  90. data/lib/nokogiri/xml/node.rb +82 -11
  91. data/lib/nokogiri/xml/node_set.rb +9 -7
  92. data/lib/nokogiri/xml/parse_options.rb +1 -1
  93. data/lib/nokogiri/xml/pp/node.rb +6 -1
  94. data/lib/nokogiri/xml/reader.rb +46 -13
  95. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  96. data/lib/nokogiri/xml/sax/document.rb +174 -83
  97. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  98. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  99. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  100. data/lib/nokogiri/xml/sax.rb +48 -0
  101. data/lib/nokogiri/xml/schema.rb +112 -45
  102. data/lib/nokogiri/xml/searchable.rb +6 -8
  103. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  104. data/lib/nokogiri/xml.rb +13 -24
  105. data/lib/nokogiri/xslt.rb +3 -9
  106. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  107. metadata +9 -5
  108. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -287,7 +287,7 @@ public class XmlNode extends RubyObject
287
287
  * args but not for an exact number. Any extra args will then be
288
288
  * passed to 'initialize'. The way 'new' and this 'init' function
289
289
  * interact means that subclasses cannot arbitrarily change the
290
- * require aruments by defining an 'initialize' method. This is
290
+ * require arguments by defining an 'initialize' method. This is
291
291
  * how the C libxml wrapper works also.
292
292
  *
293
293
  * As written it performs initialization for a new Element with
@@ -407,7 +407,7 @@ public class XmlNode extends RubyObject
407
407
  /**
408
408
  * This method should be called after a node has been adopted in a new
409
409
  * document. This method will ensure that the node is renamed with the
410
- * appriopriate NS uri. First the prefix of the node is extracted, then is
410
+ * appropriate NS uri. First the prefix of the node is extracted, then is
411
411
  * used to lookup the namespace uri in the new document starting at the
412
412
  * current node and traversing the ancestors. If the namespace uri wasn't
413
413
  * empty (or null) all children and the node has attributes and/or children
@@ -643,12 +643,22 @@ public class XmlNode extends RubyObject
643
643
 
644
644
  @JRubyMethod(name = {"attribute", "attr"})
645
645
  public IRubyObject
646
- attribute(ThreadContext context, IRubyObject name)
646
+ attribute(ThreadContext context, IRubyObject rbName)
647
647
  {
648
- NamedNodeMap attrs = this.node.getAttributes();
649
- Node attr = attrs.getNamedItem(rubyStringToString(name));
650
- if (attr == null) { return context.nil; }
651
- return getCachedNodeOrCreate(context.runtime, attr);
648
+ NamedNodeMap attributes = this.node.getAttributes();
649
+ String name = rubyStringToString(rbName);
650
+
651
+ for (int j = 0 ; j < attributes.getLength() ; j++) {
652
+ Node attribute = attributes.item(j);
653
+ String localName = attribute.getLocalName();
654
+ if (localName == null) {
655
+ continue;
656
+ }
657
+ if (localName.equals(name)) {
658
+ return getCachedNodeOrCreate(context.runtime, attribute);
659
+ }
660
+ }
661
+ return context.nil;
652
662
  }
653
663
 
654
664
  @JRubyMethod
@@ -966,45 +976,13 @@ public class XmlNode extends RubyObject
966
976
  return doc;
967
977
  }
968
978
 
979
+ @JRubyMethod(visibility = Visibility.PROTECTED)
969
980
  public IRubyObject
970
- dup()
971
- {
972
- return dup_implementation(getMetaClass().getClassRuntime(), true);
973
- }
974
-
975
- @JRubyMethod
976
- public IRubyObject
977
- dup(ThreadContext context)
978
- {
979
- return dup_implementation(context, true);
980
- }
981
-
982
- @JRubyMethod
983
- public IRubyObject
984
- dup(ThreadContext context, IRubyObject depth)
981
+ initialize_copy_with_args(ThreadContext context, IRubyObject other, IRubyObject level, IRubyObject _ignored)
985
982
  {
986
- boolean deep = depth instanceof RubyInteger && RubyFixnum.fix2int(depth) != 0;
987
- return dup_implementation(context, deep);
988
- }
989
-
990
- protected final IRubyObject
991
- dup_implementation(ThreadContext context, boolean deep)
992
- {
993
- return dup_implementation(context.runtime, deep);
994
- }
995
-
996
- protected IRubyObject
997
- dup_implementation(Ruby runtime, boolean deep)
998
- {
999
- XmlNode clone;
1000
- try {
1001
- clone = (XmlNode) clone();
1002
- } catch (CloneNotSupportedException e) {
1003
- throw runtime.newRuntimeError(e.toString());
1004
- }
1005
- Node newNode = node.cloneNode(deep);
1006
- clone.node = newNode;
1007
- return clone;
983
+ boolean deep = level instanceof RubyInteger && RubyFixnum.fix2int(level) != 0;
984
+ this.node = asXmlNode(context, other).node.cloneNode(deep);
985
+ return this;
1008
986
  }
1009
987
 
1010
988
  public static RubyString
@@ -1447,11 +1425,12 @@ public class XmlNode extends RubyObject
1447
1425
  }
1448
1426
  }
1449
1427
 
1450
- if (uri != null) {
1451
- element.setAttributeNS(uri, key, val);
1452
- } else {
1428
+ if (colonIndex > 0 && uri == null) {
1453
1429
  element.setAttribute(key, val);
1430
+ } else {
1431
+ element.setAttributeNS(uri, key, val);
1454
1432
  }
1433
+
1455
1434
  clearXpathContext(node);
1456
1435
  }
1457
1436
 
@@ -15,6 +15,7 @@ import org.jruby.RubyRange;
15
15
  import org.jruby.anno.JRubyClass;
16
16
  import org.jruby.anno.JRubyMethod;
17
17
  import org.jruby.runtime.ThreadContext;
18
+ import org.jruby.runtime.Visibility;
18
19
  import org.jruby.runtime.builtin.IRubyObject;
19
20
  import org.w3c.dom.Node;
20
21
  import org.w3c.dom.NodeList;
@@ -201,7 +202,6 @@ public class XmlNodeSet extends RubyObject implements NodeList
201
202
  return context.nil;
202
203
  }
203
204
 
204
- @JRubyMethod
205
205
  public IRubyObject
206
206
  dup(ThreadContext context)
207
207
  {
@@ -210,6 +210,15 @@ public class XmlNodeSet extends RubyObject implements NodeList
210
210
  return dup;
211
211
  }
212
212
 
213
+ @JRubyMethod(visibility = Visibility.PROTECTED)
214
+ public IRubyObject
215
+ initialize_copy(ThreadContext context, IRubyObject other)
216
+ {
217
+ setNodes(getNodes(context, other));
218
+ initializeFrom(context, (XmlNodeSet)other);
219
+ return this;
220
+ }
221
+
213
222
  @JRubyMethod(name = "include?")
214
223
  public IRubyObject
215
224
  include_p(ThreadContext context, IRubyObject node_or_namespace)
@@ -1,10 +1,14 @@
1
1
  package nokogiri;
2
2
 
3
3
  import nokogiri.internals.*;
4
+ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
5
+
4
6
  import org.apache.xerces.parsers.AbstractSAXParser;
5
7
  import org.jruby.Ruby;
6
8
  import org.jruby.RubyClass;
9
+ import org.jruby.RubyEncoding;
7
10
  import org.jruby.RubyFixnum;
11
+ import org.jruby.RubyString;
8
12
  import org.jruby.anno.JRubyClass;
9
13
  import org.jruby.anno.JRubyMethod;
10
14
  import org.jruby.exceptions.RaiseException;
@@ -14,6 +18,7 @@ import org.jruby.runtime.builtin.IRubyObject;
14
18
  import org.xml.sax.SAXException;
15
19
  import org.xml.sax.SAXParseException;
16
20
 
21
+ import java.io.ByteArrayInputStream;
17
22
  import java.io.IOException;
18
23
  import java.io.InputStream;
19
24
 
@@ -43,7 +48,7 @@ public class XmlSaxParserContext extends ParserContext
43
48
 
44
49
  protected NokogiriHandler handler;
45
50
  protected NokogiriErrorHandler errorHandler;
46
- private boolean replaceEntities = true;
51
+ private boolean replaceEntities = false;
47
52
  private boolean recovery = false;
48
53
 
49
54
  public
@@ -90,16 +95,26 @@ public class XmlSaxParserContext extends ParserContext
90
95
  * Create a new parser context that will parse the string
91
96
  * <code>data</code>.
92
97
  */
93
- @JRubyMethod(name = "memory", meta = true)
98
+ @JRubyMethod(name = "native_memory", meta = true)
94
99
  public static IRubyObject
95
- parse_memory(ThreadContext context,
96
- IRubyObject klazz,
97
- IRubyObject data)
100
+ parse_memory(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
98
101
  {
99
- final Ruby runtime = context.runtime;
100
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
101
- ctx.initialize(runtime);
102
- ctx.setStringInputSource(context, data, runtime.getNil());
102
+ String java_encoding = null;
103
+ if (encoding != context.runtime.getNil()) {
104
+ if (!(encoding instanceof RubyEncoding)) {
105
+ throw context.runtime.newTypeError("encoding must be kind_of Encoding");
106
+ }
107
+ java_encoding = ((RubyEncoding)encoding).toString();
108
+ }
109
+
110
+ XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
111
+ ctx.initialize(context.runtime);
112
+ ctx.setStringInputSourceNoEnc(context, data, context.runtime.getNil());
113
+
114
+ if (java_encoding != null) {
115
+ ctx.getInputSource().setEncoding(java_encoding);
116
+ }
117
+
103
118
  return ctx;
104
119
  }
105
120
 
@@ -107,16 +122,26 @@ public class XmlSaxParserContext extends ParserContext
107
122
  * Create a new parser context that will read from the file
108
123
  * <code>data</code> and parse.
109
124
  */
110
- @JRubyMethod(name = "file", meta = true)
125
+ @JRubyMethod(name = "native_file", meta = true)
111
126
  public static IRubyObject
112
- parse_file(ThreadContext context,
113
- IRubyObject klazz,
114
- IRubyObject data)
127
+ parse_file(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
115
128
  {
116
- final Ruby runtime = context.runtime;
117
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
118
- ctx.initialize(context.getRuntime());
129
+ String java_encoding = null;
130
+ if (encoding != context.runtime.getNil()) {
131
+ if (!(encoding instanceof RubyEncoding)) {
132
+ throw context.runtime.newTypeError("encoding must be kind_of Encoding");
133
+ }
134
+ java_encoding = ((RubyEncoding)encoding).toString();
135
+ }
136
+
137
+ XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
138
+ ctx.initialize(context.runtime);
119
139
  ctx.setInputSourceFile(context, data);
140
+
141
+ if (java_encoding != null) {
142
+ ctx.getInputSource().setEncoding(java_encoding);
143
+ }
144
+
120
145
  return ctx;
121
146
  }
122
147
 
@@ -126,21 +151,30 @@ public class XmlSaxParserContext extends ParserContext
126
151
  *
127
152
  * TODO: Currently ignores encoding <code>enc</code>.
128
153
  */
129
- @JRubyMethod(name = "io", meta = true)
154
+ @JRubyMethod(name = "native_io", meta = true)
130
155
  public static IRubyObject
131
- parse_io(ThreadContext context,
132
- IRubyObject klazz,
133
- IRubyObject data,
134
- IRubyObject encoding)
156
+ parse_io(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
135
157
  {
136
- // check the type of the unused encoding to match behavior of CRuby
137
- if (!(encoding instanceof RubyFixnum)) {
138
- throw context.getRuntime().newTypeError("encoding must be kind_of String");
158
+ if (!invoke(context, data, "respond_to?", context.runtime.newSymbol("read")).isTrue()) {
159
+ throw context.runtime.newTypeError("argument expected to respond to :read");
139
160
  }
140
- final Ruby runtime = context.runtime;
141
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
142
- ctx.initialize(runtime);
143
- ctx.setIOInputSource(context, data, runtime.getNil());
161
+
162
+ String java_encoding = null;
163
+ if (encoding != context.runtime.getNil()) {
164
+ if (!(encoding instanceof RubyEncoding)) {
165
+ throw context.runtime.newTypeError("encoding must be kind_of Encoding");
166
+ }
167
+ java_encoding = ((RubyEncoding)encoding).toString();
168
+ }
169
+
170
+ XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
171
+ ctx.initialize(context.runtime);
172
+ ctx.setIOInputSource(context, data, context.runtime.getNil());
173
+
174
+ if (java_encoding != null) {
175
+ ctx.getInputSource().setEncoding(java_encoding);
176
+ }
177
+
144
178
  return ctx;
145
179
  }
146
180
 
@@ -222,9 +256,12 @@ public class XmlSaxParserContext extends ParserContext
222
256
 
223
257
  /* TODO: how should we pass in parse options? */
224
258
  ParserContext.Options options = defaultParseOptions(context);
259
+ if (replaceEntities) {
260
+ options.noEnt = true;
261
+ }
225
262
 
226
263
  errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning);
227
- handler = new NokogiriHandler(runtime, handlerRuby, errorHandler);
264
+ handler = new NokogiriHandler(runtime, handlerRuby, errorHandler, options.noEnt);
228
265
 
229
266
  preParse(runtime, handlerRuby, handler);
230
267
  parser.setContentHandler(handler);
@@ -233,6 +270,7 @@ public class XmlSaxParserContext extends ParserContext
233
270
 
234
271
  try {
235
272
  parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
273
+ parser.setProperty("http://xml.org/sax/properties/declaration-handler", handler);
236
274
  } catch (Exception ex) {
237
275
  throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
238
276
  }
@@ -241,16 +279,15 @@ public class XmlSaxParserContext extends ParserContext
241
279
  try {
242
280
  do_parse();
243
281
  } catch (SAXParseException ex) {
244
- // A bad document (<foo><bar></foo>) should call the
245
- // error handler instead of raising a SAX exception.
246
-
247
- // However, an EMPTY document should raise a RuntimeError.
248
- // This is a bit kludgy, but AFAIK SAX doesn't distinguish
249
- // between empty and bad whereas Nokogiri does.
282
+ // An EMPTY document should raise a RuntimeError. This is a bit kludgy, but AFAIK SAX
283
+ // doesn't distinguish between empty and bad whereas Nokogiri does.
250
284
  String message = ex.getMessage();
251
285
  if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) {
252
- throw runtime.newRuntimeError("couldn't parse document: " + message);
286
+ throw runtime.newRuntimeError("input string cannot be empty");
253
287
  }
288
+
289
+ // A bad document (<foo><bar></foo>) should call the
290
+ // error handler instead of raising a SAX exception.
254
291
  handler.error(ex);
255
292
  }
256
293
  } catch (SAXException ex) {
@@ -175,19 +175,6 @@ public class XmlSchema extends RubyObject
175
175
  return getSchema(context, (RubyClass)klazz, source, parseOptions);
176
176
  }
177
177
 
178
- @JRubyMethod(meta = true, required = 1, optional = 1)
179
- public static IRubyObject
180
- read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
181
- {
182
- IRubyObject content = args[0];
183
- IRubyObject parseOptions = null;
184
- if (args.length > 1) {
185
- parseOptions = args[1];
186
- }
187
- String data = content.convertToString().asJavaString();
188
- return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions);
189
- }
190
-
191
178
  private static IRubyObject
192
179
  getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions)
193
180
  {
@@ -215,14 +202,22 @@ public class XmlSchema extends RubyObject
215
202
 
216
203
  XmlDomParserContext ctx = new XmlDomParserContext(runtime, RubyFixnum.newFixnum(runtime, 1L));
217
204
  ctx.setInputSourceFile(context, file);
218
- XmlDocument xmlDocument = ctx.parse(context, getNokogiriClass(runtime, "Nokogiri::XML::Document"), context.nil);
219
- return validate_document_or_file(context, xmlDocument);
205
+ try {
206
+ XmlDocument xmlDocument = ctx.parse(context, getNokogiriClass(runtime, "Nokogiri::XML::Document"), context.nil);
207
+ return validate_document_or_file(context, xmlDocument);
208
+ } catch (Exception ex) {
209
+ RubyArray errors = (RubyArray)context.runtime.newEmptyArray();
210
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
211
+ xmlSyntaxError.setException(ex);
212
+ errors.append(xmlSyntaxError);
213
+ return errors;
214
+ }
220
215
  }
221
216
 
222
217
  IRubyObject
223
218
  validate_document_or_file(ThreadContext context, XmlDocument xmlDocument)
224
219
  {
225
- RubyArray<?> errors = (RubyArray) this.getInstanceVariable("@errors");
220
+ RubyArray errors = context.runtime.newEmptyArray();
226
221
  ErrorHandler errorHandler = new SchemaErrorHandler(context.runtime, errors);
227
222
  setErrorHandler(errorHandler);
228
223
 
@@ -248,6 +243,10 @@ public class XmlSchema extends RubyObject
248
243
  protected void
249
244
  validate(Document document) throws SAXException, IOException
250
245
  {
246
+ if (document.getDocumentElement() == null) {
247
+ throw new SAXException("Document is empty");
248
+ }
249
+
251
250
  DOMSource docSource = new DOMSource(document);
252
251
  validator.validate(docSource);
253
252
  }
@@ -135,7 +135,7 @@ public class XsltStylesheet extends RubyObject
135
135
  XmlDocument xmlDoc = (XmlDocument) args[0];
136
136
  ensureDocumentHasNoError(context, xmlDoc);
137
137
 
138
- Document doc = ((XmlDocument) xmlDoc.dup_implementation(context, true)).getDocument();
138
+ Document doc = ((XmlDocument)xmlDoc.callMethod(context, "dup", runtime.newFixnum(1))).getDocument();
139
139
 
140
140
  XsltStylesheet xslt =
141
141
  (XsltStylesheet) NokogiriService.XSLT_STYLESHEET_ALLOCATOR.allocate(runtime, (RubyClass)klazz);
@@ -87,7 +87,7 @@ public class NokogiriBlockingQueueInputStream extends InputStream
87
87
 
88
88
  /**
89
89
  * This method shouldn't be called unless the parser has finished parsing or
90
- * threw an exception while doing so, otherwise, there'll be the protential
90
+ * threw an exception while doing so, otherwise, there'll be the potential
91
91
  * that the read method will block indefinitely.
92
92
  */
93
93
  @Override
@@ -66,7 +66,7 @@ public class NokogiriDomParser extends DOMParser
66
66
  {
67
67
  dtd.reset();
68
68
  if (xInclude) {
69
- setEntityResolver(new NokogiriXInlcudeEntityResolver(source));
69
+ setEntityResolver(new NokogiriXIncludeEntityResolver(source));
70
70
  }
71
71
  super.parse(source);
72
72
  Document doc = getDocument();
@@ -77,11 +77,11 @@ public class NokogiriDomParser extends DOMParser
77
77
  doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
78
78
  }
79
79
 
80
- private static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver
80
+ private static class NokogiriXIncludeEntityResolver implements org.xml.sax.EntityResolver
81
81
  {
82
82
  InputSource source;
83
83
  private
84
- NokogiriXInlcudeEntityResolver(InputSource source)
84
+ NokogiriXIncludeEntityResolver(InputSource source)
85
85
  {
86
86
  this.source = source;
87
87
  }
@@ -42,27 +42,26 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
42
42
 
43
43
  private Locator locator;
44
44
  private boolean needEmptyAttrCheck;
45
+ private boolean replaceEntities;
46
+ private Set<String> entities = new HashSet<String>();
45
47
 
46
48
  public
47
- NokogiriHandler(Ruby runtime, IRubyObject object, NokogiriErrorHandler errorHandler)
49
+ NokogiriHandler(Ruby runtime,
50
+ IRubyObject object,
51
+ NokogiriErrorHandler errorHandler,
52
+ boolean replaceEntities)
48
53
  {
49
54
  assert object != null;
50
55
  this.runtime = runtime;
51
56
  this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute");
52
57
  this.object = object;
53
58
  this.errorHandler = errorHandler;
59
+ this.replaceEntities = replaceEntities;
54
60
  charactersBuilder = new StringBuilder();
55
61
  String objectName = object.getMetaClass().getName();
56
62
  if ("Nokogiri::HTML4::SAX::Parser".equals(objectName)) { needEmptyAttrCheck = true; }
57
63
  }
58
64
 
59
- @Override
60
- public void
61
- skippedEntity(String skippedEntity)
62
- {
63
- call("error", runtime.newString("Entity '" + skippedEntity + "' not defined\n"));
64
- }
65
-
66
65
  @Override
67
66
  public void
68
67
  setDocumentLocator(Locator locator)
@@ -88,7 +87,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
88
87
  public void
89
88
  endDocument()
90
89
  {
91
- populateCharacters();
90
+ flushCharacters();
92
91
  call("end_document");
93
92
  }
94
93
 
@@ -161,12 +160,12 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
161
160
  }
162
161
 
163
162
  if (localName == null || localName.isEmpty()) { localName = getLocalPart(qName); }
164
- populateCharacters();
163
+ flushCharacters();
165
164
  call("start_element_namespace",
166
165
  stringOrNil(runtime, localName),
167
166
  rubyAttr,
168
167
  stringOrNil(runtime, getPrefix(qName)),
169
- stringOrNil(runtime, uri),
168
+ uri.length() > 0 ? stringOrNil(runtime, uri) : runtime.getNil(),
170
169
  rubyNSAttr);
171
170
  }
172
171
 
@@ -204,7 +203,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
204
203
  public void
205
204
  endElement(String uri, String localName, String qName)
206
205
  {
207
- populateCharacters();
206
+ flushCharacters();
208
207
  call("end_element_namespace",
209
208
  stringOrNil(runtime, localName),
210
209
  stringOrNil(runtime, getPrefix(qName)),
@@ -218,11 +217,56 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
218
217
  charactersBuilder.append(ch, start, length);
219
218
  }
220
219
 
220
+ @Override
221
+ public void
222
+ externalEntityDecl(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
223
+ throws SAXException
224
+ {
225
+ entities.add(name);
226
+ }
227
+
228
+ @Override
229
+ public void
230
+ internalEntityDecl(java.lang.String name, java.lang.String value)
231
+ throws SAXException
232
+ {
233
+ entities.add(name);
234
+ }
235
+
236
+ @Override
237
+ public void
238
+ skippedEntity(String name)
239
+ {
240
+ call("error", runtime.newString("Entity '" + name + "' not defined\n"));
241
+ if (!replaceEntities) {
242
+ call("reference", runtime.newString(name), runtime.getNil());
243
+ }
244
+ }
245
+
246
+ @Override
247
+ public void
248
+ startEntity(String name)
249
+ {
250
+ flushCharacters();
251
+ }
252
+
253
+ @Override
254
+ public void
255
+ endEntity(String name)
256
+ {
257
+ IRubyObject content = charactersBuilder.length() > 0 ? runtime.newString(charactersBuilder.toString()) :
258
+ runtime.getNil();
259
+ if (!replaceEntities && entities.contains(name)) {
260
+ call("reference", runtime.newString(name), content);
261
+ }
262
+ flushCharacters();
263
+ }
264
+
221
265
  @Override
222
266
  public void
223
267
  comment(char[] ch, int start, int length)
224
268
  {
225
- populateCharacters();
269
+ flushCharacters();
226
270
  call("comment", runtime.newString(new String(ch, start, length)));
227
271
  }
228
272
 
@@ -230,7 +274,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
230
274
  public void
231
275
  startCDATA()
232
276
  {
233
- populateCharacters();
277
+ flushCharacters();
234
278
  }
235
279
 
236
280
  @Override
@@ -329,7 +373,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
329
373
  }
330
374
 
331
375
  protected void
332
- populateCharacters()
376
+ flushCharacters()
333
377
  {
334
378
  if (charactersBuilder.length() > 0) {
335
379
  call("characters", runtime.newString(charactersBuilder.toString()));
@@ -724,7 +724,7 @@ public class NokogiriHelpers
724
724
  convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
725
725
  {
726
726
  if (!(doc instanceof Html4Document)) { return str; }
727
- String parsed_encoding = ((Html4Document)doc).getPraedEncoding();
727
+ String parsed_encoding = ((Html4Document)doc).getParsedEncoding();
728
728
  if (parsed_encoding == null) { return str; }
729
729
  String ruby_encoding = rubyStringToString(doc.getEncoding());
730
730
  if (ruby_encoding == null) { return str; }