nokogiri 1.16.8-java → 1.17.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108)
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/java/nokogiri/Html4Document.java +3 -3
  6. data/ext/java/nokogiri/Html4SaxParserContext.java +47 -175
  7. data/ext/java/nokogiri/NokogiriService.java +2 -2
  8. data/ext/java/nokogiri/XmlCdata.java +3 -0
  9. data/ext/java/nokogiri/XmlDocument.java +7 -14
  10. data/ext/java/nokogiri/XmlDocumentFragment.java +4 -92
  11. data/ext/java/nokogiri/XmlDtd.java +2 -2
  12. data/ext/java/nokogiri/XmlEntityReference.java +16 -12
  13. data/ext/java/nokogiri/XmlNode.java +26 -47
  14. data/ext/java/nokogiri/XmlNodeSet.java +10 -1
  15. data/ext/java/nokogiri/XmlSaxParserContext.java +73 -36
  16. data/ext/java/nokogiri/XmlSchema.java +15 -16
  17. data/ext/java/nokogiri/XsltStylesheet.java +1 -1
  18. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +1 -1
  19. data/ext/java/nokogiri/internals/NokogiriDomParser.java +3 -3
  20. data/ext/java/nokogiri/internals/NokogiriHandler.java +59 -15
  21. data/ext/java/nokogiri/internals/NokogiriHelpers.java +1 -1
  22. data/ext/java/nokogiri/internals/ParserContext.java +51 -21
  23. data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
  24. data/ext/java/nokogiri/internals/XmlDomParserContext.java +8 -19
  25. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +1 -1
  26. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +1 -1
  27. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -11
  28. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +5 -5
  29. data/ext/java/nokogiri/internals/c14n/{UtfHelpper.java → UtfHelper.java} +2 -2
  30. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +8 -8
  31. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +2 -2
  32. data/ext/nokogiri/extconf.rb +191 -137
  33. data/ext/nokogiri/gumbo.c +69 -53
  34. data/ext/nokogiri/html4_document.c +10 -4
  35. data/ext/nokogiri/html4_element_description.c +18 -18
  36. data/ext/nokogiri/html4_sax_parser.c +40 -0
  37. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  38. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  39. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  40. data/ext/nokogiri/nokogiri.c +9 -2
  41. data/ext/nokogiri/xml_attr.c +1 -1
  42. data/ext/nokogiri/xml_cdata.c +2 -10
  43. data/ext/nokogiri/xml_comment.c +3 -8
  44. data/ext/nokogiri/xml_document.c +163 -156
  45. data/ext/nokogiri/xml_document_fragment.c +10 -25
  46. data/ext/nokogiri/xml_dtd.c +1 -1
  47. data/ext/nokogiri/xml_element_content.c +9 -9
  48. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  49. data/ext/nokogiri/xml_namespace.c +6 -6
  50. data/ext/nokogiri/xml_node.c +130 -104
  51. data/ext/nokogiri/xml_node_set.c +46 -44
  52. data/ext/nokogiri/xml_reader.c +54 -58
  53. data/ext/nokogiri/xml_relax_ng.c +35 -56
  54. data/ext/nokogiri/xml_sax_parser.c +156 -88
  55. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  56. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  57. data/ext/nokogiri/xml_schema.c +50 -85
  58. data/ext/nokogiri/xml_syntax_error.c +19 -11
  59. data/ext/nokogiri/xml_text.c +2 -4
  60. data/ext/nokogiri/xml_xpath_context.c +2 -2
  61. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  62. data/lib/nokogiri/class_resolver.rb +1 -1
  63. data/lib/nokogiri/css/node.rb +6 -2
  64. data/lib/nokogiri/css/parser.rb +6 -4
  65. data/lib/nokogiri/css/parser.y +2 -2
  66. data/lib/nokogiri/css/parser_extras.rb +6 -66
  67. data/lib/nokogiri/css/selector_cache.rb +38 -0
  68. data/lib/nokogiri/css/tokenizer.rb +4 -4
  69. data/lib/nokogiri/css/tokenizer.rex +9 -8
  70. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  71. data/lib/nokogiri/css.rb +86 -20
  72. data/lib/nokogiri/decorators/slop.rb +3 -5
  73. data/lib/nokogiri/encoding_handler.rb +2 -2
  74. data/lib/nokogiri/html4/document.rb +44 -23
  75. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  76. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  77. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  78. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  79. data/lib/nokogiri/html4.rb +9 -14
  80. data/lib/nokogiri/html5/builder.rb +40 -0
  81. data/lib/nokogiri/html5/document.rb +61 -30
  82. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  83. data/lib/nokogiri/html5/node.rb +4 -4
  84. data/lib/nokogiri/html5.rb +114 -72
  85. data/lib/nokogiri/nokogiri.jar +0 -0
  86. data/lib/nokogiri/version/constant.rb +1 -1
  87. data/lib/nokogiri/xml/builder.rb +8 -1
  88. data/lib/nokogiri/xml/document.rb +70 -26
  89. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  90. data/lib/nokogiri/xml/node.rb +82 -11
  91. data/lib/nokogiri/xml/node_set.rb +9 -7
  92. data/lib/nokogiri/xml/parse_options.rb +1 -1
  93. data/lib/nokogiri/xml/pp/node.rb +6 -1
  94. data/lib/nokogiri/xml/reader.rb +46 -13
  95. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  96. data/lib/nokogiri/xml/sax/document.rb +174 -83
  97. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  98. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  99. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  100. data/lib/nokogiri/xml/sax.rb +48 -0
  101. data/lib/nokogiri/xml/schema.rb +112 -45
  102. data/lib/nokogiri/xml/searchable.rb +6 -8
  103. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  104. data/lib/nokogiri/xml.rb +13 -24
  105. data/lib/nokogiri/xslt.rb +3 -9
  106. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  107. metadata +9 -5
  108. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -287,7 +287,7 @@ public class XmlNode extends RubyObject
287
287
  * args but not for an exact number. Any extra args will then be
288
288
  * passed to 'initialize'. The way 'new' and this 'init' function
289
289
  * interact means that subclasses cannot arbitrarily change the
290
- * require aruments by defining an 'initialize' method. This is
290
+ * require arguments by defining an 'initialize' method. This is
291
291
  * how the C libxml wrapper works also.
292
292
  *
293
293
  * As written it performs initialization for a new Element with
@@ -407,7 +407,7 @@ public class XmlNode extends RubyObject
407
407
  /**
408
408
  * This method should be called after a node has been adopted in a new
409
409
  * document. This method will ensure that the node is renamed with the
410
- * appriopriate NS uri. First the prefix of the node is extracted, then is
410
+ * appropriate NS uri. First the prefix of the node is extracted, then is
411
411
  * used to lookup the namespace uri in the new document starting at the
412
412
  * current node and traversing the ancestors. If the namespace uri wasn't
413
413
  * empty (or null) all children and the node has attributes and/or children
@@ -643,12 +643,22 @@ public class XmlNode extends RubyObject
643
643
 
644
644
  @JRubyMethod(name = {"attribute", "attr"})
645
645
  public IRubyObject
646
- attribute(ThreadContext context, IRubyObject name)
646
+ attribute(ThreadContext context, IRubyObject rbName)
647
647
  {
648
- NamedNodeMap attrs = this.node.getAttributes();
649
- Node attr = attrs.getNamedItem(rubyStringToString(name));
650
- if (attr == null) { return context.nil; }
651
- return getCachedNodeOrCreate(context.runtime, attr);
648
+ NamedNodeMap attributes = this.node.getAttributes();
649
+ String name = rubyStringToString(rbName);
650
+
651
+ for (int j = 0 ; j < attributes.getLength() ; j++) {
652
+ Node attribute = attributes.item(j);
653
+ String localName = attribute.getLocalName();
654
+ if (localName == null) {
655
+ continue;
656
+ }
657
+ if (localName.equals(name)) {
658
+ return getCachedNodeOrCreate(context.runtime, attribute);
659
+ }
660
+ }
661
+ return context.nil;
652
662
  }
653
663
 
654
664
  @JRubyMethod
@@ -966,45 +976,13 @@ public class XmlNode extends RubyObject
966
976
  return doc;
967
977
  }
968
978
 
979
+ @JRubyMethod(visibility = Visibility.PROTECTED)
969
980
  public IRubyObject
970
- dup()
971
- {
972
- return dup_implementation(getMetaClass().getClassRuntime(), true);
973
- }
974
-
975
- @JRubyMethod
976
- public IRubyObject
977
- dup(ThreadContext context)
978
- {
979
- return dup_implementation(context, true);
980
- }
981
-
982
- @JRubyMethod
983
- public IRubyObject
984
- dup(ThreadContext context, IRubyObject depth)
981
+ initialize_copy_with_args(ThreadContext context, IRubyObject other, IRubyObject level, IRubyObject _ignored)
985
982
  {
986
- boolean deep = depth instanceof RubyInteger && RubyFixnum.fix2int(depth) != 0;
987
- return dup_implementation(context, deep);
988
- }
989
-
990
- protected final IRubyObject
991
- dup_implementation(ThreadContext context, boolean deep)
992
- {
993
- return dup_implementation(context.runtime, deep);
994
- }
995
-
996
- protected IRubyObject
997
- dup_implementation(Ruby runtime, boolean deep)
998
- {
999
- XmlNode clone;
1000
- try {
1001
- clone = (XmlNode) clone();
1002
- } catch (CloneNotSupportedException e) {
1003
- throw runtime.newRuntimeError(e.toString());
1004
- }
1005
- Node newNode = node.cloneNode(deep);
1006
- clone.node = newNode;
1007
- return clone;
983
+ boolean deep = level instanceof RubyInteger && RubyFixnum.fix2int(level) != 0;
984
+ this.node = asXmlNode(context, other).node.cloneNode(deep);
985
+ return this;
1008
986
  }
1009
987
 
1010
988
  public static RubyString
@@ -1447,11 +1425,12 @@ public class XmlNode extends RubyObject
1447
1425
  }
1448
1426
  }
1449
1427
 
1450
- if (uri != null) {
1451
- element.setAttributeNS(uri, key, val);
1452
- } else {
1428
+ if (colonIndex > 0 && uri == null) {
1453
1429
  element.setAttribute(key, val);
1430
+ } else {
1431
+ element.setAttributeNS(uri, key, val);
1454
1432
  }
1433
+
1455
1434
  clearXpathContext(node);
1456
1435
  }
1457
1436
 
@@ -15,6 +15,7 @@ import org.jruby.RubyRange;
15
15
  import org.jruby.anno.JRubyClass;
16
16
  import org.jruby.anno.JRubyMethod;
17
17
  import org.jruby.runtime.ThreadContext;
18
+ import org.jruby.runtime.Visibility;
18
19
  import org.jruby.runtime.builtin.IRubyObject;
19
20
  import org.w3c.dom.Node;
20
21
  import org.w3c.dom.NodeList;
@@ -201,7 +202,6 @@ public class XmlNodeSet extends RubyObject implements NodeList
201
202
  return context.nil;
202
203
  }
203
204
 
204
- @JRubyMethod
205
205
  public IRubyObject
206
206
  dup(ThreadContext context)
207
207
  {
@@ -210,6 +210,15 @@ public class XmlNodeSet extends RubyObject implements NodeList
210
210
  return dup;
211
211
  }
212
212
 
213
+ @JRubyMethod(visibility = Visibility.PROTECTED)
214
+ public IRubyObject
215
+ initialize_copy(ThreadContext context, IRubyObject other)
216
+ {
217
+ setNodes(getNodes(context, other));
218
+ initializeFrom(context, (XmlNodeSet)other);
219
+ return this;
220
+ }
221
+
213
222
  @JRubyMethod(name = "include?")
214
223
  public IRubyObject
215
224
  include_p(ThreadContext context, IRubyObject node_or_namespace)
@@ -1,10 +1,14 @@
1
1
  package nokogiri;
2
2
 
3
3
  import nokogiri.internals.*;
4
+ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
5
+
4
6
  import org.apache.xerces.parsers.AbstractSAXParser;
5
7
  import org.jruby.Ruby;
6
8
  import org.jruby.RubyClass;
9
+ import org.jruby.RubyEncoding;
7
10
  import org.jruby.RubyFixnum;
11
+ import org.jruby.RubyString;
8
12
  import org.jruby.anno.JRubyClass;
9
13
  import org.jruby.anno.JRubyMethod;
10
14
  import org.jruby.exceptions.RaiseException;
@@ -14,6 +18,7 @@ import org.jruby.runtime.builtin.IRubyObject;
14
18
  import org.xml.sax.SAXException;
15
19
  import org.xml.sax.SAXParseException;
16
20
 
21
+ import java.io.ByteArrayInputStream;
17
22
  import java.io.IOException;
18
23
  import java.io.InputStream;
19
24
 
@@ -43,7 +48,7 @@ public class XmlSaxParserContext extends ParserContext
43
48
 
44
49
  protected NokogiriHandler handler;
45
50
  protected NokogiriErrorHandler errorHandler;
46
- private boolean replaceEntities = true;
51
+ private boolean replaceEntities = false;
47
52
  private boolean recovery = false;
48
53
 
49
54
  public
@@ -90,16 +95,26 @@ public class XmlSaxParserContext extends ParserContext
90
95
  * Create a new parser context that will parse the string
91
96
  * <code>data</code>.
92
97
  */
93
- @JRubyMethod(name = "memory", meta = true)
98
+ @JRubyMethod(name = "native_memory", meta = true)
94
99
  public static IRubyObject
95
- parse_memory(ThreadContext context,
96
- IRubyObject klazz,
97
- IRubyObject data)
100
+ parse_memory(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
98
101
  {
99
- final Ruby runtime = context.runtime;
100
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
101
- ctx.initialize(runtime);
102
- ctx.setStringInputSource(context, data, runtime.getNil());
102
+ String java_encoding = null;
103
+ if (encoding != context.runtime.getNil()) {
104
+ if (!(encoding instanceof RubyEncoding)) {
105
+ throw context.runtime.newTypeError("encoding must be kind_of Encoding");
106
+ }
107
+ java_encoding = ((RubyEncoding)encoding).toString();
108
+ }
109
+
110
+ XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
111
+ ctx.initialize(context.runtime);
112
+ ctx.setStringInputSourceNoEnc(context, data, context.runtime.getNil());
113
+
114
+ if (java_encoding != null) {
115
+ ctx.getInputSource().setEncoding(java_encoding);
116
+ }
117
+
103
118
  return ctx;
104
119
  }
105
120
 
@@ -107,16 +122,26 @@ public class XmlSaxParserContext extends ParserContext
107
122
  * Create a new parser context that will read from the file
108
123
  * <code>data</code> and parse.
109
124
  */
110
- @JRubyMethod(name = "file", meta = true)
125
+ @JRubyMethod(name = "native_file", meta = true)
111
126
  public static IRubyObject
112
- parse_file(ThreadContext context,
113
- IRubyObject klazz,
114
- IRubyObject data)
127
+ parse_file(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
115
128
  {
116
- final Ruby runtime = context.runtime;
117
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
118
- ctx.initialize(context.getRuntime());
129
+ String java_encoding = null;
130
+ if (encoding != context.runtime.getNil()) {
131
+ if (!(encoding instanceof RubyEncoding)) {
132
+ throw context.runtime.newTypeError("encoding must be kind_of Encoding");
133
+ }
134
+ java_encoding = ((RubyEncoding)encoding).toString();
135
+ }
136
+
137
+ XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
138
+ ctx.initialize(context.runtime);
119
139
  ctx.setInputSourceFile(context, data);
140
+
141
+ if (java_encoding != null) {
142
+ ctx.getInputSource().setEncoding(java_encoding);
143
+ }
144
+
120
145
  return ctx;
121
146
  }
122
147
 
@@ -126,21 +151,30 @@ public class XmlSaxParserContext extends ParserContext
126
151
  *
127
152
  * TODO: Currently ignores encoding <code>enc</code>.
128
153
  */
129
- @JRubyMethod(name = "io", meta = true)
154
+ @JRubyMethod(name = "native_io", meta = true)
130
155
  public static IRubyObject
131
- parse_io(ThreadContext context,
132
- IRubyObject klazz,
133
- IRubyObject data,
134
- IRubyObject encoding)
156
+ parse_io(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
135
157
  {
136
- // check the type of the unused encoding to match behavior of CRuby
137
- if (!(encoding instanceof RubyFixnum)) {
138
- throw context.getRuntime().newTypeError("encoding must be kind_of String");
158
+ if (!invoke(context, data, "respond_to?", context.runtime.newSymbol("read")).isTrue()) {
159
+ throw context.runtime.newTypeError("argument expected to respond to :read");
139
160
  }
140
- final Ruby runtime = context.runtime;
141
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
142
- ctx.initialize(runtime);
143
- ctx.setIOInputSource(context, data, runtime.getNil());
161
+
162
+ String java_encoding = null;
163
+ if (encoding != context.runtime.getNil()) {
164
+ if (!(encoding instanceof RubyEncoding)) {
165
+ throw context.runtime.newTypeError("encoding must be kind_of Encoding");
166
+ }
167
+ java_encoding = ((RubyEncoding)encoding).toString();
168
+ }
169
+
170
+ XmlSaxParserContext ctx = newInstance(context.runtime, (RubyClass) klazz);
171
+ ctx.initialize(context.runtime);
172
+ ctx.setIOInputSource(context, data, context.runtime.getNil());
173
+
174
+ if (java_encoding != null) {
175
+ ctx.getInputSource().setEncoding(java_encoding);
176
+ }
177
+
144
178
  return ctx;
145
179
  }
146
180
 
@@ -222,9 +256,12 @@ public class XmlSaxParserContext extends ParserContext
222
256
 
223
257
  /* TODO: how should we pass in parse options? */
224
258
  ParserContext.Options options = defaultParseOptions(context);
259
+ if (replaceEntities) {
260
+ options.noEnt = true;
261
+ }
225
262
 
226
263
  errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning);
227
- handler = new NokogiriHandler(runtime, handlerRuby, errorHandler);
264
+ handler = new NokogiriHandler(runtime, handlerRuby, errorHandler, options.noEnt);
228
265
 
229
266
  preParse(runtime, handlerRuby, handler);
230
267
  parser.setContentHandler(handler);
@@ -233,6 +270,7 @@ public class XmlSaxParserContext extends ParserContext
233
270
 
234
271
  try {
235
272
  parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
273
+ parser.setProperty("http://xml.org/sax/properties/declaration-handler", handler);
236
274
  } catch (Exception ex) {
237
275
  throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
238
276
  }
@@ -241,16 +279,15 @@ public class XmlSaxParserContext extends ParserContext
241
279
  try {
242
280
  do_parse();
243
281
  } catch (SAXParseException ex) {
244
- // A bad document (<foo><bar></foo>) should call the
245
- // error handler instead of raising a SAX exception.
246
-
247
- // However, an EMPTY document should raise a RuntimeError.
248
- // This is a bit kludgy, but AFAIK SAX doesn't distinguish
249
- // between empty and bad whereas Nokogiri does.
282
+ // An EMPTY document should raise a RuntimeError. This is a bit kludgy, but AFAIK SAX
283
+ // doesn't distinguish between empty and bad whereas Nokogiri does.
250
284
  String message = ex.getMessage();
251
285
  if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) {
252
- throw runtime.newRuntimeError("couldn't parse document: " + message);
286
+ throw runtime.newRuntimeError("input string cannot be empty");
253
287
  }
288
+
289
+ // A bad document (<foo><bar></foo>) should call the
290
+ // error handler instead of raising a SAX exception.
254
291
  handler.error(ex);
255
292
  }
256
293
  } catch (SAXException ex) {
@@ -175,19 +175,6 @@ public class XmlSchema extends RubyObject
175
175
  return getSchema(context, (RubyClass)klazz, source, parseOptions);
176
176
  }
177
177
 
178
- @JRubyMethod(meta = true, required = 1, optional = 1)
179
- public static IRubyObject
180
- read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
181
- {
182
- IRubyObject content = args[0];
183
- IRubyObject parseOptions = null;
184
- if (args.length > 1) {
185
- parseOptions = args[1];
186
- }
187
- String data = content.convertToString().asJavaString();
188
- return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions);
189
- }
190
-
191
178
  private static IRubyObject
192
179
  getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions)
193
180
  {
@@ -215,14 +202,22 @@ public class XmlSchema extends RubyObject
215
202
 
216
203
  XmlDomParserContext ctx = new XmlDomParserContext(runtime, RubyFixnum.newFixnum(runtime, 1L));
217
204
  ctx.setInputSourceFile(context, file);
218
- XmlDocument xmlDocument = ctx.parse(context, getNokogiriClass(runtime, "Nokogiri::XML::Document"), context.nil);
219
- return validate_document_or_file(context, xmlDocument);
205
+ try {
206
+ XmlDocument xmlDocument = ctx.parse(context, getNokogiriClass(runtime, "Nokogiri::XML::Document"), context.nil);
207
+ return validate_document_or_file(context, xmlDocument);
208
+ } catch (Exception ex) {
209
+ RubyArray errors = (RubyArray)context.runtime.newEmptyArray();
210
+ XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
211
+ xmlSyntaxError.setException(ex);
212
+ errors.append(xmlSyntaxError);
213
+ return errors;
214
+ }
220
215
  }
221
216
 
222
217
  IRubyObject
223
218
  validate_document_or_file(ThreadContext context, XmlDocument xmlDocument)
224
219
  {
225
- RubyArray<?> errors = (RubyArray) this.getInstanceVariable("@errors");
220
+ RubyArray errors = context.runtime.newEmptyArray();
226
221
  ErrorHandler errorHandler = new SchemaErrorHandler(context.runtime, errors);
227
222
  setErrorHandler(errorHandler);
228
223
 
@@ -248,6 +243,10 @@ public class XmlSchema extends RubyObject
248
243
  protected void
249
244
  validate(Document document) throws SAXException, IOException
250
245
  {
246
+ if (document.getDocumentElement() == null) {
247
+ throw new SAXException("Document is empty");
248
+ }
249
+
251
250
  DOMSource docSource = new DOMSource(document);
252
251
  validator.validate(docSource);
253
252
  }
@@ -135,7 +135,7 @@ public class XsltStylesheet extends RubyObject
135
135
  XmlDocument xmlDoc = (XmlDocument) args[0];
136
136
  ensureDocumentHasNoError(context, xmlDoc);
137
137
 
138
- Document doc = ((XmlDocument) xmlDoc.dup_implementation(context, true)).getDocument();
138
+ Document doc = ((XmlDocument)xmlDoc.callMethod(context, "dup", runtime.newFixnum(1))).getDocument();
139
139
 
140
140
  XsltStylesheet xslt =
141
141
  (XsltStylesheet) NokogiriService.XSLT_STYLESHEET_ALLOCATOR.allocate(runtime, (RubyClass)klazz);
@@ -87,7 +87,7 @@ public class NokogiriBlockingQueueInputStream extends InputStream
87
87
 
88
88
  /**
89
89
  * This method shouldn't be called unless the parser has finished parsing or
90
- * threw an exception while doing so, otherwise, there'll be the protential
90
+ * threw an exception while doing so, otherwise, there'll be the potential
91
91
  * that the read method will block indefinitely.
92
92
  */
93
93
  @Override
@@ -66,7 +66,7 @@ public class NokogiriDomParser extends DOMParser
66
66
  {
67
67
  dtd.reset();
68
68
  if (xInclude) {
69
- setEntityResolver(new NokogiriXInlcudeEntityResolver(source));
69
+ setEntityResolver(new NokogiriXIncludeEntityResolver(source));
70
70
  }
71
71
  super.parse(source);
72
72
  Document doc = getDocument();
@@ -77,11 +77,11 @@ public class NokogiriDomParser extends DOMParser
77
77
  doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
78
78
  }
79
79
 
80
- private static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver
80
+ private static class NokogiriXIncludeEntityResolver implements org.xml.sax.EntityResolver
81
81
  {
82
82
  InputSource source;
83
83
  private
84
- NokogiriXInlcudeEntityResolver(InputSource source)
84
+ NokogiriXIncludeEntityResolver(InputSource source)
85
85
  {
86
86
  this.source = source;
87
87
  }
@@ -42,27 +42,26 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
42
42
 
43
43
  private Locator locator;
44
44
  private boolean needEmptyAttrCheck;
45
+ private boolean replaceEntities;
46
+ private Set<String> entities = new HashSet<String>();
45
47
 
46
48
  public
47
- NokogiriHandler(Ruby runtime, IRubyObject object, NokogiriErrorHandler errorHandler)
49
+ NokogiriHandler(Ruby runtime,
50
+ IRubyObject object,
51
+ NokogiriErrorHandler errorHandler,
52
+ boolean replaceEntities)
48
53
  {
49
54
  assert object != null;
50
55
  this.runtime = runtime;
51
56
  this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute");
52
57
  this.object = object;
53
58
  this.errorHandler = errorHandler;
59
+ this.replaceEntities = replaceEntities;
54
60
  charactersBuilder = new StringBuilder();
55
61
  String objectName = object.getMetaClass().getName();
56
62
  if ("Nokogiri::HTML4::SAX::Parser".equals(objectName)) { needEmptyAttrCheck = true; }
57
63
  }
58
64
 
59
- @Override
60
- public void
61
- skippedEntity(String skippedEntity)
62
- {
63
- call("error", runtime.newString("Entity '" + skippedEntity + "' not defined\n"));
64
- }
65
-
66
65
  @Override
67
66
  public void
68
67
  setDocumentLocator(Locator locator)
@@ -88,7 +87,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
88
87
  public void
89
88
  endDocument()
90
89
  {
91
- populateCharacters();
90
+ flushCharacters();
92
91
  call("end_document");
93
92
  }
94
93
 
@@ -161,12 +160,12 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
161
160
  }
162
161
 
163
162
  if (localName == null || localName.isEmpty()) { localName = getLocalPart(qName); }
164
- populateCharacters();
163
+ flushCharacters();
165
164
  call("start_element_namespace",
166
165
  stringOrNil(runtime, localName),
167
166
  rubyAttr,
168
167
  stringOrNil(runtime, getPrefix(qName)),
169
- stringOrNil(runtime, uri),
168
+ uri.length() > 0 ? stringOrNil(runtime, uri) : runtime.getNil(),
170
169
  rubyNSAttr);
171
170
  }
172
171
 
@@ -204,7 +203,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
204
203
  public void
205
204
  endElement(String uri, String localName, String qName)
206
205
  {
207
- populateCharacters();
206
+ flushCharacters();
208
207
  call("end_element_namespace",
209
208
  stringOrNil(runtime, localName),
210
209
  stringOrNil(runtime, getPrefix(qName)),
@@ -218,11 +217,56 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
218
217
  charactersBuilder.append(ch, start, length);
219
218
  }
220
219
 
220
+ @Override
221
+ public void
222
+ externalEntityDecl(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
223
+ throws SAXException
224
+ {
225
+ entities.add(name);
226
+ }
227
+
228
+ @Override
229
+ public void
230
+ internalEntityDecl(java.lang.String name, java.lang.String value)
231
+ throws SAXException
232
+ {
233
+ entities.add(name);
234
+ }
235
+
236
+ @Override
237
+ public void
238
+ skippedEntity(String name)
239
+ {
240
+ call("error", runtime.newString("Entity '" + name + "' not defined\n"));
241
+ if (!replaceEntities) {
242
+ call("reference", runtime.newString(name), runtime.getNil());
243
+ }
244
+ }
245
+
246
+ @Override
247
+ public void
248
+ startEntity(String name)
249
+ {
250
+ flushCharacters();
251
+ }
252
+
253
+ @Override
254
+ public void
255
+ endEntity(String name)
256
+ {
257
+ IRubyObject content = charactersBuilder.length() > 0 ? runtime.newString(charactersBuilder.toString()) :
258
+ runtime.getNil();
259
+ if (!replaceEntities && entities.contains(name)) {
260
+ call("reference", runtime.newString(name), content);
261
+ }
262
+ flushCharacters();
263
+ }
264
+
221
265
  @Override
222
266
  public void
223
267
  comment(char[] ch, int start, int length)
224
268
  {
225
- populateCharacters();
269
+ flushCharacters();
226
270
  call("comment", runtime.newString(new String(ch, start, length)));
227
271
  }
228
272
 
@@ -230,7 +274,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
230
274
  public void
231
275
  startCDATA()
232
276
  {
233
- populateCharacters();
277
+ flushCharacters();
234
278
  }
235
279
 
236
280
  @Override
@@ -329,7 +373,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler
329
373
  }
330
374
 
331
375
  protected void
332
- populateCharacters()
376
+ flushCharacters()
333
377
  {
334
378
  if (charactersBuilder.length() > 0) {
335
379
  call("characters", runtime.newString(charactersBuilder.toString()));
@@ -724,7 +724,7 @@ public class NokogiriHelpers
724
724
  convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str)
725
725
  {
726
726
  if (!(doc instanceof Html4Document)) { return str; }
727
- String parsed_encoding = ((Html4Document)doc).getPraedEncoding();
727
+ String parsed_encoding = ((Html4Document)doc).getParsedEncoding();
728
728
  if (parsed_encoding == null) { return str; }
729
729
  String ruby_encoding = rubyStringToString(doc.getEncoding());
730
730
  if (ruby_encoding == null) { return str; }