nokogiri 1.19.3-java → 1.19.4-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2bb4f7cf56ac58bc4e99300bd71fe3d36420c82b98cbe7f01c4cf6473ce4baf3
4
- data.tar.gz: f3eb382fa3fcb71e4c8376e503d8a7815890934279e43679c7d48bb1b34c9aca
3
+ metadata.gz: 32e93c6e2814b80a5552965415a65afff14ae5c0253df5b08fdc0e036dd67a1b
4
+ data.tar.gz: 61a91323350c59f22132ca3e67266dc46f9d69bbd7ecec92c192d7a079b84c33
5
5
  SHA512:
6
- metadata.gz: b91a5e7138ed2e23eae23a550079089a4b11c48737772ef696fc5696a2df0e5cd67a928f3cff8834ed678cfa09ef2a3dc39aac2ecbdbdcae68327d507a402456
7
- data.tar.gz: f50bd24b0f17b221e477f6be5d388a8dd02d9a2ae8bf0b60f34db93dce64ba002f152286b0a1e439527915e29ac0739b2f8713e874aa4521147ac52875c4d3a7
6
+ metadata.gz: daeef7e9be94229f49fba17511aa0f3be1fefe88a82cac0c94a2ea3b5452e6c937584ec6f1383ac1cbbbcd223c46f91cceb1044e10068b9649ab64fb239d82c6
7
+ data.tar.gz: 7f5552b74774881aa06739963f1368b69b3b8e937fc3ce91a89e6c4fd55056acb1bb11d56f5916d35115bc862314eccc7a9cca8a2e4bcf794d88a195fd5de833
@@ -100,6 +100,9 @@ public class XmlAttr extends XmlNode
100
100
  public IRubyObject
101
101
  value_set(ThreadContext context, IRubyObject content)
102
102
  {
103
+ if (node == null) {
104
+ throw context.runtime.newRuntimeError("Uninitialized " + getMetaClass().getRealClass().getName() + " struct (null data pointer)");
105
+ }
103
106
  Attr attr = (Attr) node;
104
107
  if (content != null && !content.isNil()) {
105
108
  attr.setValue(rubyStringToString(XmlNode.encode_special_chars(context, content)));
@@ -319,7 +319,7 @@ public class XmlDocument extends XmlNode
319
319
  public IRubyObject
320
320
  encoding_set(IRubyObject encoding)
321
321
  {
322
- this.encoding = encoding;
322
+ this.encoding = encoding.convertToString();
323
323
  return this;
324
324
  }
325
325
 
@@ -441,6 +441,10 @@ public class XmlDocument extends XmlNode
441
441
  }
442
442
  XmlNode newRoot = asXmlNode(context, new_root);
443
443
 
444
+ if (newRoot.node.getNodeType() != Node.ELEMENT_NODE) {
445
+ throw context.runtime.newTypeError("root must be a Nokogiri::XML::Element");
446
+ }
447
+
444
448
  IRubyObject root = root(context);
445
449
  if (root.isNil()) {
446
450
  Node newRootNode;
@@ -326,16 +326,16 @@ public class XmlNodeSet extends RubyObject implements NodeList
326
326
  // https://github.com/jruby/jruby/blame/13a3ec76d883a162b9d46c374c6e9eeea27b3261/core/src/main/java/org/jruby/RubyRange.java#L974
327
327
  // once we upgraded the min JRuby version to >= 9.2
328
328
  private static IRubyObject
329
- rangeBeginLength(ThreadContext context, IRubyObject rangeMaybe, int len, int[] begLen)
329
+ rangeBeginLength(ThreadContext context, IRubyObject rangeMaybe, int len, long[] begLen)
330
330
  {
331
331
  RubyRange range = (RubyRange) rangeMaybe;
332
- int min = range.begin(context).convertToInteger().getIntValue();
333
- int max = range.end(context).convertToInteger().getIntValue();
332
+ long min = range.begin(context).convertToInteger().getLongValue();
333
+ long max = range.end(context).convertToInteger().getLongValue();
334
334
 
335
335
  if (min < 0) {
336
336
  min += len;
337
337
  if (min < 0) {
338
- throw context.runtime.newRangeError(min + ".." + (range.isExcludeEnd() ? "." : "") + max + " out of range");
338
+ return context.nil;
339
339
  }
340
340
  }
341
341
 
@@ -358,20 +358,22 @@ public class XmlNodeSet extends RubyObject implements NodeList
358
358
  slice(ThreadContext context, IRubyObject indexOrRange)
359
359
  {
360
360
  if (indexOrRange instanceof RubyFixnum) {
361
- return slice(context, ((RubyFixnum) indexOrRange).getIntValue());
361
+ return slice(context, ((RubyFixnum) indexOrRange).getLongValue());
362
362
  }
363
363
  if (indexOrRange instanceof RubyRange) {
364
- int[] begLen = new int[2];
365
- rangeBeginLength(context, indexOrRange, nodes.length, begLen);
366
- int min = begLen[0];
367
- int max = begLen[1];
364
+ long[] begLen = new long[2];
365
+ if (rangeBeginLength(context, indexOrRange, nodes.length, begLen).isNil()) {
366
+ return context.nil;
367
+ }
368
+ long min = begLen[0];
369
+ long max = begLen[1];
368
370
  return subseq(context, min, max - min);
369
371
  }
370
372
  throw context.runtime.newTypeError("index must be an Integer or a Range");
371
373
  }
372
374
 
373
375
  IRubyObject
374
- slice(ThreadContext context, int idx)
376
+ slice(ThreadContext context, long idx)
375
377
  {
376
378
  if (idx < 0) {
377
379
  idx += nodes.length;
@@ -381,15 +383,15 @@ public class XmlNodeSet extends RubyObject implements NodeList
381
383
  return context.nil;
382
384
  }
383
385
 
384
- return nodes[idx];
386
+ return nodes[(int) idx];
385
387
  }
386
388
 
387
389
  @JRubyMethod(name = {"[]", "slice"})
388
390
  public IRubyObject
389
391
  slice(ThreadContext context, IRubyObject start, IRubyObject length)
390
392
  {
391
- int s = ((RubyFixnum) start).getIntValue();
392
- int l = ((RubyFixnum) length).getIntValue();
393
+ long s = ((RubyFixnum) start).getLongValue();
394
+ long l = ((RubyFixnum) length).getLongValue();
393
395
 
394
396
  if (s < 0) {
395
397
  s += nodes.length;
@@ -399,23 +401,15 @@ public class XmlNodeSet extends RubyObject implements NodeList
399
401
  }
400
402
 
401
403
  public IRubyObject
402
- subseq(ThreadContext context, int start, int length)
404
+ subseq(ThreadContext context, long start, long length)
403
405
  {
404
- if (start > nodes.length) {
405
- return context.nil;
406
- }
407
-
408
- if (start < 0 || length < 0) {
406
+ if (start < 0 || length < 0 || start > nodes.length) {
409
407
  return context.nil;
410
408
  }
411
409
 
412
- if (start + length > nodes.length) {
413
- length = nodes.length - start;
414
- }
415
-
416
- int to = start + length;
410
+ long end = start + Math.min(length, nodes.length - start);
417
411
 
418
- return newNodeSet(context.runtime, Arrays.copyOfRange(nodes, start, to));
412
+ return newNodeSet(context.runtime, Arrays.copyOfRange(nodes, (int) start, (int) end));
419
413
  }
420
414
 
421
415
  @JRubyMethod(name = {"to_a", "to_ary"})
@@ -8,6 +8,9 @@ import java.io.InputStream;
8
8
  import java.io.Reader;
9
9
  import java.io.StringReader;
10
10
 
11
+ import java.net.URI;
12
+ import java.net.URISyntaxException;
13
+
11
14
  import javax.xml.XMLConstants;
12
15
  import javax.xml.transform.Source;
13
16
  import javax.xml.transform.dom.DOMSource;
@@ -285,24 +288,103 @@ public class XmlSchema extends RubyObject
285
288
  String systemId,
286
289
  String baseURI)
287
290
  {
288
- if (noNet && systemId != null && (systemId.startsWith("http://") || systemId.startsWith("ftp://"))) {
289
- if (systemId.startsWith(XMLConstants.W3C_XML_SCHEMA_NS_URI)) {
290
- return null; // use default resolver
291
- }
291
+ if (noNet && !effectiveResourceIsLocal(systemId, baseURI)) {
292
292
  try {
293
293
  this.errorHandler.warning(new SAXParseException(String.format("Attempt to load network entity '%s'", systemId), null));
294
294
  } catch (SAXException ex) {
295
295
  }
296
- } else {
297
- String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId);
298
- lsInput.setPublicId(publicId);
299
- lsInput.setSystemId(adjusted != null ? adjusted : systemId);
300
- lsInput.setBaseURI(baseURI);
296
+ return new SchemaLSInput(); // an empty input blocks the fetch
301
297
  }
298
+
299
+ String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId);
300
+ lsInput.setPublicId(publicId);
301
+ lsInput.setSystemId(adjusted != null ? adjusted : systemId);
302
+ lsInput.setBaseURI(baseURI);
302
303
  return lsInput;
303
304
  }
304
305
  }
305
306
 
307
+ // We enforce NONET for schema resolution by hand because Xerces-J (the JAXP implementation
308
+ // backing XML::Schema on JRuby) does not implement the standard JAXP property
309
+ // XMLConstants.ACCESS_EXTERNAL_SCHEMA — so we cannot simply restrict external access on the
310
+ // SchemaFactory and must classify each resolved resource in the LSResourceResolver instead.
311
+ //
312
+ // Decides whether a schema-import resource may be resolved while NONET is on: true means
313
+ // local (allowed), false means a network resource (blocked). A relative systemId inherits
314
+ // its document's base, so it is resolved against baseURI before classification — a relative
315
+ // import under a remote base is a network fetch even though the systemId alone looks local.
316
+ private static boolean
317
+ effectiveResourceIsLocal(String systemId, String baseURI)
318
+ {
319
+ // a null systemId means there is nothing external to resolve
320
+ if (systemId == null) {
321
+ return true;
322
+ }
323
+ try {
324
+ URI uri = new URI(systemId);
325
+ if (baseURI != null && !baseURI.isEmpty()) {
326
+ uri = new URI(baseURI).resolve(uri);
327
+ }
328
+ return isLocalResource(uri);
329
+ } catch (URISyntaxException | IllegalArgumentException e) {
330
+ // fail closed: an unparseable base or systemId (e.g. a raw UNC path "\\host\share") is
331
+ // not provably local, and the JVM's file/URL handling may still reach the network
332
+ return false;
333
+ }
334
+ }
335
+
336
+ // Test seam for the Ruby suite: local_resource?(systemId, baseURI = nil).
337
+ @JRubyMethod(meta = true, name = "local_resource?", required = 1, optional = 1, visibility = Visibility.PRIVATE)
338
+ public static IRubyObject
339
+ local_resource_eh(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
340
+ {
341
+ String systemId = args[0].isNil() ? null : args[0].asJavaString();
342
+ String baseURI = (args.length > 1 && !args[1].isNil()) ? args[1].asJavaString() : null;
343
+ return context.runtime.newBoolean(effectiveResourceIsLocal(systemId, baseURI));
344
+ }
345
+
346
+ // Classifies an already-parsed URI. Local is a missing scheme, or the "file" scheme, with
347
+ // no remote authority and no UNC-shaped path. This is intentionally stricter than libxml2's
348
+ // xmlNoNetExternalEntityLoader, which folds a remote host (file://host/...) into a local
349
+ // path rather than rejecting it.
350
+ //
351
+ // TODO: a Windows drive-letter path like "C:\path" parses as scheme "c" and would be
352
+ // blocked; support those if we need it later.
353
+ private static boolean
354
+ isLocalResource(URI uri)
355
+ {
356
+ // only a missing scheme (a relative or absolute path) or file: can be local; any
357
+ // other scheme is a network resource
358
+ String scheme = uri.getScheme();
359
+ if (scheme != null && !scheme.equalsIgnoreCase("file")) {
360
+ return false;
361
+ }
362
+
363
+ // an opaque "file:" URI (e.g. file:foo, with no "//") is not a usable local path; reject
364
+ // it, matching libxml2, which does not resolve that form as a local file either
365
+ if (uri.isOpaque()) {
366
+ return false;
367
+ }
368
+
369
+ // a non-empty, non-localhost authority is a remote host — file://host/path, or the
370
+ // schemeless network-path form //host/path. Stricter than libxml2, which folds such a
371
+ // host into a (failing) local path.
372
+ String authority = uri.getRawAuthority();
373
+ if (authority != null && !authority.isEmpty() && !authority.equalsIgnoreCase("localhost")) {
374
+ return false;
375
+ }
376
+
377
+ // reject UNC-shaped paths even under an allowed authority: file:////host/share,
378
+ // file://localhost//host/share, and %2f/%5c-encoded variants. getPath() is decoded, so
379
+ // the encoded forms are normalized before this check.
380
+ String path = uri.getPath();
381
+ if (path != null && (path.startsWith("//") || path.indexOf('\\') >= 0)) {
382
+ return false;
383
+ }
384
+
385
+ return true;
386
+ }
387
+
306
388
  private class SchemaLSInput implements LSInput
307
389
  {
308
390
  protected String fPublicId;
@@ -203,9 +203,6 @@ Init_nokogiri(void)
203
203
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
204
204
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
205
205
 
206
- rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
207
- xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
208
-
209
206
  #ifdef NOKOGIRI_PACKAGED_LIBRARIES
210
207
  rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
211
208
  # ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
@@ -228,6 +225,12 @@ Init_nokogiri(void)
228
225
  rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
229
226
  #endif
230
227
 
228
+ rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
229
+ xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
230
+
231
+ rb_const_set(mNokogiri, rb_intern("LIBXML_HTTP_ENABLED"),
232
+ xmlHasFeature(XML_WITH_HTTP) == 1 ? Qtrue : Qfalse);
233
+
231
234
  #ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
232
235
  rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
233
236
  #endif
@@ -10,37 +10,42 @@ VALUE cNokogiriXmlAttr;
10
10
  * (e.g., a HTML boolean attribute).
11
11
  */
12
12
  static VALUE
13
- set_value(VALUE self, VALUE content)
13
+ noko_xml_attr_set_value(VALUE self, VALUE content)
14
14
  {
15
15
  xmlAttrPtr attr;
16
- xmlChar *value;
17
- xmlNode *cur;
18
16
 
19
17
  Noko_Node_Get_Struct(self, xmlAttr, attr);
20
18
 
21
- if (attr->children) {
22
- xmlFreeNodeList(attr->children);
19
+ {
20
+ /* Unlink and pin any wrapped children */
21
+ xmlNode *cur = attr->children;
22
+ xmlNode *next;
23
+
24
+ while (cur) {
25
+ next = cur->next;
26
+ if (cur->_private) {
27
+ xmlUnlinkNode(cur);
28
+ noko_xml_document_pin_node(cur);
29
+ }
30
+ cur = next;
31
+ }
23
32
  }
24
- attr->children = attr->last = NULL;
25
33
 
26
34
  if (content == Qnil) {
27
- return content;
28
- }
29
-
30
- value = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValueCStr(content));
31
- if (xmlStrlen(value) == 0) {
32
- attr->children = xmlNewDocText(attr->doc, value);
35
+ xmlNodeSetContent((xmlNodePtr)attr, NULL); /* Clear any remaining unwrapped children. */
33
36
  } else {
34
- attr->children = xmlStringGetNodeList(attr->doc, value);
35
- }
36
- xmlFree(value);
37
+ xmlChar *value = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValueCStr(content));
38
+
39
+ if (xmlStrlen(value) == 0) {
40
+ xmlNodeSetContent((xmlNodePtr)attr, NULL); /* Clear any remaining unwrapped children. */
37
41
 
38
- for (cur = attr->children; cur; cur = cur->next) {
39
- cur->parent = (xmlNode *)attr;
40
- cur->doc = attr->doc;
41
- if (cur->next == NULL) {
42
- attr->last = cur;
42
+ /* Preserve empty-string attributes as `foo=""` and not boolean `foo` */
43
+ attr->children = attr->last = xmlNewDocText(attr->doc, value);
44
+ attr->children->parent = (xmlNode *)attr;
45
+ } else {
46
+ xmlNodeSetContent((xmlNodePtr)attr, value);
43
47
  }
48
+ xmlFree(value);
44
49
  }
45
50
 
46
51
  return content;
@@ -53,7 +58,7 @@ set_value(VALUE self, VALUE content)
53
58
  * Create a new Attr element on the +document+ with +name+
54
59
  */
55
60
  static VALUE
56
- new (int argc, VALUE *argv, VALUE klass)
61
+ noko_xml_attr__new(int argc, VALUE *argv, VALUE klass)
57
62
  {
58
63
  xmlDocPtr xml_doc;
59
64
  VALUE document;
@@ -97,7 +102,7 @@ noko_init_xml_attr(void)
97
102
  */
98
103
  cNokogiriXmlAttr = rb_define_class_under(mNokogiriXml, "Attr", cNokogiriXmlNode);
99
104
 
100
- rb_define_singleton_method(cNokogiriXmlAttr, "new", new, -1);
105
+ rb_define_singleton_method(cNokogiriXmlAttr, "new", noko_xml_attr__new, -1);
101
106
 
102
- rb_define_method(cNokogiriXmlAttr, "value=", set_value, 1);
107
+ rb_define_method(cNokogiriXmlAttr, "value=", noko_xml_attr_set_value, 1);
103
108
  }
@@ -255,12 +255,6 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
255
255
 
256
256
  c_document = noko_xml_document_unwrap(self);
257
257
 
258
- c_current_root = xmlDocGetRootElement(c_document);
259
- if (c_current_root) {
260
- xmlUnlinkNode(c_current_root);
261
- noko_xml_document_pin_node(c_current_root);
262
- }
263
-
264
258
  if (!NIL_P(rb_new_root)) {
265
259
  if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
266
260
  rb_raise(rb_eArgError,
@@ -270,13 +264,23 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
270
264
 
271
265
  Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
272
266
 
273
- /* If the new root's document is not the same as the current document,
274
- * then we need to dup the node in to this document. */
275
- if (c_new_root->doc != c_document) {
276
- c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
277
- if (!c_new_root) {
278
- rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
279
- }
267
+ if (c_new_root->type != XML_ELEMENT_NODE) {
268
+ rb_raise(rb_eTypeError, "root must be a Nokogiri::XML::Element");
269
+ }
270
+ }
271
+
272
+ c_current_root = xmlDocGetRootElement(c_document);
273
+ if (c_current_root) {
274
+ xmlUnlinkNode(c_current_root);
275
+ noko_xml_document_pin_node(c_current_root);
276
+ }
277
+
278
+ /* If the new root's document is not the same as the current document,
279
+ * then we need to dup the node in to this document. */
280
+ if (c_new_root && c_new_root->doc != c_document) {
281
+ c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
282
+ if (!c_new_root) {
283
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
280
284
  }
281
285
  }
282
286
 
@@ -317,12 +321,13 @@ static VALUE
317
321
  set_encoding(VALUE self, VALUE encoding)
318
322
  {
319
323
  xmlDocPtr doc = noko_xml_document_unwrap(self);
324
+ xmlChar *new_encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
320
325
 
321
326
  if (doc->encoding) {
322
327
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
323
328
  }
324
329
 
325
- doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
330
+ doc->encoding = new_encoding;
326
331
 
327
332
  return encoding;
328
333
  }
@@ -708,6 +713,9 @@ noko_xml_document_unwrap(VALUE rb_document)
708
713
  {
709
714
  xmlDocPtr c_document;
710
715
  TypedData_Get_Struct(rb_document, xmlDoc, &xml_doc_type, c_document);
716
+ if (c_document == NULL) {
717
+ rb_raise(rb_eRuntimeError, "Uninitialized %" PRIsVALUE " struct (null data pointer)", rb_obj_class(rb_document));
718
+ }
711
719
  return c_document;
712
720
  }
713
721
 
@@ -971,6 +971,10 @@ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_le
971
971
  xmlDocPtr c_new_parent_doc;
972
972
  VALUE rb_node_cache;
973
973
 
974
+ if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNode)) {
975
+ rb_raise(rb_eTypeError, "argument must be a kind of Nokogiri::XML::Node");
976
+ }
977
+
974
978
  Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
975
979
  c_level = (int)NUM2INT(rb_level);
976
980
  c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
@@ -2150,25 +2154,20 @@ compare(VALUE self, VALUE _other)
2150
2154
 
2151
2155
 
2152
2156
  /*
2153
- * call-seq:
2154
- * process_xincludes(flags)
2155
- *
2156
- * Loads and substitutes all xinclude elements below the node. The
2157
- * parser context will be initialized with +flags+.
2157
+ * Run XInclude substitution over the tree rooted at +c_node+, with the parser context initialized
2158
+ * from +c_flags+. Collects libxml2's structured errors and raises Nokogiri::XML::SyntaxError (or
2159
+ * RuntimeError) on failure.
2158
2160
  */
2159
- static VALUE
2160
- noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2161
+ static void
2162
+ _noko_xml_node_process_xinclude_subtree(xmlNodePtr c_node, int c_flags)
2161
2163
  {
2162
2164
  int status ;
2163
- xmlNodePtr c_node;
2164
2165
  VALUE rb_errors = rb_ary_new();
2165
2166
  libxmlStructuredErrorHandlerState handler_state;
2166
2167
 
2167
- Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2168
-
2169
2168
  noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
2170
2169
 
2171
- status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
2170
+ status = xmlXIncludeProcessTreeFlags(c_node, c_flags);
2172
2171
 
2173
2172
  noko__structured_error_func_restore(&handler_state);
2174
2173
 
@@ -2181,11 +2180,77 @@ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2181
2180
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2182
2181
  }
2183
2182
  }
2183
+ }
2184
+
2185
+
2186
+ /*
2187
+ * Whether +c_node+ is an <xi:include> element in either the 2001 or 2003 XInclude namespace.
2188
+ */
2189
+ static int
2190
+ _noko_xml_node_xinclude_element_p(xmlNodePtr c_node)
2191
+ {
2192
+ return c_node->type == XML_ELEMENT_NODE
2193
+ && xmlStrEqual(c_node->name, XINCLUDE_NODE)
2194
+ && c_node->ns != NULL
2195
+ && (xmlStrEqual(c_node->ns->href, XINCLUDE_NS) || xmlStrEqual(c_node->ns->href, XINCLUDE_OLD_NS));
2196
+ }
2197
+
2198
+
2199
+ /*
2200
+ * call-seq:
2201
+ * process_xincludes(flags)
2202
+ *
2203
+ * Loads and substitutes all xinclude elements below the node. The
2204
+ * parser context will be initialized with +flags+.
2205
+ */
2206
+ static VALUE
2207
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2208
+ {
2209
+ xmlNodePtr c_node;
2210
+
2211
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2212
+
2213
+ if (c_node->parent == NULL && _noko_xml_node_xinclude_element_p(c_node)) {
2214
+ rb_raise(rb_eRuntimeError, "cannot process XInclude on an unlinked <xi:include> node");
2215
+ }
2216
+
2217
+ _noko_xml_node_process_xinclude_subtree(c_node, (int)NUM2INT(rb_flags));
2184
2218
 
2185
2219
  return rb_node;
2186
2220
  }
2187
2221
 
2188
2222
 
2223
+ /*
2224
+ * Process this single <xi:include> node, substituting an unwrapped copy of it in its place so
2225
+ * that libxml2 frees the copy. This node is unlinked and pinned to the document, so any Ruby
2226
+ * wrapper for it (or for its descendants or namespaces) keeps pointing at valid memory. The
2227
+ * parser context is initialized with +flags+.
2228
+ */
2229
+ static VALUE
2230
+ noko_xml_node__safe_process_xinclude(VALUE rb_node, VALUE rb_flags)
2231
+ {
2232
+ xmlNodePtr c_node, c_copy;
2233
+
2234
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2235
+
2236
+ if (c_node->parent == NULL) {
2237
+ rb_raise(rb_eRuntimeError, "cannot process XInclude on an unlinked <xi:include> node");
2238
+ }
2239
+
2240
+ c_copy = xmlDocCopyNode(c_node, c_node->doc, 1);
2241
+ if (c_copy == NULL) {
2242
+ rb_raise(rb_eRuntimeError, "Could not copy node for xinclude substitution");
2243
+ }
2244
+
2245
+ xmlReplaceNode(c_node, c_copy);
2246
+ noko_xml_document_pin_node(c_node);
2247
+
2248
+ _noko_xml_node_process_xinclude_subtree(c_copy, (int)NUM2INT(rb_flags));
2249
+
2250
+ return Qnil;
2251
+ }
2252
+
2253
+
2189
2254
  /* TODO: DOCUMENT ME */
2190
2255
  static VALUE
2191
2256
  in_context(VALUE self, VALUE _str, VALUE _options)
@@ -2286,9 +2351,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2286
2351
  VALUE
2287
2352
  rb_xml_node_data_ptr_eh(VALUE self)
2288
2353
  {
2289
- xmlNodePtr c_node;
2290
- Noko_Node_Get_Struct(self, xmlNode, c_node);
2291
- return c_node ? Qtrue : Qfalse;
2354
+ return DATA_PTR(self) ? Qtrue : Qfalse;
2292
2355
  }
2293
2356
 
2294
2357
  VALUE
@@ -2438,6 +2501,7 @@ noko_init_xml_node(void)
2438
2501
  rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2439
2502
 
2440
2503
  rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2504
+ rb_define_protected_method(cNokogiriXmlNode, "safe_process_xinclude", noko_xml_node__safe_process_xinclude, 1);
2441
2505
 
2442
2506
  rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2443
2507
  rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
@@ -304,7 +304,7 @@ index_at(VALUE rb_self, long offset)
304
304
 
305
305
  TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
306
306
 
307
- if (offset >= c_self->nodeNr || abs((int)offset) > c_self->nodeNr) {
307
+ if (offset >= c_self->nodeNr || offset < -c_self->nodeNr) {
308
308
  return Qnil;
309
309
  }
310
310
 
@@ -11,6 +11,15 @@ static const xmlChar *NOKOGIRI_URI = (const xmlChar *)"http://www.nokogiri.org/d
11
11
  static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
12
12
  static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
13
13
 
14
+ static void
15
+ _noko_xml_xpath_context_dmark(void *data)
16
+ {
17
+ xmlXPathContextPtr c_context = data;
18
+ if (c_context->doc && DOC_RUBY_OBJECT_TEST(c_context->doc)) {
19
+ rb_gc_mark(DOC_RUBY_OBJECT(c_context->doc));
20
+ }
21
+ }
22
+
14
23
  static void
15
24
  _noko_xml_xpath_context_dfree(void *data)
16
25
  {
@@ -21,9 +30,10 @@ _noko_xml_xpath_context_dfree(void *data)
21
30
  static const rb_data_type_t _noko_xml_xpath_context_type = {
22
31
  .wrap_struct_name = "xmlXPathContext",
23
32
  .function = {
33
+ .dmark = _noko_xml_xpath_context_dmark,
24
34
  .dfree = _noko_xml_xpath_context_dfree,
25
35
  },
26
- .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
36
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
27
37
  };
28
38
 
29
39
  /* find a CSS class in an HTML element's `class` attribute */
Binary file
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.19.3"
5
+ VERSION = "1.19.4"
6
6
  end
@@ -53,6 +53,14 @@ module Nokogiri
53
53
  defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
54
54
  end
55
55
 
56
+ def libxml2_has_zlib?
57
+ defined?(Nokogiri::LIBXML_ZLIB_ENABLED) && Nokogiri::LIBXML_ZLIB_ENABLED
58
+ end
59
+
60
+ def libxml2_has_http?
61
+ defined?(Nokogiri::LIBXML_HTTP_ENABLED) && Nokogiri::LIBXML_HTTP_ENABLED
62
+ end
63
+
56
64
  def libxslt_has_datetime?
57
65
  defined?(Nokogiri::LIBXSLT_DATETIME_ENABLED) && Nokogiri::LIBXSLT_DATETIME_ENABLED
58
66
  end
@@ -145,6 +153,8 @@ module Nokogiri
145
153
  end
146
154
  libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT
147
155
  libxml["iconv_enabled"] = libxml2_has_iconv?
156
+ libxml["zlib_enabled"] = libxml2_has_zlib?
157
+ libxml["http_enabled"] = libxml2_has_http?
148
158
  libxml["compiled"] = compiled_libxml_version.to_s
149
159
  libxml["loaded"] = loaded_libxml_version.to_s
150
160
  end
@@ -85,8 +85,9 @@ module Nokogiri
85
85
  read_memory(string_or_io, url, encoding, options.to_i)
86
86
  end
87
87
 
88
- # do xinclude processing
89
- doc.do_xinclude(options) if options.xinclude?
88
+ # do xinclude processing; the document is freshly parsed and unexposed to Ruby, so the
89
+ # defensive copy is unnecessary
90
+ doc.do_xinclude(options, safe_copy: false) if options.xinclude?
90
91
 
91
92
  doc
92
93
  end
@@ -523,16 +523,67 @@ module Nokogiri
523
523
  set_namespace(ns)
524
524
  end
525
525
 
526
+ XINCLUDE_NAMESPACES = {
527
+ "xi2001" => "http://www.w3.org/2001/XInclude",
528
+ "xi2003" => "http://www.w3.org/2003/XInclude",
529
+ }.freeze
530
+ private_constant :XINCLUDE_NAMESPACES
531
+
532
+ # Every top-level <xi:include> in the subtree, in either XInclude namespace, excluding
533
+ # includes nested inside another include's fallback (libxml2 only expands those if the
534
+ # parent include fails).
535
+ XINCLUDE_QUERY =
536
+ "descendant-or-self::xi2001:include[not(ancestor::xi2001:include) and not(ancestor::xi2003:include)] | " \
537
+ "descendant-or-self::xi2003:include[not(ancestor::xi2001:include) and not(ancestor::xi2003:include)]"
538
+ private_constant :XINCLUDE_QUERY
539
+
526
540
  ###
527
- # Do xinclude substitution on the subtree below node. If given a block, a
528
- # Nokogiri::XML::ParseOptions object initialized from +options+, will be
529
- # passed to it, allowing more convenient modification of the parser options.
530
- def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
541
+ # :call-seq:
542
+ # do_xinclude(options = ParseOptions::DEFAULT_XML, safe_copy: true) self
543
+ # do_xinclude(options = ParseOptions::DEFAULT_XML, safe_copy: true) { |options| ... } self
544
+ #
545
+ # Do XInclude substitution on the subtree below this node, replacing each +<xi:include>+ with
546
+ # the content it references.
547
+ #
548
+ # [Parameters]
549
+ # - +options+ (Nokogiri::XML::ParseOptions) The parser options for the substitution. (default
550
+ # +ParseOptions::DEFAULT_XML+)
551
+ #
552
+ # [Optional Keyword Arguments]
553
+ # - +safe_copy:+ (Boolean) Operate on a defensive copy of each +<xi:include>+ element, to
554
+ # prevent libxml2 from freeing memory that is bound to live Ruby objects. (default +true+)
555
+ #
556
+ # When +true+, each +<xi:include>+ is processed on an unwrapped copy of itself, so libxml2
557
+ # frees the copy while the original node is unlinked from the document and kept alive. This
558
+ # prevents a use-after-free when the +<xi:include>+ node, or any of its descendants or
559
+ # namespaces, has already been exposed to Ruby; as a consequence such a wrapped node ends up
560
+ # detached from the document rather than removed or converted in place.
561
+ #
562
+ # When +false+, the document is processed in place. This is faster but only safe when nothing
563
+ # in the subtree has been exposed to Ruby (for example, immediately after parsing), which is
564
+ # why Document.parse uses it.
565
+ #
566
+ # This option has no effect on the pure-Java backend, which performs XInclude substitution
567
+ # during parsing.
568
+ #
569
+ # [Yields]
570
+ # If a block is given, a Nokogiri::XML::ParseOptions object initialized from +options+ is
571
+ # yielded to it, which can be configured before substitution.
572
+ #
573
+ # [Returns] +self+ (Nokogiri::XML::Node)
574
+ def do_xinclude(options = XML::ParseOptions::DEFAULT_XML, safe_copy: true)
531
575
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
532
576
  yield options if block_given?
533
577
 
534
- # call c extension
535
- process_xincludes(options.to_i)
578
+ if safe_copy && Nokogiri.uses_libxml?
579
+ xpath(XINCLUDE_QUERY, XINCLUDE_NAMESPACES).each do |include_node|
580
+ include_node.safe_process_xinclude(options.to_i)
581
+ end
582
+ else
583
+ process_xincludes(options.to_i)
584
+ end
585
+
586
+ self
536
587
  end
537
588
 
538
589
  alias_method :next, :next_sibling
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.19.3
4
+ version: 1.19.4
5
5
  platform: java
6
6
  authors:
7
7
  - Mike Dalessio
@@ -19,7 +19,7 @@ authors:
19
19
  - Nobuyoshi Nakada
20
20
  bindir: bin
21
21
  cert_chain: []
22
- date: 2026-04-27 00:00:00.000000000 Z
22
+ date: 2026-06-18 00:00:00.000000000 Z
23
23
  dependencies:
24
24
  - !ruby/object:Gem::Dependency
25
25
  name: jar-dependencies