nokogiri 1.19.3-java → 1.19.4-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/java/nokogiri/XmlAttr.java +3 -0
- data/ext/java/nokogiri/XmlDocument.java +5 -1
- data/ext/java/nokogiri/XmlNodeSet.java +19 -25
- data/ext/java/nokogiri/XmlSchema.java +91 -9
- data/ext/nokogiri/nokogiri.c +6 -3
- data/ext/nokogiri/xml_attr.c +28 -23
- data/ext/nokogiri/xml_document.c +22 -14
- data/ext/nokogiri/xml_node.c +78 -14
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +11 -1
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +10 -0
- data/lib/nokogiri/xml/document.rb +3 -2
- data/lib/nokogiri/xml/node.rb +57 -6
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 32e93c6e2814b80a5552965415a65afff14ae5c0253df5b08fdc0e036dd67a1b
|
|
4
|
+
data.tar.gz: 61a91323350c59f22132ca3e67266dc46f9d69bbd7ecec92c192d7a079b84c33
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: daeef7e9be94229f49fba17511aa0f3be1fefe88a82cac0c94a2ea3b5452e6c937584ec6f1383ac1cbbbcd223c46f91cceb1044e10068b9649ab64fb239d82c6
|
|
7
|
+
data.tar.gz: 7f5552b74774881aa06739963f1368b69b3b8e937fc3ce91a89e6c4fd55056acb1bb11d56f5916d35115bc862314eccc7a9cca8a2e4bcf794d88a195fd5de833
|
|
@@ -100,6 +100,9 @@ public class XmlAttr extends XmlNode
|
|
|
100
100
|
public IRubyObject
|
|
101
101
|
value_set(ThreadContext context, IRubyObject content)
|
|
102
102
|
{
|
|
103
|
+
if (node == null) {
|
|
104
|
+
throw context.runtime.newRuntimeError("Uninitialized " + getMetaClass().getRealClass().getName() + " struct (null data pointer)");
|
|
105
|
+
}
|
|
103
106
|
Attr attr = (Attr) node;
|
|
104
107
|
if (content != null && !content.isNil()) {
|
|
105
108
|
attr.setValue(rubyStringToString(XmlNode.encode_special_chars(context, content)));
|
|
@@ -319,7 +319,7 @@ public class XmlDocument extends XmlNode
|
|
|
319
319
|
public IRubyObject
|
|
320
320
|
encoding_set(IRubyObject encoding)
|
|
321
321
|
{
|
|
322
|
-
this.encoding = encoding;
|
|
322
|
+
this.encoding = encoding.convertToString();
|
|
323
323
|
return this;
|
|
324
324
|
}
|
|
325
325
|
|
|
@@ -441,6 +441,10 @@ public class XmlDocument extends XmlNode
|
|
|
441
441
|
}
|
|
442
442
|
XmlNode newRoot = asXmlNode(context, new_root);
|
|
443
443
|
|
|
444
|
+
if (newRoot.node.getNodeType() != Node.ELEMENT_NODE) {
|
|
445
|
+
throw context.runtime.newTypeError("root must be a Nokogiri::XML::Element");
|
|
446
|
+
}
|
|
447
|
+
|
|
444
448
|
IRubyObject root = root(context);
|
|
445
449
|
if (root.isNil()) {
|
|
446
450
|
Node newRootNode;
|
|
@@ -326,16 +326,16 @@ public class XmlNodeSet extends RubyObject implements NodeList
|
|
|
326
326
|
// https://github.com/jruby/jruby/blame/13a3ec76d883a162b9d46c374c6e9eeea27b3261/core/src/main/java/org/jruby/RubyRange.java#L974
|
|
327
327
|
// once we upgraded the min JRuby version to >= 9.2
|
|
328
328
|
private static IRubyObject
|
|
329
|
-
rangeBeginLength(ThreadContext context, IRubyObject rangeMaybe, int len,
|
|
329
|
+
rangeBeginLength(ThreadContext context, IRubyObject rangeMaybe, int len, long[] begLen)
|
|
330
330
|
{
|
|
331
331
|
RubyRange range = (RubyRange) rangeMaybe;
|
|
332
|
-
|
|
333
|
-
|
|
332
|
+
long min = range.begin(context).convertToInteger().getLongValue();
|
|
333
|
+
long max = range.end(context).convertToInteger().getLongValue();
|
|
334
334
|
|
|
335
335
|
if (min < 0) {
|
|
336
336
|
min += len;
|
|
337
337
|
if (min < 0) {
|
|
338
|
-
|
|
338
|
+
return context.nil;
|
|
339
339
|
}
|
|
340
340
|
}
|
|
341
341
|
|
|
@@ -358,20 +358,22 @@ public class XmlNodeSet extends RubyObject implements NodeList
|
|
|
358
358
|
slice(ThreadContext context, IRubyObject indexOrRange)
|
|
359
359
|
{
|
|
360
360
|
if (indexOrRange instanceof RubyFixnum) {
|
|
361
|
-
return slice(context, ((RubyFixnum) indexOrRange).
|
|
361
|
+
return slice(context, ((RubyFixnum) indexOrRange).getLongValue());
|
|
362
362
|
}
|
|
363
363
|
if (indexOrRange instanceof RubyRange) {
|
|
364
|
-
|
|
365
|
-
rangeBeginLength(context, indexOrRange, nodes.length, begLen)
|
|
366
|
-
|
|
367
|
-
|
|
364
|
+
long[] begLen = new long[2];
|
|
365
|
+
if (rangeBeginLength(context, indexOrRange, nodes.length, begLen).isNil()) {
|
|
366
|
+
return context.nil;
|
|
367
|
+
}
|
|
368
|
+
long min = begLen[0];
|
|
369
|
+
long max = begLen[1];
|
|
368
370
|
return subseq(context, min, max - min);
|
|
369
371
|
}
|
|
370
372
|
throw context.runtime.newTypeError("index must be an Integer or a Range");
|
|
371
373
|
}
|
|
372
374
|
|
|
373
375
|
IRubyObject
|
|
374
|
-
slice(ThreadContext context,
|
|
376
|
+
slice(ThreadContext context, long idx)
|
|
375
377
|
{
|
|
376
378
|
if (idx < 0) {
|
|
377
379
|
idx += nodes.length;
|
|
@@ -381,15 +383,15 @@ public class XmlNodeSet extends RubyObject implements NodeList
|
|
|
381
383
|
return context.nil;
|
|
382
384
|
}
|
|
383
385
|
|
|
384
|
-
return nodes[idx];
|
|
386
|
+
return nodes[(int) idx];
|
|
385
387
|
}
|
|
386
388
|
|
|
387
389
|
@JRubyMethod(name = {"[]", "slice"})
|
|
388
390
|
public IRubyObject
|
|
389
391
|
slice(ThreadContext context, IRubyObject start, IRubyObject length)
|
|
390
392
|
{
|
|
391
|
-
|
|
392
|
-
|
|
393
|
+
long s = ((RubyFixnum) start).getLongValue();
|
|
394
|
+
long l = ((RubyFixnum) length).getLongValue();
|
|
393
395
|
|
|
394
396
|
if (s < 0) {
|
|
395
397
|
s += nodes.length;
|
|
@@ -399,23 +401,15 @@ public class XmlNodeSet extends RubyObject implements NodeList
|
|
|
399
401
|
}
|
|
400
402
|
|
|
401
403
|
public IRubyObject
|
|
402
|
-
subseq(ThreadContext context,
|
|
404
|
+
subseq(ThreadContext context, long start, long length)
|
|
403
405
|
{
|
|
404
|
-
if (start > nodes.length) {
|
|
405
|
-
return context.nil;
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
if (start < 0 || length < 0) {
|
|
406
|
+
if (start < 0 || length < 0 || start > nodes.length) {
|
|
409
407
|
return context.nil;
|
|
410
408
|
}
|
|
411
409
|
|
|
412
|
-
|
|
413
|
-
length = nodes.length - start;
|
|
414
|
-
}
|
|
415
|
-
|
|
416
|
-
int to = start + length;
|
|
410
|
+
long end = start + Math.min(length, nodes.length - start);
|
|
417
411
|
|
|
418
|
-
return newNodeSet(context.runtime, Arrays.copyOfRange(nodes, start,
|
|
412
|
+
return newNodeSet(context.runtime, Arrays.copyOfRange(nodes, (int) start, (int) end));
|
|
419
413
|
}
|
|
420
414
|
|
|
421
415
|
@JRubyMethod(name = {"to_a", "to_ary"})
|
|
@@ -8,6 +8,9 @@ import java.io.InputStream;
|
|
|
8
8
|
import java.io.Reader;
|
|
9
9
|
import java.io.StringReader;
|
|
10
10
|
|
|
11
|
+
import java.net.URI;
|
|
12
|
+
import java.net.URISyntaxException;
|
|
13
|
+
|
|
11
14
|
import javax.xml.XMLConstants;
|
|
12
15
|
import javax.xml.transform.Source;
|
|
13
16
|
import javax.xml.transform.dom.DOMSource;
|
|
@@ -285,24 +288,103 @@ public class XmlSchema extends RubyObject
|
|
|
285
288
|
String systemId,
|
|
286
289
|
String baseURI)
|
|
287
290
|
{
|
|
288
|
-
if (noNet &&
|
|
289
|
-
if (systemId.startsWith(XMLConstants.W3C_XML_SCHEMA_NS_URI)) {
|
|
290
|
-
return null; // use default resolver
|
|
291
|
-
}
|
|
291
|
+
if (noNet && !effectiveResourceIsLocal(systemId, baseURI)) {
|
|
292
292
|
try {
|
|
293
293
|
this.errorHandler.warning(new SAXParseException(String.format("Attempt to load network entity '%s'", systemId), null));
|
|
294
294
|
} catch (SAXException ex) {
|
|
295
295
|
}
|
|
296
|
-
|
|
297
|
-
String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId);
|
|
298
|
-
lsInput.setPublicId(publicId);
|
|
299
|
-
lsInput.setSystemId(adjusted != null ? adjusted : systemId);
|
|
300
|
-
lsInput.setBaseURI(baseURI);
|
|
296
|
+
return new SchemaLSInput(); // an empty input blocks the fetch
|
|
301
297
|
}
|
|
298
|
+
|
|
299
|
+
String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId);
|
|
300
|
+
lsInput.setPublicId(publicId);
|
|
301
|
+
lsInput.setSystemId(adjusted != null ? adjusted : systemId);
|
|
302
|
+
lsInput.setBaseURI(baseURI);
|
|
302
303
|
return lsInput;
|
|
303
304
|
}
|
|
304
305
|
}
|
|
305
306
|
|
|
307
|
+
// We enforce NONET for schema resolution by hand because Xerces-J (the JAXP implementation
|
|
308
|
+
// backing XML::Schema on JRuby) does not implement the standard JAXP property
|
|
309
|
+
// XMLConstants.ACCESS_EXTERNAL_SCHEMA — so we cannot simply restrict external access on the
|
|
310
|
+
// SchemaFactory and must classify each resolved resource in the LSResourceResolver instead.
|
|
311
|
+
//
|
|
312
|
+
// Decides whether a schema-import resource may be resolved while NONET is on: true means
|
|
313
|
+
// local (allowed), false means a network resource (blocked). A relative systemId inherits
|
|
314
|
+
// its document's base, so it is resolved against baseURI before classification — a relative
|
|
315
|
+
// import under a remote base is a network fetch even though the systemId alone looks local.
|
|
316
|
+
private static boolean
|
|
317
|
+
effectiveResourceIsLocal(String systemId, String baseURI)
|
|
318
|
+
{
|
|
319
|
+
// a null systemId means there is nothing external to resolve
|
|
320
|
+
if (systemId == null) {
|
|
321
|
+
return true;
|
|
322
|
+
}
|
|
323
|
+
try {
|
|
324
|
+
URI uri = new URI(systemId);
|
|
325
|
+
if (baseURI != null && !baseURI.isEmpty()) {
|
|
326
|
+
uri = new URI(baseURI).resolve(uri);
|
|
327
|
+
}
|
|
328
|
+
return isLocalResource(uri);
|
|
329
|
+
} catch (URISyntaxException | IllegalArgumentException e) {
|
|
330
|
+
// fail closed: an unparseable base or systemId (e.g. a raw UNC path "\\host\share") is
|
|
331
|
+
// not provably local, and the JVM's file/URL handling may still reach the network
|
|
332
|
+
return false;
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
// Test seam for the Ruby suite: local_resource?(systemId, baseURI = nil).
|
|
337
|
+
@JRubyMethod(meta = true, name = "local_resource?", required = 1, optional = 1, visibility = Visibility.PRIVATE)
|
|
338
|
+
public static IRubyObject
|
|
339
|
+
local_resource_eh(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
|
|
340
|
+
{
|
|
341
|
+
String systemId = args[0].isNil() ? null : args[0].asJavaString();
|
|
342
|
+
String baseURI = (args.length > 1 && !args[1].isNil()) ? args[1].asJavaString() : null;
|
|
343
|
+
return context.runtime.newBoolean(effectiveResourceIsLocal(systemId, baseURI));
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
// Classifies an already-parsed URI. Local is a missing scheme, or the "file" scheme, with
|
|
347
|
+
// no remote authority and no UNC-shaped path. This is intentionally stricter than libxml2's
|
|
348
|
+
// xmlNoNetExternalEntityLoader, which folds a remote host (file://host/...) into a local
|
|
349
|
+
// path rather than rejecting it.
|
|
350
|
+
//
|
|
351
|
+
// TODO: a Windows drive-letter path like "C:\path" parses as scheme "c" and would be
|
|
352
|
+
// blocked; support those if we need it later.
|
|
353
|
+
private static boolean
|
|
354
|
+
isLocalResource(URI uri)
|
|
355
|
+
{
|
|
356
|
+
// only a missing scheme (a relative or absolute path) or file: can be local; any
|
|
357
|
+
// other scheme is a network resource
|
|
358
|
+
String scheme = uri.getScheme();
|
|
359
|
+
if (scheme != null && !scheme.equalsIgnoreCase("file")) {
|
|
360
|
+
return false;
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// an opaque "file:" URI (e.g. file:foo, with no "//") is not a usable local path; reject
|
|
364
|
+
// it, matching libxml2, which does not resolve that form as a local file either
|
|
365
|
+
if (uri.isOpaque()) {
|
|
366
|
+
return false;
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
// a non-empty, non-localhost authority is a remote host — file://host/path, or the
|
|
370
|
+
// schemeless network-path form //host/path. Stricter than libxml2, which folds such a
|
|
371
|
+
// host into a (failing) local path.
|
|
372
|
+
String authority = uri.getRawAuthority();
|
|
373
|
+
if (authority != null && !authority.isEmpty() && !authority.equalsIgnoreCase("localhost")) {
|
|
374
|
+
return false;
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
// reject UNC-shaped paths even under an allowed authority: file:////host/share,
|
|
378
|
+
// file://localhost//host/share, and %2f/%5c-encoded variants. getPath() is decoded, so
|
|
379
|
+
// the encoded forms are normalized before this check.
|
|
380
|
+
String path = uri.getPath();
|
|
381
|
+
if (path != null && (path.startsWith("//") || path.indexOf('\\') >= 0)) {
|
|
382
|
+
return false;
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
return true;
|
|
386
|
+
}
|
|
387
|
+
|
|
306
388
|
private class SchemaLSInput implements LSInput
|
|
307
389
|
{
|
|
308
390
|
protected String fPublicId;
|
data/ext/nokogiri/nokogiri.c
CHANGED
|
@@ -203,9 +203,6 @@ Init_nokogiri(void)
|
|
|
203
203
|
rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
|
|
204
204
|
rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
|
|
205
205
|
|
|
206
|
-
rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
|
|
207
|
-
xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
|
|
208
|
-
|
|
209
206
|
#ifdef NOKOGIRI_PACKAGED_LIBRARIES
|
|
210
207
|
rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
|
|
211
208
|
# ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
|
|
@@ -228,6 +225,12 @@ Init_nokogiri(void)
|
|
|
228
225
|
rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
|
|
229
226
|
#endif
|
|
230
227
|
|
|
228
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
|
|
229
|
+
xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
|
|
230
|
+
|
|
231
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_HTTP_ENABLED"),
|
|
232
|
+
xmlHasFeature(XML_WITH_HTTP) == 1 ? Qtrue : Qfalse);
|
|
233
|
+
|
|
231
234
|
#ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
|
|
232
235
|
rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
|
|
233
236
|
#endif
|
data/ext/nokogiri/xml_attr.c
CHANGED
|
@@ -10,37 +10,42 @@ VALUE cNokogiriXmlAttr;
|
|
|
10
10
|
* (e.g., a HTML boolean attribute).
|
|
11
11
|
*/
|
|
12
12
|
static VALUE
|
|
13
|
-
|
|
13
|
+
noko_xml_attr_set_value(VALUE self, VALUE content)
|
|
14
14
|
{
|
|
15
15
|
xmlAttrPtr attr;
|
|
16
|
-
xmlChar *value;
|
|
17
|
-
xmlNode *cur;
|
|
18
16
|
|
|
19
17
|
Noko_Node_Get_Struct(self, xmlAttr, attr);
|
|
20
18
|
|
|
21
|
-
|
|
22
|
-
|
|
19
|
+
{
|
|
20
|
+
/* Unlink and pin any wrapped children */
|
|
21
|
+
xmlNode *cur = attr->children;
|
|
22
|
+
xmlNode *next;
|
|
23
|
+
|
|
24
|
+
while (cur) {
|
|
25
|
+
next = cur->next;
|
|
26
|
+
if (cur->_private) {
|
|
27
|
+
xmlUnlinkNode(cur);
|
|
28
|
+
noko_xml_document_pin_node(cur);
|
|
29
|
+
}
|
|
30
|
+
cur = next;
|
|
31
|
+
}
|
|
23
32
|
}
|
|
24
|
-
attr->children = attr->last = NULL;
|
|
25
33
|
|
|
26
34
|
if (content == Qnil) {
|
|
27
|
-
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
value = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValueCStr(content));
|
|
31
|
-
if (xmlStrlen(value) == 0) {
|
|
32
|
-
attr->children = xmlNewDocText(attr->doc, value);
|
|
35
|
+
xmlNodeSetContent((xmlNodePtr)attr, NULL); /* Clear any remaining unwrapped children. */
|
|
33
36
|
} else {
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
+
xmlChar *value = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValueCStr(content));
|
|
38
|
+
|
|
39
|
+
if (xmlStrlen(value) == 0) {
|
|
40
|
+
xmlNodeSetContent((xmlNodePtr)attr, NULL); /* Clear any remaining unwrapped children. */
|
|
37
41
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
attr
|
|
42
|
+
/* Preserve empty-string attributes as `foo=""` and not boolean `foo` */
|
|
43
|
+
attr->children = attr->last = xmlNewDocText(attr->doc, value);
|
|
44
|
+
attr->children->parent = (xmlNode *)attr;
|
|
45
|
+
} else {
|
|
46
|
+
xmlNodeSetContent((xmlNodePtr)attr, value);
|
|
43
47
|
}
|
|
48
|
+
xmlFree(value);
|
|
44
49
|
}
|
|
45
50
|
|
|
46
51
|
return content;
|
|
@@ -53,7 +58,7 @@ set_value(VALUE self, VALUE content)
|
|
|
53
58
|
* Create a new Attr element on the +document+ with +name+
|
|
54
59
|
*/
|
|
55
60
|
static VALUE
|
|
56
|
-
|
|
61
|
+
noko_xml_attr__new(int argc, VALUE *argv, VALUE klass)
|
|
57
62
|
{
|
|
58
63
|
xmlDocPtr xml_doc;
|
|
59
64
|
VALUE document;
|
|
@@ -97,7 +102,7 @@ noko_init_xml_attr(void)
|
|
|
97
102
|
*/
|
|
98
103
|
cNokogiriXmlAttr = rb_define_class_under(mNokogiriXml, "Attr", cNokogiriXmlNode);
|
|
99
104
|
|
|
100
|
-
rb_define_singleton_method(cNokogiriXmlAttr, "new",
|
|
105
|
+
rb_define_singleton_method(cNokogiriXmlAttr, "new", noko_xml_attr__new, -1);
|
|
101
106
|
|
|
102
|
-
rb_define_method(cNokogiriXmlAttr, "value=",
|
|
107
|
+
rb_define_method(cNokogiriXmlAttr, "value=", noko_xml_attr_set_value, 1);
|
|
103
108
|
}
|
data/ext/nokogiri/xml_document.c
CHANGED
|
@@ -255,12 +255,6 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
|
|
255
255
|
|
|
256
256
|
c_document = noko_xml_document_unwrap(self);
|
|
257
257
|
|
|
258
|
-
c_current_root = xmlDocGetRootElement(c_document);
|
|
259
|
-
if (c_current_root) {
|
|
260
|
-
xmlUnlinkNode(c_current_root);
|
|
261
|
-
noko_xml_document_pin_node(c_current_root);
|
|
262
|
-
}
|
|
263
|
-
|
|
264
258
|
if (!NIL_P(rb_new_root)) {
|
|
265
259
|
if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
|
|
266
260
|
rb_raise(rb_eArgError,
|
|
@@ -270,13 +264,23 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
|
|
270
264
|
|
|
271
265
|
Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
|
|
272
266
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
267
|
+
if (c_new_root->type != XML_ELEMENT_NODE) {
|
|
268
|
+
rb_raise(rb_eTypeError, "root must be a Nokogiri::XML::Element");
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
c_current_root = xmlDocGetRootElement(c_document);
|
|
273
|
+
if (c_current_root) {
|
|
274
|
+
xmlUnlinkNode(c_current_root);
|
|
275
|
+
noko_xml_document_pin_node(c_current_root);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/* If the new root's document is not the same as the current document,
|
|
279
|
+
* then we need to dup the node in to this document. */
|
|
280
|
+
if (c_new_root && c_new_root->doc != c_document) {
|
|
281
|
+
c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
|
|
282
|
+
if (!c_new_root) {
|
|
283
|
+
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
|
|
280
284
|
}
|
|
281
285
|
}
|
|
282
286
|
|
|
@@ -317,12 +321,13 @@ static VALUE
|
|
|
317
321
|
set_encoding(VALUE self, VALUE encoding)
|
|
318
322
|
{
|
|
319
323
|
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
|
324
|
+
xmlChar *new_encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
|
|
320
325
|
|
|
321
326
|
if (doc->encoding) {
|
|
322
327
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
|
323
328
|
}
|
|
324
329
|
|
|
325
|
-
doc->encoding =
|
|
330
|
+
doc->encoding = new_encoding;
|
|
326
331
|
|
|
327
332
|
return encoding;
|
|
328
333
|
}
|
|
@@ -708,6 +713,9 @@ noko_xml_document_unwrap(VALUE rb_document)
|
|
|
708
713
|
{
|
|
709
714
|
xmlDocPtr c_document;
|
|
710
715
|
TypedData_Get_Struct(rb_document, xmlDoc, &xml_doc_type, c_document);
|
|
716
|
+
if (c_document == NULL) {
|
|
717
|
+
rb_raise(rb_eRuntimeError, "Uninitialized %" PRIsVALUE " struct (null data pointer)", rb_obj_class(rb_document));
|
|
718
|
+
}
|
|
711
719
|
return c_document;
|
|
712
720
|
}
|
|
713
721
|
|
data/ext/nokogiri/xml_node.c
CHANGED
|
@@ -971,6 +971,10 @@ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_le
|
|
|
971
971
|
xmlDocPtr c_new_parent_doc;
|
|
972
972
|
VALUE rb_node_cache;
|
|
973
973
|
|
|
974
|
+
if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNode)) {
|
|
975
|
+
rb_raise(rb_eTypeError, "argument must be a kind of Nokogiri::XML::Node");
|
|
976
|
+
}
|
|
977
|
+
|
|
974
978
|
Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
|
|
975
979
|
c_level = (int)NUM2INT(rb_level);
|
|
976
980
|
c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
|
|
@@ -2150,25 +2154,20 @@ compare(VALUE self, VALUE _other)
|
|
|
2150
2154
|
|
|
2151
2155
|
|
|
2152
2156
|
/*
|
|
2153
|
-
*
|
|
2154
|
-
*
|
|
2155
|
-
*
|
|
2156
|
-
* Loads and substitutes all xinclude elements below the node. The
|
|
2157
|
-
* parser context will be initialized with +flags+.
|
|
2157
|
+
* Run XInclude substitution over the tree rooted at +c_node+, with the parser context initialized
|
|
2158
|
+
* from +c_flags+. Collects libxml2's structured errors and raises Nokogiri::XML::SyntaxError (or
|
|
2159
|
+
* RuntimeError) on failure.
|
|
2158
2160
|
*/
|
|
2159
|
-
static
|
|
2160
|
-
|
|
2161
|
+
static void
|
|
2162
|
+
_noko_xml_node_process_xinclude_subtree(xmlNodePtr c_node, int c_flags)
|
|
2161
2163
|
{
|
|
2162
2164
|
int status ;
|
|
2163
|
-
xmlNodePtr c_node;
|
|
2164
2165
|
VALUE rb_errors = rb_ary_new();
|
|
2165
2166
|
libxmlStructuredErrorHandlerState handler_state;
|
|
2166
2167
|
|
|
2167
|
-
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
|
2168
|
-
|
|
2169
2168
|
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
|
2170
2169
|
|
|
2171
|
-
status = xmlXIncludeProcessTreeFlags(c_node,
|
|
2170
|
+
status = xmlXIncludeProcessTreeFlags(c_node, c_flags);
|
|
2172
2171
|
|
|
2173
2172
|
noko__structured_error_func_restore(&handler_state);
|
|
2174
2173
|
|
|
@@ -2181,11 +2180,77 @@ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
|
|
|
2181
2180
|
rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
|
|
2182
2181
|
}
|
|
2183
2182
|
}
|
|
2183
|
+
}
|
|
2184
|
+
|
|
2185
|
+
|
|
2186
|
+
/*
|
|
2187
|
+
* Whether +c_node+ is an <xi:include> element in either the 2001 or 2003 XInclude namespace.
|
|
2188
|
+
*/
|
|
2189
|
+
static int
|
|
2190
|
+
_noko_xml_node_xinclude_element_p(xmlNodePtr c_node)
|
|
2191
|
+
{
|
|
2192
|
+
return c_node->type == XML_ELEMENT_NODE
|
|
2193
|
+
&& xmlStrEqual(c_node->name, XINCLUDE_NODE)
|
|
2194
|
+
&& c_node->ns != NULL
|
|
2195
|
+
&& (xmlStrEqual(c_node->ns->href, XINCLUDE_NS) || xmlStrEqual(c_node->ns->href, XINCLUDE_OLD_NS));
|
|
2196
|
+
}
|
|
2197
|
+
|
|
2198
|
+
|
|
2199
|
+
/*
|
|
2200
|
+
* call-seq:
|
|
2201
|
+
* process_xincludes(flags)
|
|
2202
|
+
*
|
|
2203
|
+
* Loads and substitutes all xinclude elements below the node. The
|
|
2204
|
+
* parser context will be initialized with +flags+.
|
|
2205
|
+
*/
|
|
2206
|
+
static VALUE
|
|
2207
|
+
noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
|
|
2208
|
+
{
|
|
2209
|
+
xmlNodePtr c_node;
|
|
2210
|
+
|
|
2211
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
|
2212
|
+
|
|
2213
|
+
if (c_node->parent == NULL && _noko_xml_node_xinclude_element_p(c_node)) {
|
|
2214
|
+
rb_raise(rb_eRuntimeError, "cannot process XInclude on an unlinked <xi:include> node");
|
|
2215
|
+
}
|
|
2216
|
+
|
|
2217
|
+
_noko_xml_node_process_xinclude_subtree(c_node, (int)NUM2INT(rb_flags));
|
|
2184
2218
|
|
|
2185
2219
|
return rb_node;
|
|
2186
2220
|
}
|
|
2187
2221
|
|
|
2188
2222
|
|
|
2223
|
+
/*
|
|
2224
|
+
* Process this single <xi:include> node, substituting an unwrapped copy of it in its place so
|
|
2225
|
+
* that libxml2 frees the copy. This node is unlinked and pinned to the document, so any Ruby
|
|
2226
|
+
* wrapper for it (or for its descendants or namespaces) keeps pointing at valid memory. The
|
|
2227
|
+
* parser context is initialized with +flags+.
|
|
2228
|
+
*/
|
|
2229
|
+
static VALUE
|
|
2230
|
+
noko_xml_node__safe_process_xinclude(VALUE rb_node, VALUE rb_flags)
|
|
2231
|
+
{
|
|
2232
|
+
xmlNodePtr c_node, c_copy;
|
|
2233
|
+
|
|
2234
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
|
2235
|
+
|
|
2236
|
+
if (c_node->parent == NULL) {
|
|
2237
|
+
rb_raise(rb_eRuntimeError, "cannot process XInclude on an unlinked <xi:include> node");
|
|
2238
|
+
}
|
|
2239
|
+
|
|
2240
|
+
c_copy = xmlDocCopyNode(c_node, c_node->doc, 1);
|
|
2241
|
+
if (c_copy == NULL) {
|
|
2242
|
+
rb_raise(rb_eRuntimeError, "Could not copy node for xinclude substitution");
|
|
2243
|
+
}
|
|
2244
|
+
|
|
2245
|
+
xmlReplaceNode(c_node, c_copy);
|
|
2246
|
+
noko_xml_document_pin_node(c_node);
|
|
2247
|
+
|
|
2248
|
+
_noko_xml_node_process_xinclude_subtree(c_copy, (int)NUM2INT(rb_flags));
|
|
2249
|
+
|
|
2250
|
+
return Qnil;
|
|
2251
|
+
}
|
|
2252
|
+
|
|
2253
|
+
|
|
2189
2254
|
/* TODO: DOCUMENT ME */
|
|
2190
2255
|
static VALUE
|
|
2191
2256
|
in_context(VALUE self, VALUE _str, VALUE _options)
|
|
@@ -2286,9 +2351,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
|
2286
2351
|
VALUE
|
|
2287
2352
|
rb_xml_node_data_ptr_eh(VALUE self)
|
|
2288
2353
|
{
|
|
2289
|
-
|
|
2290
|
-
Noko_Node_Get_Struct(self, xmlNode, c_node);
|
|
2291
|
-
return c_node ? Qtrue : Qfalse;
|
|
2354
|
+
return DATA_PTR(self) ? Qtrue : Qfalse;
|
|
2292
2355
|
}
|
|
2293
2356
|
|
|
2294
2357
|
VALUE
|
|
@@ -2438,6 +2501,7 @@ noko_init_xml_node(void)
|
|
|
2438
2501
|
rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
|
|
2439
2502
|
|
|
2440
2503
|
rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
|
|
2504
|
+
rb_define_protected_method(cNokogiriXmlNode, "safe_process_xinclude", noko_xml_node__safe_process_xinclude, 1);
|
|
2441
2505
|
|
|
2442
2506
|
rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
|
|
2443
2507
|
rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
|
data/ext/nokogiri/xml_node_set.c
CHANGED
|
@@ -304,7 +304,7 @@ index_at(VALUE rb_self, long offset)
|
|
|
304
304
|
|
|
305
305
|
TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
|
|
306
306
|
|
|
307
|
-
if (offset >= c_self->nodeNr ||
|
|
307
|
+
if (offset >= c_self->nodeNr || offset < -c_self->nodeNr) {
|
|
308
308
|
return Qnil;
|
|
309
309
|
}
|
|
310
310
|
|
|
@@ -11,6 +11,15 @@ static const xmlChar *NOKOGIRI_URI = (const xmlChar *)"http://www.nokogiri.org/d
|
|
|
11
11
|
static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
|
|
12
12
|
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
|
|
13
13
|
|
|
14
|
+
static void
|
|
15
|
+
_noko_xml_xpath_context_dmark(void *data)
|
|
16
|
+
{
|
|
17
|
+
xmlXPathContextPtr c_context = data;
|
|
18
|
+
if (c_context->doc && DOC_RUBY_OBJECT_TEST(c_context->doc)) {
|
|
19
|
+
rb_gc_mark(DOC_RUBY_OBJECT(c_context->doc));
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
14
23
|
static void
|
|
15
24
|
_noko_xml_xpath_context_dfree(void *data)
|
|
16
25
|
{
|
|
@@ -21,9 +30,10 @@ _noko_xml_xpath_context_dfree(void *data)
|
|
|
21
30
|
static const rb_data_type_t _noko_xml_xpath_context_type = {
|
|
22
31
|
.wrap_struct_name = "xmlXPathContext",
|
|
23
32
|
.function = {
|
|
33
|
+
.dmark = _noko_xml_xpath_context_dmark,
|
|
24
34
|
.dfree = _noko_xml_xpath_context_dfree,
|
|
25
35
|
},
|
|
26
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
|
36
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
27
37
|
};
|
|
28
38
|
|
|
29
39
|
/* find a CSS class in an HTML element's `class` attribute */
|
data/lib/nokogiri/nokogiri.jar
CHANGED
|
Binary file
|
|
@@ -53,6 +53,14 @@ module Nokogiri
|
|
|
53
53
|
defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
|
|
54
54
|
end
|
|
55
55
|
|
|
56
|
+
def libxml2_has_zlib?
|
|
57
|
+
defined?(Nokogiri::LIBXML_ZLIB_ENABLED) && Nokogiri::LIBXML_ZLIB_ENABLED
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def libxml2_has_http?
|
|
61
|
+
defined?(Nokogiri::LIBXML_HTTP_ENABLED) && Nokogiri::LIBXML_HTTP_ENABLED
|
|
62
|
+
end
|
|
63
|
+
|
|
56
64
|
def libxslt_has_datetime?
|
|
57
65
|
defined?(Nokogiri::LIBXSLT_DATETIME_ENABLED) && Nokogiri::LIBXSLT_DATETIME_ENABLED
|
|
58
66
|
end
|
|
@@ -145,6 +153,8 @@ module Nokogiri
|
|
|
145
153
|
end
|
|
146
154
|
libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT
|
|
147
155
|
libxml["iconv_enabled"] = libxml2_has_iconv?
|
|
156
|
+
libxml["zlib_enabled"] = libxml2_has_zlib?
|
|
157
|
+
libxml["http_enabled"] = libxml2_has_http?
|
|
148
158
|
libxml["compiled"] = compiled_libxml_version.to_s
|
|
149
159
|
libxml["loaded"] = loaded_libxml_version.to_s
|
|
150
160
|
end
|
|
@@ -85,8 +85,9 @@ module Nokogiri
|
|
|
85
85
|
read_memory(string_or_io, url, encoding, options.to_i)
|
|
86
86
|
end
|
|
87
87
|
|
|
88
|
-
# do xinclude processing
|
|
89
|
-
|
|
88
|
+
# do xinclude processing; the document is freshly parsed and unexposed to Ruby, so the
|
|
89
|
+
# defensive copy is unnecessary
|
|
90
|
+
doc.do_xinclude(options, safe_copy: false) if options.xinclude?
|
|
90
91
|
|
|
91
92
|
doc
|
|
92
93
|
end
|
data/lib/nokogiri/xml/node.rb
CHANGED
|
@@ -523,16 +523,67 @@ module Nokogiri
|
|
|
523
523
|
set_namespace(ns)
|
|
524
524
|
end
|
|
525
525
|
|
|
526
|
+
XINCLUDE_NAMESPACES = {
|
|
527
|
+
"xi2001" => "http://www.w3.org/2001/XInclude",
|
|
528
|
+
"xi2003" => "http://www.w3.org/2003/XInclude",
|
|
529
|
+
}.freeze
|
|
530
|
+
private_constant :XINCLUDE_NAMESPACES
|
|
531
|
+
|
|
532
|
+
# Every top-level <xi:include> in the subtree, in either XInclude namespace, excluding
|
|
533
|
+
# includes nested inside another include's fallback (libxml2 only expands those if the
|
|
534
|
+
# parent include fails).
|
|
535
|
+
XINCLUDE_QUERY =
|
|
536
|
+
"descendant-or-self::xi2001:include[not(ancestor::xi2001:include) and not(ancestor::xi2003:include)] | " \
|
|
537
|
+
"descendant-or-self::xi2003:include[not(ancestor::xi2001:include) and not(ancestor::xi2003:include)]"
|
|
538
|
+
private_constant :XINCLUDE_QUERY
|
|
539
|
+
|
|
526
540
|
###
|
|
527
|
-
#
|
|
528
|
-
#
|
|
529
|
-
#
|
|
530
|
-
|
|
541
|
+
# :call-seq:
|
|
542
|
+
# do_xinclude(options = ParseOptions::DEFAULT_XML, safe_copy: true) → self
|
|
543
|
+
# do_xinclude(options = ParseOptions::DEFAULT_XML, safe_copy: true) { |options| ... } → self
|
|
544
|
+
#
|
|
545
|
+
# Do XInclude substitution on the subtree below this node, replacing each +<xi:include>+ with
|
|
546
|
+
# the content it references.
|
|
547
|
+
#
|
|
548
|
+
# [Parameters]
|
|
549
|
+
# - +options+ (Nokogiri::XML::ParseOptions) The parser options for the substitution. (default
|
|
550
|
+
# +ParseOptions::DEFAULT_XML+)
|
|
551
|
+
#
|
|
552
|
+
# [Optional Keyword Arguments]
|
|
553
|
+
# - +safe_copy:+ (Boolean) Operate on a defensive copy of each +<xi:include>+ element, to
|
|
554
|
+
# prevent libxml2 from freeing memory that is bound to live Ruby objects. (default +true+)
|
|
555
|
+
#
|
|
556
|
+
# When +true+, each +<xi:include>+ is processed on an unwrapped copy of itself, so libxml2
|
|
557
|
+
# frees the copy while the original node is unlinked from the document and kept alive. This
|
|
558
|
+
# prevents a use-after-free when the +<xi:include>+ node, or any of its descendants or
|
|
559
|
+
# namespaces, has already been exposed to Ruby; as a consequence such a wrapped node ends up
|
|
560
|
+
# detached from the document rather than removed or converted in place.
|
|
561
|
+
#
|
|
562
|
+
# When +false+, the document is processed in place. This is faster but only safe when nothing
|
|
563
|
+
# in the subtree has been exposed to Ruby (for example, immediately after parsing), which is
|
|
564
|
+
# why Document.parse uses it.
|
|
565
|
+
#
|
|
566
|
+
# This option has no effect on the pure-Java backend, which performs XInclude substitution
|
|
567
|
+
# during parsing.
|
|
568
|
+
#
|
|
569
|
+
# [Yields]
|
|
570
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object initialized from +options+ is
|
|
571
|
+
# yielded to it, which can be configured before substitution.
|
|
572
|
+
#
|
|
573
|
+
# [Returns] +self+ (Nokogiri::XML::Node)
|
|
574
|
+
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML, safe_copy: true)
|
|
531
575
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
|
532
576
|
yield options if block_given?
|
|
533
577
|
|
|
534
|
-
|
|
535
|
-
|
|
578
|
+
if safe_copy && Nokogiri.uses_libxml?
|
|
579
|
+
xpath(XINCLUDE_QUERY, XINCLUDE_NAMESPACES).each do |include_node|
|
|
580
|
+
include_node.safe_process_xinclude(options.to_i)
|
|
581
|
+
end
|
|
582
|
+
else
|
|
583
|
+
process_xincludes(options.to_i)
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
self
|
|
536
587
|
end
|
|
537
588
|
|
|
538
589
|
alias_method :next, :next_sibling
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: nokogiri
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.19.3
|
|
4
|
+
version: 1.19.4
|
|
5
5
|
platform: java
|
|
6
6
|
authors:
|
|
7
7
|
- Mike Dalessio
|
|
@@ -19,7 +19,7 @@ authors:
|
|
|
19
19
|
- Nobuyoshi Nakada
|
|
20
20
|
bindir: bin
|
|
21
21
|
cert_chain: []
|
|
22
|
-
date: 2026-
|
|
22
|
+
date: 2026-06-18 00:00:00.000000000 Z
|
|
23
23
|
dependencies:
|
|
24
24
|
- !ruby/object:Gem::Dependency
|
|
25
25
|
name: jar-dependencies
|