nokogiri 1.13.8 → 1.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (120)
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +33 -15
  7. data/ext/nokogiri/extconf.rb +164 -46
  8. data/ext/nokogiri/gumbo.c +20 -10
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +31 -16
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +2 -2
  18. data/ext/nokogiri/xml_attribute_decl.c +2 -2
  19. data/ext/nokogiri/xml_cdata.c +32 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +127 -34
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +2 -2
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +7 -7
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +1 -1
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +79 -14
  30. data/ext/nokogiri/xml_node.c +300 -34
  31. data/ext/nokogiri/xml_node_set.c +125 -107
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +81 -48
  34. data/ext/nokogiri/xml_relax_ng.c +66 -81
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +46 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +30 -11
  38. data/ext/nokogiri/xml_schema.c +95 -117
  39. data/ext/nokogiri/xml_syntax_error.c +1 -1
  40. data/ext/nokogiri/xml_text.c +28 -14
  41. data/ext/nokogiri/xml_xpath_context.c +216 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +118 -64
  43. data/gumbo-parser/Makefile +10 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +21 -5
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/xpath_visitor.rb +7 -5
  70. data/lib/nokogiri/css.rb +6 -0
  71. data/lib/nokogiri/decorators/slop.rb +1 -1
  72. data/lib/nokogiri/encoding_handler.rb +57 -0
  73. data/lib/nokogiri/extension.rb +4 -3
  74. data/lib/nokogiri/html4/document.rb +2 -121
  75. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  76. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  77. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  78. data/lib/nokogiri/html4.rb +1 -0
  79. data/lib/nokogiri/html5/document.rb +113 -36
  80. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  81. data/lib/nokogiri/html5/node.rb +8 -5
  82. data/lib/nokogiri/html5.rb +130 -216
  83. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  84. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  85. data/lib/nokogiri/version/constant.rb +1 -1
  86. data/lib/nokogiri/version/info.rb +11 -10
  87. data/lib/nokogiri/xml/attr.rb +49 -0
  88. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  89. data/lib/nokogiri/xml/builder.rb +1 -1
  90. data/lib/nokogiri/xml/document.rb +102 -55
  91. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  92. data/lib/nokogiri/xml/element_content.rb +10 -2
  93. data/lib/nokogiri/xml/element_decl.rb +4 -2
  94. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  95. data/lib/nokogiri/xml/namespace.rb +42 -0
  96. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  97. data/lib/nokogiri/xml/node.rb +212 -48
  98. data/lib/nokogiri/xml/node_set.rb +88 -9
  99. data/lib/nokogiri/xml/parse_options.rb +129 -50
  100. data/lib/nokogiri/xml/pp/node.rb +28 -15
  101. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  102. data/lib/nokogiri/xml/sax/document.rb +1 -1
  103. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  104. data/lib/nokogiri/xml/searchable.rb +18 -10
  105. data/lib/nokogiri/xslt.rb +74 -4
  106. data/lib/nokogiri.rb +15 -15
  107. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  108. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  109. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  110. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  111. data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
  112. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  113. metadata +19 -242
  114. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  115. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  116. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  117. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  118. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  119. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  120. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -1,6 +1,8 @@
1
1
  #ifndef NOKOGIRI_NATIVE
2
2
  #define NOKOGIRI_NATIVE
3
3
 
4
+ #include <ruby/defines.h> // https://github.com/sparklemotion/nokogiri/issues/2696
5
+
4
6
  #ifdef _MSC_VER
5
7
  # ifndef WIN32_LEAN_AND_MEAN
6
8
  # define WIN32_LEAN_AND_MEAN
@@ -23,7 +25,6 @@
23
25
  # define NOKOPUBVAR extern
24
26
  #endif
25
27
 
26
-
27
28
  #include <stdlib.h>
28
29
  #include <string.h>
29
30
  #include <assert.h>
@@ -50,6 +51,7 @@
50
51
  #include <libxslt/xsltconfig.h>
51
52
  #include <libxslt/xsltutils.h>
52
53
  #include <libxslt/transform.h>
54
+ #include <libxslt/imports.h>
53
55
  #include <libxslt/xsltInternals.h>
54
56
 
55
57
  #include <libexslt/exslt.h>
@@ -75,22 +77,25 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
75
77
  #define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
76
78
  #define RBSTR_OR_QNIL(_str) (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
77
79
 
78
- #ifdef DEBUG
79
- # define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_NO_FREE")) return ; if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p);
80
- # define NOKOGIRI_DEBUG_END(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p end\n", __FILE__, __LINE__, p);
81
- #else
82
- # define NOKOGIRI_DEBUG_START(p)
83
- # define NOKOGIRI_DEBUG_END(p)
80
+ #ifndef NORETURN_DECL
81
+ # if defined(__GNUC__)
82
+ # define NORETURN_DECL __attribute__ ((noreturn))
83
+ # else
84
+ # define NORETURN_DECL
85
+ # endif
84
86
  #endif
85
87
 
86
- #ifndef NORETURN
88
+ #ifndef PRINTFLIKE_DECL
87
89
  # if defined(__GNUC__)
88
- # define NORETURN(name) __attribute__((noreturn)) name
90
+ # define PRINTFLIKE_DECL(stringidx, argidx) __attribute__ ((format(printf,stringidx,argidx)))
89
91
  # else
90
- # define NORETURN(name) name
92
+ # define PRINTFLIKE_DECL(stringidx, argidx)
91
93
  # endif
92
94
  #endif
93
95
 
96
+ #if defined(TRUFFLERUBY) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
97
+ # define TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
98
+ #endif
94
99
 
95
100
  NOKOPUBVAR VALUE mNokogiri ;
96
101
  NOKOPUBVAR VALUE mNokogiriGumbo ;
@@ -162,15 +167,16 @@ typedef struct _nokogiriXsltStylesheetTuple {
162
167
  VALUE func_instances;
163
168
  } nokogiriXsltStylesheetTuple;
164
169
 
165
- int vasprintf(char **strp, const char *fmt, va_list ap);
166
170
  void noko_xml_document_pin_node(xmlNodePtr);
167
171
  void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
172
+ int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
168
173
 
169
174
  int noko_io_read(void *ctx, char *buffer, int len);
170
175
  int noko_io_write(void *ctx, char *buffer, int len);
171
176
  int noko_io_close(void *ctx);
172
177
 
173
178
  #define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
179
+ #define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
174
180
 
175
181
  VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ;
176
182
  VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ;
@@ -182,12 +188,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
182
188
  VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
183
189
 
184
190
  VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
191
+ xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
185
192
 
186
193
  VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
187
194
  VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
195
+ xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
188
196
  NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
189
197
  xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
190
198
 
199
+ xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
200
+
201
+ xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
202
+
203
+ VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
204
+ xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
205
+
191
206
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
192
207
  #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
193
208
  #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
@@ -197,15 +212,15 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
197
212
  #define NOKOGIRI_SAX_SELF(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->self
198
213
  #define NOKOGIRI_SAX_CTXT(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->ctxt
199
214
  #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
200
- #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) free(_tuple)
215
+ #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) ruby_xfree(_tuple)
201
216
 
202
217
  #define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
203
218
  #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
204
219
 
205
220
  #if HAVE_RB_CATEGORY_WARNING
206
- # define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
221
+ # define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
207
222
  #else
208
- # define NOKO_WARN_DEPRECATION(message) rb_warning(message)
223
+ # define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
209
224
  #endif
210
225
 
211
226
  void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
@@ -214,7 +229,7 @@ void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerSta
214
229
  void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
215
230
  VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
216
231
  void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
217
- NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error));
232
+ NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error);
218
233
  void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
219
234
  const char *function_name) ;
220
235
 
@@ -222,7 +237,7 @@ static inline
222
237
  nokogiriSAXTuplePtr
223
238
  nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
224
239
  {
225
- nokogiriSAXTuplePtr tuple = malloc(sizeof(nokogiriSAXTuple));
240
+ nokogiriSAXTuplePtr tuple = ruby_xmalloc(sizeof(nokogiriSAXTuple));
226
241
  tuple->self = self;
227
242
  tuple->ctxt = ctxt;
228
243
  return tuple;
@@ -32,7 +32,7 @@ rb_foreign_error_handler(VALUE klass)
32
32
  * Do NOT use this outside of the Nokogiri test suite.
33
33
  */
34
34
  void
35
- noko_init_test_global_handlers()
35
+ noko_init_test_global_handlers(void)
36
36
  {
37
37
  VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test");
38
38
 
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
68
68
  rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
69
69
  }
70
70
 
71
- Noko_Node_Get_Struct(document, xmlDoc, xml_doc);
71
+ xml_doc = noko_xml_document_unwrap(document);
72
72
 
73
73
  node = xmlNewDocProp(
74
74
  xml_doc,
@@ -89,7 +89,7 @@ new (int argc, VALUE *argv, VALUE klass)
89
89
  }
90
90
 
91
91
  void
92
- noko_init_xml_attr()
92
+ noko_init_xml_attr(void)
93
93
  {
94
94
  assert(cNokogiriXmlNode);
95
95
  /*
@@ -13,7 +13,7 @@ attribute_type(VALUE self)
13
13
  {
14
14
  xmlAttributePtr node;
15
15
  Noko_Node_Get_Struct(self, xmlAttribute, node);
16
- return INT2NUM((long)node->atype);
16
+ return INT2NUM(node->atype);
17
17
  }
18
18
 
19
19
  /*
@@ -59,7 +59,7 @@ enumeration(VALUE self)
59
59
  }
60
60
 
61
61
  void
62
- noko_init_xml_attribute_decl()
62
+ noko_init_xml_attribute_decl(void)
63
63
  {
64
64
  assert(cNokogiriXmlNode);
65
65
  cNokogiriXmlAttributeDecl = rb_define_class_under(mNokogiriXml, "AttributeDecl", cNokogiriXmlNode);
@@ -12,31 +12,45 @@ VALUE cNokogiriXmlCData;
12
12
  * raise a TypeError exception.
13
13
  */
14
14
  static VALUE
15
- new (int argc, VALUE *argv, VALUE klass)
15
+ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
16
16
  {
17
- xmlDocPtr xml_doc;
18
- xmlNodePtr node;
19
- VALUE doc;
20
- VALUE content;
21
- VALUE rest;
17
+ xmlDocPtr c_document;
18
+ xmlNodePtr c_node;
19
+ VALUE rb_document;
20
+ VALUE rb_content;
21
+ VALUE rb_rest;
22
22
  VALUE rb_node;
23
- xmlChar *content_str = NULL;
24
- int content_str_len = 0;
23
+ xmlChar *c_content = NULL;
24
+ int c_content_len = 0;
25
25
 
26
- rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
26
+ rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
27
27
 
28
- Noko_Node_Get_Struct(doc, xmlDoc, xml_doc);
28
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
29
+ rb_raise(rb_eTypeError,
30
+ "expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
31
+ rb_obj_class(rb_document));
32
+ }
33
+
34
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
35
+ xmlNodePtr deprecated_node_type_arg;
36
+ // TODO: deprecate allowing Node
37
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
38
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
39
+ c_document = deprecated_node_type_arg->doc;
40
+ } else {
41
+ c_document = noko_xml_document_unwrap(rb_document);
42
+ }
29
43
 
30
- if (!NIL_P(content)) {
31
- content_str = (xmlChar *)StringValuePtr(content);
32
- content_str_len = RSTRING_LEN(content);
44
+ if (!NIL_P(rb_content)) {
45
+ c_content = (xmlChar *)StringValuePtr(rb_content);
46
+ c_content_len = RSTRING_LENINT(rb_content);
33
47
  }
34
48
 
35
- node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
49
+ c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
36
50
 
37
- noko_xml_document_pin_node(node);
51
+ noko_xml_document_pin_node(c_node);
38
52
 
39
- rb_node = noko_xml_node_wrap(klass, node);
53
+ rb_node = noko_xml_node_wrap(klass, c_node);
40
54
  rb_obj_call_init(rb_node, argc, argv);
41
55
 
42
56
  if (rb_block_given_p()) { rb_yield(rb_node); }
@@ -45,7 +59,7 @@ new (int argc, VALUE *argv, VALUE klass)
45
59
  }
46
60
 
47
61
  void
48
- noko_init_xml_cdata()
62
+ noko_init_xml_cdata(void)
49
63
  {
50
64
  assert(cNokogiriXmlText);
51
65
  /*
@@ -53,5 +67,5 @@ noko_init_xml_cdata()
53
67
  */
54
68
  cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText);
55
69
 
56
- rb_define_singleton_method(cNokogiriXmlCData, "new", new, -1);
70
+ rb_define_singleton_method(cNokogiriXmlCData, "new", rb_xml_cdata_s_new, -1);
57
71
  }
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
30
30
  rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
31
  }
32
32
 
33
- Data_Get_Struct(document, xmlDoc, xml_doc);
33
+ xml_doc = noko_xml_document_unwrap(document);
34
34
 
35
35
  node = xmlNewDocComment(
36
36
  xml_doc,
@@ -48,7 +48,7 @@ new (int argc, VALUE *argv, VALUE klass)
48
48
  }
49
49
 
50
50
  void
51
- noko_init_xml_comment()
51
+ noko_init_xml_comment(void)
52
52
  {
53
53
  assert(cNokogiriXmlCharacterData);
54
54
  /*
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
51
51
  }
52
52
 
53
53
  static void
54
- mark(xmlDocPtr doc)
54
+ mark(void *data)
55
55
  {
56
+ xmlDocPtr doc = (xmlDocPtr)data;
56
57
  nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
57
58
  if (tuple) {
58
59
  rb_gc_mark(tuple->doc);
@@ -61,33 +62,83 @@ mark(xmlDocPtr doc)
61
62
  }
62
63
 
63
64
  static void
64
- dealloc(xmlDocPtr doc)
65
+ dealloc(void *data)
65
66
  {
67
+ xmlDocPtr doc = (xmlDocPtr)data;
66
68
  st_table *node_hash;
67
69
 
68
- NOKOGIRI_DEBUG_START(doc);
69
-
70
70
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
71
71
 
72
72
  st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
73
73
  st_free_table(node_hash);
74
74
 
75
- free(doc->_private);
75
+ ruby_xfree(doc->_private);
76
76
 
77
- /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
78
- * have their _private pointers cleared. This is to avoid libxml-ruby's
79
- * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
80
- * free context, which can result in segfaults.
77
+ #pragma GCC diagnostic push
78
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
79
+ /*
80
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
81
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
82
+ * nokogiri, which will result in segfaults.
83
+ *
84
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
85
+ * before that callback gets invoked.
86
+ *
87
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
88
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
81
89
  */
82
90
  if (xmlDeregisterNodeDefaultValue) {
83
91
  remove_private((xmlNodePtr)doc);
84
92
  }
93
+ #pragma GCC diagnostic pop
85
94
 
86
95
  xmlFreeDoc(doc);
96
+ }
97
+
98
+ static size_t
99
+ memsize_node(const xmlNodePtr node)
100
+ {
101
+ /* note we don't count namespace definitions, just going for a good-enough number here */
102
+ xmlNodePtr child;
103
+ xmlAttrPtr property;
104
+ size_t memsize = 0;
105
+
106
+ memsize += xmlStrlen(node->name);
107
+
108
+ if (node->type == XML_ELEMENT_NODE) {
109
+ for (property = node->properties; property; property = property->next) {
110
+ memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
111
+ }
112
+ }
113
+ if (node->type == XML_TEXT_NODE) {
114
+ memsize += xmlStrlen(node->content);
115
+ }
116
+ for (child = node->children; child; child = child->next) {
117
+ memsize += sizeof(xmlNode) + memsize_node(child);
118
+ }
119
+ return memsize;
120
+ }
87
121
 
88
- NOKOGIRI_DEBUG_END(doc);
122
+ static size_t
123
+ memsize(const void *data)
124
+ {
125
+ xmlDocPtr doc = (const xmlDocPtr)data;
126
+ size_t memsize = sizeof(xmlDoc);
127
+ /* This may not account for all memory use */
128
+ memsize += memsize_node((xmlNodePtr)doc);
129
+ return memsize;
89
130
  }
90
131
 
132
+ static const rb_data_type_t noko_xml_document_data_type = {
133
+ .wrap_struct_name = "Nokogiri::XML::Document",
134
+ .function = {
135
+ .dmark = mark,
136
+ .dfree = dealloc,
137
+ .dsize = memsize,
138
+ },
139
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
140
+ };
141
+
91
142
  static void
92
143
  recursively_remove_namespaces_from_node(xmlNodePtr node)
93
144
  {
@@ -104,7 +155,11 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
104
155
  (node->type == XML_XINCLUDE_START) ||
105
156
  (node->type == XML_XINCLUDE_END)) &&
106
157
  node->nsDef) {
107
- xmlFreeNsList(node->nsDef);
158
+ xmlNsPtr curr = node->nsDef;
159
+ while (curr) {
160
+ noko_xml_document_pin_namespace(curr, node->doc);
161
+ curr = curr->next;
162
+ }
108
163
  node->nsDef = NULL;
109
164
  }
110
165
 
@@ -126,8 +181,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
126
181
  static VALUE
127
182
  url(VALUE self)
128
183
  {
129
- xmlDocPtr doc;
130
- Data_Get_Struct(self, xmlDoc, doc);
184
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
131
185
 
132
186
  if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
133
187
 
@@ -146,7 +200,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
146
200
  xmlDocPtr c_document;
147
201
  xmlNodePtr c_new_root = NULL, c_current_root;
148
202
 
149
- Data_Get_Struct(self, xmlDoc, c_document);
203
+ c_document = noko_xml_document_unwrap(self);
150
204
 
151
205
  c_current_root = xmlDocGetRootElement(c_document);
152
206
  if (c_current_root) {
@@ -190,7 +244,7 @@ rb_xml_document_root(VALUE self)
190
244
  xmlDocPtr c_document;
191
245
  xmlNodePtr c_root;
192
246
 
193
- Data_Get_Struct(self, xmlDoc, c_document);
247
+ c_document = noko_xml_document_unwrap(self);
194
248
 
195
249
  c_root = xmlDocGetRootElement(c_document);
196
250
  if (!c_root) {
@@ -209,8 +263,7 @@ rb_xml_document_root(VALUE self)
209
263
  static VALUE
210
264
  set_encoding(VALUE self, VALUE encoding)
211
265
  {
212
- xmlDocPtr doc;
213
- Data_Get_Struct(self, xmlDoc, doc);
266
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
214
267
 
215
268
  if (doc->encoding) {
216
269
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
@@ -230,8 +283,7 @@ set_encoding(VALUE self, VALUE encoding)
230
283
  static VALUE
231
284
  encoding(VALUE self)
232
285
  {
233
- xmlDocPtr doc;
234
- Data_Get_Struct(self, xmlDoc, doc);
286
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
235
287
 
236
288
  if (!doc->encoding) { return Qnil; }
237
289
  return NOKOGIRI_STR_NEW2(doc->encoding);
@@ -246,8 +298,7 @@ encoding(VALUE self)
246
298
  static VALUE
247
299
  version(VALUE self)
248
300
  {
249
- xmlDocPtr doc;
250
- Data_Get_Struct(self, xmlDoc, doc);
301
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
251
302
 
252
303
  if (!doc->version) { return Qnil; }
253
304
  return NOKOGIRI_STR_NEW2(doc->version);
@@ -369,7 +420,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
369
420
  level = INT2NUM((long)1);
370
421
  }
371
422
 
372
- Data_Get_Struct(self, xmlDoc, doc);
423
+ doc = noko_xml_document_unwrap(self);
373
424
 
374
425
  dup = xmlCopyDoc(doc, (int)NUM2INT(level));
375
426
 
@@ -442,8 +493,7 @@ new (int argc, VALUE *argv, VALUE klass)
442
493
  static VALUE
443
494
  remove_namespaces_bang(VALUE self)
444
495
  {
445
- xmlDocPtr doc ;
446
- Data_Get_Struct(self, xmlDoc, doc);
496
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
447
497
 
448
498
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
449
499
  return self;
@@ -471,7 +521,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
471
521
  xmlEntityPtr ptr;
472
522
  xmlDocPtr doc ;
473
523
 
474
- Data_Get_Struct(self, xmlDoc, doc);
524
+ doc = noko_xml_document_unwrap(self);
475
525
 
476
526
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
477
527
  &content);
@@ -536,6 +586,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
536
586
  VALUE rb_mode;
537
587
  VALUE rb_namespaces;
538
588
  VALUE rb_comments_p;
589
+ int c_mode = 0;
539
590
  xmlChar **c_namespaces;
540
591
 
541
592
  xmlDocPtr c_doc;
@@ -547,10 +598,18 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
547
598
  VALUE rb_io;
548
599
 
549
600
  rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
550
- if (!NIL_P(rb_mode)) { Check_Type(rb_mode, T_FIXNUM); }
551
- if (!NIL_P(rb_namespaces)) { Check_Type(rb_namespaces, T_ARRAY); }
601
+ if (!NIL_P(rb_mode)) {
602
+ Check_Type(rb_mode, T_FIXNUM);
603
+ c_mode = NUM2INT(rb_mode);
604
+ }
605
+ if (!NIL_P(rb_namespaces)) {
606
+ Check_Type(rb_namespaces, T_ARRAY);
607
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
608
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
609
+ }
610
+ }
552
611
 
553
- Data_Get_Struct(self, xmlDoc, c_doc);
612
+ c_doc = noko_xml_document_unwrap(self);
554
613
 
555
614
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
556
615
  rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
@@ -569,7 +628,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
569
628
  c_namespaces = NULL;
570
629
  } else {
571
630
  long ns_len = RARRAY_LEN(rb_namespaces);
572
- c_namespaces = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
631
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
573
632
  for (int j = 0 ; j < ns_len ; j++) {
574
633
  VALUE entry = rb_ary_entry(rb_namespaces, j);
575
634
  c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
@@ -577,12 +636,12 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
577
636
  }
578
637
 
579
638
  xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
580
- (int)(NIL_P(rb_mode) ? 0 : NUM2INT(rb_mode)),
639
+ c_mode,
581
640
  c_namespaces,
582
641
  (int)RTEST(rb_comments_p),
583
642
  c_obuf);
584
643
 
585
- free(c_namespaces);
644
+ ruby_xfree(c_namespaces);
586
645
  xmlOutputBufferClose(c_obuf);
587
646
 
588
647
  return rb_funcall(rb_io, rb_intern("string"), 0);
@@ -598,9 +657,9 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
598
657
  klass = cNokogiriXmlDocument;
599
658
  }
600
659
 
601
- rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
660
+ rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
602
661
 
603
- tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
662
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
604
663
  tuple->doc = rb_document;
605
664
  tuple->unlinkedNodes = st_init_numtable_with_size(128);
606
665
  tuple->node_cache = rb_ary_new();
@@ -631,6 +690,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
631
690
  return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
632
691
  }
633
692
 
693
+ xmlDocPtr
694
+ noko_xml_document_unwrap(VALUE rb_document)
695
+ {
696
+ xmlDocPtr c_document;
697
+ TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
698
+ return c_document;
699
+ }
700
+
701
+ /* Schema creation will remove and deallocate "blank" nodes.
702
+ * If those blank nodes have been exposed to Ruby, they could get freed
703
+ * out from under the VALUE pointer. This function checks to see if any of
704
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
705
+ */
706
+ int
707
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
708
+ {
709
+ VALUE cache = DOC_NODE_CACHE(c_document);
710
+
711
+ if (NIL_P(cache)) {
712
+ return 0;
713
+ }
714
+
715
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
716
+ xmlNodePtr node;
717
+ VALUE element = rb_ary_entry(cache, jnode);
718
+
719
+ Noko_Node_Get_Struct(element, xmlNode, node);
720
+ if (xmlIsBlankNode(node)) {
721
+ return 1;
722
+ }
723
+ }
724
+
725
+ return 0;
726
+ }
634
727
 
635
728
  void
636
729
  noko_xml_document_pin_node(xmlNodePtr node)
@@ -655,7 +748,7 @@ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
655
748
 
656
749
 
657
750
  void
658
- noko_init_xml_document()
751
+ noko_init_xml_document(void)
659
752
  {
660
753
  assert(cNokogiriXmlNode);
661
754
  /*
@@ -19,7 +19,7 @@ new (int argc, VALUE *argv, VALUE klass)
19
19
 
20
20
  rb_scan_args(argc, argv, "1*", &document, &rest);
21
21
 
22
- Data_Get_Struct(document, xmlDoc, xml_doc);
22
+ xml_doc = noko_xml_document_unwrap(document);
23
23
 
24
24
  node = xmlNewDocFragment(xml_doc->doc);
25
25
 
@@ -32,7 +32,7 @@ new (int argc, VALUE *argv, VALUE klass)
32
32
  }
33
33
 
34
34
  void
35
- noko_init_xml_document_fragment()
35
+ noko_init_xml_document_fragment(void)
36
36
  {
37
37
  assert(cNokogiriXmlNode);
38
38
  /*
@@ -139,7 +139,7 @@ validate(VALUE self, VALUE document)
139
139
  VALUE error_list;
140
140
 
141
141
  Noko_Node_Get_Struct(self, xmlDtd, dtd);
142
- Noko_Node_Get_Struct(document, xmlDoc, doc);
142
+ doc = noko_xml_document_unwrap(document);
143
143
  error_list = rb_ary_new();
144
144
 
145
145
  ctxt = xmlNewValidCtxt();
@@ -190,7 +190,7 @@ external_id(VALUE self)
190
190
  }
191
191
 
192
192
  void
193
- noko_init_xml_dtd()
193
+ noko_init_xml_dtd(void)
194
194
  {
195
195
  assert(cNokogiriXmlNode);
196
196
  /*