nokogiri 1.13.6 → 1.16.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125):
  1. checksums.yaml +4 -4
  2. data/Gemfile +43 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +21 -11
  6. data/dependencies.yml +34 -15
  7. data/ext/nokogiri/extconf.rb +167 -48
  8. data/ext/nokogiri/gumbo.c +21 -11
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +4 -4
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +46 -16
  16. data/ext/nokogiri/test_global_handlers.c +2 -2
  17. data/ext/nokogiri/xml_attr.c +3 -3
  18. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  19. data/ext/nokogiri/xml_cdata.c +31 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +135 -38
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +9 -9
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +10 -10
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +6 -6
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +75 -14
  30. data/ext/nokogiri/xml_node.c +365 -87
  31. data/ext/nokogiri/xml_node_set.c +129 -111
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +126 -64
  34. data/ext/nokogiri/xml_relax_ng.c +67 -82
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +50 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +31 -12
  38. data/ext/nokogiri/xml_schema.c +95 -118
  39. data/ext/nokogiri/xml_syntax_error.c +4 -4
  40. data/ext/nokogiri/xml_text.c +27 -14
  41. data/ext/nokogiri/xml_xpath_context.c +213 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +126 -67
  43. data/gumbo-parser/Makefile +28 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +29 -10
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +2 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/parser_extras.rb +1 -1
  70. data/lib/nokogiri/css/xpath_visitor.rb +8 -26
  71. data/lib/nokogiri/css.rb +6 -0
  72. data/lib/nokogiri/decorators/slop.rb +1 -1
  73. data/lib/nokogiri/encoding_handler.rb +57 -0
  74. data/lib/nokogiri/extension.rb +4 -3
  75. data/lib/nokogiri/html4/document.rb +3 -122
  76. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  77. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  78. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  79. data/lib/nokogiri/html4.rb +1 -0
  80. data/lib/nokogiri/html5/document.rb +113 -36
  81. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  82. data/lib/nokogiri/html5/node.rb +8 -5
  83. data/lib/nokogiri/html5.rb +74 -226
  84. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  85. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  86. data/lib/nokogiri/version/constant.rb +1 -1
  87. data/lib/nokogiri/version/info.rb +16 -14
  88. data/lib/nokogiri/xml/attr.rb +49 -0
  89. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  90. data/lib/nokogiri/xml/builder.rb +1 -1
  91. data/lib/nokogiri/xml/document.rb +103 -56
  92. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  93. data/lib/nokogiri/xml/element_content.rb +10 -2
  94. data/lib/nokogiri/xml/element_decl.rb +4 -2
  95. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  96. data/lib/nokogiri/xml/namespace.rb +41 -0
  97. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  98. data/lib/nokogiri/xml/node.rb +241 -70
  99. data/lib/nokogiri/xml/node_set.rb +90 -11
  100. data/lib/nokogiri/xml/parse_options.rb +129 -50
  101. data/lib/nokogiri/xml/pp/node.rb +28 -15
  102. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  103. data/lib/nokogiri/xml/reader.rb +16 -17
  104. data/lib/nokogiri/xml/sax/document.rb +1 -1
  105. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  106. data/lib/nokogiri/xml/searchable.rb +21 -13
  107. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  108. data/lib/nokogiri/xml.rb +1 -1
  109. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  110. data/lib/nokogiri/xslt.rb +75 -5
  111. data/lib/nokogiri.rb +15 -15
  112. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  113. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  114. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  115. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  116. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  117. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  118. metadata +21 -248
  119. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  120. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  121. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  122. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  123. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  124. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  125. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -1,6 +1,8 @@
1
1
  #ifndef NOKOGIRI_NATIVE
2
2
  #define NOKOGIRI_NATIVE
3
3
 
4
+ #include <ruby/defines.h> // https://github.com/sparklemotion/nokogiri/issues/2696
5
+
4
6
  #ifdef _MSC_VER
5
7
  # ifndef WIN32_LEAN_AND_MEAN
6
8
  # define WIN32_LEAN_AND_MEAN
@@ -23,7 +25,6 @@
23
25
  # define NOKOPUBVAR extern
24
26
  #endif
25
27
 
26
-
27
28
  #include <stdlib.h>
28
29
  #include <string.h>
29
30
  #include <assert.h>
@@ -50,6 +51,7 @@
50
51
  #include <libxslt/xsltconfig.h>
51
52
  #include <libxslt/xsltutils.h>
52
53
  #include <libxslt/transform.h>
54
+ #include <libxslt/imports.h>
53
55
  #include <libxslt/xsltInternals.h>
54
56
 
55
57
  #include <libexslt/exslt.h>
@@ -64,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
64
66
  #define XMLNS_PREFIX "xmlns"
65
67
  #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
66
68
 
69
+ #ifndef xmlErrorConstPtr
70
+ # if LIBXML_VERSION >= 21200
71
+ # define xmlErrorConstPtr const xmlError *
72
+ # else
73
+ # define xmlErrorConstPtr xmlError *
74
+ # endif
75
+ #endif
67
76
 
68
77
  #include <ruby.h>
69
78
  #include <ruby/st.h>
@@ -75,22 +84,25 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
75
84
  #define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
76
85
  #define RBSTR_OR_QNIL(_str) (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
77
86
 
78
- #ifdef DEBUG
79
- # define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_NO_FREE")) return ; if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p);
80
- # define NOKOGIRI_DEBUG_END(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p end\n", __FILE__, __LINE__, p);
81
- #else
82
- # define NOKOGIRI_DEBUG_START(p)
83
- # define NOKOGIRI_DEBUG_END(p)
87
+ #ifndef NORETURN_DECL
88
+ # if defined(__GNUC__)
89
+ # define NORETURN_DECL __attribute__ ((noreturn))
90
+ # else
91
+ # define NORETURN_DECL
92
+ # endif
84
93
  #endif
85
94
 
86
- #ifndef NORETURN
95
+ #ifndef PRINTFLIKE_DECL
87
96
  # if defined(__GNUC__)
88
- # define NORETURN(name) __attribute__((noreturn)) name
97
+ # define PRINTFLIKE_DECL(stringidx, argidx) __attribute__ ((format(printf,stringidx,argidx)))
89
98
  # else
90
- # define NORETURN(name) name
99
+ # define PRINTFLIKE_DECL(stringidx, argidx)
91
100
  # endif
92
101
  #endif
93
102
 
103
+ #if defined(TRUFFLERUBY) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
104
+ # define TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
105
+ #endif
94
106
 
95
107
  NOKOPUBVAR VALUE mNokogiri ;
96
108
  NOKOPUBVAR VALUE mNokogiriGumbo ;
@@ -162,14 +174,17 @@ typedef struct _nokogiriXsltStylesheetTuple {
162
174
  VALUE func_instances;
163
175
  } nokogiriXsltStylesheetTuple;
164
176
 
165
- int vasprintf(char **strp, const char *fmt, va_list ap);
166
177
  void noko_xml_document_pin_node(xmlNodePtr);
167
178
  void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
179
+ int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
168
180
 
169
181
  int noko_io_read(void *ctx, char *buffer, int len);
170
182
  int noko_io_write(void *ctx, char *buffer, int len);
171
183
  int noko_io_close(void *ctx);
172
184
 
185
+ #define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
186
+ #define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
187
+
173
188
  VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ;
174
189
  VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ;
175
190
  VALUE noko_xml_node_attrs(xmlNodePtr node) ;
@@ -180,12 +195,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
180
195
  VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
181
196
 
182
197
  VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
198
+ xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
183
199
 
184
200
  VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
185
201
  VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
202
+ xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
186
203
  NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
187
204
  xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
188
205
 
206
+ xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
207
+
208
+ xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
209
+
210
+ VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
211
+ xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
212
+
189
213
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
190
214
  #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
191
215
  #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
@@ -195,18 +219,24 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
195
219
  #define NOKOGIRI_SAX_SELF(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->self
196
220
  #define NOKOGIRI_SAX_CTXT(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->ctxt
197
221
  #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
198
- #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) free(_tuple)
222
+ #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) ruby_xfree(_tuple)
199
223
 
200
224
  #define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
201
225
  #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
202
226
 
227
+ #if HAVE_RB_CATEGORY_WARNING
228
+ # define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
229
+ #else
230
+ # define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
231
+ #endif
232
+
203
233
  void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
204
234
  void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
205
235
  xmlStructuredErrorFunc handler);
206
236
  void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
207
- VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
208
- void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
209
- NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error));
237
+ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
238
+ void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
239
+ NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
210
240
  void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
211
241
  const char *function_name) ;
212
242
 
@@ -214,7 +244,7 @@ static inline
214
244
  nokogiriSAXTuplePtr
215
245
  nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
216
246
  {
217
- nokogiriSAXTuplePtr tuple = malloc(sizeof(nokogiriSAXTuple));
247
+ nokogiriSAXTuplePtr tuple = ruby_xmalloc(sizeof(nokogiriSAXTuple));
218
248
  tuple->self = self;
219
249
  tuple->ctxt = ctxt;
220
250
  return tuple;
@@ -3,7 +3,7 @@
3
3
  static VALUE foreign_error_handler_block = Qnil;
4
4
 
5
5
  static void
6
- foreign_error_handler(void *user_data, xmlErrorPtr c_error)
6
+ foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
7
7
  {
8
8
  rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
9
9
  }
@@ -32,7 +32,7 @@ rb_foreign_error_handler(VALUE klass)
32
32
  * Do NOT use this outside of the Nokogiri test suite.
33
33
  */
34
34
  void
35
- noko_init_test_global_handlers()
35
+ noko_init_test_global_handlers(void)
36
36
  {
37
37
  VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test");
38
38
 
@@ -16,7 +16,7 @@ set_value(VALUE self, VALUE content)
16
16
  xmlChar *value;
17
17
  xmlNode *cur;
18
18
 
19
- Data_Get_Struct(self, xmlAttr, attr);
19
+ Noko_Node_Get_Struct(self, xmlAttr, attr);
20
20
 
21
21
  if (attr->children) {
22
22
  xmlFreeNodeList(attr->children);
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
68
68
  rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
69
69
  }
70
70
 
71
- Data_Get_Struct(document, xmlDoc, xml_doc);
71
+ xml_doc = noko_xml_document_unwrap(document);
72
72
 
73
73
  node = xmlNewDocProp(
74
74
  xml_doc,
@@ -89,7 +89,7 @@ new (int argc, VALUE *argv, VALUE klass)
89
89
  }
90
90
 
91
91
  void
92
- noko_init_xml_attr()
92
+ noko_init_xml_attr(void)
93
93
  {
94
94
  assert(cNokogiriXmlNode);
95
95
  /*
@@ -12,8 +12,8 @@ static VALUE
12
12
  attribute_type(VALUE self)
13
13
  {
14
14
  xmlAttributePtr node;
15
- Data_Get_Struct(self, xmlAttribute, node);
16
- return INT2NUM((long)node->atype);
15
+ Noko_Node_Get_Struct(self, xmlAttribute, node);
16
+ return INT2NUM(node->atype);
17
17
  }
18
18
 
19
19
  /*
@@ -26,7 +26,7 @@ static VALUE
26
26
  default_value(VALUE self)
27
27
  {
28
28
  xmlAttributePtr node;
29
- Data_Get_Struct(self, xmlAttribute, node);
29
+ Noko_Node_Get_Struct(self, xmlAttribute, node);
30
30
 
31
31
  if (node->defaultValue) { return NOKOGIRI_STR_NEW2(node->defaultValue); }
32
32
  return Qnil;
@@ -45,7 +45,7 @@ enumeration(VALUE self)
45
45
  xmlEnumerationPtr enm;
46
46
  VALUE list;
47
47
 
48
- Data_Get_Struct(self, xmlAttribute, node);
48
+ Noko_Node_Get_Struct(self, xmlAttribute, node);
49
49
 
50
50
  list = rb_ary_new();
51
51
  enm = node->tree;
@@ -59,7 +59,7 @@ enumeration(VALUE self)
59
59
  }
60
60
 
61
61
  void
62
- noko_init_xml_attribute_decl()
62
+ noko_init_xml_attribute_decl(void)
63
63
  {
64
64
  assert(cNokogiriXmlNode);
65
65
  cNokogiriXmlAttributeDecl = rb_define_class_under(mNokogiriXml, "AttributeDecl", cNokogiriXmlNode);
@@ -12,31 +12,44 @@ VALUE cNokogiriXmlCData;
12
12
  * raise a TypeError exception.
13
13
  */
14
14
  static VALUE
15
- new (int argc, VALUE *argv, VALUE klass)
15
+ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
16
16
  {
17
- xmlDocPtr xml_doc;
18
- xmlNodePtr node;
19
- VALUE doc;
20
- VALUE content;
21
- VALUE rest;
17
+ xmlDocPtr c_document;
18
+ xmlNodePtr c_node;
19
+ VALUE rb_document;
20
+ VALUE rb_content;
21
+ VALUE rb_rest;
22
22
  VALUE rb_node;
23
- xmlChar *content_str = NULL;
24
- int content_str_len = 0;
23
+ xmlChar *c_content = NULL;
24
+ int c_content_len = 0;
25
25
 
26
- rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
26
+ rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
27
27
 
28
- Data_Get_Struct(doc, xmlDoc, xml_doc);
28
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
29
+ rb_raise(rb_eTypeError,
30
+ "expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
31
+ rb_obj_class(rb_document));
32
+ }
33
+
34
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
35
+ xmlNodePtr deprecated_node_type_arg;
36
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
37
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
38
+ c_document = deprecated_node_type_arg->doc;
39
+ } else {
40
+ c_document = noko_xml_document_unwrap(rb_document);
41
+ }
29
42
 
30
- if (!NIL_P(content)) {
31
- content_str = (xmlChar *)StringValuePtr(content);
32
- content_str_len = RSTRING_LEN(content);
43
+ if (!NIL_P(rb_content)) {
44
+ c_content = (xmlChar *)StringValuePtr(rb_content);
45
+ c_content_len = RSTRING_LENINT(rb_content);
33
46
  }
34
47
 
35
- node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
48
+ c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
36
49
 
37
- noko_xml_document_pin_node(node);
50
+ noko_xml_document_pin_node(c_node);
38
51
 
39
- rb_node = noko_xml_node_wrap(klass, node);
52
+ rb_node = noko_xml_node_wrap(klass, c_node);
40
53
  rb_obj_call_init(rb_node, argc, argv);
41
54
 
42
55
  if (rb_block_given_p()) { rb_yield(rb_node); }
@@ -45,7 +58,7 @@ new (int argc, VALUE *argv, VALUE klass)
45
58
  }
46
59
 
47
60
  void
48
- noko_init_xml_cdata()
61
+ noko_init_xml_cdata(void)
49
62
  {
50
63
  assert(cNokogiriXmlText);
51
64
  /*
@@ -53,5 +66,5 @@ noko_init_xml_cdata()
53
66
  */
54
67
  cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText);
55
68
 
56
- rb_define_singleton_method(cNokogiriXmlCData, "new", new, -1);
69
+ rb_define_singleton_method(cNokogiriXmlCData, "new", rb_xml_cdata_s_new, -1);
57
70
  }
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
30
30
  rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
31
  }
32
32
 
33
- Data_Get_Struct(document, xmlDoc, xml_doc);
33
+ xml_doc = noko_xml_document_unwrap(document);
34
34
 
35
35
  node = xmlNewDocComment(
36
36
  xml_doc,
@@ -48,7 +48,7 @@ new (int argc, VALUE *argv, VALUE klass)
48
48
  }
49
49
 
50
50
  void
51
- noko_init_xml_comment()
51
+ noko_init_xml_comment(void)
52
52
  {
53
53
  assert(cNokogiriXmlCharacterData);
54
54
  /*
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
51
51
  }
52
52
 
53
53
  static void
54
- mark(xmlDocPtr doc)
54
+ mark(void *data)
55
55
  {
56
+ xmlDocPtr doc = (xmlDocPtr)data;
56
57
  nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
57
58
  if (tuple) {
58
59
  rb_gc_mark(tuple->doc);
@@ -61,33 +62,87 @@ mark(xmlDocPtr doc)
61
62
  }
62
63
 
63
64
  static void
64
- dealloc(xmlDocPtr doc)
65
+ dealloc(void *data)
65
66
  {
67
+ xmlDocPtr doc = (xmlDocPtr)data;
66
68
  st_table *node_hash;
67
69
 
68
- NOKOGIRI_DEBUG_START(doc);
69
-
70
70
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
71
71
 
72
72
  st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
73
73
  st_free_table(node_hash);
74
74
 
75
- free(doc->_private);
75
+ ruby_xfree(doc->_private);
76
76
 
77
- /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
78
- * have their _private pointers cleared. This is to avoid libxml-ruby's
79
- * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
80
- * free context, which can result in segfaults.
77
+ #if defined(__GNUC__) && __GNUC__ >= 5
78
+ #pragma GCC diagnostic push
79
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
80
+ #endif
81
+ /*
82
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
83
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
84
+ * nokogiri, which will result in segfaults.
85
+ *
86
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
87
+ * before that callback gets invoked.
88
+ *
89
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
90
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
81
91
  */
82
92
  if (xmlDeregisterNodeDefaultValue) {
83
93
  remove_private((xmlNodePtr)doc);
84
94
  }
95
+ #if defined(__GNUC__) && __GNUC__ >= 5
96
+ #pragma GCC diagnostic pop
97
+ #endif
85
98
 
86
99
  xmlFreeDoc(doc);
100
+ }
101
+
102
+ static size_t
103
+ memsize_node(const xmlNodePtr node)
104
+ {
105
+ /* note we don't count namespace definitions, just going for a good-enough number here */
106
+ xmlNodePtr child;
107
+ xmlAttrPtr property;
108
+ size_t memsize = 0;
109
+
110
+ memsize += xmlStrlen(node->name);
111
+
112
+ if (node->type == XML_ELEMENT_NODE) {
113
+ for (property = node->properties; property; property = property->next) {
114
+ memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
115
+ }
116
+ }
117
+ if (node->type == XML_TEXT_NODE) {
118
+ memsize += xmlStrlen(node->content);
119
+ }
120
+ for (child = node->children; child; child = child->next) {
121
+ memsize += sizeof(xmlNode) + memsize_node(child);
122
+ }
123
+ return memsize;
124
+ }
87
125
 
88
- NOKOGIRI_DEBUG_END(doc);
126
+ static size_t
127
+ memsize(const void *data)
128
+ {
129
+ xmlDocPtr doc = (const xmlDocPtr)data;
130
+ size_t memsize = sizeof(xmlDoc);
131
+ /* This may not account for all memory use */
132
+ memsize += memsize_node((xmlNodePtr)doc);
133
+ return memsize;
89
134
  }
90
135
 
136
+ static const rb_data_type_t noko_xml_document_data_type = {
137
+ .wrap_struct_name = "Nokogiri::XML::Document",
138
+ .function = {
139
+ .dmark = mark,
140
+ .dfree = dealloc,
141
+ .dsize = memsize,
142
+ },
143
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
144
+ };
145
+
91
146
  static void
92
147
  recursively_remove_namespaces_from_node(xmlNodePtr node)
93
148
  {
@@ -104,7 +159,11 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
104
159
  (node->type == XML_XINCLUDE_START) ||
105
160
  (node->type == XML_XINCLUDE_END)) &&
106
161
  node->nsDef) {
107
- xmlFreeNsList(node->nsDef);
162
+ xmlNsPtr curr = node->nsDef;
163
+ while (curr) {
164
+ noko_xml_document_pin_namespace(curr, node->doc);
165
+ curr = curr->next;
166
+ }
108
167
  node->nsDef = NULL;
109
168
  }
110
169
 
@@ -126,8 +185,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
126
185
  static VALUE
127
186
  url(VALUE self)
128
187
  {
129
- xmlDocPtr doc;
130
- Data_Get_Struct(self, xmlDoc, doc);
188
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
131
189
 
132
190
  if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
133
191
 
@@ -146,7 +204,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
146
204
  xmlDocPtr c_document;
147
205
  xmlNodePtr c_new_root = NULL, c_current_root;
148
206
 
149
- Data_Get_Struct(self, xmlDoc, c_document);
207
+ c_document = noko_xml_document_unwrap(self);
150
208
 
151
209
  c_current_root = xmlDocGetRootElement(c_document);
152
210
  if (c_current_root) {
@@ -161,7 +219,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
161
219
  rb_obj_class(rb_new_root));
162
220
  }
163
221
 
164
- Data_Get_Struct(rb_new_root, xmlNode, c_new_root);
222
+ Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
165
223
 
166
224
  /* If the new root's document is not the same as the current document,
167
225
  * then we need to dup the node in to this document. */
@@ -190,7 +248,7 @@ rb_xml_document_root(VALUE self)
190
248
  xmlDocPtr c_document;
191
249
  xmlNodePtr c_root;
192
250
 
193
- Data_Get_Struct(self, xmlDoc, c_document);
251
+ c_document = noko_xml_document_unwrap(self);
194
252
 
195
253
  c_root = xmlDocGetRootElement(c_document);
196
254
  if (!c_root) {
@@ -209,8 +267,7 @@ rb_xml_document_root(VALUE self)
209
267
  static VALUE
210
268
  set_encoding(VALUE self, VALUE encoding)
211
269
  {
212
- xmlDocPtr doc;
213
- Data_Get_Struct(self, xmlDoc, doc);
270
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
214
271
 
215
272
  if (doc->encoding) {
216
273
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
@@ -230,8 +287,7 @@ set_encoding(VALUE self, VALUE encoding)
230
287
  static VALUE
231
288
  encoding(VALUE self)
232
289
  {
233
- xmlDocPtr doc;
234
- Data_Get_Struct(self, xmlDoc, doc);
290
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
235
291
 
236
292
  if (!doc->encoding) { return Qnil; }
237
293
  return NOKOGIRI_STR_NEW2(doc->encoding);
@@ -246,8 +302,7 @@ encoding(VALUE self)
246
302
  static VALUE
247
303
  version(VALUE self)
248
304
  {
249
- xmlDocPtr doc;
250
- Data_Get_Struct(self, xmlDoc, doc);
305
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
251
306
 
252
307
  if (!doc->version) { return Qnil; }
253
308
  return NOKOGIRI_STR_NEW2(doc->version);
@@ -286,7 +341,7 @@ read_io(VALUE klass,
286
341
  xmlSetStructuredErrorFunc(NULL, NULL);
287
342
 
288
343
  if (doc == NULL) {
289
- xmlErrorPtr error;
344
+ xmlErrorConstPtr error;
290
345
 
291
346
  xmlFreeDoc(doc);
292
347
 
@@ -332,7 +387,7 @@ read_memory(VALUE klass,
332
387
  xmlSetStructuredErrorFunc(NULL, NULL);
333
388
 
334
389
  if (doc == NULL) {
335
- xmlErrorPtr error;
390
+ xmlErrorConstPtr error;
336
391
 
337
392
  xmlFreeDoc(doc);
338
393
 
@@ -369,7 +424,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
369
424
  level = INT2NUM((long)1);
370
425
  }
371
426
 
372
- Data_Get_Struct(self, xmlDoc, doc);
427
+ doc = noko_xml_document_unwrap(self);
373
428
 
374
429
  dup = xmlCopyDoc(doc, (int)NUM2INT(level));
375
430
 
@@ -442,8 +497,7 @@ new (int argc, VALUE *argv, VALUE klass)
442
497
  static VALUE
443
498
  remove_namespaces_bang(VALUE self)
444
499
  {
445
- xmlDocPtr doc ;
446
- Data_Get_Struct(self, xmlDoc, doc);
500
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
447
501
 
448
502
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
449
503
  return self;
@@ -471,7 +525,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
471
525
  xmlEntityPtr ptr;
472
526
  xmlDocPtr doc ;
473
527
 
474
- Data_Get_Struct(self, xmlDoc, doc);
528
+ doc = noko_xml_document_unwrap(self);
475
529
 
476
530
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
477
531
  &content);
@@ -487,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
487
541
  );
488
542
 
489
543
  if (NULL == ptr) {
490
- xmlErrorPtr error = xmlGetLastError();
544
+ xmlErrorConstPtr error = xmlGetLastError();
491
545
  if (error) {
492
546
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
493
547
  } else {
@@ -536,6 +590,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
536
590
  VALUE rb_mode;
537
591
  VALUE rb_namespaces;
538
592
  VALUE rb_comments_p;
593
+ int c_mode = 0;
539
594
  xmlChar **c_namespaces;
540
595
 
541
596
  xmlDocPtr c_doc;
@@ -547,10 +602,18 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
547
602
  VALUE rb_io;
548
603
 
549
604
  rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
550
- if (!NIL_P(rb_mode)) { Check_Type(rb_mode, T_FIXNUM); }
551
- if (!NIL_P(rb_namespaces)) { Check_Type(rb_namespaces, T_ARRAY); }
605
+ if (!NIL_P(rb_mode)) {
606
+ Check_Type(rb_mode, T_FIXNUM);
607
+ c_mode = NUM2INT(rb_mode);
608
+ }
609
+ if (!NIL_P(rb_namespaces)) {
610
+ Check_Type(rb_namespaces, T_ARRAY);
611
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
612
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
613
+ }
614
+ }
552
615
 
553
- Data_Get_Struct(self, xmlDoc, c_doc);
616
+ c_doc = noko_xml_document_unwrap(self);
554
617
 
555
618
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
556
619
  rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
@@ -569,7 +632,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
569
632
  c_namespaces = NULL;
570
633
  } else {
571
634
  long ns_len = RARRAY_LEN(rb_namespaces);
572
- c_namespaces = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
635
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
573
636
  for (int j = 0 ; j < ns_len ; j++) {
574
637
  VALUE entry = rb_ary_entry(rb_namespaces, j);
575
638
  c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
@@ -577,12 +640,12 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
577
640
  }
578
641
 
579
642
  xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
580
- (int)(NIL_P(rb_mode) ? 0 : NUM2INT(rb_mode)),
643
+ c_mode,
581
644
  c_namespaces,
582
645
  (int)RTEST(rb_comments_p),
583
646
  c_obuf);
584
647
 
585
- free(c_namespaces);
648
+ ruby_xfree(c_namespaces);
586
649
  xmlOutputBufferClose(c_obuf);
587
650
 
588
651
  return rb_funcall(rb_io, rb_intern("string"), 0);
@@ -598,9 +661,9 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
598
661
  klass = cNokogiriXmlDocument;
599
662
  }
600
663
 
601
- rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
664
+ rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
602
665
 
603
- tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
666
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
604
667
  tuple->doc = rb_document;
605
668
  tuple->unlinkedNodes = st_init_numtable_with_size(128);
606
669
  tuple->node_cache = rb_ary_new();
@@ -631,6 +694,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
631
694
  return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
632
695
  }
633
696
 
697
+ xmlDocPtr
698
+ noko_xml_document_unwrap(VALUE rb_document)
699
+ {
700
+ xmlDocPtr c_document;
701
+ TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
702
+ return c_document;
703
+ }
704
+
705
+ /* Schema creation will remove and deallocate "blank" nodes.
706
+ * If those blank nodes have been exposed to Ruby, they could get freed
707
+ * out from under the VALUE pointer. This function checks to see if any of
708
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
709
+ */
710
+ int
711
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
712
+ {
713
+ VALUE cache = DOC_NODE_CACHE(c_document);
714
+
715
+ if (NIL_P(cache)) {
716
+ return 0;
717
+ }
718
+
719
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
720
+ xmlNodePtr node;
721
+ VALUE element = rb_ary_entry(cache, jnode);
722
+
723
+ Noko_Node_Get_Struct(element, xmlNode, node);
724
+ if (xmlIsBlankNode(node)) {
725
+ return 1;
726
+ }
727
+ }
728
+
729
+ return 0;
730
+ }
634
731
 
635
732
  void
636
733
  noko_xml_document_pin_node(xmlNodePtr node)
@@ -655,7 +752,7 @@ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
655
752
 
656
753
 
657
754
  void
658
- noko_init_xml_document()
755
+ noko_init_xml_document(void)
659
756
  {
660
757
  assert(cNokogiriXmlNode);
661
758
  /*