nokogiri 1.14.4-x86-linux → 1.15.0-x86-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (112)
  1. checksums.yaml +4 -4
  2. data/Gemfile +9 -8
  3. data/dependencies.yml +6 -6
  4. data/ext/nokogiri/extconf.rb +66 -22
  5. data/ext/nokogiri/html4_document.c +1 -2
  6. data/ext/nokogiri/html4_element_description.c +19 -14
  7. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  8. data/ext/nokogiri/html4_sax_push_parser.c +2 -2
  9. data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
  10. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +52 -38
  11. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +18 -18
  12. data/ext/nokogiri/include/libxml2/libxml/SAX.h +36 -36
  13. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +37 -36
  14. data/ext/nokogiri/include/libxml2/libxml/c14n.h +4 -4
  15. data/ext/nokogiri/include/libxml2/libxml/catalog.h +36 -36
  16. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +9 -9
  17. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +28 -28
  18. data/ext/nokogiri/include/libxml2/libxml/dict.h +13 -13
  19. data/ext/nokogiri/include/libxml2/libxml/encoding.h +20 -19
  20. data/ext/nokogiri/include/libxml2/libxml/entities.h +19 -21
  21. data/ext/nokogiri/include/libxml2/libxml/globals.h +89 -59
  22. data/ext/nokogiri/include/libxml2/libxml/hash.h +25 -25
  23. data/ext/nokogiri/include/libxml2/libxml/list.h +26 -26
  24. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +22 -22
  25. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -17
  26. data/ext/nokogiri/include/libxml2/libxml/parser.h +100 -79
  27. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +149 -143
  28. data/ext/nokogiri/include/libxml2/libxml/pattern.h +15 -15
  29. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +26 -26
  30. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +2 -2
  31. data/ext/nokogiri/include/libxml2/libxml/schematron.h +18 -18
  32. data/ext/nokogiri/include/libxml2/libxml/threads.h +23 -20
  33. data/ext/nokogiri/include/libxml2/libxml/tree.h +166 -164
  34. data/ext/nokogiri/include/libxml2/libxml/uri.h +15 -15
  35. data/ext/nokogiri/include/libxml2/libxml/valid.h +72 -87
  36. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +10 -10
  37. data/ext/nokogiri/include/libxml2/libxml/xlink.h +5 -5
  38. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +60 -59
  39. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +21 -21
  40. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +18 -46
  41. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +14 -41
  42. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +26 -24
  43. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +4 -4
  44. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +87 -87
  45. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +30 -30
  46. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +10 -10
  47. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +30 -30
  48. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +34 -34
  49. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +30 -30
  50. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +166 -166
  51. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +50 -27
  52. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +80 -80
  53. data/ext/nokogiri/include/libxml2/libxml/xpath.h +41 -41
  54. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +117 -117
  55. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +21 -21
  56. data/ext/nokogiri/include/libxslt/attributes.h +1 -0
  57. data/ext/nokogiri/include/libxslt/variables.h +3 -3
  58. data/ext/nokogiri/include/libxslt/xsltInternals.h +14 -4
  59. data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -4
  60. data/ext/nokogiri/include/libxslt/xsltlocale.h +14 -54
  61. data/ext/nokogiri/include/libxslt/xsltutils.h +35 -2
  62. data/ext/nokogiri/nokogiri.c +46 -24
  63. data/ext/nokogiri/nokogiri.h +13 -2
  64. data/ext/nokogiri/xml_attr.c +1 -1
  65. data/ext/nokogiri/xml_cdata.c +10 -2
  66. data/ext/nokogiri/xml_comment.c +1 -1
  67. data/ext/nokogiri/xml_document.c +102 -22
  68. data/ext/nokogiri/xml_document_fragment.c +1 -1
  69. data/ext/nokogiri/xml_dtd.c +1 -1
  70. data/ext/nokogiri/xml_element_content.c +32 -29
  71. data/ext/nokogiri/xml_element_decl.c +5 -5
  72. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  73. data/ext/nokogiri/xml_entity_reference.c +1 -1
  74. data/ext/nokogiri/xml_namespace.c +11 -12
  75. data/ext/nokogiri/xml_node.c +7 -7
  76. data/ext/nokogiri/xml_node_set.c +125 -105
  77. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  78. data/ext/nokogiri/xml_reader.c +37 -28
  79. data/ext/nokogiri/xml_relax_ng.c +65 -78
  80. data/ext/nokogiri/xml_sax_parser.c +24 -5
  81. data/ext/nokogiri/xml_sax_parser_context.c +46 -25
  82. data/ext/nokogiri/xml_sax_push_parser.c +29 -8
  83. data/ext/nokogiri/xml_schema.c +90 -116
  84. data/ext/nokogiri/xml_text.c +10 -2
  85. data/ext/nokogiri/xml_xpath_context.c +156 -83
  86. data/ext/nokogiri/xslt_stylesheet.c +103 -50
  87. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  88. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  89. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  90. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  91. data/lib/nokogiri/css/xpath_visitor.rb +2 -2
  92. data/lib/nokogiri/extension.rb +1 -1
  93. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  94. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  95. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  96. data/lib/nokogiri/html5/node.rb +5 -0
  97. data/lib/nokogiri/html5.rb +5 -2
  98. data/lib/nokogiri/version/constant.rb +1 -1
  99. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  100. data/lib/nokogiri/xml/document_fragment.rb +1 -1
  101. data/lib/nokogiri/xml/element_content.rb +10 -2
  102. data/lib/nokogiri/xml/element_decl.rb +4 -2
  103. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  104. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  105. data/lib/nokogiri/xml/node.rb +22 -13
  106. data/lib/nokogiri/xml/pp/node.rb +23 -12
  107. data/lib/nokogiri/xml/sax/document.rb +1 -1
  108. data/lib/nokogiri/xml/searchable.rb +18 -10
  109. data/lib/nokogiri/xslt.rb +73 -3
  110. data/lib/nokogiri.rb +12 -4
  111. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  112. metadata +2 -2
@@ -14,63 +14,23 @@
14
14
  #include <libxml/xmlstring.h>
15
15
  #include "xsltexports.h"
16
16
 
17
- #ifdef HAVE_STRXFRM_L
18
-
19
- /*
20
- * XSLT_LOCALE_POSIX:
21
- * Macro indicating to use POSIX locale extensions
22
- */
23
- #define XSLT_LOCALE_POSIX
24
-
25
- #ifdef HAVE_LOCALE_H
26
- #include <locale.h>
27
- #endif
28
- #ifdef HAVE_XLOCALE_H
29
- #include <xlocale.h>
30
- #endif
31
-
32
- typedef locale_t xsltLocale;
33
- typedef xmlChar xsltLocaleChar;
34
-
35
- #elif defined(_WIN32)
36
-
37
- /*
38
- * XSLT_LOCALE_WINAPI:
39
- * Macro indicating to use WinAPI for extended locale support
40
- */
41
- #define XSLT_LOCALE_WINAPI
42
-
43
- #include <windows.h>
44
- #include <winnls.h>
45
-
46
- typedef LCID xsltLocale;
47
- typedef wchar_t xsltLocaleChar;
48
-
49
- #else
50
-
51
- /*
52
- * XSLT_LOCALE_NONE:
53
- * Macro indicating that there's no extended locale support
54
- */
55
- #define XSLT_LOCALE_NONE
56
-
57
- typedef void *xsltLocale;
58
- typedef xmlChar xsltLocaleChar;
59
-
60
- #endif
61
-
62
- XSLTPUBFUN xsltLocale XSLTCALL
63
- xsltNewLocale (const xmlChar *langName);
17
+ XSLTPUBFUN void * XSLTCALL
18
+ xsltNewLocale (const xmlChar *langName,
19
+ int lowerFirst);
64
20
  XSLTPUBFUN void XSLTCALL
65
- xsltFreeLocale (xsltLocale locale);
66
- XSLTPUBFUN xsltLocaleChar * XSLTCALL
67
- xsltStrxfrm (xsltLocale locale,
21
+ xsltFreeLocale (void *locale);
22
+ XSLTPUBFUN xmlChar * XSLTCALL
23
+ xsltStrxfrm (void *locale,
68
24
  const xmlChar *string);
69
- XSLTPUBFUN int XSLTCALL
70
- xsltLocaleStrcmp (xsltLocale locale,
71
- const xsltLocaleChar *str1,
72
- const xsltLocaleChar *str2);
73
25
  XSLTPUBFUN void XSLTCALL
74
26
  xsltFreeLocales (void);
75
27
 
28
+ /* Backward compatibility */
29
+ typedef void *xsltLocale;
30
+ typedef xmlChar xsltLocaleChar;
31
+ XSLTPUBFUN int XSLTCALL
32
+ xsltLocaleStrcmp (void *locale,
33
+ const xmlChar *str1,
34
+ const xmlChar *str2);
35
+
76
36
  #endif /* __XML_XSLTLOCALE_H__ */
@@ -91,6 +91,13 @@ XSLTPUBFUN const xmlChar * XSLTCALL
91
91
  XSLTPUBFUN int XSLTCALL
92
92
  xsltGetUTF8Char (const unsigned char *utf,
93
93
  int *len);
94
+ #ifdef IN_LIBXSLT
95
+ /** DOC_DISABLE */
96
+ XSLTPUBFUN int XSLTCALL
97
+ xsltGetUTF8CharZ (const unsigned char *utf,
98
+ int *len);
99
+ /** DOC_ENABLE */
100
+ #endif
94
101
 
95
102
  /*
96
103
  * XSLT Debug Tracing Tracing Types
@@ -178,6 +185,11 @@ XSLTPUBFUN void XSLTCALL
178
185
  XSLTPUBFUN void XSLTCALL
179
186
  xsltSetCtxtSortFunc (xsltTransformContextPtr ctxt,
180
187
  xsltSortFunc handler);
188
+ XSLTPUBFUN void XSLTCALL
189
+ xsltSetCtxtLocaleHandlers (xsltTransformContextPtr ctxt,
190
+ xsltNewLocaleFunc newLocale,
191
+ xsltFreeLocaleFunc freeLocale,
192
+ xsltGenSortKeyFunc genSortKey);
181
193
  XSLTPUBFUN void XSLTCALL
182
194
  xsltDefaultSortFunction (xsltTransformContextPtr ctxt,
183
195
  xmlNodePtr *sorts,
@@ -244,6 +256,24 @@ XSLTPUBFUN xmlXPathCompExprPtr XSLTCALL
244
256
  const xmlChar *str,
245
257
  int flags);
246
258
 
259
+ #ifdef IN_LIBXSLT
260
+ /** DOC_DISABLE */
261
+ #define XSLT_SOURCE_NODE_MASK 15u
262
+ #define XSLT_SOURCE_NODE_HAS_KEY 1u
263
+ #define XSLT_SOURCE_NODE_HAS_ID 2u
264
+ int
265
+ xsltGetSourceNodeFlags(xmlNodePtr node);
266
+ int
267
+ xsltSetSourceNodeFlags(xsltTransformContextPtr ctxt, xmlNodePtr node,
268
+ int flags);
269
+ int
270
+ xsltClearSourceNodeFlags(xmlNodePtr node, int flags);
271
+ void **
272
+ xsltGetPSVIPtr(xmlNodePtr cur);
273
+ /** DOC_ENABLE */
274
+ #endif
275
+
276
+ #ifdef WITH_PROFILER
247
277
  /*
248
278
  * Profiling.
249
279
  */
@@ -257,6 +287,7 @@ XSLTPUBFUN long XSLTCALL
257
287
  xsltTimestamp (void);
258
288
  XSLTPUBFUN void XSLTCALL
259
289
  xsltCalibrateAdjust (long delta);
290
+ #endif
260
291
 
261
292
  /**
262
293
  * XSLT_TIMESTAMP_TICS_PER_SEC:
@@ -289,10 +320,11 @@ typedef void (*xsltHandleDebuggerCallback) (xmlNodePtr cur, xmlNodePtr node,
289
320
  typedef int (*xsltAddCallCallback) (xsltTemplatePtr templ, xmlNodePtr source);
290
321
  typedef void (*xsltDropCallCallback) (void);
291
322
 
292
- XSLTPUBFUN void XSLTCALL
293
- xsltSetDebuggerStatus (int value);
294
323
  XSLTPUBFUN int XSLTCALL
295
324
  xsltGetDebuggerStatus (void);
325
+ #ifdef WITH_DEBUGGER
326
+ XSLTPUBFUN void XSLTCALL
327
+ xsltSetDebuggerStatus (int value);
296
328
  XSLTPUBFUN int XSLTCALL
297
329
  xsltSetDebuggerCallbacks (int no, void *block);
298
330
  XSLTPUBFUN int XSLTCALL
@@ -300,6 +332,7 @@ XSLTPUBFUN int XSLTCALL
300
332
  xmlNodePtr source);
301
333
  XSLTPUBFUN void XSLTCALL
302
334
  xslDropCall (void);
335
+ #endif
303
336
 
304
337
  #ifdef __cplusplus
305
338
  }
@@ -137,6 +137,48 @@ noko_io_close(void *io)
137
137
  }
138
138
 
139
139
 
140
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
141
+ # define NOKOGIRI_WINDOWS_DLLS 1
142
+ #else
143
+ # define NOKOGIRI_WINDOWS_DLLS 0
144
+ #endif
145
+
146
+ //
147
+ // | dlls || true | false |
148
+ // | nlmm || | |
149
+ // |-----------++---------+---------|
150
+ // | NULL || default | ruby |
151
+ // | "random" || default | ruby |
152
+ // | "ruby" || ruby | ruby |
153
+ // | "default" || default | default |
154
+ //
155
+ // We choose *not* to use Ruby's memory management functions with windows DLLs because of this
156
+ // issue: https://github.com/sparklemotion/nokogiri/issues/2241
157
+ //
158
+ static void
159
+ set_libxml_memory_management(void)
160
+ {
161
+ const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
162
+ if (nlmm) {
163
+ if (strcmp(nlmm, "default") == 0) {
164
+ goto libxml_uses_default_memory_management;
165
+ } else if (strcmp(nlmm, "ruby") == 0) {
166
+ goto libxml_uses_ruby_memory_management;
167
+ }
168
+ }
169
+ if (NOKOGIRI_WINDOWS_DLLS) {
170
+ libxml_uses_default_memory_management:
171
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
172
+ return;
173
+ } else {
174
+ libxml_uses_ruby_memory_management:
175
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
176
+ xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
177
+ return;
178
+ }
179
+ }
180
+
181
+
140
182
  void
141
183
  Init_nokogiri(void)
142
184
  {
@@ -150,6 +192,10 @@ Init_nokogiri(void)
150
192
  mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
151
193
  mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
152
194
 
195
+ set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
196
+ xmlInitParser();
197
+ exsltRegisterAll();
198
+
153
199
  rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
154
200
  rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
155
201
 
@@ -182,30 +228,6 @@ Init_nokogiri(void)
182
228
  rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
183
229
  #endif
184
230
 
185
- #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
186
- /*
187
- * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
188
- * issue in libxml 2.9.12:
189
- *
190
- * https://github.com/sparklemotion/nokogiri/issues/2241
191
- *
192
- * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
193
- * this config only for the specific libxml2 versions 2.9.12.
194
- *
195
- * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
196
- * default memory management functions (recall that this config was introduced to reduce memory
197
- * bloat and allow Ruby to GC more often); but we should *really* test with production workloads
198
- * before making that kind of a potentially-invasive change.
199
- */
200
- rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
201
- #else
202
- rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
203
- xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
204
- #endif
205
-
206
- xmlInitParser();
207
- exsltRegisterAll();
208
-
209
231
  if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
210
232
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
211
233
  } else {
@@ -51,6 +51,7 @@
51
51
  #include <libxslt/xsltconfig.h>
52
52
  #include <libxslt/xsltutils.h>
53
53
  #include <libxslt/transform.h>
54
+ #include <libxslt/imports.h>
54
55
  #include <libxslt/xsltInternals.h>
55
56
 
56
57
  #include <libexslt/exslt.h>
@@ -168,6 +169,7 @@ typedef struct _nokogiriXsltStylesheetTuple {
168
169
 
169
170
  void noko_xml_document_pin_node(xmlNodePtr);
170
171
  void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
172
+ int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
171
173
 
172
174
  int noko_io_read(void *ctx, char *buffer, int len);
173
175
  int noko_io_write(void *ctx, char *buffer, int len);
@@ -186,12 +188,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
186
188
  VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
187
189
 
188
190
  VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
191
+ xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
189
192
 
190
193
  VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
191
194
  VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
195
+ xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
192
196
  NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
193
197
  xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
194
198
 
199
+ xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
200
+
201
+ xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
202
+
203
+ VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
204
+ xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
205
+
195
206
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
196
207
  #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
197
208
  #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
@@ -207,9 +218,9 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
207
218
  #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
208
219
 
209
220
  #if HAVE_RB_CATEGORY_WARNING
210
- # define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
221
+ # define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
211
222
  #else
212
- # define NOKO_WARN_DEPRECATION(message) rb_warning(message)
223
+ # define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
213
224
  #endif
214
225
 
215
226
  void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
68
68
  rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
69
69
  }
70
70
 
71
- Noko_Node_Get_Struct(document, xmlDoc, xml_doc);
71
+ xml_doc = noko_xml_document_unwrap(document);
72
72
 
73
73
  node = xmlNewDocProp(
74
74
  xml_doc,
@@ -25,14 +25,22 @@ new (int argc, VALUE *argv, VALUE klass)
25
25
 
26
26
  rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
27
27
 
28
- Noko_Node_Get_Struct(doc, xmlDoc, xml_doc);
28
+ if (rb_obj_is_kind_of(doc, cNokogiriXmlDocument)) {
29
+ xml_doc = noko_xml_document_unwrap(doc);
30
+ } else {
31
+ xmlNodePtr deprecated_node_type_arg;
32
+ // TODO: deprecate allowing Node
33
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
34
+ Noko_Node_Get_Struct(doc, xmlNode, deprecated_node_type_arg);
35
+ xml_doc = deprecated_node_type_arg->doc;
36
+ }
29
37
 
30
38
  if (!NIL_P(content)) {
31
39
  content_str = (xmlChar *)StringValuePtr(content);
32
40
  content_str_len = RSTRING_LENINT(content);
33
41
  }
34
42
 
35
- node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
43
+ node = xmlNewCDataBlock(xml_doc, content_str, content_str_len);
36
44
 
37
45
  noko_xml_document_pin_node(node);
38
46
 
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
30
30
  rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
31
  }
32
32
 
33
- Data_Get_Struct(document, xmlDoc, xml_doc);
33
+ xml_doc = noko_xml_document_unwrap(document);
34
34
 
35
35
  node = xmlNewDocComment(
36
36
  xml_doc,
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
51
51
  }
52
52
 
53
53
  static void
54
- mark(xmlDocPtr doc)
54
+ mark(void *data)
55
55
  {
56
+ xmlDocPtr doc = (xmlDocPtr)data;
56
57
  nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
57
58
  if (tuple) {
58
59
  rb_gc_mark(tuple->doc);
@@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
61
62
  }
62
63
 
63
64
  static void
64
- dealloc(xmlDocPtr doc)
65
+ dealloc(void *data)
65
66
  {
67
+ xmlDocPtr doc = (xmlDocPtr)data;
66
68
  st_table *node_hash;
67
69
 
68
70
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
@@ -72,18 +74,67 @@ dealloc(xmlDocPtr doc)
72
74
 
73
75
  ruby_xfree(doc->_private);
74
76
 
75
- /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
76
- * have their _private pointers cleared. This is to avoid libxml-ruby's
77
- * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
78
- * free context, which can result in segfaults.
77
+ #pragma GCC diagnostic push
78
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
79
+ /*
80
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
81
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
82
+ * nokogiri, which will result in segfaults.
83
+ *
84
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
85
+ * before that callback gets invoked.
86
+ *
87
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
88
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
79
89
  */
80
90
  if (xmlDeregisterNodeDefaultValue) {
81
91
  remove_private((xmlNodePtr)doc);
82
92
  }
93
+ #pragma GCC diagnostic pop
83
94
 
84
95
  xmlFreeDoc(doc);
85
96
  }
86
97
 
98
+ static size_t
99
+ memsize_node(const xmlNodePtr node)
100
+ {
101
+ /* note we don't count namespace definitions, just going for a good-enough number here */
102
+ xmlNodePtr child;
103
+ size_t memsize = 0;
104
+
105
+ memsize += xmlStrlen(node->name);
106
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
107
+ memsize += sizeof(xmlAttr) + memsize_node(child);
108
+ }
109
+ if (node->type == XML_TEXT_NODE) {
110
+ memsize += xmlStrlen(node->content);
111
+ }
112
+ for (child = node->children; child; child = child->next) {
113
+ memsize += sizeof(xmlNode) + memsize_node(child);
114
+ }
115
+ return memsize;
116
+ }
117
+
118
+ static size_t
119
+ memsize(const void *data)
120
+ {
121
+ xmlDocPtr doc = (const xmlDocPtr)data;
122
+ size_t memsize = sizeof(xmlDoc);
123
+ /* This may not account for all memory use */
124
+ memsize += memsize_node((xmlNodePtr)doc);
125
+ return memsize;
126
+ }
127
+
128
+ static const rb_data_type_t noko_xml_document_data_type = {
129
+ .wrap_struct_name = "Nokogiri::XML::Document",
130
+ .function = {
131
+ .dmark = mark,
132
+ .dfree = dealloc,
133
+ .dsize = memsize,
134
+ },
135
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
136
+ };
137
+
87
138
  static void
88
139
  recursively_remove_namespaces_from_node(xmlNodePtr node)
89
140
  {
@@ -126,8 +177,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
126
177
  static VALUE
127
178
  url(VALUE self)
128
179
  {
129
- xmlDocPtr doc;
130
- Data_Get_Struct(self, xmlDoc, doc);
180
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
131
181
 
132
182
  if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
133
183
 
@@ -146,7 +196,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
146
196
  xmlDocPtr c_document;
147
197
  xmlNodePtr c_new_root = NULL, c_current_root;
148
198
 
149
- Data_Get_Struct(self, xmlDoc, c_document);
199
+ c_document = noko_xml_document_unwrap(self);
150
200
 
151
201
  c_current_root = xmlDocGetRootElement(c_document);
152
202
  if (c_current_root) {
@@ -190,7 +240,7 @@ rb_xml_document_root(VALUE self)
190
240
  xmlDocPtr c_document;
191
241
  xmlNodePtr c_root;
192
242
 
193
- Data_Get_Struct(self, xmlDoc, c_document);
243
+ c_document = noko_xml_document_unwrap(self);
194
244
 
195
245
  c_root = xmlDocGetRootElement(c_document);
196
246
  if (!c_root) {
@@ -209,8 +259,7 @@ rb_xml_document_root(VALUE self)
209
259
  static VALUE
210
260
  set_encoding(VALUE self, VALUE encoding)
211
261
  {
212
- xmlDocPtr doc;
213
- Data_Get_Struct(self, xmlDoc, doc);
262
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
214
263
 
215
264
  if (doc->encoding) {
216
265
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
@@ -230,8 +279,7 @@ set_encoding(VALUE self, VALUE encoding)
230
279
  static VALUE
231
280
  encoding(VALUE self)
232
281
  {
233
- xmlDocPtr doc;
234
- Data_Get_Struct(self, xmlDoc, doc);
282
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
235
283
 
236
284
  if (!doc->encoding) { return Qnil; }
237
285
  return NOKOGIRI_STR_NEW2(doc->encoding);
@@ -246,8 +294,7 @@ encoding(VALUE self)
246
294
  static VALUE
247
295
  version(VALUE self)
248
296
  {
249
- xmlDocPtr doc;
250
- Data_Get_Struct(self, xmlDoc, doc);
297
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
251
298
 
252
299
  if (!doc->version) { return Qnil; }
253
300
  return NOKOGIRI_STR_NEW2(doc->version);
@@ -369,7 +416,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
369
416
  level = INT2NUM((long)1);
370
417
  }
371
418
 
372
- Data_Get_Struct(self, xmlDoc, doc);
419
+ doc = noko_xml_document_unwrap(self);
373
420
 
374
421
  dup = xmlCopyDoc(doc, (int)NUM2INT(level));
375
422
 
@@ -442,8 +489,7 @@ new (int argc, VALUE *argv, VALUE klass)
442
489
  static VALUE
443
490
  remove_namespaces_bang(VALUE self)
444
491
  {
445
- xmlDocPtr doc ;
446
- Data_Get_Struct(self, xmlDoc, doc);
492
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
447
493
 
448
494
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
449
495
  return self;
@@ -471,7 +517,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
471
517
  xmlEntityPtr ptr;
472
518
  xmlDocPtr doc ;
473
519
 
474
- Data_Get_Struct(self, xmlDoc, doc);
520
+ doc = noko_xml_document_unwrap(self);
475
521
 
476
522
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
477
523
  &content);
@@ -559,7 +605,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
559
605
  }
560
606
  }
561
607
 
562
- Data_Get_Struct(self, xmlDoc, c_doc);
608
+ c_doc = noko_xml_document_unwrap(self);
563
609
 
564
610
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
565
611
  rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
@@ -607,7 +653,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
607
653
  klass = cNokogiriXmlDocument;
608
654
  }
609
655
 
610
- rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
656
+ rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
611
657
 
612
658
  tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
613
659
  tuple->doc = rb_document;
@@ -640,6 +686,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
640
686
  return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
641
687
  }
642
688
 
689
+ xmlDocPtr
690
+ noko_xml_document_unwrap(VALUE rb_document)
691
+ {
692
+ xmlDocPtr c_document;
693
+ TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
694
+ return c_document;
695
+ }
696
+
697
+ /* Schema creation will remove and deallocate "blank" nodes.
698
+ * If those blank nodes have been exposed to Ruby, they could get freed
699
+ * out from under the VALUE pointer. This function checks to see if any of
700
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
701
+ */
702
+ int
703
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
704
+ {
705
+ VALUE cache = DOC_NODE_CACHE(c_document);
706
+
707
+ if (NIL_P(cache)) {
708
+ return 0;
709
+ }
710
+
711
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
712
+ xmlNodePtr node;
713
+ VALUE element = rb_ary_entry(cache, jnode);
714
+
715
+ Noko_Node_Get_Struct(element, xmlNode, node);
716
+ if (xmlIsBlankNode(node)) {
717
+ return 1;
718
+ }
719
+ }
720
+
721
+ return 0;
722
+ }
643
723
 
644
724
  void
645
725
  noko_xml_document_pin_node(xmlNodePtr node)
@@ -19,7 +19,7 @@ new (int argc, VALUE *argv, VALUE klass)
19
19
 
20
20
  rb_scan_args(argc, argv, "1*", &document, &rest);
21
21
 
22
- Data_Get_Struct(document, xmlDoc, xml_doc);
22
+ xml_doc = noko_xml_document_unwrap(document);
23
23
 
24
24
  node = xmlNewDocFragment(xml_doc->doc);
25
25
 
@@ -139,7 +139,7 @@ validate(VALUE self, VALUE document)
139
139
  VALUE error_list;
140
140
 
141
141
  Noko_Node_Get_Struct(self, xmlDtd, dtd);
142
- Noko_Node_Get_Struct(document, xmlDoc, doc);
142
+ doc = noko_xml_document_unwrap(document);
143
143
  error_list = rb_ary_new();
144
144
 
145
145
  ctxt = xmlNewValidCtxt();