nokogiri 1.14.2 → 1.16.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (85)
  1. checksums.yaml +4 -4
  2. data/Gemfile +19 -15
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +69 -26
  6. data/ext/nokogiri/html4_document.c +1 -2
  7. data/ext/nokogiri/html4_element_description.c +19 -14
  8. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  9. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  10. data/ext/nokogiri/nokogiri.c +46 -24
  11. data/ext/nokogiri/nokogiri.h +23 -5
  12. data/ext/nokogiri/test_global_handlers.c +1 -1
  13. data/ext/nokogiri/xml_attr.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +30 -17
  15. data/ext/nokogiri/xml_comment.c +1 -1
  16. data/ext/nokogiri/xml_document.c +113 -25
  17. data/ext/nokogiri/xml_document_fragment.c +1 -1
  18. data/ext/nokogiri/xml_dtd.c +1 -1
  19. data/ext/nokogiri/xml_element_content.c +32 -29
  20. data/ext/nokogiri/xml_element_decl.c +5 -5
  21. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  22. data/ext/nokogiri/xml_entity_reference.c +1 -1
  23. data/ext/nokogiri/xml_namespace.c +11 -16
  24. data/ext/nokogiri/xml_node.c +13 -16
  25. data/ext/nokogiri/xml_node_set.c +125 -105
  26. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  27. data/ext/nokogiri/xml_reader.c +61 -74
  28. data/ext/nokogiri/xml_relax_ng.c +66 -79
  29. data/ext/nokogiri/xml_sax_parser.c +24 -5
  30. data/ext/nokogiri/xml_sax_parser_context.c +50 -25
  31. data/ext/nokogiri/xml_sax_push_parser.c +30 -9
  32. data/ext/nokogiri/xml_schema.c +94 -115
  33. data/ext/nokogiri/xml_syntax_error.c +3 -3
  34. data/ext/nokogiri/xml_text.c +26 -13
  35. data/ext/nokogiri/xml_xpath_context.c +153 -83
  36. data/ext/nokogiri/xslt_stylesheet.c +111 -53
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +8 -4
  39. data/gumbo-parser/src/foreign_attrs.c +13 -14
  40. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  41. data/gumbo-parser/src/parser.c +21 -5
  42. data/gumbo-parser/src/tokenizer.c +1 -0
  43. data/lib/nokogiri/css/parser_extras.rb +1 -1
  44. data/lib/nokogiri/css/xpath_visitor.rb +3 -23
  45. data/lib/nokogiri/extension.rb +1 -1
  46. data/lib/nokogiri/html4/document.rb +1 -1
  47. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  48. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  49. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  50. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  51. data/lib/nokogiri/html5/node.rb +5 -0
  52. data/lib/nokogiri/html5.rb +0 -63
  53. data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
  54. data/lib/nokogiri/version/constant.rb +1 -1
  55. data/lib/nokogiri/version/info.rb +6 -5
  56. data/lib/nokogiri/xml/attr.rb +2 -2
  57. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  58. data/lib/nokogiri/xml/document.rb +4 -5
  59. data/lib/nokogiri/xml/document_fragment.rb +3 -3
  60. data/lib/nokogiri/xml/element_content.rb +10 -2
  61. data/lib/nokogiri/xml/element_decl.rb +4 -2
  62. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  63. data/lib/nokogiri/xml/namespace.rb +1 -2
  64. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  65. data/lib/nokogiri/xml/node.rb +53 -37
  66. data/lib/nokogiri/xml/node_set.rb +3 -3
  67. data/lib/nokogiri/xml/pp/node.rb +23 -12
  68. data/lib/nokogiri/xml/reader.rb +10 -9
  69. data/lib/nokogiri/xml/sax/document.rb +1 -1
  70. data/lib/nokogiri/xml/searchable.rb +21 -13
  71. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  72. data/lib/nokogiri/xml.rb +1 -1
  73. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  74. data/lib/nokogiri/xslt.rb +74 -4
  75. data/lib/nokogiri.rb +13 -5
  76. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  77. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  78. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  79. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  80. data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
  81. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  82. metadata +16 -12
  83. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  84. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  85. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -51,6 +51,7 @@
51
51
  #include <libxslt/xsltconfig.h>
52
52
  #include <libxslt/xsltutils.h>
53
53
  #include <libxslt/transform.h>
54
+ #include <libxslt/imports.h>
54
55
  #include <libxslt/xsltInternals.h>
55
56
 
56
57
  #include <libexslt/exslt.h>
@@ -65,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
65
66
  #define XMLNS_PREFIX "xmlns"
66
67
  #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
67
68
 
69
+ #ifndef xmlErrorConstPtr
70
+ # if LIBXML_VERSION >= 21200
71
+ # define xmlErrorConstPtr const xmlError *
72
+ # else
73
+ # define xmlErrorConstPtr xmlError *
74
+ # endif
75
+ #endif
68
76
 
69
77
  #include <ruby.h>
70
78
  #include <ruby/st.h>
@@ -168,6 +176,7 @@ typedef struct _nokogiriXsltStylesheetTuple {
168
176
 
169
177
  void noko_xml_document_pin_node(xmlNodePtr);
170
178
  void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
179
+ int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
171
180
 
172
181
  int noko_io_read(void *ctx, char *buffer, int len);
173
182
  int noko_io_write(void *ctx, char *buffer, int len);
@@ -186,12 +195,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
186
195
  VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
187
196
 
188
197
  VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
198
+ xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
189
199
 
190
200
  VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
191
201
  VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
202
+ xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
192
203
  NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
193
204
  xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
194
205
 
206
+ xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
207
+
208
+ xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
209
+
210
+ VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
211
+ xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
212
+
195
213
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
196
214
  #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
197
215
  #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
@@ -207,18 +225,18 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
207
225
  #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
208
226
 
209
227
  #if HAVE_RB_CATEGORY_WARNING
210
- # define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
228
+ # define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
211
229
  #else
212
- # define NOKO_WARN_DEPRECATION(message) rb_warning(message)
230
+ # define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
213
231
  #endif
214
232
 
215
233
  void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
216
234
  void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
217
235
  xmlStructuredErrorFunc handler);
218
236
  void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
219
- VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
220
- void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
221
- NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error);
237
+ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
238
+ void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
239
+ NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
222
240
  void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
223
241
  const char *function_name) ;
224
242
 
@@ -3,7 +3,7 @@
3
3
  static VALUE foreign_error_handler_block = Qnil;
4
4
 
5
5
  static void
6
- foreign_error_handler(void *user_data, xmlErrorPtr c_error)
6
+ foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
7
7
  {
8
8
  rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
9
9
  }
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
68
68
  rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
69
69
  }
70
70
 
71
- Noko_Node_Get_Struct(document, xmlDoc, xml_doc);
71
+ xml_doc = noko_xml_document_unwrap(document);
72
72
 
73
73
  node = xmlNewDocProp(
74
74
  xml_doc,
@@ -12,31 +12,44 @@ VALUE cNokogiriXmlCData;
12
12
  * raise a TypeError exception.
13
13
  */
14
14
  static VALUE
15
- new (int argc, VALUE *argv, VALUE klass)
15
+ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
16
16
  {
17
- xmlDocPtr xml_doc;
18
- xmlNodePtr node;
19
- VALUE doc;
20
- VALUE content;
21
- VALUE rest;
17
+ xmlDocPtr c_document;
18
+ xmlNodePtr c_node;
19
+ VALUE rb_document;
20
+ VALUE rb_content;
21
+ VALUE rb_rest;
22
22
  VALUE rb_node;
23
- xmlChar *content_str = NULL;
24
- int content_str_len = 0;
23
+ xmlChar *c_content = NULL;
24
+ int c_content_len = 0;
25
25
 
26
- rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
26
+ rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
27
27
 
28
- Noko_Node_Get_Struct(doc, xmlDoc, xml_doc);
28
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
29
+ rb_raise(rb_eTypeError,
30
+ "expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
31
+ rb_obj_class(rb_document));
32
+ }
33
+
34
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
35
+ xmlNodePtr deprecated_node_type_arg;
36
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
37
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
38
+ c_document = deprecated_node_type_arg->doc;
39
+ } else {
40
+ c_document = noko_xml_document_unwrap(rb_document);
41
+ }
29
42
 
30
- if (!NIL_P(content)) {
31
- content_str = (xmlChar *)StringValuePtr(content);
32
- content_str_len = RSTRING_LENINT(content);
43
+ if (!NIL_P(rb_content)) {
44
+ c_content = (xmlChar *)StringValuePtr(rb_content);
45
+ c_content_len = RSTRING_LENINT(rb_content);
33
46
  }
34
47
 
35
- node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
48
+ c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
36
49
 
37
- noko_xml_document_pin_node(node);
50
+ noko_xml_document_pin_node(c_node);
38
51
 
39
- rb_node = noko_xml_node_wrap(klass, node);
52
+ rb_node = noko_xml_node_wrap(klass, c_node);
40
53
  rb_obj_call_init(rb_node, argc, argv);
41
54
 
42
55
  if (rb_block_given_p()) { rb_yield(rb_node); }
@@ -53,5 +66,5 @@ noko_init_xml_cdata(void)
53
66
  */
54
67
  cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText);
55
68
 
56
- rb_define_singleton_method(cNokogiriXmlCData, "new", new, -1);
69
+ rb_define_singleton_method(cNokogiriXmlCData, "new", rb_xml_cdata_s_new, -1);
57
70
  }
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
30
30
  rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
31
  }
32
32
 
33
- Data_Get_Struct(document, xmlDoc, xml_doc);
33
+ xml_doc = noko_xml_document_unwrap(document);
34
34
 
35
35
  node = xmlNewDocComment(
36
36
  xml_doc,
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
51
51
  }
52
52
 
53
53
  static void
54
- mark(xmlDocPtr doc)
54
+ mark(void *data)
55
55
  {
56
+ xmlDocPtr doc = (xmlDocPtr)data;
56
57
  nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
57
58
  if (tuple) {
58
59
  rb_gc_mark(tuple->doc);
@@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
61
62
  }
62
63
 
63
64
  static void
64
- dealloc(xmlDocPtr doc)
65
+ dealloc(void *data)
65
66
  {
67
+ xmlDocPtr doc = (xmlDocPtr)data;
66
68
  st_table *node_hash;
67
69
 
68
70
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
@@ -72,18 +74,75 @@ dealloc(xmlDocPtr doc)
72
74
 
73
75
  ruby_xfree(doc->_private);
74
76
 
75
- /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
76
- * have their _private pointers cleared. This is to avoid libxml-ruby's
77
- * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
78
- * free context, which can result in segfaults.
77
+ #if defined(__GNUC__) && __GNUC__ >= 5
78
+ #pragma GCC diagnostic push
79
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
80
+ #endif
81
+ /*
82
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
83
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
84
+ * nokogiri, which will result in segfaults.
85
+ *
86
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
87
+ * before that callback gets invoked.
88
+ *
89
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
90
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
79
91
  */
80
92
  if (xmlDeregisterNodeDefaultValue) {
81
93
  remove_private((xmlNodePtr)doc);
82
94
  }
95
+ #if defined(__GNUC__) && __GNUC__ >= 5
96
+ #pragma GCC diagnostic pop
97
+ #endif
83
98
 
84
99
  xmlFreeDoc(doc);
85
100
  }
86
101
 
102
+ static size_t
103
+ memsize_node(const xmlNodePtr node)
104
+ {
105
+ /* note we don't count namespace definitions, just going for a good-enough number here */
106
+ xmlNodePtr child;
107
+ xmlAttrPtr property;
108
+ size_t memsize = 0;
109
+
110
+ memsize += xmlStrlen(node->name);
111
+
112
+ if (node->type == XML_ELEMENT_NODE) {
113
+ for (property = node->properties; property; property = property->next) {
114
+ memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
115
+ }
116
+ }
117
+ if (node->type == XML_TEXT_NODE) {
118
+ memsize += xmlStrlen(node->content);
119
+ }
120
+ for (child = node->children; child; child = child->next) {
121
+ memsize += sizeof(xmlNode) + memsize_node(child);
122
+ }
123
+ return memsize;
124
+ }
125
+
126
+ static size_t
127
+ memsize(const void *data)
128
+ {
129
+ xmlDocPtr doc = (const xmlDocPtr)data;
130
+ size_t memsize = sizeof(xmlDoc);
131
+ /* This may not account for all memory use */
132
+ memsize += memsize_node((xmlNodePtr)doc);
133
+ return memsize;
134
+ }
135
+
136
+ static const rb_data_type_t noko_xml_document_data_type = {
137
+ .wrap_struct_name = "Nokogiri::XML::Document",
138
+ .function = {
139
+ .dmark = mark,
140
+ .dfree = dealloc,
141
+ .dsize = memsize,
142
+ },
143
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
144
+ };
145
+
87
146
  static void
88
147
  recursively_remove_namespaces_from_node(xmlNodePtr node)
89
148
  {
@@ -126,8 +185,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
126
185
  static VALUE
127
186
  url(VALUE self)
128
187
  {
129
- xmlDocPtr doc;
130
- Data_Get_Struct(self, xmlDoc, doc);
188
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
131
189
 
132
190
  if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
133
191
 
@@ -146,7 +204,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
146
204
  xmlDocPtr c_document;
147
205
  xmlNodePtr c_new_root = NULL, c_current_root;
148
206
 
149
- Data_Get_Struct(self, xmlDoc, c_document);
207
+ c_document = noko_xml_document_unwrap(self);
150
208
 
151
209
  c_current_root = xmlDocGetRootElement(c_document);
152
210
  if (c_current_root) {
@@ -190,7 +248,7 @@ rb_xml_document_root(VALUE self)
190
248
  xmlDocPtr c_document;
191
249
  xmlNodePtr c_root;
192
250
 
193
- Data_Get_Struct(self, xmlDoc, c_document);
251
+ c_document = noko_xml_document_unwrap(self);
194
252
 
195
253
  c_root = xmlDocGetRootElement(c_document);
196
254
  if (!c_root) {
@@ -209,8 +267,7 @@ rb_xml_document_root(VALUE self)
209
267
  static VALUE
210
268
  set_encoding(VALUE self, VALUE encoding)
211
269
  {
212
- xmlDocPtr doc;
213
- Data_Get_Struct(self, xmlDoc, doc);
270
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
214
271
 
215
272
  if (doc->encoding) {
216
273
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
@@ -230,8 +287,7 @@ set_encoding(VALUE self, VALUE encoding)
230
287
  static VALUE
231
288
  encoding(VALUE self)
232
289
  {
233
- xmlDocPtr doc;
234
- Data_Get_Struct(self, xmlDoc, doc);
290
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
235
291
 
236
292
  if (!doc->encoding) { return Qnil; }
237
293
  return NOKOGIRI_STR_NEW2(doc->encoding);
@@ -246,8 +302,7 @@ encoding(VALUE self)
246
302
  static VALUE
247
303
  version(VALUE self)
248
304
  {
249
- xmlDocPtr doc;
250
- Data_Get_Struct(self, xmlDoc, doc);
305
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
251
306
 
252
307
  if (!doc->version) { return Qnil; }
253
308
  return NOKOGIRI_STR_NEW2(doc->version);
@@ -286,7 +341,7 @@ read_io(VALUE klass,
286
341
  xmlSetStructuredErrorFunc(NULL, NULL);
287
342
 
288
343
  if (doc == NULL) {
289
- xmlErrorPtr error;
344
+ xmlErrorConstPtr error;
290
345
 
291
346
  xmlFreeDoc(doc);
292
347
 
@@ -332,7 +387,7 @@ read_memory(VALUE klass,
332
387
  xmlSetStructuredErrorFunc(NULL, NULL);
333
388
 
334
389
  if (doc == NULL) {
335
- xmlErrorPtr error;
390
+ xmlErrorConstPtr error;
336
391
 
337
392
  xmlFreeDoc(doc);
338
393
 
@@ -369,7 +424,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
369
424
  level = INT2NUM((long)1);
370
425
  }
371
426
 
372
- Data_Get_Struct(self, xmlDoc, doc);
427
+ doc = noko_xml_document_unwrap(self);
373
428
 
374
429
  dup = xmlCopyDoc(doc, (int)NUM2INT(level));
375
430
 
@@ -442,8 +497,7 @@ new (int argc, VALUE *argv, VALUE klass)
442
497
  static VALUE
443
498
  remove_namespaces_bang(VALUE self)
444
499
  {
445
- xmlDocPtr doc ;
446
- Data_Get_Struct(self, xmlDoc, doc);
500
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
447
501
 
448
502
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
449
503
  return self;
@@ -471,7 +525,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
471
525
  xmlEntityPtr ptr;
472
526
  xmlDocPtr doc ;
473
527
 
474
- Data_Get_Struct(self, xmlDoc, doc);
528
+ doc = noko_xml_document_unwrap(self);
475
529
 
476
530
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
477
531
  &content);
@@ -487,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
487
541
  );
488
542
 
489
543
  if (NULL == ptr) {
490
- xmlErrorPtr error = xmlGetLastError();
544
+ xmlErrorConstPtr error = xmlGetLastError();
491
545
  if (error) {
492
546
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
493
547
  } else {
@@ -559,7 +613,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
559
613
  }
560
614
  }
561
615
 
562
- Data_Get_Struct(self, xmlDoc, c_doc);
616
+ c_doc = noko_xml_document_unwrap(self);
563
617
 
564
618
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
565
619
  rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
@@ -607,7 +661,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
607
661
  klass = cNokogiriXmlDocument;
608
662
  }
609
663
 
610
- rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
664
+ rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
611
665
 
612
666
  tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
613
667
  tuple->doc = rb_document;
@@ -640,6 +694,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
640
694
  return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
641
695
  }
642
696
 
697
+ xmlDocPtr
698
+ noko_xml_document_unwrap(VALUE rb_document)
699
+ {
700
+ xmlDocPtr c_document;
701
+ TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
702
+ return c_document;
703
+ }
704
+
705
+ /* Schema creation will remove and deallocate "blank" nodes.
706
+ * If those blank nodes have been exposed to Ruby, they could get freed
707
+ * out from under the VALUE pointer. This function checks to see if any of
708
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
709
+ */
710
+ int
711
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
712
+ {
713
+ VALUE cache = DOC_NODE_CACHE(c_document);
714
+
715
+ if (NIL_P(cache)) {
716
+ return 0;
717
+ }
718
+
719
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
720
+ xmlNodePtr node;
721
+ VALUE element = rb_ary_entry(cache, jnode);
722
+
723
+ Noko_Node_Get_Struct(element, xmlNode, node);
724
+ if (xmlIsBlankNode(node)) {
725
+ return 1;
726
+ }
727
+ }
728
+
729
+ return 0;
730
+ }
643
731
 
644
732
  void
645
733
  noko_xml_document_pin_node(xmlNodePtr node)
@@ -19,7 +19,7 @@ new (int argc, VALUE *argv, VALUE klass)
19
19
 
20
20
  rb_scan_args(argc, argv, "1*", &document, &rest);
21
21
 
22
- Data_Get_Struct(document, xmlDoc, xml_doc);
22
+ xml_doc = noko_xml_document_unwrap(document);
23
23
 
24
24
  node = xmlNewDocFragment(xml_doc->doc);
25
25
 
@@ -139,7 +139,7 @@ validate(VALUE self, VALUE document)
139
139
  VALUE error_list;
140
140
 
141
141
  Noko_Node_Get_Struct(self, xmlDtd, dtd);
142
- Noko_Node_Get_Struct(document, xmlDoc, doc);
142
+ doc = noko_xml_document_unwrap(document);
143
143
  error_list = rb_ary_new();
144
144
 
145
145
  ctxt = xmlNewValidCtxt();
@@ -2,17 +2,22 @@
2
2
 
3
3
  VALUE cNokogiriXmlElementContent;
4
4
 
5
+ static const rb_data_type_t element_content_data_type = {
6
+ .wrap_struct_name = "Nokogiri::XML::ElementContent",
7
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
8
+ };
9
+
5
10
  /*
6
11
  * call-seq:
7
- * name
12
+ * name → String
8
13
  *
9
- * Get the require element +name+
14
+ * [Returns] The content element's +name+
10
15
  */
11
16
  static VALUE
12
17
  get_name(VALUE self)
13
18
  {
14
19
  xmlElementContentPtr elem;
15
- Data_Get_Struct(self, xmlElementContent, elem);
20
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
16
21
 
17
22
  if (!elem->name) { return Qnil; }
18
23
  return NOKOGIRI_STR_NEW2(elem->name);
@@ -20,47 +25,40 @@ get_name(VALUE self)
20
25
 
21
26
  /*
22
27
  * call-seq:
23
- * type
28
+ * type → Integer
24
29
  *
25
- * Get the element content +type+. Possible values are PCDATA, ELEMENT, SEQ,
26
- * or OR.
30
+ * [Returns] The content element's +type+. Possible values are +PCDATA+, +ELEMENT+, +SEQ+, or +OR+.
27
31
  */
28
32
  static VALUE
29
33
  get_type(VALUE self)
30
34
  {
31
35
  xmlElementContentPtr elem;
32
- Data_Get_Struct(self, xmlElementContent, elem);
36
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
33
37
 
34
38
  return INT2NUM(elem->type);
35
39
  }
36
40
 
37
41
  /*
38
- * call-seq:
39
- * c1
40
- *
41
42
  * Get the first child.
42
43
  */
43
44
  static VALUE
44
45
  get_c1(VALUE self)
45
46
  {
46
47
  xmlElementContentPtr elem;
47
- Data_Get_Struct(self, xmlElementContent, elem);
48
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
48
49
 
49
50
  if (!elem->c1) { return Qnil; }
50
51
  return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c1);
51
52
  }
52
53
 
53
54
  /*
54
- * call-seq:
55
- * c2
56
- *
57
- * Get the first child.
55
+ * Get the second child.
58
56
  */
59
57
  static VALUE
60
58
  get_c2(VALUE self)
61
59
  {
62
60
  xmlElementContentPtr elem;
63
- Data_Get_Struct(self, xmlElementContent, elem);
61
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
64
62
 
65
63
  if (!elem->c2) { return Qnil; }
66
64
  return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c2);
@@ -68,45 +66,50 @@ get_c2(VALUE self)
68
66
 
69
67
  /*
70
68
  * call-seq:
71
- * occur
69
+ * occur → Integer
72
70
  *
73
- * Get the element content +occur+ flag. Possible values are ONCE, OPT, MULT
74
- * or PLUS.
71
+ * [Returns] The content element's +occur+ flag. Possible values are +ONCE+, +OPT+, +MULT+ or +PLUS+.
75
72
  */
76
73
  static VALUE
77
74
  get_occur(VALUE self)
78
75
  {
79
76
  xmlElementContentPtr elem;
80
- Data_Get_Struct(self, xmlElementContent, elem);
77
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
81
78
 
82
79
  return INT2NUM(elem->ocur);
83
80
  }
84
81
 
85
82
  /*
86
83
  * call-seq:
87
- * prefix
84
+ * prefix → String
88
85
  *
89
- * Get the element content namespace +prefix+.
86
+ * [Returns] The content element's namespace +prefix+.
90
87
  */
91
88
  static VALUE
92
89
  get_prefix(VALUE self)
93
90
  {
94
91
  xmlElementContentPtr elem;
95
- Data_Get_Struct(self, xmlElementContent, elem);
92
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
96
93
 
97
94
  if (!elem->prefix) { return Qnil; }
98
95
 
99
96
  return NOKOGIRI_STR_NEW2(elem->prefix);
100
97
  }
101
98
 
99
+ /*
100
+ * create a Nokogiri::XML::ElementContent object around an +element+.
101
+ */
102
102
  VALUE
103
- noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element)
103
+ noko_xml_element_content_wrap(VALUE rb_document, xmlElementContentPtr c_element_content)
104
104
  {
105
- VALUE elem = Data_Wrap_Struct(cNokogiriXmlElementContent, 0, 0, element);
106
-
107
- /* Setting the document is necessary so that this does not get GC'd until */
108
- /* the document is GC'd */
109
- rb_iv_set(elem, "@document", doc);
105
+ VALUE elem = TypedData_Wrap_Struct(
106
+ cNokogiriXmlElementContent,
107
+ &element_content_data_type,
108
+ c_element_content
109
+ );
110
+
111
+ /* keep a handle on the document for GC marking */
112
+ rb_iv_set(elem, "@document", rb_document);
110
113
 
111
114
  return elem;
112
115
  }
@@ -6,7 +6,7 @@ static ID id_document;
6
6
 
7
7
  /*
8
8
  * call-seq:
9
- * element_type
9
+ * element_type → Integer
10
10
  *
11
11
  * The element_type
12
12
  */
@@ -20,9 +20,9 @@ element_type(VALUE self)
20
20
 
21
21
  /*
22
22
  * call-seq:
23
- * content
23
+ * content → Nokogiri::XML::ElementContent
24
24
  *
25
- * The allowed content for this ElementDecl
25
+ * [Returns] The root of this element declaration's content tree.
26
26
  */
27
27
  static VALUE
28
28
  content(VALUE self)
@@ -40,9 +40,9 @@ content(VALUE self)
40
40
 
41
41
  /*
42
42
  * call-seq:
43
- * prefix
43
+ * prefix → String
44
44
  *
45
- * The namespace prefix for this ElementDecl
45
+ * [Returns] The namespace +prefix+ for this element declaration.
46
46
  */
47
47
  static VALUE
48
48
  prefix(VALUE self)
@@ -2,14 +2,22 @@
2
2
 
3
3
  VALUE cNokogiriEncodingHandler;
4
4
 
5
-
6
5
  static void
7
- _xml_encoding_handler_dealloc(xmlCharEncodingHandlerPtr c_handler)
6
+ xml_encoding_handler_dealloc(void *data)
8
7
  {
9
8
  /* make sure iconv handlers are cleaned up and freed */
9
+ xmlCharEncodingHandlerPtr c_handler = data;
10
10
  xmlCharEncCloseFunc(c_handler);
11
11
  }
12
12
 
13
+ static const rb_data_type_t xml_encoding_handler_type = {
14
+ .wrap_struct_name = "Nokogiri::EncodingHandler",
15
+ .function = {
16
+ .dfree = xml_encoding_handler_dealloc,
17
+ },
18
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
19
+ };
20
+
13
21
 
14
22
  /*
15
23
  * call-seq: Nokogiri::EncodingHandler.[](name)
@@ -23,7 +31,7 @@ rb_xml_encoding_handler_s_get(VALUE klass, VALUE key)
23
31
 
24
32
  handler = xmlFindCharEncodingHandler(StringValueCStr(key));
25
33
  if (handler) {
26
- return Data_Wrap_Struct(klass, NULL, _xml_encoding_handler_dealloc, handler);
34
+ return TypedData_Wrap_Struct(klass, &xml_encoding_handler_type, handler);
27
35
  }
28
36
 
29
37
  return Qnil;
@@ -82,7 +90,7 @@ rb_xml_encoding_handler_name(VALUE self)
82
90
  {
83
91
  xmlCharEncodingHandlerPtr handler;
84
92
 
85
- Data_Get_Struct(self, xmlCharEncodingHandler, handler);
93
+ TypedData_Get_Struct(self, xmlCharEncodingHandler, &xml_encoding_handler_type, handler);
86
94
 
87
95
  return NOKOGIRI_STR_NEW2(handler->name);
88
96
  }