nokogiri 1.14.2 → 1.16.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +19 -15
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +69 -26
  6. data/ext/nokogiri/html4_document.c +1 -2
  7. data/ext/nokogiri/html4_element_description.c +19 -14
  8. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  9. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  10. data/ext/nokogiri/nokogiri.c +46 -24
  11. data/ext/nokogiri/nokogiri.h +23 -5
  12. data/ext/nokogiri/test_global_handlers.c +1 -1
  13. data/ext/nokogiri/xml_attr.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +30 -17
  15. data/ext/nokogiri/xml_comment.c +1 -1
  16. data/ext/nokogiri/xml_document.c +113 -25
  17. data/ext/nokogiri/xml_document_fragment.c +1 -1
  18. data/ext/nokogiri/xml_dtd.c +1 -1
  19. data/ext/nokogiri/xml_element_content.c +32 -29
  20. data/ext/nokogiri/xml_element_decl.c +5 -5
  21. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  22. data/ext/nokogiri/xml_entity_reference.c +1 -1
  23. data/ext/nokogiri/xml_namespace.c +11 -16
  24. data/ext/nokogiri/xml_node.c +13 -16
  25. data/ext/nokogiri/xml_node_set.c +125 -105
  26. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  27. data/ext/nokogiri/xml_reader.c +61 -74
  28. data/ext/nokogiri/xml_relax_ng.c +66 -79
  29. data/ext/nokogiri/xml_sax_parser.c +24 -5
  30. data/ext/nokogiri/xml_sax_parser_context.c +50 -25
  31. data/ext/nokogiri/xml_sax_push_parser.c +30 -9
  32. data/ext/nokogiri/xml_schema.c +94 -115
  33. data/ext/nokogiri/xml_syntax_error.c +3 -3
  34. data/ext/nokogiri/xml_text.c +26 -13
  35. data/ext/nokogiri/xml_xpath_context.c +153 -83
  36. data/ext/nokogiri/xslt_stylesheet.c +111 -53
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +8 -4
  39. data/gumbo-parser/src/foreign_attrs.c +13 -14
  40. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  41. data/gumbo-parser/src/parser.c +21 -5
  42. data/gumbo-parser/src/tokenizer.c +1 -0
  43. data/lib/nokogiri/css/parser_extras.rb +1 -1
  44. data/lib/nokogiri/css/xpath_visitor.rb +3 -23
  45. data/lib/nokogiri/extension.rb +1 -1
  46. data/lib/nokogiri/html4/document.rb +1 -1
  47. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  48. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  49. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  50. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  51. data/lib/nokogiri/html5/node.rb +5 -0
  52. data/lib/nokogiri/html5.rb +0 -63
  53. data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
  54. data/lib/nokogiri/version/constant.rb +1 -1
  55. data/lib/nokogiri/version/info.rb +6 -5
  56. data/lib/nokogiri/xml/attr.rb +2 -2
  57. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  58. data/lib/nokogiri/xml/document.rb +4 -5
  59. data/lib/nokogiri/xml/document_fragment.rb +3 -3
  60. data/lib/nokogiri/xml/element_content.rb +10 -2
  61. data/lib/nokogiri/xml/element_decl.rb +4 -2
  62. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  63. data/lib/nokogiri/xml/namespace.rb +1 -2
  64. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  65. data/lib/nokogiri/xml/node.rb +53 -37
  66. data/lib/nokogiri/xml/node_set.rb +3 -3
  67. data/lib/nokogiri/xml/pp/node.rb +23 -12
  68. data/lib/nokogiri/xml/reader.rb +10 -9
  69. data/lib/nokogiri/xml/sax/document.rb +1 -1
  70. data/lib/nokogiri/xml/searchable.rb +21 -13
  71. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  72. data/lib/nokogiri/xml.rb +1 -1
  73. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  74. data/lib/nokogiri/xslt.rb +74 -4
  75. data/lib/nokogiri.rb +13 -5
  76. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  77. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  78. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  79. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  80. data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
  81. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  82. metadata +16 -12
  83. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  84. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  85. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -51,6 +51,7 @@
51
51
  #include <libxslt/xsltconfig.h>
52
52
  #include <libxslt/xsltutils.h>
53
53
  #include <libxslt/transform.h>
54
+ #include <libxslt/imports.h>
54
55
  #include <libxslt/xsltInternals.h>
55
56
 
56
57
  #include <libexslt/exslt.h>
@@ -65,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
65
66
  #define XMLNS_PREFIX "xmlns"
66
67
  #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
67
68
 
69
+ #ifndef xmlErrorConstPtr
70
+ # if LIBXML_VERSION >= 21200
71
+ # define xmlErrorConstPtr const xmlError *
72
+ # else
73
+ # define xmlErrorConstPtr xmlError *
74
+ # endif
75
+ #endif
68
76
 
69
77
  #include <ruby.h>
70
78
  #include <ruby/st.h>
@@ -168,6 +176,7 @@ typedef struct _nokogiriXsltStylesheetTuple {
168
176
 
169
177
  void noko_xml_document_pin_node(xmlNodePtr);
170
178
  void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
179
+ int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
171
180
 
172
181
  int noko_io_read(void *ctx, char *buffer, int len);
173
182
  int noko_io_write(void *ctx, char *buffer, int len);
@@ -186,12 +195,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
186
195
  VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
187
196
 
188
197
  VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
198
+ xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
189
199
 
190
200
  VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
191
201
  VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
202
+ xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
192
203
  NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
193
204
  xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
194
205
 
206
+ xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
207
+
208
+ xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
209
+
210
+ VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
211
+ xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
212
+
195
213
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
196
214
  #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
197
215
  #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
@@ -207,18 +225,18 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
207
225
  #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
208
226
 
209
227
  #if HAVE_RB_CATEGORY_WARNING
210
- # define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
228
+ # define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
211
229
  #else
212
- # define NOKO_WARN_DEPRECATION(message) rb_warning(message)
230
+ # define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
213
231
  #endif
214
232
 
215
233
  void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
216
234
  void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
217
235
  xmlStructuredErrorFunc handler);
218
236
  void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
219
- VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
220
- void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
221
- NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error);
237
+ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
238
+ void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
239
+ NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
222
240
  void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
223
241
  const char *function_name) ;
224
242
 
@@ -3,7 +3,7 @@
3
3
  static VALUE foreign_error_handler_block = Qnil;
4
4
 
5
5
  static void
6
- foreign_error_handler(void *user_data, xmlErrorPtr c_error)
6
+ foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
7
7
  {
8
8
  rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
9
9
  }
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
68
68
  rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
69
69
  }
70
70
 
71
- Noko_Node_Get_Struct(document, xmlDoc, xml_doc);
71
+ xml_doc = noko_xml_document_unwrap(document);
72
72
 
73
73
  node = xmlNewDocProp(
74
74
  xml_doc,
@@ -12,31 +12,44 @@ VALUE cNokogiriXmlCData;
12
12
  * raise a TypeError exception.
13
13
  */
14
14
  static VALUE
15
- new (int argc, VALUE *argv, VALUE klass)
15
+ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
16
16
  {
17
- xmlDocPtr xml_doc;
18
- xmlNodePtr node;
19
- VALUE doc;
20
- VALUE content;
21
- VALUE rest;
17
+ xmlDocPtr c_document;
18
+ xmlNodePtr c_node;
19
+ VALUE rb_document;
20
+ VALUE rb_content;
21
+ VALUE rb_rest;
22
22
  VALUE rb_node;
23
- xmlChar *content_str = NULL;
24
- int content_str_len = 0;
23
+ xmlChar *c_content = NULL;
24
+ int c_content_len = 0;
25
25
 
26
- rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
26
+ rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
27
27
 
28
- Noko_Node_Get_Struct(doc, xmlDoc, xml_doc);
28
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
29
+ rb_raise(rb_eTypeError,
30
+ "expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
31
+ rb_obj_class(rb_document));
32
+ }
33
+
34
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
35
+ xmlNodePtr deprecated_node_type_arg;
36
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
37
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
38
+ c_document = deprecated_node_type_arg->doc;
39
+ } else {
40
+ c_document = noko_xml_document_unwrap(rb_document);
41
+ }
29
42
 
30
- if (!NIL_P(content)) {
31
- content_str = (xmlChar *)StringValuePtr(content);
32
- content_str_len = RSTRING_LENINT(content);
43
+ if (!NIL_P(rb_content)) {
44
+ c_content = (xmlChar *)StringValuePtr(rb_content);
45
+ c_content_len = RSTRING_LENINT(rb_content);
33
46
  }
34
47
 
35
- node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
48
+ c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
36
49
 
37
- noko_xml_document_pin_node(node);
50
+ noko_xml_document_pin_node(c_node);
38
51
 
39
- rb_node = noko_xml_node_wrap(klass, node);
52
+ rb_node = noko_xml_node_wrap(klass, c_node);
40
53
  rb_obj_call_init(rb_node, argc, argv);
41
54
 
42
55
  if (rb_block_given_p()) { rb_yield(rb_node); }
@@ -53,5 +66,5 @@ noko_init_xml_cdata(void)
53
66
  */
54
67
  cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText);
55
68
 
56
- rb_define_singleton_method(cNokogiriXmlCData, "new", new, -1);
69
+ rb_define_singleton_method(cNokogiriXmlCData, "new", rb_xml_cdata_s_new, -1);
57
70
  }
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
30
30
  rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
31
  }
32
32
 
33
- Data_Get_Struct(document, xmlDoc, xml_doc);
33
+ xml_doc = noko_xml_document_unwrap(document);
34
34
 
35
35
  node = xmlNewDocComment(
36
36
  xml_doc,
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
51
51
  }
52
52
 
53
53
  static void
54
- mark(xmlDocPtr doc)
54
+ mark(void *data)
55
55
  {
56
+ xmlDocPtr doc = (xmlDocPtr)data;
56
57
  nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
57
58
  if (tuple) {
58
59
  rb_gc_mark(tuple->doc);
@@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
61
62
  }
62
63
 
63
64
  static void
64
- dealloc(xmlDocPtr doc)
65
+ dealloc(void *data)
65
66
  {
67
+ xmlDocPtr doc = (xmlDocPtr)data;
66
68
  st_table *node_hash;
67
69
 
68
70
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
@@ -72,18 +74,75 @@ dealloc(xmlDocPtr doc)
72
74
 
73
75
  ruby_xfree(doc->_private);
74
76
 
75
- /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
76
- * have their _private pointers cleared. This is to avoid libxml-ruby's
77
- * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
78
- * free context, which can result in segfaults.
77
+ #if defined(__GNUC__) && __GNUC__ >= 5
78
+ #pragma GCC diagnostic push
79
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
80
+ #endif
81
+ /*
82
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
83
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
84
+ * nokogiri, which will result in segfaults.
85
+ *
86
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
87
+ * before that callback gets invoked.
88
+ *
89
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
90
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
79
91
  */
80
92
  if (xmlDeregisterNodeDefaultValue) {
81
93
  remove_private((xmlNodePtr)doc);
82
94
  }
95
+ #if defined(__GNUC__) && __GNUC__ >= 5
96
+ #pragma GCC diagnostic pop
97
+ #endif
83
98
 
84
99
  xmlFreeDoc(doc);
85
100
  }
86
101
 
102
+ static size_t
103
+ memsize_node(const xmlNodePtr node)
104
+ {
105
+ /* note we don't count namespace definitions, just going for a good-enough number here */
106
+ xmlNodePtr child;
107
+ xmlAttrPtr property;
108
+ size_t memsize = 0;
109
+
110
+ memsize += xmlStrlen(node->name);
111
+
112
+ if (node->type == XML_ELEMENT_NODE) {
113
+ for (property = node->properties; property; property = property->next) {
114
+ memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
115
+ }
116
+ }
117
+ if (node->type == XML_TEXT_NODE) {
118
+ memsize += xmlStrlen(node->content);
119
+ }
120
+ for (child = node->children; child; child = child->next) {
121
+ memsize += sizeof(xmlNode) + memsize_node(child);
122
+ }
123
+ return memsize;
124
+ }
125
+
126
+ static size_t
127
+ memsize(const void *data)
128
+ {
129
+ xmlDocPtr doc = (const xmlDocPtr)data;
130
+ size_t memsize = sizeof(xmlDoc);
131
+ /* This may not account for all memory use */
132
+ memsize += memsize_node((xmlNodePtr)doc);
133
+ return memsize;
134
+ }
135
+
136
+ static const rb_data_type_t noko_xml_document_data_type = {
137
+ .wrap_struct_name = "Nokogiri::XML::Document",
138
+ .function = {
139
+ .dmark = mark,
140
+ .dfree = dealloc,
141
+ .dsize = memsize,
142
+ },
143
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
144
+ };
145
+
87
146
  static void
88
147
  recursively_remove_namespaces_from_node(xmlNodePtr node)
89
148
  {
@@ -126,8 +185,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
126
185
  static VALUE
127
186
  url(VALUE self)
128
187
  {
129
- xmlDocPtr doc;
130
- Data_Get_Struct(self, xmlDoc, doc);
188
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
131
189
 
132
190
  if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
133
191
 
@@ -146,7 +204,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
146
204
  xmlDocPtr c_document;
147
205
  xmlNodePtr c_new_root = NULL, c_current_root;
148
206
 
149
- Data_Get_Struct(self, xmlDoc, c_document);
207
+ c_document = noko_xml_document_unwrap(self);
150
208
 
151
209
  c_current_root = xmlDocGetRootElement(c_document);
152
210
  if (c_current_root) {
@@ -190,7 +248,7 @@ rb_xml_document_root(VALUE self)
190
248
  xmlDocPtr c_document;
191
249
  xmlNodePtr c_root;
192
250
 
193
- Data_Get_Struct(self, xmlDoc, c_document);
251
+ c_document = noko_xml_document_unwrap(self);
194
252
 
195
253
  c_root = xmlDocGetRootElement(c_document);
196
254
  if (!c_root) {
@@ -209,8 +267,7 @@ rb_xml_document_root(VALUE self)
209
267
  static VALUE
210
268
  set_encoding(VALUE self, VALUE encoding)
211
269
  {
212
- xmlDocPtr doc;
213
- Data_Get_Struct(self, xmlDoc, doc);
270
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
214
271
 
215
272
  if (doc->encoding) {
216
273
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
@@ -230,8 +287,7 @@ set_encoding(VALUE self, VALUE encoding)
230
287
  static VALUE
231
288
  encoding(VALUE self)
232
289
  {
233
- xmlDocPtr doc;
234
- Data_Get_Struct(self, xmlDoc, doc);
290
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
235
291
 
236
292
  if (!doc->encoding) { return Qnil; }
237
293
  return NOKOGIRI_STR_NEW2(doc->encoding);
@@ -246,8 +302,7 @@ encoding(VALUE self)
246
302
  static VALUE
247
303
  version(VALUE self)
248
304
  {
249
- xmlDocPtr doc;
250
- Data_Get_Struct(self, xmlDoc, doc);
305
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
251
306
 
252
307
  if (!doc->version) { return Qnil; }
253
308
  return NOKOGIRI_STR_NEW2(doc->version);
@@ -286,7 +341,7 @@ read_io(VALUE klass,
286
341
  xmlSetStructuredErrorFunc(NULL, NULL);
287
342
 
288
343
  if (doc == NULL) {
289
- xmlErrorPtr error;
344
+ xmlErrorConstPtr error;
290
345
 
291
346
  xmlFreeDoc(doc);
292
347
 
@@ -332,7 +387,7 @@ read_memory(VALUE klass,
332
387
  xmlSetStructuredErrorFunc(NULL, NULL);
333
388
 
334
389
  if (doc == NULL) {
335
- xmlErrorPtr error;
390
+ xmlErrorConstPtr error;
336
391
 
337
392
  xmlFreeDoc(doc);
338
393
 
@@ -369,7 +424,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
369
424
  level = INT2NUM((long)1);
370
425
  }
371
426
 
372
- Data_Get_Struct(self, xmlDoc, doc);
427
+ doc = noko_xml_document_unwrap(self);
373
428
 
374
429
  dup = xmlCopyDoc(doc, (int)NUM2INT(level));
375
430
 
@@ -442,8 +497,7 @@ new (int argc, VALUE *argv, VALUE klass)
442
497
  static VALUE
443
498
  remove_namespaces_bang(VALUE self)
444
499
  {
445
- xmlDocPtr doc ;
446
- Data_Get_Struct(self, xmlDoc, doc);
500
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
447
501
 
448
502
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
449
503
  return self;
@@ -471,7 +525,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
471
525
  xmlEntityPtr ptr;
472
526
  xmlDocPtr doc ;
473
527
 
474
- Data_Get_Struct(self, xmlDoc, doc);
528
+ doc = noko_xml_document_unwrap(self);
475
529
 
476
530
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
477
531
  &content);
@@ -487,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
487
541
  );
488
542
 
489
543
  if (NULL == ptr) {
490
- xmlErrorPtr error = xmlGetLastError();
544
+ xmlErrorConstPtr error = xmlGetLastError();
491
545
  if (error) {
492
546
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
493
547
  } else {
@@ -559,7 +613,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
559
613
  }
560
614
  }
561
615
 
562
- Data_Get_Struct(self, xmlDoc, c_doc);
616
+ c_doc = noko_xml_document_unwrap(self);
563
617
 
564
618
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
565
619
  rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
@@ -607,7 +661,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
607
661
  klass = cNokogiriXmlDocument;
608
662
  }
609
663
 
610
- rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
664
+ rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
611
665
 
612
666
  tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
613
667
  tuple->doc = rb_document;
@@ -640,6 +694,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
640
694
  return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
641
695
  }
642
696
 
697
+ xmlDocPtr
698
+ noko_xml_document_unwrap(VALUE rb_document)
699
+ {
700
+ xmlDocPtr c_document;
701
+ TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
702
+ return c_document;
703
+ }
704
+
705
+ /* Schema creation will remove and deallocate "blank" nodes.
706
+ * If those blank nodes have been exposed to Ruby, they could get freed
707
+ * out from under the VALUE pointer. This function checks to see if any of
708
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
709
+ */
710
+ int
711
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
712
+ {
713
+ VALUE cache = DOC_NODE_CACHE(c_document);
714
+
715
+ if (NIL_P(cache)) {
716
+ return 0;
717
+ }
718
+
719
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
720
+ xmlNodePtr node;
721
+ VALUE element = rb_ary_entry(cache, jnode);
722
+
723
+ Noko_Node_Get_Struct(element, xmlNode, node);
724
+ if (xmlIsBlankNode(node)) {
725
+ return 1;
726
+ }
727
+ }
728
+
729
+ return 0;
730
+ }
643
731
 
644
732
  void
645
733
  noko_xml_document_pin_node(xmlNodePtr node)
@@ -19,7 +19,7 @@ new (int argc, VALUE *argv, VALUE klass)
19
19
 
20
20
  rb_scan_args(argc, argv, "1*", &document, &rest);
21
21
 
22
- Data_Get_Struct(document, xmlDoc, xml_doc);
22
+ xml_doc = noko_xml_document_unwrap(document);
23
23
 
24
24
  node = xmlNewDocFragment(xml_doc->doc);
25
25
 
@@ -139,7 +139,7 @@ validate(VALUE self, VALUE document)
139
139
  VALUE error_list;
140
140
 
141
141
  Noko_Node_Get_Struct(self, xmlDtd, dtd);
142
- Noko_Node_Get_Struct(document, xmlDoc, doc);
142
+ doc = noko_xml_document_unwrap(document);
143
143
  error_list = rb_ary_new();
144
144
 
145
145
  ctxt = xmlNewValidCtxt();
@@ -2,17 +2,22 @@
2
2
 
3
3
  VALUE cNokogiriXmlElementContent;
4
4
 
5
+ static const rb_data_type_t element_content_data_type = {
6
+ .wrap_struct_name = "Nokogiri::XML::ElementContent",
7
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
8
+ };
9
+
5
10
  /*
6
11
  * call-seq:
7
- * name
12
+ * name → String
8
13
  *
9
- * Get the require element +name+
14
+ * [Returns] The content element's +name+
10
15
  */
11
16
  static VALUE
12
17
  get_name(VALUE self)
13
18
  {
14
19
  xmlElementContentPtr elem;
15
- Data_Get_Struct(self, xmlElementContent, elem);
20
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
16
21
 
17
22
  if (!elem->name) { return Qnil; }
18
23
  return NOKOGIRI_STR_NEW2(elem->name);
@@ -20,47 +25,40 @@ get_name(VALUE self)
20
25
 
21
26
  /*
22
27
  * call-seq:
23
- * type
28
+ * type → Integer
24
29
  *
25
- * Get the element content +type+. Possible values are PCDATA, ELEMENT, SEQ,
26
- * or OR.
30
+ * [Returns] The content element's +type+. Possible values are +PCDATA+, +ELEMENT+, +SEQ+, or +OR+.
27
31
  */
28
32
  static VALUE
29
33
  get_type(VALUE self)
30
34
  {
31
35
  xmlElementContentPtr elem;
32
- Data_Get_Struct(self, xmlElementContent, elem);
36
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
33
37
 
34
38
  return INT2NUM(elem->type);
35
39
  }
36
40
 
37
41
  /*
38
- * call-seq:
39
- * c1
40
- *
41
42
  * Get the first child.
42
43
  */
43
44
  static VALUE
44
45
  get_c1(VALUE self)
45
46
  {
46
47
  xmlElementContentPtr elem;
47
- Data_Get_Struct(self, xmlElementContent, elem);
48
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
48
49
 
49
50
  if (!elem->c1) { return Qnil; }
50
51
  return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c1);
51
52
  }
52
53
 
53
54
  /*
54
- * call-seq:
55
- * c2
56
- *
57
- * Get the first child.
55
+ * Get the second child.
58
56
  */
59
57
  static VALUE
60
58
  get_c2(VALUE self)
61
59
  {
62
60
  xmlElementContentPtr elem;
63
- Data_Get_Struct(self, xmlElementContent, elem);
61
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
64
62
 
65
63
  if (!elem->c2) { return Qnil; }
66
64
  return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c2);
@@ -68,45 +66,50 @@ get_c2(VALUE self)
68
66
 
69
67
  /*
70
68
  * call-seq:
71
- * occur
69
+ * occur → Integer
72
70
  *
73
- * Get the element content +occur+ flag. Possible values are ONCE, OPT, MULT
74
- * or PLUS.
71
+ * [Returns] The content element's +occur+ flag. Possible values are +ONCE+, +OPT+, +MULT+ or +PLUS+.
75
72
  */
76
73
  static VALUE
77
74
  get_occur(VALUE self)
78
75
  {
79
76
  xmlElementContentPtr elem;
80
- Data_Get_Struct(self, xmlElementContent, elem);
77
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
81
78
 
82
79
  return INT2NUM(elem->ocur);
83
80
  }
84
81
 
85
82
  /*
86
83
  * call-seq:
87
- * prefix
84
+ * prefix → String
88
85
  *
89
- * Get the element content namespace +prefix+.
86
+ * [Returns] The content element's namespace +prefix+.
90
87
  */
91
88
  static VALUE
92
89
  get_prefix(VALUE self)
93
90
  {
94
91
  xmlElementContentPtr elem;
95
- Data_Get_Struct(self, xmlElementContent, elem);
92
+ TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
96
93
 
97
94
  if (!elem->prefix) { return Qnil; }
98
95
 
99
96
  return NOKOGIRI_STR_NEW2(elem->prefix);
100
97
  }
101
98
 
99
+ /*
100
+ * create a Nokogiri::XML::ElementContent object around an +element+.
101
+ */
102
102
  VALUE
103
- noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element)
103
+ noko_xml_element_content_wrap(VALUE rb_document, xmlElementContentPtr c_element_content)
104
104
  {
105
- VALUE elem = Data_Wrap_Struct(cNokogiriXmlElementContent, 0, 0, element);
106
-
107
- /* Setting the document is necessary so that this does not get GC'd until */
108
- /* the document is GC'd */
109
- rb_iv_set(elem, "@document", doc);
105
+ VALUE elem = TypedData_Wrap_Struct(
106
+ cNokogiriXmlElementContent,
107
+ &element_content_data_type,
108
+ c_element_content
109
+ );
110
+
111
+ /* keep a handle on the document for GC marking */
112
+ rb_iv_set(elem, "@document", rb_document);
110
113
 
111
114
  return elem;
112
115
  }
@@ -6,7 +6,7 @@ static ID id_document;
6
6
 
7
7
  /*
8
8
  * call-seq:
9
- * element_type
9
+ * element_type → Integer
10
10
  *
11
11
  * The element_type
12
12
  */
@@ -20,9 +20,9 @@ element_type(VALUE self)
20
20
 
21
21
  /*
22
22
  * call-seq:
23
- * content
23
+ * content → Nokogiri::XML::ElementContent
24
24
  *
25
- * The allowed content for this ElementDecl
25
+ * [Returns] The root of this element declaration's content tree.
26
26
  */
27
27
  static VALUE
28
28
  content(VALUE self)
@@ -40,9 +40,9 @@ content(VALUE self)
40
40
 
41
41
  /*
42
42
  * call-seq:
43
- * prefix
43
+ * prefix → String
44
44
  *
45
- * The namespace prefix for this ElementDecl
45
+ * [Returns] The namespace +prefix+ for this element declaration.
46
46
  */
47
47
  static VALUE
48
48
  prefix(VALUE self)
@@ -2,14 +2,22 @@
2
2
 
3
3
  VALUE cNokogiriEncodingHandler;
4
4
 
5
-
6
5
  static void
7
- _xml_encoding_handler_dealloc(xmlCharEncodingHandlerPtr c_handler)
6
+ xml_encoding_handler_dealloc(void *data)
8
7
  {
9
8
  /* make sure iconv handlers are cleaned up and freed */
9
+ xmlCharEncodingHandlerPtr c_handler = data;
10
10
  xmlCharEncCloseFunc(c_handler);
11
11
  }
12
12
 
13
+ static const rb_data_type_t xml_encoding_handler_type = {
14
+ .wrap_struct_name = "Nokogiri::EncodingHandler",
15
+ .function = {
16
+ .dfree = xml_encoding_handler_dealloc,
17
+ },
18
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
19
+ };
20
+
13
21
 
14
22
  /*
15
23
  * call-seq: Nokogiri::EncodingHandler.[](name)
@@ -23,7 +31,7 @@ rb_xml_encoding_handler_s_get(VALUE klass, VALUE key)
23
31
 
24
32
  handler = xmlFindCharEncodingHandler(StringValueCStr(key));
25
33
  if (handler) {
26
- return Data_Wrap_Struct(klass, NULL, _xml_encoding_handler_dealloc, handler);
34
+ return TypedData_Wrap_Struct(klass, &xml_encoding_handler_type, handler);
27
35
  }
28
36
 
29
37
  return Qnil;
@@ -82,7 +90,7 @@ rb_xml_encoding_handler_name(VALUE self)
82
90
  {
83
91
  xmlCharEncodingHandlerPtr handler;
84
92
 
85
- Data_Get_Struct(self, xmlCharEncodingHandler, handler);
93
+ TypedData_Get_Struct(self, xmlCharEncodingHandler, &xml_encoding_handler_type, handler);
86
94
 
87
95
  return NOKOGIRI_STR_NEW2(handler->name);
88
96
  }