nokogiri 1.11.0.rc4 → 1.11.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (93)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +12 -12
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/ext/nokogiri/depend +34 -474
  7. data/ext/nokogiri/extconf.rb +270 -183
  8. data/ext/nokogiri/html_document.c +10 -15
  9. data/ext/nokogiri/html_element_description.c +84 -71
  10. data/ext/nokogiri/html_entity_lookup.c +21 -16
  11. data/ext/nokogiri/html_sax_parser_context.c +67 -64
  12. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  13. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  14. data/ext/nokogiri/nokogiri.c +171 -60
  15. data/ext/nokogiri/nokogiri.h +158 -75
  16. data/ext/nokogiri/test_global_handlers.c +40 -0
  17. data/ext/nokogiri/xml_attr.c +15 -15
  18. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  19. data/ext/nokogiri/xml_cdata.c +13 -18
  20. data/ext/nokogiri/xml_comment.c +19 -26
  21. data/ext/nokogiri/xml_document.c +246 -188
  22. data/ext/nokogiri/xml_document_fragment.c +13 -15
  23. data/ext/nokogiri/xml_dtd.c +54 -48
  24. data/ext/nokogiri/xml_element_content.c +30 -27
  25. data/ext/nokogiri/xml_element_decl.c +22 -22
  26. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  27. data/ext/nokogiri/xml_entity_decl.c +32 -30
  28. data/ext/nokogiri/xml_entity_reference.c +16 -18
  29. data/ext/nokogiri/xml_namespace.c +56 -49
  30. data/ext/nokogiri/xml_node.c +371 -320
  31. data/ext/nokogiri/xml_node_set.c +168 -156
  32. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  33. data/ext/nokogiri/xml_reader.c +191 -157
  34. data/ext/nokogiri/xml_relax_ng.c +29 -23
  35. data/ext/nokogiri/xml_sax_parser.c +117 -112
  36. data/ext/nokogiri/xml_sax_parser_context.c +101 -84
  37. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  38. data/ext/nokogiri/xml_schema.c +48 -42
  39. data/ext/nokogiri/xml_syntax_error.c +42 -21
  40. data/ext/nokogiri/xml_text.c +13 -17
  41. data/ext/nokogiri/xml_xpath_context.c +134 -127
  42. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  43. data/lib/nokogiri/css/parser.rb +1 -1
  44. data/lib/nokogiri/extension.rb +26 -0
  45. data/lib/nokogiri/html/document_fragment.rb +15 -15
  46. data/lib/nokogiri/version/constant.rb +1 -1
  47. data/lib/nokogiri/version/info.rb +31 -8
  48. data/lib/nokogiri/xml/document.rb +74 -28
  49. data/lib/nokogiri/xml/node.rb +39 -42
  50. data/lib/nokogiri/xml/reader.rb +2 -9
  51. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  52. data/lib/nokogiri/xml/xpath.rb +1 -3
  53. data/lib/nokogiri.rb +2 -6
  54. data/patches/libxml2/0009-avoid-isnan-isinf.patch +81 -0
  55. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +70 -0
  56. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +2511 -0
  57. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  58. metadata +66 -160
  59. data/ext/nokogiri/html_document.h +0 -10
  60. data/ext/nokogiri/html_element_description.h +0 -10
  61. data/ext/nokogiri/html_entity_lookup.h +0 -8
  62. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  63. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  64. data/ext/nokogiri/xml_attr.h +0 -9
  65. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  66. data/ext/nokogiri/xml_cdata.h +0 -9
  67. data/ext/nokogiri/xml_comment.h +0 -9
  68. data/ext/nokogiri/xml_document.h +0 -23
  69. data/ext/nokogiri/xml_document_fragment.h +0 -10
  70. data/ext/nokogiri/xml_dtd.h +0 -10
  71. data/ext/nokogiri/xml_element_content.h +0 -10
  72. data/ext/nokogiri/xml_element_decl.h +0 -9
  73. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  74. data/ext/nokogiri/xml_entity_decl.h +0 -10
  75. data/ext/nokogiri/xml_entity_reference.h +0 -9
  76. data/ext/nokogiri/xml_io.c +0 -63
  77. data/ext/nokogiri/xml_io.h +0 -11
  78. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  79. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  80. data/ext/nokogiri/xml_namespace.h +0 -14
  81. data/ext/nokogiri/xml_node.h +0 -13
  82. data/ext/nokogiri/xml_node_set.h +0 -12
  83. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  84. data/ext/nokogiri/xml_reader.h +0 -10
  85. data/ext/nokogiri/xml_relax_ng.h +0 -9
  86. data/ext/nokogiri/xml_sax_parser.h +0 -39
  87. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  88. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  89. data/ext/nokogiri/xml_schema.h +0 -9
  90. data/ext/nokogiri/xml_syntax_error.h +0 -13
  91. data/ext/nokogiri/xml_text.h +0 -9
  92. data/ext/nokogiri/xml_xpath_context.h +0 -10
  93. data/ext/nokogiri/xslt_stylesheet.h +0 -14
data/ext/nokogiri/xml_entity_reference.c
@@ -1,4 +1,6 @@
1
- #include <xml_entity_reference.h>
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlEntityReference;
2
4
 
3
5
  /*
4
6
  * call-seq:
@@ -6,7 +8,8 @@
6
8
  *
7
9
  * Create a new EntityReference element on the +document+ with +name+
8
10
  */
9
- static VALUE new(int argc, VALUE *argv, VALUE klass)
11
+ static VALUE
12
+ new (int argc, VALUE *argv, VALUE klass)
10
13
  {
11
14
  xmlDocPtr xml_doc;
12
15
  xmlNodePtr node;
@@ -20,33 +23,28 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
20
23
  Data_Get_Struct(document, xmlDoc, xml_doc);
21
24
 
22
25
  node = xmlNewReference(
23
- xml_doc,
24
- (const xmlChar *)StringValueCStr(name)
25
- );
26
+ xml_doc,
27
+ (const xmlChar *)StringValueCStr(name)
28
+ );
26
29
 
27
- nokogiri_root_node(node);
30
+ noko_xml_document_pin_node(node);
28
31
 
29
- rb_node = Nokogiri_wrap_xml_node(klass, node);
32
+ rb_node = noko_xml_node_wrap(klass, node);
30
33
  rb_obj_call_init(rb_node, argc, argv);
31
34
 
32
- if(rb_block_given_p()) rb_yield(rb_node);
35
+ if (rb_block_given_p()) { rb_yield(rb_node); }
33
36
 
34
37
  return rb_node;
35
38
  }
36
39
 
37
- VALUE cNokogiriXmlEntityReference;
38
- void init_xml_entity_reference()
40
+ void
41
+ noko_init_xml_entity_reference()
39
42
  {
40
- VALUE nokogiri = rb_define_module("Nokogiri");
41
- VALUE xml = rb_define_module_under(nokogiri, "XML");
42
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
43
-
43
+ assert(cNokogiriXmlNode);
44
44
  /*
45
45
  * EntityReference represents an EntityReference node in an xml document.
46
46
  */
47
- VALUE klass = rb_define_class_under(xml, "EntityReference", node);
48
-
49
- cNokogiriXmlEntityReference = klass;
47
+ cNokogiriXmlEntityReference = rb_define_class_under(mNokogiriXml, "EntityReference", cNokogiriXmlNode);
50
48
 
51
- rb_define_singleton_method(klass, "new", new, -1);
49
+ rb_define_singleton_method(cNokogiriXmlEntityReference, "new", new, -1);
52
50
  }
data/ext/nokogiri/xml_namespace.c
@@ -1,16 +1,35 @@
1
- #include <xml_namespace.h>
1
+ #include <nokogiri.h>
2
+
3
+ /*
4
+ * The lifecycle of a Namespace node is more complicated than other Nodes, for two reasons:
5
+ *
6
+ * 1. the underlying C structure has a different layout than all the other node structs, with the
7
+ * `_private` member where we store a pointer to Ruby object data not being in first position.
8
+ * 2. xmlNs structures returned in an xmlNodeset from an XPath query are copies of the document's
9
+ * namespaces, and so do not share the same memory lifecycle as everything else in a document.
10
+ *
11
+ * As a result of 1, you may see special handling of XML_NAMESPACE_DECL node types throughout the
12
+ * Nokogiri C code, though I intend to wrap up that logic in ruby_object_{get,set} functions
13
+ * shortly.
14
+ *
15
+ * As a result of 2, you will see we have special handling in this file and in xml_node_set.c to
16
+ * carefully manage the memory lifecycle of xmlNs structs to match the Ruby object's GC
17
+ * lifecycle. In xml_node_set.c we have local versions of xmlXPathNodeSetDel() and
18
+ * xmlXPathFreeNodeSet() that avoid freeing xmlNs structs in the node set. In this file, we decide
19
+ * whether or not to call dealloc_namespace() depending on whether the xmlNs struct appears to be
20
+ * in an xmlNodeSet (and thus the result of an XPath query) or not.
21
+ *
22
+ * Yes, this is madness.
23
+ */
2
24
 
3
25
  VALUE cNokogiriXmlNamespace ;
4
26
 
5
- static void dealloc_namespace(xmlNsPtr ns)
27
+ static void
28
+ dealloc_namespace(xmlNsPtr ns)
6
29
  {
7
30
  /*
8
- *
9
31
  * this deallocator is only used for namespace nodes that are part of an xpath
10
- * node set.
11
- *
12
- * see Nokogiri_wrap_xml_namespace() for more details.
13
- *
32
+ * node set. see noko_xml_namespace_wrap().
14
33
  */
15
34
  NOKOGIRI_DEBUG_START(ns) ;
16
35
  if (ns->href) {
@@ -30,12 +49,13 @@ static void dealloc_namespace(xmlNsPtr ns)
30
49
  *
31
50
  * Get the prefix for this namespace. Returns +nil+ if there is no prefix.
32
51
  */
33
- static VALUE prefix(VALUE self)
52
+ static VALUE
53
+ prefix(VALUE self)
34
54
  {
35
55
  xmlNsPtr ns;
36
56
 
37
57
  Data_Get_Struct(self, xmlNs, ns);
38
- if(!ns->prefix) return Qnil;
58
+ if (!ns->prefix) { return Qnil; }
39
59
 
40
60
  return NOKOGIRI_STR_NEW2(ns->prefix);
41
61
  }
@@ -46,66 +66,53 @@ static VALUE prefix(VALUE self)
46
66
  *
47
67
  * Get the href for this namespace
48
68
  */
49
- static VALUE href(VALUE self)
69
+ static VALUE
70
+ href(VALUE self)
50
71
  {
51
72
  xmlNsPtr ns;
52
73
 
53
74
  Data_Get_Struct(self, xmlNs, ns);
54
- if(!ns->href) return Qnil;
75
+ if (!ns->href) { return Qnil; }
55
76
 
56
77
  return NOKOGIRI_STR_NEW2(ns->href);
57
78
  }
58
79
 
59
- static int part_of_an_xpath_node_set_eh(xmlNsPtr node)
60
- {
61
- return (node->next && ! NOKOGIRI_NAMESPACE_EH(node->next));
62
- }
63
-
64
- VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node)
80
+ VALUE
81
+ noko_xml_namespace_wrap(xmlNsPtr c_namespace, xmlDocPtr c_document)
65
82
  {
66
- VALUE ns = 0, document, node_cache;
67
-
68
- assert(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE);
69
-
70
- if (node->_private) return (VALUE)node->_private;
83
+ VALUE rb_namespace;
71
84
 
72
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
85
+ if (c_namespace->_private) {
86
+ return (VALUE)c_namespace->_private;
87
+ }
73
88
 
74
- if (DOC_RUBY_OBJECT_TEST(doc)) {
75
- document = DOC_RUBY_OBJECT(doc);
89
+ if (c_document) {
90
+ rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, c_namespace);
76
91
 
77
- if (part_of_an_xpath_node_set_eh(node)) {
78
- /*
79
- * this is a duplicate returned as part of an xpath query node set, and so
80
- * we need to make sure we manage this memory.
81
- *
82
- * see comments in xml_node_set.c for more details.
83
- */
84
- ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, node);
85
- } else {
86
- ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node);
87
- node_cache = rb_iv_get(document, "@node_cache");
88
- rb_ary_push(node_cache, ns);
92
+ if (DOC_RUBY_OBJECT_TEST(c_document)) {
93
+ rb_iv_set(rb_namespace, "@document", DOC_RUBY_OBJECT(c_document));
94
+ rb_ary_push(DOC_NODE_CACHE(c_document), rb_namespace);
89
95
  }
90
-
91
- rb_iv_set(ns, "@document", document);
92
96
  } else {
93
- ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node);
97
+ rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, c_namespace);
94
98
  }
95
99
 
96
- node->_private = (void *)ns;
100
+ c_namespace->_private = (void *)rb_namespace;
97
101
 
98
- return ns;
102
+ return rb_namespace;
99
103
  }
100
104
 
101
- void init_xml_namespace()
105
+ VALUE
106
+ noko_xml_namespace_wrap_xpath_copy(xmlNsPtr c_namespace)
102
107
  {
103
- VALUE nokogiri = rb_define_module("Nokogiri");
104
- VALUE xml = rb_define_module_under(nokogiri, "XML");
105
- VALUE klass = rb_define_class_under(xml, "Namespace", rb_cObject);
108
+ return noko_xml_namespace_wrap(c_namespace, NULL);
109
+ }
106
110
 
107
- cNokogiriXmlNamespace = klass;
111
+ void
112
+ noko_init_xml_namespace()
113
+ {
114
+ cNokogiriXmlNamespace = rb_define_class_under(mNokogiriXml, "Namespace", rb_cObject);
108
115
 
109
- rb_define_method(klass, "prefix", prefix, 0);
110
- rb_define_method(klass, "href", href, 0);
116
+ rb_define_method(cNokogiriXmlNamespace, "prefix", prefix, 0);
117
+ rb_define_method(cNokogiriXmlNamespace, "href", href, 0);
111
118
  }