nokogiri 1.11.2 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (127) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +243 -22
  3. data/LICENSE.md +1 -1
  4. data/README.md +7 -6
  5. data/dependencies.yml +12 -12
  6. data/ext/nokogiri/depend +35 -34
  7. data/ext/nokogiri/extconf.rb +185 -103
  8. data/ext/nokogiri/gumbo.c +584 -0
  9. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  10. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  11. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  12. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +6 -5
  13. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  14. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  15. data/ext/nokogiri/nokogiri.c +70 -38
  16. data/ext/nokogiri/nokogiri.h +19 -9
  17. data/ext/nokogiri/xml_document.c +50 -49
  18. data/ext/nokogiri/xml_element_content.c +2 -0
  19. data/ext/nokogiri/xml_encoding_handler.c +11 -6
  20. data/ext/nokogiri/xml_namespace.c +4 -2
  21. data/ext/nokogiri/xml_node.c +147 -133
  22. data/ext/nokogiri/xml_node_set.c +20 -20
  23. data/ext/nokogiri/xml_reader.c +2 -0
  24. data/ext/nokogiri/xml_sax_parser.c +6 -6
  25. data/ext/nokogiri/xml_sax_parser_context.c +2 -0
  26. data/ext/nokogiri/xml_schema.c +2 -0
  27. data/ext/nokogiri/xml_xpath_context.c +67 -65
  28. data/ext/nokogiri/xslt_stylesheet.c +2 -1
  29. data/gumbo-parser/CHANGES.md +63 -0
  30. data/gumbo-parser/Makefile +101 -0
  31. data/gumbo-parser/THANKS +27 -0
  32. data/gumbo-parser/src/Makefile +34 -0
  33. data/gumbo-parser/src/README.md +41 -0
  34. data/gumbo-parser/src/ascii.c +75 -0
  35. data/gumbo-parser/src/ascii.h +115 -0
  36. data/gumbo-parser/src/attribute.c +42 -0
  37. data/gumbo-parser/src/attribute.h +17 -0
  38. data/gumbo-parser/src/char_ref.c +22225 -0
  39. data/gumbo-parser/src/char_ref.h +29 -0
  40. data/gumbo-parser/src/char_ref.rl +2154 -0
  41. data/gumbo-parser/src/error.c +626 -0
  42. data/gumbo-parser/src/error.h +148 -0
  43. data/gumbo-parser/src/foreign_attrs.c +104 -0
  44. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  45. data/gumbo-parser/src/gumbo.h +943 -0
  46. data/gumbo-parser/src/insertion_mode.h +33 -0
  47. data/gumbo-parser/src/macros.h +91 -0
  48. data/gumbo-parser/src/parser.c +4886 -0
  49. data/gumbo-parser/src/parser.h +41 -0
  50. data/gumbo-parser/src/replacement.h +33 -0
  51. data/gumbo-parser/src/string_buffer.c +103 -0
  52. data/gumbo-parser/src/string_buffer.h +68 -0
  53. data/gumbo-parser/src/string_piece.c +48 -0
  54. data/gumbo-parser/src/svg_attrs.c +174 -0
  55. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  56. data/gumbo-parser/src/svg_tags.c +137 -0
  57. data/gumbo-parser/src/svg_tags.gperf +55 -0
  58. data/gumbo-parser/src/tag.c +222 -0
  59. data/gumbo-parser/src/tag_lookup.c +382 -0
  60. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  61. data/gumbo-parser/src/tag_lookup.h +13 -0
  62. data/gumbo-parser/src/token_buffer.c +79 -0
  63. data/gumbo-parser/src/token_buffer.h +71 -0
  64. data/gumbo-parser/src/token_type.h +17 -0
  65. data/gumbo-parser/src/tokenizer.c +3463 -0
  66. data/gumbo-parser/src/tokenizer.h +112 -0
  67. data/gumbo-parser/src/tokenizer_states.h +339 -0
  68. data/gumbo-parser/src/utf8.c +245 -0
  69. data/gumbo-parser/src/utf8.h +164 -0
  70. data/gumbo-parser/src/util.c +68 -0
  71. data/gumbo-parser/src/util.h +30 -0
  72. data/gumbo-parser/src/vector.c +111 -0
  73. data/gumbo-parser/src/vector.h +45 -0
  74. data/lib/nokogiri/css/parser.rb +1 -1
  75. data/lib/nokogiri/css/parser.y +1 -1
  76. data/lib/nokogiri/css/syntax_error.rb +1 -1
  77. data/lib/nokogiri/css.rb +14 -14
  78. data/lib/nokogiri/extension.rb +7 -2
  79. data/lib/nokogiri/gumbo.rb +14 -0
  80. data/lib/nokogiri/html.rb +31 -27
  81. data/lib/nokogiri/{html → html4}/builder.rb +2 -2
  82. data/lib/nokogiri/{html → html4}/document.rb +4 -4
  83. data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
  84. data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
  85. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
  86. data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
  87. data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
  88. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  89. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
  90. data/lib/nokogiri/html4.rb +40 -0
  91. data/lib/nokogiri/html5/document.rb +74 -0
  92. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  93. data/lib/nokogiri/html5/node.rb +93 -0
  94. data/lib/nokogiri/html5.rb +473 -0
  95. data/lib/nokogiri/version/constant.rb +1 -1
  96. data/lib/nokogiri/version/info.rb +12 -2
  97. data/lib/nokogiri/xml/builder.rb +38 -0
  98. data/lib/nokogiri/xml/document.rb +89 -17
  99. data/lib/nokogiri/xml/node/save_options.rb +1 -1
  100. data/lib/nokogiri/xml/node.rb +7 -5
  101. data/lib/nokogiri/xml/parse_options.rb +2 -0
  102. data/lib/nokogiri/xml/pp.rb +2 -2
  103. data/lib/nokogiri/xml/sax/document.rb +24 -30
  104. data/lib/nokogiri/xml/sax.rb +4 -4
  105. data/lib/nokogiri/xml/xpath.rb +2 -2
  106. data/lib/nokogiri/xml.rb +35 -36
  107. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  108. data/lib/nokogiri/xslt.rb +16 -16
  109. data/lib/nokogiri.rb +31 -29
  110. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  111. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  112. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  113. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
  114. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  115. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  116. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  117. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  118. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  119. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  120. metadata +110 -69
  121. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  122. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  123. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  124. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  125. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  126. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
  127. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,17 +1,17 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- static VALUE cNokogiriHtmlEntityLookup;
3
+ static VALUE cNokogiriHtml4EntityLookup;
4
4
 
5
5
  /*
6
6
  * call-seq:
7
7
  * get(key)
8
8
  *
9
- * Get the HTML::EntityDescription for +key+
9
+ * Get the HTML4::EntityDescription for +key+
10
10
  */
11
11
  static VALUE
12
12
  get(VALUE _, VALUE rb_entity_name)
13
13
  {
14
- VALUE cNokogiriHtmlEntityDescription;
14
+ VALUE cNokogiriHtml4EntityDescription;
15
15
  const htmlEntityDesc *c_entity_desc;
16
16
  VALUE rb_constructor_args[3];
17
17
 
@@ -24,14 +24,14 @@ get(VALUE _, VALUE rb_entity_name)
24
24
  rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
25
25
  rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
26
26
 
27
- cNokogiriHtmlEntityDescription = rb_const_get_at(mNokogiriHtml, rb_intern("EntityDescription"));
28
- return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtmlEntityDescription);
27
+ cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription"));
28
+ return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription);
29
29
  }
30
30
 
31
31
  void
32
32
  noko_init_html_entity_lookup()
33
33
  {
34
- cNokogiriHtmlEntityLookup = rb_define_class_under(mNokogiriHtml, "EntityLookup", rb_cObject);
34
+ cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
35
35
 
36
- rb_define_method(cNokogiriHtmlEntityLookup, "get", get, 1);
36
+ rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1);
37
37
  }
@@ -1,6 +1,6 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- VALUE cNokogiriHtmlSaxParserContext ;
3
+ VALUE cNokogiriHtml4SaxParserContext ;
4
4
 
5
5
  static void
6
6
  deallocate(xmlParserCtxtPtr ctxt)
@@ -110,10 +110,11 @@ void
110
110
  noko_init_html_sax_parser_context()
111
111
  {
112
112
  assert(cNokogiriXmlSaxParserContext);
113
- cNokogiriHtmlSaxParserContext = rb_define_class_under(mNokogiriHtmlSax, "ParserContext", cNokogiriXmlSaxParserContext);
113
+ cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext",
114
+ cNokogiriXmlSaxParserContext);
114
115
 
115
- rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "memory", parse_memory, 2);
116
- rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "file", parse_file, 2);
116
+ rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2);
117
+ rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2);
117
118
 
118
- rb_define_method(cNokogiriHtmlSaxParserContext, "parse_with", parse_with, 1);
119
+ rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1);
119
120
  }
@@ -1,6 +1,6 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- VALUE cNokogiriHtmlSaxPushParser;
3
+ VALUE cNokogiriHtml4SaxPushParser;
4
4
 
5
5
  /*
6
6
  * call-seq:
@@ -88,8 +88,8 @@ void
88
88
  noko_init_html_sax_push_parser()
89
89
  {
90
90
  assert(cNokogiriXmlSaxPushParser);
91
- cNokogiriHtmlSaxPushParser = rb_define_class_under(mNokogiriHtmlSax, "PushParser", cNokogiriXmlSaxPushParser);
91
+ cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
92
92
 
93
- rb_define_private_method(cNokogiriHtmlSaxPushParser, "initialize_native", initialize_native, 3);
94
- rb_define_private_method(cNokogiriHtmlSaxPushParser, "native_write", native_write, 2);
93
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3);
94
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2);
95
95
  }
@@ -20,14 +20,14 @@ xmlFirstElementChild(xmlNodePtr parent)
20
20
  return (NULL);
21
21
  }
22
22
  switch (parent->type) {
23
- case XML_ELEMENT_NODE:
24
- case XML_ENTITY_NODE:
25
- case XML_DOCUMENT_NODE:
26
- case XML_HTML_DOCUMENT_NODE:
27
- cur = parent->children;
28
- break;
29
- default:
30
- return (NULL);
23
+ case XML_ELEMENT_NODE:
24
+ case XML_ENTITY_NODE:
25
+ case XML_DOCUMENT_NODE:
26
+ case XML_HTML_DOCUMENT_NODE:
27
+ cur = parent->children;
28
+ break;
29
+ default:
30
+ return (NULL);
31
31
  }
32
32
  while (cur != NULL) {
33
33
  if (cur->type == XML_ELEMENT_NODE) {
@@ -57,20 +57,20 @@ xmlNextElementSibling(xmlNodePtr node)
57
57
  return (NULL);
58
58
  }
59
59
  switch (node->type) {
60
- case XML_ELEMENT_NODE:
61
- case XML_TEXT_NODE:
62
- case XML_CDATA_SECTION_NODE:
63
- case XML_ENTITY_REF_NODE:
64
- case XML_ENTITY_NODE:
65
- case XML_PI_NODE:
66
- case XML_COMMENT_NODE:
67
- case XML_DTD_NODE:
68
- case XML_XINCLUDE_START:
69
- case XML_XINCLUDE_END:
70
- node = node->next;
71
- break;
72
- default:
73
- return (NULL);
60
+ case XML_ELEMENT_NODE:
61
+ case XML_TEXT_NODE:
62
+ case XML_CDATA_SECTION_NODE:
63
+ case XML_ENTITY_REF_NODE:
64
+ case XML_ENTITY_NODE:
65
+ case XML_PI_NODE:
66
+ case XML_COMMENT_NODE:
67
+ case XML_DTD_NODE:
68
+ case XML_XINCLUDE_START:
69
+ case XML_XINCLUDE_END:
70
+ node = node->next;
71
+ break;
72
+ default:
73
+ return (NULL);
74
74
  }
75
75
  while (node != NULL) {
76
76
  if (node->type == XML_ELEMENT_NODE) {
@@ -101,14 +101,14 @@ xmlLastElementChild(xmlNodePtr parent)
101
101
  return (NULL);
102
102
  }
103
103
  switch (parent->type) {
104
- case XML_ELEMENT_NODE:
105
- case XML_ENTITY_NODE:
106
- case XML_DOCUMENT_NODE:
107
- case XML_HTML_DOCUMENT_NODE:
108
- cur = parent->last;
109
- break;
110
- default:
111
- return (NULL);
104
+ case XML_ELEMENT_NODE:
105
+ case XML_ENTITY_NODE:
106
+ case XML_DOCUMENT_NODE:
107
+ case XML_HTML_DOCUMENT_NODE:
108
+ cur = parent->last;
109
+ break;
110
+ default:
111
+ return (NULL);
112
112
  }
113
113
  while (cur != NULL) {
114
114
  if (cur->type == XML_ELEMENT_NODE) {
@@ -1,8 +1,10 @@
1
1
  #include <nokogiri.h>
2
2
 
3
3
  VALUE mNokogiri ;
4
- VALUE mNokogiriHtml ;
5
- VALUE mNokogiriHtmlSax ;
4
+ VALUE mNokogiriGumbo ;
5
+ VALUE mNokogiriHtml4 ;
6
+ VALUE mNokogiriHtml4Sax ;
7
+ VALUE mNokogiriHtml5 ;
6
8
  VALUE mNokogiriXml ;
7
9
  VALUE mNokogiriXmlSax ;
8
10
  VALUE mNokogiriXmlXpath ;
@@ -13,38 +15,39 @@ VALUE cNokogiriXmlCharacterData;
13
15
  VALUE cNokogiriXmlElement;
14
16
  VALUE cNokogiriXmlXpathSyntaxError;
15
17
 
16
- void noko_init_xml_attr();
17
- void noko_init_xml_attribute_decl();
18
- void noko_init_xml_cdata();
19
- void noko_init_xml_comment();
20
- void noko_init_xml_document();
21
- void noko_init_xml_document_fragment();
22
- void noko_init_xml_dtd();
23
- void noko_init_xml_element_content();
24
- void noko_init_xml_element_decl();
25
- void noko_init_xml_encoding_handler();
26
- void noko_init_xml_entity_decl();
27
- void noko_init_xml_entity_reference();
28
- void noko_init_xml_namespace();
29
- void noko_init_xml_node();
30
- void noko_init_xml_node_set();
31
- void noko_init_xml_processing_instruction();
32
- void noko_init_xml_reader();
33
- void noko_init_xml_relax_ng();
34
- void noko_init_xml_sax_parser();
35
- void noko_init_xml_sax_parser_context();
36
- void noko_init_xml_sax_push_parser();
37
- void noko_init_xml_schema();
38
- void noko_init_xml_syntax_error();
39
- void noko_init_xml_text();
40
- void noko_init_xml_xpath_context();
41
- void noko_init_xslt_stylesheet();
42
- void noko_init_html_document();
43
- void noko_init_html_element_description();
44
- void noko_init_html_entity_lookup();
45
- void noko_init_html_sax_parser_context();
46
- void noko_init_html_sax_push_parser();
47
- void noko_init_test_global_handlers();
18
+ void noko_init_xml_attr(void);
19
+ void noko_init_xml_attribute_decl(void);
20
+ void noko_init_xml_cdata(void);
21
+ void noko_init_xml_comment(void);
22
+ void noko_init_xml_document(void);
23
+ void noko_init_xml_document_fragment(void);
24
+ void noko_init_xml_dtd(void);
25
+ void noko_init_xml_element_content(void);
26
+ void noko_init_xml_element_decl(void);
27
+ void noko_init_xml_encoding_handler(void);
28
+ void noko_init_xml_entity_decl(void);
29
+ void noko_init_xml_entity_reference(void);
30
+ void noko_init_xml_namespace(void);
31
+ void noko_init_xml_node(void);
32
+ void noko_init_xml_node_set(void);
33
+ void noko_init_xml_processing_instruction(void);
34
+ void noko_init_xml_reader(void);
35
+ void noko_init_xml_relax_ng(void);
36
+ void noko_init_xml_sax_parser(void);
37
+ void noko_init_xml_sax_parser_context(void);
38
+ void noko_init_xml_sax_push_parser(void);
39
+ void noko_init_xml_schema(void);
40
+ void noko_init_xml_syntax_error(void);
41
+ void noko_init_xml_text(void);
42
+ void noko_init_xml_xpath_context(void);
43
+ void noko_init_xslt_stylesheet(void);
44
+ void noko_init_html_document(void);
45
+ void noko_init_html_element_description(void);
46
+ void noko_init_html_entity_lookup(void);
47
+ void noko_init_html_sax_parser_context(void);
48
+ void noko_init_html_sax_push_parser(void);
49
+ void noko_init_gumbo(void);
50
+ void noko_init_test_global_handlers(void);
48
51
 
49
52
  static ID id_read, id_write;
50
53
 
@@ -152,12 +155,14 @@ void
152
155
  Init_nokogiri()
153
156
  {
154
157
  mNokogiri = rb_define_module("Nokogiri");
158
+ mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
159
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
160
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
161
+ mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
155
162
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
156
- mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
157
- mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
158
- mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
159
163
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
160
- mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
164
+ mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
165
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
161
166
 
162
167
  rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
163
168
  rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
@@ -191,9 +196,35 @@ Init_nokogiri()
191
196
  rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
192
197
  #endif
193
198
 
199
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
200
+ /*
201
+ * We choose *not* to use Ruby's memory management functions with windows DLLs because of this
202
+ * issue in libxml 2.9.12:
203
+ *
204
+ * https://github.com/sparklemotion/nokogiri/issues/2241
205
+ *
206
+ * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
207
+ * this config only for the specific libxml2 versions 2.9.12.
208
+ *
209
+ * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
210
+ * default memory management functions (recall that this config was introduced to reduce memory
211
+ * bloat and allow Ruby to GC more often); but we should *really* test with production workloads
212
+ * before making that kind of a potentially-invasive change.
213
+ */
214
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
215
+ #else
216
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
194
217
  xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
218
+ #endif
195
219
 
196
220
  xmlInitParser();
221
+ exsltRegisterAll();
222
+
223
+ if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
224
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
225
+ } else {
226
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
227
+ }
197
228
 
198
229
  cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
199
230
  noko_init_xml_syntax_error();
@@ -238,6 +269,7 @@ Init_nokogiri()
238
269
  noko_init_xml_document_fragment();
239
270
  noko_init_xml_document();
240
271
  noko_init_html_document();
272
+ noko_init_gumbo();
241
273
 
242
274
  noko_init_test_global_handlers();
243
275
 
@@ -1,7 +1,7 @@
1
1
  #ifndef NOKOGIRI_NATIVE
2
2
  #define NOKOGIRI_NATIVE
3
3
 
4
- #if _MSC_VER
4
+ #ifdef _MSC_VER
5
5
  # ifndef WIN32_LEAN_AND_MEAN
6
6
  # define WIN32_LEAN_AND_MEAN
7
7
  # endif /* WIN32_LEAN_AND_MEAN */
@@ -15,7 +15,7 @@
15
15
  # include <windows.h>
16
16
  #endif
17
17
 
18
- #if _WIN32
18
+ #ifdef _WIN32
19
19
  # define NOKOPUBFUN __declspec(dllexport)
20
20
  # define NOKOPUBVAR __declspec(dllexport) extern
21
21
  #else
@@ -69,6 +69,7 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
69
69
  #include <ruby/st.h>
70
70
  #include <ruby/encoding.h>
71
71
  #include <ruby/util.h>
72
+ #include <ruby/version.h>
72
73
 
73
74
  #define NOKOGIRI_STR_NEW2(str) NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
74
75
  #define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
@@ -92,12 +93,16 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
92
93
 
93
94
 
94
95
  NOKOPUBVAR VALUE mNokogiri ;
95
- NOKOPUBVAR VALUE mNokogiriHtml ;
96
- NOKOPUBVAR VALUE mNokogiriHtmlSax ;
96
+ NOKOPUBVAR VALUE mNokogiriGumbo ;
97
+ NOKOPUBVAR VALUE mNokogiriHtml4 ;
98
+ NOKOPUBVAR VALUE mNokogiriHtml4Sax ;
99
+ NOKOPUBVAR VALUE mNokogiriHtml5 ;
97
100
  NOKOPUBVAR VALUE mNokogiriXml ;
98
101
  NOKOPUBVAR VALUE mNokogiriXmlSax ;
102
+ NOKOPUBVAR VALUE mNokogiriXmlXpath ;
99
103
  NOKOPUBVAR VALUE mNokogiriXslt ;
100
104
 
105
+ NOKOPUBVAR VALUE cNokogiriEncodingHandler;
101
106
  NOKOPUBVAR VALUE cNokogiriSyntaxError;
102
107
  NOKOPUBVAR VALUE cNokogiriXmlAttr;
103
108
  NOKOPUBVAR VALUE cNokogiriXmlAttributeDecl;
@@ -128,10 +133,11 @@ NOKOPUBVAR VALUE cNokogiriXmlXpathContext;
128
133
  NOKOPUBVAR VALUE cNokogiriXmlXpathSyntaxError;
129
134
  NOKOPUBVAR VALUE cNokogiriXsltStylesheet ;
130
135
 
131
- NOKOPUBVAR VALUE cNokogiriHtmlDocument ;
132
- NOKOPUBVAR VALUE cNokogiriHtmlSaxPushParser ;
133
- NOKOPUBVAR VALUE cNokogiriHtmlElementDescription ;
134
- NOKOPUBVAR VALUE cNokogiriHtmlSaxParserContext;
136
+ NOKOPUBVAR VALUE cNokogiriHtml4Document ;
137
+ NOKOPUBVAR VALUE cNokogiriHtml4SaxPushParser ;
138
+ NOKOPUBVAR VALUE cNokogiriHtml4ElementDescription ;
139
+ NOKOPUBVAR VALUE cNokogiriHtml4SaxParserContext;
140
+ NOKOPUBVAR VALUE cNokogiriHtml5Document ;
135
141
 
136
142
  typedef struct _nokogiriTuple {
137
143
  VALUE doc;
@@ -177,7 +183,8 @@ VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
177
183
 
178
184
  VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
179
185
  VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
180
- NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
186
+ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
187
+ xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
181
188
 
182
189
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
183
190
  #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
@@ -190,6 +197,9 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); /* depr
190
197
  #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
191
198
  #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) free(_tuple)
192
199
 
200
+ #define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
201
+ #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
202
+
193
203
  void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
194
204
  void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
195
205
  xmlStructuredErrorFunc handler);
@@ -6,19 +6,19 @@ static int
6
6
  dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
7
7
  {
8
8
  switch (node->type) {
9
- case XML_ATTRIBUTE_NODE:
10
- xmlFreePropList((xmlAttrPtr)node);
11
- break;
12
- case XML_NAMESPACE_DECL:
13
- xmlFreeNs((xmlNsPtr)node);
14
- break;
15
- case XML_DTD_NODE:
16
- xmlFreeDtd((xmlDtdPtr)node);
17
- break;
18
- default:
19
- if (node->parent == NULL) {
20
- xmlAddChild((xmlNodePtr)doc, node);
21
- }
9
+ case XML_ATTRIBUTE_NODE:
10
+ xmlFreePropList((xmlAttrPtr)node);
11
+ break;
12
+ case XML_NAMESPACE_DECL:
13
+ xmlFreeNs((xmlNsPtr)node);
14
+ break;
15
+ case XML_DTD_NODE:
16
+ xmlFreeDtd((xmlDtdPtr)node);
17
+ break;
18
+ default:
19
+ if (node->parent == NULL) {
20
+ xmlAddChild((xmlNodePtr)doc, node);
21
+ }
22
22
  }
23
23
  return ST_CONTINUE;
24
24
  }
@@ -141,42 +141,41 @@ url(VALUE self)
141
141
  * Set the root element on this document
142
142
  */
143
143
  static VALUE
144
- set_root(VALUE self, VALUE root)
144
+ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
145
145
  {
146
- xmlDocPtr doc;
147
- xmlNodePtr new_root;
148
- xmlNodePtr old_root;
149
-
150
- Data_Get_Struct(self, xmlDoc, doc);
146
+ xmlDocPtr c_document;
147
+ xmlNodePtr c_new_root = NULL, c_current_root;
151
148
 
152
- old_root = NULL;
153
-
154
- if (NIL_P(root)) {
155
- old_root = xmlDocGetRootElement(doc);
156
-
157
- if (old_root) {
158
- xmlUnlinkNode(old_root);
159
- noko_xml_document_pin_node(old_root);
160
- }
149
+ Data_Get_Struct(self, xmlDoc, c_document);
161
150
 
162
- return root;
151
+ c_current_root = xmlDocGetRootElement(c_document);
152
+ if (c_current_root) {
153
+ xmlUnlinkNode(c_current_root);
154
+ noko_xml_document_pin_node(c_current_root);
163
155
  }
164
156
 
165
- Data_Get_Struct(root, xmlNode, new_root);
157
+ if (!NIL_P(rb_new_root)) {
158
+ if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
159
+ rb_raise(rb_eArgError,
160
+ "expected Nokogiri::XML::Node but received %"PRIsVALUE,
161
+ rb_obj_class(rb_new_root));
162
+ }
166
163
 
164
+ Data_Get_Struct(rb_new_root, xmlNode, c_new_root);
167
165
 
168
- /* If the new root's document is not the same as the current document,
169
- * then we need to dup the node in to this document. */
170
- if (new_root->doc != doc) {
171
- old_root = xmlDocGetRootElement(doc);
172
- if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
173
- rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
166
+ /* If the new root's document is not the same as the current document,
167
+ * then we need to dup the node into this document. */
168
+ if (c_new_root->doc != c_document) {
169
+ c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
170
+ if (!c_new_root) {
171
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
172
+ }
174
173
  }
175
174
  }
176
175
 
177
- xmlDocSetRootElement(doc, new_root);
178
- if (old_root) { noko_xml_document_pin_node(old_root); }
179
- return root;
176
+ xmlDocSetRootElement(c_document, c_new_root);
177
+
178
+ return rb_new_root;
180
179
  }
181
180
 
182
181
  /*
@@ -186,17 +185,19 @@ set_root(VALUE self, VALUE root)
186
185
  * Get the root node for this document.
187
186
  */
188
187
  static VALUE
189
- root(VALUE self)
188
+ rb_xml_document_root(VALUE self)
190
189
  {
191
- xmlDocPtr doc;
192
- xmlNodePtr root;
190
+ xmlDocPtr c_document;
191
+ xmlNodePtr c_root;
193
192
 
194
- Data_Get_Struct(self, xmlDoc, doc);
193
+ Data_Get_Struct(self, xmlDoc, c_document);
195
194
 
196
- root = xmlDocGetRootElement(doc);
195
+ c_root = xmlDocGetRootElement(c_document);
196
+ if (!c_root) {
197
+ return Qnil;
198
+ }
197
199
 
198
- if (!root) { return Qnil; }
199
- return noko_xml_node_wrap(Qnil, root) ;
200
+ return noko_xml_node_wrap(Qnil, c_root) ;
200
201
  }
201
202
 
202
203
  /*
@@ -212,7 +213,7 @@ set_encoding(VALUE self, VALUE encoding)
212
213
  Data_Get_Struct(self, xmlDoc, doc);
213
214
 
214
215
  if (doc->encoding) {
215
- free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
216
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
216
217
  }
217
218
 
218
219
  doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
@@ -666,8 +667,8 @@ noko_init_xml_document()
666
667
  rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
667
668
  rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
668
669
 
669
- rb_define_method(cNokogiriXmlDocument, "root", root, 0);
670
- rb_define_method(cNokogiriXmlDocument, "root=", set_root, 1);
670
+ rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
671
+ rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
671
672
  rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
672
673
  rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
673
674
  rb_define_method(cNokogiriXmlDocument, "version", version, 0);
@@ -116,6 +116,8 @@ noko_init_xml_element_content()
116
116
  {
117
117
  cNokogiriXmlElementContent = rb_define_class_under(mNokogiriXml, "ElementContent", rb_cObject);
118
118
 
119
+ rb_undef_alloc_func(cNokogiriXmlElementContent);
120
+
119
121
  rb_define_method(cNokogiriXmlElementContent, "name", get_name, 0);
120
122
  rb_define_method(cNokogiriXmlElementContent, "type", get_type, 0);
121
123
  rb_define_method(cNokogiriXmlElementContent, "occur", get_occur, 0);
@@ -1,5 +1,8 @@
1
1
  #include <nokogiri.h>
2
2
 
3
+ VALUE cNokogiriEncodingHandler;
4
+
5
+
3
6
  /*
4
7
  * call-seq: Nokogiri::EncodingHandler.[](name)
5
8
  *
@@ -75,11 +78,13 @@ name(VALUE self)
75
78
  void
76
79
  noko_init_xml_encoding_handler()
77
80
  {
78
- VALUE klass = rb_define_class_under(mNokogiri, "EncodingHandler", rb_cObject);
81
+ cNokogiriEncodingHandler = rb_define_class_under(mNokogiri, "EncodingHandler", rb_cObject);
82
+
83
+ rb_undef_alloc_func(cNokogiriEncodingHandler);
79
84
 
80
- rb_define_singleton_method(klass, "[]", get, 1);
81
- rb_define_singleton_method(klass, "delete", delete, 1);
82
- rb_define_singleton_method(klass, "alias", alias, 2);
83
- rb_define_singleton_method(klass, "clear_aliases!", clear_aliases, 0);
84
- rb_define_method(klass, "name", name, 0);
85
+ rb_define_singleton_method(cNokogiriEncodingHandler, "[]", get, 1);
86
+ rb_define_singleton_method(cNokogiriEncodingHandler, "delete", delete, 1);
87
+ rb_define_singleton_method(cNokogiriEncodingHandler, "alias", alias, 2);
88
+ rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", clear_aliases, 0);
89
+ rb_define_method(cNokogiriEncodingHandler, "name", name, 0);
85
90
  }
@@ -33,10 +33,10 @@ dealloc_namespace(xmlNsPtr ns)
33
33
  */
34
34
  NOKOGIRI_DEBUG_START(ns) ;
35
35
  if (ns->href) {
36
- xmlFree((xmlChar *)(uintptr_t)ns->href);
36
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
37
37
  }
38
38
  if (ns->prefix) {
39
- xmlFree((xmlChar *)(uintptr_t)ns->prefix);
39
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix));
40
40
  }
41
41
  xmlFree(ns);
42
42
  NOKOGIRI_DEBUG_END(ns) ;
@@ -113,6 +113,8 @@ noko_init_xml_namespace()
113
113
  {
114
114
  cNokogiriXmlNamespace = rb_define_class_under(mNokogiriXml, "Namespace", rb_cObject);
115
115
 
116
+ rb_undef_alloc_func(cNokogiriXmlNamespace);
117
+
116
118
  rb_define_method(cNokogiriXmlNamespace, "prefix", prefix, 0);
117
119
  rb_define_method(cNokogiriXmlNamespace, "href", href, 0);
118
120
  }