nokogiri 1.11.3 → 1.13.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (179) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE-DEPENDENCIES.md +243 -22
  4. data/LICENSE.md +1 -1
  5. data/README.md +14 -11
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -64
  8. data/ext/nokogiri/depend +35 -34
  9. data/ext/nokogiri/extconf.rb +237 -133
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  12. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  13. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  14. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +8 -8
  15. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  16. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  17. data/ext/nokogiri/nokogiri.c +70 -38
  18. data/ext/nokogiri/nokogiri.h +27 -9
  19. data/ext/nokogiri/xml_attr.c +2 -2
  20. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  21. data/ext/nokogiri/xml_cdata.c +1 -1
  22. data/ext/nokogiri/xml_document.c +50 -50
  23. data/ext/nokogiri/xml_document_fragment.c +0 -2
  24. data/ext/nokogiri/xml_dtd.c +10 -10
  25. data/ext/nokogiri/xml_element_content.c +2 -0
  26. data/ext/nokogiri/xml_element_decl.c +3 -3
  27. data/ext/nokogiri/xml_encoding_handler.c +31 -12
  28. data/ext/nokogiri/xml_entity_decl.c +5 -5
  29. data/ext/nokogiri/xml_namespace.c +4 -2
  30. data/ext/nokogiri/xml_node.c +833 -492
  31. data/ext/nokogiri/xml_node_set.c +24 -24
  32. data/ext/nokogiri/xml_reader.c +90 -11
  33. data/ext/nokogiri/xml_sax_parser.c +6 -6
  34. data/ext/nokogiri/xml_sax_parser_context.c +12 -3
  35. data/ext/nokogiri/xml_schema.c +5 -3
  36. data/ext/nokogiri/xml_text.c +1 -1
  37. data/ext/nokogiri/xml_xpath_context.c +110 -85
  38. data/ext/nokogiri/xslt_stylesheet.c +109 -10
  39. data/gumbo-parser/CHANGES.md +63 -0
  40. data/gumbo-parser/Makefile +101 -0
  41. data/gumbo-parser/THANKS +27 -0
  42. data/gumbo-parser/src/Makefile +34 -0
  43. data/gumbo-parser/src/README.md +41 -0
  44. data/gumbo-parser/src/ascii.c +75 -0
  45. data/gumbo-parser/src/ascii.h +115 -0
  46. data/gumbo-parser/src/attribute.c +42 -0
  47. data/gumbo-parser/src/attribute.h +17 -0
  48. data/gumbo-parser/src/char_ref.c +22225 -0
  49. data/gumbo-parser/src/char_ref.h +29 -0
  50. data/gumbo-parser/src/char_ref.rl +2154 -0
  51. data/gumbo-parser/src/error.c +626 -0
  52. data/gumbo-parser/src/error.h +148 -0
  53. data/gumbo-parser/src/foreign_attrs.c +104 -0
  54. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  55. data/gumbo-parser/src/gumbo.h +943 -0
  56. data/gumbo-parser/src/insertion_mode.h +33 -0
  57. data/gumbo-parser/src/macros.h +91 -0
  58. data/gumbo-parser/src/parser.c +4875 -0
  59. data/gumbo-parser/src/parser.h +41 -0
  60. data/gumbo-parser/src/replacement.h +33 -0
  61. data/gumbo-parser/src/string_buffer.c +103 -0
  62. data/gumbo-parser/src/string_buffer.h +68 -0
  63. data/gumbo-parser/src/string_piece.c +48 -0
  64. data/gumbo-parser/src/svg_attrs.c +174 -0
  65. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  66. data/gumbo-parser/src/svg_tags.c +137 -0
  67. data/gumbo-parser/src/svg_tags.gperf +55 -0
  68. data/gumbo-parser/src/tag.c +222 -0
  69. data/gumbo-parser/src/tag_lookup.c +382 -0
  70. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  71. data/gumbo-parser/src/tag_lookup.h +13 -0
  72. data/gumbo-parser/src/token_buffer.c +79 -0
  73. data/gumbo-parser/src/token_buffer.h +71 -0
  74. data/gumbo-parser/src/token_type.h +17 -0
  75. data/gumbo-parser/src/tokenizer.c +3463 -0
  76. data/gumbo-parser/src/tokenizer.h +112 -0
  77. data/gumbo-parser/src/tokenizer_states.h +339 -0
  78. data/gumbo-parser/src/utf8.c +245 -0
  79. data/gumbo-parser/src/utf8.h +164 -0
  80. data/gumbo-parser/src/util.c +68 -0
  81. data/gumbo-parser/src/util.h +30 -0
  82. data/gumbo-parser/src/vector.c +111 -0
  83. data/gumbo-parser/src/vector.h +45 -0
  84. data/lib/nokogiri/class_resolver.rb +67 -0
  85. data/lib/nokogiri/css/node.rb +9 -8
  86. data/lib/nokogiri/css/parser.rb +361 -342
  87. data/lib/nokogiri/css/parser.y +250 -245
  88. data/lib/nokogiri/css/parser_extras.rb +22 -20
  89. data/lib/nokogiri/css/syntax_error.rb +2 -1
  90. data/lib/nokogiri/css/tokenizer.rb +4 -3
  91. data/lib/nokogiri/css/tokenizer.rex +3 -2
  92. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  93. data/lib/nokogiri/css.rb +49 -17
  94. data/lib/nokogiri/decorators/slop.rb +8 -7
  95. data/lib/nokogiri/extension.rb +8 -3
  96. data/lib/nokogiri/gumbo.rb +15 -0
  97. data/lib/nokogiri/html.rb +37 -27
  98. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  99. data/lib/nokogiri/{html → html4}/document.rb +92 -81
  100. data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
  101. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  102. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  103. data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
  104. data/lib/nokogiri/{html → html4}/sax/parser.rb +16 -16
  105. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  106. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
  107. data/lib/nokogiri/html4.rb +46 -0
  108. data/lib/nokogiri/html5/document.rb +91 -0
  109. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  110. data/lib/nokogiri/html5/node.rb +100 -0
  111. data/lib/nokogiri/html5.rb +478 -0
  112. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  113. data/lib/nokogiri/syntax_error.rb +1 -0
  114. data/lib/nokogiri/version/constant.rb +2 -1
  115. data/lib/nokogiri/version/info.rb +31 -14
  116. data/lib/nokogiri/version.rb +1 -0
  117. data/lib/nokogiri/xml/attr.rb +5 -3
  118. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  119. data/lib/nokogiri/xml/builder.rb +71 -31
  120. data/lib/nokogiri/xml/cdata.rb +2 -1
  121. data/lib/nokogiri/xml/character_data.rb +1 -0
  122. data/lib/nokogiri/xml/document.rb +183 -96
  123. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  124. data/lib/nokogiri/xml/dtd.rb +3 -2
  125. data/lib/nokogiri/xml/element_content.rb +1 -0
  126. data/lib/nokogiri/xml/element_decl.rb +2 -1
  127. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  128. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  129. data/lib/nokogiri/xml/namespace.rb +2 -0
  130. data/lib/nokogiri/xml/node/save_options.rb +9 -5
  131. data/lib/nokogiri/xml/node.rb +525 -354
  132. data/lib/nokogiri/xml/node_set.rb +50 -54
  133. data/lib/nokogiri/xml/notation.rb +12 -0
  134. data/lib/nokogiri/xml/parse_options.rb +13 -6
  135. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  136. data/lib/nokogiri/xml/pp/node.rb +24 -26
  137. data/lib/nokogiri/xml/pp.rb +3 -2
  138. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  139. data/lib/nokogiri/xml/reader.rb +20 -24
  140. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  141. data/lib/nokogiri/xml/sax/document.rb +44 -49
  142. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  143. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  144. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  145. data/lib/nokogiri/xml/sax.rb +5 -4
  146. data/lib/nokogiri/xml/schema.rb +7 -6
  147. data/lib/nokogiri/xml/searchable.rb +93 -62
  148. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  149. data/lib/nokogiri/xml/text.rb +1 -0
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  151. data/lib/nokogiri/xml/xpath.rb +13 -1
  152. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  153. data/lib/nokogiri/xml.rb +37 -37
  154. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  155. data/lib/nokogiri/xslt.rb +28 -20
  156. data/lib/nokogiri.rb +48 -43
  157. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  158. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  159. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  160. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  161. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +3 -3
  162. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  163. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  164. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  165. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  166. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  167. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  168. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  169. metadata +204 -93
  170. data/lib/nokogiri/html/element_description_defaults.rb +0 -672
  171. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  172. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  173. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  174. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  175. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  176. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
  177. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +0 -2511
  178. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  179. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,17 +1,17 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- static VALUE cNokogiriHtmlEntityLookup;
3
+ static VALUE cNokogiriHtml4EntityLookup;
4
4
 
5
5
  /*
6
6
  * call-seq:
7
7
  * get(key)
8
8
  *
9
- * Get the HTML::EntityDescription for +key+
9
+ * Get the HTML4::EntityDescription for +key+
10
10
  */
11
11
  static VALUE
12
12
  get(VALUE _, VALUE rb_entity_name)
13
13
  {
14
- VALUE cNokogiriHtmlEntityDescription;
14
+ VALUE cNokogiriHtml4EntityDescription;
15
15
  const htmlEntityDesc *c_entity_desc;
16
16
  VALUE rb_constructor_args[3];
17
17
 
@@ -24,14 +24,14 @@ get(VALUE _, VALUE rb_entity_name)
24
24
  rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
25
25
  rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
26
26
 
27
- cNokogiriHtmlEntityDescription = rb_const_get_at(mNokogiriHtml, rb_intern("EntityDescription"));
28
- return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtmlEntityDescription);
27
+ cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription"));
28
+ return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription);
29
29
  }
30
30
 
31
31
  void
32
32
  noko_init_html_entity_lookup()
33
33
  {
34
- cNokogiriHtmlEntityLookup = rb_define_class_under(mNokogiriHtml, "EntityLookup", rb_cObject);
34
+ cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
35
35
 
36
- rb_define_method(cNokogiriHtmlEntityLookup, "get", get, 1);
36
+ rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1);
37
37
  }
@@ -1,6 +1,6 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- VALUE cNokogiriHtmlSaxParserContext ;
3
+ VALUE cNokogiriHtml4SaxParserContext ;
4
4
 
5
5
  static void
6
6
  deallocate(xmlParserCtxtPtr ctxt)
@@ -19,9 +19,8 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
19
19
  {
20
20
  htmlParserCtxtPtr ctxt;
21
21
 
22
- if (NIL_P(data)) {
23
- rb_raise(rb_eArgError, "data cannot be nil");
24
- }
22
+ Check_Type(data, T_STRING);
23
+
25
24
  if (!(int)RSTRING_LEN(data)) {
26
25
  rb_raise(rb_eRuntimeError, "data cannot be empty");
27
26
  }
@@ -110,10 +109,11 @@ void
110
109
  noko_init_html_sax_parser_context()
111
110
  {
112
111
  assert(cNokogiriXmlSaxParserContext);
113
- cNokogiriHtmlSaxParserContext = rb_define_class_under(mNokogiriHtmlSax, "ParserContext", cNokogiriXmlSaxParserContext);
112
+ cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext",
113
+ cNokogiriXmlSaxParserContext);
114
114
 
115
- rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "memory", parse_memory, 2);
116
- rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "file", parse_file, 2);
115
+ rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2);
116
+ rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2);
117
117
 
118
- rb_define_method(cNokogiriHtmlSaxParserContext, "parse_with", parse_with, 1);
118
+ rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1);
119
119
  }
@@ -1,6 +1,6 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- VALUE cNokogiriHtmlSaxPushParser;
3
+ VALUE cNokogiriHtml4SaxPushParser;
4
4
 
5
5
  /*
6
6
  * call-seq:
@@ -88,8 +88,8 @@ void
88
88
  noko_init_html_sax_push_parser()
89
89
  {
90
90
  assert(cNokogiriXmlSaxPushParser);
91
- cNokogiriHtmlSaxPushParser = rb_define_class_under(mNokogiriHtmlSax, "PushParser", cNokogiriXmlSaxPushParser);
91
+ cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
92
92
 
93
- rb_define_private_method(cNokogiriHtmlSaxPushParser, "initialize_native", initialize_native, 3);
94
- rb_define_private_method(cNokogiriHtmlSaxPushParser, "native_write", native_write, 2);
93
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3);
94
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2);
95
95
  }
@@ -20,14 +20,14 @@ xmlFirstElementChild(xmlNodePtr parent)
20
20
  return (NULL);
21
21
  }
22
22
  switch (parent->type) {
23
- case XML_ELEMENT_NODE:
24
- case XML_ENTITY_NODE:
25
- case XML_DOCUMENT_NODE:
26
- case XML_HTML_DOCUMENT_NODE:
27
- cur = parent->children;
28
- break;
29
- default:
30
- return (NULL);
23
+ case XML_ELEMENT_NODE:
24
+ case XML_ENTITY_NODE:
25
+ case XML_DOCUMENT_NODE:
26
+ case XML_HTML_DOCUMENT_NODE:
27
+ cur = parent->children;
28
+ break;
29
+ default:
30
+ return (NULL);
31
31
  }
32
32
  while (cur != NULL) {
33
33
  if (cur->type == XML_ELEMENT_NODE) {
@@ -57,20 +57,20 @@ xmlNextElementSibling(xmlNodePtr node)
57
57
  return (NULL);
58
58
  }
59
59
  switch (node->type) {
60
- case XML_ELEMENT_NODE:
61
- case XML_TEXT_NODE:
62
- case XML_CDATA_SECTION_NODE:
63
- case XML_ENTITY_REF_NODE:
64
- case XML_ENTITY_NODE:
65
- case XML_PI_NODE:
66
- case XML_COMMENT_NODE:
67
- case XML_DTD_NODE:
68
- case XML_XINCLUDE_START:
69
- case XML_XINCLUDE_END:
70
- node = node->next;
71
- break;
72
- default:
73
- return (NULL);
60
+ case XML_ELEMENT_NODE:
61
+ case XML_TEXT_NODE:
62
+ case XML_CDATA_SECTION_NODE:
63
+ case XML_ENTITY_REF_NODE:
64
+ case XML_ENTITY_NODE:
65
+ case XML_PI_NODE:
66
+ case XML_COMMENT_NODE:
67
+ case XML_DTD_NODE:
68
+ case XML_XINCLUDE_START:
69
+ case XML_XINCLUDE_END:
70
+ node = node->next;
71
+ break;
72
+ default:
73
+ return (NULL);
74
74
  }
75
75
  while (node != NULL) {
76
76
  if (node->type == XML_ELEMENT_NODE) {
@@ -101,14 +101,14 @@ xmlLastElementChild(xmlNodePtr parent)
101
101
  return (NULL);
102
102
  }
103
103
  switch (parent->type) {
104
- case XML_ELEMENT_NODE:
105
- case XML_ENTITY_NODE:
106
- case XML_DOCUMENT_NODE:
107
- case XML_HTML_DOCUMENT_NODE:
108
- cur = parent->last;
109
- break;
110
- default:
111
- return (NULL);
104
+ case XML_ELEMENT_NODE:
105
+ case XML_ENTITY_NODE:
106
+ case XML_DOCUMENT_NODE:
107
+ case XML_HTML_DOCUMENT_NODE:
108
+ cur = parent->last;
109
+ break;
110
+ default:
111
+ return (NULL);
112
112
  }
113
113
  while (cur != NULL) {
114
114
  if (cur->type == XML_ELEMENT_NODE) {
@@ -1,8 +1,10 @@
1
1
  #include <nokogiri.h>
2
2
 
3
3
  VALUE mNokogiri ;
4
- VALUE mNokogiriHtml ;
5
- VALUE mNokogiriHtmlSax ;
4
+ VALUE mNokogiriGumbo ;
5
+ VALUE mNokogiriHtml4 ;
6
+ VALUE mNokogiriHtml4Sax ;
7
+ VALUE mNokogiriHtml5 ;
6
8
  VALUE mNokogiriXml ;
7
9
  VALUE mNokogiriXmlSax ;
8
10
  VALUE mNokogiriXmlXpath ;
@@ -13,38 +15,39 @@ VALUE cNokogiriXmlCharacterData;
13
15
  VALUE cNokogiriXmlElement;
14
16
  VALUE cNokogiriXmlXpathSyntaxError;
15
17
 
16
- void noko_init_xml_attr();
17
- void noko_init_xml_attribute_decl();
18
- void noko_init_xml_cdata();
19
- void noko_init_xml_comment();
20
- void noko_init_xml_document();
21
- void noko_init_xml_document_fragment();
22
- void noko_init_xml_dtd();
23
- void noko_init_xml_element_content();
24
- void noko_init_xml_element_decl();
25
- void noko_init_xml_encoding_handler();
26
- void noko_init_xml_entity_decl();
27
- void noko_init_xml_entity_reference();
28
- void noko_init_xml_namespace();
29
- void noko_init_xml_node();
30
- void noko_init_xml_node_set();
31
- void noko_init_xml_processing_instruction();
32
- void noko_init_xml_reader();
33
- void noko_init_xml_relax_ng();
34
- void noko_init_xml_sax_parser();
35
- void noko_init_xml_sax_parser_context();
36
- void noko_init_xml_sax_push_parser();
37
- void noko_init_xml_schema();
38
- void noko_init_xml_syntax_error();
39
- void noko_init_xml_text();
40
- void noko_init_xml_xpath_context();
41
- void noko_init_xslt_stylesheet();
42
- void noko_init_html_document();
43
- void noko_init_html_element_description();
44
- void noko_init_html_entity_lookup();
45
- void noko_init_html_sax_parser_context();
46
- void noko_init_html_sax_push_parser();
47
- void noko_init_test_global_handlers();
18
+ void noko_init_xml_attr(void);
19
+ void noko_init_xml_attribute_decl(void);
20
+ void noko_init_xml_cdata(void);
21
+ void noko_init_xml_comment(void);
22
+ void noko_init_xml_document(void);
23
+ void noko_init_xml_document_fragment(void);
24
+ void noko_init_xml_dtd(void);
25
+ void noko_init_xml_element_content(void);
26
+ void noko_init_xml_element_decl(void);
27
+ void noko_init_xml_encoding_handler(void);
28
+ void noko_init_xml_entity_decl(void);
29
+ void noko_init_xml_entity_reference(void);
30
+ void noko_init_xml_namespace(void);
31
+ void noko_init_xml_node(void);
32
+ void noko_init_xml_node_set(void);
33
+ void noko_init_xml_processing_instruction(void);
34
+ void noko_init_xml_reader(void);
35
+ void noko_init_xml_relax_ng(void);
36
+ void noko_init_xml_sax_parser(void);
37
+ void noko_init_xml_sax_parser_context(void);
38
+ void noko_init_xml_sax_push_parser(void);
39
+ void noko_init_xml_schema(void);
40
+ void noko_init_xml_syntax_error(void);
41
+ void noko_init_xml_text(void);
42
+ void noko_init_xml_xpath_context(void);
43
+ void noko_init_xslt_stylesheet(void);
44
+ void noko_init_html_document(void);
45
+ void noko_init_html_element_description(void);
46
+ void noko_init_html_entity_lookup(void);
47
+ void noko_init_html_sax_parser_context(void);
48
+ void noko_init_html_sax_push_parser(void);
49
+ void noko_init_gumbo(void);
50
+ void noko_init_test_global_handlers(void);
48
51
 
49
52
  static ID id_read, id_write;
50
53
 
@@ -152,12 +155,14 @@ void
152
155
  Init_nokogiri()
153
156
  {
154
157
  mNokogiri = rb_define_module("Nokogiri");
158
+ mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
159
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
160
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
161
+ mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
155
162
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
156
- mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
157
- mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
158
- mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
159
163
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
160
- mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
164
+ mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
165
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
161
166
 
162
167
  rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
163
168
  rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
@@ -191,9 +196,35 @@ Init_nokogiri()
191
196
  rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
192
197
  #endif
193
198
 
199
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
200
+ /*
201
+ * We choose *not* to use Ruby's memory management functions with Windows DLLs because of this
202
+ * issue in libxml 2.9.12:
203
+ *
204
+ * https://github.com/sparklemotion/nokogiri/issues/2241
205
+ *
206
+ * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
207
+ * this config only for the specific libxml2 version 2.9.12.
208
+ *
209
+ * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
210
+ * default memory management functions (recall that this config was introduced to reduce memory
211
+ * bloat and allow Ruby to GC more often); but we should *really* test with production workloads
212
+ * before making that kind of a potentially-invasive change.
213
+ */
214
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
215
+ #else
216
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
194
217
  xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
218
+ #endif
195
219
 
196
220
  xmlInitParser();
221
+ exsltRegisterAll();
222
+
223
+ if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
224
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
225
+ } else {
226
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
227
+ }
197
228
 
198
229
  cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
199
230
  noko_init_xml_syntax_error();
@@ -238,6 +269,7 @@ Init_nokogiri()
238
269
  noko_init_xml_document_fragment();
239
270
  noko_init_xml_document();
240
271
  noko_init_html_document();
272
+ noko_init_gumbo();
241
273
 
242
274
  noko_init_test_global_handlers();
243
275
 
@@ -1,7 +1,7 @@
1
1
  #ifndef NOKOGIRI_NATIVE
2
2
  #define NOKOGIRI_NATIVE
3
3
 
4
- #if _MSC_VER
4
+ #ifdef _MSC_VER
5
5
  # ifndef WIN32_LEAN_AND_MEAN
6
6
  # define WIN32_LEAN_AND_MEAN
7
7
  # endif /* WIN32_LEAN_AND_MEAN */
@@ -15,7 +15,7 @@
15
15
  # include <windows.h>
16
16
  #endif
17
17
 
18
- #if _WIN32
18
+ #ifdef _WIN32
19
19
  # define NOKOPUBFUN __declspec(dllexport)
20
20
  # define NOKOPUBVAR __declspec(dllexport) extern
21
21
  #else
@@ -69,6 +69,7 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
69
69
  #include <ruby/st.h>
70
70
  #include <ruby/encoding.h>
71
71
  #include <ruby/util.h>
72
+ #include <ruby/version.h>
72
73
 
73
74
  #define NOKOGIRI_STR_NEW2(str) NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
74
75
  #define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
@@ -92,12 +93,16 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
92
93
 
93
94
 
94
95
  NOKOPUBVAR VALUE mNokogiri ;
95
- NOKOPUBVAR VALUE mNokogiriHtml ;
96
- NOKOPUBVAR VALUE mNokogiriHtmlSax ;
96
+ NOKOPUBVAR VALUE mNokogiriGumbo ;
97
+ NOKOPUBVAR VALUE mNokogiriHtml4 ;
98
+ NOKOPUBVAR VALUE mNokogiriHtml4Sax ;
99
+ NOKOPUBVAR VALUE mNokogiriHtml5 ;
97
100
  NOKOPUBVAR VALUE mNokogiriXml ;
98
101
  NOKOPUBVAR VALUE mNokogiriXmlSax ;
102
+ NOKOPUBVAR VALUE mNokogiriXmlXpath ;
99
103
  NOKOPUBVAR VALUE mNokogiriXslt ;
100
104
 
105
+ NOKOPUBVAR VALUE cNokogiriEncodingHandler;
101
106
  NOKOPUBVAR VALUE cNokogiriSyntaxError;
102
107
  NOKOPUBVAR VALUE cNokogiriXmlAttr;
103
108
  NOKOPUBVAR VALUE cNokogiriXmlAttributeDecl;
@@ -128,10 +133,11 @@ NOKOPUBVAR VALUE cNokogiriXmlXpathContext;
128
133
  NOKOPUBVAR VALUE cNokogiriXmlXpathSyntaxError;
129
134
  NOKOPUBVAR VALUE cNokogiriXsltStylesheet ;
130
135
 
131
- NOKOPUBVAR VALUE cNokogiriHtmlDocument ;
132
- NOKOPUBVAR VALUE cNokogiriHtmlSaxPushParser ;
133
- NOKOPUBVAR VALUE cNokogiriHtmlElementDescription ;
134
- NOKOPUBVAR VALUE cNokogiriHtmlSaxParserContext;
136
+ NOKOPUBVAR VALUE cNokogiriHtml4Document ;
137
+ NOKOPUBVAR VALUE cNokogiriHtml4SaxPushParser ;
138
+ NOKOPUBVAR VALUE cNokogiriHtml4ElementDescription ;
139
+ NOKOPUBVAR VALUE cNokogiriHtml4SaxParserContext;
140
+ NOKOPUBVAR VALUE cNokogiriHtml5Document ;
135
141
 
136
142
  typedef struct _nokogiriTuple {
137
143
  VALUE doc;
@@ -164,6 +170,8 @@ int noko_io_read(void *ctx, char *buffer, int len);
164
170
  int noko_io_write(void *ctx, char *buffer, int len);
165
171
  int noko_io_close(void *ctx);
166
172
 
173
+ #define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
174
+
167
175
  VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ;
168
176
  VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ;
169
177
  VALUE noko_xml_node_attrs(xmlNodePtr node) ;
@@ -177,7 +185,8 @@ VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
177
185
 
178
186
  VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
179
187
  VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
180
- NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
188
+ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
189
+ xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
181
190
 
182
191
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
183
192
  #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
@@ -190,6 +199,15 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); /* depr
190
199
  #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
191
200
  #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) free(_tuple)
192
201
 
202
+ #define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
203
+ #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
204
+
205
+ #if HAVE_RB_CATEGORY_WARNING
206
+ # define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
207
+ #else
208
+ # define NOKO_WARN_DEPRECATION(message) rb_warning(message)
209
+ #endif
210
+
193
211
  void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
194
212
  void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
195
213
  xmlStructuredErrorFunc handler);
@@ -16,7 +16,7 @@ set_value(VALUE self, VALUE content)
16
16
  xmlChar *value;
17
17
  xmlNode *cur;
18
18
 
19
- Data_Get_Struct(self, xmlAttr, attr);
19
+ Noko_Node_Get_Struct(self, xmlAttr, attr);
20
20
 
21
21
  if (attr->children) {
22
22
  xmlFreeNodeList(attr->children);
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
68
68
  rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
69
69
  }
70
70
 
71
- Data_Get_Struct(document, xmlDoc, xml_doc);
71
+ Noko_Node_Get_Struct(document, xmlDoc, xml_doc);
72
72
 
73
73
  node = xmlNewDocProp(
74
74
  xml_doc,
@@ -12,7 +12,7 @@ static VALUE
12
12
  attribute_type(VALUE self)
13
13
  {
14
14
  xmlAttributePtr node;
15
- Data_Get_Struct(self, xmlAttribute, node);
15
+ Noko_Node_Get_Struct(self, xmlAttribute, node);
16
16
  return INT2NUM((long)node->atype);
17
17
  }
18
18
 
@@ -26,7 +26,7 @@ static VALUE
26
26
  default_value(VALUE self)
27
27
  {
28
28
  xmlAttributePtr node;
29
- Data_Get_Struct(self, xmlAttribute, node);
29
+ Noko_Node_Get_Struct(self, xmlAttribute, node);
30
30
 
31
31
  if (node->defaultValue) { return NOKOGIRI_STR_NEW2(node->defaultValue); }
32
32
  return Qnil;
@@ -45,7 +45,7 @@ enumeration(VALUE self)
45
45
  xmlEnumerationPtr enm;
46
46
  VALUE list;
47
47
 
48
- Data_Get_Struct(self, xmlAttribute, node);
48
+ Noko_Node_Get_Struct(self, xmlAttribute, node);
49
49
 
50
50
  list = rb_ary_new();
51
51
  enm = node->tree;
@@ -25,7 +25,7 @@ new (int argc, VALUE *argv, VALUE klass)
25
25
 
26
26
  rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
27
27
 
28
- Data_Get_Struct(doc, xmlDoc, xml_doc);
28
+ Noko_Node_Get_Struct(doc, xmlDoc, xml_doc);
29
29
 
30
30
  if (!NIL_P(content)) {
31
31
  content_str = (xmlChar *)StringValuePtr(content);
@@ -6,19 +6,19 @@ static int
6
6
  dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
7
7
  {
8
8
  switch (node->type) {
9
- case XML_ATTRIBUTE_NODE:
10
- xmlFreePropList((xmlAttrPtr)node);
11
- break;
12
- case XML_NAMESPACE_DECL:
13
- xmlFreeNs((xmlNsPtr)node);
14
- break;
15
- case XML_DTD_NODE:
16
- xmlFreeDtd((xmlDtdPtr)node);
17
- break;
18
- default:
19
- if (node->parent == NULL) {
20
- xmlAddChild((xmlNodePtr)doc, node);
21
- }
9
+ case XML_ATTRIBUTE_NODE:
10
+ xmlFreePropList((xmlAttrPtr)node);
11
+ break;
12
+ case XML_NAMESPACE_DECL:
13
+ xmlFreeNs((xmlNsPtr)node);
14
+ break;
15
+ case XML_DTD_NODE:
16
+ xmlFreeDtd((xmlDtdPtr)node);
17
+ break;
18
+ default:
19
+ if (node->parent == NULL) {
20
+ xmlAddChild((xmlNodePtr)doc, node);
21
+ }
22
22
  }
23
23
  return ST_CONTINUE;
24
24
  }
@@ -161,7 +161,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
161
161
  rb_obj_class(rb_new_root));
162
162
  }
163
163
 
164
- Data_Get_Struct(rb_new_root, xmlNode, c_new_root);
164
+ Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
165
165
 
166
166
  /* If the new root's document is not the same as the current document,
167
167
  * then we need to dup the node in to this document. */
@@ -213,7 +213,7 @@ set_encoding(VALUE self, VALUE encoding)
213
213
  Data_Get_Struct(self, xmlDoc, doc);
214
214
 
215
215
  if (doc->encoding) {
216
- free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
216
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
217
217
  }
218
218
 
219
219
  doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
@@ -533,59 +533,59 @@ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
533
533
  static VALUE
534
534
  rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
535
535
  {
536
- VALUE mode;
537
- VALUE incl_ns;
538
- VALUE with_comments;
539
- xmlChar **ns;
540
- long ns_len, i;
536
+ VALUE rb_mode;
537
+ VALUE rb_namespaces;
538
+ VALUE rb_comments_p;
539
+ xmlChar **c_namespaces;
541
540
 
542
- xmlDocPtr doc;
543
- xmlOutputBufferPtr buf;
544
- xmlC14NIsVisibleCallback cb = NULL;
545
- void *ctx = NULL;
541
+ xmlDocPtr c_doc;
542
+ xmlOutputBufferPtr c_obuf;
543
+ xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
544
+ void *rb_callback = NULL;
546
545
 
547
546
  VALUE rb_cStringIO;
548
- VALUE io;
547
+ VALUE rb_io;
549
548
 
550
- rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
549
+ rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
550
+ if (!NIL_P(rb_mode)) { Check_Type(rb_mode, T_FIXNUM); }
551
+ if (!NIL_P(rb_namespaces)) { Check_Type(rb_namespaces, T_ARRAY); }
551
552
 
552
- Data_Get_Struct(self, xmlDoc, doc);
553
+ Data_Get_Struct(self, xmlDoc, c_doc);
553
554
 
554
555
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
555
- io = rb_class_new_instance(0, 0, rb_cStringIO);
556
- buf = xmlAllocOutputBuffer(NULL);
556
+ rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
557
+ c_obuf = xmlAllocOutputBuffer(NULL);
557
558
 
558
- buf->writecallback = (xmlOutputWriteCallback)noko_io_write;
559
- buf->closecallback = (xmlOutputCloseCallback)noko_io_close;
560
- buf->context = (void *)io;
559
+ c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
560
+ c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
561
+ c_obuf->context = (void *)rb_io;
561
562
 
562
563
  if (rb_block_given_p()) {
563
- cb = block_caller;
564
- ctx = (void *)rb_block_proc();
564
+ c_callback_wrapper = block_caller;
565
+ rb_callback = (void *)rb_block_proc();
565
566
  }
566
567
 
567
- if (NIL_P(incl_ns)) {
568
- ns = NULL;
568
+ if (NIL_P(rb_namespaces)) {
569
+ c_namespaces = NULL;
569
570
  } else {
570
- Check_Type(incl_ns, T_ARRAY);
571
- ns_len = RARRAY_LEN(incl_ns);
572
- ns = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
573
- for (i = 0 ; i < ns_len ; i++) {
574
- VALUE entry = rb_ary_entry(incl_ns, i);
575
- ns[i] = (xmlChar *)StringValueCStr(entry);
571
+ long ns_len = RARRAY_LEN(rb_namespaces);
572
+ c_namespaces = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
573
+ for (int j = 0 ; j < ns_len ; j++) {
574
+ VALUE entry = rb_ary_entry(rb_namespaces, j);
575
+ c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
576
576
  }
577
577
  }
578
578
 
579
+ xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
580
+ (int)(NIL_P(rb_mode) ? 0 : NUM2INT(rb_mode)),
581
+ c_namespaces,
582
+ (int)RTEST(rb_comments_p),
583
+ c_obuf);
579
584
 
580
- xmlC14NExecute(doc, cb, ctx,
581
- (int)(NIL_P(mode) ? 0 : NUM2INT(mode)),
582
- ns,
583
- (int) RTEST(with_comments),
584
- buf);
585
-
586
- xmlOutputBufferClose(buf);
585
+ free(c_namespaces);
586
+ xmlOutputBufferClose(c_obuf);
587
587
 
588
- return rb_funcall(io, rb_intern("string"), 0);
588
+ return rb_funcall(rb_io, rb_intern("string"), 0);
589
589
  }
590
590
 
591
591
  VALUE
@@ -28,8 +28,6 @@ new (int argc, VALUE *argv, VALUE klass)
28
28
  rb_node = noko_xml_node_wrap(klass, node);
29
29
  rb_obj_call_init(rb_node, argc, argv);
30
30
 
31
- if (rb_block_given_p()) { rb_yield(rb_node); }
32
-
33
31
  return rb_node;
34
32
  }
35
33