nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -0,0 +1,96 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtml4SaxPushParser;
4
+
5
+ /*
6
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
7
+ */
8
+ static VALUE
9
+ noko_html4_sax_push_parser__native_write(VALUE self, VALUE rb_chunk, VALUE rb_last_chunk)
10
+ {
11
+ xmlParserCtxtPtr ctx;
12
+ const char *chunk = NULL;
13
+ int size = 0;
14
+ int status = 0;
15
+ libxmlStructuredErrorHandlerState handler_state;
16
+
17
+ ctx = noko_xml_sax_push_parser_unwrap(self);
18
+
19
+ if (Qnil != rb_chunk) {
20
+ chunk = StringValuePtr(rb_chunk);
21
+ size = (int)RSTRING_LEN(rb_chunk);
22
+ }
23
+
24
+ noko__structured_error_func_save_and_set(&handler_state, NULL, NULL);
25
+
26
+ status = htmlParseChunk(ctx, chunk, size, Qtrue == rb_last_chunk ? 1 : 0);
27
+
28
+ noko__structured_error_func_restore(&handler_state);
29
+
30
+ if ((status != 0) && !(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
31
+ // TODO: there appear to be no tests for this block
32
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
33
+ noko__error_raise(NULL, e);
34
+ }
35
+
36
+ return self;
37
+ }
38
+
39
+ /*
40
+ * Initialize the push parser with +xml_sax+ using +filename+ and +encoding+
41
+ */
42
+ static VALUE
43
+ noko_html4_sax_push_parser__initialize_native(
44
+ VALUE self,
45
+ VALUE rb_xml_sax,
46
+ VALUE rb_filename,
47
+ VALUE encoding
48
+ )
49
+ {
50
+ htmlSAXHandlerPtr sax;
51
+ const char *filename = NULL;
52
+ htmlParserCtxtPtr ctx;
53
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
54
+
55
+ sax = noko_xml_sax_parser_unwrap(rb_xml_sax);
56
+
57
+ if (rb_filename != Qnil) { filename = StringValueCStr(rb_filename); }
58
+
59
+ if (!NIL_P(encoding)) {
60
+ enc = xmlParseCharEncoding(StringValueCStr(encoding));
61
+ if (enc == XML_CHAR_ENCODING_ERROR) {
62
+ rb_raise(rb_eArgError, "Unsupported Encoding");
63
+ }
64
+ }
65
+
66
+ ctx = htmlCreatePushParserCtxt(
67
+ sax,
68
+ NULL,
69
+ NULL,
70
+ 0,
71
+ filename,
72
+ enc
73
+ );
74
+ if (ctx == NULL) {
75
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
76
+ }
77
+
78
+ ctx->userData = ctx;
79
+ ctx->_private = (void *)rb_xml_sax;
80
+
81
+ DATA_PTR(self) = ctx;
82
+ return self;
83
+ }
84
+
85
+ void
86
+ noko_init_html_sax_push_parser(void)
87
+ {
88
+ assert(cNokogiriXmlSaxPushParser);
89
+ cNokogiriHtml4SaxPushParser =
90
+ rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
91
+
92
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native",
93
+ noko_html4_sax_push_parser__initialize_native, 3);
94
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write",
95
+ noko_html4_sax_push_parser__native_write, 2);
96
+ }
@@ -0,0 +1,114 @@
1
+ #include <nokogiri.h>
2
+
3
+ #ifndef HAVE_XMLCTXTSETOPTIONS
4
+ /* based on libxml2-2.14.0-dev (1d8bd126) parser.c xmlCtxtSetInternalOptions */
5
+ int
6
+ xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
7
+ {
8
+ int keepMask = 0;
9
+ int allMask;
10
+
11
+ if (ctxt == NULL) {
12
+ return (-1);
13
+ }
14
+
15
+ /*
16
+ * XInclude options aren't handled by the parser.
17
+ *
18
+ * XML_PARSE_XINCLUDE
19
+ * XML_PARSE_NOXINCNODE
20
+ * XML_PARSE_NOBASEFIX
21
+ */
22
+ allMask = XML_PARSE_RECOVER |
23
+ XML_PARSE_NOENT |
24
+ XML_PARSE_DTDLOAD |
25
+ XML_PARSE_DTDATTR |
26
+ XML_PARSE_DTDVALID |
27
+ XML_PARSE_NOERROR |
28
+ XML_PARSE_NOWARNING |
29
+ XML_PARSE_PEDANTIC |
30
+ XML_PARSE_NOBLANKS |
31
+ #ifdef LIBXML_SAX1_ENABLED
32
+ XML_PARSE_SAX1 |
33
+ #endif
34
+ XML_PARSE_NONET |
35
+ XML_PARSE_NODICT |
36
+ XML_PARSE_NSCLEAN |
37
+ XML_PARSE_NOCDATA |
38
+ XML_PARSE_COMPACT |
39
+ XML_PARSE_OLD10 |
40
+ XML_PARSE_HUGE |
41
+ XML_PARSE_OLDSAX |
42
+ XML_PARSE_IGNORE_ENC |
43
+ XML_PARSE_BIG_LINES;
44
+
45
+ ctxt->options = (ctxt->options & keepMask) | (options & allMask);
46
+
47
+ /*
48
+ * For some options, struct members are historically the source
49
+ * of truth. The values are initialized from global variables and
50
+ * old code could also modify them directly. Several older API
51
+ * functions that don't take an options argument rely on these
52
+ * deprecated mechanisms.
53
+ *
54
+ * Once public access to struct members and the globals is
55
+ * disabled, we can use the options bitmask as source of
56
+ * truth, making all these struct members obsolete.
57
+ *
58
+ * The XML_DETECT_IDS flag is misnamed. It simply enables
59
+ * loading of the external subset.
60
+ */
61
+ ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
62
+ ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
63
+ ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
64
+ ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
65
+ ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
66
+ ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
67
+ ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
68
+ ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
69
+
70
+ /*
71
+ * Changing SAX callbacks is a bad idea. This should be fixed.
72
+ */
73
+ if (options & XML_PARSE_NOBLANKS) {
74
+ ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
75
+ }
76
+ if (options & XML_PARSE_NOCDATA) {
77
+ ctxt->sax->cdataBlock = NULL;
78
+ }
79
+ if (options & XML_PARSE_HUGE) {
80
+ if (ctxt->dict != NULL) {
81
+ xmlDictSetLimit(ctxt->dict, 0);
82
+ }
83
+ }
84
+
85
+ ctxt->linenumbers = 1;
86
+
87
+ return (options & ~allMask);
88
+ }
89
+ #endif
90
+
91
+ #ifndef HAVE_XMLCTXTGETOPTIONS
92
+ int
93
+ xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
94
+ {
95
+ return (ctxt->options);
96
+ }
97
+ #endif
98
+
99
+ #ifndef HAVE_XMLSWITCHENCODINGNAME
100
+ int
101
+ xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding)
102
+ {
103
+ if (ctxt == NULL) {
104
+ return (-1);
105
+ }
106
+
107
+ xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
108
+ if (handler == NULL) {
109
+ return (-1);
110
+ }
111
+
112
+ return (xmlSwitchToEncoding(ctxt, handler));
113
+ }
114
+ #endif
@@ -1,101 +1,225 @@
1
1
  #include <nokogiri.h>
2
2
 
3
3
  VALUE mNokogiri ;
4
+ VALUE mNokogiriGumbo ;
5
+ VALUE mNokogiriHtml4 ;
6
+ VALUE mNokogiriHtml4Sax ;
7
+ VALUE mNokogiriHtml5 ;
4
8
  VALUE mNokogiriXml ;
5
- VALUE mNokogiriHtml ;
6
- VALUE mNokogiriXslt ;
7
9
  VALUE mNokogiriXmlSax ;
8
- VALUE mNokogiriHtmlSax ;
9
-
10
- #ifdef USE_INCLUDED_VASPRINTF
11
- /*
12
- * I srsly hate windows. it doesn't have vasprintf.
13
- * Thank you Geoffroy Couprie for this implementation of vasprintf!
14
- */
15
- int vasprintf (char **strp, const char *fmt, va_list ap)
10
+ VALUE mNokogiriXmlXpath ;
11
+ VALUE mNokogiriXslt ;
12
+
13
+ VALUE cNokogiriSyntaxError;
14
+ VALUE cNokogiriXmlCharacterData;
15
+ VALUE cNokogiriXmlElement;
16
+ VALUE cNokogiriXmlXpathSyntaxError;
17
+
18
+ void noko_init_xml_attr(void);
19
+ void noko_init_xml_attribute_decl(void);
20
+ void noko_init_xml_cdata(void);
21
+ void noko_init_xml_comment(void);
22
+ void noko_init_xml_document(void);
23
+ void noko_init_xml_document_fragment(void);
24
+ void noko_init_xml_dtd(void);
25
+ void noko_init_xml_element_content(void);
26
+ void noko_init_xml_element_decl(void);
27
+ void noko_init_xml_encoding_handler(void);
28
+ void noko_init_xml_entity_decl(void);
29
+ void noko_init_xml_entity_reference(void);
30
+ void noko_init_xml_namespace(void);
31
+ void noko_init_xml_node(void);
32
+ void noko_init_xml_node_set(void);
33
+ void noko_init_xml_processing_instruction(void);
34
+ void noko_init_xml_reader(void);
35
+ void noko_init_xml_relax_ng(void);
36
+ void noko_init_xml_sax_parser(void);
37
+ void noko_init_xml_sax_parser_context(void);
38
+ void noko_init_xml_sax_push_parser(void);
39
+ void noko_init_xml_schema(void);
40
+ void noko_init_xml_syntax_error(void);
41
+ void noko_init_xml_text(void);
42
+ void noko_init_xml_xpath_context(void);
43
+ void noko_init_xslt_stylesheet(void);
44
+ void noko_init_html_document(void);
45
+ void noko_init_html_element_description(void);
46
+ void noko_init_html_entity_lookup(void);
47
+ void noko_init_html_sax_parser_context(void);
48
+ void noko_init_html_sax_push_parser(void);
49
+ void noko_init_html4_sax_parser(void);
50
+ void noko_init_gumbo(void);
51
+ void noko_init_test_global_handlers(void);
52
+
53
+ static ID id_read, id_write, id_external_encoding;
54
+
55
+
56
+ static VALUE
57
+ noko_io_read_check(VALUE val)
16
58
  {
17
- /* Mingw32/64 have a broken vsnprintf implementation that fails when
18
- * using a zero-byte limit in order to retrieve the required size for malloc.
19
- * So we use a one byte buffer instead.
20
- */
21
- char tmp[1];
22
- int len = vsnprintf (tmp, 1, fmt, ap) + 1;
23
- char *res = (char *)malloc((unsigned int)len);
24
- if (res == NULL)
25
- return -1;
26
- *strp = res;
27
- return vsnprintf(res, (unsigned int)len, fmt, ap);
59
+ VALUE *args = (VALUE *)val;
60
+ return rb_funcall(args[0], id_read, 1, args[1]);
28
61
  }
29
- #endif
30
62
 
31
- void vasprintf_free (void *p)
63
+
64
+ static VALUE
65
+ noko_io_read_failed(VALUE arg, VALUE exc)
32
66
  {
33
- free(p);
67
+ return Qundef;
34
68
  }
35
69
 
36
- #ifdef HAVE_RUBY_UTIL_H
37
- #include "ruby/util.h"
38
- #else
39
- #include "util.h"
40
- #endif
41
70
 
42
- void nokogiri_root_node(xmlNodePtr node)
71
+ int
72
+ noko_io_read(void *io, char *c_buffer, int c_buffer_len)
73
+ {
74
+ VALUE rb_io = (VALUE)io;
75
+ VALUE rb_read_string, rb_args[2];
76
+ size_t n_bytes_read, safe_len;
77
+
78
+ rb_args[0] = rb_io;
79
+ rb_args[1] = INT2NUM(c_buffer_len);
80
+
81
+ rb_read_string = rb_rescue(noko_io_read_check, (VALUE)rb_args, noko_io_read_failed, 0);
82
+
83
+ if (NIL_P(rb_read_string)) { return 0; }
84
+ if (rb_read_string == Qundef) { return -1; }
85
+ if (TYPE(rb_read_string) != T_STRING) { return -1; }
86
+
87
+ n_bytes_read = (size_t)RSTRING_LEN(rb_read_string);
88
+ safe_len = (n_bytes_read > (size_t)c_buffer_len) ? (size_t)c_buffer_len : n_bytes_read;
89
+ memcpy(c_buffer, StringValuePtr(rb_read_string), safe_len);
90
+
91
+ return (int)safe_len;
92
+ }
93
+
94
+
95
+ static VALUE
96
+ noko_io_write_check(VALUE rb_args)
43
97
  {
44
- xmlDocPtr doc;
45
- nokogiriTuplePtr tuple;
98
+ VALUE rb_io = ((VALUE *)rb_args)[0];
99
+ VALUE rb_output = ((VALUE *)rb_args)[1];
100
+ return rb_funcall(rb_io, id_write, 1, rb_output);
101
+ }
46
102
 
47
- doc = node->doc;
48
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
49
- tuple = (nokogiriTuplePtr)doc->_private;
50
- st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
103
+
104
+ static VALUE
105
+ noko_io_write_failed(VALUE arg, VALUE exc)
106
+ {
107
+ return Qundef;
51
108
  }
52
109
 
53
- void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
110
+
111
+ int
112
+ noko_io_write(void *io, char *c_buffer, int c_buffer_len)
54
113
  {
55
- nokogiriTuplePtr tuple;
114
+ VALUE rb_args[2], rb_n_bytes_written;
115
+ VALUE rb_io = (VALUE)io;
116
+ VALUE rb_enc = Qnil;
117
+ rb_encoding *io_encoding;
118
+
119
+ if (rb_respond_to(rb_io, id_external_encoding)) {
120
+ rb_enc = rb_funcall(rb_io, id_external_encoding, 0);
121
+ }
122
+ io_encoding = RB_NIL_P(rb_enc) ? rb_ascii8bit_encoding() : rb_to_encoding(rb_enc);
123
+
124
+ rb_args[0] = rb_io;
125
+ rb_args[1] = rb_enc_str_new(c_buffer, (long)c_buffer_len, io_encoding);
126
+
127
+ rb_n_bytes_written = rb_rescue(noko_io_write_check, (VALUE)rb_args, noko_io_write_failed, 0);
128
+ if (rb_n_bytes_written == Qundef) { return -1; }
56
129
 
57
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
58
- tuple = (nokogiriTuplePtr)doc->_private;
59
- st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
130
+ return NUM2INT(rb_n_bytes_written);
60
131
  }
61
132
 
62
- void Init_nokogiri()
133
+
134
+ int
135
+ noko_io_close(void *io)
63
136
  {
64
- xmlMemSetup(
65
- (xmlFreeFunc)ruby_xfree,
66
- (xmlMallocFunc)ruby_xmalloc,
67
- (xmlReallocFunc)ruby_xrealloc,
68
- ruby_strdup
69
- );
137
+ return 0;
138
+ }
139
+
70
140
 
141
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
142
+ # define NOKOGIRI_WINDOWS_DLLS 1
143
+ #else
144
+ # define NOKOGIRI_WINDOWS_DLLS 0
145
+ #endif
146
+
147
+ //
148
+ // | dlls || true | false |
149
+ // | nlmm || | |
150
+ // |-----------++---------+---------|
151
+ // | NULL || default | ruby |
152
+ // | "random" || default | ruby |
153
+ // | "ruby" || ruby | ruby |
154
+ // | "default" || default | default |
155
+ //
156
+ // We choose *not* to use Ruby's memory management functions with windows DLLs because of this
157
+ // issue: https://github.com/sparklemotion/nokogiri/issues/2241
158
+ //
159
+ static void
160
+ set_libxml_memory_management(void)
161
+ {
162
+ const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
163
+ if (nlmm) {
164
+ if (strcmp(nlmm, "default") == 0) {
165
+ goto libxml_uses_default_memory_management;
166
+ } else if (strcmp(nlmm, "ruby") == 0) {
167
+ goto libxml_uses_ruby_memory_management;
168
+ }
169
+ }
170
+ if (NOKOGIRI_WINDOWS_DLLS) {
171
+ libxml_uses_default_memory_management:
172
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
173
+ return;
174
+ } else {
175
+ libxml_uses_ruby_memory_management:
176
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
177
+ xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
178
+ return;
179
+ }
180
+ }
181
+
182
+
183
+ void
184
+ Init_nokogiri(void)
185
+ {
71
186
  mNokogiri = rb_define_module("Nokogiri");
187
+ mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
188
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
189
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
190
+ mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
72
191
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
73
- mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
74
- mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
75
192
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
76
- mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
77
-
78
- rb_const_set( mNokogiri,
79
- rb_intern("LIBXML_VERSION"),
80
- NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
81
- );
82
- rb_const_set( mNokogiri,
83
- rb_intern("LIBXML_PARSER_VERSION"),
84
- NOKOGIRI_STR_NEW2(xmlParserVersion)
85
- );
86
-
87
- #ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES
88
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue);
89
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH));
90
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH));
91
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
92
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
193
+ mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
194
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
195
+
196
+ set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
197
+ xmlInitParser();
198
+ exsltRegisterAll();
199
+
200
+ rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
201
+ rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
202
+
203
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
204
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
205
+
206
+ rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
207
+ xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
208
+
209
+ #ifdef NOKOGIRI_PACKAGED_LIBRARIES
210
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
211
+ # ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
212
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qtrue);
213
+ # else
214
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
215
+ # endif
216
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
217
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
93
218
  #else
94
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse);
95
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil);
96
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil);
97
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), Qnil);
98
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), Qnil);
219
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qfalse);
220
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
221
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), Qnil);
222
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), Qnil);
99
223
  #endif
100
224
 
101
225
  #ifdef LIBXML_ICONV_ENABLED
@@ -104,38 +228,67 @@ void Init_nokogiri()
104
228
  rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
105
229
  #endif
106
230
 
107
- xmlInitParser();
231
+ #ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
232
+ rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
233
+ #endif
234
+
235
+ if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
236
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
237
+ } else {
238
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
239
+ }
240
+
241
+ cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
242
+ noko_init_xml_syntax_error();
243
+ assert(cNokogiriXmlSyntaxError);
244
+ cNokogiriXmlXpathSyntaxError = rb_define_class_under(mNokogiriXmlXpath, "SyntaxError", cNokogiriXmlSyntaxError);
245
+
246
+ noko_init_xml_element_content();
247
+ noko_init_xml_encoding_handler();
248
+ noko_init_xml_namespace();
249
+ noko_init_xml_node_set();
250
+ noko_init_xml_reader();
251
+
252
+ noko_init_xml_sax_parser();
253
+ noko_init_html4_sax_parser();
254
+
255
+ noko_init_xml_xpath_context();
256
+ noko_init_xslt_stylesheet();
257
+ noko_init_html_element_description();
258
+ noko_init_html_entity_lookup();
259
+
260
+ noko_init_xml_schema();
261
+ noko_init_xml_relax_ng();
262
+
263
+ noko_init_xml_sax_parser_context();
264
+ noko_init_html_sax_parser_context();
265
+
266
+ noko_init_xml_sax_push_parser();
267
+ noko_init_html_sax_push_parser();
268
+
269
+ noko_init_xml_node();
270
+ noko_init_xml_attr();
271
+ noko_init_xml_attribute_decl();
272
+ noko_init_xml_dtd();
273
+ noko_init_xml_element_decl();
274
+ noko_init_xml_entity_decl();
275
+ noko_init_xml_entity_reference();
276
+ noko_init_xml_processing_instruction();
277
+ assert(cNokogiriXmlNode);
278
+ cNokogiriXmlElement = rb_define_class_under(mNokogiriXml, "Element", cNokogiriXmlNode);
279
+ cNokogiriXmlCharacterData = rb_define_class_under(mNokogiriXml, "CharacterData", cNokogiriXmlNode);
280
+ noko_init_xml_comment();
281
+ noko_init_xml_text();
282
+ noko_init_xml_cdata();
283
+
284
+ noko_init_xml_document_fragment();
285
+ noko_init_xml_document();
286
+ noko_init_html_document();
287
+ noko_init_gumbo();
288
+
289
+ noko_init_test_global_handlers();
108
290
 
109
- init_xml_document();
110
- init_html_document();
111
- init_xml_node();
112
- init_xml_document_fragment();
113
- init_xml_text();
114
- init_xml_cdata();
115
- init_xml_processing_instruction();
116
- init_xml_attr();
117
- init_xml_entity_reference();
118
- init_xml_comment();
119
- init_xml_node_set();
120
- init_xml_xpath_context();
121
- init_xml_sax_parser_context();
122
- init_xml_sax_parser();
123
- init_xml_sax_push_parser();
124
- init_xml_reader();
125
- init_xml_dtd();
126
- init_xml_element_content();
127
- init_xml_attribute_decl();
128
- init_xml_element_decl();
129
- init_xml_entity_decl();
130
- init_xml_namespace();
131
- init_html_sax_parser_context();
132
- init_html_sax_push_parser();
133
- init_xslt_stylesheet();
134
- init_xml_syntax_error();
135
- init_html_entity_lookup();
136
- init_html_element_description();
137
- init_xml_schema();
138
- init_xml_relax_ng();
139
- init_nokogiri_io();
140
- init_xml_encoding_handler();
291
+ id_read = rb_intern("read");
292
+ id_write = rb_intern("write");
293
+ id_external_encoding = rb_intern("external_encoding");
141
294
  }