nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230)
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,44 +1,64 @@
1
- #include <xml_sax_push_parser.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static void deallocate(xmlParserCtxtPtr ctx)
3
+ VALUE cNokogiriXmlSaxPushParser ;
4
+
5
+ static void
6
+ xml_sax_push_parser_free(void *data)
4
7
  {
5
- NOKOGIRI_DEBUG_START(ctx);
6
- if (ctx != NULL) {
7
- NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
8
+ xmlParserCtxtPtr ctx = data;
9
+ if (ctx->myDoc) {
10
+ xmlFreeDoc(ctx->myDoc);
11
+ }
12
+ if (ctx) {
8
13
  xmlFreeParserCtxt(ctx);
9
14
  }
10
- NOKOGIRI_DEBUG_END(ctx);
11
15
  }
12
16
 
13
- static VALUE allocate(VALUE klass)
17
+ static const rb_data_type_t xml_sax_push_parser_type = {
18
+ .wrap_struct_name = "xmlParserCtxt",
19
+ .function = {
20
+ .dfree = xml_sax_push_parser_free,
21
+ },
22
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
23
+ };
24
+
25
+ static VALUE
26
+ xml_sax_push_parser_allocate(VALUE klass)
27
+ {
28
+ return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
29
+ }
30
+
31
+ xmlParserCtxtPtr
32
+ noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
14
33
  {
15
- return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
34
+ xmlParserCtxtPtr c_parser;
35
+ TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
36
+ return c_parser;
16
37
  }
17
38
 
18
39
  /*
19
- * call-seq:
20
- * native_write(chunk, last_chunk)
21
- *
22
40
  * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
23
41
  */
24
- static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
42
+ static VALUE
43
+ noko_xml_sax_push_parser__native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
25
44
  {
26
45
  xmlParserCtxtPtr ctx;
27
- const char * chunk = NULL;
46
+ const char *chunk = NULL;
28
47
  int size = 0;
29
48
 
30
-
31
- Data_Get_Struct(self, xmlParserCtxt, ctx);
49
+ ctx = noko_xml_sax_push_parser_unwrap(self);
32
50
 
33
51
  if (Qnil != _chunk) {
34
52
  chunk = StringValuePtr(_chunk);
35
53
  size = (int)RSTRING_LEN(_chunk);
36
54
  }
37
55
 
56
+ xmlSetStructuredErrorFunc(NULL, NULL);
57
+
38
58
  if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
39
- if (!(ctx->options & XML_PARSE_RECOVER)) {
40
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
41
- Nokogiri_error_raise(NULL, e);
59
+ if (!(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
60
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
61
+ noko__error_raise(NULL, e);
42
62
  }
43
63
  }
44
64
 
@@ -51,13 +71,14 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
51
71
  *
52
72
  * Initialize the push parser with +xml_sax+ using +filename+
53
73
  */
54
- static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
74
+ static VALUE
75
+ noko_xml_sax_push_parser__initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
55
76
  {
56
77
  xmlSAXHandlerPtr sax;
57
- const char * filename = NULL;
78
+ const char *filename = NULL;
58
79
  xmlParserCtxtPtr ctx;
59
80
 
60
- Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
81
+ sax = noko_xml_sax_parser_unwrap(_xml_sax);
61
82
 
62
83
  if (_filename != Qnil) { filename = StringValueCStr(_filename); }
63
84
 
@@ -72,28 +93,34 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
72
93
  rb_raise(rb_eRuntimeError, "Could not create a parser context");
73
94
  }
74
95
 
75
- ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
96
+ ctx->userData = ctx;
97
+ ctx->_private = (void *)_xml_sax;
76
98
 
77
- ctx->sax2 = 1;
78
99
  DATA_PTR(self) = ctx;
79
100
  return self;
80
101
  }
81
102
 
82
- static VALUE get_options(VALUE self)
103
+ static VALUE
104
+ noko_xml_sax_push_parser__options_get(VALUE self)
83
105
  {
84
106
  xmlParserCtxtPtr ctx;
85
- Data_Get_Struct(self, xmlParserCtxt, ctx);
86
107
 
87
- return INT2NUM(ctx->options);
108
+ ctx = noko_xml_sax_push_parser_unwrap(self);
109
+
110
+ return INT2NUM(xmlCtxtGetOptions(ctx));
88
111
  }
89
112
 
90
- static VALUE set_options(VALUE self, VALUE options)
113
+ static VALUE
114
+ noko_xml_sax_push_parser__options_set(VALUE self, VALUE options)
91
115
  {
116
+ int error;
92
117
  xmlParserCtxtPtr ctx;
93
- Data_Get_Struct(self, xmlParserCtxt, ctx);
94
118
 
95
- if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
96
- rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
119
+ ctx = noko_xml_sax_push_parser_unwrap(self);
120
+
121
+ error = xmlCtxtSetOptions(ctx, (int)NUM2INT(options));
122
+ if (error) {
123
+ rb_raise(rb_eRuntimeError, "Cannot set XML parser context options (%x)", error);
97
124
  }
98
125
 
99
126
  return Qnil;
@@ -101,59 +128,79 @@ static VALUE set_options(VALUE self, VALUE options)
101
128
 
102
129
  /*
103
130
  * call-seq:
104
- * replace_entities
131
+ * replace_entities
132
+ *
133
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
105
134
  *
106
- * Should this parser replace entities? &amp; will get converted to '&' if
107
- * set to true
135
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
136
+ *
137
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
138
+ * replacement.
108
139
  */
109
- static VALUE get_replace_entities(VALUE self)
140
+ static VALUE
141
+ noko_xml_sax_push_parser__replace_entities_get(VALUE self)
110
142
  {
111
- xmlParserCtxtPtr ctx;
112
- Data_Get_Struct(self, xmlParserCtxt, ctx);
143
+ xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
113
144
 
114
- if (0 == ctx->replaceEntities) {
115
- return Qfalse;
116
- } else {
145
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
117
146
  return Qtrue;
147
+ } else {
148
+ return Qfalse;
118
149
  }
119
150
  }
120
151
 
121
152
  /*
122
153
  * call-seq:
123
- * replace_entities=(boolean)
154
+ * replace_entities=(value)
155
+ *
156
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
124
157
  *
125
- * Should this parser replace entities? &amp; will get converted to '&' if
126
- * set to true
158
+ * [Parameters]
159
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
160
+ *
161
+ * ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
162
+ * defaults to +false+ for this reason.
163
+ *
164
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
165
+ * replacement.
127
166
  */
128
- static VALUE set_replace_entities(VALUE self, VALUE value)
167
+ static VALUE
168
+ noko_xml_sax_push_parser__replace_entities_set(VALUE self, VALUE value)
129
169
  {
130
- xmlParserCtxtPtr ctx;
131
- Data_Get_Struct(self, xmlParserCtxt, ctx);
170
+ int error;
171
+ xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
132
172
 
133
- if (Qfalse == value) {
134
- ctx->replaceEntities = 0;
173
+ if (RB_TEST(value)) {
174
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
135
175
  } else {
136
- ctx->replaceEntities = 1;
176
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
177
+ }
178
+
179
+ if (error) {
180
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
137
181
  }
138
182
 
139
183
  return value;
140
184
  }
141
185
 
142
- VALUE cNokogiriXmlSaxPushParser ;
143
- void init_xml_sax_push_parser()
186
+ void
187
+ noko_init_xml_sax_push_parser(void)
144
188
  {
145
- VALUE nokogiri = rb_define_module("Nokogiri");
146
- VALUE xml = rb_define_module_under(nokogiri, "XML");
147
- VALUE sax = rb_define_module_under(xml, "SAX");
148
- VALUE klass = rb_define_class_under(sax, "PushParser", rb_cObject);
149
-
150
- cNokogiriXmlSaxPushParser = klass;
151
-
152
- rb_define_alloc_func(klass, allocate);
153
- rb_define_private_method(klass, "initialize_native", initialize_native, 2);
154
- rb_define_private_method(klass, "native_write", native_write, 2);
155
- rb_define_method(klass, "options", get_options, 0);
156
- rb_define_method(klass, "options=", set_options, 1);
157
- rb_define_method(klass, "replace_entities", get_replace_entities, 0);
158
- rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
189
+ cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
190
+
191
+ rb_define_alloc_func(cNokogiriXmlSaxPushParser, xml_sax_push_parser_allocate);
192
+
193
+ rb_define_method(cNokogiriXmlSaxPushParser, "options",
194
+ noko_xml_sax_push_parser__options_get, 0);
195
+ rb_define_method(cNokogiriXmlSaxPushParser, "options=",
196
+ noko_xml_sax_push_parser__options_set, 1);
197
+ rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities",
198
+ noko_xml_sax_push_parser__replace_entities_get, 0);
199
+ rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=",
200
+ noko_xml_sax_push_parser__replace_entities_set, 1);
201
+
202
+ rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native",
203
+ noko_xml_sax_push_parser__initialize_native, 2);
204
+ rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write",
205
+ noko_xml_sax_push_parser__native_write, 2);
159
206
  }
@@ -1,234 +1,226 @@
1
- #include <xml_schema.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static void dealloc(xmlSchemaPtr schema)
3
+ VALUE cNokogiriXmlSchema;
4
+
5
+ static void
6
+ xml_schema_deallocate(void *data)
4
7
  {
5
- NOKOGIRI_DEBUG_START(schema);
8
+ xmlSchemaPtr schema = data;
6
9
  xmlSchemaFree(schema);
7
- NOKOGIRI_DEBUG_END(schema);
8
10
  }
9
11
 
10
- /*
11
- * call-seq:
12
- * validate_document(document)
13
- *
14
- * Validate a Nokogiri::XML::Document against this Schema.
15
- */
16
- static VALUE validate_document(VALUE self, VALUE document)
12
+ static const rb_data_type_t xml_schema_type = {
13
+ .wrap_struct_name = "xmlSchema",
14
+ .function = {
15
+ .dfree = xml_schema_deallocate,
16
+ },
17
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
+ };
19
+
20
+ static VALUE
21
+ noko_xml_schema__validate_document(VALUE self, VALUE document)
17
22
  {
18
23
  xmlDocPtr doc;
19
24
  xmlSchemaPtr schema;
20
25
  xmlSchemaValidCtxtPtr valid_ctxt;
21
26
  VALUE errors;
22
27
 
23
- Data_Get_Struct(self, xmlSchema, schema);
24
- Data_Get_Struct(document, xmlDoc, doc);
28
+ TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
29
+ doc = noko_xml_document_unwrap(document);
25
30
 
26
31
  errors = rb_ary_new();
27
32
 
28
33
  valid_ctxt = xmlSchemaNewValidCtxt(schema);
29
34
 
30
- if(NULL == valid_ctxt) {
35
+ if (NULL == valid_ctxt) {
31
36
  /* we have a problem */
32
37
  rb_raise(rb_eRuntimeError, "Could not create a validation context");
33
38
  }
34
39
 
35
- #ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS
36
40
  xmlSchemaSetValidStructuredErrors(
37
41
  valid_ctxt,
38
- Nokogiri_error_array_pusher,
42
+ noko__error_array_pusher,
39
43
  (void *)errors
40
44
  );
41
- #endif
42
45
 
43
- xmlSchemaValidateDoc(valid_ctxt, doc);
46
+ int status = xmlSchemaValidateDoc(valid_ctxt, doc);
44
47
 
45
48
  xmlSchemaFreeValidCtxt(valid_ctxt);
46
49
 
50
+ if (status != 0) {
51
+ if (RARRAY_LEN(errors) == 0) {
52
+ rb_ary_push(errors, rb_str_new2("Could not validate document"));
53
+ }
54
+ }
55
+
47
56
  return errors;
48
57
  }
49
58
 
50
- /*
51
- * call-seq:
52
- * validate_file(filename)
53
- *
54
- * Validate a file against this Schema.
55
- */
56
- static VALUE validate_file(VALUE self, VALUE rb_filename)
59
+ static VALUE
60
+ noko_xml_schema__validate_file(VALUE self, VALUE rb_filename)
57
61
  {
58
62
  xmlSchemaPtr schema;
59
63
  xmlSchemaValidCtxtPtr valid_ctxt;
60
64
  const char *filename ;
61
65
  VALUE errors;
62
66
 
63
- Data_Get_Struct(self, xmlSchema, schema);
64
- filename = (const char*)StringValueCStr(rb_filename) ;
67
+ TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
68
+ filename = (const char *)StringValueCStr(rb_filename) ;
65
69
 
66
70
  errors = rb_ary_new();
67
71
 
68
72
  valid_ctxt = xmlSchemaNewValidCtxt(schema);
69
73
 
70
- if(NULL == valid_ctxt) {
74
+ if (NULL == valid_ctxt) {
71
75
  /* we have a problem */
72
76
  rb_raise(rb_eRuntimeError, "Could not create a validation context");
73
77
  }
74
78
 
75
- #ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS
76
79
  xmlSchemaSetValidStructuredErrors(
77
80
  valid_ctxt,
78
- Nokogiri_error_array_pusher,
81
+ noko__error_array_pusher,
79
82
  (void *)errors
80
83
  );
81
- #endif
82
84
 
83
- xmlSchemaValidateFile(valid_ctxt, filename, 0);
85
+ int status = xmlSchemaValidateFile(valid_ctxt, filename, 0);
84
86
 
85
87
  xmlSchemaFreeValidCtxt(valid_ctxt);
86
88
 
89
+ if (status != 0) {
90
+ if (RARRAY_LEN(errors) == 0) {
91
+ rb_ary_push(errors, rb_str_new2("Could not validate file."));
92
+ }
93
+ }
94
+
87
95
  return errors;
88
96
  }
89
97
 
90
- /*
91
- * call-seq:
92
- * read_memory(string)
93
- *
94
- * Create a new Schema from the contents of +string+
95
- */
96
- static VALUE read_memory(VALUE klass, VALUE content)
98
+ static VALUE
99
+ xml_schema_parse_schema(
100
+ VALUE rb_class,
101
+ xmlSchemaParserCtxtPtr c_parser_context,
102
+ VALUE rb_parse_options
103
+ )
97
104
  {
98
- xmlSchemaPtr schema;
99
- xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(
100
- (const char *)StringValuePtr(content),
101
- (int)RSTRING_LEN(content)
102
- );
103
- VALUE rb_schema;
104
- VALUE errors = rb_ary_new();
105
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
105
+ xmlExternalEntityLoader saved_loader = 0;
106
+ libxmlStructuredErrorHandlerState handler_state;
107
+
108
+ if (NIL_P(rb_parse_options)) {
109
+ rb_parse_options = rb_const_get_at(
110
+ rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
111
+ rb_intern("DEFAULT_SCHEMA")
112
+ );
113
+ }
114
+ int c_parse_options = (int)NUM2INT(rb_funcall(rb_parse_options, rb_intern("to_i"), 0));
115
+
116
+ VALUE rb_errors = rb_ary_new();
117
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
106
118
 
107
- #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
108
119
  xmlSchemaSetParserStructuredErrors(
109
- ctx,
110
- Nokogiri_error_array_pusher,
111
- (void *)errors
120
+ c_parser_context,
121
+ noko__error_array_pusher,
122
+ (void *)rb_errors
112
123
  );
113
- #endif
114
-
115
- schema = xmlSchemaParse(ctx);
116
-
117
- xmlSetStructuredErrorFunc(NULL, NULL);
118
- xmlSchemaFreeParserCtxt(ctx);
119
-
120
- if(NULL == schema) {
121
- xmlErrorPtr error = xmlGetLastError();
122
- if(error)
123
- Nokogiri_error_raise(NULL, error);
124
- else
125
- rb_raise(rb_eRuntimeError, "Could not parse document");
126
124
 
127
- return Qnil;
125
+ if (c_parse_options & XML_PARSE_NONET) {
126
+ saved_loader = xmlGetExternalEntityLoader();
127
+ xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
128
128
  }
129
129
 
130
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
131
- rb_iv_set(rb_schema, "@errors", errors);
130
+ xmlSchemaPtr c_schema = xmlSchemaParse(c_parser_context);
132
131
 
133
- return rb_schema;
134
- }
132
+ if (saved_loader) {
133
+ xmlSetExternalEntityLoader(saved_loader);
134
+ }
135
135
 
136
- /* Schema creation will remove and deallocate "blank" nodes.
137
- * If those blank nodes have been exposed to Ruby, they could get freed
138
- * out from under the VALUE pointer. This function checks to see if any of
139
- * those nodes have been exposed to Ruby, and if so we should raise an exception.
140
- */
141
- static int has_blank_nodes_p(VALUE cache)
142
- {
143
- long i;
136
+ xmlSchemaFreeParserCtxt(c_parser_context);
137
+ noko__structured_error_func_restore(&handler_state);
144
138
 
145
- if (NIL_P(cache)) {
146
- return 0;
139
+ if (NULL == c_schema) {
140
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
141
+ if (RB_TEST(exception)) {
142
+ rb_exc_raise(exception);
143
+ } else {
144
+ rb_raise(rb_eRuntimeError, "Could not parse document");
147
145
  }
146
+ }
148
147
 
149
- for (i = 0; i < RARRAY_LEN(cache); i++) {
150
- xmlNodePtr node;
151
- VALUE element = rb_ary_entry(cache, i);
152
- Data_Get_Struct(element, xmlNode, node);
153
- if (xmlIsBlankNode(node)) {
154
- return 1;
155
- }
156
- }
148
+ VALUE rb_schema = TypedData_Wrap_Struct(rb_class, &xml_schema_type, c_schema);
149
+ rb_iv_set(rb_schema, "@errors", rb_errors);
150
+ rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
157
151
 
158
- return 0;
152
+ return rb_schema;
159
153
  }
160
154
 
161
155
  /*
162
- * call-seq:
163
- * from_document(doc)
156
+ * :call-seq:
157
+ * from_document(input) → Nokogiri::XML::Schema
158
+ * from_document(input, parse_options) → Nokogiri::XML::Schema
159
+ *
160
+ * Parse an \XSD schema definition from a Document to create a new Nokogiri::XML::Schema
161
+ *
162
+ * [Parameters]
163
+ * - +input+ (XML::Document) A document containing the \XSD schema definition
164
+ * - +parse_options+ (Nokogiri::XML::ParseOptions)
165
+ * Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
164
166
  *
165
- * Create a new Schema from the Nokogiri::XML::Document +doc+
167
+ * [Returns] Nokogiri::XML::Schema
166
168
  */
167
- static VALUE from_document(VALUE klass, VALUE document)
169
+ static VALUE
170
+ noko_xml_schema_s_from_document(int argc, VALUE *argv, VALUE rb_class)
168
171
  {
169
- xmlDocPtr doc;
170
- xmlSchemaParserCtxtPtr ctx;
171
- xmlSchemaPtr schema;
172
- VALUE errors;
172
+ /* TODO: deprecate this method and put file-or-string logic into .new so that becomes the
173
+ * preferred entry point, and this can become a private method */
174
+ VALUE rb_document;
175
+ VALUE rb_parse_options;
173
176
  VALUE rb_schema;
177
+ xmlDocPtr c_document;
178
+ xmlSchemaParserCtxtPtr c_parser_context;
179
+ int defensive_copy_p = 0;
174
180
 
175
- Data_Get_Struct(document, xmlDoc, doc);
181
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
176
182
 
177
- /* In case someone passes us a node. ugh. */
178
- doc = doc->doc;
179
-
180
- if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
181
- rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
183
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
184
+ rb_raise(rb_eTypeError,
185
+ "expected parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
186
+ rb_obj_class(rb_document));
182
187
  }
183
188
 
184
- ctx = xmlSchemaNewDocParserCtxt(doc);
185
-
186
- errors = rb_ary_new();
187
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
188
-
189
- #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
190
- xmlSchemaSetParserStructuredErrors(
191
- ctx,
192
- Nokogiri_error_array_pusher,
193
- (void *)errors
194
- );
195
- #endif
196
-
197
- schema = xmlSchemaParse(ctx);
189
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
190
+ xmlNodePtr deprecated_node_type_arg;
191
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
192
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
193
+ c_document = deprecated_node_type_arg->doc;
194
+ } else {
195
+ c_document = noko_xml_document_unwrap(rb_document);
196
+ }
198
197
 
199
- xmlSetStructuredErrorFunc(NULL, NULL);
200
- xmlSchemaFreeParserCtxt(ctx);
198
+ if (noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
199
+ // see https://github.com/sparklemotion/nokogiri/pull/2001
200
+ c_document = xmlCopyDoc(c_document, 1);
201
+ defensive_copy_p = 1;
202
+ }
201
203
 
202
- if(NULL == schema) {
203
- xmlErrorPtr error = xmlGetLastError();
204
- if(error)
205
- Nokogiri_error_raise(NULL, error);
206
- else
207
- rb_raise(rb_eRuntimeError, "Could not parse document");
204
+ c_parser_context = xmlSchemaNewDocParserCtxt(c_document);
205
+ rb_schema = xml_schema_parse_schema(rb_class, c_parser_context, rb_parse_options);
208
206
 
209
- return Qnil;
207
+ if (defensive_copy_p) {
208
+ xmlFreeDoc(c_document);
209
+ c_document = NULL;
210
210
  }
211
211
 
212
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
213
- rb_iv_set(rb_schema, "@errors", errors);
214
-
215
212
  return rb_schema;
216
-
217
- return Qnil;
218
213
  }
219
214
 
220
- VALUE cNokogiriXmlSchema;
221
- void init_xml_schema()
215
+ void
216
+ noko_init_xml_schema(void)
222
217
  {
223
- VALUE nokogiri = rb_define_module("Nokogiri");
224
- VALUE xml = rb_define_module_under(nokogiri, "XML");
225
- VALUE klass = rb_define_class_under(xml, "Schema", rb_cObject);
218
+ cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
226
219
 
227
- cNokogiriXmlSchema = klass;
220
+ rb_undef_alloc_func(cNokogiriXmlSchema);
228
221
 
229
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
230
- rb_define_singleton_method(klass, "from_document", from_document, 1);
222
+ rb_define_singleton_method(cNokogiriXmlSchema, "from_document", noko_xml_schema_s_from_document, -1);
231
223
 
232
- rb_define_private_method(klass, "validate_document", validate_document, 1);
233
- rb_define_private_method(klass, "validate_file", validate_file, 1);
224
+ rb_define_private_method(cNokogiriXmlSchema, "validate_document", noko_xml_schema__validate_document, 1);
225
+ rb_define_private_method(cNokogiriXmlSchema, "validate_file", noko_xml_schema__validate_file, 1);
234
226
  }