nokogiri 1.15.3 → 1.18.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -17
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +11 -5
  5. data/dependencies.yml +9 -8
  6. data/ext/nokogiri/extconf.rb +191 -154
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +25 -33
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +1 -1
  18. data/ext/nokogiri/xml_cdata.c +3 -12
  19. data/ext/nokogiri/xml_comment.c +3 -8
  20. data/ext/nokogiri/xml_document.c +173 -158
  21. data/ext/nokogiri/xml_document_fragment.c +10 -25
  22. data/ext/nokogiri/xml_dtd.c +1 -1
  23. data/ext/nokogiri/xml_element_content.c +9 -9
  24. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  25. data/ext/nokogiri/xml_namespace.c +6 -10
  26. data/ext/nokogiri/xml_node.c +142 -108
  27. data/ext/nokogiri/xml_node_set.c +46 -44
  28. data/ext/nokogiri/xml_reader.c +74 -100
  29. data/ext/nokogiri/xml_relax_ng.c +35 -56
  30. data/ext/nokogiri/xml_sax_parser.c +156 -88
  31. data/ext/nokogiri/xml_sax_parser_context.c +220 -128
  32. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  33. data/ext/nokogiri/xml_schema.c +51 -87
  34. data/ext/nokogiri/xml_syntax_error.c +19 -11
  35. data/ext/nokogiri/xml_text.c +3 -6
  36. data/ext/nokogiri/xml_xpath_context.c +104 -104
  37. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  38. data/gumbo-parser/Makefile +18 -0
  39. data/gumbo-parser/src/ascii.c +2 -2
  40. data/gumbo-parser/src/error.c +76 -48
  41. data/gumbo-parser/src/error.h +5 -1
  42. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  43. data/gumbo-parser/src/parser.c +66 -25
  44. data/gumbo-parser/src/tokenizer.c +7 -6
  45. data/lib/nokogiri/class_resolver.rb +1 -1
  46. data/lib/nokogiri/css/node.rb +6 -2
  47. data/lib/nokogiri/css/parser.rb +6 -4
  48. data/lib/nokogiri/css/parser.y +2 -2
  49. data/lib/nokogiri/css/parser_extras.rb +6 -66
  50. data/lib/nokogiri/css/selector_cache.rb +38 -0
  51. data/lib/nokogiri/css/tokenizer.rb +4 -4
  52. data/lib/nokogiri/css/tokenizer.rex +9 -8
  53. data/lib/nokogiri/css/xpath_visitor.rb +44 -27
  54. data/lib/nokogiri/css.rb +86 -20
  55. data/lib/nokogiri/decorators/slop.rb +3 -5
  56. data/lib/nokogiri/encoding_handler.rb +2 -2
  57. data/lib/nokogiri/html4/document.rb +45 -24
  58. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  59. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  60. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  61. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  62. data/lib/nokogiri/html4.rb +9 -14
  63. data/lib/nokogiri/html5/builder.rb +40 -0
  64. data/lib/nokogiri/html5/document.rb +61 -30
  65. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  66. data/lib/nokogiri/html5/node.rb +4 -4
  67. data/lib/nokogiri/html5.rb +114 -138
  68. data/lib/nokogiri/version/constant.rb +1 -1
  69. data/lib/nokogiri/version/info.rb +6 -5
  70. data/lib/nokogiri/xml/attr.rb +2 -2
  71. data/lib/nokogiri/xml/builder.rb +8 -1
  72. data/lib/nokogiri/xml/document.rb +74 -31
  73. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  74. data/lib/nokogiri/xml/namespace.rb +1 -2
  75. data/lib/nokogiri/xml/node.rb +113 -35
  76. data/lib/nokogiri/xml/node_set.rb +12 -10
  77. data/lib/nokogiri/xml/parse_options.rb +1 -1
  78. data/lib/nokogiri/xml/pp/node.rb +6 -1
  79. data/lib/nokogiri/xml/reader.rb +51 -17
  80. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  81. data/lib/nokogiri/xml/sax/document.rb +174 -83
  82. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  83. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  84. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  85. data/lib/nokogiri/xml/sax.rb +48 -0
  86. data/lib/nokogiri/xml/schema.rb +112 -45
  87. data/lib/nokogiri/xml/searchable.rb +39 -43
  88. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  89. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  90. data/lib/nokogiri/xml.rb +14 -25
  91. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  92. data/lib/nokogiri/xslt.rb +4 -10
  93. data/lib/nokogiri.rb +1 -1
  94. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  95. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  96. data/ports/archives/libxml2-2.13.7.tar.xz +0 -0
  97. data/ports/archives/libxslt-1.1.43.tar.xz +0 -0
  98. metadata +13 -14
  99. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  100. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  101. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  102. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
5
5
  static ID id_read;
6
6
 
7
7
  static void
8
- xml_sax_parser_context_free(void *data)
8
+ xml_sax_parser_context_type_free(void *data)
9
9
  {
10
10
  xmlParserCtxtPtr ctxt = data;
11
11
  ctxt->sax = NULL;
12
- xmlFreeParserCtxt(ctxt);
12
+ if (ctxt->myDoc) {
13
+ xmlFreeDoc(ctxt->myDoc);
14
+ }
15
+ if (ctxt) {
16
+ xmlFreeParserCtxt(ctxt);
17
+ }
13
18
  }
14
19
 
15
20
  /*
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
17
22
  * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
23
  */
19
24
  static const rb_data_type_t xml_sax_parser_context_type = {
20
- .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
25
+ .wrap_struct_name = "xmlParserCtxt",
21
26
  .function = {
22
- .dfree = xml_sax_parser_context_free,
27
+ .dfree = xml_sax_parser_context_type_free,
23
28
  },
24
29
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
30
  };
@@ -38,100 +43,124 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
38
43
  return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
39
44
  }
40
45
 
46
+ void
47
+ noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
48
+ {
49
+ if (!NIL_P(rb_encoding)) {
50
+ VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
51
+
52
+ char *encoding_name = StringValueCStr(rb_encoding_name);
53
+ if (encoding_name) {
54
+ libxmlStructuredErrorHandlerState handler_state;
55
+ VALUE rb_errors = rb_ary_new();
41
56
 
42
- /*
43
- * call-seq:
44
- * parse_io(io, encoding)
45
- *
46
- * Parse +io+ object with +encoding+
47
- */
57
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
58
+
59
+ int result = xmlSwitchEncodingName(c_context, encoding_name);
60
+
61
+ noko__structured_error_func_restore(&handler_state);
62
+
63
+ if (result != 0) {
64
+ xmlFreeParserCtxt(c_context);
65
+
66
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
67
+ if (!NIL_P(exception)) {
68
+ rb_exc_raise(exception);
69
+ } else {
70
+ rb_raise(rb_eRuntimeError, "could not set encoding");
71
+ }
72
+ }
73
+ }
74
+ }
75
+ }
76
+
77
+ /* :nodoc: */
48
78
  static VALUE
49
- parse_io(VALUE klass, VALUE io, VALUE encoding)
79
+ noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
50
80
  {
51
- xmlParserCtxtPtr ctxt;
52
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
53
-
54
- if (!rb_respond_to(io, id_read)) {
81
+ if (!rb_respond_to(rb_io, id_read)) {
55
82
  rb_raise(rb_eTypeError, "argument expected to respond to :read");
56
83
  }
57
84
 
58
- ctxt = xmlCreateIOParserCtxt(NULL, NULL,
59
- (xmlInputReadCallback)noko_io_read,
60
- (xmlInputCloseCallback)noko_io_close,
61
- (void *)io, enc);
62
- if (ctxt->sax) {
63
- xmlFree(ctxt->sax);
64
- ctxt->sax = NULL;
85
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
86
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
65
87
  }
66
88
 
67
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
68
- }
89
+ xmlParserCtxtPtr c_context =
90
+ xmlCreateIOParserCtxt(NULL, NULL,
91
+ (xmlInputReadCallback)noko_io_read,
92
+ (xmlInputCloseCallback)noko_io_close,
93
+ (void *)rb_io, XML_CHAR_ENCODING_NONE);
94
+ if (!c_context) {
95
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
96
+ }
69
97
 
70
- /*
71
- * call-seq:
72
- * parse_file(filename)
73
- *
74
- * Parse file given +filename+
75
- */
76
- static VALUE
77
- parse_file(VALUE klass, VALUE filename)
78
- {
79
- xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
98
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
80
99
 
81
- if (ctxt->sax) {
82
- xmlFree(ctxt->sax);
83
- ctxt->sax = NULL;
100
+ if (c_context->sax) {
101
+ xmlFree(c_context->sax);
102
+ c_context->sax = NULL;
84
103
  }
85
104
 
86
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
105
+ VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
106
+ rb_iv_set(rb_context, "@input", rb_io);
107
+
108
+ return rb_context;
87
109
  }
88
110
 
89
- /*
90
- * call-seq:
91
- * parse_memory(data)
92
- *
93
- * Parse the XML stored in memory in +data+
94
- */
111
+ /* :nodoc: */
95
112
  static VALUE
96
- parse_memory(VALUE klass, VALUE data)
113
+ noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
97
114
  {
98
- xmlParserCtxtPtr ctxt;
99
-
100
- Check_Type(data, T_STRING);
115
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
116
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
117
+ }
101
118
 
102
- if (!(int)RSTRING_LEN(data)) {
103
- rb_raise(rb_eRuntimeError, "data cannot be empty");
119
+ xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
120
+ if (!c_context) {
121
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
104
122
  }
105
123
 
106
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
107
- (int)RSTRING_LEN(data));
108
- if (ctxt->sax) {
109
- xmlFree(ctxt->sax);
110
- ctxt->sax = NULL;
124
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
125
+
126
+ if (c_context->sax) {
127
+ xmlFree(c_context->sax);
128
+ c_context->sax = NULL;
111
129
  }
112
130
 
113
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
131
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
114
132
  }
115
133
 
134
+ /* :nodoc: */
116
135
  static VALUE
117
- parse_doc(VALUE ctxt_val)
136
+ noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
118
137
  {
119
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
120
- xmlParseDocument(ctxt);
121
- return Qnil;
122
- }
138
+ Check_Type(rb_input, T_STRING);
139
+ if (!(int)RSTRING_LEN(rb_input)) {
140
+ rb_raise(rb_eRuntimeError, "input string cannot be empty");
141
+ }
123
142
 
124
- static VALUE
125
- parse_doc_finalize(VALUE ctxt_val)
126
- {
127
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
143
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
144
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
145
+ }
128
146
 
129
- if (NULL != ctxt->myDoc) {
130
- xmlFreeDoc(ctxt->myDoc);
147
+ xmlParserCtxtPtr c_context =
148
+ xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
149
+ if (!c_context) {
150
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
131
151
  }
132
152
 
133
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
134
- return Qnil;
153
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
154
+
155
+ if (c_context->sax) {
156
+ xmlFree(c_context->sax);
157
+ c_context->sax = NULL;
158
+ }
159
+
160
+ VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
161
+ rb_iv_set(rb_context, "@input", rb_input);
162
+
163
+ return rb_context;
135
164
  }
136
165
 
137
166
  /*
@@ -139,80 +168,113 @@ parse_doc_finalize(VALUE ctxt_val)
139
168
  * parse_with(sax_handler)
140
169
  *
141
170
  * Use +sax_handler+ and parse the current document
171
+ *
172
+ * 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
173
+ * more convenient for most use cases.
142
174
  */
143
175
  static VALUE
144
- parse_with(VALUE self, VALUE sax_handler)
176
+ noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
145
177
  {
146
- xmlParserCtxtPtr ctxt;
178
+ xmlParserCtxtPtr c_context;
147
179
  xmlSAXHandlerPtr sax;
148
180
 
149
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
181
+ if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
150
182
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
151
183
  }
152
184
 
153
- ctxt = noko_xml_sax_parser_context_unwrap(self);
154
- sax = noko_sax_handler_unwrap(sax_handler);
185
+ c_context = noko_xml_sax_parser_context_unwrap(rb_context);
186
+ sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
155
187
 
156
- ctxt->sax = sax;
157
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
188
+ c_context->sax = sax;
189
+ c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
190
+ c_context->_private = (void *)rb_sax_parser;
158
191
 
159
192
  xmlSetStructuredErrorFunc(NULL, NULL);
160
193
 
161
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
194
+ /* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
195
+ * don't have any cleanup to do. The only memory we need to free is handled by
196
+ * xml_sax_parser_context_type_free */
197
+ xmlParseDocument(c_context);
162
198
 
163
199
  return Qnil;
164
200
  }
165
201
 
166
202
  /*
167
203
  * call-seq:
168
- * replace_entities=(boolean)
204
+ * replace_entities=(value)
205
+ *
206
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
207
+ *
208
+ * [Parameters]
209
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
210
+ *
211
+ * ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
212
+ * defaults to +false+ for this reason.
169
213
  *
170
- * Should this parser replace entities? &amp; will get converted to '&' if
171
- * set to true
214
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
215
+ * replacement.
216
+ *
217
+ * [Example]
218
+ * Because this class is generally not instantiated directly, you would typically set this option
219
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
220
+ *
221
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
222
+ * parser.parse(xml) do |ctx|
223
+ * ctx.replace_entities = true # this is UNSAFE for untrusted documents!
224
+ * end
172
225
  */
173
226
  static VALUE
174
- set_replace_entities(VALUE self, VALUE value)
227
+ noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
175
228
  {
176
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
229
+ int error;
230
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
177
231
 
178
- if (Qfalse == value) {
179
- ctxt->replaceEntities = 0;
232
+ if (RB_TEST(rb_value)) {
233
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
180
234
  } else {
181
- ctxt->replaceEntities = 1;
235
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
236
+ }
237
+
238
+ if (error) {
239
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
182
240
  }
183
241
 
184
- return value;
242
+ return rb_value;
185
243
  }
186
244
 
187
245
  /*
188
246
  * call-seq:
189
- * replace_entities
247
+ * replace_entities
190
248
  *
191
- * Should this parser replace entities? &amp; will get converted to '&' if
192
- * set to true
249
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
250
+ *
251
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
252
+ *
253
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
254
+ * replacement.
193
255
  */
194
256
  static VALUE
195
- get_replace_entities(VALUE self)
257
+ noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
196
258
  {
197
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
259
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
198
260
 
199
- if (0 == ctxt->replaceEntities) {
200
- return Qfalse;
201
- } else {
261
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
202
262
  return Qtrue;
263
+ } else {
264
+ return Qfalse;
203
265
  }
204
266
  }
205
267
 
206
268
  /*
207
269
  * call-seq: line
208
270
  *
209
- * Get the current line the parser context is processing.
271
+ * [Returns] (Integer) the line number of the line being currently parsed.
210
272
  */
211
273
  static VALUE
212
- line(VALUE self)
274
+ noko_xml_sax_parser_context__line(VALUE rb_context)
213
275
  {
214
276
  xmlParserInputPtr io;
215
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
277
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
216
278
 
217
279
  io = ctxt->input;
218
280
  if (io) {
@@ -225,12 +287,12 @@ line(VALUE self)
225
287
  /*
226
288
  * call-seq: column
227
289
  *
228
- * Get the current column the parser context is processing.
290
+ * [Returns] (Integer) the column number of the column being currently parsed.
229
291
  */
230
292
  static VALUE
231
- column(VALUE self)
293
+ noko_xml_sax_parser_context__column(VALUE rb_context)
232
294
  {
233
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
295
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
234
296
  xmlParserInputPtr io;
235
297
 
236
298
  io = ctxt->input;
@@ -243,41 +305,66 @@ column(VALUE self)
243
305
 
244
306
  /*
245
307
  * call-seq:
246
- * recovery=(boolean)
308
+ * recovery=(value)
309
+ *
310
+ * Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
311
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
312
+ * parser will stop processing the file on the first parsing error.
313
+ *
314
+ * [Parameters]
315
+ * - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
316
+ *
317
+ * [Returns] (Boolean) The passed +value+.
247
318
  *
248
- * Should this parser recover from structural errors? It will not stop processing
249
- * file on structural errors if set to true
319
+ * [Example]
320
+ * Because this class is generally not instantiated directly, you would typically set this option
321
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
322
+ *
323
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
324
+ * parser.parse(xml) do |ctx|
325
+ * ctx.recovery = true
326
+ * end
250
327
  */
251
328
  static VALUE
252
- set_recovery(VALUE self, VALUE value)
329
+ noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
253
330
  {
254
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
331
+ int error;
332
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
255
333
 
256
- if (value == Qfalse) {
257
- ctxt->recovery = 0;
334
+ if (RB_TEST(rb_value)) {
335
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
258
336
  } else {
259
- ctxt->recovery = 1;
337
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
338
+ }
339
+
340
+ if (error) {
341
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
260
342
  }
261
343
 
262
- return value;
344
+ return rb_value;
263
345
  }
264
346
 
265
347
  /*
266
348
  * call-seq:
267
- * recovery
349
+ * recovery
350
+ *
351
+ * Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
352
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
353
+ * parser will stop processing the file on the first parsing error.
354
+ *
355
+ * [Returns] (Boolean) Whether this parser will recover from parsing errors.
268
356
  *
269
- * Should this parser recover from structural errors? It will not stop processing
270
- * file on structural errors if set to true
357
+ * Default is +false+ for XML and +true+ for HTML.
271
358
  */
272
359
  static VALUE
273
- get_recovery(VALUE self)
360
+ noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
274
361
  {
275
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
362
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
276
363
 
277
- if (ctxt->recovery == 0) {
278
- return Qfalse;
279
- } else {
364
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
280
365
  return Qtrue;
366
+ } else {
367
+ return Qfalse;
281
368
  }
282
369
  }
283
370
 
@@ -288,17 +375,22 @@ noko_init_xml_sax_parser_context(void)
288
375
 
289
376
  rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
290
377
 
291
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
292
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
293
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
294
-
295
- rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
296
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
297
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
298
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
299
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
300
- rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
301
- rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
378
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
379
+ noko_xml_sax_parser_context_s_native_io, 2);
380
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
381
+ noko_xml_sax_parser_context_s_native_memory, 2);
382
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
383
+ noko_xml_sax_parser_context_s_native_file, 2);
384
+
385
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
386
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
387
+ noko_xml_sax_parser_context__replace_entities_set, 1);
388
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
389
+ noko_xml_sax_parser_context__replace_entities_get, 0);
390
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
391
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
392
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
393
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
302
394
 
303
395
  id_read = rb_intern("read");
304
396
  }