nokogiri 1.16.8 → 1.18.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +14 -22
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +8 -5
  5. data/dependencies.yml +6 -6
  6. data/ext/nokogiri/extconf.rb +188 -142
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +18 -33
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +2 -10
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +163 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -6
  25. data/ext/nokogiri/xml_node.c +134 -103
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +54 -58
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +219 -131
  31. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  32. data/ext/nokogiri/xml_schema.c +50 -85
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +2 -4
  35. data/ext/nokogiri/xml_xpath_context.c +103 -100
  36. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  37. data/gumbo-parser/src/ascii.c +2 -2
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +63 -25
  42. data/gumbo-parser/src/tokenizer.c +6 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +44 -23
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -72
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/xml/builder.rb +8 -1
  68. data/lib/nokogiri/xml/document.rb +70 -26
  69. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  70. data/lib/nokogiri/xml/node.rb +82 -11
  71. data/lib/nokogiri/xml/node_set.rb +9 -7
  72. data/lib/nokogiri/xml/parse_options.rb +1 -1
  73. data/lib/nokogiri/xml/pp/node.rb +6 -1
  74. data/lib/nokogiri/xml/reader.rb +46 -13
  75. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  76. data/lib/nokogiri/xml/sax/document.rb +174 -83
  77. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  78. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  79. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  80. data/lib/nokogiri/xml/sax.rb +48 -0
  81. data/lib/nokogiri/xml/schema.rb +112 -45
  82. data/lib/nokogiri/xml/searchable.rb +38 -42
  83. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  84. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  85. data/lib/nokogiri/xml.rb +13 -24
  86. data/lib/nokogiri/xslt.rb +3 -9
  87. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  88. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  89. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  90. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  91. metadata +13 -12
  92. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  93. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  94. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  95. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
5
5
  static ID id_read;
6
6
 
7
7
  static void
8
- xml_sax_parser_context_free(void *data)
8
+ xml_sax_parser_context_type_free(void *data)
9
9
  {
10
10
  xmlParserCtxtPtr ctxt = data;
11
11
  ctxt->sax = NULL;
12
- xmlFreeParserCtxt(ctxt);
12
+ if (ctxt->myDoc) {
13
+ xmlFreeDoc(ctxt->myDoc);
14
+ }
15
+ if (ctxt) {
16
+ xmlFreeParserCtxt(ctxt);
17
+ }
13
18
  }
14
19
 
15
20
  /*
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
17
22
  * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
23
  */
19
24
  static const rb_data_type_t xml_sax_parser_context_type = {
20
- .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
25
+ .wrap_struct_name = "xmlParserCtxt",
21
26
  .function = {
22
- .dfree = xml_sax_parser_context_free,
27
+ .dfree = xml_sax_parser_context_type_free,
23
28
  },
24
29
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
30
  };
@@ -38,104 +43,124 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
38
43
  return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
39
44
  }
40
45
 
46
+ void
47
+ noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
48
+ {
49
+ if (!NIL_P(rb_encoding)) {
50
+ VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
51
+
52
+ char *encoding_name = StringValueCStr(rb_encoding_name);
53
+ if (encoding_name) {
54
+ libxmlStructuredErrorHandlerState handler_state;
55
+ VALUE rb_errors = rb_ary_new();
41
56
 
42
- /*
43
- * call-seq:
44
- * parse_io(io, encoding)
45
- *
46
- * Parse +io+ object with +encoding+
47
- */
57
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
58
+
59
+ int result = xmlSwitchEncodingName(c_context, encoding_name);
60
+
61
+ noko__structured_error_func_restore(&handler_state);
62
+
63
+ if (result != 0) {
64
+ xmlFreeParserCtxt(c_context);
65
+
66
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
67
+ if (!NIL_P(exception)) {
68
+ rb_exc_raise(exception);
69
+ } else {
70
+ rb_raise(rb_eRuntimeError, "could not set encoding");
71
+ }
72
+ }
73
+ }
74
+ }
75
+ }
76
+
77
+ /* :nodoc: */
48
78
  static VALUE
49
- parse_io(VALUE klass, VALUE io, VALUE encoding)
79
+ noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
50
80
  {
51
- xmlParserCtxtPtr ctxt;
52
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
53
-
54
- if (!rb_respond_to(io, id_read)) {
81
+ if (!rb_respond_to(rb_io, id_read)) {
55
82
  rb_raise(rb_eTypeError, "argument expected to respond to :read");
56
83
  }
57
84
 
58
- ctxt = xmlCreateIOParserCtxt(NULL, NULL,
59
- (xmlInputReadCallback)noko_io_read,
60
- (xmlInputCloseCallback)noko_io_close,
61
- (void *)io, enc);
62
- if (!ctxt) {
63
- rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
85
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
86
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
64
87
  }
65
88
 
66
- if (ctxt->sax) {
67
- xmlFree(ctxt->sax);
68
- ctxt->sax = NULL;
89
+ xmlParserCtxtPtr c_context =
90
+ xmlCreateIOParserCtxt(NULL, NULL,
91
+ (xmlInputReadCallback)noko_io_read,
92
+ (xmlInputCloseCallback)noko_io_close,
93
+ (void *)rb_io, XML_CHAR_ENCODING_NONE);
94
+ if (!c_context) {
95
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
69
96
  }
70
97
 
71
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
72
- }
73
-
74
- /*
75
- * call-seq:
76
- * parse_file(filename)
77
- *
78
- * Parse file given +filename+
79
- */
80
- static VALUE
81
- parse_file(VALUE klass, VALUE filename)
82
- {
83
- xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
98
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
84
99
 
85
- if (ctxt->sax) {
86
- xmlFree(ctxt->sax);
87
- ctxt->sax = NULL;
100
+ if (c_context->sax) {
101
+ xmlFree(c_context->sax);
102
+ c_context->sax = NULL;
88
103
  }
89
104
 
90
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
105
+ VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
106
+ rb_iv_set(rb_context, "@input", rb_io);
107
+
108
+ return rb_context;
91
109
  }
92
110
 
93
- /*
94
- * call-seq:
95
- * parse_memory(data)
96
- *
97
- * Parse the XML stored in memory in +data+
98
- */
111
+ /* :nodoc: */
99
112
  static VALUE
100
- parse_memory(VALUE klass, VALUE data)
113
+ noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
101
114
  {
102
- xmlParserCtxtPtr ctxt;
103
-
104
- Check_Type(data, T_STRING);
115
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
116
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
117
+ }
105
118
 
106
- if (!(int)RSTRING_LEN(data)) {
107
- rb_raise(rb_eRuntimeError, "data cannot be empty");
119
+ xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
120
+ if (!c_context) {
121
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
108
122
  }
109
123
 
110
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
111
- (int)RSTRING_LEN(data));
112
- if (ctxt->sax) {
113
- xmlFree(ctxt->sax);
114
- ctxt->sax = NULL;
124
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
125
+
126
+ if (c_context->sax) {
127
+ xmlFree(c_context->sax);
128
+ c_context->sax = NULL;
115
129
  }
116
130
 
117
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
131
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
118
132
  }
119
133
 
134
+ /* :nodoc: */
120
135
  static VALUE
121
- parse_doc(VALUE ctxt_val)
136
+ noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
122
137
  {
123
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
124
- xmlParseDocument(ctxt);
125
- return Qnil;
126
- }
138
+ Check_Type(rb_input, T_STRING);
139
+ if (!(int)RSTRING_LEN(rb_input)) {
140
+ rb_raise(rb_eRuntimeError, "input string cannot be empty");
141
+ }
127
142
 
128
- static VALUE
129
- parse_doc_finalize(VALUE ctxt_val)
130
- {
131
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
143
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
144
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
145
+ }
132
146
 
133
- if (NULL != ctxt->myDoc) {
134
- xmlFreeDoc(ctxt->myDoc);
147
+ xmlParserCtxtPtr c_context =
148
+ xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
149
+ if (!c_context) {
150
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
135
151
  }
136
152
 
137
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
138
- return Qnil;
153
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
154
+
155
+ if (c_context->sax) {
156
+ xmlFree(c_context->sax);
157
+ c_context->sax = NULL;
158
+ }
159
+
160
+ VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
161
+ rb_iv_set(rb_context, "@input", rb_input);
162
+
163
+ return rb_context;
139
164
  }
140
165
 
141
166
  /*
@@ -143,80 +168,113 @@ parse_doc_finalize(VALUE ctxt_val)
143
168
  * parse_with(sax_handler)
144
169
  *
145
170
  * Use +sax_handler+ and parse the current document
171
+ *
172
+ * 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
173
+ * more convenient for most use cases.
146
174
  */
147
175
  static VALUE
148
- parse_with(VALUE self, VALUE sax_handler)
176
+ noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
149
177
  {
150
- xmlParserCtxtPtr ctxt;
178
+ xmlParserCtxtPtr c_context;
151
179
  xmlSAXHandlerPtr sax;
152
180
 
153
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
181
+ if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
154
182
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
155
183
  }
156
184
 
157
- ctxt = noko_xml_sax_parser_context_unwrap(self);
158
- sax = noko_sax_handler_unwrap(sax_handler);
185
+ c_context = noko_xml_sax_parser_context_unwrap(rb_context);
186
+ sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
159
187
 
160
- ctxt->sax = sax;
161
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
188
+ c_context->sax = sax;
189
+ c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
190
+ c_context->_private = (void *)rb_sax_parser;
162
191
 
163
192
  xmlSetStructuredErrorFunc(NULL, NULL);
164
193
 
165
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
194
+ /* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
195
+ * don't have any cleanup to do. The only memory we need to free is handled by
196
+ * xml_sax_parser_context_type_free */
197
+ xmlParseDocument(c_context);
166
198
 
167
199
  return Qnil;
168
200
  }
169
201
 
170
202
  /*
171
203
  * call-seq:
172
- * replace_entities=(boolean)
204
+ * replace_entities=(value)
205
+ *
206
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
207
+ *
208
+ * [Parameters]
209
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
173
210
  *
174
- * Should this parser replace entities? &amp; will get converted to '&' if
175
- * set to true
211
+ * <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
212
+ * defaults to +false+ for this reason.
213
+ *
214
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
215
+ * replacement.
216
+ *
217
+ * [Example]
218
+ * Because this class is generally not instantiated directly, you would typically set this option
219
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
220
+ *
221
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
222
+ * parser.parse(xml) do |ctx|
223
+ * ctx.replace_entities = true # this is UNSAFE for untrusted documents!
224
+ * end
176
225
  */
177
226
  static VALUE
178
- set_replace_entities(VALUE self, VALUE value)
227
+ noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
179
228
  {
180
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
229
+ int error;
230
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
181
231
 
182
- if (Qfalse == value) {
183
- ctxt->replaceEntities = 0;
232
+ if (RB_TEST(rb_value)) {
233
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
184
234
  } else {
185
- ctxt->replaceEntities = 1;
235
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
236
+ }
237
+
238
+ if (error) {
239
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
186
240
  }
187
241
 
188
- return value;
242
+ return rb_value;
189
243
  }
190
244
 
191
245
  /*
192
246
  * call-seq:
193
- * replace_entities
247
+ * replace_entities
248
+ *
249
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
250
+ *
251
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
194
252
  *
195
- * Should this parser replace entities? &amp; will get converted to '&' if
196
- * set to true
253
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
254
+ * replacement.
197
255
  */
198
256
  static VALUE
199
- get_replace_entities(VALUE self)
257
+ noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
200
258
  {
201
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
259
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
202
260
 
203
- if (0 == ctxt->replaceEntities) {
204
- return Qfalse;
205
- } else {
261
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
206
262
  return Qtrue;
263
+ } else {
264
+ return Qfalse;
207
265
  }
208
266
  }
209
267
 
210
268
  /*
211
269
  * call-seq: line
212
270
  *
213
- * Get the current line the parser context is processing.
271
+ * [Returns] (Integer) the line number of the line being currently parsed.
214
272
  */
215
273
  static VALUE
216
- line(VALUE self)
274
+ noko_xml_sax_parser_context__line(VALUE rb_context)
217
275
  {
218
276
  xmlParserInputPtr io;
219
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
277
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
220
278
 
221
279
  io = ctxt->input;
222
280
  if (io) {
@@ -229,12 +287,12 @@ line(VALUE self)
229
287
  /*
230
288
  * call-seq: column
231
289
  *
232
- * Get the current column the parser context is processing.
290
+ * [Returns] (Integer) the column number of the column being currently parsed.
233
291
  */
234
292
  static VALUE
235
- column(VALUE self)
293
+ noko_xml_sax_parser_context__column(VALUE rb_context)
236
294
  {
237
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
295
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
238
296
  xmlParserInputPtr io;
239
297
 
240
298
  io = ctxt->input;
@@ -247,41 +305,66 @@ column(VALUE self)
247
305
 
248
306
  /*
249
307
  * call-seq:
250
- * recovery=(boolean)
308
+ * recovery=(value)
309
+ *
310
+ * Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
311
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
312
+ * parser will stop processing the file on the first parsing error.
313
+ *
314
+ * [Parameters]
315
+ * - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
316
+ *
317
+ * [Returns] (Boolean) The passed +value+.
251
318
  *
252
- * Should this parser recover from structural errors? It will not stop processing
253
- * file on structural errors if set to true
319
+ * [Example]
320
+ * Because this class is generally not instantiated directly, you would typically set this option
321
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
322
+ *
323
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
324
+ * parser.parse(xml) do |ctx|
325
+ * ctx.recovery = true
326
+ * end
254
327
  */
255
328
  static VALUE
256
- set_recovery(VALUE self, VALUE value)
329
+ noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
257
330
  {
258
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
331
+ int error;
332
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
259
333
 
260
- if (value == Qfalse) {
261
- ctxt->recovery = 0;
334
+ if (RB_TEST(rb_value)) {
335
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
262
336
  } else {
263
- ctxt->recovery = 1;
337
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
338
+ }
339
+
340
+ if (error) {
341
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
264
342
  }
265
343
 
266
- return value;
344
+ return rb_value;
267
345
  }
268
346
 
269
347
  /*
270
348
  * call-seq:
271
- * recovery
349
+ * recovery
350
+ *
351
+ * Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
352
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
353
+ * parser will stop processing the file on the first parsing error.
354
+ *
355
+ * [Returns] (Boolean) Whether this parser will recover from parsing errors.
272
356
  *
273
- * Should this parser recover from structural errors? It will not stop processing
274
- * file on structural errors if set to true
357
+ * Default is +false+ for XML and +true+ for HTML.
275
358
  */
276
359
  static VALUE
277
- get_recovery(VALUE self)
360
+ noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
278
361
  {
279
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
362
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
280
363
 
281
- if (ctxt->recovery == 0) {
282
- return Qfalse;
283
- } else {
364
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
284
365
  return Qtrue;
366
+ } else {
367
+ return Qfalse;
285
368
  }
286
369
  }
287
370
 
@@ -292,17 +375,22 @@ noko_init_xml_sax_parser_context(void)
292
375
 
293
376
  rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
294
377
 
295
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
296
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
297
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
298
-
299
- rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
300
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
301
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
302
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
303
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
304
- rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
305
- rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
378
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
379
+ noko_xml_sax_parser_context_s_native_io, 2);
380
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
381
+ noko_xml_sax_parser_context_s_native_memory, 2);
382
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
383
+ noko_xml_sax_parser_context_s_native_file, 2);
384
+
385
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
386
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
387
+ noko_xml_sax_parser_context__replace_entities_set, 1);
388
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
389
+ noko_xml_sax_parser_context__replace_entities_get, 0);
390
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
391
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
392
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
393
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
306
394
 
307
395
  id_read = rb_intern("read");
308
396
  }