nokogiri 1.16.8 → 1.18.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +8 -5
  5. data/dependencies.yml +6 -6
  6. data/ext/nokogiri/extconf.rb +188 -142
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +18 -33
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +2 -10
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +163 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -6
  25. data/ext/nokogiri/xml_node.c +134 -103
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +54 -58
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +219 -131
  31. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  32. data/ext/nokogiri/xml_schema.c +50 -85
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +2 -4
  35. data/ext/nokogiri/xml_xpath_context.c +103 -100
  36. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  37. data/gumbo-parser/src/ascii.c +2 -2
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +63 -25
  42. data/gumbo-parser/src/tokenizer.c +6 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -6
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +44 -23
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -72
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/xml/builder.rb +8 -1
  68. data/lib/nokogiri/xml/document.rb +70 -26
  69. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  70. data/lib/nokogiri/xml/node.rb +82 -11
  71. data/lib/nokogiri/xml/node_set.rb +9 -7
  72. data/lib/nokogiri/xml/parse_options.rb +1 -1
  73. data/lib/nokogiri/xml/pp/node.rb +6 -1
  74. data/lib/nokogiri/xml/reader.rb +46 -13
  75. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  76. data/lib/nokogiri/xml/sax/document.rb +174 -83
  77. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  78. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  79. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  80. data/lib/nokogiri/xml/sax.rb +48 -0
  81. data/lib/nokogiri/xml/schema.rb +112 -45
  82. data/lib/nokogiri/xml/searchable.rb +38 -42
  83. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  84. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  85. data/lib/nokogiri/xml.rb +13 -24
  86. data/lib/nokogiri/xslt.rb +3 -9
  87. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  88. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  89. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  90. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  91. metadata +13 -12
  92. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  93. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  94. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  95. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
5
5
  static ID id_read;
6
6
 
7
7
  static void
8
- xml_sax_parser_context_free(void *data)
8
+ xml_sax_parser_context_type_free(void *data)
9
9
  {
10
10
  xmlParserCtxtPtr ctxt = data;
11
11
  ctxt->sax = NULL;
12
- xmlFreeParserCtxt(ctxt);
12
+ if (ctxt->myDoc) {
13
+ xmlFreeDoc(ctxt->myDoc);
14
+ }
15
+ if (ctxt) {
16
+ xmlFreeParserCtxt(ctxt);
17
+ }
13
18
  }
14
19
 
15
20
  /*
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
17
22
  * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
23
  */
19
24
  static const rb_data_type_t xml_sax_parser_context_type = {
20
- .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
25
+ .wrap_struct_name = "xmlParserCtxt",
21
26
  .function = {
22
- .dfree = xml_sax_parser_context_free,
27
+ .dfree = xml_sax_parser_context_type_free,
23
28
  },
24
29
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
30
  };
@@ -38,104 +43,124 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
38
43
  return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
39
44
  }
40
45
 
46
+ void
47
+ noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
48
+ {
49
+ if (!NIL_P(rb_encoding)) {
50
+ VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
51
+
52
+ char *encoding_name = StringValueCStr(rb_encoding_name);
53
+ if (encoding_name) {
54
+ libxmlStructuredErrorHandlerState handler_state;
55
+ VALUE rb_errors = rb_ary_new();
41
56
 
42
- /*
43
- * call-seq:
44
- * parse_io(io, encoding)
45
- *
46
- * Parse +io+ object with +encoding+
47
- */
57
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
58
+
59
+ int result = xmlSwitchEncodingName(c_context, encoding_name);
60
+
61
+ noko__structured_error_func_restore(&handler_state);
62
+
63
+ if (result != 0) {
64
+ xmlFreeParserCtxt(c_context);
65
+
66
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
67
+ if (!NIL_P(exception)) {
68
+ rb_exc_raise(exception);
69
+ } else {
70
+ rb_raise(rb_eRuntimeError, "could not set encoding");
71
+ }
72
+ }
73
+ }
74
+ }
75
+ }
76
+
77
+ /* :nodoc: */
48
78
  static VALUE
49
- parse_io(VALUE klass, VALUE io, VALUE encoding)
79
+ noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
50
80
  {
51
- xmlParserCtxtPtr ctxt;
52
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
53
-
54
- if (!rb_respond_to(io, id_read)) {
81
+ if (!rb_respond_to(rb_io, id_read)) {
55
82
  rb_raise(rb_eTypeError, "argument expected to respond to :read");
56
83
  }
57
84
 
58
- ctxt = xmlCreateIOParserCtxt(NULL, NULL,
59
- (xmlInputReadCallback)noko_io_read,
60
- (xmlInputCloseCallback)noko_io_close,
61
- (void *)io, enc);
62
- if (!ctxt) {
63
- rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
85
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
86
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
64
87
  }
65
88
 
66
- if (ctxt->sax) {
67
- xmlFree(ctxt->sax);
68
- ctxt->sax = NULL;
89
+ xmlParserCtxtPtr c_context =
90
+ xmlCreateIOParserCtxt(NULL, NULL,
91
+ (xmlInputReadCallback)noko_io_read,
92
+ (xmlInputCloseCallback)noko_io_close,
93
+ (void *)rb_io, XML_CHAR_ENCODING_NONE);
94
+ if (!c_context) {
95
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
69
96
  }
70
97
 
71
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
72
- }
73
-
74
- /*
75
- * call-seq:
76
- * parse_file(filename)
77
- *
78
- * Parse file given +filename+
79
- */
80
- static VALUE
81
- parse_file(VALUE klass, VALUE filename)
82
- {
83
- xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
98
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
84
99
 
85
- if (ctxt->sax) {
86
- xmlFree(ctxt->sax);
87
- ctxt->sax = NULL;
100
+ if (c_context->sax) {
101
+ xmlFree(c_context->sax);
102
+ c_context->sax = NULL;
88
103
  }
89
104
 
90
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
105
+ VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
106
+ rb_iv_set(rb_context, "@input", rb_io);
107
+
108
+ return rb_context;
91
109
  }
92
110
 
93
- /*
94
- * call-seq:
95
- * parse_memory(data)
96
- *
97
- * Parse the XML stored in memory in +data+
98
- */
111
+ /* :nodoc: */
99
112
  static VALUE
100
- parse_memory(VALUE klass, VALUE data)
113
+ noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
101
114
  {
102
- xmlParserCtxtPtr ctxt;
103
-
104
- Check_Type(data, T_STRING);
115
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
116
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
117
+ }
105
118
 
106
- if (!(int)RSTRING_LEN(data)) {
107
- rb_raise(rb_eRuntimeError, "data cannot be empty");
119
+ xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
120
+ if (!c_context) {
121
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
108
122
  }
109
123
 
110
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
111
- (int)RSTRING_LEN(data));
112
- if (ctxt->sax) {
113
- xmlFree(ctxt->sax);
114
- ctxt->sax = NULL;
124
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
125
+
126
+ if (c_context->sax) {
127
+ xmlFree(c_context->sax);
128
+ c_context->sax = NULL;
115
129
  }
116
130
 
117
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
131
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
118
132
  }
119
133
 
134
+ /* :nodoc: */
120
135
  static VALUE
121
- parse_doc(VALUE ctxt_val)
136
+ noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
122
137
  {
123
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
124
- xmlParseDocument(ctxt);
125
- return Qnil;
126
- }
138
+ Check_Type(rb_input, T_STRING);
139
+ if (!(int)RSTRING_LEN(rb_input)) {
140
+ rb_raise(rb_eRuntimeError, "input string cannot be empty");
141
+ }
127
142
 
128
- static VALUE
129
- parse_doc_finalize(VALUE ctxt_val)
130
- {
131
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
143
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
144
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
145
+ }
132
146
 
133
- if (NULL != ctxt->myDoc) {
134
- xmlFreeDoc(ctxt->myDoc);
147
+ xmlParserCtxtPtr c_context =
148
+ xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
149
+ if (!c_context) {
150
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
135
151
  }
136
152
 
137
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
138
- return Qnil;
153
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
154
+
155
+ if (c_context->sax) {
156
+ xmlFree(c_context->sax);
157
+ c_context->sax = NULL;
158
+ }
159
+
160
+ VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
161
+ rb_iv_set(rb_context, "@input", rb_input);
162
+
163
+ return rb_context;
139
164
  }
140
165
 
141
166
  /*
@@ -143,80 +168,113 @@ parse_doc_finalize(VALUE ctxt_val)
143
168
  * parse_with(sax_handler)
144
169
  *
145
170
  * Use +sax_handler+ and parse the current document
171
+ *
172
+ * 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
173
+ * more convenient for most use cases.
146
174
  */
147
175
  static VALUE
148
- parse_with(VALUE self, VALUE sax_handler)
176
+ noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
149
177
  {
150
- xmlParserCtxtPtr ctxt;
178
+ xmlParserCtxtPtr c_context;
151
179
  xmlSAXHandlerPtr sax;
152
180
 
153
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
181
+ if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
154
182
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
155
183
  }
156
184
 
157
- ctxt = noko_xml_sax_parser_context_unwrap(self);
158
- sax = noko_sax_handler_unwrap(sax_handler);
185
+ c_context = noko_xml_sax_parser_context_unwrap(rb_context);
186
+ sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
159
187
 
160
- ctxt->sax = sax;
161
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
188
+ c_context->sax = sax;
189
+ c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
190
+ c_context->_private = (void *)rb_sax_parser;
162
191
 
163
192
  xmlSetStructuredErrorFunc(NULL, NULL);
164
193
 
165
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
194
+ /* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
195
+ * don't have any cleanup to do. The only memory we need to free is handled by
196
+ * xml_sax_parser_context_type_free */
197
+ xmlParseDocument(c_context);
166
198
 
167
199
  return Qnil;
168
200
  }
169
201
 
170
202
  /*
171
203
  * call-seq:
172
- * replace_entities=(boolean)
204
+ * replace_entities=(value)
205
+ *
206
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
207
+ *
208
+ * [Parameters]
209
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
173
210
  *
174
- * Should this parser replace entities? &amp; will get converted to '&' if
175
- * set to true
211
+ * <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
212
+ * defaults to +false+ for this reason.
213
+ *
214
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
215
+ * replacement.
216
+ *
217
+ * [Example]
218
+ * Because this class is generally not instantiated directly, you would typically set this option
219
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
220
+ *
221
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
222
+ * parser.parse(xml) do |ctx|
223
+ * ctx.replace_entities = true # this is UNSAFE for untrusted documents!
224
+ * end
176
225
  */
177
226
  static VALUE
178
- set_replace_entities(VALUE self, VALUE value)
227
+ noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
179
228
  {
180
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
229
+ int error;
230
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
181
231
 
182
- if (Qfalse == value) {
183
- ctxt->replaceEntities = 0;
232
+ if (RB_TEST(rb_value)) {
233
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
184
234
  } else {
185
- ctxt->replaceEntities = 1;
235
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
236
+ }
237
+
238
+ if (error) {
239
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
186
240
  }
187
241
 
188
- return value;
242
+ return rb_value;
189
243
  }
190
244
 
191
245
  /*
192
246
  * call-seq:
193
- * replace_entities
247
+ * replace_entities
248
+ *
249
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
250
+ *
251
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
194
252
  *
195
- * Should this parser replace entities? &amp; will get converted to '&' if
196
- * set to true
253
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
254
+ * replacement.
197
255
  */
198
256
  static VALUE
199
- get_replace_entities(VALUE self)
257
+ noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
200
258
  {
201
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
259
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
202
260
 
203
- if (0 == ctxt->replaceEntities) {
204
- return Qfalse;
205
- } else {
261
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
206
262
  return Qtrue;
263
+ } else {
264
+ return Qfalse;
207
265
  }
208
266
  }
209
267
 
210
268
  /*
211
269
  * call-seq: line
212
270
  *
213
- * Get the current line the parser context is processing.
271
+ * [Returns] (Integer) the line number of the line being currently parsed.
214
272
  */
215
273
  static VALUE
216
- line(VALUE self)
274
+ noko_xml_sax_parser_context__line(VALUE rb_context)
217
275
  {
218
276
  xmlParserInputPtr io;
219
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
277
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
220
278
 
221
279
  io = ctxt->input;
222
280
  if (io) {
@@ -229,12 +287,12 @@ line(VALUE self)
229
287
  /*
230
288
  * call-seq: column
231
289
  *
232
- * Get the current column the parser context is processing.
290
+ * [Returns] (Integer) the column number of the column being currently parsed.
233
291
  */
234
292
  static VALUE
235
- column(VALUE self)
293
+ noko_xml_sax_parser_context__column(VALUE rb_context)
236
294
  {
237
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
295
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
238
296
  xmlParserInputPtr io;
239
297
 
240
298
  io = ctxt->input;
@@ -247,41 +305,66 @@ column(VALUE self)
247
305
 
248
306
  /*
249
307
  * call-seq:
250
- * recovery=(boolean)
308
+ * recovery=(value)
309
+ *
310
+ * Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
311
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
312
+ * parser will stop processing the file on the first parsing error.
313
+ *
314
+ * [Parameters]
315
+ * - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
316
+ *
317
+ * [Returns] (Boolean) The passed +value+.
251
318
  *
252
- * Should this parser recover from structural errors? It will not stop processing
253
- * file on structural errors if set to true
319
+ * [Example]
320
+ * Because this class is generally not instantiated directly, you would typically set this option
321
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
322
+ *
323
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
324
+ * parser.parse(xml) do |ctx|
325
+ * ctx.recovery = true
326
+ * end
254
327
  */
255
328
  static VALUE
256
- set_recovery(VALUE self, VALUE value)
329
+ noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
257
330
  {
258
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
331
+ int error;
332
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
259
333
 
260
- if (value == Qfalse) {
261
- ctxt->recovery = 0;
334
+ if (RB_TEST(rb_value)) {
335
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
262
336
  } else {
263
- ctxt->recovery = 1;
337
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
338
+ }
339
+
340
+ if (error) {
341
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
264
342
  }
265
343
 
266
- return value;
344
+ return rb_value;
267
345
  }
268
346
 
269
347
  /*
270
348
  * call-seq:
271
- * recovery
349
+ * recovery
350
+ *
351
+ * Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
352
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
353
+ * parser will stop processing the file on the first parsing error.
354
+ *
355
+ * [Returns] (Boolean) Whether this parser will recover from parsing errors.
272
356
  *
273
- * Should this parser recover from structural errors? It will not stop processing
274
- * file on structural errors if set to true
357
+ * Default is +false+ for XML and +true+ for HTML.
275
358
  */
276
359
  static VALUE
277
- get_recovery(VALUE self)
360
+ noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
278
361
  {
279
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
362
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
280
363
 
281
- if (ctxt->recovery == 0) {
282
- return Qfalse;
283
- } else {
364
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
284
365
  return Qtrue;
366
+ } else {
367
+ return Qfalse;
285
368
  }
286
369
  }
287
370
 
@@ -292,17 +375,22 @@ noko_init_xml_sax_parser_context(void)
292
375
 
293
376
  rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
294
377
 
295
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
296
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
297
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
298
-
299
- rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
300
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
301
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
302
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
303
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
304
- rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
305
- rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
378
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
379
+ noko_xml_sax_parser_context_s_native_io, 2);
380
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
381
+ noko_xml_sax_parser_context_s_native_memory, 2);
382
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
383
+ noko_xml_sax_parser_context_s_native_file, 2);
384
+
385
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
386
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
387
+ noko_xml_sax_parser_context__replace_entities_set, 1);
388
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
389
+ noko_xml_sax_parser_context__replace_entities_get, 0);
390
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
391
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
392
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
393
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
306
394
 
307
395
  id_read = rb_intern("read");
308
396
  }