nokogiri 1.15.4 → 1.17.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (98)
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -19
  3. data/README.md +8 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +194 -141
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  12. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  13. data/ext/nokogiri/nokogiri.c +9 -2
  14. data/ext/nokogiri/nokogiri.h +25 -33
  15. data/ext/nokogiri/test_global_handlers.c +1 -1
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +3 -12
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +167 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -10
  25. data/ext/nokogiri/xml_node.c +142 -108
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +74 -100
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +214 -128
  31. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  32. data/ext/nokogiri/xml_schema.c +51 -87
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +3 -6
  35. data/ext/nokogiri/xml_xpath_context.c +4 -7
  36. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +64 -23
  42. data/gumbo-parser/src/tokenizer.c +7 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -27
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +45 -24
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -138
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/version/info.rb +6 -5
  68. data/lib/nokogiri/xml/attr.rb +2 -2
  69. data/lib/nokogiri/xml/builder.rb +8 -1
  70. data/lib/nokogiri/xml/document.rb +74 -31
  71. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  72. data/lib/nokogiri/xml/namespace.rb +1 -2
  73. data/lib/nokogiri/xml/node.rb +113 -35
  74. data/lib/nokogiri/xml/node_set.rb +12 -10
  75. data/lib/nokogiri/xml/parse_options.rb +1 -1
  76. data/lib/nokogiri/xml/pp/node.rb +6 -1
  77. data/lib/nokogiri/xml/reader.rb +51 -17
  78. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  79. data/lib/nokogiri/xml/sax/document.rb +174 -83
  80. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  81. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  82. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  83. data/lib/nokogiri/xml/sax.rb +48 -0
  84. data/lib/nokogiri/xml/schema.rb +112 -45
  85. data/lib/nokogiri/xml/searchable.rb +9 -11
  86. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  87. data/lib/nokogiri/xml.rb +14 -25
  88. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  89. data/lib/nokogiri/xslt.rb +4 -10
  90. data/lib/nokogiri.rb +1 -1
  91. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  92. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  93. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  94. metadata +15 -14
  95. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  96. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  97. data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
  98. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
5
5
  static ID id_read;
6
6
 
7
7
  static void
8
- xml_sax_parser_context_free(void *data)
8
+ xml_sax_parser_context_type_free(void *data)
9
9
  {
10
10
  xmlParserCtxtPtr ctxt = data;
11
11
  ctxt->sax = NULL;
12
- xmlFreeParserCtxt(ctxt);
12
+ if (ctxt->myDoc) {
13
+ xmlFreeDoc(ctxt->myDoc);
14
+ }
15
+ if (ctxt) {
16
+ xmlFreeParserCtxt(ctxt);
17
+ }
13
18
  }
14
19
 
15
20
  /*
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
17
22
  * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
23
  */
19
24
  static const rb_data_type_t xml_sax_parser_context_type = {
20
- .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
25
+ .wrap_struct_name = "xmlParserCtxt",
21
26
  .function = {
22
- .dfree = xml_sax_parser_context_free,
27
+ .dfree = xml_sax_parser_context_type_free,
23
28
  },
24
29
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
30
  };
@@ -38,100 +43,118 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
38
43
  return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
39
44
  }
40
45
 
46
+ void
47
+ noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
48
+ {
49
+ if (!NIL_P(rb_encoding)) {
50
+ VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
41
51
 
42
- /*
43
- * call-seq:
44
- * parse_io(io, encoding)
45
- *
46
- * Parse +io+ object with +encoding+
47
- */
52
+ char *encoding_name = StringValueCStr(rb_encoding_name);
53
+ if (encoding_name) {
54
+ libxmlStructuredErrorHandlerState handler_state;
55
+ VALUE rb_errors = rb_ary_new();
56
+
57
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
58
+
59
+ int result = xmlSwitchEncodingName(c_context, encoding_name);
60
+
61
+ noko__structured_error_func_restore(&handler_state);
62
+
63
+ if (result != 0) {
64
+ xmlFreeParserCtxt(c_context);
65
+
66
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
67
+ if (!NIL_P(exception)) {
68
+ rb_exc_raise(exception);
69
+ } else {
70
+ rb_raise(rb_eRuntimeError, "could not set encoding");
71
+ }
72
+ }
73
+ }
74
+ }
75
+ }
76
+
77
+ /* :nodoc: */
48
78
  static VALUE
49
- parse_io(VALUE klass, VALUE io, VALUE encoding)
79
+ noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
50
80
  {
51
- xmlParserCtxtPtr ctxt;
52
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
53
-
54
- if (!rb_respond_to(io, id_read)) {
81
+ if (!rb_respond_to(rb_io, id_read)) {
55
82
  rb_raise(rb_eTypeError, "argument expected to respond to :read");
56
83
  }
57
84
 
58
- ctxt = xmlCreateIOParserCtxt(NULL, NULL,
59
- (xmlInputReadCallback)noko_io_read,
60
- (xmlInputCloseCallback)noko_io_close,
61
- (void *)io, enc);
62
- if (ctxt->sax) {
63
- xmlFree(ctxt->sax);
64
- ctxt->sax = NULL;
85
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
86
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
65
87
  }
66
88
 
67
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
68
- }
89
+ xmlParserCtxtPtr c_context =
90
+ xmlCreateIOParserCtxt(NULL, NULL,
91
+ (xmlInputReadCallback)noko_io_read,
92
+ (xmlInputCloseCallback)noko_io_close,
93
+ (void *)rb_io, XML_CHAR_ENCODING_NONE);
94
+ if (!c_context) {
95
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
96
+ }
69
97
 
70
- /*
71
- * call-seq:
72
- * parse_file(filename)
73
- *
74
- * Parse file given +filename+
75
- */
76
- static VALUE
77
- parse_file(VALUE klass, VALUE filename)
78
- {
79
- xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
98
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
80
99
 
81
- if (ctxt->sax) {
82
- xmlFree(ctxt->sax);
83
- ctxt->sax = NULL;
100
+ if (c_context->sax) {
101
+ xmlFree(c_context->sax);
102
+ c_context->sax = NULL;
84
103
  }
85
104
 
86
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
105
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
87
106
  }
88
107
 
89
- /*
90
- * call-seq:
91
- * parse_memory(data)
92
- *
93
- * Parse the XML stored in memory in +data+
94
- */
108
+ /* :nodoc: */
95
109
  static VALUE
96
- parse_memory(VALUE klass, VALUE data)
110
+ noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
97
111
  {
98
- xmlParserCtxtPtr ctxt;
99
-
100
- Check_Type(data, T_STRING);
112
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
113
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
114
+ }
101
115
 
102
- if (!(int)RSTRING_LEN(data)) {
103
- rb_raise(rb_eRuntimeError, "data cannot be empty");
116
+ xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
117
+ if (!c_context) {
118
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
104
119
  }
105
120
 
106
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
107
- (int)RSTRING_LEN(data));
108
- if (ctxt->sax) {
109
- xmlFree(ctxt->sax);
110
- ctxt->sax = NULL;
121
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
122
+
123
+ if (c_context->sax) {
124
+ xmlFree(c_context->sax);
125
+ c_context->sax = NULL;
111
126
  }
112
127
 
113
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
128
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
114
129
  }
115
130
 
131
+ /* :nodoc: */
116
132
  static VALUE
117
- parse_doc(VALUE ctxt_val)
133
+ noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
118
134
  {
119
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
120
- xmlParseDocument(ctxt);
121
- return Qnil;
122
- }
135
+ Check_Type(rb_input, T_STRING);
136
+ if (!(int)RSTRING_LEN(rb_input)) {
137
+ rb_raise(rb_eRuntimeError, "input string cannot be empty");
138
+ }
123
139
 
124
- static VALUE
125
- parse_doc_finalize(VALUE ctxt_val)
126
- {
127
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
140
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
141
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
142
+ }
128
143
 
129
- if (NULL != ctxt->myDoc) {
130
- xmlFreeDoc(ctxt->myDoc);
144
+ xmlParserCtxtPtr c_context =
145
+ xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
146
+ if (!c_context) {
147
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
131
148
  }
132
149
 
133
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
134
- return Qnil;
150
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
151
+
152
+ if (c_context->sax) {
153
+ xmlFree(c_context->sax);
154
+ c_context->sax = NULL;
155
+ }
156
+
157
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
135
158
  }
136
159
 
137
160
  /*
@@ -139,80 +162,113 @@ parse_doc_finalize(VALUE ctxt_val)
139
162
  * parse_with(sax_handler)
140
163
  *
141
164
  * Use +sax_handler+ and parse the current document
165
+ *
166
+ * 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
167
+ * more convenient for most use cases.
142
168
  */
143
169
  static VALUE
144
- parse_with(VALUE self, VALUE sax_handler)
170
+ noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
145
171
  {
146
- xmlParserCtxtPtr ctxt;
172
+ xmlParserCtxtPtr c_context;
147
173
  xmlSAXHandlerPtr sax;
148
174
 
149
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
175
+ if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
150
176
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
151
177
  }
152
178
 
153
- ctxt = noko_xml_sax_parser_context_unwrap(self);
154
- sax = noko_sax_handler_unwrap(sax_handler);
179
+ c_context = noko_xml_sax_parser_context_unwrap(rb_context);
180
+ sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
155
181
 
156
- ctxt->sax = sax;
157
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
182
+ c_context->sax = sax;
183
+ c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
184
+ c_context->_private = (void *)rb_sax_parser;
158
185
 
159
186
  xmlSetStructuredErrorFunc(NULL, NULL);
160
187
 
161
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
188
+ /* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
189
+ * don't have any cleanup to do. The only memory we need to free is handled by
190
+ * xml_sax_parser_context_type_free */
191
+ xmlParseDocument(c_context);
162
192
 
163
193
  return Qnil;
164
194
  }
165
195
 
166
196
  /*
167
197
  * call-seq:
168
- * replace_entities=(boolean)
198
+ * replace_entities=(value)
199
+ *
200
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
201
+ *
202
+ * [Parameters]
203
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
204
+ *
205
+ * ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
206
+ * defaults to +false+ for this reason.
207
+ *
208
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
209
+ * replacement.
169
210
  *
170
- * Should this parser replace entities? &amp; will get converted to '&' if
171
- * set to true
211
+ * [Example]
212
+ * Because this class is generally not instantiated directly, you would typically set this option
213
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
214
+ *
215
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
216
+ * parser.parse(xml) do |ctx|
217
+ * ctx.replace_entities = true # this is UNSAFE for untrusted documents!
218
+ * end
172
219
  */
173
220
  static VALUE
174
- set_replace_entities(VALUE self, VALUE value)
221
+ noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
175
222
  {
176
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
223
+ int error;
224
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
177
225
 
178
- if (Qfalse == value) {
179
- ctxt->replaceEntities = 0;
226
+ if (RB_TEST(rb_value)) {
227
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
180
228
  } else {
181
- ctxt->replaceEntities = 1;
229
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
230
+ }
231
+
232
+ if (error) {
233
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
182
234
  }
183
235
 
184
- return value;
236
+ return rb_value;
185
237
  }
186
238
 
187
239
  /*
188
240
  * call-seq:
189
- * replace_entities
241
+ * replace_entities
242
+ *
243
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
190
244
  *
191
- * Should this parser replace entities? &amp; will get converted to '&' if
192
- * set to true
245
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
246
+ *
247
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
248
+ * replacement.
193
249
  */
194
250
  static VALUE
195
- get_replace_entities(VALUE self)
251
+ noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
196
252
  {
197
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
253
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
198
254
 
199
- if (0 == ctxt->replaceEntities) {
200
- return Qfalse;
201
- } else {
255
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
202
256
  return Qtrue;
257
+ } else {
258
+ return Qfalse;
203
259
  }
204
260
  }
205
261
 
206
262
  /*
207
263
  * call-seq: line
208
264
  *
209
- * Get the current line the parser context is processing.
265
+ * [Returns] (Integer) the line number of the line being currently parsed.
210
266
  */
211
267
  static VALUE
212
- line(VALUE self)
268
+ noko_xml_sax_parser_context__line(VALUE rb_context)
213
269
  {
214
270
  xmlParserInputPtr io;
215
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
271
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
216
272
 
217
273
  io = ctxt->input;
218
274
  if (io) {
@@ -225,12 +281,12 @@ line(VALUE self)
225
281
  /*
226
282
  * call-seq: column
227
283
  *
228
- * Get the current column the parser context is processing.
284
+ * [Returns] (Integer) the column number of the column being currently parsed.
229
285
  */
230
286
  static VALUE
231
- column(VALUE self)
287
+ noko_xml_sax_parser_context__column(VALUE rb_context)
232
288
  {
233
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
289
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
234
290
  xmlParserInputPtr io;
235
291
 
236
292
  io = ctxt->input;
@@ -243,41 +299,66 @@ column(VALUE self)
243
299
 
244
300
  /*
245
301
  * call-seq:
246
- * recovery=(boolean)
302
+ * recovery=(value)
303
+ *
304
+ * Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
305
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
306
+ * parser will stop processing the file on the first parsing error.
307
+ *
308
+ * [Parameters]
309
+ * - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
310
+ *
311
+ * [Returns] (Boolean) The passed +value+.
312
+ *
313
+ * [Example]
314
+ * Because this class is generally not instantiated directly, you would typically set this option
315
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
247
316
  *
248
- * Should this parser recover from structural errors? It will not stop processing
249
- * file on structural errors if set to true
317
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
318
+ * parser.parse(xml) do |ctx|
319
+ * ctx.recovery = true
320
+ * end
250
321
  */
251
322
  static VALUE
252
- set_recovery(VALUE self, VALUE value)
323
+ noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
253
324
  {
254
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
325
+ int error;
326
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
255
327
 
256
- if (value == Qfalse) {
257
- ctxt->recovery = 0;
328
+ if (RB_TEST(rb_value)) {
329
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
258
330
  } else {
259
- ctxt->recovery = 1;
331
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
260
332
  }
261
333
 
262
- return value;
334
+ if (error) {
335
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
336
+ }
337
+
338
+ return rb_value;
263
339
  }
264
340
 
265
341
  /*
266
342
  * call-seq:
267
- * recovery
343
+ * recovery
344
+ *
345
+ * Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
346
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
347
+ * parser will stop processing the file on the first parsing error.
348
+ *
349
+ * [Returns] (Boolean) Whether this parser will recover from parsing errors.
268
350
  *
269
- * Should this parser recover from structural errors? It will not stop processing
270
- * file on structural errors if set to true
351
+ * Default is +false+ for XML and +true+ for HTML.
271
352
  */
272
353
  static VALUE
273
- get_recovery(VALUE self)
354
+ noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
274
355
  {
275
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
356
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
276
357
 
277
- if (ctxt->recovery == 0) {
278
- return Qfalse;
279
- } else {
358
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
280
359
  return Qtrue;
360
+ } else {
361
+ return Qfalse;
281
362
  }
282
363
  }
283
364
 
@@ -288,17 +369,22 @@ noko_init_xml_sax_parser_context(void)
288
369
 
289
370
  rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
290
371
 
291
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
292
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
293
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
294
-
295
- rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
296
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
297
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
298
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
299
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
300
- rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
301
- rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
372
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
373
+ noko_xml_sax_parser_context_s_native_io, 2);
374
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
375
+ noko_xml_sax_parser_context_s_native_memory, 2);
376
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
377
+ noko_xml_sax_parser_context_s_native_file, 2);
378
+
379
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
380
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
381
+ noko_xml_sax_parser_context__replace_entities_set, 1);
382
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
383
+ noko_xml_sax_parser_context__replace_entities_get, 0);
384
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
385
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
386
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
387
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
302
388
 
303
389
  id_read = rb_intern("read");
304
390
  }