nokogiri 1.16.8-x86_64-linux → 1.17.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/nokogiri/extconf.rb +191 -137
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  12. data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
  13. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
  14. data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
  15. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
  16. data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
  17. data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
  18. data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
  19. data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
  20. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
  21. data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
  22. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
  23. data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
  24. data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
  25. data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
  26. data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
  27. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
  28. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
  29. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
  30. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
  31. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
  32. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
  33. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
  34. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
  35. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
  36. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
  37. data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
  38. data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
  39. data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
  40. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  41. data/ext/nokogiri/nokogiri.c +9 -2
  42. data/ext/nokogiri/nokogiri.h +18 -33
  43. data/ext/nokogiri/xml_attr.c +1 -1
  44. data/ext/nokogiri/xml_cdata.c +2 -10
  45. data/ext/nokogiri/xml_comment.c +3 -8
  46. data/ext/nokogiri/xml_document.c +163 -156
  47. data/ext/nokogiri/xml_document_fragment.c +10 -25
  48. data/ext/nokogiri/xml_dtd.c +1 -1
  49. data/ext/nokogiri/xml_element_content.c +9 -9
  50. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  51. data/ext/nokogiri/xml_namespace.c +6 -6
  52. data/ext/nokogiri/xml_node.c +130 -104
  53. data/ext/nokogiri/xml_node_set.c +46 -44
  54. data/ext/nokogiri/xml_reader.c +54 -58
  55. data/ext/nokogiri/xml_relax_ng.c +35 -56
  56. data/ext/nokogiri/xml_sax_parser.c +156 -88
  57. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  58. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  59. data/ext/nokogiri/xml_schema.c +50 -85
  60. data/ext/nokogiri/xml_syntax_error.c +19 -11
  61. data/ext/nokogiri/xml_text.c +2 -4
  62. data/ext/nokogiri/xml_xpath_context.c +2 -2
  63. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  64. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  65. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  66. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  67. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  68. data/lib/nokogiri/class_resolver.rb +1 -1
  69. data/lib/nokogiri/css/node.rb +6 -2
  70. data/lib/nokogiri/css/parser.rb +6 -4
  71. data/lib/nokogiri/css/parser.y +2 -2
  72. data/lib/nokogiri/css/parser_extras.rb +6 -66
  73. data/lib/nokogiri/css/selector_cache.rb +38 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -4
  75. data/lib/nokogiri/css/tokenizer.rex +9 -8
  76. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  77. data/lib/nokogiri/css.rb +86 -20
  78. data/lib/nokogiri/decorators/slop.rb +3 -5
  79. data/lib/nokogiri/encoding_handler.rb +2 -2
  80. data/lib/nokogiri/html4/document.rb +44 -23
  81. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  82. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  83. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  84. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  85. data/lib/nokogiri/html4.rb +9 -14
  86. data/lib/nokogiri/html5/builder.rb +40 -0
  87. data/lib/nokogiri/html5/document.rb +61 -30
  88. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  89. data/lib/nokogiri/html5/node.rb +4 -4
  90. data/lib/nokogiri/html5.rb +114 -72
  91. data/lib/nokogiri/version/constant.rb +1 -1
  92. data/lib/nokogiri/xml/builder.rb +8 -1
  93. data/lib/nokogiri/xml/document.rb +70 -26
  94. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  95. data/lib/nokogiri/xml/node.rb +82 -11
  96. data/lib/nokogiri/xml/node_set.rb +9 -7
  97. data/lib/nokogiri/xml/parse_options.rb +1 -1
  98. data/lib/nokogiri/xml/pp/node.rb +6 -1
  99. data/lib/nokogiri/xml/reader.rb +46 -13
  100. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  101. data/lib/nokogiri/xml/sax/document.rb +174 -83
  102. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  103. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  104. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  105. data/lib/nokogiri/xml/sax.rb +48 -0
  106. data/lib/nokogiri/xml/schema.rb +112 -45
  107. data/lib/nokogiri/xml/searchable.rb +6 -8
  108. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  109. data/lib/nokogiri/xml.rb +13 -24
  110. data/lib/nokogiri/xslt.rb +3 -9
  111. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  112. metadata +8 -4
  113. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
5
5
  static ID id_read;
6
6
 
7
7
  static void
8
- xml_sax_parser_context_free(void *data)
8
+ xml_sax_parser_context_type_free(void *data)
9
9
  {
10
10
  xmlParserCtxtPtr ctxt = data;
11
11
  ctxt->sax = NULL;
12
- xmlFreeParserCtxt(ctxt);
12
+ if (ctxt->myDoc) {
13
+ xmlFreeDoc(ctxt->myDoc);
14
+ }
15
+ if (ctxt) {
16
+ xmlFreeParserCtxt(ctxt);
17
+ }
13
18
  }
14
19
 
15
20
  /*
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
17
22
  * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
23
  */
19
24
  static const rb_data_type_t xml_sax_parser_context_type = {
20
- .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
25
+ .wrap_struct_name = "xmlParserCtxt",
21
26
  .function = {
22
- .dfree = xml_sax_parser_context_free,
27
+ .dfree = xml_sax_parser_context_type_free,
23
28
  },
24
29
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
30
  };
@@ -38,104 +43,118 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
38
43
  return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
39
44
  }
40
45
 
46
+ void
47
+ noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
48
+ {
49
+ if (!NIL_P(rb_encoding)) {
50
+ VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
51
+
52
+ char *encoding_name = StringValueCStr(rb_encoding_name);
53
+ if (encoding_name) {
54
+ libxmlStructuredErrorHandlerState handler_state;
55
+ VALUE rb_errors = rb_ary_new();
41
56
 
42
- /*
43
- * call-seq:
44
- * parse_io(io, encoding)
45
- *
46
- * Parse +io+ object with +encoding+
47
- */
57
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
58
+
59
+ int result = xmlSwitchEncodingName(c_context, encoding_name);
60
+
61
+ noko__structured_error_func_restore(&handler_state);
62
+
63
+ if (result != 0) {
64
+ xmlFreeParserCtxt(c_context);
65
+
66
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
67
+ if (!NIL_P(exception)) {
68
+ rb_exc_raise(exception);
69
+ } else {
70
+ rb_raise(rb_eRuntimeError, "could not set encoding");
71
+ }
72
+ }
73
+ }
74
+ }
75
+ }
76
+
77
+ /* :nodoc: */
48
78
  static VALUE
49
- parse_io(VALUE klass, VALUE io, VALUE encoding)
79
+ noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
50
80
  {
51
- xmlParserCtxtPtr ctxt;
52
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
53
-
54
- if (!rb_respond_to(io, id_read)) {
81
+ if (!rb_respond_to(rb_io, id_read)) {
55
82
  rb_raise(rb_eTypeError, "argument expected to respond to :read");
56
83
  }
57
84
 
58
- ctxt = xmlCreateIOParserCtxt(NULL, NULL,
59
- (xmlInputReadCallback)noko_io_read,
60
- (xmlInputCloseCallback)noko_io_close,
61
- (void *)io, enc);
62
- if (!ctxt) {
63
- rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
85
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
86
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
64
87
  }
65
88
 
66
- if (ctxt->sax) {
67
- xmlFree(ctxt->sax);
68
- ctxt->sax = NULL;
89
+ xmlParserCtxtPtr c_context =
90
+ xmlCreateIOParserCtxt(NULL, NULL,
91
+ (xmlInputReadCallback)noko_io_read,
92
+ (xmlInputCloseCallback)noko_io_close,
93
+ (void *)rb_io, XML_CHAR_ENCODING_NONE);
94
+ if (!c_context) {
95
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
69
96
  }
70
97
 
71
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
72
- }
73
-
74
- /*
75
- * call-seq:
76
- * parse_file(filename)
77
- *
78
- * Parse file given +filename+
79
- */
80
- static VALUE
81
- parse_file(VALUE klass, VALUE filename)
82
- {
83
- xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
98
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
84
99
 
85
- if (ctxt->sax) {
86
- xmlFree(ctxt->sax);
87
- ctxt->sax = NULL;
100
+ if (c_context->sax) {
101
+ xmlFree(c_context->sax);
102
+ c_context->sax = NULL;
88
103
  }
89
104
 
90
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
105
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
91
106
  }
92
107
 
93
- /*
94
- * call-seq:
95
- * parse_memory(data)
96
- *
97
- * Parse the XML stored in memory in +data+
98
- */
108
+ /* :nodoc: */
99
109
  static VALUE
100
- parse_memory(VALUE klass, VALUE data)
110
+ noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
101
111
  {
102
- xmlParserCtxtPtr ctxt;
103
-
104
- Check_Type(data, T_STRING);
112
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
113
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
114
+ }
105
115
 
106
- if (!(int)RSTRING_LEN(data)) {
107
- rb_raise(rb_eRuntimeError, "data cannot be empty");
116
+ xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
117
+ if (!c_context) {
118
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
108
119
  }
109
120
 
110
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
111
- (int)RSTRING_LEN(data));
112
- if (ctxt->sax) {
113
- xmlFree(ctxt->sax);
114
- ctxt->sax = NULL;
121
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
122
+
123
+ if (c_context->sax) {
124
+ xmlFree(c_context->sax);
125
+ c_context->sax = NULL;
115
126
  }
116
127
 
117
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
128
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
118
129
  }
119
130
 
131
+ /* :nodoc: */
120
132
  static VALUE
121
- parse_doc(VALUE ctxt_val)
133
+ noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
122
134
  {
123
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
124
- xmlParseDocument(ctxt);
125
- return Qnil;
126
- }
135
+ Check_Type(rb_input, T_STRING);
136
+ if (!(int)RSTRING_LEN(rb_input)) {
137
+ rb_raise(rb_eRuntimeError, "input string cannot be empty");
138
+ }
127
139
 
128
- static VALUE
129
- parse_doc_finalize(VALUE ctxt_val)
130
- {
131
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
140
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
141
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
142
+ }
132
143
 
133
- if (NULL != ctxt->myDoc) {
134
- xmlFreeDoc(ctxt->myDoc);
144
+ xmlParserCtxtPtr c_context =
145
+ xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
146
+ if (!c_context) {
147
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
135
148
  }
136
149
 
137
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
138
- return Qnil;
150
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
151
+
152
+ if (c_context->sax) {
153
+ xmlFree(c_context->sax);
154
+ c_context->sax = NULL;
155
+ }
156
+
157
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
139
158
  }
140
159
 
141
160
  /*
@@ -143,80 +162,113 @@ parse_doc_finalize(VALUE ctxt_val)
143
162
  * parse_with(sax_handler)
144
163
  *
145
164
  * Use +sax_handler+ and parse the current document
165
+ *
166
+ * 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
167
+ * more convenient for most use cases.
146
168
  */
147
169
  static VALUE
148
- parse_with(VALUE self, VALUE sax_handler)
170
+ noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
149
171
  {
150
- xmlParserCtxtPtr ctxt;
172
+ xmlParserCtxtPtr c_context;
151
173
  xmlSAXHandlerPtr sax;
152
174
 
153
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
175
+ if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
154
176
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
155
177
  }
156
178
 
157
- ctxt = noko_xml_sax_parser_context_unwrap(self);
158
- sax = noko_sax_handler_unwrap(sax_handler);
179
+ c_context = noko_xml_sax_parser_context_unwrap(rb_context);
180
+ sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
159
181
 
160
- ctxt->sax = sax;
161
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
182
+ c_context->sax = sax;
183
+ c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
184
+ c_context->_private = (void *)rb_sax_parser;
162
185
 
163
186
  xmlSetStructuredErrorFunc(NULL, NULL);
164
187
 
165
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
188
+ /* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
189
+ * don't have any cleanup to do. The only memory we need to free is handled by
190
+ * xml_sax_parser_context_type_free */
191
+ xmlParseDocument(c_context);
166
192
 
167
193
  return Qnil;
168
194
  }
169
195
 
170
196
  /*
171
197
  * call-seq:
172
- * replace_entities=(boolean)
198
+ * replace_entities=(value)
199
+ *
200
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
201
+ *
202
+ * [Parameters]
203
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
204
+ *
205
+ * ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
206
+ * defaults to +false+ for this reason.
173
207
  *
174
- * Should this parser replace entities? &amp; will get converted to '&' if
175
- * set to true
208
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
209
+ * replacement.
210
+ *
211
+ * [Example]
212
+ * Because this class is generally not instantiated directly, you would typically set this option
213
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
214
+ *
215
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
216
+ * parser.parse(xml) do |ctx|
217
+ * ctx.replace_entities = true # this is UNSAFE for untrusted documents!
218
+ * end
176
219
  */
177
220
  static VALUE
178
- set_replace_entities(VALUE self, VALUE value)
221
+ noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
179
222
  {
180
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
223
+ int error;
224
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
181
225
 
182
- if (Qfalse == value) {
183
- ctxt->replaceEntities = 0;
226
+ if (RB_TEST(rb_value)) {
227
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
184
228
  } else {
185
- ctxt->replaceEntities = 1;
229
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
230
+ }
231
+
232
+ if (error) {
233
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
186
234
  }
187
235
 
188
- return value;
236
+ return rb_value;
189
237
  }
190
238
 
191
239
  /*
192
240
  * call-seq:
193
- * replace_entities
241
+ * replace_entities
194
242
  *
195
- * Should this parser replace entities? &amp; will get converted to '&' if
196
- * set to true
243
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
244
+ *
245
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
246
+ *
247
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
248
+ * replacement.
197
249
  */
198
250
  static VALUE
199
- get_replace_entities(VALUE self)
251
+ noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
200
252
  {
201
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
253
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
202
254
 
203
- if (0 == ctxt->replaceEntities) {
204
- return Qfalse;
205
- } else {
255
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
206
256
  return Qtrue;
257
+ } else {
258
+ return Qfalse;
207
259
  }
208
260
  }
209
261
 
210
262
  /*
211
263
  * call-seq: line
212
264
  *
213
- * Get the current line the parser context is processing.
265
+ * [Returns] (Integer) the line number of the line being currently parsed.
214
266
  */
215
267
  static VALUE
216
- line(VALUE self)
268
+ noko_xml_sax_parser_context__line(VALUE rb_context)
217
269
  {
218
270
  xmlParserInputPtr io;
219
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
271
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
220
272
 
221
273
  io = ctxt->input;
222
274
  if (io) {
@@ -229,12 +281,12 @@ line(VALUE self)
229
281
  /*
230
282
  * call-seq: column
231
283
  *
232
- * Get the current column the parser context is processing.
284
+ * [Returns] (Integer) the column number of the column being currently parsed.
233
285
  */
234
286
  static VALUE
235
- column(VALUE self)
287
+ noko_xml_sax_parser_context__column(VALUE rb_context)
236
288
  {
237
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
289
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
238
290
  xmlParserInputPtr io;
239
291
 
240
292
  io = ctxt->input;
@@ -247,41 +299,66 @@ column(VALUE self)
247
299
 
248
300
  /*
249
301
  * call-seq:
250
- * recovery=(boolean)
302
+ * recovery=(value)
303
+ *
304
+ * Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
305
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
306
+ * parser will stop processing the file on the first parsing error.
307
+ *
308
+ * [Parameters]
309
+ * - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
310
+ *
311
+ * [Returns] (Boolean) The passed +value+.
251
312
  *
252
- * Should this parser recover from structural errors? It will not stop processing
253
- * file on structural errors if set to true
313
+ * [Example]
314
+ * Because this class is generally not instantiated directly, you would typically set this option
315
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
316
+ *
317
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
318
+ * parser.parse(xml) do |ctx|
319
+ * ctx.recovery = true
320
+ * end
254
321
  */
255
322
  static VALUE
256
- set_recovery(VALUE self, VALUE value)
323
+ noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
257
324
  {
258
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
325
+ int error;
326
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
259
327
 
260
- if (value == Qfalse) {
261
- ctxt->recovery = 0;
328
+ if (RB_TEST(rb_value)) {
329
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
262
330
  } else {
263
- ctxt->recovery = 1;
331
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
332
+ }
333
+
334
+ if (error) {
335
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
264
336
  }
265
337
 
266
- return value;
338
+ return rb_value;
267
339
  }
268
340
 
269
341
  /*
270
342
  * call-seq:
271
- * recovery
343
+ * recovery
344
+ *
345
+ * Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
346
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
347
+ * parser will stop processing the file on the first parsing error.
348
+ *
349
+ * [Returns] (Boolean) Whether this parser will recover from parsing errors.
272
350
  *
273
- * Should this parser recover from structural errors? It will not stop processing
274
- * file on structural errors if set to true
351
+ * Default is +false+ for XML and +true+ for HTML.
275
352
  */
276
353
  static VALUE
277
- get_recovery(VALUE self)
354
+ noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
278
355
  {
279
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
356
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
280
357
 
281
- if (ctxt->recovery == 0) {
282
- return Qfalse;
283
- } else {
358
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
284
359
  return Qtrue;
360
+ } else {
361
+ return Qfalse;
285
362
  }
286
363
  }
287
364
 
@@ -292,17 +369,22 @@ noko_init_xml_sax_parser_context(void)
292
369
 
293
370
  rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
294
371
 
295
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
296
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
297
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
298
-
299
- rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
300
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
301
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
302
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
303
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
304
- rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
305
- rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
372
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
373
+ noko_xml_sax_parser_context_s_native_io, 2);
374
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
375
+ noko_xml_sax_parser_context_s_native_memory, 2);
376
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
377
+ noko_xml_sax_parser_context_s_native_file, 2);
378
+
379
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
380
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
381
+ noko_xml_sax_parser_context__replace_entities_set, 1);
382
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
383
+ noko_xml_sax_parser_context__replace_entities_get, 0);
384
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
385
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
386
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
387
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
306
388
 
307
389
  id_read = rb_intern("read");
308
390
  }