nokogiri 1.16.8-x86_64-darwin → 1.17.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/nokogiri/extconf.rb +191 -137
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  12. data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
  13. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
  14. data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
  15. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
  16. data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
  17. data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
  18. data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
  19. data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
  20. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
  21. data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
  22. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
  23. data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
  24. data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
  25. data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
  26. data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
  27. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
  28. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
  29. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
  30. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
  31. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
  32. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
  33. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
  34. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
  35. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
  36. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
  37. data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
  38. data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
  39. data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
  40. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  41. data/ext/nokogiri/nokogiri.c +9 -2
  42. data/ext/nokogiri/nokogiri.h +18 -33
  43. data/ext/nokogiri/xml_attr.c +1 -1
  44. data/ext/nokogiri/xml_cdata.c +2 -10
  45. data/ext/nokogiri/xml_comment.c +3 -8
  46. data/ext/nokogiri/xml_document.c +163 -156
  47. data/ext/nokogiri/xml_document_fragment.c +10 -25
  48. data/ext/nokogiri/xml_dtd.c +1 -1
  49. data/ext/nokogiri/xml_element_content.c +9 -9
  50. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  51. data/ext/nokogiri/xml_namespace.c +6 -6
  52. data/ext/nokogiri/xml_node.c +130 -104
  53. data/ext/nokogiri/xml_node_set.c +46 -44
  54. data/ext/nokogiri/xml_reader.c +54 -58
  55. data/ext/nokogiri/xml_relax_ng.c +35 -56
  56. data/ext/nokogiri/xml_sax_parser.c +156 -88
  57. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  58. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  59. data/ext/nokogiri/xml_schema.c +50 -85
  60. data/ext/nokogiri/xml_syntax_error.c +19 -11
  61. data/ext/nokogiri/xml_text.c +2 -4
  62. data/ext/nokogiri/xml_xpath_context.c +2 -2
  63. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  64. data/lib/nokogiri/3.0/nokogiri.bundle +0 -0
  65. data/lib/nokogiri/3.1/nokogiri.bundle +0 -0
  66. data/lib/nokogiri/3.2/nokogiri.bundle +0 -0
  67. data/lib/nokogiri/3.3/nokogiri.bundle +0 -0
  68. data/lib/nokogiri/class_resolver.rb +1 -1
  69. data/lib/nokogiri/css/node.rb +6 -2
  70. data/lib/nokogiri/css/parser.rb +6 -4
  71. data/lib/nokogiri/css/parser.y +2 -2
  72. data/lib/nokogiri/css/parser_extras.rb +6 -66
  73. data/lib/nokogiri/css/selector_cache.rb +38 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -4
  75. data/lib/nokogiri/css/tokenizer.rex +9 -8
  76. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  77. data/lib/nokogiri/css.rb +86 -20
  78. data/lib/nokogiri/decorators/slop.rb +3 -5
  79. data/lib/nokogiri/encoding_handler.rb +2 -2
  80. data/lib/nokogiri/html4/document.rb +44 -23
  81. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  82. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  83. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  84. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  85. data/lib/nokogiri/html4.rb +9 -14
  86. data/lib/nokogiri/html5/builder.rb +40 -0
  87. data/lib/nokogiri/html5/document.rb +61 -30
  88. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  89. data/lib/nokogiri/html5/node.rb +4 -4
  90. data/lib/nokogiri/html5.rb +114 -72
  91. data/lib/nokogiri/version/constant.rb +1 -1
  92. data/lib/nokogiri/xml/builder.rb +8 -1
  93. data/lib/nokogiri/xml/document.rb +70 -26
  94. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  95. data/lib/nokogiri/xml/node.rb +82 -11
  96. data/lib/nokogiri/xml/node_set.rb +9 -7
  97. data/lib/nokogiri/xml/parse_options.rb +1 -1
  98. data/lib/nokogiri/xml/pp/node.rb +6 -1
  99. data/lib/nokogiri/xml/reader.rb +46 -13
  100. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  101. data/lib/nokogiri/xml/sax/document.rb +174 -83
  102. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  103. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  104. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  105. data/lib/nokogiri/xml/sax.rb +48 -0
  106. data/lib/nokogiri/xml/schema.rb +112 -45
  107. data/lib/nokogiri/xml/searchable.rb +6 -8
  108. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  109. data/lib/nokogiri/xml.rb +13 -24
  110. data/lib/nokogiri/xslt.rb +3 -9
  111. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  112. metadata +8 -4
  113. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
5
5
  static ID id_read;
6
6
 
7
7
  static void
8
- xml_sax_parser_context_free(void *data)
8
+ xml_sax_parser_context_type_free(void *data)
9
9
  {
10
10
  xmlParserCtxtPtr ctxt = data;
11
11
  ctxt->sax = NULL;
12
- xmlFreeParserCtxt(ctxt);
12
+ if (ctxt->myDoc) {
13
+ xmlFreeDoc(ctxt->myDoc);
14
+ }
15
+ if (ctxt) {
16
+ xmlFreeParserCtxt(ctxt);
17
+ }
13
18
  }
14
19
 
15
20
  /*
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
17
22
  * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
23
  */
19
24
  static const rb_data_type_t xml_sax_parser_context_type = {
20
- .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
25
+ .wrap_struct_name = "xmlParserCtxt",
21
26
  .function = {
22
- .dfree = xml_sax_parser_context_free,
27
+ .dfree = xml_sax_parser_context_type_free,
23
28
  },
24
29
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
30
  };
@@ -38,104 +43,118 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
38
43
  return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
39
44
  }
40
45
 
46
+ void
47
+ noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
48
+ {
49
+ if (!NIL_P(rb_encoding)) {
50
+ VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
51
+
52
+ char *encoding_name = StringValueCStr(rb_encoding_name);
53
+ if (encoding_name) {
54
+ libxmlStructuredErrorHandlerState handler_state;
55
+ VALUE rb_errors = rb_ary_new();
41
56
 
42
- /*
43
- * call-seq:
44
- * parse_io(io, encoding)
45
- *
46
- * Parse +io+ object with +encoding+
47
- */
57
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
58
+
59
+ int result = xmlSwitchEncodingName(c_context, encoding_name);
60
+
61
+ noko__structured_error_func_restore(&handler_state);
62
+
63
+ if (result != 0) {
64
+ xmlFreeParserCtxt(c_context);
65
+
66
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
67
+ if (!NIL_P(exception)) {
68
+ rb_exc_raise(exception);
69
+ } else {
70
+ rb_raise(rb_eRuntimeError, "could not set encoding");
71
+ }
72
+ }
73
+ }
74
+ }
75
+ }
76
+
77
+ /* :nodoc: */
48
78
  static VALUE
49
- parse_io(VALUE klass, VALUE io, VALUE encoding)
79
+ noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
50
80
  {
51
- xmlParserCtxtPtr ctxt;
52
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
53
-
54
- if (!rb_respond_to(io, id_read)) {
81
+ if (!rb_respond_to(rb_io, id_read)) {
55
82
  rb_raise(rb_eTypeError, "argument expected to respond to :read");
56
83
  }
57
84
 
58
- ctxt = xmlCreateIOParserCtxt(NULL, NULL,
59
- (xmlInputReadCallback)noko_io_read,
60
- (xmlInputCloseCallback)noko_io_close,
61
- (void *)io, enc);
62
- if (!ctxt) {
63
- rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
85
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
86
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
64
87
  }
65
88
 
66
- if (ctxt->sax) {
67
- xmlFree(ctxt->sax);
68
- ctxt->sax = NULL;
89
+ xmlParserCtxtPtr c_context =
90
+ xmlCreateIOParserCtxt(NULL, NULL,
91
+ (xmlInputReadCallback)noko_io_read,
92
+ (xmlInputCloseCallback)noko_io_close,
93
+ (void *)rb_io, XML_CHAR_ENCODING_NONE);
94
+ if (!c_context) {
95
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
69
96
  }
70
97
 
71
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
72
- }
73
-
74
- /*
75
- * call-seq:
76
- * parse_file(filename)
77
- *
78
- * Parse file given +filename+
79
- */
80
- static VALUE
81
- parse_file(VALUE klass, VALUE filename)
82
- {
83
- xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
98
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
84
99
 
85
- if (ctxt->sax) {
86
- xmlFree(ctxt->sax);
87
- ctxt->sax = NULL;
100
+ if (c_context->sax) {
101
+ xmlFree(c_context->sax);
102
+ c_context->sax = NULL;
88
103
  }
89
104
 
90
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
105
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
91
106
  }
92
107
 
93
- /*
94
- * call-seq:
95
- * parse_memory(data)
96
- *
97
- * Parse the XML stored in memory in +data+
98
- */
108
+ /* :nodoc: */
99
109
  static VALUE
100
- parse_memory(VALUE klass, VALUE data)
110
+ noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
101
111
  {
102
- xmlParserCtxtPtr ctxt;
103
-
104
- Check_Type(data, T_STRING);
112
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
113
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
114
+ }
105
115
 
106
- if (!(int)RSTRING_LEN(data)) {
107
- rb_raise(rb_eRuntimeError, "data cannot be empty");
116
+ xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
117
+ if (!c_context) {
118
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
108
119
  }
109
120
 
110
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
111
- (int)RSTRING_LEN(data));
112
- if (ctxt->sax) {
113
- xmlFree(ctxt->sax);
114
- ctxt->sax = NULL;
121
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
122
+
123
+ if (c_context->sax) {
124
+ xmlFree(c_context->sax);
125
+ c_context->sax = NULL;
115
126
  }
116
127
 
117
- return noko_xml_sax_parser_context_wrap(klass, ctxt);
128
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
118
129
  }
119
130
 
131
+ /* :nodoc: */
120
132
  static VALUE
121
- parse_doc(VALUE ctxt_val)
133
+ noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
122
134
  {
123
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
124
- xmlParseDocument(ctxt);
125
- return Qnil;
126
- }
135
+ Check_Type(rb_input, T_STRING);
136
+ if (!(int)RSTRING_LEN(rb_input)) {
137
+ rb_raise(rb_eRuntimeError, "input string cannot be empty");
138
+ }
127
139
 
128
- static VALUE
129
- parse_doc_finalize(VALUE ctxt_val)
130
- {
131
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
140
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
141
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
142
+ }
132
143
 
133
- if (NULL != ctxt->myDoc) {
134
- xmlFreeDoc(ctxt->myDoc);
144
+ xmlParserCtxtPtr c_context =
145
+ xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
146
+ if (!c_context) {
147
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
135
148
  }
136
149
 
137
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
138
- return Qnil;
150
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
151
+
152
+ if (c_context->sax) {
153
+ xmlFree(c_context->sax);
154
+ c_context->sax = NULL;
155
+ }
156
+
157
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
139
158
  }
140
159
 
141
160
  /*
@@ -143,80 +162,113 @@ parse_doc_finalize(VALUE ctxt_val)
143
162
  * parse_with(sax_handler)
144
163
  *
145
164
  * Use +sax_handler+ and parse the current document
165
+ *
166
+ * 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
167
+ * more convenient for most use cases.
146
168
  */
147
169
  static VALUE
148
- parse_with(VALUE self, VALUE sax_handler)
170
+ noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
149
171
  {
150
- xmlParserCtxtPtr ctxt;
172
+ xmlParserCtxtPtr c_context;
151
173
  xmlSAXHandlerPtr sax;
152
174
 
153
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
175
+ if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
154
176
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
155
177
  }
156
178
 
157
- ctxt = noko_xml_sax_parser_context_unwrap(self);
158
- sax = noko_sax_handler_unwrap(sax_handler);
179
+ c_context = noko_xml_sax_parser_context_unwrap(rb_context);
180
+ sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
159
181
 
160
- ctxt->sax = sax;
161
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
182
+ c_context->sax = sax;
183
+ c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
184
+ c_context->_private = (void *)rb_sax_parser;
162
185
 
163
186
  xmlSetStructuredErrorFunc(NULL, NULL);
164
187
 
165
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
188
+ /* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
189
+ * don't have any cleanup to do. The only memory we need to free is handled by
190
+ * xml_sax_parser_context_type_free */
191
+ xmlParseDocument(c_context);
166
192
 
167
193
  return Qnil;
168
194
  }
169
195
 
170
196
  /*
171
197
  * call-seq:
172
- * replace_entities=(boolean)
198
+ * replace_entities=(value)
199
+ *
200
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
201
+ *
202
+ * [Parameters]
203
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
204
+ *
205
+ * ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
206
+ * defaults to +false+ for this reason.
173
207
  *
174
- * Should this parser replace entities? &amp; will get converted to '&' if
175
- * set to true
208
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
209
+ * replacement.
210
+ *
211
+ * [Example]
212
+ * Because this class is generally not instantiated directly, you would typically set this option
213
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
214
+ *
215
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
216
+ * parser.parse(xml) do |ctx|
217
+ * ctx.replace_entities = true # this is UNSAFE for untrusted documents!
218
+ * end
176
219
  */
177
220
  static VALUE
178
- set_replace_entities(VALUE self, VALUE value)
221
+ noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
179
222
  {
180
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
223
+ int error;
224
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
181
225
 
182
- if (Qfalse == value) {
183
- ctxt->replaceEntities = 0;
226
+ if (RB_TEST(rb_value)) {
227
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
184
228
  } else {
185
- ctxt->replaceEntities = 1;
229
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
230
+ }
231
+
232
+ if (error) {
233
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
186
234
  }
187
235
 
188
- return value;
236
+ return rb_value;
189
237
  }
190
238
 
191
239
  /*
192
240
  * call-seq:
193
- * replace_entities
241
+ * replace_entities
194
242
  *
195
- * Should this parser replace entities? &amp; will get converted to '&' if
196
- * set to true
243
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
244
+ *
245
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
246
+ *
247
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
248
+ * replacement.
197
249
  */
198
250
  static VALUE
199
- get_replace_entities(VALUE self)
251
+ noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
200
252
  {
201
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
253
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
202
254
 
203
- if (0 == ctxt->replaceEntities) {
204
- return Qfalse;
205
- } else {
255
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
206
256
  return Qtrue;
257
+ } else {
258
+ return Qfalse;
207
259
  }
208
260
  }
209
261
 
210
262
  /*
211
263
  * call-seq: line
212
264
  *
213
- * Get the current line the parser context is processing.
265
+ * [Returns] (Integer) the line number of the line being currently parsed.
214
266
  */
215
267
  static VALUE
216
- line(VALUE self)
268
+ noko_xml_sax_parser_context__line(VALUE rb_context)
217
269
  {
218
270
  xmlParserInputPtr io;
219
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
271
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
220
272
 
221
273
  io = ctxt->input;
222
274
  if (io) {
@@ -229,12 +281,12 @@ line(VALUE self)
229
281
  /*
230
282
  * call-seq: column
231
283
  *
232
- * Get the current column the parser context is processing.
284
+ * [Returns] (Integer) the column number of the column being currently parsed.
233
285
  */
234
286
  static VALUE
235
- column(VALUE self)
287
+ noko_xml_sax_parser_context__column(VALUE rb_context)
236
288
  {
237
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
289
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
238
290
  xmlParserInputPtr io;
239
291
 
240
292
  io = ctxt->input;
@@ -247,41 +299,66 @@ column(VALUE self)
247
299
 
248
300
  /*
249
301
  * call-seq:
250
- * recovery=(boolean)
302
+ * recovery=(value)
303
+ *
304
+ * Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
305
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
306
+ * parser will stop processing the file on the first parsing error.
307
+ *
308
+ * [Parameters]
309
+ * - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
310
+ *
311
+ * [Returns] (Boolean) The passed +value+.
251
312
  *
252
- * Should this parser recover from structural errors? It will not stop processing
253
- * file on structural errors if set to true
313
+ * [Example]
314
+ * Because this class is generally not instantiated directly, you would typically set this option
315
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
316
+ *
317
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
318
+ * parser.parse(xml) do |ctx|
319
+ * ctx.recovery = true
320
+ * end
254
321
  */
255
322
  static VALUE
256
- set_recovery(VALUE self, VALUE value)
323
+ noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
257
324
  {
258
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
325
+ int error;
326
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
259
327
 
260
- if (value == Qfalse) {
261
- ctxt->recovery = 0;
328
+ if (RB_TEST(rb_value)) {
329
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
262
330
  } else {
263
- ctxt->recovery = 1;
331
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
332
+ }
333
+
334
+ if (error) {
335
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
264
336
  }
265
337
 
266
- return value;
338
+ return rb_value;
267
339
  }
268
340
 
269
341
  /*
270
342
  * call-seq:
271
- * recovery
343
+ * recovery
344
+ *
345
+ * Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
346
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
347
+ * parser will stop processing the file on the first parsing error.
348
+ *
349
+ * [Returns] (Boolean) Whether this parser will recover from parsing errors.
272
350
  *
273
- * Should this parser recover from structural errors? It will not stop processing
274
- * file on structural errors if set to true
351
+ * Default is +false+ for XML and +true+ for HTML.
275
352
  */
276
353
  static VALUE
277
- get_recovery(VALUE self)
354
+ noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
278
355
  {
279
- xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
356
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
280
357
 
281
- if (ctxt->recovery == 0) {
282
- return Qfalse;
283
- } else {
358
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
284
359
  return Qtrue;
360
+ } else {
361
+ return Qfalse;
285
362
  }
286
363
  }
287
364
 
@@ -292,17 +369,22 @@ noko_init_xml_sax_parser_context(void)
292
369
 
293
370
  rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
294
371
 
295
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
296
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
297
- rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
298
-
299
- rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
300
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
301
- rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
302
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
303
- rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
304
- rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
305
- rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
372
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
373
+ noko_xml_sax_parser_context_s_native_io, 2);
374
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
375
+ noko_xml_sax_parser_context_s_native_memory, 2);
376
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
377
+ noko_xml_sax_parser_context_s_native_file, 2);
378
+
379
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
380
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
381
+ noko_xml_sax_parser_context__replace_entities_set, 1);
382
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
383
+ noko_xml_sax_parser_context__replace_entities_get, 0);
384
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
385
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
386
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
387
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
306
388
 
307
389
  id_read = rb_intern("read");
308
390
  }