nokogiri 1.16.0 → 1.18.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +14 -16
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +8 -5
- data/dependencies.yml +9 -9
- data/ext/nokogiri/extconf.rb +188 -142
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +167 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +141 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -57
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +219 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +103 -100
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +3 -0
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +66 -25
- data/gumbo-parser/src/tokenizer.c +6 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +38 -42
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +13 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
|
|
5
5
|
static ID id_read;
|
6
6
|
|
7
7
|
static void
|
8
|
-
|
8
|
+
xml_sax_parser_context_type_free(void *data)
|
9
9
|
{
|
10
10
|
xmlParserCtxtPtr ctxt = data;
|
11
11
|
ctxt->sax = NULL;
|
12
|
-
|
12
|
+
if (ctxt->myDoc) {
|
13
|
+
xmlFreeDoc(ctxt->myDoc);
|
14
|
+
}
|
15
|
+
if (ctxt) {
|
16
|
+
xmlFreeParserCtxt(ctxt);
|
17
|
+
}
|
13
18
|
}
|
14
19
|
|
15
20
|
/*
|
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
|
|
17
22
|
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
18
23
|
*/
|
19
24
|
static const rb_data_type_t xml_sax_parser_context_type = {
|
20
|
-
.wrap_struct_name = "
|
25
|
+
.wrap_struct_name = "xmlParserCtxt",
|
21
26
|
.function = {
|
22
|
-
.dfree =
|
27
|
+
.dfree = xml_sax_parser_context_type_free,
|
23
28
|
},
|
24
29
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
25
30
|
};
|
@@ -38,104 +43,124 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
|
38
43
|
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
39
44
|
}
|
40
45
|
|
46
|
+
void
|
47
|
+
noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
|
48
|
+
{
|
49
|
+
if (!NIL_P(rb_encoding)) {
|
50
|
+
VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
|
51
|
+
|
52
|
+
char *encoding_name = StringValueCStr(rb_encoding_name);
|
53
|
+
if (encoding_name) {
|
54
|
+
libxmlStructuredErrorHandlerState handler_state;
|
55
|
+
VALUE rb_errors = rb_ary_new();
|
41
56
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
57
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
58
|
+
|
59
|
+
int result = xmlSwitchEncodingName(c_context, encoding_name);
|
60
|
+
|
61
|
+
noko__structured_error_func_restore(&handler_state);
|
62
|
+
|
63
|
+
if (result != 0) {
|
64
|
+
xmlFreeParserCtxt(c_context);
|
65
|
+
|
66
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
67
|
+
if (!NIL_P(exception)) {
|
68
|
+
rb_exc_raise(exception);
|
69
|
+
} else {
|
70
|
+
rb_raise(rb_eRuntimeError, "could not set encoding");
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
/* :nodoc: */
|
48
78
|
static VALUE
|
49
|
-
|
79
|
+
noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
|
50
80
|
{
|
51
|
-
|
52
|
-
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
53
|
-
|
54
|
-
if (!rb_respond_to(io, id_read)) {
|
81
|
+
if (!rb_respond_to(rb_io, id_read)) {
|
55
82
|
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
56
83
|
}
|
57
84
|
|
58
|
-
|
59
|
-
|
60
|
-
(xmlInputCloseCallback)noko_io_close,
|
61
|
-
(void *)io, enc);
|
62
|
-
if (!ctxt) {
|
63
|
-
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
85
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
86
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
64
87
|
}
|
65
88
|
|
66
|
-
|
67
|
-
|
68
|
-
|
89
|
+
xmlParserCtxtPtr c_context =
|
90
|
+
xmlCreateIOParserCtxt(NULL, NULL,
|
91
|
+
(xmlInputReadCallback)noko_io_read,
|
92
|
+
(xmlInputCloseCallback)noko_io_close,
|
93
|
+
(void *)rb_io, XML_CHAR_ENCODING_NONE);
|
94
|
+
if (!c_context) {
|
95
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
69
96
|
}
|
70
97
|
|
71
|
-
|
72
|
-
}
|
73
|
-
|
74
|
-
/*
|
75
|
-
* call-seq:
|
76
|
-
* parse_file(filename)
|
77
|
-
*
|
78
|
-
* Parse file given +filename+
|
79
|
-
*/
|
80
|
-
static VALUE
|
81
|
-
parse_file(VALUE klass, VALUE filename)
|
82
|
-
{
|
83
|
-
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
98
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
84
99
|
|
85
|
-
if (
|
86
|
-
xmlFree(
|
87
|
-
|
100
|
+
if (c_context->sax) {
|
101
|
+
xmlFree(c_context->sax);
|
102
|
+
c_context->sax = NULL;
|
88
103
|
}
|
89
104
|
|
90
|
-
|
105
|
+
VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
106
|
+
rb_iv_set(rb_context, "@input", rb_io);
|
107
|
+
|
108
|
+
return rb_context;
|
91
109
|
}
|
92
110
|
|
93
|
-
/*
|
94
|
-
* call-seq:
|
95
|
-
* parse_memory(data)
|
96
|
-
*
|
97
|
-
* Parse the XML stored in memory in +data+
|
98
|
-
*/
|
111
|
+
/* :nodoc: */
|
99
112
|
static VALUE
|
100
|
-
|
113
|
+
noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
|
101
114
|
{
|
102
|
-
|
103
|
-
|
104
|
-
|
115
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
116
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
117
|
+
}
|
105
118
|
|
106
|
-
|
107
|
-
|
119
|
+
xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
|
120
|
+
if (!c_context) {
|
121
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
108
122
|
}
|
109
123
|
|
110
|
-
|
111
|
-
|
112
|
-
if (
|
113
|
-
xmlFree(
|
114
|
-
|
124
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
125
|
+
|
126
|
+
if (c_context->sax) {
|
127
|
+
xmlFree(c_context->sax);
|
128
|
+
c_context->sax = NULL;
|
115
129
|
}
|
116
130
|
|
117
|
-
return noko_xml_sax_parser_context_wrap(
|
131
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
118
132
|
}
|
119
133
|
|
134
|
+
/* :nodoc: */
|
120
135
|
static VALUE
|
121
|
-
|
136
|
+
noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
|
122
137
|
{
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
}
|
138
|
+
Check_Type(rb_input, T_STRING);
|
139
|
+
if (!(int)RSTRING_LEN(rb_input)) {
|
140
|
+
rb_raise(rb_eRuntimeError, "input string cannot be empty");
|
141
|
+
}
|
127
142
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
143
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
144
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
145
|
+
}
|
132
146
|
|
133
|
-
|
134
|
-
|
147
|
+
xmlParserCtxtPtr c_context =
|
148
|
+
xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
|
149
|
+
if (!c_context) {
|
150
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
135
151
|
}
|
136
152
|
|
137
|
-
|
138
|
-
|
153
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
154
|
+
|
155
|
+
if (c_context->sax) {
|
156
|
+
xmlFree(c_context->sax);
|
157
|
+
c_context->sax = NULL;
|
158
|
+
}
|
159
|
+
|
160
|
+
VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
161
|
+
rb_iv_set(rb_context, "@input", rb_input);
|
162
|
+
|
163
|
+
return rb_context;
|
139
164
|
}
|
140
165
|
|
141
166
|
/*
|
@@ -143,80 +168,113 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
143
168
|
* parse_with(sax_handler)
|
144
169
|
*
|
145
170
|
* Use +sax_handler+ and parse the current document
|
171
|
+
*
|
172
|
+
* 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
|
173
|
+
* more convenient for most use cases.
|
146
174
|
*/
|
147
175
|
static VALUE
|
148
|
-
|
176
|
+
noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
|
149
177
|
{
|
150
|
-
xmlParserCtxtPtr
|
178
|
+
xmlParserCtxtPtr c_context;
|
151
179
|
xmlSAXHandlerPtr sax;
|
152
180
|
|
153
|
-
if (!rb_obj_is_kind_of(
|
181
|
+
if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
|
154
182
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
155
183
|
}
|
156
184
|
|
157
|
-
|
158
|
-
sax =
|
185
|
+
c_context = noko_xml_sax_parser_context_unwrap(rb_context);
|
186
|
+
sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
|
159
187
|
|
160
|
-
|
161
|
-
|
188
|
+
c_context->sax = sax;
|
189
|
+
c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
|
190
|
+
c_context->_private = (void *)rb_sax_parser;
|
162
191
|
|
163
192
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
164
193
|
|
165
|
-
|
194
|
+
/* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
|
195
|
+
* don't have any cleanup to do. The only memory we need to free is handled by
|
196
|
+
* xml_sax_parser_context_type_free */
|
197
|
+
xmlParseDocument(c_context);
|
166
198
|
|
167
199
|
return Qnil;
|
168
200
|
}
|
169
201
|
|
170
202
|
/*
|
171
203
|
* call-seq:
|
172
|
-
*
|
204
|
+
* replace_entities=(value)
|
205
|
+
*
|
206
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
207
|
+
*
|
208
|
+
* [Parameters]
|
209
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
173
210
|
*
|
174
|
-
*
|
175
|
-
*
|
211
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
212
|
+
* defaults to +false+ for this reason.
|
213
|
+
*
|
214
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
215
|
+
* replacement.
|
216
|
+
*
|
217
|
+
* [Example]
|
218
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
219
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
220
|
+
*
|
221
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
222
|
+
* parser.parse(xml) do |ctx|
|
223
|
+
* ctx.replace_entities = true # this is UNSAFE for untrusted documents!
|
224
|
+
* end
|
176
225
|
*/
|
177
226
|
static VALUE
|
178
|
-
|
227
|
+
noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
|
179
228
|
{
|
180
|
-
|
229
|
+
int error;
|
230
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
181
231
|
|
182
|
-
if (
|
183
|
-
|
232
|
+
if (RB_TEST(rb_value)) {
|
233
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
184
234
|
} else {
|
185
|
-
|
235
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
236
|
+
}
|
237
|
+
|
238
|
+
if (error) {
|
239
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
186
240
|
}
|
187
241
|
|
188
|
-
return
|
242
|
+
return rb_value;
|
189
243
|
}
|
190
244
|
|
191
245
|
/*
|
192
246
|
* call-seq:
|
193
|
-
*
|
247
|
+
* replace_entities
|
248
|
+
*
|
249
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
250
|
+
*
|
251
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
194
252
|
*
|
195
|
-
*
|
196
|
-
*
|
253
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
254
|
+
* replacement.
|
197
255
|
*/
|
198
256
|
static VALUE
|
199
|
-
|
257
|
+
noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
|
200
258
|
{
|
201
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
259
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
202
260
|
|
203
|
-
if (
|
204
|
-
return Qfalse;
|
205
|
-
} else {
|
261
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
206
262
|
return Qtrue;
|
263
|
+
} else {
|
264
|
+
return Qfalse;
|
207
265
|
}
|
208
266
|
}
|
209
267
|
|
210
268
|
/*
|
211
269
|
* call-seq: line
|
212
270
|
*
|
213
|
-
*
|
271
|
+
* [Returns] (Integer) the line number of the line being currently parsed.
|
214
272
|
*/
|
215
273
|
static VALUE
|
216
|
-
|
274
|
+
noko_xml_sax_parser_context__line(VALUE rb_context)
|
217
275
|
{
|
218
276
|
xmlParserInputPtr io;
|
219
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
277
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
220
278
|
|
221
279
|
io = ctxt->input;
|
222
280
|
if (io) {
|
@@ -229,12 +287,12 @@ line(VALUE self)
|
|
229
287
|
/*
|
230
288
|
* call-seq: column
|
231
289
|
*
|
232
|
-
*
|
290
|
+
* [Returns] (Integer) the column number of the column being currently parsed.
|
233
291
|
*/
|
234
292
|
static VALUE
|
235
|
-
|
293
|
+
noko_xml_sax_parser_context__column(VALUE rb_context)
|
236
294
|
{
|
237
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
295
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
238
296
|
xmlParserInputPtr io;
|
239
297
|
|
240
298
|
io = ctxt->input;
|
@@ -247,41 +305,66 @@ column(VALUE self)
|
|
247
305
|
|
248
306
|
/*
|
249
307
|
* call-seq:
|
250
|
-
*
|
308
|
+
* recovery=(value)
|
309
|
+
*
|
310
|
+
* Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
|
311
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
312
|
+
* parser will stop processing the file on the first parsing error.
|
313
|
+
*
|
314
|
+
* [Parameters]
|
315
|
+
* - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
|
316
|
+
*
|
317
|
+
* [Returns] (Boolean) The passed +value+.
|
251
318
|
*
|
252
|
-
*
|
253
|
-
*
|
319
|
+
* [Example]
|
320
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
321
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
322
|
+
*
|
323
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
324
|
+
* parser.parse(xml) do |ctx|
|
325
|
+
* ctx.recovery = true
|
326
|
+
* end
|
254
327
|
*/
|
255
328
|
static VALUE
|
256
|
-
|
329
|
+
noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
|
257
330
|
{
|
258
|
-
|
331
|
+
int error;
|
332
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
259
333
|
|
260
|
-
if (
|
261
|
-
|
334
|
+
if (RB_TEST(rb_value)) {
|
335
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
|
262
336
|
} else {
|
263
|
-
|
337
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
|
338
|
+
}
|
339
|
+
|
340
|
+
if (error) {
|
341
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
264
342
|
}
|
265
343
|
|
266
|
-
return
|
344
|
+
return rb_value;
|
267
345
|
}
|
268
346
|
|
269
347
|
/*
|
270
348
|
* call-seq:
|
271
|
-
*
|
349
|
+
* recovery
|
350
|
+
*
|
351
|
+
* Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
|
352
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
353
|
+
* parser will stop processing the file on the first parsing error.
|
354
|
+
*
|
355
|
+
* [Returns] (Boolean) Whether this parser will recover from parsing errors.
|
272
356
|
*
|
273
|
-
*
|
274
|
-
* file on structural errors if set to true
|
357
|
+
* Default is +false+ for XML and +true+ for HTML.
|
275
358
|
*/
|
276
359
|
static VALUE
|
277
|
-
|
360
|
+
noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
|
278
361
|
{
|
279
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
362
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
280
363
|
|
281
|
-
if (ctxt
|
282
|
-
return Qfalse;
|
283
|
-
} else {
|
364
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
|
284
365
|
return Qtrue;
|
366
|
+
} else {
|
367
|
+
return Qfalse;
|
285
368
|
}
|
286
369
|
}
|
287
370
|
|
@@ -292,17 +375,22 @@ noko_init_xml_sax_parser_context(void)
|
|
292
375
|
|
293
376
|
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
294
377
|
|
295
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
296
|
-
|
297
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
303
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
304
|
-
|
305
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
378
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
|
379
|
+
noko_xml_sax_parser_context_s_native_io, 2);
|
380
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
|
381
|
+
noko_xml_sax_parser_context_s_native_memory, 2);
|
382
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
|
383
|
+
noko_xml_sax_parser_context_s_native_file, 2);
|
384
|
+
|
385
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
|
386
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
|
387
|
+
noko_xml_sax_parser_context__replace_entities_set, 1);
|
388
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
|
389
|
+
noko_xml_sax_parser_context__replace_entities_get, 0);
|
390
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
|
391
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
|
392
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
|
393
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
|
306
394
|
|
307
395
|
id_read = rb_intern("read");
|
308
396
|
}
|