nokogiri 1.16.8 → 1.18.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +8 -5
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +188 -142
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +134 -103
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +219 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +103 -100
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +63 -25
- data/gumbo-parser/src/tokenizer.c +6 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +43 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +38 -42
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +13 -12
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
|
|
5
5
|
static ID id_read;
|
6
6
|
|
7
7
|
static void
|
8
|
-
|
8
|
+
xml_sax_parser_context_type_free(void *data)
|
9
9
|
{
|
10
10
|
xmlParserCtxtPtr ctxt = data;
|
11
11
|
ctxt->sax = NULL;
|
12
|
-
|
12
|
+
if (ctxt->myDoc) {
|
13
|
+
xmlFreeDoc(ctxt->myDoc);
|
14
|
+
}
|
15
|
+
if (ctxt) {
|
16
|
+
xmlFreeParserCtxt(ctxt);
|
17
|
+
}
|
13
18
|
}
|
14
19
|
|
15
20
|
/*
|
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
|
|
17
22
|
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
18
23
|
*/
|
19
24
|
static const rb_data_type_t xml_sax_parser_context_type = {
|
20
|
-
.wrap_struct_name = "
|
25
|
+
.wrap_struct_name = "xmlParserCtxt",
|
21
26
|
.function = {
|
22
|
-
.dfree =
|
27
|
+
.dfree = xml_sax_parser_context_type_free,
|
23
28
|
},
|
24
29
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
25
30
|
};
|
@@ -38,104 +43,124 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
|
38
43
|
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
39
44
|
}
|
40
45
|
|
46
|
+
void
|
47
|
+
noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
|
48
|
+
{
|
49
|
+
if (!NIL_P(rb_encoding)) {
|
50
|
+
VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
|
51
|
+
|
52
|
+
char *encoding_name = StringValueCStr(rb_encoding_name);
|
53
|
+
if (encoding_name) {
|
54
|
+
libxmlStructuredErrorHandlerState handler_state;
|
55
|
+
VALUE rb_errors = rb_ary_new();
|
41
56
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
57
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
58
|
+
|
59
|
+
int result = xmlSwitchEncodingName(c_context, encoding_name);
|
60
|
+
|
61
|
+
noko__structured_error_func_restore(&handler_state);
|
62
|
+
|
63
|
+
if (result != 0) {
|
64
|
+
xmlFreeParserCtxt(c_context);
|
65
|
+
|
66
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
67
|
+
if (!NIL_P(exception)) {
|
68
|
+
rb_exc_raise(exception);
|
69
|
+
} else {
|
70
|
+
rb_raise(rb_eRuntimeError, "could not set encoding");
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
/* :nodoc: */
|
48
78
|
static VALUE
|
49
|
-
|
79
|
+
noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
|
50
80
|
{
|
51
|
-
|
52
|
-
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
53
|
-
|
54
|
-
if (!rb_respond_to(io, id_read)) {
|
81
|
+
if (!rb_respond_to(rb_io, id_read)) {
|
55
82
|
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
56
83
|
}
|
57
84
|
|
58
|
-
|
59
|
-
|
60
|
-
(xmlInputCloseCallback)noko_io_close,
|
61
|
-
(void *)io, enc);
|
62
|
-
if (!ctxt) {
|
63
|
-
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
85
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
86
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
64
87
|
}
|
65
88
|
|
66
|
-
|
67
|
-
|
68
|
-
|
89
|
+
xmlParserCtxtPtr c_context =
|
90
|
+
xmlCreateIOParserCtxt(NULL, NULL,
|
91
|
+
(xmlInputReadCallback)noko_io_read,
|
92
|
+
(xmlInputCloseCallback)noko_io_close,
|
93
|
+
(void *)rb_io, XML_CHAR_ENCODING_NONE);
|
94
|
+
if (!c_context) {
|
95
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
69
96
|
}
|
70
97
|
|
71
|
-
|
72
|
-
}
|
73
|
-
|
74
|
-
/*
|
75
|
-
* call-seq:
|
76
|
-
* parse_file(filename)
|
77
|
-
*
|
78
|
-
* Parse file given +filename+
|
79
|
-
*/
|
80
|
-
static VALUE
|
81
|
-
parse_file(VALUE klass, VALUE filename)
|
82
|
-
{
|
83
|
-
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
98
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
84
99
|
|
85
|
-
if (
|
86
|
-
xmlFree(
|
87
|
-
|
100
|
+
if (c_context->sax) {
|
101
|
+
xmlFree(c_context->sax);
|
102
|
+
c_context->sax = NULL;
|
88
103
|
}
|
89
104
|
|
90
|
-
|
105
|
+
VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
106
|
+
rb_iv_set(rb_context, "@input", rb_io);
|
107
|
+
|
108
|
+
return rb_context;
|
91
109
|
}
|
92
110
|
|
93
|
-
/*
|
94
|
-
* call-seq:
|
95
|
-
* parse_memory(data)
|
96
|
-
*
|
97
|
-
* Parse the XML stored in memory in +data+
|
98
|
-
*/
|
111
|
+
/* :nodoc: */
|
99
112
|
static VALUE
|
100
|
-
|
113
|
+
noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
|
101
114
|
{
|
102
|
-
|
103
|
-
|
104
|
-
|
115
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
116
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
117
|
+
}
|
105
118
|
|
106
|
-
|
107
|
-
|
119
|
+
xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
|
120
|
+
if (!c_context) {
|
121
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
108
122
|
}
|
109
123
|
|
110
|
-
|
111
|
-
|
112
|
-
if (
|
113
|
-
xmlFree(
|
114
|
-
|
124
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
125
|
+
|
126
|
+
if (c_context->sax) {
|
127
|
+
xmlFree(c_context->sax);
|
128
|
+
c_context->sax = NULL;
|
115
129
|
}
|
116
130
|
|
117
|
-
return noko_xml_sax_parser_context_wrap(
|
131
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
118
132
|
}
|
119
133
|
|
134
|
+
/* :nodoc: */
|
120
135
|
static VALUE
|
121
|
-
|
136
|
+
noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
|
122
137
|
{
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
}
|
138
|
+
Check_Type(rb_input, T_STRING);
|
139
|
+
if (!(int)RSTRING_LEN(rb_input)) {
|
140
|
+
rb_raise(rb_eRuntimeError, "input string cannot be empty");
|
141
|
+
}
|
127
142
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
143
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
144
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
145
|
+
}
|
132
146
|
|
133
|
-
|
134
|
-
|
147
|
+
xmlParserCtxtPtr c_context =
|
148
|
+
xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
|
149
|
+
if (!c_context) {
|
150
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
135
151
|
}
|
136
152
|
|
137
|
-
|
138
|
-
|
153
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
154
|
+
|
155
|
+
if (c_context->sax) {
|
156
|
+
xmlFree(c_context->sax);
|
157
|
+
c_context->sax = NULL;
|
158
|
+
}
|
159
|
+
|
160
|
+
VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
161
|
+
rb_iv_set(rb_context, "@input", rb_input);
|
162
|
+
|
163
|
+
return rb_context;
|
139
164
|
}
|
140
165
|
|
141
166
|
/*
|
@@ -143,80 +168,113 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
143
168
|
* parse_with(sax_handler)
|
144
169
|
*
|
145
170
|
* Use +sax_handler+ and parse the current document
|
171
|
+
*
|
172
|
+
* 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
|
173
|
+
* more convenient for most use cases.
|
146
174
|
*/
|
147
175
|
static VALUE
|
148
|
-
|
176
|
+
noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
|
149
177
|
{
|
150
|
-
xmlParserCtxtPtr
|
178
|
+
xmlParserCtxtPtr c_context;
|
151
179
|
xmlSAXHandlerPtr sax;
|
152
180
|
|
153
|
-
if (!rb_obj_is_kind_of(
|
181
|
+
if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
|
154
182
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
155
183
|
}
|
156
184
|
|
157
|
-
|
158
|
-
sax =
|
185
|
+
c_context = noko_xml_sax_parser_context_unwrap(rb_context);
|
186
|
+
sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
|
159
187
|
|
160
|
-
|
161
|
-
|
188
|
+
c_context->sax = sax;
|
189
|
+
c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
|
190
|
+
c_context->_private = (void *)rb_sax_parser;
|
162
191
|
|
163
192
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
164
193
|
|
165
|
-
|
194
|
+
/* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
|
195
|
+
* don't have any cleanup to do. The only memory we need to free is handled by
|
196
|
+
* xml_sax_parser_context_type_free */
|
197
|
+
xmlParseDocument(c_context);
|
166
198
|
|
167
199
|
return Qnil;
|
168
200
|
}
|
169
201
|
|
170
202
|
/*
|
171
203
|
* call-seq:
|
172
|
-
*
|
204
|
+
* replace_entities=(value)
|
205
|
+
*
|
206
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
207
|
+
*
|
208
|
+
* [Parameters]
|
209
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
173
210
|
*
|
174
|
-
*
|
175
|
-
*
|
211
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
212
|
+
* defaults to +false+ for this reason.
|
213
|
+
*
|
214
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
215
|
+
* replacement.
|
216
|
+
*
|
217
|
+
* [Example]
|
218
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
219
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
220
|
+
*
|
221
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
222
|
+
* parser.parse(xml) do |ctx|
|
223
|
+
* ctx.replace_entities = true # this is UNSAFE for untrusted documents!
|
224
|
+
* end
|
176
225
|
*/
|
177
226
|
static VALUE
|
178
|
-
|
227
|
+
noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
|
179
228
|
{
|
180
|
-
|
229
|
+
int error;
|
230
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
181
231
|
|
182
|
-
if (
|
183
|
-
|
232
|
+
if (RB_TEST(rb_value)) {
|
233
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
184
234
|
} else {
|
185
|
-
|
235
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
236
|
+
}
|
237
|
+
|
238
|
+
if (error) {
|
239
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
186
240
|
}
|
187
241
|
|
188
|
-
return
|
242
|
+
return rb_value;
|
189
243
|
}
|
190
244
|
|
191
245
|
/*
|
192
246
|
* call-seq:
|
193
|
-
*
|
247
|
+
* replace_entities
|
248
|
+
*
|
249
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
250
|
+
*
|
251
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
194
252
|
*
|
195
|
-
*
|
196
|
-
*
|
253
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
254
|
+
* replacement.
|
197
255
|
*/
|
198
256
|
static VALUE
|
199
|
-
|
257
|
+
noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
|
200
258
|
{
|
201
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
259
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
202
260
|
|
203
|
-
if (
|
204
|
-
return Qfalse;
|
205
|
-
} else {
|
261
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
206
262
|
return Qtrue;
|
263
|
+
} else {
|
264
|
+
return Qfalse;
|
207
265
|
}
|
208
266
|
}
|
209
267
|
|
210
268
|
/*
|
211
269
|
* call-seq: line
|
212
270
|
*
|
213
|
-
*
|
271
|
+
* [Returns] (Integer) the line number of the line being currently parsed.
|
214
272
|
*/
|
215
273
|
static VALUE
|
216
|
-
|
274
|
+
noko_xml_sax_parser_context__line(VALUE rb_context)
|
217
275
|
{
|
218
276
|
xmlParserInputPtr io;
|
219
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
277
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
220
278
|
|
221
279
|
io = ctxt->input;
|
222
280
|
if (io) {
|
@@ -229,12 +287,12 @@ line(VALUE self)
|
|
229
287
|
/*
|
230
288
|
* call-seq: column
|
231
289
|
*
|
232
|
-
*
|
290
|
+
* [Returns] (Integer) the column number of the column being currently parsed.
|
233
291
|
*/
|
234
292
|
static VALUE
|
235
|
-
|
293
|
+
noko_xml_sax_parser_context__column(VALUE rb_context)
|
236
294
|
{
|
237
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
295
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
238
296
|
xmlParserInputPtr io;
|
239
297
|
|
240
298
|
io = ctxt->input;
|
@@ -247,41 +305,66 @@ column(VALUE self)
|
|
247
305
|
|
248
306
|
/*
|
249
307
|
* call-seq:
|
250
|
-
*
|
308
|
+
* recovery=(value)
|
309
|
+
*
|
310
|
+
* Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
|
311
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
312
|
+
* parser will stop processing the file on the first parsing error.
|
313
|
+
*
|
314
|
+
* [Parameters]
|
315
|
+
* - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
|
316
|
+
*
|
317
|
+
* [Returns] (Boolean) The passed +value+.
|
251
318
|
*
|
252
|
-
*
|
253
|
-
*
|
319
|
+
* [Example]
|
320
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
321
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
322
|
+
*
|
323
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
324
|
+
* parser.parse(xml) do |ctx|
|
325
|
+
* ctx.recovery = true
|
326
|
+
* end
|
254
327
|
*/
|
255
328
|
static VALUE
|
256
|
-
|
329
|
+
noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
|
257
330
|
{
|
258
|
-
|
331
|
+
int error;
|
332
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
259
333
|
|
260
|
-
if (
|
261
|
-
|
334
|
+
if (RB_TEST(rb_value)) {
|
335
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
|
262
336
|
} else {
|
263
|
-
|
337
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
|
338
|
+
}
|
339
|
+
|
340
|
+
if (error) {
|
341
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
264
342
|
}
|
265
343
|
|
266
|
-
return
|
344
|
+
return rb_value;
|
267
345
|
}
|
268
346
|
|
269
347
|
/*
|
270
348
|
* call-seq:
|
271
|
-
*
|
349
|
+
* recovery
|
350
|
+
*
|
351
|
+
* Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
|
352
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
353
|
+
* parser will stop processing the file on the first parsing error.
|
354
|
+
*
|
355
|
+
* [Returns] (Boolean) Whether this parser will recover from parsing errors.
|
272
356
|
*
|
273
|
-
*
|
274
|
-
* file on structural errors if set to true
|
357
|
+
* Default is +false+ for XML and +true+ for HTML.
|
275
358
|
*/
|
276
359
|
static VALUE
|
277
|
-
|
360
|
+
noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
|
278
361
|
{
|
279
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
362
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
280
363
|
|
281
|
-
if (ctxt
|
282
|
-
return Qfalse;
|
283
|
-
} else {
|
364
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
|
284
365
|
return Qtrue;
|
366
|
+
} else {
|
367
|
+
return Qfalse;
|
285
368
|
}
|
286
369
|
}
|
287
370
|
|
@@ -292,17 +375,22 @@ noko_init_xml_sax_parser_context(void)
|
|
292
375
|
|
293
376
|
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
294
377
|
|
295
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
296
|
-
|
297
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
303
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
304
|
-
|
305
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
378
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
|
379
|
+
noko_xml_sax_parser_context_s_native_io, 2);
|
380
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
|
381
|
+
noko_xml_sax_parser_context_s_native_memory, 2);
|
382
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
|
383
|
+
noko_xml_sax_parser_context_s_native_file, 2);
|
384
|
+
|
385
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
|
386
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
|
387
|
+
noko_xml_sax_parser_context__replace_entities_set, 1);
|
388
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
|
389
|
+
noko_xml_sax_parser_context__replace_entities_get, 0);
|
390
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
|
391
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
|
392
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
|
393
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
|
306
394
|
|
307
395
|
id_read = rb_intern("read");
|
308
396
|
}
|