nokogiri 1.15.4 → 1.17.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +12 -19
- data/README.md +8 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +194 -141
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +26 -25
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +25 -33
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +3 -12
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +167 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -10
- data/ext/nokogiri/xml_node.c +142 -108
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -100
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +214 -128
- data/ext/nokogiri/xml_sax_push_parser.c +69 -50
- data/ext/nokogiri/xml_schema.c +51 -87
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +3 -6
- data/ext/nokogiri/xml_xpath_context.c +4 -7
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +64 -23
- data/gumbo-parser/src/tokenizer.c +7 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +43 -27
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +45 -24
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +2 -2
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -138
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +74 -31
- data/lib/nokogiri/xml/document_fragment.rb +86 -15
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +113 -35
- data/lib/nokogiri/xml/node_set.rb +12 -10
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +9 -11
- data/lib/nokogiri/xml/syntax_error.rb +23 -1
- data/lib/nokogiri/xml.rb +14 -25
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +4 -10
- data/lib/nokogiri.rb +1 -1
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +15 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
|
|
5
5
|
static ID id_read;
|
6
6
|
|
7
7
|
static void
|
8
|
-
|
8
|
+
xml_sax_parser_context_type_free(void *data)
|
9
9
|
{
|
10
10
|
xmlParserCtxtPtr ctxt = data;
|
11
11
|
ctxt->sax = NULL;
|
12
|
-
|
12
|
+
if (ctxt->myDoc) {
|
13
|
+
xmlFreeDoc(ctxt->myDoc);
|
14
|
+
}
|
15
|
+
if (ctxt) {
|
16
|
+
xmlFreeParserCtxt(ctxt);
|
17
|
+
}
|
13
18
|
}
|
14
19
|
|
15
20
|
/*
|
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
|
|
17
22
|
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
18
23
|
*/
|
19
24
|
static const rb_data_type_t xml_sax_parser_context_type = {
|
20
|
-
.wrap_struct_name = "
|
25
|
+
.wrap_struct_name = "xmlParserCtxt",
|
21
26
|
.function = {
|
22
|
-
.dfree =
|
27
|
+
.dfree = xml_sax_parser_context_type_free,
|
23
28
|
},
|
24
29
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
25
30
|
};
|
@@ -38,100 +43,118 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
|
38
43
|
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
39
44
|
}
|
40
45
|
|
46
|
+
void
|
47
|
+
noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
|
48
|
+
{
|
49
|
+
if (!NIL_P(rb_encoding)) {
|
50
|
+
VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
|
41
51
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
52
|
+
char *encoding_name = StringValueCStr(rb_encoding_name);
|
53
|
+
if (encoding_name) {
|
54
|
+
libxmlStructuredErrorHandlerState handler_state;
|
55
|
+
VALUE rb_errors = rb_ary_new();
|
56
|
+
|
57
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
58
|
+
|
59
|
+
int result = xmlSwitchEncodingName(c_context, encoding_name);
|
60
|
+
|
61
|
+
noko__structured_error_func_restore(&handler_state);
|
62
|
+
|
63
|
+
if (result != 0) {
|
64
|
+
xmlFreeParserCtxt(c_context);
|
65
|
+
|
66
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
67
|
+
if (!NIL_P(exception)) {
|
68
|
+
rb_exc_raise(exception);
|
69
|
+
} else {
|
70
|
+
rb_raise(rb_eRuntimeError, "could not set encoding");
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
/* :nodoc: */
|
48
78
|
static VALUE
|
49
|
-
|
79
|
+
noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
|
50
80
|
{
|
51
|
-
|
52
|
-
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
53
|
-
|
54
|
-
if (!rb_respond_to(io, id_read)) {
|
81
|
+
if (!rb_respond_to(rb_io, id_read)) {
|
55
82
|
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
56
83
|
}
|
57
84
|
|
58
|
-
|
59
|
-
|
60
|
-
(xmlInputCloseCallback)noko_io_close,
|
61
|
-
(void *)io, enc);
|
62
|
-
if (ctxt->sax) {
|
63
|
-
xmlFree(ctxt->sax);
|
64
|
-
ctxt->sax = NULL;
|
85
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
86
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
65
87
|
}
|
66
88
|
|
67
|
-
|
68
|
-
|
89
|
+
xmlParserCtxtPtr c_context =
|
90
|
+
xmlCreateIOParserCtxt(NULL, NULL,
|
91
|
+
(xmlInputReadCallback)noko_io_read,
|
92
|
+
(xmlInputCloseCallback)noko_io_close,
|
93
|
+
(void *)rb_io, XML_CHAR_ENCODING_NONE);
|
94
|
+
if (!c_context) {
|
95
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
96
|
+
}
|
69
97
|
|
70
|
-
|
71
|
-
* call-seq:
|
72
|
-
* parse_file(filename)
|
73
|
-
*
|
74
|
-
* Parse file given +filename+
|
75
|
-
*/
|
76
|
-
static VALUE
|
77
|
-
parse_file(VALUE klass, VALUE filename)
|
78
|
-
{
|
79
|
-
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
98
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
80
99
|
|
81
|
-
if (
|
82
|
-
xmlFree(
|
83
|
-
|
100
|
+
if (c_context->sax) {
|
101
|
+
xmlFree(c_context->sax);
|
102
|
+
c_context->sax = NULL;
|
84
103
|
}
|
85
104
|
|
86
|
-
return noko_xml_sax_parser_context_wrap(
|
105
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
87
106
|
}
|
88
107
|
|
89
|
-
/*
|
90
|
-
* call-seq:
|
91
|
-
* parse_memory(data)
|
92
|
-
*
|
93
|
-
* Parse the XML stored in memory in +data+
|
94
|
-
*/
|
108
|
+
/* :nodoc: */
|
95
109
|
static VALUE
|
96
|
-
|
110
|
+
noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
|
97
111
|
{
|
98
|
-
|
99
|
-
|
100
|
-
|
112
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
113
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
114
|
+
}
|
101
115
|
|
102
|
-
|
103
|
-
|
116
|
+
xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
|
117
|
+
if (!c_context) {
|
118
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
104
119
|
}
|
105
120
|
|
106
|
-
|
107
|
-
|
108
|
-
if (
|
109
|
-
xmlFree(
|
110
|
-
|
121
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
122
|
+
|
123
|
+
if (c_context->sax) {
|
124
|
+
xmlFree(c_context->sax);
|
125
|
+
c_context->sax = NULL;
|
111
126
|
}
|
112
127
|
|
113
|
-
return noko_xml_sax_parser_context_wrap(
|
128
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
114
129
|
}
|
115
130
|
|
131
|
+
/* :nodoc: */
|
116
132
|
static VALUE
|
117
|
-
|
133
|
+
noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
|
118
134
|
{
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
}
|
135
|
+
Check_Type(rb_input, T_STRING);
|
136
|
+
if (!(int)RSTRING_LEN(rb_input)) {
|
137
|
+
rb_raise(rb_eRuntimeError, "input string cannot be empty");
|
138
|
+
}
|
123
139
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
140
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
141
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
142
|
+
}
|
128
143
|
|
129
|
-
|
130
|
-
|
144
|
+
xmlParserCtxtPtr c_context =
|
145
|
+
xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
|
146
|
+
if (!c_context) {
|
147
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
131
148
|
}
|
132
149
|
|
133
|
-
|
134
|
-
|
150
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
151
|
+
|
152
|
+
if (c_context->sax) {
|
153
|
+
xmlFree(c_context->sax);
|
154
|
+
c_context->sax = NULL;
|
155
|
+
}
|
156
|
+
|
157
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
135
158
|
}
|
136
159
|
|
137
160
|
/*
|
@@ -139,80 +162,113 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
139
162
|
* parse_with(sax_handler)
|
140
163
|
*
|
141
164
|
* Use +sax_handler+ and parse the current document
|
165
|
+
*
|
166
|
+
* 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
|
167
|
+
* more convenient for most use cases.
|
142
168
|
*/
|
143
169
|
static VALUE
|
144
|
-
|
170
|
+
noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
|
145
171
|
{
|
146
|
-
xmlParserCtxtPtr
|
172
|
+
xmlParserCtxtPtr c_context;
|
147
173
|
xmlSAXHandlerPtr sax;
|
148
174
|
|
149
|
-
if (!rb_obj_is_kind_of(
|
175
|
+
if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
|
150
176
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
151
177
|
}
|
152
178
|
|
153
|
-
|
154
|
-
sax =
|
179
|
+
c_context = noko_xml_sax_parser_context_unwrap(rb_context);
|
180
|
+
sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
|
155
181
|
|
156
|
-
|
157
|
-
|
182
|
+
c_context->sax = sax;
|
183
|
+
c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
|
184
|
+
c_context->_private = (void *)rb_sax_parser;
|
158
185
|
|
159
186
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
160
187
|
|
161
|
-
|
188
|
+
/* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
|
189
|
+
* don't have any cleanup to do. The only memory we need to free is handled by
|
190
|
+
* xml_sax_parser_context_type_free */
|
191
|
+
xmlParseDocument(c_context);
|
162
192
|
|
163
193
|
return Qnil;
|
164
194
|
}
|
165
195
|
|
166
196
|
/*
|
167
197
|
* call-seq:
|
168
|
-
*
|
198
|
+
* replace_entities=(value)
|
199
|
+
*
|
200
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
201
|
+
*
|
202
|
+
* [Parameters]
|
203
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
204
|
+
*
|
205
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
206
|
+
* defaults to +false+ for this reason.
|
207
|
+
*
|
208
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
209
|
+
* replacement.
|
169
210
|
*
|
170
|
-
*
|
171
|
-
* set
|
211
|
+
* [Example]
|
212
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
213
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
214
|
+
*
|
215
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
216
|
+
* parser.parse(xml) do |ctx|
|
217
|
+
* ctx.replace_entities = true # this is UNSAFE for untrusted documents!
|
218
|
+
* end
|
172
219
|
*/
|
173
220
|
static VALUE
|
174
|
-
|
221
|
+
noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
|
175
222
|
{
|
176
|
-
|
223
|
+
int error;
|
224
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
177
225
|
|
178
|
-
if (
|
179
|
-
|
226
|
+
if (RB_TEST(rb_value)) {
|
227
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
180
228
|
} else {
|
181
|
-
|
229
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
230
|
+
}
|
231
|
+
|
232
|
+
if (error) {
|
233
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
182
234
|
}
|
183
235
|
|
184
|
-
return
|
236
|
+
return rb_value;
|
185
237
|
}
|
186
238
|
|
187
239
|
/*
|
188
240
|
* call-seq:
|
189
|
-
*
|
241
|
+
* replace_entities
|
242
|
+
*
|
243
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
190
244
|
*
|
191
|
-
*
|
192
|
-
*
|
245
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
246
|
+
*
|
247
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
248
|
+
* replacement.
|
193
249
|
*/
|
194
250
|
static VALUE
|
195
|
-
|
251
|
+
noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
|
196
252
|
{
|
197
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
253
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
198
254
|
|
199
|
-
if (
|
200
|
-
return Qfalse;
|
201
|
-
} else {
|
255
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
202
256
|
return Qtrue;
|
257
|
+
} else {
|
258
|
+
return Qfalse;
|
203
259
|
}
|
204
260
|
}
|
205
261
|
|
206
262
|
/*
|
207
263
|
* call-seq: line
|
208
264
|
*
|
209
|
-
*
|
265
|
+
* [Returns] (Integer) the line number of the line being currently parsed.
|
210
266
|
*/
|
211
267
|
static VALUE
|
212
|
-
|
268
|
+
noko_xml_sax_parser_context__line(VALUE rb_context)
|
213
269
|
{
|
214
270
|
xmlParserInputPtr io;
|
215
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
271
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
216
272
|
|
217
273
|
io = ctxt->input;
|
218
274
|
if (io) {
|
@@ -225,12 +281,12 @@ line(VALUE self)
|
|
225
281
|
/*
|
226
282
|
* call-seq: column
|
227
283
|
*
|
228
|
-
*
|
284
|
+
* [Returns] (Integer) the column number of the column being currently parsed.
|
229
285
|
*/
|
230
286
|
static VALUE
|
231
|
-
|
287
|
+
noko_xml_sax_parser_context__column(VALUE rb_context)
|
232
288
|
{
|
233
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
289
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
234
290
|
xmlParserInputPtr io;
|
235
291
|
|
236
292
|
io = ctxt->input;
|
@@ -243,41 +299,66 @@ column(VALUE self)
|
|
243
299
|
|
244
300
|
/*
|
245
301
|
* call-seq:
|
246
|
-
*
|
302
|
+
* recovery=(value)
|
303
|
+
*
|
304
|
+
* Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
|
305
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
306
|
+
* parser will stop processing the file on the first parsing error.
|
307
|
+
*
|
308
|
+
* [Parameters]
|
309
|
+
* - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
|
310
|
+
*
|
311
|
+
* [Returns] (Boolean) The passed +value+.
|
312
|
+
*
|
313
|
+
* [Example]
|
314
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
315
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
247
316
|
*
|
248
|
-
*
|
249
|
-
*
|
317
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
318
|
+
* parser.parse(xml) do |ctx|
|
319
|
+
* ctx.recovery = true
|
320
|
+
* end
|
250
321
|
*/
|
251
322
|
static VALUE
|
252
|
-
|
323
|
+
noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
|
253
324
|
{
|
254
|
-
|
325
|
+
int error;
|
326
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
255
327
|
|
256
|
-
if (
|
257
|
-
|
328
|
+
if (RB_TEST(rb_value)) {
|
329
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
|
258
330
|
} else {
|
259
|
-
|
331
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
|
260
332
|
}
|
261
333
|
|
262
|
-
|
334
|
+
if (error) {
|
335
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
336
|
+
}
|
337
|
+
|
338
|
+
return rb_value;
|
263
339
|
}
|
264
340
|
|
265
341
|
/*
|
266
342
|
* call-seq:
|
267
|
-
*
|
343
|
+
* recovery
|
344
|
+
*
|
345
|
+
* Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
|
346
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
347
|
+
* parser will stop processing the file on the first parsing error.
|
348
|
+
*
|
349
|
+
* [Returns] (Boolean) Whether this parser will recover from parsing errors.
|
268
350
|
*
|
269
|
-
*
|
270
|
-
* file on structural errors if set to true
|
351
|
+
* Default is +false+ for XML and +true+ for HTML.
|
271
352
|
*/
|
272
353
|
static VALUE
|
273
|
-
|
354
|
+
noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
|
274
355
|
{
|
275
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
356
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
276
357
|
|
277
|
-
if (ctxt
|
278
|
-
return Qfalse;
|
279
|
-
} else {
|
358
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
|
280
359
|
return Qtrue;
|
360
|
+
} else {
|
361
|
+
return Qfalse;
|
281
362
|
}
|
282
363
|
}
|
283
364
|
|
@@ -288,17 +369,22 @@ noko_init_xml_sax_parser_context(void)
|
|
288
369
|
|
289
370
|
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
290
371
|
|
291
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
292
|
-
|
293
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
299
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
300
|
-
|
301
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
372
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
|
373
|
+
noko_xml_sax_parser_context_s_native_io, 2);
|
374
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
|
375
|
+
noko_xml_sax_parser_context_s_native_memory, 2);
|
376
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
|
377
|
+
noko_xml_sax_parser_context_s_native_file, 2);
|
378
|
+
|
379
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
|
380
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
|
381
|
+
noko_xml_sax_parser_context__replace_entities_set, 1);
|
382
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
|
383
|
+
noko_xml_sax_parser_context__replace_entities_get, 0);
|
384
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
|
385
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
|
386
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
|
387
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
|
302
388
|
|
303
389
|
id_read = rb_intern("read");
|
304
390
|
}
|