nokogiri 1.15.3 → 1.18.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +12 -17
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +11 -5
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +191 -154
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +26 -25
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +25 -33
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +3 -12
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +173 -158
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -10
- data/ext/nokogiri/xml_node.c +142 -108
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -100
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +220 -128
- data/ext/nokogiri/xml_sax_push_parser.c +69 -50
- data/ext/nokogiri/xml_schema.c +51 -87
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +3 -6
- data/ext/nokogiri/xml_xpath_context.c +104 -104
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +66 -25
- data/gumbo-parser/src/tokenizer.c +7 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +44 -27
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +45 -24
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +2 -2
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -138
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +74 -31
- data/lib/nokogiri/xml/document_fragment.rb +86 -15
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +113 -35
- data/lib/nokogiri/xml/node_set.rb +12 -10
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +39 -43
- data/lib/nokogiri/xml/syntax_error.rb +23 -1
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +14 -25
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +4 -10
- data/lib/nokogiri.rb +1 -1
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.43.tar.xz +0 -0
- metadata +13 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
|
|
5
5
|
static ID id_read;
|
6
6
|
|
7
7
|
static void
|
8
|
-
|
8
|
+
xml_sax_parser_context_type_free(void *data)
|
9
9
|
{
|
10
10
|
xmlParserCtxtPtr ctxt = data;
|
11
11
|
ctxt->sax = NULL;
|
12
|
-
|
12
|
+
if (ctxt->myDoc) {
|
13
|
+
xmlFreeDoc(ctxt->myDoc);
|
14
|
+
}
|
15
|
+
if (ctxt) {
|
16
|
+
xmlFreeParserCtxt(ctxt);
|
17
|
+
}
|
13
18
|
}
|
14
19
|
|
15
20
|
/*
|
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
|
|
17
22
|
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
18
23
|
*/
|
19
24
|
static const rb_data_type_t xml_sax_parser_context_type = {
|
20
|
-
.wrap_struct_name = "
|
25
|
+
.wrap_struct_name = "xmlParserCtxt",
|
21
26
|
.function = {
|
22
|
-
.dfree =
|
27
|
+
.dfree = xml_sax_parser_context_type_free,
|
23
28
|
},
|
24
29
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
25
30
|
};
|
@@ -38,100 +43,124 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
|
38
43
|
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
39
44
|
}
|
40
45
|
|
46
|
+
void
|
47
|
+
noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
|
48
|
+
{
|
49
|
+
if (!NIL_P(rb_encoding)) {
|
50
|
+
VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
|
51
|
+
|
52
|
+
char *encoding_name = StringValueCStr(rb_encoding_name);
|
53
|
+
if (encoding_name) {
|
54
|
+
libxmlStructuredErrorHandlerState handler_state;
|
55
|
+
VALUE rb_errors = rb_ary_new();
|
41
56
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
57
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
58
|
+
|
59
|
+
int result = xmlSwitchEncodingName(c_context, encoding_name);
|
60
|
+
|
61
|
+
noko__structured_error_func_restore(&handler_state);
|
62
|
+
|
63
|
+
if (result != 0) {
|
64
|
+
xmlFreeParserCtxt(c_context);
|
65
|
+
|
66
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
67
|
+
if (!NIL_P(exception)) {
|
68
|
+
rb_exc_raise(exception);
|
69
|
+
} else {
|
70
|
+
rb_raise(rb_eRuntimeError, "could not set encoding");
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
/* :nodoc: */
|
48
78
|
static VALUE
|
49
|
-
|
79
|
+
noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
|
50
80
|
{
|
51
|
-
|
52
|
-
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
53
|
-
|
54
|
-
if (!rb_respond_to(io, id_read)) {
|
81
|
+
if (!rb_respond_to(rb_io, id_read)) {
|
55
82
|
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
56
83
|
}
|
57
84
|
|
58
|
-
|
59
|
-
|
60
|
-
(xmlInputCloseCallback)noko_io_close,
|
61
|
-
(void *)io, enc);
|
62
|
-
if (ctxt->sax) {
|
63
|
-
xmlFree(ctxt->sax);
|
64
|
-
ctxt->sax = NULL;
|
85
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
86
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
65
87
|
}
|
66
88
|
|
67
|
-
|
68
|
-
|
89
|
+
xmlParserCtxtPtr c_context =
|
90
|
+
xmlCreateIOParserCtxt(NULL, NULL,
|
91
|
+
(xmlInputReadCallback)noko_io_read,
|
92
|
+
(xmlInputCloseCallback)noko_io_close,
|
93
|
+
(void *)rb_io, XML_CHAR_ENCODING_NONE);
|
94
|
+
if (!c_context) {
|
95
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
96
|
+
}
|
69
97
|
|
70
|
-
|
71
|
-
* call-seq:
|
72
|
-
* parse_file(filename)
|
73
|
-
*
|
74
|
-
* Parse file given +filename+
|
75
|
-
*/
|
76
|
-
static VALUE
|
77
|
-
parse_file(VALUE klass, VALUE filename)
|
78
|
-
{
|
79
|
-
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
98
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
80
99
|
|
81
|
-
if (
|
82
|
-
xmlFree(
|
83
|
-
|
100
|
+
if (c_context->sax) {
|
101
|
+
xmlFree(c_context->sax);
|
102
|
+
c_context->sax = NULL;
|
84
103
|
}
|
85
104
|
|
86
|
-
|
105
|
+
VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
106
|
+
rb_iv_set(rb_context, "@input", rb_io);
|
107
|
+
|
108
|
+
return rb_context;
|
87
109
|
}
|
88
110
|
|
89
|
-
/*
|
90
|
-
* call-seq:
|
91
|
-
* parse_memory(data)
|
92
|
-
*
|
93
|
-
* Parse the XML stored in memory in +data+
|
94
|
-
*/
|
111
|
+
/* :nodoc: */
|
95
112
|
static VALUE
|
96
|
-
|
113
|
+
noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
|
97
114
|
{
|
98
|
-
|
99
|
-
|
100
|
-
|
115
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
116
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
117
|
+
}
|
101
118
|
|
102
|
-
|
103
|
-
|
119
|
+
xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
|
120
|
+
if (!c_context) {
|
121
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
104
122
|
}
|
105
123
|
|
106
|
-
|
107
|
-
|
108
|
-
if (
|
109
|
-
xmlFree(
|
110
|
-
|
124
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
125
|
+
|
126
|
+
if (c_context->sax) {
|
127
|
+
xmlFree(c_context->sax);
|
128
|
+
c_context->sax = NULL;
|
111
129
|
}
|
112
130
|
|
113
|
-
return noko_xml_sax_parser_context_wrap(
|
131
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
114
132
|
}
|
115
133
|
|
134
|
+
/* :nodoc: */
|
116
135
|
static VALUE
|
117
|
-
|
136
|
+
noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
|
118
137
|
{
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
}
|
138
|
+
Check_Type(rb_input, T_STRING);
|
139
|
+
if (!(int)RSTRING_LEN(rb_input)) {
|
140
|
+
rb_raise(rb_eRuntimeError, "input string cannot be empty");
|
141
|
+
}
|
123
142
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
143
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
144
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
145
|
+
}
|
128
146
|
|
129
|
-
|
130
|
-
|
147
|
+
xmlParserCtxtPtr c_context =
|
148
|
+
xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
|
149
|
+
if (!c_context) {
|
150
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
131
151
|
}
|
132
152
|
|
133
|
-
|
134
|
-
|
153
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
154
|
+
|
155
|
+
if (c_context->sax) {
|
156
|
+
xmlFree(c_context->sax);
|
157
|
+
c_context->sax = NULL;
|
158
|
+
}
|
159
|
+
|
160
|
+
VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
161
|
+
rb_iv_set(rb_context, "@input", rb_input);
|
162
|
+
|
163
|
+
return rb_context;
|
135
164
|
}
|
136
165
|
|
137
166
|
/*
|
@@ -139,80 +168,113 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
139
168
|
* parse_with(sax_handler)
|
140
169
|
*
|
141
170
|
* Use +sax_handler+ and parse the current document
|
171
|
+
*
|
172
|
+
* 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
|
173
|
+
* more convenient for most use cases.
|
142
174
|
*/
|
143
175
|
static VALUE
|
144
|
-
|
176
|
+
noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
|
145
177
|
{
|
146
|
-
xmlParserCtxtPtr
|
178
|
+
xmlParserCtxtPtr c_context;
|
147
179
|
xmlSAXHandlerPtr sax;
|
148
180
|
|
149
|
-
if (!rb_obj_is_kind_of(
|
181
|
+
if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
|
150
182
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
151
183
|
}
|
152
184
|
|
153
|
-
|
154
|
-
sax =
|
185
|
+
c_context = noko_xml_sax_parser_context_unwrap(rb_context);
|
186
|
+
sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
|
155
187
|
|
156
|
-
|
157
|
-
|
188
|
+
c_context->sax = sax;
|
189
|
+
c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
|
190
|
+
c_context->_private = (void *)rb_sax_parser;
|
158
191
|
|
159
192
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
160
193
|
|
161
|
-
|
194
|
+
/* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
|
195
|
+
* don't have any cleanup to do. The only memory we need to free is handled by
|
196
|
+
* xml_sax_parser_context_type_free */
|
197
|
+
xmlParseDocument(c_context);
|
162
198
|
|
163
199
|
return Qnil;
|
164
200
|
}
|
165
201
|
|
166
202
|
/*
|
167
203
|
* call-seq:
|
168
|
-
*
|
204
|
+
* replace_entities=(value)
|
205
|
+
*
|
206
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
207
|
+
*
|
208
|
+
* [Parameters]
|
209
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
210
|
+
*
|
211
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
212
|
+
* defaults to +false+ for this reason.
|
169
213
|
*
|
170
|
-
*
|
171
|
-
*
|
214
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
215
|
+
* replacement.
|
216
|
+
*
|
217
|
+
* [Example]
|
218
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
219
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
220
|
+
*
|
221
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
222
|
+
* parser.parse(xml) do |ctx|
|
223
|
+
* ctx.replace_entities = true # this is UNSAFE for untrusted documents!
|
224
|
+
* end
|
172
225
|
*/
|
173
226
|
static VALUE
|
174
|
-
|
227
|
+
noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
|
175
228
|
{
|
176
|
-
|
229
|
+
int error;
|
230
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
177
231
|
|
178
|
-
if (
|
179
|
-
|
232
|
+
if (RB_TEST(rb_value)) {
|
233
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
180
234
|
} else {
|
181
|
-
|
235
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
236
|
+
}
|
237
|
+
|
238
|
+
if (error) {
|
239
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
182
240
|
}
|
183
241
|
|
184
|
-
return
|
242
|
+
return rb_value;
|
185
243
|
}
|
186
244
|
|
187
245
|
/*
|
188
246
|
* call-seq:
|
189
|
-
*
|
247
|
+
* replace_entities
|
190
248
|
*
|
191
|
-
*
|
192
|
-
*
|
249
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
250
|
+
*
|
251
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
252
|
+
*
|
253
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
254
|
+
* replacement.
|
193
255
|
*/
|
194
256
|
static VALUE
|
195
|
-
|
257
|
+
noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
|
196
258
|
{
|
197
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
259
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
198
260
|
|
199
|
-
if (
|
200
|
-
return Qfalse;
|
201
|
-
} else {
|
261
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
202
262
|
return Qtrue;
|
263
|
+
} else {
|
264
|
+
return Qfalse;
|
203
265
|
}
|
204
266
|
}
|
205
267
|
|
206
268
|
/*
|
207
269
|
* call-seq: line
|
208
270
|
*
|
209
|
-
*
|
271
|
+
* [Returns] (Integer) the line number of the line being currently parsed.
|
210
272
|
*/
|
211
273
|
static VALUE
|
212
|
-
|
274
|
+
noko_xml_sax_parser_context__line(VALUE rb_context)
|
213
275
|
{
|
214
276
|
xmlParserInputPtr io;
|
215
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
277
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
216
278
|
|
217
279
|
io = ctxt->input;
|
218
280
|
if (io) {
|
@@ -225,12 +287,12 @@ line(VALUE self)
|
|
225
287
|
/*
|
226
288
|
* call-seq: column
|
227
289
|
*
|
228
|
-
*
|
290
|
+
* [Returns] (Integer) the column number of the column being currently parsed.
|
229
291
|
*/
|
230
292
|
static VALUE
|
231
|
-
|
293
|
+
noko_xml_sax_parser_context__column(VALUE rb_context)
|
232
294
|
{
|
233
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
295
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
234
296
|
xmlParserInputPtr io;
|
235
297
|
|
236
298
|
io = ctxt->input;
|
@@ -243,41 +305,66 @@ column(VALUE self)
|
|
243
305
|
|
244
306
|
/*
|
245
307
|
* call-seq:
|
246
|
-
*
|
308
|
+
* recovery=(value)
|
309
|
+
*
|
310
|
+
* Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
|
311
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
312
|
+
* parser will stop processing the file on the first parsing error.
|
313
|
+
*
|
314
|
+
* [Parameters]
|
315
|
+
* - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
|
316
|
+
*
|
317
|
+
* [Returns] (Boolean) The passed +value+.
|
247
318
|
*
|
248
|
-
*
|
249
|
-
*
|
319
|
+
* [Example]
|
320
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
321
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
322
|
+
*
|
323
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
324
|
+
* parser.parse(xml) do |ctx|
|
325
|
+
* ctx.recovery = true
|
326
|
+
* end
|
250
327
|
*/
|
251
328
|
static VALUE
|
252
|
-
|
329
|
+
noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
|
253
330
|
{
|
254
|
-
|
331
|
+
int error;
|
332
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
255
333
|
|
256
|
-
if (
|
257
|
-
|
334
|
+
if (RB_TEST(rb_value)) {
|
335
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
|
258
336
|
} else {
|
259
|
-
|
337
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
|
338
|
+
}
|
339
|
+
|
340
|
+
if (error) {
|
341
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
260
342
|
}
|
261
343
|
|
262
|
-
return
|
344
|
+
return rb_value;
|
263
345
|
}
|
264
346
|
|
265
347
|
/*
|
266
348
|
* call-seq:
|
267
|
-
*
|
349
|
+
* recovery
|
350
|
+
*
|
351
|
+
* Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
|
352
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
353
|
+
* parser will stop processing the file on the first parsing error.
|
354
|
+
*
|
355
|
+
* [Returns] (Boolean) Whether this parser will recover from parsing errors.
|
268
356
|
*
|
269
|
-
*
|
270
|
-
* file on structural errors if set to true
|
357
|
+
* Default is +false+ for XML and +true+ for HTML.
|
271
358
|
*/
|
272
359
|
static VALUE
|
273
|
-
|
360
|
+
noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
|
274
361
|
{
|
275
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
362
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
276
363
|
|
277
|
-
if (ctxt
|
278
|
-
return Qfalse;
|
279
|
-
} else {
|
364
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
|
280
365
|
return Qtrue;
|
366
|
+
} else {
|
367
|
+
return Qfalse;
|
281
368
|
}
|
282
369
|
}
|
283
370
|
|
@@ -288,17 +375,22 @@ noko_init_xml_sax_parser_context(void)
|
|
288
375
|
|
289
376
|
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
290
377
|
|
291
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
292
|
-
|
293
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
299
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
300
|
-
|
301
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
378
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
|
379
|
+
noko_xml_sax_parser_context_s_native_io, 2);
|
380
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
|
381
|
+
noko_xml_sax_parser_context_s_native_memory, 2);
|
382
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
|
383
|
+
noko_xml_sax_parser_context_s_native_file, 2);
|
384
|
+
|
385
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
|
386
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
|
387
|
+
noko_xml_sax_parser_context__replace_entities_set, 1);
|
388
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
|
389
|
+
noko_xml_sax_parser_context__replace_entities_get, 0);
|
390
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
|
391
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
|
392
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
|
393
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
|
302
394
|
|
303
395
|
id_read = rb_intern("read");
|
304
396
|
}
|