nokogiri 1.16.8-x64-mingw-ucrt → 1.17.0-x64-mingw-ucrt
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
- data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
- data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +130 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- metadata +8 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -5,11 +5,16 @@ VALUE cNokogiriXmlSaxParserContext ;
|
|
5
5
|
static ID id_read;
|
6
6
|
|
7
7
|
static void
|
8
|
-
|
8
|
+
xml_sax_parser_context_type_free(void *data)
|
9
9
|
{
|
10
10
|
xmlParserCtxtPtr ctxt = data;
|
11
11
|
ctxt->sax = NULL;
|
12
|
-
|
12
|
+
if (ctxt->myDoc) {
|
13
|
+
xmlFreeDoc(ctxt->myDoc);
|
14
|
+
}
|
15
|
+
if (ctxt) {
|
16
|
+
xmlFreeParserCtxt(ctxt);
|
17
|
+
}
|
13
18
|
}
|
14
19
|
|
15
20
|
/*
|
@@ -17,9 +22,9 @@ xml_sax_parser_context_free(void *data)
|
|
17
22
|
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
18
23
|
*/
|
19
24
|
static const rb_data_type_t xml_sax_parser_context_type = {
|
20
|
-
.wrap_struct_name = "
|
25
|
+
.wrap_struct_name = "xmlParserCtxt",
|
21
26
|
.function = {
|
22
|
-
.dfree =
|
27
|
+
.dfree = xml_sax_parser_context_type_free,
|
23
28
|
},
|
24
29
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
25
30
|
};
|
@@ -38,104 +43,118 @@ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
|
38
43
|
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
39
44
|
}
|
40
45
|
|
46
|
+
void
|
47
|
+
noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
|
48
|
+
{
|
49
|
+
if (!NIL_P(rb_encoding)) {
|
50
|
+
VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
|
51
|
+
|
52
|
+
char *encoding_name = StringValueCStr(rb_encoding_name);
|
53
|
+
if (encoding_name) {
|
54
|
+
libxmlStructuredErrorHandlerState handler_state;
|
55
|
+
VALUE rb_errors = rb_ary_new();
|
41
56
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
57
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
58
|
+
|
59
|
+
int result = xmlSwitchEncodingName(c_context, encoding_name);
|
60
|
+
|
61
|
+
noko__structured_error_func_restore(&handler_state);
|
62
|
+
|
63
|
+
if (result != 0) {
|
64
|
+
xmlFreeParserCtxt(c_context);
|
65
|
+
|
66
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
67
|
+
if (!NIL_P(exception)) {
|
68
|
+
rb_exc_raise(exception);
|
69
|
+
} else {
|
70
|
+
rb_raise(rb_eRuntimeError, "could not set encoding");
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
/* :nodoc: */
|
48
78
|
static VALUE
|
49
|
-
|
79
|
+
noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
|
50
80
|
{
|
51
|
-
|
52
|
-
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
53
|
-
|
54
|
-
if (!rb_respond_to(io, id_read)) {
|
81
|
+
if (!rb_respond_to(rb_io, id_read)) {
|
55
82
|
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
56
83
|
}
|
57
84
|
|
58
|
-
|
59
|
-
|
60
|
-
(xmlInputCloseCallback)noko_io_close,
|
61
|
-
(void *)io, enc);
|
62
|
-
if (!ctxt) {
|
63
|
-
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
85
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
86
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
64
87
|
}
|
65
88
|
|
66
|
-
|
67
|
-
|
68
|
-
|
89
|
+
xmlParserCtxtPtr c_context =
|
90
|
+
xmlCreateIOParserCtxt(NULL, NULL,
|
91
|
+
(xmlInputReadCallback)noko_io_read,
|
92
|
+
(xmlInputCloseCallback)noko_io_close,
|
93
|
+
(void *)rb_io, XML_CHAR_ENCODING_NONE);
|
94
|
+
if (!c_context) {
|
95
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
69
96
|
}
|
70
97
|
|
71
|
-
|
72
|
-
}
|
73
|
-
|
74
|
-
/*
|
75
|
-
* call-seq:
|
76
|
-
* parse_file(filename)
|
77
|
-
*
|
78
|
-
* Parse file given +filename+
|
79
|
-
*/
|
80
|
-
static VALUE
|
81
|
-
parse_file(VALUE klass, VALUE filename)
|
82
|
-
{
|
83
|
-
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
98
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
84
99
|
|
85
|
-
if (
|
86
|
-
xmlFree(
|
87
|
-
|
100
|
+
if (c_context->sax) {
|
101
|
+
xmlFree(c_context->sax);
|
102
|
+
c_context->sax = NULL;
|
88
103
|
}
|
89
104
|
|
90
|
-
return noko_xml_sax_parser_context_wrap(
|
105
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
91
106
|
}
|
92
107
|
|
93
|
-
/*
|
94
|
-
* call-seq:
|
95
|
-
* parse_memory(data)
|
96
|
-
*
|
97
|
-
* Parse the XML stored in memory in +data+
|
98
|
-
*/
|
108
|
+
/* :nodoc: */
|
99
109
|
static VALUE
|
100
|
-
|
110
|
+
noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
|
101
111
|
{
|
102
|
-
|
103
|
-
|
104
|
-
|
112
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
113
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
114
|
+
}
|
105
115
|
|
106
|
-
|
107
|
-
|
116
|
+
xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
|
117
|
+
if (!c_context) {
|
118
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
108
119
|
}
|
109
120
|
|
110
|
-
|
111
|
-
|
112
|
-
if (
|
113
|
-
xmlFree(
|
114
|
-
|
121
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
122
|
+
|
123
|
+
if (c_context->sax) {
|
124
|
+
xmlFree(c_context->sax);
|
125
|
+
c_context->sax = NULL;
|
115
126
|
}
|
116
127
|
|
117
|
-
return noko_xml_sax_parser_context_wrap(
|
128
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
118
129
|
}
|
119
130
|
|
131
|
+
/* :nodoc: */
|
120
132
|
static VALUE
|
121
|
-
|
133
|
+
noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
|
122
134
|
{
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
}
|
135
|
+
Check_Type(rb_input, T_STRING);
|
136
|
+
if (!(int)RSTRING_LEN(rb_input)) {
|
137
|
+
rb_raise(rb_eRuntimeError, "input string cannot be empty");
|
138
|
+
}
|
127
139
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
140
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
141
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
142
|
+
}
|
132
143
|
|
133
|
-
|
134
|
-
|
144
|
+
xmlParserCtxtPtr c_context =
|
145
|
+
xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
|
146
|
+
if (!c_context) {
|
147
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
135
148
|
}
|
136
149
|
|
137
|
-
|
138
|
-
|
150
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
151
|
+
|
152
|
+
if (c_context->sax) {
|
153
|
+
xmlFree(c_context->sax);
|
154
|
+
c_context->sax = NULL;
|
155
|
+
}
|
156
|
+
|
157
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
139
158
|
}
|
140
159
|
|
141
160
|
/*
|
@@ -143,80 +162,113 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
143
162
|
* parse_with(sax_handler)
|
144
163
|
*
|
145
164
|
* Use +sax_handler+ and parse the current document
|
165
|
+
*
|
166
|
+
* 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
|
167
|
+
* more convenient for most use cases.
|
146
168
|
*/
|
147
169
|
static VALUE
|
148
|
-
|
170
|
+
noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
|
149
171
|
{
|
150
|
-
xmlParserCtxtPtr
|
172
|
+
xmlParserCtxtPtr c_context;
|
151
173
|
xmlSAXHandlerPtr sax;
|
152
174
|
|
153
|
-
if (!rb_obj_is_kind_of(
|
175
|
+
if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
|
154
176
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
155
177
|
}
|
156
178
|
|
157
|
-
|
158
|
-
sax =
|
179
|
+
c_context = noko_xml_sax_parser_context_unwrap(rb_context);
|
180
|
+
sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
|
159
181
|
|
160
|
-
|
161
|
-
|
182
|
+
c_context->sax = sax;
|
183
|
+
c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
|
184
|
+
c_context->_private = (void *)rb_sax_parser;
|
162
185
|
|
163
186
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
164
187
|
|
165
|
-
|
188
|
+
/* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
|
189
|
+
* don't have any cleanup to do. The only memory we need to free is handled by
|
190
|
+
* xml_sax_parser_context_type_free */
|
191
|
+
xmlParseDocument(c_context);
|
166
192
|
|
167
193
|
return Qnil;
|
168
194
|
}
|
169
195
|
|
170
196
|
/*
|
171
197
|
* call-seq:
|
172
|
-
*
|
198
|
+
* replace_entities=(value)
|
199
|
+
*
|
200
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
201
|
+
*
|
202
|
+
* [Parameters]
|
203
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
204
|
+
*
|
205
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
206
|
+
* defaults to +false+ for this reason.
|
173
207
|
*
|
174
|
-
*
|
175
|
-
*
|
208
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
209
|
+
* replacement.
|
210
|
+
*
|
211
|
+
* [Example]
|
212
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
213
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
214
|
+
*
|
215
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
216
|
+
* parser.parse(xml) do |ctx|
|
217
|
+
* ctx.replace_entities = true # this is UNSAFE for untrusted documents!
|
218
|
+
* end
|
176
219
|
*/
|
177
220
|
static VALUE
|
178
|
-
|
221
|
+
noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
|
179
222
|
{
|
180
|
-
|
223
|
+
int error;
|
224
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
181
225
|
|
182
|
-
if (
|
183
|
-
|
226
|
+
if (RB_TEST(rb_value)) {
|
227
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
184
228
|
} else {
|
185
|
-
|
229
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
230
|
+
}
|
231
|
+
|
232
|
+
if (error) {
|
233
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
186
234
|
}
|
187
235
|
|
188
|
-
return
|
236
|
+
return rb_value;
|
189
237
|
}
|
190
238
|
|
191
239
|
/*
|
192
240
|
* call-seq:
|
193
|
-
*
|
241
|
+
* replace_entities
|
194
242
|
*
|
195
|
-
*
|
196
|
-
*
|
243
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
244
|
+
*
|
245
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
246
|
+
*
|
247
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
248
|
+
* replacement.
|
197
249
|
*/
|
198
250
|
static VALUE
|
199
|
-
|
251
|
+
noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
|
200
252
|
{
|
201
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
253
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
202
254
|
|
203
|
-
if (
|
204
|
-
return Qfalse;
|
205
|
-
} else {
|
255
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
206
256
|
return Qtrue;
|
257
|
+
} else {
|
258
|
+
return Qfalse;
|
207
259
|
}
|
208
260
|
}
|
209
261
|
|
210
262
|
/*
|
211
263
|
* call-seq: line
|
212
264
|
*
|
213
|
-
*
|
265
|
+
* [Returns] (Integer) the line number of the line being currently parsed.
|
214
266
|
*/
|
215
267
|
static VALUE
|
216
|
-
|
268
|
+
noko_xml_sax_parser_context__line(VALUE rb_context)
|
217
269
|
{
|
218
270
|
xmlParserInputPtr io;
|
219
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
271
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
220
272
|
|
221
273
|
io = ctxt->input;
|
222
274
|
if (io) {
|
@@ -229,12 +281,12 @@ line(VALUE self)
|
|
229
281
|
/*
|
230
282
|
* call-seq: column
|
231
283
|
*
|
232
|
-
*
|
284
|
+
* [Returns] (Integer) the column number of the column being currently parsed.
|
233
285
|
*/
|
234
286
|
static VALUE
|
235
|
-
|
287
|
+
noko_xml_sax_parser_context__column(VALUE rb_context)
|
236
288
|
{
|
237
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
289
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
238
290
|
xmlParserInputPtr io;
|
239
291
|
|
240
292
|
io = ctxt->input;
|
@@ -247,41 +299,66 @@ column(VALUE self)
|
|
247
299
|
|
248
300
|
/*
|
249
301
|
* call-seq:
|
250
|
-
*
|
302
|
+
* recovery=(value)
|
303
|
+
*
|
304
|
+
* Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
|
305
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
306
|
+
* parser will stop processing the file on the first parsing error.
|
307
|
+
*
|
308
|
+
* [Parameters]
|
309
|
+
* - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
|
310
|
+
*
|
311
|
+
* [Returns] (Boolean) The passed +value+.
|
251
312
|
*
|
252
|
-
*
|
253
|
-
*
|
313
|
+
* [Example]
|
314
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
315
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
316
|
+
*
|
317
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
318
|
+
* parser.parse(xml) do |ctx|
|
319
|
+
* ctx.recovery = true
|
320
|
+
* end
|
254
321
|
*/
|
255
322
|
static VALUE
|
256
|
-
|
323
|
+
noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
|
257
324
|
{
|
258
|
-
|
325
|
+
int error;
|
326
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
259
327
|
|
260
|
-
if (
|
261
|
-
|
328
|
+
if (RB_TEST(rb_value)) {
|
329
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
|
262
330
|
} else {
|
263
|
-
|
331
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
|
332
|
+
}
|
333
|
+
|
334
|
+
if (error) {
|
335
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
264
336
|
}
|
265
337
|
|
266
|
-
return
|
338
|
+
return rb_value;
|
267
339
|
}
|
268
340
|
|
269
341
|
/*
|
270
342
|
* call-seq:
|
271
|
-
*
|
343
|
+
* recovery
|
344
|
+
*
|
345
|
+
* Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
|
346
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
347
|
+
* parser will stop processing the file on the first parsing error.
|
348
|
+
*
|
349
|
+
* [Returns] (Boolean) Whether this parser will recover from parsing errors.
|
272
350
|
*
|
273
|
-
*
|
274
|
-
* file on structural errors if set to true
|
351
|
+
* Default is +false+ for XML and +true+ for HTML.
|
275
352
|
*/
|
276
353
|
static VALUE
|
277
|
-
|
354
|
+
noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
|
278
355
|
{
|
279
|
-
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(
|
356
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
280
357
|
|
281
|
-
if (ctxt
|
282
|
-
return Qfalse;
|
283
|
-
} else {
|
358
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
|
284
359
|
return Qtrue;
|
360
|
+
} else {
|
361
|
+
return Qfalse;
|
285
362
|
}
|
286
363
|
}
|
287
364
|
|
@@ -292,17 +369,22 @@ noko_init_xml_sax_parser_context(void)
|
|
292
369
|
|
293
370
|
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
294
371
|
|
295
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
296
|
-
|
297
|
-
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
303
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
304
|
-
|
305
|
-
rb_define_method(cNokogiriXmlSaxParserContext, "
|
372
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
|
373
|
+
noko_xml_sax_parser_context_s_native_io, 2);
|
374
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
|
375
|
+
noko_xml_sax_parser_context_s_native_memory, 2);
|
376
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
|
377
|
+
noko_xml_sax_parser_context_s_native_file, 2);
|
378
|
+
|
379
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
|
380
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
|
381
|
+
noko_xml_sax_parser_context__replace_entities_set, 1);
|
382
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
|
383
|
+
noko_xml_sax_parser_context__replace_entities_get, 0);
|
384
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
|
385
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
|
386
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
|
387
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
|
306
388
|
|
307
389
|
id_read = rb_intern("read");
|
308
390
|
}
|