nokogiri 1.18.0.rc1-x86_64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,390 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlSaxParserContext ;
|
4
|
+
|
5
|
+
static ID id_read;
|
6
|
+
|
7
|
+
static void
|
8
|
+
xml_sax_parser_context_type_free(void *data)
|
9
|
+
{
|
10
|
+
xmlParserCtxtPtr ctxt = data;
|
11
|
+
ctxt->sax = NULL;
|
12
|
+
if (ctxt->myDoc) {
|
13
|
+
xmlFreeDoc(ctxt->myDoc);
|
14
|
+
}
|
15
|
+
if (ctxt) {
|
16
|
+
xmlFreeParserCtxt(ctxt);
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
/*
|
21
|
+
* note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
|
22
|
+
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
23
|
+
*/
|
24
|
+
static const rb_data_type_t xml_sax_parser_context_type = {
|
25
|
+
.wrap_struct_name = "xmlParserCtxt",
|
26
|
+
.function = {
|
27
|
+
.dfree = xml_sax_parser_context_type_free,
|
28
|
+
},
|
29
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
30
|
+
};
|
31
|
+
|
32
|
+
xmlParserCtxtPtr
|
33
|
+
noko_xml_sax_parser_context_unwrap(VALUE rb_context)
|
34
|
+
{
|
35
|
+
xmlParserCtxtPtr c_context;
|
36
|
+
TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
|
37
|
+
return c_context;
|
38
|
+
}
|
39
|
+
|
40
|
+
VALUE
|
41
|
+
noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
42
|
+
{
|
43
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
44
|
+
}
|
45
|
+
|
46
|
+
void
|
47
|
+
noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
|
48
|
+
{
|
49
|
+
if (!NIL_P(rb_encoding)) {
|
50
|
+
VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
|
51
|
+
|
52
|
+
char *encoding_name = StringValueCStr(rb_encoding_name);
|
53
|
+
if (encoding_name) {
|
54
|
+
libxmlStructuredErrorHandlerState handler_state;
|
55
|
+
VALUE rb_errors = rb_ary_new();
|
56
|
+
|
57
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
58
|
+
|
59
|
+
int result = xmlSwitchEncodingName(c_context, encoding_name);
|
60
|
+
|
61
|
+
noko__structured_error_func_restore(&handler_state);
|
62
|
+
|
63
|
+
if (result != 0) {
|
64
|
+
xmlFreeParserCtxt(c_context);
|
65
|
+
|
66
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
67
|
+
if (!NIL_P(exception)) {
|
68
|
+
rb_exc_raise(exception);
|
69
|
+
} else {
|
70
|
+
rb_raise(rb_eRuntimeError, "could not set encoding");
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
/* :nodoc: */
|
78
|
+
static VALUE
|
79
|
+
noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
|
80
|
+
{
|
81
|
+
if (!rb_respond_to(rb_io, id_read)) {
|
82
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
83
|
+
}
|
84
|
+
|
85
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
86
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
87
|
+
}
|
88
|
+
|
89
|
+
xmlParserCtxtPtr c_context =
|
90
|
+
xmlCreateIOParserCtxt(NULL, NULL,
|
91
|
+
(xmlInputReadCallback)noko_io_read,
|
92
|
+
(xmlInputCloseCallback)noko_io_close,
|
93
|
+
(void *)rb_io, XML_CHAR_ENCODING_NONE);
|
94
|
+
if (!c_context) {
|
95
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
96
|
+
}
|
97
|
+
|
98
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
99
|
+
|
100
|
+
if (c_context->sax) {
|
101
|
+
xmlFree(c_context->sax);
|
102
|
+
c_context->sax = NULL;
|
103
|
+
}
|
104
|
+
|
105
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
106
|
+
}
|
107
|
+
|
108
|
+
/* :nodoc: */
|
109
|
+
static VALUE
|
110
|
+
noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
|
111
|
+
{
|
112
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
113
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
114
|
+
}
|
115
|
+
|
116
|
+
xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
|
117
|
+
if (!c_context) {
|
118
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
119
|
+
}
|
120
|
+
|
121
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
122
|
+
|
123
|
+
if (c_context->sax) {
|
124
|
+
xmlFree(c_context->sax);
|
125
|
+
c_context->sax = NULL;
|
126
|
+
}
|
127
|
+
|
128
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
129
|
+
}
|
130
|
+
|
131
|
+
/* :nodoc: */
|
132
|
+
static VALUE
|
133
|
+
noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
|
134
|
+
{
|
135
|
+
Check_Type(rb_input, T_STRING);
|
136
|
+
if (!(int)RSTRING_LEN(rb_input)) {
|
137
|
+
rb_raise(rb_eRuntimeError, "input string cannot be empty");
|
138
|
+
}
|
139
|
+
|
140
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
141
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
142
|
+
}
|
143
|
+
|
144
|
+
xmlParserCtxtPtr c_context =
|
145
|
+
xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
|
146
|
+
if (!c_context) {
|
147
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
148
|
+
}
|
149
|
+
|
150
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
151
|
+
|
152
|
+
if (c_context->sax) {
|
153
|
+
xmlFree(c_context->sax);
|
154
|
+
c_context->sax = NULL;
|
155
|
+
}
|
156
|
+
|
157
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
158
|
+
}
|
159
|
+
|
160
|
+
/*
|
161
|
+
* call-seq:
|
162
|
+
* parse_with(sax_handler)
|
163
|
+
*
|
164
|
+
* Use +sax_handler+ and parse the current document
|
165
|
+
*
|
166
|
+
* 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
|
167
|
+
* more convenient for most use cases.
|
168
|
+
*/
|
169
|
+
static VALUE
|
170
|
+
noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
|
171
|
+
{
|
172
|
+
xmlParserCtxtPtr c_context;
|
173
|
+
xmlSAXHandlerPtr sax;
|
174
|
+
|
175
|
+
if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
|
176
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
177
|
+
}
|
178
|
+
|
179
|
+
c_context = noko_xml_sax_parser_context_unwrap(rb_context);
|
180
|
+
sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
|
181
|
+
|
182
|
+
c_context->sax = sax;
|
183
|
+
c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
|
184
|
+
c_context->_private = (void *)rb_sax_parser;
|
185
|
+
|
186
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
187
|
+
|
188
|
+
/* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
|
189
|
+
* don't have any cleanup to do. The only memory we need to free is handled by
|
190
|
+
* xml_sax_parser_context_type_free */
|
191
|
+
xmlParseDocument(c_context);
|
192
|
+
|
193
|
+
return Qnil;
|
194
|
+
}
|
195
|
+
|
196
|
+
/*
|
197
|
+
* call-seq:
|
198
|
+
* replace_entities=(value)
|
199
|
+
*
|
200
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
201
|
+
*
|
202
|
+
* [Parameters]
|
203
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
204
|
+
*
|
205
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
206
|
+
* defaults to +false+ for this reason.
|
207
|
+
*
|
208
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
209
|
+
* replacement.
|
210
|
+
*
|
211
|
+
* [Example]
|
212
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
213
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
214
|
+
*
|
215
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
216
|
+
* parser.parse(xml) do |ctx|
|
217
|
+
* ctx.replace_entities = true # this is UNSAFE for untrusted documents!
|
218
|
+
* end
|
219
|
+
*/
|
220
|
+
static VALUE
|
221
|
+
noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
|
222
|
+
{
|
223
|
+
int error;
|
224
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
225
|
+
|
226
|
+
if (RB_TEST(rb_value)) {
|
227
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
228
|
+
} else {
|
229
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
230
|
+
}
|
231
|
+
|
232
|
+
if (error) {
|
233
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
234
|
+
}
|
235
|
+
|
236
|
+
return rb_value;
|
237
|
+
}
|
238
|
+
|
239
|
+
/*
|
240
|
+
* call-seq:
|
241
|
+
* replace_entities
|
242
|
+
*
|
243
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
244
|
+
*
|
245
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
246
|
+
*
|
247
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
248
|
+
* replacement.
|
249
|
+
*/
|
250
|
+
static VALUE
|
251
|
+
noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
|
252
|
+
{
|
253
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
254
|
+
|
255
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
256
|
+
return Qtrue;
|
257
|
+
} else {
|
258
|
+
return Qfalse;
|
259
|
+
}
|
260
|
+
}
|
261
|
+
|
262
|
+
/*
|
263
|
+
* call-seq: line
|
264
|
+
*
|
265
|
+
* [Returns] (Integer) the line number of the line being currently parsed.
|
266
|
+
*/
|
267
|
+
static VALUE
|
268
|
+
noko_xml_sax_parser_context__line(VALUE rb_context)
|
269
|
+
{
|
270
|
+
xmlParserInputPtr io;
|
271
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
272
|
+
|
273
|
+
io = ctxt->input;
|
274
|
+
if (io) {
|
275
|
+
return INT2NUM(io->line);
|
276
|
+
}
|
277
|
+
|
278
|
+
return Qnil;
|
279
|
+
}
|
280
|
+
|
281
|
+
/*
|
282
|
+
* call-seq: column
|
283
|
+
*
|
284
|
+
* [Returns] (Integer) the column number of the column being currently parsed.
|
285
|
+
*/
|
286
|
+
static VALUE
|
287
|
+
noko_xml_sax_parser_context__column(VALUE rb_context)
|
288
|
+
{
|
289
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
290
|
+
xmlParserInputPtr io;
|
291
|
+
|
292
|
+
io = ctxt->input;
|
293
|
+
if (io) {
|
294
|
+
return INT2NUM(io->col);
|
295
|
+
}
|
296
|
+
|
297
|
+
return Qnil;
|
298
|
+
}
|
299
|
+
|
300
|
+
/*
|
301
|
+
* call-seq:
|
302
|
+
* recovery=(value)
|
303
|
+
*
|
304
|
+
* Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
|
305
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
306
|
+
* parser will stop processing the file on the first parsing error.
|
307
|
+
*
|
308
|
+
* [Parameters]
|
309
|
+
* - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
|
310
|
+
*
|
311
|
+
* [Returns] (Boolean) The passed +value+.
|
312
|
+
*
|
313
|
+
* [Example]
|
314
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
315
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
316
|
+
*
|
317
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
318
|
+
* parser.parse(xml) do |ctx|
|
319
|
+
* ctx.recovery = true
|
320
|
+
* end
|
321
|
+
*/
|
322
|
+
static VALUE
|
323
|
+
noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
|
324
|
+
{
|
325
|
+
int error;
|
326
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
327
|
+
|
328
|
+
if (RB_TEST(rb_value)) {
|
329
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
|
330
|
+
} else {
|
331
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
|
332
|
+
}
|
333
|
+
|
334
|
+
if (error) {
|
335
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
336
|
+
}
|
337
|
+
|
338
|
+
return rb_value;
|
339
|
+
}
|
340
|
+
|
341
|
+
/*
|
342
|
+
* call-seq:
|
343
|
+
* recovery
|
344
|
+
*
|
345
|
+
* Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
|
346
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
347
|
+
* parser will stop processing the file on the first parsing error.
|
348
|
+
*
|
349
|
+
* [Returns] (Boolean) Whether this parser will recover from parsing errors.
|
350
|
+
*
|
351
|
+
* Default is +false+ for XML and +true+ for HTML.
|
352
|
+
*/
|
353
|
+
static VALUE
|
354
|
+
noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
|
355
|
+
{
|
356
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
357
|
+
|
358
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
|
359
|
+
return Qtrue;
|
360
|
+
} else {
|
361
|
+
return Qfalse;
|
362
|
+
}
|
363
|
+
}
|
364
|
+
|
365
|
+
void
|
366
|
+
noko_init_xml_sax_parser_context(void)
|
367
|
+
{
|
368
|
+
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
369
|
+
|
370
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
371
|
+
|
372
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
|
373
|
+
noko_xml_sax_parser_context_s_native_io, 2);
|
374
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
|
375
|
+
noko_xml_sax_parser_context_s_native_memory, 2);
|
376
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
|
377
|
+
noko_xml_sax_parser_context_s_native_file, 2);
|
378
|
+
|
379
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
|
380
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
|
381
|
+
noko_xml_sax_parser_context__replace_entities_set, 1);
|
382
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
|
383
|
+
noko_xml_sax_parser_context__replace_entities_get, 0);
|
384
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
|
385
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
|
386
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
|
387
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
|
388
|
+
|
389
|
+
id_read = rb_intern("read");
|
390
|
+
}
|
@@ -0,0 +1,206 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlSaxPushParser ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
xml_sax_push_parser_free(void *data)
|
7
|
+
{
|
8
|
+
xmlParserCtxtPtr ctx = data;
|
9
|
+
if (ctx->myDoc) {
|
10
|
+
xmlFreeDoc(ctx->myDoc);
|
11
|
+
}
|
12
|
+
if (ctx) {
|
13
|
+
xmlFreeParserCtxt(ctx);
|
14
|
+
}
|
15
|
+
}
|
16
|
+
|
17
|
+
static const rb_data_type_t xml_sax_push_parser_type = {
|
18
|
+
.wrap_struct_name = "xmlParserCtxt",
|
19
|
+
.function = {
|
20
|
+
.dfree = xml_sax_push_parser_free,
|
21
|
+
},
|
22
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
23
|
+
};
|
24
|
+
|
25
|
+
static VALUE
|
26
|
+
xml_sax_push_parser_allocate(VALUE klass)
|
27
|
+
{
|
28
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
|
29
|
+
}
|
30
|
+
|
31
|
+
xmlParserCtxtPtr
|
32
|
+
noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
|
33
|
+
{
|
34
|
+
xmlParserCtxtPtr c_parser;
|
35
|
+
TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
|
36
|
+
return c_parser;
|
37
|
+
}
|
38
|
+
|
39
|
+
/*
|
40
|
+
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
|
41
|
+
*/
|
42
|
+
static VALUE
|
43
|
+
noko_xml_sax_push_parser__native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
44
|
+
{
|
45
|
+
xmlParserCtxtPtr ctx;
|
46
|
+
const char *chunk = NULL;
|
47
|
+
int size = 0;
|
48
|
+
|
49
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
50
|
+
|
51
|
+
if (Qnil != _chunk) {
|
52
|
+
chunk = StringValuePtr(_chunk);
|
53
|
+
size = (int)RSTRING_LEN(_chunk);
|
54
|
+
}
|
55
|
+
|
56
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
57
|
+
|
58
|
+
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
59
|
+
if (!(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
|
60
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
61
|
+
noko__error_raise(NULL, e);
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
return self;
|
66
|
+
}
|
67
|
+
|
68
|
+
/*
|
69
|
+
* call-seq:
|
70
|
+
* initialize_native(xml_sax, filename)
|
71
|
+
*
|
72
|
+
* Initialize the push parser with +xml_sax+ using +filename+
|
73
|
+
*/
|
74
|
+
static VALUE
|
75
|
+
noko_xml_sax_push_parser__initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
76
|
+
{
|
77
|
+
xmlSAXHandlerPtr sax;
|
78
|
+
const char *filename = NULL;
|
79
|
+
xmlParserCtxtPtr ctx;
|
80
|
+
|
81
|
+
sax = noko_xml_sax_parser_unwrap(_xml_sax);
|
82
|
+
|
83
|
+
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
84
|
+
|
85
|
+
ctx = xmlCreatePushParserCtxt(
|
86
|
+
sax,
|
87
|
+
NULL,
|
88
|
+
NULL,
|
89
|
+
0,
|
90
|
+
filename
|
91
|
+
);
|
92
|
+
if (ctx == NULL) {
|
93
|
+
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
94
|
+
}
|
95
|
+
|
96
|
+
ctx->userData = ctx;
|
97
|
+
ctx->_private = (void *)_xml_sax;
|
98
|
+
|
99
|
+
DATA_PTR(self) = ctx;
|
100
|
+
return self;
|
101
|
+
}
|
102
|
+
|
103
|
+
static VALUE
|
104
|
+
noko_xml_sax_push_parser__options_get(VALUE self)
|
105
|
+
{
|
106
|
+
xmlParserCtxtPtr ctx;
|
107
|
+
|
108
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
109
|
+
|
110
|
+
return INT2NUM(xmlCtxtGetOptions(ctx));
|
111
|
+
}
|
112
|
+
|
113
|
+
static VALUE
|
114
|
+
noko_xml_sax_push_parser__options_set(VALUE self, VALUE options)
|
115
|
+
{
|
116
|
+
int error;
|
117
|
+
xmlParserCtxtPtr ctx;
|
118
|
+
|
119
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
120
|
+
|
121
|
+
error = xmlCtxtSetOptions(ctx, (int)NUM2INT(options));
|
122
|
+
if (error) {
|
123
|
+
rb_raise(rb_eRuntimeError, "Cannot set XML parser context options (%x)", error);
|
124
|
+
}
|
125
|
+
|
126
|
+
return Qnil;
|
127
|
+
}
|
128
|
+
|
129
|
+
/*
|
130
|
+
* call-seq:
|
131
|
+
* replace_entities
|
132
|
+
*
|
133
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
134
|
+
*
|
135
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
136
|
+
*
|
137
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
138
|
+
* replacement.
|
139
|
+
*/
|
140
|
+
static VALUE
|
141
|
+
noko_xml_sax_push_parser__replace_entities_get(VALUE self)
|
142
|
+
{
|
143
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
|
144
|
+
|
145
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
146
|
+
return Qtrue;
|
147
|
+
} else {
|
148
|
+
return Qfalse;
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
/*
|
153
|
+
* call-seq:
|
154
|
+
* replace_entities=(value)
|
155
|
+
*
|
156
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
157
|
+
*
|
158
|
+
* [Parameters]
|
159
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
160
|
+
*
|
161
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
162
|
+
* defaults to +false+ for this reason.
|
163
|
+
*
|
164
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
165
|
+
* replacement.
|
166
|
+
*/
|
167
|
+
static VALUE
|
168
|
+
noko_xml_sax_push_parser__replace_entities_set(VALUE self, VALUE value)
|
169
|
+
{
|
170
|
+
int error;
|
171
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
|
172
|
+
|
173
|
+
if (RB_TEST(value)) {
|
174
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
175
|
+
} else {
|
176
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
177
|
+
}
|
178
|
+
|
179
|
+
if (error) {
|
180
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
181
|
+
}
|
182
|
+
|
183
|
+
return value;
|
184
|
+
}
|
185
|
+
|
186
|
+
void
|
187
|
+
noko_init_xml_sax_push_parser(void)
|
188
|
+
{
|
189
|
+
cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
|
190
|
+
|
191
|
+
rb_define_alloc_func(cNokogiriXmlSaxPushParser, xml_sax_push_parser_allocate);
|
192
|
+
|
193
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options",
|
194
|
+
noko_xml_sax_push_parser__options_get, 0);
|
195
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options=",
|
196
|
+
noko_xml_sax_push_parser__options_set, 1);
|
197
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities",
|
198
|
+
noko_xml_sax_push_parser__replace_entities_get, 0);
|
199
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=",
|
200
|
+
noko_xml_sax_push_parser__replace_entities_set, 1);
|
201
|
+
|
202
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native",
|
203
|
+
noko_xml_sax_push_parser__initialize_native, 2);
|
204
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write",
|
205
|
+
noko_xml_sax_push_parser__native_write, 2);
|
206
|
+
}
|