nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,98 +1,166 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
-
static
|
6
|
-
{
|
7
|
-
NOKOGIRI_DEBUG_START(handler);
|
5
|
+
static ID id_read;
|
8
6
|
|
7
|
+
static void
|
8
|
+
xml_sax_parser_context_type_free(void *data)
|
9
|
+
{
|
10
|
+
xmlParserCtxtPtr ctxt = data;
|
9
11
|
ctxt->sax = NULL;
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
12
|
+
if (ctxt->myDoc) {
|
13
|
+
xmlFreeDoc(ctxt->myDoc);
|
14
|
+
}
|
15
|
+
if (ctxt) {
|
16
|
+
xmlFreeParserCtxt(ctxt);
|
17
|
+
}
|
14
18
|
}
|
15
19
|
|
16
20
|
/*
|
17
|
-
*
|
18
|
-
*
|
19
|
-
*
|
20
|
-
* Parse +io+ object with +encoding+
|
21
|
+
* note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
|
22
|
+
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
21
23
|
*/
|
22
|
-
static
|
23
|
-
|
24
|
+
static const rb_data_type_t xml_sax_parser_context_type = {
|
25
|
+
.wrap_struct_name = "xmlParserCtxt",
|
26
|
+
.function = {
|
27
|
+
.dfree = xml_sax_parser_context_type_free,
|
28
|
+
},
|
29
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
30
|
+
};
|
31
|
+
|
32
|
+
xmlParserCtxtPtr
|
33
|
+
noko_xml_sax_parser_context_unwrap(VALUE rb_context)
|
24
34
|
{
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
29
|
-
(xmlInputReadCallback)io_read_callback,
|
30
|
-
(xmlInputCloseCallback)io_close_callback,
|
31
|
-
(void *)io, enc);
|
32
|
-
if (ctxt->sax) {
|
33
|
-
xmlFree(ctxt->sax);
|
34
|
-
ctxt->sax = NULL;
|
35
|
-
}
|
36
|
-
|
37
|
-
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
35
|
+
xmlParserCtxtPtr c_context;
|
36
|
+
TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
|
37
|
+
return c_context;
|
38
38
|
}
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
* parse_file(filename)
|
43
|
-
*
|
44
|
-
* Parse file given +filename+
|
45
|
-
*/
|
46
|
-
static VALUE parse_file(VALUE klass, VALUE filename)
|
40
|
+
VALUE
|
41
|
+
noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
47
42
|
{
|
48
|
-
|
49
|
-
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
43
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
50
44
|
}
|
51
45
|
|
52
|
-
|
53
|
-
|
54
|
-
* parse_memory(data)
|
55
|
-
*
|
56
|
-
* Parse the XML stored in memory in +data+
|
57
|
-
*/
|
58
|
-
static VALUE
|
59
|
-
parse_memory(VALUE klass, VALUE data)
|
46
|
+
void
|
47
|
+
noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
|
60
48
|
{
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
if (
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
49
|
+
if (!NIL_P(rb_encoding)) {
|
50
|
+
VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
|
51
|
+
|
52
|
+
char *encoding_name = StringValueCStr(rb_encoding_name);
|
53
|
+
if (encoding_name) {
|
54
|
+
libxmlStructuredErrorHandlerState handler_state;
|
55
|
+
VALUE rb_errors = rb_ary_new();
|
56
|
+
|
57
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
58
|
+
|
59
|
+
int result = xmlSwitchEncodingName(c_context, encoding_name);
|
60
|
+
|
61
|
+
noko__structured_error_func_restore(&handler_state);
|
62
|
+
|
63
|
+
if (result != 0) {
|
64
|
+
xmlFreeParserCtxt(c_context);
|
65
|
+
|
66
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
67
|
+
if (!NIL_P(exception)) {
|
68
|
+
rb_exc_raise(exception);
|
69
|
+
} else {
|
70
|
+
rb_raise(rb_eRuntimeError, "could not set encoding");
|
71
|
+
}
|
72
|
+
}
|
73
73
|
}
|
74
|
+
}
|
75
|
+
}
|
74
76
|
|
75
|
-
|
77
|
+
/* :nodoc: */
|
78
|
+
static VALUE
|
79
|
+
noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
|
80
|
+
{
|
81
|
+
if (!rb_respond_to(rb_io, id_read)) {
|
82
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
83
|
+
}
|
84
|
+
|
85
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
86
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
87
|
+
}
|
88
|
+
|
89
|
+
xmlParserCtxtPtr c_context =
|
90
|
+
xmlCreateIOParserCtxt(NULL, NULL,
|
91
|
+
(xmlInputReadCallback)noko_io_read,
|
92
|
+
(xmlInputCloseCallback)noko_io_close,
|
93
|
+
(void *)rb_io, XML_CHAR_ENCODING_NONE);
|
94
|
+
if (!c_context) {
|
95
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
96
|
+
}
|
97
|
+
|
98
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
99
|
+
|
100
|
+
if (c_context->sax) {
|
101
|
+
xmlFree(c_context->sax);
|
102
|
+
c_context->sax = NULL;
|
103
|
+
}
|
104
|
+
|
105
|
+
VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
106
|
+
rb_iv_set(rb_context, "@input", rb_io);
|
107
|
+
|
108
|
+
return rb_context;
|
76
109
|
}
|
77
110
|
|
111
|
+
/* :nodoc: */
|
78
112
|
static VALUE
|
79
|
-
|
113
|
+
noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
|
80
114
|
{
|
81
|
-
|
82
|
-
|
83
|
-
|
115
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
116
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
117
|
+
}
|
118
|
+
|
119
|
+
xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
|
120
|
+
if (!c_context) {
|
121
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
122
|
+
}
|
123
|
+
|
124
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
125
|
+
|
126
|
+
if (c_context->sax) {
|
127
|
+
xmlFree(c_context->sax);
|
128
|
+
c_context->sax = NULL;
|
129
|
+
}
|
130
|
+
|
131
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
84
132
|
}
|
85
133
|
|
134
|
+
/* :nodoc: */
|
86
135
|
static VALUE
|
87
|
-
|
136
|
+
noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
|
88
137
|
{
|
89
|
-
|
138
|
+
Check_Type(rb_input, T_STRING);
|
139
|
+
if (!(int)RSTRING_LEN(rb_input)) {
|
140
|
+
rb_raise(rb_eRuntimeError, "input string cannot be empty");
|
141
|
+
}
|
142
|
+
|
143
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
144
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
145
|
+
}
|
90
146
|
|
91
|
-
|
92
|
-
|
147
|
+
xmlParserCtxtPtr c_context =
|
148
|
+
xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
|
149
|
+
if (!c_context) {
|
150
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
151
|
+
}
|
93
152
|
|
94
|
-
|
95
|
-
|
153
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
154
|
+
|
155
|
+
if (c_context->sax) {
|
156
|
+
xmlFree(c_context->sax);
|
157
|
+
c_context->sax = NULL;
|
158
|
+
}
|
159
|
+
|
160
|
+
VALUE rb_context = noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
161
|
+
rb_iv_set(rb_context, "@input", rb_input);
|
162
|
+
|
163
|
+
return rb_context;
|
96
164
|
}
|
97
165
|
|
98
166
|
/*
|
@@ -100,84 +168,118 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
100
168
|
* parse_with(sax_handler)
|
101
169
|
*
|
102
170
|
* Use +sax_handler+ and parse the current document
|
171
|
+
*
|
172
|
+
* 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
|
173
|
+
* more convenient for most use cases.
|
103
174
|
*/
|
104
175
|
static VALUE
|
105
|
-
|
176
|
+
noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
|
106
177
|
{
|
107
|
-
|
108
|
-
|
178
|
+
xmlParserCtxtPtr c_context;
|
179
|
+
xmlSAXHandlerPtr sax;
|
109
180
|
|
110
|
-
|
111
|
-
|
181
|
+
if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
|
182
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
183
|
+
}
|
112
184
|
|
113
|
-
|
114
|
-
|
185
|
+
c_context = noko_xml_sax_parser_context_unwrap(rb_context);
|
186
|
+
sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
|
115
187
|
|
116
|
-
|
117
|
-
|
118
|
-
|
188
|
+
c_context->sax = sax;
|
189
|
+
c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
|
190
|
+
c_context->_private = (void *)rb_sax_parser;
|
119
191
|
|
120
|
-
|
121
|
-
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
192
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
122
193
|
|
123
|
-
|
194
|
+
/* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
|
195
|
+
* don't have any cleanup to do. The only memory we need to free is handled by
|
196
|
+
* xml_sax_parser_context_type_free */
|
197
|
+
xmlParseDocument(c_context);
|
124
198
|
|
125
|
-
|
199
|
+
return Qnil;
|
126
200
|
}
|
127
201
|
|
128
202
|
/*
|
129
203
|
* call-seq:
|
130
|
-
*
|
204
|
+
* replace_entities=(value)
|
205
|
+
*
|
206
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
207
|
+
*
|
208
|
+
* [Parameters]
|
209
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
210
|
+
*
|
211
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
212
|
+
* defaults to +false+ for this reason.
|
131
213
|
*
|
132
|
-
*
|
133
|
-
*
|
214
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
215
|
+
* replacement.
|
216
|
+
*
|
217
|
+
* [Example]
|
218
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
219
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
220
|
+
*
|
221
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
222
|
+
* parser.parse(xml) do |ctx|
|
223
|
+
* ctx.replace_entities = true # this is UNSAFE for untrusted documents!
|
224
|
+
* end
|
134
225
|
*/
|
135
|
-
static VALUE
|
226
|
+
static VALUE
|
227
|
+
noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
|
136
228
|
{
|
137
|
-
|
138
|
-
|
229
|
+
int error;
|
230
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
231
|
+
|
232
|
+
if (RB_TEST(rb_value)) {
|
233
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
234
|
+
} else {
|
235
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
236
|
+
}
|
139
237
|
|
140
|
-
if(
|
141
|
-
|
142
|
-
|
143
|
-
ctxt->replaceEntities = 1;
|
238
|
+
if (error) {
|
239
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
240
|
+
}
|
144
241
|
|
145
|
-
return
|
242
|
+
return rb_value;
|
146
243
|
}
|
147
244
|
|
148
245
|
/*
|
149
246
|
* call-seq:
|
150
|
-
*
|
247
|
+
* replace_entities
|
151
248
|
*
|
152
|
-
*
|
153
|
-
*
|
249
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
250
|
+
*
|
251
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
252
|
+
*
|
253
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
254
|
+
* replacement.
|
154
255
|
*/
|
155
|
-
static VALUE
|
256
|
+
static VALUE
|
257
|
+
noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
|
156
258
|
{
|
157
|
-
xmlParserCtxtPtr ctxt;
|
158
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
259
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
159
260
|
|
160
|
-
if(
|
161
|
-
return Qfalse;
|
162
|
-
else
|
261
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
163
262
|
return Qtrue;
|
263
|
+
} else {
|
264
|
+
return Qfalse;
|
265
|
+
}
|
164
266
|
}
|
165
267
|
|
166
268
|
/*
|
167
269
|
* call-seq: line
|
168
270
|
*
|
169
|
-
*
|
271
|
+
* [Returns] (Integer) the line number of the line being currently parsed.
|
170
272
|
*/
|
171
|
-
static VALUE
|
273
|
+
static VALUE
|
274
|
+
noko_xml_sax_parser_context__line(VALUE rb_context)
|
172
275
|
{
|
173
|
-
xmlParserCtxtPtr ctxt;
|
174
276
|
xmlParserInputPtr io;
|
175
|
-
|
176
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
277
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
177
278
|
|
178
279
|
io = ctxt->input;
|
179
|
-
if(io)
|
280
|
+
if (io) {
|
180
281
|
return INT2NUM(io->line);
|
282
|
+
}
|
181
283
|
|
182
284
|
return Qnil;
|
183
285
|
}
|
@@ -185,78 +287,110 @@ static VALUE line(VALUE self)
|
|
185
287
|
/*
|
186
288
|
* call-seq: column
|
187
289
|
*
|
188
|
-
*
|
290
|
+
* [Returns] (Integer) the column number of the column being currently parsed.
|
189
291
|
*/
|
190
|
-
static VALUE
|
292
|
+
static VALUE
|
293
|
+
noko_xml_sax_parser_context__column(VALUE rb_context)
|
191
294
|
{
|
192
|
-
xmlParserCtxtPtr ctxt;
|
295
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
193
296
|
xmlParserInputPtr io;
|
194
297
|
|
195
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
196
|
-
|
197
298
|
io = ctxt->input;
|
198
|
-
if(io)
|
299
|
+
if (io) {
|
199
300
|
return INT2NUM(io->col);
|
301
|
+
}
|
200
302
|
|
201
303
|
return Qnil;
|
202
304
|
}
|
203
305
|
|
204
306
|
/*
|
205
307
|
* call-seq:
|
206
|
-
*
|
308
|
+
* recovery=(value)
|
309
|
+
*
|
310
|
+
* Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
|
311
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
312
|
+
* parser will stop processing the file on the first parsing error.
|
313
|
+
*
|
314
|
+
* [Parameters]
|
315
|
+
* - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
|
316
|
+
*
|
317
|
+
* [Returns] (Boolean) The passed +value+.
|
207
318
|
*
|
208
|
-
*
|
209
|
-
*
|
319
|
+
* [Example]
|
320
|
+
* Because this class is generally not instantiated directly, you would typically set this option
|
321
|
+
* via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
|
322
|
+
*
|
323
|
+
* parser = Nokogiri::XML::SAX::Parser.new(document_handler)
|
324
|
+
* parser.parse(xml) do |ctx|
|
325
|
+
* ctx.recovery = true
|
326
|
+
* end
|
210
327
|
*/
|
211
|
-
static VALUE
|
328
|
+
static VALUE
|
329
|
+
noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
|
212
330
|
{
|
213
|
-
|
214
|
-
|
331
|
+
int error;
|
332
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
215
333
|
|
216
|
-
if(
|
217
|
-
|
218
|
-
else
|
219
|
-
|
334
|
+
if (RB_TEST(rb_value)) {
|
335
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
|
336
|
+
} else {
|
337
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
|
338
|
+
}
|
220
339
|
|
221
|
-
|
340
|
+
if (error) {
|
341
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
342
|
+
}
|
343
|
+
|
344
|
+
return rb_value;
|
222
345
|
}
|
223
346
|
|
224
347
|
/*
|
225
348
|
* call-seq:
|
226
|
-
*
|
349
|
+
* recovery
|
350
|
+
*
|
351
|
+
* Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
|
352
|
+
* invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
|
353
|
+
* parser will stop processing the file on the first parsing error.
|
227
354
|
*
|
228
|
-
*
|
229
|
-
*
|
355
|
+
* [Returns] (Boolean) Whether this parser will recover from parsing errors.
|
356
|
+
*
|
357
|
+
* Default is +false+ for XML and +true+ for HTML.
|
230
358
|
*/
|
231
|
-
static VALUE
|
359
|
+
static VALUE
|
360
|
+
noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
|
232
361
|
{
|
233
|
-
xmlParserCtxtPtr ctxt;
|
234
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
362
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
235
363
|
|
236
|
-
if(ctxt
|
237
|
-
return Qfalse;
|
238
|
-
else
|
364
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
|
239
365
|
return Qtrue;
|
366
|
+
} else {
|
367
|
+
return Qfalse;
|
368
|
+
}
|
240
369
|
}
|
241
370
|
|
242
|
-
void
|
371
|
+
void
|
372
|
+
noko_init_xml_sax_parser_context(void)
|
243
373
|
{
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
rb_define_singleton_method(
|
253
|
-
|
254
|
-
|
255
|
-
rb_define_method(
|
256
|
-
rb_define_method(
|
257
|
-
|
258
|
-
rb_define_method(
|
259
|
-
|
260
|
-
rb_define_method(
|
261
|
-
rb_define_method(
|
374
|
+
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
375
|
+
|
376
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
377
|
+
|
378
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
|
379
|
+
noko_xml_sax_parser_context_s_native_io, 2);
|
380
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
|
381
|
+
noko_xml_sax_parser_context_s_native_memory, 2);
|
382
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
|
383
|
+
noko_xml_sax_parser_context_s_native_file, 2);
|
384
|
+
|
385
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
|
386
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
|
387
|
+
noko_xml_sax_parser_context__replace_entities_set, 1);
|
388
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
|
389
|
+
noko_xml_sax_parser_context__replace_entities_get, 0);
|
390
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
|
391
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
|
392
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
|
393
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
|
394
|
+
|
395
|
+
id_read = rb_intern("read");
|
262
396
|
}
|