nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -0,0 +1,96 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtml4SaxPushParser;
|
4
|
+
|
5
|
+
/*
|
6
|
+
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
|
7
|
+
*/
|
8
|
+
static VALUE
|
9
|
+
noko_html4_sax_push_parser__native_write(VALUE self, VALUE rb_chunk, VALUE rb_last_chunk)
|
10
|
+
{
|
11
|
+
xmlParserCtxtPtr ctx;
|
12
|
+
const char *chunk = NULL;
|
13
|
+
int size = 0;
|
14
|
+
int status = 0;
|
15
|
+
libxmlStructuredErrorHandlerState handler_state;
|
16
|
+
|
17
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
18
|
+
|
19
|
+
if (Qnil != rb_chunk) {
|
20
|
+
chunk = StringValuePtr(rb_chunk);
|
21
|
+
size = (int)RSTRING_LEN(rb_chunk);
|
22
|
+
}
|
23
|
+
|
24
|
+
noko__structured_error_func_save_and_set(&handler_state, NULL, NULL);
|
25
|
+
|
26
|
+
status = htmlParseChunk(ctx, chunk, size, Qtrue == rb_last_chunk ? 1 : 0);
|
27
|
+
|
28
|
+
noko__structured_error_func_restore(&handler_state);
|
29
|
+
|
30
|
+
if ((status != 0) && !(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
|
31
|
+
// TODO: there appear to be no tests for this block
|
32
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
33
|
+
noko__error_raise(NULL, e);
|
34
|
+
}
|
35
|
+
|
36
|
+
return self;
|
37
|
+
}
|
38
|
+
|
39
|
+
/*
|
40
|
+
* Initialize the push parser with +xml_sax+ using +filename+
|
41
|
+
*/
|
42
|
+
static VALUE
|
43
|
+
noko_html4_sax_push_parser__initialize_native(
|
44
|
+
VALUE self,
|
45
|
+
VALUE rb_xml_sax,
|
46
|
+
VALUE rb_filename,
|
47
|
+
VALUE encoding
|
48
|
+
)
|
49
|
+
{
|
50
|
+
htmlSAXHandlerPtr sax;
|
51
|
+
const char *filename = NULL;
|
52
|
+
htmlParserCtxtPtr ctx;
|
53
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
54
|
+
|
55
|
+
sax = noko_xml_sax_parser_unwrap(rb_xml_sax);
|
56
|
+
|
57
|
+
if (rb_filename != Qnil) { filename = StringValueCStr(rb_filename); }
|
58
|
+
|
59
|
+
if (!NIL_P(encoding)) {
|
60
|
+
enc = xmlParseCharEncoding(StringValueCStr(encoding));
|
61
|
+
if (enc == XML_CHAR_ENCODING_ERROR) {
|
62
|
+
rb_raise(rb_eArgError, "Unsupported Encoding");
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
ctx = htmlCreatePushParserCtxt(
|
67
|
+
sax,
|
68
|
+
NULL,
|
69
|
+
NULL,
|
70
|
+
0,
|
71
|
+
filename,
|
72
|
+
enc
|
73
|
+
);
|
74
|
+
if (ctx == NULL) {
|
75
|
+
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
76
|
+
}
|
77
|
+
|
78
|
+
ctx->userData = ctx;
|
79
|
+
ctx->_private = (void *)rb_xml_sax;
|
80
|
+
|
81
|
+
DATA_PTR(self) = ctx;
|
82
|
+
return self;
|
83
|
+
}
|
84
|
+
|
85
|
+
void
|
86
|
+
noko_init_html_sax_push_parser(void)
|
87
|
+
{
|
88
|
+
assert(cNokogiriXmlSaxPushParser);
|
89
|
+
cNokogiriHtml4SaxPushParser =
|
90
|
+
rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
|
91
|
+
|
92
|
+
rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native",
|
93
|
+
noko_html4_sax_push_parser__initialize_native, 3);
|
94
|
+
rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write",
|
95
|
+
noko_html4_sax_push_parser__native_write, 2);
|
96
|
+
}
|
@@ -0,0 +1,114 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
#ifndef HAVE_XMLCTXTSETOPTIONS
|
4
|
+
/* based on libxml2-2.14.0-dev (1d8bd126) parser.c xmlCtxtSetInternalOptions */
|
5
|
+
int
|
6
|
+
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
|
7
|
+
{
|
8
|
+
int keepMask = 0;
|
9
|
+
int allMask;
|
10
|
+
|
11
|
+
if (ctxt == NULL) {
|
12
|
+
return (-1);
|
13
|
+
}
|
14
|
+
|
15
|
+
/*
|
16
|
+
* XInclude options aren't handled by the parser.
|
17
|
+
*
|
18
|
+
* XML_PARSE_XINCLUDE
|
19
|
+
* XML_PARSE_NOXINCNODE
|
20
|
+
* XML_PARSE_NOBASEFIX
|
21
|
+
*/
|
22
|
+
allMask = XML_PARSE_RECOVER |
|
23
|
+
XML_PARSE_NOENT |
|
24
|
+
XML_PARSE_DTDLOAD |
|
25
|
+
XML_PARSE_DTDATTR |
|
26
|
+
XML_PARSE_DTDVALID |
|
27
|
+
XML_PARSE_NOERROR |
|
28
|
+
XML_PARSE_NOWARNING |
|
29
|
+
XML_PARSE_PEDANTIC |
|
30
|
+
XML_PARSE_NOBLANKS |
|
31
|
+
#ifdef LIBXML_SAX1_ENABLED
|
32
|
+
XML_PARSE_SAX1 |
|
33
|
+
#endif
|
34
|
+
XML_PARSE_NONET |
|
35
|
+
XML_PARSE_NODICT |
|
36
|
+
XML_PARSE_NSCLEAN |
|
37
|
+
XML_PARSE_NOCDATA |
|
38
|
+
XML_PARSE_COMPACT |
|
39
|
+
XML_PARSE_OLD10 |
|
40
|
+
XML_PARSE_HUGE |
|
41
|
+
XML_PARSE_OLDSAX |
|
42
|
+
XML_PARSE_IGNORE_ENC |
|
43
|
+
XML_PARSE_BIG_LINES;
|
44
|
+
|
45
|
+
ctxt->options = (ctxt->options & keepMask) | (options & allMask);
|
46
|
+
|
47
|
+
/*
|
48
|
+
* For some options, struct members are historically the source
|
49
|
+
* of truth. The values are initalized from global variables and
|
50
|
+
* old code could also modify them directly. Several older API
|
51
|
+
* functions that don't take an options argument rely on these
|
52
|
+
* deprecated mechanisms.
|
53
|
+
*
|
54
|
+
* Once public access to struct members and the globals are
|
55
|
+
* disabled, we can use the options bitmask as source of
|
56
|
+
* truth, making all these struct members obsolete.
|
57
|
+
*
|
58
|
+
* The XML_DETECT_IDS flags is misnamed. It simply enables
|
59
|
+
* loading of the external subset.
|
60
|
+
*/
|
61
|
+
ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
|
62
|
+
ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
|
63
|
+
ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
|
64
|
+
ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
|
65
|
+
ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
|
66
|
+
ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
|
67
|
+
ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
|
68
|
+
ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
|
69
|
+
|
70
|
+
/*
|
71
|
+
* Changing SAX callbacks is a bad idea. This should be fixed.
|
72
|
+
*/
|
73
|
+
if (options & XML_PARSE_NOBLANKS) {
|
74
|
+
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
75
|
+
}
|
76
|
+
if (options & XML_PARSE_NOCDATA) {
|
77
|
+
ctxt->sax->cdataBlock = NULL;
|
78
|
+
}
|
79
|
+
if (options & XML_PARSE_HUGE) {
|
80
|
+
if (ctxt->dict != NULL) {
|
81
|
+
xmlDictSetLimit(ctxt->dict, 0);
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
ctxt->linenumbers = 1;
|
86
|
+
|
87
|
+
return (options & ~allMask);
|
88
|
+
}
|
89
|
+
#endif
|
90
|
+
|
91
|
+
#ifndef HAVE_XMLCTXTGETOPTIONS
|
92
|
+
int
|
93
|
+
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
|
94
|
+
{
|
95
|
+
return (ctxt->options);
|
96
|
+
}
|
97
|
+
#endif
|
98
|
+
|
99
|
+
#ifndef HAVE_XMLSWITCHENCODINGNAME
|
100
|
+
int
|
101
|
+
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding)
|
102
|
+
{
|
103
|
+
if (ctxt == NULL) {
|
104
|
+
return (-1);
|
105
|
+
}
|
106
|
+
|
107
|
+
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
|
108
|
+
if (handler == NULL) {
|
109
|
+
return (-1);
|
110
|
+
}
|
111
|
+
|
112
|
+
return (xmlSwitchToEncoding(ctxt, handler));
|
113
|
+
}
|
114
|
+
#endif
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -1,101 +1,225 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE mNokogiri ;
|
4
|
+
VALUE mNokogiriGumbo ;
|
5
|
+
VALUE mNokogiriHtml4 ;
|
6
|
+
VALUE mNokogiriHtml4Sax ;
|
7
|
+
VALUE mNokogiriHtml5 ;
|
4
8
|
VALUE mNokogiriXml ;
|
5
|
-
VALUE mNokogiriHtml ;
|
6
|
-
VALUE mNokogiriXslt ;
|
7
9
|
VALUE mNokogiriXmlSax ;
|
8
|
-
VALUE
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
VALUE mNokogiriXmlXpath ;
|
11
|
+
VALUE mNokogiriXslt ;
|
12
|
+
|
13
|
+
VALUE cNokogiriSyntaxError;
|
14
|
+
VALUE cNokogiriXmlCharacterData;
|
15
|
+
VALUE cNokogiriXmlElement;
|
16
|
+
VALUE cNokogiriXmlXpathSyntaxError;
|
17
|
+
|
18
|
+
void noko_init_xml_attr(void);
|
19
|
+
void noko_init_xml_attribute_decl(void);
|
20
|
+
void noko_init_xml_cdata(void);
|
21
|
+
void noko_init_xml_comment(void);
|
22
|
+
void noko_init_xml_document(void);
|
23
|
+
void noko_init_xml_document_fragment(void);
|
24
|
+
void noko_init_xml_dtd(void);
|
25
|
+
void noko_init_xml_element_content(void);
|
26
|
+
void noko_init_xml_element_decl(void);
|
27
|
+
void noko_init_xml_encoding_handler(void);
|
28
|
+
void noko_init_xml_entity_decl(void);
|
29
|
+
void noko_init_xml_entity_reference(void);
|
30
|
+
void noko_init_xml_namespace(void);
|
31
|
+
void noko_init_xml_node(void);
|
32
|
+
void noko_init_xml_node_set(void);
|
33
|
+
void noko_init_xml_processing_instruction(void);
|
34
|
+
void noko_init_xml_reader(void);
|
35
|
+
void noko_init_xml_relax_ng(void);
|
36
|
+
void noko_init_xml_sax_parser(void);
|
37
|
+
void noko_init_xml_sax_parser_context(void);
|
38
|
+
void noko_init_xml_sax_push_parser(void);
|
39
|
+
void noko_init_xml_schema(void);
|
40
|
+
void noko_init_xml_syntax_error(void);
|
41
|
+
void noko_init_xml_text(void);
|
42
|
+
void noko_init_xml_xpath_context(void);
|
43
|
+
void noko_init_xslt_stylesheet(void);
|
44
|
+
void noko_init_html_document(void);
|
45
|
+
void noko_init_html_element_description(void);
|
46
|
+
void noko_init_html_entity_lookup(void);
|
47
|
+
void noko_init_html_sax_parser_context(void);
|
48
|
+
void noko_init_html_sax_push_parser(void);
|
49
|
+
void noko_init_html4_sax_parser(void);
|
50
|
+
void noko_init_gumbo(void);
|
51
|
+
void noko_init_test_global_handlers(void);
|
52
|
+
|
53
|
+
static ID id_read, id_write, id_external_encoding;
|
54
|
+
|
55
|
+
|
56
|
+
static VALUE
|
57
|
+
noko_io_read_check(VALUE val)
|
16
58
|
{
|
17
|
-
|
18
|
-
|
19
|
-
* So we use a one byte buffer instead.
|
20
|
-
*/
|
21
|
-
char tmp[1];
|
22
|
-
int len = vsnprintf (tmp, 1, fmt, ap) + 1;
|
23
|
-
char *res = (char *)malloc((unsigned int)len);
|
24
|
-
if (res == NULL)
|
25
|
-
return -1;
|
26
|
-
*strp = res;
|
27
|
-
return vsnprintf(res, (unsigned int)len, fmt, ap);
|
59
|
+
VALUE *args = (VALUE *)val;
|
60
|
+
return rb_funcall(args[0], id_read, 1, args[1]);
|
28
61
|
}
|
29
|
-
#endif
|
30
62
|
|
31
|
-
|
63
|
+
|
64
|
+
static VALUE
|
65
|
+
noko_io_read_failed(VALUE arg, VALUE exc)
|
32
66
|
{
|
33
|
-
|
67
|
+
return Qundef;
|
34
68
|
}
|
35
69
|
|
36
|
-
#ifdef HAVE_RUBY_UTIL_H
|
37
|
-
#include "ruby/util.h"
|
38
|
-
#else
|
39
|
-
#include "util.h"
|
40
|
-
#endif
|
41
70
|
|
42
|
-
|
71
|
+
int
|
72
|
+
noko_io_read(void *io, char *c_buffer, int c_buffer_len)
|
73
|
+
{
|
74
|
+
VALUE rb_io = (VALUE)io;
|
75
|
+
VALUE rb_read_string, rb_args[2];
|
76
|
+
size_t n_bytes_read, safe_len;
|
77
|
+
|
78
|
+
rb_args[0] = rb_io;
|
79
|
+
rb_args[1] = INT2NUM(c_buffer_len);
|
80
|
+
|
81
|
+
rb_read_string = rb_rescue(noko_io_read_check, (VALUE)rb_args, noko_io_read_failed, 0);
|
82
|
+
|
83
|
+
if (NIL_P(rb_read_string)) { return 0; }
|
84
|
+
if (rb_read_string == Qundef) { return -1; }
|
85
|
+
if (TYPE(rb_read_string) != T_STRING) { return -1; }
|
86
|
+
|
87
|
+
n_bytes_read = (size_t)RSTRING_LEN(rb_read_string);
|
88
|
+
safe_len = (n_bytes_read > (size_t)c_buffer_len) ? (size_t)c_buffer_len : n_bytes_read;
|
89
|
+
memcpy(c_buffer, StringValuePtr(rb_read_string), safe_len);
|
90
|
+
|
91
|
+
return (int)safe_len;
|
92
|
+
}
|
93
|
+
|
94
|
+
|
95
|
+
static VALUE
|
96
|
+
noko_io_write_check(VALUE rb_args)
|
43
97
|
{
|
44
|
-
|
45
|
-
|
98
|
+
VALUE rb_io = ((VALUE *)rb_args)[0];
|
99
|
+
VALUE rb_output = ((VALUE *)rb_args)[1];
|
100
|
+
return rb_funcall(rb_io, id_write, 1, rb_output);
|
101
|
+
}
|
46
102
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
103
|
+
|
104
|
+
static VALUE
|
105
|
+
noko_io_write_failed(VALUE arg, VALUE exc)
|
106
|
+
{
|
107
|
+
return Qundef;
|
51
108
|
}
|
52
109
|
|
53
|
-
|
110
|
+
|
111
|
+
int
|
112
|
+
noko_io_write(void *io, char *c_buffer, int c_buffer_len)
|
54
113
|
{
|
55
|
-
|
114
|
+
VALUE rb_args[2], rb_n_bytes_written;
|
115
|
+
VALUE rb_io = (VALUE)io;
|
116
|
+
VALUE rb_enc = Qnil;
|
117
|
+
rb_encoding *io_encoding;
|
118
|
+
|
119
|
+
if (rb_respond_to(rb_io, id_external_encoding)) {
|
120
|
+
rb_enc = rb_funcall(rb_io, id_external_encoding, 0);
|
121
|
+
}
|
122
|
+
io_encoding = RB_NIL_P(rb_enc) ? rb_ascii8bit_encoding() : rb_to_encoding(rb_enc);
|
123
|
+
|
124
|
+
rb_args[0] = rb_io;
|
125
|
+
rb_args[1] = rb_enc_str_new(c_buffer, (long)c_buffer_len, io_encoding);
|
126
|
+
|
127
|
+
rb_n_bytes_written = rb_rescue(noko_io_write_check, (VALUE)rb_args, noko_io_write_failed, 0);
|
128
|
+
if (rb_n_bytes_written == Qundef) { return -1; }
|
56
129
|
|
57
|
-
|
58
|
-
tuple = (nokogiriTuplePtr)doc->_private;
|
59
|
-
st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
|
130
|
+
return NUM2INT(rb_n_bytes_written);
|
60
131
|
}
|
61
132
|
|
62
|
-
|
133
|
+
|
134
|
+
/*
 * Close callback: does nothing with +io+ and always reports success (0).
 */
int
noko_io_close(void *io)
{
  (void)io;

  return 0;
}
|
139
|
+
|
70
140
|
|
141
|
+
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
142
|
+
# define NOKOGIRI_WINDOWS_DLLS 1
|
143
|
+
#else
|
144
|
+
# define NOKOGIRI_WINDOWS_DLLS 0
|
145
|
+
#endif
|
146
|
+
|
147
|
+
//
|
148
|
+
// | dlls || true | false |
|
149
|
+
// | nlmm || | |
|
150
|
+
// |-----------++---------+---------|
|
151
|
+
// | NULL || default | ruby |
|
152
|
+
// | "random" || default | ruby |
|
153
|
+
// | "ruby" || ruby | ruby |
|
154
|
+
// | "default" || default | default |
|
155
|
+
//
|
156
|
+
// We choose *not* to use Ruby's memory management functions with windows DLLs because of this
|
157
|
+
// issue: https://github.com/sparklemotion/nokogiri/issues/2241
|
158
|
+
//
|
159
|
+
static void
|
160
|
+
set_libxml_memory_management(void)
|
161
|
+
{
|
162
|
+
const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
|
163
|
+
if (nlmm) {
|
164
|
+
if (strcmp(nlmm, "default") == 0) {
|
165
|
+
goto libxml_uses_default_memory_management;
|
166
|
+
} else if (strcmp(nlmm, "ruby") == 0) {
|
167
|
+
goto libxml_uses_ruby_memory_management;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
if (NOKOGIRI_WINDOWS_DLLS) {
|
171
|
+
libxml_uses_default_memory_management:
|
172
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
173
|
+
return;
|
174
|
+
} else {
|
175
|
+
libxml_uses_ruby_memory_management:
|
176
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
177
|
+
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
178
|
+
return;
|
179
|
+
}
|
180
|
+
}
|
181
|
+
|
182
|
+
|
183
|
+
void
|
184
|
+
Init_nokogiri(void)
|
185
|
+
{
|
71
186
|
mNokogiri = rb_define_module("Nokogiri");
|
187
|
+
mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
|
188
|
+
mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
|
189
|
+
mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
|
190
|
+
mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
|
72
191
|
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
|
73
|
-
mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
|
74
|
-
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
75
192
|
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
rb_const_set(mNokogiri, rb_intern("
|
90
|
-
|
91
|
-
|
92
|
-
|
193
|
+
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
|
194
|
+
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
195
|
+
|
196
|
+
set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
|
197
|
+
xmlInitParser();
|
198
|
+
exsltRegisterAll();
|
199
|
+
|
200
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
|
201
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
|
202
|
+
|
203
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
|
204
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
|
205
|
+
|
206
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
|
207
|
+
xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
|
208
|
+
|
209
|
+
#ifdef NOKOGIRI_PACKAGED_LIBRARIES
|
210
|
+
rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
|
211
|
+
# ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
|
212
|
+
rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qtrue);
|
213
|
+
# else
|
214
|
+
rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
|
215
|
+
# endif
|
216
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
|
217
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
|
93
218
|
#else
|
94
|
-
rb_const_set(mNokogiri, rb_intern("
|
95
|
-
rb_const_set(mNokogiri, rb_intern("
|
96
|
-
rb_const_set(mNokogiri, rb_intern("
|
97
|
-
rb_const_set(mNokogiri, rb_intern("
|
98
|
-
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), Qnil);
|
219
|
+
rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qfalse);
|
220
|
+
rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
|
221
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), Qnil);
|
222
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), Qnil);
|
99
223
|
#endif
|
100
224
|
|
101
225
|
#ifdef LIBXML_ICONV_ENABLED
|
@@ -104,38 +228,67 @@ void Init_nokogiri()
|
|
104
228
|
rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
|
105
229
|
#endif
|
106
230
|
|
107
|
-
|
231
|
+
#ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
|
232
|
+
rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
|
233
|
+
#endif
|
234
|
+
|
235
|
+
if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
|
236
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
|
237
|
+
} else {
|
238
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
|
239
|
+
}
|
240
|
+
|
241
|
+
cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
|
242
|
+
noko_init_xml_syntax_error();
|
243
|
+
assert(cNokogiriXmlSyntaxError);
|
244
|
+
cNokogiriXmlXpathSyntaxError = rb_define_class_under(mNokogiriXmlXpath, "SyntaxError", cNokogiriXmlSyntaxError);
|
245
|
+
|
246
|
+
noko_init_xml_element_content();
|
247
|
+
noko_init_xml_encoding_handler();
|
248
|
+
noko_init_xml_namespace();
|
249
|
+
noko_init_xml_node_set();
|
250
|
+
noko_init_xml_reader();
|
251
|
+
|
252
|
+
noko_init_xml_sax_parser();
|
253
|
+
noko_init_html4_sax_parser();
|
254
|
+
|
255
|
+
noko_init_xml_xpath_context();
|
256
|
+
noko_init_xslt_stylesheet();
|
257
|
+
noko_init_html_element_description();
|
258
|
+
noko_init_html_entity_lookup();
|
259
|
+
|
260
|
+
noko_init_xml_schema();
|
261
|
+
noko_init_xml_relax_ng();
|
262
|
+
|
263
|
+
noko_init_xml_sax_parser_context();
|
264
|
+
noko_init_html_sax_parser_context();
|
265
|
+
|
266
|
+
noko_init_xml_sax_push_parser();
|
267
|
+
noko_init_html_sax_push_parser();
|
268
|
+
|
269
|
+
noko_init_xml_node();
|
270
|
+
noko_init_xml_attr();
|
271
|
+
noko_init_xml_attribute_decl();
|
272
|
+
noko_init_xml_dtd();
|
273
|
+
noko_init_xml_element_decl();
|
274
|
+
noko_init_xml_entity_decl();
|
275
|
+
noko_init_xml_entity_reference();
|
276
|
+
noko_init_xml_processing_instruction();
|
277
|
+
assert(cNokogiriXmlNode);
|
278
|
+
cNokogiriXmlElement = rb_define_class_under(mNokogiriXml, "Element", cNokogiriXmlNode);
|
279
|
+
cNokogiriXmlCharacterData = rb_define_class_under(mNokogiriXml, "CharacterData", cNokogiriXmlNode);
|
280
|
+
noko_init_xml_comment();
|
281
|
+
noko_init_xml_text();
|
282
|
+
noko_init_xml_cdata();
|
283
|
+
|
284
|
+
noko_init_xml_document_fragment();
|
285
|
+
noko_init_xml_document();
|
286
|
+
noko_init_html_document();
|
287
|
+
noko_init_gumbo();
|
288
|
+
|
289
|
+
noko_init_test_global_handlers();
|
108
290
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
init_xml_document_fragment();
|
113
|
-
init_xml_text();
|
114
|
-
init_xml_cdata();
|
115
|
-
init_xml_processing_instruction();
|
116
|
-
init_xml_attr();
|
117
|
-
init_xml_entity_reference();
|
118
|
-
init_xml_comment();
|
119
|
-
init_xml_node_set();
|
120
|
-
init_xml_xpath_context();
|
121
|
-
init_xml_sax_parser_context();
|
122
|
-
init_xml_sax_parser();
|
123
|
-
init_xml_sax_push_parser();
|
124
|
-
init_xml_reader();
|
125
|
-
init_xml_dtd();
|
126
|
-
init_xml_element_content();
|
127
|
-
init_xml_attribute_decl();
|
128
|
-
init_xml_element_decl();
|
129
|
-
init_xml_entity_decl();
|
130
|
-
init_xml_namespace();
|
131
|
-
init_html_sax_parser_context();
|
132
|
-
init_html_sax_push_parser();
|
133
|
-
init_xslt_stylesheet();
|
134
|
-
init_xml_syntax_error();
|
135
|
-
init_html_entity_lookup();
|
136
|
-
init_html_element_description();
|
137
|
-
init_xml_schema();
|
138
|
-
init_xml_relax_ng();
|
139
|
-
init_nokogiri_io();
|
140
|
-
init_xml_encoding_handler();
|
291
|
+
id_read = rb_intern("read");
|
292
|
+
id_write = rb_intern("write");
|
293
|
+
id_external_encoding = rb_intern("external_encoding");
|
141
294
|
}
|