nokogiri 1.11.2 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +7 -6
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +185 -103
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +6 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +70 -38
- data/ext/nokogiri/nokogiri.h +19 -9
- data/ext/nokogiri/xml_document.c +50 -49
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +4 -2
- data/ext/nokogiri/xml_node.c +147 -133
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/extension.rb +7 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +12 -2
- data/lib/nokogiri/xml/builder.rb +38 -0
- data/lib/nokogiri/xml/document.rb +89 -17
- data/lib/nokogiri/xml/node/save_options.rb +1 -1
- data/lib/nokogiri/xml/node.rb +7 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri.rb +31 -29
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
- data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +110 -69
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
- data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,17 +1,17 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
-
static VALUE
|
3
|
+
static VALUE cNokogiriHtml4EntityLookup;
|
4
4
|
|
5
5
|
/*
|
6
6
|
* call-seq:
|
7
7
|
* get(key)
|
8
8
|
*
|
9
|
-
* Get the
|
9
|
+
* Get the HTML4::EntityDescription for +key+
|
10
10
|
*/
|
11
11
|
static VALUE
|
12
12
|
get(VALUE _, VALUE rb_entity_name)
|
13
13
|
{
|
14
|
-
VALUE
|
14
|
+
VALUE cNokogiriHtml4EntityDescription;
|
15
15
|
const htmlEntityDesc *c_entity_desc;
|
16
16
|
VALUE rb_constructor_args[3];
|
17
17
|
|
@@ -24,14 +24,14 @@ get(VALUE _, VALUE rb_entity_name)
|
|
24
24
|
rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
|
25
25
|
rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
|
26
26
|
|
27
|
-
|
28
|
-
return rb_class_new_instance(3, rb_constructor_args,
|
27
|
+
cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription"));
|
28
|
+
return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription);
|
29
29
|
}
|
30
30
|
|
31
31
|
void
|
32
32
|
noko_init_html_entity_lookup()
|
33
33
|
{
|
34
|
-
|
34
|
+
cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
|
35
35
|
|
36
|
-
rb_define_method(
|
36
|
+
rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1);
|
37
37
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
-
VALUE
|
3
|
+
VALUE cNokogiriHtml4SaxParserContext ;
|
4
4
|
|
5
5
|
static void
|
6
6
|
deallocate(xmlParserCtxtPtr ctxt)
|
@@ -110,10 +110,11 @@ void
|
|
110
110
|
noko_init_html_sax_parser_context()
|
111
111
|
{
|
112
112
|
assert(cNokogiriXmlSaxParserContext);
|
113
|
-
|
113
|
+
cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext",
|
114
|
+
cNokogiriXmlSaxParserContext);
|
114
115
|
|
115
|
-
rb_define_singleton_method(
|
116
|
-
rb_define_singleton_method(
|
116
|
+
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2);
|
117
|
+
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2);
|
117
118
|
|
118
|
-
rb_define_method(
|
119
|
+
rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1);
|
119
120
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
-
VALUE
|
3
|
+
VALUE cNokogiriHtml4SaxPushParser;
|
4
4
|
|
5
5
|
/*
|
6
6
|
* call-seq:
|
@@ -88,8 +88,8 @@ void
|
|
88
88
|
noko_init_html_sax_push_parser()
|
89
89
|
{
|
90
90
|
assert(cNokogiriXmlSaxPushParser);
|
91
|
-
|
91
|
+
cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
|
92
92
|
|
93
|
-
rb_define_private_method(
|
94
|
-
rb_define_private_method(
|
93
|
+
rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3);
|
94
|
+
rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2);
|
95
95
|
}
|
@@ -20,14 +20,14 @@ xmlFirstElementChild(xmlNodePtr parent)
|
|
20
20
|
return (NULL);
|
21
21
|
}
|
22
22
|
switch (parent->type) {
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
23
|
+
case XML_ELEMENT_NODE:
|
24
|
+
case XML_ENTITY_NODE:
|
25
|
+
case XML_DOCUMENT_NODE:
|
26
|
+
case XML_HTML_DOCUMENT_NODE:
|
27
|
+
cur = parent->children;
|
28
|
+
break;
|
29
|
+
default:
|
30
|
+
return (NULL);
|
31
31
|
}
|
32
32
|
while (cur != NULL) {
|
33
33
|
if (cur->type == XML_ELEMENT_NODE) {
|
@@ -57,20 +57,20 @@ xmlNextElementSibling(xmlNodePtr node)
|
|
57
57
|
return (NULL);
|
58
58
|
}
|
59
59
|
switch (node->type) {
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
60
|
+
case XML_ELEMENT_NODE:
|
61
|
+
case XML_TEXT_NODE:
|
62
|
+
case XML_CDATA_SECTION_NODE:
|
63
|
+
case XML_ENTITY_REF_NODE:
|
64
|
+
case XML_ENTITY_NODE:
|
65
|
+
case XML_PI_NODE:
|
66
|
+
case XML_COMMENT_NODE:
|
67
|
+
case XML_DTD_NODE:
|
68
|
+
case XML_XINCLUDE_START:
|
69
|
+
case XML_XINCLUDE_END:
|
70
|
+
node = node->next;
|
71
|
+
break;
|
72
|
+
default:
|
73
|
+
return (NULL);
|
74
74
|
}
|
75
75
|
while (node != NULL) {
|
76
76
|
if (node->type == XML_ELEMENT_NODE) {
|
@@ -101,14 +101,14 @@ xmlLastElementChild(xmlNodePtr parent)
|
|
101
101
|
return (NULL);
|
102
102
|
}
|
103
103
|
switch (parent->type) {
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
104
|
+
case XML_ELEMENT_NODE:
|
105
|
+
case XML_ENTITY_NODE:
|
106
|
+
case XML_DOCUMENT_NODE:
|
107
|
+
case XML_HTML_DOCUMENT_NODE:
|
108
|
+
cur = parent->last;
|
109
|
+
break;
|
110
|
+
default:
|
111
|
+
return (NULL);
|
112
112
|
}
|
113
113
|
while (cur != NULL) {
|
114
114
|
if (cur->type == XML_ELEMENT_NODE) {
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE mNokogiri ;
|
4
|
-
VALUE
|
5
|
-
VALUE
|
4
|
+
VALUE mNokogiriGumbo ;
|
5
|
+
VALUE mNokogiriHtml4 ;
|
6
|
+
VALUE mNokogiriHtml4Sax ;
|
7
|
+
VALUE mNokogiriHtml5 ;
|
6
8
|
VALUE mNokogiriXml ;
|
7
9
|
VALUE mNokogiriXmlSax ;
|
8
10
|
VALUE mNokogiriXmlXpath ;
|
@@ -13,38 +15,39 @@ VALUE cNokogiriXmlCharacterData;
|
|
13
15
|
VALUE cNokogiriXmlElement;
|
14
16
|
VALUE cNokogiriXmlXpathSyntaxError;
|
15
17
|
|
16
|
-
void noko_init_xml_attr();
|
17
|
-
void noko_init_xml_attribute_decl();
|
18
|
-
void noko_init_xml_cdata();
|
19
|
-
void noko_init_xml_comment();
|
20
|
-
void noko_init_xml_document();
|
21
|
-
void noko_init_xml_document_fragment();
|
22
|
-
void noko_init_xml_dtd();
|
23
|
-
void noko_init_xml_element_content();
|
24
|
-
void noko_init_xml_element_decl();
|
25
|
-
void noko_init_xml_encoding_handler();
|
26
|
-
void noko_init_xml_entity_decl();
|
27
|
-
void noko_init_xml_entity_reference();
|
28
|
-
void noko_init_xml_namespace();
|
29
|
-
void noko_init_xml_node();
|
30
|
-
void noko_init_xml_node_set();
|
31
|
-
void noko_init_xml_processing_instruction();
|
32
|
-
void noko_init_xml_reader();
|
33
|
-
void noko_init_xml_relax_ng();
|
34
|
-
void noko_init_xml_sax_parser();
|
35
|
-
void noko_init_xml_sax_parser_context();
|
36
|
-
void noko_init_xml_sax_push_parser();
|
37
|
-
void noko_init_xml_schema();
|
38
|
-
void noko_init_xml_syntax_error();
|
39
|
-
void noko_init_xml_text();
|
40
|
-
void noko_init_xml_xpath_context();
|
41
|
-
void noko_init_xslt_stylesheet();
|
42
|
-
void noko_init_html_document();
|
43
|
-
void noko_init_html_element_description();
|
44
|
-
void noko_init_html_entity_lookup();
|
45
|
-
void noko_init_html_sax_parser_context();
|
46
|
-
void noko_init_html_sax_push_parser();
|
47
|
-
void
|
18
|
+
void noko_init_xml_attr(void);
|
19
|
+
void noko_init_xml_attribute_decl(void);
|
20
|
+
void noko_init_xml_cdata(void);
|
21
|
+
void noko_init_xml_comment(void);
|
22
|
+
void noko_init_xml_document(void);
|
23
|
+
void noko_init_xml_document_fragment(void);
|
24
|
+
void noko_init_xml_dtd(void);
|
25
|
+
void noko_init_xml_element_content(void);
|
26
|
+
void noko_init_xml_element_decl(void);
|
27
|
+
void noko_init_xml_encoding_handler(void);
|
28
|
+
void noko_init_xml_entity_decl(void);
|
29
|
+
void noko_init_xml_entity_reference(void);
|
30
|
+
void noko_init_xml_namespace(void);
|
31
|
+
void noko_init_xml_node(void);
|
32
|
+
void noko_init_xml_node_set(void);
|
33
|
+
void noko_init_xml_processing_instruction(void);
|
34
|
+
void noko_init_xml_reader(void);
|
35
|
+
void noko_init_xml_relax_ng(void);
|
36
|
+
void noko_init_xml_sax_parser(void);
|
37
|
+
void noko_init_xml_sax_parser_context(void);
|
38
|
+
void noko_init_xml_sax_push_parser(void);
|
39
|
+
void noko_init_xml_schema(void);
|
40
|
+
void noko_init_xml_syntax_error(void);
|
41
|
+
void noko_init_xml_text(void);
|
42
|
+
void noko_init_xml_xpath_context(void);
|
43
|
+
void noko_init_xslt_stylesheet(void);
|
44
|
+
void noko_init_html_document(void);
|
45
|
+
void noko_init_html_element_description(void);
|
46
|
+
void noko_init_html_entity_lookup(void);
|
47
|
+
void noko_init_html_sax_parser_context(void);
|
48
|
+
void noko_init_html_sax_push_parser(void);
|
49
|
+
void noko_init_gumbo(void);
|
50
|
+
void noko_init_test_global_handlers(void);
|
48
51
|
|
49
52
|
static ID id_read, id_write;
|
50
53
|
|
@@ -152,12 +155,14 @@ void
|
|
152
155
|
Init_nokogiri()
|
153
156
|
{
|
154
157
|
mNokogiri = rb_define_module("Nokogiri");
|
158
|
+
mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
|
159
|
+
mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
|
160
|
+
mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
|
161
|
+
mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
|
155
162
|
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
|
156
|
-
mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
|
157
|
-
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
158
|
-
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
|
159
163
|
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
|
160
|
-
|
164
|
+
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
|
165
|
+
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
161
166
|
|
162
167
|
rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
|
163
168
|
rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
|
@@ -191,9 +196,35 @@ Init_nokogiri()
|
|
191
196
|
rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
|
192
197
|
#endif
|
193
198
|
|
199
|
+
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
200
|
+
/*
|
201
|
+
* We choose *not* to use Ruby's memory management functions with Windows DLLs because of this
|
202
|
+
* issue in libxml 2.9.12:
|
203
|
+
*
|
204
|
+
* https://github.com/sparklemotion/nokogiri/issues/2241
|
205
|
+
*
|
206
|
+
* If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
|
207
|
+
* this config only for the specific libxml2 version 2.9.12.
|
208
|
+
*
|
209
|
+
* Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
|
210
|
+
* default memory management functions (recall that this config was introduced to reduce memory
|
211
|
+
* bloat and allow Ruby to GC more often); but we should *really* test with production workloads
|
212
|
+
* before making that kind of a potentially-invasive change.
|
213
|
+
*/
|
214
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
215
|
+
#else
|
216
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
194
217
|
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
218
|
+
#endif
|
195
219
|
|
196
220
|
xmlInitParser();
|
221
|
+
exsltRegisterAll();
|
222
|
+
|
223
|
+
if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
|
224
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
|
225
|
+
} else {
|
226
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
|
227
|
+
}
|
197
228
|
|
198
229
|
cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
|
199
230
|
noko_init_xml_syntax_error();
|
@@ -238,6 +269,7 @@ Init_nokogiri()
|
|
238
269
|
noko_init_xml_document_fragment();
|
239
270
|
noko_init_xml_document();
|
240
271
|
noko_init_html_document();
|
272
|
+
noko_init_gumbo();
|
241
273
|
|
242
274
|
noko_init_test_global_handlers();
|
243
275
|
|
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#ifndef NOKOGIRI_NATIVE
|
2
2
|
#define NOKOGIRI_NATIVE
|
3
3
|
|
4
|
-
#
|
4
|
+
#ifdef _MSC_VER
|
5
5
|
# ifndef WIN32_LEAN_AND_MEAN
|
6
6
|
# define WIN32_LEAN_AND_MEAN
|
7
7
|
# endif /* WIN32_LEAN_AND_MEAN */
|
@@ -15,7 +15,7 @@
|
|
15
15
|
# include <windows.h>
|
16
16
|
#endif
|
17
17
|
|
18
|
-
#
|
18
|
+
#ifdef _WIN32
|
19
19
|
# define NOKOPUBFUN __declspec(dllexport)
|
20
20
|
# define NOKOPUBVAR __declspec(dllexport) extern
|
21
21
|
#else
|
@@ -69,6 +69,7 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
69
69
|
#include <ruby/st.h>
|
70
70
|
#include <ruby/encoding.h>
|
71
71
|
#include <ruby/util.h>
|
72
|
+
#include <ruby/version.h>
|
72
73
|
|
73
74
|
#define NOKOGIRI_STR_NEW2(str) NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
|
74
75
|
#define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
|
@@ -92,12 +93,16 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
92
93
|
|
93
94
|
|
94
95
|
NOKOPUBVAR VALUE mNokogiri ;
|
95
|
-
NOKOPUBVAR VALUE
|
96
|
-
NOKOPUBVAR VALUE
|
96
|
+
NOKOPUBVAR VALUE mNokogiriGumbo ;
|
97
|
+
NOKOPUBVAR VALUE mNokogiriHtml4 ;
|
98
|
+
NOKOPUBVAR VALUE mNokogiriHtml4Sax ;
|
99
|
+
NOKOPUBVAR VALUE mNokogiriHtml5 ;
|
97
100
|
NOKOPUBVAR VALUE mNokogiriXml ;
|
98
101
|
NOKOPUBVAR VALUE mNokogiriXmlSax ;
|
102
|
+
NOKOPUBVAR VALUE mNokogiriXmlXpath ;
|
99
103
|
NOKOPUBVAR VALUE mNokogiriXslt ;
|
100
104
|
|
105
|
+
NOKOPUBVAR VALUE cNokogiriEncodingHandler;
|
101
106
|
NOKOPUBVAR VALUE cNokogiriSyntaxError;
|
102
107
|
NOKOPUBVAR VALUE cNokogiriXmlAttr;
|
103
108
|
NOKOPUBVAR VALUE cNokogiriXmlAttributeDecl;
|
@@ -128,10 +133,11 @@ NOKOPUBVAR VALUE cNokogiriXmlXpathContext;
|
|
128
133
|
NOKOPUBVAR VALUE cNokogiriXmlXpathSyntaxError;
|
129
134
|
NOKOPUBVAR VALUE cNokogiriXsltStylesheet ;
|
130
135
|
|
131
|
-
NOKOPUBVAR VALUE
|
132
|
-
NOKOPUBVAR VALUE
|
133
|
-
NOKOPUBVAR VALUE
|
134
|
-
NOKOPUBVAR VALUE
|
136
|
+
NOKOPUBVAR VALUE cNokogiriHtml4Document ;
|
137
|
+
NOKOPUBVAR VALUE cNokogiriHtml4SaxPushParser ;
|
138
|
+
NOKOPUBVAR VALUE cNokogiriHtml4ElementDescription ;
|
139
|
+
NOKOPUBVAR VALUE cNokogiriHtml4SaxParserContext;
|
140
|
+
NOKOPUBVAR VALUE cNokogiriHtml5Document ;
|
135
141
|
|
136
142
|
typedef struct _nokogiriTuple {
|
137
143
|
VALUE doc;
|
@@ -177,7 +183,8 @@ VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
|
|
177
183
|
|
178
184
|
VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
|
179
185
|
VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
|
180
|
-
NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
186
|
+
NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
187
|
+
xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
|
181
188
|
|
182
189
|
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
|
183
190
|
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
|
@@ -190,6 +197,9 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); /* depr
|
|
190
197
|
#define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
|
191
198
|
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) free(_tuple)
|
192
199
|
|
200
|
+
#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
|
201
|
+
#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
|
202
|
+
|
193
203
|
void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
|
194
204
|
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
|
195
205
|
xmlStructuredErrorFunc handler);
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -6,19 +6,19 @@ static int
|
|
6
6
|
dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
|
7
7
|
{
|
8
8
|
switch (node->type) {
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
9
|
+
case XML_ATTRIBUTE_NODE:
|
10
|
+
xmlFreePropList((xmlAttrPtr)node);
|
11
|
+
break;
|
12
|
+
case XML_NAMESPACE_DECL:
|
13
|
+
xmlFreeNs((xmlNsPtr)node);
|
14
|
+
break;
|
15
|
+
case XML_DTD_NODE:
|
16
|
+
xmlFreeDtd((xmlDtdPtr)node);
|
17
|
+
break;
|
18
|
+
default:
|
19
|
+
if (node->parent == NULL) {
|
20
|
+
xmlAddChild((xmlNodePtr)doc, node);
|
21
|
+
}
|
22
22
|
}
|
23
23
|
return ST_CONTINUE;
|
24
24
|
}
|
@@ -141,42 +141,41 @@ url(VALUE self)
|
|
141
141
|
* Set the root element on this document
|
142
142
|
*/
|
143
143
|
static VALUE
|
144
|
-
|
144
|
+
rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
145
145
|
{
|
146
|
-
xmlDocPtr
|
147
|
-
xmlNodePtr
|
148
|
-
xmlNodePtr old_root;
|
149
|
-
|
150
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
146
|
+
xmlDocPtr c_document;
|
147
|
+
xmlNodePtr c_new_root = NULL, c_current_root;
|
151
148
|
|
152
|
-
|
153
|
-
|
154
|
-
if (NIL_P(root)) {
|
155
|
-
old_root = xmlDocGetRootElement(doc);
|
156
|
-
|
157
|
-
if (old_root) {
|
158
|
-
xmlUnlinkNode(old_root);
|
159
|
-
noko_xml_document_pin_node(old_root);
|
160
|
-
}
|
149
|
+
Data_Get_Struct(self, xmlDoc, c_document);
|
161
150
|
|
162
|
-
|
151
|
+
c_current_root = xmlDocGetRootElement(c_document);
|
152
|
+
if (c_current_root) {
|
153
|
+
xmlUnlinkNode(c_current_root);
|
154
|
+
noko_xml_document_pin_node(c_current_root);
|
163
155
|
}
|
164
156
|
|
165
|
-
|
157
|
+
if (!NIL_P(rb_new_root)) {
|
158
|
+
if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
|
159
|
+
rb_raise(rb_eArgError,
|
160
|
+
"expected Nokogiri::XML::Node but received %"PRIsVALUE,
|
161
|
+
rb_obj_class(rb_new_root));
|
162
|
+
}
|
166
163
|
|
164
|
+
Data_Get_Struct(rb_new_root, xmlNode, c_new_root);
|
167
165
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
166
|
+
/* If the new root's document is not the same as the current document,
|
167
|
+
* then we need to dup the node into this document. */
|
168
|
+
if (c_new_root->doc != c_document) {
|
169
|
+
c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
|
170
|
+
if (!c_new_root) {
|
171
|
+
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
|
172
|
+
}
|
174
173
|
}
|
175
174
|
}
|
176
175
|
|
177
|
-
xmlDocSetRootElement(
|
178
|
-
|
179
|
-
return
|
176
|
+
xmlDocSetRootElement(c_document, c_new_root);
|
177
|
+
|
178
|
+
return rb_new_root;
|
180
179
|
}
|
181
180
|
|
182
181
|
/*
|
@@ -186,17 +185,19 @@ set_root(VALUE self, VALUE root)
|
|
186
185
|
* Get the root node for this document.
|
187
186
|
*/
|
188
187
|
static VALUE
|
189
|
-
|
188
|
+
rb_xml_document_root(VALUE self)
|
190
189
|
{
|
191
|
-
xmlDocPtr
|
192
|
-
xmlNodePtr
|
190
|
+
xmlDocPtr c_document;
|
191
|
+
xmlNodePtr c_root;
|
193
192
|
|
194
|
-
Data_Get_Struct(self, xmlDoc,
|
193
|
+
Data_Get_Struct(self, xmlDoc, c_document);
|
195
194
|
|
196
|
-
|
195
|
+
c_root = xmlDocGetRootElement(c_document);
|
196
|
+
if (!c_root) {
|
197
|
+
return Qnil;
|
198
|
+
}
|
197
199
|
|
198
|
-
|
199
|
-
return noko_xml_node_wrap(Qnil, root) ;
|
200
|
+
return noko_xml_node_wrap(Qnil, c_root) ;
|
200
201
|
}
|
201
202
|
|
202
203
|
/*
|
@@ -212,7 +213,7 @@ set_encoding(VALUE self, VALUE encoding)
|
|
212
213
|
Data_Get_Struct(self, xmlDoc, doc);
|
213
214
|
|
214
215
|
if (doc->encoding) {
|
215
|
-
|
216
|
+
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
216
217
|
}
|
217
218
|
|
218
219
|
doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
|
@@ -666,8 +667,8 @@ noko_init_xml_document()
|
|
666
667
|
rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
|
667
668
|
rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
|
668
669
|
|
669
|
-
rb_define_method(cNokogiriXmlDocument, "root",
|
670
|
-
rb_define_method(cNokogiriXmlDocument, "root=",
|
670
|
+
rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
|
671
|
+
rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
|
671
672
|
rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
|
672
673
|
rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
|
673
674
|
rb_define_method(cNokogiriXmlDocument, "version", version, 0);
|
@@ -116,6 +116,8 @@ noko_init_xml_element_content()
|
|
116
116
|
{
|
117
117
|
cNokogiriXmlElementContent = rb_define_class_under(mNokogiriXml, "ElementContent", rb_cObject);
|
118
118
|
|
119
|
+
rb_undef_alloc_func(cNokogiriXmlElementContent);
|
120
|
+
|
119
121
|
rb_define_method(cNokogiriXmlElementContent, "name", get_name, 0);
|
120
122
|
rb_define_method(cNokogiriXmlElementContent, "type", get_type, 0);
|
121
123
|
rb_define_method(cNokogiriXmlElementContent, "occur", get_occur, 0);
|
@@ -1,5 +1,8 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
+
VALUE cNokogiriEncodingHandler;
|
4
|
+
|
5
|
+
|
3
6
|
/*
|
4
7
|
* call-seq: Nokogiri::EncodingHandler.[](name)
|
5
8
|
*
|
@@ -75,11 +78,13 @@ name(VALUE self)
|
|
75
78
|
void
|
76
79
|
noko_init_xml_encoding_handler()
|
77
80
|
{
|
78
|
-
|
81
|
+
cNokogiriEncodingHandler = rb_define_class_under(mNokogiri, "EncodingHandler", rb_cObject);
|
82
|
+
|
83
|
+
rb_undef_alloc_func(cNokogiriEncodingHandler);
|
79
84
|
|
80
|
-
rb_define_singleton_method(
|
81
|
-
rb_define_singleton_method(
|
82
|
-
rb_define_singleton_method(
|
83
|
-
rb_define_singleton_method(
|
84
|
-
rb_define_method(
|
85
|
+
rb_define_singleton_method(cNokogiriEncodingHandler, "[]", get, 1);
|
86
|
+
rb_define_singleton_method(cNokogiriEncodingHandler, "delete", delete, 1);
|
87
|
+
rb_define_singleton_method(cNokogiriEncodingHandler, "alias", alias, 2);
|
88
|
+
rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", clear_aliases, 0);
|
89
|
+
rb_define_method(cNokogiriEncodingHandler, "name", name, 0);
|
85
90
|
}
|
@@ -33,10 +33,10 @@ dealloc_namespace(xmlNsPtr ns)
|
|
33
33
|
*/
|
34
34
|
NOKOGIRI_DEBUG_START(ns) ;
|
35
35
|
if (ns->href) {
|
36
|
-
xmlFree((
|
36
|
+
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
|
37
37
|
}
|
38
38
|
if (ns->prefix) {
|
39
|
-
xmlFree((
|
39
|
+
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix));
|
40
40
|
}
|
41
41
|
xmlFree(ns);
|
42
42
|
NOKOGIRI_DEBUG_END(ns) ;
|
@@ -113,6 +113,8 @@ noko_init_xml_namespace()
|
|
113
113
|
{
|
114
114
|
cNokogiriXmlNamespace = rb_define_class_under(mNokogiriXml, "Namespace", rb_cObject);
|
115
115
|
|
116
|
+
rb_undef_alloc_func(cNokogiriXmlNamespace);
|
117
|
+
|
116
118
|
rb_define_method(cNokogiriXmlNamespace, "prefix", prefix, 0);
|
117
119
|
rb_define_method(cNokogiriXmlNamespace, "href", href, 0);
|
118
120
|
}
|