nokogiri 1.8.5 → 1.13.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -21
- data/LICENSE-DEPENDENCIES.md +1159 -868
- data/LICENSE.md +5 -28
- data/README.md +196 -90
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -59
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +765 -420
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +199 -88
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +42 -37
- data/ext/nokogiri/xml_attribute_decl.c +21 -21
- data/ext/nokogiri/xml_cdata.c +14 -19
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +296 -217
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +25 -25
- data/ext/nokogiri/xml_encoding_handler.c +43 -18
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +99 -54
- data/ext/nokogiri/xml_node.c +1107 -658
- data/ext/nokogiri/xml_node_set.c +178 -166
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +277 -175
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +112 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +114 -35
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +226 -115
- data/ext/nokogiri/xslt_stylesheet.c +265 -173
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +218 -91
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/{html → html4}/document.rb +103 -105
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +91 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +100 -0
- data/lib/nokogiri/html5.rb +478 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +222 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +97 -53
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +224 -86
- data/lib/nokogiri/xml/document_fragment.rb +57 -44
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +10 -5
- data/lib/nokogiri/xml/node.rb +895 -377
- data/lib/nokogiri/xml/node_set.rb +92 -65
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +22 -8
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +38 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +112 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +49 -65
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +211 -266
- data/.autotest +0 -22
- data/.cross_rubies +0 -8
- data/.editorconfig +0 -17
- data/.gemtest +0 -0
- data/.travis.yml +0 -63
- data/CHANGELOG.md +0 -1368
- data/CONTRIBUTING.md +0 -42
- data/C_CODING_STYLE.rdoc +0 -33
- data/Gemfile-libxml-ruby +0 -3
- data/Manifest.txt +0 -370
- data/ROADMAP.md +0 -111
- data/Rakefile +0 -348
- data/SECURITY.md +0 -19
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/appveyor.yml +0 -29
- data/build_all +0 -44
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -15
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
- data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
- data/patches/sort-patches-by-date +0 -25
- data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
- data/suppressions/README.txt +0 -1
- data/suppressions/nokogiri_ruby-2.supp +0 -10
- data/tasks/test.rb +0 -100
- data/test/css/test_nthiness.rb +0 -226
- data/test/css/test_parser.rb +0 -386
- data/test/css/test_tokenizer.rb +0 -215
- data/test/css/test_xpath_visitor.rb +0 -96
- data/test/decorators/test_slop.rb +0 -23
- data/test/files/2ch.html +0 -108
- data/test/files/GH_1042.html +0 -18
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/atom.xml +0 -344
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/namespace_pressure_test.xml +0 -1684
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/shift_jis_no_charset.html +0 -9
- data/test/files/slow-xpath.xml +0 -25509
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -851
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -271
- data/test/html/sax/test_parser.rb +0 -168
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/sax/test_parser_text.rb +0 -163
- data/test/html/sax/test_push_parser.rb +0 -87
- data/test/html/test_attributes.rb +0 -85
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -712
- data/test/html/test_document_encoding.rb +0 -143
- data/test/html/test_document_fragment.rb +0 -310
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -212
- data/test/html/test_node_encoding.rb +0 -91
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
- data/test/namespaces/test_namespaces_preservation.rb +0 -31
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -47
- data/test/test_encoding_handler.rb +0 -48
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -138
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -314
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -402
- data/test/xml/sax/test_parser_context.rb +0 -115
- data/test/xml/sax/test_parser_text.rb +0 -202
- data/test/xml/sax/test_push_parser.rb +0 -265
- data/test/xml/test_attr.rb +0 -74
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -341
- data/test/xml/test_c14n.rb +0 -180
- data/test/xml/test_cdata.rb +0 -54
- data/test/xml/test_comment.rb +0 -40
- data/test/xml/test_document.rb +0 -982
- data/test/xml/test_document_encoding.rb +0 -31
- data/test/xml/test_document_fragment.rb +0 -298
- data/test/xml/test_dtd.rb +0 -187
- data/test/xml/test_dtd_encoding.rb +0 -31
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -262
- data/test/xml/test_namespace.rb +0 -96
- data/test/xml/test_node.rb +0 -1325
- data/test/xml/test_node_attributes.rb +0 -115
- data/test/xml/test_node_encoding.rb +0 -75
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -592
- data/test/xml/test_node_set.rb +0 -809
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader.rb +0 -620
- data/test/xml/test_reader_encoding.rb +0 -134
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -142
- data/test/xml/test_syntax_error.rb +0 -36
- data/test/xml/test_text.rb +0 -60
- data/test/xml/test_unparented_node.rb +0 -483
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -470
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
@@ -0,0 +1,121 @@
|
|
1
|
+
#ifndef HAVE_XMLFIRSTELEMENTCHILD
|
2
|
+
#include <nokogiri.h>
|
3
|
+
/**
|
4
|
+
* xmlFirstElementChild:
|
5
|
+
* @parent: the parent node
|
6
|
+
*
|
7
|
+
* Finds the first child node of that element which is an Element node
|
8
|
+
* Note the handling of entities references is different than in
|
9
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
10
|
+
* from entities content to entities references.
|
11
|
+
*
|
12
|
+
* Returns the first element child or NULL if not available
|
13
|
+
*/
|
14
|
+
xmlNodePtr
|
15
|
+
xmlFirstElementChild(xmlNodePtr parent)
|
16
|
+
{
|
17
|
+
xmlNodePtr cur = NULL;
|
18
|
+
|
19
|
+
if (parent == NULL) {
|
20
|
+
return (NULL);
|
21
|
+
}
|
22
|
+
switch (parent->type) {
|
23
|
+
case XML_ELEMENT_NODE:
|
24
|
+
case XML_ENTITY_NODE:
|
25
|
+
case XML_DOCUMENT_NODE:
|
26
|
+
case XML_HTML_DOCUMENT_NODE:
|
27
|
+
cur = parent->children;
|
28
|
+
break;
|
29
|
+
default:
|
30
|
+
return (NULL);
|
31
|
+
}
|
32
|
+
while (cur != NULL) {
|
33
|
+
if (cur->type == XML_ELEMENT_NODE) {
|
34
|
+
return (cur);
|
35
|
+
}
|
36
|
+
cur = cur->next;
|
37
|
+
}
|
38
|
+
return (NULL);
|
39
|
+
}
|
40
|
+
|
41
|
+
/**
|
42
|
+
* xmlNextElementSibling:
|
43
|
+
* @node: the current node
|
44
|
+
*
|
45
|
+
* Finds the first closest next sibling of the node which is an
|
46
|
+
* element node.
|
47
|
+
* Note the handling of entities references is different than in
|
48
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
49
|
+
* from entities content to entities references.
|
50
|
+
*
|
51
|
+
* Returns the next element sibling or NULL if not available
|
52
|
+
*/
|
53
|
+
xmlNodePtr
|
54
|
+
xmlNextElementSibling(xmlNodePtr node)
|
55
|
+
{
|
56
|
+
if (node == NULL) {
|
57
|
+
return (NULL);
|
58
|
+
}
|
59
|
+
switch (node->type) {
|
60
|
+
case XML_ELEMENT_NODE:
|
61
|
+
case XML_TEXT_NODE:
|
62
|
+
case XML_CDATA_SECTION_NODE:
|
63
|
+
case XML_ENTITY_REF_NODE:
|
64
|
+
case XML_ENTITY_NODE:
|
65
|
+
case XML_PI_NODE:
|
66
|
+
case XML_COMMENT_NODE:
|
67
|
+
case XML_DTD_NODE:
|
68
|
+
case XML_XINCLUDE_START:
|
69
|
+
case XML_XINCLUDE_END:
|
70
|
+
node = node->next;
|
71
|
+
break;
|
72
|
+
default:
|
73
|
+
return (NULL);
|
74
|
+
}
|
75
|
+
while (node != NULL) {
|
76
|
+
if (node->type == XML_ELEMENT_NODE) {
|
77
|
+
return (node);
|
78
|
+
}
|
79
|
+
node = node->next;
|
80
|
+
}
|
81
|
+
return (NULL);
|
82
|
+
}
|
83
|
+
|
84
|
+
/**
|
85
|
+
* xmlLastElementChild:
|
86
|
+
* @parent: the parent node
|
87
|
+
*
|
88
|
+
* Finds the last child node of that element which is an Element node
|
89
|
+
* Note the handling of entities references is different than in
|
90
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
91
|
+
* from entities content to entities references.
|
92
|
+
*
|
93
|
+
* Returns the last element child or NULL if not available
|
94
|
+
*/
|
95
|
+
xmlNodePtr
|
96
|
+
xmlLastElementChild(xmlNodePtr parent)
|
97
|
+
{
|
98
|
+
xmlNodePtr cur = NULL;
|
99
|
+
|
100
|
+
if (parent == NULL) {
|
101
|
+
return (NULL);
|
102
|
+
}
|
103
|
+
switch (parent->type) {
|
104
|
+
case XML_ELEMENT_NODE:
|
105
|
+
case XML_ENTITY_NODE:
|
106
|
+
case XML_DOCUMENT_NODE:
|
107
|
+
case XML_HTML_DOCUMENT_NODE:
|
108
|
+
cur = parent->last;
|
109
|
+
break;
|
110
|
+
default:
|
111
|
+
return (NULL);
|
112
|
+
}
|
113
|
+
while (cur != NULL) {
|
114
|
+
if (cur->type == XML_ELEMENT_NODE) {
|
115
|
+
return (cur);
|
116
|
+
}
|
117
|
+
cur = cur->prev;
|
118
|
+
}
|
119
|
+
return (NULL);
|
120
|
+
}
|
121
|
+
#endif
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -1,101 +1,189 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE mNokogiri ;
|
4
|
+
VALUE mNokogiriGumbo ;
|
5
|
+
VALUE mNokogiriHtml4 ;
|
6
|
+
VALUE mNokogiriHtml4Sax ;
|
7
|
+
VALUE mNokogiriHtml5 ;
|
4
8
|
VALUE mNokogiriXml ;
|
5
|
-
VALUE mNokogiriHtml ;
|
6
|
-
VALUE mNokogiriXslt ;
|
7
9
|
VALUE mNokogiriXmlSax ;
|
8
|
-
VALUE
|
10
|
+
VALUE mNokogiriXmlXpath ;
|
11
|
+
VALUE mNokogiriXslt ;
|
12
|
+
|
13
|
+
VALUE cNokogiriSyntaxError;
|
14
|
+
VALUE cNokogiriXmlCharacterData;
|
15
|
+
VALUE cNokogiriXmlElement;
|
16
|
+
VALUE cNokogiriXmlXpathSyntaxError;
|
17
|
+
|
18
|
+
void noko_init_xml_attr(void);
|
19
|
+
void noko_init_xml_attribute_decl(void);
|
20
|
+
void noko_init_xml_cdata(void);
|
21
|
+
void noko_init_xml_comment(void);
|
22
|
+
void noko_init_xml_document(void);
|
23
|
+
void noko_init_xml_document_fragment(void);
|
24
|
+
void noko_init_xml_dtd(void);
|
25
|
+
void noko_init_xml_element_content(void);
|
26
|
+
void noko_init_xml_element_decl(void);
|
27
|
+
void noko_init_xml_encoding_handler(void);
|
28
|
+
void noko_init_xml_entity_decl(void);
|
29
|
+
void noko_init_xml_entity_reference(void);
|
30
|
+
void noko_init_xml_namespace(void);
|
31
|
+
void noko_init_xml_node(void);
|
32
|
+
void noko_init_xml_node_set(void);
|
33
|
+
void noko_init_xml_processing_instruction(void);
|
34
|
+
void noko_init_xml_reader(void);
|
35
|
+
void noko_init_xml_relax_ng(void);
|
36
|
+
void noko_init_xml_sax_parser(void);
|
37
|
+
void noko_init_xml_sax_parser_context(void);
|
38
|
+
void noko_init_xml_sax_push_parser(void);
|
39
|
+
void noko_init_xml_schema(void);
|
40
|
+
void noko_init_xml_syntax_error(void);
|
41
|
+
void noko_init_xml_text(void);
|
42
|
+
void noko_init_xml_xpath_context(void);
|
43
|
+
void noko_init_xslt_stylesheet(void);
|
44
|
+
void noko_init_html_document(void);
|
45
|
+
void noko_init_html_element_description(void);
|
46
|
+
void noko_init_html_entity_lookup(void);
|
47
|
+
void noko_init_html_sax_parser_context(void);
|
48
|
+
void noko_init_html_sax_push_parser(void);
|
49
|
+
void noko_init_gumbo(void);
|
50
|
+
void noko_init_test_global_handlers(void);
|
9
51
|
|
10
|
-
|
52
|
+
static ID id_read, id_write;
|
53
|
+
|
54
|
+
|
55
|
+
#ifndef HAVE_VASPRINTF
|
11
56
|
/*
|
12
|
-
* I srsly hate windows. it doesn't have vasprintf.
|
13
57
|
* Thank you Geoffroy Couprie for this implementation of vasprintf!
|
14
58
|
*/
|
15
|
-
int
|
59
|
+
int
|
60
|
+
vasprintf(char **strp, const char *fmt, va_list ap)
|
16
61
|
{
|
17
62
|
/* Mingw32/64 have a broken vsnprintf implementation that fails when
|
18
63
|
* using a zero-byte limit in order to retrieve the required size for malloc.
|
19
64
|
* So we use a one byte buffer instead.
|
20
65
|
*/
|
21
66
|
char tmp[1];
|
22
|
-
int len = vsnprintf
|
67
|
+
int len = vsnprintf(tmp, 1, fmt, ap) + 1;
|
23
68
|
char *res = (char *)malloc((unsigned int)len);
|
24
|
-
if (res == NULL)
|
25
|
-
|
69
|
+
if (res == NULL) {
|
70
|
+
return -1;
|
71
|
+
}
|
26
72
|
*strp = res;
|
27
73
|
return vsnprintf(res, (unsigned int)len, fmt, ap);
|
28
74
|
}
|
29
75
|
#endif
|
30
76
|
|
31
|
-
|
77
|
+
|
78
|
+
static VALUE
|
79
|
+
read_check(VALUE val)
|
32
80
|
{
|
33
|
-
|
81
|
+
VALUE *args = (VALUE *)val;
|
82
|
+
return rb_funcall(args[0], id_read, 1, args[1]);
|
34
83
|
}
|
35
84
|
|
36
|
-
#ifdef HAVE_RUBY_UTIL_H
|
37
|
-
#include "ruby/util.h"
|
38
|
-
#else
|
39
|
-
#include "util.h"
|
40
|
-
#endif
|
41
85
|
|
42
|
-
|
86
|
+
static VALUE
|
87
|
+
read_failed(VALUE arg, VALUE exc)
|
43
88
|
{
|
44
|
-
|
45
|
-
|
89
|
+
return Qundef;
|
90
|
+
}
|
91
|
+
|
92
|
+
|
93
|
+
int
|
94
|
+
noko_io_read(void *ctx, char *buffer, int len)
|
95
|
+
{
|
96
|
+
VALUE string, args[2];
|
97
|
+
size_t str_len, safe_len;
|
98
|
+
|
99
|
+
args[0] = (VALUE)ctx;
|
100
|
+
args[1] = INT2NUM(len);
|
46
101
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
102
|
+
string = rb_rescue(read_check, (VALUE)args, read_failed, 0);
|
103
|
+
|
104
|
+
if (NIL_P(string)) { return 0; }
|
105
|
+
if (string == Qundef) { return -1; }
|
106
|
+
if (TYPE(string) != T_STRING) { return -1; }
|
107
|
+
|
108
|
+
str_len = (size_t)RSTRING_LEN(string);
|
109
|
+
safe_len = str_len > (size_t)len ? (size_t)len : str_len;
|
110
|
+
memcpy(buffer, StringValuePtr(string), safe_len);
|
111
|
+
|
112
|
+
return (int)safe_len;
|
51
113
|
}
|
52
114
|
|
53
|
-
|
115
|
+
|
116
|
+
static VALUE
|
117
|
+
write_check(VALUE val)
|
54
118
|
{
|
55
|
-
|
119
|
+
VALUE *args = (VALUE *)val;
|
120
|
+
return rb_funcall(args[0], id_write, 1, args[1]);
|
121
|
+
}
|
122
|
+
|
56
123
|
|
57
|
-
|
58
|
-
|
59
|
-
|
124
|
+
static VALUE
|
125
|
+
write_failed(VALUE arg, VALUE exc)
|
126
|
+
{
|
127
|
+
return Qundef;
|
60
128
|
}
|
61
129
|
|
62
|
-
|
130
|
+
|
131
|
+
int
|
132
|
+
noko_io_write(void *ctx, char *buffer, int len)
|
63
133
|
{
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
);
|
134
|
+
VALUE args[2], size;
|
135
|
+
|
136
|
+
args[0] = (VALUE)ctx;
|
137
|
+
args[1] = rb_str_new(buffer, (long)len);
|
138
|
+
|
139
|
+
size = rb_rescue(write_check, (VALUE)args, write_failed, 0);
|
140
|
+
|
141
|
+
if (size == Qundef) { return -1; }
|
142
|
+
|
143
|
+
return NUM2INT(size);
|
144
|
+
}
|
70
145
|
|
146
|
+
|
147
|
+
int
|
148
|
+
noko_io_close(void *ctx)
|
149
|
+
{
|
150
|
+
return 0;
|
151
|
+
}
|
152
|
+
|
153
|
+
|
154
|
+
void
|
155
|
+
Init_nokogiri()
|
156
|
+
{
|
71
157
|
mNokogiri = rb_define_module("Nokogiri");
|
158
|
+
mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
|
159
|
+
mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
|
160
|
+
mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
|
161
|
+
mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
|
72
162
|
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
|
73
|
-
mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
|
74
|
-
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
75
163
|
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
rb_const_set(
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
#ifdef
|
88
|
-
rb_const_set(mNokogiri, rb_intern("
|
89
|
-
|
90
|
-
rb_const_set(mNokogiri, rb_intern("
|
91
|
-
|
92
|
-
rb_const_set(mNokogiri, rb_intern("
|
164
|
+
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
|
165
|
+
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
166
|
+
|
167
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
|
168
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
|
169
|
+
|
170
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
|
171
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
|
172
|
+
|
173
|
+
#ifdef NOKOGIRI_PACKAGED_LIBRARIES
|
174
|
+
rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
|
175
|
+
# ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
|
176
|
+
rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qtrue);
|
177
|
+
# else
|
178
|
+
rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
|
179
|
+
# endif
|
180
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
|
181
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
|
93
182
|
#else
|
94
|
-
rb_const_set(mNokogiri, rb_intern("
|
95
|
-
rb_const_set(mNokogiri, rb_intern("
|
96
|
-
rb_const_set(mNokogiri, rb_intern("
|
97
|
-
rb_const_set(mNokogiri, rb_intern("
|
98
|
-
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), Qnil);
|
183
|
+
rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qfalse);
|
184
|
+
rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
|
185
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), Qnil);
|
186
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), Qnil);
|
99
187
|
#endif
|
100
188
|
|
101
189
|
#ifdef LIBXML_ICONV_ENABLED
|
@@ -104,38 +192,87 @@ void Init_nokogiri()
|
|
104
192
|
rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
|
105
193
|
#endif
|
106
194
|
|
195
|
+
#ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
|
196
|
+
rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
|
197
|
+
#endif
|
198
|
+
|
199
|
+
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
200
|
+
/*
|
201
|
+
* We choose *not* to use Ruby's memory management functions with Windows DLLs because of this
|
202
|
+
* issue in libxml 2.9.12:
|
203
|
+
*
|
204
|
+
* https://github.com/sparklemotion/nokogiri/issues/2241
|
205
|
+
*
|
206
|
+
* If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
|
207
|
+
* this config only for the specific libxml2 version 2.9.12.
|
208
|
+
*
|
209
|
+
* Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
|
210
|
+
* default memory management functions (recall that this config was introduced to reduce memory
|
211
|
+
* bloat and allow Ruby to GC more often); but we should *really* test with production workloads
|
212
|
+
* before making that kind of a potentially-invasive change.
|
213
|
+
*/
|
214
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
215
|
+
#else
|
216
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
217
|
+
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
218
|
+
#endif
|
219
|
+
|
107
220
|
xmlInitParser();
|
221
|
+
exsltRegisterAll();
|
222
|
+
|
223
|
+
if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
|
224
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
|
225
|
+
} else {
|
226
|
+
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
|
227
|
+
}
|
228
|
+
|
229
|
+
cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
|
230
|
+
noko_init_xml_syntax_error();
|
231
|
+
assert(cNokogiriXmlSyntaxError);
|
232
|
+
cNokogiriXmlXpathSyntaxError = rb_define_class_under(mNokogiriXmlXpath, "SyntaxError", cNokogiriXmlSyntaxError);
|
233
|
+
|
234
|
+
noko_init_xml_element_content();
|
235
|
+
noko_init_xml_encoding_handler();
|
236
|
+
noko_init_xml_namespace();
|
237
|
+
noko_init_xml_node_set();
|
238
|
+
noko_init_xml_reader();
|
239
|
+
noko_init_xml_sax_parser();
|
240
|
+
noko_init_xml_xpath_context();
|
241
|
+
noko_init_xslt_stylesheet();
|
242
|
+
noko_init_html_element_description();
|
243
|
+
noko_init_html_entity_lookup();
|
244
|
+
|
245
|
+
noko_init_xml_schema();
|
246
|
+
noko_init_xml_relax_ng();
|
247
|
+
|
248
|
+
noko_init_xml_sax_parser_context();
|
249
|
+
noko_init_html_sax_parser_context();
|
250
|
+
|
251
|
+
noko_init_xml_sax_push_parser();
|
252
|
+
noko_init_html_sax_push_parser();
|
253
|
+
|
254
|
+
noko_init_xml_node();
|
255
|
+
noko_init_xml_attr();
|
256
|
+
noko_init_xml_attribute_decl();
|
257
|
+
noko_init_xml_dtd();
|
258
|
+
noko_init_xml_element_decl();
|
259
|
+
noko_init_xml_entity_decl();
|
260
|
+
noko_init_xml_entity_reference();
|
261
|
+
noko_init_xml_processing_instruction();
|
262
|
+
assert(cNokogiriXmlNode);
|
263
|
+
cNokogiriXmlElement = rb_define_class_under(mNokogiriXml, "Element", cNokogiriXmlNode);
|
264
|
+
cNokogiriXmlCharacterData = rb_define_class_under(mNokogiriXml, "CharacterData", cNokogiriXmlNode);
|
265
|
+
noko_init_xml_comment();
|
266
|
+
noko_init_xml_text();
|
267
|
+
noko_init_xml_cdata();
|
268
|
+
|
269
|
+
noko_init_xml_document_fragment();
|
270
|
+
noko_init_xml_document();
|
271
|
+
noko_init_html_document();
|
272
|
+
noko_init_gumbo();
|
273
|
+
|
274
|
+
noko_init_test_global_handlers();
|
108
275
|
|
109
|
-
|
110
|
-
|
111
|
-
init_xml_node();
|
112
|
-
init_xml_document_fragment();
|
113
|
-
init_xml_text();
|
114
|
-
init_xml_cdata();
|
115
|
-
init_xml_processing_instruction();
|
116
|
-
init_xml_attr();
|
117
|
-
init_xml_entity_reference();
|
118
|
-
init_xml_comment();
|
119
|
-
init_xml_node_set();
|
120
|
-
init_xml_xpath_context();
|
121
|
-
init_xml_sax_parser_context();
|
122
|
-
init_xml_sax_parser();
|
123
|
-
init_xml_sax_push_parser();
|
124
|
-
init_xml_reader();
|
125
|
-
init_xml_dtd();
|
126
|
-
init_xml_element_content();
|
127
|
-
init_xml_attribute_decl();
|
128
|
-
init_xml_element_decl();
|
129
|
-
init_xml_entity_decl();
|
130
|
-
init_xml_namespace();
|
131
|
-
init_html_sax_parser_context();
|
132
|
-
init_html_sax_push_parser();
|
133
|
-
init_xslt_stylesheet();
|
134
|
-
init_xml_syntax_error();
|
135
|
-
init_html_entity_lookup();
|
136
|
-
init_html_element_description();
|
137
|
-
init_xml_schema();
|
138
|
-
init_xml_relax_ng();
|
139
|
-
init_nokogiri_io();
|
140
|
-
init_xml_encoding_handler();
|
276
|
+
id_read = rb_intern("read");
|
277
|
+
id_write = rb_intern("write");
|
141
278
|
}
|