nokogiri 1.8.5 → 1.15.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -18
- data/LICENSE-DEPENDENCIES.md +1636 -1024
- data/LICENSE.md +5 -28
- data/README.md +203 -90
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -61
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +867 -417
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +215 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +42 -37
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +40 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +401 -237
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +136 -62
- data/ext/nokogiri/xml_node.c +1387 -678
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +347 -212
- data/ext/nokogiri/xml_relax_ng.c +86 -77
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +145 -103
- data/ext/nokogiri/xml_sax_push_parser.c +64 -36
- data/ext/nokogiri/xml_schema.c +138 -81
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +36 -26
- data/ext/nokogiri/xml_xpath_context.c +366 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +224 -95
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +392 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +98 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -126
- data/lib/nokogiri/xml/document_fragment.rb +104 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +45 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1093 -411
- data/lib/nokogiri/xml/node_set.rb +173 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
- metadata +126 -399
- data/.autotest +0 -22
- data/.cross_rubies +0 -8
- data/.editorconfig +0 -17
- data/.gemtest +0 -0
- data/.travis.yml +0 -63
- data/CHANGELOG.md +0 -1368
- data/CONTRIBUTING.md +0 -42
- data/C_CODING_STYLE.rdoc +0 -33
- data/Gemfile-libxml-ruby +0 -3
- data/Manifest.txt +0 -370
- data/ROADMAP.md +0 -111
- data/Rakefile +0 -348
- data/SECURITY.md +0 -19
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/appveyor.yml +0 -29
- data/build_all +0 -44
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -15
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
- data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
- data/patches/sort-patches-by-date +0 -25
- data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
- data/suppressions/README.txt +0 -1
- data/suppressions/nokogiri_ruby-2.supp +0 -10
- data/tasks/test.rb +0 -100
- data/test/css/test_nthiness.rb +0 -226
- data/test/css/test_parser.rb +0 -386
- data/test/css/test_tokenizer.rb +0 -215
- data/test/css/test_xpath_visitor.rb +0 -96
- data/test/decorators/test_slop.rb +0 -23
- data/test/files/2ch.html +0 -108
- data/test/files/GH_1042.html +0 -18
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/atom.xml +0 -344
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/namespace_pressure_test.xml +0 -1684
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/shift_jis_no_charset.html +0 -9
- data/test/files/slow-xpath.xml +0 -25509
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -851
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -271
- data/test/html/sax/test_parser.rb +0 -168
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/sax/test_parser_text.rb +0 -163
- data/test/html/sax/test_push_parser.rb +0 -87
- data/test/html/test_attributes.rb +0 -85
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -712
- data/test/html/test_document_encoding.rb +0 -143
- data/test/html/test_document_fragment.rb +0 -310
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -212
- data/test/html/test_node_encoding.rb +0 -91
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
- data/test/namespaces/test_namespaces_preservation.rb +0 -31
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -47
- data/test/test_encoding_handler.rb +0 -48
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -138
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -314
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -402
- data/test/xml/sax/test_parser_context.rb +0 -115
- data/test/xml/sax/test_parser_text.rb +0 -202
- data/test/xml/sax/test_push_parser.rb +0 -265
- data/test/xml/test_attr.rb +0 -74
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -341
- data/test/xml/test_c14n.rb +0 -180
- data/test/xml/test_cdata.rb +0 -54
- data/test/xml/test_comment.rb +0 -40
- data/test/xml/test_document.rb +0 -982
- data/test/xml/test_document_encoding.rb +0 -31
- data/test/xml/test_document_fragment.rb +0 -298
- data/test/xml/test_dtd.rb +0 -187
- data/test/xml/test_dtd_encoding.rb +0 -31
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -262
- data/test/xml/test_namespace.rb +0 -96
- data/test/xml/test_node.rb +0 -1325
- data/test/xml/test_node_attributes.rb +0 -115
- data/test/xml/test_node_encoding.rb +0 -75
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -592
- data/test/xml/test_node_set.rb +0 -809
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader.rb +0 -620
- data/test/xml/test_reader_encoding.rb +0 -134
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -142
- data/test/xml/test_syntax_error.rb +0 -36
- data/test/xml/test_text.rb +0 -60
- data/test/xml/test_unparented_node.rb +0 -483
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -470
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
data/ext/nokogiri/xml_document.c
CHANGED
@@ -1,88 +1,168 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlDocument ;
|
4
|
+
|
5
|
+
static int
|
6
|
+
dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
|
4
7
|
{
|
5
|
-
switch(node->type) {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
8
|
+
switch (node->type) {
|
9
|
+
case XML_ATTRIBUTE_NODE:
|
10
|
+
xmlFreePropList((xmlAttrPtr)node);
|
11
|
+
break;
|
12
|
+
case XML_NAMESPACE_DECL:
|
13
|
+
xmlFreeNs((xmlNsPtr)node);
|
14
|
+
break;
|
15
|
+
case XML_DTD_NODE:
|
16
|
+
xmlFreeDtd((xmlDtdPtr)node);
|
17
|
+
break;
|
18
|
+
default:
|
19
|
+
if (node->parent == NULL) {
|
20
|
+
xmlAddChild((xmlNodePtr)doc, node);
|
21
|
+
}
|
16
22
|
}
|
17
23
|
return ST_CONTINUE;
|
18
24
|
}
|
19
25
|
|
20
|
-
static
|
26
|
+
static int
|
27
|
+
dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
|
28
|
+
{
|
29
|
+
return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
|
30
|
+
}
|
31
|
+
|
32
|
+
static void
|
33
|
+
remove_private(xmlNodePtr node)
|
21
34
|
{
|
22
35
|
xmlNodePtr child;
|
23
36
|
|
24
|
-
for (child = node->children; child; child = child->next)
|
37
|
+
for (child = node->children; child; child = child->next) {
|
25
38
|
remove_private(child);
|
39
|
+
}
|
26
40
|
|
27
41
|
if ((node->type == XML_ELEMENT_NODE ||
|
28
42
|
node->type == XML_XINCLUDE_START ||
|
29
43
|
node->type == XML_XINCLUDE_END) &&
|
30
44
|
node->properties) {
|
31
|
-
for (child = (xmlNodePtr)node->properties; child; child = child->next)
|
45
|
+
for (child = (xmlNodePtr)node->properties; child; child = child->next) {
|
32
46
|
remove_private(child);
|
47
|
+
}
|
33
48
|
}
|
34
49
|
|
35
50
|
node->_private = NULL;
|
36
51
|
}
|
37
52
|
|
38
|
-
static void
|
53
|
+
static void
|
54
|
+
mark(void *data)
|
39
55
|
{
|
40
|
-
|
56
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
57
|
+
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
58
|
+
if (tuple) {
|
59
|
+
rb_gc_mark(tuple->doc);
|
60
|
+
rb_gc_mark(tuple->node_cache);
|
61
|
+
}
|
62
|
+
}
|
41
63
|
|
42
|
-
|
64
|
+
static void
|
65
|
+
dealloc(void *data)
|
66
|
+
{
|
67
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
68
|
+
st_table *node_hash;
|
43
69
|
|
44
70
|
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
45
71
|
|
46
72
|
st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
|
47
73
|
st_free_table(node_hash);
|
48
74
|
|
49
|
-
|
75
|
+
ruby_xfree(doc->_private);
|
50
76
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
*
|
77
|
+
#pragma GCC diagnostic push
|
78
|
+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
79
|
+
/*
|
80
|
+
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
81
|
+
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
82
|
+
* nokogiri, which will result in segfaults.
|
83
|
+
*
|
84
|
+
* To avoid this, we need to clear the _private pointers from all nodes in this document tree
|
85
|
+
* before that callback gets invoked.
|
86
|
+
*
|
87
|
+
* libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
|
88
|
+
* safeguard (though probably pairing with a runtime check on the libxml-ruby version).
|
55
89
|
*/
|
56
|
-
if (xmlDeregisterNodeDefaultValue)
|
90
|
+
if (xmlDeregisterNodeDefaultValue) {
|
57
91
|
remove_private((xmlNodePtr)doc);
|
92
|
+
}
|
93
|
+
#pragma GCC diagnostic pop
|
58
94
|
|
59
95
|
xmlFreeDoc(doc);
|
96
|
+
}
|
97
|
+
|
98
|
+
static size_t
|
99
|
+
memsize_node(const xmlNodePtr node)
|
100
|
+
{
|
101
|
+
/* note we don't count namespace definitions, just going for a good-enough number here */
|
102
|
+
xmlNodePtr child;
|
103
|
+
size_t memsize = 0;
|
104
|
+
|
105
|
+
memsize += xmlStrlen(node->name);
|
106
|
+
for (child = (xmlNodePtr)node->properties; child; child = child->next) {
|
107
|
+
memsize += sizeof(xmlAttr) + memsize_node(child);
|
108
|
+
}
|
109
|
+
if (node->type == XML_TEXT_NODE) {
|
110
|
+
memsize += xmlStrlen(node->content);
|
111
|
+
}
|
112
|
+
for (child = node->children; child; child = child->next) {
|
113
|
+
memsize += sizeof(xmlNode) + memsize_node(child);
|
114
|
+
}
|
115
|
+
return memsize;
|
116
|
+
}
|
60
117
|
|
61
|
-
|
118
|
+
static size_t
|
119
|
+
memsize(const void *data)
|
120
|
+
{
|
121
|
+
xmlDocPtr doc = (const xmlDocPtr)data;
|
122
|
+
size_t memsize = sizeof(xmlDoc);
|
123
|
+
/* This may not account for all memory use */
|
124
|
+
memsize += memsize_node((xmlNodePtr)doc);
|
125
|
+
return memsize;
|
62
126
|
}
|
63
127
|
|
64
|
-
static
|
128
|
+
static const rb_data_type_t noko_xml_document_data_type = {
|
129
|
+
.wrap_struct_name = "Nokogiri::XML::Document",
|
130
|
+
.function = {
|
131
|
+
.dmark = mark,
|
132
|
+
.dfree = dealloc,
|
133
|
+
.dsize = memsize,
|
134
|
+
},
|
135
|
+
// .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
|
136
|
+
};
|
137
|
+
|
138
|
+
static void
|
139
|
+
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
65
140
|
{
|
66
141
|
xmlNodePtr child ;
|
67
142
|
xmlAttrPtr property ;
|
68
143
|
|
69
144
|
xmlSetNs(node, NULL);
|
70
145
|
|
71
|
-
for (child = node->children ; child ; child = child->next)
|
146
|
+
for (child = node->children ; child ; child = child->next) {
|
72
147
|
recursively_remove_namespaces_from_node(child);
|
148
|
+
}
|
73
149
|
|
74
150
|
if (((node->type == XML_ELEMENT_NODE) ||
|
75
151
|
(node->type == XML_XINCLUDE_START) ||
|
76
152
|
(node->type == XML_XINCLUDE_END)) &&
|
77
153
|
node->nsDef) {
|
78
|
-
|
154
|
+
xmlNsPtr curr = node->nsDef;
|
155
|
+
while (curr) {
|
156
|
+
noko_xml_document_pin_namespace(curr, node->doc);
|
157
|
+
curr = curr->next;
|
158
|
+
}
|
79
159
|
node->nsDef = NULL;
|
80
160
|
}
|
81
161
|
|
82
162
|
if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
|
83
163
|
property = node->properties ;
|
84
164
|
while (property != NULL) {
|
85
|
-
if (property->ns) property->ns = NULL ;
|
165
|
+
if (property->ns) { property->ns = NULL ; }
|
86
166
|
property = property->next ;
|
87
167
|
}
|
88
168
|
}
|
@@ -94,12 +174,12 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
94
174
|
*
|
95
175
|
* Get the url name for this document.
|
96
176
|
*/
|
97
|
-
static VALUE
|
177
|
+
static VALUE
|
178
|
+
url(VALUE self)
|
98
179
|
{
|
99
|
-
xmlDocPtr doc;
|
100
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
180
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
101
181
|
|
102
|
-
if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
|
182
|
+
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
103
183
|
|
104
184
|
return Qnil;
|
105
185
|
}
|
@@ -110,42 +190,42 @@ static VALUE url(VALUE self)
|
|
110
190
|
*
|
111
191
|
* Set the root element on this document
|
112
192
|
*/
|
113
|
-
static VALUE
|
193
|
+
static VALUE
|
194
|
+
rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
114
195
|
{
|
115
|
-
xmlDocPtr
|
116
|
-
xmlNodePtr
|
117
|
-
xmlNodePtr old_root;
|
118
|
-
|
119
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
196
|
+
xmlDocPtr c_document;
|
197
|
+
xmlNodePtr c_new_root = NULL, c_current_root;
|
120
198
|
|
121
|
-
|
199
|
+
c_document = noko_xml_document_unwrap(self);
|
122
200
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
xmlUnlinkNode(old_root);
|
128
|
-
nokogiri_root_node(old_root);
|
129
|
-
}
|
130
|
-
|
131
|
-
return root;
|
201
|
+
c_current_root = xmlDocGetRootElement(c_document);
|
202
|
+
if (c_current_root) {
|
203
|
+
xmlUnlinkNode(c_current_root);
|
204
|
+
noko_xml_document_pin_node(c_current_root);
|
132
205
|
}
|
133
206
|
|
134
|
-
|
207
|
+
if (!NIL_P(rb_new_root)) {
|
208
|
+
if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
|
209
|
+
rb_raise(rb_eArgError,
|
210
|
+
"expected Nokogiri::XML::Node but received %"PRIsVALUE,
|
211
|
+
rb_obj_class(rb_new_root));
|
212
|
+
}
|
135
213
|
|
214
|
+
Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
|
136
215
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
216
|
+
/* If the new root's document is not the same as the current document,
|
217
|
+
* then we need to dup the node in to this document. */
|
218
|
+
if (c_new_root->doc != c_document) {
|
219
|
+
c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
|
220
|
+
if (!c_new_root) {
|
221
|
+
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
|
222
|
+
}
|
143
223
|
}
|
144
224
|
}
|
145
225
|
|
146
|
-
xmlDocSetRootElement(
|
147
|
-
|
148
|
-
return
|
226
|
+
xmlDocSetRootElement(c_document, c_new_root);
|
227
|
+
|
228
|
+
return rb_new_root;
|
149
229
|
}
|
150
230
|
|
151
231
|
/*
|
@@ -154,17 +234,20 @@ static VALUE set_root(VALUE self, VALUE root)
|
|
154
234
|
*
|
155
235
|
* Get the root node for this document.
|
156
236
|
*/
|
157
|
-
static VALUE
|
237
|
+
static VALUE
|
238
|
+
rb_xml_document_root(VALUE self)
|
158
239
|
{
|
159
|
-
xmlDocPtr
|
160
|
-
xmlNodePtr
|
240
|
+
xmlDocPtr c_document;
|
241
|
+
xmlNodePtr c_root;
|
161
242
|
|
162
|
-
|
243
|
+
c_document = noko_xml_document_unwrap(self);
|
163
244
|
|
164
|
-
|
245
|
+
c_root = xmlDocGetRootElement(c_document);
|
246
|
+
if (!c_root) {
|
247
|
+
return Qnil;
|
248
|
+
}
|
165
249
|
|
166
|
-
|
167
|
-
return Nokogiri_wrap_xml_node(Qnil, root) ;
|
250
|
+
return noko_xml_node_wrap(Qnil, c_root) ;
|
168
251
|
}
|
169
252
|
|
170
253
|
/*
|
@@ -173,13 +256,14 @@ static VALUE root(VALUE self)
|
|
173
256
|
*
|
174
257
|
* Set the encoding string for this Document
|
175
258
|
*/
|
176
|
-
static VALUE
|
259
|
+
static VALUE
|
260
|
+
set_encoding(VALUE self, VALUE encoding)
|
177
261
|
{
|
178
|
-
xmlDocPtr doc;
|
179
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
262
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
180
263
|
|
181
|
-
if (doc->encoding)
|
182
|
-
|
264
|
+
if (doc->encoding) {
|
265
|
+
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
266
|
+
}
|
183
267
|
|
184
268
|
doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
|
185
269
|
|
@@ -192,12 +276,12 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
|
|
192
276
|
*
|
193
277
|
* Get the encoding for this Document
|
194
278
|
*/
|
195
|
-
static VALUE
|
279
|
+
static VALUE
|
280
|
+
encoding(VALUE self)
|
196
281
|
{
|
197
|
-
xmlDocPtr doc;
|
198
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
282
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
199
283
|
|
200
|
-
if(!doc->encoding) return Qnil;
|
284
|
+
if (!doc->encoding) { return Qnil; }
|
201
285
|
return NOKOGIRI_STR_NEW2(doc->encoding);
|
202
286
|
}
|
203
287
|
|
@@ -207,12 +291,12 @@ static VALUE encoding(VALUE self)
|
|
207
291
|
*
|
208
292
|
* Get the XML version for this Document
|
209
293
|
*/
|
210
|
-
static VALUE
|
294
|
+
static VALUE
|
295
|
+
version(VALUE self)
|
211
296
|
{
|
212
|
-
xmlDocPtr doc;
|
213
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
297
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
214
298
|
|
215
|
-
if(!doc->version) return Qnil;
|
299
|
+
if (!doc->version) { return Qnil; }
|
216
300
|
return NOKOGIRI_STR_NEW2(doc->version);
|
217
301
|
}
|
218
302
|
|
@@ -222,14 +306,15 @@ static VALUE version(VALUE self)
|
|
222
306
|
*
|
223
307
|
* Create a new document from an IO object
|
224
308
|
*/
|
225
|
-
static VALUE
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
309
|
+
static VALUE
|
310
|
+
read_io(VALUE klass,
|
311
|
+
VALUE io,
|
312
|
+
VALUE url,
|
313
|
+
VALUE encoding,
|
314
|
+
VALUE options)
|
230
315
|
{
|
231
|
-
const char *
|
232
|
-
const char *
|
316
|
+
const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
|
317
|
+
const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
|
233
318
|
VALUE error_list = rb_ary_new();
|
234
319
|
VALUE document;
|
235
320
|
xmlDocPtr doc;
|
@@ -238,30 +323,31 @@ static VALUE read_io( VALUE klass,
|
|
238
323
|
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
239
324
|
|
240
325
|
doc = xmlReadIO(
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
326
|
+
(xmlInputReadCallback)noko_io_read,
|
327
|
+
(xmlInputCloseCallback)noko_io_close,
|
328
|
+
(void *)io,
|
329
|
+
c_url,
|
330
|
+
c_enc,
|
331
|
+
(int)NUM2INT(options)
|
332
|
+
);
|
248
333
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
249
334
|
|
250
|
-
if(doc == NULL) {
|
335
|
+
if (doc == NULL) {
|
251
336
|
xmlErrorPtr error;
|
252
337
|
|
253
338
|
xmlFreeDoc(doc);
|
254
339
|
|
255
340
|
error = xmlGetLastError();
|
256
|
-
if(error)
|
341
|
+
if (error) {
|
257
342
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
258
|
-
else
|
343
|
+
} else {
|
259
344
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
345
|
+
}
|
260
346
|
|
261
347
|
return Qnil;
|
262
348
|
}
|
263
349
|
|
264
|
-
document =
|
350
|
+
document = noko_xml_document_wrap(klass, doc);
|
265
351
|
rb_iv_set(document, "@errors", error_list);
|
266
352
|
return document;
|
267
353
|
}
|
@@ -272,15 +358,16 @@ static VALUE read_io( VALUE klass,
|
|
272
358
|
*
|
273
359
|
* Create a new document from a String
|
274
360
|
*/
|
275
|
-
static VALUE
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
361
|
+
static VALUE
|
362
|
+
read_memory(VALUE klass,
|
363
|
+
VALUE string,
|
364
|
+
VALUE url,
|
365
|
+
VALUE encoding,
|
366
|
+
VALUE options)
|
280
367
|
{
|
281
|
-
const char *
|
282
|
-
const char *
|
283
|
-
const char *
|
368
|
+
const char *c_buffer = StringValuePtr(string);
|
369
|
+
const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
|
370
|
+
const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
|
284
371
|
int len = (int)RSTRING_LEN(string);
|
285
372
|
VALUE error_list = rb_ary_new();
|
286
373
|
VALUE document;
|
@@ -291,21 +378,22 @@ static VALUE read_memory( VALUE klass,
|
|
291
378
|
doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
|
292
379
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
293
380
|
|
294
|
-
if(doc == NULL) {
|
381
|
+
if (doc == NULL) {
|
295
382
|
xmlErrorPtr error;
|
296
383
|
|
297
384
|
xmlFreeDoc(doc);
|
298
385
|
|
299
386
|
error = xmlGetLastError();
|
300
|
-
if(error)
|
387
|
+
if (error) {
|
301
388
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
302
|
-
else
|
389
|
+
} else {
|
303
390
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
391
|
+
}
|
304
392
|
|
305
393
|
return Qnil;
|
306
394
|
}
|
307
395
|
|
308
|
-
document =
|
396
|
+
document = noko_xml_document_wrap(klass, doc);
|
309
397
|
rb_iv_set(document, "@errors", error_list);
|
310
398
|
return document;
|
311
399
|
}
|
@@ -317,26 +405,26 @@ static VALUE read_memory( VALUE klass,
|
|
317
405
|
* Copy this Document. An optional depth may be passed in, but it defaults
|
318
406
|
* to a deep copy. 0 is a shallow copy, 1 is a deep copy.
|
319
407
|
*/
|
320
|
-
static VALUE
|
408
|
+
static VALUE
|
409
|
+
duplicate_document(int argc, VALUE *argv, VALUE self)
|
321
410
|
{
|
322
411
|
xmlDocPtr doc, dup;
|
323
412
|
VALUE copy;
|
324
413
|
VALUE level;
|
325
|
-
VALUE error_list;
|
326
414
|
|
327
|
-
if(rb_scan_args(argc, argv, "01", &level) == 0)
|
415
|
+
if (rb_scan_args(argc, argv, "01", &level) == 0) {
|
328
416
|
level = INT2NUM((long)1);
|
417
|
+
}
|
329
418
|
|
330
|
-
|
419
|
+
doc = noko_xml_document_unwrap(self);
|
331
420
|
|
332
421
|
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
333
422
|
|
334
|
-
if(dup == NULL) return Qnil;
|
423
|
+
if (dup == NULL) { return Qnil; }
|
335
424
|
|
336
425
|
dup->type = doc->type;
|
337
|
-
copy =
|
338
|
-
|
339
|
-
rb_iv_set(copy, "@errors", error_list);
|
426
|
+
copy = noko_xml_document_wrap(rb_obj_class(self), dup);
|
427
|
+
rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors"));
|
340
428
|
return copy ;
|
341
429
|
}
|
342
430
|
|
@@ -346,18 +434,18 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
|
|
346
434
|
*
|
347
435
|
* Create a new document with +version+ (defaults to "1.0")
|
348
436
|
*/
|
349
|
-
static VALUE
|
437
|
+
static VALUE
|
438
|
+
new (int argc, VALUE *argv, VALUE klass)
|
350
439
|
{
|
351
440
|
xmlDocPtr doc;
|
352
441
|
VALUE version, rest, rb_doc ;
|
353
442
|
|
354
443
|
rb_scan_args(argc, argv, "0*", &rest);
|
355
444
|
version = rb_ary_entry(rest, (long)0);
|
356
|
-
if (NIL_P(version)) version = rb_str_new2("1.0");
|
445
|
+
if (NIL_P(version)) { version = rb_str_new2("1.0"); }
|
357
446
|
|
358
447
|
doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
|
359
|
-
rb_doc =
|
360
|
-
rb_obj_call_init(rb_doc, argc, argv);
|
448
|
+
rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
|
361
449
|
return rb_doc ;
|
362
450
|
}
|
363
451
|
|
@@ -398,10 +486,10 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
398
486
|
* please direct your browser to
|
399
487
|
* http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
|
400
488
|
*/
|
401
|
-
|
489
|
+
static VALUE
|
490
|
+
remove_namespaces_bang(VALUE self)
|
402
491
|
{
|
403
|
-
xmlDocPtr doc ;
|
404
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
492
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
405
493
|
|
406
494
|
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
|
407
495
|
return self;
|
@@ -418,7 +506,8 @@ VALUE remove_namespaces_bang(VALUE self)
|
|
418
506
|
* +external_id+, +system_id+, and +content+ set the External ID, System ID,
|
419
507
|
* and content respectively. All of these parameters are optional.
|
420
508
|
*/
|
421
|
-
static VALUE
|
509
|
+
static VALUE
|
510
|
+
create_entity(int argc, VALUE *argv, VALUE self)
|
422
511
|
{
|
423
512
|
VALUE name;
|
424
513
|
VALUE type;
|
@@ -428,55 +517,53 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
|
|
428
517
|
xmlEntityPtr ptr;
|
429
518
|
xmlDocPtr doc ;
|
430
519
|
|
431
|
-
|
520
|
+
doc = noko_xml_document_unwrap(self);
|
432
521
|
|
433
522
|
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
|
434
|
-
|
523
|
+
&content);
|
435
524
|
|
436
525
|
xmlResetLastError();
|
437
526
|
ptr = xmlAddDocEntity(
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
if(NULL == ptr) {
|
527
|
+
doc,
|
528
|
+
(xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
|
529
|
+
(int)(NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
|
530
|
+
(xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
|
531
|
+
(xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
|
532
|
+
(xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
|
533
|
+
);
|
534
|
+
|
535
|
+
if (NULL == ptr) {
|
447
536
|
xmlErrorPtr error = xmlGetLastError();
|
448
|
-
if(error)
|
537
|
+
if (error) {
|
449
538
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
450
|
-
else
|
539
|
+
} else {
|
451
540
|
rb_raise(rb_eRuntimeError, "Could not create entity");
|
541
|
+
}
|
452
542
|
|
453
543
|
return Qnil;
|
454
544
|
}
|
455
545
|
|
456
|
-
return
|
546
|
+
return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
|
457
547
|
}
|
458
548
|
|
459
|
-
static int
|
549
|
+
static int
|
550
|
+
block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
|
460
551
|
{
|
461
|
-
VALUE block;
|
462
|
-
VALUE
|
463
|
-
VALUE
|
552
|
+
VALUE block = (VALUE)ctx;
|
553
|
+
VALUE rb_node;
|
554
|
+
VALUE rb_parent_node;
|
464
555
|
VALUE ret;
|
465
556
|
|
466
|
-
if(
|
467
|
-
|
468
|
-
}
|
469
|
-
|
470
|
-
node = Nokogiri_wrap_xml_node(Qnil, _node);
|
557
|
+
if (c_node->type == XML_NAMESPACE_DECL) {
|
558
|
+
rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
|
559
|
+
} else {
|
560
|
+
rb_node = noko_xml_node_wrap(Qnil, c_node);
|
471
561
|
}
|
472
|
-
|
473
|
-
block = (VALUE)ctx;
|
562
|
+
rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
|
474
563
|
|
475
|
-
ret = rb_funcall(block, rb_intern("call"), 2,
|
564
|
+
ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
|
476
565
|
|
477
|
-
|
478
|
-
|
479
|
-
return 1;
|
566
|
+
return (Qfalse == ret || Qnil == ret) ? 0 : 1;
|
480
567
|
}
|
481
568
|
|
482
569
|
/* call-seq:
|
@@ -489,117 +576,194 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
|
|
489
576
|
* The block must return a non-nil, non-false value if the +obj+ passed in
|
490
577
|
* should be included in the canonicalized document.
|
491
578
|
*/
|
492
|
-
static VALUE
|
579
|
+
static VALUE
|
580
|
+
rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
493
581
|
{
|
494
|
-
VALUE
|
495
|
-
VALUE
|
496
|
-
VALUE
|
497
|
-
|
498
|
-
|
582
|
+
VALUE rb_mode;
|
583
|
+
VALUE rb_namespaces;
|
584
|
+
VALUE rb_comments_p;
|
585
|
+
int c_mode = 0;
|
586
|
+
xmlChar **c_namespaces;
|
499
587
|
|
500
|
-
xmlDocPtr
|
501
|
-
xmlOutputBufferPtr
|
502
|
-
xmlC14NIsVisibleCallback
|
503
|
-
void *
|
588
|
+
xmlDocPtr c_doc;
|
589
|
+
xmlOutputBufferPtr c_obuf;
|
590
|
+
xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
|
591
|
+
void *rb_callback = NULL;
|
504
592
|
|
505
593
|
VALUE rb_cStringIO;
|
506
|
-
VALUE
|
594
|
+
VALUE rb_io;
|
507
595
|
|
508
|
-
rb_scan_args(argc, argv, "03", &
|
596
|
+
rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
|
597
|
+
if (!NIL_P(rb_mode)) {
|
598
|
+
Check_Type(rb_mode, T_FIXNUM);
|
599
|
+
c_mode = NUM2INT(rb_mode);
|
600
|
+
}
|
601
|
+
if (!NIL_P(rb_namespaces)) {
|
602
|
+
Check_Type(rb_namespaces, T_ARRAY);
|
603
|
+
if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
|
604
|
+
rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
|
605
|
+
}
|
606
|
+
}
|
509
607
|
|
510
|
-
|
608
|
+
c_doc = noko_xml_document_unwrap(self);
|
511
609
|
|
512
610
|
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
513
|
-
|
514
|
-
|
611
|
+
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
|
612
|
+
c_obuf = xmlAllocOutputBuffer(NULL);
|
515
613
|
|
516
|
-
|
517
|
-
|
518
|
-
|
614
|
+
c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
|
615
|
+
c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
|
616
|
+
c_obuf->context = (void *)rb_io;
|
519
617
|
|
520
|
-
if(rb_block_given_p()) {
|
521
|
-
|
522
|
-
|
618
|
+
if (rb_block_given_p()) {
|
619
|
+
c_callback_wrapper = block_caller;
|
620
|
+
rb_callback = (void *)rb_block_proc();
|
523
621
|
}
|
524
622
|
|
525
|
-
if(NIL_P(
|
526
|
-
|
527
|
-
}
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
VALUE entry = rb_ary_entry(incl_ns, i);
|
534
|
-
ns[i] = (xmlChar*)StringValueCStr(entry);
|
623
|
+
if (NIL_P(rb_namespaces)) {
|
624
|
+
c_namespaces = NULL;
|
625
|
+
} else {
|
626
|
+
long ns_len = RARRAY_LEN(rb_namespaces);
|
627
|
+
c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
|
628
|
+
for (int j = 0 ; j < ns_len ; j++) {
|
629
|
+
VALUE entry = rb_ary_entry(rb_namespaces, j);
|
630
|
+
c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
|
535
631
|
}
|
536
632
|
}
|
537
633
|
|
634
|
+
xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
|
635
|
+
c_mode,
|
636
|
+
c_namespaces,
|
637
|
+
(int)RTEST(rb_comments_p),
|
638
|
+
c_obuf);
|
639
|
+
|
640
|
+
ruby_xfree(c_namespaces);
|
641
|
+
xmlOutputBufferClose(c_obuf);
|
642
|
+
|
643
|
+
return rb_funcall(rb_io, rb_intern("string"), 0);
|
644
|
+
}
|
645
|
+
|
646
|
+
VALUE
|
647
|
+
noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
|
648
|
+
{
|
649
|
+
VALUE rb_document;
|
650
|
+
nokogiriTuplePtr tuple;
|
651
|
+
|
652
|
+
if (!klass) {
|
653
|
+
klass = cNokogiriXmlDocument;
|
654
|
+
}
|
655
|
+
|
656
|
+
rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
|
657
|
+
|
658
|
+
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
659
|
+
tuple->doc = rb_document;
|
660
|
+
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
661
|
+
tuple->node_cache = rb_ary_new();
|
662
|
+
|
663
|
+
c_document->_private = tuple ;
|
538
664
|
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
(int) RTEST(with_comments),
|
543
|
-
buf);
|
665
|
+
rb_iv_set(rb_document, "@decorators", Qnil);
|
666
|
+
rb_iv_set(rb_document, "@errors", Qnil);
|
667
|
+
rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
|
544
668
|
|
545
|
-
|
669
|
+
rb_obj_call_init(rb_document, argc, argv);
|
546
670
|
|
547
|
-
return
|
671
|
+
return rb_document ;
|
548
672
|
}
|
549
673
|
|
550
|
-
|
551
|
-
|
674
|
+
|
675
|
+
/* deprecated. use noko_xml_document_wrap() instead. */
|
676
|
+
VALUE
|
677
|
+
Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
|
552
678
|
{
|
553
|
-
|
554
|
-
|
555
|
-
|
679
|
+
/* TODO: deprecate this method in v2.0 */
|
680
|
+
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
681
|
+
}
|
556
682
|
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
cNokogiriXmlDocument = klass;
|
563
|
-
|
564
|
-
rb_define_singleton_method(klass, "read_memory", read_memory, 4);
|
565
|
-
rb_define_singleton_method(klass, "read_io", read_io, 4);
|
566
|
-
rb_define_singleton_method(klass, "new", new, -1);
|
567
|
-
|
568
|
-
rb_define_method(klass, "root", root, 0);
|
569
|
-
rb_define_method(klass, "root=", set_root, 1);
|
570
|
-
rb_define_method(klass, "encoding", encoding, 0);
|
571
|
-
rb_define_method(klass, "encoding=", set_encoding, 1);
|
572
|
-
rb_define_method(klass, "version", version, 0);
|
573
|
-
rb_define_method(klass, "canonicalize", canonicalize, -1);
|
574
|
-
rb_define_method(klass, "dup", duplicate_document, -1);
|
575
|
-
rb_define_method(klass, "url", url, 0);
|
576
|
-
rb_define_method(klass, "create_entity", create_entity, -1);
|
577
|
-
rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
|
683
|
+
VALUE
|
684
|
+
noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
685
|
+
{
|
686
|
+
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
578
687
|
}
|
579
688
|
|
689
|
+
xmlDocPtr
|
690
|
+
noko_xml_document_unwrap(VALUE rb_document)
|
691
|
+
{
|
692
|
+
xmlDocPtr c_document;
|
693
|
+
TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
|
694
|
+
return c_document;
|
695
|
+
}
|
580
696
|
|
581
|
-
/*
|
582
|
-
|
697
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
698
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
699
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
700
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
701
|
+
*/
|
702
|
+
int
|
703
|
+
noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
|
583
704
|
{
|
584
|
-
|
705
|
+
VALUE cache = DOC_NODE_CACHE(c_document);
|
585
706
|
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
dealloc,
|
590
|
-
doc
|
591
|
-
);
|
707
|
+
if (NIL_P(cache)) {
|
708
|
+
return 0;
|
709
|
+
}
|
592
710
|
|
593
|
-
|
594
|
-
|
595
|
-
|
711
|
+
for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
|
712
|
+
xmlNodePtr node;
|
713
|
+
VALUE element = rb_ary_entry(cache, jnode);
|
596
714
|
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
715
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
716
|
+
if (xmlIsBlankNode(node)) {
|
717
|
+
return 1;
|
718
|
+
}
|
719
|
+
}
|
601
720
|
|
602
|
-
|
721
|
+
return 0;
|
722
|
+
}
|
603
723
|
|
604
|
-
|
724
|
+
void
|
725
|
+
noko_xml_document_pin_node(xmlNodePtr node)
|
726
|
+
{
|
727
|
+
xmlDocPtr doc;
|
728
|
+
nokogiriTuplePtr tuple;
|
729
|
+
|
730
|
+
doc = node->doc;
|
731
|
+
tuple = (nokogiriTuplePtr)doc->_private;
|
732
|
+
st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
|
733
|
+
}
|
734
|
+
|
735
|
+
|
736
|
+
void
|
737
|
+
noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
|
738
|
+
{
|
739
|
+
nokogiriTuplePtr tuple;
|
740
|
+
|
741
|
+
tuple = (nokogiriTuplePtr)doc->_private;
|
742
|
+
st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
|
743
|
+
}
|
744
|
+
|
745
|
+
|
746
|
+
void
|
747
|
+
noko_init_xml_document(void)
|
748
|
+
{
|
749
|
+
assert(cNokogiriXmlNode);
|
750
|
+
/*
|
751
|
+
* Nokogiri::XML::Document wraps an xml document.
|
752
|
+
*/
|
753
|
+
cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
|
754
|
+
|
755
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4);
|
756
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
|
757
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
|
758
|
+
|
759
|
+
rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
|
760
|
+
rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
|
761
|
+
rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
|
762
|
+
rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
|
763
|
+
rb_define_method(cNokogiriXmlDocument, "version", version, 0);
|
764
|
+
rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
|
765
|
+
rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1);
|
766
|
+
rb_define_method(cNokogiriXmlDocument, "url", url, 0);
|
767
|
+
rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1);
|
768
|
+
rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
|
605
769
|
}
|