nokogiri 1.8.5 → 1.15.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -18
- data/LICENSE-DEPENDENCIES.md +1636 -1024
- data/LICENSE.md +5 -28
- data/README.md +203 -90
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -61
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +867 -417
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +215 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +42 -37
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +40 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +401 -237
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +136 -62
- data/ext/nokogiri/xml_node.c +1387 -678
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +347 -212
- data/ext/nokogiri/xml_relax_ng.c +86 -77
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +145 -103
- data/ext/nokogiri/xml_sax_push_parser.c +64 -36
- data/ext/nokogiri/xml_schema.c +138 -81
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +36 -26
- data/ext/nokogiri/xml_xpath_context.c +366 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +224 -95
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +392 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +98 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -126
- data/lib/nokogiri/xml/document_fragment.rb +104 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +45 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1093 -411
- data/lib/nokogiri/xml/node_set.rb +173 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
- metadata +126 -399
- data/.autotest +0 -22
- data/.cross_rubies +0 -8
- data/.editorconfig +0 -17
- data/.gemtest +0 -0
- data/.travis.yml +0 -63
- data/CHANGELOG.md +0 -1368
- data/CONTRIBUTING.md +0 -42
- data/C_CODING_STYLE.rdoc +0 -33
- data/Gemfile-libxml-ruby +0 -3
- data/Manifest.txt +0 -370
- data/ROADMAP.md +0 -111
- data/Rakefile +0 -348
- data/SECURITY.md +0 -19
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/appveyor.yml +0 -29
- data/build_all +0 -44
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -15
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
- data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
- data/patches/sort-patches-by-date +0 -25
- data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
- data/suppressions/README.txt +0 -1
- data/suppressions/nokogiri_ruby-2.supp +0 -10
- data/tasks/test.rb +0 -100
- data/test/css/test_nthiness.rb +0 -226
- data/test/css/test_parser.rb +0 -386
- data/test/css/test_tokenizer.rb +0 -215
- data/test/css/test_xpath_visitor.rb +0 -96
- data/test/decorators/test_slop.rb +0 -23
- data/test/files/2ch.html +0 -108
- data/test/files/GH_1042.html +0 -18
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/atom.xml +0 -344
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/namespace_pressure_test.xml +0 -1684
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/shift_jis_no_charset.html +0 -9
- data/test/files/slow-xpath.xml +0 -25509
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -851
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -271
- data/test/html/sax/test_parser.rb +0 -168
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/sax/test_parser_text.rb +0 -163
- data/test/html/sax/test_push_parser.rb +0 -87
- data/test/html/test_attributes.rb +0 -85
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -712
- data/test/html/test_document_encoding.rb +0 -143
- data/test/html/test_document_fragment.rb +0 -310
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -212
- data/test/html/test_node_encoding.rb +0 -91
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
- data/test/namespaces/test_namespaces_preservation.rb +0 -31
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -47
- data/test/test_encoding_handler.rb +0 -48
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -138
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -314
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -402
- data/test/xml/sax/test_parser_context.rb +0 -115
- data/test/xml/sax/test_parser_text.rb +0 -202
- data/test/xml/sax/test_push_parser.rb +0 -265
- data/test/xml/test_attr.rb +0 -74
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -341
- data/test/xml/test_c14n.rb +0 -180
- data/test/xml/test_cdata.rb +0 -54
- data/test/xml/test_comment.rb +0 -40
- data/test/xml/test_document.rb +0 -982
- data/test/xml/test_document_encoding.rb +0 -31
- data/test/xml/test_document_fragment.rb +0 -298
- data/test/xml/test_dtd.rb +0 -187
- data/test/xml/test_dtd_encoding.rb +0 -31
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -262
- data/test/xml/test_namespace.rb +0 -96
- data/test/xml/test_node.rb +0 -1325
- data/test/xml/test_node_attributes.rb +0 -115
- data/test/xml/test_node_encoding.rb +0 -75
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -592
- data/test/xml/test_node_set.rb +0 -809
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader.rb +0 -620
- data/test/xml/test_reader_encoding.rb +0 -134
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -142
- data/test/xml/test_syntax_error.rb +0 -36
- data/test/xml/test_text.rb +0 -60
- data/test/xml/test_unparented_node.rb +0 -483
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -470
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,132 +1,276 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "pathname"
|
5
|
+
|
1
6
|
module Nokogiri
|
2
7
|
module XML
|
3
|
-
|
4
|
-
# Nokogiri::XML::Document
|
5
|
-
#
|
6
|
-
# See Nokogiri::XML::Document.parse() for more information on parsing.
|
8
|
+
# Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
|
9
|
+
# is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
|
10
|
+
# on parsing.
|
7
11
|
#
|
8
12
|
# For searching a Document, see Nokogiri::XML::Searchable#css and
|
9
13
|
# Nokogiri::XML::Searchable#xpath
|
10
|
-
#
|
11
14
|
class Document < Nokogiri::XML::Node
|
12
|
-
#
|
13
|
-
#
|
15
|
+
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
16
|
+
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
17
|
+
# characters in NCNAMEs.
|
14
18
|
NCNAME_START_CHAR = "A-Za-z_"
|
15
|
-
NCNAME_CHAR = NCNAME_START_CHAR + "
|
16
|
-
NCNAME_RE = /^xmlns(
|
19
|
+
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
20
|
+
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
|
+
|
22
|
+
class << self
|
23
|
+
# Parse an XML file.
|
24
|
+
#
|
25
|
+
# +string_or_io+ may be a String, or any object that responds to
|
26
|
+
# _read_ and _close_ such as an IO, or StringIO.
|
27
|
+
#
|
28
|
+
# +url+ (optional) is the URI where this document is located.
|
29
|
+
#
|
30
|
+
# +encoding+ (optional) is the encoding that should be used when processing
|
31
|
+
# the document.
|
32
|
+
#
|
33
|
+
# +options+ (optional) is a configuration object that sets options during
|
34
|
+
# parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
|
35
|
+
# Nokogiri::XML::ParseOptions for more information.
|
36
|
+
#
|
37
|
+
# +block+ (optional) is passed a configuration object on which
|
38
|
+
# parse options may be set.
|
39
|
+
#
|
40
|
+
# By default, Nokogiri treats documents as untrusted, and so
|
41
|
+
# does not attempt to load DTDs or access the network. See
|
42
|
+
# Nokogiri::XML::ParseOptions for a complete list of options;
|
43
|
+
# and that module's DEFAULT_XML constant for what's set (and not
|
44
|
+
# set) by default.
|
45
|
+
#
|
46
|
+
# Nokogiri.XML() is a convenience method which will call this method.
|
47
|
+
#
|
48
|
+
def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
|
49
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
50
|
+
yield options if block_given?
|
51
|
+
|
52
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
53
|
+
|
54
|
+
if empty_doc?(string_or_io)
|
55
|
+
if options.strict?
|
56
|
+
raise Nokogiri::XML::SyntaxError, "Empty document"
|
57
|
+
else
|
58
|
+
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
doc = if string_or_io.respond_to?(:read)
|
63
|
+
if string_or_io.is_a?(Pathname)
|
64
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
65
|
+
string_or_io = string_or_io.expand_path.open
|
66
|
+
url ||= string_or_io.path
|
67
|
+
end
|
68
|
+
|
69
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
70
|
+
else
|
71
|
+
# read_memory pukes on empty docs
|
72
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
73
|
+
end
|
74
|
+
|
75
|
+
# do xinclude processing
|
76
|
+
doc.do_xinclude(options) if options.xinclude?
|
77
|
+
|
78
|
+
doc
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
def empty_doc?(string_or_io)
|
84
|
+
string_or_io.nil? ||
|
85
|
+
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
86
|
+
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
87
|
+
end
|
88
|
+
end
|
17
89
|
|
18
90
|
##
|
19
|
-
#
|
91
|
+
# :singleton-method: wrap
|
92
|
+
# :call-seq: wrap(java_document) → Nokogiri::XML::Document
|
20
93
|
#
|
21
|
-
#
|
22
|
-
# _read_ and _close_ such as an IO, or StringIO.
|
94
|
+
# ⚠ This method is only available when running JRuby.
|
23
95
|
#
|
24
|
-
#
|
96
|
+
# Create a Document using an existing Java DOM document object.
|
25
97
|
#
|
26
|
-
#
|
27
|
-
# the
|
98
|
+
# The returned Document shares the same underlying data structure as the Java object, so
|
99
|
+
# changes in one are reflected in the other.
|
28
100
|
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
101
|
+
# [Parameters]
|
102
|
+
# - `java_document` (Java::OrgW3cDom::Document)
|
103
|
+
# (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
|
32
104
|
#
|
33
|
-
#
|
34
|
-
# parse options may be set.
|
105
|
+
# [Returns] Nokogiri::XML::Document
|
35
106
|
#
|
36
|
-
#
|
37
|
-
|
38
|
-
#
|
39
|
-
#
|
40
|
-
# set) by default.
|
107
|
+
# See also \#to_java
|
108
|
+
|
109
|
+
# :method: to_java
|
110
|
+
# :call-seq: to_java() → Java::OrgW3cDom::Document
|
41
111
|
#
|
42
|
-
#
|
112
|
+
# ⚠ This method is only available when running JRuby.
|
43
113
|
#
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
doc = if string_or_io.respond_to?(:read)
|
58
|
-
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
59
|
-
read_io(string_or_io, url, encoding, options.to_i)
|
60
|
-
else
|
61
|
-
# read_memory pukes on empty docs
|
62
|
-
read_memory(string_or_io, url, encoding, options.to_i)
|
63
|
-
end
|
64
|
-
|
65
|
-
# do xinclude processing
|
66
|
-
doc.do_xinclude(options) if options.xinclude?
|
67
|
-
|
68
|
-
return doc
|
69
|
-
end
|
114
|
+
# Returns the underlying Java DOM document object for this document.
|
115
|
+
#
|
116
|
+
# The returned Java object shares the same underlying data structure as this document, so
|
117
|
+
# changes in one are reflected in the other.
|
118
|
+
#
|
119
|
+
# [Returns]
|
120
|
+
# Java::OrgW3cDom::Document
|
121
|
+
# (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
|
122
|
+
#
|
123
|
+
# See also Document.wrap
|
70
124
|
|
71
|
-
#
|
125
|
+
# The errors found while parsing a document.
|
126
|
+
#
|
127
|
+
# [Returns] Array<Nokogiri::XML::SyntaxError>
|
72
128
|
attr_accessor :errors
|
73
129
|
|
74
|
-
|
130
|
+
# When `true`, reparented elements without a namespace will inherit their new parent's
|
131
|
+
# namespace (if one exists). Defaults to `false`.
|
132
|
+
#
|
133
|
+
# [Returns] Boolean
|
134
|
+
#
|
135
|
+
# *Example:* Default behavior of namespace inheritance
|
136
|
+
#
|
137
|
+
# xml = <<~EOF
|
138
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
139
|
+
# <foo:parent>
|
140
|
+
# </foo:parent>
|
141
|
+
# </root>
|
142
|
+
# EOF
|
143
|
+
# doc = Nokogiri::XML(xml)
|
144
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
145
|
+
# parent.add_child("<child></child>")
|
146
|
+
# doc.to_xml
|
147
|
+
# # => <?xml version="1.0"?>
|
148
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
149
|
+
# # <foo:parent>
|
150
|
+
# # <child/>
|
151
|
+
# # </foo:parent>
|
152
|
+
# # </root>
|
153
|
+
#
|
154
|
+
# *Example:* Setting namespace inheritance to `true`
|
155
|
+
#
|
156
|
+
# xml = <<~EOF
|
157
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
158
|
+
# <foo:parent>
|
159
|
+
# </foo:parent>
|
160
|
+
# </root>
|
161
|
+
# EOF
|
162
|
+
# doc = Nokogiri::XML(xml)
|
163
|
+
# doc.namespace_inheritance = true
|
164
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
165
|
+
# parent.add_child("<child></child>")
|
166
|
+
# doc.to_xml
|
167
|
+
# # => <?xml version="1.0"?>
|
168
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
169
|
+
# # <foo:parent>
|
170
|
+
# # <foo:child/>
|
171
|
+
# # </foo:parent>
|
172
|
+
# # </root>
|
173
|
+
#
|
174
|
+
# Since v1.12.4
|
175
|
+
attr_accessor :namespace_inheritance
|
176
|
+
|
177
|
+
# :nodoc:
|
178
|
+
def initialize(*args) # rubocop:disable Lint/MissingSuper
|
75
179
|
@errors = []
|
76
180
|
@decorators = nil
|
181
|
+
@namespace_inheritance = false
|
77
182
|
end
|
78
183
|
|
79
|
-
|
80
|
-
#
|
184
|
+
# :call-seq:
|
185
|
+
# create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
|
186
|
+
#
|
187
|
+
# Create a new Element with `name` belonging to this document, optionally setting contents or
|
188
|
+
# attributes.
|
189
|
+
#
|
190
|
+
# This method is _not_ the most user-friendly option if your intention is to add a node to the
|
191
|
+
# document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
|
192
|
+
# Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
|
193
|
+
# place it in the document tree.
|
81
194
|
#
|
82
|
-
#
|
83
|
-
# doc.create_element "div", :class => "container" # <div class='container'></div>
|
84
|
-
# doc.create_element "div", "contents" # <div>contents</div>
|
85
|
-
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
86
|
-
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
195
|
+
# Arguments may be passed to initialize the element:
|
87
196
|
#
|
88
|
-
|
197
|
+
# - a Hash argument will be used to set attributes
|
198
|
+
# - a non-Hash object that responds to \#to_s will be used to set the new node's contents
|
199
|
+
#
|
200
|
+
# A block may be passed to mutate the node.
|
201
|
+
#
|
202
|
+
# [Parameters]
|
203
|
+
# - `name` (String)
|
204
|
+
# - `contents_or_attrs` (\#to_s, Hash)
|
205
|
+
# [Yields] `node` (Nokogiri::XML::Element)
|
206
|
+
# [Returns] Nokogiri::XML::Element
|
207
|
+
#
|
208
|
+
# *Example:* An empty element without attributes
|
209
|
+
#
|
210
|
+
# doc.create_element("div")
|
211
|
+
# # => <div></div>
|
212
|
+
#
|
213
|
+
# *Example:* An element with contents
|
214
|
+
#
|
215
|
+
# doc.create_element("div", "contents")
|
216
|
+
# # => <div>contents</div>
|
217
|
+
#
|
218
|
+
# *Example:* An element with attributes
|
219
|
+
#
|
220
|
+
# doc.create_element("div", {"class" => "container"})
|
221
|
+
# # => <div class='container'></div>
|
222
|
+
#
|
223
|
+
# *Example:* An element with contents and attributes
|
224
|
+
#
|
225
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
226
|
+
# # => <div class='container'>contents</div>
|
227
|
+
#
|
228
|
+
# *Example:* Passing a block to mutate the element
|
229
|
+
#
|
230
|
+
# doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
|
231
|
+
#
|
232
|
+
def create_element(name, *contents_or_attrs, &block)
|
89
233
|
elm = Nokogiri::XML::Element.new(name, self, &block)
|
90
|
-
|
234
|
+
contents_or_attrs.each do |arg|
|
91
235
|
case arg
|
92
236
|
when Hash
|
93
|
-
arg.each
|
237
|
+
arg.each do |k, v|
|
94
238
|
key = k.to_s
|
95
239
|
if key =~ NCNAME_RE
|
96
|
-
ns_name =
|
97
|
-
elm.add_namespace_definition
|
240
|
+
ns_name = Regexp.last_match(1)
|
241
|
+
elm.add_namespace_definition(ns_name, v)
|
98
242
|
else
|
99
243
|
elm[k.to_s] = v.to_s
|
100
244
|
end
|
101
|
-
|
245
|
+
end
|
102
246
|
else
|
103
247
|
elm.content = arg
|
104
248
|
end
|
105
249
|
end
|
106
|
-
if ns = elm.namespace_definitions.find { |n| n.prefix.nil?
|
250
|
+
if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
|
107
251
|
elm.namespace = ns
|
108
252
|
end
|
109
253
|
elm
|
110
254
|
end
|
111
255
|
|
112
256
|
# Create a Text Node with +string+
|
113
|
-
def create_text_node
|
114
|
-
Nokogiri::XML::Text.new
|
257
|
+
def create_text_node(string, &block)
|
258
|
+
Nokogiri::XML::Text.new(string.to_s, self, &block)
|
115
259
|
end
|
116
260
|
|
117
261
|
# Create a CDATA Node containing +string+
|
118
|
-
def create_cdata
|
119
|
-
Nokogiri::XML::CDATA.new
|
262
|
+
def create_cdata(string, &block)
|
263
|
+
Nokogiri::XML::CDATA.new(self, string.to_s, &block)
|
120
264
|
end
|
121
265
|
|
122
266
|
# Create a Comment Node containing +string+
|
123
|
-
def create_comment
|
124
|
-
Nokogiri::XML::Comment.new
|
267
|
+
def create_comment(string, &block)
|
268
|
+
Nokogiri::XML::Comment.new(self, string.to_s, &block)
|
125
269
|
end
|
126
270
|
|
127
271
|
# The name of this document. Always returns "document"
|
128
272
|
def name
|
129
|
-
|
273
|
+
"document"
|
130
274
|
end
|
131
275
|
|
132
276
|
# A reference to +self+
|
@@ -134,46 +278,51 @@ module Nokogiri
|
|
134
278
|
self
|
135
279
|
end
|
136
280
|
|
137
|
-
|
138
|
-
#
|
139
|
-
# return them as a hash.
|
281
|
+
# :call-seq:
|
282
|
+
# collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
|
140
283
|
#
|
141
|
-
#
|
284
|
+
# Recursively get all namespaces from this node and its subtree and return them as a
|
285
|
+
# hash.
|
142
286
|
#
|
143
|
-
#
|
287
|
+
# ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
|
288
|
+
#
|
289
|
+
# Note that this method does an xpath lookup for nodes with namespaces, and as a result the
|
290
|
+
# order (and which duplicate prefix "wins") may be dependent on the implementation of the
|
291
|
+
# underlying XML library.
|
292
|
+
#
|
293
|
+
# *Example:* Basic usage
|
294
|
+
#
|
295
|
+
# Given this document:
|
296
|
+
#
|
297
|
+
# <root xmlns="default" xmlns:foo="bar">
|
144
298
|
# <bar xmlns:hello="world" />
|
145
299
|
# </root>
|
146
300
|
#
|
147
301
|
# This method will return:
|
148
302
|
#
|
149
|
-
# {
|
303
|
+
# {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
|
304
|
+
#
|
305
|
+
# *Example:* Duplicate prefixes
|
150
306
|
#
|
151
|
-
#
|
152
|
-
# For example, given this document:
|
307
|
+
# Given this document:
|
153
308
|
#
|
154
309
|
# <root xmlns:foo="bar">
|
155
310
|
# <bar xmlns:foo="baz" />
|
156
311
|
# </root>
|
157
312
|
#
|
158
|
-
# The hash returned will
|
313
|
+
# The hash returned will be something like:
|
159
314
|
#
|
160
|
-
#
|
161
|
-
# in the hash.
|
162
|
-
#
|
163
|
-
# Note that this method does an xpath lookup for nodes with
|
164
|
-
# namespaces, and as a result the order may be dependent on the
|
165
|
-
# implementation of the underlying XML library.
|
315
|
+
# {"xmlns:foo" => "baz"}
|
166
316
|
#
|
167
317
|
def collect_namespaces
|
168
|
-
xpath("//namespace::*").
|
169
|
-
hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
|
170
|
-
hash
|
318
|
+
xpath("//namespace::*").each_with_object({}) do |ns, hash|
|
319
|
+
hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
|
171
320
|
end
|
172
321
|
end
|
173
322
|
|
174
323
|
# Get the list of decorators given +key+
|
175
|
-
def decorators
|
176
|
-
@decorators ||=
|
324
|
+
def decorators(key)
|
325
|
+
@decorators ||= {}
|
177
326
|
@decorators[key] ||= []
|
178
327
|
end
|
179
328
|
|
@@ -182,7 +331,8 @@ module Nokogiri
|
|
182
331
|
# the document or +nil+ when there is no DTD.
|
183
332
|
def validate
|
184
333
|
return nil unless internal_subset
|
185
|
-
|
334
|
+
|
335
|
+
internal_subset.validate(self)
|
186
336
|
end
|
187
337
|
|
188
338
|
##
|
@@ -202,7 +352,7 @@ module Nokogiri
|
|
202
352
|
# ... which does absolutely nothing.
|
203
353
|
#
|
204
354
|
def slop!
|
205
|
-
unless decorators(XML::Node).include?
|
355
|
+
unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
|
206
356
|
decorators(XML::Node) << Nokogiri::Decorators::Slop
|
207
357
|
decorate!
|
208
358
|
end
|
@@ -212,16 +362,18 @@ module Nokogiri
|
|
212
362
|
|
213
363
|
##
|
214
364
|
# Apply any decorators to +node+
|
215
|
-
def decorate
|
365
|
+
def decorate(node)
|
216
366
|
return unless @decorators
|
217
|
-
|
367
|
+
|
368
|
+
@decorators.each do |klass, list|
|
218
369
|
next unless node.is_a?(klass)
|
370
|
+
|
219
371
|
list.each { |moodule| node.extend(moodule) }
|
220
|
-
|
372
|
+
end
|
221
373
|
end
|
222
374
|
|
223
|
-
|
224
|
-
|
375
|
+
alias_method :to_xml, :serialize
|
376
|
+
alias_method :clone, :dup
|
225
377
|
|
226
378
|
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
227
379
|
def namespaces
|
@@ -231,51 +383,85 @@ module Nokogiri
|
|
231
383
|
##
|
232
384
|
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
233
385
|
# Returns an empty fragment if +tags+ is nil.
|
234
|
-
def fragment
|
235
|
-
DocumentFragment.new(self, tags,
|
386
|
+
def fragment(tags = nil)
|
387
|
+
DocumentFragment.new(self, tags, root)
|
236
388
|
end
|
237
389
|
|
238
390
|
undef_method :swap, :parent, :namespace, :default_namespace=
|
239
391
|
undef_method :add_namespace_definition, :attributes
|
240
392
|
undef_method :namespace_definitions, :line, :add_namespace
|
241
393
|
|
242
|
-
def add_child
|
243
|
-
raise "A document may not have multiple root nodes." if (root && root.name !=
|
394
|
+
def add_child(node_or_tags)
|
395
|
+
raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
396
|
+
|
244
397
|
node_or_tags = coerce(node_or_tags)
|
245
398
|
if node_or_tags.is_a?(XML::NodeSet)
|
246
399
|
raise "A document may not have multiple root nodes." if node_or_tags.size > 1
|
400
|
+
|
247
401
|
super(node_or_tags.first)
|
248
402
|
else
|
249
403
|
super
|
250
404
|
end
|
251
405
|
end
|
252
|
-
|
406
|
+
alias_method :<<, :add_child
|
253
407
|
|
254
|
-
|
255
|
-
#
|
256
|
-
#
|
257
|
-
|
258
|
-
|
259
|
-
|
408
|
+
# :call-seq:
|
409
|
+
# xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
|
410
|
+
#
|
411
|
+
# [Returns] The document type which determines CSS-to-XPath translation.
|
412
|
+
#
|
413
|
+
# See XPathVisitor for more information.
|
414
|
+
def xpath_doctype
|
415
|
+
Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
|
260
416
|
end
|
261
417
|
|
262
|
-
|
263
|
-
#
|
264
|
-
#
|
265
|
-
|
266
|
-
|
267
|
-
|
418
|
+
#
|
419
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
420
|
+
#
|
421
|
+
# Returns a hash describing the Document, to use in pattern matching.
|
422
|
+
#
|
423
|
+
# Valid keys and their values:
|
424
|
+
# - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
|
425
|
+
#
|
426
|
+
# In the future, other keys may allow accessing things like doctype and processing
|
427
|
+
# instructions. If you have a use case and would like this functionality, please let us know
|
428
|
+
# by opening an issue or a discussion on the github project.
|
429
|
+
#
|
430
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
431
|
+
#
|
432
|
+
# *Example*
|
433
|
+
#
|
434
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
435
|
+
# <?xml version="1.0"?>
|
436
|
+
# <root>
|
437
|
+
# <child>
|
438
|
+
# </root>
|
439
|
+
# XML
|
440
|
+
#
|
441
|
+
# doc.deconstruct_keys([:root])
|
442
|
+
# # => {:root=>
|
443
|
+
# # #(Element:0x35c {
|
444
|
+
# # name = "root",
|
445
|
+
# # children = [
|
446
|
+
# # #(Text "\n" + " "),
|
447
|
+
# # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
|
448
|
+
# # #(Text "\n")]
|
449
|
+
# # })}
|
450
|
+
#
|
451
|
+
# *Example* of an empty document
|
452
|
+
#
|
453
|
+
# doc = Nokogiri::XML::Document.new
|
454
|
+
#
|
455
|
+
# doc.deconstruct_keys([:root])
|
456
|
+
# # => {:root=>nil}
|
457
|
+
#
|
458
|
+
def deconstruct_keys(keys)
|
459
|
+
{ root: root }
|
268
460
|
end
|
269
461
|
|
270
462
|
private
|
271
|
-
def self.empty_doc? string_or_io
|
272
|
-
string_or_io.nil? ||
|
273
|
-
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
274
|
-
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
275
|
-
end
|
276
463
|
|
277
|
-
#
|
278
|
-
IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
|
464
|
+
IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
|
279
465
|
|
280
466
|
def inspect_attributes
|
281
467
|
[:name, :children]
|