nokogiri 1.8.5 → 1.15.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -18
- data/LICENSE-DEPENDENCIES.md +1636 -1024
- data/LICENSE.md +5 -28
- data/README.md +203 -90
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -61
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +867 -417
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +215 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +42 -37
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +40 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +401 -237
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +136 -62
- data/ext/nokogiri/xml_node.c +1387 -678
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +347 -212
- data/ext/nokogiri/xml_relax_ng.c +86 -77
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +145 -103
- data/ext/nokogiri/xml_sax_push_parser.c +64 -36
- data/ext/nokogiri/xml_schema.c +138 -81
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +36 -26
- data/ext/nokogiri/xml_xpath_context.c +366 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +224 -95
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +392 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +98 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -126
- data/lib/nokogiri/xml/document_fragment.rb +104 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +45 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1093 -411
- data/lib/nokogiri/xml/node_set.rb +173 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
- metadata +126 -399
- data/.autotest +0 -22
- data/.cross_rubies +0 -8
- data/.editorconfig +0 -17
- data/.gemtest +0 -0
- data/.travis.yml +0 -63
- data/CHANGELOG.md +0 -1368
- data/CONTRIBUTING.md +0 -42
- data/C_CODING_STYLE.rdoc +0 -33
- data/Gemfile-libxml-ruby +0 -3
- data/Manifest.txt +0 -370
- data/ROADMAP.md +0 -111
- data/Rakefile +0 -348
- data/SECURITY.md +0 -19
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/appveyor.yml +0 -29
- data/build_all +0 -44
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -15
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
- data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
- data/patches/sort-patches-by-date +0 -25
- data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
- data/suppressions/README.txt +0 -1
- data/suppressions/nokogiri_ruby-2.supp +0 -10
- data/tasks/test.rb +0 -100
- data/test/css/test_nthiness.rb +0 -226
- data/test/css/test_parser.rb +0 -386
- data/test/css/test_tokenizer.rb +0 -215
- data/test/css/test_xpath_visitor.rb +0 -96
- data/test/decorators/test_slop.rb +0 -23
- data/test/files/2ch.html +0 -108
- data/test/files/GH_1042.html +0 -18
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/atom.xml +0 -344
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/namespace_pressure_test.xml +0 -1684
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/shift_jis_no_charset.html +0 -9
- data/test/files/slow-xpath.xml +0 -25509
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -851
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -271
- data/test/html/sax/test_parser.rb +0 -168
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/sax/test_parser_text.rb +0 -163
- data/test/html/sax/test_push_parser.rb +0 -87
- data/test/html/test_attributes.rb +0 -85
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -712
- data/test/html/test_document_encoding.rb +0 -143
- data/test/html/test_document_fragment.rb +0 -310
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -212
- data/test/html/test_node_encoding.rb +0 -91
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
- data/test/namespaces/test_namespaces_preservation.rb +0 -31
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -47
- data/test/test_encoding_handler.rb +0 -48
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -138
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -314
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -402
- data/test/xml/sax/test_parser_context.rb +0 -115
- data/test/xml/sax/test_parser_text.rb +0 -202
- data/test/xml/sax/test_push_parser.rb +0 -265
- data/test/xml/test_attr.rb +0 -74
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -341
- data/test/xml/test_c14n.rb +0 -180
- data/test/xml/test_cdata.rb +0 -54
- data/test/xml/test_comment.rb +0 -40
- data/test/xml/test_document.rb +0 -982
- data/test/xml/test_document_encoding.rb +0 -31
- data/test/xml/test_document_fragment.rb +0 -298
- data/test/xml/test_dtd.rb +0 -187
- data/test/xml/test_dtd_encoding.rb +0 -31
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -262
- data/test/xml/test_namespace.rb +0 -96
- data/test/xml/test_node.rb +0 -1325
- data/test/xml/test_node_attributes.rb +0 -115
- data/test/xml/test_node_encoding.rb +0 -75
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -592
- data/test/xml/test_node_set.rb +0 -809
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader.rb +0 -620
- data/test/xml/test_reader_encoding.rb +0 -134
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -142
- data/test/xml/test_syntax_error.rb +0 -36
- data/test/xml/test_text.rb +0 -60
- data/test/xml/test_unparented_node.rb +0 -483
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -470
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,105 +1,130 @@
|
|
1
|
-
# encoding:
|
2
|
-
|
3
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "stringio"
|
4
5
|
|
5
6
|
module Nokogiri
|
6
7
|
module XML
|
7
|
-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
8
|
+
# Nokogiri::XML::Node is the primary API you'll use to interact with your Document.
|
9
|
+
#
|
10
|
+
# == Attributes
|
11
|
+
#
|
12
|
+
# A Nokogiri::XML::Node may be treated similarly to a hash with regard to attributes. For
|
13
|
+
# example:
|
14
|
+
#
|
15
|
+
# node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
|
16
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
|
17
|
+
# node['href'] # => "#foo"
|
18
|
+
# node.keys # => ["href", "id"]
|
19
|
+
# node.values # => ["#foo", "link"]
|
20
|
+
# node['class'] = 'green' # => "green"
|
21
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
|
22
|
+
#
|
23
|
+
# See the method group entitled Node@Working+With+Node+Attributes for the full set of methods.
|
24
|
+
#
|
25
|
+
# == Navigation
|
26
|
+
#
|
27
|
+
# Nokogiri::XML::Node also has methods that let you move around your tree:
|
11
28
|
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
# irb(main):005:0> node['href']
|
15
|
-
# => "#foo"
|
16
|
-
# irb(main):006:0> node.keys
|
17
|
-
# => ["href", "id"]
|
18
|
-
# irb(main):007:0> node.values
|
19
|
-
# => ["#foo", "link"]
|
20
|
-
# irb(main):008:0> node['class'] = 'green'
|
21
|
-
# => "green"
|
22
|
-
# irb(main):009:0> node
|
23
|
-
# => <a href="#foo" id="link" class="green">link</a>
|
24
|
-
# irb(main):010:0>
|
29
|
+
# [#parent, #children, #next, #previous]
|
30
|
+
# Navigate up, down, or through siblings.
|
25
31
|
#
|
26
|
-
# See
|
32
|
+
# See the method group entitled Node@Traversing+Document+Structure for the full set of methods.
|
27
33
|
#
|
28
|
-
#
|
29
|
-
# tree. For navigating your tree, see:
|
34
|
+
# == Serialization
|
30
35
|
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
# * Nokogiri::XML::Node#next
|
34
|
-
# * Nokogiri::XML::Node#previous
|
36
|
+
# When printing or otherwise emitting a document or a node (and its subtree), there are a few
|
37
|
+
# methods you might want to use:
|
35
38
|
#
|
39
|
+
# [#content, #text, #inner_text, #to_str]
|
40
|
+
# These methods will all **emit plaintext**,
|
41
|
+
# meaning that entities will be replaced (e.g., +&lt;+ will be replaced with +<+), meaning
|
42
|
+
# that any sanitizing will likely be un-done in the output.
|
36
43
|
#
|
37
|
-
#
|
38
|
-
#
|
44
|
+
# [#to_s, #to_xml, #to_html, #inner_html]
|
45
|
+
# These methods will all **emit properly-escaped markup**, meaning that it's suitable for
|
46
|
+
# consumption by browsers, parsers, etc.
|
39
47
|
#
|
40
|
-
#
|
48
|
+
# See the method group entitled Node@Serialization+and+Generating+Output for the full set of methods.
|
41
49
|
#
|
42
|
-
#
|
43
|
-
# document, meaning that entities will be replaced (e.g., "&lt;"
|
44
|
-
# will be replaced with "<"), meaning that any sanitizing will
|
45
|
-
# likely be un-done in the output.
|
50
|
+
# == Searching
|
46
51
|
#
|
47
|
-
#
|
52
|
+
# You may search this node's subtree using methods like #xpath and #css.
|
48
53
|
#
|
49
|
-
#
|
50
|
-
# that it's suitable for consumption by browsers, parsers, etc.
|
54
|
+
# See the method group entitled Node@Searching+via+XPath+or+CSS+Queries for the full set of methods.
|
51
55
|
#
|
52
|
-
# You may search this node's subtree using Searchable#xpath and Searchable#css
|
53
56
|
class Node
|
54
57
|
include Nokogiri::XML::PP::Node
|
55
58
|
include Nokogiri::XML::Searchable
|
59
|
+
include Nokogiri::ClassResolver
|
56
60
|
include Enumerable
|
57
61
|
|
58
62
|
# Element node type, see Nokogiri::XML::Node#element?
|
59
|
-
ELEMENT_NODE =
|
63
|
+
ELEMENT_NODE = 1
|
60
64
|
# Attribute node type
|
61
|
-
ATTRIBUTE_NODE =
|
65
|
+
ATTRIBUTE_NODE = 2
|
62
66
|
# Text node type, see Nokogiri::XML::Node#text?
|
63
|
-
TEXT_NODE =
|
67
|
+
TEXT_NODE = 3
|
64
68
|
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
65
69
|
CDATA_SECTION_NODE = 4
|
66
70
|
# Entity reference node type
|
67
|
-
ENTITY_REF_NODE =
|
71
|
+
ENTITY_REF_NODE = 5
|
68
72
|
# Entity node type
|
69
|
-
ENTITY_NODE =
|
73
|
+
ENTITY_NODE = 6
|
70
74
|
# PI node type
|
71
|
-
PI_NODE =
|
75
|
+
PI_NODE = 7
|
72
76
|
# Comment node type, see Nokogiri::XML::Node#comment?
|
73
|
-
COMMENT_NODE =
|
77
|
+
COMMENT_NODE = 8
|
74
78
|
# Document node type, see Nokogiri::XML::Node#xml?
|
75
|
-
DOCUMENT_NODE =
|
79
|
+
DOCUMENT_NODE = 9
|
76
80
|
# Document type node type
|
77
81
|
DOCUMENT_TYPE_NODE = 10
|
78
82
|
# Document fragment node type
|
79
83
|
DOCUMENT_FRAG_NODE = 11
|
80
84
|
# Notation node type
|
81
|
-
NOTATION_NODE =
|
85
|
+
NOTATION_NODE = 12
|
82
86
|
# HTML document node type, see Nokogiri::XML::Node#html?
|
83
87
|
HTML_DOCUMENT_NODE = 13
|
84
88
|
# DTD node type
|
85
|
-
DTD_NODE =
|
89
|
+
DTD_NODE = 14
|
86
90
|
# Element declaration type
|
87
|
-
ELEMENT_DECL =
|
91
|
+
ELEMENT_DECL = 15
|
88
92
|
# Attribute declaration type
|
89
|
-
ATTRIBUTE_DECL =
|
93
|
+
ATTRIBUTE_DECL = 16
|
90
94
|
# Entity declaration type
|
91
|
-
ENTITY_DECL =
|
95
|
+
ENTITY_DECL = 17
|
92
96
|
# Namespace declaration type
|
93
|
-
NAMESPACE_DECL =
|
97
|
+
NAMESPACE_DECL = 18
|
94
98
|
# XInclude start type
|
95
|
-
XINCLUDE_START =
|
99
|
+
XINCLUDE_START = 19
|
96
100
|
# XInclude end type
|
97
|
-
XINCLUDE_END =
|
101
|
+
XINCLUDE_END = 20
|
98
102
|
# DOCB document node type
|
99
103
|
DOCB_DOCUMENT_NODE = 21
|
100
104
|
|
101
|
-
|
102
|
-
|
105
|
+
#
|
106
|
+
# :call-seq:
|
107
|
+
# new(name, document) -> Nokogiri::XML::Node
|
108
|
+
# new(name, document) { |node| ... } -> Nokogiri::XML::Node
|
109
|
+
#
|
110
|
+
# Create a new node with +name+ that belongs to +document+.
|
111
|
+
#
|
112
|
+
# If you intend to add a node to a document tree, it's likely that you will prefer one of the
|
113
|
+
# Nokogiri::XML::Node methods like #add_child, #add_next_sibling, #replace, etc. which will
|
114
|
+
# both create an element (or subtree) and place it in the document tree.
|
115
|
+
#
|
116
|
+
# Another alternative, if you are concerned about performance, is
|
117
|
+
# Nokogiri::XML::Document#create_element which accepts additional arguments for contents or
|
118
|
+
# attributes but (like this method) avoids parsing markup.
|
119
|
+
#
|
120
|
+
# [Parameters]
|
121
|
+
# - +name+ (String)
|
122
|
+
# - +document+ (Nokogiri::XML::Document) The document to which the returned node will belong.
|
123
|
+
# [Yields] Nokogiri::XML::Node
|
124
|
+
# [Returns] Nokogiri::XML::Node
|
125
|
+
#
|
126
|
+
def initialize(name, document)
|
127
|
+
# This is intentionally empty, and sets the method signature for subclasses.
|
103
128
|
end
|
104
129
|
|
105
130
|
###
|
@@ -108,226 +133,483 @@ module Nokogiri
|
|
108
133
|
document.decorate(self)
|
109
134
|
end
|
110
135
|
|
111
|
-
|
112
|
-
# Search this node's immediate children using CSS selector +selector+
|
113
|
-
def > selector
|
114
|
-
ns = document.root.namespaces
|
115
|
-
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
116
|
-
end
|
117
|
-
|
118
|
-
###
|
119
|
-
# Get the attribute value for the attribute +name+
|
120
|
-
def [] name
|
121
|
-
get(name.to_s)
|
122
|
-
end
|
123
|
-
|
124
|
-
###
|
125
|
-
# Set the attribute value for the attribute +name+ to +value+
|
126
|
-
def []= name, value
|
127
|
-
set name.to_s, value.to_s
|
128
|
-
end
|
136
|
+
# :section: Manipulating Document Structure
|
129
137
|
|
130
138
|
###
|
131
139
|
# Add +node_or_tags+ as a child of this Node.
|
132
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
133
140
|
#
|
134
|
-
#
|
141
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
142
|
+
# containing markup.
|
143
|
+
#
|
144
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
145
|
+
# a DocumentFragment, NodeSet, or String).
|
135
146
|
#
|
136
147
|
# Also see related method +<<+.
|
137
|
-
def add_child
|
148
|
+
def add_child(node_or_tags)
|
138
149
|
node_or_tags = coerce(node_or_tags)
|
139
150
|
if node_or_tags.is_a?(XML::NodeSet)
|
140
|
-
node_or_tags.each { |n| add_child_node_and_reparent_attrs
|
151
|
+
node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
|
141
152
|
else
|
142
|
-
add_child_node_and_reparent_attrs
|
153
|
+
add_child_node_and_reparent_attrs(node_or_tags)
|
143
154
|
end
|
144
155
|
node_or_tags
|
145
156
|
end
|
146
157
|
|
147
158
|
###
|
148
159
|
# Add +node_or_tags+ as the first child of this Node.
|
149
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
150
160
|
#
|
151
|
-
#
|
161
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
162
|
+
# containing markup.
|
163
|
+
#
|
164
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
165
|
+
# a DocumentFragment, NodeSet, or String).
|
152
166
|
#
|
153
167
|
# Also see related method +add_child+.
|
154
|
-
def prepend_child
|
155
|
-
if first = children.first
|
168
|
+
def prepend_child(node_or_tags)
|
169
|
+
if (first = children.first)
|
156
170
|
# Mimic the error add_child would raise.
|
157
|
-
raise
|
171
|
+
raise "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
172
|
+
|
158
173
|
first.__send__(:add_sibling, :previous, node_or_tags)
|
159
174
|
else
|
160
175
|
add_child(node_or_tags)
|
161
176
|
end
|
162
177
|
end
|
163
178
|
|
179
|
+
# :call-seq:
|
180
|
+
# wrap(markup) -> self
|
181
|
+
# wrap(node) -> self
|
182
|
+
#
|
183
|
+
# Wrap this Node with the node parsed from +markup+ or a dup of the +node+.
|
184
|
+
#
|
185
|
+
# [Parameters]
|
186
|
+
# - *markup* (String)
|
187
|
+
# Markup that is parsed and used as the wrapper. This node's parent, if it exists, is used
|
188
|
+
# as the context node for parsing; otherwise the associated document is used. If the parsed
|
189
|
+
# fragment has multiple roots, the first root node is used as the wrapper.
|
190
|
+
# - *node* (Nokogiri::XML::Node)
|
191
|
+
# An element that is `#dup`ed and used as the wrapper.
|
192
|
+
#
|
193
|
+
# [Returns] +self+, to support chaining.
|
194
|
+
#
|
195
|
+
# Also see NodeSet#wrap
|
196
|
+
#
|
197
|
+
# *Example* with a +String+ argument:
|
198
|
+
#
|
199
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
200
|
+
# <html><body>
|
201
|
+
# <a>asdf</a>
|
202
|
+
# </body></html>
|
203
|
+
# HTML
|
204
|
+
# doc.at_css("a").wrap("<div></div>")
|
205
|
+
# doc.to_html
|
206
|
+
# # => <html><head></head><body>
|
207
|
+
# # <div><a>asdf</a></div>
|
208
|
+
# # </body></html>
|
209
|
+
#
|
210
|
+
# *Example* with a +Node+ argument:
|
211
|
+
#
|
212
|
+
# doc = Nokogiri::HTML5(<<~HTML)
|
213
|
+
# <html><body>
|
214
|
+
# <a>asdf</a>
|
215
|
+
# </body></html>
|
216
|
+
# HTML
|
217
|
+
# doc.at_css("a").wrap(doc.create_element("div"))
|
218
|
+
# doc.to_html
|
219
|
+
# # => <html><head></head><body>
|
220
|
+
# # <div><a>asdf</a></div>
|
221
|
+
# # </body></html>
|
222
|
+
#
|
223
|
+
def wrap(node_or_tags)
|
224
|
+
case node_or_tags
|
225
|
+
when String
|
226
|
+
context_node = parent || document
|
227
|
+
new_parent = context_node.coerce(node_or_tags).first
|
228
|
+
if new_parent.nil?
|
229
|
+
raise "Failed to parse '#{node_or_tags}' in the context of a '#{context_node.name}' element"
|
230
|
+
end
|
231
|
+
when XML::Node
|
232
|
+
new_parent = node_or_tags.dup
|
233
|
+
else
|
234
|
+
raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
|
235
|
+
end
|
236
|
+
|
237
|
+
if parent
|
238
|
+
add_next_sibling(new_parent)
|
239
|
+
else
|
240
|
+
new_parent.unlink
|
241
|
+
end
|
242
|
+
new_parent.add_child(self)
|
243
|
+
|
244
|
+
self
|
245
|
+
end
|
246
|
+
|
164
247
|
###
|
165
248
|
# Add +node_or_tags+ as a child of this Node.
|
166
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
167
249
|
#
|
168
|
-
#
|
250
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
251
|
+
# containing markup.
|
252
|
+
#
|
253
|
+
# Returns +self+, to support chaining of calls (e.g., root << child1 << child2)
|
169
254
|
#
|
170
255
|
# Also see related method +add_child+.
|
171
|
-
def <<
|
172
|
-
add_child
|
256
|
+
def <<(node_or_tags)
|
257
|
+
add_child(node_or_tags)
|
173
258
|
self
|
174
259
|
end
|
175
260
|
|
176
261
|
###
|
177
262
|
# Insert +node_or_tags+ before this Node (as a sibling).
|
178
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
179
263
|
#
|
180
|
-
#
|
264
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
265
|
+
# containing markup.
|
266
|
+
#
|
267
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
268
|
+
# a DocumentFragment, NodeSet, or String).
|
181
269
|
#
|
182
270
|
# Also see related method +before+.
|
183
|
-
def add_previous_sibling
|
184
|
-
raise ArgumentError
|
271
|
+
def add_previous_sibling(node_or_tags)
|
272
|
+
raise ArgumentError,
|
273
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
185
274
|
|
186
|
-
add_sibling
|
275
|
+
add_sibling(:previous, node_or_tags)
|
187
276
|
end
|
188
277
|
|
189
278
|
###
|
190
279
|
# Insert +node_or_tags+ after this Node (as a sibling).
|
191
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
192
280
|
#
|
193
|
-
#
|
281
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
282
|
+
# containing markup.
|
283
|
+
#
|
284
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
285
|
+
# a DocumentFragment, NodeSet, or String).
|
194
286
|
#
|
195
287
|
# Also see related method +after+.
|
196
|
-
def add_next_sibling
|
197
|
-
raise ArgumentError
|
288
|
+
def add_next_sibling(node_or_tags)
|
289
|
+
raise ArgumentError,
|
290
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
198
291
|
|
199
|
-
add_sibling
|
292
|
+
add_sibling(:next, node_or_tags)
|
200
293
|
end
|
201
294
|
|
202
295
|
####
|
203
296
|
# Insert +node_or_tags+ before this node (as a sibling).
|
204
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
205
297
|
#
|
206
|
-
#
|
298
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
299
|
+
# containing markup.
|
300
|
+
#
|
301
|
+
# Returns +self+, to support chaining of calls.
|
207
302
|
#
|
208
303
|
# Also see related method +add_previous_sibling+.
|
209
|
-
def before
|
210
|
-
add_previous_sibling
|
304
|
+
def before(node_or_tags)
|
305
|
+
add_previous_sibling(node_or_tags)
|
211
306
|
self
|
212
307
|
end
|
213
308
|
|
214
309
|
####
|
215
310
|
# Insert +node_or_tags+ after this node (as a sibling).
|
216
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
|
217
311
|
#
|
218
|
-
#
|
312
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
|
313
|
+
# containing markup.
|
314
|
+
#
|
315
|
+
# Returns +self+, to support chaining of calls.
|
219
316
|
#
|
220
317
|
# Also see related method +add_next_sibling+.
|
221
|
-
def after
|
222
|
-
add_next_sibling
|
318
|
+
def after(node_or_tags)
|
319
|
+
add_next_sibling(node_or_tags)
|
223
320
|
self
|
224
321
|
end
|
225
322
|
|
226
323
|
####
|
227
|
-
# Set the
|
228
|
-
#
|
324
|
+
# Set the content for this Node to +node_or_tags+.
|
325
|
+
#
|
326
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
|
327
|
+
# containing markup.
|
229
328
|
#
|
230
|
-
#
|
329
|
+
# ⚠ Please note that despite the name, this method will *not* always parse a String argument
|
330
|
+
# as HTML. A String argument will be parsed with the +DocumentFragment+ parser related to this
|
331
|
+
# node's document.
|
332
|
+
#
|
333
|
+
# For example, if the document is an HTML4::Document then the string will be parsed as HTML4
|
334
|
+
# using HTML4::DocumentFragment; but if the document is an XML::Document then it will
|
335
|
+
# parse the string as XML using XML::DocumentFragment.
|
231
336
|
#
|
232
337
|
# Also see related method +children=+
|
233
|
-
def inner_html=
|
338
|
+
def inner_html=(node_or_tags)
|
234
339
|
self.children = node_or_tags
|
235
|
-
self
|
236
340
|
end
|
237
341
|
|
238
342
|
####
|
239
|
-
# Set the
|
240
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
|
343
|
+
# Set the content for this Node to +node_or_tags+.
|
241
344
|
#
|
242
|
-
#
|
345
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
|
346
|
+
# containing markup.
|
243
347
|
#
|
244
348
|
# Also see related method +inner_html=+
|
245
|
-
def children=
|
349
|
+
def children=(node_or_tags)
|
246
350
|
node_or_tags = coerce(node_or_tags)
|
247
351
|
children.unlink
|
248
352
|
if node_or_tags.is_a?(XML::NodeSet)
|
249
|
-
node_or_tags.each { |n| add_child_node_and_reparent_attrs
|
353
|
+
node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
|
250
354
|
else
|
251
|
-
add_child_node_and_reparent_attrs
|
355
|
+
add_child_node_and_reparent_attrs(node_or_tags)
|
252
356
|
end
|
253
|
-
node_or_tags
|
254
357
|
end
|
255
358
|
|
256
359
|
####
|
257
360
|
# Replace this Node with +node_or_tags+.
|
258
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
259
361
|
#
|
260
|
-
#
|
362
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
363
|
+
# containing markup.
|
364
|
+
#
|
365
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
|
366
|
+
# a DocumentFragment, NodeSet, or String).
|
261
367
|
#
|
262
368
|
# Also see related method +swap+.
|
263
|
-
def replace
|
369
|
+
def replace(node_or_tags)
|
370
|
+
raise("Cannot replace a node with no parent") unless parent
|
371
|
+
|
264
372
|
# We cannot replace a text node directly, otherwise libxml will return
|
265
373
|
# an internal error at parser.c:13031, I don't know exactly why
|
266
374
|
# libxml is trying to find a parent node that is an element or document
|
267
375
|
# so I can't tell if this is bug in libxml or not. issue #775.
|
268
376
|
if text?
|
269
|
-
replacee = Nokogiri::XML::Node.new
|
270
|
-
add_previous_sibling_node
|
377
|
+
replacee = Nokogiri::XML::Node.new("dummy", document)
|
378
|
+
add_previous_sibling_node(replacee)
|
271
379
|
unlink
|
272
|
-
return replacee.replace
|
380
|
+
return replacee.replace(node_or_tags)
|
273
381
|
end
|
274
382
|
|
275
|
-
node_or_tags = coerce(node_or_tags)
|
383
|
+
node_or_tags = parent.coerce(node_or_tags)
|
276
384
|
|
277
385
|
if node_or_tags.is_a?(XML::NodeSet)
|
278
|
-
node_or_tags.each { |n| add_previous_sibling
|
386
|
+
node_or_tags.each { |n| add_previous_sibling(n) }
|
279
387
|
unlink
|
280
388
|
else
|
281
|
-
replace_node
|
389
|
+
replace_node(node_or_tags)
|
282
390
|
end
|
283
391
|
node_or_tags
|
284
392
|
end
|
285
393
|
|
286
394
|
####
|
287
395
|
# Swap this Node for +node_or_tags+
|
288
|
-
#
|
396
|
+
#
|
397
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
|
398
|
+
# containing markup.
|
289
399
|
#
|
290
400
|
# Returns self, to support chaining of calls.
|
291
401
|
#
|
292
402
|
# Also see related method +replace+.
|
293
|
-
def swap
|
294
|
-
replace
|
403
|
+
def swap(node_or_tags)
|
404
|
+
replace(node_or_tags)
|
295
405
|
self
|
296
406
|
end
|
297
407
|
|
298
|
-
alias :next :next_sibling
|
299
|
-
alias :previous :previous_sibling
|
300
|
-
|
301
|
-
# :stopdoc:
|
302
|
-
# HACK: This is to work around an RDoc bug
|
303
|
-
alias :next= :add_next_sibling
|
304
|
-
# :startdoc:
|
305
|
-
|
306
|
-
alias :previous= :add_previous_sibling
|
307
|
-
alias :remove :unlink
|
308
|
-
alias :get_attribute :[]
|
309
|
-
alias :attr :[]
|
310
|
-
alias :set_attribute :[]=
|
311
|
-
alias :text :content
|
312
|
-
alias :inner_text :content
|
313
|
-
alias :has_attribute? :key?
|
314
|
-
alias :name :node_name
|
315
|
-
alias :name= :node_name=
|
316
|
-
alias :type :node_type
|
317
|
-
alias :to_str :text
|
318
|
-
alias :clone :dup
|
319
|
-
alias :elements :element_children
|
320
|
-
|
321
408
|
####
|
322
|
-
#
|
323
|
-
#
|
324
|
-
|
325
|
-
|
326
|
-
|
409
|
+
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not
|
410
|
+
# interpreted as markup.
|
411
|
+
def content=(string)
|
412
|
+
self.native_content = encode_special_chars(string.to_s)
|
413
|
+
end
|
414
|
+
|
415
|
+
###
|
416
|
+
# Set the parent Node for this Node
|
417
|
+
def parent=(parent_node)
|
418
|
+
parent_node.add_child(self)
|
419
|
+
end
|
420
|
+
|
421
|
+
###
|
422
|
+
# Adds a default namespace supplied as a string +url+ href, to self.
|
423
|
+
# The consequence is as if an xmlns attribute with the supplied argument were
|
424
|
+
# present in parsed XML. A default namespace set with this method will
|
425
|
+
# not show up in #attributes, but when this node is serialized to XML an
|
426
|
+
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
427
|
+
def default_namespace=(url)
|
428
|
+
add_namespace_definition(nil, url)
|
429
|
+
end
|
430
|
+
|
431
|
+
###
|
432
|
+
# Set the default namespace on this node (as would be defined with an
|
433
|
+
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
434
|
+
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
435
|
+
# for this node. You probably want #default_namespace= instead, or perhaps
|
436
|
+
# #add_namespace_definition with a nil prefix argument.
|
437
|
+
def namespace=(ns)
|
438
|
+
return set_namespace(ns) unless ns
|
439
|
+
|
440
|
+
unless Nokogiri::XML::Namespace === ns
|
441
|
+
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
442
|
+
end
|
443
|
+
if ns.document != document
|
444
|
+
raise ArgumentError, "namespace must be declared on the same document"
|
445
|
+
end
|
446
|
+
|
447
|
+
set_namespace(ns)
|
448
|
+
end
|
449
|
+
|
450
|
+
###
|
451
|
+
# Do xinclude substitution on the subtree below node. If given a block, a
|
452
|
+
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
453
|
+
# passed to it, allowing more convenient modification of the parser options.
|
454
|
+
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
|
455
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
456
|
+
yield options if block_given?
|
457
|
+
|
458
|
+
# call c extension
|
459
|
+
process_xincludes(options.to_i)
|
460
|
+
end
|
461
|
+
|
462
|
+
alias_method :next, :next_sibling
|
463
|
+
alias_method :previous, :previous_sibling
|
464
|
+
alias_method :next=, :add_next_sibling
|
465
|
+
alias_method :previous=, :add_previous_sibling
|
466
|
+
alias_method :remove, :unlink
|
467
|
+
alias_method :name=, :node_name=
|
468
|
+
alias_method :add_namespace, :add_namespace_definition
|
469
|
+
|
470
|
+
# :section:
|
471
|
+
|
472
|
+
alias_method :inner_text, :content
|
473
|
+
alias_method :text, :content
|
474
|
+
alias_method :to_str, :content
|
475
|
+
alias_method :name, :node_name
|
476
|
+
alias_method :type, :node_type
|
477
|
+
alias_method :clone, :dup
|
478
|
+
alias_method :elements, :element_children
|
479
|
+
|
480
|
+
# :section: Working With Node Attributes
|
481
|
+
|
482
|
+
# :call-seq: [](name) → (String, nil)
|
483
|
+
#
|
484
|
+
# Fetch an attribute from this node.
|
485
|
+
#
|
486
|
+
# ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
|
487
|
+
# namespaced attributes, use #attribute_with_ns.
|
488
|
+
#
|
489
|
+
# [Returns] (String, nil) value of the attribute +name+, or +nil+ if no matching attribute exists
|
490
|
+
#
|
491
|
+
# *Example*
|
492
|
+
#
|
493
|
+
# doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
|
494
|
+
# child = doc.at_css("child")
|
495
|
+
# child["size"] # => "large"
|
496
|
+
# child["class"] # => "big wide tall"
|
497
|
+
#
|
498
|
+
# *Example:* Namespaced attributes will not be returned.
|
499
|
+
#
|
500
|
+
# ⚠ Note namespaced attributes may be accessed with #attribute or #attribute_with_ns
|
501
|
+
#
|
502
|
+
# doc = Nokogiri::XML(<<~EOF)
|
503
|
+
# <root xmlns:width='http://example.com/widths'>
|
504
|
+
# <child width:size='broad'/>
|
505
|
+
# </root>
|
506
|
+
# EOF
|
507
|
+
# doc.at_css("child")["size"] # => nil
|
508
|
+
# doc.at_css("child").attribute("size").value # => "broad"
|
509
|
+
# doc.at_css("child").attribute_with_ns("size", "http://example.com/widths").value
|
510
|
+
# # => "broad"
|
511
|
+
#
|
512
|
+
def [](name)
|
513
|
+
get(name.to_s)
|
514
|
+
end
|
515
|
+
|
516
|
+
# :call-seq: []=(name, value) → value
|
517
|
+
#
|
518
|
+
# Update the attribute +name+ to +value+, or create the attribute if it does not exist.
|
519
|
+
#
|
520
|
+
# ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
|
521
|
+
# namespaced attributes for update, use #attribute_with_ns. To add a namespaced attribute,
|
522
|
+
# see the example below.
|
523
|
+
#
|
524
|
+
# [Returns] +value+
|
525
|
+
#
|
526
|
+
# *Example*
|
527
|
+
#
|
528
|
+
# doc = Nokogiri::XML("<root><child/></root>")
|
529
|
+
# child = doc.at_css("child")
|
530
|
+
# child["size"] = "broad"
|
531
|
+
# child.to_html
|
532
|
+
# # => "<child size=\"broad\"></child>"
|
533
|
+
#
|
534
|
+
# *Example:* Add a namespaced attribute.
|
535
|
+
#
|
536
|
+
# doc = Nokogiri::XML(<<~EOF)
|
537
|
+
# <root xmlns:width='http://example.com/widths'>
|
538
|
+
# <child/>
|
539
|
+
# </root>
|
540
|
+
# EOF
|
541
|
+
# child = doc.at_css("child")
|
542
|
+
# child["size"] = "broad"
|
543
|
+
# ns = doc.root.namespace_definitions.find { |ns| ns.prefix == "width" }
|
544
|
+
# child.attribute("size").namespace = ns
|
545
|
+
# doc.to_html
|
546
|
+
# # => "<root xmlns:width=\"http://example.com/widths\">\n" +
|
547
|
+
# # " <child width:size=\"broad\"></child>\n" +
|
548
|
+
# # "</root>\n"
|
549
|
+
#
|
550
|
+
def []=(name, value)
|
551
|
+
set(name.to_s, value.to_s)
|
552
|
+
end
|
553
|
+
|
554
|
+
#
|
555
|
+
# :call-seq: attributes() → Hash<String ⇒ Nokogiri::XML::Attr>
|
556
|
+
#
|
557
|
+
# Fetch this node's attributes.
|
558
|
+
#
|
559
|
+
# ⚠ Because the keys do not include any namespace information for the attribute, in case of a
|
560
|
+
# simple name collision, not all attributes will be returned. In this case, you will need to
|
561
|
+
# use #attribute_nodes.
|
562
|
+
#
|
563
|
+
# [Returns]
|
564
|
+
# Hash containing attributes belonging to +self+. The hash keys are String attribute
|
565
|
+
# names (without the namespace), and the hash values are Nokogiri::XML::Attr.
|
566
|
+
#
|
567
|
+
# *Example* with no namespaces:
|
568
|
+
#
|
569
|
+
# doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
|
570
|
+
# doc.at_css("child").attributes
|
571
|
+
# # => {"size"=>#(Attr:0x550 { name = "size", value = "large" }),
|
572
|
+
# # "class"=>#(Attr:0x564 { name = "class", value = "big wide tall" })}
|
573
|
+
#
|
574
|
+
# *Example* with a namespace:
|
575
|
+
#
|
576
|
+
# doc = Nokogiri::XML("<root xmlns:desc='http://example.com/sizes'><child desc:size='large'/></root>")
|
577
|
+
# doc.at_css("child").attributes
|
578
|
+
# # => {"size"=>
|
579
|
+
# # #(Attr:0x550 {
|
580
|
+
# # name = "size",
|
581
|
+
# # namespace = #(Namespace:0x564 {
|
582
|
+
# # prefix = "desc",
|
583
|
+
# # href = "http://example.com/sizes"
|
584
|
+
# # }),
|
585
|
+
# # value = "large"
|
586
|
+
# # })}
|
587
|
+
#
|
588
|
+
# *Example* with an attribute name collision:
|
589
|
+
#
|
590
|
+
# ⚠ Note that only one of the attributes is returned in the Hash.
|
591
|
+
#
|
592
|
+
# doc = Nokogiri::XML(<<~EOF)
|
593
|
+
# <root xmlns:width='http://example.com/widths'
|
594
|
+
# xmlns:height='http://example.com/heights'>
|
595
|
+
# <child width:size='broad' height:size='tall'/>
|
596
|
+
# </root>
|
597
|
+
# EOF
|
598
|
+
# doc.at_css("child").attributes
|
599
|
+
# # => {"size"=>
|
600
|
+
# # #(Attr:0x550 {
|
601
|
+
# # name = "size",
|
602
|
+
# # namespace = #(Namespace:0x564 {
|
603
|
+
# # prefix = "height",
|
604
|
+
# # href = "http://example.com/heights"
|
605
|
+
# # }),
|
606
|
+
# # value = "tall"
|
607
|
+
# # })}
|
608
|
+
#
|
327
609
|
def attributes
|
328
|
-
|
329
|
-
[node.node_name
|
330
|
-
|
610
|
+
attribute_nodes.each_with_object({}) do |node, hash|
|
611
|
+
hash[node.node_name] = node
|
612
|
+
end
|
331
613
|
end
|
332
614
|
|
333
615
|
###
|
@@ -336,6 +618,12 @@ module Nokogiri
|
|
336
618
|
attribute_nodes.map(&:value)
|
337
619
|
end
|
338
620
|
|
621
|
+
###
|
622
|
+
# Do this Node's attributes include +value+?
|
623
|
+
def value?(value)
|
624
|
+
values.include?(value)
|
625
|
+
end
|
626
|
+
|
339
627
|
###
|
340
628
|
# Get the attribute names for this Node.
|
341
629
|
def keys
|
@@ -345,97 +633,401 @@ module Nokogiri
|
|
345
633
|
###
|
346
634
|
# Iterate over each attribute name and value pair for this Node.
|
347
635
|
def each
|
348
|
-
attribute_nodes.each
|
636
|
+
attribute_nodes.each do |node|
|
349
637
|
yield [node.node_name, node.value]
|
350
|
-
|
638
|
+
end
|
351
639
|
end
|
352
640
|
|
353
641
|
###
|
354
|
-
#
|
355
|
-
|
642
|
+
# Remove the attribute named +name+
|
643
|
+
def remove_attribute(name)
|
644
|
+
attr = attributes[name].remove if key?(name)
|
645
|
+
clear_xpath_context if Nokogiri.jruby?
|
646
|
+
attr
|
647
|
+
end
|
648
|
+
|
649
|
+
#
|
650
|
+
# :call-seq: classes() → Array<String>
|
651
|
+
#
|
652
|
+
# Fetch CSS class names of a Node.
|
653
|
+
#
|
654
|
+
# This is a convenience function and is equivalent to:
|
655
|
+
#
|
656
|
+
# node.kwattr_values("class")
|
657
|
+
#
|
658
|
+
# See related: #kwattr_values, #add_class, #append_class, #remove_class
|
659
|
+
#
|
660
|
+
# [Returns]
|
661
|
+
# The CSS classes (Array of String) present in the Node's "class" attribute. If the
|
662
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
663
|
+
#
|
664
|
+
# *Example*
|
665
|
+
#
|
666
|
+
# node # => <div class="section title header"></div>
|
667
|
+
# node.classes # => ["section", "title", "header"]
|
668
|
+
#
|
356
669
|
def classes
|
357
|
-
|
670
|
+
kwattr_values("class")
|
358
671
|
end
|
359
672
|
|
360
|
-
###
|
361
|
-
# Add +name+ to the "class" attribute value of this Node and
|
362
|
-
# return self. If the value is already in the current value, it
|
363
|
-
# is not added. If no "class" attribute exists yet, one is
|
364
|
-
# created with the given value.
|
365
673
|
#
|
366
|
-
#
|
367
|
-
#
|
368
|
-
|
369
|
-
|
370
|
-
|
674
|
+
# :call-seq: add_class(names) → self
|
675
|
+
#
|
676
|
+
# Ensure HTML CSS classes are present on +self+. Any CSS classes in +names+ that already exist
|
677
|
+
# in the "class" attribute are _not_ added. Note that any existing duplicates in the
|
678
|
+
# "class" attribute are not removed. Compare with #append_class.
|
679
|
+
#
|
680
|
+
# This is a convenience function and is equivalent to:
|
681
|
+
#
|
682
|
+
# node.kwattr_add("class", names)
|
683
|
+
#
|
684
|
+
# See related: #kwattr_add, #classes, #append_class, #remove_class
|
685
|
+
#
|
686
|
+
# [Parameters]
|
687
|
+
# - +names+ (String, Array<String>)
|
688
|
+
#
|
689
|
+
# CSS class names to be added to the Node's "class" attribute. May be a string containing
|
690
|
+
# whitespace-delimited names, or an Array of String names. Any class names already present
|
691
|
+
# will not be added. Any class names not present will be added. If no "class" attribute
|
692
|
+
# exists, one is created.
|
693
|
+
#
|
694
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
695
|
+
#
|
696
|
+
# *Example:* Ensure that the node has CSS class "section"
|
697
|
+
#
|
698
|
+
# node # => <div></div>
|
699
|
+
# node.add_class("section") # => <div class="section"></div>
|
700
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
701
|
+
#
|
702
|
+
# *Example:* Ensure that the node has CSS classes "section" and "header", via a String argument
|
703
|
+
#
|
704
|
+
# Note that the CSS class "section" is not added because it is already present.
|
705
|
+
# Note also that the pre-existing duplicate CSS class "section" is not removed.
|
706
|
+
#
|
707
|
+
# node # => <div class="section section"></div>
|
708
|
+
# node.add_class("section header") # => <div class="section section header"></div>
|
709
|
+
#
|
710
|
+
# *Example:* Ensure that the node has CSS classes "section" and "header", via an Array argument
|
711
|
+
#
|
712
|
+
# node # => <div></div>
|
713
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
714
|
+
#
|
715
|
+
def add_class(names)
|
716
|
+
kwattr_add("class", names)
|
717
|
+
end
|
718
|
+
|
719
|
+
#
|
720
|
+
# :call-seq: append_class(names) → self
|
721
|
+
#
|
722
|
+
# Add HTML CSS classes to +self+, regardless of duplication. Compare with #add_class.
|
723
|
+
#
|
724
|
+
# This is a convenience function and is equivalent to:
|
725
|
+
#
|
726
|
+
# node.kwattr_append("class", names)
|
727
|
+
#
|
728
|
+
# See related: #kwattr_append, #classes, #add_class, #remove_class
|
729
|
+
#
|
730
|
+
# [Parameters]
|
731
|
+
# - +names+ (String, Array<String>)
|
732
|
+
#
|
733
|
+
# CSS class names to be appended to the Node's "class" attribute. May be a string containing
|
734
|
+
# whitespace-delimited names, or an Array of String names. All class names passed in will be
|
735
|
+
# appended to the "class" attribute even if they are already present in the attribute
|
736
|
+
# value. If no "class" attribute exists, one is created.
|
737
|
+
#
|
738
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
739
|
+
#
|
740
|
+
# *Example:* Append "section" to the node's CSS "class" attribute
|
741
|
+
#
|
742
|
+
# node # => <div></div>
|
743
|
+
# node.append_class("section") # => <div class="section"></div>
|
744
|
+
# node.append_class("section") # => <div class="section section"></div> # duplicate added!
|
745
|
+
#
|
746
|
+
# *Example:* Append "section" and "header" to the node's CSS "class" attribute, via a String argument
|
747
|
+
#
|
748
|
+
# Note that the CSS class "section" is appended even though it is already present
|
749
|
+
#
|
750
|
+
# node # => <div class="section section"></div>
|
751
|
+
# node.append_class("section header") # => <div class="section section section header"></div>
|
752
|
+
#
|
753
|
+
# *Example:* Append "section" and "header" to the node's CSS "class" attribute, via an Array argument
|
754
|
+
#
|
755
|
+
# node # => <div></div>
|
756
|
+
# node.append_class(["section", "header"]) # => <div class="section header"></div>
|
757
|
+
# node.append_class(["section", "header"]) # => <div class="section header section header"></div>
|
758
|
+
#
|
759
|
+
def append_class(names)
|
760
|
+
kwattr_append("class", names)
|
761
|
+
end
|
762
|
+
|
763
|
+
# :call-seq:
|
764
|
+
# remove_class(css_classes) → self
|
765
|
+
#
|
766
|
+
# Remove HTML CSS classes from this node. Any CSS class names in +css_classes+ that exist in
|
767
|
+
# this node's "class" attribute are removed, including any multiple entries.
|
768
|
+
#
|
769
|
+
# If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
|
770
|
+
# attribute is deleted from the node.
|
771
|
+
#
|
772
|
+
# This is a convenience function and is equivalent to:
|
773
|
+
#
|
774
|
+
# node.kwattr_remove("class", css_classes)
|
775
|
+
#
|
776
|
+
# Also see #kwattr_remove, #classes, #add_class, #append_class
|
777
|
+
#
|
778
|
+
# [Parameters]
|
779
|
+
# - +css_classes+ (String, Array<String>)
|
780
|
+
#
|
781
|
+
# CSS class names to be removed from the Node's
|
782
|
+
# "class" attribute. May be a string containing whitespace-delimited names, or an Array of
|
783
|
+
# String names. Any class names already present will be removed. If no CSS classes remain,
|
784
|
+
# the "class" attribute is deleted.
|
785
|
+
#
|
786
|
+
# [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
|
787
|
+
#
|
788
|
+
# *Example*: Deleting a CSS class
|
789
|
+
#
|
790
|
+
# Note that all instances of the class "section" are removed from the "class" attribute.
|
791
|
+
#
|
792
|
+
# node # => <div class="section header section"></div>
|
793
|
+
# node.remove_class("section") # => <div class="header"></div>
|
794
|
+
#
|
795
|
+
# *Example*: Deleting the only remaining CSS class
|
796
|
+
#
|
797
|
+
# Note that the attribute is removed once there are no remaining classes.
|
798
|
+
#
|
799
|
+
# node # => <div class="section"></div>
|
800
|
+
# node.remove_class("section") # => <div></div>
|
801
|
+
#
|
802
|
+
# *Example*: Deleting multiple CSS classes
|
803
|
+
#
|
804
|
+
# Note that the "class" attribute is deleted once it's empty.
|
805
|
+
#
|
806
|
+
# node # => <div class="section header float"></div>
|
807
|
+
# node.remove_class(["section", "float"]) # => <div class="header"></div>
|
808
|
+
#
|
809
|
+
def remove_class(names = nil)
|
810
|
+
kwattr_remove("class", names)
|
811
|
+
end
|
812
|
+
|
813
|
+
# :call-seq:
|
814
|
+
# kwattr_values(attribute_name) → Array<String>
|
815
|
+
#
|
816
|
+
# Fetch values from a keyword attribute of a Node.
|
817
|
+
#
|
818
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
819
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
820
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
821
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
822
|
+
#
|
823
|
+
# See also #classes, #kwattr_add, #kwattr_append, #kwattr_remove
|
824
|
+
#
|
825
|
+
# [Parameters]
|
826
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be inspected.
|
827
|
+
#
|
828
|
+
# [Returns]
|
829
|
+
# (Array<String>) The values present in the Node's +attribute_name+ attribute. If the
|
830
|
+
# attribute is empty or non-existent, the return value is an empty array.
|
831
|
+
#
|
832
|
+
# *Example:*
|
833
|
+
#
|
834
|
+
# node # => <a rel="nofollow noopener external">link</a>
|
835
|
+
# node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
|
836
|
+
#
|
837
|
+
# Since v1.11.0
|
838
|
+
def kwattr_values(attribute_name)
|
839
|
+
keywordify(get_attribute(attribute_name) || [])
|
840
|
+
end
|
841
|
+
|
842
|
+
# :call-seq:
|
843
|
+
# kwattr_add(attribute_name, keywords) → self
|
844
|
+
#
|
845
|
+
# Ensure that values are present in a keyword attribute.
|
846
|
+
#
|
847
|
+
# Any values in +keywords+ that already exist in the Node's attribute values are _not_
|
848
|
+
# added. Note that any existing duplicates in the attribute values are not removed. Compare
|
849
|
+
# with #kwattr_append.
|
850
|
+
#
|
851
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
852
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
853
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
854
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
855
|
+
#
|
856
|
+
# See also #add_class, #kwattr_values, #kwattr_append, #kwattr_remove
|
857
|
+
#
|
858
|
+
# [Parameters]
|
859
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
860
|
+
# - +keywords+ (String, Array<String>)
|
861
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
862
|
+
# whitespace-delimited values, or an Array of String values. Any values already present will
|
863
|
+
# not be added. Any values not present will be added. If the named attribute does not exist,
|
864
|
+
# it is created.
|
865
|
+
#
|
866
|
+
# [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
|
867
|
+
#
|
868
|
+
# *Example:* Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
|
869
|
+
#
|
870
|
+
# Note that duplicates are not added.
|
871
|
+
#
|
872
|
+
# node # => <a></a>
|
873
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
874
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
875
|
+
#
|
876
|
+
# *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a
|
877
|
+
# String argument.
|
878
|
+
#
|
879
|
+
# Note that "nofollow" is not added because it is already present. Note also that the
|
880
|
+
# pre-existing duplicate "nofollow" is not removed.
|
881
|
+
#
|
882
|
+
# node # => <a rel="nofollow nofollow"></a>
|
883
|
+
# node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
884
|
+
#
|
885
|
+
# *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via
|
886
|
+
# an Array argument.
|
887
|
+
#
|
888
|
+
# node # => <a></a>
|
889
|
+
# node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
890
|
+
#
|
891
|
+
# Since v1.11.0
|
892
|
+
def kwattr_add(attribute_name, keywords)
|
893
|
+
keywords = keywordify(keywords)
|
894
|
+
current_kws = kwattr_values(attribute_name)
|
895
|
+
new_kws = (current_kws + (keywords - current_kws)).join(" ")
|
896
|
+
set_attribute(attribute_name, new_kws)
|
371
897
|
self
|
372
898
|
end
|
373
899
|
|
374
|
-
|
375
|
-
#
|
376
|
-
#
|
377
|
-
#
|
378
|
-
#
|
900
|
+
# :call-seq:
|
901
|
+
# kwattr_append(attribute_name, keywords) → self
|
902
|
+
#
|
903
|
+
# Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
|
904
|
+
# #kwattr_add.
|
905
|
+
#
|
906
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
907
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
908
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
909
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
910
|
+
#
|
911
|
+
# See also #append_class, #kwattr_values, #kwattr_add, #kwattr_remove
|
912
|
+
#
|
913
|
+
# [Parameters]
|
914
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
915
|
+
# - +keywords+ (String, Array<String>)
|
916
|
+
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
|
917
|
+
# whitespace-delimited values, or an Array of String values. All values passed in will be
|
918
|
+
# appended to the named attribute even if they are already present in the attribute. If the
|
919
|
+
# named attribute does not exist, it is created.
|
920
|
+
#
|
921
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
379
922
|
#
|
380
|
-
#
|
381
|
-
#
|
382
|
-
|
383
|
-
|
923
|
+
# *Example:* Append "nofollow" to the +rel+ attribute.
|
924
|
+
#
|
925
|
+
# Note that duplicates are added.
|
926
|
+
#
|
927
|
+
# node # => <a></a>
|
928
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
|
929
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a>
|
930
|
+
#
|
931
|
+
# *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
|
932
|
+
#
|
933
|
+
# Note that "nofollow" is appended even though it is already present.
|
934
|
+
#
|
935
|
+
# node # => <a rel="nofollow"></a>
|
936
|
+
# node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
937
|
+
#
|
938
|
+
#
|
939
|
+
# *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
|
940
|
+
#
|
941
|
+
# node # => <a></a>
|
942
|
+
# node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
943
|
+
#
|
944
|
+
# Since v1.11.0
|
945
|
+
def kwattr_append(attribute_name, keywords)
|
946
|
+
keywords = keywordify(keywords)
|
947
|
+
current_kws = kwattr_values(attribute_name)
|
948
|
+
new_kws = (current_kws + keywords).join(" ")
|
949
|
+
set_attribute(attribute_name, new_kws)
|
384
950
|
self
|
385
951
|
end
|
386
952
|
|
387
|
-
|
388
|
-
#
|
389
|
-
# and return self. If there are many occurrences of the name,
|
390
|
-
# they are all removed.
|
953
|
+
# :call-seq:
|
954
|
+
# kwattr_remove(attribute_name, keywords) → self
|
391
955
|
#
|
392
|
-
#
|
393
|
-
#
|
956
|
+
# Remove keywords from a keyword attribute. Any matching keywords that exist in the named
|
957
|
+
# attribute are removed, including any multiple entries.
|
394
958
|
#
|
395
|
-
# If no
|
396
|
-
#
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
959
|
+
# If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
|
960
|
+
# deleted from the node.
|
961
|
+
#
|
962
|
+
# A "keyword attribute" is a node attribute that contains a set of space-delimited
|
963
|
+
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
|
964
|
+
# contain CSS classes. But other keyword attributes exist, for instance
|
965
|
+
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
|
966
|
+
#
|
967
|
+
# See also #remove_class, #kwattr_values, #kwattr_add, #kwattr_append
|
968
|
+
#
|
969
|
+
# [Parameters]
|
970
|
+
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
|
971
|
+
# - +keywords+ (String, Array<String>)
|
972
|
+
# Keywords to be removed from the attribute named +attribute_name+. May be a string
|
973
|
+
# containing whitespace-delimited values, or an Array of String values. Any keywords present
|
974
|
+
# in the named attribute will be removed. If no keywords remain, or if +keywords+ is nil,
|
975
|
+
# the attribute is deleted.
|
976
|
+
#
|
977
|
+
# [Returns] +self+ (Node) for ease of chaining method calls.
|
978
|
+
#
|
979
|
+
# *Example:*
|
980
|
+
#
|
981
|
+
# Note that the +rel+ attribute is deleted when empty.
|
982
|
+
#
|
983
|
+
# node # => <a rel="nofollow noreferrer">link</a>
|
984
|
+
# node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
|
985
|
+
# node.kwattr_remove("rel", "noreferrer") # => <a>link</a>
|
986
|
+
#
|
987
|
+
# Since v1.11.0
|
988
|
+
def kwattr_remove(attribute_name, keywords)
|
989
|
+
if keywords.nil?
|
990
|
+
remove_attribute(attribute_name)
|
991
|
+
return self
|
992
|
+
end
|
993
|
+
|
994
|
+
keywords = keywordify(keywords)
|
995
|
+
current_kws = kwattr_values(attribute_name)
|
996
|
+
new_kws = current_kws - keywords
|
997
|
+
if new_kws.empty?
|
998
|
+
remove_attribute(attribute_name)
|
405
999
|
else
|
406
|
-
|
1000
|
+
set_attribute(attribute_name, new_kws.join(" "))
|
407
1001
|
end
|
408
1002
|
self
|
409
1003
|
end
|
410
1004
|
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
alias :delete :remove_attribute
|
1005
|
+
alias_method :delete, :remove_attribute
|
1006
|
+
alias_method :get_attribute, :[]
|
1007
|
+
alias_method :attr, :[]
|
1008
|
+
alias_method :set_attribute, :[]=
|
1009
|
+
alias_method :has_attribute?, :key?
|
1010
|
+
|
1011
|
+
# :section:
|
419
1012
|
|
420
1013
|
###
|
421
1014
|
# Returns true if this Node matches +selector+
|
422
|
-
def matches?
|
1015
|
+
def matches?(selector)
|
423
1016
|
ancestors.last.search(selector).include?(self)
|
424
1017
|
end
|
425
1018
|
|
426
1019
|
###
|
427
1020
|
# Create a DocumentFragment containing +tags+ that is relative to _this_
|
428
1021
|
# context node.
|
429
|
-
def fragment
|
430
|
-
|
431
|
-
type::DocumentFragment.new(document, tags, self)
|
1022
|
+
def fragment(tags)
|
1023
|
+
document.related_class("DocumentFragment").new(document, tags, self)
|
432
1024
|
end
|
433
1025
|
|
434
1026
|
###
|
435
1027
|
# Parse +string_or_io+ as a document fragment within the context of
|
436
1028
|
# *this* node. Returns an XML::NodeSet containing the nodes parsed from
|
437
1029
|
# +string_or_io+.
|
438
|
-
def parse
|
1030
|
+
def parse(string_or_io, options = nil)
|
439
1031
|
##
|
440
1032
|
# When the current node is unparented and not an element node, use the
|
441
1033
|
# document as the parsing context instead. Otherwise, the in-context
|
@@ -446,61 +1038,87 @@ module Nokogiri
|
|
446
1038
|
end
|
447
1039
|
|
448
1040
|
options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
|
449
|
-
if Integer === options
|
450
|
-
options = Nokogiri::XML::ParseOptions.new(options)
|
451
|
-
end
|
452
|
-
# Give the options to the user
|
1041
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
453
1042
|
yield options if block_given?
|
454
1043
|
|
455
|
-
contents = string_or_io.respond_to?(:read)
|
456
|
-
string_or_io.read
|
1044
|
+
contents = if string_or_io.respond_to?(:read)
|
1045
|
+
string_or_io.read
|
1046
|
+
else
|
457
1047
|
string_or_io
|
1048
|
+
end
|
458
1049
|
|
459
1050
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
460
1051
|
|
461
|
-
|
462
|
-
#
|
1052
|
+
# libxml2 does not obey the +recover+ option after encountering errors during +in_context+
|
1053
|
+
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
1054
|
+
#
|
1055
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1056
|
+
# would have been inherited from the context node won't be handled correctly. This hack was
|
1057
|
+
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
1058
|
+
# that's not easily prevented (or even detected).
|
1059
|
+
#
|
1060
|
+
# I think preferable behavior would be to either:
|
1061
|
+
#
|
1062
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
|
1063
|
+
# b. don't recover, but raise a sensible exception
|
1064
|
+
#
|
1065
|
+
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
1066
|
+
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
463
1067
|
error_count = document.errors.length
|
464
1068
|
node_set = in_context(contents, options.to_i)
|
465
|
-
if node_set.empty?
|
466
|
-
|
467
|
-
|
1069
|
+
if node_set.empty? && (document.errors.length > error_count)
|
1070
|
+
if options.recover?
|
1071
|
+
fragment = document.related_class("DocumentFragment").parse(contents)
|
1072
|
+
node_set = fragment.children
|
1073
|
+
else
|
1074
|
+
raise document.errors[error_count]
|
1075
|
+
end
|
468
1076
|
end
|
469
1077
|
node_set
|
470
1078
|
end
|
471
1079
|
|
472
|
-
|
473
|
-
#
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
# Set the parent Node for this Node
|
480
|
-
def parent= parent_node
|
481
|
-
parent_node.add_child(self)
|
482
|
-
parent_node
|
483
|
-
end
|
484
|
-
|
485
|
-
###
|
486
|
-
# Returns a Hash of {prefix => value} for all namespaces on this
|
487
|
-
# node and its ancestors.
|
1080
|
+
# :call-seq:
|
1081
|
+
# namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
|
1082
|
+
#
|
1083
|
+
# Fetch all the namespaces on this node and its ancestors.
|
1084
|
+
#
|
1085
|
+
# Note that the keys in this hash are XML attributes that would be used to define this namespace,
|
1086
|
+
# such as "xmlns:prefix", not just the prefix.
|
488
1087
|
#
|
489
|
-
#
|
1088
|
+
# The default namespace for this node will be included with key "xmlns".
|
1089
|
+
#
|
1090
|
+
# See also #namespace_scopes
|
1091
|
+
#
|
1092
|
+
# [Returns]
|
1093
|
+
# Hash containing all the namespaces on this node and its ancestors. The hash keys are the
|
1094
|
+
# namespace-declaring attribute names ("xmlns" or "xmlns:prefix"), and the hash value for each key is the namespace URI.
|
1095
|
+
#
|
1096
|
+
# *Example:*
|
1097
|
+
#
|
1098
|
+
# doc = Nokogiri::XML(<<~EOF)
|
1099
|
+
# <root xmlns="http://example.com/root" xmlns:in_scope="http://example.com/in_scope">
|
1100
|
+
# <first/>
|
1101
|
+
# <second xmlns="http://example.com/child"/>
|
1102
|
+
# <third xmlns:foo="http://example.com/foo"/>
|
1103
|
+
# </root>
|
1104
|
+
# EOF
|
1105
|
+
# doc.at_xpath("//root:first", "root" => "http://example.com/root").namespaces
|
1106
|
+
# # => {"xmlns"=>"http://example.com/root",
|
1107
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
1108
|
+
# doc.at_xpath("//child:second", "child" => "http://example.com/child").namespaces
|
1109
|
+
# # => {"xmlns"=>"http://example.com/child",
|
1110
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
1111
|
+
# doc.at_xpath("//root:third", "root" => "http://example.com/root").namespaces
|
1112
|
+
# # => {"xmlns:foo"=>"http://example.com/foo",
|
1113
|
+
# # "xmlns"=>"http://example.com/root",
|
1114
|
+
# # "xmlns:in_scope"=>"http://example.com/in_scope"}
|
490
1115
|
#
|
491
|
-
# Returns namespaces in scope for self -- those defined on self
|
492
|
-
# element directly or any ancestor node -- as a Hash of
|
493
|
-
# attribute-name/value pairs. Note that the keys in this hash
|
494
|
-
# XML attributes that would be used to define this namespace,
|
495
|
-
# such as "xmlns:prefix", not just the prefix. Default namespace
|
496
|
-
# set on self will be included with key "xmlns". However,
|
497
|
-
# default namespaces set on ancestor will NOT be, even if self
|
498
|
-
# has no explicit default namespace.
|
499
1116
|
def namespaces
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
1117
|
+
namespace_scopes.each_with_object({}) do |ns, hash|
|
1118
|
+
prefix = ns.prefix
|
1119
|
+
key = prefix ? "xmlns:#{prefix}" : "xmlns"
|
1120
|
+
hash[key] = ns.href
|
1121
|
+
end
|
504
1122
|
end
|
505
1123
|
|
506
1124
|
# Returns true if this is a Comment
|
@@ -518,14 +1136,14 @@ module Nokogiri
|
|
518
1136
|
type == DOCUMENT_NODE
|
519
1137
|
end
|
520
1138
|
|
521
|
-
# Returns true if this is an
|
1139
|
+
# Returns true if this is an HTML4::Document or HTML5::Document node
|
522
1140
|
def html?
|
523
1141
|
type == HTML_DOCUMENT_NODE
|
524
1142
|
end
|
525
1143
|
|
526
1144
|
# Returns true if this is a Document
|
527
1145
|
def document?
|
528
|
-
is_a?
|
1146
|
+
is_a?(XML::Document)
|
529
1147
|
end
|
530
1148
|
|
531
1149
|
# Returns true if this is a ProcessingInstruction node
|
@@ -544,11 +1162,12 @@ module Nokogiri
|
|
544
1162
|
end
|
545
1163
|
|
546
1164
|
###
|
547
|
-
# Fetch the Nokogiri::
|
1165
|
+
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
548
1166
|
# nil on XML documents and on unknown tags.
|
549
1167
|
def description
|
550
1168
|
return nil if document.xml?
|
551
|
-
|
1169
|
+
|
1170
|
+
Nokogiri::HTML4::ElementDescription[name]
|
552
1171
|
end
|
553
1172
|
|
554
1173
|
###
|
@@ -562,7 +1181,8 @@ module Nokogiri
|
|
562
1181
|
def element?
|
563
1182
|
type == ELEMENT_NODE
|
564
1183
|
end
|
565
|
-
|
1184
|
+
|
1185
|
+
alias_method :elem?, :element?
|
566
1186
|
|
567
1187
|
###
|
568
1188
|
# Turn this node into a string. If the document is HTML, this method
|
@@ -572,28 +1192,29 @@ module Nokogiri
|
|
572
1192
|
end
|
573
1193
|
|
574
1194
|
# Get the inner_html for this node's Node#children
|
575
|
-
def inner_html
|
1195
|
+
def inner_html(*args)
|
576
1196
|
children.map { |x| x.to_html(*args) }.join
|
577
1197
|
end
|
578
1198
|
|
579
1199
|
# Get the path to this node as a CSS expression
|
580
1200
|
def css_path
|
581
|
-
path.split(
|
582
|
-
part.
|
583
|
-
|
1201
|
+
path.split(%r{/}).filter_map do |part|
|
1202
|
+
part.empty? ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
|
1203
|
+
end.join(" > ")
|
584
1204
|
end
|
585
1205
|
|
586
1206
|
###
|
587
1207
|
# Get a list of ancestor Node for this Node. If +selector+ is given,
|
588
1208
|
# the ancestors must match +selector+
|
589
|
-
def ancestors
|
1209
|
+
def ancestors(selector = nil)
|
590
1210
|
return NodeSet.new(document) unless respond_to?(:parent)
|
591
1211
|
return NodeSet.new(document) unless parent
|
592
1212
|
|
593
1213
|
parents = [parent]
|
594
1214
|
|
595
1215
|
while parents.last.respond_to?(:parent)
|
596
|
-
break unless ctx_parent = parents.last.parent
|
1216
|
+
break unless (ctx_parent = parents.last.parent)
|
1217
|
+
|
597
1218
|
parents << ctx_parent
|
598
1219
|
end
|
599
1220
|
|
@@ -602,89 +1223,76 @@ module Nokogiri
|
|
602
1223
|
root = parents.last
|
603
1224
|
search_results = root.search(selector)
|
604
1225
|
|
605
|
-
NodeSet.new(document, parents.find_all
|
1226
|
+
NodeSet.new(document, parents.find_all do |parent|
|
606
1227
|
search_results.include?(parent)
|
607
|
-
|
608
|
-
end
|
609
|
-
|
610
|
-
###
|
611
|
-
# Adds a default namespace supplied as a string +url+ href, to self.
|
612
|
-
# The consequence is as an xmlns attribute with supplied argument were
|
613
|
-
# present in parsed XML. A default namespace set with this method will
|
614
|
-
# now show up in #attributes, but when this node is serialized to XML an
|
615
|
-
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
616
|
-
def default_namespace= url
|
617
|
-
add_namespace_definition(nil, url)
|
618
|
-
end
|
619
|
-
alias :add_namespace :add_namespace_definition
|
620
|
-
|
621
|
-
###
|
622
|
-
# Set the default namespace on this node (as would be defined with an
|
623
|
-
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
624
|
-
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
625
|
-
# for this node. You probably want #default_namespace= instead, or perhaps
|
626
|
-
# #add_namespace_definition with a nil prefix argument.
|
627
|
-
def namespace= ns
|
628
|
-
return set_namespace(ns) unless ns
|
629
|
-
|
630
|
-
unless Nokogiri::XML::Namespace === ns
|
631
|
-
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
632
|
-
end
|
633
|
-
if ns.document != document
|
634
|
-
raise ArgumentError, 'namespace must be declared on the same document'
|
635
|
-
end
|
636
|
-
|
637
|
-
set_namespace ns
|
1228
|
+
end)
|
638
1229
|
end
|
639
1230
|
|
640
1231
|
####
|
641
1232
|
# Yields self and all children to +block+ recursively.
|
642
|
-
def traverse
|
643
|
-
children.each{|j| j.traverse(&block) }
|
644
|
-
|
1233
|
+
def traverse(&block)
|
1234
|
+
children.each { |j| j.traverse(&block) }
|
1235
|
+
yield(self)
|
645
1236
|
end
|
646
1237
|
|
647
1238
|
###
|
648
1239
|
# Accept a visitor. This method calls "visit" on +visitor+ with self.
|
649
|
-
def accept
|
1240
|
+
def accept(visitor)
|
650
1241
|
visitor.visit(self)
|
651
1242
|
end
|
652
1243
|
|
653
1244
|
###
|
654
1245
|
# Test to see if this Node is equal to +other+
|
655
|
-
def ==
|
1246
|
+
def ==(other)
|
656
1247
|
return false unless other
|
657
1248
|
return false unless other.respond_to?(:pointer_id)
|
1249
|
+
|
658
1250
|
pointer_id == other.pointer_id
|
659
1251
|
end
|
660
1252
|
|
661
1253
|
###
|
662
|
-
#
|
663
|
-
#
|
1254
|
+
# Compare two Node objects with respect to their Document. Nodes from
|
1255
|
+
# different documents cannot be compared.
|
1256
|
+
def <=>(other)
|
1257
|
+
return nil unless other.is_a?(Nokogiri::XML::Node)
|
1258
|
+
return nil unless document == other.document
|
1259
|
+
|
1260
|
+
compare(other)
|
1261
|
+
end
|
1262
|
+
|
1263
|
+
# :section: Serialization and Generating Output
|
1264
|
+
|
1265
|
+
###
|
1266
|
+
# Serialize Node using +options+. Save options can also be set using a block.
|
1267
|
+
#
|
1268
|
+
# See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
|
664
1269
|
#
|
665
1270
|
# These two statements are equivalent:
|
666
1271
|
#
|
667
|
-
#
|
1272
|
+
# node.serialize(encoding: 'UTF-8', save_with: FORMAT | AS_XML)
|
668
1273
|
#
|
669
1274
|
# or
|
670
1275
|
#
|
671
|
-
# node.serialize(:
|
1276
|
+
# node.serialize(encoding: 'UTF-8') do |config|
|
672
1277
|
# config.format.as_xml
|
673
1278
|
# end
|
674
1279
|
#
|
675
|
-
def serialize
|
676
|
-
options = args.first.is_a?(Hash)
|
677
|
-
|
678
|
-
|
679
|
-
|
1280
|
+
def serialize(*args, &block)
|
1281
|
+
options = if args.first.is_a?(Hash)
|
1282
|
+
args.shift
|
1283
|
+
else
|
1284
|
+
{
|
1285
|
+
encoding: args[0],
|
1286
|
+
save_with: args[1],
|
1287
|
+
}
|
1288
|
+
end
|
680
1289
|
|
681
|
-
|
682
|
-
options[:encoding]
|
1290
|
+
options[:encoding] ||= document.encoding
|
1291
|
+
encoding = Encoding.find(options[:encoding] || "UTF-8")
|
683
1292
|
|
684
|
-
|
685
|
-
|
686
|
-
io
|
687
|
-
write_to io, options, &block
|
1293
|
+
io = StringIO.new(String.new(encoding: encoding))
|
1294
|
+
|
1295
|
+
write_to(io, options, &block)
|
688
1296
|
io.string
|
689
1297
|
end
|
690
1298
|
|
@@ -695,17 +1303,17 @@ module Nokogiri
|
|
695
1303
|
#
|
696
1304
|
# See Node#write_to for a list of +options+. For formatted output,
|
697
1305
|
# use Node#to_xhtml instead.
|
698
|
-
def to_html
|
699
|
-
to_format
|
1306
|
+
def to_html(options = {})
|
1307
|
+
to_format(SaveOptions::DEFAULT_HTML, options)
|
700
1308
|
end
|
701
1309
|
|
702
1310
|
###
|
703
1311
|
# Serialize this Node to XML using +options+
|
704
1312
|
#
|
705
|
-
# doc.to_xml(:
|
1313
|
+
# doc.to_xml(indent: 5, encoding: 'UTF-8')
|
706
1314
|
#
|
707
1315
|
# See Node#write_to for a list of +options+
|
708
|
-
def to_xml
|
1316
|
+
def to_xml(options = {})
|
709
1317
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
710
1318
|
serialize(options)
|
711
1319
|
end
|
@@ -713,62 +1321,76 @@ module Nokogiri
|
|
713
1321
|
###
|
714
1322
|
# Serialize this Node to XHTML using +options+
|
715
1323
|
#
|
716
|
-
# doc.to_xhtml(:
|
1324
|
+
# doc.to_xhtml(indent: 5, encoding: 'UTF-8')
|
717
1325
|
#
|
718
1326
|
# See Node#write_to for a list of +options+
|
719
|
-
def to_xhtml
|
720
|
-
to_format
|
1327
|
+
def to_xhtml(options = {})
|
1328
|
+
to_format(SaveOptions::DEFAULT_XHTML, options)
|
721
1329
|
end
|
722
1330
|
|
723
1331
|
###
|
724
|
-
#
|
725
|
-
#
|
1332
|
+
# :call-seq:
|
1333
|
+
# write_to(io, *options)
|
1334
|
+
#
|
1335
|
+
# Serialize this node or document to +io+.
|
726
1336
|
#
|
727
|
-
#
|
728
|
-
#
|
729
|
-
#
|
730
|
-
#
|
1337
|
+
# [Parameters]
|
1338
|
+
# - +io+ (IO) An IO-like object to which the serialized content will be written.
|
1339
|
+
# - +options+ (Hash) See below
|
1340
|
+
#
|
1341
|
+
# [Options]
|
1342
|
+
# * +:encoding+ (String or Encoding) specify the encoding of the output (defaults to document encoding)
|
1343
|
+
# * +:indent_text+ (String) the indentation text (defaults to <code>" "</code>)
|
1344
|
+
# * +:indent+ (Integer) the number of +:indent_text+ to use (defaults to +2+, or +0+ on JRuby)
|
1345
|
+
# * +:save_with+ (Integer) a combination of SaveOptions constants
|
731
1346
|
#
|
732
1347
|
# To save with UTF-8 indented twice:
|
733
1348
|
#
|
734
|
-
# node.write_to(io, :
|
1349
|
+
# node.write_to(io, encoding: 'UTF-8', indent: 2)
|
735
1350
|
#
|
736
1351
|
# To save indented with two dashes:
|
737
1352
|
#
|
738
|
-
# node.write_to(io, :
|
1353
|
+
# node.write_to(io, indent_text: '-', indent: 2)
|
739
1354
|
#
|
740
|
-
def write_to
|
741
|
-
options
|
742
|
-
encoding
|
1355
|
+
def write_to(io, *options)
|
1356
|
+
options = options.first.is_a?(Hash) ? options.shift : {}
|
1357
|
+
encoding = options[:encoding] || options[0] || document.encoding
|
743
1358
|
if Nokogiri.jruby?
|
744
|
-
save_options
|
745
|
-
indent_times
|
1359
|
+
save_options = options[:save_with] || options[1]
|
1360
|
+
indent_times = options[:indent] || 0
|
746
1361
|
else
|
747
|
-
save_options
|
748
|
-
indent_times
|
1362
|
+
save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
|
1363
|
+
indent_times = options[:indent] || 2
|
749
1364
|
end
|
750
|
-
indent_text
|
1365
|
+
indent_text = options[:indent_text] || " "
|
1366
|
+
|
1367
|
+
# Any string times 0 returns an empty string. Therefore, use the same
|
1368
|
+
# string instead of generating a new empty string for every node with
|
1369
|
+
# zero indentation.
|
1370
|
+
indentation = indent_times.zero? ? "" : (indent_text * indent_times)
|
751
1371
|
|
752
1372
|
config = SaveOptions.new(save_options.to_i)
|
753
1373
|
yield config if block_given?
|
754
1374
|
|
755
|
-
|
1375
|
+
encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
|
1376
|
+
|
1377
|
+
native_write_to(io, encoding, indentation, config.options)
|
756
1378
|
end
|
757
1379
|
|
758
1380
|
###
|
759
1381
|
# Write Node as HTML to +io+ with +options+
|
760
1382
|
#
|
761
1383
|
# See Node#write_to for a list of +options+
|
762
|
-
def write_html_to
|
763
|
-
write_format_to
|
1384
|
+
def write_html_to(io, options = {})
|
1385
|
+
write_format_to(SaveOptions::DEFAULT_HTML, io, options)
|
764
1386
|
end
|
765
1387
|
|
766
1388
|
###
|
767
1389
|
# Write Node as XHTML to +io+ with +options+
|
768
1390
|
#
|
769
1391
|
# See Node#write_to for a list of +options+
|
770
|
-
def write_xhtml_to
|
771
|
-
write_format_to
|
1392
|
+
def write_xhtml_to(io, options = {})
|
1393
|
+
write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
|
772
1394
|
end
|
773
1395
|
|
774
1396
|
###
|
@@ -777,110 +1399,168 @@ module Nokogiri
|
|
777
1399
|
# doc.write_xml_to io, :encoding => 'UTF-8'
|
778
1400
|
#
|
779
1401
|
# See Node#write_to for a list of options
|
780
|
-
def write_xml_to
|
1402
|
+
def write_xml_to(io, options = {})
|
781
1403
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
782
|
-
write_to
|
1404
|
+
write_to(io, options)
|
783
1405
|
end
|
784
1406
|
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
compare other
|
1407
|
+
def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
|
1408
|
+
c14n_root = self
|
1409
|
+
document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
|
1410
|
+
tn = node.is_a?(XML::Node) ? node : parent
|
1411
|
+
tn == c14n_root || tn.ancestors.include?(c14n_root)
|
1412
|
+
end
|
792
1413
|
end
|
793
1414
|
|
794
|
-
|
795
|
-
|
796
|
-
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
797
|
-
# passed to it, allowing more convenient modification of the parser options.
|
798
|
-
def do_xinclude options = XML::ParseOptions::DEFAULT_XML, &block
|
799
|
-
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
1415
|
+
DECONSTRUCT_KEYS = [:name, :attributes, :children, :namespace, :content, :elements, :inner_html].freeze # :nodoc:
|
1416
|
+
DECONSTRUCT_METHODS = { attributes: :attribute_nodes }.freeze # :nodoc:
|
800
1417
|
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
1418
|
+
#
|
1419
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
1420
|
+
#
|
1421
|
+
# Returns a hash describing the Node, to use in pattern matching.
|
1422
|
+
#
|
1423
|
+
# Valid keys and their values:
|
1424
|
+
# - +name+ → (String) The name of this node, or "text" if it is a Text node.
|
1425
|
+
# - +namespace+ → (Namespace, nil) The namespace of this node, or nil if there is no namespace.
|
1426
|
+
# - +attributes+ → (Array<Attr>) The attributes of this node.
|
1427
|
+
# - +children+ → (Array<Node>) The children of this node. 💡 Note this includes text nodes.
|
1428
|
+
# - +elements+ → (Array<Node>) The child elements of this node. 💡 Note this does not include text nodes.
|
1429
|
+
# - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
|
1430
|
+
# - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
|
1431
|
+
#
|
1432
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
1433
|
+
#
|
1434
|
+
# *Example*
|
1435
|
+
#
|
1436
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
1437
|
+
# <?xml version="1.0"?>
|
1438
|
+
# <parent xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
|
1439
|
+
# <child1 foo="abc" noko:bar="def">First</child1>
|
1440
|
+
# <noko:child2 foo="qwe" noko:bar="rty">Second</noko:child2>
|
1441
|
+
# </parent>
|
1442
|
+
# XML
|
1443
|
+
#
|
1444
|
+
# doc.root.deconstruct_keys([:name, :namespace])
|
1445
|
+
# # => {:name=>"parent",
|
1446
|
+
# # :namespace=>
|
1447
|
+
# # #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })}
|
1448
|
+
#
|
1449
|
+
# doc.root.deconstruct_keys([:inner_html, :content])
|
1450
|
+
# # => {:content=>"\n" + " First\n" + " Second\n",
|
1451
|
+
# # :inner_html=>
|
1452
|
+
# # "\n" +
|
1453
|
+
# # " <child1 foo=\"abc\" noko:bar=\"def\">First</child1>\n" +
|
1454
|
+
# # " <noko:child2 foo=\"qwe\" noko:bar=\"rty\">Second</noko:child2>\n"}
|
1455
|
+
#
|
1456
|
+
# doc.root.elements.first.deconstruct_keys([:attributes])
|
1457
|
+
# # => {:attributes=>
|
1458
|
+
# # [#(Attr:0x370 { name = "foo", value = "abc" }),
|
1459
|
+
# # #(Attr:0x384 {
|
1460
|
+
# # name = "bar",
|
1461
|
+
# # namespace = #(Namespace:0x398 {
|
1462
|
+
# # prefix = "noko",
|
1463
|
+
# # href = "http://nokogiri.org/ns/noko"
|
1464
|
+
# # }),
|
1465
|
+
# # value = "def"
|
1466
|
+
# # })]}
|
1467
|
+
#
|
1468
|
+
def deconstruct_keys(keys)
|
1469
|
+
requested_keys = DECONSTRUCT_KEYS & keys
|
1470
|
+
{}.tap do |values|
|
1471
|
+
requested_keys.each do |key|
|
1472
|
+
method = DECONSTRUCT_METHODS[key] || key
|
1473
|
+
values[key] = send(method)
|
1474
|
+
end
|
1475
|
+
end
|
806
1476
|
end
|
807
1477
|
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
1478
|
+
# :section:
|
1479
|
+
|
1480
|
+
protected
|
1481
|
+
|
1482
|
+
def coerce(data)
|
1483
|
+
case data
|
1484
|
+
when XML::NodeSet
|
1485
|
+
return data
|
1486
|
+
when XML::DocumentFragment
|
1487
|
+
return data.children
|
1488
|
+
when String
|
1489
|
+
return fragment(data).children
|
1490
|
+
when Document, XML::Attr
|
1491
|
+
# unacceptable
|
1492
|
+
when XML::Node
|
1493
|
+
return data
|
813
1494
|
end
|
1495
|
+
|
1496
|
+
raise ArgumentError, <<~EOERR
|
1497
|
+
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1498
|
+
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1499
|
+
EOERR
|
814
1500
|
end
|
815
1501
|
|
816
1502
|
private
|
817
1503
|
|
818
|
-
def
|
819
|
-
|
820
|
-
|
1504
|
+
def keywordify(keywords)
|
1505
|
+
case keywords
|
1506
|
+
when Enumerable
|
1507
|
+
keywords
|
1508
|
+
when String
|
1509
|
+
keywords.scan(/\S+/)
|
1510
|
+
else
|
1511
|
+
raise ArgumentError,
|
1512
|
+
"Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
|
1513
|
+
end
|
1514
|
+
end
|
1515
|
+
|
1516
|
+
def add_sibling(next_or_previous, node_or_tags)
|
1517
|
+
raise("Cannot add sibling to a node with no parent") unless parent
|
821
1518
|
|
822
|
-
|
1519
|
+
impl = next_or_previous == :next ? :add_next_sibling_node : :add_previous_sibling_node
|
1520
|
+
iter = next_or_previous == :next ? :reverse_each : :each
|
1521
|
+
|
1522
|
+
node_or_tags = parent.coerce(node_or_tags)
|
823
1523
|
if node_or_tags.is_a?(XML::NodeSet)
|
824
1524
|
if text?
|
825
|
-
pivot = Nokogiri::XML::Node.new
|
826
|
-
send
|
1525
|
+
pivot = Nokogiri::XML::Node.new("dummy", document)
|
1526
|
+
send(impl, pivot)
|
827
1527
|
else
|
828
1528
|
pivot = self
|
829
1529
|
end
|
830
|
-
node_or_tags.send(iter) { |n| pivot.send
|
1530
|
+
node_or_tags.send(iter) { |n| pivot.send(impl, n) }
|
831
1531
|
pivot.unlink if text?
|
832
1532
|
else
|
833
|
-
send
|
1533
|
+
send(impl, node_or_tags)
|
834
1534
|
end
|
835
1535
|
node_or_tags
|
836
1536
|
end
|
837
1537
|
|
838
|
-
|
839
|
-
|
840
|
-
|
1538
|
+
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
1539
|
+
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1540
|
+
|
1541
|
+
def to_format(save_option, options)
|
1542
|
+
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
841
1543
|
|
842
1544
|
options[:save_with] = save_option unless options[:save_with]
|
843
1545
|
serialize(options)
|
844
1546
|
end
|
845
1547
|
|
846
|
-
def write_format_to
|
847
|
-
|
848
|
-
return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
1548
|
+
def write_format_to(save_option, io, options)
|
1549
|
+
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
849
1550
|
|
850
1551
|
options[:save_with] ||= save_option
|
851
|
-
write_to
|
1552
|
+
write_to(io, options)
|
852
1553
|
end
|
853
1554
|
|
854
1555
|
def inspect_attributes
|
855
1556
|
[:name, :namespace, :attribute_nodes, :children]
|
856
1557
|
end
|
857
1558
|
|
858
|
-
|
859
|
-
case data
|
860
|
-
when XML::NodeSet
|
861
|
-
return data
|
862
|
-
when XML::DocumentFragment
|
863
|
-
return data.children
|
864
|
-
when String
|
865
|
-
return fragment(data).children
|
866
|
-
when Document, XML::Attr
|
867
|
-
# unacceptable
|
868
|
-
when XML::Node
|
869
|
-
return data
|
870
|
-
end
|
871
|
-
|
872
|
-
raise ArgumentError, <<-EOERR
|
873
|
-
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
874
|
-
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
875
|
-
EOERR
|
876
|
-
end
|
877
|
-
|
878
|
-
# @private
|
879
|
-
IMPLIED_XPATH_CONTEXTS = [ './/'.freeze ].freeze # :nodoc:
|
1559
|
+
IMPLIED_XPATH_CONTEXTS = [".//"].freeze
|
880
1560
|
|
881
|
-
def add_child_node_and_reparent_attrs
|
882
|
-
add_child_node
|
883
|
-
node.attribute_nodes.find_all { |a| a.name
|
1561
|
+
def add_child_node_and_reparent_attrs(node)
|
1562
|
+
add_child_node(node)
|
1563
|
+
node.attribute_nodes.find_all { |a| a.name.include?(":") }.each do |attr_node|
|
884
1564
|
attr_node.remove
|
885
1565
|
node[attr_node.name] = attr_node.value
|
886
1566
|
end
|
@@ -888,3 +1568,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
|
888
1568
|
end
|
889
1569
|
end
|
890
1570
|
end
|
1571
|
+
|
1572
|
+
require_relative "node/save_options"
|