nokogiri 1.5.10 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1903 -0
- data/LICENSE.md +9 -0
- data/README.md +278 -0
- data/bin/nokogiri +50 -10
- data/dependencies.yml +74 -0
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +944 -100
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +232 -87
- data/ext/nokogiri/nokogiri.h +188 -129
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +49 -40
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +24 -23
- data/ext/nokogiri/xml_comment.c +29 -21
- data/ext/nokogiri/xml_document.c +305 -201
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +30 -19
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +74 -32
- data/ext/nokogiri/xml_node.c +808 -503
- data/ext/nokogiri/xml_node_set.c +239 -208
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +198 -186
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +123 -125
- data/ext/nokogiri/xml_sax_parser_context.c +138 -79
- data/ext/nokogiri/xml_sax_push_parser.c +88 -35
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +50 -23
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +162 -98
- data/ext/nokogiri/xslt_stylesheet.c +162 -168
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/node.rb +1 -50
- data/lib/nokogiri/css/parser.rb +317 -286
- data/lib/nokogiri/css/parser.y +57 -43
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +105 -103
- data/lib/nokogiri/css/tokenizer.rex +5 -5
- data/lib/nokogiri/css/xpath_visitor.rb +137 -48
- data/lib/nokogiri/css.rb +15 -14
- data/lib/nokogiri/decorators/slop.rb +13 -5
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +32 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +118 -50
- data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +215 -0
- data/lib/nokogiri/version.rb +3 -91
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +75 -33
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +157 -54
- data/lib/nokogiri/xml/document_fragment.rb +55 -8
- data/lib/nokogiri/xml/dtd.rb +15 -4
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node/save_options.rb +2 -1
- data/lib/nokogiri/xml/node.rb +712 -431
- data/lib/nokogiri/xml/node_set.rb +140 -123
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +31 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +9 -12
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax/document.rb +25 -30
- data/lib/nokogiri/xml/sax/parser.rb +8 -8
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +239 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +4 -5
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xml.rb +37 -35
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +17 -16
- data/lib/nokogiri.rb +55 -58
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +307 -459
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -56
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -13
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -14
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/lib/nokogiri/html/sax/push_parser.rb +0 -16
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,87 +1,102 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
require "stringio"
|
3
4
|
|
4
5
|
module Nokogiri
|
5
6
|
module XML
|
6
|
-
|
7
|
-
# Nokogiri::XML::Node is your window to the fun filled world of dealing
|
8
|
-
#
|
9
|
-
#
|
7
|
+
##
|
8
|
+
# {Nokogiri::XML::Node} is your window to the fun filled world of dealing with XML and HTML
|
9
|
+
# tags. A {Nokogiri::XML::Node} may be treated similarly to a hash with regard to attributes. For
|
10
|
+
# example:
|
10
11
|
#
|
11
|
-
#
|
12
|
-
# => <a href
|
13
|
-
#
|
14
|
-
# => "
|
15
|
-
#
|
16
|
-
# =>
|
17
|
-
#
|
18
|
-
# => ["#foo", "link"]
|
19
|
-
# irb(main):008:0> node['class'] = 'green'
|
20
|
-
# => "green"
|
21
|
-
# irb(main):009:0> node
|
22
|
-
# => <a href="#foo" id="link" class="green">link</a>
|
23
|
-
# irb(main):010:0>
|
12
|
+
# node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
|
13
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
|
14
|
+
# node['href'] # => "#foo"
|
15
|
+
# node.keys # => ["href", "id"]
|
16
|
+
# node.values # => ["#foo", "link"]
|
17
|
+
# node['class'] = 'green' # => "green"
|
18
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
|
24
19
|
#
|
25
|
-
# See
|
20
|
+
# See the method group entitled "Working With Node Attributes" for the full set of methods.
|
26
21
|
#
|
27
|
-
# Nokogiri::XML::Node also has methods that let you move around your
|
22
|
+
# {Nokogiri::XML::Node} also has methods that let you move around your
|
28
23
|
# tree. For navigating your tree, see:
|
29
24
|
#
|
30
|
-
# *
|
31
|
-
# *
|
32
|
-
# *
|
33
|
-
# *
|
25
|
+
# * {#parent}
|
26
|
+
# * {#children}
|
27
|
+
# * {#next}
|
28
|
+
# * {#previous}
|
29
|
+
#
|
30
|
+
# When printing or otherwise emitting a document or a node (and
|
31
|
+
# its subtree), there are a few methods you might want to use:
|
32
|
+
#
|
33
|
+
# * {#content}, {#text}, {#inner_text}, {#to_str}: These methods will all <b>emit plaintext</b>,
|
34
|
+
# meaning that entities will be replaced (e.g., "&lt;" will be replaced with "<"), meaning
|
35
|
+
# that any sanitizing will likely be un-done in the output.
|
36
|
+
#
|
37
|
+
# * {#to_s}, {#to_xml}, {#to_html}, {#inner_html}: These methods will all <b>emit
|
38
|
+
# properly-escaped markup</b>, meaning that it's suitable for consumption by browsers,
|
39
|
+
# parsers, etc.
|
40
|
+
#
|
41
|
+
# You may search this node's subtree using {#xpath} and {#css}
|
34
42
|
#
|
35
|
-
# You may search this node's subtree using Node#xpath and Node#css
|
36
43
|
class Node
|
37
44
|
include Nokogiri::XML::PP::Node
|
45
|
+
include Nokogiri::XML::Searchable
|
38
46
|
include Enumerable
|
39
47
|
|
40
|
-
# Element node type, see Nokogiri::XML::Node#element?
|
41
|
-
ELEMENT_NODE =
|
48
|
+
# Element node type, see {Nokogiri::XML::Node#element?}
|
49
|
+
ELEMENT_NODE = 1
|
42
50
|
# Attribute node type
|
43
|
-
ATTRIBUTE_NODE =
|
44
|
-
# Text node type, see Nokogiri::XML::Node#text?
|
45
|
-
TEXT_NODE =
|
46
|
-
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
51
|
+
ATTRIBUTE_NODE = 2
|
52
|
+
# Text node type, see {Nokogiri::XML::Node#text?}
|
53
|
+
TEXT_NODE = 3
|
54
|
+
# CDATA node type, see {Nokogiri::XML::Node#cdata?}
|
47
55
|
CDATA_SECTION_NODE = 4
|
48
56
|
# Entity reference node type
|
49
|
-
ENTITY_REF_NODE =
|
57
|
+
ENTITY_REF_NODE = 5
|
50
58
|
# Entity node type
|
51
|
-
ENTITY_NODE =
|
59
|
+
ENTITY_NODE = 6
|
52
60
|
# PI node type
|
53
|
-
PI_NODE =
|
54
|
-
# Comment node type, see Nokogiri::XML::Node#comment?
|
55
|
-
COMMENT_NODE =
|
56
|
-
# Document node type, see Nokogiri::XML::Node#xml?
|
57
|
-
DOCUMENT_NODE =
|
61
|
+
PI_NODE = 7
|
62
|
+
# Comment node type, see {Nokogiri::XML::Node#comment?}
|
63
|
+
COMMENT_NODE = 8
|
64
|
+
# Document node type, see {Nokogiri::XML::Node#xml?}
|
65
|
+
DOCUMENT_NODE = 9
|
58
66
|
# Document type node type
|
59
67
|
DOCUMENT_TYPE_NODE = 10
|
60
68
|
# Document fragment node type
|
61
69
|
DOCUMENT_FRAG_NODE = 11
|
62
70
|
# Notation node type
|
63
|
-
NOTATION_NODE =
|
64
|
-
# HTML document node type, see Nokogiri::XML::Node#html?
|
71
|
+
NOTATION_NODE = 12
|
72
|
+
# HTML document node type, see {Nokogiri::XML::Node#html?}
|
65
73
|
HTML_DOCUMENT_NODE = 13
|
66
74
|
# DTD node type
|
67
|
-
DTD_NODE =
|
75
|
+
DTD_NODE = 14
|
68
76
|
# Element declaration type
|
69
|
-
ELEMENT_DECL =
|
77
|
+
ELEMENT_DECL = 15
|
70
78
|
# Attribute declaration type
|
71
|
-
ATTRIBUTE_DECL =
|
79
|
+
ATTRIBUTE_DECL = 16
|
72
80
|
# Entity declaration type
|
73
|
-
ENTITY_DECL =
|
81
|
+
ENTITY_DECL = 17
|
74
82
|
# Namespace declaration type
|
75
|
-
NAMESPACE_DECL =
|
83
|
+
NAMESPACE_DECL = 18
|
76
84
|
# XInclude start type
|
77
|
-
XINCLUDE_START =
|
85
|
+
XINCLUDE_START = 19
|
78
86
|
# XInclude end type
|
79
|
-
XINCLUDE_END =
|
87
|
+
XINCLUDE_END = 20
|
80
88
|
# DOCB document node type
|
81
89
|
DOCB_DOCUMENT_NODE = 21
|
82
90
|
|
83
|
-
|
84
|
-
|
91
|
+
##
|
92
|
+
# Create a new node with +name+ sharing GC lifecycle with +document+.
|
93
|
+
# @param name [String]
|
94
|
+
# @param document [Nokogiri::XML::Document]
|
95
|
+
# @yieldparam node [Nokogiri::XML::Node]
|
96
|
+
# @return [Nokogiri::XML::Node]
|
97
|
+
# @see Nokogiri::XML::Node.new
|
98
|
+
def initialize(name, document)
|
99
|
+
# This is intentionally empty.
|
85
100
|
end
|
86
101
|
|
87
102
|
###
|
@@ -90,175 +105,18 @@ module Nokogiri
|
|
90
105
|
document.decorate(self)
|
91
106
|
end
|
92
107
|
|
93
|
-
|
94
|
-
# Search this node for +paths+. +paths+ can be XPath or CSS, and an
|
95
|
-
# optional hash of namespaces may be appended.
|
96
|
-
# See Node#xpath and Node#css.
|
97
|
-
def search *paths
|
98
|
-
# TODO use paths, handler, ns, binds = extract_params(paths)
|
99
|
-
ns = paths.last.is_a?(Hash) ? paths.pop :
|
100
|
-
(document.root ? document.root.namespaces : {})
|
101
|
-
|
102
|
-
prefix = "#{implied_xpath_context}/"
|
103
|
-
|
104
|
-
xpath(*(paths.map { |path|
|
105
|
-
path = path.to_s
|
106
|
-
path =~ /^(\.\/|\/|\.\.|\.$)/ ? path : CSS.xpath_for(
|
107
|
-
path,
|
108
|
-
:prefix => prefix,
|
109
|
-
:ns => ns
|
110
|
-
)
|
111
|
-
}.flatten.uniq) + [ns])
|
112
|
-
end
|
113
|
-
alias :/ :search
|
114
|
-
|
115
|
-
###
|
116
|
-
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
117
|
-
#
|
118
|
-
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
119
|
-
# queries.
|
120
|
-
#
|
121
|
-
# node.xpath('.//title')
|
122
|
-
#
|
123
|
-
# A hash of namespace bindings may be appended. For example:
|
124
|
-
#
|
125
|
-
# node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
|
126
|
-
# node.xpath('.//xmlns:name', node.root.namespaces)
|
127
|
-
#
|
128
|
-
# A hash of variable bindings may also be appended to the namespace bindings. For example:
|
129
|
-
#
|
130
|
-
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
131
|
-
#
|
132
|
-
# Custom XPath functions may also be defined. To define custom
|
133
|
-
# functions create a class and implement the function you want
|
134
|
-
# to define. The first argument to the method will be the
|
135
|
-
# current matching NodeSet. Any other arguments are ones that
|
136
|
-
# you pass in. Note that this class may appear anywhere in the
|
137
|
-
# argument list. For example:
|
138
|
-
#
|
139
|
-
# node.xpath('.//title[regex(., "\w+")]', Class.new {
|
140
|
-
# def regex node_set, regex
|
141
|
-
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
142
|
-
# end
|
143
|
-
# }.new)
|
144
|
-
#
|
145
|
-
def xpath *paths
|
146
|
-
return NodeSet.new(document) unless document
|
147
|
-
|
148
|
-
paths, handler, ns, binds = extract_params(paths)
|
149
|
-
|
150
|
-
sets = paths.map { |path|
|
151
|
-
ctx = XPathContext.new(self)
|
152
|
-
ctx.register_namespaces(ns)
|
153
|
-
path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
|
154
|
-
|
155
|
-
binds.each do |key,value|
|
156
|
-
ctx.register_variable key.to_s, value
|
157
|
-
end if binds
|
158
|
-
|
159
|
-
ctx.evaluate(path, handler)
|
160
|
-
}
|
161
|
-
return sets.first if sets.length == 1
|
162
|
-
|
163
|
-
NodeSet.new(document) do |combined|
|
164
|
-
sets.each do |set|
|
165
|
-
set.each do |node|
|
166
|
-
combined << node
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
###
|
173
|
-
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
174
|
-
#
|
175
|
-
# Search this node for CSS +rules+. +rules+ must be one or more CSS
|
176
|
-
# selectors. For example:
|
177
|
-
#
|
178
|
-
# node.css('title')
|
179
|
-
# node.css('body h1.bold')
|
180
|
-
# node.css('div + p.green', 'div#one')
|
181
|
-
#
|
182
|
-
# A hash of namespace bindings may be appended. For example:
|
183
|
-
#
|
184
|
-
# node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
|
185
|
-
#
|
186
|
-
# Custom CSS pseudo classes may also be defined. To define
|
187
|
-
# custom pseudo classes, create a class and implement the custom
|
188
|
-
# pseudo class you want defined. The first argument to the
|
189
|
-
# method will be the current matching NodeSet. Any other
|
190
|
-
# arguments are ones that you pass in. For example:
|
191
|
-
#
|
192
|
-
# node.css('title:regex("\w+")', Class.new {
|
193
|
-
# def regex node_set, regex
|
194
|
-
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
195
|
-
# end
|
196
|
-
# }.new)
|
197
|
-
#
|
198
|
-
# Note that the CSS query string is case-sensitive with regards
|
199
|
-
# to your document type. That is, if you're looking for "H1" in
|
200
|
-
# an HTML document, you'll never find anything, since HTML tags
|
201
|
-
# will match only lowercase CSS queries. However, "H1" might be
|
202
|
-
# found in an XML document, where tags names are case-sensitive
|
203
|
-
# (e.g., "H1" is distinct from "h1").
|
204
|
-
#
|
205
|
-
def css *rules
|
206
|
-
rules, handler, ns, binds = extract_params(rules)
|
207
|
-
|
208
|
-
prefix = "#{implied_xpath_context}/"
|
209
|
-
|
210
|
-
rules = rules.map { |rule|
|
211
|
-
CSS.xpath_for(rule, :prefix => prefix, :ns => ns)
|
212
|
-
}.flatten.uniq + [ns, handler, binds].compact
|
213
|
-
|
214
|
-
xpath(*rules)
|
215
|
-
end
|
108
|
+
# @!group Searching via XPath or CSS Queries
|
216
109
|
|
217
110
|
###
|
218
111
|
# Search this node's immediate children using CSS selector +selector+
|
219
|
-
def >
|
112
|
+
def >(selector)
|
220
113
|
ns = document.root.namespaces
|
221
114
|
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
222
115
|
end
|
223
116
|
|
224
|
-
|
225
|
-
# Search for the first occurrence of +path+.
|
226
|
-
#
|
227
|
-
# Returns nil if nothing is found, otherwise a Node.
|
228
|
-
def at path, ns = document.root ? document.root.namespaces : {}
|
229
|
-
search(path, ns).first
|
230
|
-
end
|
231
|
-
alias :% :at
|
117
|
+
# @!endgroup
|
232
118
|
|
233
|
-
|
234
|
-
# Search this node for the first occurrence of XPath +paths+.
|
235
|
-
# Equivalent to <tt>xpath(paths).first</tt>
|
236
|
-
# See Node#xpath for more information.
|
237
|
-
#
|
238
|
-
def at_xpath *paths
|
239
|
-
xpath(*paths).first
|
240
|
-
end
|
241
|
-
|
242
|
-
##
|
243
|
-
# Search this node for the first occurrence of CSS +rules+.
|
244
|
-
# Equivalent to <tt>css(rules).first</tt>
|
245
|
-
# See Node#css for more information.
|
246
|
-
#
|
247
|
-
def at_css *rules
|
248
|
-
css(*rules).first
|
249
|
-
end
|
250
|
-
|
251
|
-
###
|
252
|
-
# Get the attribute value for the attribute +name+
|
253
|
-
def [] name
|
254
|
-
get(name.to_s)
|
255
|
-
end
|
256
|
-
|
257
|
-
###
|
258
|
-
# Set the attribute value for the attribute +name+ to +value+
|
259
|
-
def []= name, value
|
260
|
-
set name.to_s, value.to_s
|
261
|
-
end
|
119
|
+
# @!group Manipulating Document Structure
|
262
120
|
|
263
121
|
###
|
264
122
|
# Add +node_or_tags+ as a child of this Node.
|
@@ -267,7 +125,7 @@ module Nokogiri
|
|
267
125
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
268
126
|
#
|
269
127
|
# Also see related method +<<+.
|
270
|
-
def add_child
|
128
|
+
def add_child(node_or_tags)
|
271
129
|
node_or_tags = coerce(node_or_tags)
|
272
130
|
if node_or_tags.is_a?(XML::NodeSet)
|
273
131
|
node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
|
@@ -277,6 +135,34 @@ module Nokogiri
|
|
277
135
|
node_or_tags
|
278
136
|
end
|
279
137
|
|
138
|
+
###
|
139
|
+
# Add +node_or_tags+ as the first child of this Node.
|
140
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
141
|
+
#
|
142
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
143
|
+
#
|
144
|
+
# Also see related method +add_child+.
|
145
|
+
def prepend_child(node_or_tags)
|
146
|
+
if first = children.first
|
147
|
+
# Mimic the error add_child would raise.
|
148
|
+
raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
149
|
+
first.__send__(:add_sibling, :previous, node_or_tags)
|
150
|
+
else
|
151
|
+
add_child(node_or_tags)
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
###
|
156
|
+
# Add html around this node
|
157
|
+
#
|
158
|
+
# Returns self
|
159
|
+
def wrap(html)
|
160
|
+
new_parent = document.parse(html).first
|
161
|
+
add_next_sibling(new_parent)
|
162
|
+
new_parent.add_child(self)
|
163
|
+
self
|
164
|
+
end
|
165
|
+
|
280
166
|
###
|
281
167
|
# Add +node_or_tags+ as a child of this Node.
|
282
168
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
@@ -284,10 +170,11 @@ module Nokogiri
|
|
284
170
|
# Returns self, to support chaining of calls (e.g., root << child1 << child2)
|
285
171
|
#
|
286
172
|
# Also see related method +add_child+.
|
287
|
-
def <<
|
173
|
+
def <<(node_or_tags)
|
288
174
|
add_child node_or_tags
|
289
175
|
self
|
290
176
|
end
|
177
|
+
|
291
178
|
###
|
292
179
|
# Insert +node_or_tags+ before this Node (as a sibling).
|
293
180
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
@@ -295,8 +182,8 @@ module Nokogiri
|
|
295
182
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
296
183
|
#
|
297
184
|
# Also see related method +before+.
|
298
|
-
def add_previous_sibling
|
299
|
-
raise ArgumentError.new("A document may not have multiple root nodes.") if parent.
|
185
|
+
def add_previous_sibling(node_or_tags)
|
186
|
+
raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
300
187
|
|
301
188
|
add_sibling :previous, node_or_tags
|
302
189
|
end
|
@@ -308,9 +195,9 @@ module Nokogiri
|
|
308
195
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
309
196
|
#
|
310
197
|
# Also see related method +after+.
|
311
|
-
def add_next_sibling
|
312
|
-
raise ArgumentError.new("A document may not have multiple root nodes.") if parent.
|
313
|
-
|
198
|
+
def add_next_sibling(node_or_tags)
|
199
|
+
raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
200
|
+
|
314
201
|
add_sibling :next, node_or_tags
|
315
202
|
end
|
316
203
|
|
@@ -321,7 +208,7 @@ module Nokogiri
|
|
321
208
|
# Returns self, to support chaining of calls.
|
322
209
|
#
|
323
210
|
# Also see related method +add_previous_sibling+.
|
324
|
-
def before
|
211
|
+
def before(node_or_tags)
|
325
212
|
add_previous_sibling node_or_tags
|
326
213
|
self
|
327
214
|
end
|
@@ -333,7 +220,7 @@ module Nokogiri
|
|
333
220
|
# Returns self, to support chaining of calls.
|
334
221
|
#
|
335
222
|
# Also see related method +add_next_sibling+.
|
336
|
-
def after
|
223
|
+
def after(node_or_tags)
|
337
224
|
add_next_sibling node_or_tags
|
338
225
|
self
|
339
226
|
end
|
@@ -345,7 +232,7 @@ module Nokogiri
|
|
345
232
|
# Returns self.
|
346
233
|
#
|
347
234
|
# Also see related method +children=+
|
348
|
-
def inner_html=
|
235
|
+
def inner_html=(node_or_tags)
|
349
236
|
self.children = node_or_tags
|
350
237
|
self
|
351
238
|
end
|
@@ -357,7 +244,7 @@ module Nokogiri
|
|
357
244
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
358
245
|
#
|
359
246
|
# Also see related method +inner_html=+
|
360
|
-
def children=
|
247
|
+
def children=(node_or_tags)
|
361
248
|
node_or_tags = coerce(node_or_tags)
|
362
249
|
children.unlink
|
363
250
|
if node_or_tags.is_a?(XML::NodeSet)
|
@@ -375,19 +262,21 @@ module Nokogiri
|
|
375
262
|
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
376
263
|
#
|
377
264
|
# Also see related method +swap+.
|
378
|
-
def replace
|
265
|
+
def replace(node_or_tags)
|
266
|
+
raise("Cannot replace a node with no parent") unless parent
|
267
|
+
|
379
268
|
# We cannot replace a text node directly, otherwise libxml will return
|
380
269
|
# an internal error at parser.c:13031, I don't know exactly why
|
381
270
|
# libxml is trying to find a parent node that is an element or document
|
382
271
|
# so I can't tell if this is a bug in libxml or not. issue #775.
|
383
272
|
if text?
|
384
|
-
replacee = Nokogiri::XML::Node.new
|
273
|
+
replacee = Nokogiri::XML::Node.new "dummy", document
|
385
274
|
add_previous_sibling_node replacee
|
386
275
|
unlink
|
387
276
|
return replacee.replace node_or_tags
|
388
277
|
end
|
389
278
|
|
390
|
-
node_or_tags = coerce(node_or_tags)
|
279
|
+
node_or_tags = parent.coerce(node_or_tags)
|
391
280
|
|
392
281
|
if node_or_tags.is_a?(XML::NodeSet)
|
393
282
|
node_or_tags.each { |n| add_previous_sibling n }
|
@@ -405,33 +294,98 @@ module Nokogiri
|
|
405
294
|
# Returns self, to support chaining of calls.
|
406
295
|
#
|
407
296
|
# Also see related method +replace+.
|
408
|
-
def swap
|
297
|
+
def swap(node_or_tags)
|
409
298
|
replace node_or_tags
|
410
299
|
self
|
411
300
|
end
|
412
301
|
|
413
|
-
|
414
|
-
|
302
|
+
####
|
303
|
+
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
|
304
|
+
def content=(string)
|
305
|
+
self.native_content = encode_special_chars(string.to_s)
|
306
|
+
end
|
415
307
|
|
416
|
-
|
417
|
-
#
|
418
|
-
|
419
|
-
|
308
|
+
###
|
309
|
+
# Set the parent Node for this Node
|
310
|
+
def parent=(parent_node)
|
311
|
+
parent_node.add_child(self)
|
312
|
+
parent_node
|
313
|
+
end
|
420
314
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
315
|
+
###
|
316
|
+
# Adds a default namespace supplied as a string +url+ href, to self.
|
317
|
+
# The consequence is as if an xmlns attribute with the supplied argument were
|
318
|
+
# present in parsed XML. A default namespace set with this method will
|
319
|
+
# not show up in #attributes, but when this node is serialized to XML an
|
320
|
+
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
321
|
+
def default_namespace=(url)
|
322
|
+
add_namespace_definition(nil, url)
|
323
|
+
end
|
324
|
+
|
325
|
+
###
|
326
|
+
# Set the default namespace on this node (as would be defined with an
|
327
|
+
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
328
|
+
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
329
|
+
# for this node. You probably want #default_namespace= instead, or perhaps
|
330
|
+
# #add_namespace_definition with a nil prefix argument.
|
331
|
+
def namespace=(ns)
|
332
|
+
return set_namespace(ns) unless ns
|
333
|
+
|
334
|
+
unless Nokogiri::XML::Namespace === ns
|
335
|
+
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
336
|
+
end
|
337
|
+
if ns.document != document
|
338
|
+
raise ArgumentError, "namespace must be declared on the same document"
|
339
|
+
end
|
340
|
+
|
341
|
+
set_namespace ns
|
342
|
+
end
|
343
|
+
|
344
|
+
###
|
345
|
+
# Do xinclude substitution on the subtree below node. If given a block, a
|
346
|
+
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
347
|
+
# passed to it, allowing more convenient modification of the parser options.
|
348
|
+
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
|
349
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
350
|
+
|
351
|
+
# give options to user
|
352
|
+
yield options if block_given?
|
353
|
+
|
354
|
+
# call c extension
|
355
|
+
process_xincludes(options.to_i)
|
356
|
+
end
|
357
|
+
|
358
|
+
alias :next :next_sibling
|
359
|
+
alias :previous :previous_sibling
|
360
|
+
alias :next= :add_next_sibling
|
361
|
+
alias :previous= :add_previous_sibling
|
362
|
+
alias :remove :unlink
|
363
|
+
alias :name= :node_name=
|
364
|
+
alias :add_namespace :add_namespace_definition
|
365
|
+
|
366
|
+
# @!endgroup
|
367
|
+
|
368
|
+
alias :text :content
|
369
|
+
alias :inner_text :content
|
370
|
+
alias :name :node_name
|
371
|
+
alias :type :node_type
|
372
|
+
alias :to_str :text
|
373
|
+
alias :clone :dup
|
374
|
+
alias :elements :element_children
|
375
|
+
|
376
|
+
# @!group Working With Node Attributes
|
377
|
+
|
378
|
+
###
|
379
|
+
# Get the attribute value for the attribute +name+
|
380
|
+
def [](name)
|
381
|
+
get(name.to_s)
|
382
|
+
end
|
383
|
+
|
384
|
+
###
|
385
|
+
# Set the attribute value for the attribute +name+ to +value+
|
386
|
+
def []=(name, value)
|
387
|
+
set name.to_s, value.to_s
|
388
|
+
end
|
435
389
|
|
436
390
|
####
|
437
391
|
# Returns a hash containing the node's attributes. The key is
|
@@ -440,21 +394,27 @@ module Nokogiri
|
|
440
394
|
# If you need to distinguish attributes with the same name, with different namespaces
|
441
395
|
# use #attribute_nodes instead.
|
442
396
|
def attributes
|
443
|
-
|
444
|
-
[node.node_name
|
445
|
-
|
397
|
+
attribute_nodes.each_with_object({}) do |node, hash|
|
398
|
+
hash[node.node_name] = node
|
399
|
+
end
|
446
400
|
end
|
447
401
|
|
448
402
|
###
|
449
403
|
# Get the attribute values for this Node.
|
450
404
|
def values
|
451
|
-
attribute_nodes.map
|
405
|
+
attribute_nodes.map(&:value)
|
406
|
+
end
|
407
|
+
|
408
|
+
###
|
409
|
+
# Do this Node's attribute values include +value+?
|
410
|
+
def value?(value)
|
411
|
+
values.include? value
|
452
412
|
end
|
453
413
|
|
454
414
|
###
|
455
415
|
# Get the attribute names for this Node.
|
456
416
|
def keys
|
457
|
-
attribute_nodes.map
|
417
|
+
attribute_nodes.map(&:node_name)
|
458
418
|
end
|
459
419
|
|
460
420
|
###
|
@@ -467,21 +427,365 @@ module Nokogiri
|
|
467
427
|
|
468
428
|
###
|
469
429
|
# Remove the attribute named +name+
|
470
|
-
def remove_attribute
|
471
|
-
attributes[name].remove if key? name
|
430
|
+
def remove_attribute(name)
|
431
|
+
attr = attributes[name].remove if key? name
|
432
|
+
clear_xpath_context if Nokogiri.jruby?
|
433
|
+
attr
|
434
|
+
end
|
435
|
+
|
436
|
+
# Get the CSS class names of a Node.
|
437
|
+
#
|
438
|
+
# This is a convenience function and is equivalent to:
|
439
|
+
# node.kwattr_values("class")
|
440
|
+
#
|
441
|
+
# @see #kwattr_values
|
442
|
+
# @see #add_class
|
443
|
+
# @see #append_class
|
444
|
+
# @see #remove_class
|
445
|
+
#
|
446
|
+
# @return [Array<String>]
|
447
|
+
#
|
448
|
+
# The CSS classes present in the Node's +class+ attribute. If
|
449
|
+
# the attribute is empty or non-existent, the return value is
|
450
|
+
# an empty array.
|
451
|
+
#
|
452
|
+
# @example
|
453
|
+
# node # => <div class="section title header"></div>
|
454
|
+
# node.classes # => ["section", "title", "header"]
|
455
|
+
#
|
456
|
+
def classes
|
457
|
+
kwattr_values("class")
|
458
|
+
end
|
459
|
+
|
460
|
+
# Ensure HTML CSS classes are present on a +Node+. Any CSS
|
461
|
+
# classes in +names+ that already exist in the +Node+'s +class+
|
462
|
+
# attribute are _not_ added. Note that any existing duplicates
|
463
|
+
# in the +class+ attribute are not removed. Compare with
|
464
|
+
# {#append_class}.
|
465
|
+
#
|
466
|
+
# This is a convenience function and is equivalent to:
|
467
|
+
# node.kwattr_add("class", names)
|
468
|
+
#
|
469
|
+
# @see #kwattr_add
|
470
|
+
# @see #classes
|
471
|
+
# @see #append_class
|
472
|
+
# @see #remove_class
|
473
|
+
#
|
474
|
+
# @param names [String, Array<String>]
|
475
|
+
#
|
476
|
+
# CSS class names to be added to the Node's +class+
|
477
|
+
# attribute. May be a string containing whitespace-delimited
|
478
|
+
# names, or an Array of String names. Any class names already
|
479
|
+
# present will not be added. Any class names not present will
|
480
|
+
# be added. If no +class+ attribute exists, one is created.
|
481
|
+
#
|
482
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
483
|
+
#
|
484
|
+
# @example Ensure that a +Node+ has CSS class "section"
|
485
|
+
# node # => <div></div>
|
486
|
+
# node.add_class("section") # => <div class="section"></div>
|
487
|
+
# node.add_class("section") # => <div class="section"></div> # duplicate not added
|
488
|
+
#
|
489
|
+
# @example Ensure that a +Node+ has CSS classes "section" and "header", via a String argument.
|
490
|
+
# node # => <div class="section section"></div>
|
491
|
+
# node.add_class("section header") # => <div class="section section header"></div>
|
492
|
+
# # Note that the CSS class "section" is not added because it is already present.
|
493
|
+
# # Note also that the pre-existing duplicate CSS class "section" is not removed.
|
494
|
+
#
|
495
|
+
# @example Ensure that a +Node+ has CSS classes "section" and "header", via an Array argument.
|
496
|
+
# node # => <div></div>
|
497
|
+
# node.add_class(["section", "header"]) # => <div class="section header"></div>
|
498
|
+
#
|
499
|
+
def add_class(names)
|
500
|
+
kwattr_add("class", names)
|
501
|
+
end
|
502
|
+
|
503
|
+
# Add HTML CSS classes to a +Node+, regardless of
|
504
|
+
# duplication. Compare with {#add_class}.
|
505
|
+
#
|
506
|
+
# This is a convenience function and is equivalent to:
|
507
|
+
# node.kwattr_append("class", names)
|
508
|
+
#
|
509
|
+
# @see #kwattr_append
|
510
|
+
# @see #classes
|
511
|
+
# @see #add_class
|
512
|
+
# @see #remove_class
|
513
|
+
#
|
514
|
+
# @param names [String, Array<String>]
|
515
|
+
#
|
516
|
+
# CSS class names to be appended to the Node's +class+
|
517
|
+
# attribute. May be a string containing whitespace-delimited
|
518
|
+
# names, or an Array of String names. All class names passed
|
519
|
+
# in will be appended to the +class+ attribute even if they
|
520
|
+
# are already present in the attribute value. If no +class+
|
521
|
+
# attribute exists, one is created.
|
522
|
+
#
|
523
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
524
|
+
#
|
525
|
+
# @example Append "section" to a +Node+'s CSS +class+ attribute
|
526
|
+
# node # => <div></div>
|
527
|
+
# node.append_class("section") # => <div class="section"></div>
|
528
|
+
# node.append_class("section") # => <div class="section section"></div> # duplicate added!
|
529
|
+
#
|
530
|
+
# @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via a String argument.
|
531
|
+
# node # => <div class="section section"></div>
|
532
|
+
# node.append_class("section header") # => <div class="section section section header"></div>
|
533
|
+
# # Note that the CSS class "section" is appended even though it is already present.
|
534
|
+
#
|
535
|
+
# @example Append "section" and "header" to a +Node+'s CSS +class+ attribute, via an Array argument.
|
536
|
+
# node # => <div></div>
|
537
|
+
# node.append_class(["section", "header"]) # => <div class="section header"></div>
|
538
|
+
# node.append_class(["section", "header"]) # => <div class="section header section header"></div>
|
539
|
+
#
|
540
|
+
def append_class(names)
|
541
|
+
kwattr_append("class", names)
|
542
|
+
end
|
543
|
+
|
544
|
+
# Remove HTML CSS classes from a +Node+. Any CSS classes in +names+ that
|
545
|
+
# exist in the +Node+'s +class+ attribute are removed, including any
|
546
|
+
# multiple entries.
|
547
|
+
#
|
548
|
+
# If no CSS classes remain after this operation, or if +names+ is
|
549
|
+
# +nil+, the +class+ attribute is deleted from the node.
|
550
|
+
#
|
551
|
+
# This is a convenience function and is equivalent to:
|
552
|
+
# node.kwattr_remove("class", names)
|
553
|
+
#
|
554
|
+
# @see #kwattr_remove
|
555
|
+
# @see #classes
|
556
|
+
# @see #add_class
|
557
|
+
# @see #append_class
|
558
|
+
#
|
559
|
+
# @param names [String, Array<String>]
|
560
|
+
#
|
561
|
+
# CSS class names to be removed from the Node's +class+ attribute. May
|
562
|
+
# be a string containing whitespace-delimited names, or an Array of
|
563
|
+
# String names. Any class names already present will be removed. If no
|
564
|
+
# CSS classes remain, the +class+ attribute is deleted.
|
565
|
+
#
|
566
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
567
|
+
#
|
568
|
+
# @example
|
569
|
+
# node # => <div class="section header"></div>
|
570
|
+
# node.remove_class("section") # => <div class="header"></div>
|
571
|
+
# node.remove_class("header") # => <div></div> # attribute is deleted when empty
|
572
|
+
#
|
573
|
+
def remove_class(names = nil)
|
574
|
+
kwattr_remove("class", names)
|
575
|
+
end
|
576
|
+
|
577
|
+
# Retrieve values from a keyword attribute of a Node.
|
578
|
+
#
|
579
|
+
# A "keyword attribute" is a node attribute that contains a set
|
580
|
+
# of space-delimited values. Perhaps the most familiar example
|
581
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
582
|
+
# classes. But other keyword attributes exist, for instance
|
583
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
584
|
+
#
|
585
|
+
# @see #classes
|
586
|
+
# @see #kwattr_add
|
587
|
+
# @see #kwattr_append
|
588
|
+
# @see #kwattr_remove
|
589
|
+
#
|
590
|
+
# @param attribute_name [String] The name of the keyword attribute to be inspected.
|
591
|
+
#
|
592
|
+
# @return [Array<String>]
|
593
|
+
#
|
594
|
+
# The values present in the Node's +attribute_name+
|
595
|
+
# attribute. If the attribute is empty or non-existent, the
|
596
|
+
# return value is an empty array.
|
597
|
+
#
|
598
|
+
# @example
|
599
|
+
# node # => <a rel="nofollow noopener external">link</a>
|
600
|
+
# node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
|
601
|
+
#
|
602
|
+
# @since v1.11.0
|
603
|
+
#
|
604
|
+
def kwattr_values(attribute_name)
|
605
|
+
keywordify(get_attribute(attribute_name) || [])
|
472
606
|
end
|
607
|
+
|
608
|
+
# Ensure that values are present in a keyword attribute.
|
609
|
+
#
|
610
|
+
# Any values in +keywords+ that already exist in the +Node+'s
|
611
|
+
# attribute values are _not_ added. Note that any existing
|
612
|
+
# duplicates in the attribute values are not removed. Compare
|
613
|
+
# with {#kwattr_append}.
|
614
|
+
#
|
615
|
+
# A "keyword attribute" is a node attribute that contains a set
|
616
|
+
# of space-delimited values. Perhaps the most familiar example
|
617
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
618
|
+
# classes. But other keyword attributes exist, for instance
|
619
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
620
|
+
#
|
621
|
+
# @see #add_class
|
622
|
+
# @see #kwattr_values
|
623
|
+
# @see #kwattr_append
|
624
|
+
# @see #kwattr_remove
|
625
|
+
#
|
626
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
627
|
+
#
|
628
|
+
# @param keywords [String, Array<String>]
|
629
|
+
#
|
630
|
+
# Keywords to be added to the attribute named
|
631
|
+
# +attribute_name+. May be a string containing
|
632
|
+
# whitespace-delimited values, or an Array of String
|
633
|
+
# values. Any values already present will not be added. Any
|
634
|
+
# values not present will be added. If the named attribute
|
635
|
+
# does not exist, it is created.
|
636
|
+
#
|
637
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
638
|
+
#
|
639
|
+
# @example Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
|
640
|
+
# node # => <a></a>
|
641
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a>
|
642
|
+
# node.kwattr_add("rel", "nofollow") # => <a rel="nofollow"></a> # duplicate not added
|
643
|
+
#
|
644
|
+
# @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a String argument.
|
645
|
+
# node # => <a rel="nofollow nofollow"></a>
|
646
|
+
# node.kwattr_add("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
647
|
+
# # Note that "nofollow" is not added because it is already present.
|
648
|
+
# # Note also that the pre-existing duplicate "nofollow" is not removed.
|
649
|
+
#
|
650
|
+
# @example Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via an Array argument.
|
651
|
+
# node # => <a></a>
|
652
|
+
# node.kwattr_add("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
653
|
+
#
|
654
|
+
# @since v1.11.0
|
655
|
+
#
|
656
|
+
def kwattr_add(attribute_name, keywords)
|
657
|
+
keywords = keywordify(keywords)
|
658
|
+
current_kws = kwattr_values(attribute_name)
|
659
|
+
new_kws = (current_kws + (keywords - current_kws)).join(" ")
|
660
|
+
set_attribute(attribute_name, new_kws)
|
661
|
+
self
|
662
|
+
end
|
663
|
+
|
664
|
+
# Add keywords to a Node's keyword attribute, regardless of
|
665
|
+
# duplication. Compare with {#kwattr_add}.
|
666
|
+
#
|
667
|
+
# A "keyword attribute" is a node attribute that contains a set
|
668
|
+
# of space-delimited values. Perhaps the most familiar example
|
669
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
670
|
+
# classes. But other keyword attributes exist, for instance
|
671
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
672
|
+
#
|
673
|
+
# @see #append_class
|
674
|
+
# @see #kwattr_values
|
675
|
+
# @see #kwattr_add
|
676
|
+
# @see #kwattr_remove
|
677
|
+
#
|
678
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
679
|
+
#
|
680
|
+
# @param keywords [String, Array<String>]
|
681
|
+
#
|
682
|
+
# Keywords to be added to the attribute named
|
683
|
+
# +attribute_name+. May be a string containing
|
684
|
+
# whitespace-delimited values, or an Array of String
|
685
|
+
# values. All values passed in will be appended to the named
|
686
|
+
# attribute even if they are already present in the
|
687
|
+
# attribute. If the named attribute does not exist, it is
|
688
|
+
# created.
|
689
|
+
#
|
690
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
691
|
+
#
|
692
|
+
# @example Append "nofollow" to the +rel+ attribute.
|
693
|
+
# node # => <a></a>
|
694
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow"></a>
|
695
|
+
# node.kwattr_append("rel", "nofollow") # => <a rel="nofollow nofollow"></a> # duplicate added!
|
696
|
+
#
|
697
|
+
# @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
|
698
|
+
# node # => <a rel="nofollow"></a>
|
699
|
+
# node.kwattr_append("rel", "nofollow noreferrer") # => <a rel="nofollow nofollow noreferrer"></a>
|
700
|
+
# # Note that "nofollow" is appended even though it is already present.
|
701
|
+
#
|
702
|
+
# @example Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
|
703
|
+
# node # => <a></a>
|
704
|
+
# node.kwattr_append("rel", ["nofollow", "noreferrer"]) # => <a rel="nofollow noreferrer"></a>
|
705
|
+
#
|
706
|
+
# @since v1.11.0
|
707
|
+
#
|
708
|
+
def kwattr_append(attribute_name, keywords)
|
709
|
+
keywords = keywordify(keywords)
|
710
|
+
current_kws = kwattr_values(attribute_name)
|
711
|
+
new_kws = (current_kws + keywords).join(" ")
|
712
|
+
set_attribute(attribute_name, new_kws)
|
713
|
+
self
|
714
|
+
end
|
715
|
+
|
716
|
+
# Remove keywords from a keyword attribute. Any matching
|
717
|
+
# keywords that exist in the named attribute are removed,
|
718
|
+
# including any multiple entries.
|
719
|
+
#
|
720
|
+
# If no keywords remain after this operation, or if +keywords+
|
721
|
+
# is +nil+, the attribute is deleted from the node.
|
722
|
+
#
|
723
|
+
# A "keyword attribute" is a node attribute that contains a set
|
724
|
+
# of space-delimited values. Perhaps the most familiar example
|
725
|
+
# of this is the HTML +class+ attribute used to contain CSS
|
726
|
+
# classes. But other keyword attributes exist, for instance
|
727
|
+
# [`rel`](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel).
|
728
|
+
#
|
729
|
+
# @see #remove_class
|
730
|
+
# @see #kwattr_values
|
731
|
+
# @see #kwattr_add
|
732
|
+
# @see #kwattr_append
|
733
|
+
#
|
734
|
+
# @param attribute_name [String] The name of the keyword attribute to be modified.
|
735
|
+
#
|
736
|
+
# @param keywords [String, Array<String>]
|
737
|
+
#
|
738
|
+
# Keywords to be removed from the attribute named
|
739
|
+
# +attribute_name+. May be a string containing
|
740
|
+
# whitespace-delimited values, or an Array of String
|
741
|
+
# values. Any keywords present in the named attribute will be
|
742
|
+
# removed. If no keywords remain, or if +keywords+ is nil, the
|
743
|
+
# attribute is deleted.
|
744
|
+
#
|
745
|
+
# @return [Node] Returns +self+ for ease of chaining method calls.
|
746
|
+
#
|
747
|
+
# @example
|
748
|
+
# node # => <a rel="nofollow noreferrer">link</a>
|
749
|
+
# node.kwattr_remove("rel", "nofollow") # => <a rel="noreferrer">link</a>
|
750
|
+
# node.kwattr_remove("rel", "noreferrer") # => <a>link</a> # attribute is deleted when empty
|
751
|
+
#
|
752
|
+
# @since v1.11.0
|
753
|
+
#
|
754
|
+
def kwattr_remove(attribute_name, keywords)
|
755
|
+
if keywords.nil?
|
756
|
+
remove_attribute(attribute_name)
|
757
|
+
return self
|
758
|
+
end
|
759
|
+
|
760
|
+
keywords = keywordify(keywords)
|
761
|
+
current_kws = kwattr_values(attribute_name)
|
762
|
+
new_kws = current_kws - keywords
|
763
|
+
if new_kws.empty?
|
764
|
+
remove_attribute(attribute_name)
|
765
|
+
else
|
766
|
+
set_attribute(attribute_name, new_kws.join(" "))
|
767
|
+
end
|
768
|
+
self
|
769
|
+
end
|
770
|
+
|
473
771
|
alias :delete :remove_attribute
|
772
|
+
alias :get_attribute :[]
|
773
|
+
alias :attr :[]
|
774
|
+
alias :set_attribute :[]=
|
775
|
+
alias :has_attribute? :key?
|
776
|
+
|
777
|
+
# @!endgroup
|
474
778
|
|
475
779
|
###
|
476
780
|
# Returns true if this Node matches +selector+
|
477
|
-
def matches?
|
781
|
+
def matches?(selector)
|
478
782
|
ancestors.last.search(selector).include?(self)
|
479
783
|
end
|
480
784
|
|
481
785
|
###
|
482
786
|
# Create a DocumentFragment containing +tags+ that is relative to _this_
|
483
787
|
# context node.
|
484
|
-
def fragment
|
788
|
+
def fragment(tags)
|
485
789
|
type = document.html? ? Nokogiri::HTML : Nokogiri::XML
|
486
790
|
type::DocumentFragment.new(document, tags, self)
|
487
791
|
end
|
@@ -490,9 +794,18 @@ module Nokogiri
|
|
490
794
|
# Parse +string_or_io+ as a document fragment within the context of
|
491
795
|
# *this* node. Returns an XML::NodeSet containing the nodes parsed from
|
492
796
|
# +string_or_io+.
|
493
|
-
def parse
|
797
|
+
def parse(string_or_io, options = nil)
|
798
|
+
##
|
799
|
+
# When the current node is unparented and not an element node, use the
|
800
|
+
# document as the parsing context instead. Otherwise, the in-context
|
801
|
+
# parser cannot find an element or a document node.
|
802
|
+
# Document Fragments are also not usable by the in-context parser.
|
803
|
+
if !element? && !document? && (!parent || parent.fragment?)
|
804
|
+
return document.parse(string_or_io, options)
|
805
|
+
end
|
806
|
+
|
494
807
|
options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
|
495
|
-
if
|
808
|
+
if Integer === options
|
496
809
|
options = Nokogiri::XML::ParseOptions.new(options)
|
497
810
|
end
|
498
811
|
# Give the options to the user
|
@@ -504,32 +817,36 @@ module Nokogiri
|
|
504
817
|
|
505
818
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
506
819
|
|
507
|
-
|
508
|
-
#
|
820
|
+
# libxml2 does not obey the `recover` option after encountering errors during `in_context`
|
821
|
+
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
822
|
+
#
|
823
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
824
|
+
# would have been inherited from the context node won't be handled correctly. This hack was
|
825
|
+
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
826
|
+
# that's not easily prevented (or even detected).
|
827
|
+
#
|
828
|
+
# I think preferable behavior would be to either:
|
829
|
+
#
|
830
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
|
831
|
+
# b. don't recover, but raise a sensible exception
|
832
|
+
#
|
833
|
+
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
834
|
+
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
509
835
|
error_count = document.errors.length
|
510
836
|
node_set = in_context(contents, options.to_i)
|
511
|
-
if node_set.empty?
|
512
|
-
|
513
|
-
|
837
|
+
if (node_set.empty? && (document.errors.length > error_count))
|
838
|
+
if options.recover?
|
839
|
+
fragment = Nokogiri::HTML4::DocumentFragment.parse contents
|
840
|
+
node_set = fragment.children
|
841
|
+
else
|
842
|
+
raise document.errors[error_count]
|
843
|
+
end
|
514
844
|
end
|
515
845
|
node_set
|
516
846
|
end
|
517
847
|
|
518
|
-
####
|
519
|
-
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
|
520
|
-
def content= string
|
521
|
-
self.native_content = encode_special_chars(string.to_s)
|
522
|
-
end
|
523
|
-
|
524
|
-
###
|
525
|
-
# Set the parent Node for this Node
|
526
|
-
def parent= parent_node
|
527
|
-
parent_node.add_child(self)
|
528
|
-
parent_node
|
529
|
-
end
|
530
|
-
|
531
848
|
###
|
532
|
-
# Returns a Hash of {prefix => value} for all namespaces on this
|
849
|
+
# Returns a Hash of +{prefix => value}+ for all namespaces on this
|
533
850
|
# node and its ancestors.
|
534
851
|
#
|
535
852
|
# This method returns the same namespaces as #namespace_scopes.
|
@@ -543,16 +860,11 @@ module Nokogiri
|
|
543
860
|
# default namespaces set on ancestor will NOT be, even if self
|
544
861
|
# has no explicit default namespace.
|
545
862
|
def namespaces
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
rescue ArgumentError
|
552
|
-
end
|
553
|
-
end
|
554
|
-
[key, nd.href]
|
555
|
-
}]
|
863
|
+
namespace_scopes.each_with_object({}) do |ns, hash|
|
864
|
+
prefix = ns.prefix
|
865
|
+
key = prefix ? "xmlns:#{prefix}" : "xmlns"
|
866
|
+
hash[key] = ns.href
|
867
|
+
end
|
556
868
|
end
|
557
869
|
|
558
870
|
# Returns true if this is a Comment
|
@@ -570,11 +882,21 @@ module Nokogiri
|
|
570
882
|
type == DOCUMENT_NODE
|
571
883
|
end
|
572
884
|
|
573
|
-
# Returns true if this is an
|
885
|
+
# Returns true if this is an HTML4::Document node
|
574
886
|
def html?
|
575
887
|
type == HTML_DOCUMENT_NODE
|
576
888
|
end
|
577
889
|
|
890
|
+
# Returns true if this is a Document
|
891
|
+
def document?
|
892
|
+
is_a? XML::Document
|
893
|
+
end
|
894
|
+
|
895
|
+
# Returns true if this is a ProcessingInstruction node
|
896
|
+
def processing_instruction?
|
897
|
+
type == PI_NODE
|
898
|
+
end
|
899
|
+
|
578
900
|
# Returns true if this is a Text node
|
579
901
|
def text?
|
580
902
|
type == TEXT_NODE
|
@@ -586,11 +908,11 @@ module Nokogiri
|
|
586
908
|
end
|
587
909
|
|
588
910
|
###
|
589
|
-
# Fetch the Nokogiri::
|
911
|
+
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
590
912
|
# nil on XML documents and on unknown tags.
|
591
913
|
def description
|
592
914
|
return nil if document.xml?
|
593
|
-
Nokogiri::
|
915
|
+
Nokogiri::HTML4::ElementDescription[name]
|
594
916
|
end
|
595
917
|
|
596
918
|
###
|
@@ -604,6 +926,7 @@ module Nokogiri
|
|
604
926
|
def element?
|
605
927
|
type == ELEMENT_NODE
|
606
928
|
end
|
929
|
+
|
607
930
|
alias :elem? :element?
|
608
931
|
|
609
932
|
###
|
@@ -614,7 +937,7 @@ module Nokogiri
|
|
614
937
|
end
|
615
938
|
|
616
939
|
# Get the inner_html for this node's Node#children
|
617
|
-
def inner_html
|
940
|
+
def inner_html(*args)
|
618
941
|
children.map { |x| x.to_html(*args) }.join
|
619
942
|
end
|
620
943
|
|
@@ -622,13 +945,13 @@ module Nokogiri
|
|
622
945
|
def css_path
|
623
946
|
path.split(/\//).map { |part|
|
624
947
|
part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
|
625
|
-
}.compact.join(
|
948
|
+
}.compact.join(" > ")
|
626
949
|
end
|
627
950
|
|
628
951
|
###
|
629
952
|
# Get a list of ancestor Node for this Node. If +selector+ is given,
|
630
953
|
# the ancestors must match +selector+
|
631
|
-
def ancestors
|
954
|
+
def ancestors(selector = nil)
|
632
955
|
return NodeSet.new(document) unless respond_to?(:parent)
|
633
956
|
return NodeSet.new(document) unless parent
|
634
957
|
|
@@ -642,63 +965,45 @@ module Nokogiri
|
|
642
965
|
return NodeSet.new(document, parents) unless selector
|
643
966
|
|
644
967
|
root = parents.last
|
968
|
+
search_results = root.search(selector)
|
645
969
|
|
646
970
|
NodeSet.new(document, parents.find_all { |parent|
|
647
|
-
|
971
|
+
search_results.include?(parent)
|
648
972
|
})
|
649
973
|
end
|
650
974
|
|
651
|
-
###
|
652
|
-
# Adds a default namespace supplied as a string +url+ href, to self.
|
653
|
-
# The consequence is as an xmlns attribute with supplied argument were
|
654
|
-
# present in parsed XML. A default namespace set with this method will
|
655
|
-
# now show up in #attributes, but when this node is serialized to XML an
|
656
|
-
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
657
|
-
def default_namespace= url
|
658
|
-
add_namespace_definition(nil, url)
|
659
|
-
end
|
660
|
-
alias :add_namespace :add_namespace_definition
|
661
|
-
|
662
|
-
###
|
663
|
-
# Set the default namespace on this node (as would be defined with an
|
664
|
-
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
665
|
-
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
666
|
-
# for this node. You probably want #default_namespace= instead, or perhaps
|
667
|
-
# #add_namespace_definition with a nil prefix argument.
|
668
|
-
def namespace= ns
|
669
|
-
return set_namespace(ns) unless ns
|
670
|
-
|
671
|
-
unless Nokogiri::XML::Namespace === ns
|
672
|
-
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
673
|
-
end
|
674
|
-
if ns.document != document
|
675
|
-
raise ArgumentError, 'namespace must be declared on the same document'
|
676
|
-
end
|
677
|
-
|
678
|
-
set_namespace ns
|
679
|
-
end
|
680
|
-
|
681
975
|
####
|
682
976
|
# Yields self and all children to +block+ recursively.
|
683
|
-
def traverse
|
684
|
-
children.each{|j| j.traverse(&block) }
|
977
|
+
def traverse(&block)
|
978
|
+
children.each { |j| j.traverse(&block) }
|
685
979
|
block.call(self)
|
686
980
|
end
|
687
981
|
|
688
982
|
###
|
689
983
|
# Accept a visitor. This method calls "visit" on +visitor+ with self.
|
690
|
-
def accept
|
984
|
+
def accept(visitor)
|
691
985
|
visitor.visit(self)
|
692
986
|
end
|
693
987
|
|
694
988
|
###
|
695
989
|
# Test to see if this Node is equal to +other+
|
696
|
-
def ==
|
990
|
+
def ==(other)
|
697
991
|
return false unless other
|
698
992
|
return false unless other.respond_to?(:pointer_id)
|
699
993
|
pointer_id == other.pointer_id
|
700
994
|
end
|
701
995
|
|
996
|
+
###
|
997
|
+
# Compare two Node objects with respect to their Document. Nodes from
|
998
|
+
# different documents cannot be compared.
|
999
|
+
def <=>(other)
|
1000
|
+
return nil unless other.is_a?(Nokogiri::XML::Node)
|
1001
|
+
return nil unless document == other.document
|
1002
|
+
compare other
|
1003
|
+
end
|
1004
|
+
|
1005
|
+
# @!group Serialization and Generating Output
|
1006
|
+
|
702
1007
|
###
|
703
1008
|
# Serialize Node using +options+. Save options can also be set using a
|
704
1009
|
# block. See SaveOptions.
|
@@ -713,19 +1018,17 @@ module Nokogiri
|
|
713
1018
|
# config.format.as_xml
|
714
1019
|
# end
|
715
1020
|
#
|
716
|
-
def serialize
|
1021
|
+
def serialize(*args, &block)
|
717
1022
|
options = args.first.is_a?(Hash) ? args.shift : {
|
718
|
-
:encoding
|
719
|
-
:save_with
|
1023
|
+
:encoding => args[0],
|
1024
|
+
:save_with => args[1],
|
720
1025
|
}
|
721
1026
|
|
722
1027
|
encoding = options[:encoding] || document.encoding
|
723
1028
|
options[:encoding] = encoding
|
724
1029
|
|
725
|
-
outstring =
|
726
|
-
|
727
|
-
outstring.force_encoding(Encoding.find(encoding))
|
728
|
-
end
|
1030
|
+
outstring = String.new
|
1031
|
+
outstring.force_encoding(Encoding.find(encoding || "utf-8"))
|
729
1032
|
io = StringIO.new(outstring)
|
730
1033
|
write_to io, options, &block
|
731
1034
|
io.string
|
@@ -738,7 +1041,7 @@ module Nokogiri
|
|
738
1041
|
#
|
739
1042
|
# See Node#write_to for a list of +options+. For formatted output,
|
740
1043
|
# use Node#to_xhtml instead.
|
741
|
-
def to_html
|
1044
|
+
def to_html(options = {})
|
742
1045
|
to_format SaveOptions::DEFAULT_HTML, options
|
743
1046
|
end
|
744
1047
|
|
@@ -748,7 +1051,7 @@ module Nokogiri
|
|
748
1051
|
# doc.to_xml(:indent => 5, :encoding => 'UTF-8')
|
749
1052
|
#
|
750
1053
|
# See Node#write_to for a list of +options+
|
751
|
-
def to_xml
|
1054
|
+
def to_xml(options = {})
|
752
1055
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
753
1056
|
serialize(options)
|
754
1057
|
end
|
@@ -759,7 +1062,7 @@ module Nokogiri
|
|
759
1062
|
# doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
|
760
1063
|
#
|
761
1064
|
# See Node#write_to for a list of +options+
|
762
|
-
def to_xhtml
|
1065
|
+
def to_xhtml(options = {})
|
763
1066
|
to_format SaveOptions::DEFAULT_XHTML, options
|
764
1067
|
end
|
765
1068
|
|
@@ -778,31 +1081,36 @@ module Nokogiri
|
|
778
1081
|
#
|
779
1082
|
# To save indented with two dashes:
|
780
1083
|
#
|
781
|
-
# node.write_to(io, :indent_text => '-', :indent => 2
|
1084
|
+
# node.write_to(io, :indent_text => '-', :indent => 2)
|
782
1085
|
#
|
783
|
-
def write_to
|
784
|
-
options
|
785
|
-
encoding
|
1086
|
+
def write_to(io, *options)
|
1087
|
+
options = options.first.is_a?(Hash) ? options.shift : {}
|
1088
|
+
encoding = options[:encoding] || options[0]
|
786
1089
|
if Nokogiri.jruby?
|
787
|
-
save_options
|
788
|
-
indent_times
|
1090
|
+
save_options = options[:save_with] || options[1]
|
1091
|
+
indent_times = options[:indent] || 0
|
789
1092
|
else
|
790
|
-
save_options
|
791
|
-
indent_times
|
1093
|
+
save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
|
1094
|
+
indent_times = options[:indent] || 2
|
792
1095
|
end
|
793
|
-
indent_text
|
1096
|
+
indent_text = options[:indent_text] || " "
|
1097
|
+
|
1098
|
+
# Any string times 0 returns an empty string. Therefore, use the same
|
1099
|
+
# string instead of generating a new empty string for every node with
|
1100
|
+
# zero indentation.
|
1101
|
+
indentation = indent_times.zero? ? "" : (indent_text * indent_times)
|
794
1102
|
|
795
1103
|
config = SaveOptions.new(save_options.to_i)
|
796
1104
|
yield config if block_given?
|
797
1105
|
|
798
|
-
native_write_to(io, encoding,
|
1106
|
+
native_write_to(io, encoding, indentation, config.options)
|
799
1107
|
end
|
800
1108
|
|
801
1109
|
###
|
802
1110
|
# Write Node as HTML to +io+ with +options+
|
803
1111
|
#
|
804
1112
|
# See Node#write_to for a list of +options+
|
805
|
-
def write_html_to
|
1113
|
+
def write_html_to(io, options = {})
|
806
1114
|
write_format_to SaveOptions::DEFAULT_HTML, io, options
|
807
1115
|
end
|
808
1116
|
|
@@ -810,7 +1118,7 @@ module Nokogiri
|
|
810
1118
|
# Write Node as XHTML to +io+ with +options+
|
811
1119
|
#
|
812
1120
|
# See Node#write_to for a list of +options+
|
813
|
-
def write_xhtml_to
|
1121
|
+
def write_xhtml_to(io, options = {})
|
814
1122
|
write_format_to SaveOptions::DEFAULT_XHTML, io, options
|
815
1123
|
end
|
816
1124
|
|
@@ -820,52 +1128,66 @@ module Nokogiri
|
|
820
1128
|
# doc.write_xml_to io, :encoding => 'UTF-8'
|
821
1129
|
#
|
822
1130
|
# See Node#write_to for a list of options
|
823
|
-
def write_xml_to
|
1131
|
+
def write_xml_to(io, options = {})
|
824
1132
|
options[:save_with] ||= SaveOptions::DEFAULT_XML
|
825
1133
|
write_to io, options
|
826
1134
|
end
|
827
1135
|
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
compare other
|
1136
|
+
def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
|
1137
|
+
c14n_root = self
|
1138
|
+
document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
|
1139
|
+
tn = node.is_a?(XML::Node) ? node : parent
|
1140
|
+
tn == c14n_root || tn.ancestors.include?(c14n_root)
|
1141
|
+
end
|
835
1142
|
end
|
836
1143
|
|
837
|
-
|
838
|
-
# Do xinclude substitution on the subtree below node. If given a block, a
|
839
|
-
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
|
840
|
-
# passed to it, allowing more convenient modification of the parser options.
|
841
|
-
def do_xinclude options = XML::ParseOptions::DEFAULT_XML, &block
|
842
|
-
options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
|
1144
|
+
# @!endgroup
|
843
1145
|
|
844
|
-
|
845
|
-
yield options if block_given?
|
1146
|
+
protected
|
846
1147
|
|
847
|
-
|
848
|
-
|
1148
|
+
def coerce(data)
|
1149
|
+
case data
|
1150
|
+
when XML::NodeSet
|
1151
|
+
return data
|
1152
|
+
when XML::DocumentFragment
|
1153
|
+
return data.children
|
1154
|
+
when String
|
1155
|
+
return fragment(data).children
|
1156
|
+
when Document, XML::Attr
|
1157
|
+
# unacceptable
|
1158
|
+
when XML::Node
|
1159
|
+
return data
|
1160
|
+
end
|
1161
|
+
|
1162
|
+
raise ArgumentError, <<-EOERR
|
1163
|
+
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1164
|
+
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1165
|
+
EOERR
|
849
1166
|
end
|
850
1167
|
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
1168
|
+
private
|
1169
|
+
|
1170
|
+
def keywordify(keywords)
|
1171
|
+
case keywords
|
1172
|
+
when Enumerable
|
1173
|
+
return keywords
|
1174
|
+
when String
|
1175
|
+
return keywords.scan(/\S+/)
|
1176
|
+
else
|
1177
|
+
raise ArgumentError.new("Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}")
|
856
1178
|
end
|
857
1179
|
end
|
858
1180
|
|
859
|
-
|
1181
|
+
def add_sibling(next_or_previous, node_or_tags)
|
1182
|
+
raise("Cannot add sibling to a node with no parent") unless parent
|
860
1183
|
|
861
|
-
def add_sibling next_or_previous, node_or_tags
|
862
1184
|
impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
|
863
|
-
iter = (next_or_previous == :next) ? :reverse_each
|
1185
|
+
iter = (next_or_previous == :next) ? :reverse_each : :each
|
864
1186
|
|
865
|
-
node_or_tags = coerce
|
1187
|
+
node_or_tags = parent.coerce(node_or_tags)
|
866
1188
|
if node_or_tags.is_a?(XML::NodeSet)
|
867
1189
|
if text?
|
868
|
-
pivot = Nokogiri::XML::Node.new
|
1190
|
+
pivot = Nokogiri::XML::Node.new "dummy", document
|
869
1191
|
send impl, pivot
|
870
1192
|
else
|
871
1193
|
pivot = self
|
@@ -878,80 +1200,39 @@ module Nokogiri
|
|
878
1200
|
node_or_tags
|
879
1201
|
end
|
880
1202
|
|
881
|
-
|
882
|
-
|
883
|
-
|
1203
|
+
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
1204
|
+
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1205
|
+
|
1206
|
+
def to_format(save_option, options)
|
1207
|
+
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
884
1208
|
|
885
|
-
options[:save_with] |= save_option if options[:save_with]
|
886
1209
|
options[:save_with] = save_option unless options[:save_with]
|
887
1210
|
serialize(options)
|
888
1211
|
end
|
889
1212
|
|
890
|
-
def write_format_to
|
891
|
-
|
892
|
-
return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
1213
|
+
def write_format_to(save_option, io, options)
|
1214
|
+
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
893
1215
|
|
894
1216
|
options[:save_with] ||= save_option
|
895
1217
|
write_to io, options
|
896
1218
|
end
|
897
1219
|
|
898
|
-
def extract_params params # :nodoc:
|
899
|
-
# Pop off our custom function handler if it exists
|
900
|
-
handler = params.find { |param|
|
901
|
-
![Hash, String, Symbol].include?(param.class)
|
902
|
-
}
|
903
|
-
|
904
|
-
params -= [handler] if handler
|
905
|
-
|
906
|
-
hashes = []
|
907
|
-
while Hash === params.last || params.last.nil?
|
908
|
-
hashes << params.pop
|
909
|
-
break if params.empty?
|
910
|
-
end
|
911
|
-
|
912
|
-
ns, binds = hashes.reverse
|
913
|
-
|
914
|
-
ns ||= document.root ? document.root.namespaces : {}
|
915
|
-
|
916
|
-
[params, handler, ns, binds]
|
917
|
-
end
|
918
|
-
|
919
|
-
def coerce data # :nodoc:
|
920
|
-
case data
|
921
|
-
when XML::NodeSet
|
922
|
-
return data
|
923
|
-
when XML::DocumentFragment
|
924
|
-
return data.children
|
925
|
-
when String
|
926
|
-
return fragment(data).children
|
927
|
-
when Document, XML::Attr
|
928
|
-
# unacceptable
|
929
|
-
when XML::Node
|
930
|
-
return data
|
931
|
-
end
|
932
|
-
|
933
|
-
raise ArgumentError, <<-EOERR
|
934
|
-
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
935
|
-
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
936
|
-
EOERR
|
937
|
-
end
|
938
|
-
|
939
|
-
def implied_xpath_context
|
940
|
-
"./"
|
941
|
-
end
|
942
|
-
|
943
1220
|
def inspect_attributes
|
944
1221
|
[:name, :namespace, :attribute_nodes, :children]
|
945
1222
|
end
|
946
1223
|
|
947
|
-
|
1224
|
+
# @private
|
1225
|
+
IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
|
1226
|
+
|
1227
|
+
def add_child_node_and_reparent_attrs(node)
|
948
1228
|
add_child_node node
|
949
1229
|
node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
|
950
1230
|
attr_node.remove
|
951
1231
|
node[attr_node.name] = attr_node.value
|
952
1232
|
end
|
953
1233
|
end
|
954
|
-
|
955
1234
|
end
|
956
1235
|
end
|
957
1236
|
end
|
1237
|
+
|
1238
|
+
require_relative "node/save_options"
|