nokogiri 1.5.10 → 1.13.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1903 -0
- data/LICENSE.md +9 -0
- data/README.md +280 -0
- data/bin/nokogiri +84 -31
- data/dependencies.yml +73 -0
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +956 -100
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +232 -87
- data/ext/nokogiri/nokogiri.h +188 -129
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +49 -40
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +24 -23
- data/ext/nokogiri/xml_comment.c +29 -21
- data/ext/nokogiri/xml_document.c +327 -223
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +45 -20
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +74 -32
- data/ext/nokogiri/xml_node.c +1290 -680
- data/ext/nokogiri/xml_node_set.c +239 -208
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +227 -189
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +123 -125
- data/ext/nokogiri/xml_sax_parser_context.c +138 -79
- data/ext/nokogiri/xml_sax_push_parser.c +88 -35
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +50 -23
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +227 -140
- data/ext/nokogiri/xslt_stylesheet.c +162 -168
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -58
- data/lib/nokogiri/css/parser.rb +327 -288
- data/lib/nokogiri/css/parser.y +67 -45
- data/lib/nokogiri/css/parser_extras.rb +52 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +7 -6
- data/lib/nokogiri/css/xpath_visitor.rb +263 -75
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +17 -8
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +331 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +221 -0
- data/lib/nokogiri/version.rb +3 -90
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +96 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +234 -95
- data/lib/nokogiri/xml/document_fragment.rb +86 -36
- data/lib/nokogiri/xml/dtd.rb +16 -4
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +947 -502
- data/lib/nokogiri/xml/node_set.rb +168 -159
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +40 -5
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +43 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +259 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -36
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +18 -16
- data/lib/nokogiri.rb +69 -69
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +382 -460
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -56
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -13
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -14
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -254
- data/lib/nokogiri/html/document_fragment.rb +0 -41
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/lib/nokogiri/html/sax/push_parser.rb +0 -16
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
@@ -1,17 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
####
|
4
6
|
# A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
|
5
7
|
# a NodeSet is returned as a result of searching a Document via
|
6
|
-
# Nokogiri::XML::
|
8
|
+
# Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
|
7
9
|
class NodeSet
|
10
|
+
include Nokogiri::XML::Searchable
|
8
11
|
include Enumerable
|
9
12
|
|
10
13
|
# The Document this NodeSet is associated with
|
11
14
|
attr_accessor :document
|
12
15
|
|
16
|
+
alias_method :clone, :dup
|
17
|
+
|
13
18
|
# Create a NodeSet with +document+ defaulting to +list+
|
14
|
-
def initialize
|
19
|
+
def initialize(document, list = [])
|
15
20
|
@document = document
|
16
21
|
document.decorate(self)
|
17
22
|
list.each { |x| self << x }
|
@@ -20,10 +25,10 @@ module Nokogiri
|
|
20
25
|
|
21
26
|
###
|
22
27
|
# Get the first element of the NodeSet.
|
23
|
-
def first
|
28
|
+
def first(n = nil)
|
24
29
|
return self[0] unless n
|
25
30
|
list = []
|
26
|
-
n.times { |i| list << self[i] }
|
31
|
+
[n, length].min.times { |i| list << self[i] }
|
27
32
|
list
|
28
33
|
end
|
29
34
|
|
@@ -40,240 +45,238 @@ module Nokogiri
|
|
40
45
|
end
|
41
46
|
|
42
47
|
###
|
43
|
-
# Returns the index of the first node in self that is == to +node
|
44
|
-
def index(node)
|
45
|
-
|
48
|
+
# Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
|
49
|
+
def index(node = nil)
|
50
|
+
if node
|
51
|
+
warn("given block not used") if block_given?
|
52
|
+
each_with_index { |member, j| return j if member == node }
|
53
|
+
elsif block_given?
|
54
|
+
each_with_index { |member, j| return j if yield(member) }
|
55
|
+
end
|
46
56
|
nil
|
47
57
|
end
|
48
58
|
|
49
59
|
###
|
50
60
|
# Insert +datum+ before the first Node in this NodeSet
|
51
|
-
def before
|
52
|
-
first.before
|
61
|
+
def before(datum)
|
62
|
+
first.before(datum)
|
53
63
|
end
|
54
64
|
|
55
65
|
###
|
56
66
|
# Insert +datum+ after the last Node in this NodeSet
|
57
|
-
def after
|
58
|
-
last.after
|
67
|
+
def after(datum)
|
68
|
+
last.after(datum)
|
59
69
|
end
|
60
70
|
|
61
|
-
|
62
|
-
|
71
|
+
alias_method :<<, :push
|
72
|
+
alias_method :remove, :unlink
|
63
73
|
|
64
74
|
###
|
65
|
-
#
|
75
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
66
76
|
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
def search *paths
|
70
|
-
handler = ![
|
71
|
-
Hash, String, Symbol
|
72
|
-
].include?(paths.last.class) ? paths.pop : nil
|
73
|
-
|
74
|
-
ns = paths.last.is_a?(Hash) ? paths.pop : nil
|
75
|
-
|
76
|
-
sub_set = NodeSet.new(document)
|
77
|
-
|
78
|
-
paths.each do |path|
|
79
|
-
sub_set += send(
|
80
|
-
path =~ /^(\.\/|\/|\.\.|\.$)/ ? :xpath : :css,
|
81
|
-
*(paths + [ns, handler]).compact
|
82
|
-
)
|
83
|
-
end
|
84
|
-
|
85
|
-
document.decorate(sub_set)
|
86
|
-
sub_set
|
87
|
-
end
|
88
|
-
alias :/ :search
|
89
|
-
|
90
|
-
###
|
91
|
-
# Search this NodeSet for css +paths+
|
77
|
+
# Search this node set for CSS +rules+. +rules+ must be one or more CSS
|
78
|
+
# selectors. For example:
|
92
79
|
#
|
93
|
-
# For more information see Nokogiri::XML::
|
94
|
-
def css
|
95
|
-
handler =
|
96
|
-
|
97
|
-
].include?(paths.last.class) ? paths.pop : nil
|
98
|
-
|
99
|
-
ns = paths.last.is_a?(Hash) ? paths.pop : nil
|
80
|
+
# For more information see Nokogiri::XML::Searchable#css
|
81
|
+
def css(*args)
|
82
|
+
rules, handler, ns, _ = extract_params(args)
|
83
|
+
paths = css_rules_to_xpath(rules, ns)
|
100
84
|
|
101
|
-
|
102
|
-
|
103
|
-
each do |node|
|
104
|
-
doc = node.document
|
105
|
-
search_ns = ns || (doc.root ? doc.root.namespaces : {})
|
106
|
-
|
107
|
-
xpaths = paths.map { |rule|
|
108
|
-
[
|
109
|
-
CSS.xpath_for(rule.to_s, :prefix => ".//", :ns => search_ns),
|
110
|
-
CSS.xpath_for(rule.to_s, :prefix => "self::", :ns => search_ns)
|
111
|
-
].join(' | ')
|
112
|
-
}
|
113
|
-
|
114
|
-
sub_set += node.xpath(*(xpaths + [search_ns, handler].compact))
|
85
|
+
inject(NodeSet.new(document)) do |set, node|
|
86
|
+
set + xpath_internal(node, paths, handler, ns, nil)
|
115
87
|
end
|
116
|
-
document.decorate(sub_set)
|
117
|
-
sub_set
|
118
88
|
end
|
119
89
|
|
120
90
|
###
|
121
|
-
#
|
91
|
+
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
122
92
|
#
|
123
|
-
#
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
ns = paths.last.is_a?(Hash) ? paths.pop : nil
|
93
|
+
# Search this node set for XPath +paths+. +paths+ must be one or more XPath
|
94
|
+
# queries.
|
95
|
+
#
|
96
|
+
# For more information see Nokogiri::XML::Searchable#xpath
|
97
|
+
def xpath(*args)
|
98
|
+
paths, handler, ns, binds = extract_params(args)
|
130
99
|
|
131
|
-
|
132
|
-
|
133
|
-
sub_set += node.xpath(*(paths + [ns, handler].compact))
|
100
|
+
inject(NodeSet.new(document)) do |set, node|
|
101
|
+
set + xpath_internal(node, paths, handler, ns, binds)
|
134
102
|
end
|
135
|
-
document.decorate(sub_set)
|
136
|
-
sub_set
|
137
|
-
end
|
138
|
-
|
139
|
-
###
|
140
|
-
# Search this NodeSet's nodes' immediate children using CSS selector +selector+
|
141
|
-
def > selector
|
142
|
-
ns = document.root.namespaces
|
143
|
-
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
144
103
|
end
|
145
104
|
|
146
105
|
###
|
147
|
-
#
|
148
|
-
# first Node. Otherwise, index in to the array with +path+.
|
149
|
-
def at path, ns = document.root ? document.root.namespaces : {}
|
150
|
-
return self[path] if path.is_a?(Numeric)
|
151
|
-
search(path, ns).first
|
152
|
-
end
|
153
|
-
alias :% :at
|
154
|
-
|
155
|
-
##
|
156
|
-
# Search this NodeSet for the first occurrence of XPath +paths+.
|
157
|
-
# Equivalent to <tt>xpath(paths).first</tt>
|
158
|
-
# See NodeSet#xpath for more information.
|
106
|
+
# call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
159
107
|
#
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
#
|
166
|
-
#
|
167
|
-
#
|
108
|
+
# Search this object for +paths+, and return only the first
|
109
|
+
# result. +paths+ must be one or more XPath or CSS queries.
|
110
|
+
#
|
111
|
+
# See Searchable#search for more information.
|
112
|
+
#
|
113
|
+
# Or, if passed an integer, index into the NodeSet:
|
114
|
+
#
|
115
|
+
# node_set.at(3) # same as node_set[3]
|
168
116
|
#
|
169
|
-
def
|
170
|
-
|
117
|
+
def at(*args)
|
118
|
+
if args.length == 1 && args.first.is_a?(Numeric)
|
119
|
+
return self[args.first]
|
120
|
+
end
|
121
|
+
|
122
|
+
super(*args)
|
171
123
|
end
|
124
|
+
alias_method :%, :at
|
172
125
|
|
173
126
|
###
|
174
127
|
# Filter this list for nodes that match +expr+
|
175
|
-
def filter
|
128
|
+
def filter(expr)
|
176
129
|
find_all { |node| node.matches?(expr) }
|
177
130
|
end
|
178
131
|
|
179
132
|
###
|
180
|
-
#
|
181
|
-
|
133
|
+
# Add the class attribute +name+ to all Node objects in the
|
134
|
+
# NodeSet.
|
135
|
+
#
|
136
|
+
# See Nokogiri::XML::Node#add_class for more information.
|
137
|
+
def add_class(name)
|
182
138
|
each do |el|
|
183
|
-
|
184
|
-
el['class'] = classes.push(name).uniq.join " "
|
139
|
+
el.add_class(name)
|
185
140
|
end
|
186
141
|
self
|
187
142
|
end
|
188
143
|
|
189
144
|
###
|
190
|
-
#
|
191
|
-
# If +name+ is nil, remove the class attribute from all Nodes in the
|
145
|
+
# Append the class attribute +name+ to all Node objects in the
|
192
146
|
# NodeSet.
|
193
|
-
|
147
|
+
#
|
148
|
+
# See Nokogiri::XML::Node#append_class for more information.
|
149
|
+
def append_class(name)
|
194
150
|
each do |el|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
151
|
+
el.append_class(name)
|
152
|
+
end
|
153
|
+
self
|
154
|
+
end
|
155
|
+
|
156
|
+
###
|
157
|
+
# Remove the class attribute +name+ from all Node objects in the
|
158
|
+
# NodeSet.
|
159
|
+
#
|
160
|
+
# See Nokogiri::XML::Node#remove_class for more information.
|
161
|
+
def remove_class(name = nil)
|
162
|
+
each do |el|
|
163
|
+
el.remove_class(name)
|
205
164
|
end
|
206
165
|
self
|
207
166
|
end
|
208
167
|
|
209
168
|
###
|
210
|
-
# Set
|
211
|
-
#
|
212
|
-
|
213
|
-
|
214
|
-
|
169
|
+
# Set attributes on each Node in the NodeSet, or get an
|
170
|
+
# attribute from the first Node in the NodeSet.
|
171
|
+
#
|
172
|
+
# To get an attribute from the first Node in a NodeSet:
|
173
|
+
#
|
174
|
+
# node_set.attr("href") # => "https://www.nokogiri.org"
|
175
|
+
#
|
176
|
+
# Note that an empty NodeSet will return nil when +#attr+ is called as a getter.
|
177
|
+
#
|
178
|
+
# To set an attribute on each node, +key+ can either be an
|
179
|
+
# attribute name, or a Hash of attribute names and values. When
|
180
|
+
# called as a setter, +#attr+ returns the NodeSet.
|
181
|
+
#
|
182
|
+
# If +key+ is an attribute name, then either +value+ or +block+
|
183
|
+
# must be passed.
|
184
|
+
#
|
185
|
+
# If +key+ is a Hash then attributes will be set for each
|
186
|
+
# key/value pair:
|
187
|
+
#
|
188
|
+
# node_set.attr("href" => "https://www.nokogiri.org", "class" => "member")
|
189
|
+
#
|
190
|
+
# If +value+ is passed, it will be used as the attribute value
|
191
|
+
# for all nodes:
|
192
|
+
#
|
193
|
+
# node_set.attr("href", "https://www.nokogiri.org")
|
194
|
+
#
|
195
|
+
# If +block+ is passed, it will be called on each Node object in
|
196
|
+
# the NodeSet and the return value used as the attribute value
|
197
|
+
# for that node:
|
198
|
+
#
|
199
|
+
# node_set.attr("class") { |node| node.name }
|
200
|
+
#
|
201
|
+
def attr(key, value = nil, &block)
|
202
|
+
unless key.is_a?(Hash) || (key && (value || block))
|
203
|
+
return first ? first.attribute(key) : nil
|
215
204
|
end
|
216
205
|
|
217
206
|
hash = key.is_a?(Hash) ? key : { key => value }
|
218
207
|
|
219
|
-
hash.each
|
208
|
+
hash.each do |k, v|
|
209
|
+
each do |node|
|
210
|
+
node[k] = v || yield(node)
|
211
|
+
end
|
212
|
+
end
|
220
213
|
|
221
214
|
self
|
222
215
|
end
|
223
|
-
|
224
|
-
|
216
|
+
alias_method :set, :attr
|
217
|
+
alias_method :attribute, :attr
|
225
218
|
|
226
219
|
###
|
227
220
|
# Remove the attribute named +name+ from all Node objects in the NodeSet
|
228
|
-
def remove_attr
|
229
|
-
each { |el| el.delete
|
221
|
+
def remove_attr(name)
|
222
|
+
each { |el| el.delete(name) }
|
230
223
|
self
|
231
224
|
end
|
225
|
+
alias_method :remove_attribute, :remove_attr
|
232
226
|
|
233
227
|
###
|
234
228
|
# Iterate over each node, yielding to +block+
|
235
|
-
def each
|
229
|
+
def each
|
230
|
+
return to_enum unless block_given?
|
231
|
+
|
236
232
|
0.upto(length - 1) do |x|
|
237
233
|
yield self[x]
|
238
234
|
end
|
235
|
+
self
|
239
236
|
end
|
240
237
|
|
241
238
|
###
|
242
239
|
# Get the inner text of all contained Node objects
|
240
|
+
#
|
241
|
+
# Note: This joins the text of all Node objects in the NodeSet:
|
242
|
+
#
|
243
|
+
# doc = Nokogiri::XML('<xml><a><d>foo</d><d>bar</d></a></xml>')
|
244
|
+
# doc.css('d').text # => "foobar"
|
245
|
+
#
|
246
|
+
# Instead, if you want to return the text of all nodes in the NodeSet:
|
247
|
+
#
|
248
|
+
# doc.css('d').map(&:text) # => ["foo", "bar"]
|
249
|
+
#
|
250
|
+
# See Nokogiri::XML::Node#content for more information.
|
243
251
|
def inner_text
|
244
|
-
collect
|
252
|
+
collect(&:inner_text).join("")
|
245
253
|
end
|
246
|
-
|
254
|
+
alias_method :text, :inner_text
|
247
255
|
|
248
256
|
###
|
249
257
|
# Get the inner html of all contained Node objects
|
250
|
-
def inner_html
|
251
|
-
collect{|j| j.inner_html(*args) }.join(
|
258
|
+
def inner_html(*args)
|
259
|
+
collect { |j| j.inner_html(*args) }.join("")
|
252
260
|
end
|
253
261
|
|
254
262
|
###
|
255
|
-
# Wrap this NodeSet with +html+
|
256
|
-
def wrap(html
|
257
|
-
|
258
|
-
new_parent = document.parse(html).first
|
259
|
-
j.add_next_sibling(new_parent)
|
260
|
-
new_parent.add_child(j)
|
261
|
-
end
|
262
|
-
self
|
263
|
+
# Wrap this NodeSet with +html+
|
264
|
+
def wrap(html)
|
265
|
+
map { |node| node.wrap(html) }
|
263
266
|
end
|
264
267
|
|
265
268
|
###
|
266
269
|
# Convert this NodeSet to a string.
|
267
270
|
def to_s
|
268
|
-
map
|
271
|
+
map(&:to_s).join
|
269
272
|
end
|
270
273
|
|
271
274
|
###
|
272
275
|
# Convert this NodeSet to HTML
|
273
|
-
def to_html
|
276
|
+
def to_html(*args)
|
274
277
|
if Nokogiri.jruby?
|
275
278
|
options = args.first.is_a?(Hash) ? args.shift : {}
|
276
|
-
|
279
|
+
unless options[:save_with]
|
277
280
|
options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
|
278
281
|
end
|
279
282
|
args.insert(0, options)
|
@@ -283,25 +286,25 @@ module Nokogiri
|
|
283
286
|
|
284
287
|
###
|
285
288
|
# Convert this NodeSet to XHTML
|
286
|
-
def to_xhtml
|
289
|
+
def to_xhtml(*args)
|
287
290
|
map { |x| x.to_xhtml(*args) }.join
|
288
291
|
end
|
289
292
|
|
290
293
|
###
|
291
294
|
# Convert this NodeSet to XML
|
292
|
-
def to_xml
|
295
|
+
def to_xml(*args)
|
293
296
|
map { |x| x.to_xml(*args) }.join
|
294
297
|
end
|
295
298
|
|
296
|
-
|
297
|
-
|
299
|
+
alias_method :size, :length
|
300
|
+
alias_method :to_ary, :to_a
|
298
301
|
|
299
302
|
###
|
300
303
|
# Removes the last element from set and returns it, or +nil+ if
|
301
304
|
# the set is empty
|
302
305
|
def pop
|
303
306
|
return nil if length == 0
|
304
|
-
delete
|
307
|
+
delete(last)
|
305
308
|
end
|
306
309
|
|
307
310
|
###
|
@@ -309,14 +312,14 @@ module Nokogiri
|
|
309
312
|
# +nil+ if the set is empty.
|
310
313
|
def shift
|
311
314
|
return nil if length == 0
|
312
|
-
delete
|
315
|
+
delete(first)
|
313
316
|
end
|
314
317
|
|
315
318
|
###
|
316
319
|
# Equality -- Two NodeSets are equal if they contain the same number
|
317
320
|
# of elements and if each element is equal to the corresponding
|
318
321
|
# element in the other NodeSet
|
319
|
-
def ==
|
322
|
+
def ==(other)
|
320
323
|
return false unless other.is_a?(Nokogiri::XML::NodeSet)
|
321
324
|
return false unless length == other.length
|
322
325
|
each_with_index do |node, i|
|
@@ -329,7 +332,11 @@ module Nokogiri
|
|
329
332
|
# Returns a new NodeSet containing all the children of all the nodes in
|
330
333
|
# the NodeSet
|
331
334
|
def children
|
332
|
-
|
335
|
+
node_set = NodeSet.new(document)
|
336
|
+
each do |node|
|
337
|
+
node.children.each { |n| node_set.push(n) }
|
338
|
+
end
|
339
|
+
node_set
|
333
340
|
end
|
334
341
|
|
335
342
|
###
|
@@ -338,7 +345,7 @@ module Nokogiri
|
|
338
345
|
def reverse
|
339
346
|
node_set = NodeSet.new(document)
|
340
347
|
(length - 1).downto(0) do |x|
|
341
|
-
node_set.push
|
348
|
+
node_set.push(self[x])
|
342
349
|
end
|
343
350
|
node_set
|
344
351
|
end
|
@@ -346,10 +353,12 @@ module Nokogiri
|
|
346
353
|
###
|
347
354
|
# Return a nicely formatted string representation
|
348
355
|
def inspect
|
349
|
-
"[#{map
|
356
|
+
"[#{map(&:inspect).join(", ")}]"
|
350
357
|
end
|
351
358
|
|
352
|
-
|
359
|
+
alias_method :+, :|
|
360
|
+
|
361
|
+
IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
|
353
362
|
end
|
354
363
|
end
|
355
364
|
end
|
@@ -1,6 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
5
|
+
# Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
|
3
6
|
class Notation < Struct.new(:name, :public_id, :system_id)
|
7
|
+
# dead comment to ensure rdoc processing
|
8
|
+
|
9
|
+
# :attr: name (String)
|
10
|
+
# The name for the element.
|
11
|
+
|
12
|
+
# :attr: public_id (String)
|
13
|
+
# The URI corresponding to the public identifier
|
14
|
+
|
15
|
+
# :attr: system_id (String,nil)
|
16
|
+
# The URI corresponding to the system identifier
|
4
17
|
end
|
5
18
|
end
|
6
19
|
end
|
@@ -1,7 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
###
|
4
6
|
# Parse options for passing to Nokogiri.XML or Nokogiri.HTML
|
7
|
+
#
|
8
|
+
# == Building combinations of parse options
|
9
|
+
# You can build your own combinations of these parse options by using any of the following methods:
|
10
|
+
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
|
11
|
+
# [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
|
12
|
+
# Nokogiri.XML('<content>Chapter 1</content>', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
|
13
|
+
# [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
|
14
|
+
# Nokogiri.XML('<content>Chapter 1</content>', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
|
15
|
+
# [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
|
16
|
+
# Nokogiri.XML('<content>Chapter 1</content>') {|config| config.recover.noent}
|
17
|
+
#
|
18
|
+
# == Removing particular parse options
|
19
|
+
# You can also remove options from an instance of +ParseOptions+ dynamically.
|
20
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
|
21
|
+
# Note that this is not available for +STRICT+.
|
22
|
+
#
|
23
|
+
# # Setting the RECOVER & NOENT options...
|
24
|
+
# options = Nokogiri::XML::ParseOptions.new.recover.noent
|
25
|
+
# # later...
|
26
|
+
# options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
|
27
|
+
# options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
|
28
|
+
#
|
5
29
|
class ParseOptions
|
6
30
|
# Strict parsing
|
7
31
|
STRICT = 0
|
@@ -45,14 +69,21 @@ module Nokogiri
|
|
45
69
|
NOBASEFIX = 1 << 18
|
46
70
|
# relax any hardcoded limit from the parser
|
47
71
|
HUGE = 1 << 19
|
72
|
+
# line numbers stored as long int (instead of a short int)
|
73
|
+
BIG_LINES = 1 << 22
|
48
74
|
|
49
75
|
# the default options used for parsing XML documents
|
50
|
-
DEFAULT_XML = RECOVER | NONET
|
76
|
+
DEFAULT_XML = RECOVER | NONET | BIG_LINES
|
77
|
+
# the default options used for parsing XSLT stylesheets
|
78
|
+
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
|
51
79
|
# the default options used for parsing HTML documents
|
52
|
-
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
80
|
+
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
|
81
|
+
# the default options used for parsing XML schemas
|
82
|
+
DEFAULT_SCHEMA = NONET | BIG_LINES
|
53
83
|
|
54
84
|
attr_accessor :options
|
55
|
-
|
85
|
+
|
86
|
+
def initialize(options = STRICT)
|
56
87
|
@options = options
|
57
88
|
end
|
58
89
|
|
@@ -84,14 +115,18 @@ module Nokogiri
|
|
84
115
|
@options & RECOVER == STRICT
|
85
116
|
end
|
86
117
|
|
87
|
-
|
118
|
+
def ==(other)
|
119
|
+
other.to_i == to_i
|
120
|
+
end
|
121
|
+
|
122
|
+
alias_method :to_i, :options
|
88
123
|
|
89
124
|
def inspect
|
90
125
|
options = []
|
91
126
|
self.class.constants.each do |k|
|
92
127
|
options << k.downcase if send(:"#{k.downcase}?")
|
93
128
|
end
|
94
|
-
super.sub(/>$/, " " + options.join(
|
129
|
+
super.sub(/>$/, " " + options.join(", ") + ">")
|
95
130
|
end
|
96
131
|
end
|
97
132
|
end
|
@@ -1,16 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
5
|
+
# :nodoc: all
|
3
6
|
module PP
|
4
7
|
module CharacterData
|
5
|
-
def pretty_print
|
6
|
-
nice_name = self.class.name.split(
|
7
|
-
pp.group(2, "#(#{nice_name} ",
|
8
|
-
pp.pp
|
8
|
+
def pretty_print(pp)
|
9
|
+
nice_name = self.class.name.split("::").last
|
10
|
+
pp.group(2, "#(#{nice_name} ", ")") do
|
11
|
+
pp.pp(text)
|
9
12
|
end
|
10
13
|
end
|
11
14
|
|
12
|
-
def inspect
|
13
|
-
"#<#{self.class.name}:#{
|
15
|
+
def inspect
|
16
|
+
"#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
|
14
17
|
end
|
15
18
|
end
|
16
19
|
end
|