nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
data/lib/nokogiri/css.rb
CHANGED
@@ -1,27 +1,132 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
x = $-w
|
4
|
-
$-w = false
|
5
|
-
require 'nokogiri/css/parser'
|
6
|
-
$-w = x
|
7
|
-
|
8
|
-
require 'nokogiri/css/tokenizer'
|
9
|
-
require 'nokogiri/css/syntax_error'
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
10
3
|
|
11
4
|
module Nokogiri
|
5
|
+
# Translate a CSS selector into an XPath 1.0 query
|
12
6
|
module CSS
|
13
7
|
class << self
|
14
|
-
|
15
|
-
#
|
16
|
-
def parse
|
17
|
-
|
8
|
+
# TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
|
9
|
+
# It is not used by Nokogiri and shouldn't be part of the public API.
|
10
|
+
def parse(selector) # :nodoc:
|
11
|
+
warn("Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead.", uplevel: 1, category: :deprecated)
|
12
|
+
Parser.new.parse(selector)
|
18
13
|
end
|
19
14
|
|
20
|
-
|
21
|
-
#
|
22
|
-
|
23
|
-
|
15
|
+
# :call-seq:
|
16
|
+
# xpath_for(selector_list) → Array<String>
|
17
|
+
# xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
|
18
|
+
#
|
19
|
+
# Translate a CSS selector list to the equivalent XPath expressions.
|
20
|
+
#
|
21
|
+
# 💡 Note that translated queries are cached by default for performance reasons.
|
22
|
+
#
|
23
|
+
# ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
|
24
|
+
# node classes, for querying documents with CSS selectors. This method is the underlying
|
25
|
+
# mechanism used by XML::Searchable and is provided solely for advanced users to translate
|
26
|
+
# \CSS selectors to XPath directly.
|
27
|
+
#
|
28
|
+
# Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
|
29
|
+
# some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
|
30
|
+
# functions.
|
31
|
+
#
|
32
|
+
# [Parameters]
|
33
|
+
# - +selector_list+ (String)
|
34
|
+
#
|
35
|
+
# The CSS selector to be translated into XPath. This is always a String, but that string
|
36
|
+
# value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
|
37
|
+
# examples).
|
38
|
+
#
|
39
|
+
# [Keyword arguments]
|
40
|
+
# - +prefix:+ (String)
|
41
|
+
#
|
42
|
+
# The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
|
43
|
+
# for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
|
44
|
+
#
|
45
|
+
# - +ns:+ (Hash<String ⇒ String>, nil)
|
46
|
+
#
|
47
|
+
# Namespaces that are referenced in the query, if any. This is a hash where the keys are the
|
48
|
+
# namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
|
49
|
+
# empty set of namespaces.
|
50
|
+
#
|
51
|
+
# - +visitor:+ (Nokogiri::CSS::XPathVisitor)
|
52
|
+
#
|
53
|
+
# Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
|
54
|
+
# Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
|
55
|
+
# be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
|
56
|
+
#
|
57
|
+
# ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
|
58
|
+
# provided, +prefix+ and +ns+ must not be present.
|
59
|
+
#
|
60
|
+
# - +cache:+ (Boolean)
|
61
|
+
#
|
62
|
+
# Whether to use the SelectorCache for the translated query to ensure that repeated queries
|
63
|
+
# don't incur the overhead of re-parsing the selector. Default is +true+.
|
64
|
+
#
|
65
|
+
# [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
|
66
|
+
#
|
67
|
+
# *Example* with a simple selector:
|
68
|
+
#
|
69
|
+
# Nokogiri::CSS.xpath_for("div") # => ["//div"]
|
70
|
+
#
|
71
|
+
# *Example* with a compound selector:
|
72
|
+
#
|
73
|
+
# Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
|
74
|
+
#
|
75
|
+
# *Example* with a complex selector:
|
76
|
+
#
|
77
|
+
# Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
|
78
|
+
#
|
79
|
+
# *Example* with a selector list:
|
80
|
+
#
|
81
|
+
# Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
|
82
|
+
#
|
83
|
+
def xpath_for(
|
84
|
+
selector, options = nil,
|
85
|
+
prefix: options&.delete(:prefix),
|
86
|
+
visitor: options&.delete(:visitor),
|
87
|
+
ns: options&.delete(:ns),
|
88
|
+
cache: true
|
89
|
+
)
|
90
|
+
unless options.nil?
|
91
|
+
warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
|
92
|
+
end
|
93
|
+
|
94
|
+
raise(TypeError, "no implicit conversion of #{selector.inspect} to String") unless selector.respond_to?(:to_str)
|
95
|
+
|
96
|
+
selector = selector.to_str
|
97
|
+
raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?
|
98
|
+
|
99
|
+
if visitor
|
100
|
+
raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
|
101
|
+
raise ArgumentError, "cannot provide both :ns and :visitor" if ns
|
102
|
+
end
|
103
|
+
|
104
|
+
visitor ||= begin
|
105
|
+
visitor_kw = {}
|
106
|
+
visitor_kw[:prefix] = prefix if prefix
|
107
|
+
visitor_kw[:namespaces] = ns if ns
|
108
|
+
|
109
|
+
Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
|
110
|
+
end
|
111
|
+
|
112
|
+
if cache
|
113
|
+
key = SelectorCache.key(selector: selector, visitor: visitor)
|
114
|
+
SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
|
115
|
+
else
|
116
|
+
Parser.new.xpath_for(selector, visitor)
|
117
|
+
end
|
24
118
|
end
|
25
119
|
end
|
26
120
|
end
|
27
121
|
end
|
122
|
+
|
123
|
+
require_relative "css/selector_cache"
|
124
|
+
require_relative "css/node"
|
125
|
+
require_relative "css/xpath_visitor"
|
126
|
+
x = $-w
|
127
|
+
$-w = false
|
128
|
+
require_relative "css/parser"
|
129
|
+
$-w = x
|
130
|
+
|
131
|
+
require_relative "css/tokenizer"
|
132
|
+
require_relative "css/syntax_error"
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module Decorators
|
3
5
|
###
|
@@ -9,31 +11,29 @@ module Nokogiri
|
|
9
11
|
|
10
12
|
###
|
11
13
|
# look for node with +name+. See Nokogiri.Slop
|
12
|
-
def method_missing
|
14
|
+
def method_missing(name, *args, &block)
|
13
15
|
if args.empty?
|
14
|
-
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/,
|
15
|
-
elsif args.first.is_a?
|
16
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
|
17
|
+
elsif args.first.is_a?(Hash)
|
16
18
|
hash = args.first
|
17
19
|
if hash[:css]
|
18
20
|
list = css("#{name}#{hash[:css]}")
|
19
21
|
elsif hash[:xpath]
|
20
|
-
conds = Array(hash[:xpath]).join(
|
22
|
+
conds = Array(hash[:xpath]).join(" and ")
|
21
23
|
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
|
22
24
|
end
|
23
25
|
else
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
)
|
28
|
-
end
|
26
|
+
list = xpath(
|
27
|
+
*CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
|
28
|
+
)
|
29
29
|
end
|
30
30
|
|
31
31
|
super if list.empty?
|
32
32
|
list.length == 1 ? list.first : list
|
33
33
|
end
|
34
34
|
|
35
|
-
def respond_to_missing?
|
36
|
-
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/,
|
35
|
+
def respond_to_missing?(name, include_private = false)
|
36
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
|
37
37
|
|
38
38
|
!list.empty?
|
39
39
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
class EncodingHandler
|
6
|
+
# Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
|
7
|
+
USEFUL_ALIASES = {
|
8
|
+
# alias_name => true_name
|
9
|
+
"ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
|
10
|
+
"NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicating the Nokogiri has installed aliases
|
11
|
+
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
12
|
+
}
|
13
|
+
|
14
|
+
class << self
|
15
|
+
def install_default_aliases
|
16
|
+
USEFUL_ALIASES.each do |alias_name, name|
|
17
|
+
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# :stopdoc:
|
23
|
+
if Nokogiri.jruby?
|
24
|
+
class << self
|
25
|
+
def [](name)
|
26
|
+
storage.key?(name) ? new(storage[name]) : nil
|
27
|
+
end
|
28
|
+
|
29
|
+
def alias(name, alias_name)
|
30
|
+
storage[alias_name] = name
|
31
|
+
end
|
32
|
+
|
33
|
+
def delete(name)
|
34
|
+
storage.delete(name)
|
35
|
+
end
|
36
|
+
|
37
|
+
def clear_aliases!
|
38
|
+
storage.clear
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def storage
|
44
|
+
@storage ||= {}
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def initialize(name)
|
49
|
+
@name = name
|
50
|
+
end
|
51
|
+
|
52
|
+
attr_reader :name
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
Nokogiri::EncodingHandler.install_default_aliases
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# load the C or Java extension
|
4
|
+
begin
|
5
|
+
# native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
|
6
|
+
RUBY_VERSION =~ /(\d+\.\d+)/
|
7
|
+
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
8
|
+
rescue LoadError => e
|
9
|
+
if e.message.include?("GLIBC")
|
10
|
+
warn(<<~EOM)
|
11
|
+
|
12
|
+
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
|
13
|
+
with an unsupported version of glibc.
|
14
|
+
|
15
|
+
#{e.message}
|
16
|
+
|
17
|
+
If that's the case, then please install Nokogiri via the `ruby` platform gem:
|
18
|
+
gem install nokogiri --platform=ruby
|
19
|
+
or:
|
20
|
+
bundle config set force_ruby_platform true
|
21
|
+
|
22
|
+
Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
|
23
|
+
|
24
|
+
EOM
|
25
|
+
raise e
|
26
|
+
end
|
27
|
+
|
28
|
+
# use "require" instead of "require_relative" because non-native gems will place C extension files
|
29
|
+
# in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
|
30
|
+
# is in $LOAD_PATH but not necessarily relative to this file (see #2300)
|
31
|
+
require "nokogiri/nokogiri"
|
32
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module Gumbo
|
5
|
+
# The default maximum number of attributes per element.
|
6
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
7
|
+
|
8
|
+
# The default maximum number of errors for parsing a document or a fragment.
|
9
|
+
DEFAULT_MAX_ERRORS = 0
|
10
|
+
|
11
|
+
# The default maximum depth of the DOM tree produced by parsing a document
|
12
|
+
# or fragment.
|
13
|
+
DEFAULT_MAX_TREE_DEPTH = 400
|
14
|
+
end
|
15
|
+
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,37 +1,48 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
require 'nokogiri/html/sax/parser'
|
6
|
-
require 'nokogiri/html/sax/push_parser'
|
7
|
-
require 'nokogiri/html/element_description'
|
8
|
-
require 'nokogiri/html/element_description_defaults'
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require_relative "html4"
|
9
5
|
|
10
6
|
module Nokogiri
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
7
|
+
# Alias for Nokogiri::HTML4
|
8
|
+
HTML = Nokogiri::HTML4
|
9
|
+
|
10
|
+
# :singleton-method: HTML
|
11
|
+
# :call-seq: HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
|
12
|
+
#
|
13
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
18
14
|
|
15
|
+
# :nodoc:
|
16
|
+
define_singleton_method(:HTML, Nokogiri.method(:HTML4))
|
17
|
+
|
18
|
+
# 💡 This module/namespace is an alias for Nokogiri::HTML4 as of v1.12.0. Before v1.12.0,
|
19
|
+
# Nokogiri::HTML4 did not exist, and this was the module/namespace for all HTML-related
|
20
|
+
# classes.
|
19
21
|
module HTML
|
20
|
-
class
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
# 💡 This class is an alias for Nokogiri::HTML4::Document as of v1.12.0.
|
23
|
+
class Document < Nokogiri::XML::Document
|
24
|
+
end
|
25
|
+
|
26
|
+
# 💡 This class is an alias for Nokogiri::HTML4::DocumentFragment as of v1.12.0.
|
27
|
+
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
28
|
+
end
|
29
|
+
|
30
|
+
# 💡 This class is an alias for Nokogiri::HTML4::Builder as of v1.12.0.
|
31
|
+
class Builder < Nokogiri::XML::Builder
|
32
|
+
end
|
33
|
+
|
34
|
+
module SAX
|
35
|
+
# 💡 This class is an alias for Nokogiri::HTML4::SAX::Parser as of v1.12.0.
|
36
|
+
class Parser < Nokogiri::XML::SAX::Parser
|
25
37
|
end
|
26
38
|
|
27
|
-
|
28
|
-
|
29
|
-
def fragment string, encoding = nil
|
30
|
-
HTML::DocumentFragment.parse string, encoding
|
39
|
+
# 💡 This class is an alias for Nokogiri::HTML4::SAX::ParserContext as of v1.12.0.
|
40
|
+
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
31
41
|
end
|
32
|
-
end
|
33
42
|
|
34
|
-
|
35
|
-
|
43
|
+
# 💡 This class is an alias for Nokogiri::HTML4::SAX::PushParser as of v1.12.0.
|
44
|
+
class PushParser
|
45
|
+
end
|
46
|
+
end
|
36
47
|
end
|
37
48
|
end
|
@@ -1,5 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
|
-
module
|
4
|
+
module HTML4
|
3
5
|
###
|
4
6
|
# Nokogiri HTML builder is used for building HTML documents. It is very
|
5
7
|
# similar to the Nokogiri::XML::Builder. In fact, you should go read the
|
@@ -11,7 +13,7 @@ module Nokogiri
|
|
11
13
|
# Create an HTML document with a body that has an onload attribute, and a
|
12
14
|
# span tag with a class of "bold" that has content of "Hello world".
|
13
15
|
#
|
14
|
-
# builder = Nokogiri::
|
16
|
+
# builder = Nokogiri::HTML4::Builder.new do |doc|
|
15
17
|
# doc.html {
|
16
18
|
# doc.body(:onload => 'some_func();') {
|
17
19
|
# doc.span.bold {
|
@@ -0,0 +1,235 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "pathname"
|
5
|
+
|
6
|
+
module Nokogiri
|
7
|
+
module HTML4
|
8
|
+
class Document < Nokogiri::XML::Document
|
9
|
+
###
|
10
|
+
# Get the meta tag encoding for this document. If there is no meta tag,
|
11
|
+
# then nil is returned.
|
12
|
+
def meta_encoding
|
13
|
+
if (meta = at_xpath("//meta[@charset]"))
|
14
|
+
meta[:charset]
|
15
|
+
elsif (meta = meta_content_type)
|
16
|
+
meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
###
|
21
|
+
# Set the meta tag encoding for this document.
|
22
|
+
#
|
23
|
+
# If a meta encoding tag is already present, its content is
|
24
|
+
# replaced with the given text.
|
25
|
+
#
|
26
|
+
# Otherwise, this method tries to create one at an appropriate
|
27
|
+
# place supplying head and/or html elements as necessary, which
|
28
|
+
# is inside a head element if any, and before any text node or
|
29
|
+
# content element (typically <body>) if any.
|
30
|
+
#
|
31
|
+
# The result when trying to set an encoding that is different
|
32
|
+
# from the document encoding is undefined.
|
33
|
+
#
|
34
|
+
# Beware that, in CRuby, libxml2 automatically inserts a meta tag
|
35
|
+
# into a head element.
|
36
|
+
def meta_encoding=(encoding)
|
37
|
+
if (meta = meta_content_type)
|
38
|
+
meta["content"] = format("text/html; charset=%s", encoding)
|
39
|
+
encoding
|
40
|
+
elsif (meta = at_xpath("//meta[@charset]"))
|
41
|
+
meta["charset"] = encoding
|
42
|
+
else
|
43
|
+
meta = XML::Node.new("meta", self)
|
44
|
+
if (dtd = internal_subset) && dtd.html5_dtd?
|
45
|
+
meta["charset"] = encoding
|
46
|
+
else
|
47
|
+
meta["http-equiv"] = "Content-Type"
|
48
|
+
meta["content"] = format("text/html; charset=%s", encoding)
|
49
|
+
end
|
50
|
+
|
51
|
+
if (head = at_xpath("//head"))
|
52
|
+
head.prepend_child(meta)
|
53
|
+
else
|
54
|
+
set_metadata_element(meta)
|
55
|
+
end
|
56
|
+
encoding
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def meta_content_type
|
61
|
+
xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
|
62
|
+
node["http-equiv"] =~ /\AContent-Type\z/i
|
63
|
+
end
|
64
|
+
end
|
65
|
+
private :meta_content_type
|
66
|
+
|
67
|
+
###
|
68
|
+
# Get the title string of this document. Return nil if there is
|
69
|
+
# no title tag.
|
70
|
+
def title
|
71
|
+
(title = at_xpath("//title")) && title.inner_text
|
72
|
+
end
|
73
|
+
|
74
|
+
###
|
75
|
+
# Set the title string of this document.
|
76
|
+
#
|
77
|
+
# If a title element is already present, its content is replaced
|
78
|
+
# with the given text.
|
79
|
+
#
|
80
|
+
# Otherwise, this method tries to create one at an appropriate
|
81
|
+
# place supplying head and/or html elements as necessary, which
|
82
|
+
# is inside a head element if any, right after a meta
|
83
|
+
# encoding/charset tag if any, and before any text node or
|
84
|
+
# content element (typically <body>) if any.
|
85
|
+
def title=(text)
|
86
|
+
tnode = XML::Text.new(text, self)
|
87
|
+
if (title = at_xpath("//title"))
|
88
|
+
title.children = tnode
|
89
|
+
return text
|
90
|
+
end
|
91
|
+
|
92
|
+
title = XML::Node.new("title", self) << tnode
|
93
|
+
if (head = at_xpath("//head"))
|
94
|
+
head << title
|
95
|
+
elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
|
96
|
+
# it is better to place the title after the charset declaration
|
97
|
+
meta.add_next_sibling(title)
|
98
|
+
else
|
99
|
+
set_metadata_element(title)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def set_metadata_element(element) # rubocop:disable Naming/AccessorMethodName
|
104
|
+
if (head = at_xpath("//head"))
|
105
|
+
head << element
|
106
|
+
elsif (html = at_xpath("//html"))
|
107
|
+
head = html.prepend_child(XML::Node.new("head", self))
|
108
|
+
head.prepend_child(element)
|
109
|
+
elsif (first = children.find do |node|
|
110
|
+
case node
|
111
|
+
when XML::Element, XML::Text
|
112
|
+
true
|
113
|
+
end
|
114
|
+
end)
|
115
|
+
# We reach here only if the underlying document model
|
116
|
+
# allows <html>/<head> elements to be omitted and does not
|
117
|
+
# automatically supply them.
|
118
|
+
first.add_previous_sibling(element)
|
119
|
+
else
|
120
|
+
html = add_child(XML::Node.new("html", self))
|
121
|
+
head = html.add_child(XML::Node.new("head", self))
|
122
|
+
head.prepend_child(element)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
private :set_metadata_element
|
126
|
+
|
127
|
+
####
|
128
|
+
# Serialize Node using +options+. Save options can also be set using a block.
|
129
|
+
#
|
130
|
+
# See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
|
131
|
+
#
|
132
|
+
# These two statements are equivalent:
|
133
|
+
#
|
134
|
+
# node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
|
135
|
+
#
|
136
|
+
# or
|
137
|
+
#
|
138
|
+
# node.serialize(:encoding => 'UTF-8') do |config|
|
139
|
+
# config.format.as_xml
|
140
|
+
# end
|
141
|
+
#
|
142
|
+
def serialize(options = {})
|
143
|
+
options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
|
144
|
+
super
|
145
|
+
end
|
146
|
+
|
147
|
+
####
|
148
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
149
|
+
def fragment(tags = nil)
|
150
|
+
DocumentFragment.new(self, tags, root)
|
151
|
+
end
|
152
|
+
|
153
|
+
# :call-seq:
|
154
|
+
# xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
|
155
|
+
#
|
156
|
+
# [Returns] The document type which determines CSS-to-XPath translation.
|
157
|
+
#
|
158
|
+
# See XPathVisitor for more information.
|
159
|
+
def xpath_doctype
|
160
|
+
Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML4
|
161
|
+
end
|
162
|
+
|
163
|
+
class << self
|
164
|
+
# :call-seq:
|
165
|
+
# parse(input) { |options| ... } => Nokogiri::HTML4::Document
|
166
|
+
# parse(input, url:, encoding:, options:) => Nokogiri::HTML4::Document
|
167
|
+
#
|
168
|
+
# Parse \HTML4 input from a String or IO object, and return a new HTML4::Document.
|
169
|
+
#
|
170
|
+
# [Required Parameters]
|
171
|
+
# - +input+ (String | IO) The content to be parsed.
|
172
|
+
#
|
173
|
+
# [Optional Keyword Arguments]
|
174
|
+
# - +url:+ (String) The base URI for this document.
|
175
|
+
#
|
176
|
+
# - +encoding:+ (String) The name of the encoding that should be used when processing the
|
177
|
+
# document. When not provided, the encoding will be determined based on the document
|
178
|
+
# content.
|
179
|
+
#
|
180
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
181
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
182
|
+
# +ParseOptions::DEFAULT_HTML+.
|
183
|
+
#
|
184
|
+
# [Yields]
|
185
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
186
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
187
|
+
#
|
188
|
+
# [Returns] Nokogiri::HTML4::Document
|
189
|
+
def parse(
|
190
|
+
input,
|
191
|
+
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
|
192
|
+
url: url_, encoding: encoding_, options: options_
|
193
|
+
)
|
194
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
195
|
+
yield options if block_given?
|
196
|
+
|
197
|
+
url ||= input.respond_to?(:path) ? input.path : nil
|
198
|
+
|
199
|
+
if input.respond_to?(:encoding)
|
200
|
+
unless input.encoding == Encoding::ASCII_8BIT
|
201
|
+
encoding ||= input.encoding.name
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
if input.respond_to?(:read)
|
206
|
+
if input.is_a?(Pathname)
|
207
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
208
|
+
input = input.expand_path.open
|
209
|
+
url ||= input.path
|
210
|
+
end
|
211
|
+
|
212
|
+
unless encoding
|
213
|
+
input = EncodingReader.new(input)
|
214
|
+
begin
|
215
|
+
return read_io(input, url, encoding, options.to_i)
|
216
|
+
rescue EncodingReader::EncodingFound => e
|
217
|
+
encoding = e.found_encoding
|
218
|
+
end
|
219
|
+
end
|
220
|
+
return read_io(input, url, encoding, options.to_i)
|
221
|
+
end
|
222
|
+
|
223
|
+
# read_memory pukes on empty docs
|
224
|
+
if input.nil? || input.empty?
|
225
|
+
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
226
|
+
end
|
227
|
+
|
228
|
+
encoding ||= EncodingReader.detect_encoding(input)
|
229
|
+
|
230
|
+
read_memory(input, url, encoding, options.to_i)
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|