nokogiri 1.13.8 → 1.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +18 -11
- data/dependencies.yml +33 -15
- data/ext/nokogiri/extconf.rb +164 -46
- data/ext/nokogiri/gumbo.c +20 -10
- data/ext/nokogiri/html4_document.c +3 -4
- data/ext/nokogiri/html4_element_description.c +20 -15
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +11 -22
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +84 -75
- data/ext/nokogiri/nokogiri.h +31 -16
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_attribute_decl.c +2 -2
- data/ext/nokogiri/xml_cdata.c +32 -18
- data/ext/nokogiri/xml_comment.c +2 -2
- data/ext/nokogiri/xml_document.c +127 -34
- data/ext/nokogiri/xml_document_fragment.c +2 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_element_content.c +34 -31
- data/ext/nokogiri/xml_element_decl.c +7 -7
- data/ext/nokogiri/xml_encoding_handler.c +15 -7
- data/ext/nokogiri/xml_entity_decl.c +1 -1
- data/ext/nokogiri/xml_entity_reference.c +2 -2
- data/ext/nokogiri/xml_namespace.c +79 -14
- data/ext/nokogiri/xml_node.c +300 -34
- data/ext/nokogiri/xml_node_set.c +125 -107
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +81 -48
- data/ext/nokogiri/xml_relax_ng.c +66 -81
- data/ext/nokogiri/xml_sax_parser.c +45 -20
- data/ext/nokogiri/xml_sax_parser_context.c +46 -30
- data/ext/nokogiri/xml_sax_push_parser.c +30 -11
- data/ext/nokogiri/xml_schema.c +95 -117
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +28 -14
- data/ext/nokogiri/xml_xpath_context.c +216 -136
- data/ext/nokogiri/xslt_stylesheet.c +118 -64
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +10 -6
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +15 -16
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +7 -5
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/slop.rb +1 -1
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +10 -3
- data/lib/nokogiri/html5/node.rb +8 -5
- data/lib/nokogiri/html5.rb +130 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -55
- data/lib/nokogiri/xml/document_fragment.rb +50 -7
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -4
- data/lib/nokogiri/xml/node.rb +212 -48
- data/lib/nokogiri/xml/node_set.rb +88 -9
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +28 -15
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xml/searchable.rb +18 -10
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +15 -15
- data/lib/xsd/xmlparser/nokogiri.rb +4 -2
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +19 -242
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -1,84 +1,163 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module Nokogiri
|
4
5
|
module XML
|
5
|
-
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
# You can
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
6
|
+
# Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
|
7
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
8
|
+
#
|
9
|
+
# These options directly expose libxml2's parse options, which are all boolean in the sense that
|
10
|
+
# an option is "on" or "off".
|
11
|
+
#
|
12
|
+
# 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
|
13
|
+
# HTML5 specification. See Nokogiri::HTML5.
|
14
|
+
#
|
15
|
+
# ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
|
16
|
+
# behavior in Xerces/NekoHTML on JRuby when it's possible.
|
17
|
+
#
|
18
|
+
# == Setting and unsetting parse options
|
19
|
+
#
|
20
|
+
# You can build your own combinations of parse options by using any of the following methods:
|
21
|
+
#
|
22
|
+
# [ParseOptions method chaining]
|
23
|
+
#
|
24
|
+
# Every option has an equivalent method in lowercase. You can chain these methods together to
|
25
|
+
# set various combinations.
|
26
|
+
#
|
27
|
+
# # Set the HUGE & PEDANTIC options
|
28
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
29
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
30
|
+
#
|
31
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
|
32
|
+
# methods on an instance of ParseOptions to unset the option.
|
33
|
+
#
|
34
|
+
# # Set the HUGE & PEDANTIC options
|
35
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
36
|
+
#
|
37
|
+
# # later we want to modify the options
|
38
|
+
# po.nohuge # Unset the HUGE option
|
39
|
+
# po.nopedantic # Unset the PEDANTIC option
|
40
|
+
#
|
41
|
+
# 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
|
42
|
+
# double negative:
|
43
|
+
#
|
44
|
+
# po.nocdata # Set the NOCDATA parse option
|
45
|
+
# po.nonocdata # Unset the NOCDATA parse option
|
46
|
+
#
|
47
|
+
# 💡 Note that negation is not available for STRICT, which is itself a negation of all other
|
48
|
+
# features.
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# [Using Ruby Blocks]
|
52
|
+
#
|
53
|
+
# Most parsing methods will accept a block for configuration of parse options, and we
|
54
|
+
# recommend chaining the setter methods:
|
55
|
+
#
|
56
|
+
# doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
|
57
|
+
#
|
58
|
+
#
|
59
|
+
# [ParseOptions constants]
|
60
|
+
#
|
61
|
+
# You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
|
62
|
+
# combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
|
63
|
+
#
|
64
|
+
# po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
|
65
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
28
66
|
#
|
29
67
|
class ParseOptions
|
30
68
|
# Strict parsing
|
31
69
|
STRICT = 0
|
32
|
-
|
70
|
+
|
71
|
+
# Recover from errors. On by default for XML::Document, XML::DocumentFragment,
|
72
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
33
73
|
RECOVER = 1 << 0
|
34
|
-
|
74
|
+
|
75
|
+
# Substitute entities. Off by default.
|
76
|
+
#
|
77
|
+
# ⚠ This option enables entity substitution, contrary to what the name implies.
|
78
|
+
#
|
79
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
35
80
|
NOENT = 1 << 1
|
36
|
-
|
81
|
+
|
82
|
+
# Load external subsets. On by default for XSLT::Stylesheet.
|
83
|
+
#
|
84
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
37
85
|
DTDLOAD = 1 << 2
|
38
|
-
|
86
|
+
|
87
|
+
# Default DTD attributes. On by default for XSLT::Stylesheet.
|
39
88
|
DTDATTR = 1 << 3
|
40
|
-
|
89
|
+
|
90
|
+
# Validate with the DTD. Off by default.
|
41
91
|
DTDVALID = 1 << 4
|
42
|
-
|
92
|
+
|
93
|
+
# Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment.
|
43
94
|
NOERROR = 1 << 5
|
44
|
-
|
95
|
+
|
96
|
+
# Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment.
|
45
97
|
NOWARNING = 1 << 6
|
46
|
-
|
98
|
+
|
99
|
+
# Enable pedantic error reporting. Off by default.
|
47
100
|
PEDANTIC = 1 << 7
|
48
|
-
|
101
|
+
|
102
|
+
# Remove blank nodes. Off by default.
|
49
103
|
NOBLANKS = 1 << 8
|
50
|
-
|
104
|
+
|
105
|
+
# Use the SAX1 interface internally. Off by default.
|
51
106
|
SAX1 = 1 << 9
|
52
|
-
|
107
|
+
|
108
|
+
# Implement XInclude substitution. Off by default.
|
53
109
|
XINCLUDE = 1 << 10
|
54
|
-
|
110
|
+
|
111
|
+
# Forbid network access. On by default for XML::Document, XML::DocumentFragment,
|
112
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
113
|
+
#
|
114
|
+
# ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
|
55
115
|
NONET = 1 << 11
|
56
|
-
|
116
|
+
|
117
|
+
# Do not reuse the context dictionary. Off by default.
|
57
118
|
NODICT = 1 << 12
|
58
|
-
|
119
|
+
|
120
|
+
# Remove redundant namespace declarations. Off by default.
|
59
121
|
NSCLEAN = 1 << 13
|
60
|
-
|
122
|
+
|
123
|
+
# Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
|
61
124
|
NOCDATA = 1 << 14
|
62
|
-
|
125
|
+
|
126
|
+
# Do not generate XInclude START/END nodes. Off by default.
|
63
127
|
NOXINCNODE = 1 << 15
|
64
|
-
|
128
|
+
|
129
|
+
# Compact small text nodes. Off by default.
|
130
|
+
#
|
131
|
+
# ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
|
132
|
+
# modify the tree.
|
65
133
|
COMPACT = 1 << 16
|
66
|
-
|
134
|
+
|
135
|
+
# Parse using XML-1.0 before update 5. Off by default.
|
67
136
|
OLD10 = 1 << 17
|
68
|
-
|
137
|
+
|
138
|
+
# Do not fixup XInclude xml:base uris. Off by default.
|
69
139
|
NOBASEFIX = 1 << 18
|
70
|
-
|
140
|
+
|
141
|
+
# Relax any hardcoded limit from the parser. Off by default.
|
142
|
+
#
|
143
|
+
# ⚠ There may be a performance penalty when this option is set.
|
71
144
|
HUGE = 1 << 19
|
72
|
-
|
145
|
+
|
146
|
+
# Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
|
147
|
+
# by default for XML::Document, XML::DocumentFragment, HTML4::Document,
|
148
|
+
# HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
73
149
|
BIG_LINES = 1 << 22
|
74
150
|
|
75
|
-
#
|
151
|
+
# The options mask used by default for parsing XML::Document and XML::DocumentFragment
|
76
152
|
DEFAULT_XML = RECOVER | NONET | BIG_LINES
|
77
|
-
|
153
|
+
|
154
|
+
# The options mask used by default for parsing XSLT::Stylesheet
|
78
155
|
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
|
79
|
-
|
156
|
+
|
157
|
+
# The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
|
80
158
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
|
81
|
-
|
159
|
+
|
160
|
+
# The options mask used by default for parsing XML::Schema
|
82
161
|
DEFAULT_SCHEMA = NONET | BIG_LINES
|
83
162
|
|
84
163
|
attr_accessor :options
|
@@ -90,7 +169,7 @@ module Nokogiri
|
|
90
169
|
constants.each do |constant|
|
91
170
|
next if constant.to_sym == :STRICT
|
92
171
|
|
93
|
-
class_eval
|
172
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
94
173
|
def #{constant.downcase}
|
95
174
|
@options |= #{constant}
|
96
175
|
self
|
@@ -104,7 +183,7 @@ module Nokogiri
|
|
104
183
|
def #{constant.downcase}?
|
105
184
|
#{constant} & @options == #{constant}
|
106
185
|
end
|
107
|
-
|
186
|
+
RUBY
|
108
187
|
end
|
109
188
|
|
110
189
|
def strict
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -5,15 +5,22 @@ module Nokogiri
|
|
5
5
|
# :nodoc: all
|
6
6
|
module PP
|
7
7
|
module Node
|
8
|
+
COLLECTIONS = [:attribute_nodes, :children]
|
9
|
+
|
8
10
|
def inspect
|
9
11
|
attributes = inspect_attributes.reject do |x|
|
10
12
|
attribute = send(x)
|
11
13
|
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
12
14
|
rescue NoMethodError
|
13
15
|
true
|
14
|
-
end
|
15
|
-
|
16
|
-
|
16
|
+
end
|
17
|
+
attributes = if inspect_attributes.length == 1
|
18
|
+
send(attributes.first).inspect
|
19
|
+
else
|
20
|
+
attributes.map do |attribute|
|
21
|
+
"#{attribute}=#{send(attribute).inspect}"
|
22
|
+
end.join(" ")
|
23
|
+
end
|
17
24
|
"#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
|
18
25
|
end
|
19
26
|
|
@@ -21,11 +28,12 @@ module Nokogiri
|
|
21
28
|
nice_name = self.class.name.split("::").last
|
22
29
|
pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
|
23
30
|
pp.breakable
|
24
|
-
|
31
|
+
|
32
|
+
attrs = inspect_attributes.filter_map do |t|
|
25
33
|
[t, send(t)] if respond_to?(t)
|
26
|
-
end.
|
34
|
+
end.find_all do |x|
|
27
35
|
if x.last
|
28
|
-
if
|
36
|
+
if COLLECTIONS.include?(x.first)
|
29
37
|
!x.last.empty?
|
30
38
|
else
|
31
39
|
true
|
@@ -33,19 +41,24 @@ module Nokogiri
|
|
33
41
|
end
|
34
42
|
end
|
35
43
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
44
|
+
if inspect_attributes.length == 1
|
45
|
+
pp.pp(attrs.first.last)
|
46
|
+
else
|
47
|
+
pp.seplist(attrs) do |v|
|
48
|
+
if COLLECTIONS.include?(v.first)
|
49
|
+
pp.group(2, "#{v.first} = [", "]") do
|
50
|
+
pp.breakable
|
51
|
+
pp.seplist(v.last) do |item|
|
52
|
+
pp.pp(item)
|
53
|
+
end
|
42
54
|
end
|
55
|
+
else
|
56
|
+
pp.text("#{v.first} = ")
|
57
|
+
pp.pp(v.last)
|
43
58
|
end
|
44
|
-
else
|
45
|
-
pp.text("#{v.first} = ")
|
46
|
-
pp.pp(v.last)
|
47
59
|
end
|
48
60
|
end
|
61
|
+
|
49
62
|
pp.breakable
|
50
63
|
end
|
51
64
|
end
|
@@ -100,7 +100,7 @@ module Nokogiri
|
|
100
100
|
# +prefix+ is the namespace prefix for the element
|
101
101
|
# +uri+ is the associated namespace URI
|
102
102
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
103
|
-
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
103
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
|
104
104
|
###
|
105
105
|
# Deal with SAX v1 interface
|
106
106
|
name = [prefix, name].compact.join(":")
|
@@ -88,9 +88,8 @@ module Nokogiri
|
|
88
88
|
|
89
89
|
###
|
90
90
|
# Parse given +io+
|
91
|
-
def parse_io(io, encoding =
|
92
|
-
|
93
|
-
ctx = ParserContext.io(io, ENCODINGS[@encoding])
|
91
|
+
def parse_io(io, encoding = @encoding)
|
92
|
+
ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
|
94
93
|
yield ctx if block_given?
|
95
94
|
ctx.parse_with(self)
|
96
95
|
end
|
@@ -36,16 +36,19 @@ module Nokogiri
|
|
36
36
|
# node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
37
37
|
#
|
38
38
|
# 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
|
39
|
-
# functions create a class and implement the function you want to define
|
40
|
-
#
|
41
|
-
#
|
39
|
+
# functions create a class and implement the function you want to define, which will be in the
|
40
|
+
# `nokogiri` namespace in XPath queries.
|
41
|
+
#
|
42
|
+
# The first argument to the method will be the current matching NodeSet. Any other arguments
|
43
|
+
# are ones that you pass in. Note that this class may appear anywhere in the argument
|
44
|
+
# list. For example:
|
42
45
|
#
|
43
46
|
# handler = Class.new {
|
44
47
|
# def regex node_set, regex
|
45
48
|
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
46
49
|
# end
|
47
50
|
# }.new
|
48
|
-
# node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
|
51
|
+
# node.search('.//title[nokogiri:regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
|
49
52
|
#
|
50
53
|
# See Searchable#xpath and Searchable#css for further usage help.
|
51
54
|
def search(*args)
|
@@ -160,16 +163,18 @@ module Nokogiri
|
|
160
163
|
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
161
164
|
#
|
162
165
|
# 💡 Custom XPath functions may also be defined. To define custom functions create a class and
|
163
|
-
# implement the function you want to define
|
164
|
-
#
|
165
|
-
#
|
166
|
+
# implement the function you want to define, which will be in the `nokogiri` namespace.
|
167
|
+
#
|
168
|
+
# The first argument to the method will be the current matching NodeSet. Any other arguments
|
169
|
+
# are ones that you pass in. Note that this class may appear anywhere in the argument
|
170
|
+
# list. For example:
|
166
171
|
#
|
167
172
|
# handler = Class.new {
|
168
173
|
# def regex(node_set, regex)
|
169
174
|
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
170
175
|
# end
|
171
176
|
# }.new
|
172
|
-
# node.xpath('.//title[regex(., "\w+")]', handler)
|
177
|
+
# node.xpath('.//title[nokogiri:regex(., "\w+")]', handler)
|
173
178
|
#
|
174
179
|
def xpath(*args)
|
175
180
|
paths, handler, ns, binds = extract_params(args)
|
@@ -243,8 +248,11 @@ module Nokogiri
|
|
243
248
|
doctype: document.xpath_doctype,
|
244
249
|
)
|
245
250
|
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
246
|
-
CSS.xpath_for(rule.to_s, {
|
247
|
-
|
251
|
+
CSS.xpath_for(rule.to_s, {
|
252
|
+
prefix: implied_xpath_context,
|
253
|
+
ns: ns,
|
254
|
+
visitor: visitor,
|
255
|
+
})
|
248
256
|
end.join(" | ")
|
249
257
|
end
|
250
258
|
|
data/lib/nokogiri/xslt.rb
CHANGED
@@ -20,8 +20,59 @@ module Nokogiri
|
|
20
20
|
# Stylesheet object.
|
21
21
|
module XSLT
|
22
22
|
class << self
|
23
|
-
|
24
|
-
#
|
23
|
+
# :call-seq:
|
24
|
+
# parse(xsl) → Nokogiri::XSLT::Stylesheet
|
25
|
+
# parse(xsl, modules) → Nokogiri::XSLT::Stylesheet
|
26
|
+
#
|
27
|
+
# Parse the stylesheet in +xsl+, registering optional +modules+ as custom class handlers.
|
28
|
+
#
|
29
|
+
# [Parameters]
|
30
|
+
# - +xsl+ (String) XSL content to be parsed into a stylesheet
|
31
|
+
# - +modules+ (Hash<String ⇒ Class>) A hash of URI-to-handler relations for linking a
|
32
|
+
# namespace to a custom function handler.
|
33
|
+
#
|
34
|
+
# ⚠ The XSLT handler classes are registered *globally*.
|
35
|
+
#
|
36
|
+
# Also see Nokogiri::XSLT.register
|
37
|
+
#
|
38
|
+
# *Example*
|
39
|
+
#
|
40
|
+
# xml = Nokogiri.XML(<<~XML)
|
41
|
+
# <nodes>
|
42
|
+
# <node>Foo</node>
|
43
|
+
# <node>Bar</node>
|
44
|
+
# </nodes>
|
45
|
+
# XML
|
46
|
+
#
|
47
|
+
# handler = Class.new do
|
48
|
+
# def reverse(node)
|
49
|
+
# node.text.reverse
|
50
|
+
# end
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
# xsl = <<~XSL
|
54
|
+
# <xsl:stylesheet version="1.0"
|
55
|
+
# xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
56
|
+
# xmlns:myfuncs="http://nokogiri.org/xslt/myfuncs"
|
57
|
+
# extension-element-prefixes="myfuncs">
|
58
|
+
# <xsl:template match="/">
|
59
|
+
# <reversed>
|
60
|
+
# <xsl:for-each select="nodes/node">
|
61
|
+
# <reverse><xsl:copy-of select="myfuncs:reverse(.)"/></reverse>
|
62
|
+
# </xsl:for-each>
|
63
|
+
# </reversed>
|
64
|
+
# </xsl:template>
|
65
|
+
# </xsl:stylesheet>
|
66
|
+
# XSL
|
67
|
+
#
|
68
|
+
# xsl = Nokogiri.XSLT(xsl, "http://nokogiri.org/xslt/myfuncs" => handler)
|
69
|
+
# xsl.transform(xml).to_xml
|
70
|
+
# # => "<?xml version=\"1.0\"?>\n" +
|
71
|
+
# # "<reversed>\n" +
|
72
|
+
# # " <reverse>ooF</reverse>\n" +
|
73
|
+
# # " <reverse>raB</reverse>\n" +
|
74
|
+
# # "</reversed>\n"
|
75
|
+
#
|
25
76
|
def parse(string, modules = {})
|
26
77
|
modules.each do |url, klass|
|
27
78
|
XSLT.register(url, klass)
|
@@ -47,9 +98,9 @@ module Nokogiri
|
|
47
98
|
# [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
|
48
99
|
#
|
49
100
|
def quote_params(params)
|
50
|
-
params.flatten.each_slice(2).
|
101
|
+
params.flatten.each_slice(2).with_object([]) do |kv, quoted_params|
|
51
102
|
key, value = kv.map(&:to_s)
|
52
|
-
value = if
|
103
|
+
value = if value.include?("'")
|
53
104
|
"concat('#{value.gsub(/'/, %q{', "'", '})}')"
|
54
105
|
else
|
55
106
|
"'#{value}'"
|
@@ -58,6 +109,25 @@ module Nokogiri
|
|
58
109
|
quoted_params << value
|
59
110
|
end
|
60
111
|
end
|
112
|
+
|
113
|
+
# call-seq:
|
114
|
+
# register(uri, custom_handler_class)
|
115
|
+
#
|
116
|
+
# Register a class that implements custom XSLT transformation functions.
|
117
|
+
#
|
118
|
+
# ⚠ The XSLT handler classes are registered *globally*.
|
119
|
+
#
|
120
|
+
# [Parameters]
|
121
|
+
# - +uri+ (String) The namespace for the custom handlers
|
122
|
+
# - +custom_handler_class+ (Class) A class with ruby methods that can be called during
|
123
|
+
# transformation
|
124
|
+
#
|
125
|
+
# See Nokogiri::XSLT.parse for usage.
|
126
|
+
#
|
127
|
+
def register(uri, custom_handler_class)
|
128
|
+
# NOTE: this is implemented in the C extension, see ext/nokogiri/xslt_stylesheet.c
|
129
|
+
raise NotImplementedError, "Nokogiri::XSLT.register is not implemented on JRuby"
|
130
|
+
end if Nokogiri.jruby?
|
61
131
|
end
|
62
132
|
end
|
63
133
|
end
|
data/lib/nokogiri.rb
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
require "rbconfig"
|
5
|
-
|
6
4
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
7
5
|
require_relative "nokogiri/jruby/dependencies"
|
8
6
|
end
|
@@ -47,11 +45,19 @@ module Nokogiri
|
|
47
45
|
# Expect an HTML indicator to appear within the first 512
|
48
46
|
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
49
47
|
# shouldn't be that long)
|
50
|
-
Nokogiri.HTML4(
|
51
|
-
|
48
|
+
Nokogiri.HTML4(
|
49
|
+
string,
|
50
|
+
url,
|
51
|
+
encoding,
|
52
|
+
options || XML::ParseOptions::DEFAULT_HTML,
|
53
|
+
)
|
52
54
|
else
|
53
|
-
Nokogiri.XML(
|
54
|
-
|
55
|
+
Nokogiri.XML(
|
56
|
+
string,
|
57
|
+
url,
|
58
|
+
encoding,
|
59
|
+
options || XML::ParseOptions::DEFAULT_XML,
|
60
|
+
)
|
55
61
|
end.tap do |doc|
|
56
62
|
yield doc if block_given?
|
57
63
|
end
|
@@ -88,17 +94,10 @@ module Nokogiri
|
|
88
94
|
|
89
95
|
# :nodoc:
|
90
96
|
def install_default_aliases
|
91
|
-
|
92
|
-
|
93
|
-
{
|
94
|
-
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
95
|
-
}.each do |alias_name, name|
|
96
|
-
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
97
|
-
end
|
97
|
+
warn("Nokogiri.install_default_aliases is deprecated and will be removed in a future version of Nokogiri. Please call Nokogiri::EncodingHandler.install_default_aliases instead.")
|
98
|
+
Nokogiri::EncodingHandler.install_default_aliases
|
98
99
|
end
|
99
100
|
end
|
100
|
-
|
101
|
-
Nokogiri.install_default_aliases
|
102
101
|
end
|
103
102
|
|
104
103
|
###
|
@@ -124,5 +123,6 @@ require_relative "nokogiri/html"
|
|
124
123
|
require_relative "nokogiri/decorators/slop"
|
125
124
|
require_relative "nokogiri/css"
|
126
125
|
require_relative "nokogiri/html4/builder"
|
126
|
+
require_relative "nokogiri/encoding_handler"
|
127
127
|
|
128
128
|
require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
|
@@ -71,7 +71,7 @@ module XSD
|
|
71
71
|
# +prefix+ is the namespace prefix for the element
|
72
72
|
# +uri+ is the associated namespace URI
|
73
73
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
74
|
-
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
74
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
|
75
75
|
###
|
76
76
|
# Deal with SAX v1 interface
|
77
77
|
name = [prefix, name].compact.join(":")
|
@@ -95,7 +95,9 @@ module XSD
|
|
95
95
|
end
|
96
96
|
|
97
97
|
["xmldecl", "start_document", "end_document", "comment"].each do |name|
|
98
|
-
class_eval
|
98
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
99
|
+
def #{name}(*args); end
|
100
|
+
RUBY
|
99
101
|
end
|
100
102
|
|
101
103
|
add_factory(self)
|