nokogiri 1.13.6 → 1.16.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +43 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +21 -11
- data/dependencies.yml +34 -15
- data/ext/nokogiri/extconf.rb +167 -48
- data/ext/nokogiri/gumbo.c +21 -11
- data/ext/nokogiri/html4_document.c +3 -4
- data/ext/nokogiri/html4_element_description.c +20 -15
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +11 -22
- data/ext/nokogiri/html4_sax_push_parser.c +4 -4
- data/ext/nokogiri/nokogiri.c +84 -75
- data/ext/nokogiri/nokogiri.h +46 -16
- data/ext/nokogiri/test_global_handlers.c +2 -2
- data/ext/nokogiri/xml_attr.c +3 -3
- data/ext/nokogiri/xml_attribute_decl.c +5 -5
- data/ext/nokogiri/xml_cdata.c +31 -18
- data/ext/nokogiri/xml_comment.c +2 -2
- data/ext/nokogiri/xml_document.c +135 -38
- data/ext/nokogiri/xml_document_fragment.c +2 -2
- data/ext/nokogiri/xml_dtd.c +9 -9
- data/ext/nokogiri/xml_element_content.c +34 -31
- data/ext/nokogiri/xml_element_decl.c +10 -10
- data/ext/nokogiri/xml_encoding_handler.c +15 -7
- data/ext/nokogiri/xml_entity_decl.c +6 -6
- data/ext/nokogiri/xml_entity_reference.c +2 -2
- data/ext/nokogiri/xml_namespace.c +75 -14
- data/ext/nokogiri/xml_node.c +365 -87
- data/ext/nokogiri/xml_node_set.c +129 -111
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +126 -64
- data/ext/nokogiri/xml_relax_ng.c +67 -82
- data/ext/nokogiri/xml_sax_parser.c +45 -20
- data/ext/nokogiri/xml_sax_parser_context.c +50 -30
- data/ext/nokogiri/xml_sax_push_parser.c +31 -12
- data/ext/nokogiri/xml_schema.c +95 -118
- data/ext/nokogiri/xml_syntax_error.c +4 -4
- data/ext/nokogiri/xml_text.c +27 -14
- data/ext/nokogiri/xml_xpath_context.c +213 -136
- data/ext/nokogiri/xslt_stylesheet.c +126 -67
- data/gumbo-parser/Makefile +28 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +10 -6
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +15 -16
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +29 -10
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +2 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +8 -26
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/slop.rb +1 -1
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/html4/document.rb +3 -122
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +10 -3
- data/lib/nokogiri/html5/node.rb +8 -5
- data/lib/nokogiri/html5.rb +74 -226
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +16 -14
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +103 -56
- data/lib/nokogiri/xml/document_fragment.rb +50 -7
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +41 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -4
- data/lib/nokogiri/xml/node.rb +241 -70
- data/lib/nokogiri/xml/node_set.rb +90 -11
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +28 -15
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +16 -17
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +75 -5
- data/lib/nokogiri.rb +15 -15
- data/lib/xsd/xmlparser/nokogiri.rb +4 -2
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +21 -248
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -1,84 +1,163 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module Nokogiri
|
4
5
|
module XML
|
5
|
-
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
# You can
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
6
|
+
# Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
|
7
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
8
|
+
#
|
9
|
+
# These options directly expose libxml2's parse options, which are all boolean in the sense that
|
10
|
+
# an option is "on" or "off".
|
11
|
+
#
|
12
|
+
# 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
|
13
|
+
# HTML5 specification. See Nokogiri::HTML5.
|
14
|
+
#
|
15
|
+
# ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
|
16
|
+
# behavior in Xerces/NekoHTML on JRuby when it's possible.
|
17
|
+
#
|
18
|
+
# == Setting and unsetting parse options
|
19
|
+
#
|
20
|
+
# You can build your own combinations of parse options by using any of the following methods:
|
21
|
+
#
|
22
|
+
# [ParseOptions method chaining]
|
23
|
+
#
|
24
|
+
# Every option has an equivalent method in lowercase. You can chain these methods together to
|
25
|
+
# set various combinations.
|
26
|
+
#
|
27
|
+
# # Set the HUGE & PEDANTIC options
|
28
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
29
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
30
|
+
#
|
31
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
|
32
|
+
# methods on an instance of ParseOptions to unset the option.
|
33
|
+
#
|
34
|
+
# # Set the HUGE & PEDANTIC options
|
35
|
+
# po = Nokogiri::XML::ParseOptions.new.huge.pedantic
|
36
|
+
#
|
37
|
+
# # later we want to modify the options
|
38
|
+
# po.nohuge # Unset the HUGE option
|
39
|
+
# po.nopedantic # Unset the PEDANTIC option
|
40
|
+
#
|
41
|
+
# 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
|
42
|
+
# double negative:
|
43
|
+
#
|
44
|
+
# po.nocdata # Set the NOCDATA parse option
|
45
|
+
# po.nonocdata # Unset the NOCDATA parse option
|
46
|
+
#
|
47
|
+
# 💡 Note that negation is not available for STRICT, which is itself a negation of all other
|
48
|
+
# features.
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# [Using Ruby Blocks]
|
52
|
+
#
|
53
|
+
# Most parsing methods will accept a block for configuration of parse options, and we
|
54
|
+
# recommend chaining the setter methods:
|
55
|
+
#
|
56
|
+
# doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
|
57
|
+
#
|
58
|
+
#
|
59
|
+
# [ParseOptions constants]
|
60
|
+
#
|
61
|
+
# You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
|
62
|
+
# combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
|
63
|
+
#
|
64
|
+
# po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
|
65
|
+
# doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
|
28
66
|
#
|
29
67
|
class ParseOptions
|
30
68
|
# Strict parsing
|
31
69
|
STRICT = 0
|
32
|
-
|
70
|
+
|
71
|
+
# Recover from errors. On by default for XML::Document, XML::DocumentFragment,
|
72
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
33
73
|
RECOVER = 1 << 0
|
34
|
-
|
74
|
+
|
75
|
+
# Substitute entities. Off by default.
|
76
|
+
#
|
77
|
+
# ⚠ This option enables entity substitution, contrary to what the name implies.
|
78
|
+
#
|
79
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
35
80
|
NOENT = 1 << 1
|
36
|
-
|
81
|
+
|
82
|
+
# Load external subsets. On by default for XSLT::Stylesheet.
|
83
|
+
#
|
84
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
37
85
|
DTDLOAD = 1 << 2
|
38
|
-
|
86
|
+
|
87
|
+
# Default DTD attributes. On by default for XSLT::Stylesheet.
|
39
88
|
DTDATTR = 1 << 3
|
40
|
-
|
89
|
+
|
90
|
+
# Validate with the DTD. Off by default.
|
41
91
|
DTDVALID = 1 << 4
|
42
|
-
|
92
|
+
|
93
|
+
# Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment.
|
43
94
|
NOERROR = 1 << 5
|
44
|
-
|
95
|
+
|
96
|
+
# Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment.
|
45
97
|
NOWARNING = 1 << 6
|
46
|
-
|
98
|
+
|
99
|
+
# Enable pedantic error reporting. Off by default.
|
47
100
|
PEDANTIC = 1 << 7
|
48
|
-
|
101
|
+
|
102
|
+
# Remove blank nodes. Off by default.
|
49
103
|
NOBLANKS = 1 << 8
|
50
|
-
|
104
|
+
|
105
|
+
# Use the SAX1 interface internally. Off by default.
|
51
106
|
SAX1 = 1 << 9
|
52
|
-
|
107
|
+
|
108
|
+
# Implement XInclude substitution. Off by default.
|
53
109
|
XINCLUDE = 1 << 10
|
54
|
-
|
110
|
+
|
111
|
+
# Forbid network access. On by default for XML::Document, XML::DocumentFragment,
|
112
|
+
# HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
113
|
+
#
|
114
|
+
# ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
|
55
115
|
NONET = 1 << 11
|
56
|
-
|
116
|
+
|
117
|
+
# Do not reuse the context dictionary. Off by default.
|
57
118
|
NODICT = 1 << 12
|
58
|
-
|
119
|
+
|
120
|
+
# Remove redundant namespace declarations. Off by default.
|
59
121
|
NSCLEAN = 1 << 13
|
60
|
-
|
122
|
+
|
123
|
+
# Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
|
61
124
|
NOCDATA = 1 << 14
|
62
|
-
|
125
|
+
|
126
|
+
# Do not generate XInclude START/END nodes. Off by default.
|
63
127
|
NOXINCNODE = 1 << 15
|
64
|
-
|
128
|
+
|
129
|
+
# Compact small text nodes. Off by default.
|
130
|
+
#
|
131
|
+
# ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
|
132
|
+
# modify the tree.
|
65
133
|
COMPACT = 1 << 16
|
66
|
-
|
134
|
+
|
135
|
+
# Parse using XML-1.0 before update 5. Off by default.
|
67
136
|
OLD10 = 1 << 17
|
68
|
-
|
137
|
+
|
138
|
+
# Do not fixup XInclude xml:base uris. Off by default.
|
69
139
|
NOBASEFIX = 1 << 18
|
70
|
-
|
140
|
+
|
141
|
+
# Relax any hardcoded limit from the parser. Off by default.
|
142
|
+
#
|
143
|
+
# ⚠ There may be a performance penalty when this option is set.
|
71
144
|
HUGE = 1 << 19
|
72
|
-
|
145
|
+
|
146
|
+
# Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
|
147
|
+
# by default for XML::Document, XML::DocumentFragment, HTML4::Document,
|
148
|
+
# HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
|
73
149
|
BIG_LINES = 1 << 22
|
74
150
|
|
75
|
-
#
|
151
|
+
# The options mask used by default for parsing XML::Document and XML::DocumentFragment
|
76
152
|
DEFAULT_XML = RECOVER | NONET | BIG_LINES
|
77
|
-
|
153
|
+
|
154
|
+
# The options mask used by default for parsing XSLT::Stylesheet
|
78
155
|
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
|
79
|
-
|
156
|
+
|
157
|
+
# The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
|
80
158
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
|
81
|
-
|
159
|
+
|
160
|
+
# The options mask used by default for parsing XML::Schema
|
82
161
|
DEFAULT_SCHEMA = NONET | BIG_LINES
|
83
162
|
|
84
163
|
attr_accessor :options
|
@@ -90,7 +169,7 @@ module Nokogiri
|
|
90
169
|
constants.each do |constant|
|
91
170
|
next if constant.to_sym == :STRICT
|
92
171
|
|
93
|
-
class_eval
|
172
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
94
173
|
def #{constant.downcase}
|
95
174
|
@options |= #{constant}
|
96
175
|
self
|
@@ -104,7 +183,7 @@ module Nokogiri
|
|
104
183
|
def #{constant.downcase}?
|
105
184
|
#{constant} & @options == #{constant}
|
106
185
|
end
|
107
|
-
|
186
|
+
RUBY
|
108
187
|
end
|
109
188
|
|
110
189
|
def strict
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -5,15 +5,22 @@ module Nokogiri
|
|
5
5
|
# :nodoc: all
|
6
6
|
module PP
|
7
7
|
module Node
|
8
|
+
COLLECTIONS = [:attribute_nodes, :children]
|
9
|
+
|
8
10
|
def inspect
|
9
11
|
attributes = inspect_attributes.reject do |x|
|
10
12
|
attribute = send(x)
|
11
13
|
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
12
14
|
rescue NoMethodError
|
13
15
|
true
|
14
|
-
end
|
15
|
-
|
16
|
-
|
16
|
+
end
|
17
|
+
attributes = if inspect_attributes.length == 1
|
18
|
+
send(attributes.first).inspect
|
19
|
+
else
|
20
|
+
attributes.map do |attribute|
|
21
|
+
"#{attribute}=#{send(attribute).inspect}"
|
22
|
+
end.join(" ")
|
23
|
+
end
|
17
24
|
"#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
|
18
25
|
end
|
19
26
|
|
@@ -21,11 +28,12 @@ module Nokogiri
|
|
21
28
|
nice_name = self.class.name.split("::").last
|
22
29
|
pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
|
23
30
|
pp.breakable
|
24
|
-
|
31
|
+
|
32
|
+
attrs = inspect_attributes.filter_map do |t|
|
25
33
|
[t, send(t)] if respond_to?(t)
|
26
|
-
end.
|
34
|
+
end.find_all do |x|
|
27
35
|
if x.last
|
28
|
-
if
|
36
|
+
if COLLECTIONS.include?(x.first)
|
29
37
|
!x.last.empty?
|
30
38
|
else
|
31
39
|
true
|
@@ -33,19 +41,24 @@ module Nokogiri
|
|
33
41
|
end
|
34
42
|
end
|
35
43
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
44
|
+
if inspect_attributes.length == 1
|
45
|
+
pp.pp(attrs.first.last)
|
46
|
+
else
|
47
|
+
pp.seplist(attrs) do |v|
|
48
|
+
if COLLECTIONS.include?(v.first)
|
49
|
+
pp.group(2, "#{v.first} = [", "]") do
|
50
|
+
pp.breakable
|
51
|
+
pp.seplist(v.last) do |item|
|
52
|
+
pp.pp(item)
|
53
|
+
end
|
42
54
|
end
|
55
|
+
else
|
56
|
+
pp.text("#{v.first} = ")
|
57
|
+
pp.pp(v.last)
|
43
58
|
end
|
44
|
-
else
|
45
|
-
pp.text("#{v.first} = ")
|
46
|
-
pp.pp(v.last)
|
47
59
|
end
|
48
60
|
end
|
61
|
+
|
49
62
|
pp.breakable
|
50
63
|
end
|
51
64
|
end
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
|
7
|
+
# Reader is given an XML document, and yields nodes to an each block.
|
8
|
+
#
|
9
|
+
# The Reader parser might be good for when you need the speed and low memory usage of the SAX
|
10
|
+
# parser, but do not want to write a Document handler.
|
9
11
|
#
|
10
12
|
# Here is an example of usage:
|
11
13
|
#
|
@@ -22,13 +24,12 @@ module Nokogiri
|
|
22
24
|
#
|
23
25
|
# end
|
24
26
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# need during the first iteration.
|
27
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
28
|
+
# document, you must parse the document again. It may be better to capture all information you
|
29
|
+
# need during a single iteration.
|
29
30
|
#
|
30
|
-
#
|
31
|
-
#
|
31
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
|
32
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
32
33
|
class Reader
|
33
34
|
include Enumerable
|
34
35
|
|
@@ -83,16 +84,14 @@ module Nokogiri
|
|
83
84
|
end
|
84
85
|
private :initialize
|
85
86
|
|
86
|
-
# Get the attributes of the current node as a Hash
|
87
|
+
# Get the attributes and namespaces of the current node as a Hash.
|
88
|
+
#
|
89
|
+
# This is the union of Reader#attribute_hash and Reader#namespaces
|
87
90
|
#
|
88
|
-
# [Returns]
|
91
|
+
# [Returns]
|
92
|
+
# (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
|
89
93
|
def attributes
|
90
|
-
|
91
|
-
hash[node.name] = node.to_s
|
92
|
-
end
|
93
|
-
ns = namespaces
|
94
|
-
attrs_hash.merge!(ns) if ns
|
95
|
-
attrs_hash
|
94
|
+
attribute_hash.merge(namespaces)
|
96
95
|
end
|
97
96
|
|
98
97
|
###
|
@@ -100,7 +100,7 @@ module Nokogiri
|
|
100
100
|
# +prefix+ is the namespace prefix for the element
|
101
101
|
# +uri+ is the associated namespace URI
|
102
102
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
103
|
-
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
103
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
|
104
104
|
###
|
105
105
|
# Deal with SAX v1 interface
|
106
106
|
name = [prefix, name].compact.join(":")
|
@@ -88,9 +88,8 @@ module Nokogiri
|
|
88
88
|
|
89
89
|
###
|
90
90
|
# Parse given +io+
|
91
|
-
def parse_io(io, encoding =
|
92
|
-
|
93
|
-
ctx = ParserContext.io(io, ENCODINGS[@encoding])
|
91
|
+
def parse_io(io, encoding = @encoding)
|
92
|
+
ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
|
94
93
|
yield ctx if block_given?
|
95
94
|
ctx.parse_with(self)
|
96
95
|
end
|
@@ -36,16 +36,19 @@ module Nokogiri
|
|
36
36
|
# node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
37
37
|
#
|
38
38
|
# 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
|
39
|
-
# functions create a class and implement the function you want to define
|
40
|
-
#
|
41
|
-
#
|
39
|
+
# functions create a class and implement the function you want to define, which will be in the
|
40
|
+
# `nokogiri` namespace in XPath queries.
|
41
|
+
#
|
42
|
+
# The first argument to the method will be the current matching NodeSet. Any other arguments
|
43
|
+
# are ones that you pass in. Note that this class may appear anywhere in the argument
|
44
|
+
# list. For example:
|
42
45
|
#
|
43
46
|
# handler = Class.new {
|
44
47
|
# def regex node_set, regex
|
45
48
|
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
46
49
|
# end
|
47
50
|
# }.new
|
48
|
-
# node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
|
51
|
+
# node.search('.//title[nokogiri:regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
|
49
52
|
#
|
50
53
|
# See Searchable#xpath and Searchable#css for further usage help.
|
51
54
|
def search(*args)
|
@@ -160,16 +163,18 @@ module Nokogiri
|
|
160
163
|
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
161
164
|
#
|
162
165
|
# 💡 Custom XPath functions may also be defined. To define custom functions create a class and
|
163
|
-
# implement the function you want to define
|
164
|
-
#
|
165
|
-
#
|
166
|
+
# implement the function you want to define, which will be in the `nokogiri` namespace.
|
167
|
+
#
|
168
|
+
# The first argument to the method will be the current matching NodeSet. Any other arguments
|
169
|
+
# are ones that you pass in. Note that this class may appear anywhere in the argument
|
170
|
+
# list. For example:
|
166
171
|
#
|
167
172
|
# handler = Class.new {
|
168
173
|
# def regex(node_set, regex)
|
169
174
|
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
170
175
|
# end
|
171
176
|
# }.new
|
172
|
-
# node.xpath('.//title[regex(., "\w+")]', handler)
|
177
|
+
# node.xpath('.//title[nokogiri:regex(., "\w+")]', handler)
|
173
178
|
#
|
174
179
|
def xpath(*args)
|
175
180
|
paths, handler, ns, binds = extract_params(args)
|
@@ -194,7 +199,7 @@ module Nokogiri
|
|
194
199
|
#
|
195
200
|
# Search this node's immediate children using CSS selector +selector+
|
196
201
|
def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
|
197
|
-
ns =
|
202
|
+
ns = document.root&.namespaces || {}
|
198
203
|
xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
|
199
204
|
end
|
200
205
|
|
@@ -224,7 +229,7 @@ module Nokogiri
|
|
224
229
|
def xpath_impl(node, path, handler, ns, binds)
|
225
230
|
ctx = XPathContext.new(node)
|
226
231
|
ctx.register_namespaces(ns)
|
227
|
-
path = path.gsub(
|
232
|
+
path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml?
|
228
233
|
|
229
234
|
binds&.each do |key, value|
|
230
235
|
ctx.register_variable(key.to_s, value)
|
@@ -243,8 +248,11 @@ module Nokogiri
|
|
243
248
|
doctype: document.xpath_doctype,
|
244
249
|
)
|
245
250
|
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
246
|
-
CSS.xpath_for(rule.to_s, {
|
247
|
-
|
251
|
+
CSS.xpath_for(rule.to_s, {
|
252
|
+
prefix: implied_xpath_context,
|
253
|
+
ns: ns,
|
254
|
+
visitor: visitor,
|
255
|
+
})
|
248
256
|
end.join(" | ")
|
249
257
|
end
|
250
258
|
|
@@ -261,7 +269,7 @@ module Nokogiri
|
|
261
269
|
end
|
262
270
|
ns, binds = hashes.reverse
|
263
271
|
|
264
|
-
ns ||=
|
272
|
+
ns ||= document.root&.namespaces || {}
|
265
273
|
|
266
274
|
[params, handler, ns, binds]
|
267
275
|
end
|
data/lib/nokogiri/xml.rb
CHANGED
@@ -10,15 +10,37 @@ module Nokogiri
|
|
10
10
|
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
11
11
|
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# xslt.transform(doc) # => Nokogiri::XML::Document
|
14
14
|
#
|
15
|
-
#
|
16
|
-
#
|
15
|
+
# Many XSLT transformations include serialization behavior to emit a non-XML document. For these
|
16
|
+
# cases, please take care to invoke the #serialize method on the result of the transformation:
|
17
|
+
#
|
18
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
19
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
20
|
+
# xslt.serialize(xslt.transform(doc)) # => String
|
21
|
+
#
|
22
|
+
# or use the #apply_to method, which is a shortcut for `serialize(transform(document))`:
|
23
|
+
#
|
24
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
25
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
26
|
+
# xslt.apply_to(doc) # => String
|
27
|
+
#
|
28
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
|
17
29
|
class Stylesheet
|
18
|
-
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
30
|
+
# :call-seq:
|
31
|
+
# apply_to(document, params = []) -> String
|
32
|
+
#
|
33
|
+
# Apply an XSLT stylesheet to an XML::Document and serialize it properly. This method is
|
34
|
+
# equivalent to calling #serialize on the result of #transform.
|
35
|
+
#
|
36
|
+
# [Parameters]
|
37
|
+
# - +document+ is an instance of XML::Document to transform
|
38
|
+
# - +params+ is an array of strings used as XSLT parameters, passed into #transform
|
39
|
+
#
|
40
|
+
# [Returns]
|
41
|
+
# A string containing the serialized result of the transformation.
|
42
|
+
#
|
43
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
|
22
44
|
def apply_to(document, params = [])
|
23
45
|
serialize(transform(document, params))
|
24
46
|
end
|
data/lib/nokogiri/xslt.rb
CHANGED
@@ -20,8 +20,59 @@ module Nokogiri
|
|
20
20
|
# Stylesheet object.
|
21
21
|
module XSLT
|
22
22
|
class << self
|
23
|
-
|
24
|
-
#
|
23
|
+
# :call-seq:
|
24
|
+
# parse(xsl) → Nokogiri::XSLT::Stylesheet
|
25
|
+
# parse(xsl, modules) → Nokogiri::XSLT::Stylesheet
|
26
|
+
#
|
27
|
+
# Parse the stylesheet in +xsl+, registering optional +modules+ as custom class handlers.
|
28
|
+
#
|
29
|
+
# [Parameters]
|
30
|
+
# - +xsl+ (String) XSL content to be parsed into a stylesheet
|
31
|
+
# - +modules+ (Hash<String ⇒ Class>) A hash of URI-to-handler relations for linking a
|
32
|
+
# namespace to a custom function handler.
|
33
|
+
#
|
34
|
+
# ⚠ The XSLT handler classes are registered *globally*.
|
35
|
+
#
|
36
|
+
# Also see Nokogiri::XSLT.register
|
37
|
+
#
|
38
|
+
# *Example*
|
39
|
+
#
|
40
|
+
# xml = Nokogiri.XML(<<~XML)
|
41
|
+
# <nodes>
|
42
|
+
# <node>Foo</node>
|
43
|
+
# <node>Bar</node>
|
44
|
+
# </nodes>
|
45
|
+
# XML
|
46
|
+
#
|
47
|
+
# handler = Class.new do
|
48
|
+
# def reverse(node)
|
49
|
+
# node.text.reverse
|
50
|
+
# end
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
# xsl = <<~XSL
|
54
|
+
# <xsl:stylesheet version="1.0"
|
55
|
+
# xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
56
|
+
# xmlns:myfuncs="http://nokogiri.org/xslt/myfuncs"
|
57
|
+
# extension-element-prefixes="myfuncs">
|
58
|
+
# <xsl:template match="/">
|
59
|
+
# <reversed>
|
60
|
+
# <xsl:for-each select="nodes/node">
|
61
|
+
# <reverse><xsl:copy-of select="myfuncs:reverse(.)"/></reverse>
|
62
|
+
# </xsl:for-each>
|
63
|
+
# </reversed>
|
64
|
+
# </xsl:template>
|
65
|
+
# </xsl:stylesheet>
|
66
|
+
# XSL
|
67
|
+
#
|
68
|
+
# xsl = Nokogiri.XSLT(xsl, "http://nokogiri.org/xslt/myfuncs" => handler)
|
69
|
+
# xsl.transform(xml).to_xml
|
70
|
+
# # => "<?xml version=\"1.0\"?>\n" +
|
71
|
+
# # "<reversed>\n" +
|
72
|
+
# # " <reverse>ooF</reverse>\n" +
|
73
|
+
# # " <reverse>raB</reverse>\n" +
|
74
|
+
# # "</reversed>\n"
|
75
|
+
#
|
25
76
|
def parse(string, modules = {})
|
26
77
|
modules.each do |url, klass|
|
27
78
|
XSLT.register(url, klass)
|
@@ -47,10 +98,10 @@ module Nokogiri
|
|
47
98
|
# [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
|
48
99
|
#
|
49
100
|
def quote_params(params)
|
50
|
-
params.flatten.each_slice(2).
|
101
|
+
params.flatten.each_slice(2).with_object([]) do |kv, quoted_params|
|
51
102
|
key, value = kv.map(&:to_s)
|
52
|
-
value = if
|
53
|
-
"concat('#{value.gsub(
|
103
|
+
value = if value.include?("'")
|
104
|
+
"concat('#{value.gsub("'", %q{', "'", '})}')"
|
54
105
|
else
|
55
106
|
"'#{value}'"
|
56
107
|
end
|
@@ -58,6 +109,25 @@ module Nokogiri
|
|
58
109
|
quoted_params << value
|
59
110
|
end
|
60
111
|
end
|
112
|
+
|
113
|
+
# :call-seq:
|
114
|
+
# register(uri, custom_handler_class)
|
115
|
+
#
|
116
|
+
# Register a class that implements custom XSLT transformation functions.
|
117
|
+
#
|
118
|
+
# ⚠ The XSLT handler classes are registered *globally*.
|
119
|
+
#
|
120
|
+
# [Parameters]
|
121
|
+
# - +uri+ (String) The namespace for the custom handlers
|
122
|
+
# - +custom_handler_class+ (Class) A class with ruby methods that can be called during
|
123
|
+
# transformation
|
124
|
+
#
|
125
|
+
# See Nokogiri::XSLT.parse for usage.
|
126
|
+
#
|
127
|
+
def register(uri, custom_handler_class)
|
128
|
+
# NOTE: this is implemented in the C extension, see ext/nokogiri/xslt_stylesheet.c
|
129
|
+
raise NotImplementedError, "Nokogiri::XSLT.register is not implemented on JRuby"
|
130
|
+
end if Nokogiri.jruby?
|
61
131
|
end
|
62
132
|
end
|
63
133
|
end
|