nokogiri 1.5.10 → 1.10.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/LICENSE-DEPENDENCIES.md +1614 -0
- data/LICENSE.md +9 -0
- data/README.md +198 -0
- data/bin/nokogiri +50 -10
- data/dependencies.yml +72 -0
- data/ext/nokogiri/extconf.rb +634 -92
- data/ext/nokogiri/html_document.c +8 -8
- data/ext/nokogiri/html_element_description.c +15 -15
- data/ext/nokogiri/html_entity_lookup.c +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +4 -4
- data/ext/nokogiri/html_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +20 -12
- data/ext/nokogiri/nokogiri.h +1 -44
- data/ext/nokogiri/xml_attr.c +34 -25
- data/ext/nokogiri/xml_cdata.c +12 -6
- data/ext/nokogiri/xml_comment.c +18 -3
- data/ext/nokogiri/xml_document.c +64 -32
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +3 -3
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_io.c +11 -6
- data/ext/nokogiri/xml_namespace.c +50 -17
- data/ext/nokogiri/xml_namespace.h +3 -2
- data/ext/nokogiri/xml_node.c +459 -240
- data/ext/nokogiri/xml_node_set.c +166 -147
- data/ext/nokogiri/xml_node_set.h +2 -4
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +6 -19
- data/ext/nokogiri/xml_sax_parser.c +11 -13
- data/ext/nokogiri/xml_sax_parser_context.c +41 -1
- data/ext/nokogiri/xml_sax_push_parser.c +56 -12
- data/ext/nokogiri/xml_schema.c +1 -1
- data/ext/nokogiri/xml_syntax_error.c +11 -5
- data/ext/nokogiri/xml_syntax_error.h +1 -1
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +17 -38
- data/ext/nokogiri/xslt_stylesheet.c +10 -10
- data/lib/nokogiri/css/node.rb +0 -50
- data/lib/nokogiri/css/parser.rb +263 -233
- data/lib/nokogiri/css/parser.y +54 -40
- data/lib/nokogiri/css/tokenizer.rb +104 -103
- data/lib/nokogiri/css/tokenizer.rex +5 -5
- data/lib/nokogiri/css/xpath_visitor.rb +78 -19
- data/lib/nokogiri/decorators/slop.rb +12 -5
- data/lib/nokogiri/html/document.rb +102 -21
- data/lib/nokogiri/html/document_fragment.rb +11 -3
- data/lib/nokogiri/html/sax/parser.rb +12 -2
- data/lib/nokogiri/html/sax/push_parser.rb +22 -2
- data/lib/nokogiri/version.rb +40 -22
- data/lib/nokogiri/xml/builder.rb +34 -31
- data/lib/nokogiri/xml/document.rb +20 -14
- data/lib/nokogiri/xml/document_fragment.rb +50 -2
- data/lib/nokogiri/xml/dtd.rb +14 -4
- data/lib/nokogiri/xml/entity_reference.rb +18 -0
- data/lib/nokogiri/xml/node.rb +148 -203
- data/lib/nokogiri/xml/node_set.rb +139 -123
- data/lib/nokogiri/xml/parse_options.rb +22 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +7 -8
- data/lib/nokogiri/xml/searchable.rb +230 -0
- data/lib/nokogiri/xml/syntax_error.rb +24 -1
- data/lib/nokogiri/xml.rb +3 -1
- data/lib/nokogiri.rb +40 -24
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
- data/patches/libxml2/0002-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +120 -0
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
- metadata +252 -388
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
@@ -0,0 +1,230 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
#
|
4
|
+
# The Searchable module declares the interface used for searching your DOM.
|
5
|
+
#
|
6
|
+
# It implements the public methods `search`, `css`, and `xpath`,
|
7
|
+
# as well as allowing specific implementations to specialize some
|
8
|
+
# of the important behaviors.
|
9
|
+
#
|
10
|
+
module Searchable
|
11
|
+
# Regular expression used by Searchable#search to determine if a query
|
12
|
+
# string is CSS or XPath
|
13
|
+
LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
|
14
|
+
|
15
|
+
###
|
16
|
+
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
17
|
+
#
|
18
|
+
# Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
|
19
|
+
#
|
20
|
+
# node.search("div.employee", ".//title")
|
21
|
+
#
|
22
|
+
# A hash of namespace bindings may be appended:
|
23
|
+
#
|
24
|
+
# node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
|
25
|
+
# node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
|
26
|
+
#
|
27
|
+
# For XPath queries, a hash of variable bindings may also be
|
28
|
+
# appended to the namespace bindings. For example:
|
29
|
+
#
|
30
|
+
# node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
31
|
+
#
|
32
|
+
# Custom XPath functions and CSS pseudo-selectors may also be
|
33
|
+
# defined. To define custom functions create a class and
|
34
|
+
# implement the function you want to define. The first argument
|
35
|
+
# to the method will be the current matching NodeSet. Any other
|
36
|
+
# arguments are ones that you pass in. Note that this class may
|
37
|
+
# appear anywhere in the argument list. For example:
|
38
|
+
#
|
39
|
+
# node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")',
|
40
|
+
# Class.new {
|
41
|
+
# def regex node_set, regex
|
42
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
43
|
+
# end
|
44
|
+
# }.new
|
45
|
+
# )
|
46
|
+
#
|
47
|
+
# See Searchable#xpath and Searchable#css for further usage help.
|
48
|
+
def search *args
|
49
|
+
paths, handler, ns, binds = extract_params(args)
|
50
|
+
|
51
|
+
xpaths = paths.map(&:to_s).map do |path|
|
52
|
+
(path =~ LOOKS_LIKE_XPATH) ? path : xpath_query_from_css_rule(path, ns)
|
53
|
+
end.flatten.uniq
|
54
|
+
|
55
|
+
xpath(*(xpaths + [ns, handler, binds].compact))
|
56
|
+
end
|
57
|
+
alias :/ :search
|
58
|
+
|
59
|
+
###
|
60
|
+
# call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
61
|
+
#
|
62
|
+
# Search this object for +paths+, and return only the first
|
63
|
+
# result. +paths+ must be one or more XPath or CSS queries.
|
64
|
+
#
|
65
|
+
# See Searchable#search for more information.
|
66
|
+
def at *args
|
67
|
+
search(*args).first
|
68
|
+
end
|
69
|
+
alias :% :at
|
70
|
+
|
71
|
+
###
|
72
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
73
|
+
#
|
74
|
+
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
75
|
+
# selectors. For example:
|
76
|
+
#
|
77
|
+
# node.css('title')
|
78
|
+
# node.css('body h1.bold')
|
79
|
+
# node.css('div + p.green', 'div#one')
|
80
|
+
#
|
81
|
+
# A hash of namespace bindings may be appended. For example:
|
82
|
+
#
|
83
|
+
# node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
|
84
|
+
#
|
85
|
+
# Custom CSS pseudo classes may also be defined. To define
|
86
|
+
# custom pseudo classes, create a class and implement the custom
|
87
|
+
# pseudo class you want defined. The first argument to the
|
88
|
+
# method will be the current matching NodeSet. Any other
|
89
|
+
# arguments are ones that you pass in. For example:
|
90
|
+
#
|
91
|
+
# node.css('title:regex("\w+")', Class.new {
|
92
|
+
# def regex node_set, regex
|
93
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
94
|
+
# end
|
95
|
+
# }.new)
|
96
|
+
#
|
97
|
+
# Note that the CSS query string is case-sensitive with regard
|
98
|
+
# to your document type. That is, if you're looking for "H1" in
|
99
|
+
# an HTML document, you'll never find anything, since HTML tags
|
100
|
+
# will match only lowercase CSS queries. However, "H1" might be
|
101
|
+
# found in an XML document, where tag names are case-sensitive
|
102
|
+
# (e.g., "H1" is distinct from "h1").
|
103
|
+
#
|
104
|
+
def css *args
|
105
|
+
rules, handler, ns, _ = extract_params(args)
|
106
|
+
|
107
|
+
css_internal self, rules, handler, ns
|
108
|
+
end
|
109
|
+
|
110
|
+
##
|
111
|
+
# call-seq: at_css *rules, [namespace-bindings, custom-pseudo-class]
|
112
|
+
#
|
113
|
+
# Search this object for CSS +rules+, and return only the first
|
114
|
+
# match. +rules+ must be one or more CSS selectors.
|
115
|
+
#
|
116
|
+
# See Searchable#css for more information.
|
117
|
+
def at_css *args
|
118
|
+
css(*args).first
|
119
|
+
end
|
120
|
+
|
121
|
+
###
|
122
|
+
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
123
|
+
#
|
124
|
+
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
125
|
+
# queries.
|
126
|
+
#
|
127
|
+
# node.xpath('.//title')
|
128
|
+
#
|
129
|
+
# A hash of namespace bindings may be appended. For example:
|
130
|
+
#
|
131
|
+
# node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
|
132
|
+
# node.xpath('.//xmlns:name', node.root.namespaces)
|
133
|
+
#
|
134
|
+
# A hash of variable bindings may also be appended to the namespace bindings. For example:
|
135
|
+
#
|
136
|
+
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
137
|
+
#
|
138
|
+
# Custom XPath functions may also be defined. To define custom
|
139
|
+
# functions create a class and implement the function you want
|
140
|
+
# to define. The first argument to the method will be the
|
141
|
+
# current matching NodeSet. Any other arguments are ones that
|
142
|
+
# you pass in. Note that this class may appear anywhere in the
|
143
|
+
# argument list. For example:
|
144
|
+
#
|
145
|
+
# node.xpath('.//title[regex(., "\w+")]', Class.new {
|
146
|
+
# def regex node_set, regex
|
147
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
148
|
+
# end
|
149
|
+
# }.new)
|
150
|
+
#
|
151
|
+
def xpath *args
|
152
|
+
paths, handler, ns, binds = extract_params(args)
|
153
|
+
|
154
|
+
xpath_internal self, paths, handler, ns, binds
|
155
|
+
end
|
156
|
+
|
157
|
+
##
|
158
|
+
# call-seq: at_xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
159
|
+
#
|
160
|
+
# Search this node for XPath +paths+, and return only the first
|
161
|
+
# match. +paths+ must be one or more XPath queries.
|
162
|
+
#
|
163
|
+
# See Searchable#xpath for more information.
|
164
|
+
def at_xpath *args
|
165
|
+
xpath(*args).first
|
166
|
+
end
|
167
|
+
|
168
|
+
private
|
169
|
+
|
170
|
+
def css_internal node, rules, handler, ns
|
171
|
+
xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
|
172
|
+
end
|
173
|
+
|
174
|
+
def xpath_internal node, paths, handler, ns, binds
|
175
|
+
document = node.document
|
176
|
+
return NodeSet.new(document) unless document
|
177
|
+
|
178
|
+
if paths.length == 1
|
179
|
+
return xpath_impl(node, paths.first, handler, ns, binds)
|
180
|
+
end
|
181
|
+
|
182
|
+
NodeSet.new(document) do |combined|
|
183
|
+
paths.each do |path|
|
184
|
+
xpath_impl(node, path, handler, ns, binds).each { |set| combined << set }
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
def xpath_impl node, path, handler, ns, binds
|
190
|
+
ctx = XPathContext.new(node)
|
191
|
+
ctx.register_namespaces(ns)
|
192
|
+
path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
|
193
|
+
|
194
|
+
binds.each do |key,value|
|
195
|
+
ctx.register_variable key.to_s, value
|
196
|
+
end if binds
|
197
|
+
|
198
|
+
ctx.evaluate(path, handler)
|
199
|
+
end
|
200
|
+
|
201
|
+
def css_rules_to_xpath(rules, ns)
|
202
|
+
rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
|
203
|
+
end
|
204
|
+
|
205
|
+
def xpath_query_from_css_rule rule, ns
|
206
|
+
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
207
|
+
CSS.xpath_for(rule.to_s, :prefix => implied_xpath_context, :ns => ns)
|
208
|
+
end.join(' | ')
|
209
|
+
end
|
210
|
+
|
211
|
+
def extract_params params # :nodoc:
|
212
|
+
handler = params.find do |param|
|
213
|
+
![Hash, String, Symbol].include?(param.class)
|
214
|
+
end
|
215
|
+
params -= [handler] if handler
|
216
|
+
|
217
|
+
hashes = []
|
218
|
+
while Hash === params.last || params.last.nil?
|
219
|
+
hashes << params.pop
|
220
|
+
break if params.empty?
|
221
|
+
end
|
222
|
+
ns, binds = hashes.reverse
|
223
|
+
|
224
|
+
ns ||= document.root ? document.root.namespaces : {}
|
225
|
+
|
226
|
+
[params, handler, ns, binds]
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
@@ -40,7 +40,30 @@ module Nokogiri
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def to_s
|
43
|
-
super.chomp
|
43
|
+
message = super.chomp
|
44
|
+
[location_to_s, level_to_s, message].
|
45
|
+
compact.join(": ").
|
46
|
+
force_encoding(message.encoding)
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
def level_to_s
|
52
|
+
case level
|
53
|
+
when 3 then "FATAL"
|
54
|
+
when 2 then "ERROR"
|
55
|
+
when 1 then "WARNING"
|
56
|
+
else nil
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def nil_or_zero?(attribute)
|
61
|
+
attribute.nil? || attribute.zero?
|
62
|
+
end
|
63
|
+
|
64
|
+
def location_to_s
|
65
|
+
return nil if nil_or_zero?(line) && nil_or_zero?(column)
|
66
|
+
"#{line}:#{column}"
|
44
67
|
end
|
45
68
|
end
|
46
69
|
end
|
data/lib/nokogiri/xml.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'nokogiri/xml/pp'
|
2
2
|
require 'nokogiri/xml/parse_options'
|
3
3
|
require 'nokogiri/xml/sax'
|
4
|
+
require 'nokogiri/xml/searchable'
|
4
5
|
require 'nokogiri/xml/node'
|
5
6
|
require 'nokogiri/xml/attribute_decl'
|
6
7
|
require 'nokogiri/xml/element_decl'
|
@@ -22,6 +23,7 @@ require 'nokogiri/xml/builder'
|
|
22
23
|
require 'nokogiri/xml/reader'
|
23
24
|
require 'nokogiri/xml/notation'
|
24
25
|
require 'nokogiri/xml/entity_decl'
|
26
|
+
require 'nokogiri/xml/entity_reference'
|
25
27
|
require 'nokogiri/xml/schema'
|
26
28
|
require 'nokogiri/xml/relax_ng'
|
27
29
|
|
@@ -47,7 +49,7 @@ module Nokogiri
|
|
47
49
|
# Nokogiri::XML::Reader for more information
|
48
50
|
def Reader string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT
|
49
51
|
|
50
|
-
options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
|
52
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
51
53
|
# Give the options to the user
|
52
54
|
yield options if block_given?
|
53
55
|
|
data/lib/nokogiri.rb
CHANGED
@@ -2,9 +2,6 @@
|
|
2
2
|
# Modify the PATH on windows so that the external DLLs will get loaded.
|
3
3
|
|
4
4
|
require 'rbconfig'
|
5
|
-
ENV['PATH'] = [File.expand_path(
|
6
|
-
File.join(File.dirname(__FILE__), "..", "ext", "nokogiri")
|
7
|
-
), ENV['PATH']].compact.join(';') if RbConfig::CONFIG['host_os'] =~ /(mswin|mingw)/i
|
8
5
|
|
9
6
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
10
7
|
# The line below caused a problem on non-GAE rack environment.
|
@@ -13,7 +10,7 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
|
13
10
|
# However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
|
14
11
|
# an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
|
15
12
|
# of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
|
16
|
-
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
13
|
+
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
17
14
|
# already set in the classpath.
|
18
15
|
unless $LOAD_PATH.to_s.include?("appengine-rack")
|
19
16
|
require 'stringio'
|
@@ -22,10 +19,18 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
|
22
19
|
require 'nekohtml.jar'
|
23
20
|
require 'nekodtd.jar'
|
24
21
|
require 'xercesImpl.jar'
|
22
|
+
require 'serializer.jar'
|
23
|
+
require 'xalan.jar'
|
24
|
+
require 'xml-apis.jar'
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
-
|
28
|
+
begin
|
29
|
+
RUBY_VERSION =~ /(\d+\.\d+)/
|
30
|
+
require "nokogiri/#{$1}/nokogiri"
|
31
|
+
rescue LoadError
|
32
|
+
require 'nokogiri/nokogiri'
|
33
|
+
end
|
29
34
|
require 'nokogiri/version'
|
30
35
|
require 'nokogiri/syntax_error'
|
31
36
|
require 'nokogiri/xml'
|
@@ -36,7 +41,8 @@ require 'nokogiri/css'
|
|
36
41
|
require 'nokogiri/html/builder'
|
37
42
|
|
38
43
|
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
39
|
-
# correctly implemented CSS3 selector support as well as XPath
|
44
|
+
# correctly implemented CSS3 selector support as well as XPath 1.0
|
45
|
+
# support.
|
40
46
|
#
|
41
47
|
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
42
48
|
# Nokogiri::HTML::Document depending on the kind of document you parse.
|
@@ -58,27 +64,26 @@ require 'nokogiri/html/builder'
|
|
58
64
|
# puts link.content
|
59
65
|
# end
|
60
66
|
#
|
61
|
-
# See Nokogiri::XML::Node#css for more information about CSS searching.
|
62
|
-
# See Nokogiri::XML::Node#xpath for more information about XPath searching.
|
67
|
+
# See Nokogiri::XML::Searchable#css for more information about CSS searching.
|
68
|
+
# See Nokogiri::XML::Searchable#xpath for more information about XPath searching.
|
63
69
|
module Nokogiri
|
64
70
|
class << self
|
65
71
|
###
|
66
72
|
# Parse an HTML or XML document. +string+ contains the document.
|
67
73
|
def parse string, url = nil, encoding = nil, options = nil
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
doc
|
74
|
+
if string.respond_to?(:read) ||
|
75
|
+
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
|
76
|
+
# Expect an HTML indicator to appear within the first 512
|
77
|
+
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
78
|
+
# shouldn't be that long)
|
79
|
+
Nokogiri.HTML(string, url, encoding,
|
80
|
+
options || XML::ParseOptions::DEFAULT_HTML)
|
81
|
+
else
|
82
|
+
Nokogiri.XML(string, url, encoding,
|
83
|
+
options || XML::ParseOptions::DEFAULT_XML)
|
84
|
+
end.tap { |doc|
|
85
|
+
yield doc if block_given?
|
86
|
+
}
|
82
87
|
end
|
83
88
|
|
84
89
|
###
|
@@ -109,7 +114,19 @@ module Nokogiri
|
|
109
114
|
def Slop(*args, &block)
|
110
115
|
Nokogiri(*args, &block).slop!
|
111
116
|
end
|
117
|
+
|
118
|
+
def install_default_aliases
|
119
|
+
# Make sure to support some popular encoding aliases not known by
|
120
|
+
# all iconv implementations.
|
121
|
+
{
|
122
|
+
'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
|
123
|
+
}.each { |alias_name, name|
|
124
|
+
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
125
|
+
}
|
126
|
+
end
|
112
127
|
end
|
128
|
+
|
129
|
+
Nokogiri.install_default_aliases
|
113
130
|
end
|
114
131
|
|
115
132
|
###
|
@@ -120,8 +137,7 @@ end
|
|
120
137
|
# To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
|
121
138
|
def Nokogiri(*args, &block)
|
122
139
|
if block_given?
|
123
|
-
|
124
|
-
return builder.doc.root
|
140
|
+
Nokogiri::HTML::Builder.new(&block).doc.root
|
125
141
|
else
|
126
142
|
Nokogiri.parse(*args)
|
127
143
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
From c5538465c08a8ea248a370bf55bc39cd3385e4af Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Thu, 29 Mar 2018 14:09:00 -0400
|
4
|
+
Subject: [PATCH] Revert "Do not URI escape in server side includes"
|
5
|
+
|
6
|
+
This reverts commit 960f0e275616cadc29671a218d7fb9b69eb35588.
|
7
|
+
---
|
8
|
+
HTMLtree.c | 49 +++++++++++--------------------------------------
|
9
|
+
1 file changed, 11 insertions(+), 38 deletions(-)
|
10
|
+
|
11
|
+
diff --git a/HTMLtree.c b/HTMLtree.c
|
12
|
+
index 2fd0c9c..67160c5 100644
|
13
|
+
--- a/HTMLtree.c
|
14
|
+
+++ b/HTMLtree.c
|
15
|
+
@@ -717,49 +717,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
|
16
|
+
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
|
17
|
+
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
|
18
|
+
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
|
19
|
+
+ xmlChar *escaped;
|
20
|
+
xmlChar *tmp = value;
|
21
|
+
- /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
|
22
|
+
- xmlBufCCat(buf->buffer, "\"");
|
23
|
+
|
24
|
+
while (IS_BLANK_CH(*tmp)) tmp++;
|
25
|
+
|
26
|
+
- /* URI Escape everything, except server side includes. */
|
27
|
+
- for ( ; ; ) {
|
28
|
+
- xmlChar *escaped;
|
29
|
+
- xmlChar endChar;
|
30
|
+
- xmlChar *end = NULL;
|
31
|
+
- xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
|
32
|
+
- if (start != NULL) {
|
33
|
+
- end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
|
34
|
+
- if (end != NULL) {
|
35
|
+
- *start = '\0';
|
36
|
+
- }
|
37
|
+
- }
|
38
|
+
-
|
39
|
+
- /* Escape the whole string, or until start (set to '\0'). */
|
40
|
+
- escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
|
41
|
+
- if (escaped != NULL) {
|
42
|
+
- xmlBufCat(buf->buffer, escaped);
|
43
|
+
- xmlFree(escaped);
|
44
|
+
- } else {
|
45
|
+
- xmlBufCat(buf->buffer, tmp);
|
46
|
+
- }
|
47
|
+
-
|
48
|
+
- if (end == NULL) { /* Everything has been written. */
|
49
|
+
- break;
|
50
|
+
- }
|
51
|
+
-
|
52
|
+
- /* Do not escape anything within server side includes. */
|
53
|
+
- *start = '<'; /* Restore the first character of "<!--". */
|
54
|
+
- end += 3; /* strlen("-->") */
|
55
|
+
- endChar = *end;
|
56
|
+
- *end = '\0';
|
57
|
+
- xmlBufCat(buf->buffer, start);
|
58
|
+
- *end = endChar;
|
59
|
+
- tmp = end;
|
60
|
+
+ /*
|
61
|
+
+ * the < and > have already been escaped at the entity level
|
62
|
+
+ * And doing so here breaks server side includes
|
63
|
+
+ */
|
64
|
+
+ escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
|
65
|
+
+ if (escaped != NULL) {
|
66
|
+
+ xmlBufWriteQuotedString(buf->buffer, escaped);
|
67
|
+
+ xmlFree(escaped);
|
68
|
+
+ } else {
|
69
|
+
+ xmlBufWriteQuotedString(buf->buffer, value);
|
70
|
+
}
|
71
|
+
-
|
72
|
+
- xmlBufCCat(buf->buffer, "\"");
|
73
|
+
} else {
|
74
|
+
xmlBufWriteQuotedString(buf->buffer, value);
|
75
|
+
}
|
76
|
+
--
|
77
|
+
2.9.5
|
78
|
+
|
@@ -0,0 +1,40 @@
|
|
1
|
+
From 27e4aa8d885e47a296ea78d114dbbe8fc7aa3508 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Kevin Solorio <soloriok@gmail.com>
|
3
|
+
Date: Fri, 1 Feb 2019 14:32:42 -0800
|
4
|
+
Subject: [PATCH] Revert-support-html-h-b-7-1
|
5
|
+
|
6
|
+
---
|
7
|
+
entities.c | 17 -----------------
|
8
|
+
1 file changed, 17 deletions(-)
|
9
|
+
|
10
|
+
diff --git a/entities.c b/entities.c
|
11
|
+
index 43549bc5..82652f6d 100644
|
12
|
+
--- a/entities.c
|
13
|
+
+++ b/entities.c
|
14
|
+
@@ -623,23 +623,6 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
|
15
|
+
*out++ = 't';
|
16
|
+
*out++ = ';';
|
17
|
+
} else if (*cur == '&') {
|
18
|
+
- /*
|
19
|
+
- * Special handling of &{...} construct from HTML 4, see
|
20
|
+
- * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
|
21
|
+
- */
|
22
|
+
- if (html && attr && (cur[1] == '{') &&
|
23
|
+
- (strchr((const char *) cur, '}'))) {
|
24
|
+
- while (*cur != '}') {
|
25
|
+
- *out++ = *cur++;
|
26
|
+
- indx = out - buffer;
|
27
|
+
- if (indx + 100 > buffer_size) {
|
28
|
+
- growBufferReentrant();
|
29
|
+
- out = &buffer[indx];
|
30
|
+
- }
|
31
|
+
- }
|
32
|
+
- *out++ = *cur++;
|
33
|
+
- continue;
|
34
|
+
- }
|
35
|
+
*out++ = '&';
|
36
|
+
*out++ = 'a';
|
37
|
+
*out++ = 'm';
|
38
|
+
--
|
39
|
+
2.16.2
|
40
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
From ffc08467744bd2305d41ca882c37fa30adf3a067 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Kevin Solorio <soloriok@gmail.com>
|
3
|
+
Date: Wed, 27 Feb 2019 14:34:17 -0800
|
4
|
+
Subject: [PATCH 2/2] update entities.c to remove handling of ssi
|
5
|
+
|
6
|
+
---
|
7
|
+
entities.c | 21 ---------------------
|
8
|
+
1 file changed, 21 deletions(-)
|
9
|
+
|
10
|
+
diff --git a/entities.c b/entities.c
|
11
|
+
index 43549bc5..5c4a2a60 100644
|
12
|
+
--- a/entities.c
|
13
|
+
+++ b/entities.c
|
14
|
+
@@ -592,27 +592,6 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
|
15
|
+
* By default one have to encode at least '<', '>', '"' and '&' !
|
16
|
+
*/
|
17
|
+
if (*cur == '<') {
|
18
|
+
- const xmlChar *end;
|
19
|
+
-
|
20
|
+
- /*
|
21
|
+
- * Special handling of server side include in HTML attributes
|
22
|
+
- */
|
23
|
+
- if (html && attr &&
|
24
|
+
- (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
|
25
|
+
- ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
|
26
|
+
- while (cur != end) {
|
27
|
+
- *out++ = *cur++;
|
28
|
+
- indx = out - buffer;
|
29
|
+
- if (indx + 100 > buffer_size) {
|
30
|
+
- growBufferReentrant();
|
31
|
+
- out = &buffer[indx];
|
32
|
+
- }
|
33
|
+
- }
|
34
|
+
- *out++ = *cur++;
|
35
|
+
- *out++ = *cur++;
|
36
|
+
- *out++ = *cur++;
|
37
|
+
- continue;
|
38
|
+
- }
|
39
|
+
*out++ = '&';
|
40
|
+
*out++ = 'l';
|
41
|
+
*out++ = 't';
|
42
|
+
--
|
43
|
+
2.16.2
|
44
|
+
|
@@ -0,0 +1,120 @@
|
|
1
|
+
From e03553605b45c88f0b4b2980adfbbb8f6fca2fd6 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Nick Wellnhofer <wellnhofer@aevum.de>
|
3
|
+
Date: Sun, 24 Mar 2019 09:51:39 +0100
|
4
|
+
Subject: [PATCH] Fix security framework bypass
|
5
|
+
|
6
|
+
xsltCheckRead and xsltCheckWrite return -1 in case of error but callers
|
7
|
+
don't check for this condition and allow access. With a specially
|
8
|
+
crafted URL, xsltCheckRead could be tricked into returning an error
|
9
|
+
because of a supposedly invalid URL that would still be loaded
|
10
|
+
successfully later on.
|
11
|
+
|
12
|
+
Fixes #12.
|
13
|
+
|
14
|
+
Thanks to Felix Wilhelm for the report.
|
15
|
+
---
|
16
|
+
libxslt/documents.c | 18 ++++++++++--------
|
17
|
+
libxslt/imports.c | 9 +++++----
|
18
|
+
libxslt/transform.c | 9 +++++----
|
19
|
+
libxslt/xslt.c | 9 +++++----
|
20
|
+
4 files changed, 25 insertions(+), 20 deletions(-)
|
21
|
+
|
22
|
+
diff --git a/libxslt/documents.c b/libxslt/documents.c
|
23
|
+
index 3f3a731..4aad11b 100644
|
24
|
+
--- a/libxslt/documents.c
|
25
|
+
+++ b/libxslt/documents.c
|
26
|
+
@@ -296,10 +296,11 @@ xsltLoadDocument(xsltTransformContextPtr ctxt, const xmlChar *URI) {
|
27
|
+
int res;
|
28
|
+
|
29
|
+
res = xsltCheckRead(ctxt->sec, ctxt, URI);
|
30
|
+
- if (res == 0) {
|
31
|
+
- xsltTransformError(ctxt, NULL, NULL,
|
32
|
+
- "xsltLoadDocument: read rights for %s denied\n",
|
33
|
+
- URI);
|
34
|
+
+ if (res <= 0) {
|
35
|
+
+ if (res == 0)
|
36
|
+
+ xsltTransformError(ctxt, NULL, NULL,
|
37
|
+
+ "xsltLoadDocument: read rights for %s denied\n",
|
38
|
+
+ URI);
|
39
|
+
return(NULL);
|
40
|
+
}
|
41
|
+
}
|
42
|
+
@@ -372,10 +373,11 @@ xsltLoadStyleDocument(xsltStylesheetPtr style, const xmlChar *URI) {
|
43
|
+
int res;
|
44
|
+
|
45
|
+
res = xsltCheckRead(sec, NULL, URI);
|
46
|
+
- if (res == 0) {
|
47
|
+
- xsltTransformError(NULL, NULL, NULL,
|
48
|
+
- "xsltLoadStyleDocument: read rights for %s denied\n",
|
49
|
+
- URI);
|
50
|
+
+ if (res <= 0) {
|
51
|
+
+ if (res == 0)
|
52
|
+
+ xsltTransformError(NULL, NULL, NULL,
|
53
|
+
+ "xsltLoadStyleDocument: read rights for %s denied\n",
|
54
|
+
+ URI);
|
55
|
+
return(NULL);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
diff --git a/libxslt/imports.c b/libxslt/imports.c
|
59
|
+
index 874870c..3783b24 100644
|
60
|
+
--- a/libxslt/imports.c
|
61
|
+
+++ b/libxslt/imports.c
|
62
|
+
@@ -130,10 +130,11 @@ xsltParseStylesheetImport(xsltStylesheetPtr style, xmlNodePtr cur) {
|
63
|
+
int secres;
|
64
|
+
|
65
|
+
secres = xsltCheckRead(sec, NULL, URI);
|
66
|
+
- if (secres == 0) {
|
67
|
+
- xsltTransformError(NULL, NULL, NULL,
|
68
|
+
- "xsl:import: read rights for %s denied\n",
|
69
|
+
- URI);
|
70
|
+
+ if (secres <= 0) {
|
71
|
+
+ if (secres == 0)
|
72
|
+
+ xsltTransformError(NULL, NULL, NULL,
|
73
|
+
+ "xsl:import: read rights for %s denied\n",
|
74
|
+
+ URI);
|
75
|
+
goto error;
|
76
|
+
}
|
77
|
+
}
|
78
|
+
diff --git a/libxslt/transform.c b/libxslt/transform.c
|
79
|
+
index 1379391..0636dbd 100644
|
80
|
+
--- a/libxslt/transform.c
|
81
|
+
+++ b/libxslt/transform.c
|
82
|
+
@@ -3493,10 +3493,11 @@ xsltDocumentElem(xsltTransformContextPtr ctxt, xmlNodePtr node,
|
83
|
+
*/
|
84
|
+
if (ctxt->sec != NULL) {
|
85
|
+
ret = xsltCheckWrite(ctxt->sec, ctxt, filename);
|
86
|
+
- if (ret == 0) {
|
87
|
+
- xsltTransformError(ctxt, NULL, inst,
|
88
|
+
- "xsltDocumentElem: write rights for %s denied\n",
|
89
|
+
- filename);
|
90
|
+
+ if (ret <= 0) {
|
91
|
+
+ if (ret == 0)
|
92
|
+
+ xsltTransformError(ctxt, NULL, inst,
|
93
|
+
+ "xsltDocumentElem: write rights for %s denied\n",
|
94
|
+
+ filename);
|
95
|
+
xmlFree(URL);
|
96
|
+
xmlFree(filename);
|
97
|
+
return;
|
98
|
+
diff --git a/libxslt/xslt.c b/libxslt/xslt.c
|
99
|
+
index 780a5ad..a234eb7 100644
|
100
|
+
--- a/libxslt/xslt.c
|
101
|
+
+++ b/libxslt/xslt.c
|
102
|
+
@@ -6763,10 +6763,11 @@ xsltParseStylesheetFile(const xmlChar* filename) {
|
103
|
+
int res;
|
104
|
+
|
105
|
+
res = xsltCheckRead(sec, NULL, filename);
|
106
|
+
- if (res == 0) {
|
107
|
+
- xsltTransformError(NULL, NULL, NULL,
|
108
|
+
- "xsltParseStylesheetFile: read rights for %s denied\n",
|
109
|
+
- filename);
|
110
|
+
+ if (res <= 0) {
|
111
|
+
+ if (res == 0)
|
112
|
+
+ xsltTransformError(NULL, NULL, NULL,
|
113
|
+
+ "xsltParseStylesheetFile: read rights for %s denied\n",
|
114
|
+
+ filename);
|
115
|
+
return(NULL);
|
116
|
+
}
|
117
|
+
}
|
118
|
+
--
|
119
|
+
2.17.1
|
120
|
+
|
Binary file
|