nokogiri 1.10.3 → 1.11.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/LICENSE.md +1 -1
- data/README.md +173 -94
- data/dependencies.yml +28 -26
- data/ext/nokogiri/depend +37 -358
- data/ext/nokogiri/extconf.rb +611 -391
- data/ext/nokogiri/html_document.c +78 -82
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +69 -66
- data/ext/nokogiri/html_sax_push_parser.c +42 -34
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +192 -87
- data/ext/nokogiri/nokogiri.h +181 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +255 -183
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +387 -316
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +195 -172
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +118 -118
- data/ext/nokogiri/xml_sax_parser_context.c +103 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +111 -34
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +206 -123
- data/ext/nokogiri/xslt_stylesheet.c +158 -165
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +63 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +105 -103
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +13 -26
- data/lib/nokogiri/html/document_fragment.rb +16 -15
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +205 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +36 -32
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +92 -41
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node.rb +625 -290
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +10 -3
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +9 -12
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +2 -3
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri.rb +6 -27
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +102 -147
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
###
|
@@ -5,11 +6,11 @@ module Nokogiri
|
|
5
6
|
#
|
6
7
|
# == Building combinations of parse options
|
7
8
|
# You can build your own combinations of these parse options by using any of the following methods:
|
8
|
-
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
|
9
|
+
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
|
9
10
|
# [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
|
10
|
-
# Nokogiri.XML('<content>Chapter 1</content',
|
11
|
+
# Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
|
11
12
|
# [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
|
12
|
-
# Nokogiri.XML('<content>Chapter 1</content',
|
13
|
+
# Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
|
13
14
|
# [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
|
14
15
|
# Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
|
15
16
|
#
|
@@ -72,6 +73,8 @@ module Nokogiri
|
|
72
73
|
DEFAULT_XML = RECOVER | NONET
|
73
74
|
# the default options used for parsing HTML documents
|
74
75
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
76
|
+
# the default options used for parsing XML schemas
|
77
|
+
DEFAULT_SCHEMA = NONET
|
75
78
|
|
76
79
|
attr_accessor :options
|
77
80
|
def initialize options = STRICT
|
@@ -106,6 +109,10 @@ module Nokogiri
|
|
106
109
|
@options & RECOVER == STRICT
|
107
110
|
end
|
108
111
|
|
112
|
+
def ==(other)
|
113
|
+
other.to_i == to_i
|
114
|
+
end
|
115
|
+
|
109
116
|
alias :to_i :options
|
110
117
|
|
111
118
|
def inspect
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
data/lib/nokogiri/xml/pp.rb
CHANGED
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
###
|
@@ -85,19 +86,15 @@ module Nokogiri
|
|
85
86
|
private :initialize
|
86
87
|
|
87
88
|
###
|
88
|
-
# Get
|
89
|
+
# Get the attributes of the current node as a Hash
|
90
|
+
# @return [Hash<String, String>] Attribute names and values
|
89
91
|
def attributes
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
# Get a list of attributes for the current node
|
97
|
-
def attribute_nodes
|
98
|
-
nodes = attr_nodes
|
99
|
-
nodes.each { |v| v.instance_variable_set(:@_r, self) }
|
100
|
-
nodes
|
92
|
+
attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
|
93
|
+
hash[node.name] = node.to_s
|
94
|
+
end
|
95
|
+
ns = namespaces
|
96
|
+
attrs_hash.merge!(ns) if ns
|
97
|
+
attrs_hash
|
101
98
|
end
|
102
99
|
|
103
100
|
###
|
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
class << self
|
4
5
|
###
|
5
6
|
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
|
6
7
|
# See Nokogiri::XML::RelaxNG for an example.
|
7
|
-
def RelaxNG
|
8
|
-
RelaxNG.new(string_or_io)
|
8
|
+
def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
9
|
+
RelaxNG.new(string_or_io, options)
|
9
10
|
end
|
10
11
|
end
|
11
12
|
|
@@ -26,6 +27,10 @@ module Nokogiri
|
|
26
27
|
# end
|
27
28
|
#
|
28
29
|
# The errors in the list are Nokogiri::XML::SyntaxError objects.
|
30
|
+
#
|
31
|
+
# NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
|
32
|
+
# underlying parsing libraries to access network resources. This is counter to Nokogiri's
|
33
|
+
# "untrusted by default" security policy, but is a limitation of the underlying libraries.
|
29
34
|
class RelaxNG < Nokogiri::XML::Schema
|
30
35
|
end
|
31
36
|
end
|
data/lib/nokogiri/xml/sax.rb
CHANGED
data/lib/nokogiri/xml/schema.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
class << self
|
4
5
|
###
|
5
6
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
6
7
|
# object.
|
7
|
-
def Schema
|
8
|
-
Schema.new(string_or_io)
|
8
|
+
def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
9
|
+
Schema.new(string_or_io, options)
|
9
10
|
end
|
10
11
|
end
|
11
12
|
|
@@ -26,15 +27,23 @@ module Nokogiri
|
|
26
27
|
# end
|
27
28
|
#
|
28
29
|
# The errors in the list are Nokogiri::XML::SyntaxError objects.
|
30
|
+
#
|
31
|
+
# NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
|
32
|
+
# are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
|
33
|
+
# documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
|
34
|
+
# security policy. If a document is trusted, then the caller may turn off the NONET option via
|
35
|
+
# the ParseOptions to re-enable external entity resolution over a network connection.
|
29
36
|
class Schema
|
30
37
|
# Errors while parsing the schema file
|
31
38
|
attr_accessor :errors
|
39
|
+
# The Nokogiri::XML::ParseOptions used to parse the schema
|
40
|
+
attr_accessor :parse_options
|
32
41
|
|
33
42
|
###
|
34
43
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
35
44
|
# object.
|
36
|
-
def self.new string_or_io
|
37
|
-
from_document
|
45
|
+
def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
|
46
|
+
from_document(Nokogiri::XML(string_or_io), options)
|
38
47
|
end
|
39
48
|
|
40
49
|
###
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
#
|
@@ -11,7 +12,9 @@ module Nokogiri
|
|
11
12
|
# Regular expression used by Searchable#search to determine if a query
|
12
13
|
# string is CSS or XPath
|
13
14
|
LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
|
14
|
-
|
15
|
+
|
16
|
+
# @!group Searching via XPath or CSS Queries
|
17
|
+
|
15
18
|
###
|
16
19
|
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
17
20
|
#
|
@@ -45,7 +48,7 @@ module Nokogiri
|
|
45
48
|
# )
|
46
49
|
#
|
47
50
|
# See Searchable#xpath and Searchable#css for further usage help.
|
48
|
-
def search
|
51
|
+
def search(*args)
|
49
52
|
paths, handler, ns, binds = extract_params(args)
|
50
53
|
|
51
54
|
xpaths = paths.map(&:to_s).map do |path|
|
@@ -54,6 +57,7 @@ module Nokogiri
|
|
54
57
|
|
55
58
|
xpath(*(xpaths + [ns, handler, binds].compact))
|
56
59
|
end
|
60
|
+
|
57
61
|
alias :/ :search
|
58
62
|
|
59
63
|
###
|
@@ -63,9 +67,10 @@ module Nokogiri
|
|
63
67
|
# result. +paths+ must be one or more XPath or CSS queries.
|
64
68
|
#
|
65
69
|
# See Searchable#search for more information.
|
66
|
-
def at
|
70
|
+
def at(*args)
|
67
71
|
search(*args).first
|
68
72
|
end
|
73
|
+
|
69
74
|
alias :% :at
|
70
75
|
|
71
76
|
###
|
@@ -101,7 +106,7 @@ module Nokogiri
|
|
101
106
|
# found in an XML document, where tags names are case-sensitive
|
102
107
|
# (e.g., "H1" is distinct from "h1").
|
103
108
|
#
|
104
|
-
def css
|
109
|
+
def css(*args)
|
105
110
|
rules, handler, ns, _ = extract_params(args)
|
106
111
|
|
107
112
|
css_internal self, rules, handler, ns
|
@@ -114,7 +119,7 @@ module Nokogiri
|
|
114
119
|
# match. +rules+ must be one or more CSS selectors.
|
115
120
|
#
|
116
121
|
# See Searchable#css for more information.
|
117
|
-
def at_css
|
122
|
+
def at_css(*args)
|
118
123
|
css(*args).first
|
119
124
|
end
|
120
125
|
|
@@ -148,7 +153,7 @@ module Nokogiri
|
|
148
153
|
# end
|
149
154
|
# }.new)
|
150
155
|
#
|
151
|
-
def xpath
|
156
|
+
def xpath(*args)
|
152
157
|
paths, handler, ns, binds = extract_params(args)
|
153
158
|
|
154
159
|
xpath_internal self, paths, handler, ns, binds
|
@@ -161,17 +166,19 @@ module Nokogiri
|
|
161
166
|
# match. +paths+ must be one or more XPath queries.
|
162
167
|
#
|
163
168
|
# See Searchable#xpath for more information.
|
164
|
-
def at_xpath
|
169
|
+
def at_xpath(*args)
|
165
170
|
xpath(*args).first
|
166
171
|
end
|
167
172
|
|
173
|
+
# @!endgroup
|
174
|
+
|
168
175
|
private
|
169
176
|
|
170
|
-
def css_internal
|
177
|
+
def css_internal(node, rules, handler, ns)
|
171
178
|
xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
|
172
179
|
end
|
173
180
|
|
174
|
-
def xpath_internal
|
181
|
+
def xpath_internal(node, paths, handler, ns, binds)
|
175
182
|
document = node.document
|
176
183
|
return NodeSet.new(document) unless document
|
177
184
|
|
@@ -186,12 +193,12 @@ module Nokogiri
|
|
186
193
|
end
|
187
194
|
end
|
188
195
|
|
189
|
-
def xpath_impl
|
196
|
+
def xpath_impl(node, path, handler, ns, binds)
|
190
197
|
ctx = XPathContext.new(node)
|
191
198
|
ctx.register_namespaces(ns)
|
192
|
-
path = path.gsub(/xmlns:/,
|
199
|
+
path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
|
193
200
|
|
194
|
-
binds.each do |key,value|
|
201
|
+
binds.each do |key, value|
|
195
202
|
ctx.register_variable key.to_s, value
|
196
203
|
end if binds
|
197
204
|
|
@@ -202,13 +209,15 @@ module Nokogiri
|
|
202
209
|
rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
|
203
210
|
end
|
204
211
|
|
205
|
-
def xpath_query_from_css_rule
|
212
|
+
def xpath_query_from_css_rule(rule, ns)
|
213
|
+
visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
|
206
214
|
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
207
|
-
CSS.xpath_for(rule.to_s, :prefix => implied_xpath_context, :ns => ns
|
208
|
-
|
215
|
+
CSS.xpath_for(rule.to_s, {:prefix => implied_xpath_context, :ns => ns,
|
216
|
+
:visitor => visitor})
|
217
|
+
end.join(" | ")
|
209
218
|
end
|
210
219
|
|
211
|
-
def extract_params
|
220
|
+
def extract_params(params) # :nodoc:
|
212
221
|
handler = params.find do |param|
|
213
222
|
![Hash, String, Symbol].include?(param.class)
|
214
223
|
end
|
data/lib/nokogiri/xml/text.rb
CHANGED
data/lib/nokogiri/xml/xpath.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
data/lib/nokogiri/xslt.rb
CHANGED
data/lib/nokogiri.rb
CHANGED
@@ -1,36 +1,15 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
# Modify the PATH on windows so that the external DLLs will get loaded.
|
3
4
|
|
4
5
|
require 'rbconfig'
|
5
6
|
|
6
7
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
7
|
-
|
8
|
-
# unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
|
9
|
-
#
|
10
|
-
# However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
|
11
|
-
# an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
|
12
|
-
# of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
|
13
|
-
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
14
|
-
# already set in the classpath.
|
15
|
-
unless $LOAD_PATH.to_s.include?("appengine-rack")
|
16
|
-
require 'stringio'
|
17
|
-
require 'isorelax.jar'
|
18
|
-
require 'jing.jar'
|
19
|
-
require 'nekohtml.jar'
|
20
|
-
require 'nekodtd.jar'
|
21
|
-
require 'xercesImpl.jar'
|
22
|
-
require 'serializer.jar'
|
23
|
-
require 'xalan.jar'
|
24
|
-
require 'xml-apis.jar'
|
25
|
-
end
|
8
|
+
require 'nokogiri/jruby/dependencies'
|
26
9
|
end
|
27
10
|
|
28
|
-
|
29
|
-
|
30
|
-
require "nokogiri/#{$1}/nokogiri"
|
31
|
-
rescue LoadError
|
32
|
-
require 'nokogiri/nokogiri'
|
33
|
-
end
|
11
|
+
require 'nokogiri/extension'
|
12
|
+
|
34
13
|
require 'nokogiri/version'
|
35
14
|
require 'nokogiri/syntax_error'
|
36
15
|
require 'nokogiri/xml'
|
@@ -54,7 +33,7 @@ require 'nokogiri/html/builder'
|
|
54
33
|
#
|
55
34
|
# # Get a Nokogiri::HTML::Document for the page we’re interested in...
|
56
35
|
#
|
57
|
-
# doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
|
36
|
+
# doc = Nokogiri::HTML(URI.open('http://www.google.com/search?q=tenderlove'))
|
58
37
|
#
|
59
38
|
# # Do funky things with it using Nokogiri::XML::Node methods...
|
60
39
|
#
|
@@ -130,7 +109,7 @@ module Nokogiri
|
|
130
109
|
end
|
131
110
|
|
132
111
|
###
|
133
|
-
#
|
112
|
+
# Parse a document contained in +args+. Nokogiri will try to guess what
|
134
113
|
# type of document you are attempting to parse. For more information, see
|
135
114
|
# Nokogiri.parse
|
136
115
|
#
|
File without changes
|
File without changes
|
@@ -0,0 +1,25 @@
|
|
1
|
+
From 0b6ae484761fa01242fe8b67b54e3eb2d282d83d Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Wed, 4 Dec 2019 08:43:51 -0500
|
4
|
+
Subject: [PATCH] fix libxml2.la's path
|
5
|
+
|
6
|
+
---
|
7
|
+
Makefile.in | 2 +-
|
8
|
+
1 file changed, 1 insertion(+), 1 deletion(-)
|
9
|
+
|
10
|
+
diff --git a/Makefile.in b/Makefile.in
|
11
|
+
index cf96d41..1372d8b 100644
|
12
|
+
--- a/Makefile.in
|
13
|
+
+++ b/Makefile.in
|
14
|
+
@@ -1057,7 +1057,7 @@ clean-noinstLTLIBRARIES:
|
15
|
+
rm -f $${locs}; \
|
16
|
+
}
|
17
|
+
|
18
|
+
-libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
|
19
|
+
+$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
|
20
|
+
$(AM_V_CCLD)$(libxml2_la_LINK) -rpath $(libdir) $(libxml2_la_OBJECTS) $(libxml2_la_LIBADD) $(LIBS)
|
21
|
+
|
22
|
+
testdso.la: $(testdso_la_OBJECTS) $(testdso_la_DEPENDENCIES) $(EXTRA_testdso_la_DEPENDENCIES)
|
23
|
+
--
|
24
|
+
2.17.1
|
25
|
+
|
@@ -0,0 +1,53 @@
|
|
1
|
+
From c94172d2a4451368530db2186190d70be8a1d9e5 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Ilya Zub <ilya@serpapi.com>
|
3
|
+
Date: Wed, 23 Dec 2020 12:45:29 +0200
|
4
|
+
Subject: Use glibc strlen to speed up xmlStrlen
|
5
|
+
MIME-Version: 1.0
|
6
|
+
Content-Type: text/plain; charset=UTF-8
|
7
|
+
Content-Transfer-Encoding: 8bit
|
8
|
+
|
9
|
+
xmlStrlen (entire HTML file): 926171.936981 μs
|
10
|
+
glibc_xmlStrlen (entire HTML file): 36905.903992 μs
|
11
|
+
delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times
|
12
|
+
|
13
|
+
xmlStrlen (average string): 57479.204010 μs
|
14
|
+
glibc_xmlStrlen (average string): 5802.069000 μs
|
15
|
+
delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times
|
16
|
+
|
17
|
+
xmlStrlen (bigger string): 388056.315979 μs
|
18
|
+
glibc_xmlStrlen (bigger string): 12797.856995 μs
|
19
|
+
delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times
|
20
|
+
|
21
|
+
xmlStrlen (smallest string): 15870.046021 μs
|
22
|
+
glibc_xmlStrlen (smallest string): 6282.208984 μs
|
23
|
+
delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times
|
24
|
+
|
25
|
+
See https://gitlab.gnome.org/GNOME/libxml2/-/issues/212 for reference.
|
26
|
+
---
|
27
|
+
xmlstring.c | 9 ++-------
|
28
|
+
1 file changed, 2 insertions(+), 7 deletions(-)
|
29
|
+
|
30
|
+
diff --git a/xmlstring.c b/xmlstring.c
|
31
|
+
index e8a1e45d..df247dff 100644
|
32
|
+
--- a/xmlstring.c
|
33
|
+
+++ b/xmlstring.c
|
34
|
+
@@ -423,14 +423,9 @@ xmlStrsub(const xmlChar *str, int start, int len) {
|
35
|
+
|
36
|
+
int
|
37
|
+
xmlStrlen(const xmlChar *str) {
|
38
|
+
- int len = 0;
|
39
|
+
-
|
40
|
+
if (str == NULL) return(0);
|
41
|
+
- while (*str != 0) { /* non input consuming */
|
42
|
+
- str++;
|
43
|
+
- len++;
|
44
|
+
- }
|
45
|
+
- return(len);
|
46
|
+
+
|
47
|
+
+ return strlen((const char*)str);
|
48
|
+
}
|
49
|
+
|
50
|
+
/**
|
51
|
+
--
|
52
|
+
2.29.2
|
53
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
This patch is a result of rake-compiler-dock using centos 7 (manylinux2014) to cross-compile.
|
2
|
+
|
3
|
+
Centos, for reasons I have not been able to discern, implements `isnan` and `isinf` as a function
|
4
|
+
and not as a macro. Debian knows how to resolve that function at dynamic-link time (despite using a
|
5
|
+
macro at compile time), but musl-based systems (like alpine) do not. Running `nm` on nokogiri.so
|
6
|
+
created on such a centos system shows:
|
7
|
+
|
8
|
+
```
|
9
|
+
U __isinf@@GLIBC_2.2.5
|
10
|
+
U __isnan@@GLIBC_2.2.5
|
11
|
+
```
|
12
|
+
|
13
|
+
(see https://github.com/sparklemotion/nokogiri/pull/2142 for more info)
|
14
|
+
|
15
|
+
This patch avoids using glibc's `isnan` and `isinf` calls, instead using libxml2's fallback
|
16
|
+
implementation. There's history here, see libxml2 commit 8813f39:
|
17
|
+
|
18
|
+
commit 8813f39
|
19
|
+
Author: Nick Wellnhofer <wellnhofer@aevum.de>
|
20
|
+
Date: 2017-09-21 00:11:26 +0200
|
21
|
+
|
22
|
+
Simplify XPath NaN, inf and -0 handling
|
23
|
+
|
24
|
+
Use C99 macros NAN, INFINITY, isnan, isinf. If they're not available:
|
25
|
+
|
26
|
+
- Assume that (0.0 / 0.0) generates a NaN and !(x == x) tests for NaN.
|
27
|
+
- Use C89's HUGE_VAL for INFINITY.
|
28
|
+
|
29
|
+
Remove manual handling of NaN, infinity and negative zero in functions
|
30
|
+
xmlXPathValueFlipSign and xmlXPathDivValues.
|
31
|
+
|
32
|
+
Remove xmlXPathGetSign. All the tests for negative zero can be replaced
|
33
|
+
with a test for negative or positive zero.
|
34
|
+
|
35
|
+
Simplify xmlXPathRoundFunction.
|
36
|
+
|
37
|
+
Remove Trio dependency.
|
38
|
+
|
39
|
+
This should work on IEEE 754 compliant implementations even if the C99
|
40
|
+
macros aren't available, but will likely break some ancient platforms.
|
41
|
+
If problems arise, my plan is to port the relevant trionan.c solution
|
42
|
+
to xpath.c. Note that non-compliant implementations are impossible
|
43
|
+
to fully support, anyway, since XPath requires IEEE 754.
|
44
|
+
|
45
|
+
This patch would be unnecessary if any of the following was true:
|
46
|
+
|
47
|
+
* centos implements these as macros, and doesn't generate an unresolved symbol for either in the shared library
|
48
|
+
* we had a way to ensure `__isinf` and `__isnan` resolve on musl (e.g., we implement them locally)
|
49
|
+
|
50
|
+
diff --git a/xpath.c b/xpath.c
|
51
|
+
index 9f64ab9..5b6d999 100644
|
52
|
+
--- a/xpath.c
|
53
|
+
+++ b/xpath.c
|
54
|
+
@@ -515,11 +515,7 @@ xmlXPathInit(void) {
|
55
|
+
*/
|
56
|
+
int
|
57
|
+
xmlXPathIsNaN(double val) {
|
58
|
+
-#ifdef isnan
|
59
|
+
- return isnan(val);
|
60
|
+
-#else
|
61
|
+
return !(val == val);
|
62
|
+
-#endif
|
63
|
+
}
|
64
|
+
|
65
|
+
/**
|
66
|
+
@@ -530,15 +530,11 @@ xmlXPathIsNaN(double val) {
|
67
|
+
*/
|
68
|
+
int
|
69
|
+
xmlXPathIsInf(double val) {
|
70
|
+
-#ifdef isinf
|
71
|
+
- return isinf(val) ? (val > 0 ? 1 : -1) : 0;
|
72
|
+
-#else
|
73
|
+
if (val >= xmlXPathPINF)
|
74
|
+
return 1;
|
75
|
+
if (val <= -xmlXPathPINF)
|
76
|
+
return -1;
|
77
|
+
return 0;
|
78
|
+
-#endif
|
79
|
+
}
|
80
|
+
|
81
|
+
#endif /* SCHEMAS or XPATH */
|