nokogiri 1.10.3 → 1.11.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (159) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +173 -94
  6. data/dependencies.yml +28 -26
  7. data/ext/nokogiri/depend +37 -358
  8. data/ext/nokogiri/extconf.rb +611 -391
  9. data/ext/nokogiri/html_document.c +78 -82
  10. data/ext/nokogiri/html_element_description.c +84 -71
  11. data/ext/nokogiri/html_entity_lookup.c +21 -16
  12. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  13. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  14. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  15. data/ext/nokogiri/nokogiri.c +192 -87
  16. data/ext/nokogiri/nokogiri.h +181 -89
  17. data/ext/nokogiri/test_global_handlers.c +40 -0
  18. data/ext/nokogiri/xml_attr.c +15 -15
  19. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  20. data/ext/nokogiri/xml_cdata.c +13 -18
  21. data/ext/nokogiri/xml_comment.c +19 -26
  22. data/ext/nokogiri/xml_document.c +255 -183
  23. data/ext/nokogiri/xml_document_fragment.c +13 -15
  24. data/ext/nokogiri/xml_dtd.c +54 -48
  25. data/ext/nokogiri/xml_element_content.c +30 -27
  26. data/ext/nokogiri/xml_element_decl.c +22 -22
  27. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  28. data/ext/nokogiri/xml_entity_decl.c +32 -30
  29. data/ext/nokogiri/xml_entity_reference.c +16 -18
  30. data/ext/nokogiri/xml_namespace.c +56 -49
  31. data/ext/nokogiri/xml_node.c +387 -316
  32. data/ext/nokogiri/xml_node_set.c +168 -156
  33. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  34. data/ext/nokogiri/xml_reader.c +195 -172
  35. data/ext/nokogiri/xml_relax_ng.c +52 -28
  36. data/ext/nokogiri/xml_sax_parser.c +118 -118
  37. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  38. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  39. data/ext/nokogiri/xml_schema.c +111 -34
  40. data/ext/nokogiri/xml_syntax_error.c +42 -21
  41. data/ext/nokogiri/xml_text.c +13 -17
  42. data/ext/nokogiri/xml_xpath_context.c +206 -123
  43. data/ext/nokogiri/xslt_stylesheet.c +158 -165
  44. data/lib/nokogiri/css/node.rb +1 -0
  45. data/lib/nokogiri/css/parser.rb +63 -62
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +39 -36
  48. data/lib/nokogiri/css/syntax_error.rb +1 -0
  49. data/lib/nokogiri/css/tokenizer.rb +105 -103
  50. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  51. data/lib/nokogiri/css.rb +1 -0
  52. data/lib/nokogiri/decorators/slop.rb +1 -0
  53. data/lib/nokogiri/extension.rb +26 -0
  54. data/lib/nokogiri/html/builder.rb +1 -0
  55. data/lib/nokogiri/html/document.rb +13 -26
  56. data/lib/nokogiri/html/document_fragment.rb +16 -15
  57. data/lib/nokogiri/html/element_description.rb +1 -0
  58. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  59. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  60. data/lib/nokogiri/html/sax/parser.rb +1 -0
  61. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  62. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  63. data/lib/nokogiri/html.rb +1 -0
  64. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  65. data/lib/nokogiri/syntax_error.rb +1 -0
  66. data/lib/nokogiri/version/constant.rb +5 -0
  67. data/lib/nokogiri/version/info.rb +205 -0
  68. data/lib/nokogiri/version.rb +3 -109
  69. data/lib/nokogiri/xml/attr.rb +1 -0
  70. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  71. data/lib/nokogiri/xml/builder.rb +36 -32
  72. data/lib/nokogiri/xml/cdata.rb +1 -0
  73. data/lib/nokogiri/xml/character_data.rb +1 -0
  74. data/lib/nokogiri/xml/document.rb +92 -41
  75. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  76. data/lib/nokogiri/xml/dtd.rb +1 -0
  77. data/lib/nokogiri/xml/element_content.rb +1 -0
  78. data/lib/nokogiri/xml/element_decl.rb +1 -0
  79. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  80. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  81. data/lib/nokogiri/xml/namespace.rb +1 -0
  82. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  83. data/lib/nokogiri/xml/node.rb +625 -290
  84. data/lib/nokogiri/xml/node_set.rb +1 -0
  85. data/lib/nokogiri/xml/notation.rb +1 -0
  86. data/lib/nokogiri/xml/parse_options.rb +10 -3
  87. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  88. data/lib/nokogiri/xml/pp/node.rb +1 -0
  89. data/lib/nokogiri/xml/pp.rb +1 -0
  90. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  91. data/lib/nokogiri/xml/reader.rb +9 -12
  92. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  93. data/lib/nokogiri/xml/sax/document.rb +1 -0
  94. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  95. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  96. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  97. data/lib/nokogiri/xml/sax.rb +1 -0
  98. data/lib/nokogiri/xml/schema.rb +13 -4
  99. data/lib/nokogiri/xml/searchable.rb +25 -16
  100. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  101. data/lib/nokogiri/xml/text.rb +1 -0
  102. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  103. data/lib/nokogiri/xml/xpath.rb +2 -3
  104. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  105. data/lib/nokogiri/xml.rb +1 -0
  106. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  107. data/lib/nokogiri/xslt.rb +1 -0
  108. data/lib/nokogiri.rb +6 -27
  109. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  110. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  111. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  112. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  113. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  114. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  115. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  116. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  117. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  118. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  119. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  120. metadata +102 -147
  121. data/ext/nokogiri/html_document.h +0 -10
  122. data/ext/nokogiri/html_element_description.h +0 -10
  123. data/ext/nokogiri/html_entity_lookup.h +0 -8
  124. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  125. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  126. data/ext/nokogiri/xml_attr.h +0 -9
  127. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  128. data/ext/nokogiri/xml_cdata.h +0 -9
  129. data/ext/nokogiri/xml_comment.h +0 -9
  130. data/ext/nokogiri/xml_document.h +0 -23
  131. data/ext/nokogiri/xml_document_fragment.h +0 -10
  132. data/ext/nokogiri/xml_dtd.h +0 -10
  133. data/ext/nokogiri/xml_element_content.h +0 -10
  134. data/ext/nokogiri/xml_element_decl.h +0 -9
  135. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  136. data/ext/nokogiri/xml_entity_decl.h +0 -10
  137. data/ext/nokogiri/xml_entity_reference.h +0 -9
  138. data/ext/nokogiri/xml_io.c +0 -61
  139. data/ext/nokogiri/xml_io.h +0 -11
  140. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  141. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  142. data/ext/nokogiri/xml_namespace.h +0 -14
  143. data/ext/nokogiri/xml_node.h +0 -13
  144. data/ext/nokogiri/xml_node_set.h +0 -12
  145. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  146. data/ext/nokogiri/xml_reader.h +0 -10
  147. data/ext/nokogiri/xml_relax_ng.h +0 -9
  148. data/ext/nokogiri/xml_sax_parser.h +0 -39
  149. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  150. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  151. data/ext/nokogiri/xml_schema.h +0 -9
  152. data/ext/nokogiri/xml_syntax_error.h +0 -13
  153. data/ext/nokogiri/xml_text.h +0 -9
  154. data/ext/nokogiri/xml_xpath_context.h +0 -10
  155. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  156. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  157. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  158. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  159. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ####
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Notation < Struct.new(:name, :public_id, :system_id)
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -5,11 +6,11 @@ module Nokogiri
5
6
  #
6
7
  # == Building combinations of parse options
7
8
  # You can build your own combinations of these parse options by using any of the following methods:
8
- # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options. All examples use Ruby 2 optional parameter syntax.
9
+ # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
9
10
  # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
10
- # Nokogiri.XML('<content>Chapter 1</content', options: Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
11
+ # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
11
12
  # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
12
- # Nokogiri.XML('<content>Chapter 1</content', options: Nokogiri::XML::ParseOptions.new.recover.noent)
13
+ # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
13
14
  # [Using Ruby Blocks] You can also set up parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
14
15
  # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
15
16
  #
@@ -72,6 +73,8 @@ module Nokogiri
72
73
  DEFAULT_XML = RECOVER | NONET
73
74
  # the default options used for parsing HTML documents
74
75
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
76
+ # the default options used for parsing XML schemas
77
+ DEFAULT_SCHEMA = NONET
75
78
 
76
79
  attr_accessor :options
77
80
  def initialize options = STRICT
@@ -106,6 +109,10 @@ module Nokogiri
106
109
  @options & RECOVER == STRICT
107
110
  end
108
111
 
112
+ def ==(other)
113
+ other.to_i == to_i
114
+ end
115
+
109
116
  alias :to_i :options
110
117
 
111
118
  def inspect
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module PP
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module PP
@@ -1,2 +1,3 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/xml/pp/node'
2
3
  require 'nokogiri/xml/pp/character_data'
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class ProcessingInstruction < Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -85,19 +86,15 @@ module Nokogiri
85
86
  private :initialize
86
87
 
87
88
  ###
88
- # Get a list of attributes for the current node.
89
+ # Get the attributes of the current node as a Hash
90
+ # @return [Hash<String, String>] Attribute names and values
89
91
  def attributes
90
- Hash[attribute_nodes.map { |node|
91
- [node.name, node.to_s]
92
- }].merge(namespaces || {})
93
- end
94
-
95
- ###
96
- # Get a list of attributes for the current node
97
- def attribute_nodes
98
- nodes = attr_nodes
99
- nodes.each { |v| v.instance_variable_set(:@_r, self) }
100
- nodes
92
+ attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
93
+ hash[node.name] = node.to_s
94
+ end
95
+ ns = namespaces
96
+ attrs_hash.merge!(ns) if ns
97
+ attrs_hash
101
98
  end
102
99
 
103
100
  ###
@@ -1,11 +1,12 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class << self
4
5
  ###
5
6
  # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
6
7
  # See Nokogiri::XML::RelaxNG for an example.
7
- def RelaxNG string_or_io
8
- RelaxNG.new(string_or_io)
8
+ def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9
+ RelaxNG.new(string_or_io, options)
9
10
  end
10
11
  end
11
12
 
@@ -26,6 +27,10 @@ module Nokogiri
26
27
  # end
27
28
  #
28
29
  # The list of errors contains Nokogiri::XML::SyntaxError objects.
30
+ #
31
+ # NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
32
+ # underlying parsing libraries to access network resources. This is counter to Nokogiri's
33
+ # "untrusted by default" security policy, but is a limitation of the underlying libraries.
29
34
  class RelaxNG < Nokogiri::XML::Schema
30
35
  end
31
36
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module SAX
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module SAX
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module SAX
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/xml/sax/document'
2
3
  require 'nokogiri/xml/sax/parser_context'
3
4
  require 'nokogiri/xml/sax/parser'
@@ -1,11 +1,12 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class << self
4
5
  ###
5
6
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
6
7
  # object.
7
- def Schema string_or_io
8
- Schema.new(string_or_io)
8
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9
+ Schema.new(string_or_io, options)
9
10
  end
10
11
  end
11
12
 
@@ -26,15 +27,23 @@ module Nokogiri
26
27
  # end
27
28
  #
28
29
  # The list of errors contains Nokogiri::XML::SyntaxError objects.
30
+ #
31
+ # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
32
+ # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
33
+ # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
34
+ # security policy. If a document is trusted, then the caller may turn off the NONET option via
35
+ # the ParseOptions to re-enable external entity resolution over a network connection.
29
36
  class Schema
30
37
  # Errors while parsing the schema file
31
38
  attr_accessor :errors
39
+ # The Nokogiri::XML::ParseOptions used to parse the schema
40
+ attr_accessor :parse_options
32
41
 
33
42
  ###
34
43
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
35
44
  # object.
36
- def self.new string_or_io
37
- from_document Nokogiri::XML(string_or_io)
45
+ def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
46
+ from_document(Nokogiri::XML(string_or_io), options)
38
47
  end
39
48
 
40
49
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  #
@@ -11,7 +12,9 @@ module Nokogiri
11
12
  # Regular expression used by Searchable#search to determine if a query
12
13
  # string is CSS or XPath
13
14
  LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
14
-
15
+
16
+ # @!group Searching via XPath or CSS Queries
17
+
15
18
  ###
16
19
  # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
17
20
  #
@@ -45,7 +48,7 @@ module Nokogiri
45
48
  # )
46
49
  #
47
50
  # See Searchable#xpath and Searchable#css for further usage help.
48
- def search *args
51
+ def search(*args)
49
52
  paths, handler, ns, binds = extract_params(args)
50
53
 
51
54
  xpaths = paths.map(&:to_s).map do |path|
@@ -54,6 +57,7 @@ module Nokogiri
54
57
 
55
58
  xpath(*(xpaths + [ns, handler, binds].compact))
56
59
  end
60
+
57
61
  alias :/ :search
58
62
 
59
63
  ###
@@ -63,9 +67,10 @@ module Nokogiri
63
67
  # result. +paths+ must be one or more XPath or CSS queries.
64
68
  #
65
69
  # See Searchable#search for more information.
66
- def at *args
70
+ def at(*args)
67
71
  search(*args).first
68
72
  end
73
+
69
74
  alias :% :at
70
75
 
71
76
  ###
@@ -101,7 +106,7 @@ module Nokogiri
101
106
  # found in an XML document, where tags names are case-sensitive
102
107
  # (e.g., "H1" is distinct from "h1").
103
108
  #
104
- def css *args
109
+ def css(*args)
105
110
  rules, handler, ns, _ = extract_params(args)
106
111
 
107
112
  css_internal self, rules, handler, ns
@@ -114,7 +119,7 @@ module Nokogiri
114
119
  # match. +rules+ must be one or more CSS selectors.
115
120
  #
116
121
  # See Searchable#css for more information.
117
- def at_css *args
122
+ def at_css(*args)
118
123
  css(*args).first
119
124
  end
120
125
 
@@ -148,7 +153,7 @@ module Nokogiri
148
153
  # end
149
154
  # }.new)
150
155
  #
151
- def xpath *args
156
+ def xpath(*args)
152
157
  paths, handler, ns, binds = extract_params(args)
153
158
 
154
159
  xpath_internal self, paths, handler, ns, binds
@@ -161,17 +166,19 @@ module Nokogiri
161
166
  # match. +paths+ must be one or more XPath queries.
162
167
  #
163
168
  # See Searchable#xpath for more information.
164
- def at_xpath *args
169
+ def at_xpath(*args)
165
170
  xpath(*args).first
166
171
  end
167
172
 
173
+ # @!endgroup
174
+
168
175
  private
169
176
 
170
- def css_internal node, rules, handler, ns
177
+ def css_internal(node, rules, handler, ns)
171
178
  xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
172
179
  end
173
180
 
174
- def xpath_internal node, paths, handler, ns, binds
181
+ def xpath_internal(node, paths, handler, ns, binds)
175
182
  document = node.document
176
183
  return NodeSet.new(document) unless document
177
184
 
@@ -186,12 +193,12 @@ module Nokogiri
186
193
  end
187
194
  end
188
195
 
189
- def xpath_impl node, path, handler, ns, binds
196
+ def xpath_impl(node, path, handler, ns, binds)
190
197
  ctx = XPathContext.new(node)
191
198
  ctx.register_namespaces(ns)
192
- path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
199
+ path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
193
200
 
194
- binds.each do |key,value|
201
+ binds.each do |key, value|
195
202
  ctx.register_variable key.to_s, value
196
203
  end if binds
197
204
 
@@ -202,13 +209,15 @@ module Nokogiri
202
209
  rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
203
210
  end
204
211
 
205
- def xpath_query_from_css_rule rule, ns
212
+ def xpath_query_from_css_rule(rule, ns)
213
+ visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
206
214
  self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
207
- CSS.xpath_for(rule.to_s, :prefix => implied_xpath_context, :ns => ns)
208
- end.join(' | ')
215
+ CSS.xpath_for(rule.to_s, {:prefix => implied_xpath_context, :ns => ns,
216
+ :visitor => visitor})
217
+ end.join(" | ")
209
218
  end
210
219
 
211
- def extract_params params # :nodoc:
220
+ def extract_params(params) # :nodoc:
212
221
  handler = params.find do |param|
213
222
  ![Hash, String, Symbol].include?(param.class)
214
223
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Text < Nokogiri::XML::CharacterData
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
- class XPath
4
+ module XPath
4
5
  class SyntaxError < XML::SyntaxError
5
6
  def to_s
6
7
  [super.chomp, str1].compact.join(': ')
@@ -1,10 +1,9 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/xml/xpath/syntax_error'
2
3
 
3
4
  module Nokogiri
4
5
  module XML
5
- class XPath
6
- # The Nokogiri::XML::Document tied to this XPath instance
7
- attr_accessor :document
6
+ module XPath
8
7
  end
9
8
  end
10
9
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class XPathContext
data/lib/nokogiri/xml.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/xml/pp'
2
3
  require 'nokogiri/xml/parse_options'
3
4
  require 'nokogiri/xml/sax'
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XSLT
3
4
  ###
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/xslt/stylesheet'
2
3
 
3
4
  module Nokogiri
data/lib/nokogiri.rb CHANGED
@@ -1,36 +1,15 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
  # Modify the PATH on windows so that the external DLLs will get loaded.
3
4
 
4
5
  require 'rbconfig'
5
6
 
6
7
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
7
- # The line below caused a problem on non-GAE rack environment.
8
- # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
9
- #
10
- # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
11
- # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
12
- # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
13
- # should skip loading xml jars. This is because those are in WEB-INF/lib and
14
- # already set in the classpath.
15
- unless $LOAD_PATH.to_s.include?("appengine-rack")
16
- require 'stringio'
17
- require 'isorelax.jar'
18
- require 'jing.jar'
19
- require 'nekohtml.jar'
20
- require 'nekodtd.jar'
21
- require 'xercesImpl.jar'
22
- require 'serializer.jar'
23
- require 'xalan.jar'
24
- require 'xml-apis.jar'
25
- end
8
+ require 'nokogiri/jruby/dependencies'
26
9
  end
27
10
 
28
- begin
29
- RUBY_VERSION =~ /(\d+\.\d+)/
30
- require "nokogiri/#{$1}/nokogiri"
31
- rescue LoadError
32
- require 'nokogiri/nokogiri'
33
- end
11
+ require 'nokogiri/extension'
12
+
34
13
  require 'nokogiri/version'
35
14
  require 'nokogiri/syntax_error'
36
15
  require 'nokogiri/xml'
@@ -54,7 +33,7 @@ require 'nokogiri/html/builder'
54
33
  #
55
34
  # # Get a Nokogiri::HTML::Document for the page we’re interested in...
56
35
  #
57
- # doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
36
+ # doc = Nokogiri::HTML(URI.open('http://www.google.com/search?q=tenderlove'))
58
37
  #
59
38
  # # Do funky things with it using Nokogiri::XML::Node methods...
60
39
  #
@@ -130,7 +109,7 @@ module Nokogiri
130
109
  end
131
110
 
132
111
  ###
133
- # Parser a document contained in +args+. Nokogiri will try to guess what
112
+ # Parse a document contained in +args+. Nokogiri will try to guess what
134
113
  # type of document you are attempting to parse. For more information, see
135
114
  # Nokogiri.parse
136
115
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri'
2
3
 
3
4
  module XSD # :nodoc:
@@ -0,0 +1,25 @@
1
+ From 0b6ae484761fa01242fe8b67b54e3eb2d282d83d Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Wed, 4 Dec 2019 08:43:51 -0500
4
+ Subject: [PATCH] fix libxml2.la's path
5
+
6
+ ---
7
+ Makefile.in | 2 +-
8
+ 1 file changed, 1 insertion(+), 1 deletion(-)
9
+
10
+ diff --git a/Makefile.in b/Makefile.in
11
+ index cf96d41..1372d8b 100644
12
+ --- a/Makefile.in
13
+ +++ b/Makefile.in
14
+ @@ -1057,7 +1057,7 @@ clean-noinstLTLIBRARIES:
15
+ rm -f $${locs}; \
16
+ }
17
+
18
+ -libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
19
+ +$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
20
+ $(AM_V_CCLD)$(libxml2_la_LINK) -rpath $(libdir) $(libxml2_la_OBJECTS) $(libxml2_la_LIBADD) $(LIBS)
21
+
22
+ testdso.la: $(testdso_la_OBJECTS) $(testdso_la_DEPENDENCIES) $(EXTRA_testdso_la_DEPENDENCIES)
23
+ --
24
+ 2.17.1
25
+
@@ -0,0 +1,53 @@
1
+ From c94172d2a4451368530db2186190d70be8a1d9e5 Mon Sep 17 00:00:00 2001
2
+ From: Ilya Zub <ilya@serpapi.com>
3
+ Date: Wed, 23 Dec 2020 12:45:29 +0200
4
+ Subject: Use glibc strlen to speed up xmlStrlen
5
+ MIME-Version: 1.0
6
+ Content-Type: text/plain; charset=UTF-8
7
+ Content-Transfer-Encoding: 8bit
8
+
9
+ xmlStrlen (entire HTML file): 926171.936981 μs
10
+ glibc_xmlStrlen (entire HTML file): 36905.903992 μs
11
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times
12
+
13
+ xmlStrlen (average string): 57479.204010 μs
14
+ glibc_xmlStrlen (average string): 5802.069000 μs
15
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times
16
+
17
+ xmlStrlen (bigger string): 388056.315979 μs
18
+ glibc_xmlStrlen (bigger string): 12797.856995 μs
19
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times
20
+
21
+ xmlStrlen (smallest string): 15870.046021 μs
22
+ glibc_xmlStrlen (smallest string): 6282.208984 μs
23
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times
24
+
25
+ See https://gitlab.gnome.org/GNOME/libxml2/-/issues/212 for reference.
26
+ ---
27
+ xmlstring.c | 9 ++-------
28
+ 1 file changed, 2 insertions(+), 7 deletions(-)
29
+
30
+ diff --git a/xmlstring.c b/xmlstring.c
31
+ index e8a1e45d..df247dff 100644
32
+ --- a/xmlstring.c
33
+ +++ b/xmlstring.c
34
+ @@ -423,14 +423,9 @@ xmlStrsub(const xmlChar *str, int start, int len) {
35
+
36
+ int
37
+ xmlStrlen(const xmlChar *str) {
38
+ - int len = 0;
39
+ -
40
+ if (str == NULL) return(0);
41
+ - while (*str != 0) { /* non input consuming */
42
+ - str++;
43
+ - len++;
44
+ - }
45
+ - return(len);
46
+ +
47
+ + return strlen((const char*)str);
48
+ }
49
+
50
+ /**
51
+ --
52
+ 2.29.2
53
+
@@ -0,0 +1,81 @@
1
+ This patch is a result of rake-compiler-dock using centos 7 (manylinux2014) to cross-compile.
2
+
3
+ Centos, for reasons I have not been able to discern, implements `isnan` and `isinf` as a function
4
+ and not as a macro. Debian knows how to resolve that function at dynamic-link time (despite using a
5
+ macro at compile time), but musl-based systems (like alpine) do not. Running `nm` on nokogiri.so
6
+ created on such a centos system shows:
7
+
8
+ ```
9
+ U __isinf@@GLIBC_2.2.5
10
+ U __isnan@@GLIBC_2.2.5
11
+ ```
12
+
13
+ (see https://github.com/sparklemotion/nokogiri/pull/2142 for more info)
14
+
15
+ This patch avoids using glibc's `isnan` and `isinf` calls, instead using libxml2's fallback
16
+ implementation. There's history here, see libxml2 commit 8813f39:
17
+
18
+ commit 8813f39
19
+ Author: Nick Wellnhofer <wellnhofer@aevum.de>
20
+ Date: 2017-09-21 00:11:26 +0200
21
+
22
+ Simplify XPath NaN, inf and -0 handling
23
+
24
+ Use C99 macros NAN, INFINITY, isnan, isinf. If they're not available:
25
+
26
+ - Assume that (0.0 / 0.0) generates a NaN and !(x == x) tests for NaN.
27
+ - Use C89's HUGE_VAL for INFINITY.
28
+
29
+ Remove manual handling of NaN, infinity and negative zero in functions
30
+ xmlXPathValueFlipSign and xmlXPathDivValues.
31
+
32
+ Remove xmlXPathGetSign. All the tests for negative zero can be replaced
33
+ with a test for negative or positive zero.
34
+
35
+ Simplify xmlXPathRoundFunction.
36
+
37
+ Remove Trio dependency.
38
+
39
+ This should work on IEEE 754 compliant implementations even if the C99
40
+ macros aren't available, but will likely break some ancient platforms.
41
+ If problems arise, my plan is to port the relevant trionan.c solution
42
+ to xpath.c. Note that non-compliant implementations are impossible
43
+ to fully support, anyway, since XPath requires IEEE 754.
44
+
45
+ This patch would be unnecessary if any of the following was true:
46
+
47
+ * centos implements these as macros, and doesn't generate an unresolved symbol for either in the shared library
48
+ * we had a way to ensure `__isinf` and `__isnan` resolve on musl (e.g., we implement them locally)
49
+
50
+ diff --git a/xpath.c b/xpath.c
51
+ index 9f64ab9..5b6d999 100644
52
+ --- a/xpath.c
53
+ +++ b/xpath.c
54
+ @@ -515,11 +515,7 @@ xmlXPathInit(void) {
55
+ */
56
+ int
57
+ xmlXPathIsNaN(double val) {
58
+ -#ifdef isnan
59
+ - return isnan(val);
60
+ -#else
61
+ return !(val == val);
62
+ -#endif
63
+ }
64
+
65
+ /**
66
+ @@ -530,15 +530,11 @@ xmlXPathIsNaN(double val) {
67
+ */
68
+ int
69
+ xmlXPathIsInf(double val) {
70
+ -#ifdef isinf
71
+ - return isinf(val) ? (val > 0 ? 1 : -1) : 0;
72
+ -#else
73
+ if (val >= xmlXPathPINF)
74
+ return 1;
75
+ if (val <= -xmlXPathPINF)
76
+ return -1;
77
+ return 0;
78
+ -#endif
79
+ }
80
+
81
+ #endif /* SCHEMAS or XPATH */