nokogiri 1.10.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +12 -12
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +716 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +191 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +60 -51
  32. data/ext/nokogiri/xml_node.c +493 -407
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +96 -46
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +34 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri/css/node.rb +1 -0
  91. data/lib/nokogiri/css/parser.rb +64 -63
  92. data/lib/nokogiri/css/parser.y +3 -3
  93. data/lib/nokogiri/css/parser_extras.rb +39 -36
  94. data/lib/nokogiri/css/syntax_error.rb +2 -1
  95. data/lib/nokogiri/css/tokenizer.rb +1 -0
  96. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  97. data/lib/nokogiri/css.rb +15 -14
  98. data/lib/nokogiri/decorators/slop.rb +1 -0
  99. data/lib/nokogiri/extension.rb +31 -0
  100. data/lib/nokogiri/gumbo.rb +14 -0
  101. data/lib/nokogiri/html.rb +32 -27
  102. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  103. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  104. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  105. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  106. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  107. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  109. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  110. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  111. data/lib/nokogiri/html4.rb +40 -0
  112. data/lib/nokogiri/html5/document.rb +74 -0
  113. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  114. data/lib/nokogiri/html5/node.rb +93 -0
  115. data/lib/nokogiri/html5.rb +473 -0
  116. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  117. data/lib/nokogiri/syntax_error.rb +1 -0
  118. data/lib/nokogiri/version/constant.rb +5 -0
  119. data/lib/nokogiri/version/info.rb +215 -0
  120. data/lib/nokogiri/version.rb +3 -109
  121. data/lib/nokogiri/xml/attr.rb +1 -0
  122. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  123. data/lib/nokogiri/xml/builder.rb +41 -2
  124. data/lib/nokogiri/xml/cdata.rb +1 -0
  125. data/lib/nokogiri/xml/character_data.rb +1 -0
  126. data/lib/nokogiri/xml/document.rb +138 -41
  127. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  128. data/lib/nokogiri/xml/dtd.rb +1 -0
  129. data/lib/nokogiri/xml/element_content.rb +1 -0
  130. data/lib/nokogiri/xml/element_decl.rb +1 -0
  131. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  132. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  133. data/lib/nokogiri/xml/namespace.rb +1 -0
  134. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  135. data/lib/nokogiri/xml/node.rb +629 -293
  136. data/lib/nokogiri/xml/node_set.rb +1 -0
  137. data/lib/nokogiri/xml/notation.rb +1 -0
  138. data/lib/nokogiri/xml/parse_options.rb +12 -3
  139. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  140. data/lib/nokogiri/xml/pp/node.rb +1 -0
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  143. data/lib/nokogiri/xml/reader.rb +9 -12
  144. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  145. data/lib/nokogiri/xml/sax/document.rb +25 -30
  146. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  147. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  148. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  149. data/lib/nokogiri/xml/sax.rb +5 -4
  150. data/lib/nokogiri/xml/schema.rb +13 -4
  151. data/lib/nokogiri/xml/searchable.rb +25 -16
  152. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  153. data/lib/nokogiri/xml/text.rb +1 -0
  154. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  155. data/lib/nokogiri/xml/xpath.rb +4 -5
  156. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  157. data/lib/nokogiri/xml.rb +36 -36
  158. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri.rb +32 -51
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. metadata +139 -161
  173. data/ext/nokogiri/html_document.c +0 -170
  174. data/ext/nokogiri/html_document.h +0 -10
  175. data/ext/nokogiri/html_element_description.c +0 -279
  176. data/ext/nokogiri/html_element_description.h +0 -10
  177. data/ext/nokogiri/html_entity_lookup.c +0 -32
  178. data/ext/nokogiri/html_entity_lookup.h +0 -8
  179. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  180. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  181. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  182. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_attr.h +0 -9
  184. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  185. data/ext/nokogiri/xml_cdata.h +0 -9
  186. data/ext/nokogiri/xml_comment.h +0 -9
  187. data/ext/nokogiri/xml_document.h +0 -23
  188. data/ext/nokogiri/xml_document_fragment.h +0 -10
  189. data/ext/nokogiri/xml_dtd.h +0 -10
  190. data/ext/nokogiri/xml_element_content.h +0 -10
  191. data/ext/nokogiri/xml_element_decl.h +0 -9
  192. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  193. data/ext/nokogiri/xml_entity_decl.h +0 -10
  194. data/ext/nokogiri/xml_entity_reference.h +0 -9
  195. data/ext/nokogiri/xml_io.c +0 -61
  196. data/ext/nokogiri/xml_io.h +0 -11
  197. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  198. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  199. data/ext/nokogiri/xml_namespace.h +0 -14
  200. data/ext/nokogiri/xml_node.h +0 -13
  201. data/ext/nokogiri/xml_node_set.h +0 -12
  202. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  203. data/ext/nokogiri/xml_reader.h +0 -10
  204. data/ext/nokogiri/xml_relax_ng.h +0 -9
  205. data/ext/nokogiri/xml_sax_parser.h +0 -39
  206. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  207. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  208. data/ext/nokogiri/xml_schema.h +0 -9
  209. data/ext/nokogiri/xml_syntax_error.h +0 -13
  210. data/ext/nokogiri/xml_text.h +0 -9
  211. data/ext/nokogiri/xml_xpath_context.h +0 -10
  212. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  213. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  214. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  215. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  216. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,5 +1,4 @@
1
- require 'nokogiri/xslt/stylesheet'
2
-
1
+ # frozen_string_literal: true
3
2
  module Nokogiri
4
3
  class << self
5
4
  ###
@@ -21,32 +20,32 @@ module Nokogiri
21
20
  class << self
22
21
  ###
23
22
  # Parse the stylesheet in +string+, register any +modules+
24
- def parse string, modules = {}
23
+ def parse(string, modules = {})
25
24
  modules.each do |url, klass|
26
- XSLT.register url, klass
25
+ XSLT.register(url, klass)
27
26
  end
28
27
 
28
+ doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
29
29
  if Nokogiri.jruby?
30
- Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
30
+ Stylesheet.parse_stylesheet_doc(doc, string)
31
31
  else
32
- Stylesheet.parse_stylesheet_doc(XML.parse(string))
32
+ Stylesheet.parse_stylesheet_doc(doc)
33
33
  end
34
34
  end
35
35
 
36
36
  ###
37
37
  # Quote parameters in +params+ for stylesheet safety
38
- def quote_params params
38
+ def quote_params(params)
39
39
  parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
40
- parray.each_with_index do |v,i|
41
- if i % 2 > 0
42
- parray[i]=
43
- if v =~ /'/
44
- "concat('#{ v.gsub(/'/, %q{', "'", '}) }')"
45
- else
46
- "'#{v}'";
47
- end
40
+ parray.each_with_index do |v, i|
41
+ parray[i] = if i % 2 > 0
42
+ if v =~ /'/
43
+ "concat('#{v.gsub(/'/, %q{', "'", '})}')"
44
+ else
45
+ "'#{v}'"
46
+ end
48
47
  else
49
- parray[i] = v.to_s
48
+ v.to_s
50
49
  end
51
50
  end
52
51
  parray.flatten
@@ -54,3 +53,5 @@ module Nokogiri
54
53
  end
55
54
  end
56
55
  end
56
+
57
+ require_relative "xslt/stylesheet"
data/lib/nokogiri.rb CHANGED
@@ -1,60 +1,30 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
  # Modify the PATH on windows so that the external DLLs will get loaded.
3
4
 
4
- require 'rbconfig'
5
+ require "rbconfig"
5
6
 
6
7
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
7
- # The line below caused a problem on non-GAE rack environment.
8
- # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
9
- #
10
- # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
11
- # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
12
- # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
13
- # should skip loading xml jars. This is because those are in WEB-INF/lib and
14
- # already set in the classpath.
15
- unless $LOAD_PATH.to_s.include?("appengine-rack")
16
- require 'stringio'
17
- require 'isorelax.jar'
18
- require 'jing.jar'
19
- require 'nekohtml.jar'
20
- require 'nekodtd.jar'
21
- require 'xercesImpl.jar'
22
- require 'serializer.jar'
23
- require 'xalan.jar'
24
- require 'xml-apis.jar'
25
- end
8
+ require_relative "nokogiri/jruby/dependencies"
26
9
  end
27
10
 
28
- begin
29
- RUBY_VERSION =~ /(\d+\.\d+)/
30
- require "nokogiri/#{$1}/nokogiri"
31
- rescue LoadError
32
- require 'nokogiri/nokogiri'
33
- end
34
- require 'nokogiri/version'
35
- require 'nokogiri/syntax_error'
36
- require 'nokogiri/xml'
37
- require 'nokogiri/xslt'
38
- require 'nokogiri/html'
39
- require 'nokogiri/decorators/slop'
40
- require 'nokogiri/css'
41
- require 'nokogiri/html/builder'
11
+ require_relative "nokogiri/extension"
42
12
 
43
13
  # Nokogiri parses and searches XML/HTML very quickly, and also has
44
14
  # correctly implemented CSS3 selector support as well as XPath 1.0
45
15
  # support.
46
16
  #
47
17
  # Parsing a document returns either a Nokogiri::XML::Document, or a
48
- # Nokogiri::HTML::Document depending on the kind of document you parse.
18
+ # Nokogiri::HTML4::Document depending on the kind of document you parse.
49
19
  #
50
20
  # Here is an example:
51
21
  #
52
22
  # require 'nokogiri'
53
23
  # require 'open-uri'
54
24
  #
55
- # # Get a Nokogiri::HTML:Document for the page we’re interested in...
25
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
56
26
  #
57
- # doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
27
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
58
28
  #
59
29
  # # Do funky things with it using Nokogiri::XML::Node methods...
60
30
  #
@@ -70,27 +40,27 @@ module Nokogiri
70
40
  class << self
71
41
  ###
72
42
  # Parse an HTML or XML document. +string+ contains the document.
73
- def parse string, url = nil, encoding = nil, options = nil
43
+ def parse(string, url = nil, encoding = nil, options = nil)
74
44
  if string.respond_to?(:read) ||
75
45
  /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
76
46
  # Expect an HTML indicator to appear within the first 512
77
47
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
78
48
  # shouldn't be that long)
79
- Nokogiri.HTML(string, url, encoding,
49
+ Nokogiri.HTML4(string, url, encoding,
80
50
  options || XML::ParseOptions::DEFAULT_HTML)
81
51
  else
82
52
  Nokogiri.XML(string, url, encoding,
83
53
  options || XML::ParseOptions::DEFAULT_XML)
84
- end.tap { |doc|
54
+ end.tap do |doc|
85
55
  yield doc if block_given?
86
- }
56
+ end
87
57
  end
88
58
 
89
59
  ###
90
60
  # Create a new Nokogiri::XML::DocumentFragment
91
- def make input = nil, opts = {}, &blk
61
+ def make(input = nil, opts = {}, &blk)
92
62
  if input
93
- Nokogiri::HTML.fragment(input).children.first
63
+ Nokogiri::HTML4.fragment(input).children.first
94
64
  else
95
65
  Nokogiri(&blk)
96
66
  end
@@ -119,10 +89,10 @@ module Nokogiri
119
89
  # Make sure to support some popular encoding aliases not known by
120
90
  # all iconv implementations.
121
91
  {
122
- 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
123
- }.each { |alias_name, name|
92
+ "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
93
+ }.each do |alias_name, name|
124
94
  EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
125
- }
95
+ end
126
96
  end
127
97
  end
128
98
 
@@ -130,15 +100,26 @@ module Nokogiri
130
100
  end
131
101
 
132
102
  ###
133
- # Parser a document contained in +args+. Nokogiri will try to guess what
134
- # type of document you are attempting to parse. For more information, see
135
- # Nokogiri.parse
103
+ # Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
104
+ # attempting to parse. For more information, see Nokogiri.parse
136
105
  #
137
- # To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
106
+ # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
138
107
  def Nokogiri(*args, &block)
139
108
  if block_given?
140
- Nokogiri::HTML::Builder.new(&block).doc.root
109
+ Nokogiri::HTML4::Builder.new(&block).doc.root
141
110
  else
142
111
  Nokogiri.parse(*args)
143
112
  end
144
113
  end
114
+
115
+ require_relative "nokogiri/version"
116
+ require_relative "nokogiri/syntax_error"
117
+ require_relative "nokogiri/xml"
118
+ require_relative "nokogiri/xslt"
119
+ require_relative "nokogiri/html4"
120
+ require_relative "nokogiri/html"
121
+ require_relative "nokogiri/decorators/slop"
122
+ require_relative "nokogiri/css"
123
+ require_relative "nokogiri/html4/builder"
124
+
125
+ require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri'
2
3
 
3
4
  module XSD # :nodoc:
@@ -16,7 +16,7 @@ index cf96d41..1372d8b 100644
16
16
  }
17
17
 
18
18
  -libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
19
- +$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
19
+ +$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
20
20
  $(AM_V_CCLD)$(libxml2_la_LINK) -rpath $(libdir) $(libxml2_la_OBJECTS) $(libxml2_la_LIBADD) $(LIBS)
21
21
 
22
22
  testdso.la: $(testdso_la_OBJECTS) $(testdso_la_DEPENDENCIES) $(EXTRA_testdso_la_DEPENDENCIES)
@@ -0,0 +1,53 @@
1
+ From c94172d2a4451368530db2186190d70be8a1d9e5 Mon Sep 17 00:00:00 2001
2
+ From: Ilya Zub <ilya@serpapi.com>
3
+ Date: Wed, 23 Dec 2020 12:45:29 +0200
4
+ Subject: Use glibc strlen to speed up xmlStrlen
5
+ MIME-Version: 1.0
6
+ Content-Type: text/plain; charset=UTF-8
7
+ Content-Transfer-Encoding: 8bit
8
+
9
+ xmlStrlen (entire HTML file): 926171.936981 μs
10
+ glibc_xmlStrlen (entire HTML file): 36905.903992 μs
11
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times
12
+
13
+ xmlStrlen (average string): 57479.204010 μs
14
+ glibc_xmlStrlen (average string): 5802.069000 μs
15
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times
16
+
17
+ xmlStrlen (bigger string): 388056.315979 μs
18
+ glibc_xmlStrlen (bigger string): 12797.856995 μs
19
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times
20
+
21
+ xmlStrlen (smallest string): 15870.046021 μs
22
+ glibc_xmlStrlen (smallest string): 6282.208984 μs
23
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times
24
+
25
+ See https://gitlab.gnome.org/GNOME/libxml2/-/issues/212 for reference.
26
+ ---
27
+ xmlstring.c | 9 ++-------
28
+ 1 file changed, 2 insertions(+), 7 deletions(-)
29
+
30
+ diff --git a/xmlstring.c b/xmlstring.c
31
+ index e8a1e45d..df247dff 100644
32
+ --- a/xmlstring.c
33
+ +++ b/xmlstring.c
34
+ @@ -423,14 +423,9 @@ xmlStrsub(const xmlChar *str, int start, int len) {
35
+
36
+ int
37
+ xmlStrlen(const xmlChar *str) {
38
+ - int len = 0;
39
+ -
40
+ if (str == NULL) return(0);
41
+ - while (*str != 0) { /* non input consuming */
42
+ - str++;
43
+ - len++;
44
+ - }
45
+ - return(len);
46
+ +
47
+ + return strlen((const char*)str);
48
+ }
49
+
50
+ /**
51
+ --
52
+ 2.29.2
53
+
@@ -0,0 +1,81 @@
1
+ This patch is a result of rake-compiler-dock using centos 7 (manylinux2014) to cross-compile.
2
+
3
+ Centos, for reasons I have not been able to discern, implements `isnan` and `isinf` as a function
4
+ and not as a macro. Debian knows how to resolve that function at dynamic-link time (despite using a
5
+ macro at compile time), but musl-based systems (like alpine) do not. Running `nm` on nokogiri.so
6
+ created on such a centos system shows:
7
+
8
+ ```
9
+ U __isinf@@GLIBC_2.2.5
10
+ U __isnan@@GLIBC_2.2.5
11
+ ```
12
+
13
+ (see https://github.com/sparklemotion/nokogiri/pull/2142 for more info)
14
+
15
+ This patch avoids using glibc's `isnan` and `isinf` calls, instead using libxml2's fallback
16
+ implementation. There's history here, see libxml2 commit 8813f39:
17
+
18
+ commit 8813f39
19
+ Author: Nick Wellnhofer <wellnhofer@aevum.de>
20
+ Date: 2017-09-21 00:11:26 +0200
21
+
22
+ Simplify XPath NaN, inf and -0 handling
23
+
24
+ Use C99 macros NAN, INFINITY, isnan, isinf. If they're not available:
25
+
26
+ - Assume that (0.0 / 0.0) generates a NaN and !(x == x) tests for NaN.
27
+ - Use C89's HUGE_VAL for INFINITY.
28
+
29
+ Remove manual handling of NaN, infinity and negative zero in functions
30
+ xmlXPathValueFlipSign and xmlXPathDivValues.
31
+
32
+ Remove xmlXPathGetSign. All the tests for negative zero can be replaced
33
+ with a test for negative or positive zero.
34
+
35
+ Simplify xmlXPathRoundFunction.
36
+
37
+ Remove Trio dependency.
38
+
39
+ This should work on IEEE 754 compliant implementations even if the C99
40
+ macros aren't available, but will likely break some ancient platforms.
41
+ If problems arise, my plan is to port the relevant trionan.c solution
42
+ to xpath.c. Note that non-compliant implementations are impossible
43
+ to fully support, anyway, since XPath requires IEEE 754.
44
+
45
+ This patch would be unnecessary if any of the following was true:
46
+
47
+ * centos implements these as macros, and doesn't generate an unresolved symbol for either in the shared library
48
+ * we had a way to ensure `__isinf` and `__isnan` resolve on musl (e.g., we implement them locally)
49
+
50
+ diff --git a/xpath.c b/xpath.c
51
+ index 9f64ab9..5b6d999 100644
52
+ --- a/xpath.c
53
+ +++ b/xpath.c
54
+ @@ -515,11 +515,7 @@ xmlXPathInit(void) {
55
+ */
56
+ int
57
+ xmlXPathIsNaN(double val) {
58
+ -#ifdef isnan
59
+ - return isnan(val);
60
+ -#else
61
+ return !(val == val);
62
+ -#endif
63
+ }
64
+
65
+ /**
66
+ @@ -530,15 +530,11 @@ xmlXPathIsNaN(double val) {
67
+ */
68
+ int
69
+ xmlXPathIsInf(double val) {
70
+ -#ifdef isinf
71
+ - return isinf(val) ? (val > 0 ? 1 : -1) : 0;
72
+ -#else
73
+ if (val >= xmlXPathPINF)
74
+ return 1;
75
+ if (val <= -xmlXPathPINF)
76
+ return -1;
77
+ return 0;
78
+ -#endif
79
+ }
80
+
81
+ #endif /* SCHEMAS or XPATH */