nokogiri 1.11.0.rc3 → 1.11.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/ext/nokogiri/depend +37 -358
  7. data/ext/nokogiri/extconf.rb +581 -374
  8. data/ext/nokogiri/html_document.c +78 -82
  9. data/ext/nokogiri/html_element_description.c +84 -71
  10. data/ext/nokogiri/html_entity_lookup.c +21 -16
  11. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  12. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  13. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  14. data/ext/nokogiri/nokogiri.c +192 -93
  15. data/ext/nokogiri/nokogiri.h +177 -98
  16. data/ext/nokogiri/test_global_handlers.c +40 -0
  17. data/ext/nokogiri/xml_attr.c +15 -15
  18. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  19. data/ext/nokogiri/xml_cdata.c +13 -18
  20. data/ext/nokogiri/xml_comment.c +19 -26
  21. data/ext/nokogiri/xml_document.c +246 -188
  22. data/ext/nokogiri/xml_document_fragment.c +13 -15
  23. data/ext/nokogiri/xml_dtd.c +54 -48
  24. data/ext/nokogiri/xml_element_content.c +30 -27
  25. data/ext/nokogiri/xml_element_decl.c +22 -22
  26. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  27. data/ext/nokogiri/xml_entity_decl.c +32 -30
  28. data/ext/nokogiri/xml_entity_reference.c +16 -18
  29. data/ext/nokogiri/xml_namespace.c +56 -49
  30. data/ext/nokogiri/xml_node.c +371 -320
  31. data/ext/nokogiri/xml_node_set.c +168 -156
  32. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  33. data/ext/nokogiri/xml_reader.c +191 -157
  34. data/ext/nokogiri/xml_relax_ng.c +52 -28
  35. data/ext/nokogiri/xml_sax_parser.c +118 -118
  36. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  37. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  38. data/ext/nokogiri/xml_schema.c +95 -47
  39. data/ext/nokogiri/xml_syntax_error.c +42 -21
  40. data/ext/nokogiri/xml_text.c +13 -17
  41. data/ext/nokogiri/xml_xpath_context.c +206 -123
  42. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  43. data/lib/nokogiri.rb +3 -7
  44. data/lib/nokogiri/css/parser.rb +3 -3
  45. data/lib/nokogiri/css/parser.y +2 -2
  46. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  47. data/lib/nokogiri/extension.rb +26 -0
  48. data/lib/nokogiri/html/document.rb +12 -26
  49. data/lib/nokogiri/html/document_fragment.rb +15 -15
  50. data/lib/nokogiri/version.rb +2 -149
  51. data/lib/nokogiri/version/constant.rb +5 -0
  52. data/lib/nokogiri/version/info.rb +205 -0
  53. data/lib/nokogiri/xml/document.rb +91 -35
  54. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  55. data/lib/nokogiri/xml/node.rb +89 -69
  56. data/lib/nokogiri/xml/parse_options.rb +6 -0
  57. data/lib/nokogiri/xml/reader.rb +2 -9
  58. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  59. data/lib/nokogiri/xml/schema.rb +12 -4
  60. data/lib/nokogiri/xml/searchable.rb +3 -1
  61. data/lib/nokogiri/xml/xpath.rb +1 -3
  62. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  63. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
  64. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
  65. data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
  66. data/patches/libxml2/0009-avoid-isnan-isinf.patch +81 -0
  67. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +70 -0
  68. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +2511 -0
  69. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  70. metadata +86 -168
  71. data/ext/nokogiri/html_document.h +0 -10
  72. data/ext/nokogiri/html_element_description.h +0 -10
  73. data/ext/nokogiri/html_entity_lookup.h +0 -8
  74. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  75. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  76. data/ext/nokogiri/xml_attr.h +0 -9
  77. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  78. data/ext/nokogiri/xml_cdata.h +0 -9
  79. data/ext/nokogiri/xml_comment.h +0 -9
  80. data/ext/nokogiri/xml_document.h +0 -23
  81. data/ext/nokogiri/xml_document_fragment.h +0 -10
  82. data/ext/nokogiri/xml_dtd.h +0 -10
  83. data/ext/nokogiri/xml_element_content.h +0 -10
  84. data/ext/nokogiri/xml_element_decl.h +0 -9
  85. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  86. data/ext/nokogiri/xml_entity_decl.h +0 -10
  87. data/ext/nokogiri/xml_entity_reference.h +0 -9
  88. data/ext/nokogiri/xml_io.c +0 -63
  89. data/ext/nokogiri/xml_io.h +0 -11
  90. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  91. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  92. data/ext/nokogiri/xml_namespace.h +0 -14
  93. data/ext/nokogiri/xml_node.h +0 -13
  94. data/ext/nokogiri/xml_node_set.h +0 -12
  95. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  96. data/ext/nokogiri/xml_reader.h +0 -10
  97. data/ext/nokogiri/xml_relax_ng.h +0 -9
  98. data/ext/nokogiri/xml_sax_parser.h +0 -39
  99. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  100. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  101. data/ext/nokogiri/xml_schema.h +0 -9
  102. data/ext/nokogiri/xml_syntax_error.h +0 -13
  103. data/ext/nokogiri/xml_text.h +0 -9
  104. data/ext/nokogiri/xml_xpath_context.h +0 -10
  105. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -73,6 +73,8 @@ module Nokogiri
73
73
  DEFAULT_XML = RECOVER | NONET
74
74
  # the default options used for parsing HTML documents
75
75
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
76
+ # the default options used for parsing XML schemas
77
+ DEFAULT_SCHEMA = NONET
76
78
 
77
79
  attr_accessor :options
78
80
  def initialize options = STRICT
@@ -107,6 +109,10 @@ module Nokogiri
107
109
  @options & RECOVER == STRICT
108
110
  end
109
111
 
112
+ def ==(other)
113
+ other.to_i == to_i
114
+ end
115
+
110
116
  alias :to_i :options
111
117
 
112
118
  def inspect
@@ -86,7 +86,8 @@ module Nokogiri
86
86
  private :initialize
87
87
 
88
88
  ###
89
- # Get a list of attributes for the current node.
89
+ # Get the attributes of the current node as a Hash
90
+ # @return [Hash<String, String>] Attribute names and values
90
91
  def attributes
91
92
  attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
92
93
  hash[node.name] = node.to_s
@@ -96,14 +97,6 @@ module Nokogiri
96
97
  attrs_hash
97
98
  end
98
99
 
99
- ###
100
- # Get a list of attributes for the current node
101
- def attribute_nodes
102
- nodes = attr_nodes
103
- nodes.each { |v| v.instance_variable_set(:@_r, self) }
104
- nodes
105
- end
106
-
107
100
  ###
108
101
  # Move the cursor through the document yielding the cursor to the block
109
102
  def each
@@ -5,8 +5,8 @@ module Nokogiri
5
5
  ###
6
6
  # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
7
7
  # See Nokogiri::XML::RelaxNG for an example.
8
- def RelaxNG string_or_io
9
- RelaxNG.new(string_or_io)
8
+ def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9
+ RelaxNG.new(string_or_io, options)
10
10
  end
11
11
  end
12
12
 
@@ -27,6 +27,10 @@ module Nokogiri
27
27
  # end
28
28
  #
29
29
  # The list of errors are Nokogiri::XML::SyntaxError objects.
30
+ #
31
+ # NOTE: RelaxNG inputs are always treated as TRUSTED documents, meaning that they will cause the
32
+ # underlying parsing libraries to access network resources. This is counter to Nokogiri's
33
+ # "untrusted by default" security policy, but is a limitation of the underlying libraries.
30
34
  class RelaxNG < Nokogiri::XML::Schema
31
35
  end
32
36
  end
@@ -5,8 +5,8 @@ module Nokogiri
5
5
  ###
6
6
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
7
7
  # object.
8
- def Schema string_or_io
9
- Schema.new(string_or_io)
8
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9
+ Schema.new(string_or_io, options)
10
10
  end
11
11
  end
12
12
 
@@ -27,15 +27,23 @@ module Nokogiri
27
27
  # end
28
28
  #
29
29
  # The list of errors are Nokogiri::XML::SyntaxError objects.
30
+ #
31
+ # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
32
+ # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
33
+ # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
34
+ # security policy. If a document is trusted, then the caller may turn off the NONET option via
35
+ # the ParseOptions to re-enable external entity resolution over a network connection.
30
36
  class Schema
31
37
  # Errors while parsing the schema file
32
38
  attr_accessor :errors
39
+ # The Nokogiri::XML::ParseOptions used to parse the schema
40
+ attr_accessor :parse_options
33
41
 
34
42
  ###
35
43
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
36
44
  # object.
37
- def self.new string_or_io
38
- from_document Nokogiri::XML(string_or_io)
45
+ def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
46
+ from_document(Nokogiri::XML(string_or_io), options)
39
47
  end
40
48
 
41
49
  ###
@@ -210,8 +210,10 @@ module Nokogiri
210
210
  end
211
211
 
212
212
  def xpath_query_from_css_rule(rule, ns)
213
+ visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
213
214
  self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
214
- CSS.xpath_for(rule.to_s, :prefix => implied_xpath_context, :ns => ns)
215
+ CSS.xpath_for(rule.to_s, {:prefix => implied_xpath_context, :ns => ns,
216
+ :visitor => visitor})
215
217
  end.join(" | ")
216
218
  end
217
219
 
@@ -3,9 +3,7 @@ require 'nokogiri/xml/xpath/syntax_error'
3
3
 
4
4
  module Nokogiri
5
5
  module XML
6
- class XPath
7
- # The Nokogiri::XML::Document tied to this XPath instance
8
- attr_accessor :document
6
+ module XPath
9
7
  end
10
8
  end
11
9
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  module Nokogiri
3
3
  module XML
4
- class XPath
4
+ module XPath
5
5
  class SyntaxError < XML::SyntaxError
6
6
  def to_s
7
7
  [super.chomp, str1].compact.join(': ')
@@ -0,0 +1,73 @@
1
+ From 4f51a6d2b1755ce5b36c524c215aad70d864ac1d Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Mon, 3 Aug 2020 17:36:05 -0400
4
+ Subject: [PATCH 1/2] htmlParseComment: treat `--!>` as if it closed the
5
+ comment
6
+
7
+ See guidance provided on incorrectly-closed comments here:
8
+
9
+ https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
10
+ ---
11
+ HTMLparser.c | 28 ++++++++++++++++++++--------
12
+ 1 file changed, 20 insertions(+), 8 deletions(-)
13
+
14
+ diff --git a/HTMLparser.c b/HTMLparser.c
15
+ index 7b6d689..4d43479 100644
16
+ --- a/HTMLparser.c
17
+ +++ b/HTMLparser.c
18
+ @@ -3300,6 +3300,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
19
+ int q, ql;
20
+ int r, rl;
21
+ int cur, l;
22
+ + int next, nl;
23
+ xmlParserInputState state;
24
+
25
+ /*
26
+ @@ -3332,6 +3333,21 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
27
+ while (IS_CHAR(cur) &&
28
+ ((cur != '>') ||
29
+ (r != '-') || (q != '-'))) {
30
+ + NEXTL(l);
31
+ + next = CUR_CHAR(nl);
32
+ + if (next == 0) {
33
+ + SHRINK;
34
+ + GROW;
35
+ + next = CUR_CHAR(nl);
36
+ + }
37
+ +
38
+ + if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) {
39
+ + htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
40
+ + "Comment incorrectly closed by '--!>'", NULL, NULL);
41
+ + cur = '>';
42
+ + break;
43
+ + }
44
+ +
45
+ if (len + 5 >= size) {
46
+ xmlChar *tmp;
47
+
48
+ @@ -3345,18 +3361,14 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
49
+ }
50
+ buf = tmp;
51
+ }
52
+ - COPY_BUF(ql,buf,len,q);
53
+ + COPY_BUF(ql,buf,len,q);
54
+ +
55
+ q = r;
56
+ ql = rl;
57
+ r = cur;
58
+ rl = l;
59
+ - NEXTL(l);
60
+ - cur = CUR_CHAR(l);
61
+ - if (cur == 0) {
62
+ - SHRINK;
63
+ - GROW;
64
+ - cur = CUR_CHAR(l);
65
+ - }
66
+ + cur = next;
67
+ + l = nl;
68
+ }
69
+ buf[len] = 0;
70
+ if (IS_CHAR(cur)) {
71
+ --
72
+ 2.25.1
73
+
@@ -0,0 +1,103 @@
1
+ From b20d746fa7cbb74716171bc49d836af99927e41e Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Sun, 11 Oct 2020 14:15:37 -0400
4
+ Subject: [PATCH 2/2] use new htmlParseLookupCommentEnd to find comment ends
5
+
6
+ Note that the caret in error messages generated during comment parsing
7
+ may have moved by one byte.
8
+
9
+ See guidance provided on incorrectly-closed comments here:
10
+
11
+ https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
12
+ ---
13
+ HTMLparser.c | 46 +++++++++++++++++++++++++++++++++++++---------
14
+ 1 file changed, 37 insertions(+), 9 deletions(-)
15
+
16
+ diff --git a/HTMLparser.c b/HTMLparser.c
17
+ index 4d43479..000dc3d 100644
18
+ --- a/HTMLparser.c
19
+ +++ b/HTMLparser.c
20
+ @@ -5331,6 +5331,39 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
21
+ return (-1);
22
+ }
23
+
24
+ +/**
25
+ + * htmlParseLookupCommentEnd:
26
+ + * @ctxt: an HTML parser context
27
+ + *
28
+ + * Try to find a comment end tag in the input stream
29
+ + * The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags.
30
+ + * (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment)
31
+ + * This function has a side effect of (possibly) incrementing ctxt->checkIndex
32
+ + * to avoid rescanning sequences of bytes; it DOES change the state of the
33
+ + * parser, so do not use it liberally.
34
+ + * This wraps htmlParseLookupSequence().
35
+ + *
36
+ + * Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
37
+ + */
38
+ +static int
39
+ +htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
40
+ +{
41
+ + int mark = 0;
42
+ + int cur = CUR_PTR - BASE_PTR;
43
+ +
44
+ + while (mark >= 0) {
45
+ + mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 1, 1);
46
+ + if ((mark < 0) ||
47
+ + (NXT(mark+2) == '>') ||
48
+ + ((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) {
49
+ + return mark;
50
+ + }
51
+ + ctxt->checkIndex = cur + mark + 1;
52
+ + }
53
+ + return mark;
54
+ +}
55
+ +
56
+ +
57
+ /**
58
+ * htmlParseTryOrFinish:
59
+ * @ctxt: an HTML parser context
60
+ @@ -5507,8 +5540,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
61
+ cur = in->cur[0];
62
+ if ((cur == '<') && (next == '!') &&
63
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
64
+ - if ((!terminate) &&
65
+ - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
66
+ + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
67
+ goto done;
68
+ #ifdef DEBUG_PUSH
69
+ xmlGenericError(xmlGenericErrorContext,
70
+ @@ -5567,8 +5599,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
71
+ next = in->cur[1];
72
+ if ((cur == '<') && (next == '!') &&
73
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
74
+ - if ((!terminate) &&
75
+ - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
76
+ + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
77
+ goto done;
78
+ #ifdef DEBUG_PUSH
79
+ xmlGenericError(xmlGenericErrorContext,
80
+ @@ -5614,8 +5645,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
81
+ next = in->cur[1];
82
+ if ((cur == '<') && (next == '!') &&
83
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
84
+ - if ((!terminate) &&
85
+ - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
86
+ + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
87
+ goto done;
88
+ #ifdef DEBUG_PUSH
89
+ xmlGenericError(xmlGenericErrorContext,
90
+ @@ -5871,9 +5901,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
91
+ htmlParseDocTypeDecl(ctxt);
92
+ } else if ((cur == '<') && (next == '!') &&
93
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
94
+ - if ((!terminate) &&
95
+ - (htmlParseLookupSequence(
96
+ - ctxt, '-', '-', '>', 1, 1) < 0))
97
+ + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
98
+ goto done;
99
+ #ifdef DEBUG_PUSH
100
+ xmlGenericError(xmlGenericErrorContext,
101
+ --
102
+ 2.25.1
103
+
@@ -0,0 +1,53 @@
1
+ From c94172d2a4451368530db2186190d70be8a1d9e5 Mon Sep 17 00:00:00 2001
2
+ From: Ilya Zub <ilya@serpapi.com>
3
+ Date: Wed, 23 Dec 2020 12:45:29 +0200
4
+ Subject: Use glibc strlen to speed up xmlStrlen
5
+ MIME-Version: 1.0
6
+ Content-Type: text/plain; charset=UTF-8
7
+ Content-Transfer-Encoding: 8bit
8
+
9
+ xmlStrlen (entire HTML file): 926171.936981 μs
10
+ glibc_xmlStrlen (entire HTML file): 36905.903992 μs
11
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times
12
+
13
+ xmlStrlen (average string): 57479.204010 μs
14
+ glibc_xmlStrlen (average string): 5802.069000 μs
15
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times
16
+
17
+ xmlStrlen (bigger string): 388056.315979 μs
18
+ glibc_xmlStrlen (bigger string): 12797.856995 μs
19
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times
20
+
21
+ xmlStrlen (smallest string): 15870.046021 μs
22
+ glibc_xmlStrlen (smallest string): 6282.208984 μs
23
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times
24
+
25
+ See https://gitlab.gnome.org/GNOME/libxml2/-/issues/212 for reference.
26
+ ---
27
+ xmlstring.c | 9 ++-------
28
+ 1 file changed, 2 insertions(+), 7 deletions(-)
29
+
30
+ diff --git a/xmlstring.c b/xmlstring.c
31
+ index e8a1e45d..df247dff 100644
32
+ --- a/xmlstring.c
33
+ +++ b/xmlstring.c
34
+ @@ -423,14 +423,9 @@ xmlStrsub(const xmlChar *str, int start, int len) {
35
+
36
+ int
37
+ xmlStrlen(const xmlChar *str) {
38
+ - int len = 0;
39
+ -
40
+ if (str == NULL) return(0);
41
+ - while (*str != 0) { /* non input consuming */
42
+ - str++;
43
+ - len++;
44
+ - }
45
+ - return(len);
46
+ +
47
+ + return strlen((const char*)str);
48
+ }
49
+
50
+ /**
51
+ --
52
+ 2.29.2
53
+
@@ -0,0 +1,81 @@
1
+ This patch is a result of rake-compiler-dock using centos 7 (manylinux2014) to cross-compile.
2
+
3
+ CentOS, for reasons I have not been able to discern, implements `isnan` and `isinf` as functions
4
+ and not as macros. Debian knows how to resolve those functions at dynamic-link time (despite using a
5
+ macro at compile time), but musl-based systems (like alpine) do not. Running `nm` on nokogiri.so
6
+ created on such a centos system shows:
7
+
8
+ ```
9
+ U __isinf@@GLIBC_2.2.5
10
+ U __isnan@@GLIBC_2.2.5
11
+ ```
12
+
13
+ (see https://github.com/sparklemotion/nokogiri/pull/2142 for more info)
14
+
15
+ This patch avoids using glibc's `isnan` and `isinf` calls, instead using libxml2's fallback
16
+ implementation. There's history here, see libxml2 commit 8813f39:
17
+
18
+ commit 8813f39
19
+ Author: Nick Wellnhofer <wellnhofer@aevum.de>
20
+ Date: 2017-09-21 00:11:26 +0200
21
+
22
+ Simplify XPath NaN, inf and -0 handling
23
+
24
+ Use C99 macros NAN, INFINITY, isnan, isinf. If they're not available:
25
+
26
+ - Assume that (0.0 / 0.0) generates a NaN and !(x == x) tests for NaN.
27
+ - Use C89's HUGE_VAL for INFINITY.
28
+
29
+ Remove manual handling of NaN, infinity and negative zero in functions
30
+ xmlXPathValueFlipSign and xmlXPathDivValues.
31
+
32
+ Remove xmlXPathGetSign. All the tests for negative zero can be replaced
33
+ with a test for negative or positive zero.
34
+
35
+ Simplify xmlXPathRoundFunction.
36
+
37
+ Remove Trio dependency.
38
+
39
+ This should work on IEEE 754 compliant implementations even if the C99
40
+ macros aren't available, but will likely break some ancient platforms.
41
+ If problems arise, my plan is to port the relevant trionan.c solution
42
+ to xpath.c. Note that non-compliant implementations are impossible
43
+ to fully support, anyway, since XPath requires IEEE 754.
44
+
45
+ This patch would be unnecessary if any of the following were true:
46
+
47
+ * centos implements these as macros, and doesn't generate an unresolved symbol for either in the shared library
48
+ * we had a way to ensure `__isinf` and `__isnan` resolve on musl (e.g., we implement them locally)
49
+
50
+ diff --git a/xpath.c b/xpath.c
51
+ index 9f64ab9..5b6d999 100644
52
+ --- a/xpath.c
53
+ +++ b/xpath.c
54
+ @@ -509,11 +509,7 @@ xmlXPathInit(void) {
55
+ */
56
+ int
57
+ xmlXPathIsNaN(double val) {
58
+ -#ifdef isnan
59
+ - return isnan(val);
60
+ -#else
61
+ return !(val == val);
62
+ -#endif
63
+ }
64
+
65
+ /**
66
+ @@ -524,15 +520,11 @@ xmlXPathIsNaN(double val) {
67
+ */
68
+ int
69
+ xmlXPathIsInf(double val) {
70
+ -#ifdef isinf
71
+ - return isinf(val) ? (val > 0 ? 1 : -1) : 0;
72
+ -#else
73
+ if (val >= INFINITY)
74
+ return 1;
75
+ if (val <= -INFINITY)
76
+ return -1;
77
+ return 0;
78
+ -#endif
79
+ }
80
+
81
+ #endif /* SCHEMAS or XPATH */