nokogiri 1.13.8 → 1.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +33 -15
  7. data/ext/nokogiri/extconf.rb +164 -46
  8. data/ext/nokogiri/gumbo.c +20 -10
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +31 -16
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +2 -2
  18. data/ext/nokogiri/xml_attribute_decl.c +2 -2
  19. data/ext/nokogiri/xml_cdata.c +32 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +127 -34
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +2 -2
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +7 -7
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +1 -1
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +79 -14
  30. data/ext/nokogiri/xml_node.c +300 -34
  31. data/ext/nokogiri/xml_node_set.c +125 -107
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +81 -48
  34. data/ext/nokogiri/xml_relax_ng.c +66 -81
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +46 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +30 -11
  38. data/ext/nokogiri/xml_schema.c +95 -117
  39. data/ext/nokogiri/xml_syntax_error.c +1 -1
  40. data/ext/nokogiri/xml_text.c +28 -14
  41. data/ext/nokogiri/xml_xpath_context.c +216 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +118 -64
  43. data/gumbo-parser/Makefile +10 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +21 -5
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/xpath_visitor.rb +7 -5
  70. data/lib/nokogiri/css.rb +6 -0
  71. data/lib/nokogiri/decorators/slop.rb +1 -1
  72. data/lib/nokogiri/encoding_handler.rb +57 -0
  73. data/lib/nokogiri/extension.rb +4 -3
  74. data/lib/nokogiri/html4/document.rb +2 -121
  75. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  76. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  77. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  78. data/lib/nokogiri/html4.rb +1 -0
  79. data/lib/nokogiri/html5/document.rb +113 -36
  80. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  81. data/lib/nokogiri/html5/node.rb +8 -5
  82. data/lib/nokogiri/html5.rb +130 -216
  83. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  84. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  85. data/lib/nokogiri/version/constant.rb +1 -1
  86. data/lib/nokogiri/version/info.rb +11 -10
  87. data/lib/nokogiri/xml/attr.rb +49 -0
  88. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  89. data/lib/nokogiri/xml/builder.rb +1 -1
  90. data/lib/nokogiri/xml/document.rb +102 -55
  91. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  92. data/lib/nokogiri/xml/element_content.rb +10 -2
  93. data/lib/nokogiri/xml/element_decl.rb +4 -2
  94. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  95. data/lib/nokogiri/xml/namespace.rb +42 -0
  96. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  97. data/lib/nokogiri/xml/node.rb +212 -48
  98. data/lib/nokogiri/xml/node_set.rb +88 -9
  99. data/lib/nokogiri/xml/parse_options.rb +129 -50
  100. data/lib/nokogiri/xml/pp/node.rb +28 -15
  101. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  102. data/lib/nokogiri/xml/sax/document.rb +1 -1
  103. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  104. data/lib/nokogiri/xml/searchable.rb +18 -10
  105. data/lib/nokogiri/xslt.rb +74 -4
  106. data/lib/nokogiri.rb +15 -15
  107. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  108. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  109. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  110. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  111. data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
  112. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  113. metadata +19 -242
  114. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  115. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  116. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  117. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  118. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  119. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  120. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
data/LICENSE.md CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License
2
2
 
3
- Copyright 2008 -- 2021 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney.
3
+ Copyright 2008 -- 2023 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
6
 
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Nokogiri
4
4
 
5
- Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2 (CRuby) and xerces (JRuby).
5
+ Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2, libgumbo, and xerces.
6
6
 
7
7
  ## Guiding Principles
8
8
 
@@ -40,10 +40,6 @@ Some guiding principles Nokogiri tries to follow:
40
40
 
41
41
  All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions).
42
42
 
43
- Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
44
-
45
- [tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme
46
-
47
43
  ### Reading
48
44
 
49
45
  Your first stops for learning more about Nokogiri should be:
@@ -57,7 +53,6 @@ Your first stops for learning more about Nokogiri should be:
57
53
 
58
54
  There are a few ways to ask exploratory questions:
59
55
 
60
- - The Ruby Discord chat server is active at https://discord.gg/UyQnKrT
61
56
  - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
62
57
  - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
63
58
 
@@ -103,12 +98,21 @@ We bump `Major.Minor.Patch` versions following this guidance:
103
98
  - Updating packaged libraries for security-related reasons.
104
99
 
105
100
 
101
+ ### Sponsorship
102
+
103
+ You can help sponsor the maintainers of this software through one of these organizations:
104
+
105
+ - [github.com/sponsors/flavorjones](https://github.com/sponsors/flavorjones)
106
+ - [opencollective.com/nokogiri](https://opencollective.com/nokogiri)
107
+ - [tidelift.com/subscription/pkg/rubygems-nokogiri](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme)
108
+
109
+
106
110
  ## Installation
107
111
 
108
112
  Requirements:
109
113
 
110
- - Ruby >= 2.6
111
- - JRuby >= 9.3.0.0
114
+ - Ruby >= 2.7
115
+ - JRuby >= 9.4.0.0
112
116
 
113
117
 
114
118
  ### Native Gems: Faster, more reliable installation
@@ -119,10 +123,13 @@ Requirements:
119
123
 
120
124
  Nokogiri ships pre-compiled, "native" gems for the following platforms:
121
125
 
122
- - Linux: `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`), including musl platforms like Alpine
126
+ - Linux:
127
+ - `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`)
128
+ - `aarch64-linux` and `arm-linux` (req: `glibc >= 2.29`)
129
+ - Note that musl platforms like Alpine **are** supported
123
130
  - Darwin/MacOS: `x86_64-darwin` and `arm64-darwin`
124
- - Windows: `x86-mingw32` and `x64-mingw32`
125
- - Java: any platform running JRuby 9.3 or higher
131
+ - Windows: `x86-mingw32`, `x64-mingw32`, and `x64-mingw-ucrt`
132
+ - Java: any platform running JRuby 9.4 or higher
126
133
 
127
134
  To determine whether your system supports one of these gems, look at the output of `bundle platform` or `ruby -e 'puts Gem::Platform.local.to_s'`.
128
135
 
data/dependencies.yml CHANGED
@@ -1,23 +1,41 @@
1
1
  libxml2:
2
- version: "2.9.14"
3
- sha256: "60d74a257d1ccec0475e749cba2f21559e48139efba6ff28224357c7c798dfee"
4
- # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.sha256sum
2
+ version: "2.11.7"
3
+ sha256: "fb27720e25eaf457f94fd3d7189bcf2626c6dccf4201553bc8874d50e3560162"
4
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.11/libxml2-2.11.7.sha256sum
5
5
 
6
6
  libxslt:
7
- version: "1.1.35"
8
- sha256: "8247f33e9a872c6ac859aa45018bc4c4d00b97e2feac9eebc10c93ce1f34dd79"
9
- # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.35.sha256sum
7
+ version: "1.1.39"
8
+ sha256: "2a20ad621148339b0759c4d4e96719362dee64c9a096dbba625ba053846349f0"
9
+ # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
10
10
 
11
11
  zlib:
12
- version: "1.2.12"
13
- sha256: "91844808532e5ce316b3c010929493c0244f3d37593afd6de04f71821d5136d9"
12
+ version: "1.2.13"
13
+ sha256: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"
14
14
  # SHA-256 hash provided on http://zlib.net/
15
15
 
16
16
  libiconv:
17
- version: "1.16"
18
- sha256: "e6a1b1b589654277ee790cce3734f07876ac4ccfaecbee8afa0b649cf529cc04"
19
- # gpg: Signature made Fri 26 Apr 2019 03:36:38 PM EDT
20
- # gpg: using RSA key 4F494A942E4616C2
21
- # gpg: Good signature from "Bruno Haible (Open Source Development) <bruno@clisp.org>" [expired]
22
- # gpg: Note: This key has expired!
23
- # Primary key fingerprint: 68D9 4D8A AEEA D48A E7DC 5B90 4F49 4A94 2E46 16C2
17
+ version: "1.17"
18
+ sha256: "8f74213b56238c85a50a5329f77e06198771e70dd9a739779f4c02f65d971313"
19
+ # signature verified by following this path:
20
+ # - release announced at https://savannah.gnu.org/forum/forum.php?forum_id=10175
21
+ # - which links to https://savannah.gnu.org/users/haible as the releaser
22
+ # - which links to https://savannah.gnu.org/people/viewgpg.php?user_id=1871 as the gpg key
23
+ #
24
+ # So:
25
+ # - wget -q -O - https://savannah.gnu.org/people/viewgpg.php?user_id=1871 | gpg --import
26
+ # gpg: key F5BE8B267C6A406D: 1 signature not checked due to a missing key
27
+ # gpg: key F5BE8B267C6A406D: public key "Bruno Haible (Open Source Development) <bruno@clisp.org>" imported
28
+ # gpg: Total number processed: 1
29
+ # gpg: imported: 1
30
+ # gpg: marginals needed: 3 completes needed: 1 trust model: pgp
31
+ # gpg: depth: 0 valid: 4 signed: 0 trust: 0-, 0q, 0n, 0m, 0f, 4u
32
+ # gpg: next trustdb check due at 2024-05-09
33
+ # - gpg --verify libiconv-1.17.tar.gz.sig ports/archives/libiconv-1.17.tar.gz
34
+ # gpg: Signature made Sun 15 May 2022 11:26:42 AM EDT
35
+ # gpg: using RSA key 9001B85AF9E1B83DF1BDA942F5BE8B267C6A406D
36
+ # gpg: Good signature from "Bruno Haible (Open Source Development) <bruno@clisp.org>" [unknown]
37
+ # gpg: WARNING: This key is not certified with a trusted signature!
38
+ # gpg: There is no indication that the signature belongs to the owner.
39
+ # Primary key fingerprint: 9001 B85A F9E1 B83D F1BD A942 F5BE 8B26 7C6A 406D
40
+ #
41
+ # And this sha256sum is calculated from that verified tarball.
data/ext/nokogiri/extconf.rb CHANGED
@@ -15,7 +15,7 @@ PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."
15
15
  REQUIRED_LIBXML_VERSION = "2.6.21"
16
16
  RECOMMENDED_LIBXML_VERSION = "2.9.3"
17
17
 
18
- REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.0" # keep this version in sync with the one in the gemspec
18
+ REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.2" # keep this version in sync with the one in the gemspec
19
19
  REQUIRED_PKG_CONFIG_VERSION = "~> 1.1"
20
20
 
21
21
  # Keep track of what versions of what libraries we build against
@@ -200,7 +200,7 @@ def nix?
200
200
  end
201
201
 
202
202
  def truffle?
203
- ::RUBY_ENGINE == "truffleruby"
203
+ RUBY_ENGINE == "truffleruby"
204
204
  end
205
205
 
206
206
  def concat_flags(*args)
@@ -211,6 +211,16 @@ def local_have_library(lib, func = nil, headers = nil)
211
211
  have_library(lib, func, headers) || have_library("lib#{lib}", func, headers)
212
212
  end
213
213
 
214
+ def zlib_source(version_string)
215
+ # As of 2022-12, I'm starting to see failed downloads often enough from zlib.net that I want to
216
+ # change the default to github.
217
+ if ENV["NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE"]
218
+ "https://zlib.net/fossils/zlib-#{version_string}.tar.gz"
219
+ else
220
+ "https://github.com/madler/zlib/releases/download/v#{version_string}/zlib-#{version_string}.tar.gz"
221
+ end
222
+ end
223
+
214
224
  def gnome_source
215
225
  # As of 2022-02-20, some mirrors have expired SSL certificates. I'm able to retrieve from my home,
216
226
  # but whatever host is resolved on the github actions workers see an expired cert.
@@ -219,7 +229,7 @@ def gnome_source
219
229
  if ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"]
220
230
  "https://download.gnome.org"
221
231
  else
222
- "https://mirror.csclub.uwaterloo.ca/gnome" # old reliable
232
+ "https://muug.ca/mirror/gnome" # old reliable
223
233
  end
224
234
  end
225
235
 
@@ -362,7 +372,7 @@ def try_link_iconv(using = nil)
362
372
  preserving_globals do
363
373
  yield if block_given?
364
374
 
365
- try_link(<<~'SRC', opt)
375
+ try_link(<<~SRC, opt)
366
376
  #include <stdlib.h>
367
377
  #include <iconv.h>
368
378
  int main(void)
@@ -400,8 +410,8 @@ def iconv_configure_flags
400
410
  return ["--with-iconv=yes"]
401
411
  end
402
412
 
403
- config = preserving_globals { have_package_configuration("libiconv") }
404
- if config && try_link_iconv("pkg-config libiconv") { have_package_configuration("libiconv") }
413
+ config = preserving_globals { pkg_config("libiconv") }
414
+ if config && try_link_iconv("pkg-config libiconv") { pkg_config("libiconv") }
405
415
  cflags, ldflags, libs = config
406
416
 
407
417
  return [
@@ -430,10 +440,12 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
430
440
  "#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}"
431
441
  end
432
442
 
433
- recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
434
- # Prefer host_alias over host in order to use the correct compiler prefix for cross build, but
435
- # use host if not set.
443
+ # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
444
+ # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
436
445
  recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
446
+ recipe.host = recipe.host.gsub(/i386/, "i686")
447
+
448
+ recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
437
449
  recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
438
450
 
439
451
  yield recipe
@@ -525,7 +537,6 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
525
537
 
526
538
  EOM
527
539
 
528
- pp(recipe.files)
529
540
  chdir_for_build { recipe.cook }
530
541
  FileUtils.touch(checkpoint)
531
542
  end
@@ -573,6 +584,34 @@ def do_clean
573
584
  exit!(0)
574
585
  end
575
586
 
587
+ # In ruby 3.2, symbol resolution changed on Darwin, to introduce the `-bundle_loader` flag to
588
+ # resolve symbols against the ruby binary.
589
+ #
590
+ # This makes it challenging to build a single extension that works with both a ruby with
591
+ # `--enable-shared` and one with `--disable-shared`. To work around that, we choose to add
592
+ # `-flat_namespace` to the link line (later in this file).
593
+ #
594
+ # The `-flat_namespace` line introduces its own behavior change, which is that (similar to on
595
+ # Linux), any symbols in the extension that are exported may now be resolved by shared libraries
596
+ # loaded by the Ruby process. Specifically, that means that libxml2 and libxslt, which are
597
+ # statically linked into the nokogiri bundle, will resolve (at runtime) to a system libxml2 loaded
598
+ # by Ruby on Darwin. And it appears that often Ruby on Darwin does indeed load the system libxml2,
599
+ # and that messes with our assumptions about whether we're running with a patched libxml2 or a
600
+ # vanilla libxml2.
601
+ #
602
+ # We choose to use `-load_hidden` in this case to prevent exporting those symbols from libxml2 and
603
+ # libxslt, which ensures that they will be resolved to the static libraries in the bundle. In other
604
+ # words, when we use `load_hidden`, what happens in the extension stays in the extension.
605
+ #
606
+ # See https://github.com/rake-compiler/rake-compiler-dock/issues/87 for more info.
607
+ #
608
+ # Anyway, this method is the logical bit to tell us when to turn on these workarounds.
609
+ def needs_darwin_linker_hack
610
+ config_cross_build? &&
611
+ darwin? &&
612
+ Gem::Requirement.new("~> 3.2").satisfied_by?(Gem::Version.new(RbConfig::CONFIG["ruby_version"].split("+").first))
613
+ end
614
+
576
615
  #
577
616
  # main
578
617
  #
@@ -580,7 +619,7 @@ do_help if arg_config("--help")
580
619
  do_clean if arg_config("--clean")
581
620
 
582
621
  if openbsd? && !config_system_libraries?
583
- if %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1) !~ /clang/
622
+ unless %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1).include?("clang")
584
623
  (ENV["CC"] ||= find_executable("egcc")) ||
585
624
  abort("Please install gcc 4.9+ from ports using `pkg_add -v gcc`")
586
625
  end
@@ -616,6 +655,9 @@ $LIBS = concat_flags($LIBS, ENV["LIBS"])
616
655
  # errors/warnings. see #2302
617
656
  append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
618
657
 
658
+ # gumbo html5 serialization is slower with O3, let's make sure we use O2
659
+ append_cflags("-O2")
660
+
619
661
  # always include debugging information
620
662
  append_cflags("-g")
621
663
 
@@ -625,8 +667,18 @@ append_cflags("-Winline")
625
667
  # good to have no matter what Ruby was compiled with
626
668
  append_cflags("-Wmissing-noreturn")
627
669
 
670
+ # check integer loss of precision
671
+ if darwin?
672
+ append_cflags("-Wshorten-64-to-32")
673
+ else
674
+ append_cflags("-Wconversion -Wno-sign-conversion")
675
+ end
676
+
628
677
  # handle clang variations, see #1101
629
- append_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future") if darwin?
678
+ if darwin?
679
+ append_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future")
680
+ append_cflags("-Wno-unknown-warning-option")
681
+ end
630
682
 
631
683
  # these tend to be noisy, but on occasion useful during development
632
684
  # append_cflags(["-Wcast-qual", "-Wwrite-strings"])
@@ -643,14 +695,34 @@ append_cppflags(' "-Idummypath"') if windows?
643
695
 
644
696
  if config_system_libraries?
645
697
  message "Building nokogiri using system libraries.\n"
646
- ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z",
647
- headers: "zlib.h", func: "gzdopen")
648
- ensure_package_configuration(opt: "xml2", pc: "libxml-2.0", lib: "xml2",
649
- headers: "libxml/parser.h", func: "xmlParseDoc")
650
- ensure_package_configuration(opt: "xslt", pc: "libxslt", lib: "xslt",
651
- headers: "libxslt/xslt.h", func: "xsltParseStylesheetDoc")
652
- ensure_package_configuration(opt: "exslt", pc: "libexslt", lib: "exslt",
653
- headers: "libexslt/exslt.h", func: "exsltFuncRegister")
698
+ ensure_package_configuration(
699
+ opt: "zlib",
700
+ pc: "zlib",
701
+ lib: "z",
702
+ headers: "zlib.h",
703
+ func: "gzdopen",
704
+ )
705
+ ensure_package_configuration(
706
+ opt: "xml2",
707
+ pc: "libxml-2.0",
708
+ lib: "xml2",
709
+ headers: "libxml/parser.h",
710
+ func: "xmlParseDoc",
711
+ )
712
+ ensure_package_configuration(
713
+ opt: "xslt",
714
+ pc: "libxslt",
715
+ lib: "xslt",
716
+ headers: "libxslt/xslt.h",
717
+ func: "xsltParseStylesheetDoc",
718
+ )
719
+ ensure_package_configuration(
720
+ opt: "exslt",
721
+ pc: "libexslt",
722
+ lib: "exslt",
723
+ headers: "libexslt/exslt.h",
724
+ func: "exsltFuncRegister",
725
+ )
654
726
 
655
727
  have_libxml_headers?(REQUIRED_LIBXML_VERSION) ||
656
728
  abort("ERROR: libxml2 version #{REQUIRED_LIBXML_VERSION} or later is required!")
@@ -666,6 +738,10 @@ else
666
738
  cross_build_p = config_cross_build?
667
739
  message "Cross build is #{cross_build_p ? "enabled" : "disabled"}.\n"
668
740
 
741
+ if needs_darwin_linker_hack
742
+ append_ldflags("-Wl,-flat_namespace")
743
+ end
744
+
669
745
  require "yaml"
670
746
  dependencies = YAML.load_file(File.join(PACKAGE_ROOT_DIR, "dependencies.yml"))
671
747
 
@@ -674,7 +750,7 @@ else
674
750
  if cross_build_p || windows?
675
751
  zlib_recipe = process_recipe("zlib", dependencies["zlib"]["version"], static_p, cross_build_p) do |recipe|
676
752
  recipe.files = [{
677
- url: "https://zlib.net/fossils/#{recipe.name}-#{recipe.version}.tar.gz",
753
+ url: zlib_source(recipe.version),
678
754
  sha256: dependencies["zlib"]["sha256"],
679
755
  }]
680
756
  if windows?
@@ -712,17 +788,29 @@ else
712
788
  else
713
789
  class << recipe
714
790
  def configure
715
- cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
716
- execute("configure",
717
- ["env", "CHOST=#{host}", "CFLAGS=#{cflags}", "./configure", "--static", configure_prefix])
791
+ env = {}
792
+ env["CFLAGS"] = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
793
+ env["CHOST"] = host
794
+ execute("configure", ["./configure", "--static", configure_prefix], { env: env })
795
+ if darwin?
796
+ # needed as of zlib 1.2.13
797
+ Dir.chdir(work_path) do
798
+ makefile = File.read("Makefile").gsub(/^AR=.*$/, "AR=#{host}-libtool")
799
+ File.open("Makefile", "w") { |m| m.write(makefile) }
800
+ end
801
+ end
718
802
  end
719
803
  end
720
804
  end
721
805
  end
722
806
 
723
807
  unless nix?
724
- libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p,
725
- cross_build_p) do |recipe|
808
+ libiconv_recipe = process_recipe(
809
+ "libiconv",
810
+ dependencies["libiconv"]["version"],
811
+ static_p,
812
+ cross_build_p,
813
+ ) do |recipe|
726
814
  recipe.files = [{
727
815
  url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
728
816
  sha256: dependencies["libiconv"]["sha256"],
@@ -760,15 +848,25 @@ else
760
848
  if zlib_recipe
761
849
  append_cppflags("-I#{zlib_recipe.path}/include")
762
850
  $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH
763
- ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z",
764
- headers: "zlib.h", func: "gzdopen")
851
+ ensure_package_configuration(
852
+ opt: "zlib",
853
+ pc: "zlib",
854
+ lib: "z",
855
+ headers: "zlib.h",
856
+ func: "gzdopen",
857
+ )
765
858
  end
766
859
 
767
860
  if libiconv_recipe
768
861
  append_cppflags("-I#{libiconv_recipe.path}/include")
769
862
  $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH
770
- ensure_package_configuration(opt: "iconv", pc: "iconv", lib: "iconv",
771
- headers: "iconv.h", func: "iconv_open")
863
+ ensure_package_configuration(
864
+ opt: "iconv",
865
+ pc: "iconv",
866
+ lib: "iconv",
867
+ headers: "iconv.h",
868
+ func: "iconv_open",
869
+ )
772
870
  end
773
871
 
774
872
  libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe|
@@ -784,8 +882,13 @@ else
784
882
  recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort
785
883
  end
786
884
 
885
+ cppflags = concat_flags(ENV["CPPFLAGS"])
787
886
  cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g")
788
887
 
888
+ if cross_build_p
889
+ cppflags = concat_flags(cppflags, "-DNOKOGIRI_PRECOMPILED_LIBRARIES")
890
+ end
891
+
789
892
  if zlib_recipe
790
893
  recipe.configure_options << "--with-zlib=#{zlib_recipe.path}"
791
894
  end
@@ -816,6 +919,7 @@ else
816
919
  "--with-c14n",
817
920
  "--with-debug",
818
921
  "--with-threads",
922
+ "CPPFLAGS=#{cppflags}",
819
923
  "CFLAGS=#{cflags}",
820
924
  ]
821
925
  end
@@ -839,6 +943,11 @@ else
839
943
  recipe.configure_options += ["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"]
840
944
  end
841
945
 
946
+ if windows?
947
+ cflags = concat_flags(cflags, "-ULIBXSLT_STATIC", "-DIN_LIBXSLT")
948
+ cflags = concat_flags(cflags, "-ULIBEXSLT_STATIC", "-DIN_LIBEXSLT")
949
+ end
950
+
842
951
  recipe.configure_options << if source_dir
843
952
  "--config-cache"
844
953
  else
@@ -860,9 +969,13 @@ else
860
969
  $libs = $libs.shellsplit.tap do |libs|
861
970
  [libxml2_recipe, libxslt_recipe].each do |recipe|
862
971
  libname = recipe.name[/\Alib(.+)\z/, 1]
863
- File.join(recipe.path, "bin", "#{libname}-config").tap do |config|
972
+ config_basename = "#{libname}-config"
973
+ File.join(recipe.path, "bin", config_basename).tap do |config|
864
974
  # call config scripts explicitly with 'sh' for compat with Windows
865
- $CPPFLAGS = %x(sh #{config} --cflags).strip << " " << $CPPFLAGS
975
+ cflags = %x(sh #{config} --cflags).strip
976
+ message("#{config_basename} cflags: #{cflags}\n")
977
+ $CPPFLAGS = concat_flags(cflags, $CPPFLAGS) # prepend
978
+
866
979
  %x(sh #{config} --libs).strip.shellsplit.each do |arg|
867
980
  case arg
868
981
  when /\A-L(.+)\z/
@@ -881,7 +994,7 @@ else
881
994
  end
882
995
 
883
996
  patches_string = recipe.patch_files.map { |path| File.basename(path) }.join(" ")
884
- append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\\"#{patches_string}\\\""])
997
+ append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\"#{patches_string}\\""])
885
998
 
886
999
  case libname
887
1000
  when "xml2"
@@ -900,16 +1013,17 @@ else
900
1013
  end.shelljoin
901
1014
 
902
1015
  if static_p
1016
+ static_archive_ld_flag = needs_darwin_linker_hack ? ["-load_hidden"] : []
903
1017
  $libs = $libs.shellsplit.map do |arg|
904
1018
  case arg
905
1019
  when "-lxml2"
906
- File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))
1020
+ static_archive_ld_flag + [File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))]
907
1021
  when "-lxslt", "-lexslt"
908
- File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))
1022
+ static_archive_ld_flag + [File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))]
909
1023
  else
910
1024
  arg
911
1025
  end
912
- end.shelljoin
1026
+ end.flatten.shelljoin
913
1027
  end
914
1028
 
915
1029
  ensure_func("xmlParseDoc", "libxml/parser.h")
@@ -945,11 +1059,11 @@ libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_b
945
1059
  end
946
1060
 
947
1061
  def compile
948
- cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
1062
+ cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-O2", "-g")
949
1063
 
950
1064
  env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
951
1065
  if config_cross_build?
952
- if /darwin/.match?(host)
1066
+ if host.include?("darwin")
953
1067
  env["AR"] = "#{host}-libtool"
954
1068
  env["ARFLAGS"] = "-o"
955
1069
  else
@@ -965,7 +1079,7 @@ end
965
1079
  append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}")
966
1080
  $libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a")
967
1081
  $LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")]
968
- ensure_func("gumbo_parse_with_options", "gumbo.h")
1082
+ ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h")
969
1083
 
970
1084
  have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21
971
1085
  have_func("xmlFirstElementChild") # introduced in libxml 2.7.3
@@ -976,21 +1090,23 @@ have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
976
1090
  have_func("rb_gc_location") # introduced in Ruby 2.7
977
1091
  have_func("rb_category_warning") # introduced in Ruby 3.0
978
1092
 
979
- have_func("vasprintf")
980
-
981
1093
  other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
982
- append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\\"#{other_library_versions_string}\\\""])
1094
+ append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\"#{other_library_versions_string}\\""])
983
1095
 
984
1096
  unless config_system_libraries?
985
1097
  if cross_build_p
986
1098
  # When precompiling native gems, copy packaged libraries' headers to ext/nokogiri/include
987
1099
  # These are packaged up by the cross-compiling callback in the ExtensionTask
988
- copy_packaged_libraries_headers(to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
989
- from_recipes: [libxml2_recipe, libxslt_recipe])
1100
+ copy_packaged_libraries_headers(
1101
+ to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
1102
+ from_recipes: [libxml2_recipe, libxslt_recipe],
1103
+ )
990
1104
  else
991
1105
  # When compiling during installation, install packaged libraries' header files into ext/nokogiri/include
992
- copy_packaged_libraries_headers(to_path: "include",
993
- from_recipes: [libxml2_recipe, libxslt_recipe])
1106
+ copy_packaged_libraries_headers(
1107
+ to_path: "include",
1108
+ from_recipes: [libxml2_recipe, libxslt_recipe],
1109
+ )
994
1110
  $INSTALLFILES << ["include/**/*.h", "$(rubylibdir)"]
995
1111
  end
996
1112
  end
@@ -1008,3 +1124,5 @@ if config_clean?
1008
1124
  EOF
1009
1125
  end
1010
1126
  end
1127
+
1128
+ # rubocop:enable Style/GlobalVars