nokogiri 1.13.6 → 1.14.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (109) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +39 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +33 -15
  7. data/ext/nokogiri/extconf.rb +100 -24
  8. data/ext/nokogiri/gumbo.c +21 -11
  9. data/ext/nokogiri/html4_document.c +2 -2
  10. data/ext/nokogiri/html4_element_description.c +1 -1
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +1 -6
  13. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  14. data/ext/nokogiri/nokogiri.c +38 -51
  15. data/ext/nokogiri/nokogiri.h +26 -14
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +3 -3
  18. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  19. data/ext/nokogiri/xml_cdata.c +3 -3
  20. data/ext/nokogiri/xml_comment.c +1 -1
  21. data/ext/nokogiri/xml_document.c +23 -14
  22. data/ext/nokogiri/xml_document_fragment.c +1 -1
  23. data/ext/nokogiri/xml_dtd.c +9 -9
  24. data/ext/nokogiri/xml_element_content.c +3 -3
  25. data/ext/nokogiri/xml_element_decl.c +5 -5
  26. data/ext/nokogiri/xml_encoding_handler.c +3 -3
  27. data/ext/nokogiri/xml_entity_decl.c +6 -6
  28. data/ext/nokogiri/xml_entity_reference.c +1 -1
  29. data/ext/nokogiri/xml_namespace.c +80 -14
  30. data/ext/nokogiri/xml_node.c +363 -82
  31. data/ext/nokogiri/xml_node_set.c +4 -6
  32. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  33. data/ext/nokogiri/xml_reader.c +97 -22
  34. data/ext/nokogiri/xml_relax_ng.c +1 -3
  35. data/ext/nokogiri/xml_sax_parser.c +23 -17
  36. data/ext/nokogiri/xml_sax_parser_context.c +1 -6
  37. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  38. data/ext/nokogiri/xml_schema.c +4 -6
  39. data/ext/nokogiri/xml_syntax_error.c +1 -1
  40. data/ext/nokogiri/xml_text.c +2 -2
  41. data/ext/nokogiri/xml_xpath_context.c +91 -84
  42. data/ext/nokogiri/xslt_stylesheet.c +15 -14
  43. data/gumbo-parser/Makefile +10 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +2 -2
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +2 -2
  48. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  49. data/gumbo-parser/src/parser.c +8 -5
  50. data/gumbo-parser/src/replacement.h +1 -1
  51. data/gumbo-parser/src/string_buffer.h +1 -1
  52. data/gumbo-parser/src/string_piece.c +1 -1
  53. data/gumbo-parser/src/svg_attrs.c +2 -2
  54. data/gumbo-parser/src/svg_tags.c +2 -2
  55. data/gumbo-parser/src/tag.c +2 -1
  56. data/gumbo-parser/src/tag_lookup.c +7 -7
  57. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  58. data/gumbo-parser/src/tag_lookup.h +1 -1
  59. data/gumbo-parser/src/token_buffer.h +1 -1
  60. data/gumbo-parser/src/tokenizer.c +1 -1
  61. data/gumbo-parser/src/tokenizer.h +1 -1
  62. data/gumbo-parser/src/utf8.c +1 -1
  63. data/gumbo-parser/src/utf8.h +1 -1
  64. data/gumbo-parser/src/util.c +1 -3
  65. data/gumbo-parser/src/util.h +4 -0
  66. data/gumbo-parser/src/vector.h +1 -1
  67. data/lib/nokogiri/css/node.rb +2 -2
  68. data/lib/nokogiri/css/xpath_visitor.rb +5 -3
  69. data/lib/nokogiri/css.rb +6 -0
  70. data/lib/nokogiri/decorators/slop.rb +1 -1
  71. data/lib/nokogiri/encoding_handler.rb +57 -0
  72. data/lib/nokogiri/extension.rb +3 -2
  73. data/lib/nokogiri/html4/document.rb +2 -121
  74. data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
  75. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  76. data/lib/nokogiri/html4.rb +1 -0
  77. data/lib/nokogiri/html5/document.rb +113 -36
  78. data/lib/nokogiri/html5/document_fragment.rb +9 -2
  79. data/lib/nokogiri/html5/node.rb +3 -5
  80. data/lib/nokogiri/html5.rb +127 -216
  81. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  82. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  83. data/lib/nokogiri/version/constant.rb +1 -1
  84. data/lib/nokogiri/version/info.rb +11 -10
  85. data/lib/nokogiri/xml/attr.rb +49 -0
  86. data/lib/nokogiri/xml/builder.rb +1 -1
  87. data/lib/nokogiri/xml/document.rb +103 -55
  88. data/lib/nokogiri/xml/document_fragment.rb +49 -6
  89. data/lib/nokogiri/xml/namespace.rb +42 -0
  90. data/lib/nokogiri/xml/node/save_options.rb +6 -4
  91. data/lib/nokogiri/xml/node.rb +190 -35
  92. data/lib/nokogiri/xml/node_set.rb +88 -9
  93. data/lib/nokogiri/xml/parse_options.rb +129 -50
  94. data/lib/nokogiri/xml/pp/node.rb +6 -4
  95. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  96. data/lib/nokogiri/xml/reader.rb +6 -8
  97. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  98. data/lib/nokogiri/xslt.rb +1 -1
  99. data/lib/nokogiri.rb +3 -11
  100. data/lib/xsd/xmlparser/nokogiri.rb +3 -1
  101. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  102. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  103. metadata +11 -242
  104. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  105. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  106. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  107. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  108. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  109. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
data/LICENSE.md CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License
2
2
 
3
- Copyright 2008 -- 2021 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney.
3
+ Copyright 2008 -- 2023 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
6
 
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Nokogiri
4
4
 
5
- Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2 (CRuby) and xerces (JRuby).
5
+ Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2, libgumbo, and xerces.
6
6
 
7
7
  ## Guiding Principles
8
8
 
@@ -40,10 +40,6 @@ Some guiding principles Nokogiri tries to follow:
40
40
 
41
41
  All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions).
42
42
 
43
- Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
44
-
45
- [tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme
46
-
47
43
  ### Reading
48
44
 
49
45
  Your first stops for learning more about Nokogiri should be:
@@ -57,7 +53,6 @@ Your first stops for learning more about Nokogiri should be:
57
53
 
58
54
  There are a few ways to ask exploratory questions:
59
55
 
60
- - The Ruby Discord chat server is active at https://discord.gg/UyQnKrT
61
56
  - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
62
57
  - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
63
58
 
@@ -103,12 +98,21 @@ We bump `Major.Minor.Patch` versions following this guidance:
103
98
  - Updating packaged libraries for security-related reasons.
104
99
 
105
100
 
101
+ ### Sponsorship
102
+
103
+ You can help sponsor the maintainers of this software through one of these organizations:
104
+
105
+ - [github.com/sponsors/flavorjones](https://github.com/sponsors/flavorjones)
106
+ - [opencollective.com/nokogiri](https://opencollective.com/nokogiri)
107
+ - [tidelift.com/subscription/pkg/rubygems-nokogiri](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme)
108
+
109
+
106
110
  ## Installation
107
111
 
108
112
  Requirements:
109
113
 
110
- - Ruby >= 2.6
111
- - JRuby >= 9.3.0.0
114
+ - Ruby >= 2.7
115
+ - JRuby >= 9.4.0.0
112
116
 
113
117
 
114
118
  ### Native Gems: Faster, more reliable installation
@@ -119,10 +123,13 @@ Requirements:
119
123
 
120
124
  Nokogiri ships pre-compiled, "native" gems for the following platforms:
121
125
 
122
- - Linux: `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`), including musl platforms like Alpine
126
+ - Linux:
127
+ - `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`)
128
+ - `aarch64-linux` and `arm-linux` (req: `glibc >= 2.29`)
129
+ - Note that musl platforms like Alpine **are** supported
123
130
  - Darwin/MacOS: `x86_64-darwin` and `arm64-darwin`
124
- - Windows: `x86-mingw32` and `x64-mingw32`
125
- - Java: any platform running JRuby 9.3 or higher
131
+ - Windows: `x86-mingw32`, `x64-mingw32`, and `x64-mingw-ucrt`
132
+ - Java: any platform running JRuby 9.4 or higher
126
133
 
127
134
  To determine whether your system supports one of these gems, look at the output of `bundle platform` or `ruby -e 'puts Gem::Platform.local.to_s'`.
128
135
 
data/dependencies.yml CHANGED
@@ -1,23 +1,41 @@
1
1
  libxml2:
2
- version: "2.9.14"
3
- sha256: "60d74a257d1ccec0475e749cba2f21559e48139efba6ff28224357c7c798dfee"
4
- # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.sha256sum
2
+ version: "2.10.3"
3
+ sha256: "5d2cc3d78bec3dbe212a9d7fa629ada25a7da928af432c93060ff5c17ee28a9c"
4
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.10/libxml2-2.10.3.sha256sum
5
5
 
6
6
  libxslt:
7
- version: "1.1.35"
8
- sha256: "8247f33e9a872c6ac859aa45018bc4c4d00b97e2feac9eebc10c93ce1f34dd79"
9
- # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.35.sha256sum
7
+ version: "1.1.37"
8
+ sha256: "3a4b27dc8027ccd6146725950336f1ec520928f320f144eb5fa7990ae6123ab4"
9
+ # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.37.sha256sum
10
10
 
11
11
  zlib:
12
- version: "1.2.12"
13
- sha256: "91844808532e5ce316b3c010929493c0244f3d37593afd6de04f71821d5136d9"
12
+ version: "1.2.13"
13
+ sha256: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"
14
14
  # SHA-256 hash provided on http://zlib.net/
15
15
 
16
16
  libiconv:
17
- version: "1.16"
18
- sha256: "e6a1b1b589654277ee790cce3734f07876ac4ccfaecbee8afa0b649cf529cc04"
19
- # gpg: Signature made Fri 26 Apr 2019 03:36:38 PM EDT
20
- # gpg: using RSA key 4F494A942E4616C2
21
- # gpg: Good signature from "Bruno Haible (Open Source Development) <bruno@clisp.org>" [expired]
22
- # gpg: Note: This key has expired!
23
- # Primary key fingerprint: 68D9 4D8A AEEA D48A E7DC 5B90 4F49 4A94 2E46 16C2
17
+ version: "1.17"
18
+ sha256: "8f74213b56238c85a50a5329f77e06198771e70dd9a739779f4c02f65d971313"
19
+ # signature verified by following this path:
20
+ # - release announced at https://savannah.gnu.org/forum/forum.php?forum_id=10175
21
+ # - which links to https://savannah.gnu.org/users/haible as the releaser
22
+ # - which links to https://savannah.gnu.org/people/viewgpg.php?user_id=1871 as the gpg key
23
+ #
24
+ # So:
25
+ # - wget -q -O - https://savannah.gnu.org/people/viewgpg.php?user_id=1871 | gpg --import
26
+ # gpg: key F5BE8B267C6A406D: 1 signature not checked due to a missing key
27
+ # gpg: key F5BE8B267C6A406D: public key "Bruno Haible (Open Source Development) <bruno@clisp.org>" imported
28
+ # gpg: Total number processed: 1
29
+ # gpg: imported: 1
30
+ # gpg: marginals needed: 3 completes needed: 1 trust model: pgp
31
+ # gpg: depth: 0 valid: 4 signed: 0 trust: 0-, 0q, 0n, 0m, 0f, 4u
32
+ # gpg: next trustdb check due at 2024-05-09
33
+ # - gpg --verify libiconv-1.17.tar.gz.sig ports/archives/libiconv-1.17.tar.gz
34
+ # gpg: Signature made Sun 15 May 2022 11:26:42 AM EDT
35
+ # gpg: using RSA key 9001B85AF9E1B83DF1BDA942F5BE8B267C6A406D
36
+ # gpg: Good signature from "Bruno Haible (Open Source Development) <bruno@clisp.org>" [unknown]
37
+ # gpg: WARNING: This key is not certified with a trusted signature!
38
+ # gpg: There is no indication that the signature belongs to the owner.
39
+ # Primary key fingerprint: 9001 B85A F9E1 B83D F1BD A942 F5BE 8B26 7C6A 406D
40
+ #
41
+ # And this sha256sum is calculated from that verified tarball.
@@ -200,7 +200,7 @@ def nix?
200
200
  end
201
201
 
202
202
  def truffle?
203
- ::RUBY_ENGINE == "truffleruby"
203
+ RUBY_ENGINE == "truffleruby"
204
204
  end
205
205
 
206
206
  def concat_flags(*args)
@@ -211,6 +211,16 @@ def local_have_library(lib, func = nil, headers = nil)
211
211
  have_library(lib, func, headers) || have_library("lib#{lib}", func, headers)
212
212
  end
213
213
 
214
+ def zlib_source(version_string)
215
+ # As of 2022-12, I'm starting to see failed downloads often enough from zlib.net that I want to
216
+ # change the default to github.
217
+ if ENV["NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE"]
218
+ "https://zlib.net/fossils/zlib-#{version_string}.tar.gz"
219
+ else
220
+ "https://github.com/madler/zlib/releases/download/v#{version_string}/zlib-#{version_string}.tar.gz"
221
+ end
222
+ end
223
+
214
224
  def gnome_source
215
225
  # As of 2022-02-20, some mirrors have expired SSL certificates. I'm able to retrieve from my home,
216
226
  # but whatever host is resolved on the github actions workers see an expired cert.
@@ -400,8 +410,8 @@ def iconv_configure_flags
400
410
  return ["--with-iconv=yes"]
401
411
  end
402
412
 
403
- config = preserving_globals { have_package_configuration("libiconv") }
404
- if config && try_link_iconv("pkg-config libiconv") { have_package_configuration("libiconv") }
413
+ config = preserving_globals { pkg_config("libiconv") }
414
+ if config && try_link_iconv("pkg-config libiconv") { pkg_config("libiconv") }
405
415
  cflags, ldflags, libs = config
406
416
 
407
417
  return [
@@ -430,10 +440,12 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
430
440
  "#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}"
431
441
  end
432
442
 
433
- recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
434
- # Prefer host_alias over host in order to use the correct compiler prefix for cross build, but
435
- # use host if not set.
443
+ # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
444
+ # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
436
445
  recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
446
+ recipe.host = recipe.host.gsub(/i386/, "i686")
447
+
448
+ recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
437
449
  recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
438
450
 
439
451
  yield recipe
@@ -525,7 +537,6 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
525
537
 
526
538
  EOM
527
539
 
528
- pp(recipe.files)
529
540
  chdir_for_build { recipe.cook }
530
541
  FileUtils.touch(checkpoint)
531
542
  end
@@ -573,6 +584,34 @@ def do_clean
573
584
  exit!(0)
574
585
  end
575
586
 
587
+ # In ruby 3.2, symbol resolution changed on Darwin, to introduce the `-bundle_loader` flag to
588
+ # resolve symbols against the ruby binary.
589
+ #
590
+ # This makes it challenging to build a single extension that works with both a ruby with
591
+ # `--enable-shared` and one with `--disable-shared`. To work around that, we choose to add
592
+ # `-flat_namespace` to the link line (later in this file).
593
+ #
594
+ # The `-flat_namespace` line introduces its own behavior change, which is that (similar to on
595
+ # Linux), any symbols in the extension that are exported may now be resolved by shared libraries
596
+ # loaded by the Ruby process. Specifically, that means that libxml2 and libxslt, which are
597
+ # statically linked into the nokogiri bundle, will resolve (at runtime) to a system libxml2 loaded
598
+ # by Ruby on Darwin. And it appears that often Ruby on Darwin does indeed load the system libxml2,
599
+ # and that messes with our assumptions about whether we're running with a patched libxml2 or a
600
+ # vanilla libxml2.
601
+ #
602
+ # We choose to use `-load_hidden` in this case to prevent exporting those symbols from libxml2 and
603
+ # libxslt, which ensures that they will be resolved to the static libraries in the bundle. In other
604
+ # words, when we use `load_hidden`, what happens in the extension stays in the extension.
605
+ #
606
+ # See https://github.com/rake-compiler/rake-compiler-dock/issues/87 for more info.
607
+ #
608
+ # Anyway, this method is the logical bit to tell us when to turn on these workarounds.
609
+ def needs_darwin_linker_hack
610
+ config_cross_build? &&
611
+ darwin? &&
612
+ Gem::Requirement.new("~> 3.2").satisfied_by?(Gem::Version.new(RbConfig::CONFIG["ruby_version"].split("+").first))
613
+ end
614
+
576
615
  #
577
616
  # main
578
617
  #
@@ -580,7 +619,7 @@ do_help if arg_config("--help")
580
619
  do_clean if arg_config("--clean")
581
620
 
582
621
  if openbsd? && !config_system_libraries?
583
- if %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1) !~ /clang/
622
+ unless %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1).include?("clang")
584
623
  (ENV["CC"] ||= find_executable("egcc")) ||
585
624
  abort("Please install gcc 4.9+ from ports using `pkg_add -v gcc`")
586
625
  end
@@ -616,6 +655,9 @@ $LIBS = concat_flags($LIBS, ENV["LIBS"])
616
655
  # errors/warnings. see #2302
617
656
  append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
618
657
 
658
+ # gumbo html5 serialization is slower with O3, let's make sure we use O2
659
+ append_cflags("-O2")
660
+
619
661
  # always include debugging information
620
662
  append_cflags("-g")
621
663
 
@@ -625,8 +667,18 @@ append_cflags("-Winline")
625
667
  # good to have no matter what Ruby was compiled with
626
668
  append_cflags("-Wmissing-noreturn")
627
669
 
670
+ # check integer loss of precision
671
+ if darwin?
672
+ append_cflags("-Wshorten-64-to-32")
673
+ else
674
+ append_cflags("-Wconversion -Wno-sign-conversion")
675
+ end
676
+
628
677
  # handle clang variations, see #1101
629
- append_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future") if darwin?
678
+ if darwin?
679
+ append_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future")
680
+ append_cflags("-Wno-unknown-warning-option")
681
+ end
630
682
 
631
683
  # these tend to be noisy, but on occasion useful during development
632
684
  # append_cflags(["-Wcast-qual", "-Wwrite-strings"])
@@ -666,6 +718,10 @@ else
666
718
  cross_build_p = config_cross_build?
667
719
  message "Cross build is #{cross_build_p ? "enabled" : "disabled"}.\n"
668
720
 
721
+ if needs_darwin_linker_hack
722
+ append_ldflags("-Wl,-flat_namespace")
723
+ end
724
+
669
725
  require "yaml"
670
726
  dependencies = YAML.load_file(File.join(PACKAGE_ROOT_DIR, "dependencies.yml"))
671
727
 
@@ -674,7 +730,7 @@ else
674
730
  if cross_build_p || windows?
675
731
  zlib_recipe = process_recipe("zlib", dependencies["zlib"]["version"], static_p, cross_build_p) do |recipe|
676
732
  recipe.files = [{
677
- url: "https://zlib.net/fossils/#{recipe.name}-#{recipe.version}.tar.gz",
733
+ url: zlib_source(recipe.version),
678
734
  sha256: dependencies["zlib"]["sha256"],
679
735
  }]
680
736
  if windows?
@@ -712,9 +768,17 @@ else
712
768
  else
713
769
  class << recipe
714
770
  def configure
715
- cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
716
- execute("configure",
717
- ["env", "CHOST=#{host}", "CFLAGS=#{cflags}", "./configure", "--static", configure_prefix])
771
+ env = {}
772
+ env["CFLAGS"] = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
773
+ env["CHOST"] = host
774
+ execute("configure", ["./configure", "--static", configure_prefix], { env: env })
775
+ if darwin?
776
+ # needed as of zlib 1.2.13
777
+ Dir.chdir(work_path) do
778
+ makefile = File.read("Makefile").gsub(/^AR=.*$/, "AR=#{host}-libtool")
779
+ File.open("Makefile", "w") { |m| m.write(makefile) }
780
+ end
781
+ end
718
782
  end
719
783
  end
720
784
  end
@@ -839,6 +903,11 @@ else
839
903
  recipe.configure_options += ["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"]
840
904
  end
841
905
 
906
+ if windows?
907
+ cflags = concat_flags(cflags, "-ULIBXSLT_STATIC", "-DIN_LIBXSLT")
908
+ cflags = concat_flags(cflags, "-ULIBEXSLT_STATIC", "-DIN_LIBEXSLT")
909
+ end
910
+
842
911
  recipe.configure_options << if source_dir
843
912
  "--config-cache"
844
913
  else
@@ -860,9 +929,13 @@ else
860
929
  $libs = $libs.shellsplit.tap do |libs|
861
930
  [libxml2_recipe, libxslt_recipe].each do |recipe|
862
931
  libname = recipe.name[/\Alib(.+)\z/, 1]
863
- File.join(recipe.path, "bin", "#{libname}-config").tap do |config|
932
+ config_basename = "#{libname}-config"
933
+ File.join(recipe.path, "bin", config_basename).tap do |config|
864
934
  # call config scripts explicitly with 'sh' for compat with Windows
865
- $CPPFLAGS = %x(sh #{config} --cflags).strip << " " << $CPPFLAGS
935
+ cflags = %x(sh #{config} --cflags).strip
936
+ message("#{config_basename} cflags: #{cflags}\n")
937
+ $CPPFLAGS = concat_flags(cflags, $CPPFLAGS) # prepend
938
+
866
939
  %x(sh #{config} --libs).strip.shellsplit.each do |arg|
867
940
  case arg
868
941
  when /\A-L(.+)\z/
@@ -881,7 +954,7 @@ else
881
954
  end
882
955
 
883
956
  patches_string = recipe.patch_files.map { |path| File.basename(path) }.join(" ")
884
- append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\\"#{patches_string}\\\""])
957
+ append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\"#{patches_string}\\""])
885
958
 
886
959
  case libname
887
960
  when "xml2"
@@ -900,16 +973,17 @@ else
900
973
  end.shelljoin
901
974
 
902
975
  if static_p
976
+ static_archive_ld_flag = needs_darwin_linker_hack ? ["-load_hidden"] : []
903
977
  $libs = $libs.shellsplit.map do |arg|
904
978
  case arg
905
979
  when "-lxml2"
906
- File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))
980
+ static_archive_ld_flag + [File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))]
907
981
  when "-lxslt", "-lexslt"
908
- File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))
982
+ static_archive_ld_flag + [File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))]
909
983
  else
910
984
  arg
911
985
  end
912
- end.shelljoin
986
+ end.flatten.shelljoin
913
987
  end
914
988
 
915
989
  ensure_func("xmlParseDoc", "libxml/parser.h")
@@ -945,7 +1019,7 @@ libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_b
945
1019
  end
946
1020
 
947
1021
  def compile
948
- cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
1022
+ cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-O2", "-g")
949
1023
 
950
1024
  env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
951
1025
  if config_cross_build?
@@ -965,7 +1039,7 @@ end
965
1039
  append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}")
966
1040
  $libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a")
967
1041
  $LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")]
968
- ensure_func("gumbo_parse_with_options", "gumbo.h")
1042
+ ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h")
969
1043
 
970
1044
  have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21
971
1045
  have_func("xmlFirstElementChild") # introduced in libxml 2.7.3
@@ -973,11 +1047,11 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
973
1047
  have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
974
1048
  have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
975
1049
  have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
976
-
977
- have_func("vasprintf")
1050
+ have_func("rb_gc_location") # introduced in Ruby 2.7
1051
+ have_func("rb_category_warning") # introduced in Ruby 3.0
978
1052
 
979
1053
  other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
980
- append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\\"#{other_library_versions_string}\\\""])
1054
+ append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\"#{other_library_versions_string}\\""])
981
1055
 
982
1056
  unless config_system_libraries?
983
1057
  if cross_build_p
@@ -1006,3 +1080,5 @@ if config_clean?
1006
1080
  EOF
1007
1081
  end
1008
1082
  end
1083
+
1084
+ # rubocop:enable Style/GlobalVars
data/ext/nokogiri/gumbo.c CHANGED
@@ -23,13 +23,13 @@
23
23
  //
24
24
  // Processing starts by calling gumbo_parse_with_options. The resulting document tree
25
25
  // is then walked, a parallel libxml2 tree is constructed, and the final document is
26
- // then wrapped using Nokogiri_wrap_xml_document. This approach reduces memory and CPU
26
+ // then wrapped using noko_xml_document_wrap. This approach reduces memory and CPU
27
27
  // requirements as Ruby objects are only built when necessary.
28
28
  //
29
29
 
30
30
  #include <nokogiri.h>
31
31
 
32
- #include "gumbo.h"
32
+ #include "nokogiri_gumbo.h"
33
33
 
34
34
  VALUE cNokogiriHtml5Document;
35
35
 
@@ -281,12 +281,12 @@ add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url)
281
281
  rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR
282
282
  rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR
283
283
  rb_iv_set(syntax_error, "@file", url);
284
- rb_iv_set(syntax_error, "@line", INT2NUM(position.line));
284
+ rb_iv_set(syntax_error, "@line", SIZET2NUM(position.line));
285
285
  rb_iv_set(syntax_error, "@str1", str1);
286
286
  rb_iv_set(syntax_error, "@str2", Qnil);
287
287
  rb_iv_set(syntax_error, "@str3", Qnil);
288
288
  rb_iv_set(syntax_error, "@int1", INT2NUM(0));
289
- rb_iv_set(syntax_error, "@column", INT2NUM(position.column));
289
+ rb_iv_set(syntax_error, "@column", SIZET2NUM(position.column));
290
290
  rb_ary_push(rerrors, syntax_error);
291
291
  }
292
292
  rb_iv_set(rdoc, "@errors", rerrors);
@@ -297,6 +297,7 @@ typedef struct {
297
297
  GumboOutput *output;
298
298
  VALUE input;
299
299
  VALUE url_or_frag;
300
+ VALUE klass;
300
301
  xmlDocPtr doc;
301
302
  } ParseArgs;
302
303
 
@@ -321,7 +322,7 @@ static VALUE parse_continue(VALUE parse_args);
321
322
  * @!visibility protected
322
323
  */
323
324
  static VALUE
324
- parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth)
325
+ parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth, VALUE klass)
325
326
  {
326
327
  GumboOptions options = kGumboDefaultOptions;
327
328
  options.max_attributes = NUM2INT(max_attributes);
@@ -333,6 +334,7 @@ parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors
333
334
  .output = output,
334
335
  .input = input,
335
336
  .url_or_frag = url,
337
+ .klass = klass,
336
338
  .doc = NULL,
337
339
  };
338
340
 
@@ -357,7 +359,9 @@ parse_continue(VALUE parse_args)
357
359
  }
358
360
  args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
359
361
  build_tree(doc, (xmlNodePtr)doc, output->document);
360
- VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc);
362
+ VALUE rdoc = noko_xml_document_wrap(args->klass, doc);
363
+ rb_iv_set(rdoc, "@url", args->url_or_frag);
364
+ rb_iv_set(rdoc, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
361
365
  args->doc = NULL; // The Ruby runtime now owns doc so don't delete it.
362
366
  add_errors(output, rdoc, args->input, args->url_or_frag);
363
367
  return rdoc;
@@ -401,7 +405,7 @@ static xmlNodePtr
401
405
  extract_xml_node(VALUE node)
402
406
  {
403
407
  xmlNodePtr xml_node;
404
- Data_Get_Struct(node, xmlNode, xml_node);
408
+ Noko_Node_Get_Struct(node, xmlNode, xml_node);
405
409
  return xml_node;
406
410
  }
407
411
 
@@ -498,9 +502,11 @@ error:
498
502
  }
499
503
 
500
504
  // Encoding.
501
- if (RSTRING_LEN(tag_name) == 14
505
+ if (ctx_ns == GUMBO_NAMESPACE_MATHML
506
+ && RSTRING_LEN(tag_name) == 14
502
507
  && !st_strcasecmp(ctx_tag, "annotation-xml")) {
503
508
  VALUE enc = rb_funcall(ctx, rb_intern_const("[]"),
509
+ 1,
504
510
  rb_utf8_str_new_static("encoding", 8));
505
511
  if (RTEST(enc)) {
506
512
  Check_Type(enc, T_STRING);
@@ -512,8 +518,11 @@ error:
512
518
  // Quirks mode.
513
519
  VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
514
520
  VALUE dtd = rb_funcall(doc, internal_subset, 0);
515
- if (NIL_P(dtd)) {
521
+ VALUE doc_quirks_mode = rb_iv_get(doc, "@quirks_mode");
522
+ if (NIL_P(ctx) || NIL_P(doc_quirks_mode)) {
516
523
  quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
524
+ } else if (NIL_P(dtd)) {
525
+ quirks_mode = GUMBO_DOCTYPE_QUIRKS;
517
526
  } else {
518
527
  VALUE dtd_name = rb_funcall(dtd, name, 0);
519
528
  VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
@@ -560,13 +569,14 @@ fragment_continue(VALUE parse_args)
560
569
  args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it.
561
570
  xmlNodePtr xml_frag = extract_xml_node(doc_fragment);
562
571
  build_tree(xml_doc, xml_frag, output->root);
572
+ rb_iv_set(doc_fragment, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
563
573
  add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9));
564
574
  return Qnil;
565
575
  }
566
576
 
567
577
  // Initialize the Nokogumbo class and fetch constants we will use later.
568
578
  void
569
- noko_init_gumbo()
579
+ noko_init_gumbo(void)
570
580
  {
571
581
  // Class constants.
572
582
  cNokogiriHtml5Document = rb_define_class_under(mNokogiriHtml5, "Document", cNokogiriHtml4Document);
@@ -577,7 +587,7 @@ noko_init_gumbo()
577
587
  parent = rb_intern_const("parent");
578
588
 
579
589
  // Define Nokogumbo module with parse and fragment methods.
580
- rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 5);
590
+ rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 6);
581
591
  rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6);
582
592
  }
583
593
 
@@ -146,11 +146,11 @@ rb_html_document_type(VALUE self)
146
146
  {
147
147
  htmlDocPtr doc;
148
148
  Data_Get_Struct(self, xmlDoc, doc);
149
- return INT2NUM((long)doc->type);
149
+ return INT2NUM(doc->type);
150
150
  }
151
151
 
152
152
  void
153
- noko_init_html_document()
153
+ noko_init_html_document(void)
154
154
  {
155
155
  assert(cNokogiriXmlDocument);
156
156
  cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument);
@@ -270,7 +270,7 @@ get_description(VALUE klass, VALUE tag_name)
270
270
  }
271
271
 
272
272
  void
273
- noko_init_html_element_description()
273
+ noko_init_html_element_description(void)
274
274
  {
275
275
  cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject);
276
276
 
@@ -20,7 +20,7 @@ get(VALUE _, VALUE rb_entity_name)
20
20
  return Qnil;
21
21
  }
22
22
 
23
- rb_constructor_args[0] = INT2NUM((long)c_entity_desc->value);
23
+ rb_constructor_args[0] = UINT2NUM(c_entity_desc->value);
24
24
  rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
25
25
  rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
26
26
 
@@ -29,7 +29,7 @@ get(VALUE _, VALUE rb_entity_name)
29
29
  }
30
30
 
31
31
  void
32
- noko_init_html_entity_lookup()
32
+ noko_init_html_entity_lookup(void)
33
33
  {
34
34
  cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
35
35
 
@@ -5,13 +5,8 @@ VALUE cNokogiriHtml4SaxParserContext ;
5
5
  static void
6
6
  deallocate(xmlParserCtxtPtr ctxt)
7
7
  {
8
- NOKOGIRI_DEBUG_START(ctxt);
9
-
10
8
  ctxt->sax = NULL;
11
-
12
9
  htmlFreeParserCtxt(ctxt);
13
-
14
- NOKOGIRI_DEBUG_END(ctxt);
15
10
  }
16
11
 
17
12
  static VALUE
@@ -106,7 +101,7 @@ parse_with(VALUE self, VALUE sax_handler)
106
101
  }
107
102
 
108
103
  void
109
- noko_init_html_sax_parser_context()
104
+ noko_init_html_sax_parser_context(void)
110
105
  {
111
106
  assert(cNokogiriXmlSaxParserContext);
112
107
  cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext",
@@ -85,7 +85,7 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
85
85
  }
86
86
 
87
87
  void
88
- noko_init_html_sax_push_parser()
88
+ noko_init_html_sax_push_parser(void)
89
89
  {
90
90
  assert(cNokogiriXmlSaxPushParser);
91
91
  cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);