nokogiri 1.12.5 → 1.14.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (156) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +41 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +23 -14
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -66
  8. data/ext/nokogiri/extconf.rb +159 -63
  9. data/ext/nokogiri/gumbo.c +21 -11
  10. data/ext/nokogiri/html4_document.c +2 -2
  11. data/ext/nokogiri/html4_element_description.c +1 -1
  12. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  13. data/ext/nokogiri/html4_sax_parser_context.c +3 -9
  14. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  15. data/ext/nokogiri/nokogiri.c +38 -51
  16. data/ext/nokogiri/nokogiri.h +26 -14
  17. data/ext/nokogiri/test_global_handlers.c +1 -1
  18. data/ext/nokogiri/xml_attr.c +3 -3
  19. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  20. data/ext/nokogiri/xml_cdata.c +3 -3
  21. data/ext/nokogiri/xml_comment.c +1 -1
  22. data/ext/nokogiri/xml_document.c +53 -44
  23. data/ext/nokogiri/xml_document_fragment.c +1 -3
  24. data/ext/nokogiri/xml_dtd.c +11 -11
  25. data/ext/nokogiri/xml_element_content.c +3 -3
  26. data/ext/nokogiri/xml_element_decl.c +5 -5
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -14
  28. data/ext/nokogiri/xml_entity_decl.c +6 -6
  29. data/ext/nokogiri/xml_entity_reference.c +1 -1
  30. data/ext/nokogiri/xml_namespace.c +80 -14
  31. data/ext/nokogiri/xml_node.c +982 -396
  32. data/ext/nokogiri/xml_node_set.c +4 -6
  33. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  34. data/ext/nokogiri/xml_reader.c +133 -32
  35. data/ext/nokogiri/xml_relax_ng.c +1 -3
  36. data/ext/nokogiri/xml_sax_parser.c +23 -17
  37. data/ext/nokogiri/xml_sax_parser_context.c +11 -9
  38. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  39. data/ext/nokogiri/xml_schema.c +4 -6
  40. data/ext/nokogiri/xml_syntax_error.c +1 -1
  41. data/ext/nokogiri/xml_text.c +2 -2
  42. data/ext/nokogiri/xml_xpath_context.c +144 -114
  43. data/ext/nokogiri/xslt_stylesheet.c +122 -23
  44. data/gumbo-parser/Makefile +10 -0
  45. data/gumbo-parser/src/attribute.h +1 -1
  46. data/gumbo-parser/src/error.c +2 -2
  47. data/gumbo-parser/src/error.h +1 -1
  48. data/gumbo-parser/src/foreign_attrs.c +2 -2
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +8 -16
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/class_resolver.rb +67 -0
  69. data/lib/nokogiri/css/node.rb +9 -8
  70. data/lib/nokogiri/css/parser.rb +360 -341
  71. data/lib/nokogiri/css/parser.y +249 -244
  72. data/lib/nokogiri/css/parser_extras.rb +22 -20
  73. data/lib/nokogiri/css/syntax_error.rb +1 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -3
  75. data/lib/nokogiri/css/tokenizer.rex +3 -2
  76. data/lib/nokogiri/css/xpath_visitor.rb +184 -85
  77. data/lib/nokogiri/css.rb +44 -6
  78. data/lib/nokogiri/decorators/slop.rb +8 -7
  79. data/lib/nokogiri/encoding_handler.rb +57 -0
  80. data/lib/nokogiri/extension.rb +4 -3
  81. data/lib/nokogiri/gumbo.rb +1 -0
  82. data/lib/nokogiri/html.rb +16 -10
  83. data/lib/nokogiri/html4/builder.rb +1 -0
  84. data/lib/nokogiri/html4/document.rb +56 -164
  85. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  86. data/lib/nokogiri/html4/element_description.rb +1 -0
  87. data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
  88. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  89. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  90. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  91. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  92. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  93. data/lib/nokogiri/html4.rb +12 -5
  94. data/lib/nokogiri/html5/document.rb +126 -32
  95. data/lib/nokogiri/html5/document_fragment.rb +14 -4
  96. data/lib/nokogiri/html5/node.rb +12 -7
  97. data/lib/nokogiri/html5.rb +138 -222
  98. data/lib/nokogiri/jruby/dependencies.rb +2 -19
  99. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  100. data/lib/nokogiri/syntax_error.rb +1 -0
  101. data/lib/nokogiri/version/constant.rb +2 -1
  102. data/lib/nokogiri/version/info.rb +32 -24
  103. data/lib/nokogiri/version.rb +1 -0
  104. data/lib/nokogiri/xml/attr.rb +54 -3
  105. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  106. data/lib/nokogiri/xml/builder.rb +35 -33
  107. data/lib/nokogiri/xml/cdata.rb +2 -1
  108. data/lib/nokogiri/xml/character_data.rb +1 -0
  109. data/lib/nokogiri/xml/document.rb +232 -143
  110. data/lib/nokogiri/xml/document_fragment.rb +88 -42
  111. data/lib/nokogiri/xml/dtd.rb +3 -2
  112. data/lib/nokogiri/xml/element_content.rb +1 -0
  113. data/lib/nokogiri/xml/element_decl.rb +2 -1
  114. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  115. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  116. data/lib/nokogiri/xml/namespace.rb +44 -0
  117. data/lib/nokogiri/xml/node/save_options.rb +14 -8
  118. data/lib/nokogiri/xml/node.rb +708 -383
  119. data/lib/nokogiri/xml/node_set.rb +134 -59
  120. data/lib/nokogiri/xml/notation.rb +12 -0
  121. data/lib/nokogiri/xml/parse_options.rb +140 -56
  122. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  123. data/lib/nokogiri/xml/pp/node.rb +26 -26
  124. data/lib/nokogiri/xml/pp.rb +1 -0
  125. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  126. data/lib/nokogiri/xml/reader.rb +20 -24
  127. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  128. data/lib/nokogiri/xml/sax/document.rb +20 -19
  129. data/lib/nokogiri/xml/sax/parser.rb +38 -36
  130. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  131. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  132. data/lib/nokogiri/xml/sax.rb +1 -0
  133. data/lib/nokogiri/xml/schema.rb +7 -6
  134. data/lib/nokogiri/xml/searchable.rb +93 -62
  135. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  136. data/lib/nokogiri/xml/text.rb +1 -0
  137. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  138. data/lib/nokogiri/xml/xpath.rb +12 -0
  139. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  140. data/lib/nokogiri/xml.rb +4 -3
  141. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  142. data/lib/nokogiri/xslt.rb +21 -13
  143. data/lib/nokogiri.rb +22 -27
  144. data/lib/xsd/xmlparser/nokogiri.rb +28 -25
  145. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  146. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  147. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  148. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  149. metadata +20 -171
  150. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  151. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  152. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
  153. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  154. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  155. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  156. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
- ENV["RC_ARCHS"] = "" if RUBY_PLATFORM =~ /darwin/
2
+
3
+ # rubocop:disable Style/GlobalVars
4
+
5
+ ENV["RC_ARCHS"] = "" if RUBY_PLATFORM.include?("darwin")
3
6
 
4
7
  require "mkmf"
5
8
  require "rbconfig"
@@ -12,16 +15,14 @@ PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."
12
15
  REQUIRED_LIBXML_VERSION = "2.6.21"
13
16
  RECOMMENDED_LIBXML_VERSION = "2.9.3"
14
17
 
15
- # The gem version constraint in the Rakefile is not respected at install time.
16
- # Keep this version in sync with the one in the Rakefile !
17
- REQUIRED_MINI_PORTILE_VERSION = "~> 2.6.1"
18
+ REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.0" # keep this version in sync with the one in the gemspec
18
19
  REQUIRED_PKG_CONFIG_VERSION = "~> 1.1"
19
20
 
20
21
  # Keep track of what versions of what libraries we build against
21
22
  OTHER_LIBRARY_VERSIONS = {}
22
23
 
23
24
  NOKOGIRI_HELP_MESSAGE = <<~HELP
24
- USAGE: ruby #{$0} [options]
25
+ USAGE: ruby #{$PROGRAM_NAME} [options]
25
26
 
26
27
  Flags that are always valid:
27
28
 
@@ -175,23 +176,23 @@ def config_system_libraries?
175
176
  end
176
177
 
177
178
  def windows?
178
- RbConfig::CONFIG["target_os"] =~ /mingw32|mswin/
179
+ RbConfig::CONFIG["target_os"].match?(/mingw|mswin/)
179
180
  end
180
181
 
181
182
  def solaris?
182
- RbConfig::CONFIG["target_os"] =~ /solaris/
183
+ RbConfig::CONFIG["target_os"].include?("solaris")
183
184
  end
184
185
 
185
186
  def darwin?
186
- RbConfig::CONFIG["target_os"] =~ /darwin/
187
+ RbConfig::CONFIG["target_os"].include?("darwin")
187
188
  end
188
189
 
189
190
  def openbsd?
190
- RbConfig::CONFIG["target_os"] =~ /openbsd/
191
+ RbConfig::CONFIG["target_os"].include?("openbsd")
191
192
  end
192
193
 
193
194
  def aix?
194
- RbConfig::CONFIG["target_os"] =~ /aix/
195
+ RbConfig::CONFIG["target_os"].include?("aix")
195
196
  end
196
197
 
197
198
  def nix?
@@ -199,7 +200,7 @@ def nix?
199
200
  end
200
201
 
201
202
  def truffle?
202
- ::RUBY_ENGINE == "truffleruby"
203
+ RUBY_ENGINE == "truffleruby"
203
204
  end
204
205
 
205
206
  def concat_flags(*args)
@@ -210,6 +211,28 @@ def local_have_library(lib, func = nil, headers = nil)
210
211
  have_library(lib, func, headers) || have_library("lib#{lib}", func, headers)
211
212
  end
212
213
 
214
+ def zlib_source(version_string)
215
+ # As of 2022-12, I'm starting to see failed downloads often enough from zlib.net that I want to
216
+ # change the default to github.
217
+ if ENV["NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE"]
218
+ "https://zlib.net/fossils/zlib-#{version_string}.tar.gz"
219
+ else
220
+ "https://github.com/madler/zlib/releases/download/v#{version_string}/zlib-#{version_string}.tar.gz"
221
+ end
222
+ end
223
+
224
+ def gnome_source
225
+ # As of 2022-02-20, some mirrors have expired SSL certificates. I'm able to retrieve from my home,
226
+ # but whatever host is resolved on the github actions workers see an expired cert.
227
+ #
228
+ # See https://github.com/sparklemotion/nokogiri/runs/5266206403?check_suite_focus=true
229
+ if ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"]
230
+ "https://download.gnome.org"
231
+ else
232
+ "https://mirror.csclub.uwaterloo.ca/gnome" # old reliable
233
+ end
234
+ end
235
+
213
236
  LOCAL_PACKAGE_RESPONSE = Object.new
214
237
  def LOCAL_PACKAGE_RESPONSE.%(package)
215
238
  package ? "yes: #{package}" : "no"
@@ -284,17 +307,16 @@ ensure
284
307
  end
285
308
 
286
309
  def abort_could_not_find_library(lib)
287
- abort("-----\n#{caller[0]}\n#{lib} is missing. Please locate mkmf.log to investigate how it is failing.\n-----")
310
+ callers = caller(1..2).join("\n")
311
+ abort("-----\n#{callers}\n#{lib} is missing. Please locate mkmf.log to investigate how it is failing.\n-----")
288
312
  end
289
313
 
290
- def chdir_for_build
314
+ def chdir_for_build(&block)
291
315
  # When using rake-compiler-dock on Windows, the underlying Virtualbox shared
292
316
  # folders don't support symlinks, but libiconv expects it for a build on
293
317
  # Linux. We work around this limitation by using the temp dir for cooking.
294
- build_dir = ENV["RCD_HOST_RUBY_PLATFORM"].to_s =~ /mingw|mswin|cygwin/ ? "/tmp" : "."
295
- Dir.chdir(build_dir) do
296
- yield
297
- end
318
+ build_dir = /mingw|mswin|cygwin/.match?(ENV["RCD_HOST_RUBY_PLATFORM"].to_s) ? "/tmp" : "."
319
+ Dir.chdir(build_dir, &block)
298
320
  end
299
321
 
300
322
  def sh_export_path(path)
@@ -370,6 +392,7 @@ def iconv_configure_flags
370
392
  ["iconv", "opt"].each do |target|
371
393
  config = preserving_globals { dir_config(target) }
372
394
  next unless config.any? && try_link_iconv("--with-#{target}-* flags") { dir_config(target) }
395
+
373
396
  idirs, ldirs = config.map do |dirs|
374
397
  Array(dirs).flat_map do |dir|
375
398
  dir.split(File::PATH_SEPARATOR)
@@ -387,8 +410,8 @@ def iconv_configure_flags
387
410
  return ["--with-iconv=yes"]
388
411
  end
389
412
 
390
- config = preserving_globals { have_package_configuration("libiconv") }
391
- if config && try_link_iconv("pkg-config libiconv") { have_package_configuration("libiconv") }
413
+ config = preserving_globals { pkg_config("libiconv") }
414
+ if config && try_link_iconv("pkg-config libiconv") { pkg_config("libiconv") }
392
415
  cflags, ldflags, libs = config
393
416
 
394
417
  return [
@@ -402,9 +425,9 @@ def iconv_configure_flags
402
425
  abort_could_not_find_library("libiconv")
403
426
  end
404
427
 
405
- def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
428
+ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
406
429
  require "rubygems"
407
- gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION)
430
+ gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION) # gemspec is not respected at install time
408
431
  require "mini_portile2"
409
432
  message("Using mini_portile version #{MiniPortile::VERSION}\n")
410
433
 
@@ -413,10 +436,16 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
413
436
  end
414
437
 
415
438
  MiniPortile.new(name, version).tap do |recipe|
416
- recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
417
- # Prefer host_alias over host in order to use i586-mingw32msvc as
418
- # correct compiler prefix for cross build, but use host if not set.
439
+ def recipe.port_path
440
+ "#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}"
441
+ end
442
+
443
+ # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
444
+ # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
419
445
  recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
446
+ recipe.host = recipe.host.gsub(/i386/, "i686")
447
+
448
+ recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
420
449
  recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
421
450
 
422
451
  yield recipe
@@ -462,7 +491,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
462
491
  end
463
492
 
464
493
  if RbConfig::CONFIG["target_cpu"] == "universal"
465
- %w[CFLAGS LDFLAGS].each do |key|
494
+ ["CFLAGS", "LDFLAGS"].each do |key|
466
495
  unless env[key].include?("-arch")
467
496
  env[key] = concat_flags(env[key], RbConfig::CONFIG["ARCH_FLAG"])
468
497
  end
@@ -473,7 +502,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
473
502
  "#{key}=#{value.strip}"
474
503
  end
475
504
 
476
- checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{recipe.host}.installed"
505
+ checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{RUBY_PLATFORM}.installed"
477
506
  if File.exist?(checkpoint) && !recipe.source_directory
478
507
  message("Building Nokogiri with a packaged version of #{name}-#{version}.\n")
479
508
  else
@@ -487,7 +516,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
487
516
  message("The following patches are being applied:\n")
488
517
 
489
518
  recipe.patch_files.each do |patch|
490
- message(" - %s\n" % File.basename(patch))
519
+ message(format(" - %s\n", File.basename(patch)))
491
520
  end
492
521
  end
493
522
 
@@ -555,6 +584,34 @@ def do_clean
555
584
  exit!(0)
556
585
  end
557
586
 
587
+ # In ruby 3.2, symbol resolution changed on Darwin, to introduce the `-bundle_loader` flag to
588
+ # resolve symbols against the ruby binary.
589
+ #
590
+ # This makes it challenging to build a single extension that works with both a ruby with
591
+ # `--enable-shared` and one with `--disable-shared`. To work around that, we choose to add
592
+ # `-flat_namespace` to the link line (later in this file).
593
+ #
594
+ # The `-flat_namespace` line introduces its own behavior change, which is that (similar to on
595
+ # Linux), any symbols in the extension that are exported may now be resolved by shared libraries
596
+ # loaded by the Ruby process. Specifically, that means that libxml2 and libxslt, which are
597
+ # statically linked into the nokogiri bundle, will resolve (at runtime) to a system libxml2 loaded
598
+ # by Ruby on Darwin. And it appears that often Ruby on Darwin does indeed load the system libxml2,
599
+ # and that messes with our assumptions about whether we're running with a patched libxml2 or a
600
+ # vanilla libxml2.
601
+ #
602
+ # We choose to use `-load_hidden` in this case to prevent exporting those symbols from libxml2 and
603
+ # libxslt, which ensures that they will be resolved to the static libraries in the bundle. In other
604
+ # words, when we use `load_hidden`, what happens in the extension stays in the extension.
605
+ #
606
+ # See https://github.com/rake-compiler/rake-compiler-dock/issues/87 for more info.
607
+ #
608
+ # Anyway, this method is the logical bit to tell us when to turn on these workarounds.
609
+ def needs_darwin_linker_hack
610
+ config_cross_build? &&
611
+ darwin? &&
612
+ Gem::Requirement.new("~> 3.2").satisfied_by?(Gem::Version.new(RbConfig::CONFIG["ruby_version"].split("+").first))
613
+ end
614
+
558
615
  #
559
616
  # main
560
617
  #
@@ -562,7 +619,7 @@ do_help if arg_config("--help")
562
619
  do_clean if arg_config("--clean")
563
620
 
564
621
  if openbsd? && !config_system_libraries?
565
- if %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1) !~ /clang/
622
+ unless %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1).include?("clang")
566
623
  (ENV["CC"] ||= find_executable("egcc")) ||
567
624
  abort("Please install gcc 4.9+ from ports using `pkg_add -v gcc`")
568
625
  end
@@ -598,6 +655,9 @@ $LIBS = concat_flags($LIBS, ENV["LIBS"])
598
655
  # errors/warnings. see #2302
599
656
  append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
600
657
 
658
+ # gumbo html5 serialization is slower with O3, let's make sure we use O2
659
+ append_cflags("-O2")
660
+
601
661
  # always include debugging information
602
662
  append_cflags("-g")
603
663
 
@@ -607,8 +667,18 @@ append_cflags("-Winline")
607
667
  # good to have no matter what Ruby was compiled with
608
668
  append_cflags("-Wmissing-noreturn")
609
669
 
670
+ # check integer loss of precision
671
+ if darwin?
672
+ append_cflags("-Wshorten-64-to-32")
673
+ else
674
+ append_cflags("-Wconversion -Wno-sign-conversion")
675
+ end
676
+
610
677
  # handle clang variations, see #1101
611
- append_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future") if darwin?
678
+ if darwin?
679
+ append_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future")
680
+ append_cflags("-Wno-unknown-warning-option")
681
+ end
612
682
 
613
683
  # these tend to be noisy, but on occasion useful during development
614
684
  # append_cflags(["-Wcast-qual", "-Wwrite-strings"])
@@ -648,6 +718,10 @@ else
648
718
  cross_build_p = config_cross_build?
649
719
  message "Cross build is #{cross_build_p ? "enabled" : "disabled"}.\n"
650
720
 
721
+ if needs_darwin_linker_hack
722
+ append_ldflags("-Wl,-flat_namespace")
723
+ end
724
+
651
725
  require "yaml"
652
726
  dependencies = YAML.load_file(File.join(PACKAGE_ROOT_DIR, "dependencies.yml"))
653
727
 
@@ -656,7 +730,7 @@ else
656
730
  if cross_build_p || windows?
657
731
  zlib_recipe = process_recipe("zlib", dependencies["zlib"]["version"], static_p, cross_build_p) do |recipe|
658
732
  recipe.files = [{
659
- url: "http://zlib.net/fossils/#{recipe.name}-#{recipe.version}.tar.gz",
733
+ url: zlib_source(recipe.version),
660
734
  sha256: dependencies["zlib"]["sha256"],
661
735
  }]
662
736
  if windows?
@@ -694,16 +768,16 @@ else
694
768
  else
695
769
  class << recipe
696
770
  def configure
697
- cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
698
- execute("configure",
699
- ["env", "CHOST=#{host}", "CFLAGS=#{cflags}", "./configure", "--static", configure_prefix])
700
- end
701
-
702
- def compile
703
- if host =~ /darwin/
704
- execute("compile", "make AR=#{host}-libtool")
705
- else
706
- super
771
+ env = {}
772
+ env["CFLAGS"] = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
773
+ env["CHOST"] = host
774
+ execute("configure", ["./configure", "--static", configure_prefix], { env: env })
775
+ if darwin?
776
+ # needed as of zlib 1.2.13
777
+ Dir.chdir(work_path) do
778
+ makefile = File.read("Makefile").gsub(/^AR=.*$/, "AR=#{host}-libtool")
779
+ File.open("Makefile", "w") { |m| m.write(makefile) }
780
+ end
707
781
  end
708
782
  end
709
783
  end
@@ -714,10 +788,13 @@ else
714
788
  libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p,
715
789
  cross_build_p) do |recipe|
716
790
  recipe.files = [{
717
- url: "http://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
791
+ url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
718
792
  sha256: dependencies["libiconv"]["sha256"],
719
793
  }]
720
794
 
795
+ # The libiconv configure script doesn't accept "arm64" host string but "aarch64"
796
+ recipe.host = recipe.host.gsub("arm64-apple-darwin", "aarch64-apple-darwin")
797
+
721
798
  cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g")
722
799
 
723
800
  recipe.configure_options += [
@@ -741,12 +818,21 @@ else
741
818
  Tools" to open the developer site, download the installer for your OS
742
819
  version and run it.
743
820
  -----
744
- EOM
821
+ EOM
745
822
  end
746
823
 
747
- unless windows?
748
- preserving_globals { local_have_library("z", "gzdopen", "zlib.h") } ||
749
- abort("zlib is missing; necessary for building libxml2")
824
+ if zlib_recipe
825
+ append_cppflags("-I#{zlib_recipe.path}/include")
826
+ $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH
827
+ ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z",
828
+ headers: "zlib.h", func: "gzdopen")
829
+ end
830
+
831
+ if libiconv_recipe
832
+ append_cppflags("-I#{libiconv_recipe.path}/include")
833
+ $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH
834
+ ensure_package_configuration(opt: "iconv", pc: "iconv", lib: "iconv",
835
+ headers: "iconv.h", func: "iconv_open")
750
836
  end
751
837
 
752
838
  libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe|
@@ -754,8 +840,9 @@ else
754
840
  if source_dir
755
841
  recipe.source_directory = source_dir
756
842
  else
843
+ minor_version = Gem::Version.new(recipe.version).segments.take(2).join(".")
757
844
  recipe.files = [{
758
- url: "http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz",
845
+ url: "#{gnome_source}/sources/libxml2/#{minor_version}/#{recipe.name}-#{recipe.version}.tar.xz",
759
846
  sha256: dependencies["libxml2"]["sha256"],
760
847
  }]
761
848
  recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort
@@ -765,7 +852,6 @@ else
765
852
 
766
853
  if zlib_recipe
767
854
  recipe.configure_options << "--with-zlib=#{zlib_recipe.path}"
768
- cflags = concat_flags(cflags, "-I#{zlib_recipe.path}/include")
769
855
  end
770
856
 
771
857
  if libiconv_recipe
@@ -803,8 +889,9 @@ else
803
889
  if source_dir
804
890
  recipe.source_directory = source_dir
805
891
  else
892
+ minor_version = Gem::Version.new(recipe.version).segments.take(2).join(".")
806
893
  recipe.files = [{
807
- url: "http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz",
894
+ url: "#{gnome_source}/sources/libxslt/#{minor_version}/#{recipe.name}-#{recipe.version}.tar.xz",
808
895
  sha256: dependencies["libxslt"]["sha256"],
809
896
  }]
810
897
  recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxslt", "*.patch")].sort
@@ -816,6 +903,11 @@ else
816
903
  recipe.configure_options += ["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"]
817
904
  end
818
905
 
906
+ if windows?
907
+ cflags = concat_flags(cflags, "-ULIBXSLT_STATIC", "-DIN_LIBXSLT")
908
+ cflags = concat_flags(cflags, "-ULIBEXSLT_STATIC", "-DIN_LIBEXSLT")
909
+ end
910
+
819
911
  recipe.configure_options << if source_dir
820
912
  "--config-cache"
821
913
  else
@@ -834,15 +926,16 @@ else
834
926
  append_cppflags("-DNOKOGIRI_PACKAGED_LIBRARIES")
835
927
  append_cppflags("-DNOKOGIRI_PRECOMPILED_LIBRARIES") if cross_build_p
836
928
 
837
- $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH if zlib_recipe
838
- $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH if libiconv_recipe
839
-
840
929
  $libs = $libs.shellsplit.tap do |libs|
841
930
  [libxml2_recipe, libxslt_recipe].each do |recipe|
842
931
  libname = recipe.name[/\Alib(.+)\z/, 1]
843
- File.join(recipe.path, "bin", "#{libname}-config").tap do |config|
932
+ config_basename = "#{libname}-config"
933
+ File.join(recipe.path, "bin", config_basename).tap do |config|
844
934
  # call config scripts explicit with 'sh' for compat with Windows
845
- $CPPFLAGS = %x(sh #{config} --cflags).strip << " " << $CPPFLAGS
935
+ cflags = %x(sh #{config} --cflags).strip
936
+ message("#{config_basename} cflags: #{cflags}\n")
937
+ $CPPFLAGS = concat_flags(cflags, $CPPFLAGS) # prepend
938
+
846
939
  %x(sh #{config} --libs).strip.shellsplit.each do |arg|
847
940
  case arg
848
941
  when /\A-L(.+)\z/
@@ -861,7 +954,7 @@ else
861
954
  end
862
955
 
863
956
  patches_string = recipe.patch_files.map { |path| File.basename(path) }.join(" ")
864
- append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\\"#{patches_string}\\\""])
957
+ append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\"#{patches_string}\\""])
865
958
 
866
959
  case libname
867
960
  when "xml2"
@@ -880,16 +973,17 @@ else
880
973
  end.shelljoin
881
974
 
882
975
  if static_p
976
+ static_archive_ld_flag = needs_darwin_linker_hack ? ["-load_hidden"] : []
883
977
  $libs = $libs.shellsplit.map do |arg|
884
978
  case arg
885
979
  when "-lxml2"
886
- File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))
980
+ static_archive_ld_flag + [File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))]
887
981
  when "-lxslt", "-lexslt"
888
- File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))
982
+ static_archive_ld_flag + [File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))]
889
983
  else
890
984
  arg
891
985
  end
892
- end.shelljoin
986
+ end.flatten.shelljoin
893
987
  end
894
988
 
895
989
  ensure_func("xmlParseDoc", "libxml/parser.h")
@@ -925,11 +1019,11 @@ libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_b
925
1019
  end
926
1020
 
927
1021
  def compile
928
- cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
1022
+ cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-O2", "-g")
929
1023
 
930
1024
  env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
931
1025
  if config_cross_build?
932
- if host =~ /darwin/
1026
+ if /darwin/.match?(host)
933
1027
  env["AR"] = "#{host}-libtool"
934
1028
  env["ARFLAGS"] = "-o"
935
1029
  else
@@ -945,7 +1039,7 @@ end
945
1039
  append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}")
946
1040
  $libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a")
947
1041
  $LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")]
948
- ensure_func("gumbo_parse_with_options", "gumbo.h")
1042
+ ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h")
949
1043
 
950
1044
  have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21
951
1045
  have_func("xmlFirstElementChild") # introduced in libxml 2.7.3
@@ -953,11 +1047,11 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
953
1047
  have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
954
1048
  have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
955
1049
  have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
956
-
957
- have_func("vasprintf")
1050
+ have_func("rb_gc_location") # introduced in Ruby 2.7
1051
+ have_func("rb_category_warning") # introduced in Ruby 3.0
958
1052
 
959
1053
  other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
960
- append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\\"#{other_library_versions_string}\\\""])
1054
+ append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\"#{other_library_versions_string}\\""])
961
1055
 
962
1056
  unless config_system_libraries?
963
1057
  if cross_build_p
@@ -986,3 +1080,5 @@ if config_clean?
986
1080
  EOF
987
1081
  end
988
1082
  end
1083
+
1084
+ # rubocop:enable Style/GlobalVars
data/ext/nokogiri/gumbo.c CHANGED
@@ -23,13 +23,13 @@
23
23
  //
24
24
  // Processing starts by calling gumbo_parse_with_options. The resulting document tree
25
25
  // is then walked, a parallel libxml2 tree is constructed, and the final document is
26
- // then wrapped using Nokogiri_wrap_xml_document. This approach reduces memory and CPU
26
+ // then wrapped using noko_xml_document_wrap. This approach reduces memory and CPU
27
27
  // requirements as Ruby objects are only built when necessary.
28
28
  //
29
29
 
30
30
  #include <nokogiri.h>
31
31
 
32
- #include "gumbo.h"
32
+ #include "nokogiri_gumbo.h"
33
33
 
34
34
  VALUE cNokogiriHtml5Document;
35
35
 
@@ -281,12 +281,12 @@ add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url)
281
281
  rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR
282
282
  rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR
283
283
  rb_iv_set(syntax_error, "@file", url);
284
- rb_iv_set(syntax_error, "@line", INT2NUM(position.line));
284
+ rb_iv_set(syntax_error, "@line", SIZET2NUM(position.line));
285
285
  rb_iv_set(syntax_error, "@str1", str1);
286
286
  rb_iv_set(syntax_error, "@str2", Qnil);
287
287
  rb_iv_set(syntax_error, "@str3", Qnil);
288
288
  rb_iv_set(syntax_error, "@int1", INT2NUM(0));
289
- rb_iv_set(syntax_error, "@column", INT2NUM(position.column));
289
+ rb_iv_set(syntax_error, "@column", SIZET2NUM(position.column));
290
290
  rb_ary_push(rerrors, syntax_error);
291
291
  }
292
292
  rb_iv_set(rdoc, "@errors", rerrors);
@@ -297,6 +297,7 @@ typedef struct {
297
297
  GumboOutput *output;
298
298
  VALUE input;
299
299
  VALUE url_or_frag;
300
+ VALUE klass;
300
301
  xmlDocPtr doc;
301
302
  } ParseArgs;
302
303
 
@@ -321,7 +322,7 @@ static VALUE parse_continue(VALUE parse_args);
321
322
  * @!visibility protected
322
323
  */
323
324
  static VALUE
324
- parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth)
325
+ parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth, VALUE klass)
325
326
  {
326
327
  GumboOptions options = kGumboDefaultOptions;
327
328
  options.max_attributes = NUM2INT(max_attributes);
@@ -333,6 +334,7 @@ parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors
333
334
  .output = output,
334
335
  .input = input,
335
336
  .url_or_frag = url,
337
+ .klass = klass,
336
338
  .doc = NULL,
337
339
  };
338
340
 
@@ -357,7 +359,9 @@ parse_continue(VALUE parse_args)
357
359
  }
358
360
  args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
359
361
  build_tree(doc, (xmlNodePtr)doc, output->document);
360
- VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc);
362
+ VALUE rdoc = noko_xml_document_wrap(args->klass, doc);
363
+ rb_iv_set(rdoc, "@url", args->url_or_frag);
364
+ rb_iv_set(rdoc, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
361
365
  args->doc = NULL; // The Ruby runtime now owns doc so don't delete it.
362
366
  add_errors(output, rdoc, args->input, args->url_or_frag);
363
367
  return rdoc;
@@ -401,7 +405,7 @@ static xmlNodePtr
401
405
  extract_xml_node(VALUE node)
402
406
  {
403
407
  xmlNodePtr xml_node;
404
- Data_Get_Struct(node, xmlNode, xml_node);
408
+ Noko_Node_Get_Struct(node, xmlNode, xml_node);
405
409
  return xml_node;
406
410
  }
407
411
 
@@ -498,9 +502,11 @@ error:
498
502
  }
499
503
 
500
504
  // Encoding.
501
- if (RSTRING_LEN(tag_name) == 14
505
+ if (ctx_ns == GUMBO_NAMESPACE_MATHML
506
+ && RSTRING_LEN(tag_name) == 14
502
507
  && !st_strcasecmp(ctx_tag, "annotation-xml")) {
503
508
  VALUE enc = rb_funcall(ctx, rb_intern_const("[]"),
509
+ 1,
504
510
  rb_utf8_str_new_static("encoding", 8));
505
511
  if (RTEST(enc)) {
506
512
  Check_Type(enc, T_STRING);
@@ -512,8 +518,11 @@ error:
512
518
  // Quirks mode.
513
519
  VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
514
520
  VALUE dtd = rb_funcall(doc, internal_subset, 0);
515
- if (NIL_P(dtd)) {
521
+ VALUE doc_quirks_mode = rb_iv_get(doc, "@quirks_mode");
522
+ if (NIL_P(ctx) || NIL_P(doc_quirks_mode)) {
516
523
  quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
524
+ } else if (NIL_P(dtd)) {
525
+ quirks_mode = GUMBO_DOCTYPE_QUIRKS;
517
526
  } else {
518
527
  VALUE dtd_name = rb_funcall(dtd, name, 0);
519
528
  VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
@@ -560,13 +569,14 @@ fragment_continue(VALUE parse_args)
560
569
  args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it.
561
570
  xmlNodePtr xml_frag = extract_xml_node(doc_fragment);
562
571
  build_tree(xml_doc, xml_frag, output->root);
572
+ rb_iv_set(doc_fragment, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
563
573
  add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9));
564
574
  return Qnil;
565
575
  }
566
576
 
567
577
  // Initialize the Nokogumbo class and fetch constants we will use later.
568
578
  void
569
- noko_init_gumbo()
579
+ noko_init_gumbo(void)
570
580
  {
571
581
  // Class constants.
572
582
  cNokogiriHtml5Document = rb_define_class_under(mNokogiriHtml5, "Document", cNokogiriHtml4Document);
@@ -577,7 +587,7 @@ noko_init_gumbo()
577
587
  parent = rb_intern_const("parent");
578
588
 
579
589
  // Define Nokogumbo module with parse and fragment methods.
580
- rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 5);
590
+ rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 6);
581
591
  rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6);
582
592
  }
583
593
 
@@ -146,11 +146,11 @@ rb_html_document_type(VALUE self)
146
146
  {
147
147
  htmlDocPtr doc;
148
148
  Data_Get_Struct(self, xmlDoc, doc);
149
- return INT2NUM((long)doc->type);
149
+ return INT2NUM(doc->type);
150
150
  }
151
151
 
152
152
  void
153
- noko_init_html_document()
153
+ noko_init_html_document(void)
154
154
  {
155
155
  assert(cNokogiriXmlDocument);
156
156
  cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument);
@@ -270,7 +270,7 @@ get_description(VALUE klass, VALUE tag_name)
270
270
  }
271
271
 
272
272
  void
273
- noko_init_html_element_description()
273
+ noko_init_html_element_description(void)
274
274
  {
275
275
  cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject);
276
276
 
@@ -20,7 +20,7 @@ get(VALUE _, VALUE rb_entity_name)
20
20
  return Qnil;
21
21
  }
22
22
 
23
- rb_constructor_args[0] = INT2NUM((long)c_entity_desc->value);
23
+ rb_constructor_args[0] = UINT2NUM(c_entity_desc->value);
24
24
  rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
25
25
  rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
26
26
 
@@ -29,7 +29,7 @@ get(VALUE _, VALUE rb_entity_name)
29
29
  }
30
30
 
31
31
  void
32
- noko_init_html_entity_lookup()
32
+ noko_init_html_entity_lookup(void)
33
33
  {
34
34
  cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
35
35