nokogiri 1.12.5 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +41 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +23 -14
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -66
- data/ext/nokogiri/extconf.rb +159 -63
- data/ext/nokogiri/gumbo.c +21 -11
- data/ext/nokogiri/html4_document.c +2 -2
- data/ext/nokogiri/html4_element_description.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +3 -9
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.c +38 -51
- data/ext/nokogiri/nokogiri.h +26 -14
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +3 -3
- data/ext/nokogiri/xml_attribute_decl.c +5 -5
- data/ext/nokogiri/xml_cdata.c +3 -3
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +53 -44
- data/ext/nokogiri/xml_document_fragment.c +1 -3
- data/ext/nokogiri/xml_dtd.c +11 -11
- data/ext/nokogiri/xml_element_content.c +3 -3
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +28 -14
- data/ext/nokogiri/xml_entity_decl.c +6 -6
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +80 -14
- data/ext/nokogiri/xml_node.c +982 -396
- data/ext/nokogiri/xml_node_set.c +4 -6
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +133 -32
- data/ext/nokogiri/xml_relax_ng.c +1 -3
- data/ext/nokogiri/xml_sax_parser.c +23 -17
- data/ext/nokogiri/xml_sax_parser_context.c +11 -9
- data/ext/nokogiri/xml_sax_push_parser.c +1 -3
- data/ext/nokogiri/xml_schema.c +4 -6
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +2 -2
- data/ext/nokogiri/xml_xpath_context.c +144 -114
- data/ext/nokogiri/xslt_stylesheet.c +122 -23
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +2 -2
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +2 -2
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +8 -16
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +184 -85
- data/lib/nokogiri/css.rb +44 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +56 -164
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +5 -2
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +12 -5
- data/lib/nokogiri/html5/document.rb +126 -32
- data/lib/nokogiri/html5/document_fragment.rb +14 -4
- data/lib/nokogiri/html5/node.rb +12 -7
- data/lib/nokogiri/html5.rb +138 -222
- data/lib/nokogiri/jruby/dependencies.rb +2 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +32 -24
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +54 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +35 -33
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +232 -143
- data/lib/nokogiri/xml/document_fragment.rb +88 -42
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -8
- data/lib/nokogiri/xml/node.rb +708 -383
- data/lib/nokogiri/xml/node_set.rb +134 -59
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +140 -56
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +26 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +20 -24
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +38 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +4 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +22 -27
- data/lib/xsd/xmlparser/nokogiri.rb +28 -25
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +20 -171
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
data/ext/nokogiri/extconf.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
2
|
+
|
3
|
+
# rubocop:disable Style/GlobalVars
|
4
|
+
|
5
|
+
ENV["RC_ARCHS"] = "" if RUBY_PLATFORM.include?("darwin")
|
3
6
|
|
4
7
|
require "mkmf"
|
5
8
|
require "rbconfig"
|
@@ -12,16 +15,14 @@ PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."
|
|
12
15
|
REQUIRED_LIBXML_VERSION = "2.6.21"
|
13
16
|
RECOMMENDED_LIBXML_VERSION = "2.9.3"
|
14
17
|
|
15
|
-
#
|
16
|
-
# Keep this version in sync with the one in the Rakefile !
|
17
|
-
REQUIRED_MINI_PORTILE_VERSION = "~> 2.6.1"
|
18
|
+
REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.0" # keep this version in sync with the one in the gemspec
|
18
19
|
REQUIRED_PKG_CONFIG_VERSION = "~> 1.1"
|
19
20
|
|
20
21
|
# Keep track of what versions of what libraries we build against
|
21
22
|
OTHER_LIBRARY_VERSIONS = {}
|
22
23
|
|
23
24
|
NOKOGIRI_HELP_MESSAGE = <<~HELP
|
24
|
-
USAGE: ruby #{$
|
25
|
+
USAGE: ruby #{$PROGRAM_NAME} [options]
|
25
26
|
|
26
27
|
Flags that are always valid:
|
27
28
|
|
@@ -175,23 +176,23 @@ def config_system_libraries?
|
|
175
176
|
end
|
176
177
|
|
177
178
|
def windows?
|
178
|
-
RbConfig::CONFIG["target_os"]
|
179
|
+
RbConfig::CONFIG["target_os"].match?(/mingw|mswin/)
|
179
180
|
end
|
180
181
|
|
181
182
|
def solaris?
|
182
|
-
RbConfig::CONFIG["target_os"]
|
183
|
+
RbConfig::CONFIG["target_os"].include?("solaris")
|
183
184
|
end
|
184
185
|
|
185
186
|
def darwin?
|
186
|
-
RbConfig::CONFIG["target_os"]
|
187
|
+
RbConfig::CONFIG["target_os"].include?("darwin")
|
187
188
|
end
|
188
189
|
|
189
190
|
def openbsd?
|
190
|
-
RbConfig::CONFIG["target_os"]
|
191
|
+
RbConfig::CONFIG["target_os"].include?("openbsd")
|
191
192
|
end
|
192
193
|
|
193
194
|
def aix?
|
194
|
-
RbConfig::CONFIG["target_os"]
|
195
|
+
RbConfig::CONFIG["target_os"].include?("aix")
|
195
196
|
end
|
196
197
|
|
197
198
|
def nix?
|
@@ -199,7 +200,7 @@ def nix?
|
|
199
200
|
end
|
200
201
|
|
201
202
|
def truffle?
|
202
|
-
|
203
|
+
RUBY_ENGINE == "truffleruby"
|
203
204
|
end
|
204
205
|
|
205
206
|
def concat_flags(*args)
|
@@ -210,6 +211,28 @@ def local_have_library(lib, func = nil, headers = nil)
|
|
210
211
|
have_library(lib, func, headers) || have_library("lib#{lib}", func, headers)
|
211
212
|
end
|
212
213
|
|
214
|
+
def zlib_source(version_string)
|
215
|
+
# As of 2022-12, I'm starting to see failed downloads often enough from zlib.net that I want to
|
216
|
+
# change the default to github.
|
217
|
+
if ENV["NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE"]
|
218
|
+
"https://zlib.net/fossils/zlib-#{version_string}.tar.gz"
|
219
|
+
else
|
220
|
+
"https://github.com/madler/zlib/releases/download/v#{version_string}/zlib-#{version_string}.tar.gz"
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
def gnome_source
|
225
|
+
# As of 2022-02-20, some mirrors have expired SSL certificates. I'm able to retrieve from my home,
|
226
|
+
# but the github actions workers see an expired cert from whatever host they resolve.
|
227
|
+
#
|
228
|
+
# See https://github.com/sparklemotion/nokogiri/runs/5266206403?check_suite_focus=true
|
229
|
+
if ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"]
|
230
|
+
"https://download.gnome.org"
|
231
|
+
else
|
232
|
+
"https://mirror.csclub.uwaterloo.ca/gnome" # old reliable
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
213
236
|
LOCAL_PACKAGE_RESPONSE = Object.new
|
214
237
|
def LOCAL_PACKAGE_RESPONSE.%(package)
|
215
238
|
package ? "yes: #{package}" : "no"
|
@@ -284,17 +307,16 @@ ensure
|
|
284
307
|
end
|
285
308
|
|
286
309
|
def abort_could_not_find_library(lib)
|
287
|
-
|
310
|
+
callers = caller(1..2).join("\n")
|
311
|
+
abort("-----\n#{callers}\n#{lib} is missing. Please locate mkmf.log to investigate how it is failing.\n-----")
|
288
312
|
end
|
289
313
|
|
290
|
-
def chdir_for_build
|
314
|
+
def chdir_for_build(&block)
|
291
315
|
# When using rake-compiler-dock on Windows, the underlying Virtualbox shared
|
292
316
|
# folders don't support symlinks, but libiconv expects it for a build on
|
293
317
|
# Linux. We work around this limitation by using the temp dir for cooking.
|
294
|
-
build_dir = ENV["RCD_HOST_RUBY_PLATFORM"].to_s
|
295
|
-
Dir.chdir(build_dir)
|
296
|
-
yield
|
297
|
-
end
|
318
|
+
build_dir = /mingw|mswin|cygwin/.match?(ENV["RCD_HOST_RUBY_PLATFORM"].to_s) ? "/tmp" : "."
|
319
|
+
Dir.chdir(build_dir, &block)
|
298
320
|
end
|
299
321
|
|
300
322
|
def sh_export_path(path)
|
@@ -370,6 +392,7 @@ def iconv_configure_flags
|
|
370
392
|
["iconv", "opt"].each do |target|
|
371
393
|
config = preserving_globals { dir_config(target) }
|
372
394
|
next unless config.any? && try_link_iconv("--with-#{target}-* flags") { dir_config(target) }
|
395
|
+
|
373
396
|
idirs, ldirs = config.map do |dirs|
|
374
397
|
Array(dirs).flat_map do |dir|
|
375
398
|
dir.split(File::PATH_SEPARATOR)
|
@@ -387,8 +410,8 @@ def iconv_configure_flags
|
|
387
410
|
return ["--with-iconv=yes"]
|
388
411
|
end
|
389
412
|
|
390
|
-
config = preserving_globals {
|
391
|
-
if config && try_link_iconv("pkg-config libiconv") {
|
413
|
+
config = preserving_globals { pkg_config("libiconv") }
|
414
|
+
if config && try_link_iconv("pkg-config libiconv") { pkg_config("libiconv") }
|
392
415
|
cflags, ldflags, libs = config
|
393
416
|
|
394
417
|
return [
|
@@ -402,9 +425,9 @@ def iconv_configure_flags
|
|
402
425
|
abort_could_not_find_library("libiconv")
|
403
426
|
end
|
404
427
|
|
405
|
-
def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
|
428
|
+
def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
|
406
429
|
require "rubygems"
|
407
|
-
gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION)
|
430
|
+
gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION) # gemspec is not respected at install time
|
408
431
|
require "mini_portile2"
|
409
432
|
message("Using mini_portile version #{MiniPortile::VERSION}\n")
|
410
433
|
|
@@ -413,10 +436,16 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
|
|
413
436
|
end
|
414
437
|
|
415
438
|
MiniPortile.new(name, version).tap do |recipe|
|
416
|
-
recipe.
|
417
|
-
|
418
|
-
|
439
|
+
def recipe.port_path
|
440
|
+
"#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}"
|
441
|
+
end
|
442
|
+
|
443
|
+
# We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
|
444
|
+
# prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
|
419
445
|
recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
|
446
|
+
recipe.host = recipe.host.gsub(/i386/, "i686")
|
447
|
+
|
448
|
+
recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
|
420
449
|
recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
|
421
450
|
|
422
451
|
yield recipe
|
@@ -462,7 +491,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
|
|
462
491
|
end
|
463
492
|
|
464
493
|
if RbConfig::CONFIG["target_cpu"] == "universal"
|
465
|
-
|
494
|
+
["CFLAGS", "LDFLAGS"].each do |key|
|
466
495
|
unless env[key].include?("-arch")
|
467
496
|
env[key] = concat_flags(env[key], RbConfig::CONFIG["ARCH_FLAG"])
|
468
497
|
end
|
@@ -473,7 +502,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
|
|
473
502
|
"#{key}=#{value.strip}"
|
474
503
|
end
|
475
504
|
|
476
|
-
checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{
|
505
|
+
checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{RUBY_PLATFORM}.installed"
|
477
506
|
if File.exist?(checkpoint) && !recipe.source_directory
|
478
507
|
message("Building Nokogiri with a packaged version of #{name}-#{version}.\n")
|
479
508
|
else
|
@@ -487,7 +516,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p=true)
|
|
487
516
|
message("The following patches are being applied:\n")
|
488
517
|
|
489
518
|
recipe.patch_files.each do |patch|
|
490
|
-
message(" - %s\n"
|
519
|
+
message(format(" - %s\n", File.basename(patch)))
|
491
520
|
end
|
492
521
|
end
|
493
522
|
|
@@ -555,6 +584,34 @@ def do_clean
|
|
555
584
|
exit!(0)
|
556
585
|
end
|
557
586
|
|
587
|
+
# In ruby 3.2, symbol resolution changed on Darwin, to introduce the `-bundle_loader` flag to
|
588
|
+
# resolve symbols against the ruby binary.
|
589
|
+
#
|
590
|
+
# This makes it challenging to build a single extension that works with both a ruby with
|
591
|
+
# `--enable-shared` and one with `--disable-shared`. To work around that, we choose to add
|
592
|
+
# `-flat_namespace` to the link line (later in this file).
|
593
|
+
#
|
594
|
+
# The `-flat_namespace` line introduces its own behavior change, which is that (similar to on
|
595
|
+
# Linux), any symbols in the extension that are exported may now be resolved by shared libraries
|
596
|
+
# loaded by the Ruby process. Specifically, that means that libxml2 and libxslt, which are
|
597
|
+
# statically linked into the nokogiri bundle, will resolve (at runtime) to a system libxml2 loaded
|
598
|
+
# by Ruby on Darwin. And it appears that often Ruby on Darwin does indeed load the system libxml2,
|
599
|
+
# and that messes with our assumptions about whether we're running with a patched libxml2 or a
|
600
|
+
# vanilla libxml2.
|
601
|
+
#
|
602
|
+
# We choose to use `-load_hidden` in this case to prevent exporting those symbols from libxml2 and
|
603
|
+
# libxslt, which ensures that they will be resolved to the static libraries in the bundle. In other
|
604
|
+
# words, when we use `load_hidden`, what happens in the extension stays in the extension.
|
605
|
+
#
|
606
|
+
# See https://github.com/rake-compiler/rake-compiler-dock/issues/87 for more info.
|
607
|
+
#
|
608
|
+
# Anyway, this method is the logical bit to tell us when to turn on these workarounds.
|
609
|
+
def needs_darwin_linker_hack
|
610
|
+
config_cross_build? &&
|
611
|
+
darwin? &&
|
612
|
+
Gem::Requirement.new("~> 3.2").satisfied_by?(Gem::Version.new(RbConfig::CONFIG["ruby_version"].split("+").first))
|
613
|
+
end
|
614
|
+
|
558
615
|
#
|
559
616
|
# main
|
560
617
|
#
|
@@ -562,7 +619,7 @@ do_help if arg_config("--help")
|
|
562
619
|
do_clean if arg_config("--clean")
|
563
620
|
|
564
621
|
if openbsd? && !config_system_libraries?
|
565
|
-
|
622
|
+
unless %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1).include?("clang")
|
566
623
|
(ENV["CC"] ||= find_executable("egcc")) ||
|
567
624
|
abort("Please install gcc 4.9+ from ports using `pkg_add -v gcc`")
|
568
625
|
end
|
@@ -598,6 +655,9 @@ $LIBS = concat_flags($LIBS, ENV["LIBS"])
|
|
598
655
|
# errors/warnings. see #2302
|
599
656
|
append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
|
600
657
|
|
658
|
+
# gumbo html5 serialization is slower with O3, let's make sure we use O2
|
659
|
+
append_cflags("-O2")
|
660
|
+
|
601
661
|
# always include debugging information
|
602
662
|
append_cflags("-g")
|
603
663
|
|
@@ -607,8 +667,18 @@ append_cflags("-Winline")
|
|
607
667
|
# good to have no matter what Ruby was compiled with
|
608
668
|
append_cflags("-Wmissing-noreturn")
|
609
669
|
|
670
|
+
# check integer loss of precision
|
671
|
+
if darwin?
|
672
|
+
append_cflags("-Wshorten-64-to-32")
|
673
|
+
else
|
674
|
+
append_cflags("-Wconversion -Wno-sign-conversion")
|
675
|
+
end
|
676
|
+
|
610
677
|
# handle clang variations, see #1101
|
611
|
-
|
678
|
+
if darwin?
|
679
|
+
append_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future")
|
680
|
+
append_cflags("-Wno-unknown-warning-option")
|
681
|
+
end
|
612
682
|
|
613
683
|
# these tend to be noisy, but on occasion useful during development
|
614
684
|
# append_cflags(["-Wcast-qual", "-Wwrite-strings"])
|
@@ -648,6 +718,10 @@ else
|
|
648
718
|
cross_build_p = config_cross_build?
|
649
719
|
message "Cross build is #{cross_build_p ? "enabled" : "disabled"}.\n"
|
650
720
|
|
721
|
+
if needs_darwin_linker_hack
|
722
|
+
append_ldflags("-Wl,-flat_namespace")
|
723
|
+
end
|
724
|
+
|
651
725
|
require "yaml"
|
652
726
|
dependencies = YAML.load_file(File.join(PACKAGE_ROOT_DIR, "dependencies.yml"))
|
653
727
|
|
@@ -656,7 +730,7 @@ else
|
|
656
730
|
if cross_build_p || windows?
|
657
731
|
zlib_recipe = process_recipe("zlib", dependencies["zlib"]["version"], static_p, cross_build_p) do |recipe|
|
658
732
|
recipe.files = [{
|
659
|
-
url:
|
733
|
+
url: zlib_source(recipe.version),
|
660
734
|
sha256: dependencies["zlib"]["sha256"],
|
661
735
|
}]
|
662
736
|
if windows?
|
@@ -694,16 +768,16 @@ else
|
|
694
768
|
else
|
695
769
|
class << recipe
|
696
770
|
def configure
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
771
|
+
env = {}
|
772
|
+
env["CFLAGS"] = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
|
773
|
+
env["CHOST"] = host
|
774
|
+
execute("configure", ["./configure", "--static", configure_prefix], { env: env })
|
775
|
+
if darwin?
|
776
|
+
# needed as of zlib 1.2.13
|
777
|
+
Dir.chdir(work_path) do
|
778
|
+
makefile = File.read("Makefile").gsub(/^AR=.*$/, "AR=#{host}-libtool")
|
779
|
+
File.open("Makefile", "w") { |m| m.write(makefile) }
|
780
|
+
end
|
707
781
|
end
|
708
782
|
end
|
709
783
|
end
|
@@ -714,10 +788,13 @@ else
|
|
714
788
|
libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p,
|
715
789
|
cross_build_p) do |recipe|
|
716
790
|
recipe.files = [{
|
717
|
-
url: "
|
791
|
+
url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
|
718
792
|
sha256: dependencies["libiconv"]["sha256"],
|
719
793
|
}]
|
720
794
|
|
795
|
+
# The libiconv configure script doesn't accept "arm64" host string but "aarch64"
|
796
|
+
recipe.host = recipe.host.gsub("arm64-apple-darwin", "aarch64-apple-darwin")
|
797
|
+
|
721
798
|
cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g")
|
722
799
|
|
723
800
|
recipe.configure_options += [
|
@@ -741,12 +818,21 @@ else
|
|
741
818
|
Tools" to open the developer site, download the installer for your OS
|
742
819
|
version and run it.
|
743
820
|
-----
|
744
|
-
|
821
|
+
EOM
|
745
822
|
end
|
746
823
|
|
747
|
-
|
748
|
-
|
749
|
-
|
824
|
+
if zlib_recipe
|
825
|
+
append_cppflags("-I#{zlib_recipe.path}/include")
|
826
|
+
$LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH
|
827
|
+
ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z",
|
828
|
+
headers: "zlib.h", func: "gzdopen")
|
829
|
+
end
|
830
|
+
|
831
|
+
if libiconv_recipe
|
832
|
+
append_cppflags("-I#{libiconv_recipe.path}/include")
|
833
|
+
$LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH
|
834
|
+
ensure_package_configuration(opt: "iconv", pc: "iconv", lib: "iconv",
|
835
|
+
headers: "iconv.h", func: "iconv_open")
|
750
836
|
end
|
751
837
|
|
752
838
|
libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe|
|
@@ -754,8 +840,9 @@ else
|
|
754
840
|
if source_dir
|
755
841
|
recipe.source_directory = source_dir
|
756
842
|
else
|
843
|
+
minor_version = Gem::Version.new(recipe.version).segments.take(2).join(".")
|
757
844
|
recipe.files = [{
|
758
|
-
url: "
|
845
|
+
url: "#{gnome_source}/sources/libxml2/#{minor_version}/#{recipe.name}-#{recipe.version}.tar.xz",
|
759
846
|
sha256: dependencies["libxml2"]["sha256"],
|
760
847
|
}]
|
761
848
|
recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort
|
@@ -765,7 +852,6 @@ else
|
|
765
852
|
|
766
853
|
if zlib_recipe
|
767
854
|
recipe.configure_options << "--with-zlib=#{zlib_recipe.path}"
|
768
|
-
cflags = concat_flags(cflags, "-I#{zlib_recipe.path}/include")
|
769
855
|
end
|
770
856
|
|
771
857
|
if libiconv_recipe
|
@@ -803,8 +889,9 @@ else
|
|
803
889
|
if source_dir
|
804
890
|
recipe.source_directory = source_dir
|
805
891
|
else
|
892
|
+
minor_version = Gem::Version.new(recipe.version).segments.take(2).join(".")
|
806
893
|
recipe.files = [{
|
807
|
-
url: "
|
894
|
+
url: "#{gnome_source}/sources/libxslt/#{minor_version}/#{recipe.name}-#{recipe.version}.tar.xz",
|
808
895
|
sha256: dependencies["libxslt"]["sha256"],
|
809
896
|
}]
|
810
897
|
recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxslt", "*.patch")].sort
|
@@ -816,6 +903,11 @@ else
|
|
816
903
|
recipe.configure_options += ["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"]
|
817
904
|
end
|
818
905
|
|
906
|
+
if windows?
|
907
|
+
cflags = concat_flags(cflags, "-ULIBXSLT_STATIC", "-DIN_LIBXSLT")
|
908
|
+
cflags = concat_flags(cflags, "-ULIBEXSLT_STATIC", "-DIN_LIBEXSLT")
|
909
|
+
end
|
910
|
+
|
819
911
|
recipe.configure_options << if source_dir
|
820
912
|
"--config-cache"
|
821
913
|
else
|
@@ -834,15 +926,16 @@ else
|
|
834
926
|
append_cppflags("-DNOKOGIRI_PACKAGED_LIBRARIES")
|
835
927
|
append_cppflags("-DNOKOGIRI_PRECOMPILED_LIBRARIES") if cross_build_p
|
836
928
|
|
837
|
-
$LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH if zlib_recipe
|
838
|
-
$LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH if libiconv_recipe
|
839
|
-
|
840
929
|
$libs = $libs.shellsplit.tap do |libs|
|
841
930
|
[libxml2_recipe, libxslt_recipe].each do |recipe|
|
842
931
|
libname = recipe.name[/\Alib(.+)\z/, 1]
|
843
|
-
|
932
|
+
config_basename = "#{libname}-config"
|
933
|
+
File.join(recipe.path, "bin", config_basename).tap do |config|
|
844
934
|
# call config scripts explicitly with 'sh' for compat with Windows
|
845
|
-
|
935
|
+
cflags = %x(sh #{config} --cflags).strip
|
936
|
+
message("#{config_basename} cflags: #{cflags}\n")
|
937
|
+
$CPPFLAGS = concat_flags(cflags, $CPPFLAGS) # prepend
|
938
|
+
|
846
939
|
%x(sh #{config} --libs).strip.shellsplit.each do |arg|
|
847
940
|
case arg
|
848
941
|
when /\A-L(.+)\z/
|
@@ -861,7 +954,7 @@ else
|
|
861
954
|
end
|
862
955
|
|
863
956
|
patches_string = recipe.patch_files.map { |path| File.basename(path) }.join(" ")
|
864
|
-
append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="
|
957
|
+
append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\"#{patches_string}\\""])
|
865
958
|
|
866
959
|
case libname
|
867
960
|
when "xml2"
|
@@ -880,16 +973,17 @@ else
|
|
880
973
|
end.shelljoin
|
881
974
|
|
882
975
|
if static_p
|
976
|
+
static_archive_ld_flag = needs_darwin_linker_hack ? ["-load_hidden"] : []
|
883
977
|
$libs = $libs.shellsplit.map do |arg|
|
884
978
|
case arg
|
885
979
|
when "-lxml2"
|
886
|
-
File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))
|
980
|
+
static_archive_ld_flag + [File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg))]
|
887
981
|
when "-lxslt", "-lexslt"
|
888
|
-
File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))
|
982
|
+
static_archive_ld_flag + [File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg))]
|
889
983
|
else
|
890
984
|
arg
|
891
985
|
end
|
892
|
-
end.shelljoin
|
986
|
+
end.flatten.shelljoin
|
893
987
|
end
|
894
988
|
|
895
989
|
ensure_func("xmlParseDoc", "libxml/parser.h")
|
@@ -925,11 +1019,11 @@ libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_b
|
|
925
1019
|
end
|
926
1020
|
|
927
1021
|
def compile
|
928
|
-
cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
|
1022
|
+
cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-O2", "-g")
|
929
1023
|
|
930
1024
|
env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
|
931
1025
|
if config_cross_build?
|
932
|
-
if
|
1026
|
+
if /darwin/.match?(host)
|
933
1027
|
env["AR"] = "#{host}-libtool"
|
934
1028
|
env["ARFLAGS"] = "-o"
|
935
1029
|
else
|
@@ -945,7 +1039,7 @@ end
|
|
945
1039
|
append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}")
|
946
1040
|
$libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a")
|
947
1041
|
$LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")]
|
948
|
-
ensure_func("gumbo_parse_with_options", "
|
1042
|
+
ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h")
|
949
1043
|
|
950
1044
|
have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21
|
951
1045
|
have_func("xmlFirstElementChild") # introduced in libxml 2.7.3
|
@@ -953,11 +1047,11 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
|
|
953
1047
|
have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
|
954
1048
|
have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
|
955
1049
|
have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
|
956
|
-
|
957
|
-
have_func("
|
1050
|
+
have_func("rb_gc_location") # introduced in Ruby 2.7
|
1051
|
+
have_func("rb_category_warning") # introduced in Ruby 3.0
|
958
1052
|
|
959
1053
|
other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
|
960
|
-
append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="
|
1054
|
+
append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\"#{other_library_versions_string}\\""])
|
961
1055
|
|
962
1056
|
unless config_system_libraries?
|
963
1057
|
if cross_build_p
|
@@ -986,3 +1080,5 @@ if config_clean?
|
|
986
1080
|
EOF
|
987
1081
|
end
|
988
1082
|
end
|
1083
|
+
|
1084
|
+
# rubocop:enable Style/GlobalVars
|
data/ext/nokogiri/gumbo.c
CHANGED
@@ -23,13 +23,13 @@
|
|
23
23
|
//
|
24
24
|
// Processing starts by calling gumbo_parse_with_options. The resulting document tree
|
25
25
|
// is then walked, a parallel libxml2 tree is constructed, and the final document is
|
26
|
-
// then wrapped using
|
26
|
+
// then wrapped using noko_xml_document_wrap. This approach reduces memory and CPU
|
27
27
|
// requirements as Ruby objects are only built when necessary.
|
28
28
|
//
|
29
29
|
|
30
30
|
#include <nokogiri.h>
|
31
31
|
|
32
|
-
#include "
|
32
|
+
#include "nokogiri_gumbo.h"
|
33
33
|
|
34
34
|
VALUE cNokogiriHtml5Document;
|
35
35
|
|
@@ -281,12 +281,12 @@ add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url)
|
|
281
281
|
rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR
|
282
282
|
rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR
|
283
283
|
rb_iv_set(syntax_error, "@file", url);
|
284
|
-
rb_iv_set(syntax_error, "@line",
|
284
|
+
rb_iv_set(syntax_error, "@line", SIZET2NUM(position.line));
|
285
285
|
rb_iv_set(syntax_error, "@str1", str1);
|
286
286
|
rb_iv_set(syntax_error, "@str2", Qnil);
|
287
287
|
rb_iv_set(syntax_error, "@str3", Qnil);
|
288
288
|
rb_iv_set(syntax_error, "@int1", INT2NUM(0));
|
289
|
-
rb_iv_set(syntax_error, "@column",
|
289
|
+
rb_iv_set(syntax_error, "@column", SIZET2NUM(position.column));
|
290
290
|
rb_ary_push(rerrors, syntax_error);
|
291
291
|
}
|
292
292
|
rb_iv_set(rdoc, "@errors", rerrors);
|
@@ -297,6 +297,7 @@ typedef struct {
|
|
297
297
|
GumboOutput *output;
|
298
298
|
VALUE input;
|
299
299
|
VALUE url_or_frag;
|
300
|
+
VALUE klass;
|
300
301
|
xmlDocPtr doc;
|
301
302
|
} ParseArgs;
|
302
303
|
|
@@ -321,7 +322,7 @@ static VALUE parse_continue(VALUE parse_args);
|
|
321
322
|
* @!visibility protected
|
322
323
|
*/
|
323
324
|
static VALUE
|
324
|
-
parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth)
|
325
|
+
parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth, VALUE klass)
|
325
326
|
{
|
326
327
|
GumboOptions options = kGumboDefaultOptions;
|
327
328
|
options.max_attributes = NUM2INT(max_attributes);
|
@@ -333,6 +334,7 @@ parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors
|
|
333
334
|
.output = output,
|
334
335
|
.input = input,
|
335
336
|
.url_or_frag = url,
|
337
|
+
.klass = klass,
|
336
338
|
.doc = NULL,
|
337
339
|
};
|
338
340
|
|
@@ -357,7 +359,9 @@ parse_continue(VALUE parse_args)
|
|
357
359
|
}
|
358
360
|
args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
|
359
361
|
build_tree(doc, (xmlNodePtr)doc, output->document);
|
360
|
-
VALUE rdoc =
|
362
|
+
VALUE rdoc = noko_xml_document_wrap(args->klass, doc);
|
363
|
+
rb_iv_set(rdoc, "@url", args->url_or_frag);
|
364
|
+
rb_iv_set(rdoc, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
|
361
365
|
args->doc = NULL; // The Ruby runtime now owns doc so don't delete it.
|
362
366
|
add_errors(output, rdoc, args->input, args->url_or_frag);
|
363
367
|
return rdoc;
|
@@ -401,7 +405,7 @@ static xmlNodePtr
|
|
401
405
|
extract_xml_node(VALUE node)
|
402
406
|
{
|
403
407
|
xmlNodePtr xml_node;
|
404
|
-
|
408
|
+
Noko_Node_Get_Struct(node, xmlNode, xml_node);
|
405
409
|
return xml_node;
|
406
410
|
}
|
407
411
|
|
@@ -498,9 +502,11 @@ error:
|
|
498
502
|
}
|
499
503
|
|
500
504
|
// Encoding.
|
501
|
-
if (
|
505
|
+
if (ctx_ns == GUMBO_NAMESPACE_MATHML
|
506
|
+
&& RSTRING_LEN(tag_name) == 14
|
502
507
|
&& !st_strcasecmp(ctx_tag, "annotation-xml")) {
|
503
508
|
VALUE enc = rb_funcall(ctx, rb_intern_const("[]"),
|
509
|
+
1,
|
504
510
|
rb_utf8_str_new_static("encoding", 8));
|
505
511
|
if (RTEST(enc)) {
|
506
512
|
Check_Type(enc, T_STRING);
|
@@ -512,8 +518,11 @@ error:
|
|
512
518
|
// Quirks mode.
|
513
519
|
VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
|
514
520
|
VALUE dtd = rb_funcall(doc, internal_subset, 0);
|
515
|
-
|
521
|
+
VALUE doc_quirks_mode = rb_iv_get(doc, "@quirks_mode");
|
522
|
+
if (NIL_P(ctx) || NIL_P(doc_quirks_mode)) {
|
516
523
|
quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
|
524
|
+
} else if (NIL_P(dtd)) {
|
525
|
+
quirks_mode = GUMBO_DOCTYPE_QUIRKS;
|
517
526
|
} else {
|
518
527
|
VALUE dtd_name = rb_funcall(dtd, name, 0);
|
519
528
|
VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
|
@@ -560,13 +569,14 @@ fragment_continue(VALUE parse_args)
|
|
560
569
|
args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it.
|
561
570
|
xmlNodePtr xml_frag = extract_xml_node(doc_fragment);
|
562
571
|
build_tree(xml_doc, xml_frag, output->root);
|
572
|
+
rb_iv_set(doc_fragment, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
|
563
573
|
add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9));
|
564
574
|
return Qnil;
|
565
575
|
}
|
566
576
|
|
567
577
|
// Initialize the Nokogumbo class and fetch constants we will use later.
|
568
578
|
void
|
569
|
-
noko_init_gumbo()
|
579
|
+
noko_init_gumbo(void)
|
570
580
|
{
|
571
581
|
// Class constants.
|
572
582
|
cNokogiriHtml5Document = rb_define_class_under(mNokogiriHtml5, "Document", cNokogiriHtml4Document);
|
@@ -577,7 +587,7 @@ noko_init_gumbo()
|
|
577
587
|
parent = rb_intern_const("parent");
|
578
588
|
|
579
589
|
// Define Nokogumbo module with parse and fragment methods.
|
580
|
-
rb_define_singleton_method(mNokogiriGumbo, "parse", parse,
|
590
|
+
rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 6);
|
581
591
|
rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6);
|
582
592
|
}
|
583
593
|
|
@@ -146,11 +146,11 @@ rb_html_document_type(VALUE self)
|
|
146
146
|
{
|
147
147
|
htmlDocPtr doc;
|
148
148
|
Data_Get_Struct(self, xmlDoc, doc);
|
149
|
-
return INT2NUM(
|
149
|
+
return INT2NUM(doc->type);
|
150
150
|
}
|
151
151
|
|
152
152
|
void
|
153
|
-
noko_init_html_document()
|
153
|
+
noko_init_html_document(void)
|
154
154
|
{
|
155
155
|
assert(cNokogiriXmlDocument);
|
156
156
|
cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument);
|
@@ -270,7 +270,7 @@ get_description(VALUE klass, VALUE tag_name)
|
|
270
270
|
}
|
271
271
|
|
272
272
|
void
|
273
|
-
noko_init_html_element_description()
|
273
|
+
noko_init_html_element_description(void)
|
274
274
|
{
|
275
275
|
cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject);
|
276
276
|
|
@@ -20,7 +20,7 @@ get(VALUE _, VALUE rb_entity_name)
|
|
20
20
|
return Qnil;
|
21
21
|
}
|
22
22
|
|
23
|
-
rb_constructor_args[0] =
|
23
|
+
rb_constructor_args[0] = UINT2NUM(c_entity_desc->value);
|
24
24
|
rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
|
25
25
|
rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
|
26
26
|
|
@@ -29,7 +29,7 @@ get(VALUE _, VALUE rb_entity_name)
|
|
29
29
|
}
|
30
30
|
|
31
31
|
void
|
32
|
-
noko_init_html_entity_lookup()
|
32
|
+
noko_init_html_entity_lookup(void)
|
33
33
|
{
|
34
34
|
cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
|
35
35
|
|