nokogiri 1.14.2 → 1.16.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +16 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f268b2edaa2782a3e9a64e671e36d88149ef5dc23fcf3967a8a54c4475ed7b85
|
4
|
+
data.tar.gz: b49fbf8708f4733e34c211935637ad5999d3950b89aca2da71899c71eaa14901
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2f096bf7e99c453450c2a9d78c7274a3472d29ea8e8aeeff2dfb6976b5b009a8bfd5e673c7eb1a2575d820df26a830ce4e4043d33fb4c0239bf8098bdb98188
|
7
|
+
data.tar.gz: aa7658f376b0278150550d5426ddbe11469be8624d6fc732cf035d0ed00736e177e2848182c9077a9cc724ae6efe92666e8f3664b71282c4e74e79ad48c51373
|
data/Gemfile
CHANGED
@@ -5,34 +5,38 @@ source "https://rubygems.org"
|
|
5
5
|
gemspec
|
6
6
|
|
7
7
|
group :development do
|
8
|
+
# ruby 3.4.0-dev removed some gems from the default set
|
9
|
+
#
|
10
|
+
# TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
|
11
|
+
# transitively.
|
12
|
+
gem "mutex_m"
|
13
|
+
|
8
14
|
# bootstrapping
|
9
15
|
gem "bundler", "~> 2.3"
|
10
|
-
gem "rake", "
|
16
|
+
gem "rake", "13.1.0"
|
11
17
|
|
12
18
|
# building extensions
|
13
|
-
gem "rake-compiler", "
|
14
|
-
gem "rake-compiler-dock", "
|
15
|
-
|
16
|
-
# documentation
|
17
|
-
gem "hoe-markdown", "= 1.4.0"
|
19
|
+
gem "rake-compiler", "1.2.6"
|
20
|
+
gem "rake-compiler-dock", "1.4.0"
|
18
21
|
|
19
22
|
# parser generator
|
20
23
|
gem "rexical", "= 1.0.7"
|
21
24
|
|
22
25
|
# tests
|
23
|
-
gem "minitest", "5.
|
24
|
-
gem "minitest-
|
25
|
-
gem "ruby_memcheck", "
|
26
|
-
gem "simplecov", "= 0.21.2"
|
26
|
+
gem "minitest", "5.21.2"
|
27
|
+
gem "minitest-parallel_fork", "2.0.0"
|
28
|
+
gem "ruby_memcheck", "2.3.0"
|
27
29
|
gem "rubyzip", "~> 2.3.2"
|
30
|
+
gem "simplecov", "= 0.21.2"
|
28
31
|
|
29
32
|
# rubocop
|
30
33
|
if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
|
31
|
-
gem "rubocop", "1.
|
32
|
-
gem "rubocop-minitest", "0.
|
33
|
-
gem "rubocop-
|
34
|
+
gem "rubocop", "1.60.2"
|
35
|
+
gem "rubocop-minitest", "0.34.5"
|
36
|
+
gem "rubocop-packaging", "0.5.2"
|
37
|
+
gem "rubocop-performance", "1.20.2"
|
34
38
|
gem "rubocop-rake", "= 0.6.0"
|
35
|
-
gem "rubocop-shopify", "2.
|
39
|
+
gem "rubocop-shopify", "2.14.0"
|
36
40
|
end
|
37
41
|
end
|
38
42
|
|
@@ -40,5 +44,5 @@ end
|
|
40
44
|
# `bundle config set --local without rdoc`
|
41
45
|
# Then re-run `bundle install`.
|
42
46
|
group :rdoc do
|
43
|
-
gem "rdoc", "6.
|
47
|
+
gem "rdoc", "6.6.2"
|
44
48
|
end
|
data/README.md
CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
|
|
55
55
|
|
56
56
|
- The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
|
57
57
|
- Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
|
58
|
+
- Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
|
58
59
|
|
59
60
|
Please do not mail the maintainers at their personal addresses.
|
60
61
|
|
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
|
|
90
91
|
- Updating packaged libraries for non-security-related reasons.
|
91
92
|
- Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
|
92
93
|
- Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
|
94
|
+
- Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
|
95
|
+
|
93
96
|
|
94
97
|
`Patch`:
|
95
98
|
|
@@ -111,7 +114,7 @@ You can help sponsor the maintainers of this software through one of these organ
|
|
111
114
|
|
112
115
|
Requirements:
|
113
116
|
|
114
|
-
- Ruby >=
|
117
|
+
- Ruby >= 3.0
|
115
118
|
- JRuby >= 9.4.0.0
|
116
119
|
|
117
120
|
|
data/dependencies.yml
CHANGED
@@ -1,16 +1,17 @@
|
|
1
|
+
---
|
1
2
|
libxml2:
|
2
|
-
version: "2.
|
3
|
-
sha256: "
|
4
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.
|
3
|
+
version: "2.12.7"
|
4
|
+
sha256: "24ae78ff1363a973e6d8beba941a7945da2ac056e19b53956aeb6927fd6cfb56"
|
5
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.7.sha256sum
|
5
6
|
|
6
7
|
libxslt:
|
7
|
-
version: "1.1.
|
8
|
-
sha256: "
|
9
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.
|
8
|
+
version: "1.1.39"
|
9
|
+
sha256: "2a20ad621148339b0759c4d4e96719362dee64c9a096dbba625ba053846349f0"
|
10
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
|
10
11
|
|
11
12
|
zlib:
|
12
|
-
version: "1.
|
13
|
-
sha256: "
|
13
|
+
version: "1.3.1"
|
14
|
+
sha256: "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23"
|
14
15
|
# SHA-256 hash provided on http://zlib.net/
|
15
16
|
|
16
17
|
libiconv:
|
data/ext/nokogiri/extconf.rb
CHANGED
@@ -15,7 +15,7 @@ PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."
|
|
15
15
|
REQUIRED_LIBXML_VERSION = "2.6.21"
|
16
16
|
RECOMMENDED_LIBXML_VERSION = "2.9.3"
|
17
17
|
|
18
|
-
REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.
|
18
|
+
REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.2" # keep this version in sync with the one in the gemspec
|
19
19
|
REQUIRED_PKG_CONFIG_VERSION = "~> 1.1"
|
20
20
|
|
21
21
|
# Keep track of what versions of what libraries we build against
|
@@ -229,7 +229,7 @@ def gnome_source
|
|
229
229
|
if ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"]
|
230
230
|
"https://download.gnome.org"
|
231
231
|
else
|
232
|
-
"https://
|
232
|
+
"https://muug.ca/mirror/gnome" # old reliable
|
233
233
|
end
|
234
234
|
end
|
235
235
|
|
@@ -372,7 +372,7 @@ def try_link_iconv(using = nil)
|
|
372
372
|
preserving_globals do
|
373
373
|
yield if block_given?
|
374
374
|
|
375
|
-
try_link(<<~
|
375
|
+
try_link(<<~SRC, opt)
|
376
376
|
#include <stdlib.h>
|
377
377
|
#include <iconv.h>
|
378
378
|
int main(void)
|
@@ -443,7 +443,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
|
|
443
443
|
# We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
|
444
444
|
# prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
|
445
445
|
recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
|
446
|
-
recipe.host = recipe.host.gsub(
|
446
|
+
recipe.host = recipe.host.gsub("i386", "i686")
|
447
447
|
|
448
448
|
recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
|
449
449
|
recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
|
@@ -651,8 +651,7 @@ append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil?
|
|
651
651
|
append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
|
652
652
|
$LIBS = concat_flags($LIBS, ENV["LIBS"])
|
653
653
|
|
654
|
-
#
|
655
|
-
# errors/warnings. see #2302
|
654
|
+
# libgumbo uses C90/C99 features, see #2302
|
656
655
|
append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
|
657
656
|
|
658
657
|
# gumbo html5 serialization is slower with O3, let's make sure we use O2
|
@@ -695,14 +694,34 @@ append_cppflags(' "-Idummypath"') if windows?
|
|
695
694
|
|
696
695
|
if config_system_libraries?
|
697
696
|
message "Building nokogiri using system libraries.\n"
|
698
|
-
ensure_package_configuration(
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
697
|
+
ensure_package_configuration(
|
698
|
+
opt: "zlib",
|
699
|
+
pc: "zlib",
|
700
|
+
lib: "z",
|
701
|
+
headers: "zlib.h",
|
702
|
+
func: "gzdopen",
|
703
|
+
)
|
704
|
+
ensure_package_configuration(
|
705
|
+
opt: "xml2",
|
706
|
+
pc: "libxml-2.0",
|
707
|
+
lib: "xml2",
|
708
|
+
headers: "libxml/parser.h",
|
709
|
+
func: "xmlParseDoc",
|
710
|
+
)
|
711
|
+
ensure_package_configuration(
|
712
|
+
opt: "xslt",
|
713
|
+
pc: "libxslt",
|
714
|
+
lib: "xslt",
|
715
|
+
headers: "libxslt/xslt.h",
|
716
|
+
func: "xsltParseStylesheetDoc",
|
717
|
+
)
|
718
|
+
ensure_package_configuration(
|
719
|
+
opt: "exslt",
|
720
|
+
pc: "libexslt",
|
721
|
+
lib: "exslt",
|
722
|
+
headers: "libexslt/exslt.h",
|
723
|
+
func: "exsltFuncRegister",
|
724
|
+
)
|
706
725
|
|
707
726
|
have_libxml_headers?(REQUIRED_LIBXML_VERSION) ||
|
708
727
|
abort("ERROR: libxml2 version #{REQUIRED_LIBXML_VERSION} or later is required!")
|
@@ -785,8 +804,12 @@ else
|
|
785
804
|
end
|
786
805
|
|
787
806
|
unless nix?
|
788
|
-
libiconv_recipe = process_recipe(
|
789
|
-
|
807
|
+
libiconv_recipe = process_recipe(
|
808
|
+
"libiconv",
|
809
|
+
dependencies["libiconv"]["version"],
|
810
|
+
static_p,
|
811
|
+
cross_build_p,
|
812
|
+
) do |recipe|
|
790
813
|
recipe.files = [{
|
791
814
|
url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
|
792
815
|
sha256: dependencies["libiconv"]["sha256"],
|
@@ -824,15 +847,25 @@ else
|
|
824
847
|
if zlib_recipe
|
825
848
|
append_cppflags("-I#{zlib_recipe.path}/include")
|
826
849
|
$LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH
|
827
|
-
ensure_package_configuration(
|
828
|
-
|
850
|
+
ensure_package_configuration(
|
851
|
+
opt: "zlib",
|
852
|
+
pc: "zlib",
|
853
|
+
lib: "z",
|
854
|
+
headers: "zlib.h",
|
855
|
+
func: "gzdopen",
|
856
|
+
)
|
829
857
|
end
|
830
858
|
|
831
859
|
if libiconv_recipe
|
832
860
|
append_cppflags("-I#{libiconv_recipe.path}/include")
|
833
861
|
$LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH
|
834
|
-
ensure_package_configuration(
|
835
|
-
|
862
|
+
ensure_package_configuration(
|
863
|
+
opt: "iconv",
|
864
|
+
pc: "iconv",
|
865
|
+
lib: "iconv",
|
866
|
+
headers: "iconv.h",
|
867
|
+
func: "iconv_open",
|
868
|
+
)
|
836
869
|
end
|
837
870
|
|
838
871
|
libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe|
|
@@ -848,8 +881,13 @@ else
|
|
848
881
|
recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort
|
849
882
|
end
|
850
883
|
|
884
|
+
cppflags = concat_flags(ENV["CPPFLAGS"])
|
851
885
|
cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g")
|
852
886
|
|
887
|
+
if cross_build_p
|
888
|
+
cppflags = concat_flags(cppflags, "-DNOKOGIRI_PRECOMPILED_LIBRARIES")
|
889
|
+
end
|
890
|
+
|
853
891
|
if zlib_recipe
|
854
892
|
recipe.configure_options << "--with-zlib=#{zlib_recipe.path}"
|
855
893
|
end
|
@@ -880,6 +918,8 @@ else
|
|
880
918
|
"--with-c14n",
|
881
919
|
"--with-debug",
|
882
920
|
"--with-threads",
|
921
|
+
"--without-tls", # see https://github.com/sparklemotion/nokogiri/issues/3031
|
922
|
+
"CPPFLAGS=#{cppflags}",
|
883
923
|
"CFLAGS=#{cflags}",
|
884
924
|
]
|
885
925
|
end
|
@@ -1023,7 +1063,7 @@ libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_b
|
|
1023
1063
|
|
1024
1064
|
env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
|
1025
1065
|
if config_cross_build?
|
1026
|
-
if
|
1066
|
+
if host.include?("darwin")
|
1027
1067
|
env["AR"] = "#{host}-libtool"
|
1028
1068
|
env["ARFLAGS"] = "-o"
|
1029
1069
|
else
|
@@ -1047,7 +1087,6 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
|
|
1047
1087
|
have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
|
1048
1088
|
have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
|
1049
1089
|
have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
|
1050
|
-
have_func("rb_gc_location") # introduced in Ruby 2.7
|
1051
1090
|
have_func("rb_category_warning") # introduced in Ruby 3.0
|
1052
1091
|
|
1053
1092
|
other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
|
@@ -1057,12 +1096,16 @@ unless config_system_libraries?
|
|
1057
1096
|
if cross_build_p
|
1058
1097
|
# When precompiling native gems, copy packaged libraries' headers to ext/nokogiri/include
|
1059
1098
|
# These are packaged up by the cross-compiling callback in the ExtensionTask
|
1060
|
-
copy_packaged_libraries_headers(
|
1061
|
-
|
1099
|
+
copy_packaged_libraries_headers(
|
1100
|
+
to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
|
1101
|
+
from_recipes: [libxml2_recipe, libxslt_recipe],
|
1102
|
+
)
|
1062
1103
|
else
|
1063
1104
|
# When compiling during installation, install packaged libraries' header files into ext/nokogiri/include
|
1064
|
-
copy_packaged_libraries_headers(
|
1065
|
-
|
1105
|
+
copy_packaged_libraries_headers(
|
1106
|
+
to_path: "include",
|
1107
|
+
from_recipes: [libxml2_recipe, libxslt_recipe],
|
1108
|
+
)
|
1066
1109
|
$INSTALLFILES << ["include/**/*.h", "$(rubylibdir)"]
|
1067
1110
|
end
|
1068
1111
|
end
|
@@ -144,8 +144,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r
|
|
144
144
|
static VALUE
|
145
145
|
rb_html_document_type(VALUE self)
|
146
146
|
{
|
147
|
-
htmlDocPtr doc;
|
148
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
147
|
+
htmlDocPtr doc = noko_xml_document_unwrap(self);
|
149
148
|
return INT2NUM(doc->type);
|
150
149
|
}
|
151
150
|
|
@@ -1,5 +1,10 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
+
static const rb_data_type_t html4_element_description_type = {
|
4
|
+
.wrap_struct_name = "Nokogiri::HTML4::ElementDescription",
|
5
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
6
|
+
};
|
7
|
+
|
3
8
|
VALUE cNokogiriHtml4ElementDescription ;
|
4
9
|
|
5
10
|
/*
|
@@ -15,7 +20,7 @@ required_attributes(VALUE self)
|
|
15
20
|
VALUE list;
|
16
21
|
int i;
|
17
22
|
|
18
|
-
|
23
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
19
24
|
|
20
25
|
list = rb_ary_new();
|
21
26
|
|
@@ -41,7 +46,7 @@ deprecated_attributes(VALUE self)
|
|
41
46
|
VALUE list;
|
42
47
|
int i;
|
43
48
|
|
44
|
-
|
49
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
45
50
|
|
46
51
|
list = rb_ary_new();
|
47
52
|
|
@@ -67,7 +72,7 @@ optional_attributes(VALUE self)
|
|
67
72
|
VALUE list;
|
68
73
|
int i;
|
69
74
|
|
70
|
-
|
75
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
71
76
|
|
72
77
|
list = rb_ary_new();
|
73
78
|
|
@@ -90,7 +95,7 @@ static VALUE
|
|
90
95
|
default_sub_element(VALUE self)
|
91
96
|
{
|
92
97
|
const htmlElemDesc *description;
|
93
|
-
|
98
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
94
99
|
|
95
100
|
if (description->defaultsubelt) {
|
96
101
|
return NOKOGIRI_STR_NEW2(description->defaultsubelt);
|
@@ -112,7 +117,7 @@ sub_elements(VALUE self)
|
|
112
117
|
VALUE list;
|
113
118
|
int i;
|
114
119
|
|
115
|
-
|
120
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
116
121
|
|
117
122
|
list = rb_ary_new();
|
118
123
|
|
@@ -135,7 +140,7 @@ static VALUE
|
|
135
140
|
description(VALUE self)
|
136
141
|
{
|
137
142
|
const htmlElemDesc *description;
|
138
|
-
|
143
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
139
144
|
|
140
145
|
return NOKOGIRI_STR_NEW2(description->desc);
|
141
146
|
}
|
@@ -150,7 +155,7 @@ static VALUE
|
|
150
155
|
inline_eh(VALUE self)
|
151
156
|
{
|
152
157
|
const htmlElemDesc *description;
|
153
|
-
|
158
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
154
159
|
|
155
160
|
if (description->isinline) { return Qtrue; }
|
156
161
|
return Qfalse;
|
@@ -166,7 +171,7 @@ static VALUE
|
|
166
171
|
deprecated_eh(VALUE self)
|
167
172
|
{
|
168
173
|
const htmlElemDesc *description;
|
169
|
-
|
174
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
170
175
|
|
171
176
|
if (description->depr) { return Qtrue; }
|
172
177
|
return Qfalse;
|
@@ -182,7 +187,7 @@ static VALUE
|
|
182
187
|
empty_eh(VALUE self)
|
183
188
|
{
|
184
189
|
const htmlElemDesc *description;
|
185
|
-
|
190
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
186
191
|
|
187
192
|
if (description->empty) { return Qtrue; }
|
188
193
|
return Qfalse;
|
@@ -198,7 +203,7 @@ static VALUE
|
|
198
203
|
save_end_tag_eh(VALUE self)
|
199
204
|
{
|
200
205
|
const htmlElemDesc *description;
|
201
|
-
|
206
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
202
207
|
|
203
208
|
if (description->saveEndTag) { return Qtrue; }
|
204
209
|
return Qfalse;
|
@@ -214,7 +219,7 @@ static VALUE
|
|
214
219
|
implied_end_tag_eh(VALUE self)
|
215
220
|
{
|
216
221
|
const htmlElemDesc *description;
|
217
|
-
|
222
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
218
223
|
|
219
224
|
if (description->endTag) { return Qtrue; }
|
220
225
|
return Qfalse;
|
@@ -230,7 +235,7 @@ static VALUE
|
|
230
235
|
implied_start_tag_eh(VALUE self)
|
231
236
|
{
|
232
237
|
const htmlElemDesc *description;
|
233
|
-
|
238
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
234
239
|
|
235
240
|
if (description->startTag) { return Qtrue; }
|
236
241
|
return Qfalse;
|
@@ -246,7 +251,7 @@ static VALUE
|
|
246
251
|
name(VALUE self)
|
247
252
|
{
|
248
253
|
const htmlElemDesc *description;
|
249
|
-
|
254
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
250
255
|
|
251
256
|
if (NULL == description->name) { return Qnil; }
|
252
257
|
return NOKOGIRI_STR_NEW2(description->name);
|
@@ -266,7 +271,7 @@ get_description(VALUE klass, VALUE tag_name)
|
|
266
271
|
);
|
267
272
|
|
268
273
|
if (NULL == description) { return Qnil; }
|
269
|
-
return
|
274
|
+
return TypedData_Wrap_Struct(klass, &html4_element_description_type, DISCARD_CONST_QUAL(void *, description));
|
270
275
|
}
|
271
276
|
|
272
277
|
void
|
@@ -2,13 +2,6 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriHtml4SaxParserContext ;
|
4
4
|
|
5
|
-
static void
|
6
|
-
deallocate(xmlParserCtxtPtr ctxt)
|
7
|
-
{
|
8
|
-
ctxt->sax = NULL;
|
9
|
-
htmlFreeParserCtxt(ctxt);
|
10
|
-
}
|
11
|
-
|
12
5
|
static VALUE
|
13
6
|
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
14
7
|
{
|
@@ -38,7 +31,7 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
|
38
31
|
}
|
39
32
|
}
|
40
33
|
|
41
|
-
return
|
34
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
42
35
|
}
|
43
36
|
|
44
37
|
static VALUE
|
@@ -48,7 +41,13 @@ parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
|
48
41
|
StringValueCStr(filename),
|
49
42
|
StringValueCStr(encoding)
|
50
43
|
);
|
51
|
-
|
44
|
+
|
45
|
+
if (ctxt->sax) {
|
46
|
+
xmlFree(ctxt->sax);
|
47
|
+
ctxt->sax = NULL;
|
48
|
+
}
|
49
|
+
|
50
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
52
51
|
}
|
53
52
|
|
54
53
|
static VALUE
|
@@ -82,13 +81,8 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
82
81
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
83
82
|
}
|
84
83
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
/* Free the sax handler since we'll assign our own */
|
89
|
-
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
90
|
-
xmlFree(ctxt->sax);
|
91
|
-
}
|
84
|
+
ctxt = noko_xml_sax_parser_context_unwrap(self);
|
85
|
+
sax = noko_sax_handler_unwrap(sax_handler);
|
92
86
|
|
93
87
|
ctxt->sax = sax;
|
94
88
|
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
@@ -17,7 +17,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
17
17
|
int status = 0;
|
18
18
|
libxmlStructuredErrorHandlerState handler_state;
|
19
19
|
|
20
|
-
|
20
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
21
21
|
|
22
22
|
if (Qnil != _chunk) {
|
23
23
|
chunk = StringValuePtr(_chunk);
|
@@ -32,7 +32,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
32
32
|
|
33
33
|
if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
|
34
34
|
// TODO: there appear to be no tests for this block
|
35
|
-
|
35
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
36
36
|
Nokogiri_error_raise(NULL, e);
|
37
37
|
}
|
38
38
|
|
@@ -54,7 +54,7 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
|
54
54
|
htmlParserCtxtPtr ctx;
|
55
55
|
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
56
56
|
|
57
|
-
|
57
|
+
sax = noko_sax_handler_unwrap(_xml_sax);
|
58
58
|
|
59
59
|
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
60
60
|
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -137,6 +137,48 @@ noko_io_close(void *io)
|
|
137
137
|
}
|
138
138
|
|
139
139
|
|
140
|
+
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
141
|
+
# define NOKOGIRI_WINDOWS_DLLS 1
|
142
|
+
#else
|
143
|
+
# define NOKOGIRI_WINDOWS_DLLS 0
|
144
|
+
#endif
|
145
|
+
|
146
|
+
//
|
147
|
+
// | dlls || true | false |
|
148
|
+
// | nlmm || | |
|
149
|
+
// |-----------++---------+---------|
|
150
|
+
// | NULL || default | ruby |
|
151
|
+
// | "random" || default | ruby |
|
152
|
+
// | "ruby" || ruby | ruby |
|
153
|
+
// | "default" || default | default |
|
154
|
+
//
|
155
|
+
// We choose *not* to use Ruby's memory management functions with windows DLLs because of this
|
156
|
+
// issue: https://github.com/sparklemotion/nokogiri/issues/2241
|
157
|
+
//
|
158
|
+
static void
|
159
|
+
set_libxml_memory_management(void)
|
160
|
+
{
|
161
|
+
const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
|
162
|
+
if (nlmm) {
|
163
|
+
if (strcmp(nlmm, "default") == 0) {
|
164
|
+
goto libxml_uses_default_memory_management;
|
165
|
+
} else if (strcmp(nlmm, "ruby") == 0) {
|
166
|
+
goto libxml_uses_ruby_memory_management;
|
167
|
+
}
|
168
|
+
}
|
169
|
+
if (NOKOGIRI_WINDOWS_DLLS) {
|
170
|
+
libxml_uses_default_memory_management:
|
171
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
172
|
+
return;
|
173
|
+
} else {
|
174
|
+
libxml_uses_ruby_memory_management:
|
175
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
176
|
+
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
177
|
+
return;
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
181
|
+
|
140
182
|
void
|
141
183
|
Init_nokogiri(void)
|
142
184
|
{
|
@@ -150,6 +192,10 @@ Init_nokogiri(void)
|
|
150
192
|
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
|
151
193
|
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
152
194
|
|
195
|
+
set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
|
196
|
+
xmlInitParser();
|
197
|
+
exsltRegisterAll();
|
198
|
+
|
153
199
|
rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
|
154
200
|
rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
|
155
201
|
|
@@ -182,30 +228,6 @@ Init_nokogiri(void)
|
|
182
228
|
rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
|
183
229
|
#endif
|
184
230
|
|
185
|
-
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
186
|
-
/*
|
187
|
-
* We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
|
188
|
-
* issue in libxml 2.9.12:
|
189
|
-
*
|
190
|
-
* https://github.com/sparklemotion/nokogiri/issues/2241
|
191
|
-
*
|
192
|
-
* If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
|
193
|
-
* this config only for the specific libxml2 versions 2.9.12.
|
194
|
-
*
|
195
|
-
* Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
|
196
|
-
* default memory management functions (recall that this config was introduced to reduce memory
|
197
|
-
* bloat and allow Ruby to GC more often); but we should *really* test with production workloads
|
198
|
-
* before making that kind of a potentially-invasive change.
|
199
|
-
*/
|
200
|
-
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
201
|
-
#else
|
202
|
-
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
203
|
-
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
204
|
-
#endif
|
205
|
-
|
206
|
-
xmlInitParser();
|
207
|
-
exsltRegisterAll();
|
208
|
-
|
209
231
|
if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
|
210
232
|
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
|
211
233
|
} else {
|