nokogiri 1.14.2 → 1.16.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +16 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 265995b2c377e4457aca61465f5adb40912633b08a56df3aa709570d3274a3e2
|
4
|
+
data.tar.gz: 61e6f5f65e8b272032089b21e046f460f1f1ad39dba13f847fb8b6fc29bafd01
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e01dfe5f6511eb28d7db99dabb4ecd9b82ee6e749e1ad5a54251914f1068b9491d5b41676cd0f82a848466ceaacd0c65182d66f2d123c4545d11c9152d9150b
|
7
|
+
data.tar.gz: 74684f41f6c6170e21ab0f0e561cf4ed75764deda9f4c8442fcc317eafdf99503df2b623c948f4272348cbb4a5ca5af6ca5da140ba6eb04d58d833b75d01c45d
|
data/Gemfile
CHANGED
@@ -5,34 +5,38 @@ source "https://rubygems.org"
|
|
5
5
|
gemspec
|
6
6
|
|
7
7
|
group :development do
|
8
|
+
# ruby 3.4.0-dev removed some gems from the default set
|
9
|
+
#
|
10
|
+
# TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
|
11
|
+
# transitively.
|
12
|
+
gem "mutex_m"
|
13
|
+
|
8
14
|
# bootstrapping
|
9
15
|
gem "bundler", "~> 2.3"
|
10
|
-
gem "rake", "
|
16
|
+
gem "rake", "13.1.0"
|
11
17
|
|
12
18
|
# building extensions
|
13
|
-
gem "rake-compiler", "
|
14
|
-
gem "rake-compiler-dock", "
|
15
|
-
|
16
|
-
# documentation
|
17
|
-
gem "hoe-markdown", "= 1.4.0"
|
19
|
+
gem "rake-compiler", "1.2.6"
|
20
|
+
gem "rake-compiler-dock", "1.4.0"
|
18
21
|
|
19
22
|
# parser generator
|
20
23
|
gem "rexical", "= 1.0.7"
|
21
24
|
|
22
25
|
# tests
|
23
|
-
gem "minitest", "5.
|
24
|
-
gem "minitest-
|
25
|
-
gem "ruby_memcheck", "
|
26
|
-
gem "simplecov", "= 0.21.2"
|
26
|
+
gem "minitest", "5.21.2"
|
27
|
+
gem "minitest-parallel_fork", "2.0.0"
|
28
|
+
gem "ruby_memcheck", "2.3.0"
|
27
29
|
gem "rubyzip", "~> 2.3.2"
|
30
|
+
gem "simplecov", "= 0.21.2"
|
28
31
|
|
29
32
|
# rubocop
|
30
33
|
if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
|
31
|
-
gem "rubocop", "1.
|
32
|
-
gem "rubocop-minitest", "0.
|
33
|
-
gem "rubocop-
|
34
|
+
gem "rubocop", "1.60.2"
|
35
|
+
gem "rubocop-minitest", "0.34.5"
|
36
|
+
gem "rubocop-packaging", "0.5.2"
|
37
|
+
gem "rubocop-performance", "1.20.2"
|
34
38
|
gem "rubocop-rake", "= 0.6.0"
|
35
|
-
gem "rubocop-shopify", "2.
|
39
|
+
gem "rubocop-shopify", "2.14.0"
|
36
40
|
end
|
37
41
|
end
|
38
42
|
|
@@ -40,5 +44,5 @@ end
|
|
40
44
|
# `bundle config set --local without rdoc`
|
41
45
|
# Then re-run `bundle install`.
|
42
46
|
group :rdoc do
|
43
|
-
gem "rdoc", "6.
|
47
|
+
gem "rdoc", "6.6.2"
|
44
48
|
end
|
data/README.md
CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
|
|
55
55
|
|
56
56
|
- The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
|
57
57
|
- Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
|
58
|
+
- Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
|
58
59
|
|
59
60
|
Please do not mail the maintainers at their personal addresses.
|
60
61
|
|
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
|
|
90
91
|
- Updating packaged libraries for non-security-related reasons.
|
91
92
|
- Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
|
92
93
|
- Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
|
94
|
+
- Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
|
95
|
+
|
93
96
|
|
94
97
|
`Patch`:
|
95
98
|
|
@@ -111,7 +114,7 @@ You can help sponsor the maintainers of this software through one of these organ
|
|
111
114
|
|
112
115
|
Requirements:
|
113
116
|
|
114
|
-
- Ruby >=
|
117
|
+
- Ruby >= 3.0
|
115
118
|
- JRuby >= 9.4.0.0
|
116
119
|
|
117
120
|
|
data/dependencies.yml
CHANGED
@@ -1,16 +1,17 @@
|
|
1
|
+
---
|
1
2
|
libxml2:
|
2
|
-
version: "2.
|
3
|
-
sha256: "
|
4
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.
|
3
|
+
version: "2.12.8"
|
4
|
+
sha256: "43ad877b018bc63deb2468d71f95219c2fac196876ef36d1bee51d226173ec93"
|
5
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.8.sha256sum
|
5
6
|
|
6
7
|
libxslt:
|
7
|
-
version: "1.1.
|
8
|
-
sha256: "
|
9
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.
|
8
|
+
version: "1.1.39"
|
9
|
+
sha256: "2a20ad621148339b0759c4d4e96719362dee64c9a096dbba625ba053846349f0"
|
10
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
|
10
11
|
|
11
12
|
zlib:
|
12
|
-
version: "1.
|
13
|
-
sha256: "
|
13
|
+
version: "1.3.1"
|
14
|
+
sha256: "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23"
|
14
15
|
# SHA-256 hash provided on http://zlib.net/
|
15
16
|
|
16
17
|
libiconv:
|
data/ext/nokogiri/extconf.rb
CHANGED
@@ -15,7 +15,7 @@ PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."
|
|
15
15
|
REQUIRED_LIBXML_VERSION = "2.6.21"
|
16
16
|
RECOMMENDED_LIBXML_VERSION = "2.9.3"
|
17
17
|
|
18
|
-
REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.
|
18
|
+
REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.2" # keep this version in sync with the one in the gemspec
|
19
19
|
REQUIRED_PKG_CONFIG_VERSION = "~> 1.1"
|
20
20
|
|
21
21
|
# Keep track of what versions of what libraries we build against
|
@@ -229,7 +229,7 @@ def gnome_source
|
|
229
229
|
if ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"]
|
230
230
|
"https://download.gnome.org"
|
231
231
|
else
|
232
|
-
"https://
|
232
|
+
"https://muug.ca/mirror/gnome" # old reliable
|
233
233
|
end
|
234
234
|
end
|
235
235
|
|
@@ -372,7 +372,7 @@ def try_link_iconv(using = nil)
|
|
372
372
|
preserving_globals do
|
373
373
|
yield if block_given?
|
374
374
|
|
375
|
-
try_link(<<~
|
375
|
+
try_link(<<~SRC, opt)
|
376
376
|
#include <stdlib.h>
|
377
377
|
#include <iconv.h>
|
378
378
|
int main(void)
|
@@ -443,7 +443,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
|
|
443
443
|
# We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
|
444
444
|
# prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
|
445
445
|
recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
|
446
|
-
recipe.host = recipe.host.gsub(
|
446
|
+
recipe.host = recipe.host.gsub("i386", "i686")
|
447
447
|
|
448
448
|
recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
|
449
449
|
recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
|
@@ -651,8 +651,7 @@ append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil?
|
|
651
651
|
append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
|
652
652
|
$LIBS = concat_flags($LIBS, ENV["LIBS"])
|
653
653
|
|
654
|
-
#
|
655
|
-
# errors/warnings. see #2302
|
654
|
+
# libgumbo uses C90/C99 features, see #2302
|
656
655
|
append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
|
657
656
|
|
658
657
|
# gumbo html5 serialization is slower with O3, let's make sure we use O2
|
@@ -695,14 +694,34 @@ append_cppflags(' "-Idummypath"') if windows?
|
|
695
694
|
|
696
695
|
if config_system_libraries?
|
697
696
|
message "Building nokogiri using system libraries.\n"
|
698
|
-
ensure_package_configuration(
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
697
|
+
ensure_package_configuration(
|
698
|
+
opt: "zlib",
|
699
|
+
pc: "zlib",
|
700
|
+
lib: "z",
|
701
|
+
headers: "zlib.h",
|
702
|
+
func: "gzdopen",
|
703
|
+
)
|
704
|
+
ensure_package_configuration(
|
705
|
+
opt: "xml2",
|
706
|
+
pc: "libxml-2.0",
|
707
|
+
lib: "xml2",
|
708
|
+
headers: "libxml/parser.h",
|
709
|
+
func: "xmlParseDoc",
|
710
|
+
)
|
711
|
+
ensure_package_configuration(
|
712
|
+
opt: "xslt",
|
713
|
+
pc: "libxslt",
|
714
|
+
lib: "xslt",
|
715
|
+
headers: "libxslt/xslt.h",
|
716
|
+
func: "xsltParseStylesheetDoc",
|
717
|
+
)
|
718
|
+
ensure_package_configuration(
|
719
|
+
opt: "exslt",
|
720
|
+
pc: "libexslt",
|
721
|
+
lib: "exslt",
|
722
|
+
headers: "libexslt/exslt.h",
|
723
|
+
func: "exsltFuncRegister",
|
724
|
+
)
|
706
725
|
|
707
726
|
have_libxml_headers?(REQUIRED_LIBXML_VERSION) ||
|
708
727
|
abort("ERROR: libxml2 version #{REQUIRED_LIBXML_VERSION} or later is required!")
|
@@ -785,8 +804,12 @@ else
|
|
785
804
|
end
|
786
805
|
|
787
806
|
unless nix?
|
788
|
-
libiconv_recipe = process_recipe(
|
789
|
-
|
807
|
+
libiconv_recipe = process_recipe(
|
808
|
+
"libiconv",
|
809
|
+
dependencies["libiconv"]["version"],
|
810
|
+
static_p,
|
811
|
+
cross_build_p,
|
812
|
+
) do |recipe|
|
790
813
|
recipe.files = [{
|
791
814
|
url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
|
792
815
|
sha256: dependencies["libiconv"]["sha256"],
|
@@ -824,15 +847,25 @@ else
|
|
824
847
|
if zlib_recipe
|
825
848
|
append_cppflags("-I#{zlib_recipe.path}/include")
|
826
849
|
$LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH
|
827
|
-
ensure_package_configuration(
|
828
|
-
|
850
|
+
ensure_package_configuration(
|
851
|
+
opt: "zlib",
|
852
|
+
pc: "zlib",
|
853
|
+
lib: "z",
|
854
|
+
headers: "zlib.h",
|
855
|
+
func: "gzdopen",
|
856
|
+
)
|
829
857
|
end
|
830
858
|
|
831
859
|
if libiconv_recipe
|
832
860
|
append_cppflags("-I#{libiconv_recipe.path}/include")
|
833
861
|
$LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH
|
834
|
-
ensure_package_configuration(
|
835
|
-
|
862
|
+
ensure_package_configuration(
|
863
|
+
opt: "iconv",
|
864
|
+
pc: "iconv",
|
865
|
+
lib: "iconv",
|
866
|
+
headers: "iconv.h",
|
867
|
+
func: "iconv_open",
|
868
|
+
)
|
836
869
|
end
|
837
870
|
|
838
871
|
libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe|
|
@@ -848,8 +881,13 @@ else
|
|
848
881
|
recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort
|
849
882
|
end
|
850
883
|
|
884
|
+
cppflags = concat_flags(ENV["CPPFLAGS"])
|
851
885
|
cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g")
|
852
886
|
|
887
|
+
if cross_build_p
|
888
|
+
cppflags = concat_flags(cppflags, "-DNOKOGIRI_PRECOMPILED_LIBRARIES")
|
889
|
+
end
|
890
|
+
|
853
891
|
if zlib_recipe
|
854
892
|
recipe.configure_options << "--with-zlib=#{zlib_recipe.path}"
|
855
893
|
end
|
@@ -880,6 +918,8 @@ else
|
|
880
918
|
"--with-c14n",
|
881
919
|
"--with-debug",
|
882
920
|
"--with-threads",
|
921
|
+
"--without-tls", # see https://github.com/sparklemotion/nokogiri/issues/3031
|
922
|
+
"CPPFLAGS=#{cppflags}",
|
883
923
|
"CFLAGS=#{cflags}",
|
884
924
|
]
|
885
925
|
end
|
@@ -1023,7 +1063,7 @@ libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_b
|
|
1023
1063
|
|
1024
1064
|
env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
|
1025
1065
|
if config_cross_build?
|
1026
|
-
if
|
1066
|
+
if host.include?("darwin")
|
1027
1067
|
env["AR"] = "#{host}-libtool"
|
1028
1068
|
env["ARFLAGS"] = "-o"
|
1029
1069
|
else
|
@@ -1047,7 +1087,6 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
|
|
1047
1087
|
have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
|
1048
1088
|
have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
|
1049
1089
|
have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
|
1050
|
-
have_func("rb_gc_location") # introduced in Ruby 2.7
|
1051
1090
|
have_func("rb_category_warning") # introduced in Ruby 3.0
|
1052
1091
|
|
1053
1092
|
other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
|
@@ -1057,12 +1096,16 @@ unless config_system_libraries?
|
|
1057
1096
|
if cross_build_p
|
1058
1097
|
# When precompiling native gems, copy packaged libraries' headers to ext/nokogiri/include
|
1059
1098
|
# These are packaged up by the cross-compiling callback in the ExtensionTask
|
1060
|
-
copy_packaged_libraries_headers(
|
1061
|
-
|
1099
|
+
copy_packaged_libraries_headers(
|
1100
|
+
to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
|
1101
|
+
from_recipes: [libxml2_recipe, libxslt_recipe],
|
1102
|
+
)
|
1062
1103
|
else
|
1063
1104
|
# When compiling during installation, install packaged libraries' header files into ext/nokogiri/include
|
1064
|
-
copy_packaged_libraries_headers(
|
1065
|
-
|
1105
|
+
copy_packaged_libraries_headers(
|
1106
|
+
to_path: "include",
|
1107
|
+
from_recipes: [libxml2_recipe, libxslt_recipe],
|
1108
|
+
)
|
1066
1109
|
$INSTALLFILES << ["include/**/*.h", "$(rubylibdir)"]
|
1067
1110
|
end
|
1068
1111
|
end
|
@@ -144,8 +144,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r
|
|
144
144
|
static VALUE
|
145
145
|
rb_html_document_type(VALUE self)
|
146
146
|
{
|
147
|
-
htmlDocPtr doc;
|
148
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
147
|
+
htmlDocPtr doc = noko_xml_document_unwrap(self);
|
149
148
|
return INT2NUM(doc->type);
|
150
149
|
}
|
151
150
|
|
@@ -1,5 +1,10 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
+
static const rb_data_type_t html4_element_description_type = {
|
4
|
+
.wrap_struct_name = "Nokogiri::HTML4::ElementDescription",
|
5
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
6
|
+
};
|
7
|
+
|
3
8
|
VALUE cNokogiriHtml4ElementDescription ;
|
4
9
|
|
5
10
|
/*
|
@@ -15,7 +20,7 @@ required_attributes(VALUE self)
|
|
15
20
|
VALUE list;
|
16
21
|
int i;
|
17
22
|
|
18
|
-
|
23
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
19
24
|
|
20
25
|
list = rb_ary_new();
|
21
26
|
|
@@ -41,7 +46,7 @@ deprecated_attributes(VALUE self)
|
|
41
46
|
VALUE list;
|
42
47
|
int i;
|
43
48
|
|
44
|
-
|
49
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
45
50
|
|
46
51
|
list = rb_ary_new();
|
47
52
|
|
@@ -67,7 +72,7 @@ optional_attributes(VALUE self)
|
|
67
72
|
VALUE list;
|
68
73
|
int i;
|
69
74
|
|
70
|
-
|
75
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
71
76
|
|
72
77
|
list = rb_ary_new();
|
73
78
|
|
@@ -90,7 +95,7 @@ static VALUE
|
|
90
95
|
default_sub_element(VALUE self)
|
91
96
|
{
|
92
97
|
const htmlElemDesc *description;
|
93
|
-
|
98
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
94
99
|
|
95
100
|
if (description->defaultsubelt) {
|
96
101
|
return NOKOGIRI_STR_NEW2(description->defaultsubelt);
|
@@ -112,7 +117,7 @@ sub_elements(VALUE self)
|
|
112
117
|
VALUE list;
|
113
118
|
int i;
|
114
119
|
|
115
|
-
|
120
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
116
121
|
|
117
122
|
list = rb_ary_new();
|
118
123
|
|
@@ -135,7 +140,7 @@ static VALUE
|
|
135
140
|
description(VALUE self)
|
136
141
|
{
|
137
142
|
const htmlElemDesc *description;
|
138
|
-
|
143
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
139
144
|
|
140
145
|
return NOKOGIRI_STR_NEW2(description->desc);
|
141
146
|
}
|
@@ -150,7 +155,7 @@ static VALUE
|
|
150
155
|
inline_eh(VALUE self)
|
151
156
|
{
|
152
157
|
const htmlElemDesc *description;
|
153
|
-
|
158
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
154
159
|
|
155
160
|
if (description->isinline) { return Qtrue; }
|
156
161
|
return Qfalse;
|
@@ -166,7 +171,7 @@ static VALUE
|
|
166
171
|
deprecated_eh(VALUE self)
|
167
172
|
{
|
168
173
|
const htmlElemDesc *description;
|
169
|
-
|
174
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
170
175
|
|
171
176
|
if (description->depr) { return Qtrue; }
|
172
177
|
return Qfalse;
|
@@ -182,7 +187,7 @@ static VALUE
|
|
182
187
|
empty_eh(VALUE self)
|
183
188
|
{
|
184
189
|
const htmlElemDesc *description;
|
185
|
-
|
190
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
186
191
|
|
187
192
|
if (description->empty) { return Qtrue; }
|
188
193
|
return Qfalse;
|
@@ -198,7 +203,7 @@ static VALUE
|
|
198
203
|
save_end_tag_eh(VALUE self)
|
199
204
|
{
|
200
205
|
const htmlElemDesc *description;
|
201
|
-
|
206
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
202
207
|
|
203
208
|
if (description->saveEndTag) { return Qtrue; }
|
204
209
|
return Qfalse;
|
@@ -214,7 +219,7 @@ static VALUE
|
|
214
219
|
implied_end_tag_eh(VALUE self)
|
215
220
|
{
|
216
221
|
const htmlElemDesc *description;
|
217
|
-
|
222
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
218
223
|
|
219
224
|
if (description->endTag) { return Qtrue; }
|
220
225
|
return Qfalse;
|
@@ -230,7 +235,7 @@ static VALUE
|
|
230
235
|
implied_start_tag_eh(VALUE self)
|
231
236
|
{
|
232
237
|
const htmlElemDesc *description;
|
233
|
-
|
238
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
234
239
|
|
235
240
|
if (description->startTag) { return Qtrue; }
|
236
241
|
return Qfalse;
|
@@ -246,7 +251,7 @@ static VALUE
|
|
246
251
|
name(VALUE self)
|
247
252
|
{
|
248
253
|
const htmlElemDesc *description;
|
249
|
-
|
254
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
|
250
255
|
|
251
256
|
if (NULL == description->name) { return Qnil; }
|
252
257
|
return NOKOGIRI_STR_NEW2(description->name);
|
@@ -266,7 +271,7 @@ get_description(VALUE klass, VALUE tag_name)
|
|
266
271
|
);
|
267
272
|
|
268
273
|
if (NULL == description) { return Qnil; }
|
269
|
-
return
|
274
|
+
return TypedData_Wrap_Struct(klass, &html4_element_description_type, DISCARD_CONST_QUAL(void *, description));
|
270
275
|
}
|
271
276
|
|
272
277
|
void
|
@@ -2,13 +2,6 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriHtml4SaxParserContext ;
|
4
4
|
|
5
|
-
static void
|
6
|
-
deallocate(xmlParserCtxtPtr ctxt)
|
7
|
-
{
|
8
|
-
ctxt->sax = NULL;
|
9
|
-
htmlFreeParserCtxt(ctxt);
|
10
|
-
}
|
11
|
-
|
12
5
|
static VALUE
|
13
6
|
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
14
7
|
{
|
@@ -38,7 +31,7 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
|
38
31
|
}
|
39
32
|
}
|
40
33
|
|
41
|
-
return
|
34
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
42
35
|
}
|
43
36
|
|
44
37
|
static VALUE
|
@@ -48,7 +41,13 @@ parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
|
48
41
|
StringValueCStr(filename),
|
49
42
|
StringValueCStr(encoding)
|
50
43
|
);
|
51
|
-
|
44
|
+
|
45
|
+
if (ctxt->sax) {
|
46
|
+
xmlFree(ctxt->sax);
|
47
|
+
ctxt->sax = NULL;
|
48
|
+
}
|
49
|
+
|
50
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
52
51
|
}
|
53
52
|
|
54
53
|
static VALUE
|
@@ -82,13 +81,8 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
82
81
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
83
82
|
}
|
84
83
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
/* Free the sax handler since we'll assign our own */
|
89
|
-
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
90
|
-
xmlFree(ctxt->sax);
|
91
|
-
}
|
84
|
+
ctxt = noko_xml_sax_parser_context_unwrap(self);
|
85
|
+
sax = noko_sax_handler_unwrap(sax_handler);
|
92
86
|
|
93
87
|
ctxt->sax = sax;
|
94
88
|
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
@@ -17,7 +17,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
17
17
|
int status = 0;
|
18
18
|
libxmlStructuredErrorHandlerState handler_state;
|
19
19
|
|
20
|
-
|
20
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
21
21
|
|
22
22
|
if (Qnil != _chunk) {
|
23
23
|
chunk = StringValuePtr(_chunk);
|
@@ -32,7 +32,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
32
32
|
|
33
33
|
if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
|
34
34
|
// TODO: there appear to be no tests for this block
|
35
|
-
|
35
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
36
36
|
Nokogiri_error_raise(NULL, e);
|
37
37
|
}
|
38
38
|
|
@@ -54,7 +54,7 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
|
54
54
|
htmlParserCtxtPtr ctx;
|
55
55
|
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
56
56
|
|
57
|
-
|
57
|
+
sax = noko_sax_handler_unwrap(_xml_sax);
|
58
58
|
|
59
59
|
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
60
60
|
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -137,6 +137,48 @@ noko_io_close(void *io)
|
|
137
137
|
}
|
138
138
|
|
139
139
|
|
140
|
+
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
141
|
+
# define NOKOGIRI_WINDOWS_DLLS 1
|
142
|
+
#else
|
143
|
+
# define NOKOGIRI_WINDOWS_DLLS 0
|
144
|
+
#endif
|
145
|
+
|
146
|
+
//
|
147
|
+
// | dlls || true | false |
|
148
|
+
// | nlmm || | |
|
149
|
+
// |-----------++---------+---------|
|
150
|
+
// | NULL || default | ruby |
|
151
|
+
// | "random" || default | ruby |
|
152
|
+
// | "ruby" || ruby | ruby |
|
153
|
+
// | "default" || default | default |
|
154
|
+
//
|
155
|
+
// We choose *not* to use Ruby's memory management functions with windows DLLs because of this
|
156
|
+
// issue: https://github.com/sparklemotion/nokogiri/issues/2241
|
157
|
+
//
|
158
|
+
static void
|
159
|
+
set_libxml_memory_management(void)
|
160
|
+
{
|
161
|
+
const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
|
162
|
+
if (nlmm) {
|
163
|
+
if (strcmp(nlmm, "default") == 0) {
|
164
|
+
goto libxml_uses_default_memory_management;
|
165
|
+
} else if (strcmp(nlmm, "ruby") == 0) {
|
166
|
+
goto libxml_uses_ruby_memory_management;
|
167
|
+
}
|
168
|
+
}
|
169
|
+
if (NOKOGIRI_WINDOWS_DLLS) {
|
170
|
+
libxml_uses_default_memory_management:
|
171
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
172
|
+
return;
|
173
|
+
} else {
|
174
|
+
libxml_uses_ruby_memory_management:
|
175
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
176
|
+
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
177
|
+
return;
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
181
|
+
|
140
182
|
void
|
141
183
|
Init_nokogiri(void)
|
142
184
|
{
|
@@ -150,6 +192,10 @@ Init_nokogiri(void)
|
|
150
192
|
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
|
151
193
|
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
152
194
|
|
195
|
+
set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
|
196
|
+
xmlInitParser();
|
197
|
+
exsltRegisterAll();
|
198
|
+
|
153
199
|
rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
|
154
200
|
rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
|
155
201
|
|
@@ -182,30 +228,6 @@ Init_nokogiri(void)
|
|
182
228
|
rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
|
183
229
|
#endif
|
184
230
|
|
185
|
-
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
186
|
-
/*
|
187
|
-
* We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
|
188
|
-
* issue in libxml 2.9.12:
|
189
|
-
*
|
190
|
-
* https://github.com/sparklemotion/nokogiri/issues/2241
|
191
|
-
*
|
192
|
-
* If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
|
193
|
-
* this config only for the specific libxml2 versions 2.9.12.
|
194
|
-
*
|
195
|
-
* Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
|
196
|
-
* default memory management functions (recall that this config was introduced to reduce memory
|
197
|
-
* bloat and allow Ruby to GC more often); but we should *really* test with production workloads
|
198
|
-
* before making that kind of a potentially-invasive change.
|
199
|
-
*/
|
200
|
-
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
201
|
-
#else
|
202
|
-
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
203
|
-
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
204
|
-
#endif
|
205
|
-
|
206
|
-
xmlInitParser();
|
207
|
-
exsltRegisterAll();
|
208
|
-
|
209
231
|
if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
|
210
232
|
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
|
211
233
|
} else {
|