nokogiri 1.14.2 → 1.16.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +19 -15
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +69 -26
  6. data/ext/nokogiri/html4_document.c +1 -2
  7. data/ext/nokogiri/html4_element_description.c +19 -14
  8. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  9. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  10. data/ext/nokogiri/nokogiri.c +46 -24
  11. data/ext/nokogiri/nokogiri.h +23 -5
  12. data/ext/nokogiri/test_global_handlers.c +1 -1
  13. data/ext/nokogiri/xml_attr.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +30 -17
  15. data/ext/nokogiri/xml_comment.c +1 -1
  16. data/ext/nokogiri/xml_document.c +113 -25
  17. data/ext/nokogiri/xml_document_fragment.c +1 -1
  18. data/ext/nokogiri/xml_dtd.c +1 -1
  19. data/ext/nokogiri/xml_element_content.c +32 -29
  20. data/ext/nokogiri/xml_element_decl.c +5 -5
  21. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  22. data/ext/nokogiri/xml_entity_reference.c +1 -1
  23. data/ext/nokogiri/xml_namespace.c +11 -16
  24. data/ext/nokogiri/xml_node.c +13 -16
  25. data/ext/nokogiri/xml_node_set.c +125 -105
  26. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  27. data/ext/nokogiri/xml_reader.c +61 -74
  28. data/ext/nokogiri/xml_relax_ng.c +66 -79
  29. data/ext/nokogiri/xml_sax_parser.c +24 -5
  30. data/ext/nokogiri/xml_sax_parser_context.c +50 -25
  31. data/ext/nokogiri/xml_sax_push_parser.c +30 -9
  32. data/ext/nokogiri/xml_schema.c +94 -115
  33. data/ext/nokogiri/xml_syntax_error.c +3 -3
  34. data/ext/nokogiri/xml_text.c +26 -13
  35. data/ext/nokogiri/xml_xpath_context.c +153 -83
  36. data/ext/nokogiri/xslt_stylesheet.c +111 -53
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +8 -4
  39. data/gumbo-parser/src/foreign_attrs.c +13 -14
  40. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  41. data/gumbo-parser/src/parser.c +21 -5
  42. data/gumbo-parser/src/tokenizer.c +1 -0
  43. data/lib/nokogiri/css/parser_extras.rb +1 -1
  44. data/lib/nokogiri/css/xpath_visitor.rb +3 -23
  45. data/lib/nokogiri/extension.rb +1 -1
  46. data/lib/nokogiri/html4/document.rb +1 -1
  47. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  48. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  49. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  50. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  51. data/lib/nokogiri/html5/node.rb +5 -0
  52. data/lib/nokogiri/html5.rb +0 -63
  53. data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
  54. data/lib/nokogiri/version/constant.rb +1 -1
  55. data/lib/nokogiri/version/info.rb +6 -5
  56. data/lib/nokogiri/xml/attr.rb +2 -2
  57. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  58. data/lib/nokogiri/xml/document.rb +4 -5
  59. data/lib/nokogiri/xml/document_fragment.rb +3 -3
  60. data/lib/nokogiri/xml/element_content.rb +10 -2
  61. data/lib/nokogiri/xml/element_decl.rb +4 -2
  62. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  63. data/lib/nokogiri/xml/namespace.rb +1 -2
  64. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  65. data/lib/nokogiri/xml/node.rb +53 -37
  66. data/lib/nokogiri/xml/node_set.rb +3 -3
  67. data/lib/nokogiri/xml/pp/node.rb +23 -12
  68. data/lib/nokogiri/xml/reader.rb +10 -9
  69. data/lib/nokogiri/xml/sax/document.rb +1 -1
  70. data/lib/nokogiri/xml/searchable.rb +21 -13
  71. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  72. data/lib/nokogiri/xml.rb +1 -1
  73. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  74. data/lib/nokogiri/xslt.rb +74 -4
  75. data/lib/nokogiri.rb +13 -5
  76. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  77. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  78. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  79. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  80. data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
  81. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  82. metadata +16 -12
  83. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  84. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  85. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b926bf2ba1c28516c2dfc541d4b82e0f92ef3c585d6843334d7f1d44fe33abcb
4
- data.tar.gz: ec0ff5c75bc44ed7fe85761cfa00b28e92a03afdcd525356d31bd74060a2dbe5
3
+ metadata.gz: f268b2edaa2782a3e9a64e671e36d88149ef5dc23fcf3967a8a54c4475ed7b85
4
+ data.tar.gz: b49fbf8708f4733e34c211935637ad5999d3950b89aca2da71899c71eaa14901
5
5
  SHA512:
6
- metadata.gz: 11d8784c27eb523c7cba2c67efd1321c10253ce9624217b1b0383d246354dd8328466474c7968a7844b8ca01ca8e450e7d35bfee56d917b381949e721b435ce4
7
- data.tar.gz: '08db49457bb62572e68f5ec8b1eb29da18a9a632c6ef6ebb4ac9e193a29c0beac253d8aa2ed96a35513e70eb7a495344742d70e6b8ad81126b52e07ed400569d'
6
+ metadata.gz: e2f096bf7e99c453450c2a9d78c7274a3472d29ea8e8aeeff2dfb6976b5b009a8bfd5e673c7eb1a2575d820df26a830ce4e4043d33fb4c0239bf8098bdb98188
7
+ data.tar.gz: aa7658f376b0278150550d5426ddbe11469be8624d6fc732cf035d0ed00736e177e2848182c9077a9cc724ae6efe92666e8f3664b71282c4e74e79ad48c51373
data/Gemfile CHANGED
@@ -5,34 +5,38 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  group :development do
8
+ # ruby 3.4.0-dev removed some gems from the default set
9
+ #
10
+ # TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
11
+ # transitively.
12
+ gem "mutex_m"
13
+
8
14
  # bootstrapping
9
15
  gem "bundler", "~> 2.3"
10
- gem "rake", "= 13.0.6"
16
+ gem "rake", "13.1.0"
11
17
 
12
18
  # building extensions
13
- gem "rake-compiler", "= 1.2.1"
14
- gem "rake-compiler-dock", "= 1.3.0"
15
-
16
- # documentation
17
- gem "hoe-markdown", "= 1.4.0"
19
+ gem "rake-compiler", "1.2.6"
20
+ gem "rake-compiler-dock", "1.4.0"
18
21
 
19
22
  # parser generator
20
23
  gem "rexical", "= 1.0.7"
21
24
 
22
25
  # tests
23
- gem "minitest", "5.17.0"
24
- gem "minitest-reporters", "= 1.5.0"
25
- gem "ruby_memcheck", "1.2.0" unless RUBY_PLATFORM == "java"
26
- gem "simplecov", "= 0.21.2"
26
+ gem "minitest", "5.21.2"
27
+ gem "minitest-parallel_fork", "2.0.0"
28
+ gem "ruby_memcheck", "2.3.0"
27
29
  gem "rubyzip", "~> 2.3.2"
30
+ gem "simplecov", "= 0.21.2"
28
31
 
29
32
  # rubocop
30
33
  if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
31
- gem "rubocop", "1.44.1"
32
- gem "rubocop-minitest", "0.27.0"
33
- gem "rubocop-performance", "1.15.2"
34
+ gem "rubocop", "1.60.2"
35
+ gem "rubocop-minitest", "0.34.5"
36
+ gem "rubocop-packaging", "0.5.2"
37
+ gem "rubocop-performance", "1.20.2"
34
38
  gem "rubocop-rake", "= 0.6.0"
35
- gem "rubocop-shopify", "2.10.1"
39
+ gem "rubocop-shopify", "2.14.0"
36
40
  end
37
41
  end
38
42
 
@@ -40,5 +44,5 @@ end
40
44
  # `bundle config set --local without rdoc`
41
45
  # Then re-run `bundle install`.
42
46
  group :rdoc do
43
- gem "rdoc", "6.5.0"
47
+ gem "rdoc", "6.6.2"
44
48
  end
data/README.md CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
55
55
 
56
56
  - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
57
57
  - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
58
+ - Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
58
59
 
59
60
  Please do not mail the maintainers at their personal addresses.
60
61
 
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
90
91
  - Updating packaged libraries for non-security-related reasons.
91
92
  - Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
92
93
  - Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
94
+ - Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
95
+
93
96
 
94
97
  `Patch`:
95
98
 
@@ -111,7 +114,7 @@ You can help sponsor the maintainers of this software through one of these organ
111
114
 
112
115
  Requirements:
113
116
 
114
- - Ruby >= 2.7
117
+ - Ruby >= 3.0
115
118
  - JRuby >= 9.4.0.0
116
119
 
117
120
 
data/dependencies.yml CHANGED
@@ -1,16 +1,17 @@
1
+ ---
1
2
  libxml2:
2
- version: "2.10.3"
3
- sha256: "5d2cc3d78bec3dbe212a9d7fa629ada25a7da928af432c93060ff5c17ee28a9c"
4
- # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.10/libxml2-2.10.3.sha256sum
3
+ version: "2.12.7"
4
+ sha256: "24ae78ff1363a973e6d8beba941a7945da2ac056e19b53956aeb6927fd6cfb56"
5
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.7.sha256sum
5
6
 
6
7
  libxslt:
7
- version: "1.1.37"
8
- sha256: "3a4b27dc8027ccd6146725950336f1ec520928f320f144eb5fa7990ae6123ab4"
9
- # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.37.sha256sum
8
+ version: "1.1.39"
9
+ sha256: "2a20ad621148339b0759c4d4e96719362dee64c9a096dbba625ba053846349f0"
10
+ # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
10
11
 
11
12
  zlib:
12
- version: "1.2.13"
13
- sha256: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"
13
+ version: "1.3.1"
14
+ sha256: "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23"
14
15
  # SHA-256 hash provided on http://zlib.net/
15
16
 
16
17
  libiconv:
@@ -15,7 +15,7 @@ PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."
15
15
  REQUIRED_LIBXML_VERSION = "2.6.21"
16
16
  RECOMMENDED_LIBXML_VERSION = "2.9.3"
17
17
 
18
- REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.0" # keep this version in sync with the one in the gemspec
18
+ REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.2" # keep this version in sync with the one in the gemspec
19
19
  REQUIRED_PKG_CONFIG_VERSION = "~> 1.1"
20
20
 
21
21
  # Keep track of what versions of what libraries we build against
@@ -229,7 +229,7 @@ def gnome_source
229
229
  if ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"]
230
230
  "https://download.gnome.org"
231
231
  else
232
- "https://mirror.csclub.uwaterloo.ca/gnome" # old reliable
232
+ "https://muug.ca/mirror/gnome" # old reliable
233
233
  end
234
234
  end
235
235
 
@@ -372,7 +372,7 @@ def try_link_iconv(using = nil)
372
372
  preserving_globals do
373
373
  yield if block_given?
374
374
 
375
- try_link(<<~'SRC', opt)
375
+ try_link(<<~SRC, opt)
376
376
  #include <stdlib.h>
377
377
  #include <iconv.h>
378
378
  int main(void)
@@ -443,7 +443,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
443
443
  # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
444
444
  # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
445
445
  recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
446
- recipe.host = recipe.host.gsub(/i386/, "i686")
446
+ recipe.host = recipe.host.gsub("i386", "i686")
447
447
 
448
448
  recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
449
449
  recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
@@ -651,8 +651,7 @@ append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil?
651
651
  append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
652
652
  $LIBS = concat_flags($LIBS, ENV["LIBS"])
653
653
 
654
- # nokogumbo code uses C90/C99 features, let's make sure older compilers won't give
655
- # errors/warnings. see #2302
654
+ # libgumbo uses C90/C99 features, see #2302
656
655
  append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
657
656
 
658
657
  # gumbo html5 serialization is slower with O3, let's make sure we use O2
@@ -695,14 +694,34 @@ append_cppflags(' "-Idummypath"') if windows?
695
694
 
696
695
  if config_system_libraries?
697
696
  message "Building nokogiri using system libraries.\n"
698
- ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z",
699
- headers: "zlib.h", func: "gzdopen")
700
- ensure_package_configuration(opt: "xml2", pc: "libxml-2.0", lib: "xml2",
701
- headers: "libxml/parser.h", func: "xmlParseDoc")
702
- ensure_package_configuration(opt: "xslt", pc: "libxslt", lib: "xslt",
703
- headers: "libxslt/xslt.h", func: "xsltParseStylesheetDoc")
704
- ensure_package_configuration(opt: "exslt", pc: "libexslt", lib: "exslt",
705
- headers: "libexslt/exslt.h", func: "exsltFuncRegister")
697
+ ensure_package_configuration(
698
+ opt: "zlib",
699
+ pc: "zlib",
700
+ lib: "z",
701
+ headers: "zlib.h",
702
+ func: "gzdopen",
703
+ )
704
+ ensure_package_configuration(
705
+ opt: "xml2",
706
+ pc: "libxml-2.0",
707
+ lib: "xml2",
708
+ headers: "libxml/parser.h",
709
+ func: "xmlParseDoc",
710
+ )
711
+ ensure_package_configuration(
712
+ opt: "xslt",
713
+ pc: "libxslt",
714
+ lib: "xslt",
715
+ headers: "libxslt/xslt.h",
716
+ func: "xsltParseStylesheetDoc",
717
+ )
718
+ ensure_package_configuration(
719
+ opt: "exslt",
720
+ pc: "libexslt",
721
+ lib: "exslt",
722
+ headers: "libexslt/exslt.h",
723
+ func: "exsltFuncRegister",
724
+ )
706
725
 
707
726
  have_libxml_headers?(REQUIRED_LIBXML_VERSION) ||
708
727
  abort("ERROR: libxml2 version #{REQUIRED_LIBXML_VERSION} or later is required!")
@@ -785,8 +804,12 @@ else
785
804
  end
786
805
 
787
806
  unless nix?
788
- libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p,
789
- cross_build_p) do |recipe|
807
+ libiconv_recipe = process_recipe(
808
+ "libiconv",
809
+ dependencies["libiconv"]["version"],
810
+ static_p,
811
+ cross_build_p,
812
+ ) do |recipe|
790
813
  recipe.files = [{
791
814
  url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
792
815
  sha256: dependencies["libiconv"]["sha256"],
@@ -824,15 +847,25 @@ else
824
847
  if zlib_recipe
825
848
  append_cppflags("-I#{zlib_recipe.path}/include")
826
849
  $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH
827
- ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z",
828
- headers: "zlib.h", func: "gzdopen")
850
+ ensure_package_configuration(
851
+ opt: "zlib",
852
+ pc: "zlib",
853
+ lib: "z",
854
+ headers: "zlib.h",
855
+ func: "gzdopen",
856
+ )
829
857
  end
830
858
 
831
859
  if libiconv_recipe
832
860
  append_cppflags("-I#{libiconv_recipe.path}/include")
833
861
  $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH
834
- ensure_package_configuration(opt: "iconv", pc: "iconv", lib: "iconv",
835
- headers: "iconv.h", func: "iconv_open")
862
+ ensure_package_configuration(
863
+ opt: "iconv",
864
+ pc: "iconv",
865
+ lib: "iconv",
866
+ headers: "iconv.h",
867
+ func: "iconv_open",
868
+ )
836
869
  end
837
870
 
838
871
  libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe|
@@ -848,8 +881,13 @@ else
848
881
  recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort
849
882
  end
850
883
 
884
+ cppflags = concat_flags(ENV["CPPFLAGS"])
851
885
  cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g")
852
886
 
887
+ if cross_build_p
888
+ cppflags = concat_flags(cppflags, "-DNOKOGIRI_PRECOMPILED_LIBRARIES")
889
+ end
890
+
853
891
  if zlib_recipe
854
892
  recipe.configure_options << "--with-zlib=#{zlib_recipe.path}"
855
893
  end
@@ -880,6 +918,8 @@ else
880
918
  "--with-c14n",
881
919
  "--with-debug",
882
920
  "--with-threads",
921
+ "--without-tls", # see https://github.com/sparklemotion/nokogiri/issues/3031
922
+ "CPPFLAGS=#{cppflags}",
883
923
  "CFLAGS=#{cflags}",
884
924
  ]
885
925
  end
@@ -1023,7 +1063,7 @@ libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_b
1023
1063
 
1024
1064
  env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
1025
1065
  if config_cross_build?
1026
- if /darwin/.match?(host)
1066
+ if host.include?("darwin")
1027
1067
  env["AR"] = "#{host}-libtool"
1028
1068
  env["ARFLAGS"] = "-o"
1029
1069
  else
@@ -1047,7 +1087,6 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
1047
1087
  have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
1048
1088
  have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
1049
1089
  have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
1050
- have_func("rb_gc_location") # introduced in Ruby 2.7
1051
1090
  have_func("rb_category_warning") # introduced in Ruby 3.0
1052
1091
 
1053
1092
  other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
@@ -1057,12 +1096,16 @@ unless config_system_libraries?
1057
1096
  if cross_build_p
1058
1097
  # When precompiling native gems, copy packaged libraries' headers to ext/nokogiri/include
1059
1098
  # These are packaged up by the cross-compiling callback in the ExtensionTask
1060
- copy_packaged_libraries_headers(to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
1061
- from_recipes: [libxml2_recipe, libxslt_recipe])
1099
+ copy_packaged_libraries_headers(
1100
+ to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
1101
+ from_recipes: [libxml2_recipe, libxslt_recipe],
1102
+ )
1062
1103
  else
1063
1104
  # When compiling during installation, install packaged libraries' header files into ext/nokogiri/include
1064
- copy_packaged_libraries_headers(to_path: "include",
1065
- from_recipes: [libxml2_recipe, libxslt_recipe])
1105
+ copy_packaged_libraries_headers(
1106
+ to_path: "include",
1107
+ from_recipes: [libxml2_recipe, libxslt_recipe],
1108
+ )
1066
1109
  $INSTALLFILES << ["include/**/*.h", "$(rubylibdir)"]
1067
1110
  end
1068
1111
  end
@@ -144,8 +144,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r
144
144
  static VALUE
145
145
  rb_html_document_type(VALUE self)
146
146
  {
147
- htmlDocPtr doc;
148
- Data_Get_Struct(self, xmlDoc, doc);
147
+ htmlDocPtr doc = noko_xml_document_unwrap(self);
149
148
  return INT2NUM(doc->type);
150
149
  }
151
150
 
@@ -1,5 +1,10 @@
1
1
  #include <nokogiri.h>
2
2
 
3
+ static const rb_data_type_t html4_element_description_type = {
4
+ .wrap_struct_name = "Nokogiri::HTML4::ElementDescription",
5
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
6
+ };
7
+
3
8
  VALUE cNokogiriHtml4ElementDescription ;
4
9
 
5
10
  /*
@@ -15,7 +20,7 @@ required_attributes(VALUE self)
15
20
  VALUE list;
16
21
  int i;
17
22
 
18
- Data_Get_Struct(self, htmlElemDesc, description);
23
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
19
24
 
20
25
  list = rb_ary_new();
21
26
 
@@ -41,7 +46,7 @@ deprecated_attributes(VALUE self)
41
46
  VALUE list;
42
47
  int i;
43
48
 
44
- Data_Get_Struct(self, htmlElemDesc, description);
49
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
45
50
 
46
51
  list = rb_ary_new();
47
52
 
@@ -67,7 +72,7 @@ optional_attributes(VALUE self)
67
72
  VALUE list;
68
73
  int i;
69
74
 
70
- Data_Get_Struct(self, htmlElemDesc, description);
75
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
71
76
 
72
77
  list = rb_ary_new();
73
78
 
@@ -90,7 +95,7 @@ static VALUE
90
95
  default_sub_element(VALUE self)
91
96
  {
92
97
  const htmlElemDesc *description;
93
- Data_Get_Struct(self, htmlElemDesc, description);
98
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
94
99
 
95
100
  if (description->defaultsubelt) {
96
101
  return NOKOGIRI_STR_NEW2(description->defaultsubelt);
@@ -112,7 +117,7 @@ sub_elements(VALUE self)
112
117
  VALUE list;
113
118
  int i;
114
119
 
115
- Data_Get_Struct(self, htmlElemDesc, description);
120
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
116
121
 
117
122
  list = rb_ary_new();
118
123
 
@@ -135,7 +140,7 @@ static VALUE
135
140
  description(VALUE self)
136
141
  {
137
142
  const htmlElemDesc *description;
138
- Data_Get_Struct(self, htmlElemDesc, description);
143
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
139
144
 
140
145
  return NOKOGIRI_STR_NEW2(description->desc);
141
146
  }
@@ -150,7 +155,7 @@ static VALUE
150
155
  inline_eh(VALUE self)
151
156
  {
152
157
  const htmlElemDesc *description;
153
- Data_Get_Struct(self, htmlElemDesc, description);
158
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
154
159
 
155
160
  if (description->isinline) { return Qtrue; }
156
161
  return Qfalse;
@@ -166,7 +171,7 @@ static VALUE
166
171
  deprecated_eh(VALUE self)
167
172
  {
168
173
  const htmlElemDesc *description;
169
- Data_Get_Struct(self, htmlElemDesc, description);
174
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
170
175
 
171
176
  if (description->depr) { return Qtrue; }
172
177
  return Qfalse;
@@ -182,7 +187,7 @@ static VALUE
182
187
  empty_eh(VALUE self)
183
188
  {
184
189
  const htmlElemDesc *description;
185
- Data_Get_Struct(self, htmlElemDesc, description);
190
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
186
191
 
187
192
  if (description->empty) { return Qtrue; }
188
193
  return Qfalse;
@@ -198,7 +203,7 @@ static VALUE
198
203
  save_end_tag_eh(VALUE self)
199
204
  {
200
205
  const htmlElemDesc *description;
201
- Data_Get_Struct(self, htmlElemDesc, description);
206
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
202
207
 
203
208
  if (description->saveEndTag) { return Qtrue; }
204
209
  return Qfalse;
@@ -214,7 +219,7 @@ static VALUE
214
219
  implied_end_tag_eh(VALUE self)
215
220
  {
216
221
  const htmlElemDesc *description;
217
- Data_Get_Struct(self, htmlElemDesc, description);
222
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
218
223
 
219
224
  if (description->endTag) { return Qtrue; }
220
225
  return Qfalse;
@@ -230,7 +235,7 @@ static VALUE
230
235
  implied_start_tag_eh(VALUE self)
231
236
  {
232
237
  const htmlElemDesc *description;
233
- Data_Get_Struct(self, htmlElemDesc, description);
238
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
234
239
 
235
240
  if (description->startTag) { return Qtrue; }
236
241
  return Qfalse;
@@ -246,7 +251,7 @@ static VALUE
246
251
  name(VALUE self)
247
252
  {
248
253
  const htmlElemDesc *description;
249
- Data_Get_Struct(self, htmlElemDesc, description);
254
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
250
255
 
251
256
  if (NULL == description->name) { return Qnil; }
252
257
  return NOKOGIRI_STR_NEW2(description->name);
@@ -266,7 +271,7 @@ get_description(VALUE klass, VALUE tag_name)
266
271
  );
267
272
 
268
273
  if (NULL == description) { return Qnil; }
269
- return Data_Wrap_Struct(klass, 0, 0, DISCARD_CONST_QUAL(void *, description));
274
+ return TypedData_Wrap_Struct(klass, &html4_element_description_type, DISCARD_CONST_QUAL(void *, description));
270
275
  }
271
276
 
272
277
  void
@@ -2,13 +2,6 @@
2
2
 
3
3
  VALUE cNokogiriHtml4SaxParserContext ;
4
4
 
5
- static void
6
- deallocate(xmlParserCtxtPtr ctxt)
7
- {
8
- ctxt->sax = NULL;
9
- htmlFreeParserCtxt(ctxt);
10
- }
11
-
12
5
  static VALUE
13
6
  parse_memory(VALUE klass, VALUE data, VALUE encoding)
14
7
  {
@@ -38,7 +31,7 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
38
31
  }
39
32
  }
40
33
 
41
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
34
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
42
35
  }
43
36
 
44
37
  static VALUE
@@ -48,7 +41,13 @@ parse_file(VALUE klass, VALUE filename, VALUE encoding)
48
41
  StringValueCStr(filename),
49
42
  StringValueCStr(encoding)
50
43
  );
51
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
44
+
45
+ if (ctxt->sax) {
46
+ xmlFree(ctxt->sax);
47
+ ctxt->sax = NULL;
48
+ }
49
+
50
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
52
51
  }
53
52
 
54
53
  static VALUE
@@ -82,13 +81,8 @@ parse_with(VALUE self, VALUE sax_handler)
82
81
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
83
82
  }
84
83
 
85
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
- Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
87
-
88
- /* Free the sax handler since we'll assign our own */
89
- if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
90
- xmlFree(ctxt->sax);
91
- }
84
+ ctxt = noko_xml_sax_parser_context_unwrap(self);
85
+ sax = noko_sax_handler_unwrap(sax_handler);
92
86
 
93
87
  ctxt->sax = sax;
94
88
  ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
@@ -17,7 +17,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
17
17
  int status = 0;
18
18
  libxmlStructuredErrorHandlerState handler_state;
19
19
 
20
- Data_Get_Struct(self, xmlParserCtxt, ctx);
20
+ ctx = noko_xml_sax_push_parser_unwrap(self);
21
21
 
22
22
  if (Qnil != _chunk) {
23
23
  chunk = StringValuePtr(_chunk);
@@ -32,7 +32,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
32
32
 
33
33
  if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
34
34
  // TODO: there appear to be no tests for this block
35
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
35
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
36
36
  Nokogiri_error_raise(NULL, e);
37
37
  }
38
38
 
@@ -54,7 +54,7 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
54
54
  htmlParserCtxtPtr ctx;
55
55
  xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
56
56
 
57
- Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
57
+ sax = noko_sax_handler_unwrap(_xml_sax);
58
58
 
59
59
  if (_filename != Qnil) { filename = StringValueCStr(_filename); }
60
60
 
@@ -137,6 +137,48 @@ noko_io_close(void *io)
137
137
  }
138
138
 
139
139
 
140
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
141
+ # define NOKOGIRI_WINDOWS_DLLS 1
142
+ #else
143
+ # define NOKOGIRI_WINDOWS_DLLS 0
144
+ #endif
145
+
146
+ //
147
+ // | dlls || true | false |
148
+ // | nlmm || | |
149
+ // |-----------++---------+---------|
150
+ // | NULL || default | ruby |
151
+ // | "random" || default | ruby |
152
+ // | "ruby" || ruby | ruby |
153
+ // | "default" || default | default |
154
+ //
155
+ // We choose *not* to use Ruby's memory management functions with windows DLLs because of this
156
+ // issue: https://github.com/sparklemotion/nokogiri/issues/2241
157
+ //
158
+ static void
159
+ set_libxml_memory_management(void)
160
+ {
161
+ const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
162
+ if (nlmm) {
163
+ if (strcmp(nlmm, "default") == 0) {
164
+ goto libxml_uses_default_memory_management;
165
+ } else if (strcmp(nlmm, "ruby") == 0) {
166
+ goto libxml_uses_ruby_memory_management;
167
+ }
168
+ }
169
+ if (NOKOGIRI_WINDOWS_DLLS) {
170
+ libxml_uses_default_memory_management:
171
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
172
+ return;
173
+ } else {
174
+ libxml_uses_ruby_memory_management:
175
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
176
+ xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
177
+ return;
178
+ }
179
+ }
180
+
181
+
140
182
  void
141
183
  Init_nokogiri(void)
142
184
  {
@@ -150,6 +192,10 @@ Init_nokogiri(void)
150
192
  mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
151
193
  mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
152
194
 
195
+ set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
196
+ xmlInitParser();
197
+ exsltRegisterAll();
198
+
153
199
  rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
154
200
  rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
155
201
 
@@ -182,30 +228,6 @@ Init_nokogiri(void)
182
228
  rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
183
229
  #endif
184
230
 
185
- #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
186
- /*
187
- * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
188
- * issue in libxml 2.9.12:
189
- *
190
- * https://github.com/sparklemotion/nokogiri/issues/2241
191
- *
192
- * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
193
- * this config only for the specific libxml2 versions 2.9.12.
194
- *
195
- * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
196
- * default memory management functions (recall that this config was introduced to reduce memory
197
- * bloat and allow Ruby to GC more often); but we should *really* test with production workloads
198
- * before making that kind of a potentially-invasive change.
199
- */
200
- rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
201
- #else
202
- rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
203
- xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
204
- #endif
205
-
206
- xmlInitParser();
207
- exsltRegisterAll();
208
-
209
231
  if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
210
232
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
211
233
  } else {