nokogiri 1.14.2 → 1.16.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +19 -15
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +69 -26
  6. data/ext/nokogiri/html4_document.c +1 -2
  7. data/ext/nokogiri/html4_element_description.c +19 -14
  8. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  9. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  10. data/ext/nokogiri/nokogiri.c +46 -24
  11. data/ext/nokogiri/nokogiri.h +23 -5
  12. data/ext/nokogiri/test_global_handlers.c +1 -1
  13. data/ext/nokogiri/xml_attr.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +30 -17
  15. data/ext/nokogiri/xml_comment.c +1 -1
  16. data/ext/nokogiri/xml_document.c +113 -25
  17. data/ext/nokogiri/xml_document_fragment.c +1 -1
  18. data/ext/nokogiri/xml_dtd.c +1 -1
  19. data/ext/nokogiri/xml_element_content.c +32 -29
  20. data/ext/nokogiri/xml_element_decl.c +5 -5
  21. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  22. data/ext/nokogiri/xml_entity_reference.c +1 -1
  23. data/ext/nokogiri/xml_namespace.c +11 -16
  24. data/ext/nokogiri/xml_node.c +13 -16
  25. data/ext/nokogiri/xml_node_set.c +125 -105
  26. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  27. data/ext/nokogiri/xml_reader.c +61 -74
  28. data/ext/nokogiri/xml_relax_ng.c +66 -79
  29. data/ext/nokogiri/xml_sax_parser.c +24 -5
  30. data/ext/nokogiri/xml_sax_parser_context.c +50 -25
  31. data/ext/nokogiri/xml_sax_push_parser.c +30 -9
  32. data/ext/nokogiri/xml_schema.c +94 -115
  33. data/ext/nokogiri/xml_syntax_error.c +3 -3
  34. data/ext/nokogiri/xml_text.c +26 -13
  35. data/ext/nokogiri/xml_xpath_context.c +153 -83
  36. data/ext/nokogiri/xslt_stylesheet.c +111 -53
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +8 -4
  39. data/gumbo-parser/src/foreign_attrs.c +13 -14
  40. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  41. data/gumbo-parser/src/parser.c +21 -5
  42. data/gumbo-parser/src/tokenizer.c +1 -0
  43. data/lib/nokogiri/css/parser_extras.rb +1 -1
  44. data/lib/nokogiri/css/xpath_visitor.rb +3 -23
  45. data/lib/nokogiri/extension.rb +1 -1
  46. data/lib/nokogiri/html4/document.rb +1 -1
  47. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  48. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  49. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  50. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  51. data/lib/nokogiri/html5/node.rb +5 -0
  52. data/lib/nokogiri/html5.rb +0 -63
  53. data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
  54. data/lib/nokogiri/version/constant.rb +1 -1
  55. data/lib/nokogiri/version/info.rb +6 -5
  56. data/lib/nokogiri/xml/attr.rb +2 -2
  57. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  58. data/lib/nokogiri/xml/document.rb +4 -5
  59. data/lib/nokogiri/xml/document_fragment.rb +3 -3
  60. data/lib/nokogiri/xml/element_content.rb +10 -2
  61. data/lib/nokogiri/xml/element_decl.rb +4 -2
  62. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  63. data/lib/nokogiri/xml/namespace.rb +1 -2
  64. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  65. data/lib/nokogiri/xml/node.rb +53 -37
  66. data/lib/nokogiri/xml/node_set.rb +3 -3
  67. data/lib/nokogiri/xml/pp/node.rb +23 -12
  68. data/lib/nokogiri/xml/reader.rb +10 -9
  69. data/lib/nokogiri/xml/sax/document.rb +1 -1
  70. data/lib/nokogiri/xml/searchable.rb +21 -13
  71. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  72. data/lib/nokogiri/xml.rb +1 -1
  73. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  74. data/lib/nokogiri/xslt.rb +74 -4
  75. data/lib/nokogiri.rb +13 -5
  76. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  77. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  78. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  79. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  80. data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
  81. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  82. metadata +16 -12
  83. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  84. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  85. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b926bf2ba1c28516c2dfc541d4b82e0f92ef3c585d6843334d7f1d44fe33abcb
4
- data.tar.gz: ec0ff5c75bc44ed7fe85761cfa00b28e92a03afdcd525356d31bd74060a2dbe5
3
+ metadata.gz: 265995b2c377e4457aca61465f5adb40912633b08a56df3aa709570d3274a3e2
4
+ data.tar.gz: 61e6f5f65e8b272032089b21e046f460f1f1ad39dba13f847fb8b6fc29bafd01
5
5
  SHA512:
6
- metadata.gz: 11d8784c27eb523c7cba2c67efd1321c10253ce9624217b1b0383d246354dd8328466474c7968a7844b8ca01ca8e450e7d35bfee56d917b381949e721b435ce4
7
- data.tar.gz: '08db49457bb62572e68f5ec8b1eb29da18a9a632c6ef6ebb4ac9e193a29c0beac253d8aa2ed96a35513e70eb7a495344742d70e6b8ad81126b52e07ed400569d'
6
+ metadata.gz: 4e01dfe5f6511eb28d7db99dabb4ecd9b82ee6e749e1ad5a54251914f1068b9491d5b41676cd0f82a848466ceaacd0c65182d66f2d123c4545d11c9152d9150b
7
+ data.tar.gz: 74684f41f6c6170e21ab0f0e561cf4ed75764deda9f4c8442fcc317eafdf99503df2b623c948f4272348cbb4a5ca5af6ca5da140ba6eb04d58d833b75d01c45d
data/Gemfile CHANGED
@@ -5,34 +5,38 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  group :development do
8
+ # ruby 3.4.0-dev removed some gems from the default set
9
+ #
10
+ # TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
11
+ # transitively.
12
+ gem "mutex_m"
13
+
8
14
  # bootstrapping
9
15
  gem "bundler", "~> 2.3"
10
- gem "rake", "= 13.0.6"
16
+ gem "rake", "13.1.0"
11
17
 
12
18
  # building extensions
13
- gem "rake-compiler", "= 1.2.1"
14
- gem "rake-compiler-dock", "= 1.3.0"
15
-
16
- # documentation
17
- gem "hoe-markdown", "= 1.4.0"
19
+ gem "rake-compiler", "1.2.6"
20
+ gem "rake-compiler-dock", "1.4.0"
18
21
 
19
22
  # parser generator
20
23
  gem "rexical", "= 1.0.7"
21
24
 
22
25
  # tests
23
- gem "minitest", "5.17.0"
24
- gem "minitest-reporters", "= 1.5.0"
25
- gem "ruby_memcheck", "1.2.0" unless RUBY_PLATFORM == "java"
26
- gem "simplecov", "= 0.21.2"
26
+ gem "minitest", "5.21.2"
27
+ gem "minitest-parallel_fork", "2.0.0"
28
+ gem "ruby_memcheck", "2.3.0"
27
29
  gem "rubyzip", "~> 2.3.2"
30
+ gem "simplecov", "= 0.21.2"
28
31
 
29
32
  # rubocop
30
33
  if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
31
- gem "rubocop", "1.44.1"
32
- gem "rubocop-minitest", "0.27.0"
33
- gem "rubocop-performance", "1.15.2"
34
+ gem "rubocop", "1.60.2"
35
+ gem "rubocop-minitest", "0.34.5"
36
+ gem "rubocop-packaging", "0.5.2"
37
+ gem "rubocop-performance", "1.20.2"
34
38
  gem "rubocop-rake", "= 0.6.0"
35
- gem "rubocop-shopify", "2.10.1"
39
+ gem "rubocop-shopify", "2.14.0"
36
40
  end
37
41
  end
38
42
 
@@ -40,5 +44,5 @@ end
40
44
  # `bundle config set --local without rdoc`
41
45
  # Then re-run `bundle install`.
42
46
  group :rdoc do
43
- gem "rdoc", "6.5.0"
47
+ gem "rdoc", "6.6.2"
44
48
  end
data/README.md CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
55
55
 
56
56
  - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
57
57
  - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
58
+ - Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
58
59
 
59
60
  Please do not mail the maintainers at their personal addresses.
60
61
 
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
90
91
  - Updating packaged libraries for non-security-related reasons.
91
92
  - Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
92
93
  - Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
94
+ - Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
95
+
93
96
 
94
97
  `Patch`:
95
98
 
@@ -111,7 +114,7 @@ You can help sponsor the maintainers of this software through one of these organ
111
114
 
112
115
  Requirements:
113
116
 
114
- - Ruby >= 2.7
117
+ - Ruby >= 3.0
115
118
  - JRuby >= 9.4.0.0
116
119
 
117
120
 
data/dependencies.yml CHANGED
@@ -1,16 +1,17 @@
1
+ ---
1
2
  libxml2:
2
- version: "2.10.3"
3
- sha256: "5d2cc3d78bec3dbe212a9d7fa629ada25a7da928af432c93060ff5c17ee28a9c"
4
- # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.10/libxml2-2.10.3.sha256sum
3
+ version: "2.12.8"
4
+ sha256: "43ad877b018bc63deb2468d71f95219c2fac196876ef36d1bee51d226173ec93"
5
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.8.sha256sum
5
6
 
6
7
  libxslt:
7
- version: "1.1.37"
8
- sha256: "3a4b27dc8027ccd6146725950336f1ec520928f320f144eb5fa7990ae6123ab4"
9
- # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.37.sha256sum
8
+ version: "1.1.39"
9
+ sha256: "2a20ad621148339b0759c4d4e96719362dee64c9a096dbba625ba053846349f0"
10
+ # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
10
11
 
11
12
  zlib:
12
- version: "1.2.13"
13
- sha256: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"
13
+ version: "1.3.1"
14
+ sha256: "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23"
14
15
  # SHA-256 hash provided on http://zlib.net/
15
16
 
16
17
  libiconv:
@@ -15,7 +15,7 @@ PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."
15
15
  REQUIRED_LIBXML_VERSION = "2.6.21"
16
16
  RECOMMENDED_LIBXML_VERSION = "2.9.3"
17
17
 
18
- REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.0" # keep this version in sync with the one in the gemspec
18
+ REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.2" # keep this version in sync with the one in the gemspec
19
19
  REQUIRED_PKG_CONFIG_VERSION = "~> 1.1"
20
20
 
21
21
  # Keep track of what versions of what libraries we build against
@@ -229,7 +229,7 @@ def gnome_source
229
229
  if ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"]
230
230
  "https://download.gnome.org"
231
231
  else
232
- "https://mirror.csclub.uwaterloo.ca/gnome" # old reliable
232
+ "https://muug.ca/mirror/gnome" # old reliable
233
233
  end
234
234
  end
235
235
 
@@ -372,7 +372,7 @@ def try_link_iconv(using = nil)
372
372
  preserving_globals do
373
373
  yield if block_given?
374
374
 
375
- try_link(<<~'SRC', opt)
375
+ try_link(<<~SRC, opt)
376
376
  #include <stdlib.h>
377
377
  #include <iconv.h>
378
378
  int main(void)
@@ -443,7 +443,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
443
443
  # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
444
444
  # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
445
445
  recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
446
- recipe.host = recipe.host.gsub(/i386/, "i686")
446
+ recipe.host = recipe.host.gsub("i386", "i686")
447
447
 
448
448
  recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
449
449
  recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
@@ -651,8 +651,7 @@ append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil?
651
651
  append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
652
652
  $LIBS = concat_flags($LIBS, ENV["LIBS"])
653
653
 
654
- # nokogumbo code uses C90/C99 features, let's make sure older compilers won't give
655
- # errors/warnings. see #2302
654
+ # libgumbo uses C90/C99 features, see #2302
656
655
  append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
657
656
 
658
657
  # gumbo html5 serialization is slower with O3, let's make sure we use O2
@@ -695,14 +694,34 @@ append_cppflags(' "-Idummypath"') if windows?
695
694
 
696
695
  if config_system_libraries?
697
696
  message "Building nokogiri using system libraries.\n"
698
- ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z",
699
- headers: "zlib.h", func: "gzdopen")
700
- ensure_package_configuration(opt: "xml2", pc: "libxml-2.0", lib: "xml2",
701
- headers: "libxml/parser.h", func: "xmlParseDoc")
702
- ensure_package_configuration(opt: "xslt", pc: "libxslt", lib: "xslt",
703
- headers: "libxslt/xslt.h", func: "xsltParseStylesheetDoc")
704
- ensure_package_configuration(opt: "exslt", pc: "libexslt", lib: "exslt",
705
- headers: "libexslt/exslt.h", func: "exsltFuncRegister")
697
+ ensure_package_configuration(
698
+ opt: "zlib",
699
+ pc: "zlib",
700
+ lib: "z",
701
+ headers: "zlib.h",
702
+ func: "gzdopen",
703
+ )
704
+ ensure_package_configuration(
705
+ opt: "xml2",
706
+ pc: "libxml-2.0",
707
+ lib: "xml2",
708
+ headers: "libxml/parser.h",
709
+ func: "xmlParseDoc",
710
+ )
711
+ ensure_package_configuration(
712
+ opt: "xslt",
713
+ pc: "libxslt",
714
+ lib: "xslt",
715
+ headers: "libxslt/xslt.h",
716
+ func: "xsltParseStylesheetDoc",
717
+ )
718
+ ensure_package_configuration(
719
+ opt: "exslt",
720
+ pc: "libexslt",
721
+ lib: "exslt",
722
+ headers: "libexslt/exslt.h",
723
+ func: "exsltFuncRegister",
724
+ )
706
725
 
707
726
  have_libxml_headers?(REQUIRED_LIBXML_VERSION) ||
708
727
  abort("ERROR: libxml2 version #{REQUIRED_LIBXML_VERSION} or later is required!")
@@ -785,8 +804,12 @@ else
785
804
  end
786
805
 
787
806
  unless nix?
788
- libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p,
789
- cross_build_p) do |recipe|
807
+ libiconv_recipe = process_recipe(
808
+ "libiconv",
809
+ dependencies["libiconv"]["version"],
810
+ static_p,
811
+ cross_build_p,
812
+ ) do |recipe|
790
813
  recipe.files = [{
791
814
  url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
792
815
  sha256: dependencies["libiconv"]["sha256"],
@@ -824,15 +847,25 @@ else
824
847
  if zlib_recipe
825
848
  append_cppflags("-I#{zlib_recipe.path}/include")
826
849
  $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH
827
- ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z",
828
- headers: "zlib.h", func: "gzdopen")
850
+ ensure_package_configuration(
851
+ opt: "zlib",
852
+ pc: "zlib",
853
+ lib: "z",
854
+ headers: "zlib.h",
855
+ func: "gzdopen",
856
+ )
829
857
  end
830
858
 
831
859
  if libiconv_recipe
832
860
  append_cppflags("-I#{libiconv_recipe.path}/include")
833
861
  $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH
834
- ensure_package_configuration(opt: "iconv", pc: "iconv", lib: "iconv",
835
- headers: "iconv.h", func: "iconv_open")
862
+ ensure_package_configuration(
863
+ opt: "iconv",
864
+ pc: "iconv",
865
+ lib: "iconv",
866
+ headers: "iconv.h",
867
+ func: "iconv_open",
868
+ )
836
869
  end
837
870
 
838
871
  libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe|
@@ -848,8 +881,13 @@ else
848
881
  recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort
849
882
  end
850
883
 
884
+ cppflags = concat_flags(ENV["CPPFLAGS"])
851
885
  cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g")
852
886
 
887
+ if cross_build_p
888
+ cppflags = concat_flags(cppflags, "-DNOKOGIRI_PRECOMPILED_LIBRARIES")
889
+ end
890
+
853
891
  if zlib_recipe
854
892
  recipe.configure_options << "--with-zlib=#{zlib_recipe.path}"
855
893
  end
@@ -880,6 +918,8 @@ else
880
918
  "--with-c14n",
881
919
  "--with-debug",
882
920
  "--with-threads",
921
+ "--without-tls", # see https://github.com/sparklemotion/nokogiri/issues/3031
922
+ "CPPFLAGS=#{cppflags}",
883
923
  "CFLAGS=#{cflags}",
884
924
  ]
885
925
  end
@@ -1023,7 +1063,7 @@ libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_b
1023
1063
 
1024
1064
  env = { "CC" => gcc_cmd, "CFLAGS" => cflags }
1025
1065
  if config_cross_build?
1026
- if /darwin/.match?(host)
1066
+ if host.include?("darwin")
1027
1067
  env["AR"] = "#{host}-libtool"
1028
1068
  env["ARFLAGS"] = "-o"
1029
1069
  else
@@ -1047,7 +1087,6 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
1047
1087
  have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
1048
1088
  have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
1049
1089
  have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
1050
- have_func("rb_gc_location") # introduced in Ruby 2.7
1051
1090
  have_func("rb_category_warning") # introduced in Ruby 3.0
1052
1091
 
1053
1092
  other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
@@ -1057,12 +1096,16 @@ unless config_system_libraries?
1057
1096
  if cross_build_p
1058
1097
  # When precompiling native gems, copy packaged libraries' headers to ext/nokogiri/include
1059
1098
  # These are packaged up by the cross-compiling callback in the ExtensionTask
1060
- copy_packaged_libraries_headers(to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
1061
- from_recipes: [libxml2_recipe, libxslt_recipe])
1099
+ copy_packaged_libraries_headers(
1100
+ to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
1101
+ from_recipes: [libxml2_recipe, libxslt_recipe],
1102
+ )
1062
1103
  else
1063
1104
  # When compiling during installation, install packaged libraries' header files into ext/nokogiri/include
1064
- copy_packaged_libraries_headers(to_path: "include",
1065
- from_recipes: [libxml2_recipe, libxslt_recipe])
1105
+ copy_packaged_libraries_headers(
1106
+ to_path: "include",
1107
+ from_recipes: [libxml2_recipe, libxslt_recipe],
1108
+ )
1066
1109
  $INSTALLFILES << ["include/**/*.h", "$(rubylibdir)"]
1067
1110
  end
1068
1111
  end
@@ -144,8 +144,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r
144
144
  static VALUE
145
145
  rb_html_document_type(VALUE self)
146
146
  {
147
- htmlDocPtr doc;
148
- Data_Get_Struct(self, xmlDoc, doc);
147
+ htmlDocPtr doc = noko_xml_document_unwrap(self);
149
148
  return INT2NUM(doc->type);
150
149
  }
151
150
 
@@ -1,5 +1,10 @@
1
1
  #include <nokogiri.h>
2
2
 
3
+ static const rb_data_type_t html4_element_description_type = {
4
+ .wrap_struct_name = "Nokogiri::HTML4::ElementDescription",
5
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
6
+ };
7
+
3
8
  VALUE cNokogiriHtml4ElementDescription ;
4
9
 
5
10
  /*
@@ -15,7 +20,7 @@ required_attributes(VALUE self)
15
20
  VALUE list;
16
21
  int i;
17
22
 
18
- Data_Get_Struct(self, htmlElemDesc, description);
23
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
19
24
 
20
25
  list = rb_ary_new();
21
26
 
@@ -41,7 +46,7 @@ deprecated_attributes(VALUE self)
41
46
  VALUE list;
42
47
  int i;
43
48
 
44
- Data_Get_Struct(self, htmlElemDesc, description);
49
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
45
50
 
46
51
  list = rb_ary_new();
47
52
 
@@ -67,7 +72,7 @@ optional_attributes(VALUE self)
67
72
  VALUE list;
68
73
  int i;
69
74
 
70
- Data_Get_Struct(self, htmlElemDesc, description);
75
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
71
76
 
72
77
  list = rb_ary_new();
73
78
 
@@ -90,7 +95,7 @@ static VALUE
90
95
  default_sub_element(VALUE self)
91
96
  {
92
97
  const htmlElemDesc *description;
93
- Data_Get_Struct(self, htmlElemDesc, description);
98
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
94
99
 
95
100
  if (description->defaultsubelt) {
96
101
  return NOKOGIRI_STR_NEW2(description->defaultsubelt);
@@ -112,7 +117,7 @@ sub_elements(VALUE self)
112
117
  VALUE list;
113
118
  int i;
114
119
 
115
- Data_Get_Struct(self, htmlElemDesc, description);
120
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
116
121
 
117
122
  list = rb_ary_new();
118
123
 
@@ -135,7 +140,7 @@ static VALUE
135
140
  description(VALUE self)
136
141
  {
137
142
  const htmlElemDesc *description;
138
- Data_Get_Struct(self, htmlElemDesc, description);
143
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
139
144
 
140
145
  return NOKOGIRI_STR_NEW2(description->desc);
141
146
  }
@@ -150,7 +155,7 @@ static VALUE
150
155
  inline_eh(VALUE self)
151
156
  {
152
157
  const htmlElemDesc *description;
153
- Data_Get_Struct(self, htmlElemDesc, description);
158
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
154
159
 
155
160
  if (description->isinline) { return Qtrue; }
156
161
  return Qfalse;
@@ -166,7 +171,7 @@ static VALUE
166
171
  deprecated_eh(VALUE self)
167
172
  {
168
173
  const htmlElemDesc *description;
169
- Data_Get_Struct(self, htmlElemDesc, description);
174
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
170
175
 
171
176
  if (description->depr) { return Qtrue; }
172
177
  return Qfalse;
@@ -182,7 +187,7 @@ static VALUE
182
187
  empty_eh(VALUE self)
183
188
  {
184
189
  const htmlElemDesc *description;
185
- Data_Get_Struct(self, htmlElemDesc, description);
190
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
186
191
 
187
192
  if (description->empty) { return Qtrue; }
188
193
  return Qfalse;
@@ -198,7 +203,7 @@ static VALUE
198
203
  save_end_tag_eh(VALUE self)
199
204
  {
200
205
  const htmlElemDesc *description;
201
- Data_Get_Struct(self, htmlElemDesc, description);
206
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
202
207
 
203
208
  if (description->saveEndTag) { return Qtrue; }
204
209
  return Qfalse;
@@ -214,7 +219,7 @@ static VALUE
214
219
  implied_end_tag_eh(VALUE self)
215
220
  {
216
221
  const htmlElemDesc *description;
217
- Data_Get_Struct(self, htmlElemDesc, description);
222
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
218
223
 
219
224
  if (description->endTag) { return Qtrue; }
220
225
  return Qfalse;
@@ -230,7 +235,7 @@ static VALUE
230
235
  implied_start_tag_eh(VALUE self)
231
236
  {
232
237
  const htmlElemDesc *description;
233
- Data_Get_Struct(self, htmlElemDesc, description);
238
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
234
239
 
235
240
  if (description->startTag) { return Qtrue; }
236
241
  return Qfalse;
@@ -246,7 +251,7 @@ static VALUE
246
251
  name(VALUE self)
247
252
  {
248
253
  const htmlElemDesc *description;
249
- Data_Get_Struct(self, htmlElemDesc, description);
254
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
250
255
 
251
256
  if (NULL == description->name) { return Qnil; }
252
257
  return NOKOGIRI_STR_NEW2(description->name);
@@ -266,7 +271,7 @@ get_description(VALUE klass, VALUE tag_name)
266
271
  );
267
272
 
268
273
  if (NULL == description) { return Qnil; }
269
- return Data_Wrap_Struct(klass, 0, 0, DISCARD_CONST_QUAL(void *, description));
274
+ return TypedData_Wrap_Struct(klass, &html4_element_description_type, DISCARD_CONST_QUAL(void *, description));
270
275
  }
271
276
 
272
277
  void
@@ -2,13 +2,6 @@
2
2
 
3
3
  VALUE cNokogiriHtml4SaxParserContext ;
4
4
 
5
- static void
6
- deallocate(xmlParserCtxtPtr ctxt)
7
- {
8
- ctxt->sax = NULL;
9
- htmlFreeParserCtxt(ctxt);
10
- }
11
-
12
5
  static VALUE
13
6
  parse_memory(VALUE klass, VALUE data, VALUE encoding)
14
7
  {
@@ -38,7 +31,7 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
38
31
  }
39
32
  }
40
33
 
41
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
34
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
42
35
  }
43
36
 
44
37
  static VALUE
@@ -48,7 +41,13 @@ parse_file(VALUE klass, VALUE filename, VALUE encoding)
48
41
  StringValueCStr(filename),
49
42
  StringValueCStr(encoding)
50
43
  );
51
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
44
+
45
+ if (ctxt->sax) {
46
+ xmlFree(ctxt->sax);
47
+ ctxt->sax = NULL;
48
+ }
49
+
50
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
52
51
  }
53
52
 
54
53
  static VALUE
@@ -82,13 +81,8 @@ parse_with(VALUE self, VALUE sax_handler)
82
81
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
83
82
  }
84
83
 
85
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
- Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
87
-
88
- /* Free the sax handler since we'll assign our own */
89
- if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
90
- xmlFree(ctxt->sax);
91
- }
84
+ ctxt = noko_xml_sax_parser_context_unwrap(self);
85
+ sax = noko_sax_handler_unwrap(sax_handler);
92
86
 
93
87
  ctxt->sax = sax;
94
88
  ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
@@ -17,7 +17,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
17
17
  int status = 0;
18
18
  libxmlStructuredErrorHandlerState handler_state;
19
19
 
20
- Data_Get_Struct(self, xmlParserCtxt, ctx);
20
+ ctx = noko_xml_sax_push_parser_unwrap(self);
21
21
 
22
22
  if (Qnil != _chunk) {
23
23
  chunk = StringValuePtr(_chunk);
@@ -32,7 +32,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
32
32
 
33
33
  if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
34
34
  // TODO: there appear to be no tests for this block
35
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
35
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
36
36
  Nokogiri_error_raise(NULL, e);
37
37
  }
38
38
 
@@ -54,7 +54,7 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
54
54
  htmlParserCtxtPtr ctx;
55
55
  xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
56
56
 
57
- Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
57
+ sax = noko_sax_handler_unwrap(_xml_sax);
58
58
 
59
59
  if (_filename != Qnil) { filename = StringValueCStr(_filename); }
60
60
 
@@ -137,6 +137,48 @@ noko_io_close(void *io)
137
137
  }
138
138
 
139
139
 
140
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
141
+ # define NOKOGIRI_WINDOWS_DLLS 1
142
+ #else
143
+ # define NOKOGIRI_WINDOWS_DLLS 0
144
+ #endif
145
+
146
+ //
147
+ // | dlls || true | false |
148
+ // | nlmm || | |
149
+ // |-----------++---------+---------|
150
+ // | NULL || default | ruby |
151
+ // | "random" || default | ruby |
152
+ // | "ruby" || ruby | ruby |
153
+ // | "default" || default | default |
154
+ //
155
+ // We choose *not* to use Ruby's memory management functions with windows DLLs because of this
156
+ // issue: https://github.com/sparklemotion/nokogiri/issues/2241
157
+ //
158
+ static void
159
+ set_libxml_memory_management(void)
160
+ {
161
+ const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
162
+ if (nlmm) {
163
+ if (strcmp(nlmm, "default") == 0) {
164
+ goto libxml_uses_default_memory_management;
165
+ } else if (strcmp(nlmm, "ruby") == 0) {
166
+ goto libxml_uses_ruby_memory_management;
167
+ }
168
+ }
169
+ if (NOKOGIRI_WINDOWS_DLLS) {
170
+ libxml_uses_default_memory_management:
171
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
172
+ return;
173
+ } else {
174
+ libxml_uses_ruby_memory_management:
175
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
176
+ xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
177
+ return;
178
+ }
179
+ }
180
+
181
+
140
182
  void
141
183
  Init_nokogiri(void)
142
184
  {
@@ -150,6 +192,10 @@ Init_nokogiri(void)
150
192
  mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
151
193
  mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
152
194
 
195
+ set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
196
+ xmlInitParser();
197
+ exsltRegisterAll();
198
+
153
199
  rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
154
200
  rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
155
201
 
@@ -182,30 +228,6 @@ Init_nokogiri(void)
182
228
  rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
183
229
  #endif
184
230
 
185
- #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
186
- /*
187
- * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
188
- * issue in libxml 2.9.12:
189
- *
190
- * https://github.com/sparklemotion/nokogiri/issues/2241
191
- *
192
- * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
193
- * this config only for the specific libxml2 versions 2.9.12.
194
- *
195
- * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
196
- * default memory management functions (recall that this config was introduced to reduce memory
197
- * bloat and allow Ruby to GC more often); but we should *really* test with production workloads
198
- * before making that kind of a potentially-invasive change.
199
- */
200
- rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
201
- #else
202
- rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
203
- xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
204
- #endif
205
-
206
- xmlInitParser();
207
- exsltRegisterAll();
208
-
209
231
  if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
210
232
  rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
211
233
  } else {