nokogiri 1.10.3 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (218) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +28 -26
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +716 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +191 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +60 -51
  32. data/ext/nokogiri/xml_node.c +493 -407
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +112 -33
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +34 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri/css/node.rb +1 -0
  91. data/lib/nokogiri/css/parser.rb +64 -63
  92. data/lib/nokogiri/css/parser.y +3 -3
  93. data/lib/nokogiri/css/parser_extras.rb +39 -36
  94. data/lib/nokogiri/css/syntax_error.rb +2 -1
  95. data/lib/nokogiri/css/tokenizer.rb +105 -103
  96. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  97. data/lib/nokogiri/css.rb +15 -14
  98. data/lib/nokogiri/decorators/slop.rb +1 -0
  99. data/lib/nokogiri/extension.rb +31 -0
  100. data/lib/nokogiri/gumbo.rb +14 -0
  101. data/lib/nokogiri/html.rb +32 -27
  102. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  103. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  104. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  105. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  106. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  107. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  109. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  110. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  111. data/lib/nokogiri/html4.rb +40 -0
  112. data/lib/nokogiri/html5/document.rb +74 -0
  113. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  114. data/lib/nokogiri/html5/node.rb +93 -0
  115. data/lib/nokogiri/html5.rb +473 -0
  116. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  117. data/lib/nokogiri/syntax_error.rb +1 -0
  118. data/lib/nokogiri/version/constant.rb +5 -0
  119. data/lib/nokogiri/version/info.rb +215 -0
  120. data/lib/nokogiri/version.rb +3 -109
  121. data/lib/nokogiri/xml/attr.rb +1 -0
  122. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  123. data/lib/nokogiri/xml/builder.rb +74 -32
  124. data/lib/nokogiri/xml/cdata.rb +1 -0
  125. data/lib/nokogiri/xml/character_data.rb +1 -0
  126. data/lib/nokogiri/xml/document.rb +138 -41
  127. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  128. data/lib/nokogiri/xml/dtd.rb +1 -0
  129. data/lib/nokogiri/xml/element_content.rb +1 -0
  130. data/lib/nokogiri/xml/element_decl.rb +1 -0
  131. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  132. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  133. data/lib/nokogiri/xml/namespace.rb +1 -0
  134. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  135. data/lib/nokogiri/xml/node.rb +629 -293
  136. data/lib/nokogiri/xml/node_set.rb +1 -0
  137. data/lib/nokogiri/xml/notation.rb +1 -0
  138. data/lib/nokogiri/xml/parse_options.rb +12 -3
  139. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  140. data/lib/nokogiri/xml/pp/node.rb +1 -0
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  143. data/lib/nokogiri/xml/reader.rb +9 -12
  144. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  145. data/lib/nokogiri/xml/sax/document.rb +25 -30
  146. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  147. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  148. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  149. data/lib/nokogiri/xml/sax.rb +5 -4
  150. data/lib/nokogiri/xml/schema.rb +13 -4
  151. data/lib/nokogiri/xml/searchable.rb +25 -16
  152. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  153. data/lib/nokogiri/xml/text.rb +1 -0
  154. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  155. data/lib/nokogiri/xml/xpath.rb +4 -5
  156. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  157. data/lib/nokogiri/xml.rb +36 -36
  158. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri.rb +32 -51
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  173. metadata +151 -153
  174. data/ext/nokogiri/html_document.c +0 -170
  175. data/ext/nokogiri/html_document.h +0 -10
  176. data/ext/nokogiri/html_element_description.c +0 -279
  177. data/ext/nokogiri/html_element_description.h +0 -10
  178. data/ext/nokogiri/html_entity_lookup.c +0 -32
  179. data/ext/nokogiri/html_entity_lookup.h +0 -8
  180. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  181. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  182. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  183. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_attr.h +0 -9
  185. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  186. data/ext/nokogiri/xml_cdata.h +0 -9
  187. data/ext/nokogiri/xml_comment.h +0 -9
  188. data/ext/nokogiri/xml_document.h +0 -23
  189. data/ext/nokogiri/xml_document_fragment.h +0 -10
  190. data/ext/nokogiri/xml_dtd.h +0 -10
  191. data/ext/nokogiri/xml_element_content.h +0 -10
  192. data/ext/nokogiri/xml_element_decl.h +0 -9
  193. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  194. data/ext/nokogiri/xml_entity_decl.h +0 -10
  195. data/ext/nokogiri/xml_entity_reference.h +0 -9
  196. data/ext/nokogiri/xml_io.c +0 -61
  197. data/ext/nokogiri/xml_io.h +0 -11
  198. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  199. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  200. data/ext/nokogiri/xml_namespace.h +0 -14
  201. data/ext/nokogiri/xml_node.h +0 -13
  202. data/ext/nokogiri/xml_node_set.h +0 -12
  203. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  204. data/ext/nokogiri/xml_reader.h +0 -10
  205. data/ext/nokogiri/xml_relax_ng.h +0 -9
  206. data/ext/nokogiri/xml_sax_parser.h +0 -39
  207. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  208. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  209. data/ext/nokogiri/xml_schema.h +0 -9
  210. data/ext/nokogiri/xml_syntax_error.h +0 -13
  211. data/ext/nokogiri/xml_text.h +0 -9
  212. data/ext/nokogiri/xml_xpath_context.h +0 -10
  213. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  214. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  215. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  216. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  217. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  218. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
data/LICENSE.md CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License
2
2
 
3
- Copyright 2008 -- 2018 by Aaron Patterson, Mike Dalessio, Charles Nutter, Sergio Arbeo, Patrick Mahoney, Yoko Harada, Akinori MUSHA, John Shahid, Lars Kanis
3
+ Copyright 2008 -- 2021 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
6
 
data/README.md CHANGED
@@ -1,105 +1,159 @@
1
+ <div><img src="https://nokogiri.org/images/nokogiri-serif-black.png" align="right"/></div>
2
+
1
3
  # Nokogiri
2
4
 
3
- ## Description
5
+ Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2 (C) and xerces (Java).
6
+
7
+ ## Guiding Principles
8
+
9
+ Some guiding principles Nokogiri tries to follow:
4
10
 
5
- Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among
6
- Nokogiri's many features is the ability to search documents via XPath
7
- or CSS3 selectors.
11
+ - be secure-by-default by treating all documents as **untrusted** by default
12
+ - be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers
8
13
 
9
14
 
10
- ## Links
15
+ ## Features Overview
11
16
 
12
- * https://nokogiri.org
13
- * [Installation Help](https://nokogiri.org/tutorials/installing_nokogiri.html)
14
- * [Tutorials](https://nokogiri.org)
15
- * [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet)
16
- * [GitHub](https://github.com/sparklemotion/nokogiri)
17
- * [Mailing List](https://groups.google.com/group/nokogiri-talk)
18
- * [Chat/Gitter](https://gitter.im/sparklemotion/nokogiri)
17
+ - DOM Parser for XML, HTML4, and HTML5
18
+ - SAX Parser for XML and HTML4
19
+ - Push Parser for XML and HTML4
20
+ - Document search via XPath 1.0
21
+ - Document search via CSS3 selectors, with some jQuery-like extensions
22
+ - XSD Schema validation
23
+ - XSLT transformation
24
+ - "Builder" DSL for XML and HTML documents
19
25
 
20
26
 
21
27
  ## Status
22
28
 
23
- [![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/ruby-2.4-system/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri)
24
- [![Appveyor CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/master?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/master)
25
- [![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri)
26
- [![Test Coverage](https://api.codeclimate.com/v1/badges/59c67b0e8976027a45ad/test_coverage)](https://codeclimate.com/github/sparklemotion/nokogiri/test_coverage)
29
+ [![Github Actions CI](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml)
30
+ [![Appveyor CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/main?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/main)
27
31
 
28
32
  [![Gem Version](https://badge.fury.io/rb/nokogiri.svg)](https://rubygems.org/gems/nokogiri)
29
- [![SemVer compatibility](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score.html?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)
30
- [![Tidelift dependencies](https://tidelift.com/badges/github/sparklemotion/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme)
33
+ [![SemVer compatibility](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score/?dependency-name=nokogiri&package-manager=bundler)
34
+ [![Tidelift dependencies](https://tidelift.com/badges/package/rubygems/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme)
31
35
 
32
36
 
33
- ## Features
37
+ ## Support, Getting Help, and Reporting Issues
34
38
 
35
- * XML/HTML DOM parser which handles broken HTML
36
- * XML/HTML SAX parser
37
- * XML/HTML Push parser
38
- * XPath 1.0 support for document searching
39
- * CSS3 selector support for document searching
40
- * XML/HTML builder
41
- * XSLT transformer
39
+ All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions).
42
40
 
43
- Nokogiri parses and searches XML/HTML using native libraries (either C
44
- or Java, depending on your Ruby), which means it's fast and
45
- standards-compliant.
41
+ Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
46
42
 
43
+ [tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme
47
44
 
48
- ## Installation
45
+ ### Reading
49
46
 
50
- If this doesn't work:
47
+ Your first stops for learning more about Nokogiri should be:
51
48
 
52
- ```
53
- gem install nokogiri
54
- ```
49
+ - [API Documentation](https://nokogiri.org/rdoc/index.html)
50
+ - [Tutorials](https://nokogiri.org/tutorials/toc.html)
51
+ - An excellent community-maintained [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet)
55
52
 
56
- then please start troubleshooting here:
57
53
 
58
- > https://nokogiri.org/tutorials/installing_nokogiri.html
54
+ ### Ask For Help
59
55
 
60
- There are currently 1,237 Stack Overflow questions about Nokogiri
61
- installation. The vast majority of them are out of date and therefore
62
- incorrect. __Please do not use Stack Overflow.__
56
+ There are a few ways to ask exploratory questions:
63
57
 
64
- Instead, [tell us](https://nokogiri.org/tutorials/getting_help.html)
65
- when the above instructions don't work for you. This allows us to both
66
- help you directly and improve the documentation.
58
+ - The Ruby Discord chat server is active at https://discord.gg/UyQnKrT
59
+ - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
60
+ - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
67
61
 
62
+ Please do not mail the maintainers at their personal addresses.
68
63
 
69
- ### Binary packages
70
64
 
71
- Binary packages are available for some distributions.
65
+ ### Report A Bug
72
66
 
73
- * Debian: https://packages.debian.org/sid/ruby-nokogiri
74
- * SuSE: https://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/
75
- * Fedora: http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756
67
+ The Nokogiri bug tracker is at https://github.com/sparklemotion/nokogiri/issues
76
68
 
69
+ Please use the "Bug Report" or "Installation Difficulties" templates.
77
70
 
78
- ## Support
79
71
 
80
- All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions).
72
+ ### Security and Vulnerability Reporting
81
73
 
82
- * The Nokogiri mailing list is active: https://groups.google.com/group/nokogiri-talk
83
- * The Nokogiri bug tracker is here: https://github.com/sparklemotion/nokogiri/issues
84
- * Before filing a bug report, please read our submission guidelines: http://nokogiri.org/tutorials/getting_help.html
85
- * The IRC channel is `#nokogiri` on freenode.
86
- * The project's GitHub wiki has an excellent community-maintained [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet) which might be useful.
74
+ Please report vulnerabilities at https://hackerone.com/nokogiri
87
75
 
88
- Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
76
+ Full information and description of our security policy is in [`SECURITY.md`](SECURITY.md)
89
77
 
90
- [tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme
91
78
 
79
+ ### Semantic Versioning Policy
92
80
 
93
- ## Security and Vulnerability Reporting
81
+ Nokogiri follows [Semantic Versioning](https://semver.org/) (since 2017 or so). [![Dependabot's SemVer compatibility score for Nokogiri](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score/?dependency-name=nokogiri&package-manager=bundler)
94
82
 
95
- Please report vulnerabilities at https://hackerone.com/nokogiri
83
+ We bump `Major.Minor.Patch` versions following this guidance:
96
84
 
97
- Full information and description of our security policy is in [`SECURITY.md`](SECURITY.md)
85
+ `Major`: (we've never done this)
86
+
87
+ - Significant backwards-incompatible changes to the public API that would require rewriting existing application code.
88
+ - Some examples of backwards-incompatible changes we might someday consider for a Major release are at [`ROADMAP.md`](ROADMAP.md).
89
+
90
+ `Minor`:
91
+
92
+ - Features and bugfixes.
93
+ - Updating packaged libraries for non-security-related reasons.
94
+ - Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
95
+ - Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
98
96
 
97
+ `Patch`:
99
98
 
100
- ## Synopsis
99
+ - Bugfixes.
100
+ - Security updates.
101
+ - Updating packaged libraries for security-related reasons.
101
102
 
102
- Nokogiri is a large library, but here is example usage for parsing and examining a document:
103
+
104
+ ## Installation
105
+
106
+ Requirements:
107
+
108
+ - Ruby >= 2.5
109
+ - JRuby >= 9.2.0.0
110
+
111
+
112
+ ### Native Gems: Faster, more reliable installation
113
+
114
+ "Native gems" contain pre-compiled libraries for a specific machine architecture. On supported platforms, this removes the need for compiling the C extension and the packaged libraries, or for system dependencies to exist. This results in **much faster installation** and **more reliable installation**; slow and unreliable installation are, as you probably know, the biggest headaches for Nokogiri users.
115
+
116
+ ### Supported Platforms
117
+
118
+ As of v1.11.0, Nokogiri ships pre-compiled, "native" gems for the following platforms:
119
+
120
+ - Linux: `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`), including musl platforms like Alpine
121
+ - Darwin/macOS: `x86_64-darwin` and `arm64-darwin`
122
+ - Windows: `x86-mingw32` and `x64-mingw32`
123
+ - Java: any platform running JRuby 9.2 or higher
124
+
125
+ To determine whether your system supports one of these gems, look at the output of `bundle platform` or `ruby -e 'puts Gem::Platform.local.to_s'`.
126
+
127
+ If you're on a supported platform, either `gem install` or `bundle install` should install a native gem without any additional action on your part. This installation should only take a few seconds, and your output should look something like:
128
+
129
+ ``` sh
130
+ $ gem install nokogiri
131
+ Fetching nokogiri-1.11.0-x86_64-linux.gem
132
+ Successfully installed nokogiri-1.11.0-x86_64-linux
133
+ 1 gem installed
134
+ ```
135
+
136
+
137
+ ### Other Installation Options
138
+
139
+ Because Nokogiri is a C extension, it requires that you have a C compiler toolchain, Ruby development header files, and some system dependencies installed.
140
+
141
+ The following may work for you if you have an appropriately-configured system:
142
+
143
+ ``` bash
144
+ gem install nokogiri
145
+ ```
146
+
147
+ If you have any issues, please visit [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for more complete instructions and troubleshooting.
148
+
149
+
150
+ ## How To Use Nokogiri
151
+
152
+ Nokogiri is a large library, and so it's challenging to briefly summarize it. We've tried to provide long, real-world examples at [Tutorials](https://nokogiri.org/tutorials/toc.html).
153
+
154
+ ### Parsing and Querying
155
+
156
+ Here is example usage for parsing and querying a document:
103
157
 
104
158
  ```ruby
105
159
  #! /usr/bin/env ruby
@@ -108,51 +162,26 @@ require 'nokogiri'
108
162
  require 'open-uri'
109
163
 
110
164
  # Fetch and parse HTML document
111
- doc = Nokogiri::HTML(open('https://nokogiri.org/tutorials/installing_nokogiri.html'))
165
+ doc = Nokogiri::HTML(URI.open('https://nokogiri.org/tutorials/installing_nokogiri.html'))
112
166
 
113
- puts "### Search for nodes by css"
167
+ # Search for nodes by css
114
168
  doc.css('nav ul.menu li a', 'article h2').each do |link|
115
169
  puts link.content
116
170
  end
117
171
 
118
- puts "### Search for nodes by xpath"
172
+ # Search for nodes by xpath
119
173
  doc.xpath('//nav//ul//li/a', '//article//h2').each do |link|
120
174
  puts link.content
121
175
  end
122
176
 
123
- puts "### Or mix and match."
177
+ # Or mix and match
124
178
  doc.search('nav ul.menu li a', '//article//h2').each do |link|
125
179
  puts link.content
126
180
  end
127
181
  ```
128
182
 
129
183
 
130
- ## Requirements
131
-
132
- * Ruby 2.3.0 or higher, including any development packages necessary
133
- to compile native extensions.
134
-
135
- * In Nokogiri 1.6.0 and later libxml2 and libxslt are bundled with the
136
- gem, but if you want to use the system versions:
137
-
138
- * First, check out [the long list](http://www.xmlsoft.org/news.html)
139
- of fixes and changes between releases before deciding to use any
140
- version older than is bundled with Nokogiri.
141
-
142
- * At install time, set the environment variable
143
- `NOKOGIRI_USE_SYSTEM_LIBRARIES` or else use the
144
- `--use-system-libraries` argument. (See
145
- https://nokogiri.org/tutorials/installing_nokogiri.html#install-with-system-libraries
146
- for specifics.)
147
-
148
- * libxml2 >=2.6.21 with iconv support
149
- (libxml2-dev/-devel is also required)
150
-
151
- * libxslt, built with and supported by the given libxml2
152
- (libxslt-dev/-devel is also required)
153
-
154
-
155
- ## Encoding
184
+ ### Encoding
156
185
 
157
186
  Strings are always stored as UTF-8 internally. Methods that return
158
187
  text values will always return UTF-8 encoded strings. Methods that
@@ -178,12 +207,41 @@ explicitly setting the encoding to EUC-JP on the parser:
178
207
  ```
179
208
 
180
209
 
181
- ## Development
210
+ ## Technical Overview
211
+
212
+ ### Guiding Principles
213
+
214
+ As noted above, two guiding principles of the software are:
215
+
216
+ - be secure-by-default by treating all documents as **untrusted** by default
217
+ - be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers
218
+
219
+ Notably, despite all parsers being standards-compliant, there are behavioral inconsistencies between the parsers used in the CRuby and JRuby implementations, and Nokogiri does not and should not attempt to remove these inconsistencies. Instead, we surface these differences in the test suite when they are important/semantic; or we intentionally write tests to depend only on the important/semantic bits (omitting whitespace from regex matchers on results, for example).
182
220
 
183
- ```bash
184
- bundle install
185
- bundle exec rake compile test
186
- ```
221
+
222
+ ### CRuby
223
+
224
+ The Ruby (a.k.a. CRuby, MRI, YARV) implementation is a C extension that depends on libxml2 and libxslt (which in turn depend on zlib and possibly libiconv).
225
+
226
+ These dependencies are met by default by Nokogiri's packaged versions of the libxml2 and libxslt source code, but a configuration option `--use-system-libraries` is provided to allow specification of alternative library locations. See [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for full documentation.
227
+
228
+ We provide native gems by pre-compiling libxml2 and libxslt (and potentially zlib and libiconv) and packaging them into the gem file. In this case, no compilation is necessary at installation time, which leads to faster and more reliable installation.
229
+
230
+ See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems.
231
+
232
+
233
+ ### JRuby
234
+
235
+ The Java (a.k.a. JRuby) implementation is a Java extension that depends primarily on Xerces and NekoHTML for parsing, though additional dependencies are on `isorelax`, `nekodtd`, `jing`, `serializer`, `xalan-j`, and `xml-apis`.
236
+
237
+ These dependencies are provided by pre-compiled jar files packaged in the `java` platform gem.
238
+
239
+ See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems.
240
+
241
+
242
+ ## Contributing
243
+
244
+ See [`CONTRIBUTING.md`](CONTRIBUTING.md) for an intro guide to developing Nokogiri.
187
245
 
188
246
 
189
247
  ## Code of Conduct
@@ -196,3 +254,25 @@ We've adopted the Contributor Covenant code of conduct, which you can read in fu
196
254
  This project is licensed under the terms of the MIT license.
197
255
 
198
256
  See this license at [`LICENSE.md`](LICENSE.md).
257
+
258
+
259
+ ### Dependencies
260
+
261
+ Some additional libraries may be distributed with your version of Nokogiri. Please see [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for a discussion of the variations as well as the licenses thereof.
262
+
263
+
264
+ ## Authors
265
+
266
+ - Mike Dalessio
267
+ - Aaron Patterson
268
+ - Yoko Harada
269
+ - Akinori MUSHA
270
+ - John Shahid
271
+ - Karol Bucek
272
+ - Sam Ruby
273
+ - Craig Barnes
274
+ - Stephen Checkoway
275
+ - Lars Kanis
276
+ - Sergio Arbeo
277
+ - Timothy Elliott
278
+ - Nobuyoshi Nakada
data/dependencies.yml CHANGED
@@ -1,11 +1,11 @@
1
1
  libxml2:
2
- version: "2.9.9"
3
- sha256: "94fb70890143e3c6549f265cee93ec064c80a84c42ad0f23e85ee1fd6540a871"
2
+ version: "2.9.12"
3
+ sha256: "c8d6681e38c56f172892c85ddc0852e1fd4b53b4209e7f4ebf17f7e2eae71d92"
4
4
  # manually verified checksum:
5
5
  #
6
- # $ gpg --verify libxml2-2.9.9.tar.gz.asc ports/archives/libxml2-2.9.9.tar.gz
7
- # gpg: Signature made Thu 03 Jan 2019 01:14:47 PM EST
8
- # gpg: using RSA key 15588B26596BEA5D
6
+ # $ gpg --verify libxml2-2.9.12.tar.gz.asc ports/archives/libxml2-2.9.12.tar.gz
7
+ # gpg: Signature made Thu 13 May 2021 02:59:16 PM EDT
8
+ # gpg: using RSA key DB46681BB91ADCEA170FA2D415588B26596BEA5D
9
9
  # gpg: Good signature from "Daniel Veillard (Red Hat work email) <veillard@redhat.com>" [unknown]
10
10
  # gpg: aka "Daniel Veillard <Daniel.Veillard@w3.org>" [unknown]
11
11
  # gpg: WARNING: This key is not certified with a trusted signature!
@@ -16,25 +16,26 @@ libxml2:
16
16
  # using this pgp signature:
17
17
  #
18
18
  # -----BEGIN PGP SIGNATURE-----
19
- #
20
- # iQEbBAABAgAGBQJcLlEXAAoJEBVYiyZZa+pd1B8H93xeCYNBLx+eX0xe3qS3ReS/
21
- # YstjkXKUkmDQYwqQ/9Knmv1P6NX64hQL5E1pZX5sXp36giwXXJ5tCK72VRzektzU
22
- # Kpo+M1/QA9feZQs1GmyKaXYzNwTSJnsdKA9nWqTHZ3bzfdhFSZ0czo94vgY/cz5z
23
- # 9P3FIgeldj1vi8p2rjXbArMFQyaxHnve9LdxI8hbudNSeUw/FEV6mjtXrlZ7MXqn
24
- # hmAkah2JwktOStF5tIlddCRqZeUPUX5flBxT95gfskXXlGEhaoGMXcC3izqqJyV2
25
- # sx5nY7fnXdkwfYsgRUXYWmDmbs8DnFjXH9lux9O4OWglLonaRoAqFPcOzE3aCw==
26
- # =4qWg
19
+ #
20
+ # iQEzBAABCAAdFiEE20ZoG7ka3OoXD6LUFViLJllr6l0FAmCddwQACgkQFViLJllr
21
+ # 6l11LQgAioRTdfmcC+uK/7+6HPtF/3c5zkX6j8VGYuvFBwZ0jayqMRBAl++fcpjE
22
+ # JUU/JKebSZ/KCYjzyeOWK/i3Gq77iqm3UbZFB85rqu4a5P3gmj/4STWVyAx0KU3z
23
+ # G3jKqDhJOt7c0acXb5lh2DngfDa1dn/VGcQcIXsqplNxNr4ET7MnSJjZ3nlxYfW2
24
+ # E5vWBdPCMUeXDBl6MjYvw9XnGGBLUAaEJWoFToG6jKmVf4GAd9nza20jj5dtbcJq
25
+ # QEOaSDKDr+f9h2NS8haOhJ9vOpy52PdeGzaFlbRkXarGXuAr8kITgATVs8FAqcgv
26
+ # MoVhmrO5r2hJf0dCM9fZoYqzpMfmNA==
27
+ # =KfJ9
27
28
  # -----END PGP SIGNATURE-----
28
29
  #
29
30
 
30
31
  libxslt:
31
- version: "1.1.33"
32
- sha256: "8e36605144409df979cab43d835002f63988f3dc94d5d3537c12796db90e38c8"
32
+ version: "1.1.34"
33
+ sha256: "98b1bd46d6792925ad2dfe9a87452ea2adebf69dcb9919ffd55bf926a7f93f7f"
33
34
  # manually verified checksum:
34
35
  #
35
- # $ gpg --verify libxslt-1.1.33.tar.gz.asc ports/archives/libxslt-1.1.33.tar.gz
36
- # gpg: Signature made Thu 03 Jan 2019 01:30:49 PM EST
37
- # gpg: using RSA key 15588B26596BEA5D
36
+ # $ gpg --verify ~/Downloads/libxslt-1.1.34.tar.gz.asc ports/archives/libxslt-1.1.34.tar.gz
37
+ # gpg: Signature made Wed 30 Oct 2019 04:02:48 PM EDT
38
+ # gpg: using RSA key DB46681BB91ADCEA170FA2D415588B26596BEA5D
38
39
  # gpg: Good signature from "Daniel Veillard (Red Hat work email) <veillard@redhat.com>" [unknown]
39
40
  # gpg: aka "Daniel Veillard <Daniel.Veillard@w3.org>" [unknown]
40
41
  # gpg: WARNING: This key is not certified with a trusted signature!
@@ -45,14 +46,15 @@ libxslt:
45
46
  # using this pgp signature:
46
47
  #
47
48
  # -----BEGIN PGP SIGNATURE-----
48
- #
49
- # iQEcBAABAgAGBQJcLlTZAAoJEBVYiyZZa+pd9NkIAIf6ei2iSpR/0QOyS71esDq8
50
- # 407PcUXd/yUjDANm4Uvm7kKK+SbbfBxFIPva4g984Noe1zYMfjK3u3iNs6jykySf
51
- # mN5eo2wNCxsZnqjbnsLgQvn5VCQpPInTddTuGUxgqJyvnR7p785L1oA2EStSPMP4
52
- # BGZ9dZGlbreK35WzgrhUi0VN5egJW2fpMsw7rTPvfwK+90gXL0DEm8v3WlA7fCDL
53
- # QsvuPm7jPOXxdt5bYrVP8wpNMTJIGqV6jxh7Vvl6kiGLldUjCyoCh0AGXLror0Gs
54
- # sAMlRKJNodpcCYkIWxzjLt74sUciKNrPLHZlXJcclZMONen1GWnVDcv83Tt9n6w=
55
- # =iAm8
49
+ #
50
+ # iQEzBAABCAAdFiEE20ZoG7ka3OoXD6LUFViLJllr6l0FAl257GgACgkQFViLJllr
51
+ # 6l2vVggAjJEHmASiS56SxhPOsGqbfBihM66gQFoIymQfMu2430N1GSTkLsfbkJO8
52
+ # 8yBX11NjzK/m9uxwshMW3rVCU7EpL3PUimN3reXdPiQj9hAOAWF1V3BZNevbQC2E
53
+ # FCIraioukaidf8sjUG4/sGpK/gOcP/3hYoN0HUoBigCNJjDqhijxM3M3GJJtCASp
54
+ # jL4CQbs2OmxW8ixOZbuWEESvFFHUgYRsdZjRVN+GRfSOvJjxypurmYwQ3RjO7JxL
55
+ # 2FY8qKQ+xpeID8NV8F5OUEvWBjk1QS133VTqBZNlONdnEtV/og6jNu5k0O/Kvhup
56
+ # caR+8TMErOcLr9OgDklO6DoYyAsf9Q==
57
+ # =g4i4
56
58
  # -----END PGP SIGNATURE-----
57
59
  #
58
60