nokogiri 1.11.0.rc2-java → 1.11.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  7. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  8. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  12. data/ext/java/nokogiri/NokogiriService.java +595 -556
  13. data/ext/java/nokogiri/XmlAttr.java +118 -126
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  15. data/ext/java/nokogiri/XmlCdata.java +35 -58
  16. data/ext/java/nokogiri/XmlComment.java +46 -67
  17. data/ext/java/nokogiri/XmlDocument.java +645 -572
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  19. data/ext/java/nokogiri/XmlDtd.java +448 -414
  20. data/ext/java/nokogiri/XmlElement.java +23 -48
  21. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  22. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  23. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  24. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  25. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1622
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  29. data/ext/java/nokogiri/XmlReader.java +513 -450
  30. data/ext/java/nokogiri/XmlRelaxng.java +89 -101
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  33. data/ext/java/nokogiri/XmlSchema.java +335 -242
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
  35. data/ext/java/nokogiri/XmlText.java +55 -76
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -210
  37. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +581 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +250 -187
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +371 -320
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +191 -157
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +95 -47
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  133. data/lib/nokogiri/extension.rb +26 -0
  134. data/lib/nokogiri/html/document.rb +12 -26
  135. data/lib/nokogiri/html/document_fragment.rb +15 -15
  136. data/lib/nokogiri/nokogiri.jar +0 -0
  137. data/lib/nokogiri/version.rb +2 -148
  138. data/lib/nokogiri/version/constant.rb +5 -0
  139. data/lib/nokogiri/version/info.rb +205 -0
  140. data/lib/nokogiri/xml/builder.rb +2 -2
  141. data/lib/nokogiri/xml/document.rb +91 -35
  142. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  143. data/lib/nokogiri/xml/node.rb +89 -69
  144. data/lib/nokogiri/xml/parse_options.rb +6 -0
  145. data/lib/nokogiri/xml/reader.rb +2 -9
  146. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  147. data/lib/nokogiri/xml/schema.rb +12 -4
  148. data/lib/nokogiri/xml/searchable.rb +3 -1
  149. data/lib/nokogiri/xml/xpath.rb +1 -3
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  151. metadata +87 -164
  152. data/ext/nokogiri/html_document.h +0 -10
  153. data/ext/nokogiri/html_element_description.h +0 -10
  154. data/ext/nokogiri/html_entity_lookup.h +0 -8
  155. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  156. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  157. data/ext/nokogiri/nokogiri.h +0 -134
  158. data/ext/nokogiri/xml_attr.h +0 -9
  159. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  160. data/ext/nokogiri/xml_cdata.h +0 -9
  161. data/ext/nokogiri/xml_comment.h +0 -9
  162. data/ext/nokogiri/xml_document.h +0 -23
  163. data/ext/nokogiri/xml_document_fragment.h +0 -10
  164. data/ext/nokogiri/xml_dtd.h +0 -10
  165. data/ext/nokogiri/xml_element_content.h +0 -10
  166. data/ext/nokogiri/xml_element_decl.h +0 -9
  167. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  168. data/ext/nokogiri/xml_entity_decl.h +0 -10
  169. data/ext/nokogiri/xml_entity_reference.h +0 -9
  170. data/ext/nokogiri/xml_io.c +0 -61
  171. data/ext/nokogiri/xml_io.h +0 -11
  172. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  173. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  174. data/ext/nokogiri/xml_namespace.h +0 -14
  175. data/ext/nokogiri/xml_node.h +0 -13
  176. data/ext/nokogiri/xml_node_set.h +0 -12
  177. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  178. data/ext/nokogiri/xml_reader.h +0 -10
  179. data/ext/nokogiri/xml_relax_ng.h +0 -9
  180. data/ext/nokogiri/xml_sax_parser.h +0 -39
  181. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  182. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_schema.h +0 -9
  184. data/ext/nokogiri/xml_syntax_error.h +0 -13
  185. data/ext/nokogiri/xml_text.h +0 -9
  186. data/ext/nokogiri/xml_xpath_context.h +0 -10
  187. data/ext/nokogiri/xslt_stylesheet.h +0 -14
data/LICENSE.md CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License
2
2
 
3
- Copyright 2008 -- 2018 by Aaron Patterson, Mike Dalessio, Charles Nutter, Sergio Arbeo, Patrick Mahoney, Yoko Harada, Akinori MUSHA, John Shahid, Lars Kanis
3
+ Copyright 2008 -- 2021 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
6
 
data/README.md CHANGED
@@ -1,105 +1,161 @@
1
+ <div><img src="https://nokogiri.org/images/nokogiri-serif-black.png" align="right"/></div>
2
+
1
3
  # Nokogiri
2
4
 
3
- ## Description
5
+ Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for reading, writing, modifying, and querying documents. It is fast and standards-compliant by relying on native parsers like libxml2 (C) and xerces (Java).
6
+
7
+ ## Guiding Principles
8
+
9
+ Some guiding principles Nokogiri tries to follow:
4
10
 
5
- Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among
6
- Nokogiri's many features is the ability to search documents via XPath
7
- or CSS3 selectors.
11
+ - be secure-by-default by treating all documents as **untrusted** by default
12
+ - be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers
8
13
 
9
14
 
10
- ## Links
15
+ ## Features Overview
11
16
 
12
- * https://nokogiri.org
13
- * [Installation Help](https://nokogiri.org/tutorials/installing_nokogiri.html)
14
- * [Tutorials](https://nokogiri.org/tutorials/toc.html)
15
- * [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet)
16
- * [GitHub](https://github.com/sparklemotion/nokogiri)
17
- * [Mailing List](https://groups.google.com/group/nokogiri-talk)
18
- * [Chat/Gitter](https://gitter.im/sparklemotion/nokogiri)
17
+ - DOM Parser for XML and HTML4
18
+ - SAX Parser for XML and HTML4
19
+ - Push Parser for XML and HTML4
20
+ - Document search via XPath 1.0
21
+ - Document search via CSS3 selectors, with some jQuery-like extensions
22
+ - XSD Schema validation
23
+ - XSLT transformation
24
+ - "Builder" DSL for XML and HTML documents
19
25
 
20
26
 
21
27
  ## Status
22
28
 
23
- [![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/ruby-2.4-system/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri)
24
- [![Appveyor CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/master?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/master)
29
+ [![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/cruby-2.7/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri)
30
+ [![Appveyor CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/main?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/main)
25
31
  [![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri)
26
32
  [![Test Coverage](https://api.codeclimate.com/v1/badges/59c67b0e8976027a45ad/test_coverage)](https://codeclimate.com/github/sparklemotion/nokogiri/test_coverage)
27
33
 
28
34
  [![Gem Version](https://badge.fury.io/rb/nokogiri.svg)](https://rubygems.org/gems/nokogiri)
29
35
  [![SemVer compatibility](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score/?dependency-name=nokogiri&package-manager=bundler)
30
- [![Tidelift dependencies](https://tidelift.com/badges/github/sparklemotion/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme)
36
+ [![Tidelift dependencies](https://tidelift.com/badges/package/rubygems/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme)
31
37
 
32
38
 
33
- ## Features
39
+ ## Support, Getting Help, and Reporting Issues
34
40
 
35
- * XML/HTML DOM parser which handles broken HTML
36
- * XML/HTML SAX parser
37
- * XML/HTML Push parser
38
- * XPath 1.0 support for document searching
39
- * CSS3 selector support for document searching
40
- * XML/HTML builder
41
- * XSLT transformer
41
+ All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions).
42
42
 
43
- Nokogiri parses and searches XML/HTML using native libraries (either C
44
- or Java, depending on your Ruby), which means it's fast and
45
- standards-compliant.
43
+ Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
46
44
 
45
+ [tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme
47
46
 
48
- ## Installation
47
+ ### Reading
49
48
 
50
- If this doesn't work:
49
+ Your first stops for learning more about Nokogiri should be:
51
50
 
52
- ```
53
- gem install nokogiri
54
- ```
51
+ - [API Documentation](https://nokogiri.org/rdoc/index.html)
52
+ - [Tutorials](https://nokogiri.org/tutorials/toc.html)
53
+ - An excellent community-maintained [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet)
55
54
 
56
- then please start troubleshooting here:
57
55
 
58
- > https://nokogiri.org/tutorials/installing_nokogiri.html
56
+ ### Ask For Help
59
57
 
60
- There are currently 1,237 Stack Overflow questions about Nokogiri
61
- installation. The vast majority of them are out of date and therefore
62
- incorrect. __Please do not use Stack Overflow.__
58
+ There are a few ways to ask exploratory questions:
63
59
 
64
- Instead, [tell us](https://nokogiri.org/tutorials/getting_help.html)
65
- when the above instructions don't work for you. This allows us to both
66
- help you directly and improve the documentation.
60
+ - The Ruby Discord chat server is active at https://discord.gg/UyQnKrT
61
+ - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
62
+ - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
67
63
 
64
+ Please do not mail the maintainers at their personal addresses.
68
65
 
69
- ### Binary packages
70
66
 
71
- Binary packages are available for some distributions.
67
+ ### Report A Bug
72
68
 
73
- * Debian: https://packages.debian.org/sid/ruby-nokogiri
74
- * SuSE: https://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/
75
- * Fedora: http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756
69
+ The Nokogiri bug tracker is at https://github.com/sparklemotion/nokogiri/issues
76
70
 
71
+ Please use the "Bug Report" or "Installation Difficulties" templates.
77
72
 
78
- ## Support
79
73
 
80
- All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions).
74
+ ### Security and Vulnerability Reporting
81
75
 
82
- * The Nokogiri mailing list is active: https://groups.google.com/group/nokogiri-talk
83
- * The Nokogiri bug tracker is here: https://github.com/sparklemotion/nokogiri/issues
84
- * Before filing a bug report, please read our submission guidelines: http://nokogiri.org/tutorials/getting_help.html
85
- * The IRC channel is `#nokogiri` on freenode.
86
- * The project's GitHub wiki has an excellent community-maintained [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet) which might be useful.
76
+ Please report vulnerabilities at https://hackerone.com/nokogiri
87
77
 
88
- Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
78
+ Full information and description of our security policy is in [`SECURITY.md`](SECURITY.md)
89
79
 
90
- [tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme
91
80
 
81
+ ### Semantic Versioning Policy
92
82
 
93
- ## Security and Vulnerability Reporting
83
+ Nokogiri follows [Semantic Versioning](https://semver.org/) (since 2017 or so). [![Dependabot's SemVer compatibility score for Nokogiri](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score/?dependency-name=nokogiri&package-manager=bundler)
94
84
 
95
- Please report vulnerabilities at https://hackerone.com/nokogiri
85
+ We bump `Major.Minor.Patch` versions following this guidance:
96
86
 
97
- Full information and description of our security policy is in [`SECURITY.md`](SECURITY.md)
87
+ `Major`: (we've never done this)
88
+
89
+ - Significant backwards-incompatible changes to the public API that would require rewriting existing application code.
90
+ - Some examples of backwards-incompatible changes we might someday consider for a Major release are at [`ROADMAP.md`](ROADMAP.md).
91
+
92
+ `Minor`:
93
+
94
+ - Features and bugfixes.
95
+ - Updating packaged libraries for non-security-related reasons.
96
+ - Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
97
+ - Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
98
+
99
+ `Patch`:
98
100
 
101
+ - Bugfixes.
102
+ - Security updates.
103
+ - Updating packaged libraries for security-related reasons.
99
104
 
100
- ## Synopsis
101
105
 
102
- Nokogiri is a large library, but here is example usage for parsing and examining a document:
106
+ ## Installation
107
+
108
+ Requirements:
109
+
110
+ - Ruby >= 2.5
111
+ - JRuby >= 9.2.0.0
112
+
113
+
114
+ ### Native Gems: Faster, more reliable installation
115
+
116
+ "Native gems" contain pre-compiled libraries for a specific machine architecture. On supported platforms, this removes the need for compiling the C extension and the packaged libraries, or for system dependencies to exist. This results in **much faster installation** and **more reliable installation**, which as you probably know are the biggest headaches for Nokogiri users.
117
+
118
+ ### Supported Platforms
119
+
120
+ As of v1.11.0, Nokogiri ships pre-compiled, "native" gems for the following platforms:
121
+
122
+ - Linux: `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`), including musl platforms like Alpine
123
+ - Darwin/MacOS: `x86_64-darwin` and `arm64-darwin`
124
+ - Windows: `x86-mingw32` and `x64-mingw32`
125
+ - Java: any platform running JRuby 9.2 or higher
126
+
127
+ To determine whether your system supports one of these gems, look at the output of `bundle platform` or `ruby -e 'puts Gem::Platform.local.to_s'`.
128
+
129
+ If you're on a supported platform, either `gem install` or `bundle install` should install a native gem without any additional action on your part. This installation should only take a few seconds, and your output should look something like:
130
+
131
+ ``` sh
132
+ $ gem install nokogiri
133
+ Fetching nokogiri-1.11.0-x86_64-linux.gem
134
+ Successfully installed nokogiri-1.11.0-x86_64-linux
135
+ 1 gem installed
136
+ ```
137
+
138
+
139
+ ### Other Installation Options
140
+
141
+ Because Nokogiri is a C extension, it requires that you have a C compiler toolchain, Ruby development header files, and some system dependencies installed.
142
+
143
+ The following may work for you if you have an appropriately-configured system:
144
+
145
+ ``` bash
146
+ gem install nokogiri
147
+ ```
148
+
149
+ If you have any issues, please visit [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for more complete instructions and troubleshooting.
150
+
151
+
152
+ ## How To Use Nokogiri
153
+
154
+ Nokogiri is a large library, and so it's challenging to briefly summarize it. We've tried to provide long, real-world examples at [Tutorials](https://nokogiri.org/tutorials/toc.html).
155
+
156
+ ### Parsing and Querying
157
+
158
+ Here is example usage for parsing and querying a document:
103
159
 
104
160
  ```ruby
105
161
  #! /usr/bin/env ruby
@@ -108,47 +164,26 @@ require 'nokogiri'
108
164
  require 'open-uri'
109
165
 
110
166
  # Fetch and parse HTML document
111
- doc = Nokogiri::HTML(open('https://nokogiri.org/tutorials/installing_nokogiri.html'))
167
+ doc = Nokogiri::HTML(URI.open('https://nokogiri.org/tutorials/installing_nokogiri.html'))
112
168
 
113
- puts "### Search for nodes by css"
169
+ # Search for nodes by css
114
170
  doc.css('nav ul.menu li a', 'article h2').each do |link|
115
171
  puts link.content
116
172
  end
117
173
 
118
- puts "### Search for nodes by xpath"
174
+ # Search for nodes by xpath
119
175
  doc.xpath('//nav//ul//li/a', '//article//h2').each do |link|
120
176
  puts link.content
121
177
  end
122
178
 
123
- puts "### Or mix and match."
179
+ # Or mix and match
124
180
  doc.search('nav ul.menu li a', '//article//h2').each do |link|
125
181
  puts link.content
126
182
  end
127
183
  ```
128
184
 
129
185
 
130
- ## Requirements
131
-
132
- Ruby 2.4.0 or higher, including any development packages necessary to compile native extensions.
133
-
134
- In Nokogiri 1.6.0 and later libxml2 and libxslt are bundled with the gem, but if you want to use the system versions:
135
-
136
- * First, check out [the long list](http://www.xmlsoft.org/news.html)
137
- of fixes and changes between releases before deciding to use any
138
- version older than is bundled with Nokogiri.
139
-
140
- * At install time, set the environment variable
141
- `NOKOGIRI_USE_SYSTEM_LIBRARIES` or else use the
142
- `--use-system-libraries` argument. (See
143
- https://nokogiri.org/tutorials/installing_nokogiri.html#install-with-system-libraries
144
- for specifics.)
145
-
146
- * libxml2 >=2.6.21 with iconv support (libxml2-dev/-devel is also required)
147
-
148
- * libxslt, built with and supported by the given libxml2 (libxslt-dev/-devel is also required)
149
-
150
-
151
- ## Encoding
186
+ ### Encoding
152
187
 
153
188
  Strings are always stored as UTF-8 internally. Methods that return
154
189
  text values will always return UTF-8 encoded strings. Methods that
@@ -174,27 +209,69 @@ explicitly setting the encoding to EUC-JP on the parser:
174
209
  ```
175
210
 
176
211
 
177
- ## Development
212
+ ## Technical Overview
178
213
 
179
- ```bash
180
- bundle install
181
- bundle exec rake compile test
182
- ```
214
+ ### Guiding Principles
183
215
 
216
+ As noted above, two guiding principles of the software are:
184
217
 
185
- ## Code of Conduct
218
+ - be secure-by-default by treating all documents as **untrusted** by default
219
+ - be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers
186
220
 
187
- We've adopted the Contributor Covenant code of conduct, which you can read in full in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md).
221
+ Notably, despite all parsers being standards-compliant, there are behavioral inconsistencies between the parsers used in the CRuby and JRuby implementations, and Nokogiri does not and should not attempt to remove these inconsistencies. Instead, we surface these differences in the test suite when they are important/semantic; or we intentionally write tests to depend only on the important/semantic bits (omitting whitespace from regex matchers on results, for example).
188
222
 
189
223
 
190
- ## Semantic Versioning
224
+ ### CRuby
191
225
 
192
- [![SemVer compatibility](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score/?dependency-name=nokogiri&package-manager=bundler)
226
+ The Ruby (a.k.a., CRuby, MRI, YARV) implementation is a C extension that depends on libxml2 and libxslt (which in turn depend on zlib and possibly libiconv).
227
+
228
+ These dependencies are met by default by Nokogiri's packaged versions of the libxml2 and libxslt source code, but a configuration option `--use-system-libraries` is provided to allow specification of alternative library locations. See [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for full documentation.
229
+
230
+ We provide native gems by pre-compiling libxml2 and libxslt (and potentially zlib and libiconv) and packaging them into the gem file. In this case, no compilation is necessary at installation time, which leads to faster and more reliable installation.
231
+
232
+ See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems.
233
+
234
+
235
+ ### JRuby
236
+
237
+ The Java (a.k.a. JRuby) implementation is a Java extension that depends primarily on Xerces and NekoHTML for parsing, with additional dependencies on `isorelax`, `nekodtd`, `jing`, `serializer`, `xalan-j`, and `xml-apis`.
238
+
239
+ These dependencies are provided by pre-compiled jar files packaged in the `java` platform gem.
240
+
241
+ See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems.
242
+
243
+
244
+ ## Contributing
245
+
246
+ See [`CONTRIBUTING.md`](CONTRIBUTING.md) for an intro guide to developing Nokogiri.
247
+
248
+
249
+ ## Code of Conduct
250
+
251
+ We've adopted the Contributor Covenant code of conduct, which you can read in full in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md).
193
252
 
194
- Nokogiri follows [Semantic Versioning](https://semver.org/). See [`CHANGELOG.md`](CHANGELOG.md) for more details.
195
253
 
196
254
  ## License
197
255
 
198
256
  This project is licensed under the terms of the MIT license.
199
257
 
200
258
  See this license at [`LICENSE.md`](LICENSE.md).
259
+
260
+
261
+ ### Dependencies
262
+
263
+ Some additional libraries may be distributed with your version of Nokogiri. Please see [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for a discussion of the variations as well as the licenses thereof.
264
+
265
+
266
+ ## Authors
267
+
268
+ - Mike Dalessio
269
+ - Aaron Patterson
270
+ - Yoko Harada
271
+ - Akinori MUSHA
272
+ - John Shahid
273
+ - Karol Bucek
274
+ - Lars Kanis
275
+ - Sergio Arbeo
276
+ - Timothy Elliott
277
+ - Nobuyoshi Nakada
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
@@ -52,73 +20,92 @@ import org.jruby.runtime.builtin.IRubyObject;
52
20
  *
53
21
  * @author Patrick Mahoney <pat@polycrystal.org>
54
22
  */
55
- @JRubyClass(name="Nokogiri::EncodingHandler")
56
- public class EncodingHandler extends RubyObject {
57
- protected static HashMap<String,String> map = new HashMap<String,String>();
58
- static {
59
- addInitial();
60
- }
23
+ @JRubyClass(name = "Nokogiri::EncodingHandler")
24
+ public class EncodingHandler extends RubyObject
25
+ {
26
+ protected static HashMap<String, String> map = new HashMap<String, String>();
27
+ static
28
+ {
29
+ addInitial();
30
+ }
61
31
 
62
- protected String name;
32
+ protected String name;
63
33
 
64
- protected static void addInitial() {
65
- map.put("UTF-8", "UTF-8");
66
- }
67
-
68
- public EncodingHandler(Ruby ruby, RubyClass klass, String value) {
69
- super(ruby, klass);
70
- name = value;
71
- }
34
+ protected static void
35
+ addInitial()
36
+ {
37
+ map.put("UTF-8", "UTF-8");
38
+ }
72
39
 
73
- @JRubyMethod(name="[]", meta=true)
74
- public static IRubyObject get(ThreadContext context,
75
- IRubyObject _klass,
76
- IRubyObject keyObj) {
77
- Ruby ruby = context.getRuntime();
78
- String key = keyObj.toString();
79
- String value = map.get(key);
80
- if (value == null)
81
- return ruby.getNil();
40
+ public
41
+ EncodingHandler(Ruby ruby, RubyClass klass, String value)
42
+ {
43
+ super(ruby, klass);
44
+ name = value;
45
+ }
82
46
 
83
- return new EncodingHandler(
84
- ruby,
85
- getNokogiriClass(ruby, "Nokogiri::EncodingHandler"),
86
- value);
47
+ @JRubyMethod(name = "[]", meta = true)
48
+ public static IRubyObject
49
+ get(ThreadContext context,
50
+ IRubyObject _klass,
51
+ IRubyObject keyObj)
52
+ {
53
+ Ruby ruby = context.getRuntime();
54
+ String key = keyObj.toString();
55
+ String value = map.get(key);
56
+ if (value == null) {
57
+ return ruby.getNil();
87
58
  }
88
59
 
89
- @JRubyMethod(meta=true)
90
- public static IRubyObject delete(ThreadContext context,
91
- IRubyObject _klass,
92
- IRubyObject keyObj) {
93
- String key = keyObj.toString();
94
- String value = map.remove(key);
95
- if (value == null)
96
- return context.getRuntime().getNil();
97
- return context.getRuntime().newString(value);
98
- }
60
+ return new EncodingHandler(
61
+ ruby,
62
+ getNokogiriClass(ruby, "Nokogiri::EncodingHandler"),
63
+ value);
64
+ }
99
65
 
100
- @JRubyMethod(name="clear_aliases!", meta=true)
101
- public static IRubyObject clear_aliases(ThreadContext context,
102
- IRubyObject _klass) {
103
- map.clear();
104
- addInitial();
105
- return context.getRuntime().getNil();
66
+ @JRubyMethod(meta = true)
67
+ public static IRubyObject
68
+ delete (ThreadContext context,
69
+ IRubyObject _klass,
70
+ IRubyObject keyObj)
71
+ {
72
+ String key = keyObj.toString();
73
+ String value = map.remove(key);
74
+ if (value == null) {
75
+ return context.getRuntime().getNil();
106
76
  }
77
+ return context.getRuntime().newString(value);
78
+ }
107
79
 
108
- @JRubyMethod(meta=true)
109
- public static IRubyObject alias(ThreadContext context,
110
- IRubyObject _klass,
111
- IRubyObject orig,
112
- IRubyObject alias) {
113
- String value = map.get(orig.toString());
114
- if (value != null)
115
- map.put(alias.toString(), value);
80
+ @JRubyMethod(name = "clear_aliases!", meta = true)
81
+ public static IRubyObject
82
+ clear_aliases(ThreadContext context,
83
+ IRubyObject _klass)
84
+ {
85
+ map.clear();
86
+ addInitial();
87
+ return context.getRuntime().getNil();
88
+ }
116
89
 
117
- return context.getRuntime().getNil();
90
+ @JRubyMethod(meta = true)
91
+ public static IRubyObject
92
+ alias(ThreadContext context,
93
+ IRubyObject _klass,
94
+ IRubyObject orig,
95
+ IRubyObject alias)
96
+ {
97
+ String value = map.get(orig.toString());
98
+ if (value != null) {
99
+ map.put(alias.toString(), value);
118
100
  }
119
101
 
120
- @JRubyMethod
121
- public IRubyObject name(ThreadContext context) {
122
- return context.getRuntime().newString(name);
123
- }
102
+ return context.getRuntime().getNil();
103
+ }
104
+
105
+ @JRubyMethod
106
+ public IRubyObject
107
+ name(ThreadContext context)
108
+ {
109
+ return context.getRuntime().newString(name);
110
+ }
124
111
  }