nokogiri 1.6.0 → 1.13.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (340) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -19
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +23 -4
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +952 -132
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +231 -96
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +269 -177
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +407 -357
  94. data/lib/nokogiri/css/parser.y +265 -246
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +8 -7
  99. data/lib/nokogiri/css/xpath_visitor.rb +266 -80
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -105
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +270 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +278 -362
  177. data/.autotest +0 -26
  178. data/.gemtest +0 -0
  179. data/.travis.yml +0 -27
  180. data/CHANGELOG.ja.rdoc +0 -819
  181. data/CHANGELOG.rdoc +0 -819
  182. data/C_CODING_STYLE.rdoc +0 -33
  183. data/Manifest.txt +0 -315
  184. data/README.ja.rdoc +0 -106
  185. data/README.rdoc +0 -175
  186. data/ROADMAP.md +0 -90
  187. data/Rakefile +0 -246
  188. data/STANDARD_RESPONSES.md +0 -47
  189. data/Y_U_NO_GEMSPEC.md +0 -155
  190. data/build_all +0 -105
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -56
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -13
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -14
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document.rb +0 -254
  232. data/lib/nokogiri/html/document_fragment.rb +0 -41
  233. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  234. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  235. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  236. data/ports/archives/libxml2-2.8.0.tar.gz +0 -0
  237. data/ports/archives/libxslt-1.1.26.tar.gz +0 -0
  238. data/tasks/cross_compile.rb +0 -132
  239. data/tasks/nokogiri.org.rb +0 -24
  240. data/tasks/test.rb +0 -95
  241. data/test/css/test_nthiness.rb +0 -159
  242. data/test/css/test_parser.rb +0 -341
  243. data/test/css/test_tokenizer.rb +0 -198
  244. data/test/css/test_xpath_visitor.rb +0 -91
  245. data/test/decorators/test_slop.rb +0 -16
  246. data/test/files/2ch.html +0 -108
  247. data/test/files/address_book.rlx +0 -12
  248. data/test/files/address_book.xml +0 -10
  249. data/test/files/bar/bar.xsd +0 -4
  250. data/test/files/bogus.xml +0 -0
  251. data/test/files/dont_hurt_em_why.xml +0 -422
  252. data/test/files/encoding.html +0 -82
  253. data/test/files/encoding.xhtml +0 -84
  254. data/test/files/exslt.xml +0 -8
  255. data/test/files/exslt.xslt +0 -35
  256. data/test/files/foo/foo.xsd +0 -4
  257. data/test/files/metacharset.html +0 -10
  258. data/test/files/noencoding.html +0 -47
  259. data/test/files/po.xml +0 -32
  260. data/test/files/po.xsd +0 -66
  261. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  262. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  263. data/test/files/saml/xenc_schema.xsd +0 -146
  264. data/test/files/saml/xmldsig_schema.xsd +0 -318
  265. data/test/files/shift_jis.html +0 -10
  266. data/test/files/shift_jis.xml +0 -5
  267. data/test/files/snuggles.xml +0 -3
  268. data/test/files/staff.dtd +0 -10
  269. data/test/files/staff.xml +0 -59
  270. data/test/files/staff.xslt +0 -32
  271. data/test/files/test_document_url/bar.xml +0 -2
  272. data/test/files/test_document_url/document.dtd +0 -4
  273. data/test/files/test_document_url/document.xml +0 -6
  274. data/test/files/tlm.html +0 -850
  275. data/test/files/to_be_xincluded.xml +0 -2
  276. data/test/files/valid_bar.xml +0 -2
  277. data/test/files/xinclude.xml +0 -4
  278. data/test/helper.rb +0 -154
  279. data/test/html/sax/test_parser.rb +0 -141
  280. data/test/html/sax/test_parser_context.rb +0 -46
  281. data/test/html/test_builder.rb +0 -164
  282. data/test/html/test_document.rb +0 -552
  283. data/test/html/test_document_encoding.rb +0 -138
  284. data/test/html/test_document_fragment.rb +0 -261
  285. data/test/html/test_element_description.rb +0 -105
  286. data/test/html/test_named_characters.rb +0 -14
  287. data/test/html/test_node.rb +0 -196
  288. data/test/html/test_node_encoding.rb +0 -27
  289. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  290. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  291. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  292. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  293. data/test/test_convert_xpath.rb +0 -135
  294. data/test/test_css_cache.rb +0 -45
  295. data/test/test_encoding_handler.rb +0 -46
  296. data/test/test_memory_leak.rb +0 -156
  297. data/test/test_nokogiri.rb +0 -132
  298. data/test/test_reader.rb +0 -555
  299. data/test/test_soap4r_sax.rb +0 -52
  300. data/test/test_xslt_transforms.rb +0 -254
  301. data/test/xml/node/test_save_options.rb +0 -28
  302. data/test/xml/node/test_subclass.rb +0 -44
  303. data/test/xml/sax/test_parser.rb +0 -366
  304. data/test/xml/sax/test_parser_context.rb +0 -106
  305. data/test/xml/sax/test_push_parser.rb +0 -157
  306. data/test/xml/test_attr.rb +0 -64
  307. data/test/xml/test_attribute_decl.rb +0 -86
  308. data/test/xml/test_builder.rb +0 -306
  309. data/test/xml/test_c14n.rb +0 -151
  310. data/test/xml/test_cdata.rb +0 -48
  311. data/test/xml/test_comment.rb +0 -29
  312. data/test/xml/test_document.rb +0 -828
  313. data/test/xml/test_document_encoding.rb +0 -28
  314. data/test/xml/test_document_fragment.rb +0 -223
  315. data/test/xml/test_dtd.rb +0 -103
  316. data/test/xml/test_dtd_encoding.rb +0 -33
  317. data/test/xml/test_element_content.rb +0 -56
  318. data/test/xml/test_element_decl.rb +0 -73
  319. data/test/xml/test_entity_decl.rb +0 -122
  320. data/test/xml/test_entity_reference.rb +0 -245
  321. data/test/xml/test_namespace.rb +0 -95
  322. data/test/xml/test_node.rb +0 -1137
  323. data/test/xml/test_node_attributes.rb +0 -96
  324. data/test/xml/test_node_encoding.rb +0 -107
  325. data/test/xml/test_node_inheritance.rb +0 -32
  326. data/test/xml/test_node_reparenting.rb +0 -374
  327. data/test/xml/test_node_set.rb +0 -755
  328. data/test/xml/test_parse_options.rb +0 -64
  329. data/test/xml/test_processing_instruction.rb +0 -30
  330. data/test/xml/test_reader_encoding.rb +0 -142
  331. data/test/xml/test_relax_ng.rb +0 -60
  332. data/test/xml/test_schema.rb +0 -103
  333. data/test/xml/test_syntax_error.rb +0 -12
  334. data/test/xml/test_text.rb +0 -45
  335. data/test/xml/test_unparented_node.rb +0 -422
  336. data/test/xml/test_xinclude.rb +0 -83
  337. data/test/xml/test_xpath.rb +0 -295
  338. data/test/xslt/test_custom_functions.rb +0 -133
  339. data/test/xslt/test_exception_handling.rb +0 -37
  340. data/test_all +0 -81
data/LICENSE.md ADDED
@@ -0,0 +1,9 @@
1
+ The MIT License
2
+
3
+ Copyright 2008 -- 2021 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,280 @@
1
+ <div><img src="https://nokogiri.org/images/nokogiri-serif-black.png" align="right"/></div>
2
+
3
+ # Nokogiri
4
+
5
+ Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2 (CRuby) and xerces (JRuby).
6
+
7
+ ## Guiding Principles
8
+
9
+ Some guiding principles Nokogiri tries to follow:
10
+
11
+ - be secure-by-default by treating all documents as **untrusted** by default
12
+ - be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers
13
+
14
+
15
+ ## Features Overview
16
+
17
+ - DOM Parser for XML, HTML4, and HTML5
18
+ - SAX Parser for XML and HTML4
19
+ - Push Parser for XML and HTML4
20
+ - Document search via XPath 1.0
21
+ - Document search via CSS3 selectors, with some jquery-like extensions
22
+ - XSD Schema validation
23
+ - XSLT transformation
24
+ - "Builder" DSL for XML and HTML documents
25
+
26
+
27
+ ## Status
28
+
29
+ [![Github Actions CI](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml)
30
+ [![Appveyor CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/main?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/main)
31
+
32
+ [![Gem Version](https://badge.fury.io/rb/nokogiri.svg)](https://rubygems.org/gems/nokogiri)
33
+ [![SemVer compatibility](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&previous-version=1.11.7&new-version=1.12.5)](https://docs.github.com/en/code-security/supply-chain-security/managing-vulnerabilities-in-your-projects-dependencies/about-dependabot-security-updates#about-compatibility-scores)
34
+
35
+ [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/5344/badge)](https://bestpractices.coreinfrastructure.org/projects/5344)
36
+ [![Tidelift dependencies](https://tidelift.com/badges/package/rubygems/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme)
37
+
38
+
39
+ ## Support, Getting Help, and Reporting Issues
40
+
41
+ All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions).
42
+
43
+ Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
44
+
45
+ [tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme
46
+
47
+ ### Reading
48
+
49
+ Your first stops for learning more about Nokogiri should be:
50
+
51
+ - [API Documentation](https://nokogiri.org/rdoc/index.html)
52
+ - [Tutorials](https://nokogiri.org/tutorials/toc.html)
53
+ - An excellent community-maintained [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet)
54
+
55
+
56
+ ### Ask For Help
57
+
58
+ There are a few ways to ask exploratory questions:
59
+
60
+ - The Ruby Discord chat server is active at https://discord.gg/UyQnKrT
61
+ - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
62
+ - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
63
+
64
+ Please do not mail the maintainers at their personal addresses.
65
+
66
+
67
+ ### Report A Bug
68
+
69
+ The Nokogiri bug tracker is at https://github.com/sparklemotion/nokogiri/issues
70
+
71
+ Please use the "Bug Report" or "Installation Difficulties" templates.
72
+
73
+
74
+ ### Security and Vulnerability Reporting
75
+
76
+ Please report vulnerabilities at https://hackerone.com/nokogiri
77
+
78
+ Full information and description of our security policy is in [`SECURITY.md`](SECURITY.md)
79
+
80
+
81
+ ### Semantic Versioning Policy
82
+
83
+ Nokogiri follows [Semantic Versioning](https://semver.org/) (since 2017 or so). [![Dependabot's SemVer compatibility score for Nokogiri](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&previous-version=1.11.7&new-version=1.12.5)](https://docs.github.com/en/code-security/supply-chain-security/managing-vulnerabilities-in-your-projects-dependencies/about-dependabot-security-updates#about-compatibility-scores)
84
+
85
+ We bump `Major.Minor.Patch` versions following this guidance:
86
+
87
+ `Major`: (we've never done this)
88
+
89
+ - Significant backwards-incompatible changes to the public API that would require rewriting existing application code.
90
+ - Some examples of backwards-incompatible changes we might someday consider for a Major release are at [`ROADMAP.md`](ROADMAP.md).
91
+
92
+ `Minor`:
93
+
94
+ - Features and bugfixes.
95
+ - Updating packaged libraries for non-security-related reasons.
96
+ - Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
97
+ - Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
98
+
99
+ `Patch`:
100
+
101
+ - Bugfixes.
102
+ - Security updates.
103
+ - Updating packaged libraries for security-related reasons.
104
+
105
+
106
+ ## Installation
107
+
108
+ Requirements:
109
+
110
+ - Ruby >= 2.6
111
+ - JRuby >= 9.3.0.0
112
+
113
+
114
+ ### Native Gems: Faster, more reliable installation
115
+
116
+ "Native gems" contain pre-compiled libraries for a specific machine architecture. On supported platforms, this removes the need for compiling the C extension and the packaged libraries, or for system dependencies to exist. This results in **much faster installation** and **more reliable installation**, which as you probably know are the biggest headaches for Nokogiri users.
117
+
118
+ ### Supported Platforms
119
+
120
+ Nokogiri ships pre-compiled, "native" gems for the following platforms:
121
+
122
+ - Linux: `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`), including musl platforms like Alpine
123
+ - Darwin/MacOS: `x86_64-darwin` and `arm64-darwin`
124
+ - Windows: `x86-mingw32` and `x64-mingw32`
125
+ - Java: any platform running JRuby 9.3 or higher
126
+
127
+ To determine whether your system supports one of these gems, look at the output of `bundle platform` or `ruby -e 'puts Gem::Platform.local.to_s'`.
128
+
129
+ If you're on a supported platform, either `gem install` or `bundle install` should install a native gem without any additional action on your part. This installation should only take a few seconds, and your output should look something like:
130
+
131
+ ``` sh
132
+ $ gem install nokogiri
133
+ Fetching nokogiri-1.11.0-x86_64-linux.gem
134
+ Successfully installed nokogiri-1.11.0-x86_64-linux
135
+ 1 gem installed
136
+ ```
137
+
138
+
139
+ ### Other Installation Options
140
+
141
+ Because Nokogiri is a C extension, it requires that you have a C compiler toolchain, Ruby development header files, and some system dependencies installed.
142
+
143
+ The following may work for you if you have an appropriately-configured system:
144
+
145
+ ``` bash
146
+ gem install nokogiri
147
+ ```
148
+
149
+ If you have any issues, please visit [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for more complete instructions and troubleshooting.
150
+
151
+
152
+ ## How To Use Nokogiri
153
+
154
+ Nokogiri is a large library, and so it's challenging to briefly summarize it. We've tried to provide long, real-world examples at [Tutorials](https://nokogiri.org/tutorials/toc.html).
155
+
156
+ ### Parsing and Querying
157
+
158
+ Here is example usage for parsing and querying a document:
159
+
160
+ ```ruby
161
+ #! /usr/bin/env ruby
162
+
163
+ require 'nokogiri'
164
+ require 'open-uri'
165
+
166
+ # Fetch and parse HTML document
167
+ doc = Nokogiri::HTML(URI.open('https://nokogiri.org/tutorials/installing_nokogiri.html'))
168
+
169
+ # Search for nodes by css
170
+ doc.css('nav ul.menu li a', 'article h2').each do |link|
171
+ puts link.content
172
+ end
173
+
174
+ # Search for nodes by xpath
175
+ doc.xpath('//nav//ul//li/a', '//article//h2').each do |link|
176
+ puts link.content
177
+ end
178
+
179
+ # Or mix and match
180
+ doc.search('nav ul.menu li a', '//article//h2').each do |link|
181
+ puts link.content
182
+ end
183
+ ```
184
+
185
+
186
+ ### Encoding
187
+
188
+ Strings are always stored as UTF-8 internally. Methods that return
189
+ text values will always return UTF-8 encoded strings. Methods that
190
+ return a string containing markup (like `to_xml`, `to_html` and
191
+ `inner_html`) will return a string encoded like the source document.
192
+
193
+ __WARNING__
194
+
195
+ Some documents declare one encoding, but actually use a different
196
+ one. In these cases, which encoding should the parser choose?
197
+
198
+ Data is just a stream of bytes. Humans add meaning to that stream. Any
199
+ particular set of bytes could be valid characters in multiple
200
+ encodings, so detecting encoding with 100% accuracy is not
201
+ possible. `libxml2` does its best, but it can't be right all the time.
202
+
203
+ If you want Nokogiri to handle the document encoding properly, your
204
+ best bet is to explicitly set the encoding. Here is an example of
205
+ explicitly setting the encoding to EUC-JP on the parser:
206
+
207
+ ```ruby
208
+ doc = Nokogiri.XML('<foo><bar /></foo>', nil, 'EUC-JP')
209
+ ```
210
+
211
+
212
+ ## Technical Overview
213
+
214
+ ### Guiding Principles
215
+
216
+ As noted above, two guiding principles of the software are:
217
+
218
+ - be secure-by-default by treating all documents as **untrusted** by default
219
+ - be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers
220
+
221
+ Notably, despite all parsers being standards-compliant, there are behavioral inconsistencies between the parsers used in the CRuby and JRuby implementations, and Nokogiri does not and should not attempt to remove these inconsistencies. Instead, we surface these differences in the test suite when they are important/semantic; or we intentionally write tests to depend only on the important/semantic bits (omitting whitespace from regex matchers on results, for example).
222
+
223
+
224
+ ### CRuby
225
+
226
+ The Ruby (a.k.a., CRuby, MRI, YARV) implementation is a C extension that depends on libxml2 and libxslt (which in turn depend on zlib and possibly libiconv).
227
+
228
+ These dependencies are met by default by Nokogiri's packaged versions of the libxml2 and libxslt source code, but a configuration option `--use-system-libraries` is provided to allow specification of alternative library locations. See [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for full documentation.
229
+
230
+ We provide native gems by pre-compiling libxml2 and libxslt (and potentially zlib and libiconv) and packaging them into the gem file. In this case, no compilation is necessary at installation time, which leads to faster and more reliable installation.
231
+
232
+ See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems.
233
+
234
+
235
+ ### JRuby
236
+
237
+ The Java (a.k.a. JRuby) implementation is a Java extension that depends primarily on Xerces and NekoHTML for parsing, though additional dependencies are on `isorelax`, `nekodtd`, `jing`, `serializer`, `xalan-j`, and `xml-apis`.
238
+
239
+ These dependencies are provided by pre-compiled jar files packaged in the `java` platform gem.
240
+
241
+ See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems.
242
+
243
+
244
+ ## Contributing
245
+
246
+ See [`CONTRIBUTING.md`](CONTRIBUTING.md) for an intro guide to developing Nokogiri.
247
+
248
+
249
+ ## Code of Conduct
250
+
251
+ We've adopted the Contributor Covenant code of conduct, which you can read in full in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md).
252
+
253
+
254
+ ## License
255
+
256
+ This project is licensed under the terms of the MIT license.
257
+
258
+ See this license at [`LICENSE.md`](LICENSE.md).
259
+
260
+
261
+ ### Dependencies
262
+
263
+ Some additional libraries may be distributed with your version of Nokogiri. Please see [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for a discussion of the variations as well as the licenses thereof.
264
+
265
+
266
+ ## Authors
267
+
268
+ - Mike Dalessio
269
+ - Aaron Patterson
270
+ - Yoko Harada
271
+ - Akinori MUSHA
272
+ - John Shahid
273
+ - Karol Bucek
274
+ - Sam Ruby
275
+ - Craig Barnes
276
+ - Stephen Checkoway
277
+ - Lars Kanis
278
+ - Sergio Arbeo
279
+ - Timothy Elliott
280
+ - Nobuyoshi Nakada
data/bin/nokogiri CHANGED
@@ -1,30 +1,77 @@
1
1
  #!/usr/bin/env ruby
2
- require 'optparse'
3
- require 'open-uri'
4
- require 'irb'
5
- require 'uri'
6
- require 'rubygems'
7
- require 'nokogiri'
2
+ # frozen_string_literal: true
3
+
4
+ require "optparse"
5
+ require "open-uri"
6
+ require "uri"
7
+ require "rubygems"
8
+ require "nokogiri"
9
+ autoload :IRB, "irb"
8
10
 
9
11
  parse_class = Nokogiri
10
12
  encoding = nil
11
13
 
14
+ # This module provides some tunables with the nokogiri CLI for use in
15
+ # your ~/.nokogirirc.
16
+ module Nokogiri
17
+ module CLI
18
+ class << self
19
+ # Specify the console engine, defaulted to IRB.
20
+ #
21
+ # call-seq:
22
+ # require 'pry'
23
+ # Nokogiri::CLI.console = Pry
24
+ attr_writer :console
25
+
26
+ def console
27
+ case @console
28
+ when Symbol
29
+ Kernel.const_get(@console)
30
+ else
31
+ @console
32
+ end
33
+ end
34
+
35
+ attr_accessor :rcfile
36
+ end
37
+
38
+ self.rcfile = File.expand_path("~/.nokogirirc")
39
+ self.console = :IRB
40
+ end
41
+ end
42
+
43
+ def safe_read(uri_or_path)
44
+ uri = URI.parse(uri_or_path)
45
+ case uri
46
+ when URI::HTTP
47
+ uri.read
48
+ when URI::File
49
+ File.read(uri.path)
50
+ else
51
+ File.read(uri_or_path)
52
+ end
53
+ end
54
+
12
55
  opts = OptionParser.new do |opts|
13
56
  opts.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
14
- opts.define_head "Usage: nokogiri <uri|path> [options]"
15
- opts.separator ""
16
- opts.separator "Examples:"
17
- opts.separator " nokogiri http://www.ruby-lang.org/"
18
- opts.separator " nokogiri ./public/index.html"
19
- opts.separator " curl -s http://nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'"
20
- opts.separator ""
21
- opts.separator "Options:"
22
-
23
- opts.on("--type [TYPE]", [:xml, :html]) do |v|
24
- parse_class = {:xml => Nokogiri::XML, :html => Nokogiri::HTML}[v]
57
+ opts.define_head("Usage: nokogiri <uri|path> [options]")
58
+ opts.separator("")
59
+ opts.separator("Examples:")
60
+ opts.separator(" nokogiri https://www.ruby-lang.org/")
61
+ opts.separator(" nokogiri ./public/index.html")
62
+ opts.separator(" curl -s http://www.nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'")
63
+ opts.separator("")
64
+ opts.separator("Options:")
65
+
66
+ opts.on("--type type", "Parse as type: xml or html (default: auto)", [:xml, :html]) do |v|
67
+ parse_class = { xml: Nokogiri::XML, html: Nokogiri::HTML }[v]
25
68
  end
26
69
 
27
- opts.on("-E", "--encoding encoding", "Read as encoding (default #{encoding})") do |v|
70
+ opts.on("-C file", "Specifies initialization file to load (default #{Nokogiri::CLI.rcfile})") do |v|
71
+ Nokogiri::CLI.rcfile = v
72
+ end
73
+
74
+ opts.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || "none"})") do |v|
28
75
  encoding = v
29
76
  end
30
77
 
@@ -33,7 +80,7 @@ opts = OptionParser.new do |opts|
33
80
  end
34
81
 
35
82
  opts.on("--rng <uri|path>", "Validate using this rng file.") do |v|
36
- @rng = open(v) {|f| Nokogiri::XML::RelaxNG(f)}
83
+ @rng = Nokogiri::XML::RelaxNG(safe_read(v))
37
84
  end
38
85
 
39
86
  opts.on_tail("-?", "--help", "Show this message") do
@@ -48,17 +95,21 @@ opts = OptionParser.new do |opts|
48
95
  end
49
96
  opts.parse!
50
97
 
51
- uri = ARGV.shift
98
+ url = ARGV.shift
52
99
 
53
- if uri.to_s.strip.empty? && $stdin.tty?
100
+ if url.to_s.strip.empty? && $stdin.tty?
54
101
  puts opts
55
102
  exit 1
56
103
  end
57
104
 
58
- if $stdin.tty?
59
- @doc = parse_class.parse(open(uri).read, nil, encoding)
105
+ if File.file?(Nokogiri::CLI.rcfile)
106
+ load Nokogiri::CLI.rcfile
107
+ end
108
+
109
+ @doc = if url || $stdin.tty?
110
+ parse_class.parse(safe_read(url), url, encoding)
60
111
  else
61
- @doc = parse_class.parse($stdin, nil, encoding)
112
+ parse_class.parse($stdin, nil, encoding)
62
113
  end
63
114
 
64
115
  $_ = @doc
@@ -67,12 +118,14 @@ if @rng
67
118
  @rng.validate(@doc).each do |error|
68
119
  puts error.message
69
120
  end
70
- else
71
- if @script
72
- eval @script, binding, '<main>'
73
- else
74
- puts "Your document is stored in @doc..."
75
- IRB.start
121
+ elsif @script
122
+ begin
123
+ eval(@script, binding, "<main>") # rubocop:disable Security/Eval
124
+ rescue Exception => e # rubocop:disable Lint/RescueException
125
+ warn("ERROR: Exception raised while evaluating '#{@script}'")
126
+ raise e
76
127
  end
128
+ else
129
+ puts "Your document is stored in @doc..."
130
+ Nokogiri::CLI.console.start
77
131
  end
78
-
data/dependencies.yml CHANGED
@@ -1,4 +1,23 @@
1
- libxml2: "2.8.0"
2
- libxslt: "1.1.26"
3
- zlib: "1.2.7"
4
- libiconv: "1.13.1"
1
+ libxml2:
2
+ version: "2.9.13"
3
+ sha256: "276130602d12fe484ecc03447ee5e759d0465558fbc9d6bd144e3745306ebf0e"
4
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.13.sha256sum
5
+
6
+ libxslt:
7
+ version: "1.1.35"
8
+ sha256: "8247f33e9a872c6ac859aa45018bc4c4d00b97e2feac9eebc10c93ce1f34dd79"
9
+ # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.35.sha256sum
10
+
11
+ zlib:
12
+ version: "1.2.11"
13
+ sha256: "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1"
14
+ # SHA-256 hash provided on http://zlib.net/
15
+
16
+ libiconv:
17
+ version: "1.16"
18
+ sha256: "e6a1b1b589654277ee790cce3734f07876ac4ccfaecbee8afa0b649cf529cc04"
19
+ # gpg: Signature made Fri 26 Apr 2019 03:36:38 PM EDT
20
+ # gpg: using RSA key 4F494A942E4616C2
21
+ # gpg: Good signature from "Bruno Haible (Open Source Development) <bruno@clisp.org>" [expired]
22
+ # gpg: Note: This key has expired!
23
+ # Primary key fingerprint: 68D9 4D8A AEEA D48A E7DC 5B90 4F49 4A94 2E46 16C2