nokogiri 1.8.5 → 1.13.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (353) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +765 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +199 -88
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +21 -21
  22. data/ext/nokogiri/xml_cdata.c +14 -19
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +296 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +25 -25
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +99 -54
  33. data/ext/nokogiri/xml_node.c +1107 -658
  34. data/ext/nokogiri/xml_node_set.c +178 -166
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +277 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +114 -35
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +226 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +21 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  171. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  172. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  173. metadata +211 -266
  174. data/.autotest +0 -22
  175. data/.cross_rubies +0 -8
  176. data/.editorconfig +0 -17
  177. data/.gemtest +0 -0
  178. data/.travis.yml +0 -63
  179. data/CHANGELOG.md +0 -1368
  180. data/CONTRIBUTING.md +0 -42
  181. data/C_CODING_STYLE.rdoc +0 -33
  182. data/Gemfile-libxml-ruby +0 -3
  183. data/Manifest.txt +0 -370
  184. data/ROADMAP.md +0 -111
  185. data/Rakefile +0 -348
  186. data/SECURITY.md +0 -19
  187. data/STANDARD_RESPONSES.md +0 -47
  188. data/Y_U_NO_GEMSPEC.md +0 -155
  189. data/appveyor.yml +0 -29
  190. data/build_all +0 -44
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -61
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -15
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -12
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document_fragment.rb +0 -49
  232. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  233. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  234. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  235. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  236. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ruby-2.supp +0 -10
  242. data/tasks/test.rb +0 -100
  243. data/test/css/test_nthiness.rb +0 -226
  244. data/test/css/test_parser.rb +0 -386
  245. data/test/css/test_tokenizer.rb +0 -215
  246. data/test/css/test_xpath_visitor.rb +0 -96
  247. data/test/decorators/test_slop.rb +0 -23
  248. data/test/files/2ch.html +0 -108
  249. data/test/files/GH_1042.html +0 -18
  250. data/test/files/address_book.rlx +0 -12
  251. data/test/files/address_book.xml +0 -10
  252. data/test/files/atom.xml +0 -344
  253. data/test/files/bar/bar.xsd +0 -4
  254. data/test/files/bogus.xml +0 -0
  255. data/test/files/dont_hurt_em_why.xml +0 -422
  256. data/test/files/encoding.html +0 -82
  257. data/test/files/encoding.xhtml +0 -84
  258. data/test/files/exslt.xml +0 -8
  259. data/test/files/exslt.xslt +0 -35
  260. data/test/files/foo/foo.xsd +0 -4
  261. data/test/files/metacharset.html +0 -10
  262. data/test/files/namespace_pressure_test.xml +0 -1684
  263. data/test/files/noencoding.html +0 -47
  264. data/test/files/po.xml +0 -32
  265. data/test/files/po.xsd +0 -66
  266. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  267. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  268. data/test/files/saml/xenc_schema.xsd +0 -146
  269. data/test/files/saml/xmldsig_schema.xsd +0 -318
  270. data/test/files/shift_jis.html +0 -10
  271. data/test/files/shift_jis.xml +0 -5
  272. data/test/files/shift_jis_no_charset.html +0 -9
  273. data/test/files/slow-xpath.xml +0 -25509
  274. data/test/files/snuggles.xml +0 -3
  275. data/test/files/staff.dtd +0 -10
  276. data/test/files/staff.xml +0 -59
  277. data/test/files/staff.xslt +0 -32
  278. data/test/files/test_document_url/bar.xml +0 -2
  279. data/test/files/test_document_url/document.dtd +0 -4
  280. data/test/files/test_document_url/document.xml +0 -6
  281. data/test/files/tlm.html +0 -851
  282. data/test/files/to_be_xincluded.xml +0 -2
  283. data/test/files/valid_bar.xml +0 -2
  284. data/test/files/xinclude.xml +0 -4
  285. data/test/helper.rb +0 -271
  286. data/test/html/sax/test_parser.rb +0 -168
  287. data/test/html/sax/test_parser_context.rb +0 -46
  288. data/test/html/sax/test_parser_text.rb +0 -163
  289. data/test/html/sax/test_push_parser.rb +0 -87
  290. data/test/html/test_attributes.rb +0 -85
  291. data/test/html/test_builder.rb +0 -164
  292. data/test/html/test_document.rb +0 -712
  293. data/test/html/test_document_encoding.rb +0 -143
  294. data/test/html/test_document_fragment.rb +0 -310
  295. data/test/html/test_element_description.rb +0 -105
  296. data/test/html/test_named_characters.rb +0 -14
  297. data/test/html/test_node.rb +0 -212
  298. data/test/html/test_node_encoding.rb +0 -91
  299. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  300. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  301. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  302. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  303. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  304. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  305. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  306. data/test/test_convert_xpath.rb +0 -135
  307. data/test/test_css_cache.rb +0 -47
  308. data/test/test_encoding_handler.rb +0 -48
  309. data/test/test_memory_leak.rb +0 -156
  310. data/test/test_nokogiri.rb +0 -138
  311. data/test/test_soap4r_sax.rb +0 -52
  312. data/test/test_xslt_transforms.rb +0 -314
  313. data/test/xml/node/test_save_options.rb +0 -28
  314. data/test/xml/node/test_subclass.rb +0 -44
  315. data/test/xml/sax/test_parser.rb +0 -402
  316. data/test/xml/sax/test_parser_context.rb +0 -115
  317. data/test/xml/sax/test_parser_text.rb +0 -202
  318. data/test/xml/sax/test_push_parser.rb +0 -265
  319. data/test/xml/test_attr.rb +0 -74
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -54
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -298
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -262
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1325
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -75
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -592
  340. data/test/xml/test_node_set.rb +0 -809
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -620
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -36
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -483
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -470
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
data/lib/nokogiri.rb CHANGED
@@ -1,96 +1,67 @@
1
- # -*- coding: utf-8 -*-
2
- # Modify the PATH on windows so that the external DLLs will get loaded.
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
3
 
4
- require 'rbconfig'
4
+ require "rbconfig"
5
5
 
6
6
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
7
- # The line below caused a problem on non-GAE rack environment.
8
- # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
9
- #
10
- # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
11
- # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
12
- # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
13
- # should skip loading xml jars. This is because those are in WEB-INF/lib and
14
- # already set in the classpath.
15
- unless $LOAD_PATH.to_s.include?("appengine-rack")
16
- require 'stringio'
17
- require 'isorelax.jar'
18
- require 'jing.jar'
19
- require 'nekohtml.jar'
20
- require 'nekodtd.jar'
21
- require 'xercesImpl.jar'
22
- require 'serializer.jar'
23
- require 'xalan.jar'
24
- require 'xml-apis.jar'
25
- end
7
+ require_relative "nokogiri/jruby/dependencies"
26
8
  end
27
9
 
28
- begin
29
- RUBY_VERSION =~ /(\d+\.\d+)/
30
- require "nokogiri/#{$1}/nokogiri"
31
- rescue LoadError
32
- require 'nokogiri/nokogiri'
33
- end
34
- require 'nokogiri/version'
35
- require 'nokogiri/syntax_error'
36
- require 'nokogiri/xml'
37
- require 'nokogiri/xslt'
38
- require 'nokogiri/html'
39
- require 'nokogiri/decorators/slop'
40
- require 'nokogiri/css'
41
- require 'nokogiri/html/builder'
10
+ require_relative "nokogiri/extension"
42
11
 
43
12
  # Nokogiri parses and searches XML/HTML very quickly, and also has
44
13
  # correctly implemented CSS3 selector support as well as XPath 1.0
45
14
  # support.
46
15
  #
47
16
  # Parsing a document returns either a Nokogiri::XML::Document, or a
48
- # Nokogiri::HTML::Document depending on the kind of document you parse.
17
+ # Nokogiri::HTML4::Document depending on the kind of document you parse.
49
18
  #
50
19
  # Here is an example:
51
20
  #
52
- # require 'nokogiri'
53
- # require 'open-uri'
21
+ # require 'nokogiri'
22
+ # require 'open-uri'
54
23
  #
55
- # # Get a Nokogiri::HTML:Document for the page we’re interested in...
24
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
56
25
  #
57
- # doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
26
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
58
27
  #
59
- # # Do funky things with it using Nokogiri::XML::Node methods...
28
+ # # Do funky things with it using Nokogiri::XML::Node methods...
60
29
  #
61
- # ####
62
- # # Search for nodes by css
63
- # doc.css('h3.r a.l').each do |link|
64
- # puts link.content
65
- # end
30
+ # ####
31
+ # # Search for nodes by css
32
+ # doc.css('h3.r a.l').each do |link|
33
+ # puts link.content
34
+ # end
66
35
  #
67
- # See Nokogiri::XML::Searchable#css for more information about CSS searching.
68
- # See Nokogiri::XML::Searchable#xpath for more information about XPath searching.
36
+ # See also:
37
+ #
38
+ # - Nokogiri::XML::Searchable#css for more information about CSS searching
39
+ # - Nokogiri::XML::Searchable#xpath for more information about XPath searching
69
40
  module Nokogiri
70
41
  class << self
71
42
  ###
72
43
  # Parse an HTML or XML document. +string+ contains the document.
73
- def parse string, url = nil, encoding = nil, options = nil
44
+ def parse(string, url = nil, encoding = nil, options = nil)
74
45
  if string.respond_to?(:read) ||
75
- /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
46
+ /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])
76
47
  # Expect an HTML indicator to appear within the first 512
77
48
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
78
49
  # shouldn't be that long)
79
- Nokogiri.HTML(string, url, encoding,
50
+ Nokogiri.HTML4(string, url, encoding,
80
51
  options || XML::ParseOptions::DEFAULT_HTML)
81
52
  else
82
53
  Nokogiri.XML(string, url, encoding,
83
54
  options || XML::ParseOptions::DEFAULT_XML)
84
- end.tap { |doc|
55
+ end.tap do |doc|
85
56
  yield doc if block_given?
86
- }
57
+ end
87
58
  end
88
59
 
89
60
  ###
90
61
  # Create a new Nokogiri::XML::DocumentFragment
91
- def make input = nil, opts = {}, &blk
62
+ def make(input = nil, opts = {}, &blk)
92
63
  if input
93
- Nokogiri::HTML.fragment(input).children.first
64
+ Nokogiri::HTML4.fragment(input).children.first
94
65
  else
95
66
  Nokogiri(&blk)
96
67
  end
@@ -115,14 +86,15 @@ module Nokogiri
115
86
  Nokogiri(*args, &block).slop!
116
87
  end
117
88
 
89
+ # :nodoc:
118
90
  def install_default_aliases
119
91
  # Make sure to support some popular encoding aliases not known by
120
92
  # all iconv implementations.
121
93
  {
122
- 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
123
- }.each { |alias_name, name|
94
+ "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
95
+ }.each do |alias_name, name|
124
96
  EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
125
- }
97
+ end
126
98
  end
127
99
  end
128
100
 
@@ -130,15 +102,27 @@ module Nokogiri
130
102
  end
131
103
 
132
104
  ###
133
- # Parser a document contained in +args+. Nokogiri will try to guess what
134
- # type of document you are attempting to parse. For more information, see
135
- # Nokogiri.parse
105
+ # Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
106
+ # attempting to parse. For more information, see Nokogiri.parse
136
107
  #
137
- # To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
108
+ # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
138
109
  def Nokogiri(*args, &block)
139
- if block_given?
140
- Nokogiri::HTML::Builder.new(&block).doc.root
110
+ if block
111
+ Nokogiri::HTML4::Builder.new(&block).doc.root
141
112
  else
142
113
  Nokogiri.parse(*args)
143
114
  end
144
115
  end
116
+
117
+ require_relative "nokogiri/version"
118
+ require_relative "nokogiri/class_resolver"
119
+ require_relative "nokogiri/syntax_error"
120
+ require_relative "nokogiri/xml"
121
+ require_relative "nokogiri/xslt"
122
+ require_relative "nokogiri/html4"
123
+ require_relative "nokogiri/html"
124
+ require_relative "nokogiri/decorators/slop"
125
+ require_relative "nokogiri/css"
126
+ require_relative "nokogiri/html4/builder"
127
+
128
+ require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
@@ -1,7 +1,9 @@
1
- require 'nokogiri'
1
+ # frozen_string_literal: true
2
2
 
3
- module XSD # :nodoc:
4
- module XMLParser # :nodoc:
3
+ require "nokogiri"
4
+
5
+ module XSD
6
+ module XMLParser
5
7
  ###
6
8
  # Nokogiri XML parser for soap4r.
7
9
  #
@@ -26,40 +28,40 @@ module XSD # :nodoc:
26
28
  class Nokogiri < XSD::XMLParser::Parser
27
29
  ###
28
30
  # Create a new XSD parser with +host+ and +opt+
29
- def initialize host, opt = {}
31
+ def initialize(host, opt = {})
30
32
  super
31
- @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || 'UTF-8')
33
+ @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || "UTF-8")
32
34
  end
33
35
 
34
36
  ###
35
37
  # Start parsing +string_or_readable+
36
- def do_parse string_or_readable
38
+ def do_parse(string_or_readable)
37
39
  @parser.parse(string_or_readable)
38
40
  end
39
41
 
40
42
  ###
41
43
  # Handle the start_element event with +name+ and +attrs+
42
- def start_element name, attrs = []
44
+ def start_element(name, attrs = [])
43
45
  super(name, Hash[*attrs.flatten])
44
46
  end
45
47
 
46
48
  ###
47
49
  # Handle the end_element event with +name+
48
- def end_element name
50
+ def end_element(name)
49
51
  super
50
52
  end
51
53
 
52
54
  ###
53
55
  # Handle errors with message +msg+
54
- def error msg
55
- raise ParseError.new(msg)
56
+ def error(msg)
57
+ raise ParseError, msg
56
58
  end
57
- alias :warning :error
59
+ alias_method :warning, :error
58
60
 
59
61
  ###
60
62
  # Handle cdata_blocks containing +string+
61
- def cdata_block string
62
- characters string
63
+ def cdata_block(string)
64
+ characters(string)
63
65
  end
64
66
 
65
67
  ###
@@ -69,16 +71,16 @@ module XSD # :nodoc:
69
71
  # +prefix+ is the namespace prefix for the element
70
72
  # +uri+ is the associated namespace URI
71
73
  # +ns+ is a hash of namespace prefix:urls associated with the element
72
- def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
74
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
73
75
  ###
74
76
  # Deal with SAX v1 interface
75
- name = [prefix, name].compact.join(':')
76
- attributes = ns.map { |ns_prefix,ns_uri|
77
- [['xmlns', ns_prefix].compact.join(':'), ns_uri]
78
- } + attrs.map { |attr|
79
- [[attr.prefix, attr.localname].compact.join(':'), attr.value]
80
- }.flatten
81
- start_element name, attributes
77
+ name = [prefix, name].compact.join(":")
78
+ attributes = ns.map do |ns_prefix, ns_uri|
79
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
80
+ end + attrs.map do |attr|
81
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
82
+ end.flatten
83
+ start_element(name, attributes)
82
84
  end
83
85
 
84
86
  ###
@@ -86,13 +88,13 @@ module XSD # :nodoc:
86
88
  # +name+ is the element's name
87
89
  # +prefix+ is the namespace prefix associated with the element
88
90
  # +uri+ is the associated namespace URI
89
- def end_element_namespace name, prefix = nil, uri = nil
91
+ def end_element_namespace(name, prefix = nil, uri = nil)
90
92
  ###
91
93
  # Deal with SAX v1 interface
92
- end_element [prefix, name].compact.join(':')
94
+ end_element([prefix, name].compact.join(":"))
93
95
  end
94
96
 
95
- %w{ xmldecl start_document end_document comment }.each do |name|
97
+ ["xmldecl", "start_document", "end_document", "comment"].each do |name|
96
98
  class_eval %{ def #{name}(*args); end }
97
99
  end
98
100
 
@@ -0,0 +1,40 @@
1
+ From 27e4aa8d885e47a296ea78d114dbbe8fc7aa3508 Mon Sep 17 00:00:00 2001
2
+ From: Kevin Solorio <soloriok@gmail.com>
3
+ Date: Fri, 1 Feb 2019 14:32:42 -0800
4
+ Subject: [PATCH] Revert-support-html-h-b-7-1
5
+
6
+ ---
7
+ entities.c | 17 -----------------
8
+ 1 file changed, 17 deletions(-)
9
+
10
+ diff --git a/entities.c b/entities.c
11
+ index 43549bc5..82652f6d 100644
12
+ --- a/entities.c
13
+ +++ b/entities.c
14
+ @@ -623,23 +623,6 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
15
+ *out++ = 't';
16
+ *out++ = ';';
17
+ } else if (*cur == '&') {
18
+ - /*
19
+ - * Special handling of &{...} construct from HTML 4, see
20
+ - * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
21
+ - */
22
+ - if (html && attr && (cur[1] == '{') &&
23
+ - (strchr((const char *) cur, '}'))) {
24
+ - while (*cur != '}') {
25
+ - *out++ = *cur++;
26
+ - indx = out - buffer;
27
+ - if (indx + 100 > buffer_size) {
28
+ - growBufferReentrant();
29
+ - out = &buffer[indx];
30
+ - }
31
+ - }
32
+ - *out++ = *cur++;
33
+ - continue;
34
+ - }
35
+ *out++ = '&';
36
+ *out++ = 'a';
37
+ *out++ = 'm';
38
+ --
39
+ 2.16.2
40
+
@@ -0,0 +1,44 @@
1
+ From ffc08467744bd2305d41ca882c37fa30adf3a067 Mon Sep 17 00:00:00 2001
2
+ From: Kevin Solorio <soloriok@gmail.com>
3
+ Date: Wed, 27 Feb 2019 14:34:17 -0800
4
+ Subject: [PATCH 2/2] update entities.c to remove handling of ssi
5
+
6
+ ---
7
+ entities.c | 21 ---------------------
8
+ 1 file changed, 21 deletions(-)
9
+
10
+ diff --git a/entities.c b/entities.c
11
+ index 43549bc5..5c4a2a60 100644
12
+ --- a/entities.c
13
+ +++ b/entities.c
14
+ @@ -592,27 +592,6 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
15
+ * By default one have to encode at least '<', '>', '"' and '&' !
16
+ */
17
+ if (*cur == '<') {
18
+ - const xmlChar *end;
19
+ -
20
+ - /*
21
+ - * Special handling of server side include in HTML attributes
22
+ - */
23
+ - if (html && attr &&
24
+ - (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
25
+ - ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
26
+ - while (cur != end) {
27
+ - *out++ = *cur++;
28
+ - indx = out - buffer;
29
+ - if (indx + 100 > buffer_size) {
30
+ - growBufferReentrant();
31
+ - out = &buffer[indx];
32
+ - }
33
+ - }
34
+ - *out++ = *cur++;
35
+ - *out++ = *cur++;
36
+ - *out++ = *cur++;
37
+ - continue;
38
+ - }
39
+ *out++ = '&';
40
+ *out++ = 'l';
41
+ *out++ = 't';
42
+ --
43
+ 2.16.2
44
+
@@ -0,0 +1,25 @@
1
+ From 0b6ae484761fa01242fe8b67b54e3eb2d282d83d Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Wed, 4 Dec 2019 08:43:51 -0500
4
+ Subject: [PATCH] fix libxml2.la's path
5
+
6
+ ---
7
+ Makefile.in | 2 +-
8
+ 1 file changed, 1 insertion(+), 1 deletion(-)
9
+
10
+ diff --git a/Makefile.in b/Makefile.in
11
+ index cf96d41..1372d8b 100644
12
+ --- a/Makefile.in
13
+ +++ b/Makefile.in
14
+ @@ -1057,7 +1057,7 @@ clean-noinstLTLIBRARIES:
15
+ rm -f $${locs}; \
16
+ }
17
+
18
+ -libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
19
+ +$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
20
+ $(AM_V_CCLD)$(libxml2_la_LINK) -rpath $(libdir) $(libxml2_la_OBJECTS) $(libxml2_la_LIBADD) $(LIBS)
21
+
22
+ testdso.la: $(testdso_la_OBJECTS) $(testdso_la_DEPENDENCIES) $(EXTRA_testdso_la_DEPENDENCIES)
23
+ --
24
+ 2.17.1
25
+
@@ -0,0 +1,81 @@
1
+ This patch is a result of rake-compiler-dock using centos 7 (manylinux2014) to cross-compile.
2
+
3
+ Centos, for reasons I have not been able to discern, implements `isnan` and `isinf` as a function
4
+ and not as a macro. Debian knows how to resolve that function at dynamic-link time (despite using a
5
+ macro at compile time), but musl-based systems (like alpine) do not. Running `nm` on nokogiri.so
6
+ created on such a centos system shows:
7
+
8
+ ```
9
+ U __isinf@@GLIBC_2.2.5
10
+ U __isnan@@GLIBC_2.2.5
11
+ ```
12
+
13
+ (see https://github.com/sparklemotion/nokogiri/pull/2142 for more info)
14
+
15
+ This patch avoids using glibc's `isnan` and `isinf` calls, instead using libxml2's fallback
16
+ implementation. There's history here, see libxml2 commit 8813f39:
17
+
18
+ commit 8813f39
19
+ Author: Nick Wellnhofer <wellnhofer@aevum.de>
20
+ Date: 2017-09-21 00:11:26 +0200
21
+
22
+ Simplify XPath NaN, inf and -0 handling
23
+
24
+ Use C99 macros NAN, INFINITY, isnan, isinf. If they're not available:
25
+
26
+ - Assume that (0.0 / 0.0) generates a NaN and !(x == x) tests for NaN.
27
+ - Use C89's HUGE_VAL for INFINITY.
28
+
29
+ Remove manual handling of NaN, infinity and negative zero in functions
30
+ xmlXPathValueFlipSign and xmlXPathDivValues.
31
+
32
+ Remove xmlXPathGetSign. All the tests for negative zero can be replaced
33
+ with a test for negative or positive zero.
34
+
35
+ Simplify xmlXPathRoundFunction.
36
+
37
+ Remove Trio dependency.
38
+
39
+ This should work on IEEE 754 compliant implementations even if the C99
40
+ macros aren't available, but will likely break some ancient platforms.
41
+ If problems arise, my plan is to port the relevant trionan.c solution
42
+ to xpath.c. Note that non-compliant implementations are impossible
43
+ to fully support, anyway, since XPath requires IEEE 754.
44
+
45
+ This patch would be unnecessary if any of the following was true:
46
+
47
+ * centos implements these as macros, and doesn't generate an unresolved symbol for either in the shared library
48
+ * we had a way to ensure `__isinf` and `__isnan` resolve on musl (e.g., we implement them locally)
49
+
50
+ diff --git a/xpath.c b/xpath.c
51
+ index 9f64ab9..5b6d999 100644
52
+ --- a/xpath.c
53
+ +++ b/xpath.c
54
+ @@ -515,11 +515,7 @@ xmlXPathInit(void) {
55
+ */
56
+ int
57
+ xmlXPathIsNaN(double val) {
58
+ -#ifdef isnan
59
+ - return isnan(val);
60
+ -#else
61
+ return !(val == val);
62
+ -#endif
63
+ }
64
+
65
+ /**
66
+ @@ -530,15 +530,11 @@ xmlXPathIsNaN(double val) {
67
+ */
68
+ int
69
+ xmlXPathIsInf(double val) {
70
+ -#ifdef isinf
71
+ - return isinf(val) ? (val > 0 ? 1 : -1) : 0;
72
+ -#else
73
+ if (val >= xmlXPathPINF)
74
+ return 1;
75
+ if (val <= -xmlXPathPINF)
76
+ return -1;
77
+ return 0;
78
+ -#endif
79
+ }
80
+
81
+ #endif /* SCHEMAS or XPATH */
@@ -0,0 +1,77 @@
1
+ From 74c95ec5932c737d4fcb06b8646b0017364ada14 Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Fri, 24 Dec 2021 19:08:01 -0500
4
+ Subject: [PATCH] attempt to hack in wildcard namespaces to xpath
5
+
6
+ I'm not confident this is a bulletproof patch.
7
+ ---
8
+ xpath.c | 24 ++++++++++++++++++------
9
+ 1 file changed, 18 insertions(+), 6 deletions(-)
10
+
11
+ diff --git a/xpath.c b/xpath.c
12
+ index 1aa2f1a..c7f0885 100644
13
+ --- a/xpath.c
14
+ +++ b/xpath.c
15
+ @@ -146,6 +146,9 @@
16
+ #define XPATH_MAX_RECURSION_DEPTH 5000
17
+ #endif
18
+
19
+ +#define WILDCARD_PREFIX "*"
20
+ +#define IS_WILDCARD_PREFIX(p) xmlStrEqual((xmlChar*)WILDCARD_PREFIX, p)
21
+ +
22
+ /*
23
+ * TODO:
24
+ * There are a few spots where some tests are done which depend upon ascii
25
+ @@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test,
26
+ SKIP_BLANKS;
27
+
28
+ if ((name == NULL) && (CUR == '*')) {
29
+ - /*
30
+ - * All elements
31
+ - */
32
+ NEXT;
33
+ - *test = NODE_TEST_ALL;
34
+ - return(NULL);
35
+ + if (CUR != ':') {
36
+ + /*
37
+ + * All elements
38
+ + */
39
+ + *test = NODE_TEST_ALL;
40
+ + return(NULL);
41
+ + }
42
+ + name = xmlCharStrdup(WILDCARD_PREFIX);
43
+ }
44
+
45
+ if (name == NULL)
46
+ @@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) {
47
+ }
48
+ #endif
49
+ if (CUR == '*') {
50
+ + if (NXT(1) == ':') {
51
+ + NEXT;
52
+ + name = xmlCharStrdup(WILDCARD_PREFIX);
53
+ + }
54
+ axis = AXIS_CHILD;
55
+ } else {
56
+ if (name == NULL)
57
+ @@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
58
+ /*
59
+ * Setup namespaces.
60
+ */
61
+ - if (prefix != NULL) {
62
+ + if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) {
63
+ URI = xmlXPathNsLookup(xpctxt, prefix);
64
+ if (URI == NULL) {
65
+ xmlXPathReleaseObject(xpctxt, obj);
66
+ @@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
67
+ {
68
+ XP_TEST_HIT
69
+ }
70
+ + } else if (IS_WILDCARD_PREFIX(prefix)) {
71
+ + XP_TEST_HIT
72
+ } else {
73
+ if ((cur->ns != NULL) &&
74
+ (xmlStrEqual(URI, cur->ns->href)))
75
+ --
76
+ 2.31.0
77
+