nokogiri 1.6.8.1 → 1.13.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (354) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile +3 -20
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +197 -83
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +16 -22
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +751 -432
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -95
  18. data/ext/nokogiri/nokogiri.h +190 -98
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +291 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +61 -58
  33. data/ext/nokogiri/xml_node.c +1194 -729
  34. data/ext/nokogiri/xml_node_set.c +178 -165
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +226 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +115 -114
  39. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +13 -17
  44. data/ext/nokogiri/xml_xpath_context.c +223 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +410 -372
  94. data/lib/nokogiri/css/parser.y +260 -244
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +7 -6
  99. data/lib/nokogiri/css/xpath_visitor.rb +226 -92
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +104 -106
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -13
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -107
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +231 -93
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +911 -337
  139. data/lib/nokogiri/xml/node_set.rb +141 -84
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +23 -9
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -40
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +138 -89
  155. data/lib/nokogiri/xml/syntax_error.rb +26 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +39 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch +45 -0
  174. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  175. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  177. metadata +233 -258
  178. data/.autotest +0 -26
  179. data/.cross_rubies +0 -9
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -51
  183. data/CHANGELOG.rdoc +0 -1160
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/LICENSE.txt +0 -31
  187. data/Manifest.txt +0 -364
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -375
  190. data/STANDARD_RESPONSES.md +0 -47
  191. data/Y_U_NO_GEMSPEC.md +0 -155
  192. data/appveyor.yml +0 -22
  193. data/build_all +0 -45
  194. data/ext/nokogiri/html_document.c +0 -170
  195. data/ext/nokogiri/html_document.h +0 -10
  196. data/ext/nokogiri/html_element_description.c +0 -279
  197. data/ext/nokogiri/html_element_description.h +0 -10
  198. data/ext/nokogiri/html_entity_lookup.c +0 -32
  199. data/ext/nokogiri/html_entity_lookup.h +0 -8
  200. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  201. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  202. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  203. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  204. data/ext/nokogiri/xml_attr.h +0 -9
  205. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  206. data/ext/nokogiri/xml_cdata.h +0 -9
  207. data/ext/nokogiri/xml_comment.h +0 -9
  208. data/ext/nokogiri/xml_document.h +0 -23
  209. data/ext/nokogiri/xml_document_fragment.h +0 -10
  210. data/ext/nokogiri/xml_dtd.h +0 -10
  211. data/ext/nokogiri/xml_element_content.h +0 -10
  212. data/ext/nokogiri/xml_element_decl.h +0 -9
  213. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  214. data/ext/nokogiri/xml_entity_decl.h +0 -10
  215. data/ext/nokogiri/xml_entity_reference.h +0 -9
  216. data/ext/nokogiri/xml_io.c +0 -60
  217. data/ext/nokogiri/xml_io.h +0 -11
  218. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  219. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  220. data/ext/nokogiri/xml_namespace.h +0 -13
  221. data/ext/nokogiri/xml_node.h +0 -13
  222. data/ext/nokogiri/xml_node_set.h +0 -13
  223. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  224. data/ext/nokogiri/xml_reader.h +0 -10
  225. data/ext/nokogiri/xml_relax_ng.h +0 -9
  226. data/ext/nokogiri/xml_sax_parser.h +0 -39
  227. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  228. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  229. data/ext/nokogiri/xml_schema.h +0 -9
  230. data/ext/nokogiri/xml_syntax_error.h +0 -13
  231. data/ext/nokogiri/xml_text.h +0 -9
  232. data/ext/nokogiri/xml_xpath_context.h +0 -10
  233. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  234. data/lib/nokogiri/html/document_fragment.rb +0 -39
  235. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  236. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ree-1.8.7.358.supp +0 -61
  242. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  243. data/suppressions/nokogiri_ruby-1.9.2.320.supp +0 -28
  244. data/suppressions/nokogiri_ruby-1.9.3.327.supp +0 -28
  245. data/tasks/test.rb +0 -100
  246. data/test/css/test_nthiness.rb +0 -226
  247. data/test/css/test_parser.rb +0 -369
  248. data/test/css/test_tokenizer.rb +0 -215
  249. data/test/css/test_xpath_visitor.rb +0 -96
  250. data/test/decorators/test_slop.rb +0 -20
  251. data/test/files/2ch.html +0 -108
  252. data/test/files/GH_1042.html +0 -18
  253. data/test/files/address_book.rlx +0 -12
  254. data/test/files/address_book.xml +0 -10
  255. data/test/files/atom.xml +0 -344
  256. data/test/files/bar/bar.xsd +0 -4
  257. data/test/files/bogus.xml +0 -0
  258. data/test/files/dont_hurt_em_why.xml +0 -422
  259. data/test/files/encoding.html +0 -82
  260. data/test/files/encoding.xhtml +0 -84
  261. data/test/files/exslt.xml +0 -8
  262. data/test/files/exslt.xslt +0 -35
  263. data/test/files/foo/foo.xsd +0 -4
  264. data/test/files/metacharset.html +0 -10
  265. data/test/files/namespace_pressure_test.xml +0 -1684
  266. data/test/files/noencoding.html +0 -47
  267. data/test/files/po.xml +0 -32
  268. data/test/files/po.xsd +0 -66
  269. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  270. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  271. data/test/files/saml/xenc_schema.xsd +0 -146
  272. data/test/files/saml/xmldsig_schema.xsd +0 -318
  273. data/test/files/shift_jis.html +0 -10
  274. data/test/files/shift_jis.xml +0 -5
  275. data/test/files/shift_jis_no_charset.html +0 -9
  276. data/test/files/slow-xpath.xml +0 -25509
  277. data/test/files/snuggles.xml +0 -3
  278. data/test/files/staff.dtd +0 -10
  279. data/test/files/staff.xml +0 -59
  280. data/test/files/staff.xslt +0 -32
  281. data/test/files/test_document_url/bar.xml +0 -2
  282. data/test/files/test_document_url/document.dtd +0 -4
  283. data/test/files/test_document_url/document.xml +0 -6
  284. data/test/files/tlm.html +0 -851
  285. data/test/files/to_be_xincluded.xml +0 -2
  286. data/test/files/valid_bar.xml +0 -2
  287. data/test/files/xinclude.xml +0 -4
  288. data/test/helper.rb +0 -181
  289. data/test/html/sax/test_parser.rb +0 -141
  290. data/test/html/sax/test_parser_context.rb +0 -46
  291. data/test/html/sax/test_push_parser.rb +0 -87
  292. data/test/html/test_builder.rb +0 -164
  293. data/test/html/test_document.rb +0 -701
  294. data/test/html/test_document_encoding.rb +0 -145
  295. data/test/html/test_document_fragment.rb +0 -301
  296. data/test/html/test_element_description.rb +0 -105
  297. data/test/html/test_named_characters.rb +0 -14
  298. data/test/html/test_node.rb +0 -212
  299. data/test/html/test_node_encoding.rb +0 -85
  300. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  301. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  302. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  303. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  304. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  305. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  306. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  307. data/test/test_convert_xpath.rb +0 -135
  308. data/test/test_css_cache.rb +0 -45
  309. data/test/test_encoding_handler.rb +0 -48
  310. data/test/test_memory_leak.rb +0 -156
  311. data/test/test_nokogiri.rb +0 -138
  312. data/test/test_soap4r_sax.rb +0 -52
  313. data/test/test_xslt_transforms.rb +0 -314
  314. data/test/xml/node/test_save_options.rb +0 -28
  315. data/test/xml/node/test_subclass.rb +0 -44
  316. data/test/xml/sax/test_parser.rb +0 -394
  317. data/test/xml/sax/test_parser_context.rb +0 -115
  318. data/test/xml/sax/test_push_parser.rb +0 -157
  319. data/test/xml/test_attr.rb +0 -67
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -48
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -271
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -251
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1244
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -69
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -549
  340. data/test/xml/test_node_set.rb +0 -775
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -589
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -30
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -440
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -445
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
  354. data/test_all +0 -107
@@ -5,257 +5,273 @@ token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
5
5
  token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS
6
6
 
7
7
  rule
8
- selector
9
- : selector COMMA simple_selector_1toN {
10
- result = [val.first, val.last].flatten
11
- }
12
- | prefixless_combinator_selector { result = val.flatten }
13
- | optional_S simple_selector_1toN { result = [val.last].flatten }
14
- ;
15
- combinator
16
- : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
17
- | GREATER { result = :CHILD_SELECTOR }
18
- | TILDE { result = :FOLLOWING_SELECTOR }
19
- | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
20
- | SLASH { result = :CHILD_SELECTOR }
21
- ;
22
- simple_selector
23
- : element_name hcap_0toN {
24
- result = if val[1].nil?
25
- val.first
26
- else
27
- Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
28
- end
29
- }
30
- | function
31
- | function pseudo {
32
- result = Node.new(:CONDITIONAL_SELECTOR, val)
33
- }
34
- | function attrib {
35
- result = Node.new(:CONDITIONAL_SELECTOR, val)
36
- }
37
- | hcap_1toN {
38
- result = Node.new(:CONDITIONAL_SELECTOR,
39
- [Node.new(:ELEMENT_NAME, ['*']), val.first]
40
- )
41
- }
42
- ;
43
- prefixless_combinator_selector
44
- : combinator simple_selector_1toN {
45
- result = Node.new(val.first, [nil, val.last])
46
- }
47
- ;
48
- simple_selector_1toN
49
- : simple_selector combinator simple_selector_1toN {
50
- result = Node.new(val[1], [val.first, val.last])
51
- }
52
- | simple_selector S simple_selector_1toN {
53
- result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
54
- }
55
- | simple_selector
56
- ;
57
- class
58
- : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
59
- ;
60
- element_name
61
- : namespaced_ident
62
- | '*' { result = Node.new(:ELEMENT_NAME, val) }
63
- ;
64
- namespaced_ident
65
- : namespace '|' IDENT {
66
- result = Node.new(:ELEMENT_NAME,
67
- [[val.first, val.last].compact.join(':')]
68
- )
69
- }
70
- | IDENT {
71
- name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
72
- result = Node.new(:ELEMENT_NAME, [name])
73
- }
74
- ;
75
- namespace
76
- : IDENT { result = val[0] }
77
- |
78
- ;
79
- attrib
80
- : LSQUARE attrib_name attrib_val_0or1 RSQUARE {
81
- result = Node.new(:ATTRIBUTE_CONDITION,
82
- [val[1]] + (val[2] || [])
83
- )
84
- }
85
- | LSQUARE function attrib_val_0or1 RSQUARE {
86
- result = Node.new(:ATTRIBUTE_CONDITION,
87
- [val[1]] + (val[2] || [])
88
- )
89
- }
90
- | LSQUARE NUMBER RSQUARE {
91
- # Non standard, but hpricot supports it.
92
- result = Node.new(:PSEUDO_CLASS,
93
- [Node.new(:FUNCTION, ['nth-child(', val[1]])]
94
- )
95
- }
96
- ;
97
- attrib_name
98
- : namespace '|' IDENT {
99
- result = Node.new(:ELEMENT_NAME,
100
- [[val.first, val.last].compact.join(':')]
101
- )
102
- }
103
- | IDENT {
104
- # Default namespace is not applied to attributes.
105
- # So we don't add prefix "xmlns:" as in namespaced_ident.
106
- result = Node.new(:ELEMENT_NAME, [val.first])
107
- }
108
- ;
109
- function
110
- : FUNCTION RPAREN {
111
- result = Node.new(:FUNCTION, [val.first.strip])
112
- }
113
- | FUNCTION expr RPAREN {
114
- result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
115
- }
116
- | FUNCTION nth RPAREN {
117
- result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
118
- }
119
- | NOT expr RPAREN {
120
- result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
121
- }
122
- | HAS selector RPAREN {
123
- result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
124
- }
125
- ;
126
- expr
127
- : NUMBER COMMA expr { result = [val.first, val.last] }
128
- | STRING COMMA expr { result = [val.first, val.last] }
129
- | IDENT COMMA expr { result = [val.first, val.last] }
130
- | NUMBER
131
- | STRING
132
- | IDENT # even, odd
133
- {
134
- case val[0]
135
- when 'even'
136
- result = Node.new(:NTH, ['2','n','+','0'])
137
- when 'odd'
138
- result = Node.new(:NTH, ['2','n','+','1'])
139
- when 'n'
140
- result = Node.new(:NTH, ['1','n','+','0'])
141
- else
142
- # This is not CSS standard. It allows us to support this:
143
- # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
144
- # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
145
- # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
146
- result = val
147
- end
148
- }
149
- ;
150
- nth
151
- : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
152
- {
153
- if val[1] == 'n'
154
- result = Node.new(:NTH, val)
155
- else
156
- raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
157
- end
158
- }
159
- | IDENT PLUS NUMBER { # n+3, -n+3
160
- if val[0] == 'n'
161
- val.unshift("1")
162
- result = Node.new(:NTH, val)
163
- elsif val[0] == '-n'
164
- val[0] = 'n'
165
- val.unshift("-1")
166
- result = Node.new(:NTH, val)
167
- else
168
- raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
169
- end
170
- }
171
- | NUMBER IDENT { # 5n, -5n, 10n-1
172
- n = val[1]
173
- if n[0, 2] == 'n-'
174
- val[1] = 'n'
175
- val << "-"
176
- # b is contained in n as n is the string "n-b"
177
- val << n[2, n.size]
178
- result = Node.new(:NTH, val)
179
- elsif n == 'n'
180
- val << "+"
181
- val << "0"
182
- result = Node.new(:NTH, val)
183
- else
184
- raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
185
- end
186
- }
187
- ;
188
- pseudo
189
- : ':' function {
190
- result = Node.new(:PSEUDO_CLASS, [val[1]])
191
- }
192
- | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
193
- ;
194
- hcap_0toN
195
- : hcap_1toN
196
- |
197
- ;
198
- hcap_1toN
199
- : attribute_id hcap_1toN {
200
- result = Node.new(:COMBINATOR, val)
201
- }
202
- | class hcap_1toN {
203
- result = Node.new(:COMBINATOR, val)
204
- }
205
- | attrib hcap_1toN {
206
- result = Node.new(:COMBINATOR, val)
207
- }
208
- | pseudo hcap_1toN {
209
- result = Node.new(:COMBINATOR, val)
210
- }
211
- | negation hcap_1toN {
212
- result = Node.new(:COMBINATOR, val)
213
- }
214
- | attribute_id
215
- | class
216
- | attrib
217
- | pseudo
218
- | negation
219
- ;
220
- attribute_id
221
- : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
222
- ;
223
- attrib_val_0or1
224
- : eql_incl_dash IDENT { result = [val.first, val[1]] }
225
- | eql_incl_dash STRING { result = [val.first, val[1]] }
226
- |
227
- ;
228
- eql_incl_dash
229
- : EQUAL { result = :equal }
230
- | PREFIXMATCH { result = :prefix_match }
231
- | SUFFIXMATCH { result = :suffix_match }
232
- | SUBSTRINGMATCH { result = :substring_match }
233
- | NOT_EQUAL { result = :not_equal }
234
- | INCLUDES { result = :includes }
235
- | DASHMATCH { result = :dash_match }
236
- ;
237
- negation
238
- : NOT negation_arg RPAREN {
239
- result = Node.new(:NOT, [val[1]])
240
- }
241
- ;
242
- negation_arg
243
- : element_name
244
- | element_name hcap_1toN
245
- | hcap_1toN
246
- ;
247
- optional_S
248
- : S
249
- |
250
- ;
8
+ selector:
9
+ selector COMMA simple_selector_1toN {
10
+ result = [val[0], val[2]].flatten
11
+ }
12
+ | prefixless_combinator_selector { result = val.flatten }
13
+ | optional_S simple_selector_1toN { result = [val[1]].flatten }
14
+ ;
15
+
16
+ combinator:
17
+ PLUS { result = :DIRECT_ADJACENT_SELECTOR }
18
+ | GREATER { result = :CHILD_SELECTOR }
19
+ | TILDE { result = :FOLLOWING_SELECTOR }
20
+ | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
21
+ | SLASH { result = :CHILD_SELECTOR }
22
+ ;
23
+
24
+ xpath_attribute_name:
25
+ '@' IDENT { result = val[1] }
26
+ ;
27
+
28
+ xpath_attribute:
29
+ xpath_attribute_name { result = Node.new(:ATTRIB_NAME, [val[0]]) }
30
+ ;
31
+
32
+ simple_selector:
33
+ element_name hcap_0toN {
34
+ result = if val[1].nil?
35
+ val[0]
36
+ else
37
+ Node.new(:CONDITIONAL_SELECTOR, [val[0], val[1]])
38
+ end
39
+ }
40
+ | function
41
+ | function pseudo { result = Node.new(:CONDITIONAL_SELECTOR, val) }
42
+ | function attrib { result = Node.new(:CONDITIONAL_SELECTOR, val) }
43
+ | hcap_1toN { result = Node.new(:CONDITIONAL_SELECTOR, [Node.new(:ELEMENT_NAME, ['*']), val[0]]) }
44
+ | xpath_attribute
45
+ ;
46
+
47
+ prefixless_combinator_selector:
48
+ combinator simple_selector_1toN { result = Node.new(val[0], [nil, val[1]]) }
49
+ ;
50
+
51
+ simple_selector_1toN:
52
+ simple_selector combinator simple_selector_1toN { result = Node.new(val[1], [val[0], val[2]]) }
53
+ | simple_selector S simple_selector_1toN { result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) }
54
+ | simple_selector
55
+ ;
56
+
57
+ class:
58
+ '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
59
+ ;
60
+
61
+ element_name:
62
+ namespaced_ident
63
+ | '*' { result = Node.new(:ELEMENT_NAME, val) }
64
+ ;
65
+
66
+ namespaced_ident:
67
+ namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')]) }
68
+ | IDENT {
69
+ name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
70
+ result = Node.new(:ELEMENT_NAME, [name])
71
+ }
72
+ ;
73
+
74
+ namespace:
75
+ IDENT { result = val[0] }
76
+ |
77
+ ;
78
+
79
+ attrib:
80
+ LSQUARE attrib_name attrib_val_0or1 RSQUARE {
81
+ result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []))
82
+ }
83
+ | LSQUARE function attrib_val_0or1 RSQUARE {
84
+ result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []))
85
+ }
86
+ | LSQUARE NUMBER RSQUARE {
87
+ result = Node.new(:PSEUDO_CLASS, [Node.new(:FUNCTION, ['nth-child(', val[1]])])
88
+ }
89
+ ;
90
+
91
+ attrib_name:
92
+ namespace '|' IDENT { result = Node.new(:ATTRIB_NAME, [[val[0], val[2]].compact.join(':')]) }
93
+ | IDENT { result = Node.new(:ATTRIB_NAME, [val[0]]) }
94
+ | xpath_attribute
95
+ ;
96
+
97
+ function:
98
+ FUNCTION RPAREN {
99
+ result = Node.new(:FUNCTION, [val[0].strip])
100
+ }
101
+ | FUNCTION expr RPAREN {
102
+ result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
103
+ }
104
+ | FUNCTION nth RPAREN {
105
+ result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
106
+ }
107
+ | NOT expr RPAREN {
108
+ result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
109
+ }
110
+ | HAS selector RPAREN {
111
+ result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
112
+ }
113
+ ;
114
+
115
+ expr:
116
+ NUMBER COMMA expr { result = [val[0], val[2]] }
117
+ | STRING COMMA expr { result = [val[0], val[2]] }
118
+ | IDENT COMMA expr { result = [val[0], val[2]] }
119
+ | xpath_attribute COMMA expr { result = [val[0], val[2]] }
120
+ | NUMBER
121
+ | STRING
122
+ | IDENT {
123
+ case val[0]
124
+ when 'even'
125
+ result = Node.new(:NTH, ['2','n','+','0'])
126
+ when 'odd'
127
+ result = Node.new(:NTH, ['2','n','+','1'])
128
+ when 'n'
129
+ result = Node.new(:NTH, ['1','n','+','0'])
130
+ else
131
+ result = val
132
+ end
133
+ }
134
+ | xpath_attribute
135
+ ;
136
+
137
+ nth:
138
+ NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
139
+ {
140
+ if val[1] == 'n'
141
+ result = Node.new(:NTH, val)
142
+ else
143
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
144
+ end
145
+ }
146
+ | IDENT PLUS NUMBER { # n+3, -n+3
147
+ if val[0] == 'n'
148
+ val.unshift("1")
149
+ result = Node.new(:NTH, val)
150
+ elsif val[0] == '-n'
151
+ val[0] = 'n'
152
+ val.unshift("-1")
153
+ result = Node.new(:NTH, val)
154
+ else
155
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
156
+ end
157
+ }
158
+ | NUMBER IDENT { # 5n, -5n, 10n-1
159
+ n = val[1]
160
+ if n[0, 2] == 'n-'
161
+ val[1] = 'n'
162
+ val << "-"
163
+ # b is contained in n as n is the string "n-b"
164
+ val << n[2, n.size]
165
+ result = Node.new(:NTH, val)
166
+ elsif n == 'n'
167
+ val << "+"
168
+ val << "0"
169
+ result = Node.new(:NTH, val)
170
+ else
171
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
172
+ end
173
+ }
174
+ ;
175
+
176
+ pseudo:
177
+ ':' function {
178
+ result = Node.new(:PSEUDO_CLASS, [val[1]])
179
+ }
180
+ | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
181
+ ;
182
+
183
+ hcap_0toN:
184
+ hcap_1toN
185
+ |
186
+ ;
187
+
188
+ hcap_1toN:
189
+ attribute_id hcap_1toN {
190
+ result = Node.new(:COMBINATOR, val)
191
+ }
192
+ | class hcap_1toN {
193
+ result = Node.new(:COMBINATOR, val)
194
+ }
195
+ | attrib hcap_1toN {
196
+ result = Node.new(:COMBINATOR, val)
197
+ }
198
+ | pseudo hcap_1toN {
199
+ result = Node.new(:COMBINATOR, val)
200
+ }
201
+ | negation hcap_1toN {
202
+ result = Node.new(:COMBINATOR, val)
203
+ }
204
+ | attribute_id
205
+ | class
206
+ | attrib
207
+ | pseudo
208
+ | negation
209
+ ;
210
+
211
+ attribute_id:
212
+ HASH { result = Node.new(:ID, [unescape_css_identifier(val[0])]) }
213
+ ;
214
+
215
+ attrib_val_0or1:
216
+ eql_incl_dash IDENT { result = [val[0], unescape_css_identifier(val[1])] }
217
+ | eql_incl_dash STRING { result = [val[0], unescape_css_string(val[1])] }
218
+ | eql_incl_dash NUMBER { result = [val[0], val[1]] }
219
+ |
220
+ ;
221
+
222
+ eql_incl_dash:
223
+ EQUAL { result = :equal }
224
+ | PREFIXMATCH { result = :prefix_match }
225
+ | SUFFIXMATCH { result = :suffix_match }
226
+ | SUBSTRINGMATCH { result = :substring_match }
227
+ | NOT_EQUAL { result = :not_equal }
228
+ | INCLUDES { result = :includes }
229
+ | DASHMATCH { result = :dash_match }
230
+ ;
231
+
232
+ negation:
233
+ NOT negation_arg RPAREN {
234
+ result = Node.new(:NOT, [val[1]])
235
+ }
236
+ ;
237
+
238
+ negation_arg:
239
+ element_name
240
+ | element_name hcap_1toN
241
+ | hcap_1toN
242
+ ;
243
+
244
+ optional_S:
245
+ S
246
+ |
247
+ ;
248
+
251
249
  end
252
250
 
253
251
  ---- header
254
252
 
255
- require 'nokogiri/css/parser_extras'
253
+ require_relative "parser_extras"
254
+
255
+ module Nokogiri
256
+ module CSS
257
+ # :nodoc: all
258
+ class Parser < Racc::Parser
259
+ end
260
+ end
261
+ end
256
262
 
257
263
  ---- inner
258
264
 
259
265
  def unescape_css_identifier(identifier)
260
266
  identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/){ |m| $1 || [$2.hex].pack('U') }
261
267
  end
268
+
269
+ def unescape_css_string(str)
270
+ str.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do |m|
271
+ if $1=="\n"
272
+ ''
273
+ else
274
+ $1 || [$2.hex].pack('U')
275
+ end
276
+ end
277
+ end
@@ -1,64 +1,70 @@
1
- require 'thread'
1
+ # frozen_string_literal: true
2
+
3
+ require "thread"
2
4
 
3
5
  module Nokogiri
4
6
  module CSS
5
- class Parser < Racc::Parser
6
- @cache_on = true
7
- @cache = {}
8
- @mutex = Mutex.new
7
+ class Parser < Racc::Parser # :nodoc:
8
+ CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
9
+
10
+ @cache = {}
11
+ @mutex = Mutex.new
9
12
 
10
13
  class << self
11
- # Turn on CSS parse caching
12
- attr_accessor :cache_on
13
- alias :cache_on? :cache_on
14
- alias :set_cache :cache_on=
14
+ # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
15
+ def cache_on?
16
+ !Thread.current[CACHE_SWITCH_NAME]
17
+ end
18
+
19
+ # Set a thread-local boolean to turn cacheing on and off. Truthy values turn the cache on, falsey values turn the cache off.
20
+ def set_cache(value) # rubocop:disable Naming/AccessorMethodName
21
+ Thread.current[CACHE_SWITCH_NAME] = !value
22
+ end
15
23
 
16
24
  # Get the css selector in +string+ from the cache
17
- def [] string
18
- return unless @cache_on
25
+ def [](string)
26
+ return nil unless cache_on?
27
+
19
28
  @mutex.synchronize { @cache[string] }
20
29
  end
21
30
 
22
31
  # Set the css selector in +string+ in the cache to +value+
23
- def []= string, value
24
- return value unless @cache_on
32
+ def []=(string, value)
33
+ return value unless cache_on?
34
+
25
35
  @mutex.synchronize { @cache[string] = value }
26
36
  end
27
37
 
28
38
  # Clear the cache
29
- def clear_cache
30
- @mutex.synchronize { @cache = {} }
39
+ def clear_cache(create_new_object = false)
40
+ @mutex.synchronize do
41
+ if create_new_object
42
+ @cache = {}
43
+ else
44
+ @cache.clear
45
+ end
46
+ end
31
47
  end
32
48
 
33
49
  # Execute +block+ without cache
34
- def without_cache &block
35
- tmp = @cache_on
36
- @cache_on = false
37
- block.call
38
- @cache_on = tmp
39
- end
40
-
41
- ###
42
- # Parse this CSS selector in +selector+. Returns an AST.
43
- def parse selector
44
- @warned ||= false
45
- unless @warned
46
- $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
47
- @warned = true
48
- end
49
- new.parse selector
50
+ def without_cache(&block)
51
+ original_cache_setting = cache_on?
52
+ set_cache(false)
53
+ yield
54
+ ensure
55
+ set_cache(original_cache_setting)
50
56
  end
51
57
  end
52
58
 
53
59
  # Create a new CSS parser with respect to +namespaces+
54
- def initialize namespaces = {}
55
- @tokenizer = Tokenizer.new
60
+ def initialize(namespaces = {})
61
+ @tokenizer = Tokenizer.new
56
62
  @namespaces = namespaces
57
63
  super()
58
64
  end
59
65
 
60
- def parse string
61
- @tokenizer.scan_setup string
66
+ def parse(string)
67
+ @tokenizer.scan_setup(string)
62
68
  do_parse
63
69
  end
64
70
 
@@ -67,24 +73,23 @@ module Nokogiri
67
73
  end
68
74
 
69
75
  # Get the xpath for +string+ using +options+
70
- def xpath_for string, options={}
71
- key = "#{string}#{options[:ns]}#{options[:prefix]}"
72
- v = self.class[key]
73
- return v if v
74
-
75
- args = [
76
- options[:prefix] || '//',
77
- options[:visitor] || XPathVisitor.new
78
- ]
79
- self.class[key] = parse(string).map { |ast|
80
- ast.to_xpath(*args)
81
- }
76
+ def xpath_for(string, prefix, visitor)
77
+ key = cache_key(string, prefix, visitor)
78
+ self.class[key] ||= parse(string).map do |ast|
79
+ ast.to_xpath(prefix, visitor)
80
+ end
82
81
  end
83
82
 
84
83
  # On CSS parser error, raise an exception
85
- def on_error error_token_id, error_value, value_stack
84
+ def on_error(error_token_id, error_value, value_stack)
86
85
  after = value_stack.compact.last
87
- raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
86
+ raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
87
+ end
88
+
89
+ def cache_key(query, prefix, visitor)
90
+ if self.class.cache_on?
91
+ [query, prefix, @namespaces, visitor.config]
92
+ end
88
93
  end
89
94
  end
90
95
  end