nokogiri 1.5.10 → 1.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (334)
  1. checksums.yaml +7 -0
  2. data/Gemfile +5 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +73 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +956 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +327 -288
  94. data/lib/nokogiri/css/parser.y +67 -45
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +7 -6
  99. data/lib/nokogiri/css/xpath_visitor.rb +263 -75
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -90
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +259 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +18 -16
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  171. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  172. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  173. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  174. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  175. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  176. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  177. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  178. metadata +382 -460
  179. data/.autotest +0 -26
  180. data/.gemtest +0 -0
  181. data/CHANGELOG.ja.rdoc +0 -785
  182. data/CHANGELOG.rdoc +0 -783
  183. data/C_CODING_STYLE.rdoc +0 -33
  184. data/Manifest.txt +0 -303
  185. data/README.ja.rdoc +0 -106
  186. data/README.rdoc +0 -175
  187. data/ROADMAP.md +0 -90
  188. data/Rakefile +0 -228
  189. data/STANDARD_RESPONSES.md +0 -47
  190. data/Y_U_NO_GEMSPEC.md +0 -155
  191. data/build_all +0 -105
  192. data/ext/nokogiri/html_document.c +0 -170
  193. data/ext/nokogiri/html_document.h +0 -10
  194. data/ext/nokogiri/html_element_description.c +0 -279
  195. data/ext/nokogiri/html_element_description.h +0 -10
  196. data/ext/nokogiri/html_entity_lookup.c +0 -32
  197. data/ext/nokogiri/html_entity_lookup.h +0 -8
  198. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  199. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  200. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  201. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  202. data/ext/nokogiri/xml_attr.h +0 -9
  203. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  204. data/ext/nokogiri/xml_cdata.h +0 -9
  205. data/ext/nokogiri/xml_comment.h +0 -9
  206. data/ext/nokogiri/xml_document.h +0 -23
  207. data/ext/nokogiri/xml_document_fragment.h +0 -10
  208. data/ext/nokogiri/xml_dtd.h +0 -10
  209. data/ext/nokogiri/xml_element_content.h +0 -10
  210. data/ext/nokogiri/xml_element_decl.h +0 -9
  211. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  212. data/ext/nokogiri/xml_entity_decl.h +0 -10
  213. data/ext/nokogiri/xml_entity_reference.h +0 -9
  214. data/ext/nokogiri/xml_io.c +0 -56
  215. data/ext/nokogiri/xml_io.h +0 -11
  216. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  217. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  218. data/ext/nokogiri/xml_namespace.h +0 -13
  219. data/ext/nokogiri/xml_node.h +0 -13
  220. data/ext/nokogiri/xml_node_set.h +0 -14
  221. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  222. data/ext/nokogiri/xml_reader.h +0 -10
  223. data/ext/nokogiri/xml_relax_ng.h +0 -9
  224. data/ext/nokogiri/xml_sax_parser.h +0 -39
  225. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  226. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  227. data/ext/nokogiri/xml_schema.h +0 -9
  228. data/ext/nokogiri/xml_syntax_error.h +0 -13
  229. data/ext/nokogiri/xml_text.h +0 -9
  230. data/ext/nokogiri/xml_xpath_context.h +0 -10
  231. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  232. data/lib/nokogiri/html/document.rb +0 -254
  233. data/lib/nokogiri/html/document_fragment.rb +0 -41
  234. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  235. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  236. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  237. data/tasks/cross_compile.rb +0 -150
  238. data/tasks/nokogiri.org.rb +0 -24
  239. data/tasks/test.rb +0 -95
  240. data/test/css/test_nthiness.rb +0 -159
  241. data/test/css/test_parser.rb +0 -341
  242. data/test/css/test_tokenizer.rb +0 -198
  243. data/test/css/test_xpath_visitor.rb +0 -91
  244. data/test/decorators/test_slop.rb +0 -16
  245. data/test/files/2ch.html +0 -108
  246. data/test/files/address_book.rlx +0 -12
  247. data/test/files/address_book.xml +0 -10
  248. data/test/files/bar/bar.xsd +0 -4
  249. data/test/files/dont_hurt_em_why.xml +0 -422
  250. data/test/files/encoding.html +0 -82
  251. data/test/files/encoding.xhtml +0 -84
  252. data/test/files/exslt.xml +0 -8
  253. data/test/files/exslt.xslt +0 -35
  254. data/test/files/foo/foo.xsd +0 -4
  255. data/test/files/metacharset.html +0 -10
  256. data/test/files/noencoding.html +0 -47
  257. data/test/files/po.xml +0 -32
  258. data/test/files/po.xsd +0 -66
  259. data/test/files/shift_jis.html +0 -10
  260. data/test/files/shift_jis.xml +0 -5
  261. data/test/files/snuggles.xml +0 -3
  262. data/test/files/staff.dtd +0 -10
  263. data/test/files/staff.xml +0 -59
  264. data/test/files/staff.xslt +0 -32
  265. data/test/files/test_document_url/bar.xml +0 -2
  266. data/test/files/test_document_url/document.dtd +0 -4
  267. data/test/files/test_document_url/document.xml +0 -6
  268. data/test/files/tlm.html +0 -850
  269. data/test/files/to_be_xincluded.xml +0 -2
  270. data/test/files/valid_bar.xml +0 -2
  271. data/test/files/xinclude.xml +0 -4
  272. data/test/helper.rb +0 -154
  273. data/test/html/sax/test_parser.rb +0 -141
  274. data/test/html/sax/test_parser_context.rb +0 -46
  275. data/test/html/test_builder.rb +0 -164
  276. data/test/html/test_document.rb +0 -552
  277. data/test/html/test_document_encoding.rb +0 -138
  278. data/test/html/test_document_fragment.rb +0 -261
  279. data/test/html/test_element_description.rb +0 -105
  280. data/test/html/test_named_characters.rb +0 -14
  281. data/test/html/test_node.rb +0 -196
  282. data/test/html/test_node_encoding.rb +0 -27
  283. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  284. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  285. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  286. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  287. data/test/test_convert_xpath.rb +0 -135
  288. data/test/test_css_cache.rb +0 -45
  289. data/test/test_encoding_handler.rb +0 -46
  290. data/test/test_memory_leak.rb +0 -156
  291. data/test/test_nokogiri.rb +0 -132
  292. data/test/test_reader.rb +0 -555
  293. data/test/test_soap4r_sax.rb +0 -52
  294. data/test/test_xslt_transforms.rb +0 -254
  295. data/test/xml/node/test_save_options.rb +0 -28
  296. data/test/xml/node/test_subclass.rb +0 -44
  297. data/test/xml/sax/test_parser.rb +0 -366
  298. data/test/xml/sax/test_parser_context.rb +0 -106
  299. data/test/xml/sax/test_push_parser.rb +0 -157
  300. data/test/xml/test_attr.rb +0 -64
  301. data/test/xml/test_attribute_decl.rb +0 -86
  302. data/test/xml/test_builder.rb +0 -306
  303. data/test/xml/test_c14n.rb +0 -151
  304. data/test/xml/test_cdata.rb +0 -48
  305. data/test/xml/test_comment.rb +0 -29
  306. data/test/xml/test_document.rb +0 -828
  307. data/test/xml/test_document_encoding.rb +0 -28
  308. data/test/xml/test_document_fragment.rb +0 -223
  309. data/test/xml/test_dtd.rb +0 -103
  310. data/test/xml/test_dtd_encoding.rb +0 -33
  311. data/test/xml/test_element_content.rb +0 -56
  312. data/test/xml/test_element_decl.rb +0 -73
  313. data/test/xml/test_entity_decl.rb +0 -122
  314. data/test/xml/test_entity_reference.rb +0 -245
  315. data/test/xml/test_namespace.rb +0 -95
  316. data/test/xml/test_node.rb +0 -1137
  317. data/test/xml/test_node_attributes.rb +0 -96
  318. data/test/xml/test_node_encoding.rb +0 -107
  319. data/test/xml/test_node_inheritance.rb +0 -32
  320. data/test/xml/test_node_reparenting.rb +0 -374
  321. data/test/xml/test_node_set.rb +0 -755
  322. data/test/xml/test_parse_options.rb +0 -64
  323. data/test/xml/test_processing_instruction.rb +0 -30
  324. data/test/xml/test_reader_encoding.rb +0 -142
  325. data/test/xml/test_relax_ng.rb +0 -60
  326. data/test/xml/test_schema.rb +0 -103
  327. data/test/xml/test_syntax_error.rb +0 -12
  328. data/test/xml/test_text.rb +0 -45
  329. data/test/xml/test_unparented_node.rb +0 -422
  330. data/test/xml/test_xinclude.rb +0 -83
  331. data/test/xml/test_xpath.rb +0 -295
  332. data/test/xslt/test_custom_functions.rb +0 -133
  333. data/test/xslt/test_exception_handling.rb +0 -37
  334. data/test_all +0 -81
@@ -10,13 +10,12 @@ rule
10
10
  result = [val.first, val.last].flatten
11
11
  }
12
12
  | prefixless_combinator_selector { result = val.flatten }
13
- | simple_selector_1toN { result = val.flatten }
13
+ | optional_S simple_selector_1toN { result = [val.last].flatten }
14
14
  ;
15
15
  combinator
16
16
  : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
17
17
  | GREATER { result = :CHILD_SELECTOR }
18
18
  | TILDE { result = :FOLLOWING_SELECTOR }
19
- | S { result = :DESCENDANT_SELECTOR }
20
19
  | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
21
20
  | SLASH { result = :CHILD_SELECTOR }
22
21
  ;
@@ -28,17 +27,6 @@ rule
28
27
  Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
29
28
  end
30
29
  }
31
- | element_name hcap_1toN negation {
32
- result = Node.new(:CONDITIONAL_SELECTOR,
33
- [
34
- val.first,
35
- Node.new(:COMBINATOR, [val[1], val.last])
36
- ]
37
- )
38
- }
39
- | element_name negation {
40
- result = Node.new(:CONDITIONAL_SELECTOR, val)
41
- }
42
30
  | function
43
31
  | function pseudo {
44
32
  result = Node.new(:CONDITIONAL_SELECTOR, val)
@@ -46,14 +34,6 @@ rule
46
34
  | function attrib {
47
35
  result = Node.new(:CONDITIONAL_SELECTOR, val)
48
36
  }
49
- | hcap_1toN negation {
50
- result = Node.new(:CONDITIONAL_SELECTOR,
51
- [
52
- Node.new(:ELEMENT_NAME, ['*']),
53
- Node.new(:COMBINATOR, val)
54
- ]
55
- )
56
- }
57
37
  | hcap_1toN {
58
38
  result = Node.new(:CONDITIONAL_SELECTOR,
59
39
  [Node.new(:ELEMENT_NAME, ['*']), val.first]
@@ -69,10 +49,13 @@ rule
69
49
  : simple_selector combinator simple_selector_1toN {
70
50
  result = Node.new(val[1], [val.first, val.last])
71
51
  }
52
+ | simple_selector S simple_selector_1toN {
53
+ result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
54
+ }
72
55
  | simple_selector
73
56
  ;
74
57
  class
75
- : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
58
+ : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
76
59
  ;
77
60
  element_name
78
61
  : namespaced_ident
@@ -105,7 +88,7 @@ rule
105
88
  )
106
89
  }
107
90
  | LSQUARE NUMBER RSQUARE {
108
- # Non standard, but hpricot supports it.
91
+ # non-standard, from hpricot
109
92
  result = Node.new(:PSEUDO_CLASS,
110
93
  [Node.new(:FUNCTION, ['nth-child(', val[1]])]
111
94
  )
@@ -113,14 +96,14 @@ rule
113
96
  ;
114
97
  attrib_name
115
98
  : namespace '|' IDENT {
116
- result = Node.new(:ELEMENT_NAME,
99
+ result = Node.new(:ATTRIB_NAME,
117
100
  [[val.first, val.last].compact.join(':')]
118
101
  )
119
102
  }
120
103
  | IDENT {
121
104
  # Default namespace is not applied to attributes.
122
105
  # So we don't add prefix "xmlns:" as in namespaced_ident.
123
- result = Node.new(:ELEMENT_NAME, [val.first])
106
+ result = Node.new(:ATTRIB_NAME, [val.first])
124
107
  }
125
108
  ;
126
109
  function
@@ -130,7 +113,7 @@ rule
130
113
  | FUNCTION expr RPAREN {
131
114
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
132
115
  }
133
- | FUNCTION an_plus_b RPAREN {
116
+ | FUNCTION nth RPAREN {
134
117
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
135
118
  }
136
119
  | NOT expr RPAREN {
@@ -148,14 +131,15 @@ rule
148
131
  | STRING
149
132
  | IDENT # even, odd
150
133
  {
151
- if val[0] == 'even'
152
- val = ["2","n","+","0"]
153
- result = Node.new(:AN_PLUS_B, val)
154
- elsif val[0] == 'odd'
155
- val = ["2","n","+","1"]
156
- result = Node.new(:AN_PLUS_B, val)
134
+ case val[0]
135
+ when 'even'
136
+ result = Node.new(:NTH, ['2','n','+','0'])
137
+ when 'odd'
138
+ result = Node.new(:NTH, ['2','n','+','1'])
139
+ when 'n'
140
+ result = Node.new(:NTH, ['1','n','+','0'])
157
141
  else
158
- # This is not CSS standard. It allows us to support this:
142
+ # non-standard to support custom functions:
159
143
  # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
160
144
  # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
161
145
  # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
@@ -163,11 +147,11 @@ rule
163
147
  end
164
148
  }
165
149
  ;
166
- an_plus_b
150
+ nth
167
151
  : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
168
152
  {
169
153
  if val[1] == 'n'
170
- result = Node.new(:AN_PLUS_B, val)
154
+ result = Node.new(:NTH, val)
171
155
  else
172
156
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
173
157
  end
@@ -175,21 +159,27 @@ rule
175
159
  | IDENT PLUS NUMBER { # n+3, -n+3
176
160
  if val[0] == 'n'
177
161
  val.unshift("1")
178
- result = Node.new(:AN_PLUS_B, val)
162
+ result = Node.new(:NTH, val)
179
163
  elsif val[0] == '-n'
180
164
  val[0] = 'n'
181
165
  val.unshift("-1")
182
- result = Node.new(:AN_PLUS_B, val)
166
+ result = Node.new(:NTH, val)
183
167
  else
184
168
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
185
169
  end
186
170
  }
187
- | NUMBER IDENT # 5n, -5n
188
- {
189
- if val[1] == 'n'
171
+ | NUMBER IDENT { # 5n, -5n, 10n-1
172
+ n = val[1]
173
+ if n[0, 2] == 'n-'
174
+ val[1] = 'n'
175
+ val << "-"
176
+ # b is contained in n as n is the string "n-b"
177
+ val << n[2, n.size]
178
+ result = Node.new(:NTH, val)
179
+ elsif n == 'n'
190
180
  val << "+"
191
181
  val << "0"
192
- result = Node.new(:AN_PLUS_B, val)
182
+ result = Node.new(:NTH, val)
193
183
  else
194
184
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
195
185
  end
@@ -218,17 +208,22 @@ rule
218
208
  | pseudo hcap_1toN {
219
209
  result = Node.new(:COMBINATOR, val)
220
210
  }
211
+ | negation hcap_1toN {
212
+ result = Node.new(:COMBINATOR, val)
213
+ }
221
214
  | attribute_id
222
215
  | class
223
216
  | attrib
224
217
  | pseudo
218
+ | negation
225
219
  ;
226
220
  attribute_id
227
- : HASH { result = Node.new(:ID, val) }
221
+ : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
228
222
  ;
229
223
  attrib_val_0or1
230
- : eql_incl_dash IDENT { result = [val.first, val[1]] }
231
- | eql_incl_dash STRING { result = [val.first, val[1]] }
224
+ : eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] }
225
+ | eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] }
226
+ | eql_incl_dash NUMBER { result = [val.first, val[1]] }
232
227
  |
233
228
  ;
234
229
  eql_incl_dash
@@ -250,9 +245,36 @@ rule
250
245
  | element_name hcap_1toN
251
246
  | hcap_1toN
252
247
  ;
248
+ optional_S
249
+ : S
250
+ |
251
+ ;
253
252
  end
254
253
 
255
254
  ---- header
256
255
 
257
- require 'nokogiri/css/parser_extras'
256
+ require_relative "parser_extras"
258
257
 
258
+ module Nokogiri
259
+ module CSS
260
+ # :nodoc: all
261
+ class Parser < Racc::Parser
262
+ end
263
+ end
264
+ end
265
+
266
+ ---- inner
267
+
268
+ def unescape_css_identifier(identifier)
269
+ identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/){ |m| $1 || [$2.hex].pack('U') }
270
+ end
271
+
272
+ def unescape_css_string(str)
273
+ str.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do |m|
274
+ if $1=="\n"
275
+ ''
276
+ else
277
+ $1 || [$2.hex].pack('U')
278
+ end
279
+ end
280
+ end
@@ -1,64 +1,68 @@
1
- require 'thread'
1
+ # frozen_string_literal: true
2
+
3
+ require "thread"
2
4
 
3
5
  module Nokogiri
4
6
  module CSS
5
- class Parser < Racc::Parser
6
- @cache_on = true
7
- @cache = {}
8
- @mutex = Mutex.new
7
+ class Parser < Racc::Parser # :nodoc:
8
+ CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
9
+
10
+ @cache = {}
11
+ @mutex = Mutex.new
9
12
 
10
13
  class << self
11
- # Turn on CSS parse caching
12
- attr_accessor :cache_on
13
- alias :cache_on? :cache_on
14
- alias :set_cache :cache_on=
14
+ # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
15
+ def cache_on?
16
+ !Thread.current[CACHE_SWITCH_NAME]
17
+ end
18
+
19
+ # Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
20
+ def set_cache(value) # rubocop:disable Naming/AccessorMethodName
21
+ Thread.current[CACHE_SWITCH_NAME] = !value
22
+ end
15
23
 
16
24
  # Get the css selector in +string+ from the cache
17
- def [] string
18
- return unless @cache_on
25
+ def [](string)
26
+ return nil unless cache_on?
19
27
  @mutex.synchronize { @cache[string] }
20
28
  end
21
29
 
22
30
  # Set the css selector in +string+ in the cache to +value+
23
- def []= string, value
24
- return value unless @cache_on
31
+ def []=(string, value)
32
+ return value unless cache_on?
25
33
  @mutex.synchronize { @cache[string] = value }
26
34
  end
27
35
 
28
36
  # Clear the cache
29
- def clear_cache
30
- @mutex.synchronize { @cache = {} }
37
+ def clear_cache(create_new_object = false)
38
+ @mutex.synchronize do
39
+ if create_new_object
40
+ @cache = {}
41
+ else
42
+ @cache.clear
43
+ end
44
+ end
31
45
  end
32
46
 
33
47
  # Execute +block+ without cache
34
- def without_cache &block
35
- tmp = @cache_on
36
- @cache_on = false
37
- block.call
38
- @cache_on = tmp
39
- end
40
-
41
- ###
42
- # Parse this CSS selector in +selector+. Returns an AST.
43
- def parse selector
44
- @warned ||= false
45
- unless @warned
46
- $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
47
- @warned = true
48
- end
49
- new.parse selector
48
+ def without_cache(&block)
49
+ original_cache_setting = cache_on?
50
+ set_cache(false)
51
+ yield
52
+ ensure
53
+ set_cache(original_cache_setting)
50
54
  end
51
55
  end
52
56
 
53
57
  # Create a new CSS parser with respect to +namespaces+
54
- def initialize namespaces = {}
55
- @tokenizer = Tokenizer.new
58
+ def initialize(namespaces = {})
59
+ @tokenizer = Tokenizer.new
56
60
  @namespaces = namespaces
57
61
  super()
58
62
  end
59
63
 
60
- def parse string
61
- @tokenizer.scan_setup string
64
+ def parse(string)
65
+ @tokenizer.scan_setup(string)
62
66
  do_parse
63
67
  end
64
68
 
@@ -67,24 +71,23 @@ module Nokogiri
67
71
  end
68
72
 
69
73
  # Get the xpath for +string+ using +options+
70
- def xpath_for string, options={}
71
- key = "#{string}#{options[:ns]}#{options[:prefix]}"
72
- v = self.class[key]
73
- return v if v
74
-
75
- args = [
76
- options[:prefix] || '//',
77
- options[:visitor] || XPathVisitor.new
78
- ]
79
- self.class[key] = parse(string).map { |ast|
80
- ast.to_xpath(*args)
81
- }
74
+ def xpath_for(string, prefix, visitor)
75
+ key = cache_key(string, prefix, visitor)
76
+ self.class[key] ||= parse(string).map do |ast|
77
+ ast.to_xpath(prefix, visitor)
78
+ end
82
79
  end
83
80
 
84
81
  # On CSS parser error, raise an exception
85
- def on_error error_token_id, error_value, value_stack
82
+ def on_error(error_token_id, error_value, value_stack)
86
83
  after = value_stack.compact.last
87
- raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
84
+ raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
85
+ end
86
+
87
+ def cache_key(query, prefix, visitor)
88
+ if self.class.cache_on?
89
+ [query, prefix, @namespaces, visitor.config]
90
+ end
88
91
  end
89
92
  end
90
93
  end
@@ -1,4 +1,6 @@
1
- require 'nokogiri/syntax_error'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../syntax_error"
2
4
  module Nokogiri
3
5
  module CSS
4
6
  class SyntaxError < ::Nokogiri::SyntaxError
@@ -1,151 +1,154 @@
1
+ # frozen_string_literal: true
1
2
  #--
2
3
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by rex 1.0.5
4
+ # This file is automatically generated by rex 1.0.7
4
5
  # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
6
  #++
6
7
 
7
8
  module Nokogiri
8
9
  module CSS
9
- class Tokenizer # :nodoc:
10
- require 'strscan'
10
+ # :nodoc: all
11
+ class Tokenizer
12
+ require 'strscan'
11
13
 
12
- class ScanError < StandardError ; end
14
+ class ScanError < StandardError ; end
13
15
 
14
- attr_reader :lineno
15
- attr_reader :filename
16
- attr_accessor :state
16
+ attr_reader :lineno
17
+ attr_reader :filename
18
+ attr_accessor :state
17
19
 
18
- def scan_setup(str)
19
- @ss = StringScanner.new(str)
20
- @lineno = 1
21
- @state = nil
22
- end
20
+ def scan_setup(str)
21
+ @ss = StringScanner.new(str)
22
+ @lineno = 1
23
+ @state = nil
24
+ end
23
25
 
24
- def action
25
- yield
26
- end
26
+ def action
27
+ yield
28
+ end
27
29
 
28
- def scan_str(str)
29
- scan_setup(str)
30
- do_parse
31
- end
32
- alias :scan :scan_str
30
+ def scan_str(str)
31
+ scan_setup(str)
32
+ do_parse
33
+ end
34
+ alias :scan :scan_str
33
35
 
34
- def load_file( filename )
35
- @filename = filename
36
- open(filename, "r") do |f|
37
- scan_setup(f.read)
38
- end
39
- end
36
+ def load_file( filename )
37
+ @filename = filename
38
+ File.open(filename, "r") do |f|
39
+ scan_setup(f.read)
40
+ end
41
+ end
40
42
 
41
- def scan_file( filename )
42
- load_file(filename)
43
- do_parse
44
- end
43
+ def scan_file( filename )
44
+ load_file(filename)
45
+ do_parse
46
+ end
45
47
 
46
48
 
47
- def next_token
48
- return if @ss.eos?
49
-
50
- # skips empty actions
51
- until token = _next_token or @ss.eos?; end
52
- token
53
- end
49
+ def next_token
50
+ return if @ss.eos?
54
51
 
55
- def _next_token
56
- text = @ss.peek(1)
57
- @lineno += 1 if text == "\n"
58
- token = case @state
59
- when nil
60
- case
61
- when (text = @ss.scan(/has\([\s]*/))
62
- action { [:HAS, text] }
52
+ # skips empty actions
53
+ until token = _next_token or @ss.eos?; end
54
+ token
55
+ end
63
56
 
64
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
- action { [:FUNCTION, text] }
57
+ def _next_token
58
+ text = @ss.peek(1)
59
+ @lineno += 1 if text == "\n"
60
+ token = case @state
61
+ when nil
62
+ case
63
+ when (text = @ss.scan(/has\([\s]*/))
64
+ action { [:HAS, text] }
66
65
 
67
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
- action { [:IDENT, text] }
66
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
67
+ action { [:FUNCTION, text] }
69
68
 
70
- when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
- action { [:HASH, text] }
69
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
70
+ action { [:IDENT, text] }
72
71
 
73
- when (text = @ss.scan(/[\s]*~=[\s]*/))
74
- action { [:INCLUDES, text] }
72
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
73
+ action { [:HASH, text] }
75
74
 
76
- when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
- action { [:DASHMATCH, text] }
75
+ when (text = @ss.scan(/[\s]*~=[\s]*/))
76
+ action { [:INCLUDES, text] }
78
77
 
79
- when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
- action { [:PREFIXMATCH, text] }
78
+ when (text = @ss.scan(/[\s]*\|=[\s]*/))
79
+ action { [:DASHMATCH, text] }
81
80
 
82
- when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
- action { [:SUFFIXMATCH, text] }
81
+ when (text = @ss.scan(/[\s]*\^=[\s]*/))
82
+ action { [:PREFIXMATCH, text] }
84
83
 
85
- when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
- action { [:SUBSTRINGMATCH, text] }
84
+ when (text = @ss.scan(/[\s]*\$=[\s]*/))
85
+ action { [:SUFFIXMATCH, text] }
87
86
 
88
- when (text = @ss.scan(/[\s]*!=[\s]*/))
89
- action { [:NOT_EQUAL, text] }
87
+ when (text = @ss.scan(/[\s]*\*=[\s]*/))
88
+ action { [:SUBSTRINGMATCH, text] }
90
89
 
91
- when (text = @ss.scan(/[\s]*=[\s]*/))
92
- action { [:EQUAL, text] }
90
+ when (text = @ss.scan(/[\s]*!=[\s]*/))
91
+ action { [:NOT_EQUAL, text] }
93
92
 
94
- when (text = @ss.scan(/[\s]*\)/))
95
- action { [:RPAREN, text] }
93
+ when (text = @ss.scan(/[\s]*=[\s]*/))
94
+ action { [:EQUAL, text] }
96
95
 
97
- when (text = @ss.scan(/[\s]*\[[\s]*/))
98
- action { [:LSQUARE, text] }
96
+ when (text = @ss.scan(/[\s]*\)/))
97
+ action { [:RPAREN, text] }
99
98
 
100
- when (text = @ss.scan(/[\s]*\]/))
101
- action { [:RSQUARE, text] }
99
+ when (text = @ss.scan(/\[[\s]*/))
100
+ action { [:LSQUARE, text] }
102
101
 
103
- when (text = @ss.scan(/[\s]*\+[\s]*/))
104
- action { [:PLUS, text] }
102
+ when (text = @ss.scan(/[\s]*\]/))
103
+ action { [:RSQUARE, text] }
105
104
 
106
- when (text = @ss.scan(/[\s]*>[\s]*/))
107
- action { [:GREATER, text] }
105
+ when (text = @ss.scan(/[\s]*\+[\s]*/))
106
+ action { [:PLUS, text] }
108
107
 
109
- when (text = @ss.scan(/[\s]*,[\s]*/))
110
- action { [:COMMA, text] }
108
+ when (text = @ss.scan(/[\s]*>[\s]*/))
109
+ action { [:GREATER, text] }
111
110
 
112
- when (text = @ss.scan(/[\s]*~[\s]*/))
113
- action { [:TILDE, text] }
111
+ when (text = @ss.scan(/[\s]*,[\s]*/))
112
+ action { [:COMMA, text] }
114
113
 
115
- when (text = @ss.scan(/\:not\([\s]*/))
116
- action { [:NOT, text] }
114
+ when (text = @ss.scan(/[\s]*~[\s]*/))
115
+ action { [:TILDE, text] }
117
116
 
118
- when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
- action { [:NUMBER, text] }
117
+ when (text = @ss.scan(/\:not\([\s]*/))
118
+ action { [:NOT, text] }
120
119
 
121
- when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
- action { [:DOUBLESLASH, text] }
120
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
121
+ action { [:NUMBER, text] }
123
122
 
124
- when (text = @ss.scan(/[\s]*\/[\s]*/))
125
- action { [:SLASH, text] }
123
+ when (text = @ss.scan(/[\s]*\/\/[\s]*/))
124
+ action { [:DOUBLESLASH, text] }
126
125
 
127
- when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
- action {[:UNICODE_RANGE, text] }
126
+ when (text = @ss.scan(/[\s]*\/[\s]*/))
127
+ action { [:SLASH, text] }
129
128
 
130
- when (text = @ss.scan(/[\s]+/))
131
- action { [:S, text] }
129
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
130
+ action {[:UNICODE_RANGE, text] }
132
131
 
133
- when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*'/))
134
- action { [:STRING, text] }
132
+ when (text = @ss.scan(/[\s]+/))
133
+ action { [:S, text] }
135
134
 
136
- when (text = @ss.scan(/./))
137
- action { [text, text] }
135
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
136
+ action { [:STRING, text] }
138
137
 
139
- else
140
- text = @ss.string[@ss.pos .. -1]
141
- raise ScanError, "can not match: '" + text + "'"
142
- end # if
138
+ when (text = @ss.scan(/./))
139
+ action { [text, text] }
143
140
 
144
- else
145
- raise ScanError, "undefined state: '" + state.to_s + "'"
146
- end # case state
147
- token
148
- end # def _next_token
141
+
142
+ else
143
+ text = @ss.string[@ss.pos .. -1]
144
+ raise ScanError, "can not match: '" + text + "'"
145
+ end # if
146
+
147
+ else
148
+ raise ScanError, "undefined state: '" + state.to_s + "'"
149
+ end # case state
150
+ token
151
+ end # def _next_token
149
152
 
150
153
  end # class
151
154
  end