nokogiri 1.5.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (328) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +278 -0
  6. data/bin/nokogiri +50 -10
  7. data/dependencies.yml +74 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +944 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +305 -201
  25. data/ext/nokogiri/xml_document_fragment.c +13 -15
  26. data/ext/nokogiri/xml_dtd.c +54 -48
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +30 -19
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +808 -503
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +198 -186
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +162 -98
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4886 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/css/node.rb +1 -50
  92. data/lib/nokogiri/css/parser.rb +317 -286
  93. data/lib/nokogiri/css/parser.y +57 -43
  94. data/lib/nokogiri/css/parser_extras.rb +39 -36
  95. data/lib/nokogiri/css/syntax_error.rb +2 -1
  96. data/lib/nokogiri/css/tokenizer.rb +105 -103
  97. data/lib/nokogiri/css/tokenizer.rex +5 -5
  98. data/lib/nokogiri/css/xpath_visitor.rb +137 -48
  99. data/lib/nokogiri/css.rb +15 -14
  100. data/lib/nokogiri/decorators/slop.rb +13 -5
  101. data/lib/nokogiri/extension.rb +31 -0
  102. data/lib/nokogiri/gumbo.rb +14 -0
  103. data/lib/nokogiri/html.rb +32 -27
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +118 -50
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  113. data/lib/nokogiri/html4.rb +40 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/html5.rb +473 -0
  118. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  119. data/lib/nokogiri/syntax_error.rb +1 -0
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/version.rb +3 -91
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +75 -33
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +157 -54
  129. data/lib/nokogiri/xml/document_fragment.rb +55 -8
  130. data/lib/nokogiri/xml/dtd.rb +15 -4
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  137. data/lib/nokogiri/xml/node.rb +712 -431
  138. data/lib/nokogiri/xml/node_set.rb +140 -123
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +31 -0
  141. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  142. data/lib/nokogiri/xml/pp/node.rb +1 -0
  143. data/lib/nokogiri/xml/pp.rb +3 -2
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax/document.rb +25 -30
  148. data/lib/nokogiri/xml/sax/parser.rb +8 -8
  149. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  150. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  151. data/lib/nokogiri/xml/sax.rb +5 -4
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +239 -0
  154. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  157. data/lib/nokogiri/xml/xpath.rb +4 -5
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xml.rb +37 -35
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/nokogiri/xslt.rb +17 -16
  162. data/lib/nokogiri.rb +55 -58
  163. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  164. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  165. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  166. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  167. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  171. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  172. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  173. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  174. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  175. metadata +307 -459
  176. data/.autotest +0 -26
  177. data/.gemtest +0 -0
  178. data/CHANGELOG.ja.rdoc +0 -785
  179. data/CHANGELOG.rdoc +0 -783
  180. data/C_CODING_STYLE.rdoc +0 -33
  181. data/Manifest.txt +0 -303
  182. data/README.ja.rdoc +0 -106
  183. data/README.rdoc +0 -175
  184. data/ROADMAP.md +0 -90
  185. data/Rakefile +0 -228
  186. data/STANDARD_RESPONSES.md +0 -47
  187. data/Y_U_NO_GEMSPEC.md +0 -155
  188. data/build_all +0 -105
  189. data/ext/nokogiri/html_document.c +0 -170
  190. data/ext/nokogiri/html_document.h +0 -10
  191. data/ext/nokogiri/html_element_description.c +0 -279
  192. data/ext/nokogiri/html_element_description.h +0 -10
  193. data/ext/nokogiri/html_entity_lookup.c +0 -32
  194. data/ext/nokogiri/html_entity_lookup.h +0 -8
  195. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  196. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  197. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  198. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  199. data/ext/nokogiri/xml_attr.h +0 -9
  200. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  201. data/ext/nokogiri/xml_cdata.h +0 -9
  202. data/ext/nokogiri/xml_comment.h +0 -9
  203. data/ext/nokogiri/xml_document.h +0 -23
  204. data/ext/nokogiri/xml_document_fragment.h +0 -10
  205. data/ext/nokogiri/xml_dtd.h +0 -10
  206. data/ext/nokogiri/xml_element_content.h +0 -10
  207. data/ext/nokogiri/xml_element_decl.h +0 -9
  208. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  209. data/ext/nokogiri/xml_entity_decl.h +0 -10
  210. data/ext/nokogiri/xml_entity_reference.h +0 -9
  211. data/ext/nokogiri/xml_io.c +0 -56
  212. data/ext/nokogiri/xml_io.h +0 -11
  213. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  214. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  215. data/ext/nokogiri/xml_namespace.h +0 -13
  216. data/ext/nokogiri/xml_node.h +0 -13
  217. data/ext/nokogiri/xml_node_set.h +0 -14
  218. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  219. data/ext/nokogiri/xml_reader.h +0 -10
  220. data/ext/nokogiri/xml_relax_ng.h +0 -9
  221. data/ext/nokogiri/xml_sax_parser.h +0 -39
  222. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  223. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  224. data/ext/nokogiri/xml_schema.h +0 -9
  225. data/ext/nokogiri/xml_syntax_error.h +0 -13
  226. data/ext/nokogiri/xml_text.h +0 -9
  227. data/ext/nokogiri/xml_xpath_context.h +0 -10
  228. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  229. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  230. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  231. data/tasks/cross_compile.rb +0 -150
  232. data/tasks/nokogiri.org.rb +0 -24
  233. data/tasks/test.rb +0 -95
  234. data/test/css/test_nthiness.rb +0 -159
  235. data/test/css/test_parser.rb +0 -341
  236. data/test/css/test_tokenizer.rb +0 -198
  237. data/test/css/test_xpath_visitor.rb +0 -91
  238. data/test/decorators/test_slop.rb +0 -16
  239. data/test/files/2ch.html +0 -108
  240. data/test/files/address_book.rlx +0 -12
  241. data/test/files/address_book.xml +0 -10
  242. data/test/files/bar/bar.xsd +0 -4
  243. data/test/files/dont_hurt_em_why.xml +0 -422
  244. data/test/files/encoding.html +0 -82
  245. data/test/files/encoding.xhtml +0 -84
  246. data/test/files/exslt.xml +0 -8
  247. data/test/files/exslt.xslt +0 -35
  248. data/test/files/foo/foo.xsd +0 -4
  249. data/test/files/metacharset.html +0 -10
  250. data/test/files/noencoding.html +0 -47
  251. data/test/files/po.xml +0 -32
  252. data/test/files/po.xsd +0 -66
  253. data/test/files/shift_jis.html +0 -10
  254. data/test/files/shift_jis.xml +0 -5
  255. data/test/files/snuggles.xml +0 -3
  256. data/test/files/staff.dtd +0 -10
  257. data/test/files/staff.xml +0 -59
  258. data/test/files/staff.xslt +0 -32
  259. data/test/files/test_document_url/bar.xml +0 -2
  260. data/test/files/test_document_url/document.dtd +0 -4
  261. data/test/files/test_document_url/document.xml +0 -6
  262. data/test/files/tlm.html +0 -850
  263. data/test/files/to_be_xincluded.xml +0 -2
  264. data/test/files/valid_bar.xml +0 -2
  265. data/test/files/xinclude.xml +0 -4
  266. data/test/helper.rb +0 -154
  267. data/test/html/sax/test_parser.rb +0 -141
  268. data/test/html/sax/test_parser_context.rb +0 -46
  269. data/test/html/test_builder.rb +0 -164
  270. data/test/html/test_document.rb +0 -552
  271. data/test/html/test_document_encoding.rb +0 -138
  272. data/test/html/test_document_fragment.rb +0 -261
  273. data/test/html/test_element_description.rb +0 -105
  274. data/test/html/test_named_characters.rb +0 -14
  275. data/test/html/test_node.rb +0 -196
  276. data/test/html/test_node_encoding.rb +0 -27
  277. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  278. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  279. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  280. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  281. data/test/test_convert_xpath.rb +0 -135
  282. data/test/test_css_cache.rb +0 -45
  283. data/test/test_encoding_handler.rb +0 -46
  284. data/test/test_memory_leak.rb +0 -156
  285. data/test/test_nokogiri.rb +0 -132
  286. data/test/test_reader.rb +0 -555
  287. data/test/test_soap4r_sax.rb +0 -52
  288. data/test/test_xslt_transforms.rb +0 -254
  289. data/test/xml/node/test_save_options.rb +0 -28
  290. data/test/xml/node/test_subclass.rb +0 -44
  291. data/test/xml/sax/test_parser.rb +0 -366
  292. data/test/xml/sax/test_parser_context.rb +0 -106
  293. data/test/xml/sax/test_push_parser.rb +0 -157
  294. data/test/xml/test_attr.rb +0 -64
  295. data/test/xml/test_attribute_decl.rb +0 -86
  296. data/test/xml/test_builder.rb +0 -306
  297. data/test/xml/test_c14n.rb +0 -151
  298. data/test/xml/test_cdata.rb +0 -48
  299. data/test/xml/test_comment.rb +0 -29
  300. data/test/xml/test_document.rb +0 -828
  301. data/test/xml/test_document_encoding.rb +0 -28
  302. data/test/xml/test_document_fragment.rb +0 -223
  303. data/test/xml/test_dtd.rb +0 -103
  304. data/test/xml/test_dtd_encoding.rb +0 -33
  305. data/test/xml/test_element_content.rb +0 -56
  306. data/test/xml/test_element_decl.rb +0 -73
  307. data/test/xml/test_entity_decl.rb +0 -122
  308. data/test/xml/test_entity_reference.rb +0 -245
  309. data/test/xml/test_namespace.rb +0 -95
  310. data/test/xml/test_node.rb +0 -1137
  311. data/test/xml/test_node_attributes.rb +0 -96
  312. data/test/xml/test_node_encoding.rb +0 -107
  313. data/test/xml/test_node_inheritance.rb +0 -32
  314. data/test/xml/test_node_reparenting.rb +0 -374
  315. data/test/xml/test_node_set.rb +0 -755
  316. data/test/xml/test_parse_options.rb +0 -64
  317. data/test/xml/test_processing_instruction.rb +0 -30
  318. data/test/xml/test_reader_encoding.rb +0 -142
  319. data/test/xml/test_relax_ng.rb +0 -60
  320. data/test/xml/test_schema.rb +0 -103
  321. data/test/xml/test_syntax_error.rb +0 -12
  322. data/test/xml/test_text.rb +0 -45
  323. data/test/xml/test_unparented_node.rb +0 -422
  324. data/test/xml/test_xinclude.rb +0 -83
  325. data/test/xml/test_xpath.rb +0 -295
  326. data/test/xslt/test_custom_functions.rb +0 -133
  327. data/test/xslt/test_exception_handling.rb +0 -37
  328. data/test_all +0 -81
@@ -10,13 +10,12 @@ rule
10
10
  result = [val.first, val.last].flatten
11
11
  }
12
12
  | prefixless_combinator_selector { result = val.flatten }
13
- | simple_selector_1toN { result = val.flatten }
13
+ | optional_S simple_selector_1toN { result = [val.last].flatten }
14
14
  ;
15
15
  combinator
16
16
  : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
17
17
  | GREATER { result = :CHILD_SELECTOR }
18
18
  | TILDE { result = :FOLLOWING_SELECTOR }
19
- | S { result = :DESCENDANT_SELECTOR }
20
19
  | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
21
20
  | SLASH { result = :CHILD_SELECTOR }
22
21
  ;
@@ -28,17 +27,6 @@ rule
28
27
  Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
29
28
  end
30
29
  }
31
- | element_name hcap_1toN negation {
32
- result = Node.new(:CONDITIONAL_SELECTOR,
33
- [
34
- val.first,
35
- Node.new(:COMBINATOR, [val[1], val.last])
36
- ]
37
- )
38
- }
39
- | element_name negation {
40
- result = Node.new(:CONDITIONAL_SELECTOR, val)
41
- }
42
30
  | function
43
31
  | function pseudo {
44
32
  result = Node.new(:CONDITIONAL_SELECTOR, val)
@@ -46,14 +34,6 @@ rule
46
34
  | function attrib {
47
35
  result = Node.new(:CONDITIONAL_SELECTOR, val)
48
36
  }
49
- | hcap_1toN negation {
50
- result = Node.new(:CONDITIONAL_SELECTOR,
51
- [
52
- Node.new(:ELEMENT_NAME, ['*']),
53
- Node.new(:COMBINATOR, val)
54
- ]
55
- )
56
- }
57
37
  | hcap_1toN {
58
38
  result = Node.new(:CONDITIONAL_SELECTOR,
59
39
  [Node.new(:ELEMENT_NAME, ['*']), val.first]
@@ -69,10 +49,13 @@ rule
69
49
  : simple_selector combinator simple_selector_1toN {
70
50
  result = Node.new(val[1], [val.first, val.last])
71
51
  }
52
+ | simple_selector S simple_selector_1toN {
53
+ result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
54
+ }
72
55
  | simple_selector
73
56
  ;
74
57
  class
75
- : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
58
+ : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
76
59
  ;
77
60
  element_name
78
61
  : namespaced_ident
@@ -105,7 +88,7 @@ rule
105
88
  )
106
89
  }
107
90
  | LSQUARE NUMBER RSQUARE {
108
- # Non standard, but hpricot supports it.
91
+ # non-standard, from hpricot
109
92
  result = Node.new(:PSEUDO_CLASS,
110
93
  [Node.new(:FUNCTION, ['nth-child(', val[1]])]
111
94
  )
@@ -130,7 +113,7 @@ rule
130
113
  | FUNCTION expr RPAREN {
131
114
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
132
115
  }
133
- | FUNCTION an_plus_b RPAREN {
116
+ | FUNCTION nth RPAREN {
134
117
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
135
118
  }
136
119
  | NOT expr RPAREN {
@@ -148,14 +131,15 @@ rule
148
131
  | STRING
149
132
  | IDENT # even, odd
150
133
  {
151
- if val[0] == 'even'
152
- val = ["2","n","+","0"]
153
- result = Node.new(:AN_PLUS_B, val)
154
- elsif val[0] == 'odd'
155
- val = ["2","n","+","1"]
156
- result = Node.new(:AN_PLUS_B, val)
134
+ case val[0]
135
+ when 'even'
136
+ result = Node.new(:NTH, ['2','n','+','0'])
137
+ when 'odd'
138
+ result = Node.new(:NTH, ['2','n','+','1'])
139
+ when 'n'
140
+ result = Node.new(:NTH, ['1','n','+','0'])
157
141
  else
158
- # This is not CSS standard. It allows us to support this:
142
+ # non-standard to support custom functions:
159
143
  # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
160
144
  # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
161
145
  # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
@@ -163,11 +147,11 @@ rule
163
147
  end
164
148
  }
165
149
  ;
166
- an_plus_b
150
+ nth
167
151
  : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
168
152
  {
169
153
  if val[1] == 'n'
170
- result = Node.new(:AN_PLUS_B, val)
154
+ result = Node.new(:NTH, val)
171
155
  else
172
156
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
173
157
  end
@@ -175,21 +159,27 @@ rule
175
159
  | IDENT PLUS NUMBER { # n+3, -n+3
176
160
  if val[0] == 'n'
177
161
  val.unshift("1")
178
- result = Node.new(:AN_PLUS_B, val)
162
+ result = Node.new(:NTH, val)
179
163
  elsif val[0] == '-n'
180
164
  val[0] = 'n'
181
165
  val.unshift("-1")
182
- result = Node.new(:AN_PLUS_B, val)
166
+ result = Node.new(:NTH, val)
183
167
  else
184
168
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
185
169
  end
186
170
  }
187
- | NUMBER IDENT # 5n, -5n
188
- {
189
- if val[1] == 'n'
171
+ | NUMBER IDENT { # 5n, -5n, 10n-1
172
+ n = val[1]
173
+ if n[0, 2] == 'n-'
174
+ val[1] = 'n'
175
+ val << "-"
176
+ # b is contained in n as n is the string "n-b"
177
+ val << n[2, n.size]
178
+ result = Node.new(:NTH, val)
179
+ elsif n == 'n'
190
180
  val << "+"
191
181
  val << "0"
192
- result = Node.new(:AN_PLUS_B, val)
182
+ result = Node.new(:NTH, val)
193
183
  else
194
184
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
195
185
  end
@@ -218,17 +208,22 @@ rule
218
208
  | pseudo hcap_1toN {
219
209
  result = Node.new(:COMBINATOR, val)
220
210
  }
211
+ | negation hcap_1toN {
212
+ result = Node.new(:COMBINATOR, val)
213
+ }
221
214
  | attribute_id
222
215
  | class
223
216
  | attrib
224
217
  | pseudo
218
+ | negation
225
219
  ;
226
220
  attribute_id
227
- : HASH { result = Node.new(:ID, val) }
221
+ : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
228
222
  ;
229
223
  attrib_val_0or1
230
- : eql_incl_dash IDENT { result = [val.first, val[1]] }
231
- | eql_incl_dash STRING { result = [val.first, val[1]] }
224
+ : eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] }
225
+ | eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] }
226
+ | eql_incl_dash NUMBER { result = [val.first, val[1]] }
232
227
  |
233
228
  ;
234
229
  eql_incl_dash
@@ -250,9 +245,28 @@ rule
250
245
  | element_name hcap_1toN
251
246
  | hcap_1toN
252
247
  ;
248
+ optional_S
249
+ : S
250
+ |
251
+ ;
253
252
  end
254
253
 
255
254
  ---- header
256
255
 
257
- require 'nokogiri/css/parser_extras'
256
+ require_relative "parser_extras"
258
257
 
258
+ ---- inner
259
+
260
+ def unescape_css_identifier(identifier)
261
+ identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/){ |m| $1 || [$2.hex].pack('U') }
262
+ end
263
+
264
+ def unescape_css_string(str)
265
+ str.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do |m|
266
+ if $1=="\n"
267
+ ''
268
+ else
269
+ $1 || [$2.hex].pack('U')
270
+ end
271
+ end
272
+ end
@@ -1,63 +1,66 @@
1
- require 'thread'
1
+ # frozen_string_literal: true
2
+ require "thread"
2
3
 
3
4
  module Nokogiri
4
5
  module CSS
5
6
  class Parser < Racc::Parser
6
- @cache_on = true
7
- @cache = {}
8
- @mutex = Mutex.new
7
+ CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
8
+
9
+ @cache = {}
10
+ @mutex = Mutex.new
9
11
 
10
12
  class << self
11
- # Turn on CSS parse caching
12
- attr_accessor :cache_on
13
- alias :cache_on? :cache_on
14
- alias :set_cache :cache_on=
13
+ # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
14
+ def cache_on?
15
+ !Thread.current[CACHE_SWITCH_NAME]
16
+ end
17
+
18
+ # Set a thread-local boolean to turn cacheing on and off. Truthy values turn the cache on, falsey values turn the cache off.
19
+ def set_cache(value)
20
+ Thread.current[CACHE_SWITCH_NAME] = !value
21
+ end
15
22
 
16
23
  # Get the css selector in +string+ from the cache
17
- def [] string
18
- return unless @cache_on
24
+ def [](string)
25
+ return unless cache_on?
19
26
  @mutex.synchronize { @cache[string] }
20
27
  end
21
28
 
22
29
  # Set the css selector in +string+ in the cache to +value+
23
- def []= string, value
24
- return value unless @cache_on
30
+ def []=(string, value)
31
+ return value unless cache_on?
25
32
  @mutex.synchronize { @cache[string] = value }
26
33
  end
27
34
 
28
35
  # Clear the cache
29
- def clear_cache
30
- @mutex.synchronize { @cache = {} }
36
+ def clear_cache(create_new_object = false)
37
+ @mutex.synchronize do
38
+ if create_new_object
39
+ @cache = {}
40
+ else
41
+ @cache.clear
42
+ end
43
+ end
31
44
  end
32
45
 
33
46
  # Execute +block+ without cache
34
- def without_cache &block
35
- tmp = @cache_on
36
- @cache_on = false
47
+ def without_cache(&block)
48
+ original_cache_setting = cache_on?
49
+ set_cache false
37
50
  block.call
38
- @cache_on = tmp
39
- end
40
-
41
- ###
42
- # Parse this CSS selector in +selector+. Returns an AST.
43
- def parse selector
44
- @warned ||= false
45
- unless @warned
46
- $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
47
- @warned = true
48
- end
49
- new.parse selector
51
+ ensure
52
+ set_cache original_cache_setting
50
53
  end
51
54
  end
52
55
 
53
56
  # Create a new CSS parser with respect to +namespaces+
54
- def initialize namespaces = {}
55
- @tokenizer = Tokenizer.new
57
+ def initialize(namespaces = {})
58
+ @tokenizer = Tokenizer.new
56
59
  @namespaces = namespaces
57
60
  super()
58
61
  end
59
62
 
60
- def parse string
63
+ def parse(string)
61
64
  @tokenizer.scan_setup string
62
65
  do_parse
63
66
  end
@@ -67,14 +70,14 @@ module Nokogiri
67
70
  end
68
71
 
69
72
  # Get the xpath for +string+ using +options+
70
- def xpath_for string, options={}
73
+ def xpath_for(string, options = {})
71
74
  key = "#{string}#{options[:ns]}#{options[:prefix]}"
72
75
  v = self.class[key]
73
76
  return v if v
74
77
 
75
78
  args = [
76
- options[:prefix] || '//',
77
- options[:visitor] || XPathVisitor.new
79
+ options[:prefix] || "//",
80
+ options[:visitor] || XPathVisitor.new,
78
81
  ]
79
82
  self.class[key] = parse(string).map { |ast|
80
83
  ast.to_xpath(*args)
@@ -82,7 +85,7 @@ module Nokogiri
82
85
  end
83
86
 
84
87
  # On CSS parser error, raise an exception
85
- def on_error error_token_id, error_value, value_stack
88
+ def on_error(error_token_id, error_value, value_stack)
86
89
  after = value_stack.compact.last
87
90
  raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
88
91
  end
@@ -1,4 +1,5 @@
1
- require 'nokogiri/syntax_error'
1
+ # frozen_string_literal: true
2
+ require_relative "../syntax_error"
2
3
  module Nokogiri
3
4
  module CSS
4
5
  class SyntaxError < ::Nokogiri::SyntaxError
@@ -1,151 +1,153 @@
1
+ # frozen_string_literal: true
1
2
  #--
2
3
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by rex 1.0.5
4
+ # This file is automatically generated by rex 1.0.7
4
5
  # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
6
  #++
6
7
 
7
8
  module Nokogiri
8
9
  module CSS
9
10
  class Tokenizer # :nodoc:
10
- require 'strscan'
11
+ require 'strscan'
11
12
 
12
- class ScanError < StandardError ; end
13
+ class ScanError < StandardError ; end
13
14
 
14
- attr_reader :lineno
15
- attr_reader :filename
16
- attr_accessor :state
15
+ attr_reader :lineno
16
+ attr_reader :filename
17
+ attr_accessor :state
17
18
 
18
- def scan_setup(str)
19
- @ss = StringScanner.new(str)
20
- @lineno = 1
21
- @state = nil
22
- end
19
+ def scan_setup(str)
20
+ @ss = StringScanner.new(str)
21
+ @lineno = 1
22
+ @state = nil
23
+ end
23
24
 
24
- def action
25
- yield
26
- end
25
+ def action
26
+ yield
27
+ end
27
28
 
28
- def scan_str(str)
29
- scan_setup(str)
30
- do_parse
31
- end
32
- alias :scan :scan_str
29
+ def scan_str(str)
30
+ scan_setup(str)
31
+ do_parse
32
+ end
33
+ alias :scan :scan_str
33
34
 
34
- def load_file( filename )
35
- @filename = filename
36
- open(filename, "r") do |f|
37
- scan_setup(f.read)
38
- end
39
- end
35
+ def load_file( filename )
36
+ @filename = filename
37
+ File.open(filename, "r") do |f|
38
+ scan_setup(f.read)
39
+ end
40
+ end
40
41
 
41
- def scan_file( filename )
42
- load_file(filename)
43
- do_parse
44
- end
42
+ def scan_file( filename )
43
+ load_file(filename)
44
+ do_parse
45
+ end
45
46
 
46
47
 
47
- def next_token
48
- return if @ss.eos?
49
-
50
- # skips empty actions
51
- until token = _next_token or @ss.eos?; end
52
- token
53
- end
48
+ def next_token
49
+ return if @ss.eos?
54
50
 
55
- def _next_token
56
- text = @ss.peek(1)
57
- @lineno += 1 if text == "\n"
58
- token = case @state
59
- when nil
60
- case
61
- when (text = @ss.scan(/has\([\s]*/))
62
- action { [:HAS, text] }
51
+ # skips empty actions
52
+ until token = _next_token or @ss.eos?; end
53
+ token
54
+ end
63
55
 
64
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
- action { [:FUNCTION, text] }
56
+ def _next_token
57
+ text = @ss.peek(1)
58
+ @lineno += 1 if text == "\n"
59
+ token = case @state
60
+ when nil
61
+ case
62
+ when (text = @ss.scan(/has\([\s]*/))
63
+ action { [:HAS, text] }
66
64
 
67
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
- action { [:IDENT, text] }
65
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
+ action { [:FUNCTION, text] }
69
67
 
70
- when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
- action { [:HASH, text] }
68
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
+ action { [:IDENT, text] }
72
70
 
73
- when (text = @ss.scan(/[\s]*~=[\s]*/))
74
- action { [:INCLUDES, text] }
71
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
72
+ action { [:HASH, text] }
75
73
 
76
- when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
- action { [:DASHMATCH, text] }
74
+ when (text = @ss.scan(/[\s]*~=[\s]*/))
75
+ action { [:INCLUDES, text] }
78
76
 
79
- when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
- action { [:PREFIXMATCH, text] }
77
+ when (text = @ss.scan(/[\s]*\|=[\s]*/))
78
+ action { [:DASHMATCH, text] }
81
79
 
82
- when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
- action { [:SUFFIXMATCH, text] }
80
+ when (text = @ss.scan(/[\s]*\^=[\s]*/))
81
+ action { [:PREFIXMATCH, text] }
84
82
 
85
- when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
- action { [:SUBSTRINGMATCH, text] }
83
+ when (text = @ss.scan(/[\s]*\$=[\s]*/))
84
+ action { [:SUFFIXMATCH, text] }
87
85
 
88
- when (text = @ss.scan(/[\s]*!=[\s]*/))
89
- action { [:NOT_EQUAL, text] }
86
+ when (text = @ss.scan(/[\s]*\*=[\s]*/))
87
+ action { [:SUBSTRINGMATCH, text] }
90
88
 
91
- when (text = @ss.scan(/[\s]*=[\s]*/))
92
- action { [:EQUAL, text] }
89
+ when (text = @ss.scan(/[\s]*!=[\s]*/))
90
+ action { [:NOT_EQUAL, text] }
93
91
 
94
- when (text = @ss.scan(/[\s]*\)/))
95
- action { [:RPAREN, text] }
92
+ when (text = @ss.scan(/[\s]*=[\s]*/))
93
+ action { [:EQUAL, text] }
96
94
 
97
- when (text = @ss.scan(/[\s]*\[[\s]*/))
98
- action { [:LSQUARE, text] }
95
+ when (text = @ss.scan(/[\s]*\)/))
96
+ action { [:RPAREN, text] }
99
97
 
100
- when (text = @ss.scan(/[\s]*\]/))
101
- action { [:RSQUARE, text] }
98
+ when (text = @ss.scan(/\[[\s]*/))
99
+ action { [:LSQUARE, text] }
102
100
 
103
- when (text = @ss.scan(/[\s]*\+[\s]*/))
104
- action { [:PLUS, text] }
101
+ when (text = @ss.scan(/[\s]*\]/))
102
+ action { [:RSQUARE, text] }
105
103
 
106
- when (text = @ss.scan(/[\s]*>[\s]*/))
107
- action { [:GREATER, text] }
104
+ when (text = @ss.scan(/[\s]*\+[\s]*/))
105
+ action { [:PLUS, text] }
108
106
 
109
- when (text = @ss.scan(/[\s]*,[\s]*/))
110
- action { [:COMMA, text] }
107
+ when (text = @ss.scan(/[\s]*>[\s]*/))
108
+ action { [:GREATER, text] }
111
109
 
112
- when (text = @ss.scan(/[\s]*~[\s]*/))
113
- action { [:TILDE, text] }
110
+ when (text = @ss.scan(/[\s]*,[\s]*/))
111
+ action { [:COMMA, text] }
114
112
 
115
- when (text = @ss.scan(/\:not\([\s]*/))
116
- action { [:NOT, text] }
113
+ when (text = @ss.scan(/[\s]*~[\s]*/))
114
+ action { [:TILDE, text] }
117
115
 
118
- when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
- action { [:NUMBER, text] }
116
+ when (text = @ss.scan(/\:not\([\s]*/))
117
+ action { [:NOT, text] }
120
118
 
121
- when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
- action { [:DOUBLESLASH, text] }
119
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
120
+ action { [:NUMBER, text] }
123
121
 
124
- when (text = @ss.scan(/[\s]*\/[\s]*/))
125
- action { [:SLASH, text] }
122
+ when (text = @ss.scan(/[\s]*\/\/[\s]*/))
123
+ action { [:DOUBLESLASH, text] }
126
124
 
127
- when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
- action {[:UNICODE_RANGE, text] }
125
+ when (text = @ss.scan(/[\s]*\/[\s]*/))
126
+ action { [:SLASH, text] }
129
127
 
130
- when (text = @ss.scan(/[\s]+/))
131
- action { [:S, text] }
128
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
129
+ action {[:UNICODE_RANGE, text] }
132
130
 
133
- when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*'/))
134
- action { [:STRING, text] }
131
+ when (text = @ss.scan(/[\s]+/))
132
+ action { [:S, text] }
135
133
 
136
- when (text = @ss.scan(/./))
137
- action { [text, text] }
134
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
135
+ action { [:STRING, text] }
138
136
 
139
- else
140
- text = @ss.string[@ss.pos .. -1]
141
- raise ScanError, "can not match: '" + text + "'"
142
- end # if
137
+ when (text = @ss.scan(/./))
138
+ action { [text, text] }
143
139
 
144
- else
145
- raise ScanError, "undefined state: '" + state.to_s + "'"
146
- end # case state
147
- token
148
- end # def _next_token
140
+
141
+ else
142
+ text = @ss.string[@ss.pos .. -1]
143
+ raise ScanError, "can not match: '" + text + "'"
144
+ end # if
145
+
146
+ else
147
+ raise ScanError, "undefined state: '" + state.to_s + "'"
148
+ end # case state
149
+ token
150
+ end # def _next_token
149
151
 
150
152
  end # class
151
153
  end
@@ -14,8 +14,8 @@ macro
14
14
  nmstart [_A-Za-z]|{nonascii}|{escape}
15
15
  ident [-@]?({nmstart})({nmchar})*
16
16
  name ({nmchar})+
17
- string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
18
- string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
17
+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
18
+ string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
19
19
  string {string1}|{string2}
20
20
 
21
21
  rule
@@ -34,7 +34,7 @@ rule
34
34
  {w}!={w} { [:NOT_EQUAL, text] }
35
35
  {w}={w} { [:EQUAL, text] }
36
36
  {w}\) { [:RPAREN, text] }
37
- {w}\[{w} { [:LSQUARE, text] }
37
+ \[{w} { [:LSQUARE, text] }
38
38
  {w}\] { [:RSQUARE, text] }
39
39
  {w}\+{w} { [:PLUS, text] }
40
40
  {w}>{w} { [:GREATER, text] }
@@ -44,9 +44,9 @@ rule
44
44
  {num} { [:NUMBER, text] }
45
45
  {w}\/\/{w} { [:DOUBLESLASH, text] }
46
46
  {w}\/{w} { [:SLASH, text] }
47
-
47
+
48
48
  U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
49
-
49
+
50
50
  [\s]+ { [:S, text] }
51
51
  {string} { [:STRING, text] }
52
52
  . { [text, text] }