nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -1,25 +1,23 @@
1
- class Nokogiri::CSS::GeneratedParser
1
+ class Nokogiri::CSS::Parser
2
2
 
3
3
  token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
4
- token COMMA URI CDO CDC NUMBER PERCENTAGE LENGTH EMS EXS ANGLE TIME FREQ
5
- token IMPORTANT_SYM IMPORT_SYM MEDIA_SYM PAGE_SYM CHARSET_SYM DIMENSION
6
- token PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL SLASH DOUBLESLASH
7
- token NOT
4
+ token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
5
+ token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS
8
6
 
9
7
  rule
10
8
  selector
11
- : selector COMMA s_0toN simple_selector_1toN {
9
+ : selector COMMA simple_selector_1toN {
12
10
  result = [val.first, val.last].flatten
13
11
  }
14
- | simple_selector_1toN { result = val.flatten }
12
+ | prefixless_combinator_selector { result = val.flatten }
13
+ | optional_S simple_selector_1toN { result = [val.last].flatten }
15
14
  ;
16
15
  combinator
17
- : PLUS s_0toN { result = :DIRECT_ADJACENT_SELECTOR }
18
- | GREATER s_0toN { result = :CHILD_SELECTOR }
19
- | TILDE s_0toN { result = :PRECEDING_SELECTOR }
20
- | S { result = :DESCENDANT_SELECTOR }
21
- | DOUBLESLASH s_0toN { result = :DESCENDANT_SELECTOR }
22
- | SLASH s_0toN { result = :CHILD_SELECTOR }
16
+ : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
17
+ | GREATER { result = :CHILD_SELECTOR }
18
+ | TILDE { result = :FOLLOWING_SELECTOR }
19
+ | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
20
+ | SLASH { result = :CHILD_SELECTOR }
23
21
  ;
24
22
  simple_selector
25
23
  : element_name hcap_0toN {
@@ -29,10 +27,10 @@ rule
29
27
  Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
30
28
  end
31
29
  }
32
- | element_name negation {
30
+ | function
31
+ | function pseudo {
33
32
  result = Node.new(:CONDITIONAL_SELECTOR, val)
34
33
  }
35
- | function
36
34
  | function attrib {
37
35
  result = Node.new(:CONDITIONAL_SELECTOR, val)
38
36
  }
@@ -42,60 +40,118 @@ rule
42
40
  )
43
41
  }
44
42
  ;
43
+ prefixless_combinator_selector
44
+ : combinator simple_selector_1toN {
45
+ result = Node.new(val.first, [nil, val.last])
46
+ }
47
+ ;
45
48
  simple_selector_1toN
46
49
  : simple_selector combinator simple_selector_1toN {
47
50
  result = Node.new(val[1], [val.first, val.last])
48
51
  }
52
+ | simple_selector S simple_selector_1toN {
53
+ result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
54
+ }
49
55
  | simple_selector
50
56
  ;
51
57
  class
52
- : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
58
+ : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
53
59
  ;
54
60
  element_name
55
- : IDENT { result = Node.new(:ELEMENT_NAME, val) }
61
+ : namespaced_ident
56
62
  | '*' { result = Node.new(:ELEMENT_NAME, val) }
57
63
  ;
64
+ namespaced_ident
65
+ : namespace '|' IDENT {
66
+ result = Node.new(:ELEMENT_NAME,
67
+ [[val.first, val.last].compact.join(':')]
68
+ )
69
+ }
70
+ | IDENT {
71
+ name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
72
+ result = Node.new(:ELEMENT_NAME, [name])
73
+ }
74
+ ;
75
+ namespace
76
+ : IDENT { result = val[0] }
77
+ |
78
+ ;
58
79
  attrib
59
- : '[' s_0toN IDENT s_0toN attrib_val_0or1 ']' {
80
+ : LSQUARE attrib_name attrib_val_0or1 RSQUARE {
60
81
  result = Node.new(:ATTRIBUTE_CONDITION,
61
- [Node.new(:ELEMENT_NAME, [val[2]])] + (val[4] || [])
82
+ [val[1]] + (val[2] || [])
62
83
  )
63
84
  }
64
- | '[' s_0toN function s_0toN attrib_val_0or1 ']' {
85
+ | LSQUARE function attrib_val_0or1 RSQUARE {
65
86
  result = Node.new(:ATTRIBUTE_CONDITION,
66
- [val[2]] + (val[4] || [])
87
+ [val[1]] + (val[2] || [])
67
88
  )
68
89
  }
69
- | '[' s_0toN NUMBER s_0toN ']' {
90
+ | LSQUARE NUMBER RSQUARE {
70
91
  # Non standard, but hpricot supports it.
71
92
  result = Node.new(:PSEUDO_CLASS,
72
- [Node.new(:FUNCTION, ['nth-child(', val[2]])]
93
+ [Node.new(:FUNCTION, ['nth-child(', val[1]])]
73
94
  )
74
95
  }
75
96
  ;
97
+ attrib_name
98
+ : namespace '|' IDENT {
99
+ result = Node.new(:ELEMENT_NAME,
100
+ [[val.first, val.last].compact.join(':')]
101
+ )
102
+ }
103
+ | IDENT {
104
+ # Default namespace is not applied to attributes.
105
+ # So we don't add prefix "xmlns:" as in namespaced_ident.
106
+ result = Node.new(:ELEMENT_NAME, [val.first])
107
+ }
108
+ ;
76
109
  function
77
- : FUNCTION ')' {
110
+ : FUNCTION RPAREN {
78
111
  result = Node.new(:FUNCTION, [val.first.strip])
79
112
  }
80
- | FUNCTION expr ')' {
113
+ | FUNCTION expr RPAREN {
114
+ result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
115
+ }
116
+ | FUNCTION nth RPAREN {
81
117
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
82
118
  }
83
- | FUNCTION an_plus_b ')' {
119
+ | NOT expr RPAREN {
84
120
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
85
121
  }
86
- | NOT expr ')' {
122
+ | HAS selector RPAREN {
87
123
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
88
124
  }
89
125
  ;
90
126
  expr
91
- : NUMBER
127
+ : NUMBER COMMA expr { result = [val.first, val.last] }
128
+ | STRING COMMA expr { result = [val.first, val.last] }
129
+ | IDENT COMMA expr { result = [val.first, val.last] }
130
+ | NUMBER
92
131
  | STRING
132
+ | IDENT # even, odd
133
+ {
134
+ case val[0]
135
+ when 'even'
136
+ result = Node.new(:NTH, ['2','n','+','0'])
137
+ when 'odd'
138
+ result = Node.new(:NTH, ['2','n','+','1'])
139
+ when 'n'
140
+ result = Node.new(:NTH, ['1','n','+','0'])
141
+ else
142
+ # This is not CSS standard. It allows us to support this:
143
+ # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
144
+ # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
145
+ # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
146
+ result = val
147
+ end
148
+ }
93
149
  ;
94
- an_plus_b
150
+ nth
95
151
  : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
96
152
  {
97
153
  if val[1] == 'n'
98
- result = Node.new(:AN_PLUS_B, val)
154
+ result = Node.new(:NTH, val)
99
155
  else
100
156
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
101
157
  end
@@ -103,37 +159,31 @@ rule
103
159
  | IDENT PLUS NUMBER { # n+3, -n+3
104
160
  if val[0] == 'n'
105
161
  val.unshift("1")
106
- result = Node.new(:AN_PLUS_B, val)
162
+ result = Node.new(:NTH, val)
107
163
  elsif val[0] == '-n'
108
164
  val[0] = 'n'
109
165
  val.unshift("-1")
110
- result = Node.new(:AN_PLUS_B, val)
166
+ result = Node.new(:NTH, val)
111
167
  else
112
168
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
113
169
  end
114
170
  }
115
- | NUMBER IDENT # 5n, -5n
116
- {
117
- if val[1] == 'n'
171
+ | NUMBER IDENT { # 5n, -5n, 10n-1
172
+ n = val[1]
173
+ if n[0, 2] == 'n-'
174
+ val[1] = 'n'
175
+ val << "-"
176
+ # b is contained in n as n is the string "n-b"
177
+ val << n[2, n.size]
178
+ result = Node.new(:NTH, val)
179
+ elsif n == 'n'
118
180
  val << "+"
119
181
  val << "0"
120
- result = Node.new(:AN_PLUS_B, val)
182
+ result = Node.new(:NTH, val)
121
183
  else
122
184
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
123
185
  end
124
186
  }
125
- | IDENT # even, odd
126
- {
127
- if val[0] == 'even'
128
- val = ["2","n","+","0"]
129
- result = Node.new(:AN_PLUS_B, val)
130
- elsif val[0] == 'odd'
131
- val = ["2","n","+","1"]
132
- result = Node.new(:AN_PLUS_B, val)
133
- else
134
- raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
135
- end
136
- }
137
187
  ;
138
188
  pseudo
139
189
  : ':' function {
@@ -158,41 +208,54 @@ rule
158
208
  | pseudo hcap_1toN {
159
209
  result = Node.new(:COMBINATOR, val)
160
210
  }
211
+ | negation hcap_1toN {
212
+ result = Node.new(:COMBINATOR, val)
213
+ }
161
214
  | attribute_id
162
215
  | class
163
216
  | attrib
164
217
  | pseudo
218
+ | negation
165
219
  ;
166
220
  attribute_id
167
- : HASH { result = Node.new(:ID, val) }
221
+ : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
168
222
  ;
169
223
  attrib_val_0or1
170
- : eql_incl_dash s_0toN IDENT s_0toN { result = [val.first, val[2]] }
171
- | eql_incl_dash s_0toN STRING s_0toN { result = [val.first, val[2]] }
224
+ : eql_incl_dash IDENT { result = [val.first, val[1]] }
225
+ | eql_incl_dash STRING { result = [val.first, val[1]] }
172
226
  |
173
227
  ;
174
228
  eql_incl_dash
175
- : '='
176
- | PREFIXMATCH
177
- | SUFFIXMATCH
178
- | SUBSTRINGMATCH
179
- | NOT_EQUAL
180
- | INCLUDES
181
- | DASHMATCH
229
+ : EQUAL { result = :equal }
230
+ | PREFIXMATCH { result = :prefix_match }
231
+ | SUFFIXMATCH { result = :suffix_match }
232
+ | SUBSTRINGMATCH { result = :substring_match }
233
+ | NOT_EQUAL { result = :not_equal }
234
+ | INCLUDES { result = :includes }
235
+ | DASHMATCH { result = :dash_match }
182
236
  ;
183
237
  negation
184
- : NOT s_0toN negation_arg s_0toN ')' {
185
- result = Node.new(:NOT, [val[2]])
238
+ : NOT negation_arg RPAREN {
239
+ result = Node.new(:NOT, [val[1]])
186
240
  }
187
241
  ;
188
242
  negation_arg
189
- : hcap_1toN
243
+ : element_name
244
+ | element_name hcap_1toN
245
+ | hcap_1toN
190
246
  ;
191
- s_0toN
192
- : S s_0toN
247
+ optional_S
248
+ : S
193
249
  |
194
250
  ;
195
251
  end
196
252
 
197
253
  ---- header
198
254
 
255
+ require 'nokogiri/css/parser_extras'
256
+
257
+ ---- inner
258
+
259
+ def unescape_css_identifier(identifier)
260
+ identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/){ |m| $1 || [$2.hex].pack('U') }
261
+ end
@@ -0,0 +1,91 @@
1
+ require 'thread'
2
+
3
+ module Nokogiri
4
+ module CSS
5
+ class Parser < Racc::Parser
6
+ @cache_on = true
7
+ @cache = {}
8
+ @mutex = Mutex.new
9
+
10
+ class << self
11
+ # Turn on CSS parse caching
12
+ attr_accessor :cache_on
13
+ alias :cache_on? :cache_on
14
+ alias :set_cache :cache_on=
15
+
16
+ # Get the css selector in +string+ from the cache
17
+ def [] string
18
+ return unless @cache_on
19
+ @mutex.synchronize { @cache[string] }
20
+ end
21
+
22
+ # Set the css selector in +string+ in the cache to +value+
23
+ def []= string, value
24
+ return value unless @cache_on
25
+ @mutex.synchronize { @cache[string] = value }
26
+ end
27
+
28
+ # Clear the cache
29
+ def clear_cache
30
+ @mutex.synchronize { @cache = {} }
31
+ end
32
+
33
+ # Execute +block+ without cache
34
+ def without_cache &block
35
+ tmp = @cache_on
36
+ @cache_on = false
37
+ block.call
38
+ @cache_on = tmp
39
+ end
40
+
41
+ ###
42
+ # Parse this CSS selector in +selector+. Returns an AST.
43
+ def parse selector
44
+ @warned ||= false
45
+ unless @warned
46
+ $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
47
+ @warned = true
48
+ end
49
+ new.parse selector
50
+ end
51
+ end
52
+
53
+ # Create a new CSS parser with respect to +namespaces+
54
+ def initialize namespaces = {}
55
+ @tokenizer = Tokenizer.new
56
+ @namespaces = namespaces
57
+ super()
58
+ end
59
+
60
+ def parse string
61
+ @tokenizer.scan_setup string
62
+ do_parse
63
+ end
64
+
65
+ def next_token
66
+ @tokenizer.next_token
67
+ end
68
+
69
+ # Get the xpath for +string+ using +options+
70
+ def xpath_for string, options={}
71
+ key = "#{string}#{options[:ns]}#{options[:prefix]}"
72
+ v = self.class[key]
73
+ return v if v
74
+
75
+ args = [
76
+ options[:prefix] || '//',
77
+ options[:visitor] || XPathVisitor.new
78
+ ]
79
+ self.class[key] = parse(string).map { |ast|
80
+ ast.to_xpath(*args)
81
+ }
82
+ end
83
+
84
+ # On CSS parser error, raise an exception
85
+ def on_error error_token_id, error_value, value_stack
86
+ after = value_stack.compact.last
87
+ raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,7 @@
1
+ require 'nokogiri/syntax_error'
2
+ module Nokogiri
3
+ module CSS
4
+ class SyntaxError < ::Nokogiri::SyntaxError
5
+ end
6
+ end
7
+ end
@@ -1,9 +1,152 @@
1
+ #--
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by rex 1.0.5
4
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
+ #++
6
+
1
7
  module Nokogiri
2
- module CSS
3
- class Tokenizer < GeneratedTokenizer
4
- def scan(str)
5
- scan_evaluate(str)
6
- end
8
+ module CSS
9
+ class Tokenizer # :nodoc:
10
+ require 'strscan'
11
+
12
+ class ScanError < StandardError ; end
13
+
14
+ attr_reader :lineno
15
+ attr_reader :filename
16
+ attr_accessor :state
17
+
18
+ def scan_setup(str)
19
+ @ss = StringScanner.new(str)
20
+ @lineno = 1
21
+ @state = nil
22
+ end
23
+
24
+ def action
25
+ yield
26
+ end
27
+
28
+ def scan_str(str)
29
+ scan_setup(str)
30
+ do_parse
31
+ end
32
+ alias :scan :scan_str
33
+
34
+ def load_file( filename )
35
+ @filename = filename
36
+ open(filename, "r") do |f|
37
+ scan_setup(f.read)
7
38
  end
8
39
  end
40
+
41
+ def scan_file( filename )
42
+ load_file(filename)
43
+ do_parse
44
+ end
45
+
46
+
47
+ def next_token
48
+ return if @ss.eos?
49
+
50
+ # skips empty actions
51
+ until token = _next_token or @ss.eos?; end
52
+ token
53
+ end
54
+
55
+ def _next_token
56
+ text = @ss.peek(1)
57
+ @lineno += 1 if text == "\n"
58
+ token = case @state
59
+ when nil
60
+ case
61
+ when (text = @ss.scan(/has\([\s]*/))
62
+ action { [:HAS, text] }
63
+
64
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
+ action { [:FUNCTION, text] }
66
+
67
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
+ action { [:IDENT, text] }
69
+
70
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
+ action { [:HASH, text] }
72
+
73
+ when (text = @ss.scan(/[\s]*~=[\s]*/))
74
+ action { [:INCLUDES, text] }
75
+
76
+ when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
+ action { [:DASHMATCH, text] }
78
+
79
+ when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
+ action { [:PREFIXMATCH, text] }
81
+
82
+ when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
+ action { [:SUFFIXMATCH, text] }
84
+
85
+ when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
+ action { [:SUBSTRINGMATCH, text] }
87
+
88
+ when (text = @ss.scan(/[\s]*!=[\s]*/))
89
+ action { [:NOT_EQUAL, text] }
90
+
91
+ when (text = @ss.scan(/[\s]*=[\s]*/))
92
+ action { [:EQUAL, text] }
93
+
94
+ when (text = @ss.scan(/[\s]*\)/))
95
+ action { [:RPAREN, text] }
96
+
97
+ when (text = @ss.scan(/\[[\s]*/))
98
+ action { [:LSQUARE, text] }
99
+
100
+ when (text = @ss.scan(/[\s]*\]/))
101
+ action { [:RSQUARE, text] }
102
+
103
+ when (text = @ss.scan(/[\s]*\+[\s]*/))
104
+ action { [:PLUS, text] }
105
+
106
+ when (text = @ss.scan(/[\s]*>[\s]*/))
107
+ action { [:GREATER, text] }
108
+
109
+ when (text = @ss.scan(/[\s]*,[\s]*/))
110
+ action { [:COMMA, text] }
111
+
112
+ when (text = @ss.scan(/[\s]*~[\s]*/))
113
+ action { [:TILDE, text] }
114
+
115
+ when (text = @ss.scan(/\:not\([\s]*/))
116
+ action { [:NOT, text] }
117
+
118
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
+ action { [:NUMBER, text] }
120
+
121
+ when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
+ action { [:DOUBLESLASH, text] }
123
+
124
+ when (text = @ss.scan(/[\s]*\/[\s]*/))
125
+ action { [:SLASH, text] }
126
+
127
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
+ action {[:UNICODE_RANGE, text] }
129
+
130
+ when (text = @ss.scan(/[\s]+/))
131
+ action { [:S, text] }
132
+
133
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*'/))
134
+ action { [:STRING, text] }
135
+
136
+ when (text = @ss.scan(/./))
137
+ action { [text, text] }
138
+
139
+ else
140
+ text = @ss.string[@ss.pos .. -1]
141
+ raise ScanError, "can not match: '" + text + "'"
142
+ end # if
143
+
144
+ else
145
+ raise ScanError, "undefined state: '" + state.to_s + "'"
146
+ end # case state
147
+ token
148
+ end # def _next_token
149
+
150
+ end # class
151
+ end
9
152
  end
@@ -1,62 +1,54 @@
1
1
  module Nokogiri
2
2
  module CSS
3
- class GeneratedTokenizer
3
+ class Tokenizer # :nodoc:
4
4
 
5
5
  macro
6
6
  nl \n|\r\n|\r|\f
7
- w [\s\r\n\f]*
8
- nonascii [^\\\\0-\\\\177]
7
+ w [\s]*
8
+ nonascii [^\0-\177]
9
9
  num -?([0-9]+|[0-9]*\.[0-9]+)
10
- unicode \\\\\\\\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?
10
+ unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
11
11
 
12
- escape {unicode}|\\\\\\\[^\n\r\f0-9a-f]
13
- nmchar [_a-z0-9-]|{nonascii}|{escape}
14
- nmstart [_a-z]|{nonascii}|{escape}
15
- ident [-]?({nmstart})({nmchar})*
12
+ escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
13
+ nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_A-Za-z]|{nonascii}|{escape}
15
+ ident [-@]?({nmstart})({nmchar})*
16
16
  name ({nmchar})+
17
- string1 "([^\n\r\f"]|\\{nl}|{nonascii}|{escape})*"
18
- string2 '([^\n\r\f']|\\{nl}|{nonascii}|{escape})*'
17
+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
18
+ string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
19
19
  string {string1}|{string2}
20
- invalid1 \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
21
- invalid2 \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
22
- invalid {invalid1}|{invalid2}
23
- Comment \/\*(.|[\r\n])*?\*\/
24
20
 
25
21
  rule
26
22
 
27
23
  # [:state] pattern [actions]
28
24
 
29
- ~= { [:INCLUDES, text] }
30
- \|= { [:DASHMATCH, text] }
31
- \^= { [:PREFIXMATCH, text] }
32
- \$= { [:SUFFIXMATCH, text] }
33
- \*= { [:SUBSTRINGMATCH, text] }
34
- != { [:NOT_EQUAL, text] }
35
- {ident}\(\s* { [:FUNCTION, text] }
36
- @{ident} { [:IDENT, text] }
25
+ has\({w} { [:HAS, text] }
26
+ {ident}\({w} { [:FUNCTION, text] }
37
27
  {ident} { [:IDENT, text] }
38
- {num} { [:NUMBER, text] }
39
28
  \#{name} { [:HASH, text] }
40
- {w}\+ { [:PLUS, text] }
41
- {w}> { [:GREATER, text] }
42
- {w}, { [:COMMA, text] }
43
- {w}~ { [:TILDE, text] }
44
- \:not\( { [:NOT, text] }
45
- @{ident} { [:ATKEYWORD, text] }
46
- {num}% { [:PERCENTAGE, text] }
47
- {num}{ident} { [:DIMENSION, text] }
48
- <!-- { [:CDO, text] }
49
- --> { [:CDC, text] }
50
- {w}\/\/ { [:DOUBLESLASH, text] }
51
- {w}\/ { [:SLASH, text] }
29
+ {w}~={w} { [:INCLUDES, text] }
30
+ {w}\|={w} { [:DASHMATCH, text] }
31
+ {w}\^={w} { [:PREFIXMATCH, text] }
32
+ {w}\$={w} { [:SUFFIXMATCH, text] }
33
+ {w}\*={w} { [:SUBSTRINGMATCH, text] }
34
+ {w}!={w} { [:NOT_EQUAL, text] }
35
+ {w}={w} { [:EQUAL, text] }
36
+ {w}\) { [:RPAREN, text] }
37
+ \[{w} { [:LSQUARE, text] }
38
+ {w}\] { [:RSQUARE, text] }
39
+ {w}\+{w} { [:PLUS, text] }
40
+ {w}>{w} { [:GREATER, text] }
41
+ {w},{w} { [:COMMA, text] }
42
+ {w}~{w} { [:TILDE, text] }
43
+ \:not\({w} { [:NOT, text] }
44
+ {num} { [:NUMBER, text] }
45
+ {w}\/\/{w} { [:DOUBLESLASH, text] }
46
+ {w}\/{w} { [:SLASH, text] }
52
47
 
53
48
  U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
54
49
 
55
- {Comment} /* ignore comments */
56
- [\s\t\r\n\f]+ { [:S, text] }
57
- [\.*:\[\]=\)] { [text, text] }
50
+ [\s]+ { [:S, text] }
58
51
  {string} { [:STRING, text] }
59
- {invalid} { [:INVALID, text] }
60
52
  . { [text, text] }
61
53
  end
62
54
  end