glebm-nokogiri 1.4.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (277) hide show
  1. data/.autotest +26 -0
  2. data/CHANGELOG.ja.rdoc +411 -0
  3. data/CHANGELOG.rdoc +397 -0
  4. data/Manifest.txt +276 -0
  5. data/README.ja.rdoc +106 -0
  6. data/README.rdoc +132 -0
  7. data/Rakefile +183 -0
  8. data/bin/nokogiri +49 -0
  9. data/deps.rip +5 -0
  10. data/ext/nokogiri/extconf.rb +97 -0
  11. data/ext/nokogiri/html_document.c +154 -0
  12. data/ext/nokogiri/html_document.h +10 -0
  13. data/ext/nokogiri/html_element_description.c +276 -0
  14. data/ext/nokogiri/html_element_description.h +10 -0
  15. data/ext/nokogiri/html_entity_lookup.c +32 -0
  16. data/ext/nokogiri/html_entity_lookup.h +8 -0
  17. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  18. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  19. data/ext/nokogiri/nokogiri.c +95 -0
  20. data/ext/nokogiri/nokogiri.h +153 -0
  21. data/ext/nokogiri/xml_attr.c +94 -0
  22. data/ext/nokogiri/xml_attr.h +9 -0
  23. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  24. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  25. data/ext/nokogiri/xml_cdata.c +56 -0
  26. data/ext/nokogiri/xml_cdata.h +9 -0
  27. data/ext/nokogiri/xml_comment.c +54 -0
  28. data/ext/nokogiri/xml_comment.h +9 -0
  29. data/ext/nokogiri/xml_document.c +464 -0
  30. data/ext/nokogiri/xml_document.h +23 -0
  31. data/ext/nokogiri/xml_document_fragment.c +48 -0
  32. data/ext/nokogiri/xml_document_fragment.h +10 -0
  33. data/ext/nokogiri/xml_dtd.c +202 -0
  34. data/ext/nokogiri/xml_dtd.h +10 -0
  35. data/ext/nokogiri/xml_element_content.c +123 -0
  36. data/ext/nokogiri/xml_element_content.h +10 -0
  37. data/ext/nokogiri/xml_element_decl.c +69 -0
  38. data/ext/nokogiri/xml_element_decl.h +9 -0
  39. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  40. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  41. data/ext/nokogiri/xml_entity_decl.c +110 -0
  42. data/ext/nokogiri/xml_entity_decl.h +10 -0
  43. data/ext/nokogiri/xml_entity_reference.c +52 -0
  44. data/ext/nokogiri/xml_entity_reference.h +9 -0
  45. data/ext/nokogiri/xml_io.c +31 -0
  46. data/ext/nokogiri/xml_io.h +11 -0
  47. data/ext/nokogiri/xml_namespace.c +84 -0
  48. data/ext/nokogiri/xml_namespace.h +13 -0
  49. data/ext/nokogiri/xml_node.c +1347 -0
  50. data/ext/nokogiri/xml_node.h +13 -0
  51. data/ext/nokogiri/xml_node_set.c +418 -0
  52. data/ext/nokogiri/xml_node_set.h +9 -0
  53. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  54. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  55. data/ext/nokogiri/xml_reader.c +665 -0
  56. data/ext/nokogiri/xml_reader.h +10 -0
  57. data/ext/nokogiri/xml_relax_ng.c +168 -0
  58. data/ext/nokogiri/xml_relax_ng.h +9 -0
  59. data/ext/nokogiri/xml_sax_parser.c +286 -0
  60. data/ext/nokogiri/xml_sax_parser.h +39 -0
  61. data/ext/nokogiri/xml_sax_parser_context.c +159 -0
  62. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  63. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  64. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  65. data/ext/nokogiri/xml_schema.c +205 -0
  66. data/ext/nokogiri/xml_schema.h +9 -0
  67. data/ext/nokogiri/xml_syntax_error.c +58 -0
  68. data/ext/nokogiri/xml_syntax_error.h +13 -0
  69. data/ext/nokogiri/xml_text.c +50 -0
  70. data/ext/nokogiri/xml_text.h +9 -0
  71. data/ext/nokogiri/xml_xpath_context.c +276 -0
  72. data/ext/nokogiri/xml_xpath_context.h +9 -0
  73. data/ext/nokogiri/xslt_stylesheet.c +142 -0
  74. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  75. data/lib/nokogiri.rb +133 -0
  76. data/lib/nokogiri/css.rb +25 -0
  77. data/lib/nokogiri/css/generated_parser.rb +669 -0
  78. data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
  79. data/lib/nokogiri/css/node.rb +99 -0
  80. data/lib/nokogiri/css/parser.rb +82 -0
  81. data/lib/nokogiri/css/parser.y +232 -0
  82. data/lib/nokogiri/css/syntax_error.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rb +7 -0
  84. data/lib/nokogiri/css/tokenizer.rex +55 -0
  85. data/lib/nokogiri/css/xpath_visitor.rb +169 -0
  86. data/lib/nokogiri/decorators/slop.rb +33 -0
  87. data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
  88. data/lib/nokogiri/ffi/html/document.rb +28 -0
  89. data/lib/nokogiri/ffi/html/element_description.rb +81 -0
  90. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  91. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  92. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  93. data/lib/nokogiri/ffi/libxml.rb +386 -0
  94. data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
  95. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  96. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  97. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  98. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  99. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
  102. data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
  103. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  105. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  106. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  107. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  108. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  109. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  110. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  111. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  112. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  113. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  114. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  115. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
  116. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  117. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  118. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  119. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
  120. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  121. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  122. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  123. data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
  124. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  125. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  126. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  127. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  128. data/lib/nokogiri/ffi/xml/document.rb +162 -0
  129. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  130. data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
  131. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  132. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  133. data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
  134. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  136. data/lib/nokogiri/ffi/xml/node.rb +556 -0
  137. data/lib/nokogiri/ffi/xml/node_set.rb +149 -0
  138. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  139. data/lib/nokogiri/ffi/xml/reader.rb +232 -0
  140. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  141. data/lib/nokogiri/ffi/xml/sax/parser.rb +135 -0
  142. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  143. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
  144. data/lib/nokogiri/ffi/xml/schema.rb +109 -0
  145. data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
  146. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  147. data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
  148. data/lib/nokogiri/ffi/xml/xpath_context.rb +148 -0
  149. data/lib/nokogiri/ffi/xslt/stylesheet.rb +53 -0
  150. data/lib/nokogiri/html.rb +35 -0
  151. data/lib/nokogiri/html/builder.rb +35 -0
  152. data/lib/nokogiri/html/document.rb +90 -0
  153. data/lib/nokogiri/html/document_fragment.rb +36 -0
  154. data/lib/nokogiri/html/element_description.rb +23 -0
  155. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  156. data/lib/nokogiri/html/sax/parser.rb +48 -0
  157. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  158. data/lib/nokogiri/syntax_error.rb +4 -0
  159. data/lib/nokogiri/version.rb +37 -0
  160. data/lib/nokogiri/version_warning.rb +14 -0
  161. data/lib/nokogiri/xml.rb +67 -0
  162. data/lib/nokogiri/xml/attr.rb +14 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  164. data/lib/nokogiri/xml/builder.rb +418 -0
  165. data/lib/nokogiri/xml/cdata.rb +11 -0
  166. data/lib/nokogiri/xml/character_data.rb +7 -0
  167. data/lib/nokogiri/xml/document.rb +194 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +77 -0
  169. data/lib/nokogiri/xml/dtd.rb +11 -0
  170. data/lib/nokogiri/xml/element_content.rb +36 -0
  171. data/lib/nokogiri/xml/element_decl.rb +13 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  173. data/lib/nokogiri/xml/namespace.rb +13 -0
  174. data/lib/nokogiri/xml/node.rb +793 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  176. data/lib/nokogiri/xml/node_set.rb +325 -0
  177. data/lib/nokogiri/xml/notation.rb +6 -0
  178. data/lib/nokogiri/xml/parse_options.rb +85 -0
  179. data/lib/nokogiri/xml/pp.rb +2 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  181. data/lib/nokogiri/xml/pp/node.rb +56 -0
  182. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  183. data/lib/nokogiri/xml/reader.rb +74 -0
  184. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  185. data/lib/nokogiri/xml/sax.rb +4 -0
  186. data/lib/nokogiri/xml/sax/document.rb +160 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  190. data/lib/nokogiri/xml/schema.rb +57 -0
  191. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  192. data/lib/nokogiri/xml/text.rb +9 -0
  193. data/lib/nokogiri/xml/xpath.rb +10 -0
  194. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  195. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  196. data/lib/nokogiri/xslt.rb +48 -0
  197. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  198. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  199. data/tasks/cross_compile.rb +158 -0
  200. data/tasks/test.rb +94 -0
  201. data/test/css/test_nthiness.rb +159 -0
  202. data/test/css/test_parser.rb +282 -0
  203. data/test/css/test_tokenizer.rb +190 -0
  204. data/test/css/test_xpath_visitor.rb +85 -0
  205. data/test/ffi/test_document.rb +35 -0
  206. data/test/files/2ch.html +108 -0
  207. data/test/files/address_book.rlx +12 -0
  208. data/test/files/address_book.xml +10 -0
  209. data/test/files/bar/bar.xsd +4 -0
  210. data/test/files/dont_hurt_em_why.xml +422 -0
  211. data/test/files/exslt.xml +8 -0
  212. data/test/files/exslt.xslt +35 -0
  213. data/test/files/foo/foo.xsd +4 -0
  214. data/test/files/po.xml +32 -0
  215. data/test/files/po.xsd +66 -0
  216. data/test/files/shift_jis.html +10 -0
  217. data/test/files/shift_jis.xml +5 -0
  218. data/test/files/snuggles.xml +3 -0
  219. data/test/files/staff.dtd +10 -0
  220. data/test/files/staff.xml +59 -0
  221. data/test/files/staff.xslt +32 -0
  222. data/test/files/tlm.html +850 -0
  223. data/test/files/valid_bar.xml +2 -0
  224. data/test/helper.rb +169 -0
  225. data/test/html/sax/test_parser.rb +74 -0
  226. data/test/html/sax/test_parser_context.rb +48 -0
  227. data/test/html/test_builder.rb +164 -0
  228. data/test/html/test_document.rb +398 -0
  229. data/test/html/test_document_encoding.rb +77 -0
  230. data/test/html/test_document_fragment.rb +182 -0
  231. data/test/html/test_element_description.rb +98 -0
  232. data/test/html/test_named_characters.rb +14 -0
  233. data/test/html/test_node.rb +181 -0
  234. data/test/html/test_node_encoding.rb +27 -0
  235. data/test/test_convert_xpath.rb +135 -0
  236. data/test/test_css_cache.rb +45 -0
  237. data/test/test_encoding_handler.rb +46 -0
  238. data/test/test_memory_leak.rb +87 -0
  239. data/test/test_nokogiri.rb +138 -0
  240. data/test/test_reader.rb +386 -0
  241. data/test/test_soap4r_sax.rb +52 -0
  242. data/test/test_xslt_transforms.rb +188 -0
  243. data/test/xml/node/test_save_options.rb +20 -0
  244. data/test/xml/node/test_subclass.rb +44 -0
  245. data/test/xml/sax/test_parser.rb +307 -0
  246. data/test/xml/sax/test_parser_context.rb +63 -0
  247. data/test/xml/sax/test_push_parser.rb +139 -0
  248. data/test/xml/test_attr.rb +38 -0
  249. data/test/xml/test_attribute_decl.rb +82 -0
  250. data/test/xml/test_builder.rb +210 -0
  251. data/test/xml/test_cdata.rb +50 -0
  252. data/test/xml/test_comment.rb +29 -0
  253. data/test/xml/test_document.rb +668 -0
  254. data/test/xml/test_document_encoding.rb +26 -0
  255. data/test/xml/test_document_fragment.rb +180 -0
  256. data/test/xml/test_dtd.rb +82 -0
  257. data/test/xml/test_dtd_encoding.rb +33 -0
  258. data/test/xml/test_element_content.rb +56 -0
  259. data/test/xml/test_element_decl.rb +73 -0
  260. data/test/xml/test_entity_decl.rb +120 -0
  261. data/test/xml/test_entity_reference.rb +21 -0
  262. data/test/xml/test_namespace.rb +68 -0
  263. data/test/xml/test_node.rb +865 -0
  264. data/test/xml/test_node_attributes.rb +34 -0
  265. data/test/xml/test_node_encoding.rb +107 -0
  266. data/test/xml/test_node_reparenting.rb +293 -0
  267. data/test/xml/test_node_set.rb +649 -0
  268. data/test/xml/test_parse_options.rb +52 -0
  269. data/test/xml/test_processing_instruction.rb +30 -0
  270. data/test/xml/test_reader_encoding.rb +126 -0
  271. data/test/xml/test_relax_ng.rb +60 -0
  272. data/test/xml/test_schema.rb +89 -0
  273. data/test/xml/test_syntax_error.rb +12 -0
  274. data/test/xml/test_text.rb +38 -0
  275. data/test/xml/test_unparented_node.rb +381 -0
  276. data/test/xml/test_xpath.rb +138 -0
  277. metadata +533 -0
@@ -0,0 +1,145 @@
1
+ #--
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by rex 1.0.4
4
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
+ #++
6
+
7
module Nokogiri
  module CSS
    ###
    # StringScanner-based lexer for CSS selector strings.
    #
    # The scan_* entry points prepare the scanner and then call +do_parse+,
    # which is not defined in this class (presumably supplied by
    # GeneratedParser -- confirm). #next_token returns one token per call.
    class GeneratedTokenizer < GeneratedParser
      require 'strscan'

      # Raised when the scanner is in an unknown state or no rule matches.
      class ScanError < StandardError ; end

      # Line counter maintained by #next_token (starts at 1).
      attr_reader :lineno
      # Name of the file most recently passed to #load_file.
      attr_reader :filename
      # Current lexer state; +nil+ is the only state #next_token handles.
      attr_accessor :state

      # Point the scanner at +str+ and reset the line counter and state.
      def scan_setup(str)
        @ss = StringScanner.new(str)
        @lineno = 1
        @state = nil
      end

      # Run the given block and return its value. Every lexer rule in
      # #next_token builds its token through this method.
      def action(&block)
        yield
      end

      # Tokenize and parse +str+; returns whatever +do_parse+ returns.
      def scan_str(str)
        scan_setup(str)
        do_parse
      end

      # Remember +filename+ and load its whole contents into the scanner.
      def load_file( filename )
        @filename = filename
        # File.open instead of Kernel#open: Kernel#open has historically
        # treated a name starting with "|" as a command to run in a
        # subprocess, which is never wanted for a plain file read.
        File.open(filename, "r") do |f|
          scan_setup(f.read)
        end
      end

      # Tokenize and parse the contents of +filename+.
      def scan_file( filename )
        load_file(filename)
        do_parse
      end


      # Return the next token as a two-element array [type, text], or nil
      # once the input is exhausted. Rules are tried top to bottom and the
      # first match wins, so longer operators (e.g. "~=") are listed before
      # their prefixes (e.g. "~"). Any single character not covered by a
      # named rule is returned as [char, char].
      #
      # Raises ScanError if @state is not nil or if no rule matches.
      def next_token
        return if @ss.eos?

        # Only a newline sitting exactly at the scan position bumps the
        # counter; newlines consumed inside a longer match are not counted.
        text = @ss.peek(1)
        @lineno  +=  1  if text == "\n"
        token = case @state
        when nil
          case
          when (text = @ss.scan(/has\([\s]*/))
             action { [:HAS, text] }

          when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
             action { [:FUNCTION, text] }

          when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
             action { [:IDENT, text] }

          when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
             action { [:HASH, text] }

          when (text = @ss.scan(/[\s]*~=[\s]*/))
             action { [:INCLUDES, text] }

          when (text = @ss.scan(/[\s]*\|=[\s]*/))
             action { [:DASHMATCH, text] }

          when (text = @ss.scan(/[\s]*\^=[\s]*/))
             action { [:PREFIXMATCH, text] }

          when (text = @ss.scan(/[\s]*\$=[\s]*/))
             action { [:SUFFIXMATCH, text] }

          when (text = @ss.scan(/[\s]*\*=[\s]*/))
             action { [:SUBSTRINGMATCH, text] }

          when (text = @ss.scan(/[\s]*!=[\s]*/))
             action { [:NOT_EQUAL, text] }

          when (text = @ss.scan(/[\s]*=[\s]*/))
             action { [:EQUAL, text] }

          when (text = @ss.scan(/[\s]*\)/))
             action { [:RPAREN, text] }

          when (text = @ss.scan(/[\s]*\[[\s]*/))
             action { [:LSQUARE, text] }

          when (text = @ss.scan(/[\s]*\]/))
             action { [:RSQUARE, text] }

          when (text = @ss.scan(/[\s]*\+[\s]*/))
             action { [:PLUS, text] }

          when (text = @ss.scan(/[\s]*>[\s]*/))
             action { [:GREATER, text] }

          when (text = @ss.scan(/[\s]*,[\s]*/))
             action { [:COMMA, text] }

          when (text = @ss.scan(/[\s]*~[\s]*/))
             action { [:TILDE, text] }

          when (text = @ss.scan(/\:not\([\s]*/))
             action { [:NOT, text] }

          when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
             action { [:NUMBER, text] }

          when (text = @ss.scan(/[\s]*\/\/[\s]*/))
             action { [:DOUBLESLASH, text] }

          when (text = @ss.scan(/[\s]*\/[\s]*/))
             action { [:SLASH, text] }

          when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
             action {[:UNICODE_RANGE, text] }

          when (text = @ss.scan(/[\s]+/))
             action { [:S, text] }

          when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*'/))
             action { [:STRING, text] }

          when (text = @ss.scan(/./))
             action { [text, text] }

          else
            # Nothing matched: report the unconsumed remainder of the input.
            text = @ss.string[@ss.pos .. -1]
            raise  ScanError, "can not match: '" + text + "'"
          end  # if

        else
          raise  ScanError, "undefined state: '" + state.to_s + "'"
        end  # case state
        token
      end  # def next_token

    end # class
  end
end
@@ -0,0 +1,99 @@
1
module Nokogiri
  module CSS
    ###
    # A node in a CSS selector syntax tree: a +type+ tag plus an array of
    # children in +value+ (child Nodes and/or plain values such as Strings).
    class Node
      # Type tag of this node (readable and writable)
      attr_accessor :type
      # Children / payload of this node (readable and writable)
      attr_accessor :value

      # Create a new Node with +type+ and +value+
      def initialize type, value
        @type = type
        @value = value
      end

      # Accept +visitor+: dispatches to visitor.visit_<type>, where <type>
      # is this node's type downcased, passing self. Returns what the
      # visitor method returns.
      def accept visitor
        visitor.send(:"visit_#{type.to_s.downcase}", self)
      end

      ###
      # Convert this CSS node to xpath with +prefix+ using +visitor+.
      # The tree is preprocessed in place first (see #preprocess!); the
      # result is +prefix+ followed by whatever visitor.accept(self) returns.
      def to_xpath prefix = '//', visitor = XPathVisitor.new
        self.preprocess!
        prefix + visitor.accept(self)
      end

      # Preprocess this node tree in place and return self.
      #
      # Element-qualified child pseudo-classes are rewritten so that the
      # element name becomes '*' and the tag test moves into a 'self('
      # function placed after the positional condition.
      def preprocess!
        ### Deal with nth-child
        matches = find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME],
            [:PSEUDO_CLASS,
              [:FUNCTION]
            ]
          ]
        )
        matches.each do |match|
          function = match.value[1].value[0]
          if function.value[0] =~ /^nth-(last-)?child/
            wildcard_with_self_test(match, function)
          end
        end

        ### Deal with first-child, last-child, only-child
        matches = find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME], [:PSEUDO_CLASS]
          ])
        matches.each do |match|
          pseudo = match.value[1].value.first
          case pseudo
          when 'first-child', 'last-child'
            # 'first-child' => 'first(' and 'last-child' => 'last('
            which = pseudo.gsub(/-\w*$/, '')
            wildcard_with_self_test(match, Node.new(:FUNCTION, ["#{which}("]))
          when 'only-child'
            wildcard_with_self_test(match, Node.new(:FUNCTION, ["#{pseudo}("]))
          end
        end

        self
      end

      # Find all nodes in this subtree (self included, depth first) whose
      # #to_type signature equals +types+. Returns an Array of Nodes.
      def find_by_type types
        matches = []
        matches << self if to_type == types
        @value.each do |v|
          # concat appends in place instead of copying the array per child
          matches.concat(v.find_by_type(types)) if v.respond_to?(:find_by_type)
        end
        matches
      end

      # Structural signature of this subtree: this node's type followed by
      # the signatures of its child nodes. Children that do not respond to
      # to_type (plain values) are left out.
      def to_type
        [@type] + @value.map { |n|
          n.to_type if n.respond_to?(:to_type)
        }.compact
      end

      # Convert to a nested array: this node's type followed by each child
      # converted with to_a, or wrapped in a one-element array when the
      # child does not respond to to_a.
      def to_a
        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
      end

      private

      # Rewrite the CONDITIONAL_SELECTOR +match+ in place: its element name
      # becomes '*' and its condition becomes a COMBINATOR of +condition+
      # followed by a 'self(' FUNCTION carrying the original tag name.
      def wildcard_with_self_test match, condition
        tag_name = match.value[0].value.first
        match.value[0].value = ['*']
        match.value[1] = Node.new(:COMBINATOR, [
          condition,
          Node.new(:FUNCTION, ['self(', tag_name])
        ])
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require 'thread'
2
+
3
module Nokogiri
  module CSS
    ###
    # CSS selector parser. Adds a class-level, mutex-guarded cache of
    # results keyed by selector string on top of GeneratedTokenizer.
    class Parser < GeneratedTokenizer
      @cache_on = true
      @cache    = {}
      @mutex    = Mutex.new

      class << self
        # Turn on CSS parse caching
        attr_accessor :cache_on
        alias :cache_on? :cache_on
        alias :set_cache :cache_on=

        # Get the css selector in +string+ from the cache.
        # Returns nil when caching is off or nothing is stored.
        def [] string
          return unless @cache_on
          @mutex.synchronize { @cache[string] }
        end

        # Set the css selector in +string+ in the cache to +value+.
        # When caching is off nothing is stored and +value+ is returned.
        def []= string, value
          return value unless @cache_on
          @mutex.synchronize { @cache[string] = value }
        end

        # Clear the cache
        def clear_cache
          @mutex.synchronize { @cache = {} }
        end

        # Execute +block+ with caching switched off and return the
        # block's value. The previous cache setting is restored even if
        # the block raises, so one failing call cannot leave caching
        # disabled for the rest of the process.
        def without_cache &block
          tmp = @cache_on
          @cache_on = false
          block.call
        ensure
          @cache_on = tmp
        end

        ###
        # Parse this CSS selector in +selector+.  Returns an AST.
        # Deprecated: prints a one-time warning to $stderr, then
        # delegates to a fresh instance.
        def parse selector
          @warned ||= false
          unless @warned
            $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
            @warned = true
          end
          new.parse selector
        end
      end

      # Create a new CSS parser with respect to +namespaces+
      def initialize namespaces = {}
        @namespaces = namespaces
        super()
      end
      alias :parse :scan_str

      # Get the xpath for +string+ using +options+.
      #
      # Reads options[:prefix] (default '//'), options[:visitor] (default
      # a new XPathVisitor) and options[:ns]. Returns the cached value if
      # one exists; otherwise parses +string+, converts each resulting AST
      # with to_xpath, stores that array in the class cache and returns it.
      #
      # NOTE(review): the cache key is built from the selector, :ns and
      # :prefix only, so a result produced with one :visitor is returned
      # for a later call that passes a different :visitor.
      def xpath_for string, options={}
        key = "#{string}#{options[:ns]}#{options[:prefix]}"
        v = self.class[key]
        return v if v

        args = [
          options[:prefix] || '//',
          options[:visitor] || XPathVisitor.new
        ]
        self.class[key] = parse(string).map { |ast|
          ast.to_xpath(*args)
        }
      end

      # On CSS parser error, raise an exception naming the unexpected
      # value and the last non-nil entry of +value_stack+.
      # NOTE(review): SyntaxError here presumably resolves to
      # Nokogiri::CSS::SyntaxError rather than Ruby's ::SyntaxError --
      # confirm that class is loaded first.
      def on_error error_token_id, error_value, value_stack
        after = value_stack.compact.last
        raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
      end
    end
  end
end
@@ -0,0 +1,232 @@
1
# Racc grammar for CSS selectors. Actions build a tree of Node objects
# and read @namespaces; both are defined outside this file.
class Nokogiri::CSS::GeneratedParser

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS

rule
  # Comma separated selector list; yields a flat array.
  selector
    : selector COMMA simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | simple_selector_1toN { result = val.flatten }
    ;
  # Maps a combinator token to the node type used to join two selectors.
  combinator
    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER { result = :CHILD_SELECTOR }
    | TILDE { result = :PRECEDING_SELECTOR }
    | S { result = :DESCENDANT_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  # One compound selector: optional element name plus conditions.
  simple_selector
    : element_name hcap_0toN {
        result = if val[1].nil?
          val.first
        else
          Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
        end
      }
    | element_name hcap_1toN negation {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            val.first,
            Node.new(:COMBINATOR, [val[1], val.last])
          ]
        )
      }
    | element_name negation {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN negation {
        # No element name given: match any element
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            Node.new(:ELEMENT_NAME, ['*']),
            Node.new(:COMBINATOR, val)
          ]
        )
      }
    | hcap_1toN {
        # No element name given: match any element
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # Right recursive chain; the combinator result becomes the node type.
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
    ;
  element_name
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        # A registered xmlns key causes bare names to get the xmlns: prefix
        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
        result = Node.new(:ELEMENT_NAME, [name])
      }
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  namespace
    : IDENT { result = val[0] }
    |
    ;
  attrib
    : LSQUARE IDENT attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
        )
      }
    | LSQUARE function attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE NUMBER RSQUARE {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
        )
      }
    ;
  # The leading token text is stripped and stored as the first value.
  function
    : FUNCTION RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION an_plus_b RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | HAS selector RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  expr
    : NUMBER COMMA expr { result = [val.first, val.last] }
    | STRING COMMA expr { result = [val.first, val.last] }
    | IDENT COMMA expr { result = [val.first, val.last] }
    | NUMBER
    | STRING
    | IDENT # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          # This is not CSS standard.  It allows us to support this:
          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
          result = val
        end
      }
    ;
  # Every alternative normalises to four values: a, n, +, b.
  an_plus_b
    : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER { # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # In this alternative the IDENT is val[0]; val[1] is the PLUS token
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more id, class, attribute or pseudo conditions, nested
  # right to left as COMBINATOR nodes.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  attribute_id
    : HASH { result = Node.new(:ID, val) }
    ;
  attrib_val_0or1
    : eql_incl_dash IDENT { result = [val.first, val[1]] }
    | eql_incl_dash STRING { result = [val.first, val[1]] }
    |
    ;
  # Maps an attribute operator token to the symbol stored in the tree.
  eql_incl_dash
    : EQUAL           { result = :equal            }
    | PREFIXMATCH     { result = :prefix_match     }
    | SUFFIXMATCH     { result = :suffix_match     }
    | SUBSTRINGMATCH  { result = :substring_match  }
    | NOT_EQUAL       { result = :not_equal        }
    | INCLUDES        { result = :includes         }
    | DASHMATCH       { result = :dash_match       }
    ;
  negation
    : NOT negation_arg RPAREN {
        result = Node.new(:NOT, [val[1]])
      }
    ;
  negation_arg
    : element_name
    | element_name hcap_1toN
    | hcap_1toN
    ;
end
230
+
231
+ ---- header
232
+