caring-nokogiri 1.4.1.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (270) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +324 -0
  4. data/Manifest.txt +269 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +118 -0
  7. data/Rakefile +248 -0
  8. data/bin/nokogiri +49 -0
  9. data/ext/nokogiri/extconf.rb +147 -0
  10. data/ext/nokogiri/html_document.c +145 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +32 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  17. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  18. data/ext/nokogiri/nokogiri.c +95 -0
  19. data/ext/nokogiri/nokogiri.h +145 -0
  20. data/ext/nokogiri/xml_attr.c +92 -0
  21. data/ext/nokogiri/xml_attr.h +9 -0
  22. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  23. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  24. data/ext/nokogiri/xml_cdata.c +54 -0
  25. data/ext/nokogiri/xml_cdata.h +9 -0
  26. data/ext/nokogiri/xml_comment.c +52 -0
  27. data/ext/nokogiri/xml_comment.h +9 -0
  28. data/ext/nokogiri/xml_document.c +388 -0
  29. data/ext/nokogiri/xml_document.h +24 -0
  30. data/ext/nokogiri/xml_document_fragment.c +46 -0
  31. data/ext/nokogiri/xml_document_fragment.h +10 -0
  32. data/ext/nokogiri/xml_dtd.c +192 -0
  33. data/ext/nokogiri/xml_dtd.h +10 -0
  34. data/ext/nokogiri/xml_element_content.c +123 -0
  35. data/ext/nokogiri/xml_element_content.h +10 -0
  36. data/ext/nokogiri/xml_element_decl.c +69 -0
  37. data/ext/nokogiri/xml_element_decl.h +9 -0
  38. data/ext/nokogiri/xml_entity_decl.c +97 -0
  39. data/ext/nokogiri/xml_entity_decl.h +10 -0
  40. data/ext/nokogiri/xml_entity_reference.c +50 -0
  41. data/ext/nokogiri/xml_entity_reference.h +9 -0
  42. data/ext/nokogiri/xml_io.c +31 -0
  43. data/ext/nokogiri/xml_io.h +11 -0
  44. data/ext/nokogiri/xml_namespace.c +74 -0
  45. data/ext/nokogiri/xml_namespace.h +12 -0
  46. data/ext/nokogiri/xml_node.c +1060 -0
  47. data/ext/nokogiri/xml_node.h +13 -0
  48. data/ext/nokogiri/xml_node_set.c +397 -0
  49. data/ext/nokogiri/xml_node_set.h +9 -0
  50. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  51. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  52. data/ext/nokogiri/xml_reader.c +593 -0
  53. data/ext/nokogiri/xml_reader.h +10 -0
  54. data/ext/nokogiri/xml_relax_ng.c +159 -0
  55. data/ext/nokogiri/xml_relax_ng.h +9 -0
  56. data/ext/nokogiri/xml_sax_parser.c +286 -0
  57. data/ext/nokogiri/xml_sax_parser.h +43 -0
  58. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  59. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  60. data/ext/nokogiri/xml_sax_push_parser.c +91 -0
  61. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  62. data/ext/nokogiri/xml_schema.c +156 -0
  63. data/ext/nokogiri/xml_schema.h +9 -0
  64. data/ext/nokogiri/xml_syntax_error.c +261 -0
  65. data/ext/nokogiri/xml_syntax_error.h +13 -0
  66. data/ext/nokogiri/xml_text.c +48 -0
  67. data/ext/nokogiri/xml_text.h +9 -0
  68. data/ext/nokogiri/xml_xpath.c +53 -0
  69. data/ext/nokogiri/xml_xpath.h +11 -0
  70. data/ext/nokogiri/xml_xpath_context.c +239 -0
  71. data/ext/nokogiri/xml_xpath_context.h +9 -0
  72. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  73. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  74. data/lib/nokogiri.rb +116 -0
  75. data/lib/nokogiri/css.rb +25 -0
  76. data/lib/nokogiri/css/generated_parser.rb +646 -0
  77. data/lib/nokogiri/css/generated_tokenizer.rb +143 -0
  78. data/lib/nokogiri/css/node.rb +99 -0
  79. data/lib/nokogiri/css/parser.rb +82 -0
  80. data/lib/nokogiri/css/parser.y +227 -0
  81. data/lib/nokogiri/css/syntax_error.rb +7 -0
  82. data/lib/nokogiri/css/tokenizer.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rex +54 -0
  84. data/lib/nokogiri/css/xpath_visitor.rb +162 -0
  85. data/lib/nokogiri/decorators/slop.rb +33 -0
  86. data/lib/nokogiri/ffi/html/document.rb +28 -0
  87. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  88. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  89. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  90. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  91. data/lib/nokogiri/ffi/libxml.rb +356 -0
  92. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  93. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  94. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  95. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  96. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  97. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  98. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  100. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  101. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  102. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  103. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  104. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  105. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  106. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  107. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  109. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  110. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  111. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  112. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
  113. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  114. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  115. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  116. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  117. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  118. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  119. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  120. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  121. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  122. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  123. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  124. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  125. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  126. data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
  127. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  128. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  129. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  132. data/lib/nokogiri/ffi/xml/node.rb +444 -0
  133. data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
  134. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  135. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  136. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  137. data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
  138. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  139. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
  140. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  141. data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
  142. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  143. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  144. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  145. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  146. data/lib/nokogiri/html.rb +35 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +88 -0
  149. data/lib/nokogiri/html/document_fragment.rb +15 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  152. data/lib/nokogiri/html/sax/parser.rb +48 -0
  153. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  154. data/lib/nokogiri/syntax_error.rb +4 -0
  155. data/lib/nokogiri/version.rb +33 -0
  156. data/lib/nokogiri/version_warning.rb +11 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +405 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +131 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +73 -0
  165. data/lib/nokogiri/xml/dtd.rb +11 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  169. data/lib/nokogiri/xml/fragment_handler.rb +73 -0
  170. data/lib/nokogiri/xml/namespace.rb +13 -0
  171. data/lib/nokogiri/xml/node.rb +665 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  173. data/lib/nokogiri/xml/node_set.rb +307 -0
  174. data/lib/nokogiri/xml/notation.rb +6 -0
  175. data/lib/nokogiri/xml/parse_options.rb +85 -0
  176. data/lib/nokogiri/xml/pp.rb +2 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  178. data/lib/nokogiri/xml/pp/node.rb +56 -0
  179. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  180. data/lib/nokogiri/xml/reader.rb +74 -0
  181. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  182. data/lib/nokogiri/xml/sax.rb +4 -0
  183. data/lib/nokogiri/xml/sax/document.rb +160 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  187. data/lib/nokogiri/xml/schema.rb +61 -0
  188. data/lib/nokogiri/xml/syntax_error.rb +38 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +48 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xsd/xmlparser/nokogiri.rb +71 -0
  195. data/tasks/test.rb +100 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +277 -0
  198. data/test/css/test_tokenizer.rb +183 -0
  199. data/test/css/test_xpath_visitor.rb +76 -0
  200. data/test/ffi/test_document.rb +35 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/exslt.xml +8 -0
  207. data/test/files/exslt.xslt +35 -0
  208. data/test/files/foo/foo.xsd +4 -0
  209. data/test/files/po.xml +32 -0
  210. data/test/files/po.xsd +66 -0
  211. data/test/files/shift_jis.html +10 -0
  212. data/test/files/shift_jis.xml +5 -0
  213. data/test/files/snuggles.xml +3 -0
  214. data/test/files/staff.dtd +10 -0
  215. data/test/files/staff.xml +59 -0
  216. data/test/files/staff.xslt +32 -0
  217. data/test/files/tlm.html +850 -0
  218. data/test/files/valid_bar.xml +2 -0
  219. data/test/helper.rb +136 -0
  220. data/test/html/sax/test_parser.rb +64 -0
  221. data/test/html/sax/test_parser_context.rb +48 -0
  222. data/test/html/test_builder.rb +164 -0
  223. data/test/html/test_document.rb +390 -0
  224. data/test/html/test_document_encoding.rb +77 -0
  225. data/test/html/test_document_fragment.rb +142 -0
  226. data/test/html/test_element_description.rb +94 -0
  227. data/test/html/test_named_characters.rb +14 -0
  228. data/test/html/test_node.rb +228 -0
  229. data/test/html/test_node_encoding.rb +27 -0
  230. data/test/test_convert_xpath.rb +135 -0
  231. data/test/test_css_cache.rb +45 -0
  232. data/test/test_gc.rb +15 -0
  233. data/test/test_memory_leak.rb +77 -0
  234. data/test/test_nokogiri.rb +138 -0
  235. data/test/test_reader.rb +358 -0
  236. data/test/test_xslt_transforms.rb +131 -0
  237. data/test/xml/node/test_save_options.rb +20 -0
  238. data/test/xml/node/test_subclass.rb +44 -0
  239. data/test/xml/sax/test_parser.rb +307 -0
  240. data/test/xml/sax/test_parser_context.rb +56 -0
  241. data/test/xml/sax/test_push_parser.rb +106 -0
  242. data/test/xml/test_attr.rb +38 -0
  243. data/test/xml/test_attribute_decl.rb +82 -0
  244. data/test/xml/test_builder.rb +167 -0
  245. data/test/xml/test_cdata.rb +38 -0
  246. data/test/xml/test_comment.rb +29 -0
  247. data/test/xml/test_document.rb +607 -0
  248. data/test/xml/test_document_encoding.rb +26 -0
  249. data/test/xml/test_document_fragment.rb +144 -0
  250. data/test/xml/test_dtd.rb +82 -0
  251. data/test/xml/test_dtd_encoding.rb +33 -0
  252. data/test/xml/test_element_content.rb +56 -0
  253. data/test/xml/test_element_decl.rb +73 -0
  254. data/test/xml/test_entity_decl.rb +83 -0
  255. data/test/xml/test_entity_reference.rb +21 -0
  256. data/test/xml/test_namespace.rb +68 -0
  257. data/test/xml/test_node.rb +889 -0
  258. data/test/xml/test_node_attributes.rb +34 -0
  259. data/test/xml/test_node_encoding.rb +107 -0
  260. data/test/xml/test_node_set.rb +531 -0
  261. data/test/xml/test_parse_options.rb +52 -0
  262. data/test/xml/test_processing_instruction.rb +30 -0
  263. data/test/xml/test_reader_encoding.rb +126 -0
  264. data/test/xml/test_relax_ng.rb +60 -0
  265. data/test/xml/test_schema.rb +89 -0
  266. data/test/xml/test_syntax_error.rb +27 -0
  267. data/test/xml/test_text.rb +30 -0
  268. data/test/xml/test_unparented_node.rb +381 -0
  269. data/test/xml/test_xpath.rb +106 -0
  270. metadata +428 -0
@@ -0,0 +1,143 @@
1
+ #--
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by rex 1.0.4
4
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
+ #++
6
+
7
+ module Nokogiri
8
+ module CSS
9
+ class GeneratedTokenizer < GeneratedParser
10
+ require 'strscan'
11
+
12
+ class ScanError < StandardError ; end
13
+
14
+ attr_reader :lineno
15
+ attr_reader :filename
16
+ attr_accessor :state
17
+
18
# Prepare the tokenizer to scan +str+: installs a fresh StringScanner over
# the input and resets the line counter and the lexer state.
def scan_setup(str)
  scanner = StringScanner.new(str)
  @ss, @lineno = scanner, 1
  @state = nil
end
23
+
24
# Evaluate the block attached to a lexer rule and hand back its value.
def action(&_rule_body)
  yield
end
27
+
28
# Tokenize and parse +str+ in one step; the result is whatever +do_parse+
# returns.
def scan_str(str)
  scan_setup str
  parsed = do_parse
  parsed
end
32
+ alias :scan :scan_str
33
+
34
# Read +filename+ and prime the scanner with its contents.
#
# The path is remembered in @filename. The file is opened with File.open
# rather than Kernel#open so that a path beginning with "|" is always
# treated as a file name and is never executed as a shell command.
def load_file( filename )
  @filename = filename
  File.open(filename, "r") do |f|
    scan_setup(f.read)
  end
end
40
+
41
# Load +filename+ into the scanner, then run the parser over it; the result
# is whatever +do_parse+ returns.
def scan_file( filename )
  load_file filename
  parsed = do_parse
  parsed
end
45
+
46
+
47
+ def next_token # Scan and return the next token as a [type, text] pair; returns nil once the input is exhausted.
48
+ return if @ss.eos?
49
+
50
+ text = @ss.peek(1)
51
+ @lineno += 1 if text == "\n" # NOTE(review): only a newline that is the very next character is counted; newlines inside a longer match are not
52
+ token = case @state
53
+ when nil # the only lexer state that has rules; any other state raises below
54
+ case # rules are tried top to bottom and the first pattern that matches wins
55
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/))
56
+ action { [:FUNCTION, text] }
57
+
58
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/))
59
+ action { [:IDENT, text] }
60
+
61
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/))
62
+ action { [:HASH, text] }
63
+
64
+ when (text = @ss.scan(/[\s\r\n\f]*~=[\s\r\n\f]*/))
65
+ action { [:INCLUDES, text] }
66
+
67
+ when (text = @ss.scan(/[\s\r\n\f]*\|=[\s\r\n\f]*/))
68
+ action { [:DASHMATCH, text] }
69
+
70
+ when (text = @ss.scan(/[\s\r\n\f]*\^=[\s\r\n\f]*/))
71
+ action { [:PREFIXMATCH, text] }
72
+
73
+ when (text = @ss.scan(/[\s\r\n\f]*\$=[\s\r\n\f]*/))
74
+ action { [:SUFFIXMATCH, text] }
75
+
76
+ when (text = @ss.scan(/[\s\r\n\f]*\*=[\s\r\n\f]*/))
77
+ action { [:SUBSTRINGMATCH, text] }
78
+
79
+ when (text = @ss.scan(/[\s\r\n\f]*!=[\s\r\n\f]*/))
80
+ action { [:NOT_EQUAL, text] }
81
+
82
+ when (text = @ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
83
+ action { [:EQUAL, text] }
84
+
85
+ when (text = @ss.scan(/[\s\r\n\f]*\)/))
86
+ action { [:RPAREN, text] }
87
+
88
+ when (text = @ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
89
+ action { [:LSQUARE, text] }
90
+
91
+ when (text = @ss.scan(/[\s\r\n\f]*\]/))
92
+ action { [:RSQUARE, text] }
93
+
94
+ when (text = @ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
95
+ action { [:PLUS, text] }
96
+
97
+ when (text = @ss.scan(/[\s\r\n\f]*>[\s\r\n\f]*/))
98
+ action { [:GREATER, text] }
99
+
100
+ when (text = @ss.scan(/[\s\r\n\f]*,[\s\r\n\f]*/))
101
+ action { [:COMMA, text] }
102
+
103
+ when (text = @ss.scan(/[\s\r\n\f]*~[\s\r\n\f]*/))
104
+ action { [:TILDE, text] }
105
+
106
+ when (text = @ss.scan(/\:not\([\s\r\n\f]*/))
107
+ action { [:NOT, text] }
108
+
109
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
110
+ action { [:NUMBER, text] }
111
+
112
+ when (text = @ss.scan(/[\s\r\n\f]*\/\/[\s\r\n\f]*/))
113
+ action { [:DOUBLESLASH, text] }
114
+
115
+ when (text = @ss.scan(/[\s\r\n\f]*\/[\s\r\n\f]*/))
116
+ action { [:SLASH, text] }
117
+
118
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
119
+ action {[:UNICODE_RANGE, text] }
120
+
121
+ when (text = @ss.scan(/[\s\t\r\n\f]+/))
122
+ action { [:S, text] }
123
+
124
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/))
125
+ action { [:STRING, text] }
126
+
127
+ when (text = @ss.scan(/./)) # fallback: any other single character is returned with itself as the token type
128
+ action { [text, text] }
129
+
130
+ else
131
+ text = @ss.string[@ss.pos .. -1] # the unscanned remainder of the input, quoted in the error
132
+ raise ScanError, "can not match: '" + text + "'"
133
+ end # inner case (token rules)
134
+
135
+ else
136
+ raise ScanError, "undefined state: '" + state.to_s + "'"
137
+ end # case state
138
+ token
139
+ end # def next_token
140
+
141
+ end # class
142
+ end
143
+ end
@@ -0,0 +1,99 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class Node
4
+ # Get the type of this node
5
+ attr_accessor :type
6
+ # Get the value of this node
7
+ attr_accessor :value
8
+
9
# Build a Node of kind +type+ holding +value+
def initialize type, value
  @type, @value = type, value
end
14
+
15
# Accept +visitor+: dispatches to the visitor's visit_<type> method, where
# <type> is this node's type, lower-cased, and passes the node itself.
def accept visitor
  handler = "visit_#{type.to_s.downcase}".to_sym
  visitor.send(handler, self)
end
19
+
20
###
# Render this CSS node as XPath: the tree is preprocessed in place, then
# the expression produced by +visitor+ is appended to +prefix+
def to_xpath prefix = '//', visitor = XPathVisitor.new
  preprocess!
  expression = visitor.accept(self)
  prefix + expression
end
26
+
27
# Preprocess this node tree
#
# Rewrites, in place, every conditional selector made of an element name
# plus a positional pseudo class (nth-child / nth-last-child functions,
# first-child, last-child, only-child): the element name becomes '*' and
# the pseudo class is replaced by a COMBINATOR of the positional function
# and a 'self(' function carrying the original tag name.
#
# Returns self.
def preprocess!
  ### Deal with nth-child
  find_by_type(
    [:CONDITIONAL_SELECTOR,
      [:ELEMENT_NAME],
      [:PSEUDO_CLASS,
        [:FUNCTION]
      ]
    ]
  ).each do |match|
    function = match.value[1].value[0]
    next unless function.value[0] =~ /^nth-(last-)?child/
    restrict_position_to_self(match, function)
  end

  ### Deal with first-child, last-child, only-child
  find_by_type(
    [:CONDITIONAL_SELECTOR,
      [:ELEMENT_NAME], [:PSEUDO_CLASS]
    ]
  ).each do |match|
    pseudo = match.value[1].value.first
    case pseudo
    when 'first-child', 'last-child'
      # 'first-child' -> 'first(' and 'last-child' -> 'last('
      which = pseudo.gsub(/-\w*$/, '')
      restrict_position_to_self(match, Node.new(:FUNCTION, ["#{which}("]))
    when 'only-child'
      restrict_position_to_self(match, Node.new(:FUNCTION, ["#{pseudo}("]))
    end
  end

  self
end

# Replace the element name of +match+ with '*' and its second child with a
# COMBINATOR of +position_function+ and a 'self(' function that carries the
# original tag name.
def restrict_position_to_self match, position_function
  tag_name = match.value[0].value.first
  match.value[0].value = ['*']
  match.value[1] = Node.new(:COMBINATOR, [
    position_function,
    Node.new(:FUNCTION, ['self(', tag_name])
  ])
end
private :restrict_position_to_self
75
+
76
# Collect every node in this subtree (this node included) whose +to_type+
# signature equals +types+. Children that do not respond to find_by_type
# are skipped.
def find_by_type types
  found = to_type == types ? [self] : []
  @value.each do |child|
    next unless child.respond_to?(:find_by_type)
    found.concat(child.find_by_type(types))
  end
  found
end
85
+
86
# Build the nested type signature of this subtree: this node's type
# followed by the signature of every child that responds to to_type.
# Other children (and nil signatures) are left out.
def to_type
  signature = [@type]
  @value.each do |child|
    next unless child.respond_to?(:to_type)
    nested = child.to_type
    signature << nested unless nested.nil?
  end
  signature
end
92
+
93
# Convert to array: this node's type followed by one entry per child, which
# is the child's own to_a when it responds to it and [child] otherwise.
def to_a
  @value.inject([@type]) do |list, item|
    list << (item.respond_to?(:to_a) ? item.to_a : [item])
  end
end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,82 @@
1
+ require 'thread'
2
+
3
+ module Nokogiri
4
+ module CSS
5
+ class Parser < GeneratedTokenizer
6
+ @cache_on = true
7
+ @cache = {}
8
+ @mutex = Mutex.new
9
+
10
+ class << self
11
+ # Turn on CSS parse caching
12
+ attr_accessor :cache_on
13
+ alias :cache_on? :cache_on
14
+ alias :set_cache :cache_on=
15
+
16
# Look up the cached entry stored under +string+. Gives nil when caching is
# switched off or nothing is stored for that key.
def [] string
  if @cache_on
    @mutex.synchronize { @cache[string] }
  end
end
21
+
22
# Store +value+ in the cache under +string+. When caching is switched off
# nothing is stored. +value+ is returned either way.
def []= string, value
  @mutex.synchronize { @cache[string] = value } if @cache_on
  value
end
27
+
28
# Drop every cached entry by swapping in a fresh, empty hash while holding
# the cache mutex.
def clear_cache
  @mutex.synchronize do
    @cache = Hash.new
  end
end
32
+
33
# Execute +block+ without cache
#
# Caching is switched off for the duration of the block and the previous
# setting is put back afterwards, even when the block raises, so a failing
# block can no longer leave caching disabled for good.
#
# Returns the cache setting that was in effect before the call (unchanged
# from the earlier behaviour).
def without_cache &block
  previous = @cache_on
  @cache_on = false
  begin
    block.call
  ensure
    @cache_on = previous
  end
  previous
end
40
+
41
###
# Deprecated entry point: parses the CSS selector in +selector+ with a new
# parser instance and returns its result. A deprecation notice is written
# to $stderr on the first call only.
def parse selector
  @warned = false unless @warned
  if !@warned
    $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
    @warned = true
  end
  parser = new
  parser.parse selector
end
51
+ end
52
+
53
# Set up a CSS parser that resolves names with respect to +namespaces+
# (stored in @namespaces), then run the superclass initializer with no
# arguments.
def initialize namespaces = {}
  @namespaces = namespaces
  super()
end
58
+ alias :parse :scan_str
59
+
60
# Get the xpath for +string+ using +options+
#
# Options read here (all optional):
#   :prefix  - string placed in front of every generated XPath ('//' by default)
#   :visitor - object handed to each AST's to_xpath (XPathVisitor.new by default)
#   :ns      - only used as part of the cache key
#
# Returns an array with one entry per AST returned by +parse+.
def xpath_for string, options={}
  prefix  = options[:prefix] || '//'
  visitor = options[:visitor]

  # A caller-supplied visitor may render the same selector differently and
  # the cache key cannot tell visitors apart, so these requests neither read
  # from nor write to the shared cache.
  if visitor
    return parse(string).map { |ast| ast.to_xpath(prefix, visitor) }
  end

  # Keep the key parts separate: gluing them into one string let
  # ("a", :prefix => "b") and ("ab") share a cache slot. The parts are
  # copied so later changes to the caller's strings cannot alter the key.
  key = [string, options[:ns], options[:prefix]].map { |part| part.to_s.dup }
  cached = self.class[key]
  return cached if cached

  visitor = XPathVisitor.new
  self.class[key] = parse(string).map { |ast|
    ast.to_xpath(prefix, visitor)
  }
end
74
+
75
# Parser error hook: raises a SyntaxError naming the unexpected value and
# the last non-nil entry of the value stack that preceded it.
def on_error error_token_id, error_value, value_stack
  preceding = value_stack.compact.last
  message = "unexpected '#{error_value}' after '#{preceding}'"
  raise SyntaxError.new(message)
end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,227 @@
1
# Racc grammar for CSS selectors.
#
# Inside every action +val+ holds the values matched by the rule's right-hand
# side and +result+ becomes the value of the rule. The actions build Node
# objects (plus a few plain arrays and symbols).
class Nokogiri::CSS::GeneratedParser

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE

rule
  # Comma-separated selector chains, flattened into a single array.
  selector
    : selector COMMA simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | simple_selector_1toN { result = val.flatten }
    ;
  # Each combinator token becomes the type symbol of the node that joins the
  # two selectors around it.
  combinator
    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER { result = :CHILD_SELECTOR }
    | TILDE { result = :PRECEDING_SELECTOR }
    | S { result = :DESCENDANT_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  simple_selector
    : element_name hcap_0toN {
        result =  if val[1].nil?
                    val.first
                  else
                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
                  end
      }
    | element_name hcap_1toN negation {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            val.first,
            Node.new(:COMBINATOR, [val[1], val.last])
          ]
        )
      }
    | element_name negation {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN negation {
        # no element name given: the conditions apply to '*'
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            Node.new(:ELEMENT_NAME, ['*']),
            Node.new(:COMBINATOR, val)
          ]
        )
      }
    | hcap_1toN {
        # no element name given: the conditions apply to '*'
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # A chain of simple selectors; the combinator's symbol is the node type.
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
    ;
  element_name
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        # a registered 'xmlns' namespace is applied to unqualified names
        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
        result = Node.new(:ELEMENT_NAME, [name])
      }
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  namespace
    : IDENT { result = val[0] }
    |
    ;
  attrib
    : LSQUARE IDENT attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
        )
      }
    | LSQUARE function attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE NUMBER RSQUARE {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
        )
      }
    ;
  function
    : FUNCTION RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION an_plus_b RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  expr
    : NUMBER COMMA expr { result = [val.first, val.last] }
    | STRING COMMA expr { result = [val.first, val.last] }
    | IDENT COMMA expr { result = [val.first, val.last] }
    | NUMBER
    | STRING
    | IDENT # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          # This is not CSS standard. It allows us to support this:
          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
          result = val
        end
      }
    ;
  an_plus_b
    : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER { # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # In this alternative the IDENT is val[0]; val[1] is the PLUS token.
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more id / class / attribute / pseudo conditions; two or more are
  # nested into COMBINATOR nodes.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  attribute_id
    : HASH { result = Node.new(:ID, val) }
    ;
  attrib_val_0or1
    : eql_incl_dash IDENT { result = [val.first, val[1]] }
    | eql_incl_dash STRING { result = [val.first, val[1]] }
    |
    ;
  # Attribute comparison operators, as the symbols stored in the condition.
  eql_incl_dash
    : EQUAL           { result = :equal }
    | PREFIXMATCH     { result = :prefix_match }
    | SUFFIXMATCH     { result = :suffix_match }
    | SUBSTRINGMATCH  { result = :substring_match }
    | NOT_EQUAL       { result = :not_equal }
    | INCLUDES        { result = :includes }
    | DASHMATCH       { result = :dash_match }
    ;
  negation
    : NOT negation_arg RPAREN {
        result = Node.new(:NOT, [val[1]])
      }
    ;
  negation_arg
    : hcap_1toN
    ;
end
225
+
226
+ ---- header
227
+