superfeedr-nokogiri 1.4.0.20091116183308

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +314 -0
  4. data/Manifest.txt +269 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +118 -0
  7. data/Rakefile +244 -0
  8. data/bin/nokogiri +49 -0
  9. data/ext/nokogiri/extconf.rb +145 -0
  10. data/ext/nokogiri/html_document.c +145 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +32 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  17. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  18. data/ext/nokogiri/nokogiri.c +89 -0
  19. data/ext/nokogiri/nokogiri.h +145 -0
  20. data/ext/nokogiri/xml_attr.c +92 -0
  21. data/ext/nokogiri/xml_attr.h +9 -0
  22. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  23. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  24. data/ext/nokogiri/xml_cdata.c +54 -0
  25. data/ext/nokogiri/xml_cdata.h +9 -0
  26. data/ext/nokogiri/xml_comment.c +52 -0
  27. data/ext/nokogiri/xml_comment.h +9 -0
  28. data/ext/nokogiri/xml_document.c +388 -0
  29. data/ext/nokogiri/xml_document.h +24 -0
  30. data/ext/nokogiri/xml_document_fragment.c +46 -0
  31. data/ext/nokogiri/xml_document_fragment.h +10 -0
  32. data/ext/nokogiri/xml_dtd.c +192 -0
  33. data/ext/nokogiri/xml_dtd.h +10 -0
  34. data/ext/nokogiri/xml_element_content.c +123 -0
  35. data/ext/nokogiri/xml_element_content.h +10 -0
  36. data/ext/nokogiri/xml_element_decl.c +69 -0
  37. data/ext/nokogiri/xml_element_decl.h +9 -0
  38. data/ext/nokogiri/xml_entity_decl.c +97 -0
  39. data/ext/nokogiri/xml_entity_decl.h +10 -0
  40. data/ext/nokogiri/xml_entity_reference.c +50 -0
  41. data/ext/nokogiri/xml_entity_reference.h +9 -0
  42. data/ext/nokogiri/xml_io.c +31 -0
  43. data/ext/nokogiri/xml_io.h +11 -0
  44. data/ext/nokogiri/xml_namespace.c +74 -0
  45. data/ext/nokogiri/xml_namespace.h +12 -0
  46. data/ext/nokogiri/xml_node.c +1060 -0
  47. data/ext/nokogiri/xml_node.h +13 -0
  48. data/ext/nokogiri/xml_node_set.c +397 -0
  49. data/ext/nokogiri/xml_node_set.h +9 -0
  50. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  51. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  52. data/ext/nokogiri/xml_reader.c +593 -0
  53. data/ext/nokogiri/xml_reader.h +10 -0
  54. data/ext/nokogiri/xml_relax_ng.c +159 -0
  55. data/ext/nokogiri/xml_relax_ng.h +9 -0
  56. data/ext/nokogiri/xml_sax_parser.c +286 -0
  57. data/ext/nokogiri/xml_sax_parser.h +43 -0
  58. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  59. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  60. data/ext/nokogiri/xml_sax_push_parser.c +114 -0
  61. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  62. data/ext/nokogiri/xml_schema.c +156 -0
  63. data/ext/nokogiri/xml_schema.h +9 -0
  64. data/ext/nokogiri/xml_syntax_error.c +261 -0
  65. data/ext/nokogiri/xml_syntax_error.h +13 -0
  66. data/ext/nokogiri/xml_text.c +48 -0
  67. data/ext/nokogiri/xml_text.h +9 -0
  68. data/ext/nokogiri/xml_xpath.c +53 -0
  69. data/ext/nokogiri/xml_xpath.h +11 -0
  70. data/ext/nokogiri/xml_xpath_context.c +239 -0
  71. data/ext/nokogiri/xml_xpath_context.h +9 -0
  72. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  73. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  74. data/lib/nokogiri.rb +116 -0
  75. data/lib/nokogiri/css.rb +25 -0
  76. data/lib/nokogiri/css/generated_parser.rb +646 -0
  77. data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
  78. data/lib/nokogiri/css/node.rb +99 -0
  79. data/lib/nokogiri/css/parser.rb +82 -0
  80. data/lib/nokogiri/css/parser.y +227 -0
  81. data/lib/nokogiri/css/syntax_error.rb +7 -0
  82. data/lib/nokogiri/css/tokenizer.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rex +54 -0
  84. data/lib/nokogiri/css/xpath_visitor.rb +162 -0
  85. data/lib/nokogiri/decorators/slop.rb +33 -0
  86. data/lib/nokogiri/ffi/html/document.rb +28 -0
  87. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  88. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  89. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  90. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  91. data/lib/nokogiri/ffi/libxml.rb +356 -0
  92. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  93. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  94. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  95. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  96. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  97. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  98. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  100. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  101. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  102. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  103. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  104. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  105. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  106. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  107. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  109. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  110. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  111. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  112. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
  113. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  114. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  115. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  116. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  117. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  118. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  119. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  120. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  121. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  122. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  123. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  124. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  125. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  126. data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
  127. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  128. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  129. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  132. data/lib/nokogiri/ffi/xml/node.rb +444 -0
  133. data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
  134. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  135. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  136. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  137. data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
  138. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  139. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
  140. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  141. data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
  142. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  143. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  144. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  145. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  146. data/lib/nokogiri/html.rb +35 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +88 -0
  149. data/lib/nokogiri/html/document_fragment.rb +15 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  152. data/lib/nokogiri/html/sax/parser.rb +48 -0
  153. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  154. data/lib/nokogiri/syntax_error.rb +4 -0
  155. data/lib/nokogiri/version.rb +33 -0
  156. data/lib/nokogiri/version_warning.rb +11 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +405 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +131 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +69 -0
  165. data/lib/nokogiri/xml/dtd.rb +11 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  169. data/lib/nokogiri/xml/fragment_handler.rb +71 -0
  170. data/lib/nokogiri/xml/namespace.rb +13 -0
  171. data/lib/nokogiri/xml/node.rb +665 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  173. data/lib/nokogiri/xml/node_set.rb +307 -0
  174. data/lib/nokogiri/xml/notation.rb +6 -0
  175. data/lib/nokogiri/xml/parse_options.rb +85 -0
  176. data/lib/nokogiri/xml/pp.rb +2 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  178. data/lib/nokogiri/xml/pp/node.rb +56 -0
  179. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  180. data/lib/nokogiri/xml/reader.rb +74 -0
  181. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  182. data/lib/nokogiri/xml/sax.rb +4 -0
  183. data/lib/nokogiri/xml/sax/document.rb +160 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  187. data/lib/nokogiri/xml/schema.rb +61 -0
  188. data/lib/nokogiri/xml/syntax_error.rb +38 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +48 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xsd/xmlparser/nokogiri.rb +71 -0
  195. data/tasks/test.rb +100 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +277 -0
  198. data/test/css/test_tokenizer.rb +183 -0
  199. data/test/css/test_xpath_visitor.rb +76 -0
  200. data/test/ffi/test_document.rb +35 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/exslt.xml +8 -0
  207. data/test/files/exslt.xslt +35 -0
  208. data/test/files/foo/foo.xsd +4 -0
  209. data/test/files/po.xml +32 -0
  210. data/test/files/po.xsd +66 -0
  211. data/test/files/shift_jis.html +10 -0
  212. data/test/files/shift_jis.xml +5 -0
  213. data/test/files/snuggles.xml +3 -0
  214. data/test/files/staff.dtd +10 -0
  215. data/test/files/staff.xml +59 -0
  216. data/test/files/staff.xslt +32 -0
  217. data/test/files/tlm.html +850 -0
  218. data/test/files/valid_bar.xml +2 -0
  219. data/test/helper.rb +136 -0
  220. data/test/html/sax/test_parser.rb +64 -0
  221. data/test/html/sax/test_parser_context.rb +48 -0
  222. data/test/html/test_builder.rb +164 -0
  223. data/test/html/test_document.rb +390 -0
  224. data/test/html/test_document_encoding.rb +77 -0
  225. data/test/html/test_document_fragment.rb +132 -0
  226. data/test/html/test_element_description.rb +94 -0
  227. data/test/html/test_named_characters.rb +14 -0
  228. data/test/html/test_node.rb +228 -0
  229. data/test/html/test_node_encoding.rb +27 -0
  230. data/test/test_convert_xpath.rb +135 -0
  231. data/test/test_css_cache.rb +45 -0
  232. data/test/test_gc.rb +15 -0
  233. data/test/test_memory_leak.rb +77 -0
  234. data/test/test_nokogiri.rb +134 -0
  235. data/test/test_reader.rb +358 -0
  236. data/test/test_xslt_transforms.rb +131 -0
  237. data/test/xml/node/test_save_options.rb +20 -0
  238. data/test/xml/node/test_subclass.rb +44 -0
  239. data/test/xml/sax/test_parser.rb +307 -0
  240. data/test/xml/sax/test_parser_context.rb +56 -0
  241. data/test/xml/sax/test_push_parser.rb +131 -0
  242. data/test/xml/test_attr.rb +38 -0
  243. data/test/xml/test_attribute_decl.rb +82 -0
  244. data/test/xml/test_builder.rb +167 -0
  245. data/test/xml/test_cdata.rb +38 -0
  246. data/test/xml/test_comment.rb +29 -0
  247. data/test/xml/test_document.rb +607 -0
  248. data/test/xml/test_document_encoding.rb +26 -0
  249. data/test/xml/test_document_fragment.rb +138 -0
  250. data/test/xml/test_dtd.rb +82 -0
  251. data/test/xml/test_dtd_encoding.rb +33 -0
  252. data/test/xml/test_element_content.rb +56 -0
  253. data/test/xml/test_element_decl.rb +73 -0
  254. data/test/xml/test_entity_decl.rb +83 -0
  255. data/test/xml/test_entity_reference.rb +21 -0
  256. data/test/xml/test_namespace.rb +68 -0
  257. data/test/xml/test_node.rb +889 -0
  258. data/test/xml/test_node_attributes.rb +34 -0
  259. data/test/xml/test_node_encoding.rb +107 -0
  260. data/test/xml/test_node_set.rb +531 -0
  261. data/test/xml/test_parse_options.rb +52 -0
  262. data/test/xml/test_processing_instruction.rb +30 -0
  263. data/test/xml/test_reader_encoding.rb +126 -0
  264. data/test/xml/test_relax_ng.rb +60 -0
  265. data/test/xml/test_schema.rb +89 -0
  266. data/test/xml/test_syntax_error.rb +27 -0
  267. data/test/xml/test_text.rb +30 -0
  268. data/test/xml/test_unparented_node.rb +381 -0
  269. data/test/xml/test_xpath.rb +106 -0
  270. metadata +430 -0
@@ -0,0 +1,142 @@
1
+ #--
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by rex 1.0.4
4
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
+ #++
6
+
7
+ module Nokogiri
8
+ module CSS
9
# Lexer for CSS selectors. This class is generated output (see the file
# header), so behavioural changes belong in the lexical definition file,
# not here. It turns an input string into [token_type, text] pairs, one
# per call to next_token.
+ class GeneratedTokenizer < GeneratedParser
10
+ require 'strscan'
11
+
12
# Raised by next_token when no rule matches or when @state is not a
# state this tokenizer knows about.
+ class ScanError < StandardError ; end
13
+
14
+ attr_reader :lineno
15
+ attr_reader :filename
16
+ attr_accessor :state
17
+
18
# Prepare to tokenize +str+: wraps it in a StringScanner and resets the
# line counter to 1 and the lexer state to nil.
+ def scan_setup(str)
19
+ @ss = StringScanner.new(str)
20
+ @lineno = 1
21
+ @state = nil
22
+ end
23
+
24
# Runs the given block and returns its result. Every rule in next_token
# builds its token through this method.
+ def action(&block)
25
+ yield
26
+ end
27
+
28
# Tokenize and parse +str+. do_parse is not defined in this class;
# presumably it comes from the Racc-generated superclass (confirm).
+ def scan_str(str)
29
+ scan_setup(str)
30
+ do_parse
31
+ end
32
+
33
# Remember +filename+ and load the whole file as the scanner input.
# NOTE(review): Kernel#open interprets a leading "|" as a command to run;
# File.open would avoid that if +filename+ can come from untrusted input.
+ def load_file( filename )
34
+ @filename = filename
35
+ open(filename, "r") do |f|
36
+ scan_setup(f.read)
37
+ end
38
+ end
39
+
40
# Same as scan_str, but reads the input from +filename+.
+ def scan_file( filename )
41
+ load_file(filename)
42
+ do_parse
43
+ end
44
+
45
+
46
# Return the next token as a two-element array [type, text], or nil once
# the input is exhausted. Rules are tried top to bottom and the first
# regexp that matches at the current scan position wins, so their order
# is significant.
+ def next_token
47
+ return if @ss.eos?
48
+
49
# Only the single character at the scan position is inspected here, so
# @lineno advances only when a token starts with "\n"; newlines consumed
# later inside the same token are not counted.
+ text = @ss.peek(1)
50
+ @lineno += 1 if text == "\n"
51
+ token = case @state
52
+ when nil
53
+ case
54
# Identifier immediately followed by "(" -- must precede the plain
# IDENT rule, which matches the same prefix.
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/))
55
+ action { [:FUNCTION, text] }
56
+
57
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/))
58
+ action { [:IDENT, text] }
59
+
60
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/))
61
+ action { [:HASH, text] }
62
+
63
# Attribute match operators. The two-character forms are listed before
# the bare "=" rule; surrounding whitespace is part of the token text.
+ when (text = @ss.scan(/[\s\r\n\f]*~=[\s\r\n\f]*/))
64
+ action { [:INCLUDES, text] }
65
+
66
+ when (text = @ss.scan(/[\s\r\n\f]*\|=[\s\r\n\f]*/))
67
+ action { [:DASHMATCH, text] }
68
+
69
+ when (text = @ss.scan(/[\s\r\n\f]*\^=[\s\r\n\f]*/))
70
+ action { [:PREFIXMATCH, text] }
71
+
72
+ when (text = @ss.scan(/[\s\r\n\f]*\$=[\s\r\n\f]*/))
73
+ action { [:SUFFIXMATCH, text] }
74
+
75
+ when (text = @ss.scan(/[\s\r\n\f]*\*=[\s\r\n\f]*/))
76
+ action { [:SUBSTRINGMATCH, text] }
77
+
78
+ when (text = @ss.scan(/[\s\r\n\f]*!=[\s\r\n\f]*/))
79
+ action { [:NOT_EQUAL, text] }
80
+
81
+ when (text = @ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
82
+ action { [:EQUAL, text] }
83
+
84
+ when (text = @ss.scan(/[\s\r\n\f]*\)/))
85
+ action { [:RPAREN, text] }
86
+
87
+ when (text = @ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
88
+ action { [:LSQUARE, text] }
89
+
90
+ when (text = @ss.scan(/[\s\r\n\f]*\]/))
91
+ action { [:RSQUARE, text] }
92
+
93
# Combinators. Each swallows the whitespace around it, so "a > b" yields
# GREATER rather than S GREATER S.
+ when (text = @ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
94
+ action { [:PLUS, text] }
95
+
96
+ when (text = @ss.scan(/[\s\r\n\f]*>[\s\r\n\f]*/))
97
+ action { [:GREATER, text] }
98
+
99
+ when (text = @ss.scan(/[\s\r\n\f]*,[\s\r\n\f]*/))
100
+ action { [:COMMA, text] }
101
+
102
+ when (text = @ss.scan(/[\s\r\n\f]*~[\s\r\n\f]*/))
103
+ action { [:TILDE, text] }
104
+
105
+ when (text = @ss.scan(/\:not\([\s\r\n\f]*/))
106
+ action { [:NOT, text] }
107
+
108
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
109
+ action { [:NUMBER, text] }
110
+
111
# "//" is tried before "/" so a double slash is not read as two SLASH
# tokens.
+ when (text = @ss.scan(/[\s\r\n\f]*\/\/[\s\r\n\f]*/))
112
+ action { [:DOUBLESLASH, text] }
113
+
114
+ when (text = @ss.scan(/[\s\r\n\f]*\/[\s\r\n\f]*/))
115
+ action { [:SLASH, text] }
116
+
117
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
118
+ action {[:UNICODE_RANGE, text] }
119
+
120
# Whitespace that was not absorbed by one of the rules above.
+ when (text = @ss.scan(/[\s\t\r\n\f]+/))
121
+ action { [:S, text] }
122
+
123
# Double- or single-quoted string; the quotes are kept in the token text.
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/))
124
+ action { [:STRING, text] }
125
+
126
# Catch-all: any other single character is returned as a token whose type
# is the character itself (for example "." or ":").
+ when (text = @ss.scan(/./))
127
+ action { [text, text] }
128
+
129
+ else
130
+ text = @ss.string[@ss.pos .. -1]
131
+ raise ScanError, "can not match: '" + text + "'"
132
+ end # if
133
+
134
# nil is the only state handled; any other @state value is an error.
+ else
135
+ raise ScanError, "undefined state: '" + state.to_s + "'"
136
+ end # case state
137
+ token
138
+ end # def next_token
139
+
140
+ end # class
141
+ end
142
+ end
@@ -0,0 +1,99 @@
1
+ module Nokogiri
2
+ module CSS
3
# A node of the syntax tree built for a CSS selector. +type+ identifies the
# kind of node (for example :ELEMENT_NAME or :PSEUDO_CLASS) and +value+ is
# the list of its children: other Nodes and/or plain values such as Strings.
class Node
  # Get the type of this node
  attr_accessor :type
  # Get the value of this node
  attr_accessor :value

  # Create a new Node with +type+ and +value+
  def initialize(type, value)
    @type  = type
    @value = value
  end

  # Accept +visitor+: calls visitor.visit_<type> (type downcased) with this
  # node and returns whatever that method returns.
  def accept(visitor)
    visitor.send(:"visit_#{type.to_s.downcase}", self)
  end

  ###
  # Convert this CSS node to xpath with +prefix+ using +visitor+.
  # The tree is preprocessed (and therefore mutated) before being visited.
  def to_xpath(prefix = '//', visitor = XPathVisitor.new)
    preprocess!
    prefix + visitor.accept(self)
  end

  # Preprocess this node tree in place and return self.
  #
  # Selectors of the form <tt>tag:nth-child(..)</tt>,
  # <tt>tag:nth-last-child(..)</tt>, <tt>tag:first-child</tt>,
  # <tt>tag:last-child</tt> and <tt>tag:only-child</tt> are rewritten so the
  # positional test applies to any element ('*') and the tag name is checked
  # by an extra <tt>self(tag)</tt> function.
  def preprocess!
    ### Deal with nth-child / nth-last-child
    nth_shape = [:CONDITIONAL_SELECTOR,
                 [:ELEMENT_NAME],
                 [:PSEUDO_CLASS, [:FUNCTION]]]
    find_by_type(nth_shape).each do |match|
      function = match.value[1].value[0]
      next unless function.value[0] =~ /^nth-(last-)?child/
      restrict_to_self(match, function)
    end

    ### Deal with first-child, last-child, only-child
    plain_shape = [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS]]
    find_by_type(plain_shape).each do |match|
      pseudo = match.value[1].value.first
      if ['first-child', 'last-child'].include?(pseudo)
        # 'first-child' => 'first(' and 'last-child' => 'last('
        which = pseudo.gsub(/-\w*$/, '')
        restrict_to_self(match, Node.new(:FUNCTION, ["#{which}("]))
      elsif 'only-child' == pseudo
        restrict_to_self(match, Node.new(:FUNCTION, ["#{pseudo}("]))
      end
    end

    self
  end

  # Find all nodes (this one and its descendants, depth first) whose
  # to_type shape equals +types+.
  def find_by_type(types)
    matches = []
    matches << self if to_type == types
    @value.each do |child|
      # concat appends in place instead of copying the accumulator
      matches.concat(child.find_by_type(types)) if child.respond_to?(:find_by_type)
    end
    matches
  end

  # Convert to_type: the nested array of node types under this node.
  # Children that are not nodes (plain values) are left out.
  def to_type
    [@type] + @value.map { |n|
      n.to_type if n.respond_to?(:to_type)
    }.compact
  end

  # Convert to array: like to_type, but plain values are kept, each wrapped
  # in its own one-element array.
  def to_a
    [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
  end

  private

  # Rewrite the conditional selector +match+ (element name + condition) so
  # the element becomes '*' and the condition becomes +positional+ combined
  # with a self(<original tag name>) function.
  def restrict_to_self(match, positional)
    element  = match.value[0]
    tag_name = element.value.first
    element.value = ['*']
    match.value[1] = Node.new(:COMBINATOR, [
      positional,
      Node.new(:FUNCTION, ['self(', tag_name])
    ])
  end
end
98
+ end
99
+ end
@@ -0,0 +1,82 @@
1
+ require 'thread'
2
+
3
+ module Nokogiri
4
+ module CSS
5
+ class Parser < GeneratedTokenizer
6
+ @cache_on = true
7
+ @cache = {}
8
+ @mutex = Mutex.new
9
+
10
class << self
  # Turn on CSS parse caching
  attr_accessor :cache_on
  alias :cache_on? :cache_on
  alias :set_cache :cache_on=

  # Get the css selector in +string+ from the cache.
  # Returns nil when caching is switched off or nothing is stored.
  def [](string)
    return unless @cache_on
    @mutex.synchronize { @cache[string] }
  end

  # Set the css selector in +string+ in the cache to +value+.
  # When caching is switched off nothing is stored; +value+ is returned
  # either way.
  def []=(string, value)
    return value unless @cache_on
    @mutex.synchronize { @cache[string] = value }
  end

  # Clear the cache
  def clear_cache
    @mutex.synchronize { @cache = {} }
  end

  # Execute +block+ without cache and return the block's result.
  # The previous cache setting is restored in +ensure+, so an exception
  # raised by the block no longer leaves caching switched off for good.
  def without_cache(&block)
    previous = @cache_on
    @cache_on = false
    block.call
  ensure
    @cache_on = previous
  end

  ###
  # Parse this CSS selector in +selector+.  Returns an AST.
  # Deprecated: prints a one-time warning to $stderr, then delegates to a
  # new parser instance.
  def parse(selector)
    @warned ||= false
    unless @warned
      $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
      @warned = true
    end
    new.parse selector
  end
end
52
+
53
# Build a CSS parser. +namespaces+ is kept in @namespaces for later use;
# the superclass initializer is then invoked without arguments.
def initialize(namespaces = {})
  @namespaces = namespaces
  super()
end
58
+ alias :parse :scan_str
59
+
60
# Get the xpath for +string+ using +options+
#
# Recognised options:
# [:prefix]  string prepended to every generated expression (default '//')
# [:visitor] object used to turn the AST into XPath (default XPathVisitor.new)
# [:ns]      only takes part in the cache key here
#
# Returns an Array with one XPath string per selector parsed from +string+.
# Results go through the class-level cache. The key is a structured array
# rather than a concatenated string, so different selector/prefix pairs
# cannot collide, and it includes the visitor's class so XPath generated by
# one kind of visitor is never handed back for another.
def xpath_for(string, options = {})
  visitor       = options[:visitor]
  visitor_class = visitor ? visitor.class : XPathVisitor
  key = [string, options[:ns], options[:prefix], visitor_class]

  cached = self.class[key]
  return cached if cached

  prefix    = options[:prefix] || '//'
  visitor ||= XPathVisitor.new
  self.class[key] = parse(string).map { |ast|
    ast.to_xpath(prefix, visitor)
  }
end
74
+
75
# Turn a CSS parse failure into an exception. The message names the
# offending value (+error_value+) and the last non-nil entry of
# +value_stack+, i.e. what was read just before it.
def on_error(error_token_id, error_value, value_stack)
  preceding = value_stack.compact.last
  message = "unexpected '#{error_value}' after '#{preceding}'"
  raise SyntaxError, message
end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,227 @@
1
+ class Nokogiri::CSS::GeneratedParser
2
+
3
+ token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
4
+ token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
5
+ token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE
6
+
7
+ rule
8
+ selector
9
+ : selector COMMA simple_selector_1toN {
10
+ result = [val.first, val.last].flatten
11
+ }
12
+ | simple_selector_1toN { result = val.flatten }
13
+ ;
14
+ combinator
15
+ : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
16
+ | GREATER { result = :CHILD_SELECTOR }
17
+ | TILDE { result = :PRECEDING_SELECTOR }
18
+ | S { result = :DESCENDANT_SELECTOR }
19
+ | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
20
+ | SLASH { result = :CHILD_SELECTOR }
21
+ ;
22
+ simple_selector
23
+ : element_name hcap_0toN {
24
+ result = if val[1].nil?
25
+ val.first
26
+ else
27
+ Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
28
+ end
29
+ }
30
+ | element_name hcap_1toN negation {
31
+ result = Node.new(:CONDITIONAL_SELECTOR,
32
+ [
33
+ val.first,
34
+ Node.new(:COMBINATOR, [val[1], val.last])
35
+ ]
36
+ )
37
+ }
38
+ | element_name negation {
39
+ result = Node.new(:CONDITIONAL_SELECTOR, val)
40
+ }
41
+ | function
42
+ | function attrib {
43
+ result = Node.new(:CONDITIONAL_SELECTOR, val)
44
+ }
45
+ | hcap_1toN negation {
46
+ result = Node.new(:CONDITIONAL_SELECTOR,
47
+ [
48
+ Node.new(:ELEMENT_NAME, ['*']),
49
+ Node.new(:COMBINATOR, val)
50
+ ]
51
+ )
52
+ }
53
+ | hcap_1toN {
54
+ result = Node.new(:CONDITIONAL_SELECTOR,
55
+ [Node.new(:ELEMENT_NAME, ['*']), val.first]
56
+ )
57
+ }
58
+ ;
59
+ simple_selector_1toN
60
+ : simple_selector combinator simple_selector_1toN {
61
+ result = Node.new(val[1], [val.first, val.last])
62
+ }
63
+ | simple_selector
64
+ ;
65
+ class
66
+ : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
67
+ ;
68
+ element_name
69
+ : namespace '|' IDENT {
70
+ result = Node.new(:ELEMENT_NAME,
71
+ [[val.first, val.last].compact.join(':')]
72
+ )
73
+ }
74
+ | IDENT {
75
+ name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
76
+ result = Node.new(:ELEMENT_NAME, [name])
77
+ }
78
+ | '*' { result = Node.new(:ELEMENT_NAME, val) }
79
+ ;
80
+ namespace
81
+ : IDENT { result = val[0] }
82
+ |
83
+ ;
84
+ attrib
85
+ : LSQUARE IDENT attrib_val_0or1 RSQUARE {
86
+ result = Node.new(:ATTRIBUTE_CONDITION,
87
+ [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
88
+ )
89
+ }
90
+ | LSQUARE function attrib_val_0or1 RSQUARE {
91
+ result = Node.new(:ATTRIBUTE_CONDITION,
92
+ [val[1]] + (val[2] || [])
93
+ )
94
+ }
95
+ | LSQUARE NUMBER RSQUARE {
96
+ # Non standard, but hpricot supports it.
97
+ result = Node.new(:PSEUDO_CLASS,
98
+ [Node.new(:FUNCTION, ['nth-child(', val[1]])]
99
+ )
100
+ }
101
+ ;
102
+ function
103
+ : FUNCTION RPAREN {
104
+ result = Node.new(:FUNCTION, [val.first.strip])
105
+ }
106
+ | FUNCTION expr RPAREN {
107
+ result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
108
+ }
109
+ | FUNCTION an_plus_b RPAREN {
110
+ result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
111
+ }
112
+ | NOT expr RPAREN {
113
+ result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
114
+ }
115
+ ;
116
+ expr
117
+ : NUMBER COMMA expr { result = [val.first, val.last] }
118
+ | STRING COMMA expr { result = [val.first, val.last] }
119
+ | IDENT COMMA expr { result = [val.first, val.last] }
120
+ | NUMBER
121
+ | STRING
122
+ | IDENT # even, odd
123
+ {
124
+ if val[0] == 'even'
125
+ val = ["2","n","+","0"]
126
+ result = Node.new(:AN_PLUS_B, val)
127
+ elsif val[0] == 'odd'
128
+ val = ["2","n","+","1"]
129
+ result = Node.new(:AN_PLUS_B, val)
130
+ else
131
+ # This is not CSS standard. It allows us to support this:
132
+ # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
133
+ # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
134
+ # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
135
+ result = val
136
+ end
137
+ }
138
+ ;
139
+ an_plus_b
140
+ : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
141
+ {
142
+ if val[1] == 'n'
143
+ result = Node.new(:AN_PLUS_B, val)
144
+ else
145
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
146
+ end
147
+ }
148
+ | IDENT PLUS NUMBER { # n+3, -n+3
149
+ if val[0] == 'n'
150
+ val.unshift("1")
151
+ result = Node.new(:AN_PLUS_B, val)
152
+ elsif val[0] == '-n'
153
+ val[0] = 'n'
154
+ val.unshift("-1")
155
+ result = Node.new(:AN_PLUS_B, val)
156
+ else
157
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
158
+ end
159
+ }
160
+ | NUMBER IDENT # 5n, -5n
161
+ {
162
+ if val[1] == 'n'
163
+ val << "+"
164
+ val << "0"
165
+ result = Node.new(:AN_PLUS_B, val)
166
+ else
167
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
168
+ end
169
+ }
170
+ ;
171
+ pseudo
172
+ : ':' function {
173
+ result = Node.new(:PSEUDO_CLASS, [val[1]])
174
+ }
175
+ | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
176
+ ;
177
+ hcap_0toN
178
+ : hcap_1toN
179
+ |
180
+ ;
181
+ hcap_1toN
182
+ : attribute_id hcap_1toN {
183
+ result = Node.new(:COMBINATOR, val)
184
+ }
185
+ | class hcap_1toN {
186
+ result = Node.new(:COMBINATOR, val)
187
+ }
188
+ | attrib hcap_1toN {
189
+ result = Node.new(:COMBINATOR, val)
190
+ }
191
+ | pseudo hcap_1toN {
192
+ result = Node.new(:COMBINATOR, val)
193
+ }
194
+ | attribute_id
195
+ | class
196
+ | attrib
197
+ | pseudo
198
+ ;
199
+ attribute_id
200
+ : HASH { result = Node.new(:ID, val) }
201
+ ;
202
+ attrib_val_0or1
203
+ : eql_incl_dash IDENT { result = [val.first, val[1]] }
204
+ | eql_incl_dash STRING { result = [val.first, val[1]] }
205
+ |
206
+ ;
207
+ eql_incl_dash
208
+ : EQUAL { result = :equal }
209
+ | PREFIXMATCH { result = :prefix_match }
210
+ | SUFFIXMATCH { result = :suffix_match }
211
+ | SUBSTRINGMATCH { result = :substring_match }
212
+ | NOT_EQUAL { result = :not_equal }
213
+ | INCLUDES { result = :includes }
214
+ | DASHMATCH { result = :dash_match }
215
+ ;
216
+ negation
217
+ : NOT negation_arg RPAREN {
218
+ result = Node.new(:NOT, [val[1]])
219
+ }
220
+ ;
221
+ negation_arg
222
+ : hcap_1toN
223
+ ;
224
+ end
225
+
226
+ ---- header
227
+