nokogiri 1.3.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (256) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +233 -0
  3. data/CHANGELOG.rdoc +222 -0
  4. data/Manifest.txt +247 -0
  5. data/README.ja.rdoc +103 -0
  6. data/README.rdoc +117 -0
  7. data/Rakefile +205 -0
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +89 -0
  10. data/ext/nokogiri/html_document.c +183 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +30 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser.c +57 -0
  17. data/ext/nokogiri/html_sax_parser.h +11 -0
  18. data/ext/nokogiri/iconv.dll +0 -0
  19. data/ext/nokogiri/libexslt.dll +0 -0
  20. data/ext/nokogiri/libxml2.dll +0 -0
  21. data/ext/nokogiri/libxslt.dll +0 -0
  22. data/ext/nokogiri/nokogiri.c +81 -0
  23. data/ext/nokogiri/nokogiri.h +149 -0
  24. data/ext/nokogiri/xml_attr.c +92 -0
  25. data/ext/nokogiri/xml_attr.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +53 -0
  27. data/ext/nokogiri/xml_cdata.h +9 -0
  28. data/ext/nokogiri/xml_comment.c +51 -0
  29. data/ext/nokogiri/xml_comment.h +9 -0
  30. data/ext/nokogiri/xml_document.c +308 -0
  31. data/ext/nokogiri/xml_document.h +21 -0
  32. data/ext/nokogiri/xml_document_fragment.c +48 -0
  33. data/ext/nokogiri/xml_document_fragment.h +10 -0
  34. data/ext/nokogiri/xml_dtd.c +102 -0
  35. data/ext/nokogiri/xml_dtd.h +8 -0
  36. data/ext/nokogiri/xml_entity_reference.c +50 -0
  37. data/ext/nokogiri/xml_entity_reference.h +9 -0
  38. data/ext/nokogiri/xml_io.c +24 -0
  39. data/ext/nokogiri/xml_io.h +10 -0
  40. data/ext/nokogiri/xml_namespace.c +69 -0
  41. data/ext/nokogiri/xml_namespace.h +12 -0
  42. data/ext/nokogiri/xml_node.c +928 -0
  43. data/ext/nokogiri/xml_node.h +14 -0
  44. data/ext/nokogiri/xml_node_set.c +386 -0
  45. data/ext/nokogiri/xml_node_set.h +9 -0
  46. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  47. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  48. data/ext/nokogiri/xml_reader.c +572 -0
  49. data/ext/nokogiri/xml_reader.h +10 -0
  50. data/ext/nokogiri/xml_relax_ng.c +106 -0
  51. data/ext/nokogiri/xml_relax_ng.h +9 -0
  52. data/ext/nokogiri/xml_sax_parser.c +336 -0
  53. data/ext/nokogiri/xml_sax_parser.h +10 -0
  54. data/ext/nokogiri/xml_sax_push_parser.c +86 -0
  55. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  56. data/ext/nokogiri/xml_schema.c +107 -0
  57. data/ext/nokogiri/xml_schema.h +9 -0
  58. data/ext/nokogiri/xml_syntax_error.c +203 -0
  59. data/ext/nokogiri/xml_syntax_error.h +12 -0
  60. data/ext/nokogiri/xml_text.c +47 -0
  61. data/ext/nokogiri/xml_text.h +9 -0
  62. data/ext/nokogiri/xml_xpath.c +53 -0
  63. data/ext/nokogiri/xml_xpath.h +11 -0
  64. data/ext/nokogiri/xml_xpath_context.c +252 -0
  65. data/ext/nokogiri/xml_xpath_context.h +9 -0
  66. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  67. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  68. data/ext/nokogiri/zlib1.dll +0 -0
  69. data/lib/action-nokogiri.rb +36 -0
  70. data/lib/nokogiri.rb +110 -0
  71. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  72. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  73. data/lib/nokogiri/css.rb +25 -0
  74. data/lib/nokogiri/css/generated_parser.rb +748 -0
  75. data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
  76. data/lib/nokogiri/css/node.rb +107 -0
  77. data/lib/nokogiri/css/parser.rb +82 -0
  78. data/lib/nokogiri/css/parser.y +227 -0
  79. data/lib/nokogiri/css/syntax_error.rb +7 -0
  80. data/lib/nokogiri/css/tokenizer.rb +11 -0
  81. data/lib/nokogiri/css/tokenizer.rex +54 -0
  82. data/lib/nokogiri/css/xpath_visitor.rb +172 -0
  83. data/lib/nokogiri/decorators.rb +2 -0
  84. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  85. data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
  86. data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
  87. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
  88. data/lib/nokogiri/decorators/slop.rb +33 -0
  89. data/lib/nokogiri/ffi/html/document.rb +37 -0
  90. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  91. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  92. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  93. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  94. data/lib/nokogiri/ffi/libxml.rb +314 -0
  95. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  96. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  97. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  98. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  102. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  103. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  105. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  106. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  107. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  108. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  109. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  110. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  111. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  112. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  113. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  114. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  115. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  117. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  118. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  119. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  120. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  121. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  122. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  123. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  124. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  125. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  126. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  127. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  128. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  129. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  130. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  131. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  132. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  133. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  134. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  135. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  136. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  137. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  138. data/lib/nokogiri/hpricot.rb +62 -0
  139. data/lib/nokogiri/html.rb +34 -0
  140. data/lib/nokogiri/html/builder.rb +35 -0
  141. data/lib/nokogiri/html/document.rb +71 -0
  142. data/lib/nokogiri/html/document_fragment.rb +15 -0
  143. data/lib/nokogiri/html/element_description.rb +23 -0
  144. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  145. data/lib/nokogiri/html/sax/parser.rb +47 -0
  146. data/lib/nokogiri/nokogiri.rb +1 -0
  147. data/lib/nokogiri/syntax_error.rb +4 -0
  148. data/lib/nokogiri/version.rb +29 -0
  149. data/lib/nokogiri/version_warning.rb +11 -0
  150. data/lib/nokogiri/xml.rb +62 -0
  151. data/lib/nokogiri/xml/attr.rb +9 -0
  152. data/lib/nokogiri/xml/builder.rb +254 -0
  153. data/lib/nokogiri/xml/cdata.rb +11 -0
  154. data/lib/nokogiri/xml/document.rb +100 -0
  155. data/lib/nokogiri/xml/document_fragment.rb +49 -0
  156. data/lib/nokogiri/xml/dtd.rb +11 -0
  157. data/lib/nokogiri/xml/entity_declaration.rb +11 -0
  158. data/lib/nokogiri/xml/fragment_handler.rb +55 -0
  159. data/lib/nokogiri/xml/namespace.rb +7 -0
  160. data/lib/nokogiri/xml/node.rb +745 -0
  161. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  162. data/lib/nokogiri/xml/node_set.rb +238 -0
  163. data/lib/nokogiri/xml/notation.rb +6 -0
  164. data/lib/nokogiri/xml/parse_options.rb +80 -0
  165. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  166. data/lib/nokogiri/xml/reader.rb +66 -0
  167. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  168. data/lib/nokogiri/xml/sax.rb +3 -0
  169. data/lib/nokogiri/xml/sax/document.rb +143 -0
  170. data/lib/nokogiri/xml/sax/parser.rb +101 -0
  171. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  172. data/lib/nokogiri/xml/schema.rb +65 -0
  173. data/lib/nokogiri/xml/syntax_error.rb +34 -0
  174. data/lib/nokogiri/xml/xpath.rb +10 -0
  175. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  176. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  177. data/lib/nokogiri/xslt.rb +48 -0
  178. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  179. data/lib/xsd/xmlparser/nokogiri.rb +64 -0
  180. data/tasks/test.rb +161 -0
  181. data/test/css/test_nthiness.rb +160 -0
  182. data/test/css/test_parser.rb +277 -0
  183. data/test/css/test_tokenizer.rb +176 -0
  184. data/test/css/test_xpath_visitor.rb +76 -0
  185. data/test/ffi/test_document.rb +35 -0
  186. data/test/files/address_book.rlx +12 -0
  187. data/test/files/address_book.xml +10 -0
  188. data/test/files/dont_hurt_em_why.xml +422 -0
  189. data/test/files/exslt.xml +8 -0
  190. data/test/files/exslt.xslt +35 -0
  191. data/test/files/po.xml +32 -0
  192. data/test/files/po.xsd +66 -0
  193. data/test/files/staff.xml +59 -0
  194. data/test/files/staff.xslt +32 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/helper.rb +123 -0
  197. data/test/hpricot/files/basic.xhtml +17 -0
  198. data/test/hpricot/files/boingboing.html +2266 -0
  199. data/test/hpricot/files/cy0.html +3653 -0
  200. data/test/hpricot/files/immob.html +400 -0
  201. data/test/hpricot/files/pace_application.html +1320 -0
  202. data/test/hpricot/files/tenderlove.html +16 -0
  203. data/test/hpricot/files/uswebgen.html +220 -0
  204. data/test/hpricot/files/utf8.html +1054 -0
  205. data/test/hpricot/files/week9.html +1723 -0
  206. data/test/hpricot/files/why.xml +19 -0
  207. data/test/hpricot/load_files.rb +11 -0
  208. data/test/hpricot/test_alter.rb +68 -0
  209. data/test/hpricot/test_builder.rb +20 -0
  210. data/test/hpricot/test_parser.rb +426 -0
  211. data/test/hpricot/test_paths.rb +15 -0
  212. data/test/hpricot/test_preserved.rb +77 -0
  213. data/test/hpricot/test_xml.rb +30 -0
  214. data/test/html/sax/test_parser.rb +52 -0
  215. data/test/html/test_builder.rb +156 -0
  216. data/test/html/test_document.rb +361 -0
  217. data/test/html/test_document_encoding.rb +46 -0
  218. data/test/html/test_document_fragment.rb +97 -0
  219. data/test/html/test_element_description.rb +95 -0
  220. data/test/html/test_named_characters.rb +14 -0
  221. data/test/html/test_node.rb +165 -0
  222. data/test/test_convert_xpath.rb +186 -0
  223. data/test/test_css_cache.rb +56 -0
  224. data/test/test_gc.rb +15 -0
  225. data/test/test_memory_leak.rb +77 -0
  226. data/test/test_nokogiri.rb +127 -0
  227. data/test/test_reader.rb +316 -0
  228. data/test/test_xslt_transforms.rb +131 -0
  229. data/test/xml/node/test_save_options.rb +20 -0
  230. data/test/xml/node/test_subclass.rb +44 -0
  231. data/test/xml/sax/test_parser.rb +169 -0
  232. data/test/xml/sax/test_push_parser.rb +92 -0
  233. data/test/xml/test_attr.rb +38 -0
  234. data/test/xml/test_builder.rb +73 -0
  235. data/test/xml/test_cdata.rb +38 -0
  236. data/test/xml/test_comment.rb +23 -0
  237. data/test/xml/test_document.rb +397 -0
  238. data/test/xml/test_document_encoding.rb +26 -0
  239. data/test/xml/test_document_fragment.rb +76 -0
  240. data/test/xml/test_dtd.rb +42 -0
  241. data/test/xml/test_dtd_encoding.rb +31 -0
  242. data/test/xml/test_entity_reference.rb +21 -0
  243. data/test/xml/test_namespace.rb +43 -0
  244. data/test/xml/test_node.rb +808 -0
  245. data/test/xml/test_node_attributes.rb +34 -0
  246. data/test/xml/test_node_encoding.rb +84 -0
  247. data/test/xml/test_node_set.rb +368 -0
  248. data/test/xml/test_parse_options.rb +52 -0
  249. data/test/xml/test_processing_instruction.rb +30 -0
  250. data/test/xml/test_reader_encoding.rb +126 -0
  251. data/test/xml/test_relax_ng.rb +60 -0
  252. data/test/xml/test_schema.rb +65 -0
  253. data/test/xml/test_text.rb +18 -0
  254. data/test/xml/test_unparented_node.rb +381 -0
  255. data/test/xml/test_xpath.rb +106 -0
  256. metadata +409 -0
@@ -0,0 +1,144 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by rex 1.0.1
4
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
+ #
6
+
7
module Nokogiri
  module CSS
    # Lexer for CSS selectors. The header of this file states that it is
    # generated by rex from "lib/nokogiri/css/tokenizer.rex".
    #
    # NOTE(review): because the file is generated, the File.open change in
    # #load_file presumably also has to be made on the generator side (or
    # re-applied after regeneration) -- confirm before regenerating.
    class GeneratedTokenizer < GeneratedParser
      require 'strscan'

      # Raised when the scanner cannot match the remaining input or is in an
      # unknown state.
      class ScanError < StandardError ; end

      # Line counter maintained by #scan_evaluate.
      attr_reader :lineno
      # Name of the file most recently passed to #load_file.
      attr_reader :filename

      # Hook invoked at the start of #scan_evaluate; does nothing here.
      def scan_setup ; end

      # Runs the given block and returns its value.
      def action &block
        yield
      end

      # Tokenizes +str+ and then runs the parser (+do_parse+) over the tokens.
      def scan_str( str )
        scan_evaluate str
        do_parse
      end

      # Reads +filename+ and tokenizes its contents.
      #
      # File.open is used instead of Kernel#open so that a name beginning
      # with "|" is treated as a path and can never spawn a subprocess.
      def load_file( filename )
        @filename = filename
        File.open(filename, "r") do |f|
          scan_evaluate f.read
        end
      end

      # Tokenizes the contents of +filename+ and then runs the parser.
      def scan_file( filename )
        load_file filename
        do_parse
      end

      # Removes and returns the next queued token, or nil when none are left.
      def next_token
        @rex_tokens.shift
      end

      # Splits +str+ into [type, text] pairs and stores them in @rex_tokens.
      # The +when+ clauses are tried in order, so their order is significant.
      # Raises ScanError when nothing matches.
      def scan_evaluate( str )
        scan_setup
        @rex_tokens = []
        @lineno  =  1
        ss = StringScanner.new(str)
        state = nil
        until ss.eos?
          text = ss.peek(1)
          # NOTE(review): only a newline that *begins* a token is counted;
          # newlines swallowed inside a longer match are not -- confirm
          # whether callers rely on @lineno being exact.
          @lineno  +=  1  if text == "\n"
          case state
          when nil
            case
            when (text = ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/))
              @rex_tokens.push action { [:FUNCTION, text] }

            when (text = ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/))
              @rex_tokens.push action { [:IDENT, text] }

            when (text = ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/))
              @rex_tokens.push action { [:HASH, text] }

            when (text = ss.scan(/[\s\r\n\f]*~=[\s\r\n\f]*/))
              @rex_tokens.push action { [:INCLUDES, text] }

            when (text = ss.scan(/[\s\r\n\f]*\|=[\s\r\n\f]*/))
              @rex_tokens.push action { [:DASHMATCH, text] }

            when (text = ss.scan(/[\s\r\n\f]*\^=[\s\r\n\f]*/))
              @rex_tokens.push action { [:PREFIXMATCH, text] }

            when (text = ss.scan(/[\s\r\n\f]*\$=[\s\r\n\f]*/))
              @rex_tokens.push action { [:SUFFIXMATCH, text] }

            when (text = ss.scan(/[\s\r\n\f]*\*=[\s\r\n\f]*/))
              @rex_tokens.push action { [:SUBSTRINGMATCH, text] }

            when (text = ss.scan(/[\s\r\n\f]*!=[\s\r\n\f]*/))
              @rex_tokens.push action { [:NOT_EQUAL, text] }

            when (text = ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
              @rex_tokens.push action { [:EQUAL, text] }

            when (text = ss.scan(/[\s\r\n\f]*\)/))
              @rex_tokens.push action { [:RPAREN, text] }

            when (text = ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
              @rex_tokens.push action { [:LSQUARE, text] }

            when (text = ss.scan(/[\s\r\n\f]*\]/))
              @rex_tokens.push action { [:RSQUARE, text] }

            when (text = ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
              @rex_tokens.push action { [:PLUS, text] }

            when (text = ss.scan(/[\s\r\n\f]*>[\s\r\n\f]*/))
              @rex_tokens.push action { [:GREATER, text] }

            when (text = ss.scan(/[\s\r\n\f]*,[\s\r\n\f]*/))
              @rex_tokens.push action { [:COMMA, text] }

            when (text = ss.scan(/[\s\r\n\f]*~[\s\r\n\f]*/))
              @rex_tokens.push action { [:TILDE, text] }

            when (text = ss.scan(/\:not\([\s\r\n\f]*/))
              @rex_tokens.push action { [:NOT, text] }

            when (text = ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
              @rex_tokens.push action { [:NUMBER, text] }

            when (text = ss.scan(/[\s\r\n\f]*\/\/[\s\r\n\f]*/))
              @rex_tokens.push action { [:DOUBLESLASH, text] }

            when (text = ss.scan(/[\s\r\n\f]*\/[\s\r\n\f]*/))
              @rex_tokens.push action { [:SLASH, text] }

            when (text = ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
              @rex_tokens.push action {[:UNICODE_RANGE, text] }

            when (text = ss.scan(/[\s\t\r\n\f]+/))
              @rex_tokens.push action { [:S, text] }

            when (text = ss.scan(/"([^\n\r\f"]|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/))
              @rex_tokens.push action { [:STRING, text] }

            when (text = ss.scan(/./))
              # Any other single character becomes a token whose type is the
              # character itself.
              @rex_tokens.push action { [text, text] }

            else
              text = ss.string[ss.pos .. -1]
              raise  ScanError, "can not match: '" + text + "'"
            end  # if

          else
            raise  ScanError, "undefined state: '" + state.to_s + "'"
          end  # case state
        end  # until ss
      end  # def scan_evaluate

    end # class
  end
end
@@ -0,0 +1,107 @@
1
module Nokogiri
  module CSS
    # A node of the tree built by the CSS selector parser. Each node has a
    # +type+ (a Symbol such as :ELEMENT_NAME) and a +value+, an Array whose
    # entries are either child Nodes or plain objects such as Strings.
    class Node
      # Get the type of this node
      attr_accessor :type
      # Get the value of this node
      attr_accessor :value

      # Create a new Node with +type+ and +value+
      def initialize type, value
        @type = type
        @value = value
      end

      # Accept +visitor+: dispatches to visitor.visit_<downcased type>(self)
      # and returns whatever that method returns.
      def accept visitor
        visitor.send(:"visit_#{type.to_s.downcase}", self)
      end

      ###
      # Convert this CSS node to xpath with +prefix+ using +visitor+.
      # The tree is rewritten in place by #preprocess! first.
      def to_xpath prefix = '//', visitor = XPathVisitor.new
        self.preprocess!
        prefix + visitor.accept(self)
      end

      # Preprocess this node tree in place and return +self+.
      #
      # A selector of the form <tag>:<child-position pseudo class> is turned
      # into "*" plus a COMBINATOR holding the positional test followed by a
      # self(<tag>) FUNCTION.
      def preprocess!
        ### Deal with nth-child and nth-last-child
        find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME],
            [:PSEUDO_CLASS,
              [:FUNCTION]
            ]
          ]
        ).each do |match|
          function = match.value[1].value[0]
          if function.value[0] =~ /^nth-(?:last-)?child/
            restrict_to_self(match, function)
          end
        end

        ### Deal with first-child, last-child, only-child
        find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME], [:PSEUDO_CLASS]
          ]
        ).each do |match|
          pseudo = match.value[1].value.first
          if ['first-child', 'last-child'].include?(pseudo)
            # 'first-child' => 'first(', 'last-child' => 'last('
            which = pseudo.gsub(/-\w*$/, '')
            restrict_to_self(match, Node.new(:FUNCTION, ["#{which}("]))
          elsif 'only-child' == pseudo
            restrict_to_self(match, Node.new(:FUNCTION, ["#{pseudo}("]))
          end
        end

        self
      end

      # Find every node in this subtree (including this node) whose
      # #to_type equals +types+. Returns an Array of Nodes.
      def find_by_type types
        matches = []
        matches << self if to_type == types
        @value.each do |v|
          # concat appends in place instead of rebuilding the array
          matches.concat(v.find_by_type(types)) if v.respond_to?(:find_by_type)
        end
        matches
      end

      # Nested array of node types only; values that do not respond to
      # +to_type+ are dropped.
      def to_type
        [@type] + @value.map { |n|
          n.to_type if n.respond_to?(:to_type)
        }.compact
      end

      # Convert to array: the type followed by each value converted with
      # +to_a+ when available, or wrapped in a one-element array otherwise.
      def to_a
        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
      end

      private

      # Rewrite +match+, a node whose value is [ELEMENT_NAME node, condition],
      # so that the element name becomes '*' and the condition becomes a
      # COMBINATOR of +condition+ and a self(<original tag name>) FUNCTION.
      def restrict_to_self match, condition
        tag_name = match.value[0].value.first
        match.value[0].value = ['*']
        match.value[1] = Node.new(:COMBINATOR, [
          condition,
          Node.new(:FUNCTION, ['self(', tag_name])
        ])
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require 'thread'
2
+
3
module Nokogiri
  module CSS
    # CSS selector parser with a class-level cache of computed XPath
    # expressions. Cache access goes through a Mutex.
    class Parser < GeneratedTokenizer
      @cache_on = true
      @cache    = {}
      @mutex    = Mutex.new

      class << self
        # Turn on CSS parse caching
        attr_accessor :cache_on
        alias :cache_on? :cache_on
        alias :set_cache :cache_on=

        # Get the css selector in +string+ from the cache.
        # Returns nil when caching is off or nothing is stored for +string+.
        def [] string
          return unless @cache_on
          @mutex.synchronize { @cache[string] }
        end

        # Set the css selector in +string+ in the cache to +value+.
        # Returns +value+; nothing is stored when caching is off.
        def []= string, value
          return value unless @cache_on
          @mutex.synchronize { @cache[string] = value }
        end

        # Clear the cache
        def clear_cache
          @mutex.synchronize { @cache = {} }
        end

        # Execute +block+ without cache.
        #
        # The previous cache setting is restored even when +block+ raises;
        # otherwise a single failing block would leave caching disabled for
        # the rest of the process. As before, the previous setting is the
        # return value.
        def without_cache &block
          tmp = @cache_on
          @cache_on = false
          begin
            block.call
          ensure
            @cache_on = tmp
          end
          tmp
        end

        ###
        # Parse this CSS selector in +selector+.  Returns an AST.
        # Deprecated: prints a one-time warning to $stderr.
        def parse selector
          @warned ||= false
          unless @warned
            $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse()')
            @warned = true
          end
          new.parse selector
        end
      end

      # Create a new CSS parser with respect to +namespaces+
      def initialize namespaces = {}
        @namespaces = namespaces
        super()
      end
      alias :parse :scan_str

      # Get the xpath for +string+ using +options+
      # (:ns, :prefix and :visitor are read).
      #
      # Returns an Array with one XPath string per parsed selector. Results
      # are cached on the class. The cache key includes :prefix and the
      # visitor's class when they are supplied, because both change the
      # generated XPath; without them the same selector requested with a
      # different prefix would get a stale entry. When neither is supplied
      # the key is the same as it always was (+string+ plus the :ns text).
      def xpath_for string, options={}
        key = string + options[:ns].to_s + options[:prefix].to_s
        key += options[:visitor].class.to_s if options[:visitor]
        v = self.class[key]
        return v if v

        args = [
          options[:prefix] || '//',
          options[:visitor] || XPathVisitor.new
        ]
        self.class[key] = parse(string).map { |ast|
          ast.to_xpath(*args)
        }
      end

      # On CSS parser error, raise an exception
      def on_error error_token_id, error_value, value_stack
        after = value_stack.compact.last
        raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
      end
    end
  end
end
@@ -0,0 +1,227 @@
1
# Racc grammar for CSS selectors. The actions build Node objects (or plain
# arrays / symbols) that later stages turn into XPath.
class Nokogiri::CSS::GeneratedParser

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE

rule
  # A comma separated list of selectors; the result is a flat array.
  selector
    : selector COMMA simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | simple_selector_1toN { result = val.flatten }
    ;
  # The result is the Symbol later used as the type of the combining Node.
  combinator
    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER { result = :CHILD_SELECTOR }
    | TILDE { result = :PRECEDING_SELECTOR }
    | S { result = :DESCENDANT_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  simple_selector
    : element_name hcap_0toN {
        result =  if val[1].nil?
                    val.first
                  else
                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
                  end
      }
    | element_name hcap_1toN negation {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            val.first,
            Node.new(:COMBINATOR, [val[1], val.last])
          ]
        )
      }
    | element_name negation {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN negation {
        # No element name given: match any element ('*').
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            Node.new(:ELEMENT_NAME, ['*']),
            Node.new(:COMBINATOR, val)
          ]
        )
      }
    | hcap_1toN {
        # No element name given: match any element ('*').
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # One or more simple selectors joined by combinators (right recursive).
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
    ;
  element_name
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        # Prefix the name with "xmlns:" when an 'xmlns' namespace was given.
        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
        result = Node.new(:ELEMENT_NAME, [name])
      }
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  namespace
    : IDENT { result = val[0] }
    |
    ;
  attrib
    : LSQUARE IDENT attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
        )
      }
    | LSQUARE function attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE NUMBER RSQUARE {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
        )
      }
    ;
  function
    : FUNCTION RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION an_plus_b RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  expr
    : NUMBER COMMA expr { result = [val.first, val.last] }
    | STRING COMMA expr { result = [val.first, val.last] }
    | IDENT COMMA expr { result = [val.first, val.last] }
    | NUMBER
    | STRING
    | IDENT                               # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          # This is not CSS standard.  It allows us to support this:
          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
          result = val
        end
      }
    ;
  # Every alternative yields an AN_PLUS_B node with four values: a, 'n', '+', b.
  an_plus_b
    : NUMBER IDENT PLUS NUMBER          # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER {               # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # In this alternative the IDENT is val[0]; val[1] is the PLUS token.
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT                      # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more of: #id, .class, [attrib], :pseudo.  Two or more are chained
  # into nested COMBINATOR nodes.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  attribute_id
    : HASH { result = Node.new(:ID, val) }
    ;
  attrib_val_0or1
    : eql_incl_dash IDENT { result = [val.first, val[1]] }
    | eql_incl_dash STRING { result = [val.first, val[1]] }
    |
    ;
  eql_incl_dash
    : EQUAL           { result = :equal }
    | PREFIXMATCH     { result = :prefix_match }
    | SUFFIXMATCH     { result = :suffix_match }
    | SUBSTRINGMATCH  { result = :substring_match }
    | NOT_EQUAL       { result = :not_equal }
    | INCLUDES        { result = :includes }
    | DASHMATCH       { result = :dash_match }
    ;
  negation
    : NOT negation_arg RPAREN {
        result = Node.new(:NOT, [val[1]])
      }
    ;
  negation_arg
    : hcap_1toN
    ;
end
225
+
226
+ ---- header
227
+