nokogiri-maglev- 1.5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. data/.autotest +26 -0
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.ja.rdoc +544 -0
  4. data/CHANGELOG.rdoc +532 -0
  5. data/Manifest.txt +283 -0
  6. data/README.ja.rdoc +106 -0
  7. data/README.rdoc +174 -0
  8. data/Rakefile +171 -0
  9. data/bin/nokogiri +53 -0
  10. data/ext/nokogiri/depend +358 -0
  11. data/ext/nokogiri/extconf.rb +124 -0
  12. data/ext/nokogiri/html_document.c +154 -0
  13. data/ext/nokogiri/html_document.h +10 -0
  14. data/ext/nokogiri/html_element_description.c +276 -0
  15. data/ext/nokogiri/html_element_description.h +10 -0
  16. data/ext/nokogiri/html_entity_lookup.c +32 -0
  17. data/ext/nokogiri/html_entity_lookup.h +8 -0
  18. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  19. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  20. data/ext/nokogiri/nokogiri.c +115 -0
  21. data/ext/nokogiri/nokogiri.h +160 -0
  22. data/ext/nokogiri/st.c +576 -0
  23. data/ext/nokogiri/xml_attr.c +94 -0
  24. data/ext/nokogiri/xml_attr.h +9 -0
  25. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  26. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  27. data/ext/nokogiri/xml_cdata.c +56 -0
  28. data/ext/nokogiri/xml_cdata.h +9 -0
  29. data/ext/nokogiri/xml_comment.c +54 -0
  30. data/ext/nokogiri/xml_comment.h +9 -0
  31. data/ext/nokogiri/xml_document.c +478 -0
  32. data/ext/nokogiri/xml_document.h +23 -0
  33. data/ext/nokogiri/xml_document_fragment.c +48 -0
  34. data/ext/nokogiri/xml_document_fragment.h +10 -0
  35. data/ext/nokogiri/xml_dtd.c +202 -0
  36. data/ext/nokogiri/xml_dtd.h +10 -0
  37. data/ext/nokogiri/xml_element_content.c +123 -0
  38. data/ext/nokogiri/xml_element_content.h +10 -0
  39. data/ext/nokogiri/xml_element_decl.c +69 -0
  40. data/ext/nokogiri/xml_element_decl.h +9 -0
  41. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  42. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  43. data/ext/nokogiri/xml_entity_decl.c +110 -0
  44. data/ext/nokogiri/xml_entity_decl.h +10 -0
  45. data/ext/nokogiri/xml_entity_reference.c +52 -0
  46. data/ext/nokogiri/xml_entity_reference.h +9 -0
  47. data/ext/nokogiri/xml_io.c +56 -0
  48. data/ext/nokogiri/xml_io.h +11 -0
  49. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  50. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  51. data/ext/nokogiri/xml_namespace.c +84 -0
  52. data/ext/nokogiri/xml_namespace.h +13 -0
  53. data/ext/nokogiri/xml_node.c +1397 -0
  54. data/ext/nokogiri/xml_node.h +13 -0
  55. data/ext/nokogiri/xml_node_set.c +418 -0
  56. data/ext/nokogiri/xml_node_set.h +9 -0
  57. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  58. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  59. data/ext/nokogiri/xml_reader.c +684 -0
  60. data/ext/nokogiri/xml_reader.h +10 -0
  61. data/ext/nokogiri/xml_relax_ng.c +162 -0
  62. data/ext/nokogiri/xml_relax_ng.h +9 -0
  63. data/ext/nokogiri/xml_sax_parser.c +293 -0
  64. data/ext/nokogiri/xml_sax_parser.h +39 -0
  65. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  66. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  67. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  68. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  69. data/ext/nokogiri/xml_schema.c +205 -0
  70. data/ext/nokogiri/xml_schema.h +9 -0
  71. data/ext/nokogiri/xml_syntax_error.c +58 -0
  72. data/ext/nokogiri/xml_syntax_error.h +13 -0
  73. data/ext/nokogiri/xml_text.c +50 -0
  74. data/ext/nokogiri/xml_text.h +9 -0
  75. data/ext/nokogiri/xml_xpath_context.c +315 -0
  76. data/ext/nokogiri/xml_xpath_context.h +9 -0
  77. data/ext/nokogiri/xslt_stylesheet.c +265 -0
  78. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  79. data/lib/nokogiri.rb +127 -0
  80. data/lib/nokogiri/css.rb +27 -0
  81. data/lib/nokogiri/css/node.rb +99 -0
  82. data/lib/nokogiri/css/parser.rb +677 -0
  83. data/lib/nokogiri/css/parser.y +237 -0
  84. data/lib/nokogiri/css/parser_extras.rb +91 -0
  85. data/lib/nokogiri/css/syntax_error.rb +7 -0
  86. data/lib/nokogiri/css/tokenizer.rb +152 -0
  87. data/lib/nokogiri/css/tokenizer.rex +55 -0
  88. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  89. data/lib/nokogiri/decorators/slop.rb +35 -0
  90. data/lib/nokogiri/html.rb +36 -0
  91. data/lib/nokogiri/html/builder.rb +35 -0
  92. data/lib/nokogiri/html/document.rb +213 -0
  93. data/lib/nokogiri/html/document_fragment.rb +41 -0
  94. data/lib/nokogiri/html/element_description.rb +23 -0
  95. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  96. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  97. data/lib/nokogiri/html/sax/parser.rb +52 -0
  98. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  99. data/lib/nokogiri/syntax_error.rb +4 -0
  100. data/lib/nokogiri/version.rb +88 -0
  101. data/lib/nokogiri/xml.rb +67 -0
  102. data/lib/nokogiri/xml/attr.rb +14 -0
  103. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  104. data/lib/nokogiri/xml/builder.rb +426 -0
  105. data/lib/nokogiri/xml/cdata.rb +11 -0
  106. data/lib/nokogiri/xml/character_data.rb +7 -0
  107. data/lib/nokogiri/xml/document.rb +234 -0
  108. data/lib/nokogiri/xml/document_fragment.rb +98 -0
  109. data/lib/nokogiri/xml/dtd.rb +22 -0
  110. data/lib/nokogiri/xml/element_content.rb +36 -0
  111. data/lib/nokogiri/xml/element_decl.rb +13 -0
  112. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  113. data/lib/nokogiri/xml/namespace.rb +13 -0
  114. data/lib/nokogiri/xml/node.rb +915 -0
  115. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  116. data/lib/nokogiri/xml/node_set.rb +357 -0
  117. data/lib/nokogiri/xml/notation.rb +6 -0
  118. data/lib/nokogiri/xml/parse_options.rb +93 -0
  119. data/lib/nokogiri/xml/pp.rb +2 -0
  120. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  121. data/lib/nokogiri/xml/pp/node.rb +56 -0
  122. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  123. data/lib/nokogiri/xml/reader.rb +112 -0
  124. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  125. data/lib/nokogiri/xml/sax.rb +4 -0
  126. data/lib/nokogiri/xml/sax/document.rb +164 -0
  127. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  128. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  129. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  130. data/lib/nokogiri/xml/schema.rb +63 -0
  131. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  132. data/lib/nokogiri/xml/text.rb +9 -0
  133. data/lib/nokogiri/xml/xpath.rb +10 -0
  134. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  135. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  136. data/lib/nokogiri/xslt.rb +52 -0
  137. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  138. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  139. data/nokogiri_help_responses.md +40 -0
  140. data/tasks/cross_compile.rb +152 -0
  141. data/tasks/nokogiri.org.rb +18 -0
  142. data/tasks/test.rb +94 -0
  143. data/test/css/test_nthiness.rb +159 -0
  144. data/test/css/test_parser.rb +303 -0
  145. data/test/css/test_tokenizer.rb +198 -0
  146. data/test/css/test_xpath_visitor.rb +85 -0
  147. data/test/decorators/test_slop.rb +16 -0
  148. data/test/files/2ch.html +108 -0
  149. data/test/files/address_book.rlx +12 -0
  150. data/test/files/address_book.xml +10 -0
  151. data/test/files/bar/bar.xsd +4 -0
  152. data/test/files/dont_hurt_em_why.xml +422 -0
  153. data/test/files/encoding.html +82 -0
  154. data/test/files/encoding.xhtml +84 -0
  155. data/test/files/exslt.xml +8 -0
  156. data/test/files/exslt.xslt +35 -0
  157. data/test/files/foo/foo.xsd +4 -0
  158. data/test/files/metacharset.html +10 -0
  159. data/test/files/noencoding.html +47 -0
  160. data/test/files/po.xml +32 -0
  161. data/test/files/po.xsd +66 -0
  162. data/test/files/shift_jis.html +10 -0
  163. data/test/files/shift_jis.xml +5 -0
  164. data/test/files/snuggles.xml +3 -0
  165. data/test/files/staff.dtd +10 -0
  166. data/test/files/staff.xml +59 -0
  167. data/test/files/staff.xslt +32 -0
  168. data/test/files/tlm.html +850 -0
  169. data/test/files/valid_bar.xml +2 -0
  170. data/test/helper.rb +173 -0
  171. data/test/html/sax/test_parser.rb +139 -0
  172. data/test/html/sax/test_parser_context.rb +48 -0
  173. data/test/html/test_builder.rb +165 -0
  174. data/test/html/test_document.rb +472 -0
  175. data/test/html/test_document_encoding.rb +138 -0
  176. data/test/html/test_document_fragment.rb +255 -0
  177. data/test/html/test_element_description.rb +101 -0
  178. data/test/html/test_named_characters.rb +14 -0
  179. data/test/html/test_node.rb +193 -0
  180. data/test/html/test_node_encoding.rb +27 -0
  181. data/test/test_convert_xpath.rb +135 -0
  182. data/test/test_css_cache.rb +45 -0
  183. data/test/test_encoding_handler.rb +46 -0
  184. data/test/test_memory_leak.rb +72 -0
  185. data/test/test_nokogiri.rb +133 -0
  186. data/test/test_reader.rb +425 -0
  187. data/test/test_soap4r_sax.rb +52 -0
  188. data/test/test_xslt_transforms.rb +193 -0
  189. data/test/xml/node/test_save_options.rb +28 -0
  190. data/test/xml/node/test_subclass.rb +44 -0
  191. data/test/xml/sax/test_parser.rb +338 -0
  192. data/test/xml/sax/test_parser_context.rb +113 -0
  193. data/test/xml/sax/test_push_parser.rb +156 -0
  194. data/test/xml/test_attr.rb +65 -0
  195. data/test/xml/test_attribute_decl.rb +86 -0
  196. data/test/xml/test_builder.rb +227 -0
  197. data/test/xml/test_cdata.rb +50 -0
  198. data/test/xml/test_comment.rb +29 -0
  199. data/test/xml/test_document.rb +697 -0
  200. data/test/xml/test_document_encoding.rb +26 -0
  201. data/test/xml/test_document_fragment.rb +192 -0
  202. data/test/xml/test_dtd.rb +107 -0
  203. data/test/xml/test_dtd_encoding.rb +33 -0
  204. data/test/xml/test_element_content.rb +56 -0
  205. data/test/xml/test_element_decl.rb +73 -0
  206. data/test/xml/test_entity_decl.rb +122 -0
  207. data/test/xml/test_entity_reference.rb +21 -0
  208. data/test/xml/test_namespace.rb +70 -0
  209. data/test/xml/test_node.rb +917 -0
  210. data/test/xml/test_node_attributes.rb +34 -0
  211. data/test/xml/test_node_encoding.rb +107 -0
  212. data/test/xml/test_node_reparenting.rb +334 -0
  213. data/test/xml/test_node_set.rb +742 -0
  214. data/test/xml/test_parse_options.rb +52 -0
  215. data/test/xml/test_processing_instruction.rb +30 -0
  216. data/test/xml/test_reader_encoding.rb +126 -0
  217. data/test/xml/test_relax_ng.rb +60 -0
  218. data/test/xml/test_schema.rb +94 -0
  219. data/test/xml/test_syntax_error.rb +12 -0
  220. data/test/xml/test_text.rb +47 -0
  221. data/test/xml/test_unparented_node.rb +381 -0
  222. data/test/xml/test_xpath.rb +237 -0
  223. data/test/xslt/test_custom_functions.rb +94 -0
  224. data/test/xslt/test_exception_handling.rb +37 -0
  225. metadata +548 -0
@@ -0,0 +1,237 @@
1
+ class Nokogiri::CSS::Parser
2
+
3
+ token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
4
+ token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
5
+ token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS
6
+
7
+ rule
8
+ selector
9
+ : selector COMMA simple_selector_1toN {
10
+ result = [val.first, val.last].flatten
11
+ }
12
+ | simple_selector_1toN { result = val.flatten }
13
+ ;
14
+ combinator
15
+ : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
16
+ | GREATER { result = :CHILD_SELECTOR }
17
+ | TILDE { result = :PRECEDING_SELECTOR }
18
+ | S { result = :DESCENDANT_SELECTOR }
19
+ | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
20
+ | SLASH { result = :CHILD_SELECTOR }
21
+ ;
22
+ simple_selector
23
+ : element_name hcap_0toN {
24
+ result = if val[1].nil?
25
+ val.first
26
+ else
27
+ Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
28
+ end
29
+ }
30
+ | element_name hcap_1toN negation {
31
+ result = Node.new(:CONDITIONAL_SELECTOR,
32
+ [
33
+ val.first,
34
+ Node.new(:COMBINATOR, [val[1], val.last])
35
+ ]
36
+ )
37
+ }
38
+ | element_name negation {
39
+ result = Node.new(:CONDITIONAL_SELECTOR, val)
40
+ }
41
+ | function
42
+ | function pseudo {
43
+ result = Node.new(:CONDITIONAL_SELECTOR, val)
44
+ }
45
+ | function attrib {
46
+ result = Node.new(:CONDITIONAL_SELECTOR, val)
47
+ }
48
+ | hcap_1toN negation {
49
+ result = Node.new(:CONDITIONAL_SELECTOR,
50
+ [
51
+ Node.new(:ELEMENT_NAME, ['*']),
52
+ Node.new(:COMBINATOR, val)
53
+ ]
54
+ )
55
+ }
56
+ | hcap_1toN {
57
+ result = Node.new(:CONDITIONAL_SELECTOR,
58
+ [Node.new(:ELEMENT_NAME, ['*']), val.first]
59
+ )
60
+ }
61
+ ;
62
+ simple_selector_1toN
63
+ : simple_selector combinator simple_selector_1toN {
64
+ result = Node.new(val[1], [val.first, val.last])
65
+ }
66
+ | simple_selector
67
+ ;
68
+ class
69
+ : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
70
+ ;
71
+ element_name
72
+ : namespace '|' IDENT {
73
+ result = Node.new(:ELEMENT_NAME,
74
+ [[val.first, val.last].compact.join(':')]
75
+ )
76
+ }
77
+ | IDENT {
78
+ name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
79
+ result = Node.new(:ELEMENT_NAME, [name])
80
+ }
81
+ | '*' { result = Node.new(:ELEMENT_NAME, val) }
82
+ ;
83
+ namespace
84
+ : IDENT { result = val[0] }
85
+ |
86
+ ;
87
+ attrib
88
+ : LSQUARE IDENT attrib_val_0or1 RSQUARE {
89
+ result = Node.new(:ATTRIBUTE_CONDITION,
90
+ [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
91
+ )
92
+ }
93
+ | LSQUARE function attrib_val_0or1 RSQUARE {
94
+ result = Node.new(:ATTRIBUTE_CONDITION,
95
+ [val[1]] + (val[2] || [])
96
+ )
97
+ }
98
+ | LSQUARE NUMBER RSQUARE {
99
+ # Non standard, but hpricot supports it.
100
+ result = Node.new(:PSEUDO_CLASS,
101
+ [Node.new(:FUNCTION, ['nth-child(', val[1]])]
102
+ )
103
+ }
104
+ ;
105
+ function
106
+ : FUNCTION RPAREN {
107
+ result = Node.new(:FUNCTION, [val.first.strip])
108
+ }
109
+ | FUNCTION expr RPAREN {
110
+ result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
111
+ }
112
+ | FUNCTION an_plus_b RPAREN {
113
+ result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
114
+ }
115
+ | NOT expr RPAREN {
116
+ result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
117
+ }
118
+ | HAS selector RPAREN {
119
+ result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
120
+ }
121
+ ;
122
+ expr
123
+ : NUMBER COMMA expr { result = [val.first, val.last] }
124
+ | STRING COMMA expr { result = [val.first, val.last] }
125
+ | IDENT COMMA expr { result = [val.first, val.last] }
126
+ | NUMBER
127
+ | STRING
128
+ | IDENT # even, odd
129
+ {
130
+ if val[0] == 'even'
131
+ val = ["2","n","+","0"]
132
+ result = Node.new(:AN_PLUS_B, val)
133
+ elsif val[0] == 'odd'
134
+ val = ["2","n","+","1"]
135
+ result = Node.new(:AN_PLUS_B, val)
136
+ else
137
+ # This is not CSS standard. It allows us to support this:
138
+ # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
139
+ # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
140
+ # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
141
+ result = val
142
+ end
143
+ }
144
+ ;
145
+ an_plus_b
146
+ : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
147
+ {
148
+ if val[1] == 'n'
149
+ result = Node.new(:AN_PLUS_B, val)
150
+ else
151
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
152
+ end
153
+ }
154
+ | IDENT PLUS NUMBER { # n+3, -n+3
155
+ if val[0] == 'n'
156
+ val.unshift("1")
157
+ result = Node.new(:AN_PLUS_B, val)
158
+ elsif val[0] == '-n'
159
+ val[0] = 'n'
160
+ val.unshift("-1")
161
+ result = Node.new(:AN_PLUS_B, val)
162
+ else
163
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
164
+ end
165
+ }
166
+ | NUMBER IDENT # 5n, -5n
167
+ {
168
+ if val[1] == 'n'
169
+ val << "+"
170
+ val << "0"
171
+ result = Node.new(:AN_PLUS_B, val)
172
+ else
173
+ raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
174
+ end
175
+ }
176
+ ;
177
+ pseudo
178
+ : ':' function {
179
+ result = Node.new(:PSEUDO_CLASS, [val[1]])
180
+ }
181
+ | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
182
+ ;
183
+ hcap_0toN
184
+ : hcap_1toN
185
+ |
186
+ ;
187
+ hcap_1toN
188
+ : attribute_id hcap_1toN {
189
+ result = Node.new(:COMBINATOR, val)
190
+ }
191
+ | class hcap_1toN {
192
+ result = Node.new(:COMBINATOR, val)
193
+ }
194
+ | attrib hcap_1toN {
195
+ result = Node.new(:COMBINATOR, val)
196
+ }
197
+ | pseudo hcap_1toN {
198
+ result = Node.new(:COMBINATOR, val)
199
+ }
200
+ | attribute_id
201
+ | class
202
+ | attrib
203
+ | pseudo
204
+ ;
205
+ attribute_id
206
+ : HASH { result = Node.new(:ID, val) }
207
+ ;
208
+ attrib_val_0or1
209
+ : eql_incl_dash IDENT { result = [val.first, val[1]] }
210
+ | eql_incl_dash STRING { result = [val.first, val[1]] }
211
+ |
212
+ ;
213
+ eql_incl_dash
214
+ : EQUAL { result = :equal }
215
+ | PREFIXMATCH { result = :prefix_match }
216
+ | SUFFIXMATCH { result = :suffix_match }
217
+ | SUBSTRINGMATCH { result = :substring_match }
218
+ | NOT_EQUAL { result = :not_equal }
219
+ | INCLUDES { result = :includes }
220
+ | DASHMATCH { result = :dash_match }
221
+ ;
222
+ negation
223
+ : NOT negation_arg RPAREN {
224
+ result = Node.new(:NOT, [val[1]])
225
+ }
226
+ ;
227
+ negation_arg
228
+ : element_name
229
+ | element_name hcap_1toN
230
+ | hcap_1toN
231
+ ;
232
+ end
233
+
234
+ ---- header
235
+
236
+ require 'nokogiri/css/parser_extras'
237
+
@@ -0,0 +1,91 @@
1
+ require 'thread'
2
+
3
+ module Nokogiri
4
+ module CSS
5
+ class Parser < Racc::Parser
6
+ @cache_on = true
7
+ @cache = {}
8
+ @mutex = Mutex.new
9
+
10
+ class << self
11
+ # Turn on CSS parse caching
12
+ attr_accessor :cache_on
13
+ alias :cache_on? :cache_on
14
+ alias :set_cache :cache_on=
15
+
16
+ # Get the css selector in +string+ from the cache
17
+ def [] string
18
+ return unless @cache_on
19
+ @mutex.synchronize { @cache[string] }
20
+ end
21
+
22
+ # Set the css selector in +string+ in the cache to +value+
23
+ def []= string, value
24
+ return value unless @cache_on
25
+ @mutex.synchronize { @cache[string] = value }
26
+ end
27
+
28
+ # Clear the cache
29
+ def clear_cache
30
+ @mutex.synchronize { @cache = {} }
31
+ end
32
+
33
+ # Execute +block+ without cache
34
+ def without_cache &block
35
+ tmp = @cache_on
36
+ @cache_on = false
37
+ block.call
38
+ @cache_on = tmp
39
+ end
40
+
41
+ ###
42
+ # Parse this CSS selector in +selector+. Returns an AST.
43
+ def parse selector
44
+ @warned ||= false
45
+ unless @warned
46
+ $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
47
+ @warned = true
48
+ end
49
+ new.parse selector
50
+ end
51
+ end
52
+
53
+ # Create a new CSS parser with respect to +namespaces+
54
+ def initialize namespaces = {}
55
+ @tokenizer = Tokenizer.new
56
+ @namespaces = namespaces
57
+ super()
58
+ end
59
+
60
+ def parse string
61
+ @tokenizer.scan_setup string
62
+ do_parse
63
+ end
64
+
65
+ def next_token
66
+ @tokenizer.next_token
67
+ end
68
+
69
+ # Get the xpath for +string+ using +options+
70
+ def xpath_for string, options={}
71
+ key = "#{string}#{options[:ns]}#{options[:prefix]}"
72
+ v = self.class[key]
73
+ return v if v
74
+
75
+ args = [
76
+ options[:prefix] || '//',
77
+ options[:visitor] || XPathVisitor.new
78
+ ]
79
+ self.class[key] = parse(string).map { |ast|
80
+ ast.to_xpath(*args)
81
+ }
82
+ end
83
+
84
+ # On CSS parser error, raise an exception
85
+ def on_error error_token_id, error_value, value_stack
86
+ after = value_stack.compact.last
87
+ raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,7 @@
1
+ require 'nokogiri/syntax_error'
2
+ module Nokogiri
3
+ module CSS
4
+ class SyntaxError < ::Nokogiri::SyntaxError
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,152 @@
1
+ #--
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by rex 1.0.5
4
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
+ #++
6
+
7
+ module Nokogiri
8
+ module CSS
9
+ class Tokenizer
10
+ require 'strscan'
11
+
12
+ class ScanError < StandardError ; end
13
+
14
+ attr_reader :lineno
15
+ attr_reader :filename
16
+ attr_accessor :state
17
+
18
+ def scan_setup(str)
19
+ @ss = StringScanner.new(str)
20
+ @lineno = 1
21
+ @state = nil
22
+ end
23
+
24
+ def action
25
+ yield
26
+ end
27
+
28
+ def scan_str(str)
29
+ scan_setup(str)
30
+ do_parse
31
+ end
32
+ alias :scan :scan_str
33
+
34
+ def load_file( filename )
35
+ @filename = filename
36
+ open(filename, "r") do |f|
37
+ scan_setup(f.read)
38
+ end
39
+ end
40
+
41
+ def scan_file( filename )
42
+ load_file(filename)
43
+ do_parse
44
+ end
45
+
46
+
47
+ def next_token
48
+ return if @ss.eos?
49
+
50
+ # skips empty actions
51
+ until token = _next_token or @ss.eos?; end
52
+ token
53
+ end
54
+
55
+ def _next_token
56
+ text = @ss.peek(1)
57
+ @lineno += 1 if text == "\n"
58
+ token = case @state
59
+ when nil
60
+ case
61
+ when (text = @ss.scan(/has\([\s]*/))
62
+ action { [:HAS, text] }
63
+
64
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
+ action { [:FUNCTION, text] }
66
+
67
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
+ action { [:IDENT, text] }
69
+
70
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
+ action { [:HASH, text] }
72
+
73
+ when (text = @ss.scan(/[\s]*~=[\s]*/))
74
+ action { [:INCLUDES, text] }
75
+
76
+ when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
+ action { [:DASHMATCH, text] }
78
+
79
+ when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
+ action { [:PREFIXMATCH, text] }
81
+
82
+ when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
+ action { [:SUFFIXMATCH, text] }
84
+
85
+ when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
+ action { [:SUBSTRINGMATCH, text] }
87
+
88
+ when (text = @ss.scan(/[\s]*!=[\s]*/))
89
+ action { [:NOT_EQUAL, text] }
90
+
91
+ when (text = @ss.scan(/[\s]*=[\s]*/))
92
+ action { [:EQUAL, text] }
93
+
94
+ when (text = @ss.scan(/[\s]*\)/))
95
+ action { [:RPAREN, text] }
96
+
97
+ when (text = @ss.scan(/[\s]*\[[\s]*/))
98
+ action { [:LSQUARE, text] }
99
+
100
+ when (text = @ss.scan(/[\s]*\]/))
101
+ action { [:RSQUARE, text] }
102
+
103
+ when (text = @ss.scan(/[\s]*\+[\s]*/))
104
+ action { [:PLUS, text] }
105
+
106
+ when (text = @ss.scan(/[\s]*>[\s]*/))
107
+ action { [:GREATER, text] }
108
+
109
+ when (text = @ss.scan(/[\s]*,[\s]*/))
110
+ action { [:COMMA, text] }
111
+
112
+ when (text = @ss.scan(/[\s]*~[\s]*/))
113
+ action { [:TILDE, text] }
114
+
115
+ when (text = @ss.scan(/\:not\([\s]*/))
116
+ action { [:NOT, text] }
117
+
118
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
+ action { [:NUMBER, text] }
120
+
121
+ when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
+ action { [:DOUBLESLASH, text] }
123
+
124
+ when (text = @ss.scan(/[\s]*\/[\s]*/))
125
+ action { [:SLASH, text] }
126
+
127
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
+ action {[:UNICODE_RANGE, text] }
129
+
130
+ when (text = @ss.scan(/[\s]+/))
131
+ action { [:S, text] }
132
+
133
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*'/))
134
+ action { [:STRING, text] }
135
+
136
+ when (text = @ss.scan(/./))
137
+ action { [text, text] }
138
+
139
+ else
140
+ text = @ss.string[@ss.pos .. -1]
141
+ raise ScanError, "can not match: '" + text + "'"
142
+ end # if
143
+
144
+ else
145
+ raise ScanError, "undefined state: '" + state.to_s + "'"
146
+ end # case state
147
+ token
148
+ end # def _next_token
149
+
150
+ end # class
151
+ end
152
+ end