cocoa-xml 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,143 @@
1
+ #--
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by rex 1.0.4
4
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
+ #++
6
+
7
module Nokogiri
  module CSS
    # @private
    #
    # Scanner that splits a CSS selector string into [type, text] token pairs.
    #
    # NOTE(review): the file banner says this class is generated by rex from
    # "lib/nokogiri/css/tokenizer.rex"; hand edits made here (File.open in
    # #load_file, newline counting in #next_token) should be carried over to
    # the generator input/template or they will be lost on regeneration.
    class GeneratedTokenizer < GeneratedParser
      require 'strscan'

      # Raised by #next_token when the scanner state is unknown or when no
      # rule matches the remaining input.
      class ScanError < StandardError ; end

      # Line counter: set to 1 by #scan_setup and advanced by #next_token.
      attr_reader :lineno
      # Name of the file most recently passed to #load_file.
      attr_reader :filename
      # Scanner state; only +nil+ is handled by #next_token.
      attr_accessor :state

      # Prepare the scanner to tokenize +str+ from the beginning.
      def scan_setup(str)
        @ss = StringScanner.new(str)
        @lineno = 1
        @state = nil
      end

      # Run a rule action; simply yields and returns the block's value.
      def action(&block)
        yield
      end

      # Tokenize and parse +str+.
      # +do_parse+ is not defined in this class; presumably it is provided by
      # the GeneratedParser superclass -- confirm against that file.
      def scan_str(str)
        scan_setup(str)
        do_parse
      end

      # Read +filename+ and prepare the scanner with its contents.
      #
      # File.open is used instead of Kernel#open so that a name starting with
      # "|" is treated as a path and never spawned as a subprocess.
      def load_file( filename )
        @filename = filename
        File.open(filename, "r") do |f|
          scan_setup(f.read)
        end
      end

      # Load +filename+ and parse its contents.
      def scan_file( filename )
        load_file(filename)
        do_parse
      end

      # Return the next token as a two-element array [type, text], or +nil+
      # once the input is exhausted.
      #
      # Rules are tried top to bottom and the first match wins, so their
      # order is significant (e.g. FUNCTION before IDENT, "~=" before "~").
      #
      # Raises ScanError if @state is not +nil+ or nothing matches.
      def next_token
        return if @ss.eos?

        token = case @state
        when nil
          case
          when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/))
             action { [:FUNCTION, text] }

          when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/))
             action { [:IDENT, text] }

          when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/))
             action { [:HASH, text] }

          when (text = @ss.scan(/[\s\r\n\f]*~=[\s\r\n\f]*/))
             action { [:INCLUDES, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\|=[\s\r\n\f]*/))
             action { [:DASHMATCH, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\^=[\s\r\n\f]*/))
             action { [:PREFIXMATCH, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\$=[\s\r\n\f]*/))
             action { [:SUFFIXMATCH, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\*=[\s\r\n\f]*/))
             action { [:SUBSTRINGMATCH, text] }

          when (text = @ss.scan(/[\s\r\n\f]*!=[\s\r\n\f]*/))
             action { [:NOT_EQUAL, text] }

          when (text = @ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
             action { [:EQUAL, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\)/))
             action { [:RPAREN, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
             action { [:LSQUARE, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\]/))
             action { [:RSQUARE, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
             action { [:PLUS, text] }

          when (text = @ss.scan(/[\s\r\n\f]*>[\s\r\n\f]*/))
             action { [:GREATER, text] }

          when (text = @ss.scan(/[\s\r\n\f]*,[\s\r\n\f]*/))
             action { [:COMMA, text] }

          when (text = @ss.scan(/[\s\r\n\f]*~[\s\r\n\f]*/))
             action { [:TILDE, text] }

          when (text = @ss.scan(/\:not\([\s\r\n\f]*/))
             action { [:NOT, text] }

          when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
             action { [:NUMBER, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\/\/[\s\r\n\f]*/))
             action { [:DOUBLESLASH, text] }

          when (text = @ss.scan(/[\s\r\n\f]*\/[\s\r\n\f]*/))
             action { [:SLASH, text] }

          when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
             action {[:UNICODE_RANGE, text] }

          when (text = @ss.scan(/[\s\t\r\n\f]+/))
             action { [:S, text] }

          when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/))
             action { [:STRING, text] }

          when (text = @ss.scan(/./))
             action { [text, text] }

          else
            text = @ss.string[@ss.pos .. -1]
            raise ScanError, "can not match: '" + text + "'"
          end  # if

        else
          raise ScanError, "undefined state: '" + state.to_s + "'"
        end  # case state

        # Count every newline the token consumed. Many rules swallow
        # surrounding whitespace, so looking only at the first character of
        # the token would miss newlines inside or at the end of the match.
        @lineno += text.count("\n")
        token
      end  # def next_token

    end # class
  end
end
@@ -0,0 +1,100 @@
1
module Nokogiri
  module CSS
    # @private
    #
    # One node of a parsed CSS selector tree: a +type+ symbol plus a +value+
    # array whose entries are child Nodes and/or plain values (strings, ...).
    class Node
      # Type of this node (e.g. :ELEMENT_NAME, :PSEUDO_CLASS)
      attr_accessor :type
      # Array of child nodes / literal values of this node
      attr_accessor :value

      # Create a new Node with +type+ and +value+
      def initialize(type, value)
        @type = type
        @value = value
      end

      # Accept +visitor+: dispatches to visitor.visit_<downcased type>(self)
      # and returns whatever that method returns.
      def accept(visitor)
        visitor.send(:"visit_#{type.to_s.downcase}", self)
      end

      ###
      # Convert this CSS node to xpath with +prefix+ using +visitor+.
      # The tree is rewritten in place by #preprocess! first.
      def to_xpath(prefix = '//', visitor = XPathVisitor.new)
        preprocess!
        prefix + visitor.accept(self)
      end

      # Preprocess this node tree in place and return +self+.
      #
      # For "element + positional pseudo class" selectors (nth-child,
      # nth-last-child, first-child, last-child, only-child) the element name
      # is replaced by '*' and the pseudo class by a COMBINATOR holding the
      # positional function and a 'self(' function carrying the original name.
      def preprocess!
        ### Deal with nth-child
        find_by_type(
          [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS, [:FUNCTION]]]
        ).each do |match|
          function = match.value[1].value[0]
          next unless function.value[0] =~ /^nth-(last-)?child/
          rewrite_positional!(match, function)
        end

        ### Deal with first-child, last-child, only-child
        find_by_type(
          [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS]]
        ).each do |match|
          pseudo = match.value[1].value.first
          case pseudo
          when 'first-child', 'last-child'
            # 'first-child' -> 'first(' and 'last-child' -> 'last('
            which = pseudo.gsub(/-\w*$/, '')
            rewrite_positional!(match, Node.new(:FUNCTION, ["#{which}("]))
          when 'only-child'
            rewrite_positional!(match, Node.new(:FUNCTION, ["#{pseudo}("]))
          end
        end

        self
      end

      # Find every node (this one included) whose #to_type equals +types+.
      # Returns an array in depth-first, parent-before-children order.
      def find_by_type(types)
        matches = []
        matches << self if to_type == types
        @value.each do |v|
          # concat appends in place instead of copying the accumulated array
          # at every level of the tree.
          matches.concat(v.find_by_type(types)) if v.respond_to?(:find_by_type)
        end
        matches
      end

      # Nested array of type symbols for this subtree; entries of +value+
      # that do not respond to #to_type are left out.
      def to_type
        child_types = @value.map { |n| n.to_type if n.respond_to?(:to_type) }
        [@type] + child_types.compact
      end

      # Convert to a nested array; entries of +value+ that do not respond to
      # #to_a are wrapped in a one-element array.
      def to_a
        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
      end

      private

      # Rewrite the CONDITIONAL_SELECTOR +match+ in place: its element name
      # becomes '*' and its condition becomes a COMBINATOR of +condition+ and
      # a 'self(' FUNCTION that carries the original element name.
      def rewrite_positional!(match, condition)
        element = match.value[0]
        tag_name = element.value.first
        element.value = ['*']
        match.value[1] = Node.new(:COMBINATOR, [
          condition,
          Node.new(:FUNCTION, ['self(', tag_name])
        ])
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require 'thread'
2
+
3
module Nokogiri
  module CSS
    # @private
    #
    # CSS selector parser with a class-level cache of generated XPath
    # expressions. The cache hash is guarded by a Mutex; the +cache_on+ flag
    # itself is read and written outside that lock.
    class Parser < GeneratedTokenizer
      @cache_on = true
      @cache    = {}
      @mutex    = Mutex.new

      class << self
        # Turn on CSS parse caching
        attr_accessor :cache_on
        alias :cache_on? :cache_on
        alias :set_cache :cache_on=

        # Get the css selector in +string+ from the cache.
        # Returns nil when caching is off or nothing is stored for +string+.
        def [] string
          return unless @cache_on
          @mutex.synchronize { @cache[string] }
        end

        # Set the css selector in +string+ in the cache to +value+.
        # Returns +value+; nothing is stored when caching is off.
        def []= string, value
          return value unless @cache_on
          @mutex.synchronize { @cache[string] = value }
        end

        # Clear the cache
        def clear_cache
          @mutex.synchronize { @cache = {} }
        end

        # Execute +block+ without cache.
        #
        # The previous +cache_on+ setting is restored even if +block+ raises;
        # without that, one failing block would leave caching switched off
        # for the rest of the process. Returns the previous setting, as
        # before.
        def without_cache &block
          previous = @cache_on
          @cache_on = false
          begin
            block.call
          ensure
            @cache_on = previous
          end
          previous
        end

        ###
        # Parse this CSS selector in +selector+.  Returns an AST.
        # Prints a deprecation notice to $stderr on the first call only.
        def parse selector
          @warned ||= false
          unless @warned
            $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
            @warned = true
          end
          new.parse selector
        end
      end

      # Create a new CSS parser with respect to +namespaces+
      def initialize namespaces = {}
        @namespaces = namespaces
        super()
      end
      alias :parse :scan_str

      # Get the xpath for +string+ using +options+
      # (+:ns+, +:prefix+ and +:visitor+ are the keys read here).
      # Returns an array with one XPath string per parsed selector.
      #
      # NOTE(review): the cache key is built from string, :ns and :prefix
      # only, so a call with a custom :visitor can be served a result that
      # was cached for a different visitor -- confirm whether callers rely
      # on per-visitor output before changing the key.
      def xpath_for string, options={}
        key = "#{string}#{options[:ns]}#{options[:prefix]}"
        cached = self.class[key]
        return cached if cached

        prefix  = options[:prefix] || '//'
        visitor = options[:visitor] || XPathVisitor.new
        self.class[key] = parse(string).map { |ast|
          ast.to_xpath(prefix, visitor)
        }
      end

      # On CSS parser error, raise an exception naming the unexpected value
      # and the last non-nil entry of the value stack.
      def on_error error_token_id, error_value, value_stack
        after = value_stack.compact.last
        raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
      end
    end
  end
end
@@ -0,0 +1,230 @@
1
# Racc grammar for CSS selectors. Rule actions build Node trees (or plain
# arrays / symbols for intermediate results).
#
# NOTE(review): LBRACE and HAS are declared as tokens but are not used /
# not obviously produced elsewhere in this grammar -- confirm against the
# tokenizer before relying on the "HAS selector RPAREN" alternative.
class Nokogiri::CSS::GeneratedParser

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS

rule
  # Comma separated list of selectors; yields a flat array of ASTs.
  selector
    : selector COMMA simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | simple_selector_1toN { result = val.flatten }
    ;
  # Combinator tokens mapped to the node type used to join two selectors.
  combinator
    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER { result = :CHILD_SELECTOR }
    | TILDE { result = :PRECEDING_SELECTOR }
    | S { result = :DESCENDANT_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  simple_selector
    : element_name hcap_0toN {
        result =  if val[1].nil?
                    val.first
                  else
                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
                  end
      }
    | element_name hcap_1toN negation {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            val.first,
            Node.new(:COMBINATOR, [val[1], val.last])
          ]
        )
      }
    | element_name negation {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN negation {
        # No element name given: the conditions apply to any element.
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            Node.new(:ELEMENT_NAME, ['*']),
            Node.new(:COMBINATOR, val)
          ]
        )
      }
    | hcap_1toN {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # Right-recursive chain: the combinator result becomes the node type.
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
    ;
  element_name
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        # Unprefixed names get the 'xmlns' prefix when a default namespace
        # was registered with the parser.
        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
        result = Node.new(:ELEMENT_NAME, [name])
      }
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  namespace
    : IDENT { result = val[0] }
    |
    ;
  attrib
    : LSQUARE IDENT attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
        )
      }
    | LSQUARE function attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE NUMBER RSQUARE {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
        )
      }
    ;
  function
    : FUNCTION RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION an_plus_b RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | HAS selector RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  expr
    : NUMBER COMMA expr { result = [val.first, val.last] }
    | STRING COMMA expr { result = [val.first, val.last] }
    | IDENT COMMA expr { result = [val.first, val.last] }
    | NUMBER
    | STRING
    | IDENT                             # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          # This is not CSS standard.  It allows us to support this:
          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
          result = val
        end
      }
    ;
  # Every alternative normalises to the four-element form [a, 'n', '+', b].
  an_plus_b
    : NUMBER IDENT PLUS NUMBER          # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER {               # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # The IDENT is the first value in this alternative (val[1] is
          # the PLUS token), so report val[0].
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT                      # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more of: #id, .class, [attrib], :pseudo. Two or more are nested
  # right-recursively inside COMBINATOR nodes.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  attribute_id
    : HASH { result = Node.new(:ID, val) }
    ;
  attrib_val_0or1
    : eql_incl_dash IDENT { result = [val.first, val[1]] }
    | eql_incl_dash STRING { result = [val.first, val[1]] }
    |
    ;
  eql_incl_dash
    : EQUAL           { result = :equal }
    | PREFIXMATCH     { result = :prefix_match }
    | SUFFIXMATCH     { result = :suffix_match }
    | SUBSTRINGMATCH  { result = :substring_match }
    | NOT_EQUAL       { result = :not_equal }
    | INCLUDES        { result = :includes }
    | DASHMATCH       { result = :dash_match }
    ;
  negation
    : NOT negation_arg RPAREN {
        result = Node.new(:NOT, [val[1]])
      }
    ;
  negation_arg
    : hcap_1toN
    ;
end
228
+
229
+ ---- header
230
+