cocoa-xml 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ #--
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by rex 1.0.4
4
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
+ #++
6
+
7
+ module Nokogiri
8
+ module CSS
9
+ # @private
10
+ class GeneratedTokenizer < GeneratedParser
11
+ require 'strscan'
12
+
13
+ class ScanError < StandardError ; end
14
+
15
+ attr_reader :lineno
16
+ attr_reader :filename
17
+ attr_accessor :state
18
+
19
# Prepare the tokenizer to scan +str+ from its first character.
#
# Installs a fresh StringScanner over +str+, rewinds the line counter to 1
# and puts the lexer back into its default (+nil+) state.
def scan_setup(str)
  @lineno = 1
  @ss = StringScanner.new(str)
  @state = nil
end
24
+
25
# Run the block attached to a lexer rule and hand back whatever it
# evaluates to.
def action(&block)
  yield
end
28
+
29
# Tokenize and parse the string +str+.
#
# Resets the scanner over +str+ and then returns the result of +do_parse+.
def scan_str(str)
  scan_setup(str)
  do_parse
end
33
+
34
# Read +filename+ and prepare the scanner over its contents.
#
# Remembers the path in @filename and feeds the whole file to +scan_setup+,
# returning whatever +scan_setup+ returns.
#
# File.open is used instead of Kernel#open on purpose: Kernel#open treats a
# name that starts with "|" as a command to run, which must never happen for
# a value that is supposed to be a plain path.
def load_file(filename)
  @filename = filename
  File.open(filename, "r") do |f|
    scan_setup(f.read)
  end
end
40
+
41
# Tokenize and parse the contents of the file at +filename+.
#
# Loads the file into the scanner, then returns the result of +do_parse+.
def scan_file(filename)
  load_file(filename)
  do_parse
end
45
+
46
+
47
# Ordered lexing rules for the default (+nil+) state: [pattern, token type].
# Rules are tried top to bottom and the first pattern that matches at the
# current scan position wins, so the order is significant. A +nil+ type
# means the matched text itself is used as the token type.
#
# NOTE(review): the file header says this tokenizer is generated by rex from
# tokenizer.rex; changes made here need to be mirrored in the generator
# input or they will be lost on regeneration.
TOKEN_RULES = [
  [/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/, :FUNCTION],
  [/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/, :IDENT],
  [/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/, :HASH],
  [/[\s\r\n\f]*~=[\s\r\n\f]*/, :INCLUDES],
  [/[\s\r\n\f]*\|=[\s\r\n\f]*/, :DASHMATCH],
  [/[\s\r\n\f]*\^=[\s\r\n\f]*/, :PREFIXMATCH],
  [/[\s\r\n\f]*\$=[\s\r\n\f]*/, :SUFFIXMATCH],
  [/[\s\r\n\f]*\*=[\s\r\n\f]*/, :SUBSTRINGMATCH],
  [/[\s\r\n\f]*!=[\s\r\n\f]*/, :NOT_EQUAL],
  [/[\s\r\n\f]*=[\s\r\n\f]*/, :EQUAL],
  [/[\s\r\n\f]*\)/, :RPAREN],
  [/[\s\r\n\f]*\[[\s\r\n\f]*/, :LSQUARE],
  [/[\s\r\n\f]*\]/, :RSQUARE],
  [/[\s\r\n\f]*\+[\s\r\n\f]*/, :PLUS],
  [/[\s\r\n\f]*>[\s\r\n\f]*/, :GREATER],
  [/[\s\r\n\f]*,[\s\r\n\f]*/, :COMMA],
  [/[\s\r\n\f]*~[\s\r\n\f]*/, :TILDE],
  [/\:not\([\s\r\n\f]*/, :NOT],
  [/-?([0-9]+|[0-9]*\.[0-9]+)/, :NUMBER],
  [/[\s\r\n\f]*\/\/[\s\r\n\f]*/, :DOUBLESLASH],
  [/[\s\r\n\f]*\/[\s\r\n\f]*/, :SLASH],
  [/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/, :UNICODE_RANGE],
  [/[\s\t\r\n\f]+/, :S],
  [/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/, :STRING],
  [/./, nil]
].freeze

# Return the next token as a two-element array [type, text], or +nil+ once
# the input is exhausted.
#
# +type+ is one of the symbols listed in TOKEN_RULES, or the matched
# character itself for input that only the catch-all rule accepts.
#
# @lineno is advanced by the number of newlines in the consumed text, so
# tokens that swallow several line breaks (for example whitespace around a
# combinator) keep the counter accurate.
#
# Raises ScanError when the lexer is in a state other than +nil+ or when no
# rule matches the remaining input.
def next_token
  return if @ss.eos?

  # Only the default state is defined for this lexer.
  unless @state.nil?
    raise ScanError, "undefined state: '" + @state.to_s + "'"
  end

  TOKEN_RULES.each do |pattern, type|
    text = @ss.scan(pattern)
    next unless text

    @lineno += text.count("\n")
    return [type || text, text]
  end

  rest = @ss.string[@ss.pos..-1]
  raise ScanError, "can not match: '" + rest + "'"
end
140
+
141
+ end # class
142
+ end
143
+ end
@@ -0,0 +1,100 @@
1
+ module Nokogiri
2
+ module CSS
3
# @private
#
# A node of the CSS selector syntax tree: a +type+ symbol plus an array of
# values, which may be nested nodes or plain objects such as strings.
class Node
  # The node type and the node's values; both readable and writable.
  attr_accessor :type, :value

  # Create a new Node with +type+ and +value+
  def initialize(type, value)
    @type  = type
    @value = value
  end

  # Accept +visitor+: calls the visitor's visit_<type> method, with the
  # type name downcased, passing this node.
  def accept(visitor)
    handler = :"visit_#{type.to_s.downcase}"
    visitor.send(handler, self)
  end

  ###
  # Convert this CSS node to xpath with +prefix+ using +visitor+.
  # The tree is preprocessed (and therefore mutated) first.
  def to_xpath(prefix = '//', visitor = XPathVisitor.new)
    preprocess!
    prefix + visitor.accept(self)
  end

  # Preprocess this node tree in place and return +self+.
  #
  # Element selectors qualified by nth-child / nth-last-child, first-child,
  # last-child or only-child are rewritten so the element becomes '*' and
  # the original tag name moves into a trailing self( function node.
  def preprocess!
    rewrite_nth_child_selectors
    rewrite_edge_child_selectors
    self
  end

  # Find every node (this one included) whose +to_type+ equals +types+.
  def find_by_type(types)
    hits = to_type == types ? [self] : []
    @value.each do |child|
      hits.concat(child.find_by_type(types)) if child.respond_to?(:find_by_type)
    end
    hits
  end

  # Nested array of type symbols describing the shape of this subtree;
  # values that do not respond to +to_type+ are left out.
  def to_type
    nested = @value.map { |child| child.respond_to?(:to_type) ? child.to_type : nil }
    [@type, *nested.compact]
  end

  # Convert to array: the type followed by one array per value.
  def to_a
    @value.inject([@type]) do |out, item|
      out << (item.respond_to?(:to_a) ? item.to_a : [item])
    end
  end

  private

  # Handle tag:nth-child(...) and tag:nth-last-child(...).
  def rewrite_nth_child_selectors
    shape = [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS, [:FUNCTION]]]
    find_by_type(shape).each do |selector|
      function = selector.value[1].value[0]
      next unless function.value[0] =~ /^nth-(last-)?child/

      restrict_to_tag(selector, function)
    end
  end

  # Handle tag:first-child, tag:last-child and tag:only-child.
  def rewrite_edge_child_selectors
    shape = [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS]]
    find_by_type(shape).each do |selector|
      pseudo = selector.value[1].value.first
      case pseudo
      when 'first-child', 'last-child'
        which = pseudo.gsub(/-\w*$/, '')
        restrict_to_tag(selector, Node.new(:FUNCTION, ["#{which}("]))
      when 'only-child'
        restrict_to_tag(selector, Node.new(:FUNCTION, ["#{pseudo}("]))
      end
    end
  end

  # Replace the element name of +selector+ with '*' and its condition with
  # a COMBINATOR of +leading+ and a self( function carrying the tag name.
  def restrict_to_tag(selector, leading)
    element = selector.value[0]
    tag_name = element.value.first
    element.value = ['*']
    selector.value[1] = Node.new(:COMBINATOR, [
      leading,
      Node.new(:FUNCTION, ['self(', tag_name])
    ])
  end
end
99
+ end
100
+ end
@@ -0,0 +1,83 @@
1
+ require 'thread'
2
+
3
+ module Nokogiri
4
+ module CSS
5
+ # @private
6
+ class Parser < GeneratedTokenizer
7
+ @cache_on = true
8
+ @cache = {}
9
+ @mutex = Mutex.new
10
+
11
class << self
  # Turn on CSS parse caching
  attr_accessor :cache_on
  alias :cache_on? :cache_on
  alias :set_cache :cache_on=

  # Get the css selector in +string+ from the cache.
  # Returns +nil+ when caching is off or nothing is stored under +string+.
  def [](string)
    return unless @cache_on
    @mutex.synchronize { @cache[string] }
  end

  # Set the css selector in +string+ in the cache to +value+.
  # When caching is off nothing is stored and +value+ is returned as is.
  def []=(string, value)
    return value unless @cache_on
    @mutex.synchronize { @cache[string] = value }
  end

  # Clear the cache
  def clear_cache
    @mutex.synchronize { @cache = {} }
  end

  # Execute +block+ without cache and return the block's value.
  #
  # The previous cache setting is restored in an +ensure+ clause, so an
  # exception raised by the block no longer leaves caching switched off.
  def without_cache(&block)
    previous = @cache_on
    @cache_on = false
    block.call
  ensure
    @cache_on = previous
  end

  ###
  # Parse this CSS selector in +selector+. Returns an AST.
  #
  # Deprecated entry point: prints a one-time warning to $stderr, then
  # delegates to a fresh parser instance.
  def parse(selector)
    @warned ||= false
    unless @warned
      $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
      @warned = true
    end
    new.parse selector
  end
end
53
+
54
# Create a new CSS parser with respect to +namespaces+
#
# +namespaces+ is stored as given (default: an empty hash) before the
# superclass initializer is invoked without arguments.
def initialize(namespaces = {})
  @namespaces = namespaces
  super()
end
59
+ alias :parse :scan_str
60
+
61
# Get the xpath for +string+ using +options+
#
# Returns the array of XPath strings produced from each AST that +parse+
# yields for +string+. Results are looked up in, and stored through, the
# class-level cache under a key built from the selector, options[:ns] and
# options[:prefix].
#
# Recognised options: :prefix (default '//'), :visitor (default a new
# XPathVisitor) and :ns (only used as part of the cache key here).
#
# NOTE(review): the cache key does not include :visitor, so a cached result
# is returned regardless of which visitor is passed - confirm this is
# intended.
def xpath_for(string, options = {})
  cache_key = "#{string}#{options[:ns]}#{options[:prefix]}"
  cached = self.class[cache_key]
  if cached
    cached
  else
    prefix = options[:prefix] || '//'
    visitor = options[:visitor] || XPathVisitor.new
    xpaths = parse(string).map { |ast| ast.to_xpath(prefix, visitor) }
    self.class[cache_key] = xpaths
  end
end
75
+
76
# On CSS parser error, raise an exception
#
# Raises SyntaxError naming the offending +error_value+ and the last
# non-nil entry of +value_stack+. +error_token_id+ is not used.
def on_error(error_token_id, error_value, value_stack)
  previous = value_stack.compact.last
  raise SyntaxError, "unexpected '#{error_value}' after '#{previous}'"
end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,230 @@
1
# Racc grammar for CSS selectors. Actions build a tree of Node objects
# (or arrays of them); rule and alternative order is kept as is.
class Nokogiri::CSS::GeneratedParser

# NOTE(review): LBRACE and HAS are declared but the tokenizer rules shown
# alongside this grammar never emit them - confirm whether they are needed.
token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS

rule
  # Comma separated selector list, flattened into a single array.
  selector
    : selector COMMA simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | simple_selector_1toN { result = val.flatten }
    ;
  # Combinator tokens mapped to the node type used to join two selectors.
  combinator
    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER { result = :CHILD_SELECTOR }
    | TILDE { result = :PRECEDING_SELECTOR }
    | S { result = :DESCENDANT_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  # One compound selector: an element name and/or a chain of conditions.
  # When no element name is given, the element defaults to '*'.
  simple_selector
    : element_name hcap_0toN {
        result = if val[1].nil?
          val.first
        else
          Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
        end
      }
    | element_name hcap_1toN negation {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            val.first,
            Node.new(:COMBINATOR, [val[1], val.last])
          ]
        )
      }
    | element_name negation {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN negation {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            Node.new(:ELEMENT_NAME, ['*']),
            Node.new(:COMBINATOR, val)
          ]
        )
      }
    | hcap_1toN {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # Right-nested chain of simple selectors joined by combinators.
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
    ;
  # Element name, optionally namespace qualified. A bare IDENT is prefixed
  # with xmlns: when the parser was given an xmlns namespace.
  element_name
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
        result = Node.new(:ELEMENT_NAME, [name])
      }
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  namespace
    : IDENT { result = val[0] }
    |
    ;
  # Attribute conditions in square brackets.
  attrib
    : LSQUARE IDENT attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
        )
      }
    | LSQUARE function attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE NUMBER RSQUARE {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
        )
      }
    ;
  # Function call: the stripped function token followed by its arguments.
  function
    : FUNCTION RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION an_plus_b RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | HAS selector RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  # Comma separated function arguments.
  expr
    : NUMBER COMMA expr { result = [val.first, val.last] }
    | STRING COMMA expr { result = [val.first, val.last] }
    | IDENT COMMA expr { result = [val.first, val.last] }
    | NUMBER
    | STRING
    | IDENT # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          # This is not CSS standard.  It allows us to support this:
          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
          result = val
        end
      }
    ;
  # The an+b argument forms, normalised to four values: a, n, +, b.
  an_plus_b
    : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER { # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # In this alternative the IDENT is val[0]; val[1] is the PLUS
          # token, so the message must report val[0].
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more of: id, class, attribute condition, pseudo class.
  # Chains nest to the right inside COMBINATOR nodes.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  attribute_id
    : HASH { result = Node.new(:ID, val) }
    ;
  # Optional operator and right hand side of an attribute condition.
  attrib_val_0or1
    : eql_incl_dash IDENT { result = [val.first, val[1]] }
    | eql_incl_dash STRING { result = [val.first, val[1]] }
    |
    ;
  eql_incl_dash
    : EQUAL           { result = :equal }
    | PREFIXMATCH     { result = :prefix_match }
    | SUFFIXMATCH     { result = :suffix_match }
    | SUBSTRINGMATCH  { result = :substring_match }
    | NOT_EQUAL       { result = :not_equal }
    | INCLUDES        { result = :includes }
    | DASHMATCH       { result = :dash_match }
    ;
  negation
    : NOT negation_arg RPAREN {
        result = Node.new(:NOT, [val[1]])
      }
    ;
  negation_arg
    : hcap_1toN
    ;
end
228
+
229
+ ---- header
230
+