tenderlove-nokogiri 0.0.0-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +120 -0
- data/README.ja.txt +86 -0
- data/README.txt +87 -0
- data/Rakefile +264 -0
- data/ext/nokogiri/extconf.rb +59 -0
- data/ext/nokogiri/html_document.c +83 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_sax_parser.c +32 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/native.c +40 -0
- data/ext/nokogiri/native.h +51 -0
- data/ext/nokogiri/xml_cdata.c +52 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_document.c +159 -0
- data/ext/nokogiri/xml_document.h +10 -0
- data/ext/nokogiri/xml_dtd.c +117 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_node.c +709 -0
- data/ext/nokogiri/xml_node.h +15 -0
- data/ext/nokogiri/xml_node_set.c +124 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_reader.c +429 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_sax_parser.c +174 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_syntax_error.c +194 -0
- data/ext/nokogiri/xml_syntax_error.h +11 -0
- data/ext/nokogiri/xml_text.c +29 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +46 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +81 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +108 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri/css/node.rb +95 -0
- data/lib/nokogiri/css/parser.rb +24 -0
- data/lib/nokogiri/css/parser.y +198 -0
- data/lib/nokogiri/css/tokenizer.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rex +63 -0
- data/lib/nokogiri/css/xpath_visitor.rb +165 -0
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +58 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +17 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators.rb +1 -0
- data/lib/nokogiri/hpricot.rb +47 -0
- data/lib/nokogiri/html/builder.rb +9 -0
- data/lib/nokogiri/html/document.rb +9 -0
- data/lib/nokogiri/html/sax/parser.rb +21 -0
- data/lib/nokogiri/html.rb +95 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml/after_handler.rb +18 -0
- data/lib/nokogiri/xml/before_handler.rb +32 -0
- data/lib/nokogiri/xml/builder.rb +79 -0
- data/lib/nokogiri/xml/cdata.rb +9 -0
- data/lib/nokogiri/xml/document.rb +30 -0
- data/lib/nokogiri/xml/dtd.rb +6 -0
- data/lib/nokogiri/xml/node.rb +195 -0
- data/lib/nokogiri/xml/node_set.rb +183 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +14 -0
- data/lib/nokogiri/xml/sax/document.rb +59 -0
- data/lib/nokogiri/xml/sax/parser.rb +33 -0
- data/lib/nokogiri/xml/sax.rb +9 -0
- data/lib/nokogiri/xml/syntax_error.rb +21 -0
- data/lib/nokogiri/xml/text.rb +6 -0
- data/lib/nokogiri/xml/xpath.rb +6 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xslt/stylesheet.rb +6 -0
- data/lib/nokogiri/xslt.rb +11 -0
- data/lib/nokogiri.rb +51 -0
- data/nokogiri.gemspec +34 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +224 -0
- data/test/css/test_tokenizer.rb +162 -0
- data/test/css/test_xpath_visitor.rb +54 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +70 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +7 -0
- data/test/hpricot/test_alter.rb +67 -0
- data/test/hpricot/test_builder.rb +27 -0
- data/test/hpricot/test_parser.rb +423 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +78 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +27 -0
- data/test/html/test_builder.rb +78 -0
- data/test/html/test_document.rb +86 -0
- data/test/test_convert_xpath.rb +180 -0
- data/test/test_nokogiri.rb +36 -0
- data/test/test_reader.rb +222 -0
- data/test/test_xslt_transforms.rb +29 -0
- data/test/xml/sax/test_parser.rb +93 -0
- data/test/xml/test_builder.rb +16 -0
- data/test/xml/test_cdata.rb +18 -0
- data/test/xml/test_document.rb +171 -0
- data/test/xml/test_dtd.rb +43 -0
- data/test/xml/test_node.rb +223 -0
- data/test/xml/test_node_set.rb +116 -0
- data/test/xml/test_text.rb +13 -0
- metadata +214 -0
@@ -0,0 +1,95 @@
|
|
1
|
+
module Nokogiri
  module CSS
    # One node of the tree built while parsing a CSS selector.
    #
    # +type+ is a symbol naming the node kind (for example :ELEMENT_NAME)
    # and +value+ is an array whose entries are either child nodes or raw
    # token values.
    class Node
      attr_accessor :type, :value

      # type::  symbol naming the node kind
      # value:: array of child nodes and/or raw token values
      def initialize(type, value)
        @type  = type
        @value = value
      end

      # Double dispatch: calls visit_<downcased type> on +visitor+, passing
      # this node, and returns whatever the visitor returns.
      def accept(visitor)
        handler = :"visit_#{type.to_s.downcase}"
        visitor.send(handler, self)
      end

      # Renders this tree as an XPath string with +prefix+ in front.
      # The tree is rewritten in place first unless +preprocess+ is false.
      def to_xpath(prefix = '//', preprocess = true)
        preprocess! if preprocess
        prefix + XPathVisitor.new.accept(self)
      end

      # Rewrites, in place, every "element + positional pseudo class"
      # selector so the element name becomes '*' and the original tag name
      # is checked through a trailing self( function. Returns self.
      def preprocess!
        # nth-child( / nth-last-child( written as functions
        nth_shape = [:CONDITIONAL_SELECTOR,
                     [:ELEMENT_NAME],
                     [:PSEUDO_CLASS, [:FUNCTION]]]
        find_by_type(nth_shape).each do |selector|
          # The two patterns are tested one after the other against the
          # current state of the selector, as two separate checks.
          [/^nth-child/, /^nth-last-child/].each do |pattern|
            next unless selector.value[1].value[0].value[0] =~ pattern

            restrict_to_self(selector, selector.value[1].value[0])
          end
        end

        # first-child, last-child and only-child written as plain names
        plain_shape = [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS]]
        find_by_type(plain_shape).each do |selector|
          pseudo_name = selector.value[1].value.first
          function_name =
            if ['first-child', 'last-child'].include?(pseudo_name)
              pseudo_name.gsub(/-\w*$/, '')
            elsif 'only-child' == pseudo_name
              pseudo_name
            end
          next unless function_name

          restrict_to_self(selector, Node.new(:FUNCTION, ["#{function_name}("]))
        end

        self
      end

      # Collects this node and every descendant node whose to_type
      # signature equals +types+, in depth-first order.
      def find_by_type(types)
        found = to_type == types ? [self] : []
        @value.each do |child|
          found.concat(child.find_by_type(types)) if child.respond_to?(:find_by_type)
        end
        found
      end

      # Nested array of type symbols describing the shape of this subtree;
      # entries of +value+ that do not respond to to_type are left out.
      def to_type
        child_types = @value.map do |child|
          child.respond_to?(:to_type) ? child.to_type : nil
        end
        [@type] + child_types.compact
      end

      # Nested array form of the subtree: the type followed by one array per
      # entry of +value+ (entries without to_a are wrapped in an array).
      def to_a
        rendered = @value.map do |entry|
          entry.respond_to?(:to_a) ? entry.to_a : [entry]
        end
        [@type] + rendered
      end

      private

      # Replaces the element name of +selector+ with '*' and swaps its
      # condition for a COMBINATOR of +condition+ and self(<original tag>.
      def restrict_to_self(selector, condition)
        element  = selector.value[0]
        tag_name = element.value.first
        element.value = ['*']
        selector.value[1] = Node.new(:COMBINATOR, [
          condition,
          Node.new(:FUNCTION, ['self(', tag_name])
        ])
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Nokogiri
  module CSS
    # Front end for the generated parser: feeds it tokens from a Tokenizer.
    class Parser < GeneratedParser
      # Convenience entry point: parses +string+ with a fresh parser.
      def self.parse(string)
        new.parse(string)
      end

      def initialize
        @tokenizer = Tokenizer.new
      end

      # Hands +string+ to the tokenizer and then runs the generated parser,
      # returning whatever do_parse returns.
      def parse(string)
        @tokenizer.scan string
        do_parse
      end

      # Token source used by the parser: delegates to the tokenizer.
      def next_token
        @tokenizer.next_token
      end
    end
  end
end
|
@@ -0,0 +1,198 @@
|
|
1
|
+
class Nokogiri::CSS::GeneratedParser
|
2
|
+
|
3
|
+
token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
|
4
|
+
token COMMA URI CDO CDC NUMBER PERCENTAGE LENGTH EMS EXS ANGLE TIME FREQ
|
5
|
+
token IMPORTANT_SYM IMPORT_SYM MEDIA_SYM PAGE_SYM CHARSET_SYM DIMENSION
|
6
|
+
token PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL SLASH DOUBLESLASH
|
7
|
+
token NOT
|
8
|
+
|
9
|
+
rule
|
10
|
+
selector
|
11
|
+
: selector COMMA s_0toN simple_selector_1toN {
|
12
|
+
result = [val.first, val.last].flatten
|
13
|
+
}
|
14
|
+
| simple_selector_1toN { result = val.flatten }
|
15
|
+
;
|
16
|
+
combinator
|
17
|
+
: PLUS s_0toN { result = :DIRECT_ADJACENT_SELECTOR }
|
18
|
+
| GREATER s_0toN { result = :CHILD_SELECTOR }
|
19
|
+
| TILDE s_0toN { result = :PRECEDING_SELECTOR }
|
20
|
+
| S { result = :DESCENDANT_SELECTOR }
|
21
|
+
| DOUBLESLASH s_0toN { result = :DESCENDANT_SELECTOR }
|
22
|
+
| SLASH s_0toN { result = :CHILD_SELECTOR }
|
23
|
+
;
|
24
|
+
simple_selector
|
25
|
+
: element_name hcap_0toN {
|
26
|
+
result = if val[1].nil?
|
27
|
+
val.first
|
28
|
+
else
|
29
|
+
Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
|
30
|
+
end
|
31
|
+
}
|
32
|
+
| element_name negation {
|
33
|
+
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
34
|
+
}
|
35
|
+
| function
|
36
|
+
| function attrib {
|
37
|
+
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
38
|
+
}
|
39
|
+
| hcap_1toN {
|
40
|
+
result = Node.new(:CONDITIONAL_SELECTOR,
|
41
|
+
[Node.new(:ELEMENT_NAME, ['*']), val.first]
|
42
|
+
)
|
43
|
+
}
|
44
|
+
;
|
45
|
+
simple_selector_1toN
|
46
|
+
: simple_selector combinator simple_selector_1toN {
|
47
|
+
result = Node.new(val[1], [val.first, val.last])
|
48
|
+
}
|
49
|
+
| simple_selector
|
50
|
+
;
|
51
|
+
class
|
52
|
+
: '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
|
53
|
+
;
|
54
|
+
element_name
|
55
|
+
: IDENT { result = Node.new(:ELEMENT_NAME, val) }
|
56
|
+
| '*' { result = Node.new(:ELEMENT_NAME, val) }
|
57
|
+
;
|
58
|
+
attrib
|
59
|
+
: '[' s_0toN IDENT s_0toN attrib_val_0or1 ']' {
|
60
|
+
result = Node.new(:ATTRIBUTE_CONDITION,
|
61
|
+
[Node.new(:ELEMENT_NAME, [val[2]])] + (val[4] || [])
|
62
|
+
)
|
63
|
+
}
|
64
|
+
| '[' s_0toN function s_0toN attrib_val_0or1 ']' {
|
65
|
+
result = Node.new(:ATTRIBUTE_CONDITION,
|
66
|
+
[val[2]] + (val[4] || [])
|
67
|
+
)
|
68
|
+
}
|
69
|
+
| '[' s_0toN NUMBER s_0toN ']' {
|
70
|
+
# Non standard, but hpricot supports it.
|
71
|
+
result = Node.new(:PSEUDO_CLASS,
|
72
|
+
[Node.new(:FUNCTION, ['nth-child(', val[2]])]
|
73
|
+
)
|
74
|
+
}
|
75
|
+
;
|
76
|
+
function
|
77
|
+
: FUNCTION ')' {
|
78
|
+
result = Node.new(:FUNCTION, [val.first.strip])
|
79
|
+
}
|
80
|
+
| FUNCTION expr ')' {
|
81
|
+
result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
|
82
|
+
}
|
83
|
+
| FUNCTION an_plus_b ')' {
|
84
|
+
result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
|
85
|
+
}
|
86
|
+
| NOT expr ')' {
|
87
|
+
result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
|
88
|
+
}
|
89
|
+
;
|
90
|
+
expr
|
91
|
+
: NUMBER
|
92
|
+
| STRING
|
93
|
+
;
|
94
|
+
an_plus_b
    # Builds an :AN_PLUS_B node whose value is always the four tokens
    # [a, 'n', '+', b]; shorter spellings are padded to that shape.
    : NUMBER IDENT PLUS NUMBER  # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER {       # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # In this alternative the IDENT is val[0]; val[1] is the PLUS token.
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT              # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT                     # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    ;
|
138
|
+
pseudo
|
139
|
+
: ':' function {
|
140
|
+
result = Node.new(:PSEUDO_CLASS, [val[1]])
|
141
|
+
}
|
142
|
+
| ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
|
143
|
+
;
|
144
|
+
hcap_0toN
|
145
|
+
: hcap_1toN
|
146
|
+
|
|
147
|
+
;
|
148
|
+
hcap_1toN
|
149
|
+
: attribute_id hcap_1toN {
|
150
|
+
result = Node.new(:COMBINATOR, val)
|
151
|
+
}
|
152
|
+
| class hcap_1toN {
|
153
|
+
result = Node.new(:COMBINATOR, val)
|
154
|
+
}
|
155
|
+
| attrib hcap_1toN {
|
156
|
+
result = Node.new(:COMBINATOR, val)
|
157
|
+
}
|
158
|
+
| pseudo hcap_1toN {
|
159
|
+
result = Node.new(:COMBINATOR, val)
|
160
|
+
}
|
161
|
+
| attribute_id
|
162
|
+
| class
|
163
|
+
| attrib
|
164
|
+
| pseudo
|
165
|
+
;
|
166
|
+
attribute_id
|
167
|
+
: HASH { result = Node.new(:ID, val) }
|
168
|
+
;
|
169
|
+
attrib_val_0or1
|
170
|
+
: eql_incl_dash s_0toN IDENT s_0toN { result = [val.first, val[2]] }
|
171
|
+
| eql_incl_dash s_0toN STRING s_0toN { result = [val.first, val[2]] }
|
172
|
+
|
|
173
|
+
;
|
174
|
+
eql_incl_dash
|
175
|
+
: '='
|
176
|
+
| PREFIXMATCH
|
177
|
+
| SUFFIXMATCH
|
178
|
+
| SUBSTRINGMATCH
|
179
|
+
| NOT_EQUAL
|
180
|
+
| INCLUDES
|
181
|
+
| DASHMATCH
|
182
|
+
;
|
183
|
+
negation
|
184
|
+
: NOT s_0toN negation_arg s_0toN ')' {
|
185
|
+
result = Node.new(:NOT, [val[2]])
|
186
|
+
}
|
187
|
+
;
|
188
|
+
negation_arg
|
189
|
+
: hcap_1toN
|
190
|
+
;
|
191
|
+
s_0toN
|
192
|
+
: S s_0toN
|
193
|
+
|
|
194
|
+
;
|
195
|
+
end
|
196
|
+
|
197
|
+
---- header
|
198
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module CSS
|
3
|
+
class GeneratedTokenizer
|
4
|
+
|
5
|
+
macro
|
6
|
+
nl \n|\r\n|\r|\f
|
7
|
+
w [\s\r\n\f]*
|
8
|
+
nonascii [^\\\\0-\\\\177]
|
9
|
+
num -?([0-9]+|[0-9]*\.[0-9]+)
|
10
|
+
unicode \\\\\\\\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?
|
11
|
+
|
12
|
+
escape {unicode}|\\\\\\\[^\n\r\f0-9a-f]
|
13
|
+
nmchar [_a-z0-9-]|{nonascii}|{escape}
|
14
|
+
nmstart [_a-z]|{nonascii}|{escape}
|
15
|
+
ident [-]?({nmstart})({nmchar})*
|
16
|
+
name ({nmchar})+
|
17
|
+
string1 "([^\n\r\f"]|\\{nl}|{nonascii}|{escape})*"
|
18
|
+
string2 '([^\n\r\f']|\\{nl}|{nonascii}|{escape})*'
|
19
|
+
string {string1}|{string2}
|
20
|
+
invalid1 \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
|
21
|
+
invalid2 \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
|
22
|
+
invalid {invalid1}|{invalid2}
|
23
|
+
Comment \/\*(.|[\r\n])*?\*\/
|
24
|
+
|
25
|
+
rule
|
26
|
+
|
27
|
+
# [:state] pattern [actions]
|
28
|
+
|
29
|
+
~= { [:INCLUDES, text] }
|
30
|
+
\|= { [:DASHMATCH, text] }
|
31
|
+
\^= { [:PREFIXMATCH, text] }
|
32
|
+
\$= { [:SUFFIXMATCH, text] }
|
33
|
+
\*= { [:SUBSTRINGMATCH, text] }
|
34
|
+
!= { [:NOT_EQUAL, text] }
|
35
|
+
{ident}\(\s* { [:FUNCTION, text] }
|
36
|
+
@{ident} { [:IDENT, text] }
|
37
|
+
{ident} { [:IDENT, text] }
|
38
|
+
{num} { [:NUMBER, text] }
|
39
|
+
\#{name} { [:HASH, text] }
|
40
|
+
{w}\+ { [:PLUS, text] }
|
41
|
+
{w}> { [:GREATER, text] }
|
42
|
+
{w}, { [:COMMA, text] }
|
43
|
+
{w}~ { [:TILDE, text] }
|
44
|
+
\:not\( { [:NOT, text] }
|
45
|
+
@{ident} { [:ATKEYWORD, text] }
|
46
|
+
{num}% { [:PERCENTAGE, text] }
|
47
|
+
{num}{ident} { [:DIMENSION, text] }
|
48
|
+
<!-- { [:CDO, text] }
|
49
|
+
--> { [:CDC, text] }
|
50
|
+
{w}\/\/ { [:DOUBLESLASH, text] }
|
51
|
+
{w}\/ { [:SLASH, text] }
|
52
|
+
|
53
|
+
U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
|
54
|
+
|
55
|
+
{Comment} /* ignore comments */
|
56
|
+
[\s\t\r\n\f]+ { [:S, text] }
|
57
|
+
[\.*:\[\]=\)] { [text, text] }
|
58
|
+
{string} { [:STRING, text] }
|
59
|
+
{invalid} { [:INVALID, text] }
|
60
|
+
. { [text, text] }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
module Nokogiri
  module CSS
    # Visitor that turns a parsed CSS selector tree into an XPath string.
    # Every visit_* method receives a node and returns the XPath fragment
    # for that node.
    class XPathVisitor
      # Translates a FUNCTION node. A visit_function_<name> method, when the
      # visitor responds to one, takes precedence over the built-in table.
      def visit_function(node)
        # note that nth-child and nth-last-child are preprocessed in css/node.rb.
        name   = node.value.first
        custom = :"visit_function_#{name.gsub(/[(]/, '')}"
        return send(custom, node) if respond_to?(custom)

        argument = node.value[1]
        case name
        when /^text\(/
          'child::text()'
        when /^self\(/
          "self::#{argument}"
        when /^(eq|nth|nth-of-type|nth-child)\(/
          if argument.is_a?(Nokogiri::CSS::Node) && argument.type == :AN_PLUS_B
            an_plus_b(argument)
          else
            "position() = " + argument
          end
        when /^(first|first-of-type)\(/
          "position() = 1"
        when /^(last|last-of-type)\(/
          "position() = last()"
        when /^(nth-last-child|nth-last-of-type)\(/
          # NOTE(review): this yields last() - n, so n = 1 selects the
          # second-to-last position; confirm this is the intended meaning.
          "position() = last() - #{argument}"
        when /^contains\(/
          "contains(., #{argument})"
        when /^gt\(/
          "position() > #{argument}"
        when /^only-child\(/
          "last() = 1"
        else
          # Unknown functions are passed through with the paren closed.
          name + ')'
        end
      end

      # Wraps the translation of the negated child in not(...).
      def visit_not(node)
        'not(' + node.value.first.accept(self) + ')'
      end

      # "a ~ b": b, restricted to those that have a preceding sibling a.
      def visit_preceding_selector(node)
        subject = node.value.last.accept(self)
        subject + '[preceding-sibling::' + node.value.first.accept(self) + ']'
      end

      # "a + b": the first following sibling of a, provided it is a b.
      def visit_direct_adjacent_selector(node)
        infix(node, "/following-sibling::*[1]/self::")
      end

      # "#foo" becomes a test on the id attribute.
      def visit_id(node)
        captured = /^#(.*)$/.match(node.value.first)
        "@id = '#{captured && captured[1]}'"
      end

      # Translates an ATTRIBUTE_CONDITION node. The node value is either
      # [name] (presence test) or [name, operator, value].
      def visit_attribute_condition(node)
        subject = node.value.first
        # Functions and names that already carry an axis get no '@' prefix.
        unprefixed = subject.type == :FUNCTION || subject.value.first =~ /::/
        lhs = (unprefixed ? '' : '@') + subject.accept(self)

        # Support non-standard css
        lhs = lhs.gsub(/^@@/, '@')

        return lhs unless node.value.length == 3

        rhs = node.value.last
        rhs = "'#{rhs}'" if rhs !~ /^['"]/

        operator = node.value[1]
        case operator
        when '*='
          "contains(#{lhs}, #{rhs})"
        when '^='
          "starts-with(#{lhs}, #{rhs})"
        when '|='
          "#{lhs} = #{rhs} or starts-with(#{lhs}, concat(#{rhs}, '-'))"
        when '~='
          "contains(concat(\" \", #{lhs}, \" \"),concat(\" \", #{rhs}, \" \"))"
        when '$='
          "substring(#{lhs}, string-length(#{lhs}) - " +
            "string-length(#{rhs}) + 1, string-length(#{rhs})) = #{rhs}"
        else
          lhs + " #{operator} " + "#{rhs}"
        end
      end

      # Translates a PSEUDO_CLASS node. Function-style pseudo classes are
      # delegated to the FUNCTION node; a visit_pseudo_class_<name> method,
      # when present, overrides the built-in names; unknown names become
      # the always-true test '1 = 1'.
      def visit_pseudo_class(node)
        pseudo = node.value.first
        if pseudo.is_a?(Nokogiri::CSS::Node) && pseudo.type == :FUNCTION
          return pseudo.accept(self)
        end

        custom = :"visit_pseudo_class_#{pseudo.gsub(/[(]/, '')}"
        return send(custom, node) if respond_to?(custom)

        case pseudo
        when "first", "first-of-type" then "position() = 1"
        when "last", "last-of-type"   then "position() = last()"
        when "only-of-type"           then "last() = 1"
        when "empty"                  then "not(node())"
        when "parent"                 then "node()"
        when "root"                   then "not(parent::*)"
        else
          '1 = 1'
        end
      end

      # ".foo" becomes a whitespace-delimited match on the class attribute.
      def visit_class_condition(node)
        "contains(concat(' ', @class, ' '), ' #{node.value.first} ')"
      end

      # Two conditions on the same element are joined with "and".
      def visit_combinator(node)
        infix(node, ' and ')
      end

      # element[condition]
      def visit_conditional_selector(node)
        infix(node, '[') + ']'
      end

      # "a b" (or a//b): any descendant.
      def visit_descendant_selector(node)
        infix(node, '//')
      end

      # "a > b" (or a/b): direct child.
      def visit_child_selector(node)
        infix(node, '/')
      end

      # The element name is emitted as is.
      def visit_element_name(node)
        node.value.first
      end

      # Entry point: lets +node+ dispatch back to the matching visit_* method.
      def accept(node)
        node.accept(self)
      end

      private

      # Translates the first and last children of +node+ (in that order)
      # and joins the two fragments with +glue+.
      def infix(node, glue)
        node.value.first.accept(self) + glue + node.value.last.accept(self)
      end

      # Turns an AN_PLUS_B node, whose value is [a, 'n', '+', b], into a
      # test on position().
      def an_plus_b(node)
        unless node.value.size == 4
          raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}"
        end

        step   = node.value[0].to_i
        offset = node.value[3].to_i

        return "(position() mod #{step}) = 0" if offset == 0

        compare = step < 0 ? "<=" : ">="
        "(position() #{compare} #{offset}) and (((position()-#{offset}) mod #{step.abs}) = 0)"
      end
    end
  end
end
|
data/lib/nokogiri/css.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
module Nokogiri
  module Decorators
    module Hpricot
      # Mixin layered on top of a node that already provides search, xpath,
      # path, name, to_html and document. It lets search accept both XPath
      # expressions and CSS rules, and adds a few aliases.
      module Node
        # Searches with any mix of XPath expressions and CSS rules. A
        # trailing Hash is taken as extra namespace bindings. Every path is
        # converted to XPath, duplicates are dropped, and the result is
        # handed to the underlying search along with the namespaces.
        def search(*paths)
          ns = paths.last.is_a?(Hash) ? paths.pop : {}
          converted = paths.map { |path|
            convert_to_xpath(path)
          }.flatten.uniq

          # For XML documents the document's own namespaces are included,
          # with the caller-supplied bindings taking precedence.
          namespaces = document.xml? ? document.namespaces.merge(ns) : ns
          super(*converted + [namespaces])
        end

        # Shorthand for search with a single path.
        def /(path); search(path) end

        # With arguments, defers to the underlying xpath; with none,
        # returns this node's path.
        def xpath(*args)
          return super if args.length > 0
          path
        end

        # Returns the node itself.
        def raw_attributes; self end

        # First node whose id attribute equals +element_id+.
        # NOTE: +element_id+ is interpolated into the XPath unescaped, so a
        # value containing a single quote produces a malformed expression.
        def get_element_by_id(element_id)
          search("//*[@id='#{element_id}']").first
        end

        # All nodes named +tag+.
        def get_elements_by_tag_name(tag)
          search("//#{tag}")
        end

        # Converts +rule+ (anything responding to to_s) into an array of
        # XPath strings:
        #
        # * "//..."  is anchored to this node by prefixing a dot
        # * "/..."   and ".//..." are passed through unchanged
        # * anything else is parsed as CSS, one XPath per selector
        def convert_to_xpath(rule)
          rule = rule.to_s
          case rule
          when %r{^//}
            [".#{rule}"]
          when %r{^/}
            [rule]
          when %r{^\.//}
            # The dot is escaped so that only a literal ".//" prefix counts
            # as XPath; a rule such as "a//b" is parsed as CSS instead.
            [rule]
          else
            ctx = CSS::Parser.parse(rule)
            visitor = CSS::XPathVisitor.new
            visitor.extend(Hpricot::XPathVisitor)
            ctx.map { |ast| './/' + visitor.accept(ast.preprocess!) }
          end
        end

        # Returns the node's name.
        def target
          name
        end

        # Returns the same markup as to_html.
        def to_original_html
          to_html
        end
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module Nokogiri
  module Decorators
    module Hpricot
      # Mixin for a node set that already provides search.
      module NodeSet
        # Parses +rule+ as CSS, converts its first selector to XPath with
        # the Hpricot-flavoured visitor, and searches for it on the self
        # axis below the current context.
        def filter(rule)
          selectors = CSS::Parser.parse(rule.to_s)
          converter = CSS::XPathVisitor.new
          converter.extend(Hpricot::XPathVisitor)
          expression = './/self::' + converter.accept(selectors.first)
          search(expression)
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Nokogiri
  module Decorators
    module Hpricot
      ####
      # This mixin does custom adjustments to deal with _whyML
      module XPathVisitor
        # Prefixes the tested name with "child::" (modifying the node in
        # place) unless it is a function or already starts with '@', then
        # defers to the underlying visitor and wraps any child::text() in
        # normalize-space().
        def visit_attribute_condition(node)
          subject = node.value.first
          bare_name = subject.type != :FUNCTION && subject.value.first !~ /^@/
          subject.value[0] = "child::" + subject.value[0] if bare_name

          translated = super(node)
          translated.gsub(/child::text\(\)/, 'normalize-space(child::text())')
        end
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'nokogiri/decorators/hpricot'
|