tenderlove-nokogiri 0.0.0-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. data/History.txt +6 -0
  2. data/Manifest.txt +120 -0
  3. data/README.ja.txt +86 -0
  4. data/README.txt +87 -0
  5. data/Rakefile +264 -0
  6. data/ext/nokogiri/extconf.rb +59 -0
  7. data/ext/nokogiri/html_document.c +83 -0
  8. data/ext/nokogiri/html_document.h +10 -0
  9. data/ext/nokogiri/html_sax_parser.c +32 -0
  10. data/ext/nokogiri/html_sax_parser.h +11 -0
  11. data/ext/nokogiri/native.c +40 -0
  12. data/ext/nokogiri/native.h +51 -0
  13. data/ext/nokogiri/xml_cdata.c +52 -0
  14. data/ext/nokogiri/xml_cdata.h +9 -0
  15. data/ext/nokogiri/xml_document.c +159 -0
  16. data/ext/nokogiri/xml_document.h +10 -0
  17. data/ext/nokogiri/xml_dtd.c +117 -0
  18. data/ext/nokogiri/xml_dtd.h +8 -0
  19. data/ext/nokogiri/xml_node.c +709 -0
  20. data/ext/nokogiri/xml_node.h +15 -0
  21. data/ext/nokogiri/xml_node_set.c +124 -0
  22. data/ext/nokogiri/xml_node_set.h +9 -0
  23. data/ext/nokogiri/xml_reader.c +429 -0
  24. data/ext/nokogiri/xml_reader.h +10 -0
  25. data/ext/nokogiri/xml_sax_parser.c +174 -0
  26. data/ext/nokogiri/xml_sax_parser.h +10 -0
  27. data/ext/nokogiri/xml_syntax_error.c +194 -0
  28. data/ext/nokogiri/xml_syntax_error.h +11 -0
  29. data/ext/nokogiri/xml_text.c +29 -0
  30. data/ext/nokogiri/xml_text.h +9 -0
  31. data/ext/nokogiri/xml_xpath.c +46 -0
  32. data/ext/nokogiri/xml_xpath.h +11 -0
  33. data/ext/nokogiri/xml_xpath_context.c +81 -0
  34. data/ext/nokogiri/xml_xpath_context.h +9 -0
  35. data/ext/nokogiri/xslt_stylesheet.c +108 -0
  36. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  37. data/lib/nokogiri/css/node.rb +95 -0
  38. data/lib/nokogiri/css/parser.rb +24 -0
  39. data/lib/nokogiri/css/parser.y +198 -0
  40. data/lib/nokogiri/css/tokenizer.rb +9 -0
  41. data/lib/nokogiri/css/tokenizer.rex +63 -0
  42. data/lib/nokogiri/css/xpath_visitor.rb +165 -0
  43. data/lib/nokogiri/css.rb +6 -0
  44. data/lib/nokogiri/decorators/hpricot/node.rb +58 -0
  45. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  46. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +17 -0
  47. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  48. data/lib/nokogiri/decorators.rb +1 -0
  49. data/lib/nokogiri/hpricot.rb +47 -0
  50. data/lib/nokogiri/html/builder.rb +9 -0
  51. data/lib/nokogiri/html/document.rb +9 -0
  52. data/lib/nokogiri/html/sax/parser.rb +21 -0
  53. data/lib/nokogiri/html.rb +95 -0
  54. data/lib/nokogiri/version.rb +3 -0
  55. data/lib/nokogiri/xml/after_handler.rb +18 -0
  56. data/lib/nokogiri/xml/before_handler.rb +32 -0
  57. data/lib/nokogiri/xml/builder.rb +79 -0
  58. data/lib/nokogiri/xml/cdata.rb +9 -0
  59. data/lib/nokogiri/xml/document.rb +30 -0
  60. data/lib/nokogiri/xml/dtd.rb +6 -0
  61. data/lib/nokogiri/xml/node.rb +195 -0
  62. data/lib/nokogiri/xml/node_set.rb +183 -0
  63. data/lib/nokogiri/xml/notation.rb +6 -0
  64. data/lib/nokogiri/xml/reader.rb +14 -0
  65. data/lib/nokogiri/xml/sax/document.rb +59 -0
  66. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  67. data/lib/nokogiri/xml/sax.rb +9 -0
  68. data/lib/nokogiri/xml/syntax_error.rb +21 -0
  69. data/lib/nokogiri/xml/text.rb +6 -0
  70. data/lib/nokogiri/xml/xpath.rb +6 -0
  71. data/lib/nokogiri/xml/xpath_context.rb +14 -0
  72. data/lib/nokogiri/xml.rb +67 -0
  73. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  74. data/lib/nokogiri/xslt.rb +11 -0
  75. data/lib/nokogiri.rb +51 -0
  76. data/nokogiri.gemspec +34 -0
  77. data/test/css/test_nthiness.rb +159 -0
  78. data/test/css/test_parser.rb +224 -0
  79. data/test/css/test_tokenizer.rb +162 -0
  80. data/test/css/test_xpath_visitor.rb +54 -0
  81. data/test/files/staff.xml +59 -0
  82. data/test/files/staff.xslt +32 -0
  83. data/test/files/tlm.html +850 -0
  84. data/test/helper.rb +70 -0
  85. data/test/hpricot/files/basic.xhtml +17 -0
  86. data/test/hpricot/files/boingboing.html +2266 -0
  87. data/test/hpricot/files/cy0.html +3653 -0
  88. data/test/hpricot/files/immob.html +400 -0
  89. data/test/hpricot/files/pace_application.html +1320 -0
  90. data/test/hpricot/files/tenderlove.html +16 -0
  91. data/test/hpricot/files/uswebgen.html +220 -0
  92. data/test/hpricot/files/utf8.html +1054 -0
  93. data/test/hpricot/files/week9.html +1723 -0
  94. data/test/hpricot/files/why.xml +19 -0
  95. data/test/hpricot/load_files.rb +7 -0
  96. data/test/hpricot/test_alter.rb +67 -0
  97. data/test/hpricot/test_builder.rb +27 -0
  98. data/test/hpricot/test_parser.rb +423 -0
  99. data/test/hpricot/test_paths.rb +15 -0
  100. data/test/hpricot/test_preserved.rb +78 -0
  101. data/test/hpricot/test_xml.rb +30 -0
  102. data/test/html/sax/test_parser.rb +27 -0
  103. data/test/html/test_builder.rb +78 -0
  104. data/test/html/test_document.rb +86 -0
  105. data/test/test_convert_xpath.rb +180 -0
  106. data/test/test_nokogiri.rb +36 -0
  107. data/test/test_reader.rb +222 -0
  108. data/test/test_xslt_transforms.rb +29 -0
  109. data/test/xml/sax/test_parser.rb +93 -0
  110. data/test/xml/test_builder.rb +16 -0
  111. data/test/xml/test_cdata.rb +18 -0
  112. data/test/xml/test_document.rb +171 -0
  113. data/test/xml/test_dtd.rb +43 -0
  114. data/test/xml/test_node.rb +223 -0
  115. data/test/xml/test_node_set.rb +116 -0
  116. data/test/xml/test_text.rb +13 -0
  117. metadata +214 -0
@@ -0,0 +1,95 @@
1
module Nokogiri
  module CSS
    # One node of a parsed CSS selector tree.
    #
    # +type+ is a Symbol naming the node kind and +value+ is an Array holding
    # child nodes and/or raw token strings.
    class Node
      attr_accessor :type, :value

      def initialize(type, value)
        @type = type
        @value = value
      end

      # Double dispatch: calls visitor.visit_<type in lower case>(self) and
      # returns whatever the visitor returns.
      def accept(visitor)
        visitor.send(:"visit_#{type.to_s.downcase}", self)
      end

      # Renders this tree through XPathVisitor and prepends +prefix+.
      # When +preprocess+ is true the tree is first rewritten in place by
      # #preprocess!.
      def to_xpath(prefix = '//', preprocess = true)
        self.preprocess! if preprocess
        prefix + XPathVisitor.new.accept(self)
      end

      # Rewrites child-position pseudo classes in place and returns self.
      #
      # For every <tag>:<pseudo> pair handled below, the element name becomes
      # '*' and the condition becomes "<positional test> combined with
      # self(<tag>)", so the positional test no longer sits directly on a
      # named element.
      def preprocess!
        ### Deal with nth-child and nth-last-child
        nth_shape = [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS, [:FUNCTION]]]
        find_by_type(nth_shape).each do |match|
          function = match.value[1].value[0]
          next unless function.value[0] =~ /^nth-(last-)?child/

          move_tag_into_self_test(match, function)
        end

        ### Deal with first-child, last-child and only-child
        plain_shape = [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS]]
        find_by_type(plain_shape).each do |match|
          pseudo = match.value[1].value.first
          if ['first-child', 'last-child'].include?(pseudo)
            # 'first-child' -> 'first(' and 'last-child' -> 'last('
            which = pseudo.gsub(/-\w*$/, '')
            move_tag_into_self_test(match, Node.new(:FUNCTION, ["#{which}("]))
          elsif 'only-child' == pseudo
            move_tag_into_self_test(match, Node.new(:FUNCTION, ["#{pseudo}("]))
          end
        end

        self
      end

      # Collects this node and every descendant node whose #to_type equals
      # +types+. Values that do not respond to find_by_type are skipped.
      def find_by_type(types)
        matches = []
        matches << self if to_type == types
        @value.each do |v|
          matches.concat(v.find_by_type(types)) if v.respond_to?(:find_by_type)
        end
        matches
      end

      # Nested array of type symbols describing the shape of this subtree;
      # values that do not respond to to_type are left out.
      def to_type
        [@type] + @value.map { |n|
          n.to_type if n.respond_to?(:to_type)
        }.compact
      end

      # Nested array form of the subtree: values responding to to_a are
      # expanded through it, anything else is wrapped in a one-element array.
      def to_a
        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
      end

      private

      # Shared rewrite used by #preprocess!: replaces the element name of
      # +match+ with '*' and swaps its condition for a COMBINATOR made of
      # +condition+ and a self(<original tag name>) function.
      def move_tag_into_self_test(match, condition)
        tag_name = match.value[0].value.first
        match.value[0].value = ['*']
        match.value[1] = Node.new(:COMBINATOR, [
          condition,
          Node.new(:FUNCTION, ['self(', tag_name])
        ])
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Nokogiri
  module CSS
    # Front end that wires a Tokenizer to the GeneratedParser superclass.
    class Parser < GeneratedParser
      # Convenience entry point: builds a fresh parser and parses +string+.
      def self.parse(string)
        new.parse(string)
      end

      def initialize
        @tokenizer = Tokenizer.new
      end

      # Hands +string+ to the tokenizer, then returns the result of do_parse
      # (not defined in this class; presumably inherited from the superclass).
      def parse(string)
        @tokenizer.scan(string)
        do_parse
      end

      # Returns whatever the tokenizer's next_token returns.
      def next_token
        @tokenizer.next_token
      end
    end
  end
end
@@ -0,0 +1,198 @@
1
# Racc grammar for CSS selectors. Actions build Node objects (or plain
# symbols/arrays) from the matched values in +val+ and store them in +result+.
class Nokogiri::CSS::GeneratedParser

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA URI CDO CDC NUMBER PERCENTAGE LENGTH EMS EXS ANGLE TIME FREQ
token IMPORTANT_SYM IMPORT_SYM MEDIA_SYM PAGE_SYM CHARSET_SYM DIMENSION
token PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL SLASH DOUBLESLASH
token NOT

rule
  # Comma-separated selector chains, flattened into a single array.
  selector
    : selector COMMA s_0toN simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | simple_selector_1toN { result = val.flatten }
    ;
  # Each combinator token is mapped to the symbol later used as a node type.
  combinator
    : PLUS s_0toN { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER s_0toN { result = :CHILD_SELECTOR }
    | TILDE s_0toN { result = :PRECEDING_SELECTOR }
    | S { result = :DESCENDANT_SELECTOR }
    | DOUBLESLASH s_0toN { result = :DESCENDANT_SELECTOR }
    | SLASH s_0toN { result = :CHILD_SELECTOR }
    ;
  simple_selector
    : element_name hcap_0toN {
        # A bare element name stays as is; conditions wrap it.
        result = if val[1].nil?
          val.first
        else
          Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
        end
      }
    | element_name negation {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN {
        # Conditions without an element name apply to any element ('*').
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # Right-recursive chain: the combinator symbol becomes the node type.
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
    ;
  element_name
    : IDENT { result = Node.new(:ELEMENT_NAME, val) }
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  attrib
    : '[' s_0toN IDENT s_0toN attrib_val_0or1 ']' {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [Node.new(:ELEMENT_NAME, [val[2]])] + (val[4] || [])
        )
      }
    | '[' s_0toN function s_0toN attrib_val_0or1 ']' {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[2]] + (val[4] || [])
        )
      }
    | '[' s_0toN NUMBER s_0toN ']' {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[2]])]
        )
      }
    ;
  function
    : FUNCTION ')' {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr ')' {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION an_plus_b ')' {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr ')' {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  expr
    : NUMBER
    | STRING
    ;
  # Every alternative normalises to a four-element AN_PLUS_B node:
  # [a, 'n', '+', b].
  an_plus_b
    : NUMBER IDENT PLUS NUMBER    # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER {         # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # The IDENT is val[0] in this alternative; val[1] is the PLUS token.
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT                # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT                       # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more id / class / attribute / pseudo conditions; two or more are
  # chained pairwise into COMBINATOR nodes.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  attribute_id
    : HASH { result = Node.new(:ID, val) }
    ;
  # Optional "<operator> <value>" tail of an attribute condition, returned as
  # [operator, value].
  attrib_val_0or1
    : eql_incl_dash s_0toN IDENT s_0toN { result = [val.first, val[2]] }
    | eql_incl_dash s_0toN STRING s_0toN { result = [val.first, val[2]] }
    |
    ;
  eql_incl_dash
    : '='
    | PREFIXMATCH
    | SUFFIXMATCH
    | SUBSTRINGMATCH
    | NOT_EQUAL
    | INCLUDES
    | DASHMATCH
    ;
  negation
    : NOT s_0toN negation_arg s_0toN ')' {
        result = Node.new(:NOT, [val[2]])
      }
    ;
  negation_arg
    : hcap_1toN
    ;
  # Zero or more whitespace tokens.
  s_0toN
    : S s_0toN
    |
    ;
end
196
+
197
+ ---- header
198
+
@@ -0,0 +1,9 @@
1
module Nokogiri
  module CSS
    # Public face of the tokenizer; all scanning logic comes from the
    # GeneratedTokenizer superclass.
    class Tokenizer < GeneratedTokenizer
      # Forwards +str+ to scan_evaluate (not defined here; presumably supplied
      # by the superclass) and returns its result.
      def scan(str)
        scan_evaluate str
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
# Lexer specification that defines Nokogiri::CSS::GeneratedTokenizer.
# The `macro` section names reusable sub-patterns (referenced as {name});
# the `rule` section pairs each pattern with an action block that yields a
# [token_type, matched_text] pair.
+ module Nokogiri
2
+ module CSS
3
+ class GeneratedTokenizer
4
+
5
+ macro
6
+ nl \n|\r\n|\r|\f
7
+ w [\s\r\n\f]*
8
+ nonascii [^\\\\0-\\\\177]
9
+ num -?([0-9]+|[0-9]*\.[0-9]+)
10
+ unicode \\\\\\\\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?
11
+
12
+ escape {unicode}|\\\\\\\[^\n\r\f0-9a-f]
13
+ nmchar [_a-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_a-z]|{nonascii}|{escape}
15
+ ident [-]?({nmstart})({nmchar})*
16
+ name ({nmchar})+
17
+ string1 "([^\n\r\f"]|\\{nl}|{nonascii}|{escape})*"
18
+ string2 '([^\n\r\f']|\\{nl}|{nonascii}|{escape})*'
19
+ string {string1}|{string2}
20
+ invalid1 \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
21
+ invalid2 \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
22
+ invalid {invalid1}|{invalid2}
23
+ Comment \/\*(.|[\r\n])*?\*\/
24
+
25
+ rule
26
+
27
+ # [:state] pattern [actions]
28
+
29
+ ~= { [:INCLUDES, text] }
30
+ \|= { [:DASHMATCH, text] }
31
+ \^= { [:PREFIXMATCH, text] }
32
+ \$= { [:SUFFIXMATCH, text] }
33
+ \*= { [:SUBSTRINGMATCH, text] }
34
+ != { [:NOT_EQUAL, text] }
35
+ {ident}\(\s* { [:FUNCTION, text] }
# An '@'-prefixed identifier is reported with the same type as a plain one.
36
+ @{ident} { [:IDENT, text] }
37
+ {ident} { [:IDENT, text] }
38
+ {num} { [:NUMBER, text] }
39
+ \#{name} { [:HASH, text] }
40
+ {w}\+ { [:PLUS, text] }
41
+ {w}> { [:GREATER, text] }
42
+ {w}, { [:COMMA, text] }
43
+ {w}~ { [:TILDE, text] }
44
+ \:not\( { [:NOT, text] }
# NOTE(review): same pattern as the earlier @{ident} rule that yields :IDENT;
# if rules are tried top to bottom this :ATKEYWORD rule never fires - confirm.
45
+ @{ident} { [:ATKEYWORD, text] }
# NOTE(review): a bare {num} rule appears earlier, so the next two rules may
# be shadowed as well - confirm against the generator's matching order.
46
+ {num}% { [:PERCENTAGE, text] }
47
+ {num}{ident} { [:DIMENSION, text] }
48
+ <!-- { [:CDO, text] }
49
+ --> { [:CDC, text] }
50
+ {w}\/\/ { [:DOUBLESLASH, text] }
51
+ {w}\/ { [:SLASH, text] }
52
+
53
+ U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
54
+
55
+ {Comment} /* ignore comments */
56
+ [\s\t\r\n\f]+ { [:S, text] }
# Punctuation and any other single character use the matched text itself as
# the token type.
57
+ [\.*:\[\]=\)] { [text, text] }
58
+ {string} { [:STRING, text] }
59
+ {invalid} { [:INVALID, text] }
60
+ . { [text, text] }
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,165 @@
1
module Nokogiri
  module CSS
    # Visitor that turns a CSS selector tree into XPath text. Each visit_*
    # method receives a node and returns a String fragment; nodes reach the
    # right method through their own #accept.
    class XPathVisitor
      # Renders a FUNCTION node. A method named visit_function_<name> takes
      # precedence over the built-in table below when this object responds
      # to it.
      def visit_function(node)
        # note that nth-child and nth-last-child are preprocessed in css/node.rb.
        msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
        return send(msg, node) if respond_to?(msg)

        case node.value.first
        when /^text\(/
          'child::text()'
        when /^self\(/
          "self::#{node.value[1]}"
        when /^(eq|nth|nth-of-type|nth-child)\(/
          if node.value[1].is_a?(Nokogiri::CSS::Node) && node.value[1].type == :AN_PLUS_B
            an_plus_b(node.value[1])
          else
            "position() = " + node.value[1]
          end
        when /^(first|first-of-type)\(/
          "position() = 1"
        when /^(last|last-of-type)\(/
          "position() = last()"
        when /^(nth-last-child|nth-last-of-type)\(/
          "position() = last() - #{node.value[1]}"
        when /^contains\(/
          "contains(., #{node.value[1]})"
        when /^gt\(/
          "position() > #{node.value[1]}"
        when /^only-child\(/
          "last() = 1"
        else
          # Unknown functions are passed through with the closing paren restored.
          node.value.first + ')'
        end
      end

      def visit_not(node)
        'not(' + node.value.first.accept(self) + ')'
      end

      def visit_preceding_selector(node)
        node.value.last.accept(self) +
          '[preceding-sibling::' +
          node.value.first.accept(self) +
          ']'
      end

      def visit_direct_adjacent_selector(node)
        node.value.first.accept(self) +
          "/following-sibling::*[1]/self::" +
          node.value.last.accept(self)
      end

      # Strips the leading '#' from the token; a value without one yields an
      # empty id.
      def visit_id(node)
        "@id = '#{node.value.first[/^#(.*)$/, 1]}'"
      end

      # Renders an attribute test. With only a name the bare attribute
      # expression is returned; with [name, operator, value] the operator
      # selects the XPath comparison.
      def visit_attribute_condition(node)
        target = node.value.first
        # Functions and explicit axes (containing '::') are not attributes.
        attribute = if target.type == :FUNCTION || target.value.first =~ /::/
          ''
        else
          '@'
        end
        attribute += target.accept(self)

        # Support non-standard css
        attribute.gsub!(/^@@/, '@')

        return attribute unless node.value.length == 3

        value = node.value.last
        value = "'#{value}'" if value !~ /^['"]/

        case node.value[1]
        when '*='
          "contains(#{attribute}, #{value})"
        when '^='
          "starts-with(#{attribute}, #{value})"
        when '|='
          "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
        when '~='
          "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
        when '$='
          "substring(#{attribute}, string-length(#{attribute}) - " +
            "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
        else
          attribute + " #{node.value[1]} " + "#{value}"
        end
      end

      # Renders a PSEUDO_CLASS node. Function-style pseudo classes delegate
      # to the FUNCTION node; named ones may be overridden by a
      # visit_pseudo_class_<name> method. Unknown names become the
      # always-true test '1 = 1'.
      def visit_pseudo_class(node)
        if node.value.first.is_a?(Nokogiri::CSS::Node) && node.value.first.type == :FUNCTION
          node.value.first.accept(self)
        else
          msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
          return send(msg, node) if respond_to?(msg)

          case node.value.first
          when "first" then "position() = 1"
          when "last" then "position() = last()"
          when "first-of-type" then "position() = 1"
          when "last-of-type" then "position() = last()"
          when "only-of-type" then "last() = 1"
          when "empty" then "not(node())"
          when "parent" then "node()"
          when "root" then "not(parent::*)"
          else
            '1 = 1'
          end
        end
      end

      def visit_class_condition(node)
        "contains(concat(' ', @class, ' '), ' #{node.value.first} ')"
      end

      def visit_combinator(node)
        node.value.first.accept(self) + ' and ' +
          node.value.last.accept(self)
      end

      def visit_conditional_selector(node)
        node.value.first.accept(self) + '[' +
          node.value.last.accept(self) + ']'
      end

      def visit_descendant_selector(node)
        node.value.first.accept(self) +
          '//' +
          node.value.last.accept(self)
      end

      def visit_child_selector(node)
        node.value.first.accept(self) +
          '/' +
          node.value.last.accept(self)
      end

      def visit_element_name(node)
        node.value.first
      end

      # Entry point: renders +node+ by letting it dispatch back to this visitor.
      def accept(node)
        node.accept(self)
      end

      private

      # Renders an AN_PLUS_B node ([a, 'n', '+', b]) as a position() test.
      # Raises ArgumentError unless the node holds exactly four tokens.
      def an_plus_b(node)
        raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4

        a = node.value[0].to_i
        b = node.value[3].to_i

        if b == 0
          "(position() mod #{a}) = 0"
        elsif a == 0
          # 0n+b selects exactly position b; the general form below would emit
          # "mod 0", which can never compare equal to 0.
          "position() = #{b}"
        else
          compare = a < 0 ? "<=" : ">="
          "(position() #{compare} #{b}) and (((position()-#{b}) mod #{a.abs}) = 0)"
        end
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ require 'nokogiri/css/node'
2
+ require 'nokogiri/css/xpath_visitor'
3
+ require 'nokogiri/css/generated_tokenizer'
4
+ require 'nokogiri/css/generated_parser'
5
+ require 'nokogiri/css/tokenizer'
6
+ require 'nokogiri/css/parser'
@@ -0,0 +1,58 @@
1
module Nokogiri
  module Decorators
    module Hpricot
      # Mixin that layers Hpricot-style helpers over a node object. It relies
      # on the host providing document, path, name, to_html and a search to
      # call through to with super (none are defined in this module).
      module Node
        # Accepts any mix of XPath and CSS rules, optionally followed by a
        # Hash of namespaces. Rules are converted to XPath, de-duplicated and
        # handed to the host's own search together with the namespaces.
        def search(*paths)
          ns = paths.last.is_a?(Hash) ? paths.pop : {}
          converted = paths.map { |path| convert_to_xpath(path) }.flatten.uniq

          namespaces = document.xml? ? document.namespaces.merge(ns) : ns
          super(*(converted + [namespaces]))
        end

        def /(path)
          search(path)
        end

        # With arguments this defers to the host's xpath; with none it
        # returns the host's path.
        def xpath(*args)
          return super unless args.empty?

          path
        end

        def raw_attributes
          self
        end

        def get_element_by_id(element_id)
          search("//*[@id='#{element_id}']").first
        end

        def get_elements_by_tag_name(tag)
          search("//#{tag}")
        end

        # Returns an Array of XPath strings for +rule+.
        #
        # Rules that already look like XPath ('//', '/' or './/' prefix) are
        # passed through ('//' is made relative by prepending '.'); anything
        # else is parsed as CSS and rendered through the Hpricot visitor.
        def convert_to_xpath(rule)
          rule = rule.to_s
          case rule
          when %r{^//}
            [".#{rule}"]
          when %r{^/}
            [rule]
          when %r{^\.//}
            # The dot is escaped on purpose: unescaped it matched any single
            # character, so a CSS rule such as "p//a" skipped the CSS parser.
            [rule]
          else
            ctx = CSS::Parser.parse(rule)
            visitor = CSS::XPathVisitor.new
            visitor.extend(Hpricot::XPathVisitor)
            ctx.map { |ast| './/' + visitor.accept(ast.preprocess!) }
          end
        end

        def target
          name
        end

        def to_original_html
          to_html
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module Nokogiri
  module Decorators
    module Hpricot
      # Mixin for node-set objects; the host must supply search.
      module NodeSet
        # Parses +rule+ as CSS, renders only the first parsed selector
        # through the Hpricot-flavoured visitor and searches for it on the
        # self axis.
        def filter(rule)
          first_selector = CSS::Parser.parse(rule.to_s).first
          visitor = CSS::XPathVisitor.new
          visitor.extend(Hpricot::XPathVisitor)
          xpath = './/self::' + visitor.accept(first_selector)
          search(xpath)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Nokogiri
  module Decorators
    module Hpricot
      ####
      # This mixin does custom adjustments to deal with _whyML
      module XPathVisitor
        # Prefixes the condition's name with "child::" (in place) unless the
        # target is a FUNCTION node or the name starts with '@', then lets
        # the extended visitor render it and wraps every child::text() in
        # normalize-space().
        def visit_attribute_condition(node)
          target = node.value.first
          plain_name = target.type != :FUNCTION && !(target.value.first =~ /^@/)
          target.value[0] = "child::" + target.value[0] if plain_name

          rendered = super(node)
          rendered.gsub(/child::text\(\)/, 'normalize-space(child::text())')
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'nokogiri/decorators/hpricot/node'
2
+ require 'nokogiri/decorators/hpricot/node_set'
3
+ require 'nokogiri/decorators/hpricot/xpath_visitor'
@@ -0,0 +1 @@
1
+ require 'nokogiri/decorators/hpricot'