tenderlove-nokogiri 0.0.0.20081001111445

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +105 -0
  3. data/README.txt +51 -0
  4. data/Rakefile +70 -0
  5. data/ext/nokogiri/extconf.rb +24 -0
  6. data/ext/nokogiri/html_document.c +85 -0
  7. data/ext/nokogiri/html_document.h +10 -0
  8. data/ext/nokogiri/html_sax_parser.c +32 -0
  9. data/ext/nokogiri/html_sax_parser.h +11 -0
  10. data/ext/nokogiri/native.c +35 -0
  11. data/ext/nokogiri/native.h +32 -0
  12. data/ext/nokogiri/xml_cdata.c +36 -0
  13. data/ext/nokogiri/xml_cdata.h +9 -0
  14. data/ext/nokogiri/xml_document.c +159 -0
  15. data/ext/nokogiri/xml_document.h +10 -0
  16. data/ext/nokogiri/xml_node.c +573 -0
  17. data/ext/nokogiri/xml_node.h +13 -0
  18. data/ext/nokogiri/xml_node_set.c +90 -0
  19. data/ext/nokogiri/xml_node_set.h +9 -0
  20. data/ext/nokogiri/xml_reader.c +420 -0
  21. data/ext/nokogiri/xml_reader.h +10 -0
  22. data/ext/nokogiri/xml_sax_parser.c +161 -0
  23. data/ext/nokogiri/xml_sax_parser.h +10 -0
  24. data/ext/nokogiri/xml_text.c +25 -0
  25. data/ext/nokogiri/xml_text.h +9 -0
  26. data/ext/nokogiri/xml_xpath.c +39 -0
  27. data/ext/nokogiri/xml_xpath.h +11 -0
  28. data/ext/nokogiri/xml_xpath_context.c +69 -0
  29. data/ext/nokogiri/xml_xpath_context.h +9 -0
  30. data/ext/nokogiri/xslt_stylesheet.c +83 -0
  31. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  32. data/lib/nokogiri.rb +45 -0
  33. data/lib/nokogiri/css.rb +6 -0
  34. data/lib/nokogiri/css/node.rb +95 -0
  35. data/lib/nokogiri/css/parser.rb +24 -0
  36. data/lib/nokogiri/css/parser.y +198 -0
  37. data/lib/nokogiri/css/tokenizer.rb +9 -0
  38. data/lib/nokogiri/css/tokenizer.rex +63 -0
  39. data/lib/nokogiri/css/xpath_visitor.rb +153 -0
  40. data/lib/nokogiri/decorators.rb +1 -0
  41. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  42. data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
  43. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  44. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
  45. data/lib/nokogiri/hpricot.rb +46 -0
  46. data/lib/nokogiri/html.rb +64 -0
  47. data/lib/nokogiri/html/builder.rb +9 -0
  48. data/lib/nokogiri/html/document.rb +9 -0
  49. data/lib/nokogiri/html/sax/parser.rb +21 -0
  50. data/lib/nokogiri/version.rb +3 -0
  51. data/lib/nokogiri/xml.rb +29 -0
  52. data/lib/nokogiri/xml/after_handler.rb +18 -0
  53. data/lib/nokogiri/xml/before_handler.rb +32 -0
  54. data/lib/nokogiri/xml/builder.rb +79 -0
  55. data/lib/nokogiri/xml/document.rb +22 -0
  56. data/lib/nokogiri/xml/node.rb +162 -0
  57. data/lib/nokogiri/xml/node_set.rb +136 -0
  58. data/lib/nokogiri/xml/reader.rb +14 -0
  59. data/lib/nokogiri/xml/sax.rb +9 -0
  60. data/lib/nokogiri/xml/sax/document.rb +59 -0
  61. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  62. data/lib/nokogiri/xml/text.rb +6 -0
  63. data/lib/nokogiri/xml/xpath.rb +6 -0
  64. data/lib/nokogiri/xslt.rb +11 -0
  65. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  66. data/nokogiri.gemspec +33 -0
  67. data/test/css/test_nthiness.rb +141 -0
  68. data/test/css/test_parser.rb +214 -0
  69. data/test/css/test_tokenizer.rb +162 -0
  70. data/test/files/staff.xml +57 -0
  71. data/test/files/staff.xslt +32 -0
  72. data/test/files/tlm.html +850 -0
  73. data/test/helper.rb +70 -0
  74. data/test/hpricot/files/basic.xhtml +17 -0
  75. data/test/hpricot/files/boingboing.html +2266 -0
  76. data/test/hpricot/files/cy0.html +3653 -0
  77. data/test/hpricot/files/immob.html +400 -0
  78. data/test/hpricot/files/pace_application.html +1320 -0
  79. data/test/hpricot/files/tenderlove.html +16 -0
  80. data/test/hpricot/files/uswebgen.html +220 -0
  81. data/test/hpricot/files/utf8.html +1054 -0
  82. data/test/hpricot/files/week9.html +1723 -0
  83. data/test/hpricot/files/why.xml +19 -0
  84. data/test/hpricot/load_files.rb +7 -0
  85. data/test/hpricot/test_alter.rb +67 -0
  86. data/test/hpricot/test_builder.rb +27 -0
  87. data/test/hpricot/test_parser.rb +412 -0
  88. data/test/hpricot/test_paths.rb +15 -0
  89. data/test/hpricot/test_preserved.rb +72 -0
  90. data/test/hpricot/test_xml.rb +26 -0
  91. data/test/html/sax/test_parser.rb +27 -0
  92. data/test/html/test_builder.rb +78 -0
  93. data/test/html/test_document.rb +22 -0
  94. data/test/test_convert_xpath.rb +173 -0
  95. data/test/test_nokogiri.rb +36 -0
  96. data/test/test_reader.rb +222 -0
  97. data/test/test_xslt_transforms.rb +29 -0
  98. data/test/xml/sax/test_parser.rb +93 -0
  99. data/test/xml/test_builder.rb +16 -0
  100. data/test/xml/test_document.rb +141 -0
  101. data/test/xml/test_node.rb +148 -0
  102. data/test/xml/test_node_set.rb +54 -0
  103. data/test/xml/test_text.rb +13 -0
  104. metadata +191 -0
@@ -0,0 +1,45 @@
1
+ require 'nokogiri/version'
2
+ require 'nokogiri/xml'
3
+ require 'nokogiri/xslt'
4
+ require 'nokogiri/html'
5
+ require 'nokogiri/decorators'
6
+ require 'nokogiri/css'
7
+ require 'nokogiri/html/builder'
8
+ require 'nokogiri/hpricot'
9
+ require 'nokogiri/native'
10
+
11
module Nokogiri
  # Parse +string+ and return the resulting document.
  #
  # The input is sniffed with a regexp: when a line starts (after optional
  # whitespace) with a tag whose text leads up to "html" -- e.g. "<html" or
  # "<!DOCTYPE html" -- the string is handed to Nokogiri::HTML.parse,
  # otherwise to Nokogiri::XML.parse.  +url+, +encoding+ and +options+ are
  # forwarded untouched.
  #
  # NOTE(review): the default +options+ of 32 is presumably a parser option
  # bitmask understood by the native layer -- confirm against the extension.
  #
  # If a block is given, the document is yielded to it before being returned.
  def self.parse(string, url = nil, encoding = nil, options = 32)
    parser = (string =~ /^\s*<[^Hh>]*html/i) ? Nokogiri::HTML : Nokogiri::XML
    document = parser.parse(string, url, encoding, options)
    yield document if block_given?
    document
  end

  # Parse +string+ with Nokogiri::XML.parse using that parser's defaults.
  def self.XML(string)
    Nokogiri::XML.parse(string)
  end

  # With +input+, build a node from the string via
  # Nokogiri::XML::Node.new_from_str; without it, delegate the block to the
  # top-level Nokogiri() method.  +opts+ is accepted but not consulted here.
  def self.make(input = nil, opts = {}, &blk)
    return Nokogiri(&blk) unless input
    Nokogiri::XML::Node.new_from_str(input)
  end
end
37
+
38
# Convenience entry point.
#
# Without a block, +args+ are passed straight to Nokogiri::HTML.parse and its
# result is returned.  With a block, the block is given to a new
# Nokogiri::HTML::Builder and the builder's +doc+ is returned; +args+ are
# ignored in that case.
def Nokogiri(*args, &block)
  return Nokogiri::HTML.parse(*args) unless block_given?

  Nokogiri::HTML::Builder.new(&block).doc
end
@@ -0,0 +1,6 @@
1
+ require 'nokogiri/css/node'
2
+ require 'nokogiri/css/xpath_visitor'
3
+ require 'nokogiri/css/generated_tokenizer'
4
+ require 'nokogiri/css/generated_parser'
5
+ require 'nokogiri/css/tokenizer'
6
+ require 'nokogiri/css/parser'
@@ -0,0 +1,95 @@
1
module Nokogiri
  module CSS
    # A node of the parsed CSS selector tree.
    #
    # +type+ is a Symbol such as :ELEMENT_NAME or :CONDITIONAL_SELECTOR and
    # +value+ is an Array whose members are either child Nodes or plain leaf
    # values (strings such as a tag name or a function name like 'nth-child(').
    class Node
      attr_accessor :type, :value

      def initialize type, value
        @type = type
        @value = value
      end

      # Double dispatch: calls visitor.visit_<downcased type>(self).
      def accept visitor
        visitor.send(:"visit_#{type.to_s.downcase}", self)
      end

      # Render this tree as an XPath string, prepended with +prefix+.
      # Unless +preprocess+ is false the tree is first rewritten in place by
      # #preprocess!.
      def to_xpath prefix = '//', preprocess = true
        self.preprocess! if preprocess
        prefix + XPathVisitor.new.accept(self)
      end

      # Rewrite positional pseudo-classes in place so that the position is
      # evaluated among all siblings rather than among same-named ones:
      # "tag:pseudo" becomes "*" combined with the positional condition and a
      # self(tag) test.  Returns self.
      def preprocess!
        ### Deal with nth-child and nth-last-child
        matches = find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME],
            [:PSEUDO_CLASS,
              [:FUNCTION]
            ]
          ]
        )
        matches.each do |match|
          function = match.value[1].value[0]
          next unless function.value[0] =~ /^nth-(last-)?child/
          wrap_with_self_test(match, function)
        end

        ### Deal with first-child, last-child, only-child
        matches = find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME], [:PSEUDO_CLASS]
          ])
        matches.each do |match|
          pseudo = match.value[1].value.first
          if ['first-child', 'last-child'].include?(pseudo)
            # 'first-child' -> 'first(' ; 'last-child' -> 'last('
            which = pseudo.gsub(/-\w*$/, '')
            wrap_with_self_test(match, Node.new(:FUNCTION, ["#{which}("]))
          elsif 'only-child' == pseudo
            wrap_with_self_test(match, Node.new(:FUNCTION, ["#{pseudo}("]))
          end
        end

        self
      end

      # Collect self and every descendant Node whose #to_type signature
      # equals +types+.
      def find_by_type(types)
        matches = []
        matches << self if to_type == types
        @value.each do |v|
          matches += v.find_by_type(types) if v.respond_to?(:find_by_type)
        end
        matches
      end

      # Structural signature of the subtree: the node type followed by the
      # signatures of its child Nodes.  Leaf values are left out.
      def to_type
        [@type] + @value.map { |n|
          n.to_type if n.respond_to?(:to_type)
        }.compact
      end

      # Nested-array form of the subtree.  Child Nodes are expanded
      # recursively; leaf values are wrapped in a one-element array.  Leaves
      # are wrapped explicitly because String#to_a no longer exists on
      # Ruby 1.9+, where the bare n.to_a call raised NoMethodError.
      def to_a
        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
      end

      private

      # Replace the element name of +match+ (a CONDITIONAL_SELECTOR) with '*'
      # and its condition with COMBINATOR(condition, self(<original tag>)).
      def wrap_with_self_test(match, condition)
        tag_name = match.value[0].value.first
        match.value[0].value = ['*']
        match.value[1] = Node.new(:COMBINATOR, [
          condition,
          Node.new(:FUNCTION, ['self(', tag_name])
        ])
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Nokogiri
  module CSS
    # Hand-written front end for the generated CSS parser: it owns a
    # Tokenizer and supplies tokens to the inherited parsing machinery.
    class Parser < GeneratedParser
      # Convenience: parse +string+ with a throw-away parser instance.
      def self.parse string
        new.parse(string)
      end

      def initialize
        @tokenizer = Tokenizer.new
      end

      # Load +string+ into the tokenizer, then run do_parse and return
      # whatever it returns.
      def parse string
        @tokenizer.scan string
        do_parse
      end

      # Token source: each call forwards to the tokenizer's next_token.
      def next_token
        @tokenizer.next_token
      end
    end
  end
end
@@ -0,0 +1,198 @@
1
class Nokogiri::CSS::GeneratedParser

# Grammar for CSS selectors (plus a few non-standard forms, see below).
# Actions build a tree of Node objects; +val+ holds the values of the
# right-hand-side symbols and +result+ is the value of the rule.

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA URI CDO CDC NUMBER PERCENTAGE LENGTH EMS EXS ANGLE TIME FREQ
token IMPORTANT_SYM IMPORT_SYM MEDIA_SYM PAGE_SYM CHARSET_SYM DIMENSION
token PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL SLASH DOUBLESLASH
token NOT

rule
  # Comma-separated selector group, flattened into a single Array.
  selector
    : selector COMMA s_0toN simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | simple_selector_1toN { result = val.flatten }
    ;
  # Each combinator evaluates to the Symbol used as the node type of the
  # combined selector.  DOUBLESLASH and SLASH map to the same types as
  # whitespace and '>' respectively.
  combinator
    : PLUS s_0toN { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER s_0toN { result = :CHILD_SELECTOR }
    | TILDE s_0toN { result = :PRECEDING_SELECTOR }
    | S { result = :DESCENDANT_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  simple_selector
    : element_name hcap_0toN {
        result =  if val[1].nil?
                    val.first
                  else
                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
                  end
      }
    | element_name negation {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN {
        # No element name given: the conditions apply to any element.
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        # val[1] is the combinator Symbol and becomes the node type.
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
    ;
  element_name
    : IDENT { result = Node.new(:ELEMENT_NAME, val) }
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  attrib
    : '[' s_0toN IDENT s_0toN attrib_val_0or1 ']' {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [Node.new(:ELEMENT_NAME, [val[2]])] + (val[4] || [])
        )
      }
    | '[' s_0toN function s_0toN attrib_val_0or1 ']' {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[2]] + (val[4] || [])
        )
      }
    | '[' s_0toN NUMBER s_0toN ']' {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[2]])]
        )
      }
    ;
  function
    : FUNCTION ')' {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr ')' {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION an_plus_b ')' {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr ')' {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  expr
    : NUMBER
    | STRING
    ;
  # Every accepted spelling is normalized to an AN_PLUS_B node holding the
  # four values  a, 'n', plus-sign, b.
  an_plus_b
    : NUMBER IDENT PLUS NUMBER    # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER {         # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # The offending IDENT is val[0] in this alternative; val[1] is
          # the PLUS token.
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT                # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT                       # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more of: #id, .class, [attrib], :pseudo.  A chain of several is
  # nested right-recursively inside COMBINATOR nodes.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  attribute_id
    : HASH { result = Node.new(:ID, val) }
    ;
  # Optional "<operator> <value>" tail of an attribute selector, returned as
  # the pair [operator, value].
  attrib_val_0or1
    : eql_incl_dash s_0toN IDENT s_0toN { result = [val.first, val[2]] }
    | eql_incl_dash s_0toN STRING s_0toN { result = [val.first, val[2]] }
    |
    ;
  eql_incl_dash
    : '='
    | PREFIXMATCH
    | SUFFIXMATCH
    | SUBSTRINGMATCH
    | NOT_EQUAL
    | INCLUDES
    | DASHMATCH
    ;
  negation
    : NOT s_0toN negation_arg s_0toN ')' {
        result = Node.new(:NOT, [val[2]])
      }
    ;
  negation_arg
    : hcap_1toN
    ;
  s_0toN
    : S s_0toN
    |
    ;
end
196
+
197
+ ---- header
198
+
@@ -0,0 +1,9 @@
1
module Nokogiri
  module CSS
    # Thin public wrapper around the generated scanner class.
    class Tokenizer < GeneratedTokenizer
      # Hand +str+ to scan_evaluate and return its result.
      # NOTE(review): scan_evaluate is not defined in this file; presumably
      # it is provided by GeneratedTokenizer -- confirm.
      def scan(str)
        scan_evaluate str
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
module Nokogiri
module CSS
class GeneratedTokenizer

macro
  nl        \n|\r\n|\r|\f
  w         [\s\r\n\f]*
  nonascii  [^\\\\0-\\\\177]
  num       -?([0-9]+|[0-9]*\.[0-9]+)
  unicode   \\\\\\\\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?

  escape    {unicode}|\\\\\\\[^\n\r\f0-9a-f]
  nmchar    [_a-z0-9-]|{nonascii}|{escape}
  nmstart   [_a-z]|{nonascii}|{escape}
  ident     [-]?({nmstart})({nmchar})*
  name      ({nmchar})+
  string1   "([^\n\r\f"]|\\{nl}|{nonascii}|{escape})*"
  string2   '([^\n\r\f']|\\{nl}|{nonascii}|{escape})*'
  string    {string1}|{string2}
  invalid1  \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
  invalid2  \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
  invalid   {invalid1}|{invalid2}
  Comment   \/\*(.|[\r\n])*?\*\/

rule

# [:state]  pattern  [actions]
#
# Rules are tried top to bottom and the first pattern that matches wins, so
# the order below is significant:
#   * "@ident" is emitted as IDENT.  A second "@{ident}" rule that produced
#     ATKEYWORD used to sit further down; it could never fire and has been
#     removed.
#   * "{num}%" (PERCENTAGE) and "{num}{ident}" (DIMENSION) come after {num}
#     and are therefore shadowed by it: "2n" is scanned as NUMBER, IDENT.
#     The parser's an_plus_b productions consume exactly that NUMBER IDENT
#     sequence, so do not move these two rules above {num}.

            ~=               { [:INCLUDES, text] }
            \|=              { [:DASHMATCH, text] }
            \^=              { [:PREFIXMATCH, text] }
            \$=              { [:SUFFIXMATCH, text] }
            \*=              { [:SUBSTRINGMATCH, text] }
            !=               { [:NOT_EQUAL, text] }
            {ident}\(\s*     { [:FUNCTION, text] }
            @{ident}         { [:IDENT, text] }
            {ident}          { [:IDENT, text] }
            {num}            { [:NUMBER, text] }
            \#{name}         { [:HASH, text] }
            {w}\+            { [:PLUS, text] }
            {w}>             { [:GREATER, text] }
            {w},             { [:COMMA, text] }
            {w}~             { [:TILDE, text] }
            \:not\(          { [:NOT, text] }
            {num}%           { [:PERCENTAGE, text] }
            {num}{ident}     { [:DIMENSION, text] }
            <!--             { [:CDO, text] }
            -->              { [:CDC, text] }
            \/\/             { [:DOUBLESLASH, text] }
            \/               { [:SLASH, text] }

            U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?  {[:UNICODE_RANGE, text] }

            {Comment}                    /* ignore comments */
            [\s\t\r\n\f]+    { [:S, text] }
            [\.*:\[\]=\)]    { [text, text] }
            {string}         { [:STRING, text] }
            {invalid}        { [:INVALID, text] }
            .                { [text, text] }
end
end
end