tenderlove-nokogiri 0.0.0.20081001111445
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +105 -0
- data/README.txt +51 -0
- data/Rakefile +70 -0
- data/ext/nokogiri/extconf.rb +24 -0
- data/ext/nokogiri/html_document.c +85 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_sax_parser.c +32 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/native.c +35 -0
- data/ext/nokogiri/native.h +32 -0
- data/ext/nokogiri/xml_cdata.c +36 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_document.c +159 -0
- data/ext/nokogiri/xml_document.h +10 -0
- data/ext/nokogiri/xml_node.c +573 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +90 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_reader.c +420 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_sax_parser.c +161 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_text.c +25 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +39 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +69 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +83 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +45 -0
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/css/node.rb +95 -0
- data/lib/nokogiri/css/parser.rb +24 -0
- data/lib/nokogiri/css/parser.y +198 -0
- data/lib/nokogiri/css/tokenizer.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rex +63 -0
- data/lib/nokogiri/css/xpath_visitor.rb +153 -0
- data/lib/nokogiri/decorators.rb +1 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
- data/lib/nokogiri/hpricot.rb +46 -0
- data/lib/nokogiri/html.rb +64 -0
- data/lib/nokogiri/html/builder.rb +9 -0
- data/lib/nokogiri/html/document.rb +9 -0
- data/lib/nokogiri/html/sax/parser.rb +21 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml.rb +29 -0
- data/lib/nokogiri/xml/after_handler.rb +18 -0
- data/lib/nokogiri/xml/before_handler.rb +32 -0
- data/lib/nokogiri/xml/builder.rb +79 -0
- data/lib/nokogiri/xml/document.rb +22 -0
- data/lib/nokogiri/xml/node.rb +162 -0
- data/lib/nokogiri/xml/node_set.rb +136 -0
- data/lib/nokogiri/xml/reader.rb +14 -0
- data/lib/nokogiri/xml/sax.rb +9 -0
- data/lib/nokogiri/xml/sax/document.rb +59 -0
- data/lib/nokogiri/xml/sax/parser.rb +33 -0
- data/lib/nokogiri/xml/text.rb +6 -0
- data/lib/nokogiri/xml/xpath.rb +6 -0
- data/lib/nokogiri/xslt.rb +11 -0
- data/lib/nokogiri/xslt/stylesheet.rb +6 -0
- data/nokogiri.gemspec +33 -0
- data/test/css/test_nthiness.rb +141 -0
- data/test/css/test_parser.rb +214 -0
- data/test/css/test_tokenizer.rb +162 -0
- data/test/files/staff.xml +57 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +70 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +7 -0
- data/test/hpricot/test_alter.rb +67 -0
- data/test/hpricot/test_builder.rb +27 -0
- data/test/hpricot/test_parser.rb +412 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +72 -0
- data/test/hpricot/test_xml.rb +26 -0
- data/test/html/sax/test_parser.rb +27 -0
- data/test/html/test_builder.rb +78 -0
- data/test/html/test_document.rb +22 -0
- data/test/test_convert_xpath.rb +173 -0
- data/test/test_nokogiri.rb +36 -0
- data/test/test_reader.rb +222 -0
- data/test/test_xslt_transforms.rb +29 -0
- data/test/xml/sax/test_parser.rb +93 -0
- data/test/xml/test_builder.rb +16 -0
- data/test/xml/test_document.rb +141 -0
- data/test/xml/test_node.rb +148 -0
- data/test/xml/test_node_set.rb +54 -0
- data/test/xml/test_text.rb +13 -0
- metadata +191 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # SAX document handler that rebuilds the elements reported while parsing a
    # markup fragment and adds every completed top level element as a previous
    # sibling of the target node.
    #
    # Only elements whose opening tag text occurs in the original markup are
    # kept (presumably to drop wrapper elements synthesized by the parser --
    # TODO confirm).
    class BeforeHandler < Nokogiri::XML::SAX::Document # :nodoc:
      ###
      # +node+ is the node the new siblings are inserted before;
      # +original_html+ is the markup that is being parsed.
      def initialize node, original_html
        @original_html = original_html
        @node = node
        @stack = []
      end

      ###
      # Create a node for +name+, copy +attrs+ (a flat list of alternating
      # names and values) onto it, attach it to the currently open element if
      # there is one, and make it the innermost open element.
      def start_element name, attrs = []
        return unless in_original?(name)

        node = Node.new(name)
        Hash[*attrs].each do |key, value|
          node[key] = value
        end
        node.parent = @stack.last unless @stack.empty?
        @stack << node
      end

      ###
      # Append +string+ to the content of the innermost open element.
      def characters string
        node = @stack.last
        # Text seen while no element is open has no node to be appended to.
        return unless node

        node.content += string
      end

      ###
      # Close the innermost open element.  When it is the outermost one it is
      # added as a previous sibling of the target node first.
      def end_element name
        return unless in_original?(name)

        @node.add_previous_sibling @stack.last if @stack.length == 1
        @stack.pop
      end

      private

      ###
      # True when an opening tag starting with +name+ occurs in the original
      # markup.  The match is case insensitive and prefix based, so +b+ also
      # matches "<body".  The name is escaped so it is matched literally.
      def in_original? name
        @original_html =~ /<#{Regexp.escape(name.to_s)}/i
      end
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # Builds a document from a block.  Inside the block every unknown method
    # call creates a node named after the method and inserts it under the
    # current parent; a block given to such a call is evaluated with the new
    # node as the parent.
    class Builder
      attr_accessor :doc, :parent

      ###
      # Create the document (the +Document+ class that lives in the same
      # namespace as the builder class) and evaluate +block+ against the
      # builder.  Without a block an empty document is created.
      def initialize(&block)
        namespace = self.class.name.split('::')
        namespace[-1] = 'Document'
        document_class = namespace.inject(Object) { |scope, const|
          scope.const_get(const)
        }
        @doc = document_class.new
        @parent = @doc
        instance_eval(&block) if block
        @parent = @doc
      end

      ###
      # Insert a text node containing +string+ under the current parent.
      def text(string)
        node = Nokogiri::XML::Text.new(string)
        insert(node)
      end

      ###
      # Insert a CDATA node containing +string+ under the current parent.
      def cdata(string)
        node = Nokogiri::XML::CData.new(@doc, string)
        insert(node)
      end

      ###
      # Serialize the document that was built.
      def to_xml
        @doc.to_xml
      end

      ###
      # Create a node named +method+.  When the first argument is a Hash its
      # pairs become attributes, any other first argument becomes the node
      # content.  Returns a NodeBuilder wrapping the new node.
      def method_missing(method, *args, &block)
        node = Nokogiri::XML::Node.new(method.to_s) { |n|
          if content = args.first
            if content.is_a?(Hash)
              content.each { |k,v| n[k.to_s] = v.to_s }
            else
              n.content = content
            end
          end
        }
        insert(node, &block)
      end

      private

      ###
      # Attach +node+ to the current parent.  A block is evaluated with
      # +node+ as the parent, after which the previous parent is restored.
      def insert(node, &block)
        node.parent = @parent
        if block_given?
          enclosing = @parent
          @parent = node
          instance_eval(&block)
          @parent = enclosing
        end
        NodeBuilder.new(node, self)
      end

      ###
      # Wraps a node that was just inserted so that chained calls can set its
      # id (<tt>foo!</tt>), an attribute (<tt>foo=</tt>) or add a class name
      # (any other method name).
      class NodeBuilder # :nodoc:
        def initialize(node, doc_builder)
          @node = node
          @doc_builder = doc_builder
        end

        def method_missing(method, *args, &block)
          case method.to_s
          when /^(.*)!$/
            @node['id'] = $1
            @node.content = args.first if args.first
          when /^(.*)=/
            @node[$1] = args.first
          else
            @node['class'] =
              ((@node['class'] || '').split(/\s/) + [method.to_s]).join(' ')
            @node.content = args.first if args.first
          end

          if block_given?
            # Evaluate the block with the wrapped node as parent, then put
            # the previous parent back so that nodes created afterwards are
            # not nested inside the wrapped node.
            enclosing = @doc_builder.parent
            @doc_builder.parent = @node
            value = @doc_builder.instance_eval(&block)
            @doc_builder.parent = enclosing
            return value
          end
          self
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Nokogiri
  module XML
    class Document < Node
      ###
      # Registry of decorator modules, keyed by a lower case class name.
      # Looking up an unknown key stores and returns a fresh empty list.
      def decorators
        return @decorators if @decorators

        @decorators = Hash.new { |registry, name| registry[name] = [] }
      end

      ###
      # Apply any decorators to +node+: every module registered under the
      # last segment of the node's class name (lower cased) is mixed into it.
      def decorate(node)
        short_name = node.class.name.split('::')[-1].downcase
        decorators[short_name].each { |mod| node.extend(mod) }
      end

      ###
      # The serialized form of the document.
      def to_xml
        serialize
      end
    end
  end
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
module Nokogiri
  module XML
    class Node
      # Node type identifiers.  Three of them are compared against +type+
      # below; the values presumably mirror libxml2's element type
      # enumeration -- TODO confirm.
      COMMENT_NODE = 8
      DOCUMENT_NODE = 9
      HTML_DOCUMENT_NODE = 13
      DTD_NODE = 14
      ELEMENT_DECL = 15
      ATTRIBUTE_DECL = 16
      ENTITY_DECL = 17
      NAMESPACE_DECL = 18
      XINCLUDE_START = 19
      XINCLUDE_END = 20
      DOCB_DOCUMENT_NODE = 21

      ####
      # Apply the document's decorators to this node, if it has a document.
      def decorate!
        document.decorate(self) if document
      end

      ####
      # A NodeSet holding the child nodes of this node, leaving out every
      # child for which +blank?+ is true.
      def children
        list = prepare_set(NodeSet.new)
        node = child
        while node
          list << node unless node.blank?
          node = node.next
        end
        list
      end

      ####
      # Search this node for +paths+.  A path starting with "./" or "/" is
      # used as XPath, any other path is parsed as CSS and converted to
      # XPath first.
      def find(*paths)
        xpaths = paths.map { |path|
          if path =~ /^(\.\/|\/)/
            path
          else
            CSS::Parser.parse(path).map { |ast| ast.to_xpath }
          end
        }.flatten.uniq
        find_by_xpath(*xpaths)
      end
      alias :search :find
      alias :/ :find

      ####
      # Evaluate each XPath expression in +paths+ against this node.  One
      # path returns its NodeSet directly; otherwise the nodes of all result
      # sets are collected into a single NodeSet.
      def find_by_xpath *paths
        sets = paths.map { |path|
          prepare_set(XPathContext.new(self).evaluate(path).node_set)
        }
        return sets.first if sets.length == 1

        NodeSet.new do |combined|
          # The combined set needs a document too, NodeSet#search reads it.
          prepare_set(combined)
          sets.each do |set|
            set.each { |node| combined << node }
          end
        end
      end

      ####
      # Search this node for the CSS +rules+.
      def find_by_css *rules
        xpaths = rules.map { |rule|
          CSS::Parser.parse(rule).map { |ast| ast.to_xpath }
        }.flatten.uniq
        find_by_xpath(*xpaths)
      end

      ####
      # The first node matching +path+.
      def at path
        search("#{path}").first
      end

      ####
      # The value of attribute +property+, or nil when it is not set.
      def [](property)
        return nil unless key?(property)
        get(property)
      end

      ####
      # The next sibling of this node.
      def next
        next_sibling
      end

      ####
      # Create nodes from +data+ and insert them before this node
      # (as a sibling).
      def before data
        parser = sax_parser_class.new(BeforeHandler.new(self, data))
        parser.parse(data)
      end

      ####
      # Create nodes from +data+ and insert them after this node
      # (as a sibling).
      def after data
        handler = AfterHandler.new(self, data)
        parser = sax_parser_class.new(handler)
        parser.parse(data)
        # Each node is added directly after this one, so walking the list
        # backwards leaves the new siblings in their original order.
        handler.after_nodes.reverse.each do |sibling|
          self.add_next_sibling sibling
        end
      end

      ####
      # True when attribute +property+ is set on this node.
      def has_attribute?(property)
        key? property
      end

      alias :get_attribute :[]

      ####
      # Set attribute +name+ to +value+.
      def set_attribute(name, value)
        self[name] = value
      end

      ####
      # Remove attribute +name+.
      def remove_attribute name
        remove(name)
      end

      ####
      # The content of this node.
      def inner_text
        content
      end

      ####
      # Set the content to +string+.
      # If +encode+, encode any special characters first.
      def content= string, encode = true
        self.native_content = encode ? encode_special_chars(string) : string
      end

      ####
      # True when this node is a comment.
      def comment?
        type == COMMENT_NODE
      end

      ####
      # True when this node is an XML document node.
      def xml?
        type == DOCUMENT_NODE
      end

      ####
      # True when this node is an HTML document node.
      def html?
        type == HTML_DOCUMENT_NODE
      end

      ####
      # The serialized node; currently the same as +to_xml+.
      def to_html
        to_xml
      end
      alias :to_s :to_html
      alias :inner_html :to_html

      ####
      # The CSS selector equivalent of +path+: steps are joined with " > "
      # and a positional "[n]" becomes ":nth-of-type(n)".
      def css_path
        path.split(/\//).map { |part|
          part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
        }.compact.join(' > ')
      end

      ####
      # The XPath of this node.
      def xpath
        path
      end

      private

      ####
      # Hand +set+ this node's document and apply the document's decorators
      # to it when there is a document.  Returns +set+.
      def prepare_set set
        owner = document
        set.document = owner
        owner.decorate(set) if owner
        set
      end

      ####
      # The SAX::Parser class that lives in the same namespace as the class
      # of this node's document.
      def sax_parser_class
        names = document.class.name.split('::')
        names[-1, 1] = %w[SAX Parser]
        names.inject(Object) { |scope, const| scope.const_get(const) }
      end
    end
  end
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # An ordered collection of nodes.  Element access (+[]+), +length+ and
    # +push+ are not defined in this file and are expected to be provided
    # elsewhere.
    class NodeSet
      include Enumerable

      attr_accessor :document

      ###
      # Yields the new set when a block is given.
      def initialize
        yield self if block_given?
      end

      ###
      # The first node of the set.
      def first
        self[0]
      end

      ###
      # The last node of the set.
      def last
        self[length - 1]
      end

      ###
      # Insert +datum+ before the first node of the set.
      def before datum
        first.before datum
      end

      ###
      # Insert +datum+ after the last node of the set.
      def after datum
        last.after datum
      end

      ###
      # Append +node+ to the set.
      def << node
        push(node)
      end

      ###
      # Search every node of the set for +path+ and collect all matches in a
      # new NodeSet that shares this set's document.
      def search path
        sub_set = NodeSet.new
        sub_set.document = document
        # A set that was never given a document has no decorators to apply.
        document.decorate(sub_set) if document
        each do |node|
          node.search(path).each do |sub_node|
            sub_set << sub_node
          end
        end
        sub_set
      end
      alias :/ :search

      ###
      # The first node matching +path+.
      def at path
        search(path).first
      end

      ###
      # Add +name+ to the class attribute of every node that responds to
      # +get_attribute+.  Returns the set.
      def add_class name
        each do |el|
          next unless el.respond_to? :get_attribute
          classes = el.get_attribute('class').to_s.split(" ")
          el.set_attribute('class', classes.push(name).uniq.join(" "))
        end
        self
      end

      ###
      # Remove +name+ from the class attribute of every node that responds
      # to +get_attribute+; without +name+ the class attribute itself is
      # removed.  Returns the set.
      def remove_class name = nil
        each do |el|
          next unless el.respond_to? :get_attribute
          if name
            classes = el.get_attribute('class').to_s.split(" ")
            el.set_attribute('class', (classes - [name]).uniq.join(" "))
          else
            el.remove_attribute("class")
          end
        end
        self
      end

      ###
      # With +value+ or a block: set attribute +key+ on every node (the block
      # is called with the node to compute the value) and return the set.
      # With a Hash as +key+: set every pair and return the set.
      # Otherwise: return attribute +key+ of the first node, or nil when the
      # set is empty.
      def attr key, value = nil, &blk
        if value || blk
          each do |el|
            el.set_attribute(key, value || blk[el])
          end
          return self
        end

        if key.is_a? Hash
          key.each { |k,v| self.attr(k,v) }
          return self
        end

        head = first
        head && head.get_attribute(key)
      end
      alias_method :set, :attr

      ###
      # Remove attribute +name+ from every node that responds to
      # +remove_attribute+.  Returns the set.
      def remove_attr name
        each do |el|
          next unless el.respond_to? :remove_attribute
          el.remove_attribute(name)
        end
        self
      end

      ###
      # Iterate over each node, yielding to +block+.  Returns the set.
      def each(&block)
        x = 0
        # +length+ is read on every pass so nodes appended while iterating
        # are visited as well.
        while x < length
          yield self[x]
          x += 1
        end
        self
      end

      ###
      # The inner text of all nodes, concatenated.
      def inner_text
        map { |node| node.inner_text }.join('')
      end
      alias :text :inner_text

      ###
      # Replace every node with a new node made from +html+ and re-parent
      # the original node under the deepest first child of that new node.
      # Returns the set.
      def wrap(html, &blk)
        each do |j|
          new_parent = Nokogiri.make(html, &blk)
          j.replace(new_parent)
          nest = new_parent
          if nest.child
            nest = nest.child until nest.child.nil?
          end
          j.parent = nest
        end
        self
      end

      ###
      # The string form of all nodes, concatenated.
      def to_s
        map { |x| x.to_s }.join
      end

      ###
      # The HTML of all nodes, concatenated.
      def to_html
        map { |x| x.to_html }.join('')
      end

      ###
      # The number of nodes in the set.
      def size
        length
      end
    end
  end
end
|