tenderlove-nokogiri 0.0.0.20081001111445
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +105 -0
- data/README.txt +51 -0
- data/Rakefile +70 -0
- data/ext/nokogiri/extconf.rb +24 -0
- data/ext/nokogiri/html_document.c +85 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_sax_parser.c +32 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/native.c +35 -0
- data/ext/nokogiri/native.h +32 -0
- data/ext/nokogiri/xml_cdata.c +36 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_document.c +159 -0
- data/ext/nokogiri/xml_document.h +10 -0
- data/ext/nokogiri/xml_node.c +573 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +90 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_reader.c +420 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_sax_parser.c +161 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_text.c +25 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +39 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +69 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +83 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +45 -0
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/css/node.rb +95 -0
- data/lib/nokogiri/css/parser.rb +24 -0
- data/lib/nokogiri/css/parser.y +198 -0
- data/lib/nokogiri/css/tokenizer.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rex +63 -0
- data/lib/nokogiri/css/xpath_visitor.rb +153 -0
- data/lib/nokogiri/decorators.rb +1 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
- data/lib/nokogiri/hpricot.rb +46 -0
- data/lib/nokogiri/html.rb +64 -0
- data/lib/nokogiri/html/builder.rb +9 -0
- data/lib/nokogiri/html/document.rb +9 -0
- data/lib/nokogiri/html/sax/parser.rb +21 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml.rb +29 -0
- data/lib/nokogiri/xml/after_handler.rb +18 -0
- data/lib/nokogiri/xml/before_handler.rb +32 -0
- data/lib/nokogiri/xml/builder.rb +79 -0
- data/lib/nokogiri/xml/document.rb +22 -0
- data/lib/nokogiri/xml/node.rb +162 -0
- data/lib/nokogiri/xml/node_set.rb +136 -0
- data/lib/nokogiri/xml/reader.rb +14 -0
- data/lib/nokogiri/xml/sax.rb +9 -0
- data/lib/nokogiri/xml/sax/document.rb +59 -0
- data/lib/nokogiri/xml/sax/parser.rb +33 -0
- data/lib/nokogiri/xml/text.rb +6 -0
- data/lib/nokogiri/xml/xpath.rb +6 -0
- data/lib/nokogiri/xslt.rb +11 -0
- data/lib/nokogiri/xslt/stylesheet.rb +6 -0
- data/nokogiri.gemspec +33 -0
- data/test/css/test_nthiness.rb +141 -0
- data/test/css/test_parser.rb +214 -0
- data/test/css/test_tokenizer.rb +162 -0
- data/test/files/staff.xml +57 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +70 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +7 -0
- data/test/hpricot/test_alter.rb +67 -0
- data/test/hpricot/test_builder.rb +27 -0
- data/test/hpricot/test_parser.rb +412 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +72 -0
- data/test/hpricot/test_xml.rb +26 -0
- data/test/html/sax/test_parser.rb +27 -0
- data/test/html/test_builder.rb +78 -0
- data/test/html/test_document.rb +22 -0
- data/test/test_convert_xpath.rb +173 -0
- data/test/test_nokogiri.rb +36 -0
- data/test/test_reader.rb +222 -0
- data/test/test_xslt_transforms.rb +29 -0
- data/test/xml/sax/test_parser.rb +93 -0
- data/test/xml/test_builder.rb +16 -0
- data/test/xml/test_document.rb +141 -0
- data/test/xml/test_node.rb +148 -0
- data/test/xml/test_node_set.rb +54 -0
- data/test/xml/test_text.rb +13 -0
- metadata +191 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # SAX handler that rebuilds nodes from the parser events for a markup
    # fragment and inserts every completed top-level node as a previous
    # sibling of a target node (see Node#before).
    class BeforeHandler < Nokogiri::XML::SAX::Document # :nodoc:
      ###
      # +node+ is the node that new siblings are inserted before.
      # +original_html+ is the markup being parsed; it is kept so that
      # elements reported by the parser but absent from the markup can be
      # skipped.
      def initialize node, original_html
        @original_html = original_html
        @node = node
        @stack = []
      end

      ###
      # Create a node called +name+, copy the flat +attrs+ list
      # (name, value, name, value, ...) onto it, attach it to the element
      # currently open (if any) and make it the current element.
      def start_element name, attrs = []
        return unless in_original?(name)
        node = Node.new(name)
        Hash[*attrs].each do |k,v|
          node[k] = v
        end
        node.parent = @stack.last unless @stack.empty?
        @stack << node
      end

      ###
      # Append +string+ to the content of the element currently open.
      # Text reported while no element is open is ignored; previously this
      # raised NoMethodError because the stack was empty.
      def characters string
        node = @stack.last
        return if node.nil?
        node.content += string
      end

      ###
      # Close the current element.  When it is the outermost open element
      # it is inserted before the target node.
      def end_element name
        return unless in_original?(name)
        @node.add_previous_sibling @stack.last if @stack.length == 1
        @stack.pop
      end

      private

      ###
      # True when an opening tag starting with +name+ occurs in the original
      # markup (case-insensitive).  The name is escaped so characters that
      # are special in a Regexp (such as ".") are matched literally.
      def in_original? name
        @original_html =~ /<#{Regexp.escape(name)}/i
      end
    end
  end
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # Builds a document from a block.  The block is evaluated in the context
    # of the builder, and every unknown method called inside it creates an
    # element named after that method under the current parent.
    class Builder
      # +doc+ is the document being built; +parent+ is the node that newly
      # created nodes are attached to.
      attr_accessor :doc, :parent

      ###
      # Create the document class that sits next to this builder class
      # (+Foo::Builder+ builds a +Foo::Document+) and evaluate +block+
      # against the builder.  Without a block an empty document is built.
      def initialize(&block)
        namespace = self.class.name.split('::')
        namespace[-1] = 'Document'
        # Walk the constant path instead of eval'ing a string.
        document_class = namespace.inject(Object) { |scope, const|
          scope.const_get(const)
        }
        @doc = document_class.new
        @parent = @doc
        instance_eval(&block) if block
        @parent = @doc
      end

      ###
      # Insert a text node containing +string+ under the current parent.
      def text(string)
        node = Nokogiri::XML::Text.new(string)
        insert(node)
      end

      ###
      # Insert a CDATA node containing +string+ under the current parent.
      def cdata(string)
        node = Nokogiri::XML::CData.new(@doc, string)
        insert(node)
      end

      ###
      # Serialize the built document.
      def to_xml
        @doc.to_xml
      end

      ###
      # Create an element named +method+.  If the first argument is a Hash
      # its pairs become attributes, otherwise it becomes the content.  A
      # block, when given, builds the element's children.  Returns a
      # NodeBuilder wrapping the new element.
      def method_missing(method, *args, &block)
        node = Nokogiri::XML::Node.new(method.to_s) { |n|
          if content = args.first
            if content.is_a?(Hash)
              content.each { |k,v| n[k.to_s] = v.to_s }
            else
              n.content = content
            end
          end
        }
        insert(node, &block)
      end

      private
      ###
      # Attach +node+ to the current parent.  While +block+ runs, +node+ is
      # the current parent; afterwards the parent is reset to the node's
      # own parent.
      def insert(node, &block)
        node.parent = @parent
        if block_given?
          @parent = node
          instance_eval(&block)
          @parent = node.parent
        end
        NodeBuilder.new(node, self)
      end

      ###
      # Wrapper returned for every inserted node.  Methods called on it
      # tweak the wrapped node: <tt>name!</tt> sets the id, <tt>name=</tt>
      # sets an attribute, and anything else appends a class.
      class NodeBuilder # :nodoc:
        def initialize(node, doc_builder)
          @node = node
          @doc_builder = doc_builder
        end

        def method_missing(method, *args, &block)
          case method.to_s
          when /^(.*)!$/
            @node['id'] = $1
            @node.content = args.first if args.first
          when /^(.*)=/
            @node[$1] = args.first
          else
            @node['class'] =
              ((@node['class'] || '').split(/\s/) + [method.to_s]).join(' ')
            @node.content = args.first if args.first
          end
          if block_given?
            # Build the block's nodes under the wrapped node, then put the
            # builder's parent back so that later siblings are not nested
            # inside this node.
            previous_parent = @doc_builder.parent
            @doc_builder.parent = @node
            result = @doc_builder.instance_eval(&block)
            @doc_builder.parent = previous_parent
            return result
          end
          self
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # A Node that additionally keeps a registry of decorator modules and
    # can apply them to other objects.
    class Document < Node
      ###
      # The decorator registry, created on first use.  Looking up a key
      # that has not been seen yet stores and returns an empty list.
      def decorators
        @decorators ||= Hash.new { |registry, key| registry[key] = [] }
      end

      ###
      # Extend +node+ with every module registered under the downcased,
      # unqualified name of its class.
      def decorate(node)
        kind = node.class.name.split('::').last.downcase
        decorators[kind].each { |mod| node.extend(mod) }
      end

      ###
      # Serialize this document.
      def to_xml
        serialize
      end
    end
  end
end
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # Ruby-level helpers for a node: searching, attribute access, sibling
    # insertion and serialization.  Methods such as +document+, +child+,
    # +type+, +key?+, +get+ and +path+ are not defined in this file.
    class Node
      # Node type codes compared against #type.
      # NOTE(review): presumably these mirror libxml2's xmlElementType
      # values -- confirm against the native extension.
      COMMENT_NODE = 8
      DOCUMENT_NODE = 9
      HTML_DOCUMENT_NODE = 13
      DTD_NODE = 14
      ELEMENT_DECL = 15
      ATTRIBUTE_DECL = 16
      ENTITY_DECL = 17
      NAMESPACE_DECL = 18
      XINCLUDE_START = 19
      XINCLUDE_END = 20
      DOCB_DOCUMENT_NODE = 21

      ###
      # Apply the owning document's decorators to this node, if it has a
      # document.
      def decorate!
        document.decorate(self) if document
      end

      ###
      # The child nodes of this node, as a decorated NodeSet.  Children for
      # which +blank?+ is true are left out.
      def children
        list = NodeSet.new
        list.document = document
        document.decorate(list)

        current = self.child
        while current
          list << current unless current.blank?
          current = current.next
        end
        list
      end

      ###
      # Search this node with each of +paths+.  A path starting with "/" or
      # "./" is used as XPath as-is; anything else is parsed as CSS and
      # converted to XPath.
      def find(*paths)
        find_by_xpath(*(paths.map { |path|
          path =~ /^(\.\/|\/)/ ? path : CSS::Parser.parse(path).map { |ast|
            ast.to_xpath
          }
        }.flatten.uniq))
      end
      alias :search :find
      alias :/ :find

      ###
      # Evaluate each XPath in +paths+ against this node.  A single path
      # returns its NodeSet directly; several paths return one NodeSet
      # holding the nodes of every result, in order.
      def find_by_xpath *paths
        sets = paths.map { |path|
          set = XPathContext.new(self).evaluate(path).node_set
          set.document = document
          document.decorate(set)
          set
        }
        return sets.first if sets.length == 1

        NodeSet.new do |combined|
          # Give the combined set its document as well, like the per-path
          # sets above; NodeSet#search relies on it being set.
          combined.document = document
          document.decorate(combined)
          sets.each do |set|
            set.each do |node|
              combined << node
            end
          end
        end
      end

      ###
      # Search this node with each of the CSS +rules+.
      def find_by_css *rules
        find_by_xpath(*(rules.map { |rule|
          CSS::Parser.parse(rule).map { |ast| ast.to_xpath }
        }.flatten.uniq))
      end

      ###
      # The first node matching +path+.
      def at path
        search("#{path}").first
      end

      ###
      # The value of attribute +property+, or nil when it is not set.
      def [](property)
        return nil unless key?(property)
        get(property)
      end

      ###
      # The next sibling of this node.
      def next
        next_sibling
      end

      ####
      # Create nodes from +data+ and insert them before this node
      # (as a sibling).
      def before data
        parser = sax_parser_class.new(BeforeHandler.new(self, data))
        parser.parse(data)
      end

      ####
      # Create nodes from +data+ and insert them after this node
      # (as a sibling).
      def after data
        handler = AfterHandler.new(self, data)
        parser = sax_parser_class.new(handler)
        parser.parse(data)
        # Each node is added directly after self, so walk the collected
        # nodes backwards to keep their original order.
        handler.after_nodes.reverse.each do |sibling|
          self.add_next_sibling sibling
        end
      end

      ###
      # True when attribute +property+ is set.
      def has_attribute?(property)
        key? property
      end

      alias :get_attribute :[]

      ###
      # Set attribute +name+ to +value+.
      def set_attribute(name, value)
        self[name] = value
      end

      ###
      # Remove attribute +name+.
      def remove_attribute name
        remove(name)
      end

      ###
      # The content of this node.
      def inner_text
        content
      end

      ####
      # Set the content to +string+.
      # If +encode+, encode any special characters first.  Plain assignment
      # (<tt>node.content = s</tt>) always encodes; passing +encode+
      # requires <tt>send(:content=, s, false)</tt>.
      def content= string, encode = true
        self.native_content = encode ? encode_special_chars(string) : string
      end

      ###
      # True when this node's type is COMMENT_NODE.
      def comment?
        type == COMMENT_NODE
      end

      ###
      # True when this node's type is DOCUMENT_NODE.
      def xml?
        type == DOCUMENT_NODE
      end

      ###
      # True when this node's type is HTML_DOCUMENT_NODE.
      def html?
        type == HTML_DOCUMENT_NODE
      end

      ###
      # Serialize this node; currently the same output as +to_xml+.
      def to_html
        to_xml
      end
      alias :to_s :to_html
      alias :inner_html :to_html

      ###
      # This node's path written as a CSS selector: steps are joined with
      # " > " and positional predicates such as "[2]" become
      # ":nth-of-type(2)".
      def css_path
        path.split(/\//).map { |part|
          part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
        }.compact.join(' > ')
      end

      ###
      # This node's path.
      def xpath
        path
      end

      private

      ###
      # The SAX parser class that sits next to this node's document class:
      # for a document of class +Foo::Document+ this is +Foo::SAX::Parser+.
      # The constant path is walked with const_get rather than eval.
      def sax_parser_class
        names = document.class.name.split('::')
        names[-1, 1] = ['SAX', 'Parser']
        names.inject(Object) { |scope, const| scope.const_get(const) }
      end
    end
  end
end
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # An ordered, enumerable collection of nodes.  The storage primitives
    # (+push+, +length+ and +[]+) are not defined in this file.
    class NodeSet
      include Enumerable

      # The document used to decorate sets derived from this one.
      attr_accessor :document

      ###
      # Create a set; when a block is given the new set is yielded to it.
      def initialize
        yield self if block_given?
      end

      ###
      # The node at index 0.
      def first
        self[0]
      end

      ###
      # The node at index <tt>length - 1</tt>.
      def last
        self[length - 1]
      end

      ###
      # Insert nodes created from +datum+ before the first node.
      def before datum
        first.before datum
      end

      ###
      # Insert nodes created from +datum+ after the last node.
      def after datum
        last.after datum
      end

      ###
      # Append +node+ to the set.
      def <<(node)
        push(node)
      end

      ###
      # Search every node in the set for +path+ and collect all matches
      # into a new decorated NodeSet.
      def search path
        sub_set = NodeSet.new
        document.decorate(sub_set)
        each do |node|
          node.search(path).each do |sub_node|
            sub_set << sub_node
          end
        end
        sub_set.document = document
        sub_set
      end
      alias :/ :search

      ###
      # The first node matching +path+ below any node in the set.
      def at path
        search(path).first
      end

      ###
      # Add class +name+ to every node that supports attributes.
      # Returns self.
      def add_class name
        each do |el|
          next unless el.respond_to? :get_attribute
          classes = el.get_attribute('class').to_s.split(" ")
          el.set_attribute('class', classes.push(name).uniq.join(" "))
        end
        self
      end

      ###
      # Remove class +name+ from every node that supports attributes, or
      # remove the whole class attribute when +name+ is nil.  Returns self.
      def remove_class name = nil
        each do |el|
          next unless el.respond_to? :get_attribute
          if name
            classes = el.get_attribute('class').to_s.split(" ")
            el.set_attribute('class', (classes - [name]).uniq.join(" "))
          else
            el.remove_attribute("class")
          end
        end
        self
      end

      ###
      # With +value+ or a block: set attribute +key+ on every node (the
      # block receives the node and returns the value) and return self.
      # With a Hash as +key+: set every pair and return self.
      # Otherwise: return attribute +key+ of the first node, or nil when
      # the set is empty.
      def attr key, value = nil, &blk
        if value or blk
          each do |el|
            el.set_attribute(key, value || blk[el])
          end
          return self
        end
        if key.is_a? Hash
          key.each { |k,v| self.attr(k,v) }
          return self
        else
          # Guard against an empty set instead of calling a method on nil.
          head = self[0]
          return head && head.get_attribute(key)
        end
      end
      alias_method :set, :attr

      ###
      # Remove attribute +name+ from every node that supports it.
      # Returns self.
      def remove_attr name
        each do |el|
          next unless el.respond_to? :remove_attribute
          el.remove_attribute(name)
        end
        self
      end

      ###
      # Iterate over each node, yielding to +block+.  Returns self so calls
      # can be chained.  +length+ is re-read on every pass.
      def each(&block)
        index = 0
        while index < length
          yield self[index]
          index += 1
        end
        self
      end

      ###
      # The inner text of every node, concatenated.
      def inner_text
        collect{|j| j.inner_text}.join('')
      end
      alias :text :inner_text

      ###
      # Wrap every node in markup built by <tt>Nokogiri.make(html, &blk)</tt>:
      # the node is replaced by the new markup and then re-attached beneath
      # its innermost first child.  Returns self.
      def wrap(html, &blk)
        each do |j|
          new_parent = Nokogiri.make(html, &blk)
          j.replace(new_parent)
          nest = new_parent
          if nest.child
            nest = nest.child until nest.child.nil?
          end
          j.parent = nest
        end
        self
      end

      ###
      # +to_s+ of every node, concatenated.
      def to_s
        map { |x| x.to_s }.join
      end

      ###
      # +to_html+ of every node, concatenated.
      def to_html
        map { |x| x.to_html }.join('')
      end

      ###
      # The number of nodes in the set.
      def size
        length
      end
    end
  end
end
|