nokogiri 1.1.1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/History.ja.txt +99 -0
- data/History.txt +99 -0
- data/Manifest.txt +141 -0
- data/README.ja.txt +100 -0
- data/README.txt +109 -0
- data/Rakefile +354 -0
- data/ext/nokogiri/extconf.rb +93 -0
- data/ext/nokogiri/html_document.c +86 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_sax_parser.c +36 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/native.c +41 -0
- data/ext/nokogiri/native.h +50 -0
- data/ext/nokogiri/xml_cdata.c +44 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +42 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +206 -0
- data/ext/nokogiri/xml_document.h +10 -0
- data/ext/nokogiri/xml_dtd.c +121 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_io.c +17 -0
- data/ext/nokogiri/xml_io.h +9 -0
- data/ext/nokogiri/xml_node.c +727 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +118 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_reader.c +465 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_sax_parser.c +201 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_syntax_error.c +199 -0
- data/ext/nokogiri/xml_syntax_error.h +11 -0
- data/ext/nokogiri/xml_text.c +40 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +214 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +123 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/action-nokogiri.rb +30 -0
- data/lib/nokogiri.rb +72 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +721 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +159 -0
- data/lib/nokogiri/css/node.rb +97 -0
- data/lib/nokogiri/css/parser.rb +64 -0
- data/lib/nokogiri/css/parser.y +216 -0
- data/lib/nokogiri/css/syntax_error.rb +6 -0
- data/lib/nokogiri/css/tokenizer.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rex +63 -0
- data/lib/nokogiri/css/xpath_visitor.rb +168 -0
- data/lib/nokogiri/decorators.rb +2 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +28 -0
- data/lib/nokogiri/decorators/slop.rb +31 -0
- data/lib/nokogiri/hpricot.rb +51 -0
- data/lib/nokogiri/html.rb +105 -0
- data/lib/nokogiri/html/builder.rb +9 -0
- data/lib/nokogiri/html/document.rb +9 -0
- data/lib/nokogiri/html/sax/parser.rb +21 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml.rb +83 -0
- data/lib/nokogiri/xml/after_handler.rb +18 -0
- data/lib/nokogiri/xml/attr.rb +10 -0
- data/lib/nokogiri/xml/before_handler.rb +33 -0
- data/lib/nokogiri/xml/builder.rb +84 -0
- data/lib/nokogiri/xml/cdata.rb +9 -0
- data/lib/nokogiri/xml/comment.rb +6 -0
- data/lib/nokogiri/xml/document.rb +55 -0
- data/lib/nokogiri/xml/dtd.rb +6 -0
- data/lib/nokogiri/xml/element.rb +6 -0
- data/lib/nokogiri/xml/entity_declaration.rb +9 -0
- data/lib/nokogiri/xml/node.rb +333 -0
- data/lib/nokogiri/xml/node_set.rb +197 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +20 -0
- data/lib/nokogiri/xml/sax.rb +9 -0
- data/lib/nokogiri/xml/sax/document.rb +59 -0
- data/lib/nokogiri/xml/sax/parser.rb +37 -0
- data/lib/nokogiri/xml/syntax_error.rb +21 -0
- data/lib/nokogiri/xml/text.rb +6 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -0
- data/lib/nokogiri/xslt.rb +28 -0
- data/lib/nokogiri/xslt/stylesheet.rb +6 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +237 -0
- data/test/css/test_tokenizer.rb +162 -0
- data/test/css/test_xpath_visitor.rb +64 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +78 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +11 -0
- data/test/hpricot/test_alter.rb +67 -0
- data/test/hpricot/test_builder.rb +27 -0
- data/test/hpricot/test_parser.rb +426 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +77 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +27 -0
- data/test/html/test_builder.rb +89 -0
- data/test/html/test_document.rb +150 -0
- data/test/html/test_node.rb +21 -0
- data/test/test_convert_xpath.rb +185 -0
- data/test/test_css_cache.rb +57 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +38 -0
- data/test/test_nokogiri.rb +97 -0
- data/test/test_reader.rb +222 -0
- data/test/test_xslt_transforms.rb +93 -0
- data/test/xml/sax/test_parser.rb +95 -0
- data/test/xml/test_attr.rb +15 -0
- data/test/xml/test_builder.rb +16 -0
- data/test/xml/test_cdata.rb +18 -0
- data/test/xml/test_comment.rb +16 -0
- data/test/xml/test_document.rb +195 -0
- data/test/xml/test_dtd.rb +43 -0
- data/test/xml/test_node.rb +394 -0
- data/test/xml/test_node_set.rb +143 -0
- data/test/xml/test_text.rb +13 -0
- data/test/xml/test_xpath.rb +105 -0
- data/vendor/hoe.rb +1020 -0
- metadata +233 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
# Round-trip checks for the paths a node reports about itself.
class TestParser < Nokogiri::TestCase
  include Nokogiri

  # For each selector, look up a node in the BASIC fixture and verify that
  # searching the document again with the node's own css_path and xpath
  # returns an equal node. The generated path doubles as the failure message.
  def test_roundtrip
    @basic = Hpricot.parse(TestFiles::BASIC)
    selectors = %w[link link[2] body #link1 a p.ohmy]
    selectors.each do |selector|
      node = @basic.at(selector)

      css = node.css_path
      assert_equal(node, @basic.at(css), css)

      xpath = node.xpath
      assert_equal(node, @basic.at(xpath), xpath)
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
# Hpricot-compatibility tests for how markup survives a parse/serialize cycle.
# Several cases inherited from Hpricot are kept commented out below, each with
# the reason it is disabled.
class TestPreserved < Nokogiri::TestCase
  # Parses +str+ in Hpricot mode, optionally yields the document so the caller
  # can alter it, then compares the input with doc.to_original_html pairwise
  # (both sides are splatted into arrays before zipping).
  def assert_roundtrip str
    doc = Nokogiri.Hpricot(str)
    yield doc if block_given?
    str2 = doc.to_original_html
    [*str].zip([*str2]).each do |s1, s2|
      assert_equal s1, s2
    end
  end

  # Parses +str2+ in Hpricot mode, optionally yields the document for
  # modification, and asserts that doc.to_original_html equals +str1+.
  def assert_html str1, str2
    doc = Nokogiri.Hpricot(str2)
    yield doc if block_given?
    assert_equal str1, doc.to_original_html
  end

  ####
  # Not supporting to_original_html
  #def test_simple
  #  str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
  #  assert_html str, str
  #  assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
  #    (doc/:p).set('class', 'new')
  #  end
  #end

  ####
  # Not supporting to_original_html
  #def test_parent
  #  str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
  #  assert_html str, str
  #  assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
  #    (doc/:head).remove
  #    (doc/:div).set('id', 'all')
  #    (doc/:p).wrap('<div></div>')
  #  end
  #end

  # Not really a valid test.  If libxml can figure out the encoding of the file,
  # it will use that encoding, otherwise it uses the &#xwhatever so that no data
  # is lost.
  #
  # libxml on OSX can't figure out the encoding, so this test passes.  linux
  # can figure out the encoding, so it fails.
  #def test_escaping_of_contents
  #  doc = Nokogiri.Hpricot(TestFiles::BOINGBOING)
  #  assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
  #end

  ####
  # Modified.  No.
  #def test_files
  #  assert_roundtrip TestFiles::BASIC
  #  assert_roundtrip TestFiles::BOINGBOING
  #  assert_roundtrip TestFiles::CY0
  #end

  ####
  # Modified..  When calling "to_html" on the document, proper html/doc tags
  # are produced too.
  def test_escaping_of_attrs
    # ampersands in URLs: the markup carries the escaped form (&amp;), the
    # attribute accessors return the unescaped value, and serializing the node
    # again must reproduce the escaped markup exactly.
    str = %{<a href="http://google.com/search?q=nokogiri&amp;l=en">Google</a>}
    link = Nokogiri.Hpricot(str).at(:a)
    assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
    assert_equal "http://google.com/search?q=nokogiri&l=en", link.get_attribute('href')
    assert_equal "http://google.com/search?q=nokogiri&l=en", link.raw_attributes['href']
    assert_equal str, link.to_html

    # alter the url; any %22 in the serialized output is mapped back to a
    # double quote before comparing
    link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
    assert_equal %{<a href="javascript:alert("AGGA-KA-BOO!")">Google</a>}, link.to_html.gsub(/%22/, '"')
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
# Hpricot::XML compatibility checks: element content, name casing and
# elements that happen to be called "text".
class TestParser < Nokogiri::TestCase
  include Nokogiri

  # normally, the link tags are empty HTML tags.
  # contributed by laudney.
  def test_normally_empty
    rss = "<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>"
    doc = Hpricot::XML(rss)
    title = (doc/:rss/:channel/:title).text
    link = (doc/:rss/:channel/:link).text
    assert_equal("this is title", title)
    assert_equal("http://fake.com", link)
  end

  # make sure XML doesn't get downcased
  def test_casing
    doc = Hpricot::XML(TestFiles::WHY)

    ### Modified.
    # I don't want to differentiate pseudo classes from namespaces.  If
    # you're parsing xml, use XPath.  That's what it's for.  :-P
    update_period = doc.at("//sy:updatePeriod")
    assert_equal("hourly", update_period.content)

    permalink_guids = doc/"guid[@isPermaLink]"
    assert_equal(1, permalink_guids.length)
  end

  # be sure tags named "text" are ok
  def test_text_tags
    feed = "<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>"
    doc = Hpricot::XML(feed)
    assert_equal("City Poisoned", (doc/"title").text)
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML
    module SAX
      # Drives HTML::SAX::Parser with a Doc handler and inspects the
      # start_elements / end_elements that handler exposes afterwards.
      class TestParser < Nokogiri::SAX::TestCase
        # A fresh parser, wired to a new Doc handler, for every test.
        def setup
          handler = Doc.new
          @parser = HTML::SAX::Parser.new(handler)
        end

        # Parsing the HTML fixture file is expected to report 1110 end tags.
        def test_parse_file
          @parser.parse_file(HTML_FILE)
          end_elements = @parser.document.end_elements
          assert_equal(1110, end_elements.length)
        end

        # Two bare paragraphs parsed from memory: the expected start events
        # include html and body in addition to the two p elements.
        def test_parse_document
          @parser.parse_memory("<p>Paragraph 1</p>\n<p>Paragraph 2</p>\n")
          expected = [["html", []], ["body", []], ["p", []], ["p", []]]
          assert_equal(expected, @parser.document.start_elements)
        end
      end
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML
    # Tests for the Nokogiri::HTML::Builder DSL. Each test builds a small
    # fragment and compares the serialized markup (with newlines removed or
    # chomped) against a literal.
    class TestBuilder < Nokogiri::TestCase
      # A hash passed to a tag method becomes that tag's attributes.
      def test_hash_as_attributes
        builder = Nokogiri::HTML::Builder.new do
          div(:id => 'awesome') {
            h1 "america"
          }
        end
        assert_equal('<div id="awesome"><h1>america</h1></div>',
                     builder.to_html.gsub(/\n/, ''))
      end

      # Markup characters given as text content must come out escaped:
      # "<", "&" and ">" are serialized as &lt;, &amp; and &gt;.
      def test_has_ampersand
        builder = Nokogiri::HTML::Builder.new do
          div.rad.thing! {
            text "<awe&some>"
            b "hello & world"
          }
        end
        assert_equal(
          '<div class="rad" id="thing">&lt;awe&amp;some&gt;<b>hello &amp; world</b></div>',
          builder.to_html.gsub(/\n/, ''))
      end

      # A text node and a child element can be mixed inside one block; the
      # text is escaped on output.
      def test_multi_tags
        builder = Nokogiri::HTML::Builder.new do
          div.rad.thing! {
            text "<awesome>"
            b "hello"
          }
        end
        assert_equal(
          '<div class="rad" id="thing">&lt;awesome&gt;<b>hello</b></div>',
          builder.doc.to_html.gsub(/\n/, ''))
      end

      # Chained .name / .name! calls set class and id, and a block can still
      # follow them.
      def test_attributes_plus_block
        builder = Nokogiri::HTML::Builder.new do
          div.rad.thing! {
            text "<awesome>"
          }
        end
        assert_equal('<div class="rad" id="thing">&lt;awesome&gt;</div>',
                     builder.doc.to_html.chomp)
      end

      # Chained .name / .name! calls set class and id, with the content given
      # as a string argument instead of a block.
      def test_builder_adds_attributes
        builder = Nokogiri::HTML::Builder.new do
          div.rad.thing! "tender div"
        end
        assert_equal('<div class="rad" id="thing">tender div</div>',
                     builder.doc.to_html.chomp)
      end

      # A single-letter tag name works like any other tag method.
      def test_bold_tag
        builder = Nokogiri::HTML::Builder.new do
          b "bold tag"
        end
        assert_equal('<b>bold tag</b>', builder.doc.to_html.chomp)
      end

      # Explicit html and body wrappers are emitted as written.
      def test_html_then_body_tag
        builder = Nokogiri::HTML::Builder.new do
          html {
            body {
              b "bold tag"
            }
          }
        end
        assert_equal('<html><body><b>bold tag</b></body></html>',
                     builder.doc.to_html.chomp)
      end

      # A method defined on the test instance (the block's defining context)
      # must be callable from inside the builder block.
      def test_instance_eval_with_delegation_to_block_context
        class << self
          def foo
            "foo!"
          end
        end

        builder = Nokogiri::HTML::Builder.new { text foo }
        assert builder.to_html.include?("foo!")
      end
    end
  end
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML
    # Tests for parsing and querying HTML documents. The heredoc bodies below
    # are deliberately flush-left so the markup handed to the parser carries
    # no leading whitespace.
    class TestDocument < Nokogiri::TestCase
      # Parse the shared HTML fixture once per test.
      def setup
        @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
      end

      # Nokogiri::HTML(...) used as a function yields an HTML document.
      def test_HTML_function
        document = Nokogiri::HTML(File.read(HTML_FILE))
        assert document.html?
      end

      # Searching a node set is relative to its nodes: only the p inside the
      # div is found.
      def test_relative_css
        markup = <<-eohtml
<html>
<body>
<div>
<p>inside div tag</p>
</div>
<p>outside div tag</p>
</body>
</html>
        eohtml
        html = Nokogiri::HTML(markup)
        paragraphs = html.search('div').search('p')
        assert_equal(1, paragraphs.length)
        assert_equal('inside div tag', paragraphs.first.inner_text)
      end

      # A comma-separated selector matches both element types.
      def test_multi_css
        markup = <<-eohtml
<html>
<body>
<div>
<p>p tag</p>
<a>a tag</a>
</div>
</body>
</html>
        eohtml
        html = Nokogiri::HTML(markup)
        matches = html.css('p, a')
        assert_equal(2, matches.length)
        contents = matches.map { |node| node.content }
        assert_equal(['a tag', 'p tag'].sort, contents.sort)
      end

      # inner_text of the div is the nested paragraph's text.
      def test_inner_text
        markup = <<-eohtml
<html>
<body>
<div>
<p>
Hello world!
</p>
</div>
</body>
</html>
        eohtml
        html = Nokogiri::HTML(markup)
        div = html.xpath('//div').first
        assert_equal('Hello world!', div.inner_text.strip)
      end

      # inner_html of the div is the paragraph markup (whitespace ignored).
      def test_inner_html
        markup = <<-eohtml
<html>
<body>
<div>
<p>
Hello world!
</p>
</div>
</body>
</html>
        eohtml
        html = Nokogiri::HTML(markup)
        div = html.xpath('//div').first
        assert_equal('<p>Helloworld!</p>', div.inner_html.gsub(/\s/, ''))
      end

      # HTML.fragment returns a node set containing just the div.
      def test_fragment
        markup = <<-eohtml
<div>
<b>Hello World</b>
</div>
        eohtml
        fragment = Nokogiri::HTML.fragment(markup)
        assert_equal(1, fragment.length)
        assert_equal('div', fragment.first.name)
        assert_match(/Hello World/, fragment.to_html)
      end

      # css called on a node only looks beneath that node.
      def test_relative_css_finder
        markup = <<-eohtml
<html>
<body>
<div class="red">
<p>
inside red
</p>
</div>
<div class="green">
<p>
inside green
</p>
</div>
</body>
</html>
        eohtml
        doc = Nokogiri::HTML(markup)
        red_divs = doc.css('div.red')
        assert_equal(1, red_divs.length)
        paragraphs = red_divs.first.css('p')
        assert_equal(1, paragraphs.length)
        assert_equal('inside red', paragraphs.first.text.strip)
      end

      # A class selector matches whole class tokens: "red" and "awesome red"
      # match, "notred" does not.
      def test_find_classes
        markup = <<-eohtml
<html>
<body>
<p class="red">RED</p>
<p class="awesome red">RED</p>
<p class="notred">GREEN</p>
<p class="green notred">GREEN</p>
</body>
</html>
        eohtml
        doc = Nokogiri::HTML(markup)
        reds = doc.css('.red')
        assert_equal(2, reds.length)
        texts = reds.map { |node| node.text }
        assert_equal(%w{ RED RED }, texts)
      end

      # An open IO object is accepted in place of a string.
      def test_parse_can_take_io
        html = File.open(HTML_FILE, 'rb') { |io| Nokogiri::HTML(io) }
        assert html.html?
      end

      # An HTML document reports html? and not xml?.
      def test_html?
        assert !@html.xml?
        assert @html.html?
      end

      # Both serialization entry points return something truthy.
      def test_serialize
        assert @html.serialize
        assert @html.to_html
      end
    end
  end
end
|
150
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
require 'nkf'
|
4
|
+
|
5
|
+
module Nokogiri
  module HTML
    class TestNode < Nokogiri::TestCase
      # Runs a small document through NKF with "-e --msdos" (presumably EUC-JP
      # output with CRLF line endings; the expected "\r\n" below is consistent
      # with that) and checks that the serialized paragraph keeps the raw
      # "\r\n" rather than an entity. Spaces are removed before comparing.
      # The heredoc body is flush-left on purpose.
      def test_to_html_does_not_contain_entities
        source = <<-EOH
<html><body>
<p> test paragraph
foo bar </p>
</body></html>
        EOH
        html = NKF.nkf("-e --msdos", source)
        nokogiri = Nokogiri::HTML.parse(html)
        paragraph = nokogiri.at("p").to_html.gsub(/ /, '')
        assert_equal("<p>testparagraph\r\nfoobar</p>", paragraph)
      end
    end
  end
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
2
|
+
|
3
|
+
# Hpricot is optional: HAS_HPRICOT records whether it (and rubygems) could be
# loaded. It is true when both requires succeed and false on LoadError.
HAS_HPRICOT =
  begin
    require 'rubygems'
    require 'hpricot'
    true
  rescue LoadError
    false
  end
|
10
|
+
|
11
|
+
# Checks that Hpricot-style selectors are converted to the expected XPath and
# that the converted XPath finds the expected content in the HTML fixture.
# When the hpricot gem is available (HAS_HPRICOT), the same selector is also
# run through Hpricot itself for comparison.
class TestConvertXPath < Nokogiri::TestCase

  # Reads the fixture once and parses it with each library under test.
  def setup
    markup = File.read(HTML_FILE)
    @N  = Nokogiri(markup)
    @NH = Nokogiri.Hpricot(markup) # decorated document
    @H  = Hpricot(markup) if HAS_HPRICOT
  end

  # Asserts three things about a selector pair:
  # * searching the plain document with +xpath+ gives +match+,
  # * searching with Hpricot (if loaded) using +hpath+ gives +match+,
  # * the decorated document converts +hpath+ to exactly [+xpath+].
  #
  # The optional block receives the search result and extracts the value to
  # compare with +match+; by default the first element is used.
  def assert_syntactical_equivalence(hpath, xpath, match, &blk)
    blk ||= lambda {|j| j.first}
    assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
    if HAS_HPRICOT
      assert_equal match, blk.call(@H.search(hpath)), "hpath result did not match"
    end
    assert_equal [xpath], @NH.convert_to_xpath(hpath), "converted hpath did not match xpath"
  end

  def test_ordinary_xpath_conversions
    assert_equal(".//p", @NH.convert_to_xpath("p").first)
    assert_equal(".//p", @NH.convert_to_xpath(:p).first)
    assert_equal(".//p", @NH.convert_to_xpath("//p").first)
    assert_equal(".//p", @NH.convert_to_xpath(".//p").first)
  end

  def test_child_tag
    assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  def test_child_tag_equals
    assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  def test_filter_contains
    assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]",
                                   "Tender Lovemaking ") do |j|
      j.inner_text
    end
  end

  def test_filter_comment
    assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "<!-- end of header -->") do |j|
      j.first.to_s
    end
  end

  def test_filter_text
    assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
      j.first.inner_text
    end
  end

  def test_filter_by_attr
    assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']",
                                   ".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']",
                                   "http://blog.geminigeek.com/wordpress-theme") do |j|
      j.first["href"]
    end
  end

  def test_css_id
    assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
    assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
  end

  def test_css_class
    assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
                                   "cat-item cat-item-15") do |j|
      j.first["class"]
    end
    assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
                                   "cat-item cat-item-15") do |j|
      j.first["class"]
    end
  end

  def test_css_tags
    assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
      j.first.inner_text
    end
  end

  def test_positional
    ##
    # we are intentionally NOT staying compatible with nth-and-friends, as Hpricot has an OB1 bug.
    #
    # assert_syntactical_equivalence("div > div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:nth(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:nth-of-type(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
    assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
  end

  def test_multiple_filters
    assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
      j.first.inner_text
    end
  end

  def test_compat_mode_namespaces
    assert_equal(".//*[name()='t:sam']", @NH.convert_to_xpath("//t:sam").first)
    assert_equal(".//*[name()='t:sam'][@rel='bookmark'][1]", @NH.convert_to_xpath("//t:sam[@rel='bookmark'][1]").first)
  end

  ##
  # 'and' is not supported by hpricot
  # def test_and
  #   assert_syntactical_equivalence("div[h1 and small]", ".//div[h1 and small]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
  #     j.inner_text
  #   end
  # end

  # TODO:
  #       doc/'title ~ link' -> links that are siblings of title
  #       doc/'p[@class~="final"]' -> class includes string (whitespacy)
  #       doc/'p[text()*="final"]' -> class includes string (index) (broken: always returns true?)
  #       doc/'p[text()$="final"]' -> /final$/
  #       doc/'p[text()|="final"]' -> /^final$/
  #       doc/'p[text()^="final"]' -> string starts with 'final'
  #       nth_first
  #       nth_last
  #       even
  #       odd
  #       first-child, nth-child, last-child, nth-last-child, nth-last-of-type
  #       only-of-type, only-child
  #       parent
  #       empty
  #       root
end
|