nokogiri 1.3.3-java → 1.4.0-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +48 -3
- data/CHANGELOG.rdoc +42 -0
- data/Manifest.txt +44 -29
- data/README.ja.rdoc +0 -2
- data/README.rdoc +4 -7
- data/Rakefile +42 -6
- data/bin/nokogiri +7 -5
- data/ext/nokogiri/extconf.rb +5 -21
- data/ext/nokogiri/html_document.c +14 -50
- data/ext/nokogiri/html_element_description.c +7 -7
- data/ext/nokogiri/html_entity_lookup.c +6 -4
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/nokogiri.c +9 -3
- data/ext/nokogiri/nokogiri.h +16 -20
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +6 -5
- data/ext/nokogiri/xml_comment.c +3 -2
- data/ext/nokogiri/xml_document.c +93 -23
- data/ext/nokogiri/xml_document_fragment.c +1 -3
- data/ext/nokogiri/xml_dtd.c +63 -6
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_io.c +10 -3
- data/ext/nokogiri/xml_io.h +1 -0
- data/ext/nokogiri/xml_namespace.c +2 -2
- data/ext/nokogiri/xml_node.c +139 -34
- data/ext/nokogiri/xml_node.h +0 -1
- data/ext/nokogiri/xml_node_set.c +23 -16
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +78 -50
- data/ext/nokogiri/xml_sax_parser.c +109 -168
- data/ext/nokogiri/xml_sax_parser.h +33 -0
- data/ext/nokogiri/xml_sax_parser_context.c +155 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +11 -6
- data/ext/nokogiri/xml_syntax_error.c +63 -12
- data/ext/nokogiri/xml_text.c +4 -3
- data/ext/nokogiri/xml_xpath.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +12 -25
- data/ext/nokogiri/xslt_stylesheet.c +3 -3
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/nokogiri.rb +4 -4
- data/lib/nokogiri/css/node.rb +1 -9
- data/lib/nokogiri/css/xpath_visitor.rb +11 -21
- data/lib/nokogiri/ffi/html/document.rb +0 -9
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +4 -2
- data/lib/nokogiri/ffi/libxml.rb +44 -10
- data/lib/nokogiri/ffi/structs/common_node.rb +1 -1
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +3 -1
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +4 -3
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +1 -1
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/comment.rb +2 -2
- data/lib/nokogiri/ffi/xml/document.rb +29 -12
- data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -5
- data/lib/nokogiri/ffi/xml/dtd.rb +14 -3
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/node.rb +45 -5
- data/lib/nokogiri/ffi/xml/node_set.rb +1 -1
- data/lib/nokogiri/ffi/xml/reader.rb +45 -24
- data/lib/nokogiri/ffi/xml/sax/parser.rb +27 -34
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +5 -4
- data/lib/nokogiri/ffi/xml/syntax_error.rb +31 -16
- data/lib/nokogiri/ffi/xml/text.rb +2 -2
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/document.rb +39 -24
- data/lib/nokogiri/html/sax/parser.rb +2 -2
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +6 -1
- data/lib/nokogiri/xml/attr.rb +5 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +121 -13
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +43 -29
- data/lib/nokogiri/xml/document_fragment.rb +26 -6
- data/lib/nokogiri/xml/dtd.rb +5 -5
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +22 -11
- data/lib/nokogiri/xml/namespace.rb +6 -0
- data/lib/nokogiri/xml/node.rb +33 -15
- data/lib/nokogiri/xml/node_set.rb +66 -44
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/reader.rb +8 -0
- data/lib/nokogiri/xml/sax.rb +1 -1
- data/lib/nokogiri/xml/sax/document.rb +18 -1
- data/lib/nokogiri/xml/sax/parser.rb +15 -8
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +0 -3
- data/lib/nokogiri/xml/syntax_error.rb +4 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/test/css/test_nthiness.rb +1 -1
- data/test/css/test_parser.rb +1 -1
- data/test/css/test_tokenizer.rb +1 -1
- data/test/css/test_xpath_visitor.rb +1 -1
- data/test/ffi/test_document.rb +1 -1
- data/test/files/shift_jis.html +10 -0
- data/test/files/staff.dtd +10 -0
- data/test/helper.rb +12 -3
- data/test/html/sax/test_parser.rb +1 -1
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +8 -2
- data/test/html/test_document.rb +23 -1
- data/test/html/test_document_encoding.rb +15 -1
- data/test/html/test_document_fragment.rb +10 -1
- data/test/html/test_element_description.rb +1 -2
- data/test/html/test_named_characters.rb +1 -1
- data/test/html/test_node.rb +61 -1
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +1 -3
- data/test/test_css_cache.rb +1 -1
- data/test/test_gc.rb +1 -1
- data/test/test_memory_leak.rb +1 -1
- data/test/test_nokogiri.rb +3 -3
- data/test/test_reader.rb +29 -1
- data/test/test_xslt_transforms.rb +1 -1
- data/test/xml/node/test_save_options.rb +1 -1
- data/test/xml/node/test_subclass.rb +1 -1
- data/test/xml/sax/test_parser.rb +64 -3
- data/test/xml/sax/test_parser_context.rb +56 -0
- data/test/xml/sax/test_push_parser.rb +11 -1
- data/test/xml/test_attr.rb +1 -1
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +95 -1
- data/test/xml/test_cdata.rb +1 -1
- data/test/xml/test_comment.rb +7 -1
- data/test/xml/test_document.rb +147 -6
- data/test/xml/test_document_encoding.rb +1 -1
- data/test/xml/test_document_fragment.rb +55 -5
- data/test/xml/test_dtd.rb +40 -5
- data/test/xml/test_dtd_encoding.rb +3 -1
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +1 -1
- data/test/xml/test_namespace.rb +21 -1
- data/test/xml/test_node.rb +70 -4
- data/test/xml/test_node_attributes.rb +1 -1
- data/test/xml/test_node_encoding.rb +1 -1
- data/test/xml/test_node_set.rb +136 -2
- data/test/xml/test_parse_options.rb +1 -1
- data/test/xml/test_processing_instruction.rb +1 -1
- data/test/xml/test_reader_encoding.rb +1 -1
- data/test/xml/test_relax_ng.rb +1 -1
- data/test/xml/test_schema.rb +1 -1
- data/test/xml/test_syntax_error.rb +27 -0
- data/test/xml/test_text.rb +13 -1
- data/test/xml/test_unparented_node.rb +1 -1
- data/test/xml/test_xpath.rb +1 -1
- metadata +100 -78
- data/ext/nokogiri/html_sax_parser.c +0 -57
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/lib/action-nokogiri.rb +0 -38
- data/lib/nokogiri/decorators.rb +0 -2
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -30
- data/lib/nokogiri/ffi/html/sax/parser.rb +0 -21
- data/lib/nokogiri/hpricot.rb +0 -92
- data/lib/nokogiri/xml/entity_declaration.rb +0 -11
- data/lib/nokogiri/xml/sax/legacy_handlers.rb +0 -65
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -11
- data/test/hpricot/test_alter.rb +0 -68
- data/test/hpricot/test_builder.rb +0 -20
- data/test/hpricot/test_parser.rb +0 -350
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -77
- data/test/hpricot/test_xml.rb +0 -30
data/test/hpricot/files/why.xml
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
<?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
|
2
|
-
<channel>
|
3
|
-
<title>why the lucky stiff</title>
|
4
|
-
<link>http://whytheluckystiff.net</link>
|
5
|
-
<description>hex-editing reality to give us infinite grenades!!</description>
|
6
|
-
<dc:language>en-us</dc:language>
|
7
|
-
<dc:creator/>
|
8
|
-
<dc:date>2007-01-16T22:39:04+00:00</dc:date>
|
9
|
-
<admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
|
10
|
-
<sy:updatePeriod>hourly</sy:updatePeriod>
|
11
|
-
<sy:updateFrequency>1</sy:updateFrequency>
|
12
|
-
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
|
13
|
-
<item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description><blockquote>
|
14
|
-
<p>That cadillac of yours and that driver of yours!<br />You and your teacups rattling away in the back seat!<br />You always took the mike, oh, and all those cowboys you shot!<br />I held your hand! And I&#8217;ll shoot a cowboy one day!</p>
|
15
|
-
</blockquote>
|
16
|
-
<blockquote>
|
17
|
-
<p>You said, &#8220;Let&#8217;s run into the woods like kids!&#8221; <br />You said, &#8220;Let&#8217;s rub our hands together super-hot!&#8221; <br />And we scalded the trees and left octagons, I think that was you and<br />You threw parties on the roof!</p>
|
18
|
-
</blockquote></description></item></channel>
|
19
|
-
</rss>
|
data/test/hpricot/load_files.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
module TestFiles
|
2
|
-
Dir.chdir(File.dirname(__FILE__)) do
|
3
|
-
Dir['files/*.{html,xhtml,xml}'].each do |fname|
|
4
|
-
if RUBY_VERSION >= '1.9'
|
5
|
-
const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.open(fname, 'r:ascii-8bit') { |f| f.read }
|
6
|
-
else
|
7
|
-
const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.read(fname)
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
11
|
-
end
|
data/test/hpricot/test_alter.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
-
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
-
|
4
|
-
class TestAlter < Nokogiri::TestCase
|
5
|
-
include Nokogiri
|
6
|
-
|
7
|
-
def setup
|
8
|
-
super
|
9
|
-
@basic = Nokogiri::HTML.parse(TestFiles::BASIC)
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_before
|
13
|
-
test0 = "<link rel='stylesheet' href='test0.css' />"
|
14
|
-
@basic.at("link").before(test0)
|
15
|
-
assert_equal 'test0.css', @basic.at("link").attributes['href'].to_s
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_after
|
19
|
-
test_inf = "<link rel='stylesheet' href='test_inf.css' />"
|
20
|
-
@basic.search("link")[-1].after(test_inf)
|
21
|
-
assert_equal 'test_inf.css', @basic.search("link")[-1]['href']
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_wrap
|
25
|
-
ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
|
26
|
-
assert_equal 'wrapper', ohmy[0].parent['id']
|
27
|
-
assert_equal 'ohmy', Nokogiri(@basic.to_html).at("#wrapper").children[0]['class']
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_add_class
|
31
|
-
first_p = (@basic/"p:first").add_class("testing123")
|
32
|
-
assert first_p[0].get_attribute("class").split(" ").include?("testing123")
|
33
|
-
assert((Nokogiri(@basic.to_html)/"p:first")[0]["class"].split(" ").include?("testing123"))
|
34
|
-
####
|
35
|
-
# Modified. We do not support OB1 bug.
|
36
|
-
assert !(Nokogiri(@basic.to_html)/"p:gt(1)")[0]["class"].split(" ").include?("testing123")
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_change_attributes
|
40
|
-
all_ps = (@basic/"p").attr("title", "Some Title")
|
41
|
-
all_as = (@basic/"a").attr("href", "http://my_new_href.com")
|
42
|
-
all_lb = (@basic/"link").attr("href") { |e| e.name }
|
43
|
-
GC.start # try to shake out GC bugs with xpath and node sets.
|
44
|
-
assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"].to_s == "Some Title"}
|
45
|
-
assert_changed(@basic, "a", all_as) {|a| a.attributes["href"].to_s == "http://my_new_href.com"}
|
46
|
-
assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"].to_s == "link" }
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_remove_attr
|
50
|
-
all_rl = (@basic/"link").remove_attr("href")
|
51
|
-
assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_remove_class
|
55
|
-
all_c1 = (@basic/"p[@class*='last']").remove_class("last")
|
56
|
-
assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_remove_all_classes
|
60
|
-
all_c2 = (@basic/"p[@class]").remove_class
|
61
|
-
assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
|
62
|
-
end
|
63
|
-
|
64
|
-
def assert_changed original, selector, set, &block
|
65
|
-
assert set.all?(&block)
|
66
|
-
assert Nokogiri(original.to_html).search(selector).all?(&block)
|
67
|
-
end
|
68
|
-
end
|
data/test/hpricot/test_builder.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
-
|
3
|
-
class TestBuilder < Nokogiri::TestCase
|
4
|
-
####
|
5
|
-
# Modified
|
6
|
-
def test_escaping_text
|
7
|
-
doc = Nokogiri() { b "<a\"b>" }
|
8
|
-
assert_match "<b><a\"b></b>", doc.to_html.chomp
|
9
|
-
assert_equal %{<a\"b>}, doc.at("text()").to_s
|
10
|
-
end
|
11
|
-
|
12
|
-
####
|
13
|
-
# Modified
|
14
|
-
def test_no_escaping_text
|
15
|
-
doc = Nokogiri() { div.test.me! { text "<a\"b>" } }
|
16
|
-
assert_match %{<div class="test" id="me"><a"b></div>},
|
17
|
-
doc.to_html.chomp
|
18
|
-
assert_equal %{<a\"b>}, doc.at("text()").to_s
|
19
|
-
end
|
20
|
-
end
|
data/test/hpricot/test_parser.rb
DELETED
@@ -1,350 +0,0 @@
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
-
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
-
|
4
|
-
class TestParser < Nokogiri::TestCase
|
5
|
-
include Nokogiri
|
6
|
-
|
7
|
-
def test_set_attr
|
8
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
9
|
-
@basic.search('//p').set('class', 'para')
|
10
|
-
assert_equal 4, @basic.search('//p').length
|
11
|
-
assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_filter_by_attr
|
15
|
-
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
16
|
-
|
17
|
-
# this link is escaped in the doc
|
18
|
-
link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
|
19
|
-
assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_filter_contains
|
23
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
24
|
-
assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s.chomp
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_get_element_by_id
|
28
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
29
|
-
assert_equal 'link1', @basic.at('#link1')['id']
|
30
|
-
assert_equal 'link1', @basic.at('#body1').at('#link1')['id']
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_get_element_by_tag_name
|
34
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
35
|
-
assert_equal 'link1', @basic.at('a')['id']
|
36
|
-
assert_equal 'link1', @basic.at('body').at('#link1')['id']
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_output_basic
|
40
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
41
|
-
@basic2 = Nokogiri.parse(@basic.inner_html)
|
42
|
-
scan_basic @basic2
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_scan_basic
|
46
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
47
|
-
scan_basic @basic
|
48
|
-
end
|
49
|
-
|
50
|
-
def scan_basic doc
|
51
|
-
assert_not_equal doc.children.first.to_s, doc.children[1].to_s
|
52
|
-
assert_equal 'link1', doc.at('#link1')['id']
|
53
|
-
assert_equal 'link1', doc.at("p a")['id']
|
54
|
-
assert_equal 'link1', (doc/:p/:a).first['id']
|
55
|
-
assert_equal 'link1', doc.search('p').at('a')['id']
|
56
|
-
|
57
|
-
assert_equal 'link2', (doc/'p').css('.ohmy').search('a').first['id']
|
58
|
-
assert_equal((doc/'p')[2], (doc/'p').css('[text()="The third paragraph"]')[0])
|
59
|
-
assert_equal 3, (doc/'p:not(.ohmy)').length
|
60
|
-
|
61
|
-
assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
|
62
|
-
assert_equal 2, (doc/'p > a').length
|
63
|
-
assert_equal 1, (doc/'p.ohmy > a').length
|
64
|
-
assert_equal 2, (doc/'p / a').length
|
65
|
-
assert_equal 2, (doc/'link ~ link').length
|
66
|
-
assert_equal 3, (doc/'title ~ link').length
|
67
|
-
assert_equal 5, (doc/"//p/text()").length
|
68
|
-
assert_equal 6, (doc/"//p[a]//text()").length
|
69
|
-
assert_equal 2, (doc/"//p/a/text()").length
|
70
|
-
end
|
71
|
-
|
72
|
-
def test_positional
|
73
|
-
h = Nokogiri( "<div><br/><p>one</p><p>two</p></div>" )
|
74
|
-
assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s.chomp # MODIFIED: eq(0) -> eq(1), and removed initial '//'
|
75
|
-
assert_equal "<p>one</p>", h.search("div/p:first").to_s.chomp # MODIFIED: removed initial '//'
|
76
|
-
assert_equal "<p>one</p>", h.search("div/p:first()").to_s.chomp # MODIFIED: removed initial '//'
|
77
|
-
end
|
78
|
-
|
79
|
-
def test_pace
|
80
|
-
doc = Nokogiri(TestFiles::PACE_APPLICATION)
|
81
|
-
assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
|
82
|
-
end
|
83
|
-
|
84
|
-
def test_scan_boingboing
|
85
|
-
@boingboing = Nokogiri.HTML(TestFiles::BOINGBOING)
|
86
|
-
assert_equal 60, (@boingboing/'p.posted').length
|
87
|
-
assert_equal 1, @boingboing.search("//a[@name='027906']").length
|
88
|
-
assert_equal 3, @boingboing.search("a[text()*='Boing']").length
|
89
|
-
assert_equal 1, @boingboing.search(
|
90
|
-
"//h3[normalize-space(text())='College kids reportedly taking more smart drugs']"
|
91
|
-
).length
|
92
|
-
assert_equal 0, @boingboing.search("h3[text()='College']").length
|
93
|
-
assert_equal 60, @boingboing.search("h3").length
|
94
|
-
assert_equal 59, @boingboing.search("//h3[normalize-space(text())!='College kids reportedly taking more smart drugs']").length
|
95
|
-
assert_equal 211, @boingboing.search("p").length
|
96
|
-
end
|
97
|
-
|
98
|
-
def test_reparent
|
99
|
-
doc = Nokogiri(%{<div id="blurb_1"></div>})
|
100
|
-
div1 = doc.search('#blurb_1')
|
101
|
-
div1.before('<div id="blurb_0"></div>')
|
102
|
-
|
103
|
-
div0 = doc.search('#blurb_0')
|
104
|
-
div0.before('<div id="blurb_a"></div>')
|
105
|
-
|
106
|
-
assert_equal 'div', doc.at('#blurb_1').name
|
107
|
-
end
|
108
|
-
|
109
|
-
def test_siblings
|
110
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
111
|
-
t = @basic.at(:title)
|
112
|
-
e = t.next_sibling
|
113
|
-
assert_equal 'test1.css', e['href']
|
114
|
-
assert_equal 'title', e.previous_sibling.name
|
115
|
-
end
|
116
|
-
|
117
|
-
def test_css_negation
|
118
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
119
|
-
assert_equal 3, (@basic/'p:not(.final)').length
|
120
|
-
end
|
121
|
-
|
122
|
-
def test_remove_attribute
|
123
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
124
|
-
(@basic/:p).each { |ele| ele.remove_attribute('class') }
|
125
|
-
assert_equal 0, (@basic/'p[@class]').length
|
126
|
-
end
|
127
|
-
|
128
|
-
def test_abs_xpath
|
129
|
-
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
130
|
-
assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
|
131
|
-
assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
|
132
|
-
assert_equal 18, @boingboing.search("//script").length
|
133
|
-
divs = @boingboing.search("//script/../div")
|
134
|
-
assert_equal 2, divs.length
|
135
|
-
imgs = @boingboing.search('//div/p/a/img')
|
136
|
-
assert_equal 12, imgs.length
|
137
|
-
assert_equal 16, @boingboing.search('//div').search('p/a/img').length
|
138
|
-
assert imgs.all? { |x| x.name == 'img' }
|
139
|
-
end
|
140
|
-
|
141
|
-
def test_predicates
|
142
|
-
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
143
|
-
assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
|
144
|
-
p_imgs = @boingboing.search('//div/p[/a/img]')
|
145
|
-
#assert_equal 15, p_imgs.length
|
146
|
-
assert p_imgs.all? { |x| x.name == 'p' }
|
147
|
-
p_imgs = @boingboing.search('//div/p[a/img]')
|
148
|
-
assert_equal 12, p_imgs.length
|
149
|
-
assert p_imgs.all? { |x| x.name == 'p' }
|
150
|
-
assert_equal 1, @boingboing.search('//input[@checked]').length
|
151
|
-
end
|
152
|
-
|
153
|
-
def test_tag_case
|
154
|
-
@tenderlove = Nokogiri.parse(TestFiles::TENDERLOVE)
|
155
|
-
assert_equal 2, @tenderlove.search('//a').length
|
156
|
-
assert_equal 3, @tenderlove.search('//area').length
|
157
|
-
assert_equal 2, @tenderlove.search('//meta').length
|
158
|
-
end
|
159
|
-
|
160
|
-
def test_alt_predicates
|
161
|
-
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
162
|
-
assert_equal 2, @boingboing.search('table/tr:last').length
|
163
|
-
|
164
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
165
|
-
assert_equal "<p>The third paragraph</p>",
|
166
|
-
@basic.search('p:eq(3)').to_html.chomp
|
167
|
-
@basic.search('p:last').to_html.gsub(/\s+/,' ').gsub(/>\s*</, '><')
|
168
|
-
assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class')
|
169
|
-
end
|
170
|
-
|
171
|
-
def test_insert_after # ticket #63
|
172
|
-
doc = Nokogiri('<html><body><div id="a-div"></div></body></html>')
|
173
|
-
(doc/'div').each do |element|
|
174
|
-
element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
|
175
|
-
end
|
176
|
-
assert_match '<div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p>',
|
177
|
-
doc.to_html.gsub(/\n/, '').gsub(/>\s*</, '><')
|
178
|
-
end
|
179
|
-
|
180
|
-
def test_insert_before # ticket #61
|
181
|
-
doc = Nokogiri.HTML('<html><body><div id="a-div"></div></body></html>')
|
182
|
-
(doc/'div').each do |element|
|
183
|
-
element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
|
184
|
-
end
|
185
|
-
assert_match '<p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div>',
|
186
|
-
doc.to_html.gsub(/\n/, '').gsub(/>\s*</, '><')
|
187
|
-
end
|
188
|
-
|
189
|
-
def test_many_paths
|
190
|
-
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
191
|
-
assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
|
192
|
-
end
|
193
|
-
|
194
|
-
def test_class_search
|
195
|
-
doc = Nokogiri.HTML("<div class=xyz '>abc</div>")
|
196
|
-
assert_equal 1, doc.search(".xyz").length
|
197
|
-
|
198
|
-
doc = Nokogiri.HTML("<div class=xyz>abc</div><div class=abc>xyz</div>")
|
199
|
-
assert_equal 1, doc.search(".xyz").length
|
200
|
-
assert_equal 4, doc.search("*").length
|
201
|
-
end
|
202
|
-
|
203
|
-
def test_kleene_star
|
204
|
-
# bug noticed by raja bhatia
|
205
|
-
doc = Nokogiri.HTML("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
|
206
|
-
assert_equal 2, doc.search("*[@class*='small']").length
|
207
|
-
assert_equal 2, doc.search("*.small").length
|
208
|
-
assert_equal 2, doc.search(".small").length
|
209
|
-
assert_equal 2, doc.search(".large").length
|
210
|
-
end
|
211
|
-
|
212
|
-
def test_empty_comment
|
213
|
-
doc = Nokogiri.HTML("<p><!----></p>")
|
214
|
-
doc = doc.search('//body').first
|
215
|
-
assert doc.children[0].children[0].comment?
|
216
|
-
|
217
|
-
doc = Nokogiri.HTML("<p><!-- --></p>")
|
218
|
-
doc = doc.search('//body').first
|
219
|
-
assert doc.children[0].children[0].comment?
|
220
|
-
end
|
221
|
-
|
222
|
-
def test_body_newlines
|
223
|
-
@immob = Nokogiri.parse(TestFiles::IMMOB)
|
224
|
-
body = @immob.at(:body)
|
225
|
-
{'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
|
226
|
-
'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
|
227
|
-
'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
|
228
|
-
assert_equal v, body[k]
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
def test_nested_twins
|
233
|
-
@doc = Nokogiri("<div>Hi<div>there</div></div>")
|
234
|
-
assert_equal 1, (@doc/"div div").length
|
235
|
-
end
|
236
|
-
|
237
|
-
def test_wildcard
|
238
|
-
@basic = Nokogiri::HTML.parse(TestFiles::BASIC)
|
239
|
-
assert_equal 3, (@basic/"*[@id]").length
|
240
|
-
assert_equal 3, (@basic/"//*[@id]").length
|
241
|
-
end
|
242
|
-
|
243
|
-
def test_javascripts
|
244
|
-
@immob = Nokogiri::HTML.parse(TestFiles::IMMOB)
|
245
|
-
assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
|
246
|
-
end
|
247
|
-
|
248
|
-
####
|
249
|
-
# Modified. This test passes with later versions of libxml
|
250
|
-
def test_nested_scripts
|
251
|
-
@week9 = Nokogiri.parse(TestFiles::WEEK9)
|
252
|
-
unless Nokogiri::LIBXML_VERSION == '2.6.16'
|
253
|
-
assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
def test_uswebgen
|
258
|
-
@uswebgen = HTML.parse(TestFiles::USWEBGEN)
|
259
|
-
# sent by brent beardsley, nokogiri 0.3 had problems with all the links.
|
260
|
-
assert_equal 67, (@uswebgen/:a).length
|
261
|
-
end
|
262
|
-
|
263
|
-
def test_mangled_tags
|
264
|
-
[%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
|
265
|
-
%{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
|
266
|
-
%{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
|
267
|
-
%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
|
268
|
-
each do |str|
|
269
|
-
doc = Nokogiri(str)
|
270
|
-
assert_equal 1, (doc/:form).length
|
271
|
-
assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
|
272
|
-
end
|
273
|
-
end
|
274
|
-
|
275
|
-
####
|
276
|
-
# Modified. Added question. Don't care.
|
277
|
-
def test_procins
|
278
|
-
doc = Nokogiri.HTML("<?php print('hello') ?>\n<?xml blah='blah'?>")
|
279
|
-
assert_equal "php", doc.children[1].name
|
280
|
-
assert_equal "blah='blah'?", doc.children[2].content #"# quote added so emacs ruby-mode parser doesn't barf
|
281
|
-
end
|
282
|
-
|
283
|
-
####
|
284
|
-
# Altered... libxml does not get a buffer error
|
285
|
-
def test_buffer_error
|
286
|
-
assert_nothing_raised {
|
287
|
-
Nokogiri(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
|
288
|
-
}
|
289
|
-
end
|
290
|
-
|
291
|
-
def test_youtube_attr
|
292
|
-
str = <<-edoc
|
293
|
-
<html><body>
|
294
|
-
Lorem ipsum. Jolly roger, ding-dong sing-a-long
|
295
|
-
<object width="425" height="350">
|
296
|
-
<param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
|
297
|
-
<param name="wmode" value="transparent"></param>
|
298
|
-
<embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
|
299
|
-
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
|
300
|
-
</embed>
|
301
|
-
</object>
|
302
|
-
Check out my posting, I have bright mice in large clown cars.
|
303
|
-
<object width="425" height="350">
|
304
|
-
<param name="movie" value="http://www.youtube.com/v/foobar"></param>
|
305
|
-
<param name="wmode" value="transparent"></param>
|
306
|
-
<embed src="http://www.youtube.com/v/foobar"
|
307
|
-
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
|
308
|
-
</embed>
|
309
|
-
</object>
|
310
|
-
</body></html?
|
311
|
-
edoc
|
312
|
-
doc = Nokogiri(str)
|
313
|
-
assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
|
314
|
-
doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
|
315
|
-
end
|
316
|
-
|
317
|
-
# ticket #84 by jamezilla
|
318
|
-
def test_screwed_xmlns
|
319
|
-
doc = Nokogiri(<<-edoc)
|
320
|
-
<?xml:namespace prefix = cwi />
|
321
|
-
<html><body>HAI</body></html>
|
322
|
-
edoc
|
323
|
-
assert_equal "HAI", doc.at("body").inner_text
|
324
|
-
end
|
325
|
-
|
326
|
-
def test_filters
|
327
|
-
@basic = Nokogiri.parse(TestFiles::BASIC)
|
328
|
-
assert_equal 1, (@basic/"title:parent").size
|
329
|
-
assert_equal 4, (@basic/"p:parent").size
|
330
|
-
assert_equal 0, (@basic/"title:empty").size
|
331
|
-
assert_equal 3, (@basic/"link:empty").size
|
332
|
-
end
|
333
|
-
|
334
|
-
def test_keep_cdata
|
335
|
-
str = %{<script> /*<![CDATA[*/
|
336
|
-
/*]]>*/ </script>}
|
337
|
-
# MODIFIED: if you want the cdata, to_xml it
|
338
|
-
assert_match str, Nokogiri(str).to_xml
|
339
|
-
end
|
340
|
-
|
341
|
-
def test_namespace
|
342
|
-
chunk = <<-END
|
343
|
-
<a xmlns:t="http://www.nexopia.com/dev/template">
|
344
|
-
<t:sam>hi </t:sam>
|
345
|
-
</a>
|
346
|
-
END
|
347
|
-
doc = Nokogiri::XML(chunk)
|
348
|
-
assert((doc/"//t:sam").size > 0)
|
349
|
-
end
|
350
|
-
end
|