nokogiri 1.3.0-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +233 -0
- data/CHANGELOG.rdoc +222 -0
- data/Manifest.txt +247 -0
- data/README.ja.rdoc +103 -0
- data/README.rdoc +117 -0
- data/Rakefile +205 -0
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +89 -0
- data/ext/nokogiri/html_document.c +183 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +30 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser.c +57 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/nokogiri.c +81 -0
- data/ext/nokogiri/nokogiri.h +149 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +53 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +51 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +308 -0
- data/ext/nokogiri/xml_document.h +21 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +102 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +24 -0
- data/ext/nokogiri/xml_io.h +10 -0
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +928 -0
- data/ext/nokogiri/xml_node.h +14 -0
- data/ext/nokogiri/xml_node_set.c +386 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +572 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +336 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +86 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +203 -0
- data/ext/nokogiri/xml_syntax_error.h +12 -0
- data/ext/nokogiri/xml_text.c +47 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +252 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +36 -0
- data/lib/nokogiri.rb +110 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +748 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
- data/lib/nokogiri/css/node.rb +107 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +11 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +172 -0
- data/lib/nokogiri/decorators.rb +2 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +62 -0
- data/lib/nokogiri/html.rb +34 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +71 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +47 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +29 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +62 -0
- data/lib/nokogiri/xml/attr.rb +9 -0
- data/lib/nokogiri/xml/builder.rb +254 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/document.rb +100 -0
- data/lib/nokogiri/xml/document_fragment.rb +49 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/entity_declaration.rb +11 -0
- data/lib/nokogiri/xml/fragment_handler.rb +55 -0
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +745 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +238 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +66 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +3 -0
- data/lib/nokogiri/xml/sax/document.rb +143 -0
- data/lib/nokogiri/xml/sax/parser.rb +101 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +34 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +64 -0
- data/tasks/test.rb +161 -0
- data/test/css/test_nthiness.rb +160 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +176 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +123 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +11 -0
- data/test/hpricot/test_alter.rb +68 -0
- data/test/hpricot/test_builder.rb +20 -0
- data/test/hpricot/test_parser.rb +426 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +77 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +52 -0
- data/test/html/test_builder.rb +156 -0
- data/test/html/test_document.rb +361 -0
- data/test/html/test_document_encoding.rb +46 -0
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +165 -0
- data/test/test_convert_xpath.rb +186 -0
- data/test/test_css_cache.rb +56 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +127 -0
- data/test/test_reader.rb +316 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +169 -0
- data/test/xml/sax/test_push_parser.rb +92 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_builder.rb +73 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +23 -0
- data/test/xml/test_document.rb +397 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +76 -0
- data/test/xml/test_dtd.rb +42 -0
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +808 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +84 -0
- data/test/xml/test_node_set.rb +368 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +18 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +409 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
class TestParser < Nokogiri::TestCase
|
5
|
+
include Nokogiri
|
6
|
+
|
7
|
+
def test_roundtrip
|
8
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
9
|
+
%w[link link[2] body #link1 a p.ohmy].each do |css_sel|
|
10
|
+
ele = @basic.at(css_sel)
|
11
|
+
assert_equal ele, @basic.at(ele.css_path), ele.css_path
|
12
|
+
assert_equal ele, @basic.at(ele.xpath), ele.xpath
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
class TestPreserved < Nokogiri::TestCase
|
5
|
+
def assert_roundtrip str
|
6
|
+
doc = Nokogiri.Hpricot(str)
|
7
|
+
yield doc if block_given?
|
8
|
+
str2 = doc.to_original_html
|
9
|
+
[*str].zip([*str2]).each do |s1, s2|
|
10
|
+
assert_equal s1, s2
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def assert_html str1, str2
|
15
|
+
doc = Nokogiri.Hpricot(str2)
|
16
|
+
yield doc if block_given?
|
17
|
+
assert_equal str1, doc.to_original_html
|
18
|
+
end
|
19
|
+
|
20
|
+
####
|
21
|
+
# Not supporting to_original_html
|
22
|
+
#def test_simple
|
23
|
+
# str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
|
24
|
+
# assert_html str, str
|
25
|
+
# assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
|
26
|
+
# (doc/:p).set('class', 'new')
|
27
|
+
# end
|
28
|
+
#end
|
29
|
+
|
30
|
+
####
|
31
|
+
# Not supporting to_original_html
|
32
|
+
#def test_parent
|
33
|
+
# str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
|
34
|
+
# assert_html str, str
|
35
|
+
# assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
|
36
|
+
# (doc/:head).remove
|
37
|
+
# (doc/:div).set('id', 'all')
|
38
|
+
# (doc/:p).wrap('<div></div>')
|
39
|
+
# end
|
40
|
+
#end
|
41
|
+
|
42
|
+
# Not really a valid test. If libxml can figure out the encoding of the file,
|
43
|
+
# it will use that encoding, otherwise it uses the &#xwhatever so that no data
|
44
|
+
# is lost.
|
45
|
+
#
|
46
|
+
# libxml on OSX can't figure out the encoding, so this test passes. Linux
|
47
|
+
# can figure out the encoding, so it fails.
|
48
|
+
#def test_escaping_of_contents
|
49
|
+
# doc = Nokogiri.Hpricot(TestFiles::BOINGBOING)
|
50
|
+
# assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
|
51
|
+
#end
|
52
|
+
|
53
|
+
####
|
54
|
+
# Modified. No.
|
55
|
+
#def test_files
|
56
|
+
# assert_roundtrip TestFiles::BASIC
|
57
|
+
# assert_roundtrip TestFiles::BOINGBOING
|
58
|
+
# assert_roundtrip TestFiles::CY0
|
59
|
+
#end
|
60
|
+
|
61
|
+
####
|
62
|
+
# Modified.. When calling "to_html" on the document, proper html/doc tags
|
63
|
+
# are produced too.
|
64
|
+
def test_escaping_of_attrs
|
65
|
+
# ampersands in URLs
|
66
|
+
str = %{<a href="http://google.com/search?q=nokogiri&l=en">Google</a>}
|
67
|
+
link = (doc = Nokogiri.Hpricot(str)).at(:a)
|
68
|
+
assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
|
69
|
+
assert_equal "http://google.com/search?q=nokogiri&l=en", link.get_attribute('href')
|
70
|
+
assert_equal "http://google.com/search?q=nokogiri&l=en", link.raw_attributes['href']
|
71
|
+
assert_equal str, link.to_html
|
72
|
+
|
73
|
+
# alter the url
|
74
|
+
link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
|
75
|
+
assert_equal %{<a href="javascript:alert("AGGA-KA-BOO!")">Google</a>}, link.to_html.gsub(/%22/, '"')
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
class TestParser < Nokogiri::TestCase
|
5
|
+
include Nokogiri
|
6
|
+
# normally, the link tags are empty HTML tags.
|
7
|
+
# contributed by laudney.
|
8
|
+
def test_normally_empty
|
9
|
+
doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
|
10
|
+
assert_equal "this is title", (doc/:rss/:channel/:title).text
|
11
|
+
assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
|
12
|
+
end
|
13
|
+
|
14
|
+
# make sure XML doesn't get downcased
|
15
|
+
def test_casing
|
16
|
+
doc = Hpricot::XML(TestFiles::WHY)
|
17
|
+
|
18
|
+
### Modified.
|
19
|
+
# I don't want to differentiate pseudo classes from namespaces. If
|
20
|
+
# you're parsing XML, use XPath. That's what it's for. :-P
|
21
|
+
assert_equal "hourly", (doc.at "//sy:updatePeriod").content
|
22
|
+
assert_equal 1, (doc/"guid[@isPermaLink]").length
|
23
|
+
end
|
24
|
+
|
25
|
+
# be sure tags named "text" are ok
|
26
|
+
def test_text_tags
|
27
|
+
doc = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
|
28
|
+
assert_equal "City Poisoned", (doc/"title").text
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML
|
5
|
+
module SAX
|
6
|
+
class TestParser < Nokogiri::SAX::TestCase
|
7
|
+
def setup
|
8
|
+
super
|
9
|
+
@parser = HTML::SAX::Parser.new(Doc.new)
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_parse_file
|
13
|
+
@parser.parse_file(HTML_FILE)
|
14
|
+
assert_equal 1110, @parser.document.end_elements.length
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_parse_file_nil_argument
|
18
|
+
assert_raises(ArgumentError) {
|
19
|
+
@parser.parse_file(nil)
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_parse_file_non_existant
|
24
|
+
assert_raise Errno::ENOENT do
|
25
|
+
@parser.parse_file('foo')
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_parse_file_with_dir
|
30
|
+
assert_raise Errno::EISDIR do
|
31
|
+
@parser.parse_file(File.dirname(__FILE__))
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_parse_memory_nil
|
36
|
+
assert_raise ArgumentError do
|
37
|
+
@parser.parse_memory(nil)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_parse_document
|
42
|
+
@parser.parse_memory(<<-eoxml)
|
43
|
+
<p>Paragraph 1</p>
|
44
|
+
<p>Paragraph 2</p>
|
45
|
+
eoxml
|
46
|
+
assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
|
47
|
+
@parser.document.start_elements)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML
|
5
|
+
class TestBuilder < Nokogiri::TestCase
|
6
|
+
def test_builder_with_explicit_tags
|
7
|
+
html_doc = Nokogiri::HTML::Builder.new {
|
8
|
+
div.slide(:class => 'another_class') {
|
9
|
+
node = Nokogiri::XML::Node.new("id", doc)
|
10
|
+
node.content = "hello"
|
11
|
+
insert(node)
|
12
|
+
}
|
13
|
+
}.doc
|
14
|
+
assert_equal 1, html_doc.css('div.slide > id').length
|
15
|
+
assert_equal 'hello', html_doc.at('div.slide > id').content
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_hash_as_attributes_for_attribute_method
|
19
|
+
html = Nokogiri::HTML::Builder.new { ||
|
20
|
+
div.slide(:class => 'another_class') {
|
21
|
+
span 'Slide 1'
|
22
|
+
}
|
23
|
+
}.to_html
|
24
|
+
assert_match 'class="slide another_class"', html
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_hash_as_attributes
|
28
|
+
builder = Nokogiri::HTML::Builder.new do
|
29
|
+
div(:id => 'awesome') {
|
30
|
+
h1 "america"
|
31
|
+
}
|
32
|
+
end
|
33
|
+
assert_equal('<div id="awesome"><h1>america</h1></div>',
|
34
|
+
builder.doc.root.to_html.gsub(/\n/, '').gsub(/>\s*</, '><'))
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_href_with_attributes
|
38
|
+
uri = 'http://tenderlovemaking.com/'
|
39
|
+
built = Nokogiri::XML::Builder.new {
|
40
|
+
div {
|
41
|
+
a('King Khan & The Shrines', :href => uri)
|
42
|
+
}
|
43
|
+
}
|
44
|
+
assert_equal 'http://tenderlovemaking.com/',
|
45
|
+
built.doc.at('a')[:href]
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_tag_nesting
|
49
|
+
builder = Nokogiri::HTML::Builder.new do
|
50
|
+
span.left ''
|
51
|
+
span.middle {
|
52
|
+
div.icon ''
|
53
|
+
}
|
54
|
+
span.right ''
|
55
|
+
end
|
56
|
+
assert node = builder.doc.css('span.right').first
|
57
|
+
assert_equal 'middle', node.previous_sibling['class']
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_has_ampersand
|
61
|
+
builder = Nokogiri::HTML::Builder.new do
|
62
|
+
div.rad.thing! {
|
63
|
+
text "<awe&some>"
|
64
|
+
b "hello & world"
|
65
|
+
}
|
66
|
+
end
|
67
|
+
assert_equal(
|
68
|
+
'<div class="rad" id="thing"><awe&some><b>hello & world</b></div>',
|
69
|
+
builder.doc.root.to_html.gsub(/\n/, ''))
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_multi_tags
|
73
|
+
builder = Nokogiri::HTML::Builder.new do
|
74
|
+
div.rad.thing! {
|
75
|
+
text "<awesome>"
|
76
|
+
b "hello"
|
77
|
+
}
|
78
|
+
end
|
79
|
+
assert_equal(
|
80
|
+
'<div class="rad" id="thing"><awesome><b>hello</b></div>',
|
81
|
+
builder.doc.root.to_html.gsub(/\n/, ''))
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_attributes_plus_block
|
85
|
+
builder = Nokogiri::HTML::Builder.new do
|
86
|
+
div.rad.thing! {
|
87
|
+
text "<awesome>"
|
88
|
+
}
|
89
|
+
end
|
90
|
+
assert_equal('<div class="rad" id="thing"><awesome></div>',
|
91
|
+
builder.doc.root.to_html.chomp)
|
92
|
+
end
|
93
|
+
|
94
|
+
def test_builder_adds_attributes
|
95
|
+
builder = Nokogiri::HTML::Builder.new do
|
96
|
+
div.rad.thing! "tender div"
|
97
|
+
end
|
98
|
+
assert_equal('<div class="rad" id="thing">tender div</div>',
|
99
|
+
builder.doc.root.to_html.chomp)
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_bold_tag
|
103
|
+
builder = Nokogiri::HTML::Builder.new do
|
104
|
+
b "bold tag"
|
105
|
+
end
|
106
|
+
assert_equal('<b>bold tag</b>', builder.doc.root.to_html.chomp)
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_html_then_body_tag
|
110
|
+
builder = Nokogiri::HTML::Builder.new do
|
111
|
+
html {
|
112
|
+
body {
|
113
|
+
b "bold tag"
|
114
|
+
}
|
115
|
+
}
|
116
|
+
end
|
117
|
+
assert_equal('<html><body><b>bold tag</b></body></html>',
|
118
|
+
builder.doc.root.to_html.chomp.gsub(/>\s*</, '><'))
|
119
|
+
end
|
120
|
+
|
121
|
+
def test_instance_eval_with_delegation_to_block_context
|
122
|
+
class << self
|
123
|
+
def foo
|
124
|
+
"foo!"
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
builder = Nokogiri::HTML::Builder.new { text foo }
|
129
|
+
assert builder.to_html.include?("foo!")
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_builder_with_param
|
133
|
+
doc = Nokogiri::HTML::Builder.new { |html|
|
134
|
+
html.body {
|
135
|
+
html.p "hello world"
|
136
|
+
}
|
137
|
+
}.doc
|
138
|
+
|
139
|
+
assert node = doc.xpath('//body/p').first
|
140
|
+
assert_equal 'hello world', node.content
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_builder_with_id
|
144
|
+
text = "hello world"
|
145
|
+
doc = Nokogiri::HTML::Builder.new { |html|
|
146
|
+
html.body {
|
147
|
+
html.id_ text
|
148
|
+
}
|
149
|
+
}.doc
|
150
|
+
|
151
|
+
assert node = doc.xpath('//body/id').first
|
152
|
+
assert_equal text, node.content
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
@@ -0,0 +1,361 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML
|
5
|
+
class TestDocument < Nokogiri::TestCase
|
6
|
+
def setup
|
7
|
+
super
|
8
|
+
@html = Nokogiri::HTML.parse(File.read(HTML_FILE))
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_document_takes_config_block
|
12
|
+
options = nil
|
13
|
+
Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
|
14
|
+
options = cfg
|
15
|
+
options.nonet.nowarning.dtdattr
|
16
|
+
end
|
17
|
+
assert options.nonet?
|
18
|
+
assert options.nowarning?
|
19
|
+
assert options.dtdattr?
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_parse_takes_config_block
|
23
|
+
options = nil
|
24
|
+
Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
|
25
|
+
options = cfg
|
26
|
+
options.nonet.nowarning.dtdattr
|
27
|
+
end
|
28
|
+
assert options.nonet?
|
29
|
+
assert options.nowarning?
|
30
|
+
assert options.dtdattr?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_subclass
|
34
|
+
klass = Class.new(Nokogiri::HTML::Document)
|
35
|
+
doc = klass.new
|
36
|
+
assert_instance_of klass, doc
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_subclass_initialize
|
40
|
+
klass = Class.new(Nokogiri::HTML::Document) do
|
41
|
+
attr_accessor :initialized_with
|
42
|
+
|
43
|
+
def initialize(*args)
|
44
|
+
@initialized_with = args
|
45
|
+
end
|
46
|
+
end
|
47
|
+
doc = klass.new("uri", "external_id", 1)
|
48
|
+
assert_equal ["uri", "external_id", 1], doc.initialized_with
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_subclass_dup
|
52
|
+
klass = Class.new(Nokogiri::HTML::Document)
|
53
|
+
doc = klass.new.dup
|
54
|
+
assert_instance_of klass, doc
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_subclass_parse
|
58
|
+
klass = Class.new(Nokogiri::HTML::Document)
|
59
|
+
doc = klass.parse(File.read(HTML_FILE))
|
60
|
+
assert_equal @html.to_s, doc.to_s
|
61
|
+
assert_instance_of klass, doc
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_document_parse_method
|
65
|
+
html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
|
66
|
+
assert_equal @html.to_s, html.to_s
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_empty_string_returns_empty_doc
|
70
|
+
doc = Nokogiri::HTML('')
|
71
|
+
end
|
72
|
+
|
73
|
+
unless %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
74
|
+
# FIXME: this is a hack around broken libxml versions
|
75
|
+
def test_to_xhtml_with_indent
|
76
|
+
doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
|
77
|
+
doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
|
78
|
+
assert_indent 2, doc
|
79
|
+
end
|
80
|
+
|
81
|
+
def test_write_to_xhtml_with_indent
|
82
|
+
io = StringIO.new
|
83
|
+
doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
|
84
|
+
doc.write_xhtml_to io, :indent => 5
|
85
|
+
io.rewind
|
86
|
+
doc = Nokogiri::HTML(io.read)
|
87
|
+
assert_indent 5, doc
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_swap_should_not_exist
|
92
|
+
assert_raises(NoMethodError) {
|
93
|
+
@html.swap
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
97
|
+
def test_namespace_should_not_exist
|
98
|
+
assert_raises(NoMethodError) {
|
99
|
+
@html.namespace
|
100
|
+
}
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_meta_encoding
|
104
|
+
assert_equal 'UTF-8', @html.meta_encoding
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_meta_encoding=
|
108
|
+
@html.meta_encoding = 'EUC-JP'
|
109
|
+
assert_equal 'EUC-JP', @html.meta_encoding
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_root_node_parent_is_document
|
113
|
+
parent = @html.root.parent
|
114
|
+
assert_equal @html, parent
|
115
|
+
assert_instance_of Nokogiri::HTML::Document, parent
|
116
|
+
end
|
117
|
+
|
118
|
+
def test_parse_empty_document
|
119
|
+
doc = Nokogiri::HTML("\n")
|
120
|
+
assert_equal 0, doc.css('a').length
|
121
|
+
assert_equal 0, doc.xpath('//a').length
|
122
|
+
assert_equal 0, doc.search('//a').length
|
123
|
+
end
|
124
|
+
|
125
|
+
def test_HTML_function
|
126
|
+
html = Nokogiri::HTML(File.read(HTML_FILE))
|
127
|
+
assert html.html?
|
128
|
+
end
|
129
|
+
|
130
|
+
def test_parse_io
|
131
|
+
assert doc = File.open(HTML_FILE, 'rb') { |f|
|
132
|
+
Document.read_io(f, nil, 'UTF-8',
|
133
|
+
XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
|
134
|
+
)
|
135
|
+
}
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_to_xhtml
|
139
|
+
assert_match 'XHTML', @html.to_xhtml
|
140
|
+
assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
|
141
|
+
assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
|
142
|
+
end
|
143
|
+
|
144
|
+
def test_no_xml_header
|
145
|
+
html = Nokogiri::HTML(<<-eohtml)
|
146
|
+
<html>
|
147
|
+
</html>
|
148
|
+
eohtml
|
149
|
+
assert html.to_html.length > 0, 'html length is too short'
|
150
|
+
assert_no_match(/^<\?xml/, html.to_html)
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_document_has_error
|
154
|
+
html = Nokogiri::HTML(<<-eohtml)
|
155
|
+
<html>
|
156
|
+
<body>
|
157
|
+
<div awesome="asdf>
|
158
|
+
<p>inside div tag</p>
|
159
|
+
</div>
|
160
|
+
<p>outside div tag</p>
|
161
|
+
</body>
|
162
|
+
</html>
|
163
|
+
eohtml
|
164
|
+
assert html.errors.length > 0
|
165
|
+
end
|
166
|
+
|
167
|
+
def test_relative_css
|
168
|
+
html = Nokogiri::HTML(<<-eohtml)
|
169
|
+
<html>
|
170
|
+
<body>
|
171
|
+
<div>
|
172
|
+
<p>inside div tag</p>
|
173
|
+
</div>
|
174
|
+
<p>outside div tag</p>
|
175
|
+
</body>
|
176
|
+
</html>
|
177
|
+
eohtml
|
178
|
+
set = html.search('div').search('p')
|
179
|
+
assert_equal(1, set.length)
|
180
|
+
assert_equal('inside div tag', set.first.inner_text)
|
181
|
+
end
|
182
|
+
|
183
|
+
def test_multi_css
|
184
|
+
html = Nokogiri::HTML(<<-eohtml)
|
185
|
+
<html>
|
186
|
+
<body>
|
187
|
+
<div>
|
188
|
+
<p>p tag</p>
|
189
|
+
<a>a tag</a>
|
190
|
+
</div>
|
191
|
+
</body>
|
192
|
+
</html>
|
193
|
+
eohtml
|
194
|
+
set = html.css('p, a')
|
195
|
+
assert_equal(2, set.length)
|
196
|
+
assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
|
197
|
+
end
|
198
|
+
|
199
|
+
def test_inner_text
|
200
|
+
html = Nokogiri::HTML(<<-eohtml)
|
201
|
+
<html>
|
202
|
+
<body>
|
203
|
+
<div>
|
204
|
+
<p>
|
205
|
+
Hello world!
|
206
|
+
</p>
|
207
|
+
</div>
|
208
|
+
</body>
|
209
|
+
</html>
|
210
|
+
eohtml
|
211
|
+
node = html.xpath('//div').first
|
212
|
+
assert_equal('Hello world!', node.inner_text.strip)
|
213
|
+
end
|
214
|
+
|
215
|
+
def test_find_by_xpath
|
216
|
+
found = @html.xpath('//div/a')
|
217
|
+
assert_equal 3, found.length
|
218
|
+
end
|
219
|
+
|
220
|
+
def test_find_by_css
|
221
|
+
found = @html.css('div > a')
|
222
|
+
assert_equal 3, found.length
|
223
|
+
end
|
224
|
+
|
225
|
+
def test_find_by_css_with_square_brackets
|
226
|
+
found = @html.css("div[@id='header'] > h1")
|
227
|
+
found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
|
228
|
+
assert_equal 1, found.length
|
229
|
+
end
|
230
|
+
|
231
|
+
def test_find_with_function
|
232
|
+
found = @html.css("div:awesome() h1", Class.new {
|
233
|
+
def awesome divs
|
234
|
+
[divs.first]
|
235
|
+
end
|
236
|
+
}.new)
|
237
|
+
end
|
238
|
+
|
239
|
+
def test_dup_shallow
|
240
|
+
found = @html.search('//div/a').first
|
241
|
+
dup = found.dup(0)
|
242
|
+
assert dup
|
243
|
+
assert_equal '', dup.content
|
244
|
+
end
|
245
|
+
|
246
|
+
def test_search_can_handle_xpath_and_css
|
247
|
+
found = @html.search('//div/a', 'div > p')
|
248
|
+
length = @html.xpath('//div/a').length +
|
249
|
+
@html.css('div > p').length
|
250
|
+
assert_equal length, found.length
|
251
|
+
end
|
252
|
+
|
253
|
+
def test_dup_document
|
254
|
+
assert dup = @html.dup
|
255
|
+
assert_not_equal dup, @html
|
256
|
+
assert @html.html?
|
257
|
+
assert_instance_of Nokogiri::HTML::Document, dup
|
258
|
+
assert dup.html?, 'duplicate should be html'
|
259
|
+
assert_equal @html.to_s, dup.to_s
|
260
|
+
end
|
261
|
+
|
262
|
+
def test_dup_document_shallow
|
263
|
+
assert dup = @html.dup(0)
|
264
|
+
assert_not_equal dup, @html
|
265
|
+
end
|
266
|
+
|
267
|
+
def test_dup
|
268
|
+
found = @html.search('//div/a').first
|
269
|
+
dup = found.dup
|
270
|
+
assert dup
|
271
|
+
assert_equal found.content, dup.content
|
272
|
+
assert_equal found.document, dup.document
|
273
|
+
end
|
274
|
+
|
275
|
+
def test_inner_html
|
276
|
+
html = Nokogiri::HTML(<<-eohtml)
|
277
|
+
<html>
|
278
|
+
<body>
|
279
|
+
<div>
|
280
|
+
<p>
|
281
|
+
Hello world!
|
282
|
+
</p>
|
283
|
+
</div>
|
284
|
+
</body>
|
285
|
+
</html>
|
286
|
+
eohtml
|
287
|
+
node = html.xpath('//div').first
|
288
|
+
assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
|
289
|
+
end
|
290
|
+
|
291
|
+
def test_fragment_contains_text_node
|
292
|
+
fragment = Nokogiri::HTML.fragment('fooo')
|
293
|
+
assert_equal 1, fragment.children.length
|
294
|
+
assert_equal 'fooo', fragment.inner_text
|
295
|
+
end
|
296
|
+
|
297
|
+
def test_fragment_includes_two_tags
|
298
|
+
assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
|
299
|
+
end
|
300
|
+
|
301
|
+
def test_relative_css_finder
|
302
|
+
doc = Nokogiri::HTML(<<-eohtml)
|
303
|
+
<html>
|
304
|
+
<body>
|
305
|
+
<div class="red">
|
306
|
+
<p>
|
307
|
+
inside red
|
308
|
+
</p>
|
309
|
+
</div>
|
310
|
+
<div class="green">
|
311
|
+
<p>
|
312
|
+
inside green
|
313
|
+
</p>
|
314
|
+
</div>
|
315
|
+
</body>
|
316
|
+
</html>
|
317
|
+
eohtml
|
318
|
+
red_divs = doc.css('div.red')
|
319
|
+
assert_equal 1, red_divs.length
|
320
|
+
p_tags = red_divs.first.css('p')
|
321
|
+
assert_equal 1, p_tags.length
|
322
|
+
assert_equal 'inside red', p_tags.first.text.strip
|
323
|
+
end
|
324
|
+
|
325
|
+
def test_find_classes
|
326
|
+
doc = Nokogiri::HTML(<<-eohtml)
|
327
|
+
<html>
|
328
|
+
<body>
|
329
|
+
<p class="red">RED</p>
|
330
|
+
<p class="awesome red">RED</p>
|
331
|
+
<p class="notred">GREEN</p>
|
332
|
+
<p class="green notred">GREEN</p>
|
333
|
+
</body>
|
334
|
+
</html>
|
335
|
+
eohtml
|
336
|
+
list = doc.css('.red')
|
337
|
+
assert_equal 2, list.length
|
338
|
+
assert_equal %w{ RED RED }, list.map { |x| x.text }
|
339
|
+
end
|
340
|
+
|
341
|
+
def test_parse_can_take_io
|
342
|
+
html = nil
|
343
|
+
File.open(HTML_FILE, 'rb') { |f|
|
344
|
+
html = Nokogiri::HTML(f)
|
345
|
+
}
|
346
|
+
assert html.html?
|
347
|
+
end
|
348
|
+
|
349
|
+
def test_html?
|
350
|
+
assert !@html.xml?
|
351
|
+
assert @html.html?
|
352
|
+
end
|
353
|
+
|
354
|
+
def test_serialize
|
355
|
+
assert @html.serialize
|
356
|
+
assert @html.to_html
|
357
|
+
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|