nokogiri 1.3.0-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +233 -0
- data/CHANGELOG.rdoc +222 -0
- data/Manifest.txt +247 -0
- data/README.ja.rdoc +103 -0
- data/README.rdoc +117 -0
- data/Rakefile +205 -0
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +89 -0
- data/ext/nokogiri/html_document.c +183 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +30 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser.c +57 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/nokogiri.c +81 -0
- data/ext/nokogiri/nokogiri.h +149 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +53 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +51 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +308 -0
- data/ext/nokogiri/xml_document.h +21 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +102 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +24 -0
- data/ext/nokogiri/xml_io.h +10 -0
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +928 -0
- data/ext/nokogiri/xml_node.h +14 -0
- data/ext/nokogiri/xml_node_set.c +386 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +572 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +336 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +86 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +203 -0
- data/ext/nokogiri/xml_syntax_error.h +12 -0
- data/ext/nokogiri/xml_text.c +47 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +252 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +36 -0
- data/lib/nokogiri.rb +110 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +748 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
- data/lib/nokogiri/css/node.rb +107 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +11 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +172 -0
- data/lib/nokogiri/decorators.rb +2 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +62 -0
- data/lib/nokogiri/html.rb +34 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +71 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +47 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +29 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +62 -0
- data/lib/nokogiri/xml/attr.rb +9 -0
- data/lib/nokogiri/xml/builder.rb +254 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/document.rb +100 -0
- data/lib/nokogiri/xml/document_fragment.rb +49 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/entity_declaration.rb +11 -0
- data/lib/nokogiri/xml/fragment_handler.rb +55 -0
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +745 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +238 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +66 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +3 -0
- data/lib/nokogiri/xml/sax/document.rb +143 -0
- data/lib/nokogiri/xml/sax/parser.rb +101 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +34 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +64 -0
- data/tasks/test.rb +161 -0
- data/test/css/test_nthiness.rb +160 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +176 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +123 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +11 -0
- data/test/hpricot/test_alter.rb +68 -0
- data/test/hpricot/test_builder.rb +20 -0
- data/test/hpricot/test_parser.rb +426 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +77 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +52 -0
- data/test/html/test_builder.rb +156 -0
- data/test/html/test_document.rb +361 -0
- data/test/html/test_document_encoding.rb +46 -0
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +165 -0
- data/test/test_convert_xpath.rb +186 -0
- data/test/test_css_cache.rb +56 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +127 -0
- data/test/test_reader.rb +316 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +169 -0
- data/test/xml/sax/test_push_parser.rb +92 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_builder.rb +73 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +23 -0
- data/test/xml/test_document.rb +397 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +76 -0
- data/test/xml/test_dtd.rb +42 -0
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +808 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +84 -0
- data/test/xml/test_node_set.rb +368 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +18 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +409 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
3
|
+
|
4
|
+
module Nokogiri
  module HTML
    # These tests call String#encoding, so they are defined only when the
    # running Ruby provides that method. The capability is detected directly
    # instead of matching RUBY_VERSION against 1.9, which would silently drop
    # the tests on every later Ruby.
    if String.method_defined?(:encoding)
      class TestDocumentEncoding < Nokogiri::TestCase
        # Asserts that, with no explicit encoding argument, both
        # Nokogiri::HTML and Nokogiri.parse produce a document whose encoding
        # equals the encoding name of the source string.
        def test_default_to_encoding_from_string
          bad_charset = bad_charset_html

          doc = Nokogiri::HTML(bad_charset)
          assert_equal bad_charset.encoding.name, doc.encoding

          doc = Nokogiri.parse(bad_charset)
          assert_equal bad_charset.encoding.name, doc.encoding
        end

        # Asserts that the link is still found when the document is parsed
        # with a nonsense encoding name.
        def test_encoding_with_a_bad_name
          doc = Nokogiri::HTML(bad_charset_html, nil, 'askldjfhalsdfjhlkasdfjh')
          assert_equal ['http://tenderlovemaking.com/'],
                       doc.css('a').map { |a| a['href'] }
        end

        private

        # HTML fixture shared by both tests; its meta tag carries the
        # malformed charset value "charset=UTF-8". Returns a fresh String on
        # every call.
        def bad_charset_html
          <<-eohtml
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
</head>
<body>
<a href="http://tenderlovemaking.com/">blah!</a>
</body>
</html>
          eohtml
        end
      end
    end
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML
    # Tests for Nokogiri::HTML::DocumentFragment construction and
    # serialization.
    class TestDocumentFragment < Nokogiri::TestCase
      # Parses the shared HTML fixture file into @html before each test.
      def setup
        super
        @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
      end

      def test_new
        assert Nokogiri::HTML::DocumentFragment.new(@html)
      end

      def test_fragment_should_have_document
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_equal @html, fragment.document
      end

      def test_name
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_equal '#document-fragment', fragment.name
      end

      def test_static_method
        fragment = Nokogiri::HTML::DocumentFragment.parse("<div>a</div>")
        assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
      end

      def test_many_fragments
        100.times { Nokogiri::HTML::DocumentFragment.new(@html) }
      end

      def test_subclass
        klass = Class.new(Nokogiri::HTML::DocumentFragment)
        fragment = klass.new(@html, "<div>a</div>")
        assert_instance_of klass, fragment
      end

      def test_html_fragment
        fragment = Nokogiri::HTML.fragment("<div>a</div>")
        assert_equal "<div>a</div>", fragment.to_s
      end

      # The expected serialization differs for libxml 2.6.16 and earlier,
      # where the trailing text is expected to be wrapped in a <p>.
      def test_html_fragment_has_outer_text
        doc = "a<div>b</div>c"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        if libxml_at_most?(2, 6, 16)
          assert_equal "a<div>b</div><p>c</p>", fragment.to_s
        else
          assert_equal "a<div>b</div>c", fragment.to_s
        end
      end

      def test_html_fragment_case_insensitivity
        doc = "<crazyDiv>b</crazyDiv>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<crazydiv>b</crazydiv>", fragment.to_s
      end

      def test_html_fragment_with_leading_whitespace
        doc = " <div>b</div> "
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<div>b</div>", fragment.to_s
      end

      def test_to_s
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_s
      end

      def test_to_html
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_html
      end

      # From libxml 2.7.0 on, the expected XHTML output self-closes <br>.
      def test_to_xhtml
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        if libxml_at_least?(2, 7, 0)
          assert_equal "<span>foo<br /></span><span>bar</span>", fragment.to_xhtml
        else
          assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_xhtml
        end
      end

      def test_to_xml
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br/></span><span>bar</span>", fragment.to_xml
      end

      private

      # Loaded libxml version as an array of integers, e.g. "2.6.16" becomes
      # [2, 6, 16]. Comparing the raw strings is lexicographic and orders
      # "2.10.0" before "2.7.0", so components are compared numerically.
      def loaded_libxml_version
        Nokogiri::VERSION_INFO['libxml']['loaded'].split('.').map { |part| part.to_i }
      end

      # True when the loaded libxml version is numerically <= the given one.
      def libxml_at_most?(*version)
        (loaded_libxml_version <=> version) <= 0
      end

      # True when the loaded libxml version is numerically >= the given one.
      def libxml_at_least?(*version)
        (loaded_libxml_version <=> version) >= 0
      end
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML
    # Checks the attributes exposed by ElementDescription lookups.
    class TestElementDescription < Nokogiri::TestCase
      def test_fetch_nonexistent
        assert_nil(ElementDescription['foo'])
      end

      def test_fetch_element_description
        anchor = ElementDescription['a']
        assert(anchor)
        assert_instance_of(ElementDescription, anchor)
      end

      def test_name
        assert_equal('a', ElementDescription['a'].name)
      end

      def test_implied_start_tag?
        assert(!ElementDescription['a'].implied_start_tag?)
      end

      def test_implied_end_tag?
        assert(!ElementDescription['a'].implied_end_tag?)
        assert(ElementDescription['p'].implied_end_tag?)
      end

      def test_save_end_tag?
        assert(!ElementDescription['a'].save_end_tag?)
        assert(ElementDescription['br'].save_end_tag?)
      end

      def test_empty?
        assert(ElementDescription['br'].empty?)
        assert(!ElementDescription['a'].empty?)
      end

      def test_deprecated?
        assert(ElementDescription['applet'].deprecated?)
        assert(!ElementDescription['br'].deprecated?)
      end

      def test_inline?
        assert(ElementDescription['a'].inline?)
        assert(!ElementDescription['div'].inline?)
      end

      # block? is expected to be the negation of inline? for the same element.
      def test_block?
        anchor = ElementDescription['a']
        assert_equal(!anchor.inline?, anchor.block?)
      end

      def test_description
        assert(ElementDescription['a'].description)
      end

      def test_subelements
        children = ElementDescription['body'].sub_elements
        assert_equal(61, children.length)
      end

      def test_default_sub_element
        assert_equal('div', ElementDescription['body'].default_sub_element)
      end

      def test_optional_attributes
        optional = ElementDescription['table'].optional_attributes
        assert(optional)
        assert_equal(22, optional.length)
      end

      def test_deprecated_attributes
        deprecated = ElementDescription['table'].deprecated_attributes
        assert(deprecated)
        assert_equal(2, deprecated.length)
      end

      def test_required_attributes
        required = ElementDescription['table'].required_attributes
        assert(required)
        assert_equal(0, required.length)
      end

      # inspect output is expected to mention the element name.
      def test_inspect
        input = ElementDescription['input']
        assert_match(input.name, input.inspect)
      end

      # to_s output is expected to mention the element name.
      def test_to_s
        input = ElementDescription['input']
        assert_match(input.name, input.to_s)
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML
    # Checks lookup of a named HTML character through NamedCharacters.
    class TestNamedCharacters < Nokogiri::TestCase
      # "copy" is expected to map to 169, and the object returned by .get is
      # expected to carry the same value plus a description.
      def test_named_character
        entity = NamedCharacters.get('copy')
        assert_equal(169, NamedCharacters['copy'])
        assert_equal(entity.value, NamedCharacters['copy'])
        assert(entity.description)
      end
    end
  end
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
require 'nkf'
|
4
|
+
|
5
|
+
module Nokogiri
  module HTML
    # Node-level tests run against a small parsed HTML document.
    class TestNode < Nokogiri::TestCase
      # Parses a minimal page into @html: one div (class "baz") wrapping one
      # link (class "bar").
      def setup
        super
        @html = Nokogiri::HTML(<<-eohtml)
<html>
<head></head>
<body>
<div class='baz'><a href="foo" class="bar">first</a></div>
</body>
</html>
        eohtml
      end

      def test_description
        desc = @html.at('a.bar').description
        assert(desc)
        assert_equal('a', desc.name)
      end

      def test_add_next_sibling_with_empty_nodeset
        assert_raises(ArgumentError) do
          @html.at('a').add_next_sibling(@html.at('head').children)
        end
      end

      def test_add_next_sibling_with_non_empty_nodeset
        assert_raises(ArgumentError) do
          @html.at('head').add_next_sibling(@html.at('div').children)
        end
      end

      def test_ancestors_with_selector
        text_node = @html.at('a.bar').child
        assert(text_node)
        matches = text_node.ancestors('.baz')
        assert(matches)
        assert_equal(1, matches.length)
        assert_equal('div', matches.first.name)
      end

      def test_css_matches?
        link = @html.at('a.bar')
        assert(link)
        assert(link.matches?('a.bar'))
      end

      def test_xpath_matches?
        link = @html.at('//a')
        assert(link)
        assert(link.matches?('//a'))
      end

      # After swapping the div for a link, the link's parent is expected to
      # be body and no div is expected to remain.
      def test_swap
        @html.at('div').swap('<a href="foo">bar</a>')
        first_link = @html.css('a').first
        assert_equal('body', first_link.parent.name)
        assert_equal(0, @html.css('div').length)
      end

      def test_swap_with_regex_characters
        @html.at('div').swap('<a href="foo">ba)r</a>')
        first_link = @html.css('a').first
        assert_equal('ba)r', first_link.text)
      end

      def test_attribute_decodes_entities
        div = @html.at('div')
        div['href'] = 'foo&bar'
        assert_equal('foo&bar', div['href'])
        div['href'] = div['href'] + '&baz'
        assert_equal('foo&bar&baz', div['href'])
      end

      def test_before_will_prepend_text_nodes
        first_child = @html.at('//body').children.first
        assert(first_child)
        first_child.before("some text")
        assert_equal('some text', @html.at('//body').children[0].content.strip)
      end

      # The iframe markup carries a value-less attribute (allowtransparency).
      def test_fragment_handler_does_not_regurge_on_invalid_attributes
        iframe = %Q{<iframe style="width: 0%; height: 0px" src="http://someurl" allowtransparency></iframe>}
        assert_nothing_raised { @html.at('div').before(iframe) }
        assert_nothing_raised { @html.at('div').after(iframe) }
        assert_nothing_raised { @html.at('div').inner_html = iframe }
      end

      def test_inner_html=
        div = @html.at('//div')
        assert(div)

        div.inner_html = '<span>testing</span>'
        assert_equal('span', div.children.first.name)

        div.inner_html = 'testing'
        assert_equal('testing', div.content)
      end

      def test_fragment
        fragment = @html.fragment(<<-eohtml)
hello
<div class="foo">
<p>bar</p>
</div>
world
        eohtml
        assert_match(/^hello/, fragment.inner_html.strip)
        assert_equal(3, fragment.children.length)

        paragraph = fragment.css('p').first
        assert(paragraph)
        assert_equal('div', paragraph.parent.name)
        assert_equal('foo', paragraph.parent['class'])
      end

      def test_fragment_serialization
        expected = "<div>foo</div>"
        fragment = Nokogiri::HTML.fragment("<div>foo</div>")
        assert_equal(expected, fragment.serialize.chomp)
        assert_equal(expected, fragment.to_xml.chomp)
        assert_equal(expected, fragment.inner_html)
        assert_equal(expected, fragment.to_html)
        assert_equal(expected, fragment.to_s)
      end

      def test_after_will_append_text_nodes
        div = @html.at('//body/div')
        assert(div)
        div.after("some text")
        assert_equal('some text', div.next.text.strip)
      end

      # Replacing a node with a node set is expected to raise ArgumentError.
      def test_replace
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<head></head>
<body>
<center><img src='logo.gif' /></center>
</body>
</html>
        eohtml
        center = doc.at("//center")
        images = center.search("//img")
        assert_raises(ArgumentError) { center.replace(images) }
      end

      def test_to_html_does_not_contain_entities
        html = NKF.nkf("-e --msdos", <<-EOH)
<html><body>
<p> test paragraph
foo bar </p>
</body></html>
        EOH
        nokogiri = Nokogiri::HTML.parse(html)

        # NKF linebreak modes are not supported as of jruby 1.2
        # see http://jira.codehaus.org/browse/JRUBY-3602 for status
        line_break = RUBY_PLATFORM =~ /java/ ? "\n" : "\r\n"

        assert_equal("<p>testparagraph#{line_break}foobar</p>",
                     nokogiri.at("p").to_html.gsub(/ /, ''))
      end
    end
  end
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
2
|
+
|
3
|
+
# Record whether the optional hpricot gem can be loaded: the constant is true
# when both requires succeed and false when either raises LoadError.
HAS_HPRICOT =
  begin
    require 'rubygems'
    require 'hpricot'
    true
  rescue LoadError
    false
  end
|
10
|
+
|
11
|
+
# Checks that Hpricot-style paths are converted to the expected XPath and
# that the XPath returns the expected content from the HTML fixture.
class TestConvertXPath < Nokogiri::TestCase

  # Reads the HTML fixture once and parses it with each available parser.
  def setup
    super
    html = File.read(HTML_FILE)
    @N  = Nokogiri(html)
    @NH = Nokogiri.Hpricot(html) # decorated document
    @H  = Hpricot(html) if HAS_HPRICOT
  end

  # Asserts three things: +xpath+ run against the Nokogiri document yields
  # +match+; when Hpricot is installed, +hpath+ run against the Hpricot
  # document yields +match+ too; and +hpath+ converts to exactly +xpath+.
  #
  # The optional block extracts the value to compare from a search result;
  # it defaults to the first element of the result.
  def assert_syntactical_equivalence(hpath, xpath, match, &blk)
    blk ||= lambda {|j| j.first}
    assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
    if HAS_HPRICOT
      assert_equal match, blk.call(@H.search(hpath)).chomp, "hpath result did not match"
    end
    assert_equal [xpath], @NH.convert_to_xpath(hpath), "converted hpath did not match xpath"
  end

  def test_ordinary_xpath_conversions
    assert_equal(".//p", @NH.convert_to_xpath("p").first)
    assert_equal(".//p", @NH.convert_to_xpath(:p).first)
    assert_equal(".//p", @NH.convert_to_xpath("//p").first)
    assert_equal(".//p", @NH.convert_to_xpath(".//p").first)
  end

  def test_child_tag
    assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  def test_child_tag_equals
    assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  def test_filter_contains
    assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]",
                                   "Tender Lovemaking ") do |j|
      j.inner_text
    end
  end

  def test_filter_comment
    assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "<!-- end of header -->") do |j|
      j.first.to_s
    end
  end

  def test_filter_text
    assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
      j.first.inner_text
    end
  end

  def test_filter_by_attr
    assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']",
                                   ".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']",
                                   "http://blog.geminigeek.com/wordpress-theme") do |j|
      j.first["href"]
    end
  end

  def test_css_id
    assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
    assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
  end

  def test_css_class
    assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
                                   "cat-item cat-item-15") do |j|
      j.first["class"]
    end
    assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
                                   "cat-item cat-item-15") do |j|
      j.first["class"]
    end
  end

  def test_css_tags
    assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    # This assertion previously appeared twice, verbatim; one copy is kept.
    assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
      j.first.inner_text
    end
  end

  def test_positional
    ##
    # we are intentionally NOT staying compatible with nth-and-friends, as Hpricot has an OB1 bug.
    #
    # assert_syntactical_equivalence("div > div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:nth(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:nth-of-type(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
    assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
  end

  def test_multiple_filters
    assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
      j.first.inner_text
    end
  end

  def test_compat_mode_namespaces
    assert_equal(".//*[name()='t:sam']", @NH.convert_to_xpath("//t:sam").first)
    assert_equal(".//*[name()='t:sam'][@rel='bookmark'][1]", @NH.convert_to_xpath("//t:sam[@rel='bookmark'][1]").first)
  end

  ##
  # 'and' is not supported by hpricot
  # def test_and
  #   assert_syntactical_equivalence("div[h1 and small]", ".//div[h1 and small]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
  #     j.inner_text
  #   end
  # end

  # TODO:
  #       doc/'title ~ link' -> links that are siblings of title
  #       doc/'p[@class~="final"]' -> class includes string (whitespacy)
  #       doc/'p[text()*="final"]' -> class includes string (index) (broken: always returns true?)
  #       doc/'p[text()$="final"]' -> /final$/
  #       doc/'p[text()|="final"]' -> /^final$/
  #       doc/'p[text()^="final"]' -> string starts with 'final'
  #       nth_first
  #       nth_last
  #       even
  #       odd
  #       first-child, nth-child, last-child, nth-last-child, nth-last-of-type
  #       only-of-type, only-child
  #       parent
  #       empty
  #       root
end
|