superfeedr-nokogiri 1.4.0.20091116183308
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +330 -0
- data/CHANGELOG.rdoc +314 -0
- data/Manifest.txt +269 -0
- data/README.ja.rdoc +105 -0
- data/README.rdoc +118 -0
- data/Rakefile +244 -0
- data/bin/nokogiri +49 -0
- data/ext/nokogiri/extconf.rb +145 -0
- data/ext/nokogiri/html_document.c +145 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +89 -0
- data/ext/nokogiri/nokogiri.h +145 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +54 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +52 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +388 -0
- data/ext/nokogiri/xml_document.h +24 -0
- data/ext/nokogiri/xml_document_fragment.c +46 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +192 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +74 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +1060 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +397 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +593 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +159 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +286 -0
- data/ext/nokogiri/xml_sax_parser.h +43 -0
- data/ext/nokogiri/xml_sax_parser_context.c +155 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +114 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +156 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +261 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +239 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +116 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +646 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +162 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +356 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +135 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +444 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +227 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
- data/lib/nokogiri/ffi/xml/schema.rb +92 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/html.rb +35 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +88 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +33 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +405 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +131 -0
- data/lib/nokogiri/xml/document_fragment.rb +69 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +71 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +665 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +307 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +61 -0
- data/lib/nokogiri/xml/syntax_error.rb +38 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +71 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +183 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +136 -0
- data/test/html/sax/test_parser.rb +64 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +390 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +132 -0
- data/test/html/test_element_description.rb +94 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +228 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +134 -0
- data/test/test_reader.rb +358 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +307 -0
- data/test/xml/sax/test_parser_context.rb +56 -0
- data/test/xml/sax/test_push_parser.rb +131 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +167 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +607 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +138 -0
- data/test/xml/test_dtd.rb +82 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +889 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_set.rb +531 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +27 -0
- data/test/xml/test_text.rb +30 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +430 -0
@@ -0,0 +1,77 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "helper"
|
3
|
+
|
4
|
+
module Nokogiri
  module HTML
    # These tests call String#encoding / String#encode, so they are only
    # defined when the running Ruby provides the Encoding API. Feature
    # detection is used instead of matching RUBY_VERSION against 1.9 so the
    # tests are not silently skipped on later Ruby versions.
    if defined?(::Encoding)
      # Checks how HTML documents report and convert their encoding.
      class TestDocumentEncoding < Nokogiri::TestCase
        # Parses the Shift_JIS fixture and checks the serialized output both
        # in the document's own encoding and when UTF-8 is requested.
        def test_encoding
          # Block form so the file handle is closed once parsing is done.
          doc = File.open(SHIFT_JIS_HTML, 'rb') { |io| Nokogiri::HTML(io) }

          hello = "こんにちは"

          assert_match doc.encoding, doc.to_html
          assert_match hello.encode('Shift_JIS'), doc.to_html
          assert_equal 'Shift_JIS', doc.to_html.encoding.name

          assert_match hello, doc.to_html(:encoding => 'UTF-8')
          assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
          # Exact comparison, matching the Shift_JIS assertion above.
          assert_equal 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
        end

        # When the meta tag carries a malformed charset, the document
        # encoding is expected to equal the encoding of the source string.
        def test_default_to_encoding_from_string
          bad_charset = bad_charset_html

          doc = Nokogiri::HTML(bad_charset)
          assert_equal bad_charset.encoding.name, doc.encoding

          doc = Nokogiri.parse(bad_charset)
          assert_equal bad_charset.encoding.name, doc.encoding
        end

        # Round-trips Japanese text through Shift_JIS and EUC-JP documents and
        # compares the raw bytes of the title text with the original.
        def test_encoding_non_utf8
          orig = '日本語が上手です'
          bin = Encoding::ASCII_8BIT
          [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
            html = <<-eohtml.encode(enc)
<html>
<meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
<title xml:lang="ja">#{orig}</title></html>
            eohtml
            text = Nokogiri::HTML.parse(html).at('title').inner_text
            # force_encoding(bin) makes this a byte-for-byte comparison.
            assert_equal(
              orig.encode(enc).force_encoding(bin),
              text.encode(enc).force_encoding(bin)
            )
          end
        end

        # Passing a nonsense encoding name must still produce a document whose
        # links can be queried.
        def test_encoding_with_a_bad_name
          doc = Nokogiri::HTML(bad_charset_html, nil, 'askldjfhalsdfjhlkasdfjh')
          assert_equal ['http://tenderlovemaking.com/'],
                       doc.css('a').map { |a| a['href'] }
        end

        private

        # HTML whose Content-Type meta tag declares the malformed value
        # "charset=charset=UTF-8". Shared by the bad-charset tests; a new
        # string is returned on every call.
        def bad_charset_html
          <<-eohtml
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
</head>
<body>
<a href="http://tenderlovemaking.com/">blah!</a>
</body>
</html>
          eohtml
        end
      end
    end
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "helper"
|
3
|
+
|
4
|
+
module Nokogiri
  module HTML
    # Tests for building, parsing and serializing HTML document fragments.
    class TestDocumentFragment < Nokogiri::TestCase
      # Parses the shared HTML fixture into @html for the tests that need an
      # owning document.
      def setup
        super
        @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
      end

      # Non-ASCII text must survive a parse / to_html(UTF-8) round trip.
      def test_fun_encoding
        string = %Q(<body>こんにちは</body>)
        html = Nokogiri::HTML::DocumentFragment.parse(
          string
        ).to_html(:encoding => 'UTF-8')
        assert_equal string, html
      end

      # Constructing a fragment from a document yields a DocumentFragment.
      def test_new
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
      end

      def test_fragment_should_have_document
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_equal @html, fragment.document
      end

      def test_name
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_equal '#document-fragment', fragment.name
      end

      def test_static_method
        fragment = Nokogiri::HTML::DocumentFragment.parse("<div>a</div>")
        assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
      end

      # Creating many fragments against the same document must not raise.
      def test_many_fragments
        100.times { Nokogiri::HTML::DocumentFragment.new(@html) }
      end

      def test_subclass
        klass = Class.new(Nokogiri::HTML::DocumentFragment)
        fragment = klass.new(@html, "<div>a</div>")
        assert_instance_of klass, fragment
      end

      def test_subclass_parse
        klass = Class.new(Nokogiri::HTML::DocumentFragment)
        doc = klass.parse("<div>a</div>")
        assert_instance_of klass, doc
      end

      def test_html_fragment
        fragment = Nokogiri::HTML.fragment("<div>a</div>")
        assert_equal "<div>a</div>", fragment.to_s
      end

      # Text outside the element must be kept. The expected serialization of
      # the trailing text differs for libxml2 2.6.16 and older.
      def test_html_fragment_has_outer_text
        doc = "a<div>b</div>c"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        if (loaded_libxml_version <=> [2, 6, 16]) <= 0
          assert_equal "a<div>b</div><p>c</p>", fragment.to_s
        else
          assert_equal "a<div>b</div>c", fragment.to_s
        end
      end

      # Mixed-case tag names are serialized in lower case.
      def test_html_fragment_case_insensitivity
        doc = "<crazyDiv>b</crazyDiv>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<crazydiv>b</crazydiv>", fragment.to_s
      end

      def test_html_fragment_with_leading_whitespace
        doc = " <div>b</div> "
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<div>b</div>", fragment.to_s
      end

      def test_html_fragment_with_leading_whitespace_and_newline
        doc = " \n<div>b</div> "
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<div>b</div>", fragment.to_s
      end

      def test_to_s
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_s
      end

      def test_to_html
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_html
      end

      # The expected XHTML form of <br> depends on whether the loaded
      # libxml2 is 2.7.0 or newer.
      def test_to_xhtml
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        if (loaded_libxml_version <=> [2, 7, 0]) >= 0
          assert_equal "<span>foo<br /></span><span>bar</span>", fragment.to_xhtml
        else
          assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_xhtml
        end
      end

      def test_to_xml
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br/></span><span>bar</span>", fragment.to_xml
      end

      # Script content must be serialized back unchanged.
      def test_fragment_script_tag_with_cdata
        doc = HTML::Document.new
        fragment = doc.fragment("<script>var foo = 'bar';</script>")
        assert_equal("<script>var foo = 'bar';</script>",
                     fragment.to_s)
      end

      # Comments inside a fragment must be serialized back unchanged.
      def test_fragment_with_comment
        doc = HTML::Document.new
        fragment = doc.fragment("<p>hello<!-- your ad here --></p>")
        assert_equal("<p>hello<!-- your ad here --></p>",
                     fragment.to_s)
      end

      private

      # Returns the loaded libxml2 version as an array of integers, e.g.
      # "2.6.16" => [2, 6, 16]. Arrays compare numerically component by
      # component, whereas comparing the raw strings is lexicographic and
      # would order "2.6.9" after "2.6.16" and "2.10.0" before "2.7.0".
      def loaded_libxml_version
        Nokogiri::VERSION_INFO['libxml']['loaded'].split('.').map { |part| part.to_i }
      end
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML
    # Exercises ElementDescription lookups and the predicates and attribute
    # lists exposed by a description.
    class TestElementDescription < Nokogiri::TestCase
      # Looking up an unknown element name returns nil.
      def test_fetch_nonexistent
        assert_nil(ElementDescription['foo'])
      end

      # Looking up a known element name returns an ElementDescription.
      def test_fetch_element_description
        desc = ElementDescription['a']
        assert desc
        assert_instance_of(ElementDescription, desc)
      end

      def test_name
        anchor = ElementDescription['a']
        assert_equal('a', anchor.name)
      end

      def test_implied_start_tag?
        assert(!ElementDescription['a'].implied_start_tag?)
      end

      def test_implied_end_tag?
        assert(!ElementDescription['a'].implied_end_tag?)
        assert(ElementDescription['p'].implied_end_tag?)
      end

      def test_save_end_tag?
        assert(!ElementDescription['a'].save_end_tag?)
        assert(ElementDescription['br'].save_end_tag?)
      end

      def test_empty?
        assert(ElementDescription['br'].empty?)
        assert(!ElementDescription['a'].empty?)
      end

      def test_deprecated?
        assert(ElementDescription['applet'].deprecated?)
        assert(!ElementDescription['br'].deprecated?)
      end

      def test_inline?
        assert(ElementDescription['a'].inline?)
        assert(!ElementDescription['div'].inline?)
      end

      # block? is expected to be the negation of inline?.
      def test_block?
        anchor = ElementDescription['a']
        assert_equal(!anchor.inline?, anchor.block?)
      end

      def test_description
        assert(ElementDescription['a'].description)
      end

      def test_subelements
        body_children = ElementDescription['body'].sub_elements
        assert_equal(61, body_children.length)
      end

      def test_default_sub_element
        assert_equal('div', ElementDescription['body'].default_sub_element)
      end

      def test_optional_attributes
        optional = ElementDescription['table'].optional_attributes
        assert(optional)
      end

      def test_deprecated_attributes
        deprecated = ElementDescription['table'].deprecated_attributes
        assert(deprecated)
        assert_equal(2, deprecated.length)
      end

      def test_required_attributes
        required = ElementDescription['table'].required_attributes
        assert(required)
        assert_equal(0, required.length)
      end

      # inspect output mentions the element name.
      def test_inspect
        input = ElementDescription['input']
        assert_match(input.name, input.inspect)
      end

      # to_s output mentions the element name.
      def test_to_s
        input = ElementDescription['input']
        assert_match(input.name, input.to_s)
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML
    # Checks lookups of named HTML character entities.
    class TestNamedCharacters < Nokogiri::TestCase
      # The 'copy' entity is expected to map to 169 via [], and the object
      # returned by .get must expose the same value plus a description.
      def test_named_character
        entity = NamedCharacters.get('copy')

        assert_equal(169, NamedCharacters['copy'])
        assert_equal(entity.value, NamedCharacters['copy'])
        assert(entity.description)
      end
    end
  end
end
|
@@ -0,0 +1,228 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
require 'nkf'
|
4
|
+
|
5
|
+
module Nokogiri
  module HTML
    # Node-level behavior on parsed HTML documents: attribute access, path
    # round trips, sibling insertion, swapping, fragments and serialization.
    class TestNode < Nokogiri::TestCase
      # Builds a small document (one div wrapping one link) into @html.
      def setup
        super
        markup = <<-eohtml
<html>
<head></head>
<body>
<div class='baz'><a href="foo" class="bar">first</a></div>
</body>
</html>
        eohtml
        @html = Nokogiri::HTML(markup)
      end

      # Reads an attribute both ways, then checks that double quotes in an
      # href value show up as %22 in the serialized HTML.
      def test_get_attribute
        div = @html.at('div')
        assert_equal('baz', div.get_attribute('class'))
        assert_equal('baz', div['class'])
        div['href'] = 'javascript:alert("AGGA-KA-BOO!")'
        assert_match(/%22AGGA-KA-BOO!%22/, div.to_html)
      end

      # A node's css_path must select that same node again.
      def test_css_path_round_trip
        doc = Nokogiri::HTML(File.read(HTML_FILE))
        ['#header', 'small', 'div[2]', 'div.post', 'body'].each do |selector|
          found = doc.at(selector)
          assert_equal(found, doc.at(found.css_path), found.css_path)
        end
      end

      # A node's path must select that same node again.
      def test_path_round_trip
        doc = Nokogiri::HTML(File.read(HTML_FILE))
        ['#header', 'small', 'div[2]', 'div.post', 'body'].each do |selector|
          found = doc.at(selector)
          assert_equal(found, doc.at(found.path), found.path)
        end
      end

      # Appending a whole document to a node is rejected.
      def test_append_with_document
        assert_raises(ArgumentError) { @html.root << Nokogiri::HTML::Document.new }
      end

      ###
      # A document that does not declare a meta encoding must return nil.
      def test_meta_encoding
        assert_nil(@html.meta_encoding)
      end

      def test_description
        desc = @html.at('a.bar').description
        assert desc
        assert_equal('a', desc.name)
      end

      def test_add_next_sibling_with_empty_nodeset
        assert_raises(ArgumentError) do
          @html.at('a').add_next_sibling(@html.at('head').children)
        end
      end

      def test_add_next_sibling_with_non_empty_nodeset
        assert_raises(ArgumentError) do
          @html.at('head').add_next_sibling(@html.at('div').children)
        end
      end

      def test_ancestors_with_selector
        text_node = @html.at('a.bar').child
        assert text_node
        matches = text_node.ancestors('.baz')
        assert matches
        assert_equal(1, matches.length)
        assert_equal('div', matches.first.name)
      end

      def test_css_matches?
        link = @html.at('a.bar')
        assert link
        assert(link.matches?('a.bar'))
      end

      def test_xpath_matches?
        link = @html.at('//a')
        assert link
        assert(link.matches?('//a'))
      end

      def test_unlink_then_swap
        link = @html.at('a')
        link.unlink

        div = @html.at('div')
        assert(div, 'should have a node')

        # This used to segv
        assert_nothing_raised { link.add_previous_sibling div }
      end

      # Swapping the div for a link leaves the link directly under body and
      # no div in the document.
      def test_swap
        @html.at('div').swap('<a href="foo">bar</a>')
        first_link = @html.css('a').first
        assert_equal('body', first_link.parent.name)
        assert_equal(0, @html.css('div').length)
      end

      def test_swap_with_regex_characters
        @html.at('div').swap('<a href="foo">ba)r</a>')
        first_link = @html.css('a').first
        assert_equal('ba)r', first_link.text)
      end

      def test_attribute_decodes_entities
        div = @html.at('div')
        div['href'] = 'foo&bar'
        assert_equal('foo&bar', div['href'])
        div['href'] += '&baz'
        assert_equal('foo&bar&baz', div['href'])
      end


      def test_before_will_prepend_text_nodes
        first_child = @html.at('//body').children.first
        assert first_child
        first_child.before("some text")
        assert_equal('some text', @html.at('//body').children[0].content.strip)
      end

      def test_before
        @html.at('//div').before('<a href="awesome">town</a>')
        assert_equal('awesome', @html.at('//div').previous['href'])
      end

      # A valueless attribute (allowtransparency) in inserted markup must not
      # raise.
      def test_fragment_handler_does_not_regurge_on_invalid_attributes
        iframe = %Q{<iframe style="width: 0%; height: 0px" src="http://someurl" allowtransparency></iframe>}
        assert_nothing_raised do
          @html.at('div').before(iframe)
        end
        assert_nothing_raised do
          @html.at('div').after(iframe)
        end
        assert_nothing_raised do
          @html.at('div').inner_html=(iframe)
        end
      end

      def test_inner_html=
        div = @html.at('//div')
        assert div
        div.inner_html = '1<span>2</span>3'
        kids = div.children
        assert_equal('1', kids[0].to_s)
        assert_equal('span', kids[1].name)
        assert_equal('2', kids[1].inner_text)
        assert_equal('3', kids[2].to_s)

        div.inner_html = 'testing'
        assert_equal('testing', div.content)
      end

      def test_fragment
        markup = <<-eohtml
hello
<div class="foo">
<p>bar</p>
</div>
world
        eohtml
        fragment = @html.fragment(markup)
        assert_match(/^hello/, fragment.inner_html.strip)
        assert_equal(3, fragment.children.length)
        paragraph = fragment.css('p').first
        assert paragraph
        assert_equal('div', paragraph.parent.name)
        assert_equal('foo', paragraph.parent['class'])
      end

      # Every serialization method returns the same markup for a one-element
      # fragment.
      def test_fragment_serialization
        expected = "<div>foo</div>"
        fragment = Nokogiri::HTML.fragment("<div>foo</div>")
        assert_equal(expected, fragment.serialize.chomp)
        assert_equal(expected, fragment.to_xml.chomp)
        assert_equal(expected, fragment.inner_html)
        assert_equal(expected, fragment.to_html)
        assert_equal(expected, fragment.to_s)
      end

      def test_after_will_append_text_nodes
        div = @html.at('//body/div')
        assert div
        div.after("some text")
        assert_equal('some text', div.next.text.strip)
      end

      def test_after
        @html.at('//div').after('<a href="awesome">town</a>')
        assert_equal('awesome', @html.at('//div').next['href'])
      end

      # Replacing a node with the result of a search is rejected.
      def test_replace
        markup = <<-eohtml
<html>
<head></head>
<body>
<center><img src='logo.gif' /></center>
</body>
</html>
        eohtml
        doc = Nokogiri::HTML(markup)
        center = doc.at("//center")
        images = center.search("//img")
        assert_raises(ArgumentError) { center.replace images }
      end

      def test_to_html_does_not_contain_entities
        source = <<-EOH
<html><body>
<p> test paragraph
foo bar </p>
</body></html>
        EOH
        html = NKF.nkf("-e --msdos", source)
        nokogiri = Nokogiri::HTML.parse(html)

        expected =
          if RUBY_PLATFORM =~ /java/
            # NKF linebreak modes are not supported as of jruby 1.2
            # see http://jira.codehaus.org/browse/JRUBY-3602 for status
            "<p>testparagraph\nfoobar</p>"
          else
            "<p>testparagraph\r\nfoobar</p>"
          end
        assert_equal(expected, nokogiri.at("p").to_html.gsub(/ /, ''))
      end
    end
  end
end
|