nokogiri 1.2.3-x86-mswin32-60 → 1.4.5-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +18 -7
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +297 -3
- data/CHANGELOG.rdoc +289 -0
- data/Manifest.txt +148 -37
- data/README.ja.rdoc +20 -20
- data/README.rdoc +53 -22
- data/Rakefile +127 -211
- data/bin/nokogiri +54 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +89 -54
- data/ext/nokogiri/html_document.c +34 -27
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +276 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +7 -5
- data/ext/nokogiri/html_entity_lookup.h +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +94 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/{native.c → nokogiri.c} +31 -7
- data/ext/nokogiri/{native.h → nokogiri.h} +68 -41
- data/ext/nokogiri/xml_attr.c +20 -9
- data/ext/nokogiri/xml_attr.h +1 -1
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +21 -9
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +18 -6
- data/ext/nokogiri/xml_comment.h +1 -1
- data/ext/nokogiri/xml_document.c +247 -68
- data/ext/nokogiri/xml_document.h +5 -3
- data/ext/nokogiri/xml_document_fragment.c +15 -7
- data/ext/nokogiri/xml_document_fragment.h +1 -1
- data/ext/nokogiri/xml_dtd.c +110 -10
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +16 -5
- data/ext/nokogiri/xml_entity_reference.h +1 -1
- data/ext/nokogiri/xml_io.c +40 -8
- data/ext/nokogiri/xml_io.h +2 -1
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +84 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +782 -225
- data/ext/nokogiri/xml_node.h +2 -4
- data/ext/nokogiri/xml_node_set.c +253 -34
- data/ext/nokogiri/xml_node_set.h +2 -2
- data/ext/nokogiri/xml_processing_instruction.c +17 -5
- data/ext/nokogiri/xml_processing_instruction.h +1 -1
- data/ext/nokogiri/xml_reader.c +277 -85
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +168 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +183 -111
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +199 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +42 -12
- data/ext/nokogiri/xml_sax_push_parser.h +1 -1
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +28 -173
- data/ext/nokogiri/xml_syntax_error.h +2 -1
- data/ext/nokogiri/xml_text.c +16 -6
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +104 -47
- data/ext/nokogiri/xml_xpath_context.h +1 -1
- data/ext/nokogiri/xslt_stylesheet.c +161 -19
- data/ext/nokogiri/xslt_stylesheet.h +1 -1
- data/lib/nokogiri.rb +47 -8
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +6 -3
- data/lib/nokogiri/css/node.rb +14 -12
- data/lib/nokogiri/css/parser.rb +665 -62
- data/lib/nokogiri/css/parser.y +20 -10
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +10 -9
- data/lib/nokogiri/css/xpath_visitor.rb +47 -44
- data/lib/nokogiri/decorators/slop.rb +8 -4
- data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +81 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +420 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +20 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xml_parser_input.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +174 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +559 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +150 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +236 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +143 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +79 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
- data/lib/nokogiri/ffi/xml/schema.rb +109 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +153 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +77 -0
- data/lib/nokogiri/html.rb +13 -47
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +201 -7
- data/lib/nokogiri/html/document_fragment.rb +41 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +2 -0
- data/lib/nokogiri/html/sax/parser.rb +34 -3
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/version.rb +40 -1
- data/lib/nokogiri/version_warning.rb +14 -0
- data/lib/nokogiri/xml.rb +32 -53
- data/lib/nokogiri/xml/attr.rb +5 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +349 -29
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +166 -14
- data/lib/nokogiri/xml/document_fragment.rb +76 -1
- data/lib/nokogiri/xml/dtd.rb +16 -3
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +561 -166
- data/lib/nokogiri/xml/node/save_options.rb +22 -2
- data/lib/nokogiri/xml/node_set.rb +202 -40
- data/lib/nokogiri/xml/parse_options.rb +93 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -0
- data/lib/nokogiri/xml/reader.rb +93 -8
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +1 -7
- data/lib/nokogiri/xml/sax/document.rb +107 -2
- data/lib/nokogiri/xml/sax/parser.rb +57 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +13 -1
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath.rb +1 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +3 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -0
- data/lib/nokogiri/xslt.rb +26 -2
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/xsd/xmlparser/nokogiri.rb +45 -9
- data/tasks/cross_compile.rb +173 -0
- data/tasks/test.rb +25 -69
- data/test/css/test_nthiness.rb +3 -4
- data/test/css/test_parser.rb +75 -20
- data/test/css/test_tokenizer.rb +23 -1
- data/test/css/test_xpath_visitor.rb +10 -1
- data/test/decorators/test_slop.rb +16 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +101 -23
- data/test/html/sax/test_parser.rb +81 -2
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +39 -8
- data/test/html/test_document.rb +186 -23
- data/test/html/test_document_encoding.rb +78 -1
- data/test/html/test_document_fragment.rb +253 -0
- data/test/html/test_element_description.rb +98 -0
- data/test/html/test_named_characters.rb +1 -1
- data/test/html/test_node.rb +124 -36
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +1 -52
- data/test/test_css_cache.rb +2 -13
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +88 -19
- data/test/test_nokogiri.rb +38 -5
- data/test/test_reader.rb +188 -6
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +183 -83
- data/test/xml/node/test_save_options.rb +1 -1
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +175 -4
- data/test/xml/sax/test_parser_context.rb +113 -0
- data/test/xml/sax/test_push_parser.rb +90 -2
- data/test/xml/test_attr.rb +35 -1
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +186 -1
- data/test/xml/test_cdata.rb +32 -1
- data/test/xml/test_comment.rb +13 -1
- data/test/xml/test_document.rb +415 -43
- data/test/xml/test_document_encoding.rb +1 -1
- data/test/xml/test_document_fragment.rb +173 -5
- data/test/xml/test_dtd.rb +61 -6
- data/test/xml/test_dtd_encoding.rb +3 -1
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +120 -0
- data/test/xml/test_entity_reference.rb +5 -1
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +546 -201
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +33 -3
- data/test/xml/test_node_reparenting.rb +321 -0
- data/test/xml/test_node_set.rb +538 -2
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +6 -1
- data/test/xml/test_reader_encoding.rb +1 -1
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +94 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +35 -1
- data/test/xml/test_unparented_node.rb +5 -5
- data/test/xml/test_xpath.rb +142 -11
- data/test/xslt/test_custom_functions.rb +94 -0
- metadata +328 -92
- data/ext/nokogiri/html_sax_parser.c +0 -57
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/native.so +0 -0
- data/ext/nokogiri/xml_xpath.c +0 -53
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +0 -30
- data/lib/nokogiri/css/generated_parser.rb +0 -713
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -144
- data/lib/nokogiri/decorators.rb +0 -2
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -28
- data/lib/nokogiri/hpricot.rb +0 -51
- data/lib/nokogiri/xml/comment.rb +0 -6
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/lib/nokogiri/xml/fragment_handler.rb +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -11
- data/test/hpricot/test_alter.rb +0 -68
- data/test/hpricot/test_builder.rb +0 -20
- data/test/hpricot/test_parser.rb +0 -426
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -77
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_gc.rb +0 -15
@@ -1,10 +1,24 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
-
require
|
2
|
+
require "helper"
|
3
3
|
|
4
4
|
module Nokogiri
|
5
5
|
module HTML
|
6
6
|
if RUBY_VERSION =~ /^1\.9/
|
7
7
|
class TestDocumentEncoding < Nokogiri::TestCase
|
8
|
+
def test_encoding
|
9
|
+
doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
|
10
|
+
|
11
|
+
hello = "こんにちは"
|
12
|
+
|
13
|
+
assert_match doc.encoding, doc.to_html
|
14
|
+
assert_match hello.encode('Shift_JIS'), doc.to_html
|
15
|
+
assert_equal 'Shift_JIS', doc.to_html.encoding.name
|
16
|
+
|
17
|
+
assert_match hello, doc.to_html(:encoding => 'UTF-8')
|
18
|
+
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
|
19
|
+
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
|
20
|
+
end
|
21
|
+
|
8
22
|
def test_default_to_encoding_from_string
|
9
23
|
bad_charset = <<-eohtml
|
10
24
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
@@ -24,6 +38,23 @@ module Nokogiri
|
|
24
38
|
assert_equal bad_charset.encoding.name, doc.encoding
|
25
39
|
end
|
26
40
|
|
41
|
+
def test_encoding_non_utf8
|
42
|
+
orig = '日本語が上手です'
|
43
|
+
bin = Encoding::ASCII_8BIT
|
44
|
+
[Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
|
45
|
+
html = <<-eohtml.encode(enc)
|
46
|
+
<html>
|
47
|
+
<meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
|
48
|
+
<title xml:lang="ja">#{orig}</title></html>
|
49
|
+
eohtml
|
50
|
+
text = Nokogiri::HTML.parse(html).at('title').inner_text
|
51
|
+
assert_equal(
|
52
|
+
orig.encode(enc).force_encoding(bin),
|
53
|
+
text.encode(enc).force_encoding(bin)
|
54
|
+
)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
27
58
|
def test_encoding_with_a_bad_name
|
28
59
|
bad_charset = <<-eohtml
|
29
60
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
@@ -42,5 +73,51 @@ module Nokogiri
|
|
42
73
|
end
|
43
74
|
end
|
44
75
|
end
|
76
|
+
|
77
|
+
class TestDocumentEncodingDetection < Nokogiri::TestCase
|
78
|
+
if IO.respond_to?(:binread)
|
79
|
+
def binread(file)
|
80
|
+
IO.binread(file)
|
81
|
+
end
|
82
|
+
else
|
83
|
+
def binread(file)
|
84
|
+
IO.read(file)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def binopen(file)
|
89
|
+
File.open(file, 'rb')
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_document_xhtml_enc
|
93
|
+
[ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
|
94
|
+
doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
|
95
|
+
ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map { |text| text.text }
|
96
|
+
|
97
|
+
doc_from_string = Nokogiri::HTML(binread(file))
|
98
|
+
ary_from_string = doc_from_string.xpath('//p/text()').map { |text| text.text }
|
99
|
+
|
100
|
+
doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
|
101
|
+
ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map { |text| text.text }
|
102
|
+
|
103
|
+
doc_from_file = Nokogiri::HTML(binopen(file))
|
104
|
+
ary_from_file = doc_from_file.xpath('//p/text()').map { |text| text.text }
|
105
|
+
|
106
|
+
title = 'たこ焼き仮面'
|
107
|
+
|
108
|
+
assert_equal(title, doc_from_string_enc.at('//title/text()').text)
|
109
|
+
assert_equal(title, doc_from_string.at('//title/text()').text)
|
110
|
+
assert_equal(title, doc_from_file_enc.at('//title/text()').text)
|
111
|
+
assert_equal(title, doc_from_file.at('//title/text()').text)
|
112
|
+
|
113
|
+
evil = (0..72).map { |i| '超' * i + '悪い事を構想中。' }
|
114
|
+
|
115
|
+
assert_equal(evil, ary_from_string_enc)
|
116
|
+
assert_equal(evil, ary_from_string)
|
117
|
+
assert_equal(evil, ary_from_file_enc)
|
118
|
+
assert_equal(evil, ary_from_file)
|
119
|
+
}
|
120
|
+
end
|
121
|
+
end
|
45
122
|
end
|
46
123
|
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "helper"
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module HTML
|
6
|
+
class TestDocumentFragment < Nokogiri::TestCase
|
7
|
+
def setup
|
8
|
+
super
|
9
|
+
@html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
|
10
|
+
end
|
11
|
+
|
12
|
+
if RUBY_VERSION >= '1.9'
|
13
|
+
def test_inspect_encoding
|
14
|
+
fragment = "<div>こんにちは!</div>".encode('EUC-JP')
|
15
|
+
f = Nokogiri::HTML::DocumentFragment.parse fragment
|
16
|
+
assert_equal "こんにちは!", f.content
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_html_parse_encoding
|
20
|
+
fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
|
21
|
+
f = Nokogiri::HTML.fragment fragment
|
22
|
+
assert_equal 'EUC-JP', f.document.encoding
|
23
|
+
assert_equal "こんにちは!", f.content
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_parse_encoding
|
28
|
+
fragment = "<div>hello world</div>"
|
29
|
+
f = Nokogiri::HTML::DocumentFragment.parse fragment, 'ISO-8859-1'
|
30
|
+
assert_equal 'ISO-8859-1', f.document.encoding
|
31
|
+
assert_equal "hello world", f.content
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_html_parse_with_encoding
|
35
|
+
fragment = "<div>hello world</div>"
|
36
|
+
f = Nokogiri::HTML.fragment fragment, 'ISO-8859-1'
|
37
|
+
assert_equal 'ISO-8859-1', f.document.encoding
|
38
|
+
assert_equal "hello world", f.content
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_parse_in_context
|
42
|
+
assert_equal('<br>', @html.root.parse('<br />').to_s)
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_inner_html=
|
46
|
+
fragment = Nokogiri::HTML.fragment '<hr />'
|
47
|
+
|
48
|
+
fragment.inner_html = "hello"
|
49
|
+
assert_equal 'hello', fragment.inner_html
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_ancestors_search
|
53
|
+
html = %q{
|
54
|
+
<div>
|
55
|
+
<ul>
|
56
|
+
<li>foo</li>
|
57
|
+
</ul>
|
58
|
+
</div>
|
59
|
+
}
|
60
|
+
fragment = Nokogiri::HTML.fragment html
|
61
|
+
li = fragment.at('li')
|
62
|
+
assert li.matches?('li')
|
63
|
+
end
|
64
|
+
|
65
|
+
def test_fun_encoding
|
66
|
+
string = %Q(<body>こんにちは</body>)
|
67
|
+
html = Nokogiri::HTML::DocumentFragment.parse(
|
68
|
+
string
|
69
|
+
).to_html(:encoding => 'UTF-8')
|
70
|
+
assert_equal string, html
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_new
|
74
|
+
assert Nokogiri::HTML::DocumentFragment.new(@html)
|
75
|
+
end
|
76
|
+
|
77
|
+
def test_body_fragment_should_contain_body
|
78
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse(" <body><div>foo</div></body>")
|
79
|
+
assert_match(/^<body>/, fragment.to_s)
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_nonbody_fragment_should_not_contain_body
|
83
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse("<div>foo</div>")
|
84
|
+
assert_match(/^<div>/, fragment.to_s)
|
85
|
+
end
|
86
|
+
|
87
|
+
def test_fragment_should_have_document
|
88
|
+
fragment = Nokogiri::HTML::DocumentFragment.new(@html)
|
89
|
+
assert_equal @html, fragment.document
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_empty_fragment_should_be_searchable_by_css
|
93
|
+
fragment = Nokogiri::HTML.fragment("")
|
94
|
+
assert_equal 0, fragment.css("a").size
|
95
|
+
end
|
96
|
+
|
97
|
+
def test_empty_fragment_should_be_searchable
|
98
|
+
fragment = Nokogiri::HTML.fragment("")
|
99
|
+
assert_equal 0, fragment.search("//a").size
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_name
|
103
|
+
fragment = Nokogiri::HTML::DocumentFragment.new(@html)
|
104
|
+
assert_equal '#document-fragment', fragment.name
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_static_method
|
108
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse("<div>a</div>")
|
109
|
+
assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_many_fragments
|
113
|
+
100.times { Nokogiri::HTML::DocumentFragment.new(@html) }
|
114
|
+
end
|
115
|
+
|
116
|
+
def test_subclass
|
117
|
+
klass = Class.new(Nokogiri::HTML::DocumentFragment)
|
118
|
+
fragment = klass.new(@html, "<div>a</div>")
|
119
|
+
assert_instance_of klass, fragment
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_subclass_parse
|
123
|
+
klass = Class.new(Nokogiri::HTML::DocumentFragment)
|
124
|
+
doc = klass.parse("<div>a</div>")
|
125
|
+
assert_instance_of klass, doc
|
126
|
+
end
|
127
|
+
|
128
|
+
def test_html_fragment
|
129
|
+
fragment = Nokogiri::HTML.fragment("<div>a</div>")
|
130
|
+
assert_equal "<div>a</div>", fragment.to_s
|
131
|
+
end
|
132
|
+
|
133
|
+
def test_html_fragment_has_outer_text
|
134
|
+
doc = "a<div>b</div>c"
|
135
|
+
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
136
|
+
if Nokogiri::VERSION_INFO['libxml']['loaded'] <= "2.6.16"
|
137
|
+
assert_equal "a<div>b</div><p>c</p>", fragment.to_s
|
138
|
+
else
|
139
|
+
assert_equal "a<div>b</div>c", fragment.to_s
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_html_fragment_case_insensitivity
|
144
|
+
doc = "<Div>b</Div>"
|
145
|
+
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
146
|
+
assert_equal "<div>b</div>", fragment.to_s
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_html_fragment_with_leading_whitespace
|
150
|
+
doc = " <div>b</div> "
|
151
|
+
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
152
|
+
assert_match %r% <div>b</div> *%, fragment.to_s
|
153
|
+
end
|
154
|
+
|
155
|
+
def test_html_fragment_with_leading_whitespace_and_newline
|
156
|
+
doc = " \n<div>b</div> "
|
157
|
+
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
158
|
+
assert_match %r% \n<div>b</div> *%, fragment.to_s
|
159
|
+
end
|
160
|
+
|
161
|
+
def test_html_fragment_with_leading_text_and_newline
|
162
|
+
fragment = HTML::Document.new.fragment("First line\nSecond line<br>Broken line")
|
163
|
+
assert_equal fragment.to_s, "First line\nSecond line<br>Broken line"
|
164
|
+
end
|
165
|
+
|
166
|
+
def test_html_fragment_with_leading_whitespace_and_text_and_newline
|
167
|
+
fragment = HTML::Document.new.fragment(" First line\nSecond line<br>Broken line")
|
168
|
+
assert_equal " First line\nSecond line<br>Broken line", fragment.to_s
|
169
|
+
end
|
170
|
+
|
171
|
+
def test_html_fragment_with_leading_entity
|
172
|
+
failed = "&quot;test<br/>test&quot;"
|
173
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse(failed)
|
174
|
+
assert_equal '"test<br>test"', fragment.to_html
|
175
|
+
end
|
176
|
+
|
177
|
+
def test_to_s
|
178
|
+
doc = "<span>foo<br></span><span>bar</span>"
|
179
|
+
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
180
|
+
assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_s
|
181
|
+
end
|
182
|
+
|
183
|
+
def test_to_html
|
184
|
+
doc = "<span>foo<br></span><span>bar</span>"
|
185
|
+
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
186
|
+
assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_html
|
187
|
+
end
|
188
|
+
|
189
|
+
def test_to_xhtml
|
190
|
+
doc = "<span>foo<br></span><span>bar</span>"
|
191
|
+
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
192
|
+
if Nokogiri::VERSION_INFO['libxml']['loaded'] >= "2.7.0"
|
193
|
+
assert_equal "<span>foo<br /></span><span>bar</span>", fragment.to_xhtml
|
194
|
+
else
|
195
|
+
assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_xhtml
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
def test_to_xml
|
200
|
+
doc = "<span>foo<br></span><span>bar</span>"
|
201
|
+
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
202
|
+
assert_equal "<span>foo<br/></span><span>bar</span>", fragment.to_xml
|
203
|
+
end
|
204
|
+
|
205
|
+
def test_fragment_script_tag_with_cdata
|
206
|
+
doc = HTML::Document.new
|
207
|
+
fragment = doc.fragment("<script>var foo = 'bar';</script>")
|
208
|
+
assert_equal("<script>var foo = 'bar';</script>",
|
209
|
+
fragment.to_s)
|
210
|
+
end
|
211
|
+
|
212
|
+
def test_fragment_with_comment
|
213
|
+
doc = HTML::Document.new
|
214
|
+
fragment = doc.fragment("<p>hello<!-- your ad here --></p>")
|
215
|
+
assert_equal("<p>hello<!-- your ad here --></p>",
|
216
|
+
fragment.to_s)
|
217
|
+
end
|
218
|
+
|
219
|
+
def test_malformed_fragment_is_corrected
|
220
|
+
fragment = HTML::DocumentFragment.parse("<div </div>")
|
221
|
+
assert_equal "<div></div>", fragment.to_s
|
222
|
+
end
|
223
|
+
|
224
|
+
def test_unclosed_script_tag
|
225
|
+
# see GH#315
|
226
|
+
fragment = HTML::DocumentFragment.parse("foo <script>bar")
|
227
|
+
assert_equal "foo <script>bar</script>", fragment.to_html
|
228
|
+
end
|
229
|
+
|
230
|
+
def test_error_propagation_on_fragment_parse
|
231
|
+
frag = Nokogiri::HTML::DocumentFragment.parse "<hello>oh, hello there.</hello>"
|
232
|
+
assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be copied to the fragment"
|
233
|
+
end
|
234
|
+
|
235
|
+
def test_error_propagation_on_fragment_parse_in_node_context
|
236
|
+
doc = Nokogiri::HTML::Document.parse "<html><body><div></div></body></html>"
|
237
|
+
context_node = doc.at_css "div"
|
238
|
+
frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
|
239
|
+
assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document"
|
240
|
+
end
|
241
|
+
|
242
|
+
def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors
|
243
|
+
doc = Nokogiri::HTML::Document.parse "<html><body><div></div><jimmy></jimmy></body></html>"
|
244
|
+
assert doc.errors.any?{|err| err.to_s =~ /jimmy/}, "assert on setup"
|
245
|
+
|
246
|
+
context_node = doc.at_css "div"
|
247
|
+
frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
|
248
|
+
assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document"
|
249
|
+
assert frag.errors.none?{|err| err.to_s =~ /jimmy/}, "errors should not include pre-existing document errors"
|
250
|
+
end
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML
|
5
|
+
class TestElementDescription < Nokogiri::TestCase
|
6
|
+
def test_fetch_nonexistent
|
7
|
+
assert_nil ElementDescription['foo']
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_fetch_element_description
|
11
|
+
assert desc = ElementDescription['a']
|
12
|
+
assert_instance_of ElementDescription, desc
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_name
|
16
|
+
assert_equal 'a', ElementDescription['a'].name
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_implied_start_tag?
|
20
|
+
assert !ElementDescription['a'].implied_start_tag?
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_implied_end_tag?
|
24
|
+
assert !ElementDescription['a'].implied_end_tag?
|
25
|
+
assert ElementDescription['p'].implied_end_tag?
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_save_end_tag?
|
29
|
+
assert !ElementDescription['a'].save_end_tag?
|
30
|
+
assert ElementDescription['br'].save_end_tag?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_empty?
|
34
|
+
assert ElementDescription['br'].empty?
|
35
|
+
assert !ElementDescription['a'].empty?
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_deprecated?
|
39
|
+
assert ElementDescription['applet'].deprecated?
|
40
|
+
assert !ElementDescription['br'].deprecated?
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_inline?
|
44
|
+
assert ElementDescription['a'].inline?
|
45
|
+
assert !ElementDescription['div'].inline?
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_block?
|
49
|
+
element = ElementDescription['a']
|
50
|
+
assert_equal(!element.inline?, element.block?)
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_description
|
54
|
+
assert ElementDescription['a'].description
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_subelements
|
58
|
+
sub_elements = ElementDescription['body'].sub_elements
|
59
|
+
if Nokogiri::LIBXML_VERSION >= '2.7.7'
|
60
|
+
assert_equal 65, sub_elements.length
|
61
|
+
else
|
62
|
+
assert_equal 61, sub_elements.length
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_default_sub_element
|
67
|
+
assert_equal 'div', ElementDescription['body'].default_sub_element
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_optional_attributes
|
71
|
+
attrs = ElementDescription['table'].optional_attributes
|
72
|
+
assert attrs
|
73
|
+
end
|
74
|
+
|
75
|
+
def test_deprecated_attributes
|
76
|
+
attrs = ElementDescription['table'].deprecated_attributes
|
77
|
+
assert attrs
|
78
|
+
assert_equal 2, attrs.length
|
79
|
+
end
|
80
|
+
|
81
|
+
def test_required_attributes
|
82
|
+
attrs = ElementDescription['table'].required_attributes
|
83
|
+
assert attrs
|
84
|
+
assert_equal 0, attrs.length
|
85
|
+
end
|
86
|
+
|
87
|
+
def test_inspect
|
88
|
+
desc = ElementDescription['input']
|
89
|
+
assert_match desc.name, desc.inspect
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_to_s
|
93
|
+
desc = ElementDescription['input']
|
94
|
+
assert_match desc.name, desc.to_s
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|