nokogiri 1.6.0 → 1.13.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +3 -19
- data/LICENSE-DEPENDENCIES.md +1903 -0
- data/LICENSE.md +9 -0
- data/README.md +280 -0
- data/bin/nokogiri +84 -31
- data/dependencies.yml +23 -4
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +952 -132
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +231 -96
- data/ext/nokogiri/nokogiri.h +188 -129
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +49 -40
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +24 -23
- data/ext/nokogiri/xml_comment.c +29 -21
- data/ext/nokogiri/xml_document.c +327 -223
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +45 -20
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +74 -32
- data/ext/nokogiri/xml_node.c +1290 -680
- data/ext/nokogiri/xml_node_set.c +239 -208
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +227 -189
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +123 -125
- data/ext/nokogiri/xml_sax_parser_context.c +138 -79
- data/ext/nokogiri/xml_sax_push_parser.c +88 -35
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +50 -23
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +227 -140
- data/ext/nokogiri/xslt_stylesheet.c +269 -177
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -58
- data/lib/nokogiri/css/parser.rb +407 -357
- data/lib/nokogiri/css/parser.y +265 -246
- data/lib/nokogiri/css/parser_extras.rb +52 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +8 -7
- data/lib/nokogiri/css/xpath_visitor.rb +266 -80
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +17 -8
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +331 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +221 -0
- data/lib/nokogiri/version.rb +3 -105
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +96 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +234 -95
- data/lib/nokogiri/xml/document_fragment.rb +86 -36
- data/lib/nokogiri/xml/dtd.rb +16 -4
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +947 -502
- data/lib/nokogiri/xml/node_set.rb +168 -159
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +40 -5
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +43 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -36
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +69 -69
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +278 -362
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/.travis.yml +0 -27
- data/CHANGELOG.ja.rdoc +0 -819
- data/CHANGELOG.rdoc +0 -819
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -315
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -246
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -56
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -13
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -14
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -254
- data/lib/nokogiri/html/document_fragment.rb +0 -41
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/lib/nokogiri/html/sax/push_parser.rb +0 -16
- data/ports/archives/libxml2-2.8.0.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.26.tar.gz +0 -0
- data/tasks/cross_compile.rb +0 -132
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
data/test/html/test_document.rb
DELETED
@@ -1,552 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
module Nokogiri
|
4
|
-
module HTML
|
5
|
-
class TestDocument < Nokogiri::TestCase
|
6
|
-
def setup
|
7
|
-
super
|
8
|
-
@html = Nokogiri::HTML.parse(File.read(HTML_FILE))
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_nil_css
|
12
|
-
# Behavior is undefined but shouldn't break
|
13
|
-
assert @html.css(nil)
|
14
|
-
assert @html.xpath(nil)
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_exceptions_remove_newlines
|
18
|
-
errors = @html.errors
|
19
|
-
assert errors.length > 0, 'has errors'
|
20
|
-
errors.each do |error|
|
21
|
-
assert_equal(error.to_s.chomp, error.to_s)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_fragment
|
26
|
-
fragment = @html.fragment
|
27
|
-
assert_equal 0, fragment.children.length
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_document_takes_config_block
|
31
|
-
options = nil
|
32
|
-
Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
|
33
|
-
options = cfg
|
34
|
-
options.nonet.nowarning.dtdattr
|
35
|
-
end
|
36
|
-
assert options.nonet?
|
37
|
-
assert options.nowarning?
|
38
|
-
assert options.dtdattr?
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_parse_takes_config_block
|
42
|
-
options = nil
|
43
|
-
Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
|
44
|
-
options = cfg
|
45
|
-
options.nonet.nowarning.dtdattr
|
46
|
-
end
|
47
|
-
assert options.nonet?
|
48
|
-
assert options.nowarning?
|
49
|
-
assert options.dtdattr?
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_subclass
|
53
|
-
klass = Class.new(Nokogiri::HTML::Document)
|
54
|
-
doc = klass.new
|
55
|
-
assert_instance_of klass, doc
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_subclass_initialize
|
59
|
-
klass = Class.new(Nokogiri::HTML::Document) do
|
60
|
-
attr_accessor :initialized_with
|
61
|
-
|
62
|
-
def initialize(*args)
|
63
|
-
@initialized_with = args
|
64
|
-
end
|
65
|
-
end
|
66
|
-
doc = klass.new("uri", "external_id", 1)
|
67
|
-
assert_equal ["uri", "external_id", 1], doc.initialized_with
|
68
|
-
end
|
69
|
-
|
70
|
-
def test_subclass_dup
|
71
|
-
klass = Class.new(Nokogiri::HTML::Document)
|
72
|
-
doc = klass.new.dup
|
73
|
-
assert_instance_of klass, doc
|
74
|
-
end
|
75
|
-
|
76
|
-
def test_subclass_parse
|
77
|
-
klass = Class.new(Nokogiri::HTML::Document)
|
78
|
-
doc = klass.parse(File.read(HTML_FILE))
|
79
|
-
assert_equal @html.to_s, doc.to_s
|
80
|
-
assert_instance_of klass, doc
|
81
|
-
end
|
82
|
-
|
83
|
-
def test_document_parse_method
|
84
|
-
html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
|
85
|
-
assert_equal @html.to_s, html.to_s
|
86
|
-
end
|
87
|
-
|
88
|
-
def test_document_parse_method_with_url
|
89
|
-
require 'open-uri'
|
90
|
-
begin
|
91
|
-
html = open('http://google.com').read
|
92
|
-
rescue
|
93
|
-
skip("This test needs the internet. Skips if no internet available.")
|
94
|
-
end
|
95
|
-
doc = Nokogiri::HTML html ,"http:/foobar.foobar/"
|
96
|
-
refute_empty doc.to_s, "Document should not be empty"
|
97
|
-
end
|
98
|
-
|
99
|
-
###
|
100
|
-
# Nokogiri::HTML returns an empty Document when given a blank string GH#11
|
101
|
-
def test_empty_string_returns_empty_doc
|
102
|
-
doc = Nokogiri::HTML('')
|
103
|
-
assert_instance_of Nokogiri::HTML::Document, doc
|
104
|
-
assert_nil doc.root
|
105
|
-
end
|
106
|
-
|
107
|
-
unless Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
108
|
-
# FIXME: this is a hack around broken libxml versions
|
109
|
-
def test_to_xhtml_with_indent
|
110
|
-
doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
|
111
|
-
doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
|
112
|
-
assert_indent 2, doc
|
113
|
-
end
|
114
|
-
|
115
|
-
def test_write_to_xhtml_with_indent
|
116
|
-
io = StringIO.new
|
117
|
-
doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
|
118
|
-
doc.write_xhtml_to io, :indent => 5
|
119
|
-
io.rewind
|
120
|
-
doc = Nokogiri::HTML(io.read)
|
121
|
-
assert_indent 5, doc
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def test_swap_should_not_exist
|
126
|
-
assert_raises(NoMethodError) {
|
127
|
-
@html.swap
|
128
|
-
}
|
129
|
-
end
|
130
|
-
|
131
|
-
def test_namespace_should_not_exist
|
132
|
-
assert_raises(NoMethodError) {
|
133
|
-
@html.namespace
|
134
|
-
}
|
135
|
-
end
|
136
|
-
|
137
|
-
def test_meta_encoding
|
138
|
-
assert_equal 'UTF-8', @html.meta_encoding
|
139
|
-
end
|
140
|
-
|
141
|
-
def test_meta_encoding_is_strict_about_http_equiv
|
142
|
-
doc = Nokogiri::HTML(<<-eohtml)
|
143
|
-
<html>
|
144
|
-
<head>
|
145
|
-
<meta http-equiv="X-Content-Type" content="text/html; charset=Shift_JIS">
|
146
|
-
</head>
|
147
|
-
<body>
|
148
|
-
foo
|
149
|
-
</body>
|
150
|
-
</html>
|
151
|
-
eohtml
|
152
|
-
assert_nil doc.meta_encoding
|
153
|
-
end
|
154
|
-
|
155
|
-
def test_meta_encoding_handles_malformed_content_charset
|
156
|
-
doc = Nokogiri::HTML(<<EOHTML)
|
157
|
-
<html>
|
158
|
-
<head>
|
159
|
-
<meta http-equiv="Content-type" content="text/html; utf-8" />
|
160
|
-
</head>
|
161
|
-
<body>
|
162
|
-
foo
|
163
|
-
</body>
|
164
|
-
</html>
|
165
|
-
EOHTML
|
166
|
-
assert_nil doc.meta_encoding
|
167
|
-
end
|
168
|
-
|
169
|
-
def test_meta_encoding=
|
170
|
-
@html.meta_encoding = 'EUC-JP'
|
171
|
-
assert_equal 'EUC-JP', @html.meta_encoding
|
172
|
-
end
|
173
|
-
|
174
|
-
def test_title
|
175
|
-
assert_equal 'Tender Lovemaking ', @html.title
|
176
|
-
doc = Nokogiri::HTML('<html><body>foo</body></html>')
|
177
|
-
assert_nil doc.title
|
178
|
-
end
|
179
|
-
|
180
|
-
def test_title=()
|
181
|
-
doc = Nokogiri::HTML(<<eohtml)
|
182
|
-
<html>
|
183
|
-
<head>
|
184
|
-
<title>old</title>
|
185
|
-
</head>
|
186
|
-
<body>
|
187
|
-
foo
|
188
|
-
</body>
|
189
|
-
</html>
|
190
|
-
eohtml
|
191
|
-
doc.title = 'new'
|
192
|
-
assert_equal 'new', doc.title
|
193
|
-
|
194
|
-
doc = Nokogiri::HTML(<<eohtml)
|
195
|
-
<html>
|
196
|
-
<head>
|
197
|
-
</head>
|
198
|
-
<body>
|
199
|
-
foo
|
200
|
-
</body>
|
201
|
-
</html>
|
202
|
-
eohtml
|
203
|
-
doc.title = 'new'
|
204
|
-
assert_equal 'new', doc.title
|
205
|
-
|
206
|
-
doc = Nokogiri::HTML(<<eohtml)
|
207
|
-
<html>
|
208
|
-
<body>
|
209
|
-
foo
|
210
|
-
</body>
|
211
|
-
</html>
|
212
|
-
eohtml
|
213
|
-
doc.title = 'new'
|
214
|
-
if Nokogiri.uses_libxml?
|
215
|
-
assert_nil doc.title
|
216
|
-
else
|
217
|
-
assert_equal 'new', doc.title
|
218
|
-
end
|
219
|
-
end
|
220
|
-
|
221
|
-
def test_meta_encoding_without_head
|
222
|
-
html = Nokogiri::HTML('<html><body>foo</body></html>')
|
223
|
-
assert_nil html.meta_encoding
|
224
|
-
|
225
|
-
html.meta_encoding = 'EUC-JP'
|
226
|
-
assert_nil html.meta_encoding
|
227
|
-
end
|
228
|
-
|
229
|
-
def test_meta_encoding_with_empty_content_type
|
230
|
-
html = Nokogiri::HTML(<<-eohtml)
|
231
|
-
<html>
|
232
|
-
<head>
|
233
|
-
<meta http-equiv="Content-Type" content="">
|
234
|
-
</head>
|
235
|
-
<body>
|
236
|
-
foo
|
237
|
-
</body>
|
238
|
-
</html>
|
239
|
-
eohtml
|
240
|
-
assert_nil html.meta_encoding
|
241
|
-
|
242
|
-
html = Nokogiri::HTML(<<-eohtml)
|
243
|
-
<html>
|
244
|
-
<head>
|
245
|
-
<meta http-equiv="Content-Type">
|
246
|
-
</head>
|
247
|
-
<body>
|
248
|
-
foo
|
249
|
-
</body>
|
250
|
-
</html>
|
251
|
-
eohtml
|
252
|
-
assert_nil html.meta_encoding
|
253
|
-
end
|
254
|
-
|
255
|
-
def test_root_node_parent_is_document
|
256
|
-
parent = @html.root.parent
|
257
|
-
assert_equal @html, parent
|
258
|
-
assert_instance_of Nokogiri::HTML::Document, parent
|
259
|
-
end
|
260
|
-
|
261
|
-
def test_parse_handles_nil_gracefully
|
262
|
-
@doc = Nokogiri::HTML::Document.parse(nil)
|
263
|
-
assert_instance_of Nokogiri::HTML::Document, @doc
|
264
|
-
end
|
265
|
-
|
266
|
-
def test_parse_empty_document
|
267
|
-
doc = Nokogiri::HTML("\n")
|
268
|
-
assert_equal 0, doc.css('a').length
|
269
|
-
assert_equal 0, doc.xpath('//a').length
|
270
|
-
assert_equal 0, doc.search('//a').length
|
271
|
-
end
|
272
|
-
|
273
|
-
def test_HTML_function
|
274
|
-
html = Nokogiri::HTML(File.read(HTML_FILE))
|
275
|
-
assert html.html?
|
276
|
-
end
|
277
|
-
|
278
|
-
def test_parse_io
|
279
|
-
assert File.open(HTML_FILE, 'rb') { |f|
|
280
|
-
Document.read_io(f, nil, 'UTF-8',
|
281
|
-
XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
|
282
|
-
)
|
283
|
-
}
|
284
|
-
end
|
285
|
-
|
286
|
-
def test_parse_temp_file
|
287
|
-
temp_html_file = Tempfile.new("TEMP_HTML_FILE")
|
288
|
-
File.open(HTML_FILE, 'rb') { |f| temp_html_file.write f.read }
|
289
|
-
temp_html_file.close
|
290
|
-
temp_html_file.open
|
291
|
-
assert_equal Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath('//div/a').length,
|
292
|
-
Nokogiri::HTML.parse(temp_html_file).xpath('//div/a').length
|
293
|
-
end
|
294
|
-
|
295
|
-
def test_to_xhtml
|
296
|
-
assert_match 'XHTML', @html.to_xhtml
|
297
|
-
assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
|
298
|
-
assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
|
299
|
-
end
|
300
|
-
|
301
|
-
def test_no_xml_header
|
302
|
-
html = Nokogiri::HTML(<<-eohtml)
|
303
|
-
<html>
|
304
|
-
</html>
|
305
|
-
eohtml
|
306
|
-
assert html.to_html.length > 0, 'html length is too short'
|
307
|
-
assert_no_match(/^<\?xml/, html.to_html)
|
308
|
-
end
|
309
|
-
|
310
|
-
def test_document_has_error
|
311
|
-
html = Nokogiri::HTML(<<-eohtml)
|
312
|
-
<html>
|
313
|
-
<body>
|
314
|
-
<div awesome="asdf>
|
315
|
-
<p>inside div tag</p>
|
316
|
-
</div>
|
317
|
-
<p>outside div tag</p>
|
318
|
-
</body>
|
319
|
-
</html>
|
320
|
-
eohtml
|
321
|
-
assert html.errors.length > 0
|
322
|
-
end
|
323
|
-
|
324
|
-
def test_relative_css
|
325
|
-
html = Nokogiri::HTML(<<-eohtml)
|
326
|
-
<html>
|
327
|
-
<body>
|
328
|
-
<div>
|
329
|
-
<p>inside div tag</p>
|
330
|
-
</div>
|
331
|
-
<p>outside div tag</p>
|
332
|
-
</body>
|
333
|
-
</html>
|
334
|
-
eohtml
|
335
|
-
set = html.search('div').search('p')
|
336
|
-
assert_equal(1, set.length)
|
337
|
-
assert_equal('inside div tag', set.first.inner_text)
|
338
|
-
end
|
339
|
-
|
340
|
-
def test_multi_css
|
341
|
-
html = Nokogiri::HTML(<<-eohtml)
|
342
|
-
<html>
|
343
|
-
<body>
|
344
|
-
<div>
|
345
|
-
<p>p tag</p>
|
346
|
-
<a>a tag</a>
|
347
|
-
</div>
|
348
|
-
</body>
|
349
|
-
</html>
|
350
|
-
eohtml
|
351
|
-
set = html.css('p, a')
|
352
|
-
assert_equal(2, set.length)
|
353
|
-
assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
|
354
|
-
end
|
355
|
-
|
356
|
-
def test_inner_text
|
357
|
-
html = Nokogiri::HTML(<<-eohtml)
|
358
|
-
<html>
|
359
|
-
<body>
|
360
|
-
<div>
|
361
|
-
<p>
|
362
|
-
Hello world!
|
363
|
-
</p>
|
364
|
-
</div>
|
365
|
-
</body>
|
366
|
-
</html>
|
367
|
-
eohtml
|
368
|
-
node = html.xpath('//div').first
|
369
|
-
assert_equal('Hello world!', node.inner_text.strip)
|
370
|
-
end
|
371
|
-
|
372
|
-
def test_doc_type
|
373
|
-
html = Nokogiri::HTML(<<-eohtml)
|
374
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
375
|
-
<html xmlns="http://www.w3.org/1999/xhtml">
|
376
|
-
<body>
|
377
|
-
<p>Rainbow Dash</p>
|
378
|
-
</body>
|
379
|
-
</html>
|
380
|
-
eohtml
|
381
|
-
assert_equal "html", html.internal_subset.name
|
382
|
-
assert_equal "-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id
|
383
|
-
assert_equal "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id
|
384
|
-
assert_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">", html.to_s[0,97]
|
385
|
-
end
|
386
|
-
|
387
|
-
def test_content_size
|
388
|
-
html = Nokogiri::HTML('<div>
|
389
|
-
</div>')
|
390
|
-
assert_equal 1, html.content.size
|
391
|
-
assert_equal 1, html.content.split("").size
|
392
|
-
assert_equal "\n", html.content
|
393
|
-
end
|
394
|
-
|
395
|
-
def test_find_by_xpath
|
396
|
-
found = @html.xpath('//div/a')
|
397
|
-
assert_equal 3, found.length
|
398
|
-
end
|
399
|
-
|
400
|
-
def test_find_by_css
|
401
|
-
found = @html.css('div > a')
|
402
|
-
assert_equal 3, found.length
|
403
|
-
end
|
404
|
-
|
405
|
-
def test_find_by_css_with_square_brackets
|
406
|
-
found = @html.css("div[@id='header'] > h1")
|
407
|
-
found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
|
408
|
-
assert_equal 1, found.length
|
409
|
-
end
|
410
|
-
|
411
|
-
def test_find_with_function
|
412
|
-
assert @html.css("div:awesome() h1", Class.new {
|
413
|
-
def awesome divs
|
414
|
-
[divs.first]
|
415
|
-
end
|
416
|
-
}.new)
|
417
|
-
end
|
418
|
-
|
419
|
-
def test_dup_shallow
|
420
|
-
found = @html.search('//div/a').first
|
421
|
-
dup = found.dup(0)
|
422
|
-
assert dup
|
423
|
-
assert_equal '', dup.content
|
424
|
-
end
|
425
|
-
|
426
|
-
def test_search_can_handle_xpath_and_css
|
427
|
-
found = @html.search('//div/a', 'div > p')
|
428
|
-
length = @html.xpath('//div/a').length +
|
429
|
-
@html.css('div > p').length
|
430
|
-
assert_equal length, found.length
|
431
|
-
end
|
432
|
-
|
433
|
-
def test_dup_document
|
434
|
-
assert dup = @html.dup
|
435
|
-
assert_not_equal dup, @html
|
436
|
-
assert @html.html?
|
437
|
-
assert_instance_of Nokogiri::HTML::Document, dup
|
438
|
-
assert dup.html?, 'duplicate should be html'
|
439
|
-
assert_equal @html.to_s, dup.to_s
|
440
|
-
end
|
441
|
-
|
442
|
-
def test_dup_document_shallow
|
443
|
-
assert dup = @html.dup(0)
|
444
|
-
assert_not_equal dup, @html
|
445
|
-
end
|
446
|
-
|
447
|
-
def test_dup
|
448
|
-
found = @html.search('//div/a').first
|
449
|
-
dup = found.dup
|
450
|
-
assert dup
|
451
|
-
assert_equal found.content, dup.content
|
452
|
-
assert_equal found.document, dup.document
|
453
|
-
end
|
454
|
-
|
455
|
-
def test_inner_html
|
456
|
-
html = Nokogiri::HTML(<<-eohtml)
|
457
|
-
<html>
|
458
|
-
<body>
|
459
|
-
<div>
|
460
|
-
<p>
|
461
|
-
Hello world!
|
462
|
-
</p>
|
463
|
-
</div>
|
464
|
-
</body>
|
465
|
-
</html>
|
466
|
-
eohtml
|
467
|
-
node = html.xpath('//div').first
|
468
|
-
assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
|
469
|
-
end
|
470
|
-
|
471
|
-
def test_round_trip
|
472
|
-
doc = Nokogiri::HTML(@html.inner_html)
|
473
|
-
assert_equal @html.root.to_html, doc.root.to_html
|
474
|
-
end
|
475
|
-
|
476
|
-
def test_fragment_contains_text_node
|
477
|
-
fragment = Nokogiri::HTML.fragment('fooo')
|
478
|
-
assert_equal 1, fragment.children.length
|
479
|
-
assert_equal 'fooo', fragment.inner_text
|
480
|
-
end
|
481
|
-
|
482
|
-
def test_fragment_includes_two_tags
|
483
|
-
assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
|
484
|
-
end
|
485
|
-
|
486
|
-
def test_relative_css_finder
|
487
|
-
doc = Nokogiri::HTML(<<-eohtml)
|
488
|
-
<html>
|
489
|
-
<body>
|
490
|
-
<div class="red">
|
491
|
-
<p>
|
492
|
-
inside red
|
493
|
-
</p>
|
494
|
-
</div>
|
495
|
-
<div class="green">
|
496
|
-
<p>
|
497
|
-
inside green
|
498
|
-
</p>
|
499
|
-
</div>
|
500
|
-
</body>
|
501
|
-
</html>
|
502
|
-
eohtml
|
503
|
-
red_divs = doc.css('div.red')
|
504
|
-
assert_equal 1, red_divs.length
|
505
|
-
p_tags = red_divs.first.css('p')
|
506
|
-
assert_equal 1, p_tags.length
|
507
|
-
assert_equal 'inside red', p_tags.first.text.strip
|
508
|
-
end
|
509
|
-
|
510
|
-
def test_find_classes
|
511
|
-
doc = Nokogiri::HTML(<<-eohtml)
|
512
|
-
<html>
|
513
|
-
<body>
|
514
|
-
<p class="red">RED</p>
|
515
|
-
<p class="awesome red">RED</p>
|
516
|
-
<p class="notred">GREEN</p>
|
517
|
-
<p class="green notred">GREEN</p>
|
518
|
-
</body>
|
519
|
-
</html>
|
520
|
-
eohtml
|
521
|
-
list = doc.css('.red')
|
522
|
-
assert_equal 2, list.length
|
523
|
-
assert_equal %w{ RED RED }, list.map { |x| x.text }
|
524
|
-
end
|
525
|
-
|
526
|
-
def test_parse_can_take_io
|
527
|
-
html = nil
|
528
|
-
File.open(HTML_FILE, 'rb') { |f|
|
529
|
-
html = Nokogiri::HTML(f)
|
530
|
-
}
|
531
|
-
assert html.html?
|
532
|
-
end
|
533
|
-
|
534
|
-
def test_html?
|
535
|
-
assert !@html.xml?
|
536
|
-
assert @html.html?
|
537
|
-
end
|
538
|
-
|
539
|
-
def test_serialize
|
540
|
-
assert @html.serialize
|
541
|
-
assert @html.to_html
|
542
|
-
end
|
543
|
-
|
544
|
-
def test_empty_document
|
545
|
-
# empty document should return "" #699
|
546
|
-
assert_equal "", Nokogiri::HTML.parse(nil).text
|
547
|
-
assert_equal "", Nokogiri::HTML.parse("").text
|
548
|
-
end
|
549
|
-
end
|
550
|
-
end
|
551
|
-
end
|
552
|
-
|
@@ -1,138 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
require "helper"
|
3
|
-
|
4
|
-
module Nokogiri
|
5
|
-
module HTML
|
6
|
-
if RUBY_VERSION =~ /^1\.9/
|
7
|
-
class TestDocumentEncoding < Nokogiri::TestCase
|
8
|
-
def test_encoding
|
9
|
-
doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
|
10
|
-
|
11
|
-
hello = "こんにちは"
|
12
|
-
|
13
|
-
assert_match doc.encoding, doc.to_html
|
14
|
-
assert_match hello.encode('Shift_JIS'), doc.to_html
|
15
|
-
assert_equal 'Shift_JIS', doc.to_html.encoding.name
|
16
|
-
|
17
|
-
assert_match hello, doc.to_html(:encoding => 'UTF-8')
|
18
|
-
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
|
19
|
-
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_default_to_encoding_from_string
|
23
|
-
bad_charset = <<-eohtml
|
24
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
25
|
-
<html>
|
26
|
-
<head>
|
27
|
-
<meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
|
28
|
-
</head>
|
29
|
-
<body>
|
30
|
-
<a href="http://tenderlovemaking.com/">blah!</a>
|
31
|
-
</body>
|
32
|
-
</html>
|
33
|
-
eohtml
|
34
|
-
doc = Nokogiri::HTML(bad_charset)
|
35
|
-
assert_equal bad_charset.encoding.name, doc.encoding
|
36
|
-
|
37
|
-
doc = Nokogiri.parse(bad_charset)
|
38
|
-
assert_equal bad_charset.encoding.name, doc.encoding
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_encoding_non_utf8
|
42
|
-
orig = '日本語が上手です'
|
43
|
-
bin = Encoding::ASCII_8BIT
|
44
|
-
[Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
|
45
|
-
html = <<-eohtml.encode(enc)
|
46
|
-
<html>
|
47
|
-
<meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
|
48
|
-
<title xml:lang="ja">#{orig}</title></html>
|
49
|
-
eohtml
|
50
|
-
text = Nokogiri::HTML.parse(html).at('title').inner_text
|
51
|
-
assert_equal(
|
52
|
-
orig.encode(enc).force_encoding(bin),
|
53
|
-
text.encode(enc).force_encoding(bin)
|
54
|
-
)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_encoding_with_a_bad_name
|
59
|
-
bad_charset = <<-eohtml
|
60
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
61
|
-
<html>
|
62
|
-
<head>
|
63
|
-
<meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
|
64
|
-
</head>
|
65
|
-
<body>
|
66
|
-
<a href="http://tenderlovemaking.com/">blah!</a>
|
67
|
-
</body>
|
68
|
-
</html>
|
69
|
-
eohtml
|
70
|
-
doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
|
71
|
-
assert_equal ['http://tenderlovemaking.com/'],
|
72
|
-
doc.css('a').map { |a| a['href'] }
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
class TestDocumentEncodingDetection < Nokogiri::TestCase
|
78
|
-
if IO.respond_to?(:binread)
|
79
|
-
def binread(file)
|
80
|
-
IO.binread(file)
|
81
|
-
end
|
82
|
-
else
|
83
|
-
def binread(file)
|
84
|
-
IO.read(file)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def binopen(file)
|
89
|
-
File.open(file, 'rb')
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_document_html_noencoding
|
93
|
-
from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
|
94
|
-
from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
|
95
|
-
|
96
|
-
assert_equal from_string.to_s.size, from_stream.to_s.size
|
97
|
-
end
|
98
|
-
|
99
|
-
def test_document_html_charset
|
100
|
-
html = Nokogiri::HTML(binopen(METACHARSET_FILE))
|
101
|
-
assert_equal 'iso-2022-jp', html.encoding
|
102
|
-
assert_equal 'たこ焼き仮面', html.title
|
103
|
-
end
|
104
|
-
|
105
|
-
def test_document_xhtml_enc
|
106
|
-
[ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
|
107
|
-
doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
|
108
|
-
ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map { |text| text.text }
|
109
|
-
|
110
|
-
doc_from_string = Nokogiri::HTML(binread(file))
|
111
|
-
ary_from_string = doc_from_string.xpath('//p/text()').map { |text| text.text }
|
112
|
-
|
113
|
-
doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
|
114
|
-
ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map { |text| text.text }
|
115
|
-
|
116
|
-
doc_from_file = Nokogiri::HTML(binopen(file))
|
117
|
-
ary_from_file = doc_from_file.xpath('//p/text()').map { |text| text.text }
|
118
|
-
|
119
|
-
title = 'たこ焼き仮面'
|
120
|
-
|
121
|
-
assert_equal(title, doc_from_string_enc.at('//title/text()').text)
|
122
|
-
assert_equal(title, doc_from_string.at('//title/text()').text)
|
123
|
-
assert_equal(title, doc_from_file_enc.at('//title/text()').text)
|
124
|
-
unless Nokogiri.jruby? && file == ENCODING_HTML_FILE
|
125
|
-
assert_equal(title, doc_from_file.at('//title/text()').text)
|
126
|
-
end
|
127
|
-
|
128
|
-
evil = (0..72).map { |i| '超' * i + '悪い事を構想中。' }
|
129
|
-
|
130
|
-
assert_equal(evil, ary_from_string_enc)
|
131
|
-
assert_equal(evil, ary_from_string)
|
132
|
-
assert_equal(evil, ary_from_file_enc)
|
133
|
-
assert_equal(evil, ary_from_file)
|
134
|
-
}
|
135
|
-
end
|
136
|
-
end
|
137
|
-
end
|
138
|
-
end
|