nokogiri 1.0.0 → 1.6.8.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.autotest +26 -0
- data/.cross_rubies +9 -0
- data/.editorconfig +17 -0
- data/.gemtest +0 -0
- data/.travis.yml +51 -0
- data/CHANGELOG.rdoc +1160 -0
- data/CONTRIBUTING.md +42 -0
- data/C_CODING_STYLE.rdoc +33 -0
- data/Gemfile +22 -0
- data/LICENSE.txt +31 -0
- data/Manifest.txt +284 -40
- data/README.md +166 -0
- data/ROADMAP.md +111 -0
- data/Rakefile +310 -199
- data/STANDARD_RESPONSES.md +47 -0
- data/Y_U_NO_GEMSPEC.md +155 -0
- data/appveyor.yml +22 -0
- data/bin/nokogiri +118 -0
- data/build_all +45 -0
- data/dependencies.yml +29 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +664 -34
- data/ext/nokogiri/html_document.c +120 -33
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +145 -0
- data/ext/nokogiri/nokogiri.h +131 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +23 -19
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +69 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +501 -54
- data/ext/nokogiri/xml_document.h +14 -1
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +109 -24
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +60 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +117 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1285 -315
- data/ext/nokogiri/xml_node.h +4 -6
- data/ext/nokogiri/xml_node_set.c +415 -54
- data/ext/nokogiri/xml_node_set.h +6 -2
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +316 -77
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +161 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +215 -80
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +45 -175
- data/ext/nokogiri/xml_syntax_error.h +4 -2
- data/ext/nokogiri/xml_text.c +37 -14
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +230 -13
- data/ext/nokogiri/xml_xpath_context.h +2 -1
- data/ext/nokogiri/xslt_stylesheet.c +196 -34
- data/ext/nokogiri/xslt_stylesheet.h +6 -1
- data/lib/nokogiri/css/node.rb +18 -61
- data/lib/nokogiri/css/parser.rb +725 -17
- data/lib/nokogiri/css/parser.y +126 -63
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +31 -39
- data/lib/nokogiri/css/xpath_visitor.rb +109 -51
- data/lib/nokogiri/css.rb +24 -3
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +329 -3
- data/lib/nokogiri/html/document_fragment.rb +39 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +35 -4
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/html/sax/push_parser.rb +36 -0
- data/lib/nokogiri/html.rb +18 -76
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +106 -1
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +395 -31
- data/lib/nokogiri/xml/cdata.rb +4 -2
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +267 -12
- data/lib/nokogiri/xml/document_fragment.rb +149 -0
- data/lib/nokogiri/xml/dtd.rb +27 -1
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node.rb +748 -109
- data/lib/nokogiri/xml/node_set.rb +200 -72
- data/lib/nokogiri/xml/parse_options.rb +120 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +102 -4
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax/document.rb +114 -2
- data/lib/nokogiri/xml/sax/parser.rb +97 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/sax.rb +2 -7
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/searchable.rb +221 -0
- data/lib/nokogiri/xml/syntax_error.rb +27 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath.rb +4 -0
- data/lib/nokogiri/xml/xpath_context.rb +3 -1
- data/lib/nokogiri/xml.rb +45 -38
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/nokogiri/xslt.rb +47 -2
- data/lib/nokogiri.rb +117 -24
- data/lib/xsd/xmlparser/nokogiri.rb +102 -0
- data/patches/sort-patches-by-date +25 -0
- data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
- data/suppressions/README.txt +1 -0
- data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +73 -6
- data/test/css/test_parser.rb +184 -39
- data/test/css/test_tokenizer.rb +72 -19
- data/test/css/test_xpath_visitor.rb +44 -2
- data/test/decorators/test_slop.rb +20 -0
- data/test/files/2ch.html +108 -0
- data/test/files/GH_1042.html +18 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/atom.xml +344 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/namespace_pressure_test.xml +1684 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/saml/saml20assertion_schema.xsd +283 -0
- data/test/files/saml/saml20protocol_schema.xsd +302 -0
- data/test/files/saml/xenc_schema.xsd +146 -0
- data/test/files/saml/xmldsig_schema.xsd +318 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/shift_jis_no_charset.html +9 -0
- data/test/files/slow-xpath.xml +25509 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/files/tlm.html +2 -1
- data/test/files/to_be_xincluded.xml +2 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/files/xinclude.xml +4 -0
- data/test/helper.rb +124 -13
- data/test/html/sax/test_parser.rb +118 -4
- data/test/html/sax/test_parser_context.rb +46 -0
- data/test/html/sax/test_push_parser.rb +87 -0
- data/test/html/test_builder.rb +94 -8
- data/test/html/test_document.rb +626 -11
- data/test/html/test_document_encoding.rb +145 -0
- data/test/html/test_document_fragment.rb +301 -0
- data/test/html/test_element_description.rb +105 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +212 -0
- data/test/html/test_node_encoding.rb +85 -0
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
- data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
- data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
- data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
- data/test/namespaces/test_namespaces_preservation.rb +31 -0
- data/test/test_convert_xpath.rb +2 -47
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +48 -0
- data/test/test_memory_leak.rb +156 -0
- data/test/test_nokogiri.rb +103 -1
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +293 -8
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +309 -8
- data/test/xml/sax/test_parser_context.rb +115 -0
- data/test/xml/sax/test_push_parser.rb +157 -0
- data/test/xml/test_attr.rb +67 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +327 -2
- data/test/xml/test_c14n.rb +180 -0
- data/test/xml/test_cdata.rb +32 -2
- data/test/xml/test_comment.rb +40 -0
- data/test/xml/test_document.rb +846 -35
- data/test/xml/test_document_encoding.rb +31 -0
- data/test/xml/test_document_fragment.rb +271 -0
- data/test/xml/test_dtd.rb +153 -9
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +251 -0
- data/test/xml/test_namespace.rb +96 -0
- data/test/xml/test_node.rb +1126 -105
- data/test/xml/test_node_attributes.rb +115 -0
- data/test/xml/test_node_encoding.rb +69 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +549 -0
- data/test/xml/test_node_set.rb +668 -9
- data/test/xml/test_parse_options.rb +64 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader.rb +589 -0
- data/test/xml/test_reader_encoding.rb +134 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +142 -0
- data/test/xml/test_syntax_error.rb +30 -0
- data/test/xml/test_text.rb +49 -2
- data/test/xml/test_unparented_node.rb +440 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +445 -0
- data/test/xslt/test_custom_functions.rb +133 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- data/test_all +107 -0
- metadata +459 -115
- data/History.txt +0 -6
- data/README.ja.txt +0 -86
- data/README.txt +0 -87
- data/ext/nokogiri/html_sax_parser.c +0 -32
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/native.c +0 -40
- data/ext/nokogiri/native.h +0 -51
- data/ext/nokogiri/xml_xpath.c +0 -46
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/lib/nokogiri/css/generated_parser.rb +0 -653
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators.rb +0 -1
- data/lib/nokogiri/hpricot.rb +0 -47
- data/lib/nokogiri/xml/after_handler.rb +0 -18
- data/lib/nokogiri/xml/before_handler.rb +0 -32
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/nokogiri.gemspec +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -7
- data/test/hpricot/test_alter.rb +0 -67
- data/test/hpricot/test_builder.rb +0 -27
- data/test/hpricot/test_parser.rb +0 -423
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -78
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_reader.rb +0 -222
@@ -0,0 +1,10 @@
|
|
1
|
+
<!ENTITY ent1 "es">
|
2
|
+
<!ENTITY ent2 "1900 Dallas Road">
|
3
|
+
<!ENTITY ent3 "Texas">
|
4
|
+
<!ENTITY ent4 "<entElement domestic='Yes'>Element data</entElement><?PItarget PIdata?>">
|
5
|
+
<!ENTITY ent5 PUBLIC "entityURI" "entityFile" NDATA notation1>
|
6
|
+
<!ENTITY ent1 "This entity should be discarded">
|
7
|
+
<!ELEMENT br EMPTY>
|
8
|
+
<!ATTLIST br width CDATA "0">
|
9
|
+
<!NOTATION notation1 PUBLIC "notation1File">
|
10
|
+
<!NOTATION notation2 SYSTEM "notation2File">
|
data/test/files/tlm.html
CHANGED
@@ -46,7 +46,7 @@
|
|
46
46
|
.codesnip-container {border:1px solid #ccc; background:#eee; padding: 5px;margin:10px;}
|
47
47
|
</style>
|
48
48
|
<link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://tenderlovemaking.com/xmlrpc.php?rsd" />
|
49
|
-
<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
|
49
|
+
<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
|
50
50
|
<meta name="generator" content="WordPress 2.6" />
|
51
51
|
|
52
52
|
<link rel="stylesheet" type="text/css" href="http://tenderlovemaking.com/wp-content/plugins/spell_checker/spell_checker.css" />
|
@@ -826,6 +826,7 @@ page.<span class="me1">body</span> =~ /<textarea<span class="br0">[</span
|
|
826
826
|
</ul>
|
827
827
|
</div>
|
828
828
|
|
829
|
+
<div id="abc.123" class='special.character'>Special character div</div>
|
829
830
|
<div id="footer">
|
830
831
|
A design by <a href="http://blog.geminigeek.com/wordpress-theme">GeminiGeek</a> • Powered by <a href="http://wordpress.org">Wordpress</a><!--• <a href="#">CSS</a> • <a href="#">xHTML 1.0</a>-->
|
831
832
|
</div>
|
data/test/helper.rb
CHANGED
@@ -1,23 +1,101 @@
|
|
1
|
-
|
1
|
+
#Process.setrlimit(Process::RLIMIT_CORE, Process::RLIM_INFINITY) unless RUBY_PLATFORM =~ /(java|mswin|mingw)/i
|
2
|
+
$VERBOSE = true
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'minitest/pride'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'tempfile'
|
7
|
+
require 'pp'
|
2
8
|
|
3
|
-
|
4
|
-
|
9
|
+
require 'nokogiri'
|
10
|
+
if ENV['TEST_NOKOGIRI_WITH_LIBXML_RUBY']
|
11
|
+
require 'libxml'
|
12
|
+
warn "#{__FILE__}:#{__LINE__}: loaded libxml-ruby '#{LibXML::XML::VERSION}'"
|
5
13
|
end
|
6
14
|
|
7
|
-
|
15
|
+
warn "#{__FILE__}:#{__LINE__}: version info: #{Nokogiri::VERSION_INFO.inspect}"
|
8
16
|
|
9
17
|
module Nokogiri
|
10
|
-
class TestCase <
|
11
|
-
ASSETS_DIR
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
18
|
+
class TestCase < MiniTest::Spec
|
19
|
+
ASSETS_DIR = File.expand_path File.join(File.dirname(__FILE__), 'files')
|
20
|
+
ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
|
21
|
+
ADDRESS_XML_FILE = File.join(ASSETS_DIR, 'address_book.xml')
|
22
|
+
ENCODING_HTML_FILE = File.join(ASSETS_DIR, 'encoding.html')
|
23
|
+
ENCODING_XHTML_FILE = File.join(ASSETS_DIR, 'encoding.xhtml')
|
24
|
+
EXML_FILE = File.join(ASSETS_DIR, 'exslt.xml')
|
25
|
+
EXSLT_FILE = File.join(ASSETS_DIR, 'exslt.xslt')
|
26
|
+
HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
|
27
|
+
METACHARSET_FILE = File.join(ASSETS_DIR, 'metacharset.html')
|
28
|
+
NICH_FILE = File.join(ASSETS_DIR, '2ch.html')
|
29
|
+
NOENCODING_FILE = File.join(ASSETS_DIR, 'noencoding.html')
|
30
|
+
PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
|
31
|
+
PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
|
32
|
+
SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
|
33
|
+
SHIFT_JIS_NO_CHARSET= File.join(ASSETS_DIR, 'shift_jis_no_charset.html')
|
34
|
+
SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
|
35
|
+
SNUGGLES_FILE = File.join(ASSETS_DIR, 'snuggles.xml')
|
36
|
+
XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
|
37
|
+
XML_XINCLUDE_FILE = File.join(ASSETS_DIR, 'xinclude.xml')
|
38
|
+
XML_ATOM_FILE = File.join(ASSETS_DIR, 'atom.xml')
|
39
|
+
XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
|
40
|
+
XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml')
|
17
41
|
|
18
42
|
def teardown
|
19
|
-
|
43
|
+
if ENV['NOKOGIRI_GC']
|
44
|
+
STDOUT.putc '!'
|
45
|
+
if RUBY_PLATFORM =~ /java/
|
46
|
+
require 'java'
|
47
|
+
java.lang.System.gc
|
48
|
+
else
|
49
|
+
GC.start
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def stress_memory_while &block
|
55
|
+
# force the test to explicitly declare a skip
|
56
|
+
raise "JRuby doesn't do GC" if Nokogiri.jruby?
|
57
|
+
|
58
|
+
old_stress = GC.stress
|
59
|
+
begin
|
60
|
+
GC.stress = true
|
61
|
+
yield
|
62
|
+
ensure
|
63
|
+
GC.stress = old_stress
|
64
|
+
end
|
20
65
|
end
|
66
|
+
|
67
|
+
def assert_indent amount, doc, message = nil
|
68
|
+
nodes = []
|
69
|
+
doc.traverse do |node|
|
70
|
+
nodes << node if node.text? && node.blank?
|
71
|
+
end
|
72
|
+
assert nodes.length > 0
|
73
|
+
nodes.each do |node|
|
74
|
+
len = node.content.gsub(/[\r\n]/, '').length
|
75
|
+
assert_equal(0, len % amount, message)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def util_decorate(document, decorator_module)
|
80
|
+
document.decorators(XML::Node) << decorator_module
|
81
|
+
document.decorators(XML::NodeSet) << decorator_module
|
82
|
+
document.decorate!
|
83
|
+
end
|
84
|
+
|
85
|
+
#
|
86
|
+
# Test::Unit backwards compatibility section
|
87
|
+
#
|
88
|
+
alias :assert_no_match :refute_match
|
89
|
+
alias :assert_not_nil :refute_nil
|
90
|
+
alias :assert_raise :assert_raises
|
91
|
+
alias :assert_not_equal :refute_equal
|
92
|
+
|
93
|
+
def assert_not_send send_ary, m = nil
|
94
|
+
recv, msg, *args = send_ary
|
95
|
+
m = message(m) {
|
96
|
+
"Expected #{mu_pp(recv)}.#{msg}(*#{mu_pp(args)}) to return false" }
|
97
|
+
assert !recv.__send__(msg, *args), m
|
98
|
+
end unless method_defined?(:assert_not_send)
|
21
99
|
end
|
22
100
|
|
23
101
|
module SAX
|
@@ -25,7 +103,15 @@ module Nokogiri
|
|
25
103
|
class Doc < XML::SAX::Document
|
26
104
|
attr_reader :start_elements, :start_document_called
|
27
105
|
attr_reader :end_elements, :end_document_called
|
28
|
-
attr_reader :data, :comments, :cdata_blocks
|
106
|
+
attr_reader :data, :comments, :cdata_blocks, :start_elements_namespace
|
107
|
+
attr_reader :errors, :warnings, :end_elements_namespace
|
108
|
+
attr_reader :xmldecls
|
109
|
+
attr_reader :processing_instructions
|
110
|
+
|
111
|
+
def xmldecl version, encoding, standalone
|
112
|
+
@xmldecls = [version, encoding, standalone].compact
|
113
|
+
super
|
114
|
+
end
|
29
115
|
|
30
116
|
def start_document
|
31
117
|
@start_document_called = true
|
@@ -37,16 +123,36 @@ module Nokogiri
|
|
37
123
|
super
|
38
124
|
end
|
39
125
|
|
126
|
+
def error error
|
127
|
+
(@errors ||= []) << error
|
128
|
+
super
|
129
|
+
end
|
130
|
+
|
131
|
+
def warning warning
|
132
|
+
(@warning ||= []) << warning
|
133
|
+
super
|
134
|
+
end
|
135
|
+
|
40
136
|
def start_element *args
|
41
137
|
(@start_elements ||= []) << args
|
42
138
|
super
|
43
139
|
end
|
44
140
|
|
141
|
+
def start_element_namespace *args
|
142
|
+
(@start_elements_namespace ||= []) << args
|
143
|
+
super
|
144
|
+
end
|
145
|
+
|
45
146
|
def end_element *args
|
46
147
|
(@end_elements ||= []) << args
|
47
148
|
super
|
48
149
|
end
|
49
150
|
|
151
|
+
def end_element_namespace *args
|
152
|
+
(@end_elements_namespace ||= []) << args
|
153
|
+
super
|
154
|
+
end
|
155
|
+
|
50
156
|
def characters string
|
51
157
|
@data ||= []
|
52
158
|
@data += [string]
|
@@ -64,6 +170,11 @@ module Nokogiri
|
|
64
170
|
@cdata_blocks += [string]
|
65
171
|
super
|
66
172
|
end
|
173
|
+
|
174
|
+
def processing_instruction name, content
|
175
|
+
@processing_instructions ||= []
|
176
|
+
@processing_instructions << [name, content]
|
177
|
+
end
|
67
178
|
end
|
68
179
|
end
|
69
180
|
end
|
@@ -1,16 +1,71 @@
|
|
1
|
-
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "helper"
|
2
3
|
|
3
4
|
module Nokogiri
|
4
5
|
module HTML
|
5
6
|
module SAX
|
6
7
|
class TestParser < Nokogiri::SAX::TestCase
|
7
8
|
def setup
|
9
|
+
super
|
8
10
|
@parser = HTML::SAX::Parser.new(Doc.new)
|
9
11
|
end
|
10
12
|
|
13
|
+
def test_parse_empty_document
|
14
|
+
# This caused a segfault in libxml 2.6.x
|
15
|
+
assert_nil @parser.parse ''
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_parse_empty_file
|
19
|
+
# Make sure empty files don't break stuff
|
20
|
+
empty_file_name = File.join(ASSETS_DIR, 'bogus.xml')
|
21
|
+
# assert_nothing_raised do
|
22
|
+
@parser.parse_file empty_file_name
|
23
|
+
# end
|
24
|
+
end
|
25
|
+
|
11
26
|
def test_parse_file
|
12
27
|
@parser.parse_file(HTML_FILE)
|
13
|
-
|
28
|
+
|
29
|
+
# Take a look at the comment in test_parse_document to know
|
30
|
+
# a possible reason to this difference.
|
31
|
+
if Nokogiri.uses_libxml?
|
32
|
+
assert_equal 1111, @parser.document.end_elements.length
|
33
|
+
else
|
34
|
+
assert_equal 1120, @parser.document.end_elements.length
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_parse_file_nil_argument
|
39
|
+
assert_raises(ArgumentError) {
|
40
|
+
@parser.parse_file(nil)
|
41
|
+
}
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_parse_file_non_existant
|
45
|
+
assert_raise Errno::ENOENT do
|
46
|
+
@parser.parse_file('there_is_no_reasonable_way_this_file_exists')
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_parse_file_with_dir
|
51
|
+
assert_raise Errno::EISDIR do
|
52
|
+
@parser.parse_file(File.dirname(__FILE__))
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_parse_memory_nil
|
57
|
+
assert_raise ArgumentError do
|
58
|
+
@parser.parse_memory(nil)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_parse_force_encoding
|
63
|
+
@parser.parse_memory(<<-HTML, 'UTF-8')
|
64
|
+
<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
|
65
|
+
Информация
|
66
|
+
HTML
|
67
|
+
assert_equal("Информация",
|
68
|
+
@parser.document.data.join.strip)
|
14
69
|
end
|
15
70
|
|
16
71
|
def test_parse_document
|
@@ -18,8 +73,67 @@ module Nokogiri
|
|
18
73
|
<p>Paragraph 1</p>
|
19
74
|
<p>Paragraph 2</p>
|
20
75
|
eoxml
|
21
|
-
|
22
|
-
|
76
|
+
|
77
|
+
# JRuby version is different because of the internal implementation
|
78
|
+
# JRuby version uses NekoHTML which inserts empty "head" elements.
|
79
|
+
#
|
80
|
+
# Currently following features are set:
|
81
|
+
# "http://cyberneko.org/html/properties/names/elems" => "lower"
|
82
|
+
# "http://cyberneko.org/html/properties/names/attrs" => "lower"
|
83
|
+
if Nokogiri.uses_libxml?
|
84
|
+
assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
|
85
|
+
@parser.document.start_elements)
|
86
|
+
else
|
87
|
+
assert_equal([["html", []], ["head", []], ["body", []], ["p", []], ["p", []]],
|
88
|
+
@parser.document.start_elements)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_parser_attributes
|
93
|
+
html = <<-eohtml
|
94
|
+
<html>
|
95
|
+
<head>
|
96
|
+
<title>hello</title>
|
97
|
+
</head>
|
98
|
+
<body>
|
99
|
+
<img src="face.jpg" title="daddy & me">
|
100
|
+
<hr noshade size="2">
|
101
|
+
</body>
|
102
|
+
</html>
|
103
|
+
eohtml
|
104
|
+
|
105
|
+
block_called = false
|
106
|
+
@parser.parse(html) { |ctx|
|
107
|
+
block_called = true
|
108
|
+
ctx.replace_entities = true
|
109
|
+
}
|
110
|
+
|
111
|
+
assert block_called
|
112
|
+
|
113
|
+
noshade_value = if Nokogiri.uses_libxml? && Nokogiri::VERSION_INFO['libxml']['loaded'] < '2.7.7'
|
114
|
+
['noshade', 'noshade']
|
115
|
+
else
|
116
|
+
['noshade', nil]
|
117
|
+
end
|
118
|
+
|
119
|
+
assert_equal [
|
120
|
+
['html', []],
|
121
|
+
['head', []],
|
122
|
+
['title', []],
|
123
|
+
['body', []],
|
124
|
+
['img', [
|
125
|
+
['src', 'face.jpg'],
|
126
|
+
['title', 'daddy & me']
|
127
|
+
]],
|
128
|
+
['hr', [
|
129
|
+
noshade_value,
|
130
|
+
['size', '2']
|
131
|
+
]]
|
132
|
+
], @parser.document.start_elements
|
133
|
+
end
|
134
|
+
|
135
|
+
def test_empty_processing_instruction
|
136
|
+
@parser.parse_memory("<strong>this will segfault<?strong>")
|
23
137
|
end
|
24
138
|
end
|
25
139
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require "helper"
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
module HTML
|
7
|
+
module SAX
|
8
|
+
class TestParserContext < Nokogiri::SAX::TestCase
|
9
|
+
def test_from_io
|
10
|
+
ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
|
11
|
+
assert ctx
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_from_string
|
15
|
+
ctx = ParserContext.new 'blah blah'
|
16
|
+
assert ctx
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_parse_with
|
20
|
+
ctx = ParserContext.new 'blah'
|
21
|
+
assert_raises ArgumentError do
|
22
|
+
ctx.parse_with nil
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_parse_with_sax_parser
|
27
|
+
# assert_nothing_raised do
|
28
|
+
xml = "<root />"
|
29
|
+
ctx = ParserContext.new xml
|
30
|
+
parser = Parser.new Doc.new
|
31
|
+
ctx.parse_with parser
|
32
|
+
# end
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_from_file
|
36
|
+
# assert_nothing_raised do
|
37
|
+
ctx = ParserContext.file HTML_FILE, 'UTF-8'
|
38
|
+
parser = Parser.new Doc.new
|
39
|
+
ctx.parse_with parser
|
40
|
+
# end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require "helper"
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
module HTML
|
7
|
+
module SAX
|
8
|
+
class TestPushParser < Nokogiri::SAX::TestCase
|
9
|
+
def setup
|
10
|
+
super
|
11
|
+
@parser = HTML::SAX::PushParser.new(Doc.new)
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_end_document_called
|
15
|
+
@parser.<<(<<-eoxml)
|
16
|
+
<p id="asdfasdf">
|
17
|
+
<!-- This is a comment -->
|
18
|
+
Paragraph 1
|
19
|
+
</p>
|
20
|
+
eoxml
|
21
|
+
assert ! @parser.document.end_document_called
|
22
|
+
@parser.finish
|
23
|
+
assert @parser.document.end_document_called
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_start_element
|
27
|
+
@parser.<<(<<-eoxml)
|
28
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
29
|
+
<html><head><body><p id="asdfasdf">
|
30
|
+
eoxml
|
31
|
+
|
32
|
+
assert_equal [["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
|
33
|
+
@parser.document.start_elements
|
34
|
+
|
35
|
+
@parser.<<(<<-eoxml)
|
36
|
+
<!-- This is a comment -->
|
37
|
+
Paragraph 1
|
38
|
+
</p></body></html>
|
39
|
+
eoxml
|
40
|
+
assert_equal [' This is a comment '], @parser.document.comments
|
41
|
+
@parser.finish
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
def test_chevron_partial_html
|
46
|
+
@parser.<<(<<-eoxml)
|
47
|
+
<p id="asdfasdf">
|
48
|
+
eoxml
|
49
|
+
|
50
|
+
@parser.<<(<<-eoxml)
|
51
|
+
<!-- This is a comment -->
|
52
|
+
Paragraph 1
|
53
|
+
</p>
|
54
|
+
eoxml
|
55
|
+
assert_equal [' This is a comment '], @parser.document.comments
|
56
|
+
@parser.finish
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_chevron
|
60
|
+
@parser.<<(<<-eoxml)
|
61
|
+
<p id="asdfasdf">
|
62
|
+
<!-- This is a comment -->
|
63
|
+
Paragraph 1
|
64
|
+
</p>
|
65
|
+
eoxml
|
66
|
+
@parser.finish
|
67
|
+
assert_equal [' This is a comment '], @parser.document.comments
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_default_options
|
71
|
+
assert_equal 0, @parser.options
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_broken_encoding
|
75
|
+
skip("ultra hard to fix for pure Java version") if Nokogiri.jruby?
|
76
|
+
@parser.options |= XML::ParseOptions::RECOVER
|
77
|
+
# This is ISO_8859-1:
|
78
|
+
@parser.<< "<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>"
|
79
|
+
@parser.finish
|
80
|
+
assert(@parser.document.errors.size >= 1)
|
81
|
+
assert_equal "Gau\337", @parser.document.data.join
|
82
|
+
assert_equal [["r"], ["body"], ["html"]], @parser.document.end_elements
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
data/test/html/test_builder.rb
CHANGED
@@ -1,8 +1,35 @@
|
|
1
|
-
require
|
1
|
+
require "helper"
|
2
2
|
|
3
3
|
module Nokogiri
|
4
4
|
module HTML
|
5
5
|
class TestBuilder < Nokogiri::TestCase
|
6
|
+
def test_top_level_function_builds
|
7
|
+
foo = nil
|
8
|
+
Nokogiri() { |xml| foo = xml }
|
9
|
+
assert_instance_of Nokogiri::HTML::Builder, foo
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_builder_with_explicit_tags
|
13
|
+
html_doc = Nokogiri::HTML::Builder.new {
|
14
|
+
div.slide(:class => 'another_class') {
|
15
|
+
node = Nokogiri::XML::Node.new("id", doc)
|
16
|
+
node.content = "hello"
|
17
|
+
insert(node)
|
18
|
+
}
|
19
|
+
}.doc
|
20
|
+
assert_equal 1, html_doc.css('div.slide > id').length
|
21
|
+
assert_equal 'hello', html_doc.at('div.slide > id').content
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_hash_as_attributes_for_attribute_method
|
25
|
+
html = Nokogiri::HTML::Builder.new { ||
|
26
|
+
div.slide(:class => 'another_class') {
|
27
|
+
span 'Slide 1'
|
28
|
+
}
|
29
|
+
}.to_html
|
30
|
+
assert_match 'class="slide another_class"', html
|
31
|
+
end
|
32
|
+
|
6
33
|
def test_hash_as_attributes
|
7
34
|
builder = Nokogiri::HTML::Builder.new do
|
8
35
|
div(:id => 'awesome') {
|
@@ -10,7 +37,32 @@ module Nokogiri
|
|
10
37
|
}
|
11
38
|
end
|
12
39
|
assert_equal('<div id="awesome"><h1>america</h1></div>',
|
13
|
-
builder.to_html.gsub(/\n/, ''))
|
40
|
+
builder.doc.root.to_html.gsub(/\n/, '').gsub(/>\s*</, '><'))
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_href_with_attributes
|
44
|
+
uri = 'http://tenderlovemaking.com/'
|
45
|
+
built = Nokogiri::XML::Builder.new {
|
46
|
+
div {
|
47
|
+
a('King Khan & The Shrines', :href => uri)
|
48
|
+
}
|
49
|
+
}
|
50
|
+
assert_equal 'http://tenderlovemaking.com/',
|
51
|
+
built.doc.at('a')[:href]
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_tag_nesting
|
55
|
+
builder = Nokogiri::HTML::Builder.new do
|
56
|
+
body {
|
57
|
+
span.left ''
|
58
|
+
span.middle {
|
59
|
+
div.icon ''
|
60
|
+
}
|
61
|
+
span.right ''
|
62
|
+
}
|
63
|
+
end
|
64
|
+
assert node = builder.doc.css('span.right').first
|
65
|
+
assert_equal 'middle', node.previous_sibling['class']
|
14
66
|
end
|
15
67
|
|
16
68
|
def test_has_ampersand
|
@@ -22,7 +74,7 @@ module Nokogiri
|
|
22
74
|
end
|
23
75
|
assert_equal(
|
24
76
|
'<div class="rad" id="thing"><awe&some><b>hello & world</b></div>',
|
25
|
-
builder.to_html.gsub(/\n/, ''))
|
77
|
+
builder.doc.root.to_html.gsub(/\n/, ''))
|
26
78
|
end
|
27
79
|
|
28
80
|
def test_multi_tags
|
@@ -34,7 +86,7 @@ module Nokogiri
|
|
34
86
|
end
|
35
87
|
assert_equal(
|
36
88
|
'<div class="rad" id="thing"><awesome><b>hello</b></div>',
|
37
|
-
builder.doc.to_html.gsub(/\n/, ''))
|
89
|
+
builder.doc.root.to_html.gsub(/\n/, ''))
|
38
90
|
end
|
39
91
|
|
40
92
|
def test_attributes_plus_block
|
@@ -44,7 +96,7 @@ module Nokogiri
|
|
44
96
|
}
|
45
97
|
end
|
46
98
|
assert_equal('<div class="rad" id="thing"><awesome></div>',
|
47
|
-
builder.doc.to_html.chomp)
|
99
|
+
builder.doc.root.to_html.chomp)
|
48
100
|
end
|
49
101
|
|
50
102
|
def test_builder_adds_attributes
|
@@ -52,14 +104,14 @@ module Nokogiri
|
|
52
104
|
div.rad.thing! "tender div"
|
53
105
|
end
|
54
106
|
assert_equal('<div class="rad" id="thing">tender div</div>',
|
55
|
-
builder.doc.to_html.chomp)
|
107
|
+
builder.doc.root.to_html.chomp)
|
56
108
|
end
|
57
109
|
|
58
110
|
def test_bold_tag
|
59
111
|
builder = Nokogiri::HTML::Builder.new do
|
60
112
|
b "bold tag"
|
61
113
|
end
|
62
|
-
assert_equal('<b>bold tag</b>', builder.doc.to_html.chomp)
|
114
|
+
assert_equal('<b>bold tag</b>', builder.doc.root.to_html.chomp)
|
63
115
|
end
|
64
116
|
|
65
117
|
def test_html_then_body_tag
|
@@ -71,7 +123,41 @@ module Nokogiri
|
|
71
123
|
}
|
72
124
|
end
|
73
125
|
assert_equal('<html><body><b>bold tag</b></body></html>',
|
74
|
-
builder.doc.to_html.chomp)
|
126
|
+
builder.doc.root.to_html.chomp.gsub(/>\s*</, '><'))
|
127
|
+
end
|
128
|
+
|
129
|
+
def test_instance_eval_with_delegation_to_block_context
|
130
|
+
class << self
|
131
|
+
def foo
|
132
|
+
"foo!"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
builder = Nokogiri::HTML::Builder.new { text foo }
|
137
|
+
assert builder.to_html.include?("foo!")
|
138
|
+
end
|
139
|
+
|
140
|
+
def test_builder_with_param
|
141
|
+
doc = Nokogiri::HTML::Builder.new { |html|
|
142
|
+
html.body {
|
143
|
+
html.p "hello world"
|
144
|
+
}
|
145
|
+
}.doc
|
146
|
+
|
147
|
+
assert node = doc.xpath('//body/p').first
|
148
|
+
assert_equal 'hello world', node.content
|
149
|
+
end
|
150
|
+
|
151
|
+
def test_builder_with_id
|
152
|
+
text = "hello world"
|
153
|
+
doc = Nokogiri::HTML::Builder.new { |html|
|
154
|
+
html.body {
|
155
|
+
html.id_ text
|
156
|
+
}
|
157
|
+
}.doc
|
158
|
+
|
159
|
+
assert node = doc.xpath('//body/id').first
|
160
|
+
assert_equal text, node.content
|
75
161
|
end
|
76
162
|
end
|
77
163
|
end
|