feedtools 0.2.26 → 0.2.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
{"tests": [
|
|
2
|
+
|
|
3
|
+
{"description": "valid tabindex attribute value '-1'",
|
|
4
|
+
"input": "<span tabindex=-1>",
|
|
5
|
+
"fail-if": "invalid-integer-value"},
|
|
6
|
+
|
|
7
|
+
{"description": "valid tabindex attribute value '0'",
|
|
8
|
+
"input": "<span tabindex=0>",
|
|
9
|
+
"fail-if": "invalid-integer-value"},
|
|
10
|
+
|
|
11
|
+
{"description": "valid tabindex attribute value '1'",
|
|
12
|
+
"input": "<span tabindex=1>",
|
|
13
|
+
"fail-if": "invalid-integer-value"},
|
|
14
|
+
|
|
15
|
+
{"description": "valid tabindex attribute value '32768'",
|
|
16
|
+
"input": "<span tabindex=32768>",
|
|
17
|
+
"fail-if": "invalid-integer-value"},
|
|
18
|
+
|
|
19
|
+
{"description": "valid tabindex attribute value '-32768'",
|
|
20
|
+
"input": "<span tabindex=-32768>",
|
|
21
|
+
"fail-if": "invalid-integer-value"},
|
|
22
|
+
|
|
23
|
+
{"description": "valid tabindex attribute value with leading spaces",
|
|
24
|
+
"input": "<span tabindex=' -32768'>",
|
|
25
|
+
"fail-if": "invalid-integer-value"},
|
|
26
|
+
|
|
27
|
+
{"description": "valid tabindex attribute value with trailing spaces",
|
|
28
|
+
"input": "<span tabindex='-32768 '>",
|
|
29
|
+
"fail-if": "invalid-integer-value"},
|
|
30
|
+
|
|
31
|
+
{"description": "valid tabindex attribute value with trailing junk",
|
|
32
|
+
"input": "<span tabindex='32768a'>",
|
|
33
|
+
"fail-if": "invalid-integer-value"},
|
|
34
|
+
|
|
35
|
+
{"description": "valid tabindex attribute value with trailing junk and whitespace",
|
|
36
|
+
"input": "<span tabindex='32768a '>",
|
|
37
|
+
"fail-if": "invalid-integer-value"},
|
|
38
|
+
|
|
39
|
+
{"description": "valid tabindex attribute value with trailing whitespace and junk",
|
|
40
|
+
"input": "<span tabindex='32768 a'>",
|
|
41
|
+
"fail-if": "invalid-integer-value"},
|
|
42
|
+
|
|
43
|
+
{"description": "valid tabindex attribute value with leading spaces",
|
|
44
|
+
"input": "<span tabindex=' 32768'>",
|
|
45
|
+
"fail-if": "invalid-integer-value"},
|
|
46
|
+
|
|
47
|
+
{"description": "valid tabindex attribute value with leading spaces (with sign)",
|
|
48
|
+
"input": "<span tabindex=' -32768'>",
|
|
49
|
+
"fail-if": "invalid-integer-value"},
|
|
50
|
+
|
|
51
|
+
{"description": "invalid tabindex attribute value (blank)",
|
|
52
|
+
"input": "<span tabindex>",
|
|
53
|
+
"fail-unless": "attribute-value-can-not-be-blank"},
|
|
54
|
+
|
|
55
|
+
{"description": "invalid tabindex attribute value due to leading junk",
|
|
56
|
+
"input": "<span tabindex=a1>",
|
|
57
|
+
"fail-unless": "invalid-integer-value"},
|
|
58
|
+
|
|
59
|
+
{"description": "invalid tabindex attribute value due to two hyphens",
|
|
60
|
+
"input": "<span tabindex=--1>",
|
|
61
|
+
"fail-unless": "invalid-integer-value"},
|
|
62
|
+
|
|
63
|
+
{"description": "invalid tabindex attribute value due to non-numeric",
|
|
64
|
+
"input": "<span tabindex=foo>",
|
|
65
|
+
"fail-unless": "invalid-integer-value"},
|
|
66
|
+
|
|
67
|
+
{"description": "invalid tabindex attribute value due to positive sign",
|
|
68
|
+
"input": "<span tabindex=+1>",
|
|
69
|
+
"fail-unless": "invalid-integer-value"},
|
|
70
|
+
|
|
71
|
+
{"description": "invalid tabindex attribute value due to decimal point",
|
|
72
|
+
"input": "<span tabindex=.1>",
|
|
73
|
+
"fail-unless": "invalid-integer-value"},
|
|
74
|
+
|
|
75
|
+
{"description": "valid tabindex attribute value with trailing decimal point",
|
|
76
|
+
"input": "<span tabindex=1.0>",
|
|
77
|
+
"fail-if": "invalid-integer-value"}
|
|
78
|
+
|
|
79
|
+
]}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
require 'test/unit'
|
|
2
|
+
|
|
3
|
+
# Directory three levels above this file.
HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))

# Location of the shared test fixtures: prefer a 'testdata' directory directly
# under HTML5_BASE, otherwise fall back to the 'testdata' directory that sits
# next to this file's parent directory.
# File.exist? is used because File.exists? no longer exists in Ruby 3.2+.
TESTDATA_DIR =
  if File.exist?(File.join(HTML5_BASE, 'testdata'))
    File.join(HTML5_BASE, 'testdata')
  else
    File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
  end
|
|
10
|
+
|
|
11
|
+
# $:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib')
|
|
12
|
+
|
|
13
|
+
# $:.unshift File.dirname(__FILE__)
|
|
14
|
+
|
|
15
|
+
require 'core_ext/string'
|
|
16
|
+
|
|
17
|
+
# Returns the paths of the fixture files inside +subdirectory+ of
# TESTDATA_DIR. Only entries matching the '*.*' glob are returned.
def html5_test_files(subdirectory)
  pattern = File.join(TESTDATA_DIR, subdirectory, '*.*')
  Dir.glob(pattern)
end
|
|
20
|
+
|
|
21
|
+
require 'rubygems'
|
|
22
|
+
require 'json'
|
|
23
|
+
|
|
24
|
+
module HTML5
  # Helpers shared by the HTML5 test cases.
  module TestSupport
    # convert the output of str(document) to the format used in the testcases
    #
    # Drops the first line of +treedump+ and, for every remaining line that is
    # longer than two characters and starts with '|', removes its first three
    # characters. An empty dump yields an empty string.
    def convertTreeDump(treedump)
      # drop(1) instead of [1..-1]: the latter is nil for an empty array.
      treedump.split(/\n/).drop(1).map do |line|
        line.length > 2 && line.start_with?('|') ? line[3..-1] : line
      end.join("\n")
    end

    # Sorts each run of consecutive, equally indented "name=..." lines in
    # +output+ so that attribute order does not affect comparisons.
    def sortattrs(output)
      output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
        match.split("\n").sort.join("\n")
      end
    end

    # Enumerates the records of a sectioned test-data file.
    #
    # A line of the form "#name" (where name is one of +sections+) starts a
    # section; the following lines are that section's content. A new record
    # starts whenever the first section name is seen again. Each record is
    # yielded as an array of section contents, in the order given by
    # +sections+ (nil for sections the record does not contain).
    class TestData
      include Enumerable

      # filename:: path of the test-data file
      # sections:: array of section names; the first one starts a new record
      #
      # The file is read once here and closed immediately, so no handle is
      # leaked and the data can be enumerated more than once.
      def initialize(filename, sections)
        @lines = File.readlines(filename)
        @sections = sections
      end

      # Yields one array per record (see class comment). Nothing is yielded
      # when the file contains no section headings.
      def each
        data = {}
        key = nil
        @lines.each do |line|
          # Heading text is the line minus its leading '#' and last character.
          heading = line[1..-2]
          if line.start_with?('#') && @sections.include?(heading)
            if !data.empty? && heading == @sections[0]
              data[key].chomp! #Remove trailing newline
              yield normaliseOutput(data)
              data = {}
            end
            key = heading
            data[key] = ""
          elsif key
            data[key] += line
          end
        end
        # Flush the final record; an empty hash means there was no record.
        yield normaliseOutput(data) unless data.empty?
      end

      # Strips one trailing newline from every collected section (in place)
      # and returns the section contents ordered like @sections.
      def normaliseOutput(data)
        #Remove trailing newlines
        data.keys.each { |key| data[key].chomp! }
        @sections.map { |heading| data[heading] }
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
|
2
|
+
|
|
3
|
+
require 'html5/inputstream'
|
|
4
|
+
|
|
5
|
+
# Checks the character encoding reported by HTML5::HTMLInputStream, both via
# the optional chardet library and via the fixtures in testdata/encoding.
class Html5EncodingTestCase < Test::Unit::TestCase
  include HTML5
  include TestSupport

  # test_chardet is only defined when UniversalDetector can be loaded. The
  # rescue is attached to this begin block (not to the method body) so that a
  # missing library skips the test instead of aborting the load of this file.
  begin
    require 'rubygems'
    require 'UniversalDetector'

    def test_chardet #TODO: can we get rid of this?
      path = File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt')
      # Block form closes the fixture even when the assertion fails.
      File.open(path, 'r') do |file|
        stream = HTML5::HTMLInputStream.new(file, :chardet => true)
        assert_equal 'big5', stream.char_encoding.downcase
      end
    end
  rescue LoadError
    puts "chardet not found, skipping chardet tests"
  end

  # One generated test per record of every encoding fixture file, named
  # test_<file name without '.dat' and '-'>_<1-based record index>.
  html5_test_files('encoding').each do |test_file|
    test_name = File.basename(test_file).sub('.dat', '').tr('-', '')

    TestData.new(test_file, %w(data encoding)).
      each_with_index do |(input, encoding), index|

      define_method 'test_%s_%d' % [ test_name, index + 1 ] do
        stream = HTML5::HTMLInputStream.new(input, :chardet => false)
        assert_equal encoding.downcase, stream.char_encoding.downcase, input
      end
    end
  end

end
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
|
2
|
+
|
|
3
|
+
require 'html5/liberalxmlparser'
|
|
4
|
+
|
|
5
|
+
# Matches a start (or empty-element) tag that carries at least one
# double-quoted attribute: $1 = element name plus trailing whitespace,
# $2 = the attribute list, $3 = the optional '/'.
XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/

# Parses +input+ with +parser+ and asserts that the serialized root element
# (with single quotes turned into double quotes) equals +expected+.
#
# When +expected+ is omitted the document must round-trip: the expectation is
# +input+ itself with decimal character references decoded, and attributes
# are sorted on both sides so their order is irrelevant.
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
  # gsub callback; reads the capture groups of the XMLELEM match.
  sortattrs = proc do
    "<#{$1+$2.split.sort.join(' ')+$3}>"
  end

  document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root

  if expected
    assert_equal(expected, document.to_s.gsub(/'/,'"'))
  else
    wanted = input.chomp.gsub(XMLELEM, &sortattrs)
    wanted = wanted.gsub(/&#(\d+);/) { [$1.to_i].pack('U') }
    serialized = document.to_s.gsub(/'/,'"').gsub(XMLELEM, &sortattrs)
    assert_equal(wanted, serialized)
  end
end
|
|
19
|
+
|
|
20
|
+
# Same check as assert_xml_equal, but the default parser is
# HTML5::XHTMLParser. All arguments are forwarded unchanged.
def assert_xhtml_equal(input, expected = nil, parser = HTML5::XHTMLParser)
  assert_xml_equal(input, expected, parser)
end
|
|
23
|
+
|
|
24
|
+
# XHTML parsing of fragments: the parser is expected to supply the
# html/head/body skeleton around the given content.
class BasicXhtml5Test < Test::Unit::TestCase

  # Mis-nested </b></i> must be serialized properly nested.
  def test_title_body_mismatched_close
    assert_xhtml_equal(
      '<title>Xhtml</title><b><i>content</b></i>',
      '<html xmlns="http://www.w3.org/1999/xhtml">' +
      '<head><title>Xhtml</title></head>' +
      '<body><b><i>content</i></b></body>' +
      '</html>')
  end

  # A named character reference must come out as the character itself.
  # The input spells the reference out (&ntilde;) so that the test really
  # exercises reference decoding and the source stays pure ASCII.
  def test_title_body_named_charref
    assert_xhtml_equal(
      '<title>ntilde</title>A &ntilde; B',
      '<html xmlns="http://www.w3.org/1999/xhtml">' +
      '<head><title>ntilde</title></head>' +
      '<body>A '+ [0xF1].pack('U') + ' B</body>' +
      '</html>')
  end
end
|
|
44
|
+
|
|
45
|
+
# Generic XML parsing: each test feeds a small document to assert_xml_equal.
class BasicXmlTest < Test::Unit::TestCase

  # A comment must survive the round trip unchanged.
  def test_comment
    source = "<x><!-- foo --></x>"
    assert_xml_equal(source)
  end

  # A CDATA section is serialized as plain text.
  def test_cdata
    source = "<x><![CDATA[foo]]></x>"
    assert_xml_equal(source, "<x>foo</x>")
  end

  # Plain element content is serialized as-is.
  def test_simple_text
    source = "<p>foo</p>"
    assert_xml_equal(source, "<p>foo</p>")
  end

  # A missing end tag is supplied.
  def test_optional_close
    source = "<p>foo"
    assert_xml_equal(source, "<p>foo</p>")
  end

  # Mis-nested end tags are serialized properly nested.
  def test_html_mismatched
    source = "<b><i>foo</b></i>"
    assert_xml_equal(source, "<b><i>foo</i></b>")
  end
end
|
|
67
|
+
|
|
68
|
+
# OPML documents: element and attribute names are mixed-case, and real-world
# files often contain unescaped ampersands.
class OpmlTest < Test::Unit::TestCase

  # Mixed-case element names must round-trip unchanged.
  def test_mixedCaseElement
    assert_xml_equal(
      '<opml version="1.0">' +
        '<head><ownerName>Dave Winer</ownerName></head>' +
      '</opml>')
  end

  # Mixed-case attribute names must round-trip unchanged.
  def test_mixedCaseAttribute
    assert_xml_equal(
      '<opml version="1.0">' +
        '<body><outline isComment="true"/></body>' +
      '</opml>')
  end

  # A raw '&' in an attribute value (not well-formed XML) must be accepted
  # and serialized as the escaped form '&amp;'.
  def test_malformed
    assert_xml_equal(
      '<opml version="1.0">' +
        '<body><outline text="Odds & Ends"/></body>' +
      '</opml>',
      '<opml version="1.0">' +
        '<body><outline text="Odds &amp; Ends"/></body>' +
      '</opml>')
  end
end
|
|
94
|
+
|
|
95
|
+
# Complete XHTML documents. Tests that pass a single heredoc expect the
# document to round-trip through the parser (see assert_xml_equal); tests that
# pass two heredocs give the expected serialization explicitly.
#
# Markup-significant characters in text content are written as &lt; / &amp;
# and non-ASCII operators as decimal character references, so the fixtures
# are well-formed and this file stays pure ASCII.
class XhtmlTest < Test::Unit::TestCase

  # Embedded MathML, including operators given as character references.
  def test_mathml
    assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>MathML</title></head>
<body>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mrow>
<mi>x</mi>
<mo>=</mo>

<mfrac>
<mrow>
<mrow>
<mo>-</mo>
<mi>b</mi>
</mrow>
<mo>&#177;</mo>
<msqrt>

<mrow>
<msup>
<mi>b</mi>
<mn>2</mn>
</msup>
<mo>-</mo>
<mrow>

<mn>4</mn>
<mo>&#8290;</mo>
<mi>a</mi>
<mo>&#8290;</mo>
<mi>c</mi>
</mrow>
</mrow>

</msqrt>
</mrow>
<mrow>
<mn>2</mn>
<mo>&#8290;</mo>
<mi>a</mi>
</mrow>
</mfrac>

</mrow>
</math>
</body></html>
EOX
  end

  # Embedded SVG with mixed-case and hyphenated attribute names.
  def test_svg
    assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SVG</title></head>
<body>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<path d="M38,38c0-12,24-15,23-2c0,9-16,13-16,23v7h11v-4c0-9,17-12,17-27
c-2-22-45-22-45,3zM45,70h11v11h-11z" fill="#371">
</path>
<circle cx="50" cy="50" r="45" fill="none" stroke="#371" stroke-width="10">
</circle>

</svg>
</body></html>
EOX
  end

  # SVG using prefixed (xlink) attributes declared on an inner element.
  def test_xlink
    assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<body>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<defs xmlns:l="http://www.w3.org/1999/xlink">
<radialGradient id="s1" fx=".4" fy=".2" r=".7">
<stop stop-color="#FE8"/>
<stop stop-color="#D70" offset="1"/>
</radialGradient>
<radialGradient id="s2" fx=".8" fy=".5" l:href="#s1"/>
<radialGradient id="s3" fx=".5" fy=".9" l:href="#s1"/>
<radialGradient id="s4" fx=".1" fy=".5" l:href="#s1"/>
</defs>
<g stroke="#940">
<path d="M73,29c-37-40-62-24-52,4l6-7c-8-16,7-26,42,9z" fill="url(#s1)"/>
<path d="M47,8c33-16,48,21,9,47l-6-5c38-27,20-44,5-37z" fill="url(#s2)"/>
<path d="M77,32c22,30,10,57-39,51l-1-8c3,3,67,5,36-36z" fill="url(#s3)"/>

<path d="M58,84c-4,20-38-4-8-24l-6-5c-36,43,15,56,23,27z" fill="url(#s4)"/>
<path d="M40,14c-40,37-37,52-9,68l1-8c-16-13-29-21,16-56z" fill="url(#s1)"/>
<path d="M31,33c19,23,20,7,35,41l-9,1.7c-4-19-8-14-31-37z" fill="url(#s2)"/>
</g>
</svg>
</body></html>
EOX
  end

  # An empty <br/> stays an empty-element tag.
  def test_br
    assert_xhtml_equal <<EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>BR</title></head>
<body>
<br/>
</body></html>
EOX1
  end

  # An empty <strong></strong> keeps its explicit end tag.
  def test_strong
    assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>STRONG</title></head>
<body>
<strong></strong>
</body></html>
EOX
  end

  # Escaped markup characters inside <script> round-trip unchanged.
  def test_script
    assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX
  end

  # A self-closed <script src/> is serialized with an explicit end tag.
  def test_script_src
    assert_xhtml_equal <<EOX1, <<EOX2.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title><script src="http://example.com"/></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title><script src="http://example.com"></script></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX2
  end

  # Escaped markup characters inside <title> round-trip unchanged.
  def test_title
    assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>1 &lt; 2 &amp; 3</title></head>
<body>
</body></html>
EOX
  end

  # The XML declaration is not part of the serialized root element.
  def test_prolog
    assert_xhtml_equal <<EOX1, <<EOX2.strip
<?xml version="1.0" encoding="UTF-8" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>PROLOG</title></head>
<body>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>PROLOG</title></head>
<body>
</body></html>
EOX2
  end

  # Mis-nested tag soup: the open <u> is re-opened inside each block element.
  def test_tagsoup
    assert_xhtml_equal <<EOX1, <<EOX2.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u><blockquote><p></u>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u/><blockquote><u/><p><u/>
</p></blockquote></body></html>
EOX2
  end

end
|