feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,7 @@
1
+ {"tests": [
2
+
3
+ {"description": "invalid style scoped attribute value 'inherit'",
4
+ "input": "<style scoped=inherit>",
5
+ "fail-unless": "invalid-boolean-value"}
6
+
7
+ ]}
@@ -0,0 +1,79 @@
1
+ {"tests": [
2
+
3
+ {"description": "valid tabindex attribute value '-1'",
4
+ "input": "<span tabindex=-1>",
5
+ "fail-if": "invalid-integer-value"},
6
+
7
+ {"description": "valid tabindex attribute value '0'",
8
+ "input": "<span tabindex=0>",
9
+ "fail-if": "invalid-integer-value"},
10
+
11
+ {"description": "valid tabindex attribute value '1'",
12
+ "input": "<span tabindex=1>",
13
+ "fail-if": "invalid-integer-value"},
14
+
15
+ {"description": "valid tabindex attribute value '32768'",
16
+ "input": "<span tabindex=32768>",
17
+ "fail-if": "invalid-integer-value"},
18
+
19
+ {"description": "valid tabindex attribute value '-32768'",
20
+ "input": "<span tabindex=-32768>",
21
+ "fail-if": "invalid-integer-value"},
22
+
23
+ {"description": "valid tabindex attribute value with leading spaces",
24
+ "input": "<span tabindex=' -32768'>",
25
+ "fail-if": "invalid-integer-value"},
26
+
27
+ {"description": "valid tabindex attribute value with trailing spaces",
28
+ "input": "<span tabindex='-32768 '>",
29
+ "fail-if": "invalid-integer-value"},
30
+
31
+ {"description": "valid tabindex attribute value with trailing junk",
32
+ "input": "<span tabindex='32768a'>",
33
+ "fail-if": "invalid-integer-value"},
34
+
35
+ {"description": "valid tabindex attribute value with trailing junk and whitespace",
36
+ "input": "<span tabindex='32768a '>",
37
+ "fail-if": "invalid-integer-value"},
38
+
39
+ {"description": "valid tabindex attribute value with trailing whitespace and junk",
40
+ "input": "<span tabindex='32768 a'>",
41
+ "fail-if": "invalid-integer-value"},
42
+
43
+ {"description": "valid tabindex attribute value with leading spaces",
44
+ "input": "<span tabindex=' 32768'>",
45
+ "fail-if": "invalid-integer-value"},
46
+
47
+ {"description": "valid tabindex attribute value with leading spaces (with sign)",
48
+ "input": "<span tabindex=' -32768'>",
49
+ "fail-if": "invalid-integer-value"},
50
+
51
+ {"description": "invalid tabindex attribute value (blank)",
52
+ "input": "<span tabindex>",
53
+ "fail-unless": "attribute-value-can-not-be-blank"},
54
+
55
+ {"description": "invalid tabindex attribute value due to leading junk",
56
+ "input": "<span tabindex=a1>",
57
+ "fail-unless": "invalid-integer-value"},
58
+
59
+ {"description": "invalid tabindex attribute value due to two hyphens",
60
+ "input": "<span tabindex=--1>",
61
+ "fail-unless": "invalid-integer-value"},
62
+
63
+ {"description": "invalid tabindex attribute value due to non-numeric",
64
+ "input": "<span tabindex=foo>",
65
+ "fail-unless": "invalid-integer-value"},
66
+
67
+ {"description": "invalid tabindex attribute value due to positive sign",
68
+ "input": "<span tabindex=+1>",
69
+ "fail-unless": "invalid-integer-value"},
70
+
71
+ {"description": "invalid tabindex attribute value due to decimal point",
72
+ "input": "<span tabindex=.1>",
73
+ "fail-unless": "invalid-integer-value"},
74
+
75
+ {"description": "valid tabindex attribute value with trailing decimal point",
76
+ "input": "<span tabindex=1.0>",
77
+ "fail-if": "invalid-integer-value"}
78
+
79
+ ]}
@@ -0,0 +1,72 @@
1
+ require 'test/unit'
2
+
3
+ HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
4
+
5
+ if File.exists?(File.join(HTML5_BASE, 'testdata'))
6
+ TESTDATA_DIR = File.join(HTML5_BASE, 'testdata')
7
+ else
8
+ TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
9
+ end
10
+
11
+ # $:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib')
12
+
13
+ # $:.unshift File.dirname(__FILE__)
14
+
15
+ require 'core_ext/string'
16
+
17
+ def html5_test_files(subdirectory)
18
+ Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
19
+ end
20
+
21
+ require 'rubygems'
22
+ require 'json'
23
+
24
+ module HTML5
25
+ module TestSupport
26
+ # convert the output of str(document) to the format used in the testcases
27
+ def convertTreeDump(treedump)
28
+ treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
29
+ end
30
+
31
+ def sortattrs(output)
32
+ output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
33
+ match.split("\n").sort.join("\n")
34
+ end
35
+ end
36
+
37
+ class TestData
38
+ include Enumerable
39
+
40
+ def initialize(filename, sections)
41
+ @f = open(filename)
42
+ @sections = sections
43
+ end
44
+
45
+ def each
46
+ data = {}
47
+ key = nil
48
+ @f.each_line do |line|
49
+ if line[0] == ?# and @sections.include?(line[1..-2])
50
+ heading = line[1..-2]
51
+ if data.any? and heading == @sections[0]
52
+ data[key].chomp! #Remove trailing newline
53
+ yield normaliseOutput(data)
54
+ data = {}
55
+ end
56
+ key = heading
57
+ data[key]=""
58
+ elsif key
59
+ data[key] += line
60
+ end
61
+ end
62
+ yield normaliseOutput(data) if data
63
+ end
64
+
65
+ def normaliseOutput(data)
66
+ #Remove trailing newlines
67
+ data.keys.each { |key| data[key].chomp! }
68
+ @sections.map {|heading| data[heading]}
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,35 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
+ require 'html5/inputstream'
4
+
5
+ class Html5EncodingTestCase < Test::Unit::TestCase
6
+ include HTML5
7
+ include TestSupport
8
+
9
+ begin
10
+ require 'rubygems'
11
+ require 'UniversalDetector'
12
+
13
+ def test_chardet #TODO: can we get rid of this?
14
+ file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
15
+ stream = HTML5::HTMLInputStream.new(file, :chardet => true)
16
+ assert_equal 'big5', stream.char_encoding.downcase
17
+ rescue LoadError
18
+ puts "chardet not found, skipping chardet tests"
19
+ end
20
+ end
21
+
22
+ html5_test_files('encoding').each do |test_file|
23
+ test_name = File.basename(test_file).sub('.dat', '').tr('-', '')
24
+
25
+ TestData.new(test_file, %w(data encoding)).
26
+ each_with_index do |(input, encoding), index|
27
+
28
+ define_method 'test_%s_%d' % [ test_name, index + 1 ] do
29
+ stream = HTML5::HTMLInputStream.new(input, :chardet => false)
30
+ assert_equal encoding.downcase, stream.char_encoding.downcase, input
31
+ end
32
+ end
33
+ end
34
+
35
+ end
@@ -0,0 +1,279 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
+ require 'html5/liberalxmlparser'
4
+
5
+ XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
6
+
7
+ def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
8
+ sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"}
9
+ document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root
10
+ if not expected
11
+ expected = input.chomp.gsub(XMLELEM,&sortattrs)
12
+ expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
13
+ output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,&sortattrs)
14
+ assert_equal(expected, output)
15
+ else
16
+ assert_equal(expected, document.to_s.gsub(/'/,'"'))
17
+ end
18
+ end
19
+
20
+ def assert_xhtml_equal(input, expected=nil, parser=HTML5::XHTMLParser)
21
+ assert_xml_equal(input, expected, parser)
22
+ end
23
+
24
+ class BasicXhtml5Test < Test::Unit::TestCase
25
+
26
+ def test_title_body_mismatched_close
27
+ assert_xhtml_equal(
28
+ '<title>Xhtml</title><b><i>content</b></i>',
29
+ '<html xmlns="http://www.w3.org/1999/xhtml">' +
30
+ '<head><title>Xhtml</title></head>' +
31
+ '<body><b><i>content</i></b></body>' +
32
+ '</html>')
33
+ end
34
+
35
+ def test_title_body_named_charref
36
+ assert_xhtml_equal(
37
+ '<title>ntilde</title>A &ntilde B',
38
+ '<html xmlns="http://www.w3.org/1999/xhtml">' +
39
+ '<head><title>ntilde</title></head>' +
40
+ '<body>A '+ [0xF1].pack('U') + ' B</body>' +
41
+ '</html>')
42
+ end
43
+ end
44
+
45
+ class BasicXmlTest < Test::Unit::TestCase
46
+
47
+ def test_comment
48
+ assert_xml_equal("<x><!-- foo --></x>")
49
+ end
50
+
51
+ def test_cdata
52
+ assert_xml_equal("<x><![CDATA[foo]]></x>","<x>foo</x>")
53
+ end
54
+
55
+ def test_simple_text
56
+ assert_xml_equal("<p>foo</p>","<p>foo</p>")
57
+ end
58
+
59
+ def test_optional_close
60
+ assert_xml_equal("<p>foo","<p>foo</p>")
61
+ end
62
+
63
+ def test_html_mismatched
64
+ assert_xml_equal("<b><i>foo</b></i>","<b><i>foo</i></b>")
65
+ end
66
+ end
67
+
68
+ class OpmlTest < Test::Unit::TestCase
69
+
70
+ def test_mixedCaseElement
71
+ assert_xml_equal(
72
+ '<opml version="1.0">' +
73
+ '<head><ownerName>Dave Winer</ownerName></head>' +
74
+ '</opml>')
75
+ end
76
+
77
+ def test_mixedCaseAttribute
78
+ assert_xml_equal(
79
+ '<opml version="1.0">' +
80
+ '<body><outline isComment="true"/></body>' +
81
+ '</opml>')
82
+ end
83
+
84
+ def test_malformed
85
+ assert_xml_equal(
86
+ '<opml version="1.0">' +
87
+ '<body><outline text="Odds & Ends"/></body>' +
88
+ '</opml>',
89
+ '<opml version="1.0">' +
90
+ '<body><outline text="Odds &amp; Ends"/></body>' +
91
+ '</opml>')
92
+ end
93
+ end
94
+
95
+ class XhtmlTest < Test::Unit::TestCase
96
+
97
+ def test_mathml
98
+ assert_xhtml_equal <<EOX
99
+ <html xmlns="http://www.w3.org/1999/xhtml">
100
+ <head><title>MathML</title></head>
101
+ <body>
102
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
103
+ <mrow>
104
+ <mi>x</mi>
105
+ <mo>=</mo>
106
+
107
+ <mfrac>
108
+ <mrow>
109
+ <mrow>
110
+ <mo>-</mo>
111
+ <mi>b</mi>
112
+ </mrow>
113
+ <mo>&#177;</mo>
114
+ <msqrt>
115
+
116
+ <mrow>
117
+ <msup>
118
+ <mi>b</mi>
119
+ <mn>2</mn>
120
+ </msup>
121
+ <mo>-</mo>
122
+ <mrow>
123
+
124
+ <mn>4</mn>
125
+ <mo>&#8290;</mo>
126
+ <mi>a</mi>
127
+ <mo>&#8290;</mo>
128
+ <mi>c</mi>
129
+ </mrow>
130
+ </mrow>
131
+
132
+ </msqrt>
133
+ </mrow>
134
+ <mrow>
135
+ <mn>2</mn>
136
+ <mo>&#8290;</mo>
137
+ <mi>a</mi>
138
+ </mrow>
139
+ </mfrac>
140
+
141
+ </mrow>
142
+ </math>
143
+ </body></html>
144
+ EOX
145
+ end
146
+
147
+ def test_svg
148
+ assert_xhtml_equal <<EOX
149
+ <html xmlns="http://www.w3.org/1999/xhtml">
150
+ <head><title>SVG</title></head>
151
+ <body>
152
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
153
+ <path d="M38,38c0-12,24-15,23-2c0,9-16,13-16,23v7h11v-4c0-9,17-12,17-27
154
+ c-2-22-45-22-45,3zM45,70h11v11h-11z" fill="#371">
155
+ </path>
156
+ <circle cx="50" cy="50" r="45" fill="none" stroke="#371" stroke-width="10">
157
+ </circle>
158
+
159
+ </svg>
160
+ </body></html>
161
+ EOX
162
+ end
163
+
164
+ def test_xlink
165
+ assert_xhtml_equal <<EOX
166
+ <html xmlns="http://www.w3.org/1999/xhtml">
167
+ <head><title>XLINK</title></head>
168
+ <body>
169
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
170
+ <defs xmlns:l="http://www.w3.org/1999/xlink">
171
+ <radialGradient id="s1" fx=".4" fy=".2" r=".7">
172
+ <stop stop-color="#FE8"/>
173
+ <stop stop-color="#D70" offset="1"/>
174
+ </radialGradient>
175
+ <radialGradient id="s2" fx=".8" fy=".5" l:href="#s1"/>
176
+ <radialGradient id="s3" fx=".5" fy=".9" l:href="#s1"/>
177
+ <radialGradient id="s4" fx=".1" fy=".5" l:href="#s1"/>
178
+ </defs>
179
+ <g stroke="#940">
180
+ <path d="M73,29c-37-40-62-24-52,4l6-7c-8-16,7-26,42,9z" fill="url(#s1)"/>
181
+ <path d="M47,8c33-16,48,21,9,47l-6-5c38-27,20-44,5-37z" fill="url(#s2)"/>
182
+ <path d="M77,32c22,30,10,57-39,51l-1-8c3,3,67,5,36-36z" fill="url(#s3)"/>
183
+
184
+ <path d="M58,84c-4,20-38-4-8-24l-6-5c-36,43,15,56,23,27z" fill="url(#s4)"/>
185
+ <path d="M40,14c-40,37-37,52-9,68l1-8c-16-13-29-21,16-56z" fill="url(#s1)"/>
186
+ <path d="M31,33c19,23,20,7,35,41l-9,1.7c-4-19-8-14-31-37z" fill="url(#s2)"/>
187
+ </g>
188
+ </svg>
189
+ </body></html>
190
+ EOX
191
+ end
192
+
193
+ def test_br
194
+ assert_xhtml_equal <<EOX1
195
+ <html xmlns="http://www.w3.org/1999/xhtml">
196
+ <head><title>BR</title></head>
197
+ <body>
198
+ <br/>
199
+ </body></html>
200
+ EOX1
201
+ end
202
+
203
+ def test_strong
204
+ assert_xhtml_equal <<EOX
205
+ <html xmlns="http://www.w3.org/1999/xhtml">
206
+ <head><title>STRONG</title></head>
207
+ <body>
208
+ <strong></strong>
209
+ </body></html>
210
+ EOX
211
+ end
212
+
213
+ def test_script
214
+ assert_xhtml_equal <<EOX
215
+ <html xmlns="http://www.w3.org/1999/xhtml">
216
+ <head><title>SCRIPT</title></head>
217
+ <body>
218
+ <script>1 &lt; 2 &amp; 3</script>
219
+ </body></html>
220
+ EOX
221
+ end
222
+
223
+ def test_script_src
224
+ assert_xhtml_equal <<EOX1, <<EOX2.strip
225
+ <html xmlns="http://www.w3.org/1999/xhtml">
226
+ <head><title>SCRIPT</title><script src="http://example.com"/></head>
227
+ <body>
228
+ <script>1 &lt; 2 &amp; 3</script>
229
+ </body></html>
230
+ EOX1
231
+ <html xmlns="http://www.w3.org/1999/xhtml">
232
+ <head><title>SCRIPT</title><script src="http://example.com"></script></head>
233
+ <body>
234
+ <script>1 &lt; 2 &amp; 3</script>
235
+ </body></html>
236
+ EOX2
237
+ end
238
+
239
+ def test_title
240
+ assert_xhtml_equal <<EOX
241
+ <html xmlns="http://www.w3.org/1999/xhtml">
242
+ <head><title>1 &lt; 2 &amp; 3</title></head>
243
+ <body>
244
+ </body></html>
245
+ EOX
246
+ end
247
+
248
+ def test_prolog
249
+ assert_xhtml_equal <<EOX1, <<EOX2.strip
250
+ <?xml version="1.0" encoding="UTF-8" ?>
251
+ <html xmlns="http://www.w3.org/1999/xhtml">
252
+ <head><title>PROLOG</title></head>
253
+ <body>
254
+ </body></html>
255
+ EOX1
256
+ <html xmlns="http://www.w3.org/1999/xhtml">
257
+ <head><title>PROLOG</title></head>
258
+ <body>
259
+ </body></html>
260
+ EOX2
261
+ end
262
+
263
+ def test_tagsoup
264
+ assert_xhtml_equal <<EOX1, <<EOX2.strip
265
+ <html xmlns="http://www.w3.org/1999/xhtml">
266
+ <head><title>TAGSOUP</title></head>
267
+ <body>
268
+ <u><blockquote><p></u>
269
+ </body></html>
270
+ EOX1
271
+ <html xmlns="http://www.w3.org/1999/xhtml">
272
+ <head><title>TAGSOUP</title></head>
273
+ <body>
274
+ <u/><blockquote><u/><p><u/>
275
+ </p></blockquote></body></html>
276
+ EOX2
277
+ end
278
+
279
+ end