feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,85 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Phase that handles tokens while the parser is inside a <select> element.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-select
  class InSelectPhase < Phase

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select',
               %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    # Character data is handed to the tree builder as text.
    def processCharacters(data)
      @tree.insertText(data)
    end

    # <option>: an <option> that is still the current node gets an implied
    # </option> before the new element is inserted.
    def startTagOption(name, attributes)
      @tree.open_elements.pop if current_node_name == 'option'
      @tree.insert_element(name, attributes)
    end

    # <optgroup>: implicitly closes a current <option>, then a current
    # <optgroup>, before the new element is inserted.
    def startTagOptgroup(name, attributes)
      %w[option optgroup].each do |implied|
        @tree.open_elements.pop if current_node_name == implied
      end
      @tree.insert_element(name, attributes)
    end

    # A nested <select> is a parse error and is treated like </select>.
    def startTagSelect(name, attributes)
      parse_error("unexpected-select-in-select")
      endTagSelect('select')
    end

    # Any other start tag is reported as a parse error and otherwise dropped.
    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-in-select", {"name" => name})
    end

    # </option> only closes an <option> that is the current node.
    def endTagOption(name)
      unless current_node_name == 'option'
        return parse_error("unexpected-end-tag-in-select", {"name" => "option"})
      end
      @tree.open_elements.pop
    end

    # </optgroup> first closes an <option> sitting directly inside an
    # <optgroup>, then the <optgroup> itself -- and nothing else.
    def endTagOptgroup(name)
      stack = @tree.open_elements
      stack.pop if stack.last.name == 'option' && stack[-2].name == 'optgroup'

      if stack.last.name == 'optgroup'
        stack.pop
      else
        parse_error("unexpected-end-tag-in-select",
              {"name" => "optgroup"})
      end
    end

    # </select>: pops the stack up to and including <select> and lets the
    # parser pick the next insertion mode.
    def endTagSelect(name)
      unless in_scope?('select', true)
        # inner_html case
        return parse_error
      end

      remove_open_elements_until('select')
      @parser.reset_insertion_mode
    end

    # End tags of table elements are always a parse error; when the element is
    # in scope the <select> is closed and the tag is re-dispatched to
    # whichever phase is current afterwards.
    def endTagTableElements(name)
      parse_error("unexpected-end-tag-in-select", {"name" => name})
      return unless in_scope?(name, true)

      endTagSelect('select')
      @parser.phase.processEndTag(name)
    end

    # Any other end tag is reported as a parse error and otherwise dropped.
    def endTagOther(name)
      parse_error("unexpected-end-tag-in-select", {"name" => name})
    end

    private

    # Name of the element on top of the open-elements stack.
    def current_node_name
      @tree.open_elements.last.name
    end

  end
end
@@ -0,0 +1,86 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Phase that handles tokens while the parser is inside a table row group
  # (<tbody>, <thead> or <tfoot>).
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
  class InTableBodyPhase < Phase

    handle_start 'html', 'tr',
                 %w( td th ) => 'TableCell',
                 %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

    handle_end 'table',
               %w( tbody tfoot thead ) => 'TableRowGroup',
               %w( body caption col colgroup html td th tr ) => 'Ignore'

    # Character data is delegated to the "in table" phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <tr>: clears the stack back to the row group, inserts the row and
    # switches to the "in row" phase.
    def startTagTr(name, attributes)
      clearStackToTableBodyContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inRow]
    end

    # <td>/<th> directly inside a row group: parse error; a <tr> is implied
    # and the cell tag is re-dispatched to the phase that is then current.
    def startTagTableCell(name, attributes)
      parse_error("unexpected-cell-in-table-body", {"name" => name})
      startTagTr('tr', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # <caption>, <col>, <colgroup>, <tbody>, <tfoot>, <thead>: closes the
    # current row group and re-dispatches the start tag.
    def startTagTableOther(name, attributes)
      if row_group_in_scope?
        close_current_row_group
        @parser.phase.processStartTag(name, attributes)
      else
        # inner_html case
        parse_error
      end
    end

    # Every other start tag is delegated to the "in table" phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tbody>, </tfoot>, </thead>: pops the row group and returns to the
    # "in table" phase; a parse error when the element is not in scope.
    def endTagTableRowGroup(name)
      unless in_scope?(name, true)
        return parse_error("unexpected-end-tag-in-table-body",
              {"name" => name})
      end

      clearStackToTableBodyContext
      @tree.open_elements.pop
      @parser.phase = @parser.phases[:inTable]
    end

    # </table>: closes the current row group and re-dispatches the end tag.
    def endTagTable(name)
      if row_group_in_scope?
        close_current_row_group
        @parser.phase.processEndTag(name)
      else
        # inner_html case
        parse_error
      end
    end

    # End tags that make no sense here are reported and dropped.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag-in-table-body",
            {"name" => name})
    end

    # Every other end tag is delegated to the "in table" phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # Pops open elements (reporting each one as a parse error) until the
    # current node is a row group or <html>.
    def clearStackToTableBodyContext
      stack = @tree.open_elements
      until %w[tbody tfoot thead html].include?(stack.last.name)
        parse_error("unexpected-implied-end-tag-in-table",
              {"name" => stack.last.name})
        stack.pop
      end
    end

    # True when a <tbody>, <thead> or <tfoot> is in table scope (checked in
    # that order).
    def row_group_in_scope?
      %w[tbody thead tfoot].any? { |group| in_scope?(group, true) }
    end

    # Clears the stack back to the row group and then closes that row group
    # as if its end tag had been seen.
    def close_current_row_group
      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.open_elements.last.name)
    end

  end
end
@@ -0,0 +1,115 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Phase that handles tokens while the parser is directly inside a <table>.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table
  class InTablePhase < Phase

    handle_start 'html', 'caption', 'colgroup', 'col', 'table'

    handle_start(%w( tbody tfoot thead ) => 'RowGroup',
                 %w( td th tr ) => 'ImplyTbody')

    handle_end 'table',
               %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

    # Character data inside a table is a parse error; it is processed by the
    # "in body" phase with the table-specific insertion flag set.
    def processCharacters(data)
      parse_error("unexpected-char-implies-table-voodoo")
      in_body_from_table { |in_body| in_body.processCharacters(data) }
    end

    # <caption>: clears the stack, pushes a formatting-element marker, inserts
    # the element and switches to the "in caption" phase.
    def startTagCaption(name, attributes)
      clearStackToTableContext
      @tree.activeFormattingElements.push(Marker)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCaption]
    end

    # <colgroup>: clears the stack, inserts the element and switches to the
    # "in column group" phase.
    def startTagColgroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inColumnGroup]
    end

    # <col>: implies a <colgroup>, then re-dispatches the tag.
    def startTagCol(name, attributes)
      startTagColgroup('colgroup', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # <tbody>, <tfoot>, <thead>: clears the stack, inserts the element and
    # switches to the "in table body" phase.
    def startTagRowGroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inTableBody]
    end

    # <td>, <th>, <tr>: implies a <tbody>, then re-dispatches the tag.
    def startTagImplyTbody(name, attributes)
      startTagRowGroup('tbody', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # A nested <table> start tag is a parse error that implies </table>; the
    # start tag is then re-dispatched unless parsing inner_html.
    def startTagTable(name, attributes)
      parse_error("unexpected-start-tag-implies-end-tag",
            {"startName" => "table", "endName" => "table"})
      @parser.phase.processEndTag('table')
      return if @parser.inner_html

      @parser.phase.processStartTag(name, attributes)
    end

    # Any other start tag is a parse error and is processed by the "in body"
    # phase with the table-specific insertion flag set.
    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-implies-table-voodoo",
            {"name" => name})
      in_body_from_table { |in_body| in_body.processStartTag(name, attributes) }
    end

    # </table>: generates implied end tags, pops the stack up to and
    # including <table> and lets the parser pick the next insertion mode.
    def endTagTable(name)
      if in_scope?('table', true)
        @tree.generateImpliedEndTags

        current = @tree.open_elements.last.name
        if current != 'table'
          parse_error("end-tag-too-early-named",
                    {"gotName" => "table",
                     "expectedName" => current})
        end

        remove_open_elements_until('table')

        @parser.reset_insertion_mode
      else
        # inner_html case
        assert @parser.inner_html
        parse_error
      end
    end

    # End tags that make no sense here are reported and dropped.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # Any other end tag is a parse error and is processed by the "in body"
    # phase with the table-specific insertion flag set.
    def endTagOther(name)
      parse_error("unexpected-end-tag-implies-table-voodoo", {"name" => name})
      in_body_from_table { |in_body| in_body.processEndTag(name) }
    end

    protected

    # "clear the stack back to a table context": pops open elements
    # (reporting each one as a parse error) until the current node is <table>
    # or <html>. Stopping at <html> is the inner_html case.
    def clearStackToTableContext
      stack = @tree.open_elements
      until %w[table html].include?(stack.last.name)
        parse_error("unexpected-implied-end-tag-in-table",
              {"name" => stack.last.name})
        stack.pop
      end
    end

    # Yields the "in body" phase while @tree.insert_from_table is set, which
    # makes all the special element rearranging voodoo kick in, and clears
    # the flag again afterwards.
    def in_body_from_table
      @tree.insert_from_table = true
      yield @parser.phases[:inBody]
      @tree.insert_from_table = false
    end

  end
end
@@ -0,0 +1,133 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # First phase of tree construction: waits for the DOCTYPE and then hands
  # over to the root element phase.
  #
  # This phase deals with error handling as well which is currently not
  # covered in the specification. The error handling is typically known as
  # "quirks mode". It is expected that a future version of HTML5 will define this.
  class InitialPhase < Phase

    # Public identifiers (lower-cased) that select quirks mode regardless of
    # the system identifier.
    QUIRKS_PUBLIC_IDS = [
      "+//silmaril//dtd html pro v0r11 19970101//en",
      "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
      "-//as//dtd html 3.0 aswedit + extensions//en",
      "-//ietf//dtd html 2.0 level 1//en",
      "-//ietf//dtd html 2.0 level 2//en",
      "-//ietf//dtd html 2.0 strict level 1//en",
      "-//ietf//dtd html 2.0 strict level 2//en",
      "-//ietf//dtd html 2.0 strict//en",
      "-//ietf//dtd html 2.0//en",
      "-//ietf//dtd html 2.1e//en",
      "-//ietf//dtd html 3.0//en",
      "-//ietf//dtd html 3.0//en//",
      "-//ietf//dtd html 3.2 final//en",
      "-//ietf//dtd html 3.2//en",
      "-//ietf//dtd html 3//en",
      "-//ietf//dtd html level 0//en",
      "-//ietf//dtd html level 0//en//2.0",
      "-//ietf//dtd html level 1//en",
      "-//ietf//dtd html level 1//en//2.0",
      "-//ietf//dtd html level 2//en",
      "-//ietf//dtd html level 2//en//2.0",
      "-//ietf//dtd html level 3//en",
      "-//ietf//dtd html level 3//en//3.0",
      "-//ietf//dtd html strict level 0//en",
      "-//ietf//dtd html strict level 0//en//2.0",
      "-//ietf//dtd html strict level 1//en",
      "-//ietf//dtd html strict level 1//en//2.0",
      "-//ietf//dtd html strict level 2//en",
      "-//ietf//dtd html strict level 2//en//2.0",
      "-//ietf//dtd html strict level 3//en",
      "-//ietf//dtd html strict level 3//en//3.0",
      "-//ietf//dtd html strict//en",
      "-//ietf//dtd html strict//en//2.0",
      "-//ietf//dtd html strict//en//3.0",
      "-//ietf//dtd html//en",
      "-//ietf//dtd html//en//2.0",
      "-//ietf//dtd html//en//3.0",
      "-//metrius//dtd metrius presentational//en",
      "-//microsoft//dtd internet explorer 2.0 html strict//en",
      "-//microsoft//dtd internet explorer 2.0 html//en",
      "-//microsoft//dtd internet explorer 2.0 tables//en",
      "-//microsoft//dtd internet explorer 3.0 html strict//en",
      "-//microsoft//dtd internet explorer 3.0 html//en",
      "-//microsoft//dtd internet explorer 3.0 tables//en",
      "-//netscape comm. corp.//dtd html//en",
      "-//netscape comm. corp.//dtd strict html//en",
      "-//o'reilly and associates//dtd html 2.0//en",
      "-//o'reilly and associates//dtd html extended 1.0//en",
      "-//spyglass//dtd html 2.0 extended//en",
      "-//sq//dtd html 2.0 hotmetal + extensions//en",
      "-//sun microsystems corp.//dtd hotjava html//en",
      "-//sun microsystems corp.//dtd hotjava strict html//en",
      "-//w3c//dtd html 3 1995-03-24//en",
      "-//w3c//dtd html 3.2 draft//en",
      "-//w3c//dtd html 3.2 final//en",
      "-//w3c//dtd html 3.2//en",
      "-//w3c//dtd html 3.2s draft//en",
      "-//w3c//dtd html 4.0 frameset//en",
      "-//w3c//dtd html 4.0 transitional//en",
      "-//w3c//dtd html experimental 19960712//en",
      "-//w3c//dtd html experimental 970421//en",
      "-//w3c//dtd w3 html//en",
      "-//w3o//dtd w3 html 3.0//en",
      "-//w3o//dtd w3 html 3.0//en//",
      "-//w3o//dtd w3 html strict 3.0//en//",
      "-//webtechs//dtd mozilla html 2.0//en",
      "-//webtechs//dtd mozilla html//en",
      "-/w3c/dtd html 4.0 transitional/en",
      "html"
    ].freeze

    # Public identifiers (lower-cased) that select quirks mode only when the
    # DOCTYPE carries no system identifier.
    QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID = [
      "-//w3c//dtd html 4.01 frameset//en",
      "-//w3c//dtd html 4.01 transitional//en"
    ].freeze

    # System identifier that selects quirks mode on its own.
    QUIRKS_SYSTEM_ID = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

    # EOF before any DOCTYPE: parse error, then the root element phase takes
    # over and handles the EOF.
    def process_eof
      parse_error("expected-doctype-but-got-eof")
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.process_eof
    end

    # Comments seen before the DOCTYPE are attached to the document itself.
    def processComment(data)
      @tree.insert_comment(data, @tree.document)
    end

    # Records the DOCTYPE in the tree and moves on to the root element phase.
    #
    # Anything other than a bare "html" DOCTYPE (no public or system
    # identifier) is reported as a parse error. Quirks-mode detection is
    # evaluated but not yet acted upon (see the XXX marker below).
    def processDoctype(name, publicId, systemId, correct)
      if name.downcase != 'html' or publicId or systemId
        parse_error("unknown-doctype")
      end
      # XXX need to update DOCTYPE tokens
      @tree.insertDoctype(name, publicId, systemId)

      if quirks_mode_doctype?(name, publicId, systemId)
        # XXX quirks mode
      end

      @parser.phase = @parser.phases[:rootElement]
    end

    # Whitespace before the DOCTYPE is ignored.
    def processSpaceCharacters(data)
    end

    # Text before any DOCTYPE: parse error, then re-dispatched to the root
    # element phase.
    def processCharacters(data)
      parse_error("expected-doctype-but-got-chars")
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processCharacters(data)
    end

    # Start tag before any DOCTYPE: parse error, then re-dispatched to the
    # root element phase.
    def processStartTag(name, attributes)
      parse_error("expected-doctype-but-got-start-tag", {"name" => name})
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processStartTag(name, attributes)
    end

    # End tag before any DOCTYPE: parse error, then re-dispatched to the root
    # element phase.
    def processEndTag(name)
      parse_error("expected-doctype-but-got-end-tag", {"name" => name})
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processEndTag(name)
    end

    protected

    # True when the DOCTYPE should put the document into quirks mode.
    #
    # The public identifier is lower-cased before the lookup because the
    # identifier tables are stored in lower case; comparing an upper-cased
    # identifier against them could never match.
    def quirks_mode_doctype?(name, publicId, systemId)
      return true if name.downcase != 'html'

      public_id = publicId.to_s.downcase
      QUIRKS_PUBLIC_IDS.include?(public_id) ||
        (systemId.nil? && QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID.include?(public_id)) ||
        systemId == QUIRKS_SYSTEM_ID
    end
  end
end
@@ -0,0 +1,154 @@
1
require 'forwardable'

module HTML5
  # Base class for helper objects that implement each phase of processing.
  #
  # Handler methods should be in the following order (they can be omitted):
  #
  # * EOF
  # * Comment
  # * Doctype
  # * SpaceCharacters
  # * Characters
  # * StartTag
  #   - startTag* methods
  # * EndTag
  #   - endTag* methods
  #
  class Phase

    extend Forwardable
    def_delegators :@parser, :parse_error

    # Builds a tag-name => handler-method-name mapping.
    #
    # Plain tag names (or arrays of names) get a handler named after the
    # capitalised tag names; a trailing Hash maps names to an explicit
    # handler suffix.
    #
    # The following example call:
    #
    #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
    #
    # ...would return a hash equal to this:
    #
    #   { 'html' => 'startTagHtml',
    #     'base' => 'startTagBaseLinkMeta',
    #     'link' => 'startTagBaseLinkMeta',
    #     'meta' => 'startTagBaseLinkMeta',
    #     'li'   => 'startTagListItem',
    #     'dt'   => 'startTagListItem',
    #     'dd'   => 'startTagListItem' }
    #
    def self.tag_handlers(prefix, *tags)
      mapping = {}
      if tags.last.is_a?(Hash)
        tags.pop.each do |names, handler_method_suffix|
          handler_method = prefix + handler_method_suffix
          Array(names).each { |name| mapping[name] = handler_method }
        end
      end
      tags.each do |names|
        names = Array(names)
        handler_method = prefix + names.map { |name| name.capitalize }.join
        names.each { |name| mapping[name] = handler_method }
      end
      mapping
    end

    # Per-class table of start tag handlers; unknown tags fall back to
    # 'startTagOther'.
    def self.start_tag_handlers
      @start_tag_handlers ||= Hash.new('startTagOther')
    end

    # Declare what start tags this Phase handles. Can be called more than once.
    #
    # Example usage:
    #
    #   handle_start 'html'
    #   # html start tags will be handled by a method named 'startTagHtml'
    #
    #   handle_start %w( base link meta )
    #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
    #
    #   handle_start %w( li dt dd ) => 'ListItem'
    #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
    #
    def self.handle_start(*tags)
      start_tag_handlers.update tag_handlers('startTag', *tags)
    end

    # Per-class table of end tag handlers; unknown tags fall back to
    # 'endTagOther'.
    def self.end_tag_handlers
      @end_tag_handlers ||= Hash.new('endTagOther')
    end

    # Declare what end tags this Phase handles. Behaves like handle_start.
    #
    def self.handle_end(*tags)
      end_tag_handlers.update tag_handlers('endTag', *tags)
    end

    # parser:: object that owns the phases and receives parse errors
    # tree::   tree builder the phase manipulates
    def initialize(parser, tree)
      @parser, @tree = parser, tree
    end

    # Generates implied end tags and reports elements left open at EOF.
    def process_eof
      @tree.generateImpliedEndTags

      if @tree.open_elements.length > 2
        parse_error("expected-closing-tag-but-got-eof")
      elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
        # This happens for framesets or something?
        parse_error("expected-closing-tag-but-got-eof")
      elsif @parser.inner_html and @tree.open_elements.length > 1
        # XXX This is not what the specification says. Not sure what to do here.
        parse_error("eof-in-innerhtml")
      end
      # Betting ends.
    end

    def processComment(data)
      # For most phases the following is correct. Where it's not it will be
      # overridden.
      @tree.insert_comment(data, @tree.open_elements.last)
    end

    # A DOCTYPE anywhere but the initial phase is a parse error.
    def processDoctype(name, publicId, systemId, correct)
      parse_error("unexpected-doctype")
    end

    def processSpaceCharacters(data)
      @tree.insertText(data)
    end

    # Dispatches a start tag to the handler registered via handle_start.
    def processStartTag(name, attributes)
      send self.class.start_tag_handlers[name], name, attributes
    end

    # A repeated <html> start tag: attributes not already present on the root
    # element (the first open element) are copied onto it.
    def startTagHtml(name, attributes)
      if @parser.first_start_tag == false and name == 'html'
        parse_error("non-html-root")
      end
      # XXX Need a check here to see if the first start tag token emitted is
      # this token... If it's not, invoke parse_error.
      attributes.each do |attr, value|
        unless @tree.open_elements.first.attributes.has_key?(attr)
          @tree.open_elements.first.attributes[attr] = value
        end
      end
      @parser.first_start_tag = false
    end

    # Dispatches an end tag to the handler registered via handle_end.
    def processEndTag(name)
      send self.class.end_tag_handlers[name], name
    end

    # Raises AssertionError unless +value+ is truthy.
    #
    # Uses raise rather than throw: throw is for non-local jumps to a
    # matching catch and would surface as an "uncaught throw" error instead
    # of the intended exception. AssertionError is not defined in this file
    # and is expected to be an exception class in the HTML5 namespace.
    def assert(value)
      raise AssertionError unless value
    end

    # Asks the tree builder whether an element is in scope; arguments are
    # passed straight through to @tree.elementInScope.
    def in_scope?(*args)
      @tree.elementInScope(*args)
    end

    # Pops open elements until one named +name+ has been popped or, when no
    # name is given, until the block returns true for the popped element.
    # Returns the last element popped.
    def remove_open_elements_until(name=nil)
      finished = false
      until finished
        element = @tree.open_elements.pop
        finished = name.nil? ? yield(element) : element.name == name
      end
      return element
    end
  end
end