feedtools 0.2.26 → 0.2.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,85 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Phase that handles tokens seen while a <select> element is open.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-select
  class InSelectPhase < Phase

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select',
               %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    # Character data is inserted into the tree unchanged.
    def processCharacters(data)
      @tree.insertText(data)
    end

    # <option>: an <option> that is still the current node is implicitly
    # closed before the new element is inserted.
    def startTagOption(name, attributes)
      pop_current_node_if('option')
      @tree.insert_element(name, attributes)
    end

    # <optgroup>: implicitly closes an open <option> and then an open
    # <optgroup> (in that order) before the new element is inserted.
    def startTagOptgroup(name, attributes)
      %w[option optgroup].each { |tag| pop_current_node_if(tag) }
      @tree.insert_element(name, attributes)
    end

    # A <select> start tag inside <select> is reported as a parse error and
    # then handled exactly like </select>.
    def startTagSelect(name, attributes)
      parse_error("unexpected-select-in-select")
      endTagSelect('select')
    end

    # Any other start tag is reported and otherwise ignored.
    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-in-select", {"name" => name})
    end

    # </option> only closes an <option> that is the current node.
    def endTagOption(name)
      return @tree.open_elements.pop if current_node_named?('option')

      parse_error("unexpected-end-tag-in-select", {"name" => "option"})
    end

    def endTagOptgroup(name)
      # </optgroup> implicitly closes an <option> that sits directly inside
      # an <optgroup> ...
      closes_option = current_node_named?('option') &&
                      @tree.open_elements[-2].name == 'optgroup'
      @tree.open_elements.pop if closes_option

      # ... and then the <optgroup> itself, but nothing else.
      return @tree.open_elements.pop if current_node_named?('optgroup')

      parse_error("unexpected-end-tag-in-select", {"name" => "optgroup"})
    end

    def endTagSelect(name)
      # No <select> in scope: the inner_html case.
      return parse_error unless in_scope?('select', true)

      remove_open_elements_until('select')
      @parser.reset_insertion_mode
    end

    # End tags of table elements are always reported; when the element is in
    # scope the <select> is closed first and the end tag is then re-processed
    # by whichever phase the parser switched to.
    def endTagTableElements(name)
      parse_error("unexpected-end-tag-in-select", {"name" => name})
      return unless in_scope?(name, true)

      endTagSelect('select')
      @parser.phase.processEndTag(name)
    end

    # Any other end tag is reported and otherwise ignored.
    def endTagOther(name)
      parse_error("unexpected-end-tag-in-select", {"name" => name})
    end

    protected

    # True when the current node (top of the open-elements stack) is +tag+.
    def current_node_named?(tag)
      @tree.open_elements.last.name == tag
    end

    # Pops the current node off the open-elements stack if it is +tag+.
    def pop_current_node_if(tag)
      @tree.open_elements.pop if current_node_named?(tag)
    end

  end
end
@@ -0,0 +1,86 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Phase that handles tokens seen while a <tbody>, <thead> or <tfoot>
  # element is open.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
  class InTableBodyPhase < Phase

    handle_start 'html', 'tr',
                 %w( td th ) => 'TableCell',
                 %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

    handle_end 'table',
               %w( tbody tfoot thead ) => 'TableRowGroup',
               %w( body caption col colgroup html td th tr ) => 'Ignore'

    # Character data is handled by the "in table" phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <tr>: insert the row and switch to the "in row" phase.
    def startTagTr(name, attributes)
      clearStackToTableBodyContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inRow]
    end

    # <td>/<th> directly inside a row group: report it, open an implied <tr>
    # and let the phase that is now current process the cell.
    def startTagTableCell(name, attributes)
      parse_error("unexpected-cell-in-table-body", {"name" => name})
      startTagTr('tr', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # A start tag that ends the current row group: close the group and hand
    # the tag to the phase that is now current.
    def startTagTableOther(name, attributes)
      # inner_html case when no row group is in scope
      return parse_error unless row_group_in_scope?

      close_current_row_group
      @parser.phase.processStartTag(name, attributes)
    end

    # Everything else is handled by the "in table" phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tbody>, </thead>, </tfoot>: close the group when it is in scope and
    # return to the "in table" phase; otherwise report a parse error.
    def endTagTableRowGroup(name)
      unless in_scope?(name, true)
        return parse_error("unexpected-end-tag-in-table-body",
                           {"name" => name})
      end

      clearStackToTableBodyContext
      @tree.open_elements.pop
      @parser.phase = @parser.phases[:inTable]
    end

    # </table>: close the current row group, then let the phase that is now
    # current process the end tag.
    def endTagTable(name)
      # inner_html case when no row group is in scope
      return parse_error unless row_group_in_scope?

      close_current_row_group
      @parser.phase.processEndTag(name)
    end

    # End tags that are reported and otherwise ignored in this phase.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag-in-table-body",
                  {"name" => name})
    end

    # Everything else is handled by the "in table" phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # Pops open elements, reporting each one, until the current node is
    # tbody, tfoot, thead or html.
    def clearStackToTableBodyContext
      loop do
        current_name = @tree.open_elements.last.name
        break if %w[tbody tfoot thead html].include?(current_name)

        parse_error("unexpected-implied-end-tag-in-table",
                    {"name" => current_name})
        @tree.open_elements.pop
      end
    end

    # True when a tbody, thead or tfoot element is in scope (checked in that
    # order, stopping at the first hit).
    def row_group_in_scope?
      %w[tbody thead tfoot].any? { |tag| in_scope?(tag, true) }
    end

    # Clears the stack back to the row group and closes that group as if its
    # end tag had been seen.
    def close_current_row_group
      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.open_elements.last.name)
    end

  end
end
@@ -0,0 +1,115 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Phase that handles tokens seen while a <table> element is open.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table
  class InTablePhase < Phase

    handle_start 'html', 'caption', 'colgroup', 'col', 'table'

    handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'

    handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

    # Character data is reported and then processed by the "in body" phase
    # with the tree's insert_from_table flag switched on.
    def processCharacters(data)
      parse_error("unexpected-char-implies-table-voodoo")
      with_insert_from_table { |in_body| in_body.processCharacters(data) }
    end

    # <caption>: push a Marker onto the active formatting elements, insert
    # the element and switch to the "in caption" phase.
    def startTagCaption(name, attributes)
      clearStackToTableContext
      @tree.activeFormattingElements.push(Marker)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCaption]
    end

    # <colgroup>: insert the element and switch to the "in column group" phase.
    def startTagColgroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inColumnGroup]
    end

    # <col>: open an implied <colgroup>, then let the phase that is now
    # current process the tag.
    def startTagCol(name, attributes)
      startTagColgroup('colgroup', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # <tbody>, <tfoot>, <thead>: insert the element and switch to the
    # "in table body" phase.
    def startTagRowGroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inTableBody]
    end

    # <td>, <th>, <tr>: open an implied <tbody>, then let the phase that is
    # now current process the tag.
    def startTagImplyTbody(name, attributes)
      startTagRowGroup('tbody', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # A nested <table> start tag implies </table>; the start tag is then
    # re-processed unless the parser is in inner_html mode.
    def startTagTable(name, attributes)
      parse_error("unexpected-start-tag-implies-end-tag",
                  {"startName" => "table", "endName" => "table"})
      @parser.phase.processEndTag('table')
      @parser.phase.processStartTag(name, attributes) unless @parser.inner_html
    end

    # Any other start tag is reported and then processed by the "in body"
    # phase with the tree's insert_from_table flag switched on.
    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-implies-table-voodoo",
                  {"name" => name})
      with_insert_from_table { |in_body| in_body.processStartTag(name, attributes) }
    end

    def endTagTable(name)
      unless in_scope?('table', true)
        # inner_html case
        assert @parser.inner_html
        return parse_error
      end

      @tree.generateImpliedEndTags

      current_name = @tree.open_elements.last.name
      if current_name != 'table'
        parse_error("end-tag-too-early-named",
                    {"gotName" => "table",
                     "expectedName" => current_name})
      end

      remove_open_elements_until('table')

      @parser.reset_insertion_mode
    end

    # End tags that are reported and otherwise ignored in this phase.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # Any other end tag is reported and then processed by the "in body"
    # phase with the tree's insert_from_table flag switched on.
    def endTagOther(name)
      parse_error("unexpected-end-tag-implies-table-voodoo", {"name" => name})
      with_insert_from_table { |in_body| in_body.processEndTag(name) }
    end

    protected

    # "clear the stack back to a table context": pops open elements,
    # reporting each one, until the current node is table or html.
    # When the current node is <html> it's an inner_html case.
    def clearStackToTableContext
      loop do
        current_name = @tree.open_elements.last.name
        break if %w[table html].include?(current_name)

        parse_error("unexpected-implied-end-tag-in-table",
                    {"name" => current_name})
        @tree.open_elements.pop
      end
    end

    # Makes all the special element rearranging voodoo kick in: switches the
    # tree's insert_from_table flag on, yields the "in body" phase, then
    # switches the flag off again.
    def with_insert_from_table
      @tree.insert_from_table = true
      yield @parser.phases[:inBody]
      @tree.insert_from_table = false
    end

  end
end
@@ -0,0 +1,133 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # First phase of tree construction: everything seen before a DOCTYPE.
  #
  # This phase deals with error handling as well which is currently not
  # covered in the specification. The error handling is typically known as
  # "quirks mode". It is expected that a future version of HTML5 will define this.
  class InitialPhase < Phase

    # Public identifiers (lowercase) that select quirks mode regardless of
    # the system identifier.
    QUIRKS_PUBLIC_IDS = [
      "+//silmaril//dtd html pro v0r11 19970101//en",
      "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
      "-//as//dtd html 3.0 aswedit + extensions//en",
      "-//ietf//dtd html 2.0 level 1//en",
      "-//ietf//dtd html 2.0 level 2//en",
      "-//ietf//dtd html 2.0 strict level 1//en",
      "-//ietf//dtd html 2.0 strict level 2//en",
      "-//ietf//dtd html 2.0 strict//en",
      "-//ietf//dtd html 2.0//en",
      "-//ietf//dtd html 2.1e//en",
      "-//ietf//dtd html 3.0//en",
      "-//ietf//dtd html 3.0//en//",
      "-//ietf//dtd html 3.2 final//en",
      "-//ietf//dtd html 3.2//en",
      "-//ietf//dtd html 3//en",
      "-//ietf//dtd html level 0//en",
      "-//ietf//dtd html level 0//en//2.0",
      "-//ietf//dtd html level 1//en",
      "-//ietf//dtd html level 1//en//2.0",
      "-//ietf//dtd html level 2//en",
      "-//ietf//dtd html level 2//en//2.0",
      "-//ietf//dtd html level 3//en",
      "-//ietf//dtd html level 3//en//3.0",
      "-//ietf//dtd html strict level 0//en",
      "-//ietf//dtd html strict level 0//en//2.0",
      "-//ietf//dtd html strict level 1//en",
      "-//ietf//dtd html strict level 1//en//2.0",
      "-//ietf//dtd html strict level 2//en",
      "-//ietf//dtd html strict level 2//en//2.0",
      "-//ietf//dtd html strict level 3//en",
      "-//ietf//dtd html strict level 3//en//3.0",
      "-//ietf//dtd html strict//en",
      "-//ietf//dtd html strict//en//2.0",
      "-//ietf//dtd html strict//en//3.0",
      "-//ietf//dtd html//en",
      "-//ietf//dtd html//en//2.0",
      "-//ietf//dtd html//en//3.0",
      "-//metrius//dtd metrius presentational//en",
      "-//microsoft//dtd internet explorer 2.0 html strict//en",
      "-//microsoft//dtd internet explorer 2.0 html//en",
      "-//microsoft//dtd internet explorer 2.0 tables//en",
      "-//microsoft//dtd internet explorer 3.0 html strict//en",
      "-//microsoft//dtd internet explorer 3.0 html//en",
      "-//microsoft//dtd internet explorer 3.0 tables//en",
      "-//netscape comm. corp.//dtd html//en",
      "-//netscape comm. corp.//dtd strict html//en",
      "-//o'reilly and associates//dtd html 2.0//en",
      "-//o'reilly and associates//dtd html extended 1.0//en",
      "-//spyglass//dtd html 2.0 extended//en",
      "-//sq//dtd html 2.0 hotmetal + extensions//en",
      "-//sun microsystems corp.//dtd hotjava html//en",
      "-//sun microsystems corp.//dtd hotjava strict html//en",
      "-//w3c//dtd html 3 1995-03-24//en",
      "-//w3c//dtd html 3.2 draft//en",
      "-//w3c//dtd html 3.2 final//en",
      "-//w3c//dtd html 3.2//en",
      "-//w3c//dtd html 3.2s draft//en",
      "-//w3c//dtd html 4.0 frameset//en",
      "-//w3c//dtd html 4.0 transitional//en",
      "-//w3c//dtd html experimental 19960712//en",
      "-//w3c//dtd html experimental 970421//en",
      "-//w3c//dtd w3 html//en",
      "-//w3o//dtd w3 html 3.0//en",
      "-//w3o//dtd w3 html 3.0//en//",
      "-//w3o//dtd w3 html strict 3.0//en//",
      "-//webtechs//dtd mozilla html 2.0//en",
      "-//webtechs//dtd mozilla html//en",
      "-/w3c/dtd html 4.0 transitional/en",
      "html"
    ].freeze

    # Public identifiers (lowercase) that select quirks mode only when the
    # DOCTYPE carries no system identifier.
    QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID = [
      "-//w3c//dtd html 4.01 frameset//en",
      "-//w3c//dtd html 4.01 transitional//en"
    ].freeze

    # System identifier that selects quirks mode on its own.
    QUIRKS_SYSTEM_ID = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

    # EOF before any DOCTYPE: report it and let the root element phase
    # handle the EOF.
    def process_eof
      parse_error("expected-doctype-but-got-eof")
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.process_eof
    end

    # Comments seen in this phase are attached to the document itself.
    def processComment(data)
      @tree.insert_comment(data, @tree.document)
    end

    # Inserts the DOCTYPE into the tree and moves on to the root element
    # phase. Anything other than a bare "html" DOCTYPE (no public or system
    # identifier) is reported as "unknown-doctype".
    #
    # name     - DOCTYPE name; nil is treated like an empty name.
    # publicId - public identifier or nil.
    # systemId - system identifier or nil.
    # correct  - not used by this method.
    def processDoctype(name, publicId, systemId, correct)
      # to_s guards against a DOCTYPE token without a name.
      doctype_name = name.to_s.downcase

      if doctype_name != 'html' or publicId or systemId
        parse_error("unknown-doctype")
      end
      # XXX need to update DOCTYPE tokens
      @tree.insertDoctype(name, publicId, systemId)

      if quirks_doctype?(doctype_name, publicId, systemId)
        # XXX quirks mode
      end

      @parser.phase = @parser.phases[:rootElement]
    end

    # Whitespace before the DOCTYPE is dropped.
    def processSpaceCharacters(data)
    end

    # Text before any DOCTYPE: report it and let the root element phase
    # process the characters.
    def processCharacters(data)
      parse_error("expected-doctype-but-got-chars")
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processCharacters(data)
    end

    # Start tag before any DOCTYPE: report it and let the root element phase
    # process the tag.
    def processStartTag(name, attributes)
      parse_error("expected-doctype-but-got-start-tag", {"name" => name})
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processStartTag(name, attributes)
    end

    # End tag before any DOCTYPE: report it and let the root element phase
    # process the tag.
    def processEndTag(name)
      parse_error("expected-doctype-but-got-end-tag", {"name" => name})
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processEndTag(name)
    end

    protected

    # True when the DOCTYPE selects quirks mode.
    #
    # doctype_name - the DOCTYPE name, already lowercased.
    #
    # The public identifier is lowercased before it is looked up because the
    # lookup tables are lowercase; the system identifier is compared as given.
    def quirks_doctype?(doctype_name, publicId, systemId)
      return true if doctype_name != 'html'

      public_id = publicId.to_s.downcase
      QUIRKS_PUBLIC_IDS.include?(public_id) or
        (systemId.nil? and QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID.include?(public_id)) or
        systemId == QUIRKS_SYSTEM_ID
    end

  end
end
@@ -0,0 +1,154 @@
1
require 'forwardable'

module HTML5
  # Base class for helper objects that implement each phase of processing.
  #
  # Handler methods should be in the following order (they can be omitted):
  #
  #   * EOF
  #   * Comment
  #   * Doctype
  #   * SpaceCharacters
  #   * Characters
  #   * StartTag
  #     - startTag* methods
  #   * EndTag
  #     - endTag* methods
  #
  class Phase

    extend Forwardable
    def_delegators :@parser, :parse_error

    # Builds a tag-name => handler-method-name mapping.
    #
    # prefix - String prepended to every handler name ('startTag'/'endTag').
    # tags   - tag names or arrays of tag names; a trailing Hash maps
    #          name(s) to an explicit handler-name suffix.
    #
    # The following example call:
    #
    #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
    #
    # ...would return a hash equal to this:
    #
    #   { 'html' => 'startTagHtml',
    #     'base' => 'startTagBaseLinkMeta',
    #     'link' => 'startTagBaseLinkMeta',
    #     'meta' => 'startTagBaseLinkMeta',
    #     'li'   => 'startTagListItem',
    #     'dt'   => 'startTagListItem',
    #     'dd'   => 'startTagListItem' }
    #
    def self.tag_handlers(prefix, *tags)
      mapping = {}
      # Explicitly named handlers first; a tag that is also listed plainly
      # is overwritten by the derived name below.
      if tags.last.is_a?(Hash)
        tags.pop.each do |names, handler_method_suffix|
          handler_method = prefix + handler_method_suffix
          Array(names).each {|name| mapping[name] = handler_method }
        end
      end
      tags.each do |names|
        names = Array(names)
        handler_method = prefix + names.map {|name| name.capitalize }.join
        names.each {|name| mapping[name] = handler_method }
      end
      mapping
    end

    # Start tag handler table of this class. Stored in a class-level instance
    # variable, so each subclass has its own table; unknown tags map to
    # 'startTagOther'.
    def self.start_tag_handlers
      @start_tag_handlers ||= Hash.new('startTagOther')
    end

    # Declare what start tags this Phase handles. Can be called more than once.
    #
    # Example usage:
    #
    #   handle_start 'html'
    #   # html start tags will be handled by a method named 'startTagHtml'
    #
    #   handle_start %w( base link meta )
    #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
    #
    #   handle_start %w( li dt dd ) => 'ListItem'
    #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
    #
    def self.handle_start(*tags)
      start_tag_handlers.update tag_handlers('startTag', *tags)
    end

    # End tag handler table of this class; unknown tags map to 'endTagOther'.
    def self.end_tag_handlers
      @end_tag_handlers ||= Hash.new('endTagOther')
    end

    # Declare what end tags this Phase handles. Behaves like handle_start.
    #
    def self.handle_end(*tags)
      end_tag_handlers.update tag_handlers('endTag', *tags)
    end

    # parser - object that receives parse_error and is queried for
    #          inner_html / first_start_tag by the methods below.
    # tree   - tree builder whose open_elements stack this phase works on.
    def initialize(parser, tree)
      @parser, @tree = parser, tree
    end

    # Default EOF handling: generate implied end tags, then report a parse
    # error when elements are still left open.
    def process_eof
      @tree.generateImpliedEndTags

      if @tree.open_elements.length > 2
        parse_error("expected-closing-tag-but-got-eof")
      elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
        # This happens for framesets or something?
        parse_error("expected-closing-tag-but-got-eof")
      elsif @parser.inner_html and @tree.open_elements.length > 1
        # XXX This is not what the specification says. Not sure what to do here.
        parse_error("eof-in-innerhtml")
      end
    end

    def processComment(data)
      # For most phases the following is correct. Where it's not it will be
      # overridden.
      @tree.insert_comment(data, @tree.open_elements.last)
    end

    # A DOCTYPE is a parse error by default.
    def processDoctype(name, publicId, systemId, correct)
      parse_error("unexpected-doctype")
    end

    # Whitespace is inserted into the tree as text by default.
    def processSpaceCharacters(data)
      @tree.insertText(data)
    end

    # Dispatches to the handler registered for +name+ via handle_start, or to
    # startTagOther when none is registered.
    def processStartTag(name, attributes)
      send self.class.start_tag_handlers[name], name, attributes
    end

    # <html> start tag: copies attributes that the first open element does
    # not have yet onto it; existing attributes are never overwritten.
    def startTagHtml(name, attributes)
      if @parser.first_start_tag == false and name == 'html'
        parse_error("non-html-root")
      end
      # XXX Need a check here to see if the first start tag token emitted is
      # this token... If it's not, invoke parse_error.
      attributes.each do |attr, value|
        unless @tree.open_elements.first.attributes.has_key?(attr)
          @tree.open_elements.first.attributes[attr] = value
        end
      end
      @parser.first_start_tag = false
    end

    # Dispatches to the handler registered for +name+ via handle_end, or to
    # endTagOther when none is registered.
    def processEndTag(name)
      send self.class.end_tag_handlers[name], name
    end

    # Raises AssertionError unless +value+ is truthy.
    #
    # NOTE(review): AssertionError is not defined in this file; it is assumed
    # to be an exception class defined elsewhere in the library.
    def assert(value)
      raise AssertionError unless value
    end

    # Forwards all arguments to @tree.elementInScope.
    def in_scope?(*args)
      @tree.elementInScope(*args)
    end

    # Pops open elements until the popped element is called +name+ or, when
    # no name is given, until the block returns a true value for the popped
    # element.
    #
    # Returns the element that ended the search, or nil when the stack ran
    # out first (stopping there keeps the loop from spinning on an empty
    # stack).
    def remove_open_elements_until(name=nil)
      element = nil
      finished = false
      until finished
        element = @tree.open_elements.pop
        finished = element.nil? ||
                   (name.nil? ? yield(element) : element.name == name)
      end
      element
    end
  end
end