feedtools 0.2.26 → 0.2.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,69 @@
require 'html5/html5parser/phase'

module HTML5
  # Tree-construction phase for content inside a <caption> element.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
  class InCaptionPhase < Phase

    # Handler registration. A hash entry maps a list of tag names to a handler
    # suffix (e.g. 'TableElement' -> startTagTableElement); bare names
    # presumably resolve to a handler named after the tag -- confirm in Phase.
    handle_start 'html', %w[caption col colgroup tbody td tfoot th thead tr] => 'TableElement'

    handle_end 'caption', 'table', %w[body col colgroup html tbody td tfoot th thead tr] => 'Ignore'

    # Tells whether a </caption> end tag has to be ignored, i.e. whether
    # in_scope?('caption', true) reports that no caption is in scope.
    def ignoreEndTagCaption
      caption_in_scope = in_scope?('caption', true)
      !caption_in_scope
    end

    # Character data is forwarded unchanged to the :inBody phase.
    def processCharacters(data)
      body_phase = @parser.phases[:inBody]
      body_phase.processCharacters(data)
    end

    # A table-structure start tag implicitly closes the caption: an end tag
    # for 'caption' is processed first and, unless that end tag was ignored,
    # the start tag is re-dispatched to whatever phase is then current.
    #
    # NOTE(review): the error code reads "unexpected-end-tag" although this
    # handles a start tag; kept exactly as found -- confirm it is intended.
    def startTagTableElement(name, attributes)
      parse_error("unexpected-end-tag", {"name" => name})
      # The ignore decision has to be taken before the end tag is processed,
      # because processing it may change the stack of open elements.
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      return if caption_ignored

      @parser.phase.processStartTag(name, attributes)
    end

    # Every other start tag is handled by the :inBody phase.
    def startTagOther(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # Closes the caption and switches to the :inTable phase, or reports a
    # parse error when the end tag has to be ignored.
    def endTagCaption(name)
      if ignoreEndTagCaption
        # inner_html case
        assert @parser.inner_html
        return parse_error("unexpected-end-tag", {"name" => name})
      end

      # AT this code is quite similar to endTagTable in "InTable"
      @tree.generateImpliedEndTags

      if @tree.open_elements[-1].name != 'caption'
        parse_error("expected-one-end-tag-but-got-another",
                    {"gotName" => "caption",
                     "expectedName" => @tree.open_elements.last.name})
      end

      remove_open_elements_until('caption')

      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inTable]
    end

    # </table> inside a caption: report it, close the caption, then
    # re-dispatch the end tag unless closing the caption was ignored.
    def endTagTable(name)
      parse_error("unexpected-end-table-in-caption")
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      return if caption_ignored

      @parser.phase.processEndTag(name)
    end

    # End tags registered under 'Ignore' only produce a parse error.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # Every other end tag is handled by the :inBody phase.
    def endTagOther(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end
  end
end
@@ -0,0 +1,78 @@
require 'html5/html5parser/phase'

module HTML5
  # Tree-construction phase for content inside a table cell (<td> / <th>).
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
  class InCellPhase < Phase

    # Handler registration: each hash entry maps a list of tag names to the
    # suffix of the startTag*/endTag* method defined below.
    handle_start 'html', %w[caption col colgroup tbody td tfoot th thead tr] => 'TableOther'

    handle_end %w[td th] => 'TableCell', %w[body caption col colgroup html] => 'Ignore'

    handle_end %w[table tbody tfoot thead tr] => 'Imply'

    # Character data is forwarded unchanged to the :inBody phase.
    def processCharacters(data)
      body_phase = @parser.phases[:inBody]
      body_phase.processCharacters(data)
    end

    # A table-structure start tag closes the open cell (when a td or th is
    # in scope) and is then re-dispatched to the phase that is current
    # afterwards; otherwise only a parse error is reported.
    def startTagTableOther(name, attributes)
      cell_in_scope = in_scope?('td', true) || in_scope?('th', true)
      unless cell_in_scope
        # inner_html case
        return parse_error
      end

      closeCell
      @parser.phase.processStartTag(name, attributes)
    end

    # Every other start tag is handled by the :inBody phase.
    def startTagOther(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # Handles </td> and </th>: closes the cell named +name+ and switches to
    # the :inRow phase, or reports a parse error when no such cell is in
    # scope.
    def endTagTableCell(name)
      unless in_scope?(name, true)
        return parse_error("unexpected-end-tag", {"name" => name})
      end

      @tree.generateImpliedEndTags(name)
      if @tree.open_elements.last.name == name
        @tree.open_elements.pop
      else
        # The current node is not the cell itself: report it and unwind the
        # stack of open elements via remove_open_elements_until.
        parse_error("unexpected-cell-end-tag", {"name" => name})

        remove_open_elements_until(name)
      end
      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inRow]
    end

    # End tags registered under 'Ignore' only produce a parse error.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # End tags registered under 'Imply' close the current cell first and are
    # then re-dispatched, provided an element called +name+ is in scope.
    def endTagImply(name)
      unless in_scope?(name, true)
        # sometimes inner_html case
        return parse_error
      end

      closeCell
      @parser.phase.processEndTag(name)
    end

    # Every other end tag is handled by the :inBody phase.
    def endTagOther(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end

    protected

    # Closes whichever cell is in scope, checking 'td' before 'th'; does
    # nothing when neither is in scope.
    def closeCell
      open_cell = %w[td th].find { |cell| in_scope?(cell, true) }
      endTagTableCell(open_cell) if open_cell
    end

  end
end
@@ -0,0 +1,55 @@
require 'html5/html5parser/phase'

module HTML5
  # Tree-construction phase for content inside a <colgroup> element.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-column
  class InColumnGroupPhase < Phase

    handle_start 'html', 'col'

    handle_end 'colgroup', 'col'

    # Tells whether a </colgroup> end tag has to be ignored, which is the
    # case when the current node is the 'html' element.
    def ignoreEndTagColgroup
      current_node = @tree.open_elements[-1]
      current_node.name == 'html'
    end

    # Character data implicitly closes the colgroup; the data is then
    # re-dispatched to the new current phase unless the close was ignored.
    def processCharacters(data)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup("colgroup")
      return if colgroup_ignored

      @parser.phase.processCharacters(data)
    end

    # <col>: the element is inserted and immediately popped off the stack of
    # open elements again.
    def startTagCol(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # Any other start tag implicitly closes the colgroup and is then
    # re-dispatched unless the close was ignored.
    def startTagOther(name, attributes)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      return if colgroup_ignored

      @parser.phase.processStartTag(name, attributes)
    end

    # Pops the colgroup and switches to the :inTable phase, or reports a
    # parse error when the end tag has to be ignored.
    def endTagColgroup(name)
      unless ignoreEndTagColgroup
        @tree.open_elements.pop
        return @parser.phase = @parser.phases[:inTable]
      end

      # inner_html case
      assert @parser.inner_html
      parse_error
    end

    # </col> is always a parse error.
    def endTagCol(name)
      parse_error("no-end-tag", {"name" => "col"})
    end

    # Any other end tag implicitly closes the colgroup and is then
    # re-dispatched unless the close was ignored.
    def endTagOther(name)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      return if colgroup_ignored

      @parser.phase.processEndTag(name)
    end

  end
end
@@ -0,0 +1,57 @@
require 'html5/html5parser/phase'

module HTML5
  # Tree-construction phase for content inside a <frameset> element.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  class InFramesetPhase < Phase

    handle_start 'html', 'frameset', 'frame', 'noframes'

    handle_end 'frameset', 'noframes'

    # Character data is not accepted here; it only produces a parse error.
    def processCharacters(data)
      parse_error("unexpected-char-in-frameset")
    end

    # A nested <frameset> is inserted and stays on the stack of open
    # elements.
    def startTagFrameset(name, attributes)
      @tree.insert_element(name, attributes)
    end

    # <frame>: the element is inserted and immediately popped off the stack
    # of open elements again.
    def startTagFrame(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # <noframes> is handled by the :inBody phase.
    def startTagNoframes(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # Any other start tag only produces a parse error.
    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-in-frameset", {"name" => name})
    end

    # </frameset>: pops the current frameset (or reports a parse error when
    # the current node is 'html') and, where appropriate, switches to the
    # :afterFrameset phase.
    def endTagFrameset(name)
      if @tree.open_elements.last.name == 'html'
        # inner_html case
        parse_error("unexpected-frameset-in-frameset-innerhtml")
      else
        @tree.open_elements.pop
      end

      # Switch only if we're not in inner_html mode and the current node is
      # not a "frameset" element (anymore).
      return if @parser.inner_html
      return if @tree.open_elements.last.name == 'frameset'

      @parser.phase = @parser.phases[:afterFrameset]
    end

    # </noframes> is handled by the :inBody phase.
    def endTagNoframes(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end

    # Any other end tag only produces a parse error.
    def endTagOther(name)
      parse_error("unexpected-end-tag-in-frameset", {"name" => name})
    end
  end
end
@@ -0,0 +1,138 @@
require 'html5/html5parser/phase'

module HTML5
  # Tree-construction phase for content inside the <head> element.
  class InHeadPhase < Phase

    # Handler registration. A hash entry maps tag names to a handler suffix
    # (e.g. 'ImplyAfterHead' -> endTagImplyAfterHead). A bare list appears to
    # resolve to a handler named after the joined tag names (see
    # startTagBaseLinkMeta / endTagTitleStyleScriptNoscript) -- confirm in
    # Phase.
    handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
    handle_start %w( base link meta )

    handle_end 'head'
    handle_end %w( html body br p ) => 'ImplyAfterHead'
    handle_end %w( title style script noscript )

    # End of input: a still-open title/style/script element is reported and
    # popped, then the phase is left via anything_else and EOF handling is
    # passed on to the phase that is current afterwards.
    def process_eof
      name = @tree.open_elements.last.name
      if %w[title style script].include?(name)
        parse_error("expected-named-closing-tag-but-got-eof", {"name" => name})
        @tree.open_elements.pop
      end
      anything_else
      @parser.phase.process_eof
    end

    # Text is inserted directly while the current node is a
    # title/style/script/noscript element; otherwise the phase is left and
    # the data is re-dispatched.
    def processCharacters(data)
      if %w[title style script noscript].include?(@tree.open_elements.last.name)
        @tree.insertText(data)
      else
        anything_else
        @parser.phase.processCharacters(data)
      end
    end

    # A second <head> start tag only produces a parse error.
    def startTagHead(name, attributes)
      parse_error("two-heads-are-not-better-than-one")
    end

    # <title>: always appended via appendToHead; the tokenizer is switched to
    # the :RCDATA content model.
    def startTagTitle(name, attributes)
      element = @tree.createElement(name, attributes)
      appendToHead(element)
      @tree.open_elements.push(element)
      @parser.tokenizer.content_model_flag = :RCDATA
    end

    # <style>: attached to the head or the current node; the tokenizer is
    # switched to the :CDATA content model.
    def startTagStyle(name, attributes)
      element = @tree.createElement(name, attributes)
      appendToHeadOrCurrentNode(element)
      @tree.open_elements.push(element)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <noscript>: treated like <style>.
    def startTagNoscript(name, attributes)
      # XXX Need to decide whether to implement the scripting disabled case.
      element = @tree.createElement(name, attributes)
      appendToHeadOrCurrentNode(element)
      @tree.open_elements.push(element)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <script>: like <style>, with the element additionally flagged as
    # "parser-inserted" before it is attached.
    def startTagScript(name, attributes)
      #XXX Inner HTML case may be wrong
      element = @tree.createElement(name, attributes)
      element._flags.push("parser-inserted")
      appendToHeadOrCurrentNode(element)
      @tree.open_elements.push(element)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <base>, <link>, <meta>: attached to the head or the current node and
    # not pushed onto the stack of open elements.
    def startTagBaseLinkMeta(name, attributes)
      element = @tree.createElement(name, attributes)
      appendToHeadOrCurrentNode(element)
    end

    # Any other start tag leaves the phase and is re-dispatched.
    def startTagOther(name, attributes)
      anything_else
      @parser.phase.processStartTag(name, attributes)
    end

    # </head>: pops the head element when it is the current node (otherwise
    # reports a parse error) and always switches to the :afterHead phase.
    def endTagHead(name)
      if @tree.open_elements.last.name == 'head'
        @tree.open_elements.pop
      else
        parse_error("unexpected-end-tag", {"name" => "head"})
      end
      @parser.phase = @parser.phases[:afterHead]
    end

    # </html>, </body>, </br>, </p>: leave the phase and re-dispatch.
    def endTagImplyAfterHead(name)
      anything_else
      @parser.phase.processEndTag(name)
    end

    # </title>, </style>, </script>, </noscript>: pops the current node when
    # it matches +name+, otherwise reports a parse error.
    def endTagTitleStyleScriptNoscript(name)
      if @tree.open_elements.last.name == name
        @tree.open_elements.pop
      else
        parse_error("unexpected-end-tag", {"name" => name})
      end
    end

    # Any other end tag only produces a parse error.
    def endTagOther(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # Leaves this phase: goes through endTagHead when the current node is the
    # head element, otherwise switches straight to the :afterHead phase.
    def anything_else
      if @tree.open_elements.last.name == 'head'
        endTagHead('head')
      else
        @parser.phase = @parser.phases[:afterHead]
      end
    end

    protected

    # Appends +element+ to the head element, falling back to the current node
    # when no head pointer is set (asserted to be the inner_html case).
    def appendToHead(element)
      if @tree.head_pointer.nil?
        assert @parser.inner_html
        @tree.open_elements.last.appendChild(element)
      else
        @tree.head_pointer.appendChild(element)
      end
    end

    # Shared by the style/noscript/script/base/link/meta handlers: appends
    # +element+ via appendToHead when a head pointer exists and the parser's
    # current phase is the :inHead phase, and to the current node otherwise.
    def appendToHeadOrCurrentNode(element)
      if !@tree.head_pointer.nil? && @parser.phase == @parser.phases[:inHead]
        appendToHead(element)
      else
        @tree.open_elements.last.appendChild(element)
      end
    end

  end
end
@@ -0,0 +1,89 @@
require 'html5/html5parser/phase'

module HTML5
  # Tree-construction phase for content inside a table row (<tr>).
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-row
  class InRowPhase < Phase

    # Handler registration: each hash entry maps a list of tag names to the
    # suffix of the startTag*/endTag* method defined below.
    handle_start 'html', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'

    handle_end 'tr', 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th ) => 'Ignore'

    # Character data is handled by the :inTable phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <td> / <th>: clears the stack back to the row, inserts the cell,
    # switches to the :inCell phase and pushes a Marker onto the list of
    # active formatting elements.
    def startTagTableCell(name, attributes)
      clearStackToTableRowContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCell]
      @tree.activeFormattingElements.push(Marker)
    end

    # A table-structure start tag implicitly closes the row and is then
    # re-dispatched unless the implied </tr> was ignored.
    def startTagTableOther(name, attributes)
      # The decision is taken before endTagTr runs, since closing the row
      # changes the stack of open elements.
      row_end_ignored = ignoreEndTagTr
      endTagTr('tr')
      # XXX how are we sure it's always ignored in the inner_html case?
      return if row_end_ignored

      @parser.phase.processStartTag(name, attributes)
    end

    # Every other start tag is handled by the :inTable phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tr>: clears the stack back to the row, pops it and switches to the
    # :inTableBody phase, or reports a parse error when the end tag has to be
    # ignored.
    def endTagTr(name)
      if ignoreEndTagTr
        # inner_html case
        assert @parser.inner_html
        parse_error
      else
        clearStackToTableRowContext
        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTableBody]
      end
    end

    # </table>: implicitly closes the row, then reprocesses the current tag
    # if the tr end tag was not ignored.
    def endTagTable(name)
      row_end_ignored = ignoreEndTagTr
      endTagTr('tr')
      # XXX how are we sure it's always ignored in the inner_html case?
      return if row_end_ignored

      @parser.phase.processEndTag(name)
    end

    # </tbody>, </tfoot>, </thead>: when an element called +name+ is in
    # scope, the row is closed and the end tag re-dispatched; otherwise only
    # a parse error is reported.
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        endTagTr('tr')
        @parser.phase.processEndTag(name)
      else
        # inner_html case
        parse_error
      end
    end

    # End tags registered under 'Ignore' only produce a parse error.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag-in-table-row", {"name" => name})
    end

    # Every other end tag is handled by the :inTable phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # Pops open elements, reporting a parse error for each one, until the
    # current node is a 'tr' or 'html' element.
    # XXX unify this with other table helper methods
    def clearStackToTableRowContext
      until %w[tr html].include?(current = @tree.open_elements.last.name)
        parse_error("unexpected-implied-end-tag-in-table-row", {"name" => current})
        @tree.open_elements.pop
      end
    end

    # Tells whether a </tr> end tag has to be ignored, i.e. whether in_scope?
    # reports that no 'tr' is in scope.
    #
    # NOTE(review): the second argument is an options-style hash, kept
    # exactly as found -- confirm that in_scope? treats it as intended.
    def ignoreEndTagTr
      !in_scope?('tr', :tableVariant => true)
    end

  end
end