feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166)
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,613 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Parser phase that handles tokens while the tree builder is "in body".
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-body
  #
  # The handle_start / handle_end declarations below associate tag names with
  # handler name suffixes. NOTE(review): the dispatch itself lives in Phase and
  # is not visible here; presumably a suffix 'Foo' selects startTagFoo /
  # endTagFoo, and a bare tag name selects the handler named after the tag.
  #
  # Method names keep the camelCase spelling used throughout this parser.
  class InBodyPhase < Phase

    handle_start 'html'
    handle_start %w(base link meta script style) => 'ProcessInHead'
    handle_start 'title'

    handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

    handle_start 'input', 'textarea', 'select', 'isindex', %w(marquee object)

    handle_start %w(li dd dt) => 'ListItem'

    handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP'

    handle_start %w(b big em font i s small strike strong tt u) => 'Formatting'
    handle_start 'nobr'

    handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting'

    handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading'

    handle_start %w(caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr) => 'Misplaced'

    handle_start %w(event-source section nav article aside header footer datagrid command) => 'New'

    handle_end 'p', 'body', 'html', 'form', %w(button marquee object), %w(dd dt li) => 'ListItem'

    handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block'

    handle_end HEADING_ELEMENTS => 'Heading'

    handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting'

    handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced'

    handle_end 'br'

    handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None'

    handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'

    handle_end %w(event-source section nav article aside header footer datagrid command) => 'New'

    # Builds the phase and remembers the regular whitespace handler under the
    # name processSpaceCharactersNonPre so it can be restored after a
    # temporary swap to processSpaceCharactersDropNewline.
    #
    # $-w is switched off around the alias and restored afterwards, presumably
    # to silence Ruby's "method redefined" warning.
    #
    # NOTE(review): `alias` inside a method edits the class's method table, so
    # the swap is shared by every InBodyPhase instance, not just this one.
    def initialize(parser, tree)
      super(parser, tree)

      # for special handling of whitespace in <pre>
      # NOTE(review): this flag is assigned in several handlers but never read
      # anywhere in this class.
      @processSpaceCharactersDropNewline = false
      if $-w
        $-w = false
        alias processSpaceCharactersNonPre processSpaceCharacters
        $-w = true
      else
        alias processSpaceCharactersNonPre processSpaceCharacters
      end
    end

    # Whitespace handler used right after a start tag whose first newline must
    # be ignored. It first restores the regular handler, then drops one leading
    # "\n" when the current node is an empty <pre> or <textarea>, and inserts
    # whatever text remains.
    def processSpaceCharactersDropNewline(data)
      # Sometimes (start of <pre> blocks) we want to drop leading newlines

      if $-w
        $-w = false
        alias processSpaceCharacters processSpaceCharactersNonPre
        $-w = true
      else
        alias processSpaceCharacters processSpaceCharactersNonPre
      end

      if (data.length > 0 && data[0] == ?\n &&
          %w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
        data = data[1..-1]
      end

      if data.length > 0
        @tree.reconstructActiveFormattingElements
        @tree.insertText(data)
      end
    end

    # Inserts whitespace text after reconstructing the active formatting
    # elements.
    def processSpaceCharacters(data)
      @tree.reconstructActiveFormattingElements()
      @tree.insertText(data)
    end

    # Inserts character data after reconstructing the active formatting
    # elements.
    def processCharacters(data)
      # XXX The specification says to do this for every character at the
      # moment, but apparently that doesn't match the real world so we don't
      # do it for space characters.
      @tree.reconstructActiveFormattingElements
      @tree.insertText(data)
    end

    # Delegates base/link/meta/script/style start tags to the inHead phase.
    def startTagProcessInHead(name, attributes)
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # <title> in body: reports a parse error, then lets the inHead phase
    # process the tag.
    def startTagTitle(name, attributes)
      parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # A second <body> start tag: always a parse error. When the second open
    # element is the body, attributes it does not already have are copied onto
    # it; otherwise only the inner_html case is expected.
    def startTagBody(name, attributes)
      parse_error("unexpected-start-tag", {"name" => "body"})

      if @tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body'
        assert @parser.inner_html
      else
        attributes.each do |attr, value|
          unless @tree.open_elements[1].attributes.has_key?(attr)
            @tree.open_elements[1].attributes[attr] = value
          end
        end
      end
    end

    # Block-level start tags: closes an open <p> in scope, then inserts the
    # element.
    def startTagCloseP(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insert_element(name, attributes)
      @processSpaceCharactersDropNewline = true if name == 'pre'
    end

    # <form>: a parse error when a form pointer is already set; otherwise
    # closes an open <p>, inserts the form and records it as the form pointer.
    def startTagForm(name, attributes)
      if @tree.formPointer
        parse_error("unexpected-start-tag", {"name" => name})
      else
        endTagP('p') if in_scope?('p')
        @tree.insert_element(name, attributes)
        @tree.formPointer = @tree.open_elements.last
      end
    end

    # <li>, <dd>, <dt>: closes an open <p>, then walks the open elements from
    # the innermost outwards. If a matching list item is found it is popped
    # together with everything above it; the elements that were implicitly
    # closed on the way are reported as missing end tags.
    def startTagListItem(name, attributes)
      endTagP('p') if in_scope?('p')
      stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
      stopName = stopNames[name]

      # reverse returns a copy, so popping from open_elements while iterating
      # is safe.
      @tree.open_elements.reverse.each_with_index do |node, i|
        if stopName.include?(node.name)
          poppedNodes = (0..i).collect { @tree.open_elements.pop }
          if i >= 1
            # The last popped node is the list item itself; only the i
            # elements nested inside it are missing their end tags.
            parse_error(
              i == 1 ? "missing-end-tag" : "missing-end-tags",
              {"name" => poppedNodes[0..-2].collect{|n| n.name}.join(", ")})

          end
          break
        end

        # Phrasing elements are all non special, non scoping, non
        # formatting elements
        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
      end

      # Always insert an <li> element.
      @tree.insert_element(name, attributes)
    end

    # <plaintext>: closes an open <p>, inserts the element and switches the
    # tokenizer's content model flag to :PLAINTEXT.
    def startTagPlaintext(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insert_element(name, attributes)
      @parser.tokenizer.content_model_flag = :PLAINTEXT
    end

    # Heading start tags: closes an open <p>, then inserts the heading.
    def startTagHeading(name, attributes)
      endTagP('p') if in_scope?('p')

      # Uncomment the following for IE7 behavior:
      # HEADING_ELEMENTS.each do |element|
      #   if in_scope?(element)
      #     parse_error("unexpected-start-tag", {"name" => name})
      #
      #     remove_open_elements_until do |element|
      #       HEADING_ELEMENTS.include?(element.name)
      #     end
      #
      #     break
      #   end
      # end
      @tree.insert_element(name, attributes)
    end

    # <a>: when an <a> is already among the active formatting elements, it is
    # closed first (parse error) and removed from both lists if still present;
    # then the new <a> is added as a formatting element.
    def startTagA(name, attributes)
      if afeAElement = @tree.elementInActiveFormattingElements('a')
        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
        endTagFormatting('a')
        @tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
        @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
      end
      @tree.reconstructActiveFormattingElements
      addFormattingElement(name, attributes)
    end

    # Generic formatting start tag (b, i, em, ...).
    def startTagFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
      addFormattingElement(name, attributes)
    end

    # <nobr>: an already open <nobr> in scope is closed first (parse error).
    def startTagNobr(name, attributes)
      @tree.reconstructActiveFormattingElements
      if in_scope?('nobr')
        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "nobr", "endName" => "nobr"})
        processEndTag('nobr')
        # XXX Need tests that trigger the following
        @tree.reconstructActiveFormattingElements
      end
      addFormattingElement(name, attributes)
    end

    # <button>: a button already in scope is closed (parse error) and the
    # start tag is reprocessed by the parser's current phase; otherwise the
    # element is inserted and a Marker is pushed onto the active formatting
    # elements.
    def startTagButton(name, attributes)
      if in_scope?('button')
        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
        processEndTag('button')
        @parser.phase.processStartTag(name, attributes)
      else
        @tree.reconstructActiveFormattingElements
        @tree.insert_element(name, attributes)
        @tree.activeFormattingElements.push(Marker)
      end
    end

    # <marquee>, <object>: inserts the element and pushes a Marker onto the
    # active formatting elements.
    def startTagMarqueeObject(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      @tree.activeFormattingElements.push(Marker)
    end

    # <xmp>: inserts the element and switches the tokenizer to :CDATA.
    def startTagXmp(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <table>: closes an open <p>, inserts the table and moves the parser to
    # the inTable phase.
    def startTagTable(name, attributes)
      processEndTag('p') if in_scope?('p')
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inTable]
    end

    # Void elements (br, img, ...): inserted and immediately popped again.
    def startTagVoidFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # <hr>: closes an open <p>, inserts the element and pops it immediately.
    def startTagHr(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # <image> is reported as a parse error and reprocessed as <img>.
    def startTagImage(name, attributes)
      # No really...
      parse_error("unexpected-start-tag-treated-as", {"originalName" => "image", "newName" => "img"})
      processStartTag('img', attributes)
    end

    # <input>: inserted and immediately popped. Association with the form
    # pointer is not implemented.
    def startTagInput(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      if @tree.formPointer
        # XXX Not exactly sure what to do here
        # @tree.open_elements[-1].form = @tree.formPointer
      end
      @tree.open_elements.pop
    end

    # <isindex>: deprecated tag. Unless a form pointer is already set, it is
    # expanded into form / hr / p / label / text / input / hr. Note that the
    # caller's attributes hash receives a 'name' => 'isindex' entry.
    def startTagIsindex(name, attributes)
      parse_error("deprecated-tag", {"name" => "isindex"})
      return if @tree.formPointer
      processStartTag('form', {})
      processStartTag('hr', {})
      processStartTag('p', {})
      processStartTag('label', {})
      # XXX Localization ...
      processCharacters('This is a searchable index. Insert your search keywords here: ')
      attributes['name'] = 'isindex'
      processStartTag('input', attributes)
      processEndTag('label')
      processEndTag('p')
      processStartTag('hr', {})
      processEndTag('form')
    end

    # <textarea>: inserts the element, switches the tokenizer to :RCDATA and
    # installs processSpaceCharactersDropNewline as the whitespace handler.
    # The alias is wrapped in the same $-w toggle used by the other handler
    # swaps in this class.
    def startTagTextarea(name, attributes)
      # XXX Form element pointer checking here as well...
      @tree.insert_element(name, attributes)
      @parser.tokenizer.content_model_flag = :RCDATA
      @processSpaceCharactersDropNewline = true
      if $-w
        $-w = false
        alias processSpaceCharacters processSpaceCharactersDropNewline
        $-w = true
      else
        alias processSpaceCharacters processSpaceCharactersDropNewline
      end
    end

    # iframe, noembed, noframes, noscript (if scripting enabled)
    def startTagCdata(name, attributes)
      @tree.insert_element(name, attributes)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <select>: inserts the element and moves the parser to the inSelect
    # phase.
    def startTagSelect(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inSelect]
    end

    # Start tags that belong to another insertion mode are reported and
    # ignored.
    def startTagMisplaced(name, attributes)
      # Elements that should be children of other elements that have a
      # different insertion mode; here they are ignored
      # "caption", "col", "colgroup", "frame", "frameset", "head",
      # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
      # "tr", "noscript"
      parse_error("unexpected-start-tag-ignored", {"name" => name})
    end

    # New HTML5 elements currently get the generic start tag treatment.
    def startTagNew(name, attributes)
      # New HTML5 elements, "event-source", "section", "nav",
      # "article", "aside", "header", "footer", "datagrid", "command"
      # $stderr.puts("Warning: Undefined behaviour for start tag #{name}")
      startTagOther(name, attributes)
      #raise NotImplementedError
    end

    # Fallback start tag handler: reconstructs the active formatting elements
    # and inserts the element.
    def startTagOther(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
    end

    # </p>: pops open elements until no <p> is in scope. When no <p> is in
    # scope at all, an empty <p> is created first and then closed.
    def endTagP(name)
      @tree.generateImpliedEndTags('p') if in_scope?('p')
      parse_error("unexpected-end-tag", {"name" => "p"}) unless @tree.open_elements.last.name == 'p'
      if in_scope?('p')
        @tree.open_elements.pop while in_scope?('p')
      else
        startTagCloseP('p', {})
        endTagP('p')
      end
    end

    # </body>: switches the parser to the afterBody phase. When the second
    # open element is not a body (inner_html case) the tag is reported and
    # ignored. The length check mirrors startTagBody and avoids calling #name
    # on nil when fewer than two elements are open.
    def endTagBody(name)
      # XXX Need to take open <p> tags into account here. We shouldn't imply
      # </p> but we should not throw a parse error either. Specification is
      # likely to be updated.
      if @tree.open_elements.length < 2 || @tree.open_elements[1].name != 'body'
        # inner_html case
        parse_error
        return
      end
      unless @tree.open_elements.last.name == 'body'
        parse_error("expected-one-end-tag-but-got-another",
                    {"expectedName" => "body",
                     "gotName" => @tree.open_elements.last.name})
      end
      @parser.phase = @parser.phases[:afterBody]
    end

    # </html>: handled as </body>, then (unless parsing inner_html) passed on
    # to whichever phase is current afterwards.
    def endTagHtml(name)
      endTagBody(name)
      @parser.phase.processEndTag(name) unless @parser.inner_html
    end

    # End tags of block elements: generates implied end tags, reports a parse
    # error if the current node is not the named element, and pops up to the
    # named element when it is in scope.
    def endTagBlock(name)
      #Put us back in the right whitespace handling mode
      @processSpaceCharactersDropNewline = false if name == 'pre'

      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      if in_scope?(name)
        remove_open_elements_until(name)
      end
    end

    # </form>: pops the form when it is the current node, otherwise reports a
    # parse error; the form pointer is cleared either way.
    def endTagForm(name)
      if in_scope?(name)
        @tree.generateImpliedEndTags
      end
      if @tree.open_elements.last.name != name
        parse_error("end-tag-too-early-ignored", {"name" => "form"})
      else
        @tree.open_elements.pop
      end
      @tree.formPointer = nil
    end

    # </li>, </dd>, </dt>: same shape as endTagBlock, but the element's own
    # name is passed to generateImpliedEndTags.
    def endTagListItem(name)
      # AT Could merge this with the Block case
      @tree.generateImpliedEndTags(name) if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      remove_open_elements_until(name) if in_scope?(name)
    end

    # Heading end tags: when any heading is in scope, implied end tags are
    # generated and open elements are popped until a heading element is
    # reached. A mismatch with the current node is a parse error.
    def endTagHeading(name)
      HEADING_ELEMENTS.each do |element|
        if in_scope?(element)
          @tree.generateImpliedEndTags
          break
        end
      end

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      HEADING_ELEMENTS.each do |element|
        if in_scope?(element)
          # Distinct block parameter name so the outer loop variable is not
          # shadowed.
          remove_open_elements_until {|open_element| HEADING_ELEMENTS.include?(open_element.name)}
          break
        end
      end
    end

    # The much-feared adoption agency algorithm
    #
    # Handles end tags of formatting elements. The loop repeats the algorithm
    # until one of the early returns in step 1 or step 3 is hit.
    def endTagFormatting(name)
      # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
      # XXX Better parse_error messages appreciated.
      while true
        # Step 1 paragraph 1
        afeElement = @tree.elementInActiveFormattingElements(name)
        if !afeElement || (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
          parse_error("adoption-agency-1.1", {"name" => name})
          return
        # Step 1 paragraph 2
        elsif !@tree.open_elements.include?(afeElement)
          parse_error("adoption-agency-1.2", {"name" => name})
          @tree.activeFormattingElements.delete(afeElement)
          return
        end

        # Step 1 paragraph 3
        if afeElement != @tree.open_elements.last
          parse_error("adoption-agency-1.3", {"name" => name})
        end

        # Step 2
        # Start of the adoption agency algorithm proper
        afeIndex = @tree.open_elements.index(afeElement)
        furthestBlock = nil
        @tree.open_elements[afeIndex..-1].each do |element|
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
            furthestBlock = element
            break
          end
        end

        # Step 3
        if furthestBlock.nil?
          removed = remove_open_elements_until {|open_element| open_element == afeElement }
          @tree.activeFormattingElements.delete(removed)
          return
        end
        commonAncestor = @tree.open_elements[afeIndex - 1]

        # Step 5
        furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

        # Step 6
        # The bookmark is supposed to help us identify where to reinsert
        # nodes in step 12. We have to ensure that we reinsert nodes after
        # the node before the active formatting element. Note the bookmark
        # can move in step 7.4
        bookmark = @tree.activeFormattingElements.index(afeElement)

        # Step 7
        lastNode = node = furthestBlock
        while true
          # AT replace this with a function and recursion?
          # Node is element before node in open elements
          node = @tree.open_elements[@tree.open_elements.index(node) - 1]
          until @tree.activeFormattingElements.include?(node)
            tmpNode = node
            node = @tree.open_elements[@tree.open_elements.index(node) - 1]
            @tree.open_elements.delete(tmpNode)
          end
          # Step 7.3
          break if node == afeElement
          # Step 7.4
          if lastNode == furthestBlock
            # XXX should this be index(node) or index(node)+1
            # Anne: I think +1 is ok. Given x = [2,3,4,5]
            # x.index(3) gives 1 and then x[1 +1] gives 4...
            bookmark = @tree.activeFormattingElements.index(node) + 1
          end
          # Step 7.5
          if node.hasContent
            clone = node.cloneNode
            # Replace node with clone
            @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
            @tree.open_elements[@tree.open_elements.index(node)] = clone
            node = clone
          end
          # Step 7.6
          # Remove lastNode from its parents, if any
          lastNode.parent.removeChild(lastNode) if lastNode.parent
          node.appendChild(lastNode)
          # Step 7.7
          lastNode = node
          # End of inner loop
        end

        # Step 8
        lastNode.parent.removeChild(lastNode) if lastNode.parent
        commonAncestor.appendChild(lastNode)

        # Step 9
        clone = afeElement.cloneNode

        # Step 10
        furthestBlock.reparentChildren(clone)

        # Step 11
        furthestBlock.appendChild(clone)

        # Step 12
        @tree.activeFormattingElements.delete(afeElement)
        @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)

        # Step 13
        @tree.open_elements.delete(afeElement)
        @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
      end
    end

    # </button>, </marquee>, </object>: like endTagBlock, and additionally
    # clears the active formatting elements when the element was in scope.
    def endTagButtonMarqueeObject(name)
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      if in_scope?(name)
        remove_open_elements_until(name)

        @tree.clearActiveFormattingElements
      end
    end

    # End tags that belong to other insertion modes are only reported.
    def endTagMisplaced(name)
      # This handles elements with end tags in other insertion modes.
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # </br> is a parse error and is treated as a <br> element: inserted with
    # no attributes and popped immediately.
    def endTagBr(name)
      parse_error("unexpected-end-tag-treated-as",
                  {"originalName" => "br", "newName" => "br element"})
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, {})
      @tree.open_elements.pop()
    end

    # End tags of elements that have no end tag are only reported.
    def endTagNone(name)
      # This handles elements with no end tag.
      parse_error("no-end-tag", {"name" => name})
    end

    # End tags of CDATA/RCDATA-style elements: pops the current node when it
    # matches, otherwise reports a parse error.
    def endTagCdataTextAreaXmp(name)
      if @tree.open_elements.last.name == name
        @tree.open_elements.pop
      else
        parse_error("unexpected-end-tag", {"name" => name})
      end
    end

    # New HTML5 elements currently get the generic end tag treatment.
    def endTagNew(name)
      # New HTML5 elements, "event-source", "section", "nav",
      # "article", "aside", "header", "footer", "datagrid", "command"
      # STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
      endTagOther(name)
      #raise NotImplementedError
    end

    # Fallback end tag handler: searches the open elements from the innermost
    # outwards. A matching element is closed together with everything above
    # it; hitting a special or scoping element first makes the end tag a
    # parse error and stops the search.
    def endTagOther(name)
      # XXX This logic should be moved into the treebuilder
      @tree.open_elements.reverse.each do |node|
        if node.name == name
          @tree.generateImpliedEndTags

          unless @tree.open_elements.last.name == name
            parse_error("unexpected-end-tag", {"name" => name})
          end

          remove_open_elements_until {|element| element == node }

          break
        else
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
            parse_error("unexpected-end-tag", {"name" => name})
            break
          end
        end
      end
    end

    protected

    # Inserts the element and records the newly opened node in the list of
    # active formatting elements.
    def addFormattingElement(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.activeFormattingElements.push(@tree.open_elements.last)
    end

  end
end