feedtools 0.2.26 → 0.2.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,613 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InBodyPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-body
7
+
8
# Tag dispatch declarations for the "in body" phase.
# Each handle_start / handle_end call lists tag names. A Hash argument maps a
# group of names to a handler suffix: 'ListItem' pairs with startTagListItem /
# endTagListItem, 'Misplaced' with startTagMisplaced / endTagMisplaced, and so on.
# NOTE(review): handle_start / handle_end are defined outside this file
# (presumably on Phase). Bare names such as 'title' presumably dispatch to
# startTag<Name> -- confirm against phase.rb.
+ handle_start 'html'
9
+ handle_start %w(base link meta script style) => 'ProcessInHead'
10
+ handle_start 'title'
11
+
12
+ handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
13
+
14
+ handle_start 'input', 'textarea', 'select', 'isindex', %w(marquee object)
15
+
16
+ handle_start %w(li dd dt) => 'ListItem'
17
+
18
+ handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP'
19
+
20
+ handle_start %w(b big em font i s small strike strong tt u) => 'Formatting'
21
+ handle_start 'nobr'
22
+
23
+ handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting'
24
+
25
+ handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading'
26
+
27
+ handle_start %w(caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr) => 'Misplaced'
28
+
29
+ handle_start %w(event-source section nav article aside header footer datagrid command) => 'New'
30
+
31
+ handle_end 'p', 'body', 'html', 'form', %w(button marquee object), %w(dd dt li) => 'ListItem'
32
+
33
+ handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block'
34
+
35
+ handle_end HEADING_ELEMENTS => 'Heading'
36
+
37
+ handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting'
38
+
39
+ handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced'
40
+
41
+ handle_end 'br'
42
+
43
+ handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None'
44
+
45
+ handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
46
+
47
+ handle_end %w(event-source section nav article aside header footer datagrid command) => 'New'
48
+
49
# Sets up the phase and keeps a second name for the regular whitespace handler.
#
# parser, tree:: passed through unchanged to the superclass constructor.
def initialize(parser, tree)
  super

  # for special handling of whitespace in <pre>
  @processSpaceCharactersDropNewline = false

  # Keep the stock handler reachable as processSpaceCharactersNonPre so it can
  # be restored later. Warnings are muted only while the alias is created.
  warnings_were_on = $-w
  $-w = false if warnings_were_on
  alias processSpaceCharactersNonPre processSpaceCharacters
  $-w = true if warnings_were_on
end
62
+
63
# One-shot whitespace handler used right after <pre>/<textarea> start tags.
#
# It first re-installs the regular handler (processSpaceCharactersNonPre) under
# the name processSpaceCharacters. It then drops a single leading newline when
# the current node is an empty <pre> or <textarea>, and inserts whatever text
# remains.
#
# data:: String of space characters from the tokenizer.
def processSpaceCharactersDropNewline(data)
  warnings_were_on = $-w
  $-w = false if warnings_were_on
  alias processSpaceCharacters processSpaceCharactersNonPre
  $-w = true if warnings_were_on

  if !data.empty? && data[0] == ?\n
    current = @tree.open_elements.last
    data = data[1..-1] if %w[pre textarea].include?(current.name) && !current.hasContent
  end

  return if data.empty?

  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
84
+
85
# Regular whitespace handler: rebuilds the active formatting elements, then
# appends the whitespace as text.
#
# data:: String of space characters.
def processSpaceCharacters(data)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insertText(data)
end
89
+
90
# Handles non-space character data: rebuilds the active formatting elements and
# appends the text.
#
# XXX The specification says to reconstruct for every character, but that
# apparently does not match the real world, so the space-character handler is
# kept separate.
#
# data:: String of character data.
def processCharacters(data)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insertText(data)
end
97
+
98
# Forwards the start tag to the :inHead phase unchanged.
def startTagProcessInHead(name, attributes)
  in_head = @parser.phases[:inHead]
  in_head.processStartTag(name, attributes)
end
101
+
102
# <title> inside body: reports a parse error, then lets the :inHead phase
# process the tag.
def startTagTitle(name, attributes)
  parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
  in_head = @parser.phases[:inHead]
  in_head.processStartTag(name, attributes)
end
106
+
107
# A second <body> start tag: always a parse error.
#
# When the second open element is a body, attributes that it does not already
# have are copied onto it. Otherwise (only the root is open, or the second
# element is not body) the code asserts that the parser is in inner_html mode.
def startTagBody(name, attributes)
  parse_error("unexpected-start-tag", {"name" => "body"})

  stack = @tree.open_elements
  if stack.length != 1 && stack[1].name == 'body'
    attributes.each do |key, val|
      existing = @tree.open_elements[1].attributes
      existing[key] = val unless existing.has_key?(key)
    end
  else
    assert @parser.inner_html
  end
end
120
+
121
# Start tag for block-level elements that implicitly close an open <p>
# (address, blockquote, div, pre, ul, ...).
#
# For <pre> the one-shot drop-newline whitespace handler is installed, the same
# way startTagTextarea does it. Previously only the flag was set, so the
# newline directly after <pre> was never dropped.
#
# name::       tag name being opened.
# attributes:: attributes for the new element.
def startTagCloseP(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  return unless name == 'pre'

  @processSpaceCharactersDropNewline = true

  # Swap the whitespace handler; processSpaceCharactersDropNewline restores the
  # regular handler itself on its first call. Warnings are muted only while the
  # alias is created, as the other alias sites in this class do.
  warnings_were_on = $-w
  $-w = false if warnings_were_on
  alias processSpaceCharacters processSpaceCharactersDropNewline
  $-w = true if warnings_were_on
end
126
+
127
# <form> start tag. A form already tracked by the tree makes this a parse error
# and the tag is dropped. Otherwise an open <p> is closed, the element is
# inserted, and it becomes the tree's form pointer.
def startTagForm(name, attributes)
  return parse_error("unexpected-start-tag", {"name" => name}) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  @tree.formPointer = @tree.open_elements.last
end
136
+
137
# Start tag for <li>, <dd> and <dt>.
#
# Closes an open <p>, then walks the open-element stack from the top looking
# for an item this tag implicitly ends (li ends li; dd/dt end dd or dt). That
# item and everything above it are popped. A parse error then names only the
# elements that were open above the item, so "missing-end-tag" carries exactly
# one name. Previously the closed item itself was listed as well.
#
# name::       'li', 'dd' or 'dt'.
# attributes:: attributes for the new element.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')
  stop_names = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}[name]

  # Iterate over a reversed copy so popping the real stack is safe.
  @tree.open_elements.reverse.each_with_index do |node, i|
    if stop_names.include?(node.name)
      popped = (0..i).collect { @tree.open_elements.pop }
      if i >= 1
        # The last popped node is the list item itself; it is not "missing".
        unclosed = popped[0...-1].collect { |n| n.name }.join(", ")
        parse_error(i == 1 ? "missing-end-tag" : "missing-end-tags", {"name" => unclosed})
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    break if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name)
  end

  # Always insert the new list item element.
  @tree.insert_element(name, attributes)
end
162
+
163
# <plaintext>: closes an open <p>, inserts the element and switches the
# tokenizer's content model flag to :PLAINTEXT.
def startTagPlaintext(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
168
+
169
# Heading start tag (HEADING_ELEMENTS): closes an open <p>, then inserts the
# heading element.
def startTagHeading(name, attributes)
  endTagP('p') if in_scope?('p')

  # For IE7 behavior, enable the following before inserting:
  # HEADING_ELEMENTS.each do |element|
  #   if in_scope?(element)
  #     parse_error("unexpected-start-tag", {"name" => name})
  #
  #     remove_open_elements_until do |element|
  #       HEADING_ELEMENTS.include?(element.name)
  #     end
  #
  #     break
  #   end
  # end
  @tree.insert_element(name, attributes)
end
186
+
187
# <a> start tag. If an <a> is still among the active formatting elements, that
# is a parse error: it is ended as if </a> had been seen, and any leftover
# reference is removed from both the open-element stack and the active
# formatting list. The new <a> is then added as a formatting element.
def startTagA(name, attributes)
  stale_anchor = @tree.elementInActiveFormattingElements('a')
  if stale_anchor
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
    endTagFormatting('a')

    open = @tree.open_elements
    open.delete(stale_anchor) if open.include?(stale_anchor)

    active = @tree.activeFormattingElements
    active.delete(stale_anchor) if active.include?(stale_anchor)
  end

  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
197
+
198
# Start tag for formatting elements (b, i, em, ...): rebuilds the active
# formatting elements, then adds the new one.
def startTagFormatting(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
202
+
203
# <nobr> start tag. A <nobr> already in scope is a parse error and is closed
# through the normal end-tag path before the new one is added.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements

  already_open = in_scope?('nobr')
  if already_open
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "nobr", "endName" => "nobr"})
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    @tree.reconstructActiveFormattingElements
  end

  addFormattingElement(name, attributes)
end
213
+
214
# <button> start tag. With a button already in scope this is a parse error:
# the open button is closed and the tag is reprocessed by the parser's current
# phase. Otherwise the element is inserted and a Marker is pushed onto the
# active formatting elements.
def startTagButton(name, attributes)
  unless in_scope?('button')
    @tree.reconstructActiveFormattingElements
    @tree.insert_element(name, attributes)
    return @tree.activeFormattingElements.push(Marker)
  end

  parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
  processEndTag('button')
  @parser.phase.processStartTag(name, attributes)
end
225
+
226
# <marquee>/<object> start tag: inserts the element and pushes a Marker onto
# the active formatting elements.
def startTagMarqueeObject(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  tree.activeFormattingElements.push(Marker)
end
231
+
232
# <xmp> start tag: inserts the element and switches the tokenizer's content
# model flag to :CDATA.
def startTagXmp(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
237
+
238
# <table> start tag: closes an open <p> through the normal end-tag path,
# inserts the table and hands control to the :inTable phase.
def startTagTable(name, attributes)
  processEndTag('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  table_phase = @parser.phases[:inTable]
  @parser.phase = table_phase
end
243
+
244
# Start tag for void elements (area, br, img, ...): inserts the element and
# immediately pops it off the open-element stack.
def startTagVoidFormatting(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  tree.open_elements.pop
end
249
+
250
# <hr> start tag: closes an open <p>, inserts the element and pops it straight
# off the open-element stack.
def startTagHr(name, attributes)
  endTagP('p') if in_scope?('p')
  tree = @tree
  tree.insert_element(name, attributes)
  tree.open_elements.pop
end
255
+
256
# <image> start tag: reports a parse error and reprocesses the tag as <img>
# with the same attributes. (No really...)
def startTagImage(name, attributes)
  renamed = {"originalName" => "image", "newName" => "img"}
  parse_error("unexpected-start-tag-treated-as", renamed)
  processStartTag('img', attributes)
end
261
+
262
# <input> start tag: inserts the element and pops it straight off the
# open-element stack. Association with the current form is not implemented.
def startTagInput(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  if tree.formPointer
    # XXX Not exactly sure what to do here
    # tree.open_elements[-1].form = tree.formPointer
  end
  tree.open_elements.pop
end
271
+
272
# <isindex> start tag (deprecated). Always reports a parse error. Unless a form
# is already tracked by the tree, it expands to the equivalent
# form / hr / p / label / prompt text / input / hr markup by feeding synthetic
# tags back through this phase.
#
# The synthetic <input> receives the original attributes plus
# name="isindex". The caller's attribute hash is left untouched: previously it
# was modified in place, and an unused `attrs` array was built from it.
#
# name::       always 'isindex'.
# attributes:: Hash of attributes from the token.
def startTagIsindex(name, attributes)
  parse_error("deprecated-tag", {"name" => "isindex"})
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')
  processStartTag('input', attributes.merge('name' => 'isindex'))
  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
289
+
290
# <textarea> start tag: inserts the element, switches the tokenizer's content
# model flag to :RCDATA and installs the drop-newline whitespace handler under
# the name processSpaceCharacters.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)

  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA

  @processSpaceCharactersDropNewline = true
  alias processSpaceCharacters processSpaceCharactersDropNewline
end
297
+
298
+ # iframe, noembed noframes, noscript(if scripting enabled)
299
# Start tag for iframe, noembed, noframes and noscript: inserts the element and
# switches the tokenizer's content model flag to :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
303
+
304
# <select> start tag: inserts the element and hands control to the :inSelect
# phase.
def startTagSelect(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  select_phase = @parser.phases[:inSelect]
  @parser.phase = select_phase
end
309
+
310
# Start tags that belong to other insertion modes ("caption", "col",
# "colgroup", "frame", "frameset", "head", "option", "optgroup", "tbody", "td",
# "tfoot", "th", "thead", "tr", "noscript"). They are ignored here apart from
# reporting a parse error.
def startTagMisplaced(name, attributes)
  details = {"name" => name}
  parse_error("unexpected-start-tag-ignored", details)
end
318
+
319
# Start tag for the new HTML5 elements ("event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command"). No dedicated
# behaviour is implemented; they are handled like any other unknown start tag.
def startTagNew(name, attributes)
  # $stderr.puts("Warning: Undefined behaviour for start tag #{name}")
  # raise NotImplementedError
  startTagOther(name, attributes)
end
326
+
327
# Fallback start-tag handler: rebuilds the active formatting elements, then
# inserts the element.
def startTagOther(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
end
331
+
332
# </p> end tag.
#
# With a <p> in scope: implied end tags (other than p) are generated, a parse
# error is reported if the current node is not the <p>, and elements are popped
# until no <p> is in scope. With no <p> in scope: a parse error is reported, an
# empty <p> is synthesised through startTagCloseP and then closed by recursion.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')

  if @tree.open_elements.last.name != 'p'
    parse_error("unexpected-end-tag", {"name" => "p"})
  end

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  @tree.open_elements.pop while in_scope?('p')
end
342
+
343
# </body> end tag.
#
# When the second open element is not a body -- including the fragment case
# where only the root element is open -- this is a parse error and the tag is
# ignored. Otherwise a parse error is reported if body is not the current node,
# and the parser moves to the :afterBody phase.
#
# The length guard mirrors startTagBody. Without it, a stack holding only the
# root element made open_elements[1] nil and the name lookup raised.
#
# name:: tag name of the end tag being processed.
def endTagBody(name)
  # XXX Need to take open <p> tags into account here. We shouldn't imply
  # </p> but we should not throw a parse error either. Specification is
  # likely to be updated.
  stack = @tree.open_elements
  if stack.length < 2 || stack[1].name != 'body'
    # inner_html case
    parse_error
    return
  end

  unless stack.last.name == 'body'
    parse_error("expected-one-end-tag-but-got-another",
                {"expectedName" => "body",
                 "gotName" => stack.last.name})
  end
  @parser.phase = @parser.phases[:afterBody]
end
359
+
360
# </html> end tag: handled as </body> first; then, unless parsing inner_html,
# the tag is reprocessed by whatever phase the parser is now in.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.inner_html

  @parser.phase.processEndTag(name)
end
364
+
365
# End tag for block elements (address, blockquote, div, pre, ul, ...).
#
# Clears the <pre> whitespace flag, generates implied end tags when the element
# is in scope, reports a parse error if it is not the current node, and pops
# the stack down to and including it when it is in scope.
def endTagBlock(name)
  # Put us back in the right whitespace handling mode
  @processSpaceCharactersDropNewline = false if name == 'pre'

  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  remove_open_elements_until(name) if in_scope?(name)
end
379
+
380
# </form> end tag: generates implied end tags when a form is in scope, pops the
# form if it is the current node (otherwise reports a parse error and leaves
# the stack alone), and always clears the tree's form pointer.
def endTagForm(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  if @tree.open_elements.last.name == name
    @tree.open_elements.pop
  else
    parse_error("end-tag-too-early-ignored", {"name" => "form"})
  end

  @tree.formPointer = nil
end
391
+
392
# End tag for <dd>, <dt> and <li>: generates implied end tags (except for the
# element itself) when it is in scope, reports a parse error if it is not the
# current node, and pops the stack down to it when it is in scope.
def endTagListItem(name)
  # AT Could merge this with the Block case
  @tree.generateImpliedEndTags(name) if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  remove_open_elements_until(name) if in_scope?(name)
end
402
+
403
# End tag for a heading. If any heading element is in scope, implied end tags
# are generated. A parse error is reported when the current node is not the
# named heading. If any heading is in scope, the stack is then popped until a
# heading element (of any level) has been removed.
def endTagHeading(name)
  @tree.generateImpliedEndTags if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
    remove_open_elements_until { |open| HEADING_ELEMENTS.include?(open.name) }
  end
end
422
+
423
+ # The much-feared adoption agency algorithm
424
# End tag for formatting elements: the adoption agency algorithm.
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
#
# Repeats until one of the early exits is taken: no matching active formatting
# element (or one that is open but not in scope), a formatting element that is
# no longer open, or no "furthest block" below the formatting element. In that
# last case the stack is simply popped down to the formatting element.
# Otherwise the nodes between the formatting element and the furthest block are
# re-parented as described step by step below.
#
# Changes from the previous version: the unused `cite` local is gone, and block
# parameters no longer reuse the name of an outer local.
#
# name:: tag name of the formatting end tag.
# XXX Better parse_error messages appreciated.
def endTagFormatting(name)
  while true
    # Step 1 paragraph 1
    afeElement = @tree.elementInActiveFormattingElements(name)
    if !afeElement || (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
      parse_error("adoption-agency-1.1", {"name" => name})
      return
    # Step 1 paragraph 2
    elsif !@tree.open_elements.include?(afeElement)
      parse_error("adoption-agency-1.2", {"name" => name})
      @tree.activeFormattingElements.delete(afeElement)
      return
    end

    # Step 1 paragraph 3
    if afeElement != @tree.open_elements.last
      parse_error("adoption-agency-1.3", {"name" => name})
    end

    # Step 2
    # Start of the adoption agency algorithm proper
    afeIndex = @tree.open_elements.index(afeElement)
    furthestBlock = nil
    @tree.open_elements[afeIndex..-1].each do |candidate|
      if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(candidate.name)
        furthestBlock = candidate
        break
      end
    end

    # Step 3
    if furthestBlock.nil?
      removed = remove_open_elements_until { |open| open == afeElement }
      @tree.activeFormattingElements.delete(removed)
      return
    end
    commonAncestor = @tree.open_elements[afeIndex - 1]

    # Step 5
    furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(afeElement)

    # Step 7
    lastNode = node = furthestBlock
    while true
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      until @tree.activeFormattingElements.include?(node)
        tmpNode = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(tmpNode)
      end
      # Step 7.3
      break if node == afeElement
      # Step 7.4
      if lastNode == furthestBlock
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end
      # Step 7.5
      if node.hasContent
        clone = node.cloneNode
        # Replace node with clone
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
        @tree.open_elements[@tree.open_elements.index(node)] = clone
        node = clone
      end
      # Step 7.6
      # Remove lastNode from its parents, if any
      lastNode.parent.removeChild(lastNode) if lastNode.parent
      node.appendChild(lastNode)
      # Step 7.7
      lastNode = node
      # End of inner loop
    end

    # Step 8
    lastNode.parent.removeChild(lastNode) if lastNode.parent
    commonAncestor.appendChild(lastNode)

    # Step 9
    clone = afeElement.cloneNode

    # Step 10
    furthestBlock.reparentChildren(clone)

    # Step 11
    furthestBlock.appendChild(clone)

    # Step 12
    @tree.activeFormattingElements.delete(afeElement)
    @tree.activeFormattingElements.insert([bookmark, @tree.activeFormattingElements.length].min, clone)

    # Step 13
    @tree.open_elements.delete(afeElement)
    @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
  end
end
534
+
535
# End tag for <button>, <marquee> and <object>: generates implied end tags when
# the element is in scope, reports a parse error if it is not the current node,
# and, when it is in scope, pops the stack down to it and clears the active
# formatting elements.
def endTagButtonMarqueeObject(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  return unless in_scope?(name)

  remove_open_elements_until(name)
  @tree.clearActiveFormattingElements
end
548
+
549
# End tags that are handled in other insertion modes: here they only produce a
# parse error.
def endTagMisplaced(name)
  details = {"name" => name}
  parse_error("unexpected-end-tag", details)
end
553
+
554
# </br> end tag: reports a parse error, then behaves like a <br> start tag with
# no attributes (inserts the element and pops it straight off the stack).
def endTagBr(name)
  renamed = {"originalName" => "br", "newName" => "br element"}
  parse_error("unexpected-end-tag-treated-as", renamed)

  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, {})
  tree.open_elements.pop
end
561
+
562
# End tags for elements that have no end tag (area, img, input, ...): they only
# produce a parse error.
def endTagNone(name)
  details = {"name" => name}
  parse_error("no-end-tag", details)
end
566
+
567
# End tag for noframes, noscript, noembed, textarea, xmp and iframe: pops the
# current node when it matches, otherwise reports a parse error and leaves the
# stack untouched.
def endTagCdataTextAreaXmp(name)
  stack = @tree.open_elements
  return stack.pop if stack.last.name == name

  parse_error("unexpected-end-tag", {"name" => name})
end
574
+
575
# End tag for the new HTML5 elements ("event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command"). No dedicated
# behaviour is implemented; they are handled like any other unknown end tag.
def endTagNew(name)
  # STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
  # raise NotImplementedError
  endTagOther(name)
end
582
+
583
# Fallback end-tag handler. Walks the open-element stack from the top:
# * on the first element with the given name, implied end tags are generated,
#   a parse error is reported if that element is not the current node, and the
#   stack is popped down to and including it;
# * on a special or scoping element first, a parse error is reported and the
#   tag is ignored.
def endTagOther(name)
  # XXX This logic should be moved into the treebuilder
  @tree.open_elements.reverse.each do |candidate|
    if candidate.name == name
      @tree.generateImpliedEndTags

      if @tree.open_elements.last.name != name
        parse_error("unexpected-end-tag", {"name" => name})
      end

      remove_open_elements_until { |open| open == candidate }
      break
    end

    if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(candidate.name)
      parse_error("unexpected-end-tag", {"name" => name})
      break
    end
  end
end
604
+
605
+ protected
606
+
607
# Inserts the element and records the resulting current node on the list of
# active formatting elements.
def addFormattingElement(name, attributes)
  tree = @tree
  tree.insert_element(name, attributes)
  tree.activeFormattingElements.push(tree.open_elements.last)
end
611
+
612
+ end
613
+ end