feedtools 0.2.26 → 0.2.27
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,613 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
class InBodyPhase < Phase
|
5
|
+
|
6
|
+
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
|
7
|
+
|
8
|
+
# Tag dispatch declarations for this phase.
# NOTE(review): handle_start / handle_end are not defined in this file
# (presumably they come from Phase). Judging by the handler methods defined in
# this class, a bare tag name 'x' appears to route to startTagX / endTagX, an
# array such as %w[marquee object] to one handler named after the joined tags,
# and `tags => 'Suffix'` to startTagSuffix / endTagSuffix -- confirm in Phase.
handle_start('html')
handle_start(%w[base link meta script style] => 'ProcessInHead')
handle_start('title')

handle_start('body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image')

handle_start('input', 'textarea', 'select', 'isindex', %w[marquee object])

handle_start(%w[li dd dt] => 'ListItem')

handle_start(%w[address blockquote center dir div dl fieldset listing menu ol p pre ul] => 'CloseP')

handle_start(%w[b big em font i s small strike strong tt u] => 'Formatting')
handle_start('nobr')

handle_start(%w[area basefont bgsound br embed img param spacer wbr] => 'VoidFormatting')

handle_start(%w[iframe noembed noframes noscript] => 'Cdata', HEADING_ELEMENTS => 'Heading')

handle_start(%w[caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr] => 'Misplaced')

handle_start(%w[event-source section nav article aside header footer datagrid command] => 'New')

# End-tag counterparts.
handle_end('p', 'body', 'html', 'form', %w[button marquee object], %w[dd dt li] => 'ListItem')

handle_end(%w[address blockquote center div dl fieldset listing menu ol pre ul] => 'Block')

handle_end(HEADING_ELEMENTS => 'Heading')

handle_end(%w[a b big em font i nobr s small strike strong tt u] => 'Formatting')

handle_end(%w[head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th] => 'Misplaced')

handle_end('br')

handle_end(%w[area basefont bgsound embed hr image img input isindex param spacer wbr frame] => 'None')

handle_end(%w[noframes noscript noembed textarea xmp iframe] => 'CdataTextAreaXmp')

handle_end(%w[event-source section nav article aside header footer datagrid command] => 'New')
|
48
|
+
|
49
|
+
# Sets up the phase and keeps the current processSpaceCharacters reachable
# under the name processSpaceCharactersNonPre.
#
# parser, tree - passed straight through to the superclass constructor.
def initialize(parser, tree)
  super(parser, tree)

  # for special handling of whitespace in <pre>
  @processSpaceCharactersDropNewline = false

  # Warnings are muted around the alias only when they were switched on.
  # NOTE(review): `alias` inside a method body acts on the class, not on this
  # instance, so every construction repeats it class-wide -- confirm intended.
  warnings_on = $-w ? true : false
  $-w = false if warnings_on
  alias processSpaceCharactersNonPre processSpaceCharacters
  $-w = true if warnings_on
end
|
62
|
+
|
63
|
+
# Whitespace handler that drops one leading newline at the start of a
# <pre>/<textarea>. It first aliases processSpaceCharacters back to
# processSpaceCharactersNonPre, then inserts +data+ -- without its leading
# "\n" when the last open element is a pre or textarea with no content yet.
#
# data - the whitespace text of the current token.
def processSpaceCharactersDropNewline(data)
  # Restore the regular handler; mute warnings only if they were enabled.
  warnings_on = $-w ? true : false
  $-w = false if warnings_on
  alias processSpaceCharacters processSpaceCharactersNonPre
  $-w = true if warnings_on

  if data.length > 0 && data[0] == ?\n
    current = @tree.open_elements.last
    if %w[pre textarea].include?(current.name) && !current.hasContent
      data = data[1..-1]
    end
  end

  # Nothing left to insert once the newline has been stripped.
  return unless data.length > 0

  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
|
84
|
+
|
85
|
+
# Regular whitespace handler: reconstructs the active formatting elements and
# inserts +data+ as text.
def processSpaceCharacters(data)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insertText(data)
end
|
89
|
+
|
90
|
+
# Handler for character data: reconstructs the active formatting elements and
# inserts +data+ as text.
def processCharacters(data)
  # XXX The specification says to do this for every character at the
  # moment, but apparently that doesn't match the real world so we don't
  # do it for space characters.
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insertText(data)
end
|
97
|
+
|
98
|
+
# Forwards the start tag to the parser's :inHead phase.
def startTagProcessInHead(name, attributes)
  in_head = @parser.phases[:inHead]
  in_head.processStartTag(name, attributes)
end
|
101
|
+
|
102
|
+
# Reports a parse error, then forwards the start tag to the :inHead phase.
def startTagTitle(name, attributes)
  parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
  in_head = @parser.phases[:inHead]
  in_head.processStartTag(name, attributes)
end
|
106
|
+
|
107
|
+
# Handles a <body> start tag seen inside the body: always a parse error.
# When the second open element is a body, attributes it does not already have
# are copied onto it; otherwise the parser is asserted to be in inner_html mode.
def startTagBody(name, attributes)
  parse_error("unexpected-start-tag", {"name" => "body"})

  stack = @tree.open_elements
  body_missing = stack.length == 1 || stack[1].name != 'body'
  return assert(@parser.inner_html) if body_missing

  existing = stack[1].attributes
  attributes.each do |key, val|
    existing[key] = val unless existing.has_key?(key)
  end
end
|
120
|
+
|
121
|
+
# Closes an in-scope <p> (if any) and inserts the element. For <pre> it also
# raises the @processSpaceCharactersDropNewline flag.
# NOTE(review): nothing in this class reads that flag, and unlike
# startTagTextarea no alias is installed here -- confirm Phase consults it.
def startTagCloseP(name, attributes)
  p_open = in_scope?('p')
  endTagP('p') if p_open
  @tree.insert_element(name, attributes)
  return unless name == 'pre'

  @processSpaceCharactersDropNewline = true
end
|
126
|
+
|
127
|
+
# Handles <form>: a parse error when the tree already has a form pointer;
# otherwise closes an in-scope <p>, inserts the element and records it as the
# tree's form pointer.
def startTagForm(name, attributes)
  return parse_error("unexpected-start-tag", {"name" => name}) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  @tree.formPointer = @tree.open_elements.last
end
|
136
|
+
|
137
|
+
# Handles <li>, <dd> and <dt> start tags.
#
# Closes an in-scope <p>, then walks the open elements from the innermost
# outwards. If an open li (for li) or dd/dt (for dd/dt) is found before a
# special/scoping element other than address/div, everything up to and
# including it is popped, and the elements that were left unclosed inside it
# are reported as missing end tags. The new element is inserted in all cases.
#
# name       - tag name of the new element.
# attributes - attributes handed on to @tree.insert_element.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')
  stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
  stopName = stopNames[name]

  # `reverse` yields a copy, so popping from the live stack below is safe.
  @tree.open_elements.reverse.each_with_index do |node, i|
    if stopName.include?(node.name)
      poppedNodes = (0..i).collect { @tree.open_elements.pop }
      if i >= 1
        # The last popped node is the matched list item itself, implicitly
        # closed by this start tag; only the nodes inside it lack end tags.
        parse_error(
          i == 1 ? "missing-end-tag" : "missing-end-tags",
          {"name" => poppedNodes[0...-1].collect { |n| n.name }.join(", ")})
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
  end

  # Always insert the new list item element.
  @tree.insert_element(name, attributes)
end
|
162
|
+
|
163
|
+
# Closes an in-scope <p>, inserts the element and switches the tokenizer's
# content model flag to :PLAINTEXT.
def startTagPlaintext(name, attributes)
  p_open = in_scope?('p')
  endTagP('p') if p_open
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
|
168
|
+
|
169
|
+
# Closes an in-scope <p> and inserts the heading element.
def startTagHeading(name, attributes)
  p_open = in_scope?('p')
  endTagP('p') if p_open

  # Uncomment the following for IE7 behavior:
  # HEADING_ELEMENTS.each do |element|
  #   if in_scope?(element)
  #     parse_error("unexpected-start-tag", {"name" => name})
  #
  #     remove_open_elements_until do |element|
  #       HEADING_ELEMENTS.include?(element.name)
  #     end
  #
  #     break
  #   end
  # end
  @tree.insert_element(name, attributes)
end
|
186
|
+
|
187
|
+
# Handles <a>. If an 'a' is still among the active formatting elements this is
# a parse error: the old anchor is closed through endTagFormatting and then
# removed from both the open-element stack and the active formatting list if
# still present. The new anchor is then added as a formatting element.
def startTagA(name, attributes)
  stale_anchor = @tree.elementInActiveFormattingElements('a')
  if stale_anchor
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
    endTagFormatting('a')

    open_stack = @tree.open_elements
    open_stack.delete(stale_anchor) if open_stack.include?(stale_anchor)

    formatting = @tree.activeFormattingElements
    formatting.delete(stale_anchor) if formatting.include?(stale_anchor)
  end

  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
|
197
|
+
|
198
|
+
# Reconstructs the active formatting elements, then adds the new element as a
# formatting element.
def startTagFormatting(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
|
202
|
+
|
203
|
+
# Handles <nobr>. An already in-scope nobr is a parse error: it is closed via
# processEndTag and the formatting elements are reconstructed again before the
# new element is added.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements

  nested = in_scope?('nobr')
  if nested
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "nobr", "endName" => "nobr"})
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    @tree.reconstructActiveFormattingElements
  end

  addFormattingElement(name, attributes)
end
|
213
|
+
|
214
|
+
# Handles <button>. With no button in scope the element is inserted and a
# Marker is pushed onto the active formatting elements. Otherwise it is a parse
# error: the open button is closed and the start tag is handed to the parser's
# current phase again.
def startTagButton(name, attributes)
  unless in_scope?('button')
    @tree.reconstructActiveFormattingElements
    @tree.insert_element(name, attributes)
    return @tree.activeFormattingElements.push(Marker)
  end

  parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
  processEndTag('button')
  @parser.phase.processStartTag(name, attributes)
end
|
225
|
+
|
226
|
+
# Inserts the element and pushes a Marker onto the active formatting elements.
def startTagMarqueeObject(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  tree.activeFormattingElements.push(Marker)
end
|
231
|
+
|
232
|
+
# Inserts the element and switches the tokenizer's content model flag to :CDATA.
def startTagXmp(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
|
237
|
+
|
238
|
+
# Closes an in-scope <p> through processEndTag, inserts the table element and
# moves the parser to its :inTable phase.
def startTagTable(name, attributes)
  p_open = in_scope?('p')
  processEndTag('p') if p_open
  @tree.insert_element(name, attributes)
  parser = @parser
  parser.phase = parser.phases[:inTable]
end
|
243
|
+
|
244
|
+
# Inserts the element and immediately pops it off the open-element stack.
def startTagVoidFormatting(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  tree.open_elements.pop
end
|
249
|
+
|
250
|
+
# Closes an in-scope <p>, inserts the element and immediately pops it off the
# open-element stack.
def startTagHr(name, attributes)
  p_open = in_scope?('p')
  endTagP('p') if p_open
  tree = @tree
  tree.insert_element(name, attributes)
  tree.open_elements.pop
end
|
255
|
+
|
256
|
+
# Treats <image> as <img>: reports a parse error and re-dispatches the tag as
# 'img' with the same attributes.
def startTagImage(name, attributes)
  # No really...
  renamed = {"originalName" => "image", "newName" => "img"}
  parse_error("unexpected-start-tag-treated-as", renamed)
  processStartTag('img', attributes)
end
|
261
|
+
|
262
|
+
# Inserts the element and immediately pops it off the open-element stack. The
# form-pointer branch is a placeholder with no effect yet.
def startTagInput(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  if tree.formPointer
    # XXX Not exactly sure what to do here
    # @tree.open_elements[-1].form = @tree.formPointer
  end
  tree.open_elements.pop
end
|
271
|
+
|
272
|
+
# Handles the deprecated <isindex> tag by synthesising
# form / hr / p / label / text / input / hr and re-dispatching each piece
# through processStartTag, processCharacters and processEndTag. Always reports
# a "deprecated-tag" parse error; does nothing more when the tree already has
# a form pointer.
#
# name       - tag name as received (unused beyond dispatch).
# attributes - attributes of the isindex tag; they are copied onto the
#              generated <input> with 'name' forced to 'isindex'. The caller's
#              object is left unmodified.
def startTagIsindex(name, attributes)
  parse_error("deprecated-tag", {"name" => "isindex"})
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')

  # Work on a copy so the token's own attributes are not altered.
  input_attributes = attributes.dup
  input_attributes['name'] = 'isindex'
  processStartTag('input', input_attributes)

  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
|
289
|
+
|
290
|
+
# Inserts the element, switches the tokenizer's content model flag to :RCDATA
# and makes processSpaceCharactersDropNewline the whitespace handler.
# NOTE(review): `alias` in a method body acts on the class, so the swap is
# visible to every instance -- confirm that is intended.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA
  @processSpaceCharactersDropNewline = true
  alias processSpaceCharacters processSpaceCharactersDropNewline
end
|
297
|
+
|
298
|
+
# iframe, noembed noframes, noscript(if scripting enabled)
|
299
|
+
# Inserts the element and switches the tokenizer's content model flag to :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
|
303
|
+
|
304
|
+
# Inserts the element and moves the parser to its :inSelect phase.
def startTagSelect(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  parser = @parser
  parser.phase = parser.phases[:inSelect]
end
|
309
|
+
|
310
|
+
# Ignores the start tag and reports a parse error.
def startTagMisplaced(name, attributes)
  # Elements that should be children of other elements that have a
  # different insertion mode; here they are ignored
  # "caption", "col", "colgroup", "frame", "frameset", "head",
  # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
  # "tr", "noscript"
  details = {"name" => name}
  parse_error("unexpected-start-tag-ignored", details)
end
|
318
|
+
|
319
|
+
# Start-tag handler for the new HTML5 elements; currently just delegates to
# startTagOther.
def startTagNew(name, attributes)
  # New HTML5 elements, "event-source", "section", "nav",
  # "article", "aside", "header", "footer", "datagrid", "command"
  # $stderr.puts("Warning: Undefined behaviour for start tag #{name}")
  # raise NotImplementedError
  startTagOther(name, attributes)
end
|
326
|
+
|
327
|
+
# Fallback start-tag handler: reconstructs the active formatting elements and
# inserts the element.
def startTagOther(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
end
|
331
|
+
|
332
|
+
# Handles </p>. With a p in scope, implied end tags (other than p) are
# generated and elements are popped until no p is in scope. Without one, an
# empty p is opened via startTagCloseP and then closed by a recursive call.
# A parse error is reported whenever the current node is not a p.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')

  current = @tree.open_elements.last
  parse_error("unexpected-end-tag", {"name" => "p"}) if current.name != 'p'

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  @tree.open_elements.pop while in_scope?('p')
end
|
342
|
+
|
343
|
+
# Handles </body>.
#
# When the second open element is not a body -- including when there is no
# second open element at all -- a parse error is reported and nothing else
# happens. Otherwise a parse error is reported if the current node is not the
# body, and the parser moves to its :afterBody phase.
#
# name - the end tag name (endTagHtml passes 'html' through here as well).
def endTagBody(name)
  # XXX Need to take open <p> tags into account here. We shouldn't imply
  # </p> but we should not throw a parse error either. Specification is
  # likely to be updated.
  body = @tree.open_elements[1]
  unless body && body.name == 'body'
    # inner_html case. The nil check mirrors the length guard in
    # startTagBody: with a single open element, [1] is nil.
    parse_error
    return
  end

  unless @tree.open_elements.last.name == 'body'
    parse_error("expected-one-end-tag-but-got-another",
      {"expectedName" => "body",
       "gotName" => @tree.open_elements.last.name})
  end
  @parser.phase = @parser.phases[:afterBody]
end
|
359
|
+
|
360
|
+
# Handles </html>: processes it like </body>, then -- unless the parser is in
# inner_html mode -- hands the end tag to whatever phase is now current.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.inner_html

  @parser.phase.processEndTag(name)
end
|
364
|
+
|
365
|
+
# Handles end tags of block elements. For </pre> the drop-newline flag is
# lowered first. With the element in scope, implied end tags are generated
# and open elements are removed up to it; a parse error is reported when the
# current node is not the named element.
def endTagBlock(name)
  # Put us back in the right whitespace handling mode
  @processSpaceCharactersDropNewline = false if name == 'pre'

  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  remove_open_elements_until(name) if in_scope?(name)
end
|
379
|
+
|
380
|
+
# Handles </form>. Implied end tags are generated when the form is in scope;
# the current node is popped only if it is the form (otherwise a parse error
# is reported). The tree's form pointer is cleared in every case.
def endTagForm(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  stack = @tree.open_elements
  if stack.last.name == name
    stack.pop
  else
    parse_error("end-tag-too-early-ignored", {"name" => "form"})
  end

  @tree.formPointer = nil
end
|
391
|
+
|
392
|
+
# Handles </li>, </dd> and </dt>. With the element in scope, implied end tags
# (excluding +name+) are generated and open elements are removed up to it; a
# parse error is reported when the current node is not the named element.
def endTagListItem(name)
  # AT Could merge this with the Block case
  @tree.generateImpliedEndTags(name) if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  return unless in_scope?(name)

  remove_open_elements_until(name)
end
|
402
|
+
|
403
|
+
# Handles </h1>..</h6>. If any heading element is in scope, implied end tags
# are generated and open elements are removed up to the nearest heading of any
# level. A parse error is reported when the current node is not +name+.
def endTagHeading(name)
  @tree.generateImpliedEndTags if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  HEADING_ELEMENTS.each do |heading|
    next unless in_scope?(heading)

    remove_open_elements_until { |open_node| HEADING_ELEMENTS.include?(open_node.name) }
    break
  end
end
|
422
|
+
|
423
|
+
# The much-feared adoption agency algorithm
|
424
|
+
# Handles end tags of formatting elements (see the handle_end 'Formatting'
# declaration) following the numbered steps of the referenced adoption agency
# algorithm. Each pass of the outer loop looks up the formatting element named
# +name+ in the active formatting elements and either returns (nothing to
# close, or no furthest block) or re-parents nodes around the furthest block
# and loops again.
#
# name - the end tag name.
def endTagFormatting(name)
  # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
  # XXX Better parse_error messages appreciated.
  boundaryNames = SPECIAL_ELEMENTS + SCOPING_ELEMENTS

  while true
    # Step 1 paragraph 1
    afeElement = @tree.elementInActiveFormattingElements(name)
    if !afeElement || (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
      parse_error("adoption-agency-1.1", {"name" => name})
      return
    # Step 1 paragraph 2
    elsif !@tree.open_elements.include?(afeElement)
      parse_error("adoption-agency-1.2", {"name" => name})
      @tree.activeFormattingElements.delete(afeElement)
      return
    end

    # Step 1 paragraph 3
    if afeElement != @tree.open_elements.last
      parse_error("adoption-agency-1.3", {"name" => name})
    end

    # Step 2
    # Start of the adoption agency algorithm proper
    afeIndex = @tree.open_elements.index(afeElement)
    # First special/scoping element at or after the formatting element.
    furthestBlock = @tree.open_elements[afeIndex..-1].find do |candidate|
      boundaryNames.include?(candidate.name)
    end

    # Step 3
    if furthestBlock.nil?
      removed = remove_open_elements_until { |candidate| candidate == afeElement }
      @tree.activeFormattingElements.delete(removed)
      return
    end
    commonAncestor = @tree.open_elements[afeIndex - 1]

    # Step 5
    furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(afeElement)

    # Step 7
    lastNode = node = furthestBlock
    while true
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      until @tree.activeFormattingElements.include?(node)
        tmpNode = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(tmpNode)
      end
      # Step 7.3
      break if node == afeElement
      # Step 7.4
      if lastNode == furthestBlock
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end
      # Step 7.5
      if node.hasContent
        clone = node.cloneNode
        # Replace node with clone
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
        @tree.open_elements[@tree.open_elements.index(node)] = clone
        node = clone
      end
      # Step 7.6
      # Remove lastNode from its parents, if any
      lastNode.parent.removeChild(lastNode) if lastNode.parent
      node.appendChild(lastNode)
      # Step 7.7
      lastNode = node
      # End of inner loop
    end

    # Step 8
    lastNode.parent.removeChild(lastNode) if lastNode.parent
    commonAncestor.appendChild(lastNode)

    # Step 9
    clone = afeElement.cloneNode

    # Step 10
    furthestBlock.reparentChildren(clone)

    # Step 11
    furthestBlock.appendChild(clone)

    # Step 12
    @tree.activeFormattingElements.delete(afeElement)
    @tree.activeFormattingElements.insert([bookmark, @tree.activeFormattingElements.length].min, clone)

    # Step 13
    @tree.open_elements.delete(afeElement)
    @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
  end
end
|
534
|
+
|
535
|
+
# Handles </button>, </marquee> and </object>. With the element in scope,
# implied end tags are generated, open elements are removed up to it and the
# active formatting elements are cleared. A parse error is reported when the
# current node is not the named element.
def endTagButtonMarqueeObject(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  return unless in_scope?(name)

  remove_open_elements_until(name)
  @tree.clearActiveFormattingElements
end
|
548
|
+
|
549
|
+
# Reports a parse error for the end tag and does nothing else.
def endTagMisplaced(name)
  # This handles elements with end tags in other insertion modes.
  details = {"name" => name}
  parse_error("unexpected-end-tag", details)
end
|
553
|
+
|
554
|
+
# Handles </br>: reports a parse error, then inserts an attribute-less element
# named +name+ and immediately pops it off the open-element stack.
def endTagBr(name)
  renamed = {"originalName" => "br", "newName" => "br element"}
  parse_error("unexpected-end-tag-treated-as", renamed)

  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, {})
  tree.open_elements.pop
end
|
561
|
+
|
562
|
+
# Reports a parse error for the end tag and does nothing else.
def endTagNone(name)
  # This handles elements with no end tag.
  details = {"name" => name}
  parse_error("no-end-tag", details)
end
|
566
|
+
|
567
|
+
# Pops the current node when it is the named element; otherwise reports a
# parse error and leaves the stack untouched.
def endTagCdataTextAreaXmp(name)
  stack = @tree.open_elements
  return stack.pop if stack.last.name == name

  parse_error("unexpected-end-tag", {"name" => name})
end
|
574
|
+
|
575
|
+
# End-tag handler for the new HTML5 elements; currently just delegates to
# endTagOther.
def endTagNew(name)
  # New HTML5 elements, "event-source", "section", "nav",
  # "article", "aside", "header", "footer", "datagrid", "command"
  # STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
  # raise NotImplementedError
  endTagOther(name)
end
|
582
|
+
|
583
|
+
# Fallback end-tag handler. Scans the open elements from the innermost
# outwards: on the first element named +name+, implied end tags are generated
# and elements are removed up to that node (with a parse error if it was not
# the current node). Reaching a special or scoping element first is a parse
# error and ends the scan.
def endTagOther(name)
  # XXX This logic should be moved into the treebuilder
  boundary_names = SPECIAL_ELEMENTS + SCOPING_ELEMENTS

  @tree.open_elements.reverse.each do |candidate|
    if candidate.name == name
      @tree.generateImpliedEndTags

      if @tree.open_elements.last.name != name
        parse_error("unexpected-end-tag", {"name" => name})
      end

      remove_open_elements_until { |open_node| open_node == candidate }
      break
    end

    next unless boundary_names.include?(candidate.name)

    parse_error("unexpected-end-tag", {"name" => name})
    break
  end
end
|
604
|
+
|
605
|
+
protected
|
606
|
+
|
607
|
+
# Inserts the element and pushes the resulting current node onto the active
# formatting elements.
def addFormattingElement(name, attributes)
  tree = @tree
  tree.insert_element(name, attributes)
  formatting = tree.activeFormattingElements
  formatting.push(tree.open_elements.last)
end
|
611
|
+
|
612
|
+
end
|
613
|
+
end
|