gammo 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.travis.yml +6 -0
  4. data/Gemfile +9 -0
  5. data/Gemfile.lock +27 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +177 -0
  8. data/Rakefile +25 -0
  9. data/gammo.gemspec +23 -0
  10. data/lib/gammo.rb +15 -0
  11. data/lib/gammo/attribute.rb +17 -0
  12. data/lib/gammo/fragment_parser.rb +65 -0
  13. data/lib/gammo/node.rb +157 -0
  14. data/lib/gammo/parser.rb +524 -0
  15. data/lib/gammo/parser/constants.rb +94 -0
  16. data/lib/gammo/parser/foreign.rb +307 -0
  17. data/lib/gammo/parser/insertion_mode.rb +74 -0
  18. data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
  19. data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
  20. data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
  21. data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
  22. data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
  23. data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
  24. data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
  25. data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
  26. data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
  27. data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
  28. data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
  29. data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
  30. data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
  31. data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
  32. data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
  33. data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
  34. data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
  35. data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
  36. data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
  37. data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
  38. data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
  39. data/lib/gammo/parser/insertion_mode/text.rb +32 -0
  40. data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
  41. data/lib/gammo/parser/node_stack.rb +24 -0
  42. data/lib/gammo/tags.rb +9 -0
  43. data/lib/gammo/tags/table.rb +744 -0
  44. data/lib/gammo/tokenizer.rb +373 -0
  45. data/lib/gammo/tokenizer/debug.rb +34 -0
  46. data/lib/gammo/tokenizer/entity.rb +2240 -0
  47. data/lib/gammo/tokenizer/escape.rb +174 -0
  48. data/lib/gammo/tokenizer/script_scanner.rb +229 -0
  49. data/lib/gammo/tokenizer/tokens.rb +66 -0
  50. data/lib/gammo/version.rb +3 -0
  51. data/misc/html.yaml +384 -0
  52. data/misc/table.erubi +14 -0
  53. metadata +97 -0
@@ -0,0 +1,36 @@
1
module Gammo
  class Parser
    # The "after after body" insertion mode (HTML spec section 12.2.6.4.22).
    #
    # Every handler either calls +halt+ with a boolean to finish the current
    # token, or returns normally.
    # NOTE(review): InsertionMode#process is not visible here; handlers that
    # return without halting presumably fall through to #default -- confirm.
    class AfterAfterBody < InsertionMode
      # The token is ignored; processing of it stops here.
      def error_token(_)
        halt true
      end

      # Text consisting only of leading-strippable whitespace is handed to
      # the in-body rules; anything else is left for #default.
      def text_token(token)
        return unless token.data.lstrip.empty?

        halt InBody.new(parser).process
      end

      # An <html> start tag is processed with the in-body rules.
      def start_tag_token(token)
        return unless token.tag == Tags::Html

        halt InBody.new(parser).process
      end

      # Comments are appended directly to the document node.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.document.append_child(comment)
        halt true
      end

      # Doctype tokens are processed with the in-body rules.
      def doctype_token(token)
        in_body = InBody.new(parser)
        halt in_body.process
      end

      # Anything else: switch the parser to the in-body mode and halt with
      # +false+.
      def default(_)
        parser.insertion_mode = InBody
        halt false
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Gammo
  class Parser
    # The "after after frameset" insertion mode (HTML spec section
    # 12.2.6.4.23).
    #
    # NOTE(review): InsertionMode#process is not visible here; handlers that
    # return without calling +halt+ presumably fall through to #default, which
    # ignores the token -- confirm.
    class AfterAfterFrameset < InsertionMode
      # Comments are appended directly to the document node.
      def comment_token(token)
        parser.document.append_child Node::Comment.new(data: token.data)
      end

      # Only the whitespace contained in a text token is meaningful in this
      # mode; every other character must be ignored.
      #
      # The token is therefore rewritten to hold just its whitespace before it
      # is handed to the in-body rules. Without that rewrite the in-body rules
      # would receive (and insert) the non-whitespace characters as well.
      def text_token(token)
        whitespace = token.data.gsub(/[^\s]/, '')
        return if whitespace.empty?

        token.data = whitespace
        halt InBody.new(parser).process
      end

      # <html> is processed with the in-body rules, <noframes> with the
      # in-head rules; any other start tag is left for #default.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Noframes
          halt InHead.new(parser).process
        end
      end

      # Doctype tokens are processed with the in-body rules.
      def doctype_token(token)
        halt InBody.new(parser).process
      end

      # Anything else is ignored.
      def default(_)
        halt true
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'gammo/node'
2
module Gammo
  class Parser
    # The "after body" insertion mode (HTML spec section 12.2.6.4.19).
    #
    # NOTE(review): InsertionMode#process is not visible here; handlers that
    # return without calling +halt+ presumably fall through to #default, which
    # switches the parser back to the in-body mode -- confirm.
    class AfterBody < InsertionMode
      # The token is ignored. +halt+ is required here: a bare +true+ would
      # merely be the method's return value and would not stop the token from
      # being handled by #default.
      def error_token(_)
        halt true
      end

      # Whitespace-only text is handed to the in-body rules; anything else is
      # left for #default.
      def text_token(token)
        halt InBody.new(parser).process if token.data.lstrip.empty?
      end

      # An <html> start tag is processed with the in-body rules.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        end
      end

      # </html> moves the parser to the "after after body" mode unless a
      # fragment is being parsed; either way the token is consumed.
      def end_tag_token(token)
        case token.tag
        when Tags::Html
          parser.insertion_mode = AfterAfterBody unless parser.fragment?
          halt true
        end
      end

      # Comments are appended to the first element on the stack of open
      # elements, which must be the <html> element.
      #
      # @raise [ParseError] if the stack is empty or its first element is not
      #   <html>.
      def comment_token(token)
        open_elements = parser.open_elements
        if open_elements.empty? || open_elements.first.tag != Tags::Html
          raise ParseError, 'bad parser state: <html> element not found, in the after-body insertion mode'
        end
        open_elements.first.append_child Node::Comment.new(data: token.data)
        halt true
      end

      # Anything else: switch the parser to the in-body mode and halt with
      # +false+.
      def default(_)
        parser.insertion_mode = InBody
        halt false
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'gammo/parser/insertion_mode/after_after_frameset'
2
+
3
module Gammo
  class Parser
    # The "after frameset" insertion mode (HTML spec section 12.2.6.4.21).
    #
    # NOTE(review): InsertionMode#process is not visible here; handlers that
    # return without calling +halt+ presumably fall through to #default, which
    # ignores the token -- confirm.
    class AfterFrameset < InsertionMode
      # Comments are added as a child via the parser.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child comment
      end

      # Only the whitespace characters of a text token are kept and added as
      # text; all other characters are dropped.
      def text_token(token)
        whitespace = token.data.gsub(/[^\s]/, '')
        return if whitespace.empty?

        parser.add_text(whitespace)
      end

      # <html> is processed with the in-body rules, <noframes> with the
      # in-head rules.
      def start_tag_token(token)
        case token.tag
        when Tags::Html     then halt InBody.new(parser).process
        when Tags::Noframes then halt InHead.new(parser).process
        end
      end

      # </html> moves the parser to the "after after frameset" mode.
      def end_tag_token(token)
        case token.tag
        when Tags::Html
          parser.insertion_mode = AfterAfterFrameset
          halt true
        end
      end

      # Anything else is ignored.
      def default(_)
        halt true
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
module Gammo
  class Parser
    # The "after head" insertion mode (HTML spec section 12.2.6.4.5).
    #
    # NOTE(review): InsertionMode#process is not visible here; handlers that
    # return without calling +halt+ presumably fall through to #default, which
    # creates the implied <body> -- confirm.
    class AfterHead < InsertionMode
      # Leading whitespace is added to the current node as text. If nothing
      # remains the token is consumed; otherwise the token is trimmed to the
      # remainder and left for #default.
      def text_token(token)
        rest = token.data.lstrip
        return unless rest.length < token.data.length

        leading = token.data.slice(0, token.data.length - rest.length)
        parser.add_text leading
        halt true if rest == ''
        token.data = rest
      end

      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Body
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InBody
          halt true
        when Tags::Frameset
          parser.add_element
          parser.insertion_mode = InFrameset
          halt true
        when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta,
             Tags::Noframes, Tags::Script, Tags::Style, Tags::Template, Tags::Title
          # The head element is put back on the stack of open elements while
          # the in-head rules run, and removed again afterwards even though
          # +halt+ leaves this method non-locally.
          parser.open_elements << parser.head
          begin
            halt InHead.new(parser).process
          ensure
            parser.open_elements.delete(parser.head)
          end
        when Tags::Head
          # A second <head> is ignored.
          halt true
        end
      end

      def end_tag_token(token)
        case token.tag
        when Tags::Body, Tags::Html, Tags::Br
          # Nothing to do here: drop down to creating an implied <body> tag.
        when Tags::Template
          halt InHead.new(parser).process
        else
          # Every other end tag is ignored.
          halt true
        end
      end

      # Comments are added as a child via the parser.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child comment
        halt true
      end

      # Doctype tokens are ignored.
      def doctype_token(token)
        halt true
      end

      # Anything else: parse an implied <body> start tag, mark frameset as
      # still allowed, and halt with +false+.
      def default(_)
        body = Tags::Body
        parser.parse_implied_token(Tokenizer::StartTagToken, body, body.to_s)
        parser.frameset_ok = true
        halt false
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Gammo
  class Parser
    # The "before head" insertion mode (HTML spec section 12.2.6.4.3).
    #
    # NOTE(review): InsertionMode#process is not visible here; handlers that
    # return without calling +halt+ presumably fall through to #default, which
    # creates the implied <head> -- confirm.
    class BeforeHead < InsertionMode
      # Leading whitespace is stripped from the token; a token that becomes
      # empty is consumed.
      def text_token(token)
        token.data = token.data.lstrip
        halt true if token.data.empty?
      end

      def start_tag_token(token)
        case token.tag
        when Tags::Head
          # Insert the element and remember it as the document's head.
          parser.add_element
          parser.head = parser.top
          parser.insertion_mode = InHead
          halt true
        when Tags::Html
          halt InBody.new(parser).process
        end
      end

      # </head>, </body>, </html> and </br> trigger an implied <head> start
      # tag; every other end tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Head, Tags::Body, Tags::Html, Tags::Br
          parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Head, Tags::Head.to_s)
          halt false
        else
          halt true
        end
      end

      # Comments are added as a child via the parser.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child(comment)
        halt true
      end

      # Doctype tokens are ignored.
      def doctype_token(token)
        halt true
      end

      # Anything else: parse an implied <head> start tag and halt with
      # +false+.
      def default(_)
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Head, Tags::Head.to_s)
        halt false
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module Gammo
  class Parser
    # The "before html" insertion mode (HTML spec section 12.2.6.4.2).
    #
    # NOTE(review): InsertionMode#process is not visible here; handlers that
    # return without calling +halt+ presumably fall through to #default, which
    # creates the implied <html> -- confirm.
    class BeforeHTML < InsertionMode
      # Doctype tokens are ignored.
      def doctype_token(_)
        halt true
      end

      # Leading whitespace is stripped from the token; if nothing is left the
      # token was all whitespace and is ignored.
      def text_token(token)
        token.data = token.data.lstrip
        halt true if token.data.empty?
      end

      # An <html> start tag is inserted and moves the parser to the
      # "before head" mode; other start tags are left for #default.
      def start_tag_token(token)
        return if token.tag != Tags::Html

        parser.add_element
        parser.insertion_mode = BeforeHead
        halt true
      end

      # </head>, </body>, </html> and </br> trigger an implied <html> start
      # tag; every other end tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Head, Tags::Body, Tags::Html, Tags::Br
          parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Html, Tags::Html.to_s)
          halt false
        else
          halt true
        end
      end

      # Comments are appended directly to the document node.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.document.append_child(comment)
        halt true
      end

      # Anything else: parse an implied <html> start tag and halt with
      # +false+.
      def default(_)
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Html, Tags::Html.to_s)
        halt false
      end
    end
  end
end
@@ -0,0 +1,463 @@
1
module Gammo
  class Parser
    # The "in body" insertion mode (HTML spec section 12.2.6.4.6).
    #
    # Handlers call +halt+ with a boolean to stop processing the current token.
    # NOTE(review): InsertionMode#process is not visible here; handlers that
    # return without calling +halt+ presumably fall through to #default, which
    # halts with +true+ -- confirm.
    class InBody < InsertionMode
      # Inserts character data into the current node.
      #
      # A single leading CR and/or LF is dropped when the current node is an
      # empty <pre> or <listing>; NUL characters are always removed. Text that
      # contains anything other than leading-strippable whitespace clears the
      # parser's frameset_ok flag.
      def text_token(token)
        data = token.data
        node = parser.open_elements.last
        case node.tag
        when Tags::Pre, Tags::Listing
          unless node.first_child
            # ignore a newline at the start of the <pre> block.
            data = data.slice(1..-1) if data.start_with?("\r")
            data = data.slice(1..-1) if data.start_with?("\n")
          end
        end
        data = data.gsub("\x00", '')
        halt true if data.empty?
        parser.reconstruct_active_formatting_elements
        parser.frameset_ok = false if parser.frameset_ok && !data.lstrip.empty?
        parser.add_text(data)
      end

      # Dispatches on the start tag. Branches that do not +halt+ return
      # normally once the element has been handled.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt true if parser.open_elements.any? { |oe| oe.tag == Tags::Template }
          copy_attributes(parser.open_elements[0], token)
        when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta,
             Tags::Noframes, Tags::Script, Tags::Style, Tags::Template, Tags::Title
          halt InHead.new(parser).process
        when Tags::Body
          halt true if parser.open_elements.any? { |oe| oe.tag == Tags::Template }
          if parser.open_elements.length >= 2
            body = parser.open_elements[1]
            if body.instance_of?(Node::Element) && body.tag == Tags::Body
              parser.frameset_ok = false
              copy_attributes(body, parser.token)
            end
          end
        when Tags::Frameset
          if !parser.frameset_ok || parser.open_elements.length < 2 || parser.open_elements[1].tag != Tags::Body
            # ignore the token
            halt true
          end
          # Replace the <body> element with the frameset: detach it from its
          # parent and cut the stack of open elements back to its first entry.
          body = parser.open_elements[1]
          body.parent.remove_child(body) if body.parent
          parser.open_elements = parser.open_elements.slice(0, 1)
          parser.add_element
          parser.insertion_mode = InFrameset
          halt true
        when Tags::Address, Tags::Article, Tags::Aside, Tags::Blockquote,
             Tags::Center, Tags::Dialog, Tags::Details, Tags::Dir, Tags::Div,
             Tags::Dl, Tags::Fieldset, Tags::Figcaption, Tags::Figure,
             Tags::Footer, Tags::Header, Tags::Hgroup, Tags::Main, Tags::Menu,
             Tags::Nav, Tags::Ol, Tags::P, Tags::Section, Tags::Summary, Tags::Ul
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
        when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          # Headings do not nest: drop a heading that is the current node.
          case parser.top.tag
          when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
            parser.open_elements.pop
          end
          parser.add_element
        when Tags::Pre, Tags::Listing
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
          parser.frameset_ok = false
        when Tags::Form
          # ignore the token.
          halt true if parser.form && !parser.open_elements.any? { |oe| oe.tag == Tags::Template }
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
          parser.form = parser.top unless parser.open_elements.any? { |oe| oe.tag == Tags::Template }
        when Tags::Li
          parser.frameset_ok = false
          # Walk the stack from the top: cut it at an open <li>, skip over
          # address/div/p and non-special elements, stop at anything else.
          parser.open_elements.reverse_each_with_index do |open_element, index|
            case open_element.tag
            when Tags::Li then parser.open_elements = parser.open_elements.slice(0, index)
            when Tags::Address, Tags::Div, Tags::P then next
            else
              next unless parser.special_element?(open_element)
            end
            break
          end
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
        when Tags::Dd, Tags::Dt
          parser.frameset_ok = false
          # Same walk as for <li>, but cutting at an open <dd> or <dt>.
          parser.open_elements.reverse_each_with_index do |open_element, index|
            case open_element.tag
            when Tags::Dd, Tags::Dt then parser.open_elements = parser.open_elements.slice(0, index)
            when Tags::Address, Tags::Div, Tags::P then next
            else
              next unless parser.special_element?(open_element)
            end
            break
          end
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
        when Tags::Plaintext
          parser.pop_until BUTTON_SCOPE, Tags::P
          parser.add_element
        when Tags::Button
          parser.pop_until DEFAULT_SCOPE, Tags::Button
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.frameset_ok = false
        when Tags::A
          # Close an <a> that is still active (after the last scope marker)
          # before opening the new one.
          parser.active_formatting_elements.reverse_each do |afe|
            break if afe.is_a?(Node::ScopeMarker)
            next unless afe.instance_of?(Node::Element) && afe.tag == Tags::A
            adoption_agency_for_end_tag_formatting(Tags::A, "a")
            parser.open_elements.delete(afe)
            parser.active_formatting_elements.delete(afe)
            break
          end
          parser.reconstruct_active_formatting_elements
          parser.add_formatting_element
        when Tags::B, Tags::Big, Tags::Code, Tags::Em, Tags::Font, Tags::I,
             Tags::S, Tags::Small, Tags::Strike, Tags::Strong, Tags::Tt, Tags::U
          parser.reconstruct_active_formatting_elements
          parser.add_formatting_element
        when Tags::Nobr
          parser.reconstruct_active_formatting_elements
          if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Nobr)
            adoption_agency_for_end_tag_formatting(Tags::Nobr, "nobr")
            parser.reconstruct_active_formatting_elements
          end
          parser.add_formatting_element
        when Tags::Applet, Tags::Marquee, Tags::Object
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.active_formatting_elements << Node::DEFAULT_SCOPE_MARKER
          parser.frameset_ok = false
        when Tags::Table
          parser.pop_until(BUTTON_SCOPE, Tags::P) unless parser.quirks
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InTable
          halt true
        when Tags::Area, Tags::Br, Tags::Embed, Tags::Img, Tags::Input, Tags::Keygen, Tags::Wbr
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
          if token.tag == Tags::Input
            token.attributes.each do |attr|
              # skip setting frameset_ok = false
              halt true if attr.key == 'type' && attr.value.downcase == 'hidden'
            end
          end
          parser.frameset_ok = false
        when Tags::Param, Tags::Source, Tags::Track
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
        when Tags::Hr
          parser.pop_until BUTTON_SCOPE, Tags::P
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
          parser.frameset_ok = false
        when Tags::Image
          # <image> is rewritten to <img> and the token is reprocessed.
          token.tag = Tags::Img
          # todo: fixme <img>
          token.data = Tags::Img.to_s
          halt false
        when Tags::Textarea
          parser.add_element
          parser.set_original_insertion_mode
          parser.frameset_ok = false
          parser.insertion_mode = Text
        when Tags::Xmp
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.reconstruct_active_formatting_elements
          parser.frameset_ok = false
          parser.add_element
          parser.set_original_insertion_mode
          parser.insertion_mode = Text
        when Tags::Iframe
          parser.frameset_ok = false
          parser.parse_generic_raw_text_element
        when Tags::Noembed
          parser.parse_generic_raw_text_element
        when Tags::Noscript
          if parser.scripting?
            parser.parse_generic_raw_text_element
            halt true
          end
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.tokenizer.next_is_not_raw_text!
        when Tags::Select
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InSelect
          halt true
        when Tags::Optgroup, Tags::Option
          parser.open_elements.pop if parser.top.tag == Tags::Option
          parser.reconstruct_active_formatting_elements
          parser.add_element
        when Tags::Rb, Tags::Rtc
          parser.generate_implied_end_tags if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Ruby)
          parser.add_element
        when Tags::Rp, Tags::Rt
          parser.generate_implied_end_tags('rtc') if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Ruby)
          parser.add_element
        when Tags::Math, Tags::Svg
          parser.reconstruct_active_formatting_elements
          adjustments =
            if token.tag == Tags::Math
              Parser::MATH_ML_ATTRIBUTE_ADJUSTMENTS
            else
              Parser::SVG_ATTRIBUTE_ADJUSTMENTS
            end
          parser.adjust_attribute_names(token.attributes, adjustments)
          parser.adjust_foreign_attributes(token.attributes)
          parser.add_element
          parser.top.namespace = token.data
          if parser.has_self_closing_token
            parser.open_elements.pop
            parser.acknowledge_self_closing_tag
          end
          halt true
        when Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Frame, Tags::Head,
             Tags::Tbody, Tags::Td, Tags::Tfoot, Tags::Th, Tags::Thead, Tags::Tr
          # ignore the token.
        else
          parser.reconstruct_active_formatting_elements
          parser.add_element
        end
      end

      # Dispatches on the end tag.
      def end_tag_token(token)
        case token.tag
        when Tags::Body
          parser.insertion_mode = AfterBody if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Body)
        when Tags::Html
          halt true unless parser.element_in_scope?(DEFAULT_SCOPE, Tags::Body)
          parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Body, Tags::Body.to_s)
          halt false
        when Tags::Address, Tags::Article, Tags::Aside, Tags::Blockquote,
             Tags::Button, Tags::Center, Tags::Dialog, Tags::Details, Tags::Dir,
             Tags::Div, Tags::Dl, Tags::Fieldset, Tags::Figcaption, Tags::Figure,
             Tags::Footer, Tags::Header, Tags::Hgroup, Tags::Listing, Tags::Main,
             Tags::Menu, Tags::Nav, Tags::Ol, Tags::Pre, Tags::Section,
             Tags::Summary, Tags::Ul
          parser.pop_until(DEFAULT_SCOPE, token.tag)
        when Tags::Form
          if parser.open_elements.any? { |oe| oe.tag == Tags::Template }
            index = parser.index_of_element_in_scope(DEFAULT_SCOPE, Tags::Form)
            # ignore the token.
            halt true if index == -1
            parser.generate_implied_end_tags
            # ignore the token.
            halt true if parser.open_elements[index].tag != Tags::Form
            parser.pop_until(DEFAULT_SCOPE, Tags::Form)
          else
            node = parser.form
            parser.form = nil
            index = parser.index_of_element_in_scope(DEFAULT_SCOPE, Tags::Form)
            # ignore the token.
            halt true if node.nil? || index == -1 || parser.open_elements[index] != node
            parser.generate_implied_end_tags
            parser.open_elements.delete(node)
          end
        when Tags::P
          # A stray </p> first opens an implied <p> so there is one to close.
          unless parser.element_in_scope?(BUTTON_SCOPE, Tags::P)
            parser.parse_implied_token(Tokenizer::StartTagToken, Tags::P, Tags::P.to_s)
          end
          parser.pop_until(BUTTON_SCOPE, Tags::P)
        when Tags::Li
          parser.pop_until(LIST_ITEM_SCOPE, Tags::Li)
        when Tags::Dd, Tags::Dt
          parser.pop_until(DEFAULT_SCOPE, token.tag)
        when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
          parser.pop_until(DEFAULT_SCOPE, Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6)
        when Tags::A, Tags::B, Tags::Big, Tags::Code, Tags::Em, Tags::Font,
             Tags::I, Tags::Nobr, Tags::S, Tags::Small, Tags::Strike,
             Tags::Strong, Tags::Tt, Tags::U
          adoption_agency_for_end_tag_formatting(token.tag, token.data)
        when Tags::Applet, Tags::Marquee, Tags::Object
          parser.clear_active_formatting_elements if parser.pop_until(DEFAULT_SCOPE, token.tag)
        when Tags::Br
          # </br> is reprocessed as a <br> start tag.
          # FIXME
          parser.token = Tokenizer::StartTagToken.new(token.data, tag: token.tag)
          halt false
        when Tags::Template
          halt InHead.new(parser).process
        else
          adoption_agency_for_end_tag_formatting(token.tag, token.data)
        end
      end

      # Comments are added as a child via the parser.
      def comment_token(token)
        parser.add_child Node::Comment.new(data: token.data)
      end

      # With templates still open the token is reprocessed in the in-template
      # mode; otherwise the token is consumed.
      def error_token(token)
        if parser.template_stack.length > 0
          parser.insertion_mode = InTemplate
          halt false
        end

        # Stop at the first open element that is not allowed to remain open
        # here. Both outcomes currently halt with +true+.
        parser.open_elements.each do |oe|
          case oe.tag
          when Tags::Dd, Tags::Dt, Tags::Li, Tags::Optgroup, Tags::Option, Tags::P,
               Tags::Rb, Tags::Rp, Tags::Rt, Tags::Rtc, Tags::Tbody, Tags::Td, Tags::Tfoot,
               Tags::Th, Tags::Thead, Tags::Tr, Tags::Body, Tags::Html
            next
          else
            halt true
          end
        end
        halt true
      end

      # Every token that reaches this point has been handled.
      def default(_)
        halt true
      end

      # Implements "adoption agency" algorithm.
      # https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
      #
      # +tag+ is compared against element tags, +tagname+ against element
      # data (the tag's name as a string).
      # @!visibility private
      def adoption_agency_for_end_tag_formatting(tag, tagname)
        # Step 1-2. If the current node carries the tag name and is not an
        # active formatting element, just pop it. (Membership is tested with
        # include?: index reports absence as nil, never as -1.)
        current = parser.open_elements.last
        if current.data == tagname && !parser.active_formatting_elements.include?(current)
          parser.open_elements.pop
          return
        end

        # Step 3-5. The outer loop
        8.times do
          # Step 6: Find the formatting element.
          formatting_element = nil
          parser.active_formatting_elements.reverse_each do |afe|
            break if afe.instance_of? Node::ScopeMarker
            if afe.tag == tag
              formatting_element = afe
              break
            end
          end
          unless formatting_element
            adoption_agency_for_end_tag_other(tag, tagname)
            return
          end
          # Step 7. Ignore the tag if formatting element is not in the stack of
          # open elements.
          index = parser.open_elements.index(formatting_element)
          unless index
            parser.active_formatting_elements.delete(formatting_element)
            return
          end
          # Step 8. Ignore the tag if formatting element is not in the scope.
          return unless parser.element_in_scope?(DEFAULT_SCOPE, tag)

          # Step 9. This step is omitted because it's just a parse error but no
          # need to return.

          # Step 10-11. Find the furthest block.
          furthest_block = parser.open_elements.slice(index..-1).find(&parser.method(:special_element?))
          unless furthest_block
            element = parser.open_elements.pop
            element = parser.open_elements.pop while element != formatting_element
            parser.active_formatting_elements.delete(element)
            return
          end

          # Step 12-13. Find the common ancestor and bookmark node.
          common_ancestor = parser.open_elements[index - 1]
          bookmark = parser.active_formatting_elements.index(formatting_element)

          # Step 14. The inner loop. find the last node to reparent.
          last_node = furthest_block
          node = furthest_block
          x = parser.open_elements.index(node)
          # Step 14.1.
          j = 0
          loop do
            # Step 14.2.
            j += 1
            # Step 14.3.
            x -= 1
            node = parser.open_elements[x]
            # Step 14.4.
            break if node == formatting_element

            # Step 14.5. Remove node from the list of active formatting elements if
            # inner loop counter is greater than three and node is in the list of
            # active formatting elements.
            ni = parser.active_formatting_elements.index(node)
            if ni && j > 3
              parser.active_formatting_elements.delete(node)
              # If any element of the list of active formatting elements is removed,
              # we need to take care whether bookmark should be decremented or not.
              # This is because the value of bookmark may exceed the size of the
              # list by removing elements from the list.
              bookmark -= 1 if ni <= bookmark
              next
            end
            # Step 14.6. Continue the next inner loop if node is not in the list of
            # active formatting elements.
            unless parser.active_formatting_elements.include?(node)
              parser.open_elements.delete(node)
              next
            end
            # Step 14.7
            clone = node.clone
            afei = parser.active_formatting_elements.index(node)
            oei = parser.open_elements.index(node)
            raise ParseError, 'bad parser state: expected elements are not found' if !(afei && oei)
            parser.active_formatting_elements[afei] = clone
            parser.open_elements[oei] = clone
            node = clone
            # Step 14.8. The clone was stored in the list just above, so its
            # index is always present.
            bookmark = parser.active_formatting_elements.index(node) + 1 if last_node == furthest_block
            # Step 14.9
            last_node.parent.remove_child(last_node) if last_node.parent
            node.append_child(last_node)
            # Step 14.10
            last_node = node
          end
          # Step 15. Reparent last_node to the common ancestor,
          # or for misnested table nodes, to the foster parent.
          last_node.parent.remove_child(last_node) if last_node.parent
          case common_ancestor.tag
          when Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr
            parser.foster_parent(last_node)
          else
            common_ancestor.append_child(last_node)
          end

          # Steps 16-18. Reparent nodes from the furthest block's children
          # to a clone of the formatting element.
          clone = formatting_element.clone
          reparent_children(clone, furthest_block)
          furthest_block.append_child(clone)

          # Step 19. Fix up the list of active formatting elements.
          old_loc = parser.active_formatting_elements.index(formatting_element)
          bookmark -= 1 if old_loc && old_loc < bookmark
          parser.active_formatting_elements.delete(formatting_element)
          parser.active_formatting_elements.insert(bookmark, clone)

          # Step 20. Fix up the stack of open elements.
          parser.open_elements.delete(formatting_element)
          parser.open_elements.insert(parser.open_elements.index(furthest_block) + 1, clone)
        end
      end

      # Handles an end tag that has no formatting element: walking the stack
      # of open elements from the top, cut the stack at the first element with
      # the given tag and name, or give up at the first special element.
      # @!visibility private
      def adoption_agency_for_end_tag_other(tag, tagname)
        parser.open_elements.reverse_each_with_index do |open_element, index|
          if open_element.tag == tag && open_element.data == tagname
            parser.open_elements = parser.open_elements.slice(0, index)
            break
          end
          break if parser.special_element?(open_element)
        end
      end

      # Moves every child of +src+ to the end of +dst+, preserving order.
      # @!visibility private
      def reparent_children(dst, src)
        while (child = src.first_child)
          src.remove_child(child)
          dst.append_child(child)
        end
      end
    end
  end
end