gammo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.travis.yml +6 -0
  4. data/Gemfile +9 -0
  5. data/Gemfile.lock +27 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +177 -0
  8. data/Rakefile +25 -0
  9. data/gammo.gemspec +23 -0
  10. data/lib/gammo.rb +15 -0
  11. data/lib/gammo/attribute.rb +17 -0
  12. data/lib/gammo/fragment_parser.rb +65 -0
  13. data/lib/gammo/node.rb +157 -0
  14. data/lib/gammo/parser.rb +524 -0
  15. data/lib/gammo/parser/constants.rb +94 -0
  16. data/lib/gammo/parser/foreign.rb +307 -0
  17. data/lib/gammo/parser/insertion_mode.rb +74 -0
  18. data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
  19. data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
  20. data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
  21. data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
  22. data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
  23. data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
  24. data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
  25. data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
  26. data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
  27. data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
  28. data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
  29. data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
  30. data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
  31. data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
  32. data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
  33. data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
  34. data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
  35. data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
  36. data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
  37. data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
  38. data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
  39. data/lib/gammo/parser/insertion_mode/text.rb +32 -0
  40. data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
  41. data/lib/gammo/parser/node_stack.rb +24 -0
  42. data/lib/gammo/tags.rb +9 -0
  43. data/lib/gammo/tags/table.rb +744 -0
  44. data/lib/gammo/tokenizer.rb +373 -0
  45. data/lib/gammo/tokenizer/debug.rb +34 -0
  46. data/lib/gammo/tokenizer/entity.rb +2240 -0
  47. data/lib/gammo/tokenizer/escape.rb +174 -0
  48. data/lib/gammo/tokenizer/script_scanner.rb +229 -0
  49. data/lib/gammo/tokenizer/tokens.rb +66 -0
  50. data/lib/gammo/version.rb +3 -0
  51. data/misc/html.yaml +384 -0
  52. data/misc/table.erubi +14 -0
  53. metadata +97 -0
@@ -0,0 +1,36 @@
module Gammo
  class Parser
    # Section 12.2.6.4.22: the "after after body" insertion mode.
    #
    # Every handler either halts with a value or returns without halting.
    # NOTE(review): the dispatcher lives in InsertionMode (not shown here);
    # presumably `halt true` means the token has been consumed, `halt false`
    # means it has to be reprocessed, and a handler that returns without
    # halting falls through to #default -- confirm against InsertionMode.
    class AfterAfterBody < InsertionMode
      # The error token is ignored.
      def error_token(_token)
        halt true
      end

      # Text made up only of whitespace is handled with the in-body rules;
      # any other text returns without halting.
      #
      # @param token [#data] the text token
      def text_token(token)
        return unless token.data.lstrip.empty?

        halt InBody.new(parser).process
      end

      # An <html> start tag is handled with the in-body rules; every other
      # start tag returns without halting.
      #
      # @param token [#tag] the start tag token
      def start_tag_token(token)
        halt InBody.new(parser).process if token.tag == Tags::Html
      end

      # Comments are appended directly to the document node.
      #
      # @param token [#data] the comment token
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.document.append_child(comment)
        halt true
      end

      # Doctype tokens are handled with the in-body rules.
      def doctype_token(_token)
        halt InBody.new(parser).process
      end

      # Anything else switches the parser to the in-body insertion mode and
      # halts with false.
      def default(_)
        parser.insertion_mode = InBody
        halt false
      end
    end
  end
end
@@ -0,0 +1,32 @@
module Gammo
  class Parser
    # Section 12.2.6.4.23: the "after after frameset" insertion mode.
    #
    # NOTE(review): the dispatcher lives in InsertionMode (not shown here);
    # presumably `halt true` means the token has been consumed and a handler
    # that returns without halting falls through to #default -- confirm
    # against InsertionMode.
    class AfterAfterFrameset < InsertionMode
      # Appends the comment directly to the document node.
      #
      # @param token [#data] the comment token
      def comment_token(token)
        parser.document.append_child Node::Comment.new(data: token.data)
        # Halt explicitly, as the other comment handlers do, instead of relying
        # on #default to halt with the same value.
        halt true
      end

      # Only the whitespace of a text token is meaningful in this mode: the
      # whitespace characters are handled with the in-body rules and everything
      # else is ignored.
      #
      # @param token [#data, #data=] the text token; its data is narrowed to
      #   the whitespace characters before it is handed to the in-body rules
      def text_token(token)
        whitespace = token.data.gsub(/\S/, '')
        # Nothing but non-whitespace characters: leave the token to #default,
        # which ignores it.
        return if whitespace.empty?

        # Drop the non-whitespace characters so the in-body rules do not insert
        # text that this mode is supposed to ignore.
        token.data = whitespace
        halt InBody.new(parser).process
      end

      # <html> is handled with the in-body rules and <noframes> with the
      # in-head rules; every other start tag returns without halting.
      #
      # @param token [#tag] the start tag token
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Noframes
          halt InHead.new(parser).process
        end
      end

      # Doctype tokens are handled with the in-body rules.
      def doctype_token(_token)
        halt InBody.new(parser).process
      end

      # Anything else is ignored.
      def default(_)
        halt true
      end
    end
  end
end
@@ -0,0 +1,46 @@
require 'gammo/node'
module Gammo
  class Parser
    # Section 12.2.6.4.19: the "after body" insertion mode.
    #
    # NOTE(review): the dispatcher lives in InsertionMode (not shown here);
    # presumably `halt true` means the token has been consumed, `halt false`
    # means it has to be reprocessed, and a handler that returns without
    # halting falls through to #default -- confirm against InsertionMode.
    class AfterBody < InsertionMode
      # The error token is ignored.
      #
      # This has to go through `halt`, like AfterAfterBody#error_token does:
      # a bare `true` return value does not stop the handler chain, so the
      # token would reach #default, which switches the parser to the in-body
      # mode and asks for the token to be reprocessed.
      def error_token(_)
        halt true
      end

      # Text made up only of whitespace is handled with the in-body rules;
      # any other text returns without halting.
      #
      # @param token [#data] the text token
      def text_token(token)
        halt InBody.new(parser).process if token.data.lstrip.empty?
      end

      # An <html> start tag is handled with the in-body rules; every other
      # start tag returns without halting.
      #
      # @param token [#tag] the start tag token
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        end
      end

      # </html> moves the parser to the "after after body" mode unless a
      # fragment is being parsed; every other end tag returns without halting.
      #
      # @param token [#tag] the end tag token
      def end_tag_token(token)
        case token.tag
        when Tags::Html
          parser.insertion_mode = AfterAfterBody unless parser.fragment?
          halt true
        end
      end

      # Appends the comment to the first open element, which must be <html>.
      #
      # @param token [#data] the comment token
      # @raise [ParseError] if the stack of open elements is empty or does not
      #   start with an <html> element
      def comment_token(token)
        open_elements = parser.open_elements
        if open_elements.length < 1 || open_elements.first.tag != Tags::Html
          raise ParseError, 'bad parser state: <html> element not found, in the after-body insertion mode'
        end
        open_elements.first.append_child Node::Comment.new(data: token.data)
        halt true
      end

      # Anything else switches the parser to the in-body insertion mode and
      # halts with false.
      def default(_)
        parser.insertion_mode = InBody
        halt false
      end
    end
  end
end
@@ -0,0 +1,39 @@
require 'gammo/parser/insertion_mode/after_after_frameset'

module Gammo
  class Parser
    # Section 12.2.6.4.21: the "after frameset" insertion mode.
    #
    # NOTE(review): the dispatcher lives in InsertionMode (not shown here);
    # presumably a handler that returns without halting falls through to
    # #default, which ignores the token -- confirm against InsertionMode.
    class AfterFrameset < InsertionMode
      # Adds the comment as a child through the parser.
      #
      # @param token [#data] the comment token
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child(comment)
      end

      # Keeps only the whitespace characters of the text and adds them, if
      # there are any.
      #
      # @param token [#data] the text token
      def text_token(token)
        whitespace = token.data.gsub(/[^\s]/, '')
        return if whitespace.empty?

        parser.add_text(whitespace)
      end

      # <html> is handled with the in-body rules and <noframes> with the
      # in-head rules; every other start tag returns without halting.
      #
      # @param token [#tag] the start tag token
      def start_tag_token(token)
        delegate =
          case token.tag
          when Tags::Html then InBody
          when Tags::Noframes then InHead
          end
        halt delegate.new(parser).process if delegate
      end

      # </html> moves the parser to the "after after frameset" mode; every
      # other end tag returns without halting.
      #
      # @param token [#tag] the end tag token
      def end_tag_token(token)
        return unless token.tag == Tags::Html

        parser.insertion_mode = AfterAfterFrameset
        halt true
      end

      # Anything else is ignored.
      def default(_)
        halt true
      end
    end
  end
end
@@ -0,0 +1,70 @@
module Gammo
  class Parser
    # Section 12.2.6.4.5: the "after head" insertion mode.
    #
    # NOTE(review): the dispatcher lives in InsertionMode (not shown here);
    # presumably `halt true` means the token has been consumed, `halt false`
    # means it has to be reprocessed, and a handler that returns without
    # halting falls through to #default -- confirm against InsertionMode.
    class AfterHead < InsertionMode
      # Adds any leading whitespace as text and leaves the remainder in the
      # token. Halts with true when the text was whitespace only.
      #
      # @param token [#data, #data=] the text token
      def text_token(token)
        text = token.data
        remainder = text.lstrip
        leading = text.length - remainder.length
        return if leading.zero?

        # add the initial whitespace to the current node.
        parser.add_text(text.slice(0, leading))
        halt true if remainder.empty?
        token.data = remainder
      end

      # Dispatches on the start tag. Tags not listed here return without
      # halting.
      #
      # @param token [#tag] the start tag token
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Head
          # ignore the token
          halt true
        when Tags::Body
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InBody
          halt true
        when Tags::Frameset
          parser.add_element
          parser.insertion_mode = InFrameset
          halt true
        when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta,
             Tags::Noframes, Tags::Script, Tags::Style, Tags::Template, Tags::Title
          # The head element is put back on the stack of open elements while
          # the in-head rules run; `ensure` takes it off again even though
          # `halt` leaves this method early.
          parser.open_elements << parser.head
          begin
            halt InHead.new(parser).process
          ensure
            parser.open_elements.delete(parser.head)
          end
        end
      end

      # </body>, </html> and </br> return without halting (dropping down to
      # the implied <body> tag), </template> is handled with the in-head rules
      # and every other end tag is ignored.
      #
      # @param token [#tag] the end tag token
      def end_tag_token(token)
        tag = token.tag
        # drop down to creating an implied <body> tag.
        return if [Tags::Body, Tags::Html, Tags::Br].include?(tag)

        halt InHead.new(parser).process if tag == Tags::Template
        # ignore the token.
        halt true
      end

      # Adds the comment as a child through the parser.
      #
      # @param token [#data] the comment token
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child(comment)
        halt true
      end

      # Doctype tokens are ignored.
      def doctype_token(_token)
        halt true
      end

      # Anything else gets an implied <body> start tag, marks frameset as
      # still OK, and halts with false.
      def default(_)
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Body, Tags::Body.to_s)
        parser.frameset_ok = true
        halt false
      end
    end
  end
end
@@ -0,0 +1,49 @@
module Gammo
  class Parser
    # Section 12.2.6.4.3: the "before head" insertion mode.
    #
    # NOTE(review): the dispatcher lives in InsertionMode (not shown here);
    # presumably `halt true` means the token has been consumed, `halt false`
    # means it has to be reprocessed, and a handler that returns without
    # halting falls through to #default -- confirm against InsertionMode.
    class BeforeHead < InsertionMode
      # Strips leading whitespace from the token and halts with true when
      # nothing is left.
      #
      # @param token [#data, #data=] the text token
      def text_token(token)
        token.data = token.data.lstrip
        halt true if token.data.empty?
      end

      # <html> is handled with the in-body rules; <head> is inserted, recorded
      # as the parser's head element and switches the parser to the in-head
      # mode. Every other start tag returns without halting.
      #
      # @param token [#tag] the start tag token
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Head
          parser.add_element
          parser.head = parser.top
          parser.insertion_mode = InHead
          halt true
        end
      end

      # </head>, </body>, </html> and </br> trigger an implied <head> start tag
      # and halt with false; every other end tag is ignored.
      #
      # @param token [#tag] the end tag token
      def end_tag_token(token)
        implies_head = [Tags::Head, Tags::Body, Tags::Html, Tags::Br].include?(token.tag)
        # ignore the token.
        halt true unless implies_head

        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Head, Tags::Head.to_s)
        halt false
      end

      # Adds the comment as a child through the parser.
      #
      # @param token [#data] the comment token
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child(comment)
        halt true
      end

      # Doctype tokens are ignored.
      def doctype_token(_token)
        halt true
      end

      # Anything else gets an implied <head> start tag and halts with false.
      def default(_)
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Head, Tags::Head.to_s)
        halt false
      end
    end
  end
end
@@ -0,0 +1,45 @@
module Gammo
  class Parser
    # Section 12.2.6.4.2: the "before html" insertion mode.
    #
    # NOTE(review): the dispatcher lives in InsertionMode (not shown here);
    # presumably `halt true` means the token has been consumed, `halt false`
    # means it has to be reprocessed, and a handler that returns without
    # halting falls through to #default -- confirm against InsertionMode.
    class BeforeHTML < InsertionMode
      # Doctype tokens are ignored.
      def doctype_token(_)
        halt true
      end

      # Strips leading whitespace from the token and halts with true when
      # nothing is left.
      #
      # @param token [#data, #data=] the text token
      def text_token(token)
        token.data = token.data.lstrip
        # it's all whitespace so ignore it.
        halt true if token.data.empty?
      end

      # An <html> start tag is inserted and switches the parser to the
      # before-head mode; every other start tag returns without halting.
      #
      # @param token [#tag] the start tag token
      def start_tag_token(token)
        if token.tag == Tags::Html
          parser.add_element
          parser.insertion_mode = BeforeHead
          halt true
        end
      end

      # </head>, </body>, </html> and </br> trigger an implied <html> start tag
      # and halt with false; every other end tag is ignored.
      #
      # @param token [#tag] the end tag token
      def end_tag_token(token)
        implies_html = [Tags::Head, Tags::Body, Tags::Html, Tags::Br].include?(token.tag)
        # ignore the token.
        halt true unless implies_html

        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Html, Tags::Html.to_s)
        halt false
      end

      # Comments are appended directly to the document node.
      #
      # @param token [#data] the comment token
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.document.append_child(comment)
        halt true
      end

      # Anything else gets an implied <html> start tag and halts with false.
      def default(_)
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Html, Tags::Html.to_s)
        halt false
      end
    end
  end
end
@@ -0,0 +1,463 @@
module Gammo
  class Parser
    # Section 12.2.6.4.6: the "in body" insertion mode.
    #
    # NOTE(review): the dispatcher lives in InsertionMode (not shown here);
    # presumably `halt true` means the token has been consumed, `halt false`
    # means it has to be reprocessed, and a handler that returns without
    # halting falls through to #default, which halts with true -- confirm
    # against InsertionMode.
    class InBody < InsertionMode
      # Adds the token's text to the tree. A newline at the very start of a
      # <pre> or <listing> block and all NUL characters are dropped first.
      #
      # @param token [#data] the text token
      def text_token(token)
        data = token.data
        node = parser.open_elements.last
        case node.tag
        when Tags::Pre, Tags::Listing
          unless node.first_child
            # ignore a newline at the start of the <pre> block.
            data = data.slice(1..-1) if !data.empty? && data[0] == ?\r
            data = data.slice(1..-1) if !data.empty? && data[0] == ?\n
          end
        end
        data = data.gsub("\x00", '')
        halt true if data.empty?
        parser.reconstruct_active_formatting_elements
        # Text that is not whitespace-only means a frameset is no longer OK.
        parser.frameset_ok = false if parser.frameset_ok && !data.lstrip.empty?
        parser.add_text(data)
      end

      # Dispatches on the start tag. Branches that do not halt return to the
      # dispatcher without a halt value.
      #
      # @param token [#tag, #data, #attributes] the start tag token
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt true if parser.open_elements.any? { |oe| oe.tag == Tags::Template }
          copy_attributes(parser.open_elements[0], token)
        when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta,
             Tags::Noframes, Tags::Script, Tags::Style, Tags::Template, Tags::Title
          halt InHead.new(parser).process
        when Tags::Body
          halt true if parser.open_elements.any? { |oe| oe.tag == Tags::Template }
          if parser.open_elements.length >= 2
            body = parser.open_elements[1]
            if body.instance_of?(Node::Element) && body.tag == Tags::Body
              parser.frameset_ok = false
              copy_attributes(body, parser.token)
            end
          end
        when Tags::Frameset
          if !parser.frameset_ok || parser.open_elements.length < 2 || parser.open_elements[1].tag != Tags::Body
            # ignore the token
            halt true
          end
          # Replace the <body> element with the new <frameset> element.
          body = parser.open_elements[1]
          body.parent.remove_child(body) if body.parent
          parser.open_elements = parser.open_elements.slice(0, 1)
          parser.add_element
          parser.insertion_mode = InFrameset
          halt true
        when Tags::Address, Tags::Article, Tags::Aside, Tags::Blockquote,
             Tags::Center, Tags::Dialog, Tags::Details, Tags::Dir, Tags::Div,
             Tags::Dl, Tags::Fieldset, Tags::Figcaption, Tags::Figure,
             Tags::Footer, Tags::Header, Tags::Hgroup, Tags::Main, Tags::Menu,
             Tags::Nav, Tags::Ol, Tags::P, Tags::Section, Tags::Summary, Tags::Ul
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
        when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          # A heading directly inside another heading closes the outer one.
          node = parser.top
          case node.tag
          when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
            parser.open_elements.pop
          end
          parser.add_element
        when Tags::Pre, Tags::Listing
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
          parser.frameset_ok = false
        when Tags::Form
          # ignore the token.
          halt true if parser.form && !parser.open_elements.any? { |oe| oe.tag == Tags::Template }
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
          parser.form = parser.top unless parser.open_elements.any? { |oe| oe.tag == Tags::Template }
        when Tags::Li
          parser.frameset_ok = false
          parser.open_elements.reverse_each_with_index do |open_element, index|
            case open_element.tag
            when Tags::Li then parser.open_elements = parser.open_elements.slice(0, index)
            when Tags::Address, Tags::Div, Tags::P then next
            else
              next unless parser.special_element?(open_element)
            end
            break
          end
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
        when Tags::Dd, Tags::Dt
          parser.frameset_ok = false
          parser.open_elements.reverse_each_with_index do |open_element, index|
            case open_element.tag
            when Tags::Dd, Tags::Dt then parser.open_elements = parser.open_elements.slice(0, index)
            when Tags::Address, Tags::Div, Tags::P then next
            else
              next unless parser.special_element?(open_element)
            end
            break
          end
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.add_element
        when Tags::Plaintext
          parser.pop_until BUTTON_SCOPE, Tags::P
          parser.add_element
        when Tags::Button
          parser.pop_until DEFAULT_SCOPE, Tags::Button
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.frameset_ok = false
        when Tags::A
          # An <a> that is still active (after the last scope marker) is closed
          # before the new one is opened.
          parser.active_formatting_elements.reverse_each do |afe|
            break if afe.is_a?(Node::ScopeMarker)
            next unless afe.instance_of?(Node::Element) && afe.tag == Tags::A
            adoption_agency_for_end_tag_formatting(Tags::A, "a")
            parser.open_elements.delete(afe)
            parser.active_formatting_elements.delete(afe)
            break
          end
          parser.reconstruct_active_formatting_elements
          parser.add_formatting_element
        when Tags::B, Tags::Big, Tags::Code, Tags::Em, Tags::Font, Tags::I,
             Tags::S, Tags::Small, Tags::Strike, Tags::Strong, Tags::Tt, Tags::U
          parser.reconstruct_active_formatting_elements
          parser.add_formatting_element
        when Tags::Nobr
          parser.reconstruct_active_formatting_elements
          if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Nobr)
            adoption_agency_for_end_tag_formatting(Tags::Nobr, "nobr")
            parser.reconstruct_active_formatting_elements
          end
          parser.add_formatting_element
        when Tags::Applet, Tags::Marquee, Tags::Object
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.active_formatting_elements << Node::DEFAULT_SCOPE_MARKER
          parser.frameset_ok = false
        when Tags::Table
          parser.pop_until(BUTTON_SCOPE, Tags::P) unless parser.quirks
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InTable
          halt true
        when Tags::Area, Tags::Br, Tags::Embed, Tags::Img, Tags::Input, Tags::Keygen, Tags::Wbr
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
          token.attributes.each do |attr|
            # skip setting frameset_ok = false
            halt true if attr.key == 'type' && attr.value.downcase == 'hidden'
          end if token.tag == Tags::Input
          parser.frameset_ok = false
        when Tags::Param, Tags::Source, Tags::Track
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
        when Tags::Hr
          parser.pop_until BUTTON_SCOPE, Tags::P
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
          parser.frameset_ok = false
        when Tags::Image
          # <image> is rewritten to <img> and halted with false.
          token.tag = Tags::Img
          # todo: fixme <img>
          token.data = Tags::Img.to_s
          halt false
        when Tags::Textarea
          parser.add_element
          parser.set_original_insertion_mode
          parser.frameset_ok = false
          parser.insertion_mode = Text
        when Tags::Xmp
          parser.pop_until(BUTTON_SCOPE, Tags::P)
          parser.reconstruct_active_formatting_elements
          parser.frameset_ok = false
          parser.add_element
          parser.set_original_insertion_mode
          parser.insertion_mode = Text
        when Tags::Iframe
          parser.frameset_ok = false
          parser.parse_generic_raw_text_element
        when Tags::Noembed
          parser.parse_generic_raw_text_element
        when Tags::Noscript
          if parser.scripting?
            parser.parse_generic_raw_text_element
            halt true
          end
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.tokenizer.next_is_not_raw_text!
        when Tags::Select
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InSelect
          halt true
        when Tags::Optgroup, Tags::Option
          parser.open_elements.pop if parser.top.tag == Tags::Option
          parser.reconstruct_active_formatting_elements
          parser.add_element
        when Tags::Rb, Tags::Rtc
          parser.generate_implied_end_tags if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Ruby)
          parser.add_element
        when Tags::Rp, Tags::Rt
          parser.generate_implied_end_tags('rtc') if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Ruby)
          parser.add_element
        when Tags::Math, Tags::Svg
          parser.reconstruct_active_formatting_elements
          parser.adjust_attribute_names(token.attributes, token.tag == Tags::Math ? Parser::MATH_ML_ATTRIBUTE_ADJUSTMENTS : Parser::SVG_ATTRIBUTE_ADJUSTMENTS)
          parser.adjust_foreign_attributes(token.attributes)
          parser.add_element
          parser.top.namespace = token.data
          if parser.has_self_closing_token
            parser.open_elements.pop
            parser.acknowledge_self_closing_tag
          end
          halt true
        when Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Frame, Tags::Head, Tags::Tbody, Tags::Td, Tags::Tfoot, Tags::Th, Tags::Thead, Tags::Tr
          # ignore the token.
        else
          parser.reconstruct_active_formatting_elements
          parser.add_element
        end
      end

      # Dispatches on the end tag. Branches that do not halt return to the
      # dispatcher without a halt value.
      #
      # @param token [#tag, #data] the end tag token
      def end_tag_token(token)
        case token.tag
        when Tags::Body
          parser.insertion_mode = AfterBody if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Body)
        when Tags::Html
          halt true unless parser.element_in_scope?(DEFAULT_SCOPE, Tags::Body)
          parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Body, Tags::Body.to_s)
          halt false
        when Tags::Address, Tags::Article, Tags::Aside, Tags::Blockquote,
             Tags::Button, Tags::Center, Tags::Dialog, Tags::Details, Tags::Dir,
             Tags::Div, Tags::Dl, Tags::Fieldset, Tags::Figcaption, Tags::Figure,
             Tags::Footer, Tags::Header, Tags::Hgroup, Tags::Listing, Tags::Main,
             Tags::Menu, Tags::Nav, Tags::Ol, Tags::Pre, Tags::Section,
             Tags::Summary, Tags::Ul
          parser.pop_until(DEFAULT_SCOPE, token.tag)
        when Tags::Form
          if parser.open_elements.any? { |oe| oe.tag == Tags::Template }
            index = parser.index_of_element_in_scope(DEFAULT_SCOPE, Tags::Form)
            # ignore the token.
            halt true if index == -1
            parser.generate_implied_end_tags
            # ignore the token.
            halt true if parser.open_elements[index].tag != Tags::Form
            parser.pop_until(DEFAULT_SCOPE, Tags::Form)
          else
            node = parser.form
            parser.form = nil
            index = parser.index_of_element_in_scope(DEFAULT_SCOPE, Tags::Form)
            # ignore the token.
            halt true if node.nil? || index == -1 || parser.open_elements[index] != node
            parser.generate_implied_end_tags
            parser.open_elements.delete(node)
          end
        when Tags::P
          # A stray </p> first gets an implied <p> so there is something to close.
          parser.parse_implied_token(Tokenizer::StartTagToken, Tags::P, Tags::P.to_s) unless parser.element_in_scope?(BUTTON_SCOPE, Tags::P)
          parser.pop_until(BUTTON_SCOPE, Tags::P)
        when Tags::Li
          parser.pop_until(LIST_ITEM_SCOPE, Tags::Li)
        when Tags::Dd, Tags::Dt
          parser.pop_until(DEFAULT_SCOPE, token.tag)
        when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
          parser.pop_until(DEFAULT_SCOPE, Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6)
        when Tags::A, Tags::B, Tags::Big, Tags::Code, Tags::Em, Tags::Font,
             Tags::I, Tags::Nobr, Tags::S, Tags::Small, Tags::Strike,
             Tags::Strong, Tags::Tt, Tags::U
          adoption_agency_for_end_tag_formatting(token.tag, token.data)
        when Tags::Applet, Tags::Marquee, Tags::Object
          parser.clear_active_formatting_elements if parser.pop_until(DEFAULT_SCOPE, token.tag)
        when Tags::Br
          # </br> is replaced by a <br> start tag token and halted with false.
          # FIXME
          parser.token = Tokenizer::StartTagToken.new(token.data, tag: token.tag)
          halt false
        when Tags::Template
          halt InHead.new(parser).process
        else
          adoption_agency_for_end_tag_formatting(token.tag, token.data)
        end
      end

      # Adds the comment as a child through the parser.
      #
      # @param token [#data] the comment token
      def comment_token(token)
        parser.add_child Node::Comment.new(data: token.data)
      end

      # Handles the error token. With a non-empty template stack the parser
      # switches to the in-template mode and halts with false; otherwise it
      # halts with true.
      def error_token(_token)
        if parser.template_stack.length > 0
          parser.insertion_mode = InTemplate
          halt false
        else
          # Elements outside this list are unexpected when the input ends.
          # Both outcomes halt with true, so the scan only marks where that
          # case is detected.
          parser.open_elements.each do |oe|
            case oe.tag
            when Tags::Dd, Tags::Dt, Tags::Li, Tags::Optgroup, Tags::Option, Tags::P,
                 Tags::Rb, Tags::Rp, Tags::Rt, Tags::Rtc, Tags::Tbody, Tags::Td, Tags::Tfoot,
                 Tags::Th, Tags::Thead, Tags::Tr, Tags::Body, Tags::Html
            else
              halt true
            end
          end
          halt true
        end
      end

      # Every token that reaches this point has been handled.
      def default(_)
        halt true
      end

      # Implements "adoption agency" algorithm.
      # https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
      #
      # @param tag the tag of the end tag being processed
      # @param tagname [String] the name of that tag
      # @!visibility private
      def adoption_agency_for_end_tag_formatting(tag, tagname)
        # Step 1-2. If the current node is the named element and is not in the
        # list of active formatting elements, just pop it.
        # `index` returns nil, never -1, for a missing element (the rest of
        # this method relies on that), so membership is tested with `include?`.
        current = parser.open_elements.last
        if current && current.data == tagname && !parser.active_formatting_elements.include?(current)
          parser.open_elements.pop
          return
        end

        # Step 3-5. The outer loop
        8.times do
          # Step 6: Find the formatting element.
          formatting_element = nil
          parser.active_formatting_elements.reverse_each do |afe|
            break if afe.instance_of? Node::ScopeMarker
            if afe.tag == tag
              formatting_element = afe
              break
            end
          end
          unless formatting_element
            adoption_agency_for_end_tag_other(tag, tagname)
            return
          end
          # Step 7. Ignore the tag if formatting element is not in the stack of
          # open elements.
          index = parser.open_elements.index(formatting_element)
          unless index
            parser.active_formatting_elements.delete(formatting_element)
            return
          end
          # Step 8. Ignore the tag if formatting element is not in the scope.
          return unless parser.element_in_scope?(DEFAULT_SCOPE, tag)

          # Step 9. This step is omitted because it's just a parse error but no
          # need to return.

          # Step 10-11. Find the furthest block.
          furthest_block = parser.open_elements.slice(index..-1).find(&parser.method(:special_element?))
          unless furthest_block
            element = parser.open_elements.pop
            element = parser.open_elements.pop while element != formatting_element
            parser.active_formatting_elements.delete(element)
            return
          end

          # Step 12-13. Find the common ancestor and bookmark node.
          common_ancestor = parser.open_elements[index - 1]
          bookmark = parser.active_formatting_elements.index(formatting_element)

          # Step 14. The inner loop. find the last node to reparent.
          last_node = furthest_block
          node = furthest_block
          x = parser.open_elements.index(node)
          # Step 14.1.
          j = 0
          loop do
            # Step 14.2.
            j += 1
            # Step 14.3.
            x -= 1
            node = parser.open_elements[x]
            # Step 14.4.
            break if node == formatting_element

            # Step 14.5. Remove node from the list of active formatting elements if
            # inner loop counter is greater than three and node is in the list of
            # active formatting elements.
            ni = parser.active_formatting_elements.index(node)
            if ni && j > 3
              parser.active_formatting_elements.delete(node)
              # If any element of the list of active formatting elements is removed,
              # we need to take care whether bookmark should be decremented or not.
              # This is because the value of bookmark may exceed the size of the
              # list by removing elements from the list.
              bookmark -= 1 if ni <= bookmark
              next
            end
            # Step 14.6. Continue the next inner loop if node is not in the list of
            # active formatting elements.
            unless parser.active_formatting_elements.include?(node)
              parser.open_elements.delete(node)
              next
            end
            # Step 14.7
            clone = node.clone
            afei = parser.active_formatting_elements.index(node)
            oei = parser.open_elements.index(node)
            raise ParseError, 'bad parser state: expected elements are not found' if !(afei && oei)
            parser.active_formatting_elements[afei] = clone
            parser.open_elements[oei] = clone
            node = clone
            # Step 14.8
            bookmark = parser.active_formatting_elements.index(node) + 1 if last_node == furthest_block
            # Step 14.9
            last_node.parent.remove_child(last_node) if last_node.parent
            node.append_child(last_node)
            # Step 14.10
            last_node = node
          end
          # Step 15. Reparent last_node to the common ancestor,
          # or for misnested table nodes, to the foster parent.
          last_node.parent.remove_child(last_node) if last_node.parent
          case common_ancestor.tag
          when Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr
            parser.foster_parent(last_node)
          else
            common_ancestor.append_child(last_node)
          end

          # Steps 16-18. Reparent nodes from the furthest block's children
          # to a clone of the formatting element.
          clone = formatting_element.clone
          reparent_children(clone, furthest_block)
          furthest_block.append_child(clone)

          # Step 19. Fix up the list of active formatting elements.
          old_loc = parser.active_formatting_elements.index(formatting_element)
          bookmark -= 1 if old_loc && old_loc < bookmark
          parser.active_formatting_elements.delete(formatting_element)
          parser.active_formatting_elements.insert(bookmark, clone)

          # Step 20. Fix up the stack of open elements.
          parser.open_elements.delete(formatting_element)
          parser.open_elements.insert(parser.open_elements.index(furthest_block) + 1, clone)
        end
      end

      # Walks the stack of open elements from the top. The stack is cut just
      # below the first element matching both tag and name; the walk stops
      # without changes at the first special element.
      #
      # @param tag the tag of the end tag being processed
      # @param tagname [String] the name of that tag
      # @!visibility private
      def adoption_agency_for_end_tag_other(tag, tagname)
        parser.open_elements.reverse_each_with_index do |open_element, index|
          if open_element.tag == tag && open_element.data == tagname
            parser.open_elements = parser.open_elements.slice(0, index)
            break
          end
          break if parser.special_element?(open_element)
        end
      end

      # Moves every child of src, in order, to the end of dst's children.
      #
      # @param dst the node receiving the children
      # @param src the node being emptied
      # @!visibility private
      def reparent_children(dst, src)
        while child = src.first_child
          src.remove_child(child)
          dst.append_child(child)
        end
      end
    end
  end
end