gammo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +6 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +27 -0
- data/LICENSE.txt +21 -0
- data/README.md +177 -0
- data/Rakefile +25 -0
- data/gammo.gemspec +23 -0
- data/lib/gammo.rb +15 -0
- data/lib/gammo/attribute.rb +17 -0
- data/lib/gammo/fragment_parser.rb +65 -0
- data/lib/gammo/node.rb +157 -0
- data/lib/gammo/parser.rb +524 -0
- data/lib/gammo/parser/constants.rb +94 -0
- data/lib/gammo/parser/foreign.rb +307 -0
- data/lib/gammo/parser/insertion_mode.rb +74 -0
- data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
- data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
- data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
- data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
- data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
- data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
- data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
- data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
- data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
- data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
- data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
- data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
- data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
- data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
- data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
- data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
- data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
- data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
- data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
- data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
- data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
- data/lib/gammo/parser/insertion_mode/text.rb +32 -0
- data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
- data/lib/gammo/parser/node_stack.rb +24 -0
- data/lib/gammo/tags.rb +9 -0
- data/lib/gammo/tags/table.rb +744 -0
- data/lib/gammo/tokenizer.rb +373 -0
- data/lib/gammo/tokenizer/debug.rb +34 -0
- data/lib/gammo/tokenizer/entity.rb +2240 -0
- data/lib/gammo/tokenizer/escape.rb +174 -0
- data/lib/gammo/tokenizer/script_scanner.rb +229 -0
- data/lib/gammo/tokenizer/tokens.rb +66 -0
- data/lib/gammo/version.rb +3 -0
- data/misc/html.yaml +384 -0
- data/misc/table.erubi +14 -0
- metadata +97 -0
data/lib/gammo/parser.rb
ADDED
@@ -0,0 +1,524 @@
|
|
1
|
+
require 'delegate'
|
2
|
+
require 'gammo/node'
|
3
|
+
require 'gammo/tags'
|
4
|
+
require 'gammo/tokenizer'
|
5
|
+
require 'gammo/parser/node_stack'
|
6
|
+
require 'gammo/parser/foreign'
|
7
|
+
require 'gammo/parser/constants'
|
8
|
+
require 'gammo/parser/insertion_mode_stack'
|
9
|
+
|
10
|
+
module Gammo
|
11
|
+
# Class for parsing an HTML input and building an HTML tree.
|
12
|
+
class Parser
|
13
|
+
require 'gammo/parser/insertion_mode'
|
14
|
+
|
15
|
+
include Foreign
|
16
|
+
include Constants
|
17
|
+
|
18
|
+
# Raised if anything goes wrong while parsing an HTML.
|
19
|
+
# Parser-specific error type; a direct subclass of ArgumentError.
class ParseError < ArgumentError; end
|
20
|
+
|
21
|
+
# Default scope stop tags defined in 12.2.4.2.
|
22
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements
|
23
|
+
# @!visibility private
|
24
|
+
# Maps an element namespace (nil for plain HTML elements) to the tags that
# terminate the default, list-item and button scopes.
# Both the hash and every tag list are frozen so this shared lookup table
# cannot be mutated by accident at runtime.
DEFAULT_SCOPE_STOP_TAGS = {
  nil => [Tags::Applet, Tags::Caption, Tags::Html, Tags::Table, Tags::Td,
          Tags::Th, Tags::Marquee, Tags::Object, Tags::Template].freeze,
  'math' => [Tags::AnnotationXml, Tags::Mi, Tags::Mn, Tags::Mo, Tags::Ms,
             Tags::Mtext].freeze,
  'svg' => [Tags::Desc, Tags::ForeignObject, Tags::Title].freeze
}.freeze
|
31
|
+
|
32
|
+
# Scope constants defined in 12.2.4.2.
|
33
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements
|
34
|
+
|
35
|
+
# @!visibility private
|
36
|
+
DEFAULT_SCOPE = 0

# Scope whose extra boundaries are the list containers (ol, ul).
# @!visibility private
LIST_ITEM_SCOPE = 1

# Scope whose extra boundary is the button element.
# @!visibility private
BUTTON_SCOPE = 2

# Scope bounded by html, table and template.
# @!visibility private
TABLE_SCOPE = 3

# Used when clearing the stack back to a table row context.
# @!visibility private
TABLE_ROW_SCOPE = 4

# Used when clearing the stack back to a table body context.
# @!visibility private
TABLE_BODY_SCOPE = 5

# Scope used while looking up elements inside a select.
# @!visibility private
SELECT_SCOPE = 6
|
55
|
+
|
56
|
+
# Tokenizer for parsing each token.
|
57
|
+
# @!visibility private
|
58
|
+
attr_accessor :tokenizer, :token

# The insertion mode: a state variable that controls the primary operation
# of the tree construction stage.
# https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
# @!visibility private
attr_accessor :insertion_mode

# The original insertion mode, defined in 12.2.4.1.
# Whenever the insertion mode is switched to "text" or "in table text", the
# mode that was active before the switch is stored here so that the tree
# construction stage can return to it afterwards.
# https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
# @!visibility private
attr_accessor :original_insertion_mode

# `template_stack` is the stack of template insertion modes.
# Defined in 12.4.2.1.
# @!visibility private
attr_accessor :template_stack

# The stack of open elements, defined in 12.2.4.2.
# https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements
# @!visibility private
attr_accessor :open_elements

# The list of active formatting elements, defined in 12.2.4.3.
# https://html.spec.whatwg.org/multipage/parsing.html#the-list-of-active-formatting-elements
# @!visibility private
attr_accessor :active_formatting_elements

# The element pointers (head and form), defined in 12.2.4.4.
# https://html.spec.whatwg.org/multipage/parsing.html#other-parsing-state-flags
attr_accessor :head, :form

# Other parsing state flags, defined in 12.2.4.5.
# Each flag is also readable through a predicate alias.
# https://html.spec.whatwg.org/multipage/parsing.html#other-parsing-state-flags
attr_accessor :scripting, :frameset_ok
alias_method :scripting?, :scripting
alias_method :frameset_ok?, :frameset_ok

# Document root element.
attr_accessor :document

# Self-closing flag, defined in 12.2.5.
# Self-closing tags such as <img /> are handled as start tag tokens;
# `has_self_closing_token` is set while such a token is being processed.
# @!visibility private
attr_accessor :has_self_closing_token

# Quirks flag, defined in 12.2.5.
# Tells whether the parser is operating in the "force-quirks" mode.
# @!visibility private
attr_accessor :quirks

# `foster_parenting` is true when a new element should be inserted according
# to the foster parenting rule defined in 12.2.6.1.
# https://html.spec.whatwg.org/multipage/parsing.html#creating-and-inserting-nodes
# @!visibility private
attr_accessor :foster_parenting

# The context element used when parsing an HTML fragment, defined in
# 12.2.4.2.
# https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
attr_accessor :context

# `input` is the original input text.
# @!visibility private
attr_reader :input
|
127
|
+
|
128
|
+
# Constructs a parser for parsing an HTML input.
|
129
|
+
# @param [String] input
|
130
|
+
# @param [TrueClass, FalseClass] scripting
|
131
|
+
# @param [TrueClass, FalseClass] frameset_ok
|
132
|
+
# @param [InsertionMode] insertion_mode
|
133
|
+
# @param [Gammo::Node] context
|
134
|
+
# @return [Gammo::Parser]
|
135
|
+
def initialize(input, scripting: true, frameset_ok: true, insertion_mode: Initial, context: nil)
  # Values supplied by the caller.
  @input = input
  @scripting = scripting
  @frameset_ok = frameset_ok
  @insertion_mode = insertion_mode
  @context = context

  # Tokenizing state: no token has been read yet.
  @tokenizer = Tokenizer.new(input)
  @token = nil

  # Tree construction state: an empty document and empty stacks.
  @document = Node::Document.new
  @open_elements = Parser::NodeStack.new([])
  @active_formatting_elements = Parser::NodeStack.new([])
  @template_stack = InsertionModeStack.new([])

  # Flags and element pointers all start out unset.
  @foster_parenting = @has_self_closing_token = @quirks = false
  @form = @head = nil
end
|
153
|
+
|
154
|
+
# Parses the current input and builds HTML tree from it.
|
155
|
+
# @raise [Gammo::Parser::ParseError] Raised if the parser encounters an error while parsing.
|
156
|
+
# @return [Gammo::Node::Document, nil]
|
157
|
+
def parse
  # Keep pulling tokens until the end-of-stream token has been processed.
  until token == Tokenizer::EOS
    # CDATA sections are allowed only in foreign content, i.e. when the
    # current open element carries a namespace.
    current = open_elements.last
    tokenizer.allow_cdata!(current && current.namespace)

    self.token = tokenizer.next_token
    # Any error token other than end-of-stream aborts parsing with nil.
    return if token.instance_of?(Tokenizer::ErrorToken) && token != Tokenizer::EOS

    parse_current_token
  end
  document
end
|
169
|
+
|
170
|
+
# Always returns false.
|
171
|
+
# @return [FalseClass]
|
172
|
+
# @!visibility private
|
173
|
+
def fragment?
  # This implementation unconditionally reports false.
  false
end
|
176
|
+
|
177
|
+
# Returns true if given node is matched with any special elements
|
178
|
+
# defined in 12.2.4.2.
|
179
|
+
# https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
|
180
|
+
#
|
181
|
+
# @param [Gammo::Node] node
|
182
|
+
# @return [TrueClass, FalseClass]
|
183
|
+
# @see Gammo::Parser::Constants::SPECIAL_ELEMENTS
|
184
|
+
# @!visibility private
|
185
|
+
def special_element?(node)
  case node.namespace
  when nil, 'html'
    # HTML elements are looked up in the shared table.
    SPECIAL_ELEMENTS[node.data]
  when 'math'
    true if %w[mi mo mn ms mtext annotation-xml].include?(node.data)
  when 'svg'
    true if %w[foreignObject desc title].include?(node.data)
  end
  # Any other namespace falls through and yields nil.
end
|
201
|
+
|
202
|
+
# @!visibility private
|
203
|
+
# Processes a synthetic token of class `tok` (built from `data` and `tag`)
# as if it had been read from the input, then restores the real current
# token and its self-closing flag.
def parse_implied_token(tok, tag, data)
  saved_token = token
  saved_flag = has_self_closing_token

  @token = tok.new(data, tag: tag)
  @has_self_closing_token = false
  parse_current_token

  @token, @has_self_closing_token = saved_token, saved_flag
end
|
210
|
+
|
211
|
+
# @!visibility private
|
212
|
+
# Looks for one of `match_tags` within `scope`; when found, drops that
# element and everything above it from the stack of open elements.
# Returns true if an element was found, false otherwise.
def pop_until(scope, *match_tags)
  found_at = index_of_element_in_scope(scope, *match_tags)
  return false if found_at == -1

  @open_elements = open_elements.slice(0, found_at)
  true
end
|
220
|
+
|
221
|
+
# @!visibility private
|
222
|
+
# Walks the stack of open elements from the top and returns the index of
# the first non-namespaced element whose tag is one of `match_tags`, or -1
# when a boundary element of `scope` is reached first (or nothing matches).
#
# @param [Integer] scope one of the *_SCOPE constants
# @param [Array] match_tags tags to look for
# @return [Integer] index in the stack of open elements, or -1
# @raise [ParseError] if `scope` is not a scope this lookup supports
def index_of_element_in_scope(scope, *match_tags)
  open_elements.reverse_each_with_index do |open_element, index|
    tag = open_element.tag
    namespace = open_element.namespace
    unless namespace
      return index if match_tags.include?(tag)
      case scope
      when DEFAULT_SCOPE
        # no additional boundaries
      when LIST_ITEM_SCOPE
        return -1 if tag == Tags::Ol || tag == Tags::Ul
      when BUTTON_SCOPE
        return -1 if tag == Tags::Button
      when TABLE_SCOPE
        return -1 if tag == Tags::Html || tag == Tags::Table || tag == Tags::Template
      when SELECT_SCOPE
        # Select scope is bounded by every element EXCEPT optgroup and
        # option; the previous `==`/`&&` test could never be true.
        return -1 unless tag == Tags::Optgroup || tag == Tags::Option
      else
        raise ParseError, 'unreachable parsing error, please report to github'
      end
    end
    case scope
    when DEFAULT_SCOPE, LIST_ITEM_SCOPE, BUTTON_SCOPE
      # A namespace without an entry simply contributes no stop tags.
      stop_tags = DEFAULT_SCOPE_STOP_TAGS[namespace]
      return -1 if stop_tags && stop_tags.include?(tag)
    end
  end
  -1
end
|
249
|
+
|
250
|
+
# @!visibility private
|
251
|
+
# Inserts an element for the current token, remembers the insertion mode
# that was active, and switches the parser to the Text insertion mode.
def parse_generic_raw_text_element
  add_element
  self.original_insertion_mode = insertion_mode
  self.insertion_mode = Text
end
|
256
|
+
|
257
|
+
# 12.2.4.2
|
258
|
+
# @!visibility private
|
259
|
+
# Returns the context element when exactly one element is open, the parser
# reports fragment mode and a context is set; otherwise returns the last
# open element.
def adjusted_current_node
  use_context = open_elements.length == 1 && fragment? && context
  use_context ? context : open_elements.last
end
|
263
|
+
|
264
|
+
# @!visibility private
|
265
|
+
# True when one of `match_tags` can be found on the stack of open elements
# within `scope`.
def element_in_scope?(scope, *match_tags)
  position = index_of_element_in_scope(scope, *match_tags)
  position != -1
end
|
268
|
+
|
269
|
+
# @!visibility private
|
270
|
+
# Pops open elements until the top of the stack is a boundary element for
# the given table-related scope. The boundary element itself is kept.
# Raises ParseError for any scope other than the three table scopes.
def clear_stack_to_context(scope)
  open_elements.reverse_each_with_index do |open_element, index|
    tag = open_element.tag
    boundary =
      case scope
      when TABLE_SCOPE
        tag == Tags::Html || tag == Tags::Table || tag == Tags::Template
      when TABLE_ROW_SCOPE
        tag == Tags::Html || tag == Tags::Tr || tag == Tags::Template
      when TABLE_BODY_SCOPE
        tag == Tags::Html || tag == Tags::Tbody || tag == Tags::Tfoot ||
          tag == Tags::Thead || tag == Tags::Template
      else
        raise ParseError, 'unreachable parsing error, please report to github'
      end
    next unless boundary

    @open_elements = open_elements.slice(0, index + 1)
    return
  end
end
|
294
|
+
|
295
|
+
# @!visibility private
|
296
|
+
# Pops elements whose end tags are implied (dd, dt, li, optgroup, option,
# p, rb, rp, rt, rtc) off the top of the stack of open elements.
# Popping stops at the first node that is not such an element, or at an
# element whose name (`data`) is listed in `exceptions`.
#
# @param [Array<String>] exceptions element names that must not be popped
def generate_implied_end_tags(*exceptions)
  # Index of the topmost node to keep; -1 means every node was popped.
  # Tracked explicitly instead of relying on the iterator's return value.
  keep = -1
  open_elements.reverse_each_with_index do |node, i|
    if node.instance_of?(Node::Element)
      case node.tag
      when Tags::Dd, Tags::Dt, Tags::Li, Tags::Optgroup, Tags::Option,
           Tags::P, Tags::Rb, Tags::Rp, Tags::Rt, Tags::Rtc
        next unless exceptions.include?(node.data)
      end
    end
    keep = i
    break
  end
  @open_elements = open_elements.slice(0, keep + 1)
end
|
308
|
+
|
309
|
+
# @!visibility private
|
310
|
+
# Attaches `node` to the tree, either by foster parenting or by appending
# it to the current top node, and pushes it onto the stack of open elements
# when it is an element.
def add_child(node)
  if should_foster_parent?
    foster_parent(node)
  else
    top.append_child(node)
  end
  open_elements << node if node.instance_of?(Node::Element)
end
|
314
|
+
|
315
|
+
# @!visibility private
|
316
|
+
# The last open element, or the document when no element is open.
def top
  current = open_elements.last
  current || document
end
|
319
|
+
|
320
|
+
# @!visibility private
|
321
|
+
# Builds an element from the current token's tag, data and attributes and
# adds it as a child.
def add_element
  element = Node::Element.new(tag: token.tag, data: token.data, attributes: token.attributes)
  add_child(element)
end
|
324
|
+
|
325
|
+
# @!visibility private
|
326
|
+
# True only when foster parenting is enabled and the current top node is a
# table, tbody, tfoot, thead or tr.
def should_foster_parent?
  return false unless foster_parenting

  case top.tag
  when Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr then true
  else false
  end
end
|
334
|
+
|
335
|
+
# @!visibility private
|
336
|
+
# Inserts `node` according to the foster parenting rule: into the nearest
# open template when it sits above the nearest open table (or no table is
# open), otherwise immediately before the nearest open table, or at the end
# of the first open element when no table is open. Adjacent text nodes are
# merged instead of inserted.
#
# @param [Gammo::Node] node
def foster_parent(node)
  # Matches are tracked explicitly: a finished search leaves nil here
  # rather than whatever the iterator happens to return.
  table = template = nil
  table_index = template_index = 0

  open_elements.reverse_each_with_index do |open_element, index|
    next unless open_element.tag == Tags::Table
    table = open_element
    table_index = index
    break
  end
  open_elements.reverse_each_with_index do |open_element, index|
    next unless open_element.tag == Tags::Template
    template = open_element
    template_index = index
    break
  end

  return template.append_child(node) if template && (table.nil? || template_index > table_index)

  parent = table ? table.parent : open_elements[0]
  # A table without a parent falls back to the element opened just before it.
  parent ||= open_elements[table_index - 1]
  prev = table ? table.previous_sibling : parent.last_child
  if prev && prev.instance_of?(Node::Text) && node.instance_of?(Node::Text)
    prev.data += node.data
    return
  end
  parent.insert_before(node, table)
end
|
361
|
+
|
362
|
+
# @!visibility private
|
363
|
+
# Adds `text` to the tree. Empty text is ignored; text is foster parented
# when required; otherwise it is merged into a trailing text node of the
# current top node, or added as a new text node.
def add_text(text)
  return if text.empty?

  if should_foster_parent?
    foster_parent(Node::Text.new(data: text))
  else
    tail = top.last_child
    if tail && tail.instance_of?(Node::Text)
      tail.data += text
      nil
    else
      add_child(Node::Text.new(data: text))
    end
  end
end
|
374
|
+
|
375
|
+
# @!visibility private
|
376
|
+
# Inserts an element for the current token and records it in the list of
# active formatting elements. While walking that list back to the nearest
# scope marker, the third and any further entry that is identical to the
# new element (same tag, no namespace, same attributes) is removed from it.
def add_formatting_element
  tag = token.tag
  attrs = token.attributes
  add_element

  duplicates = 0
  active_formatting_elements.reverse_each_with_index do |candidate, _index|
    break if candidate.instance_of?(Node::ScopeMarker)
    next unless candidate.instance_of?(Node::Element)
    next if candidate.namespace || candidate.tag != tag || candidate.attributes.length != attrs.length

    # Every attribute of the candidate needs a counterpart with the same
    # key, namespace and value on the new element.
    same_attributes = candidate.attributes.all? do |mine|
      attrs.any? do |theirs|
        mine.key == theirs.key && mine.namespace == theirs.namespace && mine.value == theirs.value
      end
    end
    next unless same_attributes

    duplicates += 1
    active_formatting_elements.delete(candidate) if duplicates >= 3
  end

  active_formatting_elements << open_elements.last
end
|
407
|
+
|
408
|
+
# @!visibility private
|
409
|
+
# Pops entries off the list of active formatting elements until the list is
# empty or the entry just popped is a scope marker.
def clear_active_formatting_elements
  loop do
    popped = active_formatting_elements.pop
    break if active_formatting_elements.empty?
    break if popped.instance_of?(Node::ScopeMarker)
  end
end
|
415
|
+
|
416
|
+
# @!visibility private
|
417
|
+
# Re-opens active formatting entries that are no longer on the stack of
# open elements: rewinds from the end of the list to the nearest scope
# marker or still-open entry, then clones and re-adds every entry after it,
# replacing each list entry with its clone.
def reconstruct_active_formatting_elements
  entry = active_formatting_elements.last
  return unless entry
  return if entry.instance_of?(Node::ScopeMarker) || open_elements.index(entry)

  # Rewind phase.
  position = active_formatting_elements.length - 1
  until entry.is_a?(Node::ScopeMarker) || open_elements.index(entry)
    if position.zero?
      # Ran off the start of the list: recreate from the first entry.
      position = -1
      break
    end
    position -= 1
    entry = active_formatting_elements[position]
  end

  # Advance phase: recreate each following entry in order.
  loop do
    position += 1
    copy = active_formatting_elements[position].clone
    add_child(copy)
    active_formatting_elements[position] = copy
    break if position == active_formatting_elements.length - 1
  end
end
|
437
|
+
|
438
|
+
# @!visibility private
|
439
|
+
# Clears the self-closing flag.
def acknowledge_self_closing_tag
  self.has_self_closing_token = false
end
|
442
|
+
|
443
|
+
# @!visibility private
|
444
|
+
# Stores the current insertion mode as the original insertion mode.
# Raises when an original insertion mode is already stored.
def set_original_insertion_mode
  if original_insertion_mode
    raise 'bad parser state: original im was set twice'
  end

  @original_insertion_mode = @insertion_mode
end
|
448
|
+
|
449
|
+
# @!visibility private
|
450
|
+
# Picks the insertion mode that fits the stack of open elements. The stack
# is walked from the top; the first element with a recognised tag decides
# the mode. For the bottom element the context element, when set, is
# inspected in its place.
def reset_insertion_mode
  open_elements.reverse_each_with_index do |open_element, index|
    last = index.zero?
    node = last && context ? context : open_element

    case node.tag
    when Tags::Select
      unless last
        # Walk down the stack: a template below the select keeps InSelect,
        # a table below it selects InSelectInTable.
        bottom = open_elements[0]
        ancestor = node
        until ancestor == bottom
          ancestor = open_elements[open_elements.index(ancestor) - 1]
          case ancestor.tag
          when Tags::Template
            @insertion_mode = InSelect
            return
          when Tags::Table
            @insertion_mode = InSelectInTable
            return
          end
        end
      end
      @insertion_mode = InSelect
    when Tags::Td, Tags::Th
      # TODO: remove this divergence from the HTML5 spec.
      @insertion_mode = InCell
    when Tags::Tr then @insertion_mode = InRow
    when Tags::Tbody, Tags::Thead, Tags::Tfoot then @insertion_mode = InTableBody
    when Tags::Caption then @insertion_mode = InCaption
    when Tags::Colgroup then @insertion_mode = InColumnGroup
    when Tags::Table then @insertion_mode = InTable
    when Tags::Template
      # TODO: remove this divergence from the HTML5 spec.
      next if node.namespace
      @insertion_mode = template_stack.last
    when Tags::Head
      # TODO: remove this divergence from the HTML5 spec.
      @insertion_mode = InHead
    when Tags::Body then @insertion_mode = InBody
    when Tags::Frameset then @insertion_mode = InFrameset
    when Tags::Html
      @insertion_mode = @head ? AfterHead : BeforeHead
    else
      # Unrecognised tag: keep looking unless this is the bottom element.
      next unless last
      @insertion_mode = InBody
    end
    return
  end
end
|
509
|
+
|
510
|
+
# @!visibility private
|
511
|
+
# Processes the current token. A self-closing tag token is first turned
# into a start tag token with the self-closing flag raised. The token is
# then offered to the foreign-content rules or to the current insertion
# mode repeatedly until one of them reports it as consumed. A self-closing
# flag that is still set afterwards is cleared.
def parse_current_token
  if token.instance_of?(Tokenizer::SelfClosingTagToken)
    self.has_self_closing_token = true
    self.token = Tokenizer::StartTagToken.new(token.data, tag: token.tag, attributes: token.attributes)
  end

  loop do
    consumed = in_foreign_content? ? parse_foreign_content : insertion_mode.new(self).process
    break if consumed
  end

  self.has_self_closing_token = false if has_self_closing_token
end
|
523
|
+
end
|
524
|
+
end
|