gammo 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +6 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +27 -0
- data/LICENSE.txt +21 -0
- data/README.md +177 -0
- data/Rakefile +25 -0
- data/gammo.gemspec +23 -0
- data/lib/gammo.rb +15 -0
- data/lib/gammo/attribute.rb +17 -0
- data/lib/gammo/fragment_parser.rb +65 -0
- data/lib/gammo/node.rb +157 -0
- data/lib/gammo/parser.rb +524 -0
- data/lib/gammo/parser/constants.rb +94 -0
- data/lib/gammo/parser/foreign.rb +307 -0
- data/lib/gammo/parser/insertion_mode.rb +74 -0
- data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
- data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
- data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
- data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
- data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
- data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
- data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
- data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
- data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
- data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
- data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
- data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
- data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
- data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
- data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
- data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
- data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
- data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
- data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
- data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
- data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
- data/lib/gammo/parser/insertion_mode/text.rb +32 -0
- data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
- data/lib/gammo/parser/node_stack.rb +24 -0
- data/lib/gammo/tags.rb +9 -0
- data/lib/gammo/tags/table.rb +744 -0
- data/lib/gammo/tokenizer.rb +373 -0
- data/lib/gammo/tokenizer/debug.rb +34 -0
- data/lib/gammo/tokenizer/entity.rb +2240 -0
- data/lib/gammo/tokenizer/escape.rb +174 -0
- data/lib/gammo/tokenizer/script_scanner.rb +229 -0
- data/lib/gammo/tokenizer/tokens.rb +66 -0
- data/lib/gammo/version.rb +3 -0
- data/misc/html.yaml +384 -0
- data/misc/table.erubi +14 -0
- metadata +97 -0
data/lib/gammo/parser.rb
ADDED
@@ -0,0 +1,524 @@
|
|
1
|
+
require 'delegate'
|
2
|
+
require 'gammo/node'
|
3
|
+
require 'gammo/tags'
|
4
|
+
require 'gammo/tokenizer'
|
5
|
+
require 'gammo/parser/node_stack'
|
6
|
+
require 'gammo/parser/foreign'
|
7
|
+
require 'gammo/parser/constants'
|
8
|
+
require 'gammo/parser/insertion_mode_stack'
|
9
|
+
|
10
|
+
module Gammo
|
11
|
+
# Class for parsing an HTML input and building an HTML tree.
|
12
|
+
class Parser
|
13
|
+
require 'gammo/parser/insertion_mode'
|
14
|
+
|
15
|
+
include Foreign
|
16
|
+
include Constants
|
17
|
+
|
18
|
+
# Raised if anything goes wrong while parsing an HTML.
|
19
|
+
ParseError = Class.new(ArgumentError)
|
20
|
+
|
21
|
+
# Default scope stop tags defined in 12.2.4.2.
|
22
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements
|
23
|
+
# @!visibility private
|
24
|
+
DEFAULT_SCOPE_STOP_TAGS = {
|
25
|
+
nil => [Tags::Applet, Tags::Caption, Tags::Html, Tags::Table, Tags::Td,
|
26
|
+
Tags::Th, Tags::Marquee, Tags::Object, Tags::Template],
|
27
|
+
'math' => [Tags::AnnotationXml, Tags::Mi, Tags::Mn, Tags::Mo, Tags::Ms,
|
28
|
+
Tags::Mtext],
|
29
|
+
'svg' => [Tags::Desc, Tags::ForeignObject, Tags::Title]
|
30
|
+
}.freeze
|
31
|
+
|
32
|
+
# Scope constants defined in 12.2.4.2.
|
33
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements
|
34
|
+
|
35
|
+
# @!visibility private
|
36
|
+
DEFAULT_SCOPE = 0
|
37
|
+
|
38
|
+
# @!visibility private
|
39
|
+
LIST_ITEM_SCOPE = 1
|
40
|
+
|
41
|
+
# @!visibility private
|
42
|
+
BUTTON_SCOPE = 2
|
43
|
+
|
44
|
+
# @!visibility private
|
45
|
+
TABLE_SCOPE = 3
|
46
|
+
|
47
|
+
# @!visibility private
|
48
|
+
TABLE_ROW_SCOPE = 4
|
49
|
+
|
50
|
+
# @!visibility private
|
51
|
+
TABLE_BODY_SCOPE = 5
|
52
|
+
|
53
|
+
# @!visibility private
|
54
|
+
SELECT_SCOPE = 6
|
55
|
+
|
56
|
+
# Tokenizer for parsing each token.
|
57
|
+
# @!visibility private
|
58
|
+
attr_accessor :tokenizer, :token
|
59
|
+
|
60
|
+
# The insertion mode is a state variable that controls the primary operation
|
61
|
+
# of the tree construction stage.
|
62
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
|
63
|
+
# @!visibility private
|
64
|
+
attr_accessor :insertion_mode
|
65
|
+
|
66
|
+
# The original insertion mode is set to this accessor, defined in 12.2.4.1.
|
67
|
+
# When the insertion mode is switched to "text" or "in table text",
|
68
|
+
# the original insertion mode is also set. This is the insertion mode to
|
69
|
+
# which the tree construction stage will return.
|
70
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
|
71
|
+
# @!visibility private
|
72
|
+
attr_accessor :original_insertion_mode
|
73
|
+
|
74
|
+
# `template_stack` represents the stack of template insertion modes.
|
75
|
+
# Defined in 12.2.4.1.
|
76
|
+
# @!visibility private
|
77
|
+
attr_accessor :template_stack
|
78
|
+
|
79
|
+
# The stack of open elements, defined in 12.2.4.2.
|
80
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements
|
81
|
+
# @!visibility private
|
82
|
+
attr_accessor :open_elements
|
83
|
+
|
84
|
+
# The list of active formatting elements defined in 12.2.4.3.
|
85
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#the-list-of-active-formatting-elements
|
86
|
+
# @!visibility private
|
87
|
+
attr_accessor :active_formatting_elements
|
88
|
+
|
89
|
+
# The element pointers defined in 12.2.4.4.
|
90
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#other-parsing-state-flags
|
91
|
+
attr_accessor :head, :form
|
92
|
+
|
93
|
+
# Other parsing state flags defined in 12.2.4.5.
|
94
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#other-parsing-state-flags
|
95
|
+
attr_accessor :scripting, :frameset_ok
|
96
|
+
alias_method :scripting?, :scripting
|
97
|
+
alias_method :frameset_ok?, :frameset_ok
|
98
|
+
|
99
|
+
# Document root element
|
100
|
+
attr_accessor :document
|
101
|
+
|
102
|
+
# Self-closing flag defined in 12.2.5.
|
103
|
+
# Self-closing tags like <img /> are treated as start tag token, except
|
104
|
+
# `has_self_closing_token` is set while they are being processed.
|
105
|
+
# @!visibility private
|
106
|
+
attr_accessor :has_self_closing_token
|
107
|
+
|
108
|
+
# Quirks flag is defined in 12.2.5.
|
109
|
+
# quirks flag is whether the parser is operating in the "force-quirks" mode.
|
110
|
+
# @!visibility private
|
111
|
+
attr_accessor :quirks
|
112
|
+
|
113
|
+
# `foster_parenting` is set to true if a new element should be inserted
|
114
|
+
# according to the foster parenting rule defined in 12.2.6.1.
|
115
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#creating-and-inserting-nodes
|
116
|
+
# @!visibility private
|
117
|
+
attr_accessor :foster_parenting
|
118
|
+
|
119
|
+
# The context element is for use in parsing an HTML fragment, defined in
|
120
|
+
# 12.2.4.2.
|
121
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
|
122
|
+
attr_accessor :context
|
123
|
+
|
124
|
+
# `input` is the original input text.
|
125
|
+
# @!visibility private
|
126
|
+
attr_reader :input
|
127
|
+
|
128
|
+
# Builds a parser for the given HTML input.
#
# @param [String] input HTML text handed to the tokenizer
# @param [TrueClass, FalseClass] scripting initial value of the scripting flag
# @param [TrueClass, FalseClass] frameset_ok initial value of the frameset-ok flag
# @param [InsertionMode] insertion_mode insertion mode the parser starts in
# @param [Gammo::Node] context context element, if any
# @return [Gammo::Parser]
def initialize(input, scripting: true, frameset_ok: true, insertion_mode: Initial, context: nil)
  # Caller-supplied configuration.
  @input, @context = input, context
  @scripting, @frameset_ok = scripting, frameset_ok
  @insertion_mode = insertion_mode

  # Tokenizing state.
  @token = nil
  @tokenizer = Tokenizer.new(input)
  @has_self_closing_token = false

  # Tree-construction state; every stack starts out empty.
  @document = Node::Document.new
  @open_elements = Parser::NodeStack.new([])
  @active_formatting_elements = Parser::NodeStack.new([])
  @template_stack = InsertionModeStack.new([])
  @foster_parenting = false
  @quirks = false

  # Element pointers.
  @form = nil
  @head = nil
end
|
153
|
+
|
154
|
+
# Consumes tokens until the end-of-stream token is reached, feeding each one
# to the tree construction stage, and returns the resulting document.
#
# @raise [Gammo::Parser::ParseError] if tree construction reaches a state it
#   cannot handle.
# @return [Gammo::Node::Document, nil] nil when the tokenizer yields an error
#   token other than the end-of-stream token.
def parse
  until token == Tokenizer::EOS
    # CDATA sections are allowed only in foreign content.
    current = open_elements.last
    tokenizer.allow_cdata!(current && current.namespace)
    self.token = tokenizer.next_token
    return if token.instance_of?(Tokenizer::ErrorToken) && token != Tokenizer::EOS
    parse_current_token
  end
  document
end
|
169
|
+
|
170
|
+
# Tells whether this parser is parsing a fragment; this class always answers
# false.
# @return [FalseClass]
# @!visibility private
def fragment?
  false
end
|
176
|
+
|
177
|
+
# Checks whether the given node belongs to the "special" category of
# elements defined in 12.2.4.2.
# https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
#
# @param [Gammo::Node] node
# @return [TrueClass, FalseClass, nil] the SPECIAL_ELEMENTS entry for HTML
#   nodes; true or nil for MathML/SVG nodes; nil for any other namespace.
# @see Gammo::Parser::Constants::SPECIAL_ELEMENTS
# @!visibility private
def special_element?(node)
  case node.namespace
  when nil, 'html'
    SPECIAL_ELEMENTS[node.data]
  when 'math'
    # `|| nil` keeps the "true or nil" result of the lookup.
    %w[mi mo mn ms mtext annotation-xml].include?(node.data) || nil
  when 'svg'
    %w[foreignObject desc title].include?(node.data) || nil
  end
end
|
201
|
+
|
202
|
+
# @!visibility private
# Temporarily replaces the current token with a freshly built one, runs it
# through tree construction, then puts the real token and its self-closing
# flag back.
#
# @param [Class] tok token class to instantiate
# @param tag tag passed to the token constructor
# @param data data passed to the token constructor
def parse_implied_token(tok, tag, data)
  saved_token = token
  saved_flag = has_self_closing_token
  @token = tok.new(data, tag: tag)
  @has_self_closing_token = false
  parse_current_token
  @token, @has_self_closing_token = saved_token, saved_flag
end
|
210
|
+
|
211
|
+
# @!visibility private
# Truncates the stack of open elements just below the first element that is
# in the given scope and matches one of the tags (the match itself is
# removed too).
#
# @param [Integer] scope
# @param [Array] match_tags
# @return [TrueClass, FalseClass] true when a matching element was found.
def pop_until(scope, *match_tags)
  found_at = index_of_element_in_scope(scope, *match_tags)
  return false if found_at == -1

  @open_elements = open_elements.slice(0, found_at)
  true
end
|
220
|
+
|
221
|
+
# @!visibility private
# Walks the stack of open elements from the most recently opened element
# downwards and returns the index of the first element without a namespace
# whose tag is one of +match_tags+. Returns -1 when a boundary of the given
# scope is reached first, or when nothing matches.
#
# @param [Integer] scope DEFAULT_SCOPE, LIST_ITEM_SCOPE, BUTTON_SCOPE,
#   TABLE_SCOPE or SELECT_SCOPE
# @param [Array] match_tags tags to look for
# @return [Integer] index into open_elements, or -1
# @raise [ParseError] if +scope+ is not one of the scopes listed above
def index_of_element_in_scope(scope, *match_tags)
  open_elements.reverse_each_with_index do |open_element, index|
    tag = open_element.tag
    namespace = open_element.namespace
    unless namespace
      return index if match_tags.include?(tag)
      case scope
      when DEFAULT_SCOPE
        # No scope-specific boundary; only the default stop tags below apply.
      when LIST_ITEM_SCOPE
        return -1 if tag == Tags::Ol || tag == Tags::Ul
      when BUTTON_SCOPE
        return -1 if tag == Tags::Button
      when TABLE_SCOPE
        return -1 if tag == Tags::Html || tag == Tags::Table || tag == Tags::Template
      when SELECT_SCOPE
        # Select scope is bounded by every element except optgroup and option.
        return -1 if tag != Tags::Optgroup && tag != Tags::Option
      else
        raise ParseError, 'unreachable parsing error, please report to github'
      end
    end
    case scope
    when DEFAULT_SCOPE, LIST_ITEM_SCOPE, BUTTON_SCOPE
      # A namespace with no stop list contributes no boundaries.
      stop_tags = DEFAULT_SCOPE_STOP_TAGS[namespace] || []
      return -1 if stop_tags.include?(tag)
    end
  end
  -1
end
|
249
|
+
|
250
|
+
# @!visibility private
# Inserts an element for the current token, remembers the current insertion
# mode as the original one and switches the parser to the Text mode.
def parse_generic_raw_text_element
  add_element
  self.original_insertion_mode = insertion_mode
  self.insertion_mode = Text
end
|
256
|
+
|
257
|
+
# 12.2.4.2
# Returns the context element when a fragment is being parsed and exactly
# one element is open; otherwise the last open element.
# @!visibility private
def adjusted_current_node
  if open_elements.length == 1 && fragment? && context
    context
  else
    open_elements.last
  end
end
|
263
|
+
|
264
|
+
# @!visibility private
# Tells whether an element with one of the given tags is in the given scope.
#
# @param [Integer] scope
# @param [Array] match_tags
# @return [TrueClass, FalseClass]
def element_in_scope?(scope, *match_tags)
  position = index_of_element_in_scope(scope, *match_tags)
  position != -1
end
|
268
|
+
|
269
|
+
# @!visibility private
# Pops open elements until the top of the stack is one of the boundary
# elements of the given table-related scope. The boundary element is kept.
#
# @param [Integer] scope TABLE_SCOPE, TABLE_ROW_SCOPE or TABLE_BODY_SCOPE
# @raise [ParseError] if another scope is given and the stack is not empty
def clear_stack_to_context(scope)
  open_elements.reverse_each_with_index do |open_element, index|
    # Resolved inside the walk so an empty stack never raises.
    boundary_tags =
      case scope
      when TABLE_SCOPE
        [Tags::Html, Tags::Table, Tags::Template]
      when TABLE_ROW_SCOPE
        [Tags::Html, Tags::Tr, Tags::Template]
      when TABLE_BODY_SCOPE
        [Tags::Html, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Template]
      else
        raise ParseError, 'unreachable parsing error, please report to github'
      end
    next unless boundary_tags.include?(open_element.tag)

    @open_elements = open_elements.slice(0, index + 1)
    return
  end
end
|
294
|
+
|
295
|
+
# @!visibility private
# Pops elements whose end tags are implied (dd, dt, li, optgroup, option, p,
# rb, rp, rt, rtc) off the stack of open elements. Popping stops at the first
# node that is not an element, is not in that list, or whose name is listed
# in +exceptions+.
#
# @param [Array<String>] exceptions element names that must not be popped
def generate_implied_end_tags(*exceptions)
  # Number of elements to keep; stays 0 when every open element is popped.
  keep = 0
  open_elements.reverse_each_with_index do |node, i|
    if node.instance_of?(Node::Element)
      case node.tag
      when Tags::Dd, Tags::Dt, Tags::Li, Tags::Optgroup, Tags::Option, Tags::P,
           Tags::Rb, Tags::Rp, Tags::Rt, Tags::Rtc
        # Implied end tag: keep popping unless the caller excluded it.
        next unless exceptions.include?(node.data)
      end
    end
    keep = i + 1
    break
  end
  @open_elements = open_elements.slice(0, keep)
end
|
308
|
+
|
309
|
+
# @!visibility private
# Attaches the node to the tree, through foster parenting when that applies
# and otherwise under the current top node, and pushes it onto the stack of
# open elements when it is an element.
#
# @param [Gammo::Node] node
def add_child(node)
  if should_foster_parent?
    foster_parent(node)
  else
    top.append_child(node)
  end
  open_elements << node if node.instance_of?(Node::Element)
end
|
314
|
+
|
315
|
+
# @!visibility private
# Returns the most recently opened element, or the document when no element
# is open.
def top
  current = open_elements.last
  current || document
end
|
319
|
+
|
320
|
+
# @!visibility private
# Builds an element from the current token's tag, data and attributes and
# adds it to the tree.
def add_element
  element = Node::Element.new(tag: token.tag, data: token.data, attributes: token.attributes)
  add_child(element)
end
|
324
|
+
|
325
|
+
# @!visibility private
# Foster parenting applies only while the foster_parenting flag is set and
# the current top node is a table, tbody, tfoot, thead or tr.
#
# @return [TrueClass, FalseClass]
def should_foster_parent?
  return false unless foster_parenting

  [Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr].include?(top.tag)
end
|
334
|
+
|
335
|
+
# @!visibility private
# Inserts the node according to the foster parenting rule (12.2.6.1).
# When the nearest open template is above the nearest open table, or there
# is no open table, the node is appended to that template. Otherwise it is
# inserted before the table in the table's parent, or at the end of the
# first open element when no table is open. Adjacent text nodes are merged
# instead of inserted.
#
# @param [Gammo::Node] node
def foster_parent(node)
  # Set only on a real match, so the result does not depend on what
  # reverse_each_with_index returns when the walk finds nothing.
  table = nil
  table_index = 0
  open_elements.reverse_each_with_index do |open_element, index|
    next unless open_element.tag == Tags::Table

    table = open_element
    table_index = index
    break
  end

  template = nil
  template_index = 0
  open_elements.reverse_each_with_index do |open_element, index|
    next unless open_element.tag == Tags::Template

    template = open_element
    template_index = index
    break
  end

  return template.append_child(node) if template && (!table || template_index > table_index)

  parent = table ? table.parent : open_elements[0]
  # A table without a parent falls back to the element opened just before it.
  parent ||= open_elements[table_index - 1]
  prev = table ? table.previous_sibling : parent.last_child
  if prev.instance_of?(Node::Text) && node.instance_of?(Node::Text)
    prev.data += node.data
    return
  end
  parent.insert_before(node, table)
end
|
361
|
+
|
362
|
+
# @!visibility private
# Adds text to the tree. Empty text is ignored; text subject to foster
# parenting is handed to foster_parent; text following an existing text node
# is appended to it; anything else becomes a new text node.
#
# @param [String] text
def add_text(text)
  return if text.empty?
  return foster_parent(Node::Text.new(data: text)) if should_foster_parent?

  tail = top.last_child
  if tail.instance_of?(Node::Text)
    tail.data += text
    return
  end
  add_child(Node::Text.new(data: text))
end
|
374
|
+
|
375
|
+
# @!visibility private
# Inserts an element for the current token and records it in the list of
# active formatting elements. Before recording, walks the list back to the
# last scope marker and counts entries identical to the new element (no
# namespace, same tag, same attributes); the third and later identical
# entries are removed from the list.
def add_formatting_element
  tag = token.tag
  attrs = token.attributes
  add_element

  identical_elements = 0
  active_formatting_elements.reverse_each_with_index do |candidate, _index|
    break if candidate.instance_of?(Node::ScopeMarker)
    next unless candidate.instance_of?(Node::Element)
    next if candidate.namespace || candidate.tag != tag
    next if candidate.attributes.length != attrs.length

    # Identical only when every attribute has a counterpart in the token.
    same_attributes = candidate.attributes.all? do |a|
      attrs.any? { |b| a.key == b.key && a.namespace == b.namespace && a.value == b.value }
    end
    next unless same_attributes

    identical_elements += 1
    active_formatting_elements.delete(candidate) if identical_elements >= 3
  end

  active_formatting_elements << open_elements.last
end
|
407
|
+
|
408
|
+
# @!visibility private
# Pops entries off the list of active formatting elements until a scope
# marker has been popped or the list is empty.
def clear_active_formatting_elements
  loop do
    popped = active_formatting_elements.pop
    break if popped.instance_of?(Node::ScopeMarker) || active_formatting_elements.length.zero?
  end
end
|
415
|
+
|
416
|
+
# @!visibility private
# Re-opens active formatting elements that are no longer on the stack of
# open elements. Rewinds from the end of the list to the nearest entry that
# is a scope marker or is still open, then clones every entry after that
# point, adds the clone to the tree and stores it in the list in place of
# the original.
def reconstruct_active_formatting_elements
  entry = active_formatting_elements.last
  return unless entry
  return if entry.instance_of?(Node::ScopeMarker) || open_elements.index(entry)

  # Rewind; -1 means the whole list has to be re-opened.
  position = active_formatting_elements.length - 1
  until entry.is_a?(Node::ScopeMarker) || open_elements.index(entry)
    if position.zero?
      position = -1
      break
    end
    position -= 1
    entry = active_formatting_elements[position]
  end

  final = active_formatting_elements.length - 1
  (position + 1).upto(final) do |slot|
    copy = active_formatting_elements[slot].clone
    add_child(copy)
    active_formatting_elements[slot] = copy
  end
  nil
end
|
437
|
+
|
438
|
+
# @!visibility private
# Clears the self-closing flag of the token being processed.
def acknowledge_self_closing_tag
  self.has_self_closing_token = false
end
|
442
|
+
|
443
|
+
# @!visibility private
# Remembers the current insertion mode as the original insertion mode.
#
# @raise [RuntimeError] if an original insertion mode is already set
def set_original_insertion_mode
  if original_insertion_mode
    raise 'bad parser state: original im was set twice'
  end

  self.original_insertion_mode = insertion_mode
end
|
448
|
+
|
449
|
+
# @!visibility private
# Picks the insertion mode that matches the current stack of open elements.
# Walks the stack from the most recently opened element downwards; the first
# element whose tag maps to a mode decides. For the bottom element the
# context element, when present, is inspected instead.
def reset_insertion_mode
  open_elements.reverse_each_with_index do |open_element, index|
    last = index.zero?
    node = last && context ? context : open_element

    chosen =
      case node.tag
      when Tags::Select
        select_mode = InSelect
        unless last
          # Look below the select for an enclosing template or table.
          ancestor = node
          first = open_elements[0]
          while ancestor != first
            ancestor = open_elements[open_elements.index(ancestor) - 1]
            case ancestor.tag
            when Tags::Template
              break
            when Tags::Table
              select_mode = InSelectInTable
              break
            end
          end
        end
        select_mode
      when Tags::Td, Tags::Th
        # NOTE: marked in the original as a divergence from the HTML5 spec
        # that should be removed.
        InCell
      when Tags::Tr
        InRow
      when Tags::Tbody, Tags::Thead, Tags::Tfoot
        InTableBody
      when Tags::Caption
        InCaption
      when Tags::Colgroup
        InColumnGroup
      when Tags::Table
        InTable
      when Tags::Template
        # NOTE: marked in the original as a divergence from the HTML5 spec
        # that should be removed.
        next if node.namespace
        template_stack.last
      when Tags::Head
        # NOTE: marked in the original as a divergence from the HTML5 spec
        # that should be removed.
        InHead
      when Tags::Body
        InBody
      when Tags::Frameset
        InFrameset
      when Tags::Html
        @head ? AfterHead : BeforeHead
      else
        # Unmapped tags are skipped, except at the bottom of the stack.
        next unless last
        InBody
      end

    @insertion_mode = chosen
    return
  end
end
|
509
|
+
|
510
|
+
# @!visibility private
# Runs the current token through tree construction. A self-closing tag
# token is first turned into a start tag token with the self-closing flag
# raised. The token is then offered to the foreign-content rules or to the
# current insertion mode, repeatedly, until one of them reports it consumed.
def parse_current_token
  if token.instance_of?(Tokenizer::SelfClosingTagToken)
    self.has_self_closing_token = true
    self.token = Tokenizer::StartTagToken.new(token.data, tag: token.tag, attributes: token.attributes)
  end

  loop do
    consumed =
      if in_foreign_content?
        parse_foreign_content
      else
        insertion_mode.new(self).process
      end
    break if consumed
  end

  self.has_self_closing_token = false if has_self_closing_token
end
|
523
|
+
end
|
524
|
+
end
|