gammo 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +6 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +27 -0
- data/LICENSE.txt +21 -0
- data/README.md +177 -0
- data/Rakefile +25 -0
- data/gammo.gemspec +23 -0
- data/lib/gammo.rb +15 -0
- data/lib/gammo/attribute.rb +17 -0
- data/lib/gammo/fragment_parser.rb +65 -0
- data/lib/gammo/node.rb +157 -0
- data/lib/gammo/parser.rb +524 -0
- data/lib/gammo/parser/constants.rb +94 -0
- data/lib/gammo/parser/foreign.rb +307 -0
- data/lib/gammo/parser/insertion_mode.rb +74 -0
- data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
- data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
- data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
- data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
- data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
- data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
- data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
- data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
- data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
- data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
- data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
- data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
- data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
- data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
- data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
- data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
- data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
- data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
- data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
- data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
- data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
- data/lib/gammo/parser/insertion_mode/text.rb +32 -0
- data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
- data/lib/gammo/parser/node_stack.rb +24 -0
- data/lib/gammo/tags.rb +9 -0
- data/lib/gammo/tags/table.rb +744 -0
- data/lib/gammo/tokenizer.rb +373 -0
- data/lib/gammo/tokenizer/debug.rb +34 -0
- data/lib/gammo/tokenizer/entity.rb +2240 -0
- data/lib/gammo/tokenizer/escape.rb +174 -0
- data/lib/gammo/tokenizer/script_scanner.rb +229 -0
- data/lib/gammo/tokenizer/tokens.rb +66 -0
- data/lib/gammo/version.rb +3 -0
- data/misc/html.yaml +384 -0
- data/misc/table.erubi +14 -0
- metadata +97 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
module Gammo
  class Parser
    # Token handlers for the "after after body" insertion mode
    # (section 12.2.6.4.22).
    #
    # NOTE(review): +parser+ and +halt+ are inherited from InsertionMode, which
    # is not part of this file; the notes below only describe what each handler
    # passes to them.
    class AfterAfterBody < InsertionMode
      # Ignores the token.
      def error_token(_)
        halt true
      end

      # Delegates to the in-body rules when nothing remains of the text after
      # String#lstrip; otherwise does nothing here.
      def text_token(token)
        return unless token.data.lstrip.empty?

        halt InBody.new(parser).process
      end

      # An <html> start tag is processed with the in-body rules; every other
      # start tag is left untouched by this handler.
      def start_tag_token(token)
        return unless token.tag == Tags::Html

        halt InBody.new(parser).process
      end

      # Appends the comment directly to the document node.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.document.append_child(comment)
        halt true
      end

      # A doctype is processed with the in-body rules.
      def doctype_token(token)
        halt InBody.new(parser).process
      end

      # Anything else: switch the parser to the in-body insertion mode and halt
      # with +false+ (presumably so the token is processed again -- confirm
      # against InsertionMode).
      def default(_)
        parser.insertion_mode = InBody
        halt false
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Gammo
  class Parser
    # Token handlers for the "after after frameset" insertion mode
    # (section 12.2.6.4.23).
    #
    # NOTE(review): +parser+ and +halt+ are inherited from InsertionMode, which
    # is not part of this file.
    class AfterAfterFrameset < InsertionMode
      # Appends the comment directly to the document node and consumes the
      # token, like the other insertion modes' comment handlers do.
      def comment_token(token)
        parser.document.append_child Node::Comment.new(data: token.data)
        halt true
      end

      # Only the whitespace of a text token is processed with the in-body
      # rules; every other character is dropped. The token is narrowed to its
      # whitespace before delegating so that InBody never sees the ignored
      # characters. Text without any whitespace is left to +default+.
      def text_token(token)
        whitespace = token.data.gsub(/[^\s]/, '')
        return if whitespace.empty?

        token.data = whitespace
        halt InBody.new(parser).process
      end

      # <html> is processed with the in-body rules, <noframes> with the
      # in-head rules; other start tags are not handled here.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Noframes
          halt InHead.new(parser).process
        end
      end

      # A doctype is processed with the in-body rules.
      def doctype_token(token)
        halt InBody.new(parser).process
      end

      # Ignores any other token.
      def default(_)
        halt true
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'gammo/node'
|
2
|
+
module Gammo
  class Parser
    # Token handlers for the "after body" insertion mode (section 12.2.6.4.19).
    #
    # NOTE(review): +parser+ and +halt+ are inherited from InsertionMode, which
    # is not part of this file.
    class AfterBody < InsertionMode
      # Ignores the token. This has to go through +halt+ like every other
      # handler (see AfterAfterBody#error_token); a bare +true+ return value
      # does not stop processing of the token.
      def error_token(_)
        halt true
      end

      # Delegates to the in-body rules when nothing remains of the text after
      # String#lstrip; otherwise does nothing here.
      def text_token(token)
        halt InBody.new(parser).process if token.data.lstrip.empty?
      end

      # An <html> start tag is processed with the in-body rules.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        end
      end

      # On </html>, moves to the after-after-body mode unless a fragment is
      # being parsed, and consumes the token. Other end tags are not handled
      # here.
      def end_tag_token(token)
        case token.tag
        when Tags::Html
          parser.insertion_mode = AfterAfterBody unless parser.fragment?
          halt true
        end
      end

      # Attaches the comment to the first open element, which must be <html>.
      #
      # @raise [ParseError] when the stack of open elements is empty or does
      #   not start with an <html> element.
      def comment_token(token)
        open_elements = parser.open_elements
        if open_elements.empty? || open_elements.first.tag != Tags::Html
          raise ParseError, 'bad parser state: <html> element not found, in the after-body insertion mode'
        end
        open_elements.first.append_child Node::Comment.new(data: token.data)
        halt true
      end

      # Anything else: switch back to the in-body insertion mode and halt with
      # +false+.
      def default(_)
        parser.insertion_mode = InBody
        halt false
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'gammo/parser/insertion_mode/after_after_frameset'
|
2
|
+
|
3
|
+
module Gammo
  class Parser
    # Token handlers for the "after frameset" insertion mode
    # (section 12.2.6.4.21).
    #
    # NOTE(review): +parser+ and +halt+ are inherited from InsertionMode, which
    # is not part of this file.
    class AfterFrameset < InsertionMode
      # Adds the comment as a child through the parser.
      def comment_token(token)
        parser.add_child(Node::Comment.new(data: token.data))
      end

      # Keeps only the whitespace characters of the text and adds them; text
      # without whitespace adds nothing.
      def text_token(token)
        whitespace = token.data.gsub(/[^\s]/, '')
        return if whitespace.empty?

        parser.add_text(whitespace)
      end

      # <html> is processed with the in-body rules, <noframes> with the
      # in-head rules; other start tags are not handled here.
      def start_tag_token(token)
        delegate =
          case token.tag
          when Tags::Html then InBody
          when Tags::Noframes then InHead
          end
        halt delegate.new(parser).process if delegate
      end

      # </html> switches to the after-after-frameset mode and consumes the
      # token; other end tags are not handled here.
      def end_tag_token(token)
        return unless token.tag == Tags::Html

        parser.insertion_mode = AfterAfterFrameset
        halt true
      end

      # Ignores any other token.
      def default(_)
        halt true
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Gammo
  class Parser
    # Token handlers for the "after head" insertion mode (section 12.2.6.4.5).
    #
    # NOTE(review): +parser+ and +halt+ are inherited from InsertionMode, which
    # is not part of this file.
    class AfterHead < InsertionMode
      # Adds the leading part removed by String#lstrip to the current node.
      # When nothing else is left the token is consumed; otherwise the token is
      # shortened to the remainder and left for further handling.
      def text_token(token)
        remainder = token.data.lstrip
        leading = token.data.length - remainder.length
        return if leading.zero?

        parser.add_text token.data[0, leading]
        halt true if remainder.empty?
        token.data = remainder
      end

      # Dispatches on the start tag; tags not listed are not handled here.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Body
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InBody
          halt true
        when Tags::Frameset
          parser.add_element
          parser.insertion_mode = InFrameset
          halt true
        when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta,
             Tags::Noframes, Tags::Script, Tags::Style, Tags::Template, Tags::Title
          process_in_head_with_head_open
        when Tags::Head
          # ignore the token
          halt true
        end
      end

      # </body>, </html> and </br> are left unhandled so that an implied <body>
      # gets created; </template> goes to the in-head rules; every other end
      # tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Body, Tags::Html, Tags::Br
          # drop down to creating an implied <body> tag.
        when Tags::Template
          halt InHead.new(parser).process
        else
          halt true
        end
      end

      # Adds the comment as a child through the parser and consumes the token.
      def comment_token(token)
        parser.add_child(Node::Comment.new(data: token.data))
        halt true
      end

      # Ignores the token.
      def doctype_token(token)
        halt true
      end

      # Anything else: parse an implied <body> start tag, set frameset_ok back
      # to true and halt with +false+.
      def default(_)
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Body, Tags::Body.to_s)
        parser.frameset_ok = true
        halt false
      end

      private

      # Pushes the head element onto the stack of open elements, runs the
      # in-head rules, and removes the head element again no matter how the
      # in-head processing ends.
      def process_in_head_with_head_open
        parser.open_elements << parser.head
        begin
          halt InHead.new(parser).process
        ensure
          parser.open_elements.delete(parser.head)
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Gammo
  class Parser
    # Token handlers for the "before head" insertion mode (section 12.2.6.4.3).
    #
    # NOTE(review): +parser+ and +halt+ are inherited from InsertionMode, which
    # is not part of this file.
    class BeforeHead < InsertionMode
      # Drops what String#lstrip removes from the front of the text; a token
      # that ends up empty is consumed.
      def text_token(token)
        token.data = token.data.lstrip
        halt true if token.data.empty?
      end

      # <head> is inserted, remembered as the parser's head element and the
      # mode moves to in-head; <html> is processed with the in-body rules.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Head
          parser.add_element
          parser.head = parser.top
          parser.insertion_mode = InHead
          halt true
        end
      end

      # </head>, </body>, </html> and </br> imply a <head> start tag; every
      # other end tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Head, Tags::Body, Tags::Html, Tags::Br
          imply_head_start_tag
        else
          halt true
        end
      end

      # Adds the comment as a child through the parser and consumes the token.
      def comment_token(token)
        parser.add_child Node::Comment.new(data: token.data)
        halt true
      end

      # Ignores the token.
      def doctype_token(token)
        halt true
      end

      # Anything else implies a <head> start tag.
      def default(_)
        imply_head_start_tag
      end

      private

      # Parses an implied <head> start tag and halts with +false+.
      def imply_head_start_tag
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Head, Tags::Head.to_s)
        halt false
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Gammo
  class Parser
    # Token handlers for the "before html" insertion mode (section 12.2.6.4.2).
    #
    # NOTE(review): +parser+ and +halt+ are inherited from InsertionMode, which
    # is not part of this file.
    class BeforeHTML < InsertionMode
      # Ignores the token.
      def doctype_token(_)
        halt true
      end

      # Drops what String#lstrip removes from the front of the text; if the
      # token was all whitespace it is ignored.
      def text_token(token)
        token.data = token.data.lstrip
        halt true if token.data.empty?
      end

      # An <html> start tag is inserted and the mode moves to before-head;
      # other start tags are not handled here.
      def start_tag_token(token)
        if token.tag == Tags::Html
          parser.add_element
          parser.insertion_mode = BeforeHead
          halt true
        end
      end

      # </head>, </body>, </html> and </br> imply an <html> start tag; every
      # other end tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Head, Tags::Body, Tags::Html, Tags::Br
          imply_html_start_tag
        else
          halt true
        end
      end

      # Appends the comment directly to the document node.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.document.append_child(comment)
        halt true
      end

      # Anything else implies an <html> start tag.
      def default(_)
        imply_html_start_tag
      end

      private

      # Parses an implied <html> start tag and halts with +false+.
      def imply_html_start_tag
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Html, Tags::Html.to_s)
        halt false
      end
    end
  end
end
|
@@ -0,0 +1,463 @@
|
|
1
|
+
module Gammo
|
2
|
+
class Parser
|
3
|
+
# Section 12.2.6.4.6.
|
4
|
+
class InBody < InsertionMode
|
5
|
+
# Inserts the text of +token+ into the current node.
#
# Inside an empty <pre> or <listing> one leading "\r" and one leading "\n" are
# skipped. NUL characters are removed everywhere, and a token that ends up
# empty is consumed without inserting anything.
def text_token(token)
  text = token.data
  current = parser.open_elements.last
  case current.tag
  when Tags::Pre, Tags::Listing
    if current.first_child.nil? || current.first_child == false
      # ignore a newline at the start of the <pre> block.
      text = text[1..-1] if text.start_with?("\r")
      text = text[1..-1] if text.start_with?("\n")
    end
  end
  text = text.gsub("\x00", '')
  halt true if text.empty?
  parser.reconstruct_active_formatting_elements
  if parser.frameset_ok && !text.lstrip.empty?
    # something other than leading-strippable whitespace was inserted.
    parser.frameset_ok = false
  end
  parser.add_text(text)
end
|
22
|
+
|
23
|
+
# Handles a start tag token in the in-body insertion mode by dispatching on
# +token.tag+. Branches that do not call +halt+ themselves simply return.
def start_tag_token(token)
  case token.tag
  when Tags::Html
    halt true if template_in_open_elements?
    copy_attributes(parser.open_elements[0], token)
  when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta,
       Tags::Noframes, Tags::Script, Tags::Style, Tags::Template, Tags::Title
    halt InHead.new(parser).process
  when Tags::Body
    halt true if template_in_open_elements?
    # The second open element is expected to be <body>; merge the attributes
    # of the token into it when that is the case.
    body = parser.open_elements[1] if parser.open_elements.length >= 2
    if body.instance_of?(Node::Element) && body.tag == Tags::Body
      parser.frameset_ok = false
      copy_attributes(body, parser.token)
    end
  when Tags::Frameset
    replaceable = parser.frameset_ok &&
                  parser.open_elements.length >= 2 &&
                  parser.open_elements[1].tag == Tags::Body
    # ignore the token
    halt true unless replaceable
    # Detach <body>, cut the stack back to its first element and insert the
    # frameset instead.
    body = parser.open_elements[1]
    body.parent.remove_child(body) if body.parent
    parser.open_elements = parser.open_elements.slice(0, 1)
    parser.add_element
    parser.insertion_mode = InFrameset
    halt true
  when Tags::Address, Tags::Article, Tags::Aside, Tags::Blockquote,
       Tags::Center, Tags::Dialog, Tags::Details, Tags::Dir, Tags::Div,
       Tags::Dl, Tags::Fieldset, Tags::Figcaption, Tags::Figure,
       Tags::Footer, Tags::Header, Tags::Hgroup, Tags::Main, Tags::Menu,
       Tags::Nav, Tags::Ol, Tags::P, Tags::Section, Tags::Summary, Tags::Ul
    parser.pop_until(BUTTON_SCOPE, Tags::P)
    parser.add_element
  when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
    parser.pop_until(BUTTON_SCOPE, Tags::P)
    # A heading that is still the top element is popped before the new one is
    # inserted.
    case parser.top.tag
    when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
      parser.open_elements.pop
    end
    parser.add_element
  when Tags::Pre, Tags::Listing
    parser.pop_until(BUTTON_SCOPE, Tags::P)
    parser.add_element
    parser.frameset_ok = false
  when Tags::Form
    # ignore the token.
    halt true if parser.form && !template_in_open_elements?
    parser.pop_until(BUTTON_SCOPE, Tags::P)
    parser.add_element
    parser.form = parser.top unless template_in_open_elements?
  when Tags::Li
    insert_list_item(Tags::Li)
  when Tags::Dd, Tags::Dt
    insert_list_item(Tags::Dd, Tags::Dt)
  when Tags::Plaintext
    parser.pop_until(BUTTON_SCOPE, Tags::P)
    parser.add_element
  when Tags::Button
    parser.pop_until(DEFAULT_SCOPE, Tags::Button)
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.frameset_ok = false
  when Tags::A
    # Close a still-active <a> (searching back to the nearest scope marker)
    # before opening the new one.
    parser.active_formatting_elements.reverse_each do |entry|
      break if entry.is_a?(Node::ScopeMarker)

      if entry.instance_of?(Node::Element) && entry.tag == Tags::A
        adoption_agency_for_end_tag_formatting(Tags::A, "a")
        parser.open_elements.delete(entry)
        parser.active_formatting_elements.delete(entry)
        break
      end
    end
    parser.reconstruct_active_formatting_elements
    parser.add_formatting_element
  when Tags::B, Tags::Big, Tags::Code, Tags::Em, Tags::Font, Tags::I,
       Tags::S, Tags::Small, Tags::Strike, Tags::Strong, Tags::Tt, Tags::U
    parser.reconstruct_active_formatting_elements
    parser.add_formatting_element
  when Tags::Nobr
    parser.reconstruct_active_formatting_elements
    if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Nobr)
      adoption_agency_for_end_tag_formatting(Tags::Nobr, "nobr")
      parser.reconstruct_active_formatting_elements
    end
    parser.add_formatting_element
  when Tags::Applet, Tags::Marquee, Tags::Object
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.active_formatting_elements << Node::DEFAULT_SCOPE_MARKER
    parser.frameset_ok = false
  when Tags::Table
    parser.pop_until(BUTTON_SCOPE, Tags::P) unless parser.quirks
    parser.add_element
    parser.frameset_ok = false
    parser.insertion_mode = InTable
    halt true
  when Tags::Area, Tags::Br, Tags::Embed, Tags::Img, Tags::Input, Tags::Keygen, Tags::Wbr
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.open_elements.pop
    parser.acknowledge_self_closing_tag
    if token.tag == Tags::Input
      token.attributes.each do |attribute|
        # skip setting frameset_ok = false
        halt true if attribute.key == 'type' && attribute.value.downcase == 'hidden'
      end
    end
    parser.frameset_ok = false
  when Tags::Param, Tags::Source, Tags::Track
    parser.add_element
    parser.open_elements.pop
    parser.acknowledge_self_closing_tag
  when Tags::Hr
    parser.pop_until(BUTTON_SCOPE, Tags::P)
    parser.add_element
    parser.open_elements.pop
    parser.acknowledge_self_closing_tag
    parser.frameset_ok = false
  when Tags::Image
    # Rewrite the token into <img> and halt with false.
    # todo: fixme <img>
    token.tag = Tags::Img
    token.data = Tags::Img.to_s
    halt false
  when Tags::Textarea
    parser.add_element
    parser.set_original_insertion_mode
    parser.frameset_ok = false
    parser.insertion_mode = Text
  when Tags::Xmp
    parser.pop_until(BUTTON_SCOPE, Tags::P)
    parser.reconstruct_active_formatting_elements
    parser.frameset_ok = false
    parser.add_element
    parser.set_original_insertion_mode
    parser.insertion_mode = Text
  when Tags::Iframe
    parser.frameset_ok = false
    parser.parse_generic_raw_text_element
  when Tags::Noembed
    parser.parse_generic_raw_text_element
  when Tags::Noscript
    if parser.scripting?
      parser.parse_generic_raw_text_element
      halt true
    end
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.tokenizer.next_is_not_raw_text!
  when Tags::Select
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.frameset_ok = false
    parser.insertion_mode = InSelect
    halt true
  when Tags::Optgroup, Tags::Option
    parser.open_elements.pop if parser.top.tag == Tags::Option
    parser.reconstruct_active_formatting_elements
    parser.add_element
  when Tags::Rb, Tags::Rtc
    parser.generate_implied_end_tags if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Ruby)
    parser.add_element
  when Tags::Rp, Tags::Rt
    parser.generate_implied_end_tags('rtc') if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Ruby)
    parser.add_element
  when Tags::Math, Tags::Svg
    parser.reconstruct_active_formatting_elements
    adjustments =
      if token.tag == Tags::Math
        Parser::MATH_ML_ATTRIBUTE_ADJUSTMENTS
      else
        Parser::SVG_ATTRIBUTE_ADJUSTMENTS
      end
    parser.adjust_attribute_names(token.attributes, adjustments)
    parser.adjust_foreign_attributes(token.attributes)
    parser.add_element
    parser.top.namespace = token.data
    if parser.has_self_closing_token
      parser.open_elements.pop
      parser.acknowledge_self_closing_tag
    end
    halt true
  when Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Frame, Tags::Head,
       Tags::Tbody, Tags::Td, Tags::Tfoot, Tags::Th, Tags::Thead, Tags::Tr
    # ignore the token.
  else
    parser.reconstruct_active_formatting_elements
    parser.add_element
  end
end

# Whether a <template> element is on the stack of open elements.
# @!visibility private
def template_in_open_elements?
  parser.open_elements.any? { |element| element.tag == Tags::Template }
end

# Shared handling of <li> and <dd>/<dt> start tags. Walks the stack of open
# elements from the top: an element with one of +closing_tags+ cuts the stack
# just below itself; <address>, <div>, <p> and non-special elements are
# skipped; any other special element ends the search. Afterwards an open <p>
# in button scope is closed and the new element is inserted.
# @!visibility private
def insert_list_item(*closing_tags)
  parser.frameset_ok = false
  parser.open_elements.reverse_each_with_index do |element, position|
    case element.tag
    when *closing_tags
      parser.open_elements = parser.open_elements.slice(0, position)
    when Tags::Address, Tags::Div, Tags::P
      next
    else
      next unless parser.special_element?(element)
    end
    break
  end
  parser.pop_until(BUTTON_SCOPE, Tags::P)
  parser.add_element
end
|
227
|
+
|
228
|
+
# Handles an end tag token in the in-body insertion mode by dispatching on
# +token.tag+. Branches that do not call +halt+ themselves simply return.
def end_tag_token(token)
  case token.tag
  when Tags::Body
    if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Body)
      parser.insertion_mode = AfterBody
    end
  when Tags::Html
    # With <body> in scope an implied </body> is parsed first and the token is
    # halted with false; without it the token is consumed.
    if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Body)
      parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Body, Tags::Body.to_s)
      halt false
    end
    halt true
  when Tags::Address, Tags::Article, Tags::Aside, Tags::Blockquote,
       Tags::Button, Tags::Center, Tags::Dialog, Tags::Details, Tags::Dir,
       Tags::Div, Tags::Dl, Tags::Fieldset, Tags::Figcaption, Tags::Figure,
       Tags::Footer, Tags::Header, Tags::Hgroup, Tags::Listing, Tags::Main,
       Tags::Menu, Tags::Nav, Tags::Ol, Tags::Pre, Tags::Section,
       Tags::Summary, Tags::Ul
    parser.pop_until(DEFAULT_SCOPE, token.tag)
  when Tags::Form
    if parser.open_elements.any? { |element| element.tag == Tags::Template }
      position = parser.index_of_element_in_scope(DEFAULT_SCOPE, Tags::Form)
      # ignore the token.
      halt true if position == -1
      parser.generate_implied_end_tags
      # ignore the token.
      halt true unless parser.open_elements[position].tag == Tags::Form
      parser.pop_until(DEFAULT_SCOPE, Tags::Form)
    else
      # The form pointer is cleared before anything else is checked.
      form = parser.form
      parser.form = nil
      position = parser.index_of_element_in_scope(DEFAULT_SCOPE, Tags::Form)
      # ignore the token.
      halt true if form.nil? || position == -1 || parser.open_elements[position] != form
      parser.generate_implied_end_tags
      parser.open_elements.delete(form)
    end
  when Tags::P
    # Without a <p> in button scope an implied <p> start tag is parsed first,
    # so there is always one to close.
    unless parser.element_in_scope?(BUTTON_SCOPE, Tags::P)
      parser.parse_implied_token(Tokenizer::StartTagToken, Tags::P, Tags::P.to_s)
    end
    parser.pop_until(BUTTON_SCOPE, Tags::P)
  when Tags::Li
    parser.pop_until(LIST_ITEM_SCOPE, Tags::Li)
  when Tags::Dd, Tags::Dt
    parser.pop_until(DEFAULT_SCOPE, token.tag)
  when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
    parser.pop_until(DEFAULT_SCOPE, Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6)
  when Tags::A, Tags::B, Tags::Big, Tags::Code, Tags::Em, Tags::Font,
       Tags::I, Tags::Nobr, Tags::S, Tags::Small, Tags::Strike,
       Tags::Strong, Tags::Tt, Tags::U
    adoption_agency_for_end_tag_formatting(token.tag, token.data)
  when Tags::Applet, Tags::Marquee, Tags::Object
    if parser.pop_until(DEFAULT_SCOPE, token.tag)
      parser.clear_active_formatting_elements
    end
  when Tags::Br
    # Replace the current token with a start tag carrying the same data and
    # tag, then halt with false.
    # FIXME
    parser.token = Tokenizer::StartTagToken.new(token.data, tag: token.tag)
    halt false
  when Tags::Template
    halt InHead.new(parser).process
  else
    adoption_agency_for_end_tag_formatting(token.tag, token.data)
  end
end
|
286
|
+
|
287
|
+
# Wraps the comment text of +token+ in a comment node and adds it as a child
# through the parser.
def comment_token(token)
  comment = Node::Comment.new(data: token.data)
  parser.add_child(comment)
end
|
290
|
+
|
291
|
+
# Handles the error token. With a non-empty template stack the parser switches
# to the in-template mode and the token is halted with false. Otherwise the
# open elements are walked and the token is consumed, either at the first
# element that is not in the list below or after the walk.
def error_token(token)
  unless parser.template_stack.length.zero?
    parser.insertion_mode = InTemplate
    halt false
  end

  parser.open_elements.each do |element|
    case element.tag
    when Tags::Dd, Tags::Dt, Tags::Li, Tags::Optgroup, Tags::Option, Tags::P,
         Tags::Rb, Tags::Rp, Tags::Rt, Tags::Rtc, Tags::Tbody, Tags::Td, Tags::Tfoot,
         Tags::Th, Tags::Thead, Tags::Tr, Tags::Body, Tags::Html
      next
    else
      halt true
    end
  end
  halt true
end
|
308
|
+
|
309
|
+
# Fallback for tokens that no specific handler halted on: halts with +true+.
def default(_)
  halt(true)
end
|
312
|
+
|
313
|
+
# Implements the "adoption agency" algorithm for an end tag.
# https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
#
# +tag+ is the tag of the end tag being processed and +tagname+ its name as a
# string; the step numbers in the comments below refer to the algorithm linked
# above. Falls back to #adoption_agency_for_end_tag_other when no matching
# formatting element is found.
#
# @raise [ParseError] when a node that must be in both the list of active
#   formatting elements and the stack of open elements is missing from one.
# @!visibility private
def adoption_agency_for_end_tag_formatting(tag, tagname)
  # Step 1-2. If the current node has the given name and is not in the list of
  # active formatting elements, just pop it. A missing entry is detected with
  # include? because Array#index answers nil (never -1) for a miss.
  current = parser.open_elements.last
  if current.data == tagname && !parser.active_formatting_elements.include?(current)
    parser.open_elements.pop
    return
  end

  # Step 3-5. The outer loop
  8.times do
    # Step 6: Find the formatting element.
    formatting_element = nil
    parser.active_formatting_elements.reverse_each do |afe|
      break if afe.instance_of? Node::ScopeMarker
      if afe.tag == tag
        formatting_element = afe
        break
      end
    end
    unless formatting_element
      adoption_agency_for_end_tag_other(tag, tagname)
      return
    end
    # Step 7. Ignore the tag if formatting element is not in the stack of
    # open elements.
    index = parser.open_elements.index(formatting_element)
    unless index
      parser.active_formatting_elements.delete(formatting_element)
      return
    end
    # Step 8. Ignore the tag if formatting element is not in the scope.
    return unless parser.element_in_scope?(DEFAULT_SCOPE, tag)

    # Step 9. This step is omitted because it's just a parse error but no
    # need to return.

    # Step 10-11. Find the furthest block.
    furthest_block = parser.open_elements.slice(index..-1).find(&parser.method(:special_element?))
    unless furthest_block
      # No furthest block: pop everything up to and including the formatting
      # element and drop it from the list of active formatting elements.
      element = parser.open_elements.pop
      element = parser.open_elements.pop while element != formatting_element
      parser.active_formatting_elements.delete(element)
      return
    end

    # Step 12-13. Find the common ancestor and bookmark node.
    common_ancestor = parser.open_elements[index - 1]
    bookmark = parser.active_formatting_elements.index(formatting_element)

    # Step 14. The inner loop. find the last node to reparent.
    last_node = furthest_block
    node = furthest_block
    x = parser.open_elements.index(node)
    # Step 14.1.
    j = 0
    loop do
      # Step 14.2.
      j += 1
      # Step 14.3.
      x -= 1
      node = parser.open_elements[x]
      # Step 14.4.
      break if node == formatting_element

      # Step 14.5. Remove node from the list of active formatting elements if
      # inner loop counter is greater than three and node is in the list of
      # active formatting elements.
      ni = parser.active_formatting_elements.index(node)
      if ni && j > 3
        parser.active_formatting_elements.delete(node)
        # If any element of the list of active formatting elements is removed,
        # we need to take care whether bookmark should be decremented or not.
        # This is because the value of bookmark may exceed the size of the
        # list by removing elements from the list.
        bookmark -= 1 if ni <= bookmark
        next
      end
      # Step 14.6. Continue the next inner loop if node is not in the list of
      # active formatting elements.
      unless parser.active_formatting_elements.include?(node)
        parser.open_elements.delete(node)
        next
      end
      # Step 14.7. Replace node with a clone in both lists.
      clone = node.clone
      afei = parser.active_formatting_elements.index(node)
      oei = parser.open_elements.index(node)
      raise ParseError, 'bad parser state: expected elements are not found' unless afei && oei
      parser.active_formatting_elements[afei] = clone
      parser.open_elements[oei] = clone
      node = clone
      # Step 14.8. The bookmark moves right behind the clone, which was stored
      # in the list just above.
      bookmark = parser.active_formatting_elements.index(node) + 1 if last_node == furthest_block
      # Step 14.9
      last_node.parent.remove_child(last_node) if last_node.parent
      node.append_child(last_node)
      # Step 14.10
      last_node = node
    end
    # Step 15. Reparent last_node to the common ancestor,
    # or for misnested table nodes, to the foster parent.
    last_node.parent.remove_child(last_node) if last_node.parent
    case common_ancestor.tag
    when Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr
      parser.foster_parent(last_node)
    else
      common_ancestor.append_child(last_node)
    end

    # Steps 16-18. Reparent nodes from the furthest block's children
    # to a clone of the formatting element.
    clone = formatting_element.clone
    reparent_children(clone, furthest_block)
    furthest_block.append_child(clone)

    # Step 19. Fix up the list of active formatting elements.
    old_loc = parser.active_formatting_elements.index(formatting_element)
    bookmark -= 1 if old_loc && old_loc < bookmark
    parser.active_formatting_elements.delete(formatting_element)
    parser.active_formatting_elements.insert(bookmark, clone)

    # Step 20. Fix up the stack of open elements.
    parser.open_elements.delete(formatting_element)
    parser.open_elements.insert(parser.open_elements.index(furthest_block) + 1, clone)
  end
end
|
442
|
+
|
443
|
+
# Handles an end tag that has no matching active formatting element. Walks the
# stack of open elements from the top: the first element whose tag and name
# both match cuts the stack just below itself; a special element reached
# before that stops the search without changing the stack.
# @!visibility private
def adoption_agency_for_end_tag_other(tag, tagname)
  parser.open_elements.reverse_each_with_index do |element, position|
    matched = element.tag == tag && element.data == tagname
    parser.open_elements = parser.open_elements.slice(0, position) if matched
    break if matched || parser.special_element?(element)
  end
end
|
453
|
+
|
454
|
+
# Moves every child of +src+ to the end of +dst+, one at a time starting with
# the first child, so the children keep their order.
# @!visibility private
def reparent_children(dst, src)
  loop do
    child = src.first_child
    break unless child

    src.remove_child(child)
    dst.append_child(child)
  end
end
|
461
|
+
end
|
462
|
+
end
|
463
|
+
end
|