gammo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +6 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +27 -0
- data/LICENSE.txt +21 -0
- data/README.md +177 -0
- data/Rakefile +25 -0
- data/gammo.gemspec +23 -0
- data/lib/gammo.rb +15 -0
- data/lib/gammo/attribute.rb +17 -0
- data/lib/gammo/fragment_parser.rb +65 -0
- data/lib/gammo/node.rb +157 -0
- data/lib/gammo/parser.rb +524 -0
- data/lib/gammo/parser/constants.rb +94 -0
- data/lib/gammo/parser/foreign.rb +307 -0
- data/lib/gammo/parser/insertion_mode.rb +74 -0
- data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
- data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
- data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
- data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
- data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
- data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
- data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
- data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
- data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
- data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
- data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
- data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
- data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
- data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
- data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
- data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
- data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
- data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
- data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
- data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
- data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
- data/lib/gammo/parser/insertion_mode/text.rb +32 -0
- data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
- data/lib/gammo/parser/node_stack.rb +24 -0
- data/lib/gammo/tags.rb +9 -0
- data/lib/gammo/tags/table.rb +744 -0
- data/lib/gammo/tokenizer.rb +373 -0
- data/lib/gammo/tokenizer/debug.rb +34 -0
- data/lib/gammo/tokenizer/entity.rb +2240 -0
- data/lib/gammo/tokenizer/escape.rb +174 -0
- data/lib/gammo/tokenizer/script_scanner.rb +229 -0
- data/lib/gammo/tokenizer/tokens.rb +66 -0
- data/lib/gammo/version.rb +3 -0
- data/misc/html.yaml +384 -0
- data/misc/table.erubi +14 -0
- metadata +97 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
module Gammo
  class Parser
    # "After after body" insertion mode (section 12.2.6.4.22 of the HTML
    # Standard).
    #
    # +parser+ and +halt+ are not defined in this class; they are presumably
    # inherited from InsertionMode -- confirm there.
    class AfterAfterBody < InsertionMode
      # Ignores an error token: halts with true.
      def error_token(_)
        halt true
      end

      # Text that is empty after String#lstrip (whitespace only) is processed
      # by the in-body rules. Any other text is not handled by this method.
      def text_token(token)
        return unless token.data.lstrip.empty?

        halt in_body_result
      end

      # An <html> start tag is processed by the in-body rules; every other
      # start tag is not handled by this method.
      def start_tag_token(token)
        case token.tag
        when Tags::Html then halt in_body_result
        end
      end

      # Appends the comment directly to the document node, then halts with
      # true.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.document.append_child(comment)
        halt true
      end

      # A doctype is processed by the in-body rules.
      def doctype_token(token)
        halt in_body_result
      end

      # Fallback: switches the parser to the InBody mode and halts with false.
      def default(_)
        parser.insertion_mode = InBody
        halt false
      end

      private

      # Runs a fresh InBody mode for this parser and returns what its
      # +process+ call returns.
      def in_body_result
        InBody.new(parser).process
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Gammo
  class Parser
    # "After after frameset" insertion mode (section 12.2.6.4.23 of the HTML
    # Standard).
    #
    # +parser+ and +halt+ are not defined in this class; they are presumably
    # inherited from InsertionMode -- confirm there.
    class AfterAfterFrameset < InsertionMode
      # Appends the comment directly to the document node.
      def comment_token(token)
        parser.document.append_child Node::Comment.new(data: token.data)
      end

      # Only whitespace is meaningful in this mode. The whitespace characters
      # of the text are processed by the in-body rules; every other character
      # is dropped. Text without any whitespace is not handled here.
      def text_token(token)
        whitespace = token.data.gsub(/[^\s]/, '')
        return if whitespace.empty?

        # Strip the non-whitespace characters before delegating; otherwise the
        # in-body rules would insert the full, unfiltered text.
        # NOTE(review): relies on InBody processing this same token object
        # (i.e. token is parser.token) -- confirm against InsertionMode#process.
        token.data = whitespace
        halt InBody.new(parser).process
      end

      # <html> is processed by the in-body rules and <noframes> by the in-head
      # rules; other start tags are not handled by this method.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Noframes
          halt InHead.new(parser).process
        end
      end

      # A doctype is processed by the in-body rules.
      def doctype_token(token)
        halt InBody.new(parser).process
      end

      # Fallback: ignores the token by halting with true.
      def default(_)
        halt true
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'gammo/node'
|
2
|
+
module Gammo
  class Parser
    # "After body" insertion mode (section 12.2.6.4.19 of the HTML Standard).
    #
    # +parser+ and +halt+ are not defined in this class; they are presumably
    # inherited from InsertionMode -- confirm there.
    class AfterBody < InsertionMode
      # Ignores an error token. The handler halts explicitly, like
      # AfterAfterBody#error_token does; returning a bare +true+ does not
      # halt, so the token would otherwise continue to #default and be
      # reprocessed by the in-body rules.
      def error_token(_)
        halt true
      end

      # Text that is empty after String#lstrip (whitespace only) is processed
      # by the in-body rules. Any other text is not handled by this method.
      def text_token(token)
        s = token.data.lstrip
        halt InBody.new(parser).process if s.length.zero?
      end

      # An <html> start tag is processed by the in-body rules; every other
      # start tag is not handled by this method.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        end
      end

      # </html> switches to the AfterAfterBody mode unless a fragment is being
      # parsed, and halts with true. Other end tags are not handled here.
      def end_tag_token(token)
        case token.tag
        when Tags::Html
          parser.insertion_mode = AfterAfterBody unless parser.fragment?
          halt true
        end
      end

      # Appends the comment to the first open element, which must be <html>.
      #
      # @raise [ParseError] when the stack of open elements is empty or does
      #   not start with an <html> element
      def comment_token(token)
        open_elements = parser.open_elements
        if open_elements.length < 1 || open_elements.first.tag != Tags::Html
          raise ParseError, 'bad parser state: <html> element not found, in the after-body insertion mode'
        end
        open_elements.first.append_child Node::Comment.new(data: token.data)
        halt true
      end

      # Fallback: switches the parser to the InBody mode and halts with false.
      def default(_)
        parser.insertion_mode = InBody
        halt false
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'gammo/parser/insertion_mode/after_after_frameset'
|
2
|
+
|
3
|
+
module Gammo
  class Parser
    # "After frameset" insertion mode (section 12.2.6.4.21 of the HTML
    # Standard).
    #
    # +parser+ and +halt+ are not defined in this class; they are presumably
    # inherited from InsertionMode -- confirm there.
    class AfterFrameset < InsertionMode
      # Adds the comment through the parser's add_child.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child(comment)
      end

      # Keeps only the whitespace characters of the text and adds them as
      # text; nothing is added when no whitespace remains.
      def text_token(token)
        whitespace = token.data.gsub(/[^\s]/, '')
        return if whitespace.empty?

        parser.add_text(whitespace)
      end

      # <html> is processed by the in-body rules and <noframes> by the in-head
      # rules; other start tags are not handled by this method.
      def start_tag_token(token)
        case token.tag
        when Tags::Html then halt InBody.new(parser).process
        when Tags::Noframes then halt InHead.new(parser).process
        end
      end

      # </html> switches to the AfterAfterFrameset mode and halts with true;
      # other end tags are not handled by this method.
      def end_tag_token(token)
        case token.tag
        when Tags::Html
          parser.insertion_mode = AfterAfterFrameset
          halt true
        end
      end

      # Fallback: ignores the token by halting with true.
      def default(_)
        halt true
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Gammo
  class Parser
    # "After head" insertion mode (section 12.2.6.4.5 of the HTML Standard).
    #
    # +parser+ and +halt+ are not defined in this class; they are presumably
    # inherited from InsertionMode -- confirm there.
    class AfterHead < InsertionMode
      # Adds the leading whitespace of the text (what String#lstrip removes)
      # to the tree. Halts with true when nothing else is left; otherwise the
      # token keeps only the remaining text. Text without leading whitespace
      # is left untouched.
      def text_token(token)
        raw = token.data
        rest = raw.lstrip
        leading = raw.length - rest.length
        return unless leading > 0

        parser.add_text raw.slice(0, leading)
        halt true if rest == ''
        token.data = rest
      end

      # Dispatches on the start tag:
      # * <html> is processed by the in-body rules;
      # * <body> and <frameset> are inserted and select the next mode;
      # * head-level tags are processed by the in-head rules with the head
      #   element temporarily back on the stack of open elements;
      # * <head> is ignored.
      # Other start tags are not handled by this method.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Body
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InBody
          halt true
        when Tags::Frameset
          parser.add_element
          parser.insertion_mode = InFrameset
          halt true
        when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta,
             Tags::Noframes, Tags::Script, Tags::Style, Tags::Template, Tags::Title
          process_in_head_with_head_open
        when Tags::Head
          # ignore the token
          halt true
        end
      end

      # </body>, </html> and </br> are not handled by this method; </template>
      # is processed by the in-head rules; every other end tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Body, Tags::Html, Tags::Br
          # drop down to creating an implied <body> tag.
        when Tags::Template
          halt InHead.new(parser).process
        else
          # ignore the token.
          halt true
        end
      end

      # Adds the comment through the parser's add_child and halts with true.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child(comment)
        halt true
      end

      # Ignores a doctype by halting with true.
      def doctype_token(token)
        halt true
      end

      # Fallback: has the parser process an implied <body> start tag, sets
      # frameset_ok to true and halts with false.
      def default(_)
        parser.parse_implied_token(Tokenizer::StartTagToken, Tags::Body, Tags::Body.to_s)
        parser.frameset_ok = true
        halt false
      end

      private

      # Pushes the head element onto the stack of open elements, processes the
      # token with the in-head rules and halts with that result. The ensure
      # clause removes the head element from the stack again even though halt
      # leaves this method.
      def process_in_head_with_head_open
        parser.open_elements << parser.head
        begin
          halt InHead.new(parser).process
        ensure
          parser.open_elements.delete(parser.head)
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Gammo
  class Parser
    # "Before head" insertion mode (section 12.2.6.4.3 of the HTML Standard).
    #
    # +parser+ and +halt+ are not defined in this class; they are presumably
    # inherited from InsertionMode -- confirm there.
    class BeforeHead < InsertionMode
      # Removes leading whitespace from the token text and halts with true
      # when nothing is left.
      def text_token(token)
        stripped = token.data.lstrip
        token.data = stripped
        halt true if stripped.empty?
      end

      # <head> is inserted, remembered as the parser's head element and
      # switches to the InHead mode; <html> is processed by the in-body rules.
      # Other start tags are not handled by this method.
      def start_tag_token(token)
        case token.tag
        when Tags::Head
          parser.add_element
          parser.head = parser.top
          parser.insertion_mode = InHead
          halt true
        when Tags::Html
          halt InBody.new(parser).process
        end
      end

      # </head>, </body>, </html> and </br> imply a <head> start tag and halt
      # with false; every other end tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Head, Tags::Body, Tags::Html, Tags::Br
          imply_head_start_tag
          halt false
        else
          # ignore the token.
          halt true
        end
      end

      # Adds the comment through the parser's add_child and halts with true.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.add_child(comment)
        halt true
      end

      # Ignores a doctype by halting with true.
      def doctype_token(token)
        halt true
      end

      # Fallback: implies a <head> start tag and halts with false.
      def default(_)
        imply_head_start_tag
        halt false
      end

      private

      # Has the parser process an implied <head> start tag.
      def imply_head_start_tag
        parser.parse_implied_token Tokenizer::StartTagToken, Tags::Head, Tags::Head.to_s
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Gammo
  class Parser
    # "Before html" insertion mode (section 12.2.6.4.2 of the HTML Standard).
    #
    # +parser+ and +halt+ are not defined in this class; they are presumably
    # inherited from InsertionMode -- confirm there.
    class BeforeHTML < InsertionMode
      # Ignores a doctype by halting with true.
      def doctype_token(_)
        halt true
      end

      # Removes leading whitespace from the token text; when nothing is left
      # the token was all whitespace and is ignored (halts with true).
      def text_token(token)
        stripped = token.data.lstrip
        token.data = stripped
        halt true if stripped.empty?
      end

      # An <html> start tag is inserted and switches to the BeforeHead mode.
      # Other start tags are not handled by this method.
      def start_tag_token(token)
        if token.tag == Tags::Html
          parser.add_element
          parser.insertion_mode = BeforeHead
          halt true
        end
      end

      # </head>, </body>, </html> and </br> imply an <html> start tag and halt
      # with false; every other end tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Head, Tags::Body, Tags::Html, Tags::Br
          imply_html_start_tag
          halt false
        else
          # ignore the token.
          halt true
        end
      end

      # Appends the comment directly to the document node and halts with true.
      def comment_token(token)
        comment = Node::Comment.new(data: token.data)
        parser.document.append_child(comment)
        halt true
      end

      # Fallback: implies an <html> start tag and halts with false.
      def default(_)
        imply_html_start_tag
        halt false
      end

      private

      # Has the parser process an implied <html> start tag.
      def imply_html_start_tag
        parser.parse_implied_token Tokenizer::StartTagToken, Tags::Html, Tags::Html.to_s
      end
    end
  end
end
|
@@ -0,0 +1,463 @@
|
|
1
|
+
module Gammo
|
2
|
+
class Parser
|
3
|
+
# Section 12.2.6.4.6.
|
4
|
+
class InBody < InsertionMode
|
5
|
+
# Handles a text token in the "in body" insertion mode.
#
# A single leading "\r" and/or "\n" is dropped when the current node is a
# <pre> or <listing> element without children, NUL characters are removed,
# and the remaining text is added to the tree. Halts with true when no text
# remains. frameset_ok is cleared when the text contains something other than
# what String#lstrip removes.
def text_token(token)
  text = token.data
  current = parser.open_elements.last
  case current.tag
  when Tags::Pre, Tags::Listing
    # ignore a newline at the start of the <pre> block.
    unless current.first_child
      text = text[1..-1] if text.start_with?("\r")
      text = text[1..-1] if text.start_with?("\n")
    end
  end
  text = text.gsub("\x00", '')
  halt true if text.empty?

  parser.reconstruct_active_formatting_elements
  has_visible_text = !text.lstrip.empty?
  parser.frameset_ok = false if parser.frameset_ok && has_visible_text
  parser.add_text(text)
end
|
22
|
+
|
23
|
+
# Handles a start tag token in the "in body" insertion mode by dispatching on
# token.tag. Branches that call halt leave the method at that point; the
# remaining branches return normally to the caller.
def start_tag_token(token)
  case token.tag
  when Tags::Html
    halt true if in_body_template_open?
    copy_attributes(parser.open_elements[0], token)
  when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta,
       Tags::Noframes, Tags::Script, Tags::Style, Tags::Template, Tags::Title
    halt InHead.new(parser).process
  when Tags::Body
    halt true if in_body_template_open?
    in_body_merge_body_attributes
  when Tags::Frameset
    in_body_replace_body_with_frameset
  when Tags::Address, Tags::Article, Tags::Aside, Tags::Blockquote,
       Tags::Center, Tags::Dialog, Tags::Details, Tags::Dir, Tags::Div,
       Tags::Dl, Tags::Fieldset, Tags::Figcaption, Tags::Figure,
       Tags::Footer, Tags::Header, Tags::Hgroup, Tags::Main, Tags::Menu,
       Tags::Nav, Tags::Ol, Tags::P, Tags::Section, Tags::Summary, Tags::Ul
    in_body_close_p
    parser.add_element
  when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
    in_body_close_p
    # A heading directly inside another heading closes the outer one.
    case parser.top.tag
    when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
      parser.open_elements.pop
    end
    parser.add_element
  when Tags::Pre, Tags::Listing
    in_body_close_p
    parser.add_element
    parser.frameset_ok = false
  when Tags::Form
    # ignore the token.
    halt true if parser.form && !in_body_template_open?
    in_body_close_p
    parser.add_element
    parser.form = parser.top unless in_body_template_open?
  when Tags::Li
    parser.frameset_ok = false
    in_body_close_open_item(Tags::Li)
    in_body_close_p
    parser.add_element
  when Tags::Dd, Tags::Dt
    parser.frameset_ok = false
    in_body_close_open_item(Tags::Dd, Tags::Dt)
    in_body_close_p
    parser.add_element
  when Tags::Plaintext
    in_body_close_p
    parser.add_element
  when Tags::Button
    parser.pop_until DEFAULT_SCOPE, Tags::Button
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.frameset_ok = false
  when Tags::A
    in_body_close_open_anchor
    parser.reconstruct_active_formatting_elements
    parser.add_formatting_element
  when Tags::B, Tags::Big, Tags::Code, Tags::Em, Tags::Font, Tags::I,
       Tags::S, Tags::Small, Tags::Strike, Tags::Strong, Tags::Tt, Tags::U
    parser.reconstruct_active_formatting_elements
    parser.add_formatting_element
  when Tags::Nobr
    parser.reconstruct_active_formatting_elements
    if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Nobr)
      adoption_agency_for_end_tag_formatting(Tags::Nobr, "nobr")
      parser.reconstruct_active_formatting_elements
    end
    parser.add_formatting_element
  when Tags::Applet, Tags::Marquee, Tags::Object
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.active_formatting_elements << Node::DEFAULT_SCOPE_MARKER
    parser.frameset_ok = false
  when Tags::Table
    in_body_close_p unless parser.quirks
    parser.add_element
    parser.frameset_ok = false
    parser.insertion_mode = InTable
    halt true
  when Tags::Area, Tags::Br, Tags::Embed, Tags::Img, Tags::Input, Tags::Keygen, Tags::Wbr
    parser.reconstruct_active_formatting_elements
    in_body_insert_void_element
    if token.tag == Tags::Input
      token.attributes.each do |attr|
        # skip setting frameset_ok = false
        halt true if attr.key == 'type' && attr.value.downcase == 'hidden'
      end
    end
    parser.frameset_ok = false
  when Tags::Param, Tags::Source, Tags::Track
    in_body_insert_void_element
  when Tags::Hr
    in_body_close_p
    in_body_insert_void_element
    parser.frameset_ok = false
  when Tags::Image
    # <image> is rewritten to <img> and halted with false.
    # TODO: fixme <img>
    token.tag = Tags::Img
    token.data = Tags::Img.to_s
    halt false
  when Tags::Textarea
    parser.add_element
    parser.set_original_insertion_mode
    parser.frameset_ok = false
    parser.insertion_mode = Text
  when Tags::Xmp
    in_body_close_p
    parser.reconstruct_active_formatting_elements
    parser.frameset_ok = false
    parser.add_element
    parser.set_original_insertion_mode
    parser.insertion_mode = Text
  when Tags::Iframe
    parser.frameset_ok = false
    parser.parse_generic_raw_text_element
  when Tags::Noembed
    parser.parse_generic_raw_text_element
  when Tags::Noscript
    if parser.scripting?
      parser.parse_generic_raw_text_element
      halt true
    end
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.tokenizer.next_is_not_raw_text!
  when Tags::Select
    parser.reconstruct_active_formatting_elements
    parser.add_element
    parser.frameset_ok = false
    parser.insertion_mode = InSelect
    halt true
  when Tags::Optgroup, Tags::Option
    parser.open_elements.pop if parser.top.tag == Tags::Option
    parser.reconstruct_active_formatting_elements
    parser.add_element
  when Tags::Rb, Tags::Rtc
    parser.generate_implied_end_tags if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Ruby)
    parser.add_element
  when Tags::Rp, Tags::Rt
    parser.generate_implied_end_tags('rtc') if parser.element_in_scope?(DEFAULT_SCOPE, Tags::Ruby)
    parser.add_element
  when Tags::Math, Tags::Svg
    parser.reconstruct_active_formatting_elements
    adjustments = token.tag == Tags::Math ? Parser::MATH_ML_ATTRIBUTE_ADJUSTMENTS : Parser::SVG_ATTRIBUTE_ADJUSTMENTS
    parser.adjust_attribute_names(token.attributes, adjustments)
    parser.adjust_foreign_attributes(token.attributes)
    parser.add_element
    parser.top.namespace = token.data
    if parser.has_self_closing_token
      parser.open_elements.pop
      parser.acknowledge_self_closing_tag
    end
    halt true
  when Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Frame, Tags::Head, Tags::Tbody, Tags::Td, Tags::Tfoot, Tags::Th, Tags::Thead, Tags::Tr
    # ignore the token.
  else
    parser.reconstruct_active_formatting_elements
    parser.add_element
  end
end

# True when a <template> element is on the stack of open elements.
# @!visibility private
def in_body_template_open?
  parser.open_elements.any? { |oe| oe.tag == Tags::Template }
end

# Pops the stack of open elements until a <p> in button scope has been closed.
# @!visibility private
def in_body_close_p
  parser.pop_until(BUTTON_SCOPE, Tags::P)
end

# Inserts an element for the current token, pops it again right away and
# acknowledges the self-closing flag.
# @!visibility private
def in_body_insert_void_element
  parser.add_element
  parser.open_elements.pop
  parser.acknowledge_self_closing_tag
end

# Copies the attributes of the current token onto the second open element when
# that element is a <body> element, and clears frameset_ok in that case.
# @!visibility private
def in_body_merge_body_attributes
  if parser.open_elements.length >= 2
    body = parser.open_elements[1]
    if body.instance_of?(Node::Element) && body.tag == Tags::Body
      parser.frameset_ok = false
      copy_attributes(body, parser.token)
    end
  end
end

# Ignores the token (halts with true) unless frameset_ok is set and the second
# open element is <body>. Otherwise detaches that body from its parent, cuts
# the stack of open elements down to its first entry, inserts the frameset
# element, switches to the InFrameset mode and halts with true.
# @!visibility private
def in_body_replace_body_with_frameset
  stack = parser.open_elements
  body_is_second = stack.length >= 2 && stack[1].tag == Tags::Body
  # ignore the token
  halt true unless parser.frameset_ok && body_is_second

  body = parser.open_elements[1]
  body.parent.remove_child(body) if body.parent
  parser.open_elements = parser.open_elements.slice(0, 1)
  parser.add_element
  parser.insertion_mode = InFrameset
  halt true
end

# Walks the stack of open elements from the top. When an element with one of
# item_tags is found, the stack is cut just below it and the walk ends.
# <address>, <div>, <p> and non-special elements are skipped; any other
# special element ends the walk.
# @!visibility private
def in_body_close_open_item(*item_tags)
  parser.open_elements.reverse_each_with_index do |open_element, index|
    case open_element.tag
    when *item_tags then parser.open_elements = parser.open_elements.slice(0, index)
    when Tags::Address, Tags::Div, Tags::P then next
    else
      next unless parser.special_element?(open_element)
    end
    break
  end
end

# Looks for an <a> element in the list of active formatting elements, searching
# backwards up to the nearest scope marker. If one is found, runs the adoption
# agency algorithm for "a" and removes that element from both the stack of
# open elements and the list of active formatting elements.
# @!visibility private
def in_body_close_open_anchor
  parser.active_formatting_elements.reverse_each do |afe|
    break if afe.is_a?(Node::ScopeMarker)
    next unless afe.instance_of?(Node::Element) && afe.tag == Tags::A

    adoption_agency_for_end_tag_formatting(Tags::A, "a")
    parser.open_elements.delete(afe)
    parser.active_formatting_elements.delete(afe)
    break
  end
end
|
227
|
+
|
228
|
+
# Handles an end tag token in the "in body" insertion mode by dispatching on
# token.tag. Branches that call halt leave the method at that point; the
# remaining branches return normally to the caller.
def end_tag_token(token)
  case token.tag
  when Tags::Body
    body_in_scope = parser.element_in_scope?(DEFAULT_SCOPE, Tags::Body)
    parser.insertion_mode = AfterBody if body_in_scope
  when Tags::Html
    halt true unless parser.element_in_scope?(DEFAULT_SCOPE, Tags::Body)
    # Act as if </body> had been seen first, then halt with false.
    parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Body, Tags::Body.to_s)
    halt false
  when Tags::Address, Tags::Article, Tags::Aside, Tags::Blockquote,
       Tags::Button, Tags::Center, Tags::Dialog, Tags::Details, Tags::Dir,
       Tags::Div, Tags::Dl, Tags::Fieldset, Tags::Figcaption, Tags::Figure,
       Tags::Footer, Tags::Header, Tags::Hgroup, Tags::Listing, Tags::Main,
       Tags::Menu, Tags::Nav, Tags::Ol, Tags::Pre, Tags::Section,
       Tags::Summary, Tags::Ul, Tags::Dd, Tags::Dt
    parser.pop_until(DEFAULT_SCOPE, token.tag)
  when Tags::Form
    if parser.open_elements.any? { |oe| oe.tag == Tags::Template }
      in_body_close_form_inside_template
    else
      in_body_close_form_by_pointer
    end
  when Tags::P
    # A stray </p> first implies a <p> start tag, so there is one to close.
    p_in_scope = parser.element_in_scope?(BUTTON_SCOPE, Tags::P)
    parser.parse_implied_token(Tokenizer::StartTagToken, Tags::P, Tags::P.to_s) unless p_in_scope
    parser.pop_until(BUTTON_SCOPE, Tags::P)
  when Tags::Li
    parser.pop_until(LIST_ITEM_SCOPE, Tags::Li)
  when Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6
    parser.pop_until(DEFAULT_SCOPE, Tags::H1, Tags::H2, Tags::H3, Tags::H4, Tags::H5, Tags::H6)
  when Tags::A, Tags::B, Tags::Big, Tags::Code, Tags::Em, Tags::Font,
       Tags::I, Tags::Nobr, Tags::S, Tags::Small, Tags::Strike,
       Tags::Strong, Tags::Tt, Tags::U
    adoption_agency_for_end_tag_formatting(token.tag, token.data)
  when Tags::Applet, Tags::Marquee, Tags::Object
    parser.clear_active_formatting_elements if parser.pop_until(DEFAULT_SCOPE, token.tag)
  when Tags::Br
    # </br> is replaced by a start tag token with the same data and tag, and
    # halted with false.
    # FIXME
    parser.token = Tokenizer::StartTagToken.new(token.data, tag: token.tag)
    halt false
  when Tags::Template
    halt InHead.new(parser).process
  else
    adoption_agency_for_end_tag_formatting(token.tag, token.data)
  end
end

# </form> while a <template> is open: ignores the token (halts with true) when
# no form element is in scope, or when the element found is not a <form>;
# otherwise pops the stack until that form is closed.
# @!visibility private
def in_body_close_form_inside_template
  index = parser.index_of_element_in_scope(DEFAULT_SCOPE, Tags::Form)
  # ignore the token.
  halt true if index == -1
  parser.generate_implied_end_tags
  # ignore the token.
  halt true if parser.open_elements[index].tag != Tags::Form
  parser.pop_until(DEFAULT_SCOPE, Tags::Form)
end

# </form> without an open <template>: clears the parser's form pointer and
# removes the element it pointed at from the stack of open elements. The token
# is ignored (halts with true) when there was no pointer, no form is in scope,
# or the form in scope is a different node.
# @!visibility private
def in_body_close_form_by_pointer
  node = parser.form
  parser.form = nil
  index = parser.index_of_element_in_scope(DEFAULT_SCOPE, Tags::Form)
  # ignore the token.
  halt true if node == nil || index == -1 || parser.open_elements[index] != node
  parser.generate_implied_end_tags
  parser.open_elements.delete(node)
end
|
286
|
+
|
287
|
+
# Wraps the comment text in a Node::Comment and adds it through the parser's
# add_child. Does not halt.
def comment_token(token)
  comment = Node::Comment.new(data: token.data)
  parser.add_child(comment)
end
|
290
|
+
|
291
|
+
# Handles an error token in the "in body" insertion mode.
#
# With a non-empty template stack the parser switches to the InTemplate mode
# and halts with false. Otherwise the open elements are scanned; the scan
# halts with true at the first element outside the listed tags, and the method
# halts with true after the scan as well, so this path always halts with true.
# (halt is relied on to leave the method, as everywhere else in this class.)
def error_token(token)
  if parser.template_stack.length > 0
    parser.insertion_mode = InTemplate
    halt false
  end

  parser.open_elements.each do |open_element|
    case open_element.tag
    when Tags::Dd, Tags::Dt, Tags::Li, Tags::Optgroup, Tags::Option, Tags::P,
         Tags::Rb, Tags::Rp, Tags::Rt, Tags::Rtc, Tags::Tbody, Tags::Td, Tags::Tfoot,
         Tags::Th, Tags::Thead, Tags::Tr, Tags::Body, Tags::Html
      next
    else
      halt true
    end
  end
  halt true
end
|
308
|
+
|
309
|
+
# Fallback for the "in body" insertion mode: halts with true whatever the
# token is.
def default(_)
  halt(true)
end
|
312
|
+
|
313
|
+
# Implements "adoption agency" algorithm.
# https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
#
# Falls back to #adoption_agency_for_end_tag_other when no matching entry is
# found in the list of active formatting elements.
#
# @param tag tag identifier of the end tag; compared with Node#tag
# @param tagname [String] tag name of the end tag; compared with Node#data
# @raise [ParseError] when a node expected in both the stack of open elements
#   and the list of active formatting elements is missing from one of them
# @!visibility private
def adoption_agency_for_end_tag_formatting(tag, tagname)
  # Step 1-2. If the current node carries the tag name and is not an active
  # formatting element, popping it is all there is to do. #index reports a
  # missing entry as nil (never -1), so membership is tested with #include?.
  current = parser.open_elements.last
  if current && current.data == tagname && !parser.active_formatting_elements.include?(current)
    parser.open_elements.pop
    return
  end

  # Step 3-5. The outer loop
  8.times do
    # Step 6: Find the formatting element.
    formatting_element = nil
    parser.active_formatting_elements.reverse_each do |afe|
      break if afe.instance_of? Node::ScopeMarker
      if afe.tag == tag
        formatting_element = afe
        break
      end
    end
    unless formatting_element
      adoption_agency_for_end_tag_other(tag, tagname)
      return
    end
    # Step 7. Ignore the tag if formatting element is not in the stack of
    # open elements.
    index = parser.open_elements.index(formatting_element)
    unless index
      parser.active_formatting_elements.delete(formatting_element)
      return
    end
    # Step 8. Ignore the tag if formatting element is not in the scope.
    return unless parser.element_in_scope?(DEFAULT_SCOPE, tag)

    # Step 9. This step is omitted because it's just a parse error but no
    # need to return.

    # Step 10-11. Find the furthest block.
    furthest_block = parser.open_elements.slice(index..-1).find(&parser.method(:special_element?))
    unless furthest_block
      # No special element above the formatting element: pop everything up to
      # and including it and drop it from the active formatting elements.
      element = parser.open_elements.pop
      element = parser.open_elements.pop while element != formatting_element
      parser.active_formatting_elements.delete(element)
      return
    end

    # Step 12-13. Find the common ancestor and bookmark node.
    common_ancestor = parser.open_elements[index - 1]
    bookmark = parser.active_formatting_elements.index(formatting_element)

    # Step 14. The inner loop. find the last node to reparent.
    last_node = furthest_block
    node = furthest_block
    x = parser.open_elements.index(node)
    # Step 14.1.
    j = 0
    loop do
      # Step 14.2.
      j += 1
      # Step 14.3.
      x -= 1
      node = parser.open_elements[x]
      # Step 14.4.
      break if node == formatting_element

      # Step 14.5. Remove node from the list of active formatting elements if
      # inner loop counter is greater than three and node is in the list of
      # active formatting elements.
      ni = parser.active_formatting_elements.index(node)
      if ni && j > 3
        parser.active_formatting_elements.delete(node)
        # If any element of the list of active formatting elements is removed,
        # we need to take care whether bookmark should be decremented or not.
        # This is because the value of bookmark may exceed the size of the
        # list by removing elements from the list.
        bookmark -= 1 if ni <= bookmark
        next
      end
      # Step 14.6. Continue the next inner loop if node is not in the list of
      # active formatting elements.
      unless parser.active_formatting_elements.include?(node)
        parser.open_elements.delete(node)
        next
      end
      # Step 14.7
      clone = node.clone
      afei = parser.active_formatting_elements.index(node)
      oei = parser.open_elements.index(node)
      raise ParseError, 'bad parser state: expected elements are not found' unless afei && oei
      parser.active_formatting_elements[afei] = clone
      parser.open_elements[oei] = clone
      node = clone
      # Step 14.8
      bookmark = parser.active_formatting_elements.index(node) + 1 if last_node == furthest_block
      # Step 14.9
      last_node.parent.remove_child(last_node) if last_node.parent
      node.append_child(last_node)
      # Step 14.10
      last_node = node
    end
    # Step 15. Reparent last_node to the common ancestor,
    # or for misnested table nodes, to the foster parent.
    last_node.parent.remove_child(last_node) if last_node.parent
    case common_ancestor.tag
    when Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr
      parser.foster_parent(last_node)
    else
      common_ancestor.append_child(last_node)
    end

    # Steps 16-18. Reparent nodes from the furthest block's children
    # to a clone of the formatting element.
    clone = formatting_element.clone
    reparent_children(clone, furthest_block)
    furthest_block.append_child(clone)

    # Step 19. Fix up the list of active formatting elements.
    old_loc = parser.active_formatting_elements.index(formatting_element)
    bookmark -= 1 if old_loc && old_loc < bookmark
    parser.active_formatting_elements.delete(formatting_element)
    parser.active_formatting_elements.insert(bookmark, clone)

    # Step 20. Fix up the stack of open elements.
    parser.open_elements.delete(formatting_element)
    parser.open_elements.insert(parser.open_elements.index(furthest_block) + 1, clone)
  end
end
|
442
|
+
|
443
|
+
# Handles an end tag that has no entry in the list of active formatting
# elements. Walks the stack of open elements from the top: at the first
# element whose tag and data both match, the stack is cut just below that
# element and the walk stops. A special element that does not match also stops
# the walk, leaving the stack unchanged.
#
# @param tag tag identifier of the end tag; compared with Node#tag
# @param tagname [String] tag name of the end tag; compared with Node#data
# @!visibility private
def adoption_agency_for_end_tag_other(tag, tagname)
  parser.open_elements.reverse_each_with_index do |element, position|
    matched = element.tag == tag && element.data == tagname
    parser.open_elements = parser.open_elements.slice(0, position) if matched
    break if matched || parser.special_element?(element)
  end
end
|
453
|
+
|
454
|
+
# Moves every child of src to the end of dst, one at a time and in order,
# until src reports no first child any more.
#
# @param dst node receiving the children (via append_child)
# @param src node giving up its children (via first_child / remove_child)
# @!visibility private
def reparent_children(dst, src)
  loop do
    child = src.first_child
    break unless child

    src.remove_child(child)
    dst.append_child(child)
  end
end
|
461
|
+
end
|
462
|
+
end
|
463
|
+
end
|