gammo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.travis.yml +6 -0
  4. data/Gemfile +9 -0
  5. data/Gemfile.lock +27 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +177 -0
  8. data/Rakefile +25 -0
  9. data/gammo.gemspec +23 -0
  10. data/lib/gammo.rb +15 -0
  11. data/lib/gammo/attribute.rb +17 -0
  12. data/lib/gammo/fragment_parser.rb +65 -0
  13. data/lib/gammo/node.rb +157 -0
  14. data/lib/gammo/parser.rb +524 -0
  15. data/lib/gammo/parser/constants.rb +94 -0
  16. data/lib/gammo/parser/foreign.rb +307 -0
  17. data/lib/gammo/parser/insertion_mode.rb +74 -0
  18. data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
  19. data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
  20. data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
  21. data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
  22. data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
  23. data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
  24. data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
  25. data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
  26. data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
  27. data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
  28. data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
  29. data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
  30. data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
  31. data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
  32. data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
  33. data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
  34. data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
  35. data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
  36. data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
  37. data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
  38. data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
  39. data/lib/gammo/parser/insertion_mode/text.rb +32 -0
  40. data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
  41. data/lib/gammo/parser/node_stack.rb +24 -0
  42. data/lib/gammo/tags.rb +9 -0
  43. data/lib/gammo/tags/table.rb +744 -0
  44. data/lib/gammo/tokenizer.rb +373 -0
  45. data/lib/gammo/tokenizer/debug.rb +34 -0
  46. data/lib/gammo/tokenizer/entity.rb +2240 -0
  47. data/lib/gammo/tokenizer/escape.rb +174 -0
  48. data/lib/gammo/tokenizer/script_scanner.rb +229 -0
  49. data/lib/gammo/tokenizer/tokens.rb +66 -0
  50. data/lib/gammo/version.rb +3 -0
  51. data/misc/html.yaml +384 -0
  52. data/misc/table.erubi +14 -0
  53. metadata +97 -0
@@ -0,0 +1,47 @@
1
module Gammo
  class Parser
    # Section 12.2.6.4.11: the "in caption" insertion mode.
    #
    # NOTE(review): +halt+ is defined outside this file (presumably on
    # InsertionMode). The handlers rely on it ending the handling of the
    # current token at once; +true+ presumably means "token consumed" and
    # +false+ "reprocess the token in the newly selected insertion mode" --
    # confirm against InsertionMode.
    class InCaption < InsertionMode
      # Handles a start tag token.
      #
      # Table-structure start tags close the open caption and halt with false
      # after switching to InTable; <select> is inserted and the parser
      # switches to InSelectInTable. Other tags are not handled here.
      def start_tag_token(token)
        case token.tag
        when Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Tbody, Tags::Td,
             Tags::Tfoot, Tags::Th, Tags::Thead, Tags::Tr
          # "th" is part of this list in the HTML Standard; without it a <th>
          # start tag inside a caption would not close the caption.
          close_caption_or_ignore
          halt false
        when Tags::Select
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InSelectInTable
          halt true
        end
      end

      # Handles an end tag token.
      #
      # </caption> closes the caption (when one is in table scope) and halts
      # with true; </table> closes the caption and halts with false after
      # switching to InTable; the listed table-structure end tags are ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Caption
          close_caption_or_ignore
          halt true
        when Tags::Table
          close_caption_or_ignore
          halt false
        when Tags::Body, Tags::Col, Tags::Colgroup, Tags::Html, Tags::Tbody,
             Tags::Td, Tags::Tfoot, Tags::Th, Tags::Thead, Tags::Tr
          # ignore the token
          halt true
        end
      end

      # Hands the token to the InBody mode and halts with its result.
      def default(_)
        halt InBody.new(parser).process
      end

      private

      # Asks the parser to pop until a caption in table scope. When that
      # fails the token is ignored (halt true); otherwise the active
      # formatting elements are cleared and the parser switches to InTable.
      def close_caption_or_ignore
        halt true unless parser.pop_until(TABLE_SCOPE, Tags::Caption)
        parser.clear_active_formatting_elements
        parser.insertion_mode = InTable
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
module Gammo
  class Parser
    # Section 12.2.6.4.15: the "in cell" insertion mode.
    #
    # NOTE(review): +halt+ is defined outside this file (presumably on
    # InsertionMode) and is assumed to end the handling of the current token
    # immediately; its boolean presumably tells the parser whether the token
    # was consumed -- confirm against InsertionMode.
    class InCell < InsertionMode
      # Handles a start tag token: table-structure tags close the current
      # cell and halt with false after switching to InRow; <select> is
      # inserted and the parser switches to InSelectInTable.
      def start_tag_token(token)
        case token.tag
        when Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Tbody, Tags::Td,
             Tags::Tfoot, Tags::Th, Tags::Thead, Tags::Tr
          cell_closed = parser.pop_until(TABLE_SCOPE, Tags::Td, Tags::Th)
          # Nothing to close: the token is dropped.
          halt true unless cell_closed
          return_to_row
          halt false
        when Tags::Select
          parser.reconstruct_active_formatting_elements
          parser.add_element
          parser.frameset_ok = false
          parser.insertion_mode = InSelectInTable
          halt true
        end
      end

      # Handles an end tag token: </td> and </th> close the matching cell,
      # end tags of enclosing table structure close the cell and halt with
      # false, and the remaining listed end tags are ignored.
      def end_tag_token(token)
        tag = token.tag
        case tag
        when Tags::Td, Tags::Th
          # The token is consumed whether or not a matching cell was popped.
          return_to_row if parser.pop_until(TABLE_SCOPE, tag)
          halt true
        when Tags::Body, Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Html
          # ignore the token
          halt true
        when Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr
          # Ignore the token when the named element is not in table scope.
          halt true unless parser.element_in_scope?(TABLE_SCOPE, tag)
          if parser.pop_until(TABLE_SCOPE, Tags::Td, Tags::Th)
            parser.clear_active_formatting_elements
          end
          parser.insertion_mode = InRow
          halt false
        end
      end

      # Hands the token to the InBody mode and halts with its result.
      def default(_)
        halt InBody.new(parser).process
      end

      private

      # Clears the active formatting elements and switches to InRow.
      def return_to_row
        parser.clear_active_formatting_elements
        parser.insertion_mode = InRow
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
module Gammo
  class Parser
    # Section 12.2.6.4.12: the "in column group" insertion mode.
    #
    # NOTE(review): +halt+ is defined outside this file (presumably on
    # InsertionMode) and is assumed to end the handling of the current token
    # immediately; its boolean presumably tells the parser whether the token
    # was consumed -- confirm against InsertionMode.
    class InColumnGroup < InsertionMode
      # Handles a text token: leading whitespace is added as text, and the
      # token is consumed when nothing else remains. Otherwise the token data
      # is replaced by the remainder.
      def text_token(token)
        # String#lstrip would also remove vertical tab (and NUL on recent
        # Rubies), which the HTML Standard does not treat as whitespace, so
        # the set is spelled out: TAB, LF, FF, CR and SPACE.
        s = token.data.sub(/\A[ \t\n\f\r]+/, '')
        if s.length < token.data.length
          # add the initial whitespace to the current node.
          parser.add_text token.data.slice(0, token.data.length - s.length)
          halt true if s == ''
          token.data = s
        end
      end

      # Appends the comment as a child node and consumes the token.
      def comment_token(token)
        parser.add_child(Node::Comment.new(data: token.data))
        halt true
      end

      # Doctype tokens are ignored in this mode.
      def doctype_token(_)
        halt true
      end

      # Handles a start tag token: <html> goes through InBody, <col> is
      # inserted and immediately popped again, <template> goes through InHead.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Col
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
          halt true
        when Tags::Template
          halt InHead.new(parser).process
        end
      end

      # Handles an end tag token: </colgroup> pops the colgroup when it is the
      # current node and switches to InTable, </col> is ignored, </template>
      # goes through InHead.
      def end_tag_token(token)
        case token.tag
        when Tags::Colgroup
          if parser.top.tag == Tags::Colgroup
            parser.open_elements.pop
            parser.insertion_mode = InTable
          end
          halt true
        when Tags::Col
          # ignore the token
          halt true
        when Tags::Template
          halt InHead.new(parser).process
        end
      end

      # Error tokens go through InBody.
      def error_token(_)
        halt InBody.new(parser).process
      end

      # Ignores the token unless the current node is a colgroup; otherwise
      # pops it, switches to InTable and halts with false.
      def default(_)
        halt true if parser.top.tag != Tags::Colgroup
        parser.open_elements.pop
        parser.insertion_mode = InTable
        halt false
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
module Gammo
  class Parser
    # Section 12.2.6.4.20: the "in frameset" insertion mode.
    #
    # NOTE(review): +halt+ is defined outside this file (presumably on
    # InsertionMode) and is assumed to end the handling of the current token
    # immediately; its boolean presumably tells the parser whether the token
    # was consumed -- confirm against InsertionMode.
    class InFrameset < InsertionMode
      # Characters of a text token that are kept in this mode: TAB, LF, FF,
      # CR and SPACE.
      FRAMESET_WHITESPACE = " \t\n\f\r".freeze

      # Appends the comment as a child node.
      def comment_token(token)
        parser.add_child Node::Comment.new(data: token.data)
      end

      # Keeps only the whitespace characters of the text token and adds them
      # as text when any are present; every other character is dropped.
      #
      # The previous comparison against ?\s matched U+0020 only, so tabs and
      # line breaks between frameset tags were discarded as well.
      def text_token(token)
        text = token.data.each_char.with_object(String.new) do |c, s|
          s << c if FRAMESET_WHITESPACE.include?(c)
        end
        parser.add_text(text) if text != ''
      end

      # Handles a start tag token: <html> goes through InBody, <frameset> is
      # inserted, <frame> is inserted and immediately popped again, and
      # <noframes> goes through InHead.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Frameset
          parser.add_element
        when Tags::Frame
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
        when Tags::Noframes
          halt InHead.new(parser).process
        end
      end

      # Handles </frameset>: unless the current node is the html element, the
      # current node is popped; when the new current node is not a frameset
      # the parser switches to AfterFrameset and the token is consumed.
      def end_tag_token(token)
        case token.tag
        when Tags::Frameset
          if parser.open_elements.last.tag != Tags::Html
            parser.open_elements.pop
            if parser.open_elements.last.tag != Tags::Frameset
              parser.insertion_mode = AfterFrameset
              halt true
            end
          end
        end
      end

      # Anything not halted earlier is ignored.
      def default(_)
        # ignore the token
        halt true
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
module Gammo
  class Parser
    # Section 12.2.6.4.4: the "in head" insertion mode.
    #
    # NOTE(review): +halt+ is defined outside this file (presumably on
    # InsertionMode) and is assumed to end the handling of the current token
    # immediately; its boolean presumably tells the parser whether the token
    # was consumed -- confirm against InsertionMode.
    class InHead < InsertionMode
      # Handles a text token: leading whitespace is added as text, and the
      # token is consumed when nothing else remains. Otherwise the token data
      # is replaced by the remainder.
      def text_token(token)
        # String#lstrip would also remove vertical tab (and NUL on recent
        # Rubies), which the HTML Standard does not treat as whitespace, so
        # the set is spelled out: TAB, LF, FF, CR and SPACE.
        s = token.data.sub(/\A[ \t\n\f\r]+/, '')
        if s.length < token.data.length
          # add the initial whitespace to the current node.
          parser.add_text token.data.slice(0, token.data.length - s.length)
          halt true if s == ''
          token.data = s
        end
      end

      # Handles a start tag token seen inside <head>.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Base, Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta
          # Inserted and immediately popped again.
          parser.add_element
          parser.open_elements.pop
          parser.acknowledge_self_closing_tag
          halt true
        when Tags::Noscript
          if parser.scripting?
            parser.parse_generic_raw_text_element
            halt true
          end
          parser.add_element
          parser.insertion_mode = InHeadNoscript
          parser.tokenizer.next_is_not_raw_text!
          halt true
        when Tags::Script, Tags::Title
          parser.add_element
          parser.set_original_insertion_mode
          parser.insertion_mode = Text
          halt true
        when Tags::Noframes, Tags::Style
          parser.parse_generic_raw_text_element
          halt true
        when Tags::Head
          # ignore the token
          halt true
        when Tags::Template
          parser.add_element
          parser.active_formatting_elements << Node::DEFAULT_SCOPE_MARKER
          parser.frameset_ok = false
          parser.insertion_mode = InTemplate
          parser.template_stack << InTemplate
          halt true
        end
      end

      # Handles an end tag token seen inside <head>; end tags that are not
      # listed are ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Head
          parser.open_elements.pop
          parser.insertion_mode = AfterHead
          halt true
        when Tags::Body, Tags::Html, Tags::Br
          parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Head, Tags::Head.to_s)
          halt false
        when Tags::Template
          # Ignore the token when no template element is open.
          halt true unless parser.open_elements.any? { |oe| oe.tag == Tags::Template }
          # TODO: remove this divergence from the HTML5 spec.
          parser.generate_implied_end_tags
          # Cut the stack of open elements at the first template element
          # without a namespace that the reverse walk yields.
          parser.open_elements.reverse_each_with_index do |open_element, index|
            if !open_element.namespace && open_element.tag == Tags::Template
              parser.open_elements = parser.open_elements.slice(0, index)
              break
            end
          end
          parser.clear_active_formatting_elements
          parser.template_stack.pop
          parser.reset_insertion_mode
          halt true
        else
          # ignore the token
          halt true
        end
      end

      # Appends the comment as a child node and consumes the token.
      def comment_token(token)
        parser.add_child Node::Comment.new(data: token.data)
        halt true
      end

      # Doctype tokens are ignored in this mode.
      def doctype_token(_)
        halt true
      end

      # Processes an implied </head> end tag and halts with false.
      def default(_)
        parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Head, Tags::Head.to_s)
        halt false
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module Gammo
  class Parser
    # Section 12.2.6.4.5: the "in head noscript" insertion mode.
    #
    # NOTE(review): +halt+ is defined outside this file (presumably on
    # InsertionMode) and is assumed to end the handling of the current token
    # immediately; its boolean presumably tells the parser whether the token
    # was consumed -- confirm against InsertionMode.
    class InHeadNoscript < InsertionMode
      # Doctype tokens are ignored in this mode.
      def doctype_token(_)
        # ignore the token.
        halt true
      end

      # Comments go through InHead.
      def comment_token(_)
        halt InHead.new(parser).process
      end

      # Handles a start tag token: <html> goes through InBody, the listed
      # head-level tags go through InHead, <head> and <noscript> are ignored.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Basefont, Tags::Bgsound, Tags::Link, Tags::Meta, Tags::Noframes, Tags::Style
          halt InHead.new(parser).process
        when Tags::Head, Tags::Noscript
          # ignore the token.
          halt true
        end
      end

      # Handles an end tag token: </noscript> pops the current node and
      # switches back to InHead, </br> is deliberately not halted here, and
      # every other end tag is ignored.
      def end_tag_token(token)
        case token.tag
        when Tags::Noscript
          parser.open_elements.pop
          parser.insertion_mode = InHead
          halt true
        when Tags::Br
          # no-op
        else
          # ignore the token.
          halt true
        end
      end

      # Text consisting only of whitespace goes through InHead.
      #
      # The check spells out TAB, LF, FF, CR and SPACE instead of using
      # String#lstrip, which would also treat vertical tab (and NUL on recent
      # Rubies) as whitespace, unlike the HTML Standard.
      def text_token(token)
        halt InHead.new(parser).process if token.data !~ /[^ \t\n\f\r]/
      end

      # Pops the current node, switches to InHead and halts with false.
      #
      # Raises ParseError when the node left on top of the stack after the
      # pop is not a head element.
      def default(_)
        parser.open_elements.pop
        if parser.top.tag != Tags::Head
          raise ParseError, 'the new current node will be a head element.'
        end
        parser.insertion_mode = InHead
        halt false
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module Gammo
  class Parser
    # Section 12.2.6.4.14: the "in row" insertion mode.
    #
    # NOTE(review): +halt+ is defined outside this file (presumably on
    # InsertionMode) and is assumed to end the handling of the current token
    # immediately; its boolean presumably tells the parser whether the token
    # was consumed -- confirm against InsertionMode.
    class InRow < InsertionMode
      # Handles a start tag token: <td>/<th> open a cell and switch to
      # InCell; other table-structure tags close the row and halt with false
      # after switching to InTableBody.
      def start_tag_token(token)
        case token.tag
        when Tags::Td, Tags::Th
          parser.clear_stack_to_context(TABLE_ROW_SCOPE)
          parser.add_element
          parser.active_formatting_elements << Node::DEFAULT_SCOPE_MARKER
          parser.insertion_mode = InCell
          halt true
        when Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Tbody,
             Tags::Tfoot, Tags::Thead, Tags::Tr
          row_closed = parser.pop_until(TABLE_SCOPE, Tags::Tr)
          # No row in table scope: the token is dropped.
          halt true unless row_closed
          parser.insertion_mode = InTableBody
          halt false
        end
      end

      # Handles an end tag token: </tr> closes the row, </table> and the
      # table-section end tags close the row and halt with false, and the
      # remaining listed end tags are ignored.
      def end_tag_token(token)
        tag = token.tag
        case tag
        when Tags::Tr
          # The token is consumed whether or not a row was popped.
          parser.insertion_mode = InTableBody if parser.pop_until(TABLE_SCOPE, Tags::Tr)
          halt true
        when Tags::Table
          # Ignore the token when no row is in table scope.
          halt true unless parser.pop_until(TABLE_SCOPE, Tags::Tr)
          parser.insertion_mode = InTableBody
          halt false
        when Tags::Tbody, Tags::Tfoot, Tags::Thead
          # Ignore the token when the named section is not in table scope.
          halt true unless parser.element_in_scope?(TABLE_SCOPE, tag)
          parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Tr, Tags::Tr.to_s)
          halt false
        when Tags::Body, Tags::Caption, Tags::Col, Tags::Colgroup, Tags::Html,
             Tags::Td, Tags::Th
          # ignore the token
          halt true
        end
      end

      # Hands the token to the InTable mode and halts with its result.
      def default(_)
        halt InTable.new(parser).process
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
module Gammo
  class Parser
    # Section 12.2.6.4.16: the "in select" insertion mode.
    #
    # NOTE(review): +halt+ is defined outside this file (presumably on
    # InsertionMode) and is assumed to end the handling of the current token
    # immediately; its boolean presumably tells the parser whether the token
    # was consumed -- confirm against InsertionMode.
    class InSelect < InsertionMode
      # Adds the token text with every NUL character removed.
      def text_token(token)
        cleaned = token.data.gsub("\x00", '')
        parser.add_text cleaned
      end

      # Handles a start tag token seen inside <select>.
      def start_tag_token(token)
        case token.tag
        when Tags::Html
          halt InBody.new(parser).process
        when Tags::Option
          pop_current_if(Tags::Option)
          parser.add_element
        when Tags::Optgroup
          pop_current_if(Tags::Option)
          pop_current_if(Tags::Optgroup)
          parser.add_element
        when Tags::Select
          close_select_or_ignore
        when Tags::Input, Tags::Keygen, Tags::Textarea
          unless parser.element_in_scope?(SELECT_SCOPE, Tags::Select)
            parser.tokenizer.next_is_not_raw_text!
            # ignore the token
            halt true
          end
          parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Select, Tags::Select.to_s)
          halt false
        when Tags::Script, Tags::Template
          halt InHead.new(parser).process
        end
      end

      # Handles an end tag token seen inside <select>. The branches that do
      # not halt evaluate to nil.
      def end_tag_token(token)
        case token.tag
        when Tags::Option
          pop_current_if(Tags::Option)
          nil
        when Tags::Optgroup
          stack = parser.open_elements
          idx = stack.length - 1
          # Look past an option sitting on top of the stack.
          idx -= 1 if stack[idx].tag == Tags::Option
          if stack[idx].tag == Tags::Optgroup
            parser.open_elements = stack.slice(0, idx)
          end
          nil
        when Tags::Select
          close_select_or_ignore
          nil
        when Tags::Template
          halt InHead.new(parser).process
        end
      end

      # Appends the comment as a child node.
      def comment_token(token)
        parser.add_child(Node::Comment.new(data: token.data))
      end

      # Doctype tokens are ignored in this mode.
      def doctype_token(_)
        # ignore the token.
        halt true
      end

      # Error tokens go through InBody.
      def error_token(_)
        halt InBody.new(parser).process
      end

      # Anything not halted earlier is consumed.
      def default(_)
        halt true
      end

      private

      # Pops the current node when its tag equals +tag+.
      def pop_current_if(tag)
        parser.open_elements.pop if parser.top.tag == tag
      end

      # Asks the parser to pop until a select in select scope. When that
      # fails the token is ignored (halt true); otherwise the insertion mode
      # is reset and the result of that call is returned.
      def close_select_or_ignore
        halt true unless parser.pop_until(SELECT_SCOPE, Tags::Select)
        parser.reset_insertion_mode
      end
    end
  end
end