html5 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +58 -0
  3. data/README +9 -0
  4. data/Rakefile.rb +17 -0
  5. data/lib/html5/constants.rb +818 -0
  6. data/lib/html5/filters/base.rb +10 -0
  7. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  8. data/lib/html5/filters/optionaltags.rb +198 -0
  9. data/lib/html5/filters/sanitizer.rb +15 -0
  10. data/lib/html5/filters/whitespace.rb +36 -0
  11. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  12. data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
  13. data/lib/html5/html5parser/after_head_phase.rb +50 -0
  14. data/lib/html5/html5parser/before_head_phase.rb +41 -0
  15. data/lib/html5/html5parser/in_body_phase.rb +607 -0
  16. data/lib/html5/html5parser/in_caption_phase.rb +68 -0
  17. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  18. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  19. data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  20. data/lib/html5/html5parser/in_head_phase.rb +138 -0
  21. data/lib/html5/html5parser/in_row_phase.rb +87 -0
  22. data/lib/html5/html5parser/in_select_phase.rb +84 -0
  23. data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
  24. data/lib/html5/html5parser/in_table_phase.rb +110 -0
  25. data/lib/html5/html5parser/initial_phase.rb +134 -0
  26. data/lib/html5/html5parser/phase.rb +158 -0
  27. data/lib/html5/html5parser/root_element_phase.rb +42 -0
  28. data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  29. data/lib/html5/html5parser.rb +248 -0
  30. data/lib/html5/inputstream.rb +654 -0
  31. data/lib/html5/liberalxmlparser.rb +158 -0
  32. data/lib/html5/sanitizer.rb +188 -0
  33. data/lib/html5/serializer/htmlserializer.rb +180 -0
  34. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  35. data/lib/html5/serializer.rb +2 -0
  36. data/lib/html5/tokenizer.rb +968 -0
  37. data/lib/html5/treebuilders/base.rb +334 -0
  38. data/lib/html5/treebuilders/hpricot.rb +231 -0
  39. data/lib/html5/treebuilders/rexml.rb +208 -0
  40. data/lib/html5/treebuilders/simpletree.rb +185 -0
  41. data/lib/html5/treebuilders.rb +24 -0
  42. data/lib/html5/treewalkers/base.rb +154 -0
  43. data/lib/html5/treewalkers/hpricot.rb +48 -0
  44. data/lib/html5/treewalkers/rexml.rb +48 -0
  45. data/lib/html5/treewalkers/simpletree.rb +48 -0
  46. data/lib/html5/treewalkers.rb +26 -0
  47. data/lib/html5.rb +13 -0
  48. data/parse.rb +217 -0
  49. data/tests/preamble.rb +82 -0
  50. data/tests/test_encoding.rb +35 -0
  51. data/tests/test_lxp.rb +263 -0
  52. data/tests/test_parser.rb +68 -0
  53. data/tests/test_sanitizer.rb +142 -0
  54. data/tests/test_serializer.rb +68 -0
  55. data/tests/test_stream.rb +62 -0
  56. data/tests/test_tokenizer.rb +94 -0
  57. data/tests/test_treewalkers.rb +116 -0
  58. data/tests/tokenizer_test_parser.rb +63 -0
  59. metadata +120 -0
@@ -0,0 +1,78 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InCellPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
7
+
8
+ handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableOther'
9
+
10
+ handle_end %w( td th ) => 'TableCell', %w( body caption col colgroup html ) => 'Ignore'
11
+
12
+ handle_end %w( table tbody tfoot thead tr ) => 'Imply'
13
+
14
+ def processCharacters(data)
15
+ @parser.phases[:inBody].processCharacters(data)
16
+ end
17
+
18
+ def startTagTableOther(name, attributes)
19
+ if in_scope?('td', true) or in_scope?('th', true)
20
+ closeCell
21
+ @parser.phase.processStartTag(name, attributes)
22
+ else
23
+ # inner_html case
24
+ parse_error
25
+ end
26
+ end
27
+
28
+ def startTagOther(name, attributes)
29
+ @parser.phases[:inBody].processStartTag(name, attributes)
30
+ end
31
+
32
+ def endTagTableCell(name)
33
+ if in_scope?(name, true)
34
+ @tree.generateImpliedEndTags(name)
35
+ if @tree.open_elements.last.name != name
36
+ parse_error("Got table cell end tag (#{name}) while required end tags are missing.")
37
+
38
+ remove_open_elements_until(name)
39
+ else
40
+ @tree.open_elements.pop
41
+ end
42
+ @tree.clearActiveFormattingElements
43
+ @parser.phase = @parser.phases[:inRow]
44
+ else
45
+ parse_error(_("Unexpected end tag (#{name}). Ignored."))
46
+ end
47
+ end
48
+
49
+ def endTagIgnore(name)
50
+ parse_error(_("Unexpected end tag (#{name}). Ignored."))
51
+ end
52
+
53
+ def endTagImply(name)
54
+ if in_scope?(name, true)
55
+ closeCell
56
+ @parser.phase.processEndTag(name)
57
+ else
58
+ # sometimes inner_html case
59
+ parse_error
60
+ end
61
+ end
62
+
63
+ def endTagOther(name)
64
+ @parser.phases[:inBody].processEndTag(name)
65
+ end
66
+
67
+ protected
68
+
69
+ def closeCell
70
+ if in_scope?('td', true)
71
+ endTagTableCell('td')
72
+ elsif in_scope?('th', true)
73
+ endTagTableCell('th')
74
+ end
75
+ end
76
+
77
+ end
78
+ end
@@ -0,0 +1,55 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InColumnGroupPhase < Phase
5
+ # Token handlers for the column-group phase (see the spec link below).
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-column
7
+ # Tag names handled here; presumably dispatched to startTag<Name> / endTag<Name> by the Phase DSL (not shown here).
8
+ handle_start 'html', 'col'
9
+
10
+ handle_end 'colgroup', 'col'
11
+ # True when the current node is <html>; endTagColgroup treats that as the inner_html case.
12
+ def ignoreEndTagColgroup
13
+ @tree.open_elements[-1].name == 'html'
14
+ end
15
+ # Implies </colgroup>, then lets the phase now current process the characters unless the end tag was ignored.
16
+ def processCharacters(data)
17
+ ignoreEndTag = ignoreEndTagColgroup # sampled first: endTagColgroup may pop the stack
18
+ endTagColgroup("colgroup")
19
+ @parser.phase.processCharacters(data) unless ignoreEndTag
20
+ end
21
+ # <col>: inserted and popped straight off the open-element stack.
22
+ def startTagCol(name, attributes)
23
+ @tree.insert_element(name, attributes)
24
+ @tree.open_elements.pop
25
+ end
26
+ # Implies </colgroup>, then reprocesses the start tag unless the end tag was ignored.
27
+ def startTagOther(name, attributes)
28
+ ignoreEndTag = ignoreEndTagColgroup
29
+ endTagColgroup('colgroup')
30
+ @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
31
+ end
32
+ # Pops the current node and switches to :inTable, or reports a parse error when the current node is <html>.
33
+ def endTagColgroup(name)
34
+ if ignoreEndTagColgroup
35
+ # inner_html case
36
+ assert @parser.inner_html
37
+ parse_error
38
+ else
39
+ @tree.open_elements.pop
40
+ @parser.phase = @parser.phases[:inTable]
41
+ end
42
+ end
43
+ # </col> is always reported as a parse error.
44
+ def endTagCol(name)
45
+ parse_error(_('Unexpected end tag (col). col has no end tag.'))
46
+ end
47
+ # Implies </colgroup>, then reprocesses the end tag unless the implied end tag was ignored.
48
+ def endTagOther(name)
49
+ ignoreEndTag = ignoreEndTagColgroup
50
+ endTagColgroup('colgroup')
51
+ @parser.phase.processEndTag(name) unless ignoreEndTag
52
+ end
53
+
54
+ end
55
+ end
@@ -0,0 +1,57 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InFramesetPhase < Phase
5
+ # Token handlers for the frameset phase (see the spec link below).
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
7
+ # Tag names handled here; presumably dispatched to startTag<Name> / endTag<Name> by the Phase DSL (not shown here).
8
+ handle_start 'html', 'frameset', 'frame', 'noframes'
9
+
10
+ handle_end 'frameset', 'noframes'
11
+ # Character data is reported as a parse error and not inserted.
12
+ def processCharacters(data)
13
+ parse_error(_('Unexpected characters in the frameset phase. Characters ignored.'))
14
+ end
15
+ # Nested <frameset>: inserted and left on the open-element stack.
16
+ def startTagFrameset(name, attributes)
17
+ @tree.insert_element(name, attributes)
18
+ end
19
+ # <frame>: inserted and popped immediately.
20
+ def startTagFrame(name, attributes)
21
+ @tree.insert_element(name, attributes)
22
+ @tree.open_elements.pop
23
+ end
24
+ # <noframes> start tags are processed by the :inBody phase.
25
+ def startTagNoframes(name, attributes)
26
+ @parser.phases[:inBody].processStartTag(name, attributes)
27
+ end
28
+ # Every other start tag is reported as a parse error and ignored.
29
+ def startTagOther(name, attributes)
30
+ parse_error(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
31
+ end
32
+ # </frameset>: pops the current node unless it is <html> (parse error), then may switch to :afterFrameset.
33
+ def endTagFrameset(name)
34
+ if @tree.open_elements.last.name == 'html'
35
+ # inner_html case
36
+ parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html)."))
37
+ else
38
+ @tree.open_elements.pop
39
+ end
40
+ if (not @parser.inner_html and
41
+ @tree.open_elements.last.name != 'frameset')
42
+ # If we're not in inner_html mode and the current node is not a
43
+ # "frameset" element (anymore) then switch.
44
+ @parser.phase = @parser.phases[:afterFrameset]
45
+ end
46
+ end
47
+ # </noframes> is processed by the :inBody phase.
48
+ def endTagNoframes(name)
49
+ @parser.phases[:inBody].processEndTag(name)
50
+ end
51
+ # Every other end tag is reported as a parse error and ignored.
52
+ def endTagOther(name)
53
+ parse_error(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
54
+ end
55
+
56
+ end
57
+ end
@@ -0,0 +1,138 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InHeadPhase < Phase
5
+ # Token handlers for the head phase; tag names below are presumably dispatched to startTag<Name> / endTag<Name> by the Phase DSL (not shown here).
6
+ handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
7
+ handle_start %w( base link meta )
8
+
9
+ handle_end 'head'
10
+ handle_end %w( html body br p ) => 'ImplyAfterHead'
11
+ handle_end %w( title style script noscript )
12
+ # EOF: pops an unclosed title/style/script (parse error), leaves the head, then forwards EOF to the phase now current.
13
+ def process_eof
14
+ if ['title', 'style', 'script'].include?(name = @tree.open_elements.last.name)
15
+ parse_error(_("Unexpected end of file. Expected end tag (#{name})."))
16
+ @tree.open_elements.pop
17
+ end
18
+ anythingElse
19
+ @parser.phase.process_eof
20
+ end
21
+ # Text inside title/style/script/noscript is inserted; any other text leaves the head and is reprocessed.
22
+ def processCharacters(data)
23
+ if %w[title style script noscript].include?(@tree.open_elements.last.name)
24
+ @tree.insertText(data)
25
+ else
26
+ anythingElse
27
+ @parser.phase.processCharacters(data)
28
+ end
29
+ end
30
+ # A <head> start tag inside the head is reported as a parse error and ignored.
31
+ def startTagHead(name, attributes)
32
+ parse_error(_('Unexpected start tag head in existing head. Ignored'))
33
+ end
34
+ # <title>: created, attached via appendToHead, pushed on the stack; the tokenizer's content model flag becomes :RCDATA.
35
+ def startTagTitle(name, attributes)
36
+ element = @tree.createElement(name, attributes)
37
+ appendToHead(element)
38
+ @tree.open_elements.push(element)
39
+ @parser.tokenizer.content_model_flag = :RCDATA
40
+ end
41
+ # <style>: attached via appendToHead when head_pointer is set and :inHead is current, else to the current node; tokenizer flag becomes :CDATA.
42
+ def startTagStyle(name, attributes)
43
+ element = @tree.createElement(name, attributes)
44
+ if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
45
+ appendToHead(element)
46
+ else
47
+ @tree.open_elements.last.appendChild(element)
48
+ end
49
+ @tree.open_elements.push(element)
50
+ @parser.tokenizer.content_model_flag = :CDATA
51
+ end
52
+ # <noscript>: same placement rule and tokenizer flag as startTagStyle.
53
+ def startTagNoscript(name, attributes)
54
+ # XXX Need to decide whether to implement the scripting disabled case.
55
+ element = @tree.createElement(name, attributes)
56
+ if @tree.head_pointer !=nil and @parser.phase == @parser.phases[:inHead]
57
+ appendToHead(element)
58
+ else
59
+ @tree.open_elements.last.appendChild(element)
60
+ end
61
+ @tree.open_elements.push(element)
62
+ @parser.tokenizer.content_model_flag = :CDATA
63
+ end
64
+ # <script>: same placement rule and tokenizer flag as startTagStyle; the element is also flagged "parser-inserted".
65
+ def startTagScript(name, attributes)
66
+ #XXX Inner HTML case may be wrong
67
+ element = @tree.createElement(name, attributes)
68
+ element._flags.push("parser-inserted")
69
+ if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
70
+ appendToHead(element)
71
+ else
72
+ @tree.open_elements.last.appendChild(element)
73
+ end
74
+ @tree.open_elements.push(element)
75
+ @parser.tokenizer.content_model_flag = :CDATA
76
+ end
77
+ # base/link/meta: created and attached with the same placement rule, but never pushed on the open-element stack.
78
+ def startTagBaseLinkMeta(name, attributes)
79
+ element = @tree.createElement(name, attributes)
80
+ if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
81
+ appendToHead(element)
82
+ else
83
+ @tree.open_elements.last.appendChild(element)
84
+ end
85
+ end
86
+ # Any other start tag leaves the head and is reprocessed by the phase now current.
87
+ def startTagOther(name, attributes)
88
+ anythingElse
89
+ @parser.phase.processStartTag(name, attributes)
90
+ end
91
+ # </head>: pops <head> when it is the current node (else parse error); switches to :afterHead either way.
92
+ def endTagHead(name)
93
+ if @tree.open_elements.last.name == 'head'
94
+ @tree.open_elements.pop
95
+ else
96
+ parse_error(_("Unexpected end tag (head). Ignored."))
97
+ end
98
+ @parser.phase = @parser.phases[:afterHead]
99
+ end
100
+ # html/body/br/p end tags leave the head and are reprocessed by the phase now current.
101
+ def endTagImplyAfterHead(name)
102
+ anythingElse
103
+ @parser.phase.processEndTag(name)
104
+ end
105
+ # Pops the current node when its name equals +name+; otherwise a parse error.
106
+ def endTagTitleStyleScriptNoscript(name)
107
+ if @tree.open_elements.last.name == name
108
+ @tree.open_elements.pop
109
+ else
110
+ parse_error(_("Unexpected end tag (#{name}). Ignored."))
111
+ end
112
+ end
113
+ # Every other end tag is reported as a parse error and ignored.
114
+ def endTagOther(name)
115
+ parse_error(_("Unexpected end tag (#{name}). Ignored."))
116
+ end
117
+ # Leaves the head: runs endTagHead when <head> is the current node, otherwise just switches to :afterHead.
118
+ def anythingElse
119
+ if @tree.open_elements.last.name == 'head'
120
+ endTagHead('head')
121
+ else
122
+ @parser.phase = @parser.phases[:afterHead]
123
+ end
124
+ end
125
+
126
+ protected
127
+ # Appends to head_pointer, or to the current node when head_pointer is nil (asserted to be the inner_html case).
128
+ def appendToHead(element)
129
+ if @tree.head_pointer.nil?
130
+ assert @parser.inner_html
131
+ @tree.open_elements.last.appendChild(element)
132
+ else
133
+ @tree.head_pointer.appendChild(element)
134
+ end
135
+ end
136
+
137
+ end
138
+ end
@@ -0,0 +1,87 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InRowPhase < Phase
5
+ # Token handlers for the row phase (see the spec link below).
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-row
7
+ # Tag-name => handler-suffix tables; presumably routed to startTag<Suffix> / endTag<Suffix> by the Phase DSL (not shown here).
8
+ handle_start 'html', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'
9
+
10
+ handle_end 'tr', 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th ) => 'Ignore'
11
+ # Character data is processed by the :inTable phase.
12
+ def processCharacters(data)
13
+ @parser.phases[:inTable].processCharacters(data)
14
+ end
15
+ # td/th: clears the stack back to the row, inserts the cell, switches to :inCell and pushes a Marker on the active formatting elements.
16
+ def startTagTableCell(name, attributes)
17
+ clearStackToTableRowContext
18
+ @tree.insert_element(name, attributes)
19
+ @parser.phase = @parser.phases[:inCell]
20
+ @tree.activeFormattingElements.push(Marker)
21
+ end
22
+ # Implies </tr>, then reprocesses the start tag unless the implied end tag was ignored.
23
+ def startTagTableOther(name, attributes)
24
+ ignoreEndTag = ignoreEndTagTr # sampled first: endTagTr may pop the row
25
+ endTagTr('tr')
26
+ # XXX how are we sure it's always ignored in the inner_html case?
27
+ @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
28
+ end
29
+ # Any other start tag is processed by the :inTable phase.
30
+ def startTagOther(name, attributes)
31
+ @parser.phases[:inTable].processStartTag(name, attributes)
32
+ end
33
+ # </tr>: clears the stack back to the row, pops it and switches to :inTableBody; a parse error when ignoreEndTagTr is true.
34
+ def endTagTr(name)
35
+ if ignoreEndTagTr
36
+ # inner_html case
37
+ assert @parser.inner_html
38
+ parse_error
39
+ else
40
+ clearStackToTableRowContext
41
+ @tree.open_elements.pop
42
+ @parser.phase = @parser.phases[:inTableBody]
43
+ end
44
+ end
45
+ # </table>: implies </tr> first.
46
+ def endTagTable(name)
47
+ ignoreEndTag = ignoreEndTagTr
48
+ endTagTr('tr')
49
+ # Reprocess the current tag if the tr end tag was not ignored
50
+ # XXX how are we sure it's always ignored in the inner_html case?
51
+ @parser.phase.processEndTag(name) unless ignoreEndTag
52
+ end
53
+ # tbody/tfoot/thead end tag: when +name+ is in scope, implies </tr> and reprocesses the tag; otherwise a parse error.
54
+ def endTagTableRowGroup(name)
55
+ if in_scope?(name, true)
56
+ endTagTr('tr')
57
+ @parser.phase.processEndTag(name)
58
+ else
59
+ # inner_html case
60
+ parse_error
61
+ end
62
+ end
63
+ # These end tags are reported as parse errors and ignored.
64
+ def endTagIgnore(name)
65
+ parse_error(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
66
+ end
67
+ # Any other end tag is processed by the :inTable phase.
68
+ def endTagOther(name)
69
+ @parser.phases[:inTable].processEndTag(name)
70
+ end
71
+
72
+ protected
73
+ # Pops open elements (each reported as a parse error) until the current node is tr or html.
74
+ # XXX unify this with other table helper methods
75
+ def clearStackToTableRowContext
76
+ until %w[tr html].include?(name = @tree.open_elements.last.name)
77
+ parse_error(_("Unexpected implied end tag (#{name}) in the row phase."))
78
+ @tree.open_elements.pop
79
+ end
80
+ end
81
+ # True when no tr is in scope. NOTE(review): every other call site passes `true` positionally, not a :tableVariant hash; confirm in_scope? treats both alike.
82
+ def ignoreEndTagTr
83
+ not in_scope?('tr', :tableVariant => true)
84
+ end
85
+
86
+ end
87
+ end
@@ -0,0 +1,84 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InSelectPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-select
7
+
8
+ handle_start 'html', 'option', 'optgroup', 'select'
9
+
10
+ handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'
11
+
12
+ def processCharacters(data)
13
+ @tree.insertText(data)
14
+ end
15
+
16
+ def startTagOption(name, attributes)
17
+ # We need to imply </option> if <option> is the current node.
18
+ @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
19
+ @tree.insert_element(name, attributes)
20
+ end
21
+
22
+ def startTagOptgroup(name, attributes)
23
+ @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
24
+ @tree.open_elements.pop if @tree.open_elements.last.name == 'optgroup'
25
+ @tree.insert_element(name, attributes)
26
+ end
27
+
28
+ def startTagSelect(name, attributes)
29
+ parse_error(_('Unexpected start tag (select) in the select phase implies select start tag.'))
30
+ endTagSelect('select')
31
+ end
32
+
33
+ def startTagOther(name, attributes)
34
+ parse_error(_('Unexpected start tag token (#{name}) in the select phase. Ignored.'))
35
+ end
36
+
37
+ def endTagOption(name)
38
+ if @tree.open_elements.last.name == 'option'
39
+ @tree.open_elements.pop
40
+ else
41
+ parse_error(_('Unexpected end tag (option) in the select phase. Ignored.'))
42
+ end
43
+ end
44
+
45
+ def endTagOptgroup(name)
46
+ # </optgroup> implicitly closes <option>
47
+ if @tree.open_elements.last.name == 'option' and @tree.open_elements[-2].name == 'optgroup'
48
+ @tree.open_elements.pop
49
+ end
50
+ # It also closes </optgroup>
51
+ if @tree.open_elements.last.name == 'optgroup'
52
+ @tree.open_elements.pop
53
+ # But nothing else
54
+ else
55
+ parse_error(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
56
+ end
57
+ end
58
+
59
+ def endTagSelect(name)
60
+ if in_scope?('select', true)
61
+ remove_open_elements_until('select')
62
+
63
+ @parser.reset_insertion_mode
64
+ else
65
+ # inner_html case
66
+ parse_error
67
+ end
68
+ end
69
+
70
+ def endTagTableElements(name)
71
+ parse_error(_("Unexpected table end tag (#{name}) in the select phase."))
72
+
73
+ if in_scope?(name, true)
74
+ endTagSelect('select')
75
+ @parser.phase.processEndTag(name)
76
+ end
77
+ end
78
+
79
+ def endTagOther(name)
80
+ parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
81
+ end
82
+
83
+ end
84
+ end
@@ -0,0 +1,83 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InTableBodyPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
7
+
8
+ handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'
9
+
10
+ handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ingore'
11
+
12
+ def processCharacters(data)
13
+ @parser.phases[:inTable].processCharacters(data)
14
+ end
15
+
16
+ def startTagTr(name, attributes)
17
+ clearStackToTableBodyContext
18
+ @tree.insert_element(name, attributes)
19
+ @parser.phase = @parser.phases[:inRow]
20
+ end
21
+
22
+ def startTagTableCell(name, attributes)
23
+ parse_error(_("Unexpected table cell start tag (#{name}) in the table body phase."))
24
+ startTagTr('tr', {})
25
+ @parser.phase.processStartTag(name, attributes)
26
+ end
27
+
28
+ def startTagTableOther(name, attributes)
29
+ # XXX AT Any ideas on how to share this with endTagTable?
30
+ if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
31
+ clearStackToTableBodyContext
32
+ endTagTableRowGroup(@tree.open_elements.last.name)
33
+ @parser.phase.processStartTag(name, attributes)
34
+ else
35
+ # inner_html case
36
+ parse_error
37
+ end
38
+ end
39
+
40
+ def startTagOther(name, attributes)
41
+ @parser.phases[:inTable].processStartTag(name, attributes)
42
+ end
43
+
44
+ def endTagTableRowGroup(name)
45
+ if in_scope?(name, true)
46
+ clearStackToTableBodyContext
47
+ @tree.open_elements.pop
48
+ @parser.phase = @parser.phases[:inTable]
49
+ else
50
+ parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
51
+ end
52
+ end
53
+
54
+ def endTagTable(name)
55
+ if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
56
+ clearStackToTableBodyContext
57
+ endTagTableRowGroup(@tree.open_elements.last.name)
58
+ @parser.phase.processEndTag(name)
59
+ else
60
+ # inner_html case
61
+ parse_error
62
+ end
63
+ end
64
+
65
+ def endTagIgnore(name)
66
+ parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
67
+ end
68
+
69
+ def endTagOther(name)
70
+ @parser.phases[:inTable].processEndTag(name)
71
+ end
72
+
73
+ protected
74
+
75
+ def clearStackToTableBodyContext
76
+ until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
77
+ parse_error(_("Unexpected implied end tag (#{name}) in the table body phase."))
78
+ @tree.open_elements.pop
79
+ end
80
+ end
81
+
82
+ end
83
+ end
@@ -0,0 +1,110 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InTablePhase < Phase
5
+ # Token handlers for the table phase (see the spec link below).
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-table
7
+ # Tag-name => handler tables; presumably routed to startTag<Suffix> / endTag<Suffix> by the Phase DSL (not shown here).
8
+ handle_start 'html', 'caption', 'colgroup', 'col', 'table'
9
+
10
+ handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'
11
+
12
+ handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
13
+ # Character data: a parse error, then processed by :inBody with insert_from_table set for the duration of the call.
14
+ def processCharacters(data)
15
+ parse_error(_("Unexpected non-space characters in table context caused voodoo mode."))
16
+ # Make all the special element rearranging voodoo kick in
17
+ @tree.insert_from_table = true
18
+ # Process the character in the "in body" mode
19
+ @parser.phases[:inBody].processCharacters(data)
20
+ @tree.insert_from_table = false
21
+ end
22
+ # <caption>: clears the stack back to the table, pushes a Marker, inserts the element and switches to :inCaption.
23
+ def startTagCaption(name, attributes)
24
+ clearStackToTableContext
25
+ @tree.activeFormattingElements.push(Marker)
26
+ @tree.insert_element(name, attributes)
27
+ @parser.phase = @parser.phases[:inCaption]
28
+ end
29
+ # <colgroup>: clears the stack back to the table, inserts the element and switches to :inColumnGroup.
30
+ def startTagColgroup(name, attributes)
31
+ clearStackToTableContext
32
+ @tree.insert_element(name, attributes)
33
+ @parser.phase = @parser.phases[:inColumnGroup]
34
+ end
35
+ # <col>: implies a <colgroup> with no attributes, then reprocesses the tag in the phase now current.
36
+ def startTagCol(name, attributes)
37
+ startTagColgroup('colgroup', {})
38
+ @parser.phase.processStartTag(name, attributes)
39
+ end
40
+ # tbody/tfoot/thead: clears the stack back to the table, inserts the element and switches to :inTableBody.
41
+ def startTagRowGroup(name, attributes)
42
+ clearStackToTableContext
43
+ @tree.insert_element(name, attributes)
44
+ @parser.phase = @parser.phases[:inTableBody]
45
+ end
46
+ # td/th/tr: implies a <tbody> with no attributes, then reprocesses the tag in the phase now current.
47
+ def startTagImplyTbody(name, attributes)
48
+ startTagRowGroup('tbody', {})
49
+ @parser.phase.processStartTag(name, attributes)
50
+ end
51
+ # Nested <table>: parse error; </table> is processed first and the start tag is reprocessed unless parsing inner_html.
52
+ def startTagTable(name, attributes)
53
+ parse_error(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
54
+ @parser.phase.processEndTag('table')
55
+ @parser.phase.processStartTag(name, attributes) unless @parser.inner_html
56
+ end
57
+ # Any other start tag: a parse error, then processed by :inBody with insert_from_table set for the duration of the call.
58
+ def startTagOther(name, attributes)
59
+ parse_error(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
60
+ # Make all the special element rearranging voodoo kick in
61
+ @tree.insert_from_table = true
62
+ # Process the start tag in the "in body" mode
63
+ @parser.phases[:inBody].processStartTag(name, attributes)
64
+ @tree.insert_from_table = false
65
+ end
66
+ # </table>: removes open elements until table and resets the insertion mode; a parse error when no table is in scope.
67
+ def endTagTable(name)
68
+ if in_scope?('table', true)
69
+ @tree.generateImpliedEndTags
70
+
71
+ unless @tree.open_elements.last.name == 'table'
72
+ parse_error(_("Unexpected end tag (table). Expected end tag (#{@tree.open_elements.last.name})."))
73
+ end
74
+
75
+ remove_open_elements_until('table')
76
+
77
+ @parser.reset_insertion_mode
78
+ else
79
+ # inner_html case
80
+ assert @parser.inner_html
81
+ parse_error
82
+ end
83
+ end
84
+ # These end tags are reported as parse errors and ignored.
85
+ def endTagIgnore(name)
86
+ parse_error(_("Unexpected end tag (#{name}). Ignored."))
87
+ end
88
+ # Any other end tag: a parse error, then processed by :inBody with insert_from_table set for the duration of the call.
89
+ def endTagOther(name)
90
+ parse_error(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
91
+ # Make all the special element rearranging voodoo kick in
92
+ @tree.insert_from_table = true
93
+ # Process the end tag in the "in body" mode
94
+ @parser.phases[:inBody].processEndTag(name)
95
+ @tree.insert_from_table = false
96
+ end
97
+
98
+ protected
99
+ # Pops open elements (each reported as a parse error) until the current node is table or html.
100
+ def clearStackToTableContext
101
+ # "clear the stack back to a table context"
102
+ until %w[table html].include?(name = @tree.open_elements.last.name)
103
+ parse_error(_("Unexpected implied end tag (#{name}) in the table phase."))
104
+ @tree.open_elements.pop
105
+ end
106
+ # When the current node is <html> it's an inner_html case
107
+ end
108
+
109
+ end
110
+ end