html5 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +58 -0
  3. data/README +9 -0
  4. data/Rakefile.rb +17 -0
  5. data/lib/html5/constants.rb +818 -0
  6. data/lib/html5/filters/base.rb +10 -0
  7. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  8. data/lib/html5/filters/optionaltags.rb +198 -0
  9. data/lib/html5/filters/sanitizer.rb +15 -0
  10. data/lib/html5/filters/whitespace.rb +36 -0
  11. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  12. data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
  13. data/lib/html5/html5parser/after_head_phase.rb +50 -0
  14. data/lib/html5/html5parser/before_head_phase.rb +41 -0
  15. data/lib/html5/html5parser/in_body_phase.rb +607 -0
  16. data/lib/html5/html5parser/in_caption_phase.rb +68 -0
  17. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  18. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  19. data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  20. data/lib/html5/html5parser/in_head_phase.rb +138 -0
  21. data/lib/html5/html5parser/in_row_phase.rb +87 -0
  22. data/lib/html5/html5parser/in_select_phase.rb +84 -0
  23. data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
  24. data/lib/html5/html5parser/in_table_phase.rb +110 -0
  25. data/lib/html5/html5parser/initial_phase.rb +134 -0
  26. data/lib/html5/html5parser/phase.rb +158 -0
  27. data/lib/html5/html5parser/root_element_phase.rb +42 -0
  28. data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  29. data/lib/html5/html5parser.rb +248 -0
  30. data/lib/html5/inputstream.rb +654 -0
  31. data/lib/html5/liberalxmlparser.rb +158 -0
  32. data/lib/html5/sanitizer.rb +188 -0
  33. data/lib/html5/serializer/htmlserializer.rb +180 -0
  34. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  35. data/lib/html5/serializer.rb +2 -0
  36. data/lib/html5/tokenizer.rb +968 -0
  37. data/lib/html5/treebuilders/base.rb +334 -0
  38. data/lib/html5/treebuilders/hpricot.rb +231 -0
  39. data/lib/html5/treebuilders/rexml.rb +208 -0
  40. data/lib/html5/treebuilders/simpletree.rb +185 -0
  41. data/lib/html5/treebuilders.rb +24 -0
  42. data/lib/html5/treewalkers/base.rb +154 -0
  43. data/lib/html5/treewalkers/hpricot.rb +48 -0
  44. data/lib/html5/treewalkers/rexml.rb +48 -0
  45. data/lib/html5/treewalkers/simpletree.rb +48 -0
  46. data/lib/html5/treewalkers.rb +26 -0
  47. data/lib/html5.rb +13 -0
  48. data/parse.rb +217 -0
  49. data/tests/preamble.rb +82 -0
  50. data/tests/test_encoding.rb +35 -0
  51. data/tests/test_lxp.rb +263 -0
  52. data/tests/test_parser.rb +68 -0
  53. data/tests/test_sanitizer.rb +142 -0
  54. data/tests/test_serializer.rb +68 -0
  55. data/tests/test_stream.rb +62 -0
  56. data/tests/test_tokenizer.rb +94 -0
  57. data/tests/test_treewalkers.rb +116 -0
  58. data/tests/tokenizer_test_parser.rb +63 -0
  59. metadata +120 -0
@@ -0,0 +1,78 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Tree-construction phase used while the parser is inside a table cell
  # (<td> or <th>).
  #
  # Character data and tags that are not table structure are handed to the
  # :inBody phase. Table-structure tags first close the open cell and are then
  # reprocessed by whichever phase the parser has switched to.
  class InCellPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell

    # Handler declarations. The string on the right of "=>" is the suffix of
    # the startTag*/endTag* method below that handles the listed tag names
    # (the dispatch itself is presumably implemented in Phase, which is not
    # part of this file).
    handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableOther'

    handle_end %w( td th ) => 'TableCell', %w( body caption col colgroup html ) => 'Ignore'

    handle_end %w( table tbody tfoot thead tr ) => 'Imply'

    # Character tokens are processed by the :inBody phase.
    def processCharacters(data)
      @parser.phases[:inBody].processCharacters(data)
    end

    # A table-structure start tag closes the current cell (when a td/th is in
    # table scope) and is then reprocessed by the parser's new current phase.
    def startTagTableOther(name, attributes)
      if in_scope?('td', true) || in_scope?('th', true)
        closeCell
        @parser.phase.processStartTag(name, attributes)
      else
        # inner_html case
        parse_error
      end
    end

    # Any other start tag is processed by the :inBody phase.
    def startTagOther(name, attributes)
      @parser.phases[:inBody].processStartTag(name, attributes)
    end

    # Handles </td> and </th>: pops the open elements up to and including the
    # cell, clears the active formatting elements and switches to :inRow.
    # When no matching cell is in table scope the tag is reported and ignored.
    def endTagTableCell(name)
      if in_scope?(name, true)
        @tree.generateImpliedEndTags(name)
        if @tree.open_elements.last.name != name
          # Routed through _() like every other message in this class.
          parse_error(_("Got table cell end tag (#{name}) while required end tags are missing."))

          remove_open_elements_until(name)
        else
          @tree.open_elements.pop
        end
        @tree.clearActiveFormattingElements
        @parser.phase = @parser.phases[:inRow]
      else
        parse_error(_("Unexpected end tag (#{name}). Ignored."))
      end
    end

    # End tags that are never valid inside a cell: report and ignore.
    def endTagIgnore(name)
      parse_error(_("Unexpected end tag (#{name}). Ignored."))
    end

    # A table-structure end tag whose element is in table scope closes the
    # current cell and is then reprocessed by the parser's new current phase.
    def endTagImply(name)
      if in_scope?(name, true)
        closeCell
        @parser.phase.processEndTag(name)
      else
        # sometimes inner_html case
        parse_error
      end
    end

    # Any other end tag is processed by the :inBody phase.
    def endTagOther(name)
      @parser.phases[:inBody].processEndTag(name)
    end

    protected

    # Closes whichever cell element (td first, then th) is in table scope by
    # running the same logic as its explicit end tag.
    def closeCell
      if in_scope?('td', true)
        endTagTableCell('td')
      elsif in_scope?('th', true)
        endTagTableCell('th')
      end
    end

  end
end
@@ -0,0 +1,55 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Tree-construction phase used while the parser is inside a <colgroup>.
  #
  # Only <col> start tags and the </colgroup> end tag are handled directly.
  # Every other token first tries to close the colgroup and, if that worked,
  # is reprocessed by the parser's new current phase.
  class InColumnGroupPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    handle_start 'html', 'col'

    handle_end 'colgroup', 'col'

    # True when the current node is <html>; in that situation a </colgroup>
    # is ignored (see endTagColgroup).
    def ignoreEndTagColgroup
      @tree.open_elements.last.name == 'html'
    end

    # Character data closes the colgroup and is then reprocessed, unless the
    # implied </colgroup> was ignored.
    def processCharacters(data)
      colgroup_end_ignored = ignoreEndTagColgroup
      endTagColgroup("colgroup")
      return if colgroup_end_ignored
      @parser.phase.processCharacters(data)
    end

    # <col> is inserted and immediately popped from the open elements again.
    def startTagCol(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # Any other start tag closes the colgroup and is then reprocessed, unless
    # the implied </colgroup> was ignored.
    def startTagOther(name, attributes)
      colgroup_end_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      return if colgroup_end_ignored
      @parser.phase.processStartTag(name, attributes)
    end

    # </colgroup>: pops the current node and switches to :inTable, or reports
    # a parse error when the current node is <html>.
    def endTagColgroup(name)
      if ignoreEndTagColgroup
        # inner_html case
        assert @parser.inner_html
        return parse_error
      end
      @tree.open_elements.pop
      @parser.phase = @parser.phases[:inTable]
    end

    # </col> is always an error.
    def endTagCol(name)
      parse_error(_('Unexpected end tag (col). col has no end tag.'))
    end

    # Any other end tag closes the colgroup and is then reprocessed, unless
    # the implied </colgroup> was ignored.
    def endTagOther(name)
      colgroup_end_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      return if colgroup_end_ignored
      @parser.phase.processEndTag(name)
    end

  end
end
@@ -0,0 +1,57 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Tree-construction phase used while the parser is inside a <frameset>.
  #
  # Only frameset, frame and noframes tags are accepted; character data and
  # all other tags are reported as parse errors and dropped.
  class InFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset

    handle_start 'html', 'frameset', 'frame', 'noframes'

    handle_end 'frameset', 'noframes'

    # Character data is reported and otherwise ignored.
    def processCharacters(data)
      parse_error(_('Unexpected characters in the frameset phase. Characters ignored.'))
    end

    # A nested <frameset> is inserted and left on the stack of open elements.
    def startTagFrameset(name, attributes)
      @tree.insert_element(name, attributes)
    end

    # <frame> is inserted and immediately popped from the open elements again.
    def startTagFrame(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # <noframes> is processed by the :inBody phase.
    def startTagNoframes(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # Any other start tag is reported and ignored.
    def startTagOther(name, attributes)
      parse_error(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
    end

    # </frameset>: pops the current node unless it is <html>, then possibly
    # moves on to the :afterFrameset phase.
    def endTagFrameset(name)
      if @tree.open_elements.last.name != 'html'
        @tree.open_elements.pop
      else
        # inner_html case
        parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html)."))
      end

      # Switch phases only when we are not in inner_html mode and the current
      # node is no longer a "frameset" element.
      return if @parser.inner_html
      return if @tree.open_elements.last.name == 'frameset'
      @parser.phase = @parser.phases[:afterFrameset]
    end

    # </noframes> is processed by the :inBody phase.
    def endTagNoframes(name)
      in_body = @parser.phases[:inBody]
      in_body.processEndTag(name)
    end

    # Any other end tag is reported and ignored.
    def endTagOther(name)
      parse_error(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
    end

  end
end
@@ -0,0 +1,138 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Tree-construction phase for tokens seen while the parser is in the
  # document head.
  #
  # title/style/script/noscript/base/link/meta are inserted here; anything
  # else ends the head (see anythingElse) and is reprocessed by the parser's
  # new current phase.
  class InHeadPhase < Phase

    handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
    handle_start %w( base link meta )

    handle_end 'head'
    handle_end %w( html body br p ) => 'ImplyAfterHead'
    handle_end %w( title style script noscript )

    # End of input: an open title/style/script is reported and popped, then
    # the head is ended and EOF is forwarded to the parser's new phase.
    def process_eof
      current_name = @tree.open_elements.last.name
      if %w[title style script].include?(current_name)
        parse_error(_("Unexpected end of file. Expected end tag (#{current_name})."))
        @tree.open_elements.pop
      end
      anythingElse
      @parser.phase.process_eof
    end

    # Text inside title/style/script/noscript is inserted as-is; any other
    # text ends the head and is reprocessed.
    def processCharacters(data)
      text_containers = %w[title style script noscript]
      unless text_containers.include?(@tree.open_elements.last.name)
        anythingElse
        return @parser.phase.processCharacters(data)
      end
      @tree.insertText(data)
    end

    # A second <head> is reported and ignored.
    def startTagHead(name, attributes)
      parse_error(_('Unexpected start tag head in existing head. Ignored'))
    end

    # <title>: appended to the head, pushed on the open elements, and the
    # tokenizer is switched to the :RCDATA content model.
    def startTagTitle(name, attributes)
      title = @tree.createElement(name, attributes)
      appendToHead(title)
      @tree.open_elements.push(title)
      @parser.tokenizer.content_model_flag = :RCDATA
    end

    # <style>: appended to the head when a head pointer exists and this phase
    # is the parser's current phase, otherwise to the current node; the
    # tokenizer is switched to the :CDATA content model.
    def startTagStyle(name, attributes)
      style = @tree.createElement(name, attributes)
      goes_in_head = !@tree.head_pointer.nil? && @parser.phase == @parser.phases[:inHead]
      if goes_in_head
        appendToHead(style)
      else
        @tree.open_elements.last.appendChild(style)
      end
      @tree.open_elements.push(style)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <noscript>: same placement and content model as <style>.
    def startTagNoscript(name, attributes)
      # XXX Need to decide whether to implement the scripting disabled case.
      noscript = @tree.createElement(name, attributes)
      goes_in_head = !@tree.head_pointer.nil? && @parser.phase == @parser.phases[:inHead]
      if goes_in_head
        appendToHead(noscript)
      else
        @tree.open_elements.last.appendChild(noscript)
      end
      @tree.open_elements.push(noscript)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <script>: flagged "parser-inserted", then placed like <style>.
    def startTagScript(name, attributes)
      #XXX Inner HTML case may be wrong
      script = @tree.createElement(name, attributes)
      script._flags.push("parser-inserted")
      goes_in_head = !@tree.head_pointer.nil? && @parser.phase == @parser.phases[:inHead]
      if goes_in_head
        appendToHead(script)
      else
        @tree.open_elements.last.appendChild(script)
      end
      @tree.open_elements.push(script)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <base>, <link>, <meta>: placed like <style> but never pushed on the
    # stack of open elements.
    def startTagBaseLinkMeta(name, attributes)
      node = @tree.createElement(name, attributes)
      goes_in_head = !@tree.head_pointer.nil? && @parser.phase == @parser.phases[:inHead]
      if goes_in_head
        appendToHead(node)
      else
        @tree.open_elements.last.appendChild(node)
      end
    end

    # Any other start tag ends the head and is reprocessed.
    def startTagOther(name, attributes)
      anythingElse
      @parser.phase.processStartTag(name, attributes)
    end

    # </head>: pops <head> when it is the current node (otherwise reports an
    # error) and always switches to :afterHead.
    def endTagHead(name)
      if @tree.open_elements.last.name != 'head'
        parse_error(_("Unexpected end tag (head). Ignored."))
      else
        @tree.open_elements.pop
      end
      @parser.phase = @parser.phases[:afterHead]
    end

    # </html>, </body>, </br>, </p>: end the head and reprocess the end tag.
    def endTagImplyAfterHead(name)
      anythingElse
      @parser.phase.processEndTag(name)
    end

    # </title>, </style>, </script>, </noscript>: popped only when the
    # matching element is the current node.
    def endTagTitleStyleScriptNoscript(name)
      if @tree.open_elements.last.name != name
        parse_error(_("Unexpected end tag (#{name}). Ignored."))
      else
        @tree.open_elements.pop
      end
    end

    # Any other end tag is reported and ignored.
    def endTagOther(name)
      parse_error(_("Unexpected end tag (#{name}). Ignored."))
    end

    # Leaves the head: behaves like </head> when <head> is the current node,
    # otherwise just switches the parser to :afterHead.
    def anythingElse
      return endTagHead('head') if @tree.open_elements.last.name == 'head'
      @parser.phase = @parser.phases[:afterHead]
    end

    protected

    # Appends element to the head pointer, or to the current node when no
    # head pointer is set (asserted to be the inner_html case).
    def appendToHead(element)
      head = @tree.head_pointer
      unless head.nil?
        return head.appendChild(element)
      end
      assert @parser.inner_html
      @tree.open_elements.last.appendChild(element)
    end

  end
end
@@ -0,0 +1,87 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Tree-construction phase used while the parser is inside a table row
  # (<tr>).
  #
  # Cell start tags open a cell and switch to :inCell. Other table-structure
  # tags close the row and are reprocessed; everything else is processed by
  # the :inTable phase.
  class InRowPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-row

    handle_start 'html', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'

    handle_end 'tr', 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th ) => 'Ignore'

    # Character tokens are processed by the :inTable phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <td>/<th>: clears the stack back to the row, inserts the cell, switches
    # to :inCell and pushes a Marker on the active formatting elements.
    def startTagTableCell(name, attributes)
      clearStackToTableRowContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCell]
      @tree.activeFormattingElements.push(Marker)
    end

    # A table-structure start tag closes the row and is then reprocessed,
    # unless the implied </tr> was ignored.
    def startTagTableOther(name, attributes)
      ignoreEndTag = ignoreEndTagTr
      endTagTr('tr')
      # XXX how are we sure it's always ignored in the inner_html case?
      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
    end

    # Any other start tag is processed by the :inTable phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tr>: clears the stack back to the row, pops it and switches to
    # :inTableBody; reports a parse error when no <tr> is in table scope.
    def endTagTr(name)
      if ignoreEndTagTr
        # inner_html case
        assert @parser.inner_html
        parse_error
      else
        clearStackToTableRowContext
        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTableBody]
      end
    end

    # </table>: closes the row first, then reprocesses the end tag.
    def endTagTable(name)
      ignoreEndTag = ignoreEndTagTr
      endTagTr('tr')
      # Reprocess the current tag if the tr end tag was not ignored
      # XXX how are we sure it's always ignored in the inner_html case?
      @parser.phase.processEndTag(name) unless ignoreEndTag
    end

    # </tbody>, </tfoot>, </thead>: when the element is in table scope the
    # row is closed and the end tag is reprocessed.
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        endTagTr('tr')
        @parser.phase.processEndTag(name)
      else
        # inner_html case
        parse_error
      end
    end

    # End tags that are never valid inside a row: report and ignore.
    def endTagIgnore(name)
      parse_error(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
    end

    # Any other end tag is processed by the :inTable phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # XXX unify this with other table helper methods
    # Pops open elements (reporting each one) until the current node is
    # <tr> or <html>.
    def clearStackToTableRowContext
      until %w[tr html].include?(name = @tree.open_elements.last.name)
        parse_error(_("Unexpected implied end tag (#{name}) in the row phase."))
        @tree.open_elements.pop
      end
    end

    # True when no <tr> is in table scope, i.e. a </tr> must be ignored.
    # The table-scope flag is passed positionally, as at every other
    # in_scope? call site, instead of as a :tableVariant options hash.
    def ignoreEndTagTr
      !in_scope?('tr', true)
    end

  end
end
@@ -0,0 +1,84 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Tree-construction phase used while the parser is inside a <select>.
  #
  # Only option, optgroup and select tags are handled; text is inserted
  # directly and every other tag is reported as a parse error.
  class InSelectPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    # Character data is inserted at the current node.
    def processCharacters(data)
      @tree.insertText(data)
    end

    # <option>: closes a currently open <option> first, then inserts.
    def startTagOption(name, attributes)
      # We need to imply </option> if <option> is the current node.
      @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
      @tree.insert_element(name, attributes)
    end

    # <optgroup>: closes a currently open <option> and then a currently open
    # <optgroup> before inserting.
    def startTagOptgroup(name, attributes)
      @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
      @tree.open_elements.pop if @tree.open_elements.last.name == 'optgroup'
      @tree.insert_element(name, attributes)
    end

    # A nested <select> is an error and is treated like </select>.
    def startTagSelect(name, attributes)
      parse_error(_('Unexpected start tag (select) in the select phase implies select start tag.'))
      endTagSelect('select')
    end

    # Any other start tag is reported and ignored.
    def startTagOther(name, attributes)
      # Double quotes are required so that the tag name is interpolated into
      # the message; a single-quoted literal would report "#{name}" verbatim.
      parse_error(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
    end

    # </option>: popped only when <option> is the current node.
    def endTagOption(name)
      if @tree.open_elements.last.name == 'option'
        @tree.open_elements.pop
      else
        parse_error(_('Unexpected end tag (option) in the select phase. Ignored.'))
      end
    end

    # </optgroup>: closes an <option> that sits directly inside an
    # <optgroup>, then the <optgroup> itself; otherwise reports an error.
    def endTagOptgroup(name)
      # </optgroup> implicitly closes <option>
      if @tree.open_elements.last.name == 'option' && @tree.open_elements[-2].name == 'optgroup'
        @tree.open_elements.pop
      end
      # It also closes </optgroup>
      if @tree.open_elements.last.name == 'optgroup'
        @tree.open_elements.pop
      # But nothing else
      else
        parse_error(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
      end
    end

    # </select>: pops open elements through <select> and resets the insertion
    # mode; reports a parse error when no <select> is in table scope.
    def endTagSelect(name)
      if in_scope?('select', true)
        remove_open_elements_until('select')

        @parser.reset_insertion_mode
      else
        # inner_html case
        parse_error
      end
    end

    # Table end tags are always an error here; when the named element is in
    # table scope the select is closed and the end tag is reprocessed.
    def endTagTableElements(name)
      parse_error(_("Unexpected table end tag (#{name}) in the select phase."))

      if in_scope?(name, true)
        endTagSelect('select')
        @parser.phase.processEndTag(name)
      end
    end

    # Any other end tag is reported and ignored.
    def endTagOther(name)
      parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
    end

  end
end
@@ -0,0 +1,83 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Tree-construction phase used while the parser is inside a table row group
  # (<tbody>, <thead> or <tfoot>).
  #
  # Row and cell start tags open a row and switch to :inRow. Other
  # table-structure tags close the row group and are reprocessed; everything
  # else is processed by the :inTable phase.
  class InTableBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

    handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

    # The 'Ignore' suffix must match endTagIgnore below; a misspelt suffix
    # would point these end tags at a handler that does not exist.
    handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore'

    # Character tokens are processed by the :inTable phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <tr>: clears the stack back to the row group, inserts the row and
    # switches to :inRow.
    def startTagTr(name, attributes)
      clearStackToTableBodyContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inRow]
    end

    # <td>/<th> directly inside a row group: reported, then a <tr> without
    # attributes is opened and the cell tag is reprocessed.
    def startTagTableCell(name, attributes)
      parse_error(_("Unexpected table cell start tag (#{name}) in the table body phase."))
      startTagTr('tr', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # A table-structure start tag closes the open row group (when one is in
    # table scope) and is then reprocessed.
    def startTagTableOther(name, attributes)
      # XXX AT Any ideas on how to share this with endTagTable?
      if in_scope?('tbody', true) || in_scope?('thead', true) || in_scope?('tfoot', true)
        clearStackToTableBodyContext
        endTagTableRowGroup(@tree.open_elements.last.name)
        @parser.phase.processStartTag(name, attributes)
      else
        # inner_html case
        parse_error
      end
    end

    # Any other start tag is processed by the :inTable phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tbody>, </tfoot>, </thead>: clears the stack back to the row group,
    # pops it and switches to :inTable; otherwise reports and ignores the tag.
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        clearStackToTableBodyContext
        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTable]
      else
        parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
      end
    end

    # </table>: closes the open row group (when one is in table scope) and
    # then reprocesses the end tag.
    def endTagTable(name)
      if in_scope?('tbody', true) || in_scope?('thead', true) || in_scope?('tfoot', true)
        clearStackToTableBodyContext
        endTagTableRowGroup(@tree.open_elements.last.name)
        @parser.phase.processEndTag(name)
      else
        # inner_html case
        parse_error
      end
    end

    # End tags that are never valid inside a row group: report and ignore.
    def endTagIgnore(name)
      parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
    end

    # Any other end tag is processed by the :inTable phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # Pops open elements (reporting each one) until the current node is
    # <tbody>, <tfoot>, <thead> or <html>.
    def clearStackToTableBodyContext
      until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
        parse_error(_("Unexpected implied end tag (#{name}) in the table body phase."))
        @tree.open_elements.pop
      end
    end

  end
end
@@ -0,0 +1,110 @@
1
+ require 'html5/html5parser/phase'
2
+
3
module HTML5
  # Tree-construction phase used while the parser is directly inside a
  # <table>.
  #
  # Table-structure start tags open the matching element and switch phase.
  # Any other content is processed by the :inBody phase with the tree
  # builder's insert_from_table flag switched on for the duration.
  class InTablePhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table

    handle_start 'html', 'caption', 'colgroup', 'col', 'table'

    handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'

    handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

    # Character data is an error here; it is processed by :inBody while
    # insert_from_table is set.
    def processCharacters(data)
      parse_error(_("Unexpected non-space characters in table context caused voodoo mode."))
      # Switch on the special element rearranging voodoo
      @tree.insert_from_table = true
      # Let the "in body" mode deal with the characters
      in_body = @parser.phases[:inBody]
      in_body.processCharacters(data)
      @tree.insert_from_table = false
    end

    # <caption>: clears the stack back to the table, pushes a Marker on the
    # active formatting elements, inserts the caption, switches to :inCaption.
    def startTagCaption(name, attributes)
      clearStackToTableContext
      @tree.activeFormattingElements.push(Marker)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCaption]
    end

    # <colgroup>: clears the stack back to the table, inserts the element and
    # switches to :inColumnGroup.
    def startTagColgroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inColumnGroup]
    end

    # <col>: opens a <colgroup> without attributes, then reprocesses the tag.
    def startTagCol(name, attributes)
      startTagColgroup('colgroup', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # <tbody>, <tfoot>, <thead>: clears the stack back to the table, inserts
    # the element and switches to :inTableBody.
    def startTagRowGroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inTableBody]
    end

    # <td>, <th>, <tr>: opens a <tbody> without attributes, then reprocesses
    # the tag.
    def startTagImplyTbody(name, attributes)
      startTagRowGroup('tbody', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # A nested <table> start tag is treated as </table>; outside inner_html
    # mode the start tag is then reprocessed.
    def startTagTable(name, attributes)
      parse_error(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
      @parser.phase.processEndTag('table')
      return if @parser.inner_html
      @parser.phase.processStartTag(name, attributes)
    end

    # Any other start tag is an error; it is processed by :inBody while
    # insert_from_table is set.
    def startTagOther(name, attributes)
      parse_error(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
      # Switch on the special element rearranging voodoo
      @tree.insert_from_table = true
      # Let the "in body" mode deal with the start tag
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
      @tree.insert_from_table = false
    end

    # </table>: generates implied end tags, pops open elements through
    # <table> and resets the insertion mode; reports a parse error when no
    # <table> is in table scope.
    def endTagTable(name)
      unless in_scope?('table', true)
        # inner_html case
        assert @parser.inner_html
        return parse_error
      end

      @tree.generateImpliedEndTags

      current_name = @tree.open_elements.last.name
      if current_name != 'table'
        parse_error(_("Unexpected end tag (table). Expected end tag (#{current_name})."))
      end

      remove_open_elements_until('table')

      @parser.reset_insertion_mode
    end

    # End tags that are never valid directly inside a table: report, ignore.
    def endTagIgnore(name)
      parse_error(_("Unexpected end tag (#{name}). Ignored."))
    end

    # Any other end tag is an error; it is processed by :inBody while
    # insert_from_table is set.
    def endTagOther(name)
      parse_error(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
      # Switch on the special element rearranging voodoo
      @tree.insert_from_table = true
      # Let the "in body" mode deal with the end tag
      in_body = @parser.phases[:inBody]
      in_body.processEndTag(name)
      @tree.insert_from_table = false
    end

    protected

    # "clear the stack back to a table context": pops open elements
    # (reporting each one) until the current node is <table> or <html>.
    def clearStackToTableContext
      until %w[table html].include?(@tree.open_elements.last.name)
        stray_name = @tree.open_elements.last.name
        parse_error(_("Unexpected implied end tag (#{stray_name}) in the table phase."))
        @tree.open_elements.pop
      end
      # When the current node is <html> it's an inner_html case
    end

  end
end