feedtools 0.2.26 → 0.2.27
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
class InSelectPhase < Phase

  # Handlers for tokens seen while a <select> element is being built.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-select

  handle_start 'html', 'option', 'optgroup', 'select'

  handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

  # Character data is inserted as text through the tree builder.
  def processCharacters(data)
    @tree.insertText(data)
  end

  # <option>: if the current node is itself an <option>, it is popped
  # (an implied </option>) before the new element is inserted.
  def startTagOption(name, attributes)
    stack = @tree.open_elements
    stack.pop if stack.last.name == 'option'
    @tree.insert_element(name, attributes)
  end

  # <optgroup>: pops a current <option>, then a current <optgroup>,
  # before inserting the new element.
  def startTagOptgroup(name, attributes)
    stack = @tree.open_elements
    %w[option optgroup].each do |implied|
      stack.pop if stack.last.name == implied
    end
    @tree.insert_element(name, attributes)
  end

  # A nested <select> is reported as a parse error and then handled
  # exactly like a </select> end tag.
  def startTagSelect(name, attributes)
    parse_error("unexpected-select-in-select")
    endTagSelect('select')
  end

  # Any other start tag is reported and otherwise dropped.
  def startTagOther(name, attributes)
    parse_error("unexpected-start-tag-in-select", {"name" => name})
  end

  # </option>: only valid when the current node is an <option>.
  def endTagOption(name)
    if @tree.open_elements.last.name != 'option'
      parse_error("unexpected-end-tag-in-select", {"name" => "option"})
    else
      @tree.open_elements.pop
    end
  end

  # </optgroup>: closes the <optgroup> that is the current node, first
  # closing an <option> that sits directly inside it.
  def endTagOptgroup(name)
    stack = @tree.open_elements

    # An <option> whose parent on the stack is an <optgroup> is closed implicitly.
    stack.pop if stack.last.name == 'option' && stack[-2].name == 'optgroup'

    # After that, only an <optgroup> as current node may be closed.
    if stack.last.name != 'optgroup'
      parse_error("unexpected-end-tag-in-select",
        {"name" => "optgroup"})
    else
      stack.pop
    end
  end

  # </select>: pops the stack through the <select> element and asks the
  # parser to reset its insertion mode. Without a <select> in scope
  # (inner_html case) only a parse error is reported.
  def endTagSelect(name)
    unless in_scope?('select', true)
      # inner_html case
      return parse_error
    end

    remove_open_elements_until('select')
    @parser.reset_insertion_mode
  end

  # End tags of table-related elements are always a parse error. When the
  # named element is in scope, the <select> is closed and the end tag is
  # handed to whichever phase the parser is in afterwards.
  def endTagTableElements(name)
    parse_error("unexpected-end-tag-in-select", {"name" => name})
    return unless in_scope?(name, true)

    endTagSelect('select')
    @parser.phase.processEndTag(name)
  end

  # Any other end tag is reported and otherwise dropped.
  def endTagOther(name)
    parse_error("unexpected-end-tag-in-select", {"name" => name})
  end

end
|
85
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
class InTableBodyPhase < Phase

  # Handlers for tokens seen inside a <tbody>, <thead> or <tfoot>.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

  handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

  handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore'

  # Character data is delegated to the :inTable phase.
  def processCharacters(data)
    @parser.phases[:inTable].processCharacters(data)
  end

  # <tr>: clears the stack back to the row group, inserts the row and
  # switches the parser to the :inRow phase.
  def startTagTr(name, attributes)
    clearStackToTableBodyContext
    @tree.insert_element(name, attributes)
    @parser.phase = @parser.phases[:inRow]
  end

  # <td>/<th> directly in a row group: reports a parse error, opens an
  # attribute-less <tr>, then reprocesses the cell in the new phase.
  def startTagTableCell(name, attributes)
    parse_error("unexpected-cell-in-table-body", {"name" => name})
    startTagTr('tr', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # Start tags that cannot live inside a row group: the current row group
  # is closed and the tag is reprocessed by the resulting phase.
  def startTagTableOther(name, attributes)
    unless rowGroupInScope?
      # inner_html case
      return parse_error
    end

    clearStackToTableBodyContext
    endTagTableRowGroup(@tree.open_elements.last.name)
    @parser.phase.processStartTag(name, attributes)
  end

  # Everything else is delegated to the :inTable phase.
  def startTagOther(name, attributes)
    @parser.phases[:inTable].processStartTag(name, attributes)
  end

  # </tbody>, </thead>, </tfoot>: when the named element is in scope the
  # stack is cleared back to it, it is popped, and the parser returns to
  # the :inTable phase. Otherwise a parse error is reported.
  def endTagTableRowGroup(name)
    unless in_scope?(name, true)
      return parse_error("unexpected-end-tag-in-table-body",
        {"name" => name})
    end

    clearStackToTableBodyContext
    @tree.open_elements.pop
    @parser.phase = @parser.phases[:inTable]
  end

  # </table>: closes the current row group first, then lets the resulting
  # phase handle the end tag.
  def endTagTable(name)
    unless rowGroupInScope?
      # inner_html case
      return parse_error
    end

    clearStackToTableBodyContext
    endTagTableRowGroup(@tree.open_elements.last.name)
    @parser.phase.processEndTag(name)
  end

  # End tags that are reported as errors and otherwise ignored here.
  def endTagIgnore(name)
    parse_error("unexpected-end-tag-in-table-body",
      {"name" => name})
  end

  # Every other end tag is delegated to the :inTable phase.
  def endTagOther(name)
    @parser.phases[:inTable].processEndTag(name)
  end

  protected

  # True when a <tbody>, <thead> or <tfoot> is in (table) scope; the tags
  # are probed in that order and probing stops at the first hit.
  def rowGroupInScope?
    %w[tbody thead tfoot].any? { |tag| in_scope?(tag, true) }
  end

  # Pops open elements, reporting each as a parse error, until the current
  # node is a row group element or <html>.
  def clearStackToTableBodyContext
    stack = @tree.open_elements
    until %w[tbody tfoot thead html].include?(stack.last.name)
      parse_error("unexpected-implied-end-tag-in-table",
        {"name" => stack.last.name})
      stack.pop
    end
  end

end
|
86
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
class InTablePhase < Phase

  # Handlers for tokens seen directly inside a <table>.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table

  handle_start 'html', 'caption', 'colgroup', 'col', 'table'

  handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'

  handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

  # Character data in a table is a parse error; it is processed by the
  # :inBody phase while the tree builder's insert_from_table flag is set.
  def processCharacters(data)
    parse_error("unexpected-char-implies-table-voodoo")
    with_insert_from_table do
      @parser.phases[:inBody].processCharacters(data)
    end
  end

  # <caption>: clears back to the table, pushes a Marker onto the active
  # formatting elements, inserts the element and enters :inCaption.
  def startTagCaption(name, attributes)
    clearStackToTableContext
    @tree.activeFormattingElements.push(Marker)
    @tree.insert_element(name, attributes)
    @parser.phase = @parser.phases[:inCaption]
  end

  # <colgroup>: clears back to the table, inserts the element and enters
  # :inColumnGroup.
  def startTagColgroup(name, attributes)
    clearStackToTableContext
    @tree.insert_element(name, attributes)
    @parser.phase = @parser.phases[:inColumnGroup]
  end

  # <col>: opens an attribute-less <colgroup> first, then reprocesses the
  # tag in the phase that results.
  def startTagCol(name, attributes)
    startTagColgroup('colgroup', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # <tbody>/<tfoot>/<thead>: clears back to the table, inserts the element
  # and enters :inTableBody.
  def startTagRowGroup(name, attributes)
    clearStackToTableContext
    @tree.insert_element(name, attributes)
    @parser.phase = @parser.phases[:inTableBody]
  end

  # <td>/<th>/<tr>: opens an attribute-less <tbody> first, then reprocesses
  # the tag in the phase that results.
  def startTagImplyTbody(name, attributes)
    startTagRowGroup('tbody', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # A nested <table> start tag: reported, treated as </table>, and then
  # reprocessed by the resulting phase unless parsing inner_html.
  def startTagTable(name, attributes)
    parse_error("unexpected-start-tag-implies-end-tag",
      {"startName" => "table", "endName" => "table"})
    @parser.phase.processEndTag('table')
    @parser.phase.processStartTag(name, attributes) unless @parser.inner_html
  end

  # Any other start tag is a parse error; it is processed by the :inBody
  # phase while the tree builder's insert_from_table flag is set.
  def startTagOther(name, attributes)
    parse_error("unexpected-start-tag-implies-table-voodoo",
      {"name" => name})
    with_insert_from_table do
      @parser.phases[:inBody].processStartTag(name, attributes)
    end
  end

  # </table>: generates implied end tags, reports an error if the current
  # node is not the table, pops through the table and resets the parser's
  # insertion mode.
  def endTagTable(name)
    unless in_scope?('table', true)
      # inner_html case
      assert @parser.inner_html
      return parse_error
    end

    @tree.generateImpliedEndTags

    current_name = @tree.open_elements.last.name
    if current_name != 'table'
      parse_error("end-tag-too-early-named",
        {"gotName" => "table",
         "expectedName" => current_name})
    end

    remove_open_elements_until('table')

    @parser.reset_insertion_mode
  end

  # End tags that are reported as errors and otherwise ignored here.
  def endTagIgnore(name)
    parse_error("unexpected-end-tag", {"name" => name})
  end

  # Any other end tag is a parse error; it is processed by the :inBody
  # phase while the tree builder's insert_from_table flag is set.
  def endTagOther(name)
    parse_error("unexpected-end-tag-implies-table-voodoo", {"name" => name})
    with_insert_from_table do
      @parser.phases[:inBody].processEndTag(name)
    end
  end

  protected

  # Runs the block with @tree.insert_from_table set to true ("special
  # element rearranging voodoo") and sets it back to false afterwards.
  # The flag is intentionally not reset if the block raises.
  def with_insert_from_table
    @tree.insert_from_table = true
    yield
    @tree.insert_from_table = false
  end

  # "clear the stack back to a table context": pops open elements,
  # reporting each as a parse error, until the current node is <table>
  # or <html>.
  def clearStackToTableContext
    stack = @tree.open_elements
    until %w[table html].include?(stack.last.name)
      parse_error("unexpected-implied-end-tag-in-table",
        {"name" => stack.last.name})
      stack.pop
    end
    # When the current node is <html> it's an inner_html case
  end

end
|
115
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
class InitialPhase < Phase

  # This phase deals with error handling as well which is currently not
  # covered in the specification. The error handling is typically known as
  # "quirks mode". It is expected that a future version of HTML5 will define this.

  # Public identifiers (lowercase) that select quirks mode for an "html"
  # doctype regardless of its system identifier.
  QUIRKS_PUBLIC_IDS = [
    "+//silmaril//dtd html pro v0r11 19970101//en",
    "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
    "-//as//dtd html 3.0 aswedit + extensions//en",
    "-//ietf//dtd html 2.0 level 1//en",
    "-//ietf//dtd html 2.0 level 2//en",
    "-//ietf//dtd html 2.0 strict level 1//en",
    "-//ietf//dtd html 2.0 strict level 2//en",
    "-//ietf//dtd html 2.0 strict//en",
    "-//ietf//dtd html 2.0//en",
    "-//ietf//dtd html 2.1e//en",
    "-//ietf//dtd html 3.0//en",
    "-//ietf//dtd html 3.0//en//",
    "-//ietf//dtd html 3.2 final//en",
    "-//ietf//dtd html 3.2//en",
    "-//ietf//dtd html 3//en",
    "-//ietf//dtd html level 0//en",
    "-//ietf//dtd html level 0//en//2.0",
    "-//ietf//dtd html level 1//en",
    "-//ietf//dtd html level 1//en//2.0",
    "-//ietf//dtd html level 2//en",
    "-//ietf//dtd html level 2//en//2.0",
    "-//ietf//dtd html level 3//en",
    "-//ietf//dtd html level 3//en//3.0",
    "-//ietf//dtd html strict level 0//en",
    "-//ietf//dtd html strict level 0//en//2.0",
    "-//ietf//dtd html strict level 1//en",
    "-//ietf//dtd html strict level 1//en//2.0",
    "-//ietf//dtd html strict level 2//en",
    "-//ietf//dtd html strict level 2//en//2.0",
    "-//ietf//dtd html strict level 3//en",
    "-//ietf//dtd html strict level 3//en//3.0",
    "-//ietf//dtd html strict//en",
    "-//ietf//dtd html strict//en//2.0",
    "-//ietf//dtd html strict//en//3.0",
    "-//ietf//dtd html//en",
    "-//ietf//dtd html//en//2.0",
    "-//ietf//dtd html//en//3.0",
    "-//metrius//dtd metrius presentational//en",
    "-//microsoft//dtd internet explorer 2.0 html strict//en",
    "-//microsoft//dtd internet explorer 2.0 html//en",
    "-//microsoft//dtd internet explorer 2.0 tables//en",
    "-//microsoft//dtd internet explorer 3.0 html strict//en",
    "-//microsoft//dtd internet explorer 3.0 html//en",
    "-//microsoft//dtd internet explorer 3.0 tables//en",
    "-//netscape comm. corp.//dtd html//en",
    "-//netscape comm. corp.//dtd strict html//en",
    "-//o'reilly and associates//dtd html 2.0//en",
    "-//o'reilly and associates//dtd html extended 1.0//en",
    "-//spyglass//dtd html 2.0 extended//en",
    "-//sq//dtd html 2.0 hotmetal + extensions//en",
    "-//sun microsystems corp.//dtd hotjava html//en",
    "-//sun microsystems corp.//dtd hotjava strict html//en",
    "-//w3c//dtd html 3 1995-03-24//en",
    "-//w3c//dtd html 3.2 draft//en",
    "-//w3c//dtd html 3.2 final//en",
    "-//w3c//dtd html 3.2//en",
    "-//w3c//dtd html 3.2s draft//en",
    "-//w3c//dtd html 4.0 frameset//en",
    "-//w3c//dtd html 4.0 transitional//en",
    "-//w3c//dtd html experimental 19960712//en",
    "-//w3c//dtd html experimental 970421//en",
    "-//w3c//dtd w3 html//en",
    "-//w3o//dtd w3 html 3.0//en",
    "-//w3o//dtd w3 html 3.0//en//",
    "-//w3o//dtd w3 html strict 3.0//en//",
    "-//webtechs//dtd mozilla html 2.0//en",
    "-//webtechs//dtd mozilla html//en",
    "-/w3c/dtd html 4.0 transitional/en",
    "html"
  ].freeze

  # Public identifiers (lowercase) that select quirks mode only when the
  # doctype carries no system identifier.
  QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID = [
    "-//w3c//dtd html 4.01 frameset//en",
    "-//w3c//dtd html 4.01 transitional//en"
  ].freeze

  # System identifier that selects quirks mode on its own.
  QUIRKS_SYSTEM_ID = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

  # End of input before any doctype: report it, switch to the
  # :rootElement phase and let that phase handle the EOF.
  def process_eof
    parse_error("expected-doctype-but-got-eof")
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase.process_eof
  end

  # Comments seen in this phase are attached to the document itself.
  def processComment(data)
    @tree.insert_comment(data, @tree.document)
  end

  # Inserts the doctype into the tree and moves on to the :rootElement
  # phase. Anything other than a bare "html" doctype (no public or system
  # identifier) is reported as "unknown-doctype".
  #
  # name     - doctype name; compared case-insensitively against "html"
  # publicId - public identifier or nil
  # systemId - system identifier or nil
  # correct  - not used by this method
  def processDoctype(name, publicId, systemId, correct)
    if name.downcase != 'html' || publicId || systemId
      parse_error("unknown-doctype")
    end
    # XXX need to update DOCTYPE tokens
    @tree.insertDoctype(name, publicId, systemId)

    if quirks_mode_doctype?(name, publicId, systemId)
      # XXX quirks mode
    end

    @parser.phase = @parser.phases[:rootElement]
  end

  # Whitespace before the doctype is dropped.
  def processSpaceCharacters(data)
  end

  # Character data before any doctype: report it, switch to the
  # :rootElement phase and reprocess the data there.
  def processCharacters(data)
    parse_error("expected-doctype-but-got-chars")
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase.processCharacters(data)
  end

  # Start tag before any doctype: report it, switch to the :rootElement
  # phase and reprocess the tag there.
  def processStartTag(name, attributes)
    parse_error("expected-doctype-but-got-start-tag", {"name" => name})
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase.processStartTag(name, attributes)
  end

  # End tag before any doctype: report it, switch to the :rootElement
  # phase and reprocess the tag there.
  def processEndTag(name)
    parse_error("expected-doctype-but-got-end-tag", {"name" => name})
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase.processEndTag(name)
  end

  protected

  # Decides whether a doctype should put the document into quirks mode.
  #
  # The public identifier is lowercased before lookup because the
  # identifier tables above are stored in lowercase. (The previous code
  # upcased it, so no entry could ever match.) The system identifier is
  # compared exactly, as before.
  def quirks_mode_doctype?(name, publicId, systemId)
    return true if name.downcase != 'html'

    normalized_public_id = publicId.to_s.downcase

    QUIRKS_PUBLIC_IDS.include?(normalized_public_id) ||
      (systemId.nil? && QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID.include?(normalized_public_id)) ||
      systemId == QUIRKS_SYSTEM_ID
  end

end
|
133
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
module HTML5
|
2
|
+
# Base class for helper objects that implement each phase of processing.
|
3
|
+
#
|
4
|
+
# Handler methods should be in the following order (they can be omitted):
|
5
|
+
#
|
6
|
+
# * EOF
|
7
|
+
# * Comment
|
8
|
+
# * Doctype
|
9
|
+
# * SpaceCharacters
|
10
|
+
# * Characters
|
11
|
+
# * StartTag
|
12
|
+
# - startTag* methods
|
13
|
+
# * EndTag
|
14
|
+
# - endTag* methods
|
15
|
+
#
|
16
|
+
class Phase

  extend Forwardable
  # parse_error(...) on a phase is forwarded to the owning parser.
  def_delegators :@parser, :parse_error

  # Builds a tag-name => handler-method-name mapping.
  #
  # prefix - string prepended to every handler name ('startTag' / 'endTag')
  # tags   - tag names, arrays of tag names, and optionally a trailing Hash
  #          of { name-or-names => handler suffix }
  #
  # Plain names map to prefix + capitalized name; an array of names maps
  # every member to prefix + the capitalized names joined together. Hash
  # entries are applied first, so a tag that is also listed positionally
  # ends up with the positional handler.
  #
  # The following example call:
  #
  #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
  #
  # ...would return a hash equal to this:
  #
  #   { 'html' => 'startTagHtml',
  #     'base' => 'startTagBaseLinkMeta',
  #     'link' => 'startTagBaseLinkMeta',
  #     'meta' => 'startTagBaseLinkMeta',
  #     'li'   => 'startTagListItem',
  #     'dt'   => 'startTagListItem',
  #     'dd'   => 'startTagListItem' }
  #
  def self.tag_handlers(prefix, *tags)
    mapping = {}
    if tags.last.is_a?(Hash)
      tags.pop.each do |names, handler_method_suffix|
        handler_method = prefix + handler_method_suffix
        Array(names).each {|name| mapping[name] = handler_method }
      end
    end
    tags.each do |names|
      names = Array(names)
      handler_method = prefix + names.map {|name| name.capitalize }.join
      names.each {|name| mapping[name] = handler_method }
    end
    mapping
  end

  # Per-class table of start tag handlers; unknown tags resolve to
  # 'startTagOther'. Stored in a class-level instance variable, so each
  # subclass gets its own table.
  def self.start_tag_handlers
    @start_tag_handlers ||= Hash.new('startTagOther')
  end

  # Declare what start tags this Phase handles. Can be called more than once.
  #
  # Example usage:
  #
  #   handle_start 'html'
  #   # html start tags will be handled by a method named 'startTagHtml'
  #
  #   handle_start %w( base link meta )
  #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
  #
  #   handle_start %w( li dt dd ) => 'ListItem'
  #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
  #
  def self.handle_start(*tags)
    start_tag_handlers.update tag_handlers('startTag', *tags)
  end

  # Per-class table of end tag handlers; unknown tags resolve to
  # 'endTagOther'.
  def self.end_tag_handlers
    @end_tag_handlers ||= Hash.new('endTagOther')
  end

  # Declare what end tags this Phase handles. Behaves like handle_start.
  #
  def self.handle_end(*tags)
    end_tag_handlers.update tag_handlers('endTag', *tags)
  end

  # parser - object that receives parse_error and exposes the parser state
  #          read by the handlers (phase, phases, inner_html, ...)
  # tree   - tree builder the handlers operate on
  def initialize(parser, tree)
    @parser, @tree = parser, tree
  end

  # Default end-of-input handling: generate implied end tags, then report
  # an error when elements that need closing are still open.
  def process_eof
    @tree.generateImpliedEndTags

    if @tree.open_elements.length > 2
      parse_error("expected-closing-tag-but-got-eof")
    elsif @tree.open_elements.length == 2 && @tree.open_elements[1].name != 'body'
      # This happens for framesets or something?
      parse_error("expected-closing-tag-but-got-eof")
    elsif @parser.inner_html && @tree.open_elements.length > 1
      # XXX This is not what the specification says. Not sure what to do here.
      parse_error("eof-in-innerhtml")
    end
    # Betting ends.
  end

  # Default comment handling: attach the comment to the current node.
  def processComment(data)
    # For most phases the following is correct. Where it's not it will be
    # overridden.
    @tree.insert_comment(data, @tree.open_elements.last)
  end

  # By default a doctype is only reported as a parse error.
  def processDoctype(name, publicId, systemId, correct)
    parse_error("unexpected-doctype")
  end

  # Default whitespace handling: insert it as text.
  def processSpaceCharacters(data)
    @tree.insertText(data)
  end

  # Dispatches a start tag to the handler registered for its name via
  # handle_start (or to startTagOther).
  def processStartTag(name, attributes)
    send self.class.start_tag_handlers[name], name, attributes
  end

  # <html> start tag: copies attributes that the root element (first open
  # element) does not have yet onto it, and clears the parser's
  # first_start_tag flag. Reports "non-html-root" when that flag was
  # already false.
  def startTagHtml(name, attributes)
    if @parser.first_start_tag == false && name == 'html'
      parse_error("non-html-root")
    end
    # XXX Need a check here to see if the first start tag token emitted is
    # this token... If it's not, invoke parse_error.
    root_attributes = @tree.open_elements.first.attributes
    attributes.each do |attr, value|
      root_attributes[attr] = value unless root_attributes.has_key?(attr)
    end
    @parser.first_start_tag = false
  end

  # Dispatches an end tag to the handler registered for its name via
  # handle_end (or to endTagOther).
  def processEndTag(name)
    send self.class.end_tag_handlers[name], name
  end

  # Raises AssertionError when value is false or nil.
  #
  # NOTE(review): AssertionError is not defined in this file; it is
  # presumably declared elsewhere in the HTML5 namespace - confirm.
  def assert(value)
    # `raise`, not `throw`: throw is catch/throw control flow and would
    # surface as an uncaught-throw error instead of an AssertionError.
    raise AssertionError unless value
  end

  # Shorthand for @tree.elementInScope with the same arguments.
  def in_scope?(*args)
    @tree.elementInScope(*args)
  end

  # Pops open elements until one named +name+ has been popped, or, when no
  # name is given, until the block returns a true value for the popped
  # element. Returns the last element popped.
  #
  # The stack is not checked for emptiness; callers are expected to know
  # that a matching element is present.
  def remove_open_elements_until(name=nil)
    finished = false
    until finished
      element = @tree.open_elements.pop
      finished = name.nil? ? yield(element) : element.name == name
    end
    return element
  end
end
|
154
|
+
end
|