feedtools 0.2.26 → 0.2.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
require 'html5/html5parser/phase'
|
|
2
|
+
|
|
3
|
+
module HTML5
  # Phase handling tokens while the parser is in the "in select" state.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-select
  class InSelectPhase < Phase

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select',
               %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    # Character data is handed straight to the tree builder as text.
    def processCharacters(data)
      @tree.insertText(data)
    end

    # An <option> start tag implies </option> when an <option> is the
    # current node; the new element is then inserted.
    def startTagOption(name, attributes)
      close_current_node_if('option')
      @tree.insert_element(name, attributes)
    end

    # An <optgroup> start tag first closes an open <option>, then an open
    # <optgroup>, before the new element is inserted.
    def startTagOptgroup(name, attributes)
      close_current_node_if('option')
      close_current_node_if('optgroup')
      @tree.insert_element(name, attributes)
    end

    # A nested <select> is reported as an error and treated as </select>.
    def startTagSelect(name, attributes)
      parse_error("unexpected-select-in-select")
      endTagSelect('select')
    end

    # Any other start tag is reported and otherwise ignored.
    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-in-select", {"name" => name})
    end

    # </option> pops the current node only when that node is an <option>.
    def endTagOption(name)
      return @tree.open_elements.pop if current_node_is?('option')

      parse_error("unexpected-end-tag-in-select", {"name" => "option"})
    end

    def endTagOptgroup(name)
      # </optgroup> implicitly closes <option> when the option sits
      # directly inside an optgroup.
      if current_node_is?('option') && @tree.open_elements[-2].name == 'optgroup'
        @tree.open_elements.pop
      end

      # It also closes the <optgroup> itself, but nothing else.
      return @tree.open_elements.pop if current_node_is?('optgroup')

      parse_error("unexpected-end-tag-in-select", {"name" => "optgroup"})
    end

    # </select> unwinds the open-element stack down to the <select> and asks
    # the parser to reset its insertion mode.
    def endTagSelect(name)
      unless in_scope?('select', true)
        # inner_html case
        return parse_error
      end

      remove_open_elements_until('select')
      @parser.reset_insertion_mode
    end

    # Table-related end tags are always an error here; when the named element
    # is in table scope the select is closed and the tag is reprocessed by
    # whichever phase the parser is in afterwards.
    def endTagTableElements(name)
      parse_error("unexpected-end-tag-in-select", {"name" => name})
      return unless in_scope?(name, true)

      endTagSelect('select')
      @parser.phase.processEndTag(name)
    end

    # Any other end tag is reported and otherwise ignored.
    def endTagOther(name)
      parse_error("unexpected-end-tag-in-select", {"name" => name})
    end

    private

    # True when the last entry on the open-element stack has the given name.
    def current_node_is?(tag_name)
      @tree.open_elements.last.name == tag_name
    end

    # Pops the current node off the open-element stack if it has the given name.
    def close_current_node_if(tag_name)
      @tree.open_elements.pop if current_node_is?(tag_name)
    end

  end
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
require 'html5/html5parser/phase'
|
|
2
|
+
|
|
3
|
+
module HTML5
  # Phase handling tokens while the parser is inside a table row group
  # (tbody, thead or tfoot).
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
  class InTableBodyPhase < Phase

    handle_start 'html', 'tr',
                 %w( td th ) => 'TableCell',
                 %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

    handle_end 'table',
               %w( tbody tfoot thead ) => 'TableRowGroup',
               %w( body caption col colgroup html td th tr ) => 'Ignore'

    # Character data is delegated to the :inTable phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <tr> clears the stack back to the row group, inserts the row and
    # switches the parser to the :inRow phase.
    def startTagTr(name, attributes)
      clearStackToTableBodyContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inRow]
    end

    # A cell directly inside a row group is an error: a <tr> is implied and
    # the cell start tag is reprocessed by the parser's new current phase.
    def startTagTableCell(name, attributes)
      parse_error("unexpected-cell-in-table-body", {"name" => name})
      startTagTr('tr', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # caption/col/colgroup/tbody/tfoot/thead close the current row group and
    # are then reprocessed by the parser's new current phase.
    def startTagTableOther(name, attributes)
      unless row_group_in_table_scope?
        # inner_html case
        return parse_error
      end

      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.open_elements.last.name)
      @parser.phase.processStartTag(name, attributes)
    end

    # Every other start tag is delegated to the :inTable phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tbody>, </thead>, </tfoot>: when the element is in table scope, pop
    # back to it, pop it, and return to the :inTable phase.
    def endTagTableRowGroup(name)
      unless in_scope?(name, true)
        return parse_error("unexpected-end-tag-in-table-body", {"name" => name})
      end

      clearStackToTableBodyContext
      @tree.open_elements.pop
      @parser.phase = @parser.phases[:inTable]
    end

    # </table> closes the current row group and is then reprocessed by the
    # parser's new current phase.
    def endTagTable(name)
      unless row_group_in_table_scope?
        # inner_html case
        return parse_error
      end

      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.open_elements.last.name)
      @parser.phase.processEndTag(name)
    end

    # End tags that cannot be honoured here are reported and dropped.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag-in-table-body", {"name" => name})
    end

    # Every other end tag is delegated to the :inTable phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # True when a tbody, thead or tfoot (checked in that order) is in table
    # scope.
    def row_group_in_table_scope?
      %w[tbody thead tfoot].any? { |row_group| in_scope?(row_group, true) }
    end

    # Pops open elements, reporting each one, until the current node is a
    # row group element or <html>.
    def clearStackToTableBodyContext
      loop do
        current_name = @tree.open_elements.last.name
        break if %w[tbody tfoot thead html].include?(current_name)

        parse_error("unexpected-implied-end-tag-in-table", {"name" => current_name})
        @tree.open_elements.pop
      end
    end

  end
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
require 'html5/html5parser/phase'
|
|
2
|
+
|
|
3
|
+
module HTML5
  # Phase handling tokens while the parser is directly inside a <table>.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table
  class InTablePhase < Phase

    handle_start 'html', 'caption', 'colgroup', 'col', 'table'

    handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'

    handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

    # Characters directly inside a table are an error; they are processed by
    # the :inBody phase with the tree's insert_from_table flag raised.
    def processCharacters(data)
      parse_error("unexpected-char-implies-table-voodoo")
      with_insert_from_table { |in_body| in_body.processCharacters(data) }
    end

    # <caption>: clear back to the table, push a formatting marker, insert
    # the element and move to the :inCaption phase.
    def startTagCaption(name, attributes)
      clearStackToTableContext
      @tree.activeFormattingElements.push(Marker)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCaption]
    end

    # <colgroup>: clear back to the table, insert the element and move to the
    # :inColumnGroup phase.
    def startTagColgroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inColumnGroup]
    end

    # <col> implies a <colgroup>; the tag is then reprocessed by the parser's
    # new current phase.
    def startTagCol(name, attributes)
      startTagColgroup('colgroup', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # <tbody>/<tfoot>/<thead>: clear back to the table, insert the element
    # and move to the :inTableBody phase.
    def startTagRowGroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inTableBody]
    end

    # <td>/<th>/<tr> imply a <tbody>; the tag is then reprocessed by the
    # parser's new current phase.
    def startTagImplyTbody(name, attributes)
      startTagRowGroup('tbody', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # A nested <table> start tag is treated as </table> followed by the start
    # tag again (the second step is skipped when parsing inner_html).
    def startTagTable(name, attributes)
      parse_error("unexpected-start-tag-implies-end-tag",
                  {"startName" => "table", "endName" => "table"})
      @parser.phase.processEndTag('table')
      @parser.phase.processStartTag(name, attributes) unless @parser.inner_html
    end

    # Any other start tag is an error; it is processed by the :inBody phase
    # with the tree's insert_from_table flag raised.
    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-implies-table-voodoo", {"name" => name})
      with_insert_from_table { |in_body| in_body.processStartTag(name, attributes) }
    end

    # </table>: generate implied end tags, complain if the current node is
    # not the table, unwind the stack to the table and reset the insertion
    # mode.
    def endTagTable(name)
      unless in_scope?('table', true)
        # inner_html case
        assert @parser.inner_html
        return parse_error
      end

      @tree.generateImpliedEndTags

      current_name = @tree.open_elements.last.name
      unless current_name == 'table'
        parse_error("end-tag-too-early-named",
                    {"gotName" => "table", "expectedName" => current_name})
      end

      remove_open_elements_until('table')
      @parser.reset_insertion_mode
    end

    # End tags that cannot be honoured here are reported and dropped.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # Any other end tag is an error; it is processed by the :inBody phase
    # with the tree's insert_from_table flag raised.
    def endTagOther(name)
      parse_error("unexpected-end-tag-implies-table-voodoo", {"name" => name})
      with_insert_from_table { |in_body| in_body.processEndTag(name) }
    end

    protected

    # Raises the tree's insert_from_table flag ("make all the special element
    # rearranging voodoo kick in"), yields the :inBody phase, then lowers the
    # flag again.
    def with_insert_from_table
      @tree.insert_from_table = true
      yield @parser.phases[:inBody]
      @tree.insert_from_table = false
    end

    # "clear the stack back to a table context": pops open elements,
    # reporting each one, until the current node is <table> or <html>.
    # When the current node is <html> it's an inner_html case.
    def clearStackToTableContext
      loop do
        current_name = @tree.open_elements.last.name
        break if %w[table html].include?(current_name)

        parse_error("unexpected-implied-end-tag-in-table", {"name" => current_name})
        @tree.open_elements.pop
      end
    end

  end
end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
require 'html5/html5parser/phase'
|
|
2
|
+
|
|
3
|
+
module HTML5
  # First phase of tree construction: consumes an optional DOCTYPE and hands
  # everything else over to the :rootElement phase.
  #
  # This phase deals with error handling as well which is currently not
  # covered in the specification. The error handling is typically known as
  # "quirks mode". It is expected that a future version of HTML5 will define this.
  class InitialPhase < Phase

    # Public identifiers (lowercase) that select quirks mode regardless of
    # the system identifier.
    QUIRKS_PUBLIC_IDS = [
      "+//silmaril//dtd html pro v0r11 19970101//en",
      "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
      "-//as//dtd html 3.0 aswedit + extensions//en",
      "-//ietf//dtd html 2.0 level 1//en",
      "-//ietf//dtd html 2.0 level 2//en",
      "-//ietf//dtd html 2.0 strict level 1//en",
      "-//ietf//dtd html 2.0 strict level 2//en",
      "-//ietf//dtd html 2.0 strict//en",
      "-//ietf//dtd html 2.0//en",
      "-//ietf//dtd html 2.1e//en",
      "-//ietf//dtd html 3.0//en",
      "-//ietf//dtd html 3.0//en//",
      "-//ietf//dtd html 3.2 final//en",
      "-//ietf//dtd html 3.2//en",
      "-//ietf//dtd html 3//en",
      "-//ietf//dtd html level 0//en",
      "-//ietf//dtd html level 0//en//2.0",
      "-//ietf//dtd html level 1//en",
      "-//ietf//dtd html level 1//en//2.0",
      "-//ietf//dtd html level 2//en",
      "-//ietf//dtd html level 2//en//2.0",
      "-//ietf//dtd html level 3//en",
      "-//ietf//dtd html level 3//en//3.0",
      "-//ietf//dtd html strict level 0//en",
      "-//ietf//dtd html strict level 0//en//2.0",
      "-//ietf//dtd html strict level 1//en",
      "-//ietf//dtd html strict level 1//en//2.0",
      "-//ietf//dtd html strict level 2//en",
      "-//ietf//dtd html strict level 2//en//2.0",
      "-//ietf//dtd html strict level 3//en",
      "-//ietf//dtd html strict level 3//en//3.0",
      "-//ietf//dtd html strict//en",
      "-//ietf//dtd html strict//en//2.0",
      "-//ietf//dtd html strict//en//3.0",
      "-//ietf//dtd html//en",
      "-//ietf//dtd html//en//2.0",
      "-//ietf//dtd html//en//3.0",
      "-//metrius//dtd metrius presentational//en",
      "-//microsoft//dtd internet explorer 2.0 html strict//en",
      "-//microsoft//dtd internet explorer 2.0 html//en",
      "-//microsoft//dtd internet explorer 2.0 tables//en",
      "-//microsoft//dtd internet explorer 3.0 html strict//en",
      "-//microsoft//dtd internet explorer 3.0 html//en",
      "-//microsoft//dtd internet explorer 3.0 tables//en",
      "-//netscape comm. corp.//dtd html//en",
      "-//netscape comm. corp.//dtd strict html//en",
      "-//o'reilly and associates//dtd html 2.0//en",
      "-//o'reilly and associates//dtd html extended 1.0//en",
      "-//spyglass//dtd html 2.0 extended//en",
      "-//sq//dtd html 2.0 hotmetal + extensions//en",
      "-//sun microsystems corp.//dtd hotjava html//en",
      "-//sun microsystems corp.//dtd hotjava strict html//en",
      "-//w3c//dtd html 3 1995-03-24//en",
      "-//w3c//dtd html 3.2 draft//en",
      "-//w3c//dtd html 3.2 final//en",
      "-//w3c//dtd html 3.2//en",
      "-//w3c//dtd html 3.2s draft//en",
      "-//w3c//dtd html 4.0 frameset//en",
      "-//w3c//dtd html 4.0 transitional//en",
      "-//w3c//dtd html experimental 19960712//en",
      "-//w3c//dtd html experimental 970421//en",
      "-//w3c//dtd w3 html//en",
      "-//w3o//dtd w3 html 3.0//en",
      "-//w3o//dtd w3 html 3.0//en//",
      "-//w3o//dtd w3 html strict 3.0//en//",
      "-//webtechs//dtd mozilla html 2.0//en",
      "-//webtechs//dtd mozilla html//en",
      "-/w3c/dtd html 4.0 transitional/en",
      "html"
    ].freeze

    # Public identifiers (lowercase) that select quirks mode only when the
    # DOCTYPE carries no system identifier.
    QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID = [
      "-//w3c//dtd html 4.01 frameset//en",
      "-//w3c//dtd html 4.01 transitional//en"
    ].freeze

    # System identifier that selects quirks mode on its own.
    QUIRKS_SYSTEM_ID = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

    # EOF before any DOCTYPE: report it, switch to the :rootElement phase and
    # let that phase handle the EOF.
    def process_eof
      parse_error("expected-doctype-but-got-eof")
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.process_eof
    end

    # Comments seen this early are attached to the document itself.
    def processComment(data)
      @tree.insert_comment(data, @tree.document)
    end

    # Inserts the DOCTYPE, evaluates the (not yet acted upon) quirks-mode
    # conditions and switches to the :rootElement phase.
    #
    # name::     DOCTYPE name; anything other than "html" (case-insensitive)
    #            is reported as "unknown-doctype".
    # publicId:: public identifier or nil; any non-nil value is reported.
    # systemId:: system identifier or nil; any non-nil value is reported.
    # correct::  unused by this method.
    def processDoctype(name, publicId, systemId, correct)
      if name.downcase != 'html' or publicId or systemId
        parse_error("unknown-doctype")
      end
      # XXX need to update DOCTYPE tokens
      @tree.insertDoctype(name, publicId, systemId)

      # The lookup tables are lowercase, so normalise to lowercase. (This
      # used to upcase, which made every comparison below fail.)
      publicId = publicId.to_s.downcase

      if name.downcase != 'html' || quirks_doctype?(publicId, systemId)
        # XXX quirks mode
      end

      @parser.phase = @parser.phases[:rootElement]
    end

    # Whitespace before the DOCTYPE is dropped.
    def processSpaceCharacters(data)
    end

    # Non-space characters before a DOCTYPE: report, switch to the
    # :rootElement phase and reprocess the characters there.
    def processCharacters(data)
      parse_error("expected-doctype-but-got-chars")
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processCharacters(data)
    end

    # A start tag before a DOCTYPE: report, switch to the :rootElement phase
    # and reprocess the tag there.
    def processStartTag(name, attributes)
      parse_error("expected-doctype-but-got-start-tag", {"name" => name})
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processStartTag(name, attributes)
    end

    # An end tag before a DOCTYPE: report, switch to the :rootElement phase
    # and reprocess the tag there.
    def processEndTag(name)
      parse_error("expected-doctype-but-got-end-tag", {"name" => name})
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processEndTag(name)
    end

    private

    # True when the identifiers match one of the known quirks-mode DOCTYPEs.
    # public_id must already be lowercased; system_id is compared verbatim.
    def quirks_doctype?(public_id, system_id)
      QUIRKS_PUBLIC_IDS.include?(public_id) ||
        (system_id.nil? && QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID.include?(public_id)) ||
        system_id == QUIRKS_SYSTEM_ID
    end

  end
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
require 'forwardable'

module HTML5
  # Base class for helper objects that implement each phase of processing.
  #
  # Handler methods should be in the following order (they can be omitted):
  #
  #   * EOF
  #   * Comment
  #   * Doctype
  #   * SpaceCharacters
  #   * Characters
  #   * StartTag
  #     - startTag* methods
  #   * EndTag
  #     - endTag* methods
  #
  class Phase

    extend Forwardable
    # parse_error is forwarded to the parser this phase was created with.
    def_delegators :@parser, :parse_error

    # Builds a tag-name => handler-method-name hash.
    #
    # prefix:: string prepended to every handler name ('startTag'/'endTag').
    # tags::   tag names or arrays of tag names; a trailing Hash maps a name
    #          (or array of names) to an explicit handler suffix. Entries
    #          from the trailing Hash are applied first, so a tag that is
    #          also listed positionally ends up with its positional handler.
    #
    # The following example call:
    #
    #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
    #
    # ...would return a hash equal to this:
    #
    #   { 'html' => 'startTagHtml',
    #     'base' => 'startTagBaseLinkMeta',
    #     'link' => 'startTagBaseLinkMeta',
    #     'meta' => 'startTagBaseLinkMeta',
    #     'li'   => 'startTagListItem',
    #     'dt'   => 'startTagListItem',
    #     'dd'   => 'startTagListItem' }
    #
    def self.tag_handlers(prefix, *tags)
      mapping = {}
      if tags.last.is_a?(Hash)
        tags.pop.each do |names, handler_method_suffix|
          handler_method = prefix + handler_method_suffix
          Array(names).each {|name| mapping[name] = handler_method }
        end
      end
      tags.each do |names|
        names = Array(names)
        handler_method = prefix + names.map {|name| name.capitalize }.join
        names.each {|name| mapping[name] = handler_method }
      end
      mapping
    end

    # Per-class table of start tag handlers; unknown tags map to
    # 'startTagOther'.
    def self.start_tag_handlers
      @start_tag_handlers ||= Hash.new('startTagOther')
    end

    # Declare what start tags this Phase handles. Can be called more than once.
    #
    # Example usage:
    #
    #   handle_start 'html'
    #   # html start tags will be handled by a method named 'startTagHtml'
    #
    #   handle_start %w( base link meta )
    #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
    #
    #   handle_start %w( li dt dd ) => 'ListItem'
    #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
    #
    def self.handle_start(*tags)
      start_tag_handlers.update tag_handlers('startTag', *tags)
    end

    # Per-class table of end tag handlers; unknown tags map to 'endTagOther'.
    def self.end_tag_handlers
      @end_tag_handlers ||= Hash.new('endTagOther')
    end

    # Declare what end tags this Phase handles. Behaves like handle_start.
    #
    def self.handle_end(*tags)
      end_tag_handlers.update tag_handlers('endTag', *tags)
    end

    # parser:: object that receives parse_error and holds the phase state.
    # tree::   tree builder whose open_elements stack the phase manipulates.
    def initialize(parser, tree)
      @parser, @tree = parser, tree
    end

    # Default EOF handling: generate implied end tags, then report an error
    # if elements other than the first two (expected to end in 'body') are
    # still open.
    def process_eof
      @tree.generateImpliedEndTags

      if @tree.open_elements.length > 2
        parse_error("expected-closing-tag-but-got-eof")
      elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
        # This happens for framesets or something?
        parse_error("expected-closing-tag-but-got-eof")
      elsif @parser.inner_html and @tree.open_elements.length > 1
        # XXX This is not what the specification says. Not sure what to do here.
        parse_error("eof-in-innerhtml")
      end
      # Betting ends.
    end

    # For most phases the following is correct. Where it's not it will be
    # overridden.
    def processComment(data)
      @tree.insert_comment(data, @tree.open_elements.last)
    end

    # A DOCTYPE is unexpected by default.
    def processDoctype(name, publicId, systemId, correct)
      parse_error("unexpected-doctype")
    end

    # Whitespace is inserted as text by default.
    def processSpaceCharacters(data)
      @tree.insertText(data)
    end

    # Dispatches to the handler registered for +name+ via handle_start
    # (or to startTagOther).
    def processStartTag(name, attributes)
      send self.class.start_tag_handlers[name], name, attributes
    end

    # Handles an <html> start tag: attributes not already present on the
    # first open element are copied onto it.
    def startTagHtml(name, attributes)
      if @parser.first_start_tag == false and name == 'html'
        parse_error("non-html-root")
      end
      # XXX Need a check here to see if the first start tag token emitted is
      # this token... If it's not, invoke parse_error.
      attributes.each do |attr, value|
        unless @tree.open_elements.first.attributes.has_key?(attr)
          @tree.open_elements.first.attributes[attr] = value
        end
      end
      @parser.first_start_tag = false
    end

    # Dispatches to the handler registered for +name+ via handle_end
    # (or to endTagOther).
    def processEndTag(name)
      send self.class.end_tag_handlers[name], name
    end

    # Raises AssertionError when +value+ is false or nil.
    #
    # This must be `raise`, not `throw`: throw is catch/throw control flow
    # and would surface as an "uncaught throw" error instead of the intended
    # exception. AssertionError is not defined in this file and is expected
    # to be provided elsewhere in the HTML5 namespace.
    def assert(value)
      raise AssertionError, "assertion failed" unless value
    end

    # Shorthand for the tree builder's elementInScope check.
    def in_scope?(*args)
      @tree.elementInScope(*args)
    end

    # Pops open elements until one matches and returns that element.
    #
    # name:: tag name to stop at; when nil, the given block is called with
    #        each popped element and popping stops once it returns true.
    def remove_open_elements_until(name=nil)
      finished = false
      until finished
        element = @tree.open_elements.pop
        finished = name.nil? ? yield(element) : element.name == name
      end
      return element
    end
  end
end
|