feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,36 @@
1
+ require 'html5/constants'
2
+ require 'html5/filters/base'
3
+
4
+ module HTML5
5
+ module Filters
6
# Token-stream filter that collapses insignificant whitespace.
#
# The wrapped stream is reached through +__getobj__+; every token it
# produces is re-yielded. Outside of whitespace-preserving elements each run
# of space characters is reduced to a single space. Inside such elements the
# tokens pass through unchanged.
class WhitespaceFilter < Base

  # Elements inside which whitespace is left untouched: pre, textarea and
  # everything listed in RCDATA_ELEMENTS.
  SPACE_PRESERVE_ELEMENTS = %w[pre textarea] + RCDATA_ELEMENTS

  # Matches one or more consecutive characters from SPACE_CHARACTERS.
  SPACES = /[#{SPACE_CHARACTERS.join('')}]+/m

  # Yields every token of the wrapped stream, rewriting token[:data] of
  # :SpaceCharacters and :Characters tokens when whitespace is not being
  # preserved. Tokens are modified in place before being yielded.
  def each
    # Nesting depth inside whitespace-preserving elements; 0 means collapse.
    preserve = 0

    __getobj__.each do |token|
      case token[:type]
      when :StartTag
        # Once inside a preserving element every nested start tag deepens
        # the counter, so that the matching end tags unwind it again.
        if preserve > 0 || SPACE_PRESERVE_ELEMENTS.include?(token[:name])
          preserve += 1
        end

      when :EndTag
        preserve -= 1 if preserve > 0

      when :SpaceCharacters
        token[:data] = " " if preserve == 0 && token[:data]

      when :Characters
        # gsub rather than sub: every whitespace run in the text has to be
        # collapsed, not only the first one.
        token[:data] = token[:data].gsub(SPACES, ' ') if preserve == 0
      end

      yield token
    end
  end
end
34
+ end
35
+ end
36
+
@@ -0,0 +1,248 @@
1
+ require 'html5/constants'
2
+ require 'html5/tokenizer'
3
+ require 'html5/treebuilders/rexml'
4
+
5
+ Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
6
+ require 'html5/html5parser/' + File.basename(path)
7
+ end
8
+
9
+ module HTML5
10
+
11
# Error in parsed document. Raised by the parser when it runs in strict mode.
# Derives from StandardError so that a plain `rescue` clause catches it.
class ParseError < StandardError; end

# Signals a failed internal assertion. Also a StandardError so it can be
# handled by a plain `rescue` clause.
class AssertionError < StandardError; end
14
+
15
# HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
#
# Tokens produced by the tokenizer are normalized and dispatched to the
# current phase object; parse errors are collected in #errors.
class HTMLParser

  attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table

  attr_reader :phases, :tokenizer, :tree, :errors

  # Builds a parser from +options+ and parses +stream+ as a whole document.
  # An :encoding entry in +options+ is passed to #parse rather than to the
  # constructor. The caller's hash is left unmodified.
  def self.parse(stream, options = {})
    options = options.dup
    encoding = options.delete(:encoding)
    new(options).parse(stream, encoding)
  end

  # Builds a parser from +options+ and parses +stream+ as a fragment.
  # :container (default 'div') and :encoding are passed to #parse_fragment
  # rather than to the constructor. The caller's hash is left unmodified.
  def self.parse_fragment(stream, options = {})
    options = options.dup
    container = options.delete(:container) || 'div'
    encoding = options.delete(:encoding)
    new(options).parse_fragment(stream, container, encoding)
  end

  # Names of all phases; each maps to the class HTML5::<Name>Phase.
  @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
    inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )

  # :strict - raise an exception when a parse error is encountered
  # :tree - a treebuilder class controlling the type of tree that will be
  # returned. Built in treebuilders can be accessed through
  # HTML5::TreeBuilders[treeType]
  #
  # Every entry of +options+ is stored in the instance variable of the same
  # name, so it can override any of the defaults set here.
  def initialize(options = {})
    @strict = false
    @errors = []

    @tokenizer = HTMLTokenizer
    @tree = TreeBuilders::REXML::TreeBuilder

    options.each {|name, value| instance_variable_set("@#{name}", value) }

    # Default the two flags only when the caller did not supply them.
    # instance_variables returns Symbols on Ruby >= 1.9, so testing it against
    # a String name never matches; ask the object directly instead.
    @lowercase_attr_name = nil unless instance_variable_defined?(:@lowercase_attr_name)
    @lowercase_element_name = nil unless instance_variable_defined?(:@lowercase_element_name)

    @tree = @tree.new

    # One phase instance per name, keyed by the name as a Symbol.
    @phases = @@phases.inject({}) do |phases, phase_name|
      phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
      phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
      phases
    end
  end

  # Shared implementation of #parse and #parse_fragment.
  #
  # stream     - input handed to the tokenizer
  # inner_html - true when parsing a fragment, false for a whole document
  # encoding   - encoding handed to the tokenizer
  # container  - name of the context element (only used for fragments)
  def _parse(stream, inner_html, encoding, container = 'div')
    @tree.reset
    @first_start_tag = false
    @errors = []

    # After a previous run @tokenizer holds an instance; go back to its class.
    @tokenizer = @tokenizer.class unless Class === @tokenizer
    @tokenizer = @tokenizer.new(stream, :encoding => encoding,
      :parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => @lowercase_element_name)

    if inner_html
      # The context element decides the tokenizer's initial content model.
      case @inner_html = container.downcase
      when 'title', 'textarea'
        @tokenizer.content_model_flag = :RCDATA
      when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
        @tokenizer.content_model_flag = :CDATA
      when 'plaintext'
        @tokenizer.content_model_flag = :PLAINTEXT
      else
        # content_model_flag already is PCDATA
        @tokenizer.content_model_flag = :PCDATA
      end

      @phase = @phases[:rootElement]
      @phase.insert_html_element
      reset_insertion_mode
    else
      @inner_html = false
      @phase = @phases[:initial]
    end

    # We only seem to have InBodyPhase testcases where the following is
    # relevant ... need others too
    @last_phase = nil

    # XXX This is temporary for the moment so there isn't any other
    # changes needed for the parser to work with the iterable tokenizer
    @tokenizer.each do |token|
      token = normalize_token(token)

      method = 'process%s' % token[:type]

      case token[:type]
      when :Characters, :SpaceCharacters, :Comment
        @phase.send method, token[:data]
      when :StartTag
        @phase.send method, token[:name], token[:data]
      when :EndTag
        @phase.send method, token[:name]
      when :Doctype
        @phase.send method, token[:name], token[:publicId],
          token[:systemId], token[:correct]
      else
        # Any other token type is reported as a parse error.
        parse_error(token[:data], token[:datavars])
      end
    end

    # When the loop finishes it's EOF
    @phase.process_eof
  end

  # Parse a HTML document into a well-formed tree
  #
  # stream - a filelike object or string containing the HTML to be parsed
  #
  # The optional encoding parameter must be a string that indicates
  # the encoding. If specified, that encoding will be used,
  # regardless of any BOM or later declaration (such as in a meta
  # element)
  def parse(stream, encoding=nil)
    _parse(stream, false, encoding)
    @tree.get_document
  end

  # Parse a HTML fragment into a well-formed tree fragment
  #
  # container - name of the element whose inner_html property is being set;
  # if nil, defaults to 'div'
  #
  # stream - a filelike object or string containing the HTML to be parsed
  #
  # The optional encoding parameter must be a string that indicates
  # the encoding. If specified, that encoding will be used,
  # regardless of any BOM or later declaration (such as in a meta
  # element)
  def parse_fragment(stream, container='div', encoding=nil)
    _parse(stream, true, encoding, container || 'div')
    @tree.get_fragment
  end

  # Records a parse error as [stream position, code, data] in #errors and
  # raises ParseError when the parser was created with :strict.
  def parse_error(code = 'XXX-undefined-error', data = {})
    # XXX The idea is to make data mandatory.
    @errors.push([@tokenizer.stream.position, code, data])
    raise ParseError if @strict
  end

  # HTML5 specific normalizations to the token stream
  #
  # Modifies +token+ in place and returns it:
  # * :EmptyTag becomes :StartTag (with a parse error unless the element
  #   is listed in VOID_ELEMENTS),
  # * start and end tag names are downcased,
  # * a non-empty start tag attribute list becomes a Hash,
  # * an end tag carrying attributes is reported as a parse error.
  def normalize_token(token)

    if token[:type] == :EmptyTag
      # A trailing solidus (/) is tolerated on void elements; on any other
      # element it is a parse error. Either way the token is then handled
      # as an ordinary start tag.
      unless VOID_ELEMENTS.include?(token[:name])
        parse_error("incorrectly-placed-solidus")
      end

      token[:type] = :StartTag
    end

    if token[:type] == :StartTag
      token[:name] = token[:name].downcase

      # Convert the attribute pairs to a Hash with downcased names. The first
      # occurrence of a duplicated attribute wins, so [["x", "y"], ["x", "z"]]
      # becomes {"x" => "y"}. Values are stored untouched and source order is
      # kept.
      unless token[:data].empty?
        attributes = {}
        token[:data].each do |attr, value|
          key = attr.downcase
          attributes[key] = value unless attributes.has_key?(key)
        end
        token[:data] = attributes
      end

    elsif token[:type] == :EndTag
      parse_error("attributes-in-end-tag") unless token[:data].empty?
      token[:name] = token[:name].downcase
    end

    token
  end

  # Element name => phase to switch to, used by #reset_insertion_mode.
  @@new_modes = {
    'select' => :inSelect,
    'td' => :inCell,
    'th' => :inCell,
    'tr' => :inRow,
    'tbody' => :inTableBody,
    'thead' => :inTableBody,
    'tfoot' => :inTableBody,
    'caption' => :inCaption,
    'colgroup' => :inColumnGroup,
    'table' => :inTable,
    'head' => :inBody,
    'body' => :inBody,
    'frameset' => :inFrameset
  }

  # Chooses the current phase from the stack of open elements, walking from
  # the most recently opened element towards the first one.
  def reset_insertion_mode
    # The name of this method is mostly historical. (It's also used in the
    # specification.)
    last = false

    @tree.open_elements.reverse_each do |node|
      node_name = node.name

      if node == @tree.open_elements.first
        last = true
        unless ['td', 'th'].include?(node_name)
          # XXX
          # assert @inner_html
          # For the first open element the fragment's container name is used.
          node_name = @inner_html
        end
      end

      # XXX 'select', 'colgroup', 'head' and 'frameset' should only be seen
      # here in the inner_html case (assert @inner_html).

      if @@new_modes.has_key?(node_name)
        @phase = @phases[@@new_modes[node_name]]
      elsif node_name == 'html'
        @phase = @phases[@tree.head_pointer.nil? ? :beforeHead : :afterHead]
      elsif last
        @phase = @phases[:inBody]
      else
        # Nothing decided yet; look at the next element down the stack.
        next
      end

      break
    end
  end

  # Identity function: returns +string+ unchanged.
  def _(string); string; end
end
247
+
248
+ end
@@ -0,0 +1,46 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
# Phase handling the tokens that arrive after the body. Except for comments
# and </html>, every token is a parse error and is replayed in the inBody
# phase.
class AfterBodyPhase < Phase

  handle_end 'html'

  # Attaches the comment to the first open element.
  def processComment(data)
    # This is needed because data is to be appended to the <html> element
    # here and not to whatever is currently open.
    root = @tree.open_elements.first
    @tree.insert_comment(data, root)
  end

  # Reports the stray text, then lets the inBody phase process it.
  def processCharacters(data)
    parse_error("unexpected-char-after-body")
    in_body = @parser.phases[:inBody]
    @parser.phase = in_body
    in_body.processCharacters(data)
  end

  # Reports the stray start tag, then lets the inBody phase process it.
  def processStartTag(name, attributes)
    details = {"name" => name}
    parse_error("unexpected-start-tag-after-body", details)
    in_body = @parser.phases[:inBody]
    @parser.phase = in_body
    in_body.processStartTag(name, attributes)
  end

  # </html>: a parse error when parsing a fragment, otherwise the parser
  # moves on to the trailingEnd phase and remembers this phase.
  def endTagHtml(name)
    return parse_error if @parser.inner_html

    # XXX: This may need to be done, not sure
    # Don't set last_phase to the current phase but to the inBody phase
    # instead. No need for extra parse errors if there's something after </html>.
    # Try "<!doctype html>X</html>X" for instance.
    @parser.last_phase = @parser.phase
    @parser.phase = @parser.phases[:trailingEnd]
  end

  # Reports the stray end tag, then lets the inBody phase process it.
  def endTagOther(name)
    details = {"name" => name}
    parse_error("unexpected-end-tag-after-body", details)
    in_body = @parser.phases[:inBody]
    @parser.phase = in_body
    in_body.processEndTag(name)
  end

end
46
+ end
@@ -0,0 +1,33 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
# Phase handling the tokens that arrive after a frameset. Text and all tags
# other than <noframes> and </html> are reported as parse errors.
class AfterFramesetPhase < Phase

  # http://www.whatwg.org/specs/web-apps/current-work/#after3

  handle_start 'html', 'noframes'

  handle_end 'html'

  # Text is only reported as a parse error.
  def processCharacters(data)
    parse_error("unexpected-char-after-frameset")
  end

  # <noframes> is processed by the inBody phase; the current phase is kept.
  def startTagNoframes(name, attributes)
    in_body = @parser.phases[:inBody]
    in_body.processStartTag(name, attributes)
  end

  # Any other start tag is only reported as a parse error.
  def startTagOther(name, attributes)
    details = {"name" => name}
    parse_error("unexpected-start-tag-after-frameset", details)
  end

  # </html>: remember this phase and continue in the trailingEnd phase.
  def endTagHtml(name)
    parser = @parser
    parser.last_phase = parser.phase
    parser.phase = parser.phases[:trailingEnd]
  end

  # Any other end tag is only reported as a parse error.
  def endTagOther(name)
    details = {"name" => name}
    parse_error("unexpected-end-tag-after-frameset", details)
  end
end
33
+ end
@@ -0,0 +1,50 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
# Phase handling the tokens that arrive after the head. <body> and
# <frameset> open their element explicitly; most other input implies a
# <body> element first.
class AfterHeadPhase < Phase

  handle_start 'html', 'body', 'frameset', ['base', 'link', 'meta', 'script', 'style', 'title'] => 'FromHead'

  # End of input: imply <body>, then let the new phase handle EOF.
  def process_eof
    anything_else
    current = @parser.phase
    current.process_eof
  end

  # Text: imply <body>, then let the new phase handle the text.
  def processCharacters(data)
    anything_else
    current = @parser.phase
    current.processCharacters(data)
  end

  # <body>: insert the element and continue in the inBody phase.
  def startTagBody(name, attributes)
    @tree.insert_element(name, attributes)
    in_body = @parser.phases[:inBody]
    @parser.phase = in_body
  end

  # <frameset>: insert the element and continue in the inFrameset phase.
  def startTagFrameset(name, attributes)
    @tree.insert_element(name, attributes)
    in_frameset = @parser.phases[:inFrameset]
    @parser.phase = in_frameset
  end

  # Head-only start tags seen after the head: report a parse error, then
  # switch to the inHead phase and let it process the tag.
  def startTagFromHead(name, attributes)
    details = {"name" => name}
    parse_error("unexpected-start-tag-out-of-my-head", details)
    in_head = @parser.phases[:inHead]
    @parser.phase = in_head
    in_head.processStartTag(name, attributes)
  end

  # Any other start tag: imply <body>, then let the new phase handle it.
  def startTagOther(name, attributes)
    anything_else
    current = @parser.phase
    current.processStartTag(name, attributes)
  end

  # Any end tag: imply <body>, then let the new phase handle it.
  def processEndTag(name)
    anything_else
    current = @parser.phase
    current.processEndTag(name)
  end

  # Inserts an implied <body> element without attributes and switches the
  # parser to the inBody phase.
  def anything_else
    no_attributes = {}
    @tree.insert_element('body', no_attributes)
    @parser.phase = @parser.phases[:inBody]
  end

end
50
+ end
@@ -0,0 +1,41 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
# Phase handling the tokens that arrive before the head. Apart from an
# explicit <head>, most input implies a <head> element and is then replayed
# in whatever phase the parser has switched to.
class BeforeHeadPhase < Phase

  handle_start 'html', 'head'

  handle_end ['html', 'head', 'body', 'br', 'p'] => 'ImplyHead'

  # End of input: imply <head>, then let the new phase handle EOF.
  def process_eof
    startTagHead('head', {})
    current = @parser.phase
    current.process_eof
  end

  # Text: imply <head>, then let the new phase handle the text.
  def processCharacters(data)
    startTagHead('head', {})
    current = @parser.phase
    current.processCharacters(data)
  end

  # Inserts the head element, records it as the tree's head pointer and
  # switches the parser to the inHead phase.
  def startTagHead(name, attributes)
    @tree.insert_element(name, attributes)
    inserted = @tree.open_elements[-1]
    @tree.head_pointer = inserted
    @parser.phase = @parser.phases[:inHead]
  end

  # Any other start tag: imply <head>, then let the new phase handle it.
  def startTagOther(name, attributes)
    startTagHead('head', {})
    current = @parser.phase
    current.processStartTag(name, attributes)
  end

  # </html>, </head>, </body>, </br>, </p>: imply <head>, then let the new
  # phase handle the end tag.
  def endTagImplyHead(name)
    startTagHead('head', {})
    current = @parser.phase
    current.processEndTag(name)
  end

  # Any other end tag is only reported as a parse error.
  def endTagOther(name)
    details = {"name" => name}
    parse_error("end-tag-after-implied-root", details)
  end

end
41
+ end