feedtools 0.2.26 → 0.2.27
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,24 @@
|
|
1
|
+
module HTML5
  # Lookup point for the available tree builder back ends.
  module TreeBuilders

    class << self
      # Resolve a tree builder class from its name.
      #
      # name - anything responding to to_s; compared case-insensitively
      #        against 'simpletree', 'rexml' and 'hpricot'.
      #
      # The implementation file for the chosen back end is required only
      # when it is asked for. Returns the matching TreeBuilder class and
      # raises a RuntimeError for any other name.
      def [](name)
        requested = name.to_s.downcase

        if requested == 'simpletree'
          require 'html5/treebuilders/simpletree'
          SimpleTree::TreeBuilder
        elsif requested == 'rexml'
          require 'html5/treebuilders/rexml'
          REXML::TreeBuilder
        elsif requested == 'hpricot'
          require 'html5/treebuilders/hpricot'
          Hpricot::TreeBuilder
        else
          raise "Unknown TreeBuilder #{name}"
        end
      end

      # Method-style spelling of the same lookup.
      alias :get_tree_builder :[]
    end
  end
end
|
@@ -0,0 +1,334 @@
|
|
1
|
+
require 'html5/constants'
|
2
|
+
|
3
|
+
#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
|
4
|
+
|
5
|
+
module HTML5
|
6
|
+
|
7
|
+
# The scope markers are inserted when entering buttons, object elements,
|
8
|
+
# marquees, table cells, and table captions, and are used to prevent formatting
|
9
|
+
# from "leaking" into tables, buttons, object elements, and marquees.
|
10
|
+
Marker = nil
|
11
|
+
|
12
|
+
module TreeBuilders
|
13
|
+
module Base
|
14
|
+
|
15
|
+
# Abstract node interface shared by the tree builder back ends.
# Only the bookkeeping attributes and reparentChildren are implemented
# here; every other operation raises NotImplementedError until a
# subclass supplies it.
class Node
  # parent     - the parent of this node (nil for the document node)
  # childNodes - list of child nodes; must include all elements but not
  #              necessarily other node types
  # _flags     - list of miscellaneous flags that can be set on the node
  attr_accessor :parent, :childNodes, :_flags

  # name is accepted for the benefit of subclasses; it is not stored here.
  def initialize(name)
    @parent, @childNodes, @_flags = nil, [], []
  end

  # Insert node as a child of the current node.
  def appendChild(node)
    raise(NotImplementedError)
  end

  # Insert data as text in the current node, positioned before the start
  # of node insertBefore or, when that is nil, at the end of the node's
  # text.
  def insertText(data, insertBefore=nil)
    raise(NotImplementedError)
  end

  # Insert node as a child of the current node, before refNode in the
  # list of child nodes.
  def insertBefore(node, refNode)
    raise(NotImplementedError)
  end

  # Remove node from the children of the current node.
  def removeChild(node)
    raise(NotImplementedError)
  end

  # Hand every child of this node to newParent via its appendChild, then
  # leave this node with an empty child list. Going through appendChild
  # lets trees that do not store text as nodes move their text correctly.
  def reparentChildren(newParent)
    @childNodes.each do |child|
      newParent.appendChild(child)
    end
    @childNodes = []
  end

  # Return a shallow copy of the current node, i.e. a node with the same
  # name and attributes but with no parent or child nodes.
  def cloneNode
    raise(NotImplementedError)
  end

  # Return true if the node has children or text, false otherwise.
  def hasContent
    raise(NotImplementedError)
  end
end
|
75
|
+
|
76
|
+
# Base treebuilder implementation
|
77
|
+
# Base treebuilder implementation.
#
# The node classes used to build the tree are read from instance
# variables: @documentClass (document root), @elementClass (elements),
# @commentClass (comments), @doctypeClass (doctypes) and @fragmentClass
# (fragments). Nothing in this class assigns them, so a subclass has to
# set them before #reset runs.
class TreeBuilder

  # Stack of open elements; the last entry is the current node.
  attr_accessor :open_elements

  # List of active formatting elements, with Marker entries as scope
  # boundaries.
  attr_accessor :activeFormattingElements

  # The document node created by #reset.
  attr_accessor :document

  attr_accessor :head_pointer

  attr_accessor :formPointer

  def initialize
    reset
  end

  # Discard all parse state and start a new, empty document.
  def reset
    @open_elements = []
    @activeFormattingElements = []

    #XXX - rename these to headElement, formElement
    @head_pointer = nil
    @formPointer = nil

    self.insert_from_table = false

    @document = @documentClass.new
  end

  # Report whether an element named target is in scope on the stack of
  # open elements.
  #
  # target       - element name to look for
  # tableVariant - when true, SCOPING_ELEMENTS do not end the search
  #                ('table' and 'html' always do)
  #
  # Returns true or false. An empty stack, or one on which no boundary
  # element is found, yields false.
  def elementInScope(target, tableVariant=false)
    return false if @open_elements.empty?

    # Exit early when possible.
    return true if @open_elements[-1].name == target

    @open_elements.reverse.each do |element|
      if element.name == target
        return true
      elsif element.name == 'table'
        return false
      elsif !tableVariant && SCOPING_ELEMENTS.include?(element.name)
        return false
      elsif element.name == 'html'
        return false
      end
    end

    # No boundary element was found on the stack, so target is not in scope.
    false
  end

  # Re-open the trailing active formatting elements that are no longer on
  # the stack of open elements: each one is cloned, inserted as a new
  # element and swapped into the list in place of the original entry.
  #
  # Within this algorithm the order of steps described in the
  # specification is not quite the same as the order of steps in the
  # code. It should still do the same though.
  def reconstructActiveFormattingElements
    # Step 1: stop the algorithm when there's nothing to do.
    return if @activeFormattingElements.empty?

    # Step 2 and step 3: we start with the last element. So i is -1.
    i = -1
    entry = @activeFormattingElements[i]
    return if entry == Marker || @open_elements.include?(entry)

    # Step 6
    until entry == Marker || @open_elements.include?(entry)
      # Step 5: let entry be one earlier in the list.
      i -= 1

      # Step 4: once we have walked past the first entry we need to jump
      # to step 8. Leaving the loop here means the i += 1 of step 7 lands
      # on the first entry of the list.
      break if i < -@activeFormattingElements.length

      entry = @activeFormattingElements[i]
    end

    loop do
      # Step 7
      i += 1

      # Step 8
      copy = @activeFormattingElements[i].cloneNode

      # Step 9
      element = insert_element(copy.name, copy.attributes)

      # Step 10
      @activeFormattingElements[i] = element

      # Step 11
      break if element == @activeFormattingElements[-1]
    end
  end

  # Pop active formatting elements up to and including the last Marker,
  # or until the list is empty.
  def clearActiveFormattingElements
    until @activeFormattingElements.empty?
      break if @activeFormattingElements.pop == Marker
    end
  end

  # Check if an element exists between the end of the active
  # formatting elements and the last marker. If it does, return it, else
  # return false
  def elementInActiveFormattingElements(name)
    @activeFormattingElements.reverse.each do |element|
      # Check for Marker first because if it's a Marker it doesn't have a
      # name attribute.
      break if element == Marker
      return element if element.name == name
    end
    return false
  end

  # Build a doctype node from name, public_id and system_id and append it
  # to the document.
  def insertDoctype(name, public_id, system_id)
    doctype = @doctypeClass.new(name)
    doctype.public_id = public_id
    doctype.system_id = system_id
    @document.appendChild(doctype)
  end

  # Append a comment node holding data to parent, or to the current node
  # when parent is nil.
  def insert_comment(data, parent=nil)
    parent = @open_elements[-1] if parent.nil?
    parent.appendChild(@commentClass.new(data))
  end

  # Create an element but don't insert it anywhere
  def createElement(name, attributes)
    element = @elementClass.new(name)
    element.attributes = attributes
    return element
  end

  # Switch the function used to insert an element from the
  # normal one to the misnested table one and back again
  def insert_from_table=(value)
    @insert_from_table = value
    @insert_element = value ? :insert_elementTable : :insert_elementNormal
  end

  # Insert an element using whichever strategy insert_from_table= selected.
  def insert_element(name, attributes)
    send(@insert_element, name, attributes)
  end

  # Create an element, append it to the current node and push it onto the
  # stack of open elements. Returns the new element.
  def insert_elementNormal(name, attributes)
    element = createElement(name, attributes)
    @open_elements.last.appendChild(element)
    @open_elements.push(element)
    return element
  end

  # Create an element and insert it into the tree
  def insert_elementTable(name, attributes)
    unless TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
      return insert_elementNormal(name, attributes)
    end

    #We should be in the InTable mode. This means we want to do
    #special magic element rearranging
    element = createElement(name, attributes)
    parent, insertBefore = getTableMisnestedNodePosition
    if insertBefore.nil?
      parent.appendChild(element)
    else
      parent.insertBefore(element, insertBefore)
    end
    @open_elements.push(element)
    return element
  end

  # Insert text into parent (default: the current node). When table
  # insertion is active and the current node is one of
  # TABLE_INSERT_MODE_ELEMENTS, the text goes to the foster parent instead.
  def insertText(data, parent=nil)
    parent = @open_elements[-1] if parent.nil?

    if @insert_from_table && TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)
      #We should be in the InTable mode. This means we want to do
      #special magic element rearranging
      parent, insertBefore = getTableMisnestedNodePosition
      parent.insertText(data, insertBefore)
    else
      parent.insertText(data)
    end
  end

  # Get the foster parent element, and sibling to insert before
  # (or nil) when inserting a misnested table node
  def getTableMisnestedNodePosition
    #The foster parent element is the one which comes before the most
    #recently opened table element
    lastTable = @open_elements.reverse.find { |element| element.name == "table" }

    # No table is open: use the first open element.
    return @open_elements[0], nil unless lastTable

    #XXX - we should really check that this parent is actually a
    #node here
    if lastTable.parent
      return lastTable.parent, lastTable
    else
      return @open_elements[@open_elements.index(lastTable) - 1], nil
    end
  end

  # Pop elements off the stack of open elements while the current node is
  # one of dd, dt, li, p, td, th or tr and is not named exclude. Stops
  # quietly if the stack runs empty.
  def generateImpliedEndTags(exclude=nil)
    # XXX td, th and tr are not actually needed
    implied = %w[dd dt li p td th tr]

    # XXX This is not entirely what the specification says. We should
    # investigate it more closely.
    while (current = @open_elements.last)
      name = current.name
      break unless implied.include?(name) && name != exclude
      @open_elements.pop
    end
  end

  def get_document
    @document
  end

  # Move the children of the first open element into a new fragment node
  # and return that fragment.
  def get_fragment
    #assert @inner_html
    fragment = @fragmentClass.new
    @open_elements[0].reparentChildren(fragment)
    return fragment
  end

  # Serialize the subtree of node in the format required by unit tests
  # node - the node from which to start serializing
  def testSerializer(node)
    raise NotImplementedError
  end

end
|
332
|
+
end
|
333
|
+
end
|
334
|
+
end
|
@@ -0,0 +1,231 @@
|
|
1
|
+
require 'html5/treebuilders/base'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'hpricot'
|
4
|
+
require 'forwardable'
|
5
|
+
|
6
|
+
module HTML5
|
7
|
+
module TreeBuilders
|
8
|
+
module Hpricot
|
9
|
+
|
10
|
+
# Tree builder node that wraps an Hpricot object. The wrapper keeps its
# own childNodes list and mirrors structural changes onto the wrapped
# object held in #hpricot.
class Node < Base::Node
  extend Forwardable

  # name is answered by the wrapped Hpricot object.
  def_delegators :@hpricot, :name

  # The wrapped Hpricot object.
  attr_accessor :hpricot

  # Build the wrapped object from the subclass's hpricot_class.
  def initialize(name)
    super(name)
    @hpricot = self.class.hpricot_class.new name
  end

  # Append node to this node. A TextNode appended directly after another
  # TextNode is folded into the existing one instead of being added.
  # In both cases node is detached from its previous Hpricot parent (if
  # any) and its parent references are pointed at this node.
  def appendChild(node)
    tail = childNodes.last
    if node.kind_of?(TextNode) && childNodes.any? && tail.kind_of?(TextNode)
      tail_text = tail.hpricot
      tail_text.content = tail_text.content + node.hpricot.content
    else
      childNodes << node
      hpricot.children << node.hpricot
    end

    previous = node.hpricot.parent
    if previous != nil
      siblings = previous.children
      siblings.delete_at(siblings.index(node.hpricot))
    end

    node.hpricot.parent = hpricot
    node.parent = self
  end

  # Remove node from this node's children and clear its parent references.
  def removeChild(node)
    childNodes.delete(node)
    wrapped_children = hpricot.children
    wrapped_children.delete_at(wrapped_children.index(node.hpricot))
    node.hpricot.parent = nil
    node.parent = nil
  end

  # Add data as a new TextNode, placed before the child `before` when one
  # is given and at the end otherwise.
  def insertText(data, before=nil)
    text = TextNode.new(data)
    return insertBefore(text, before) if before
    appendChild(text)
  end

  # Insert node in front of refNode. A TextNode inserted directly after
  # an existing TextNode is folded into that one instead.
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
      preceding = childNodes[index-1]
      preceding.hpricot.content = preceding.hpricot.to_s + node.hpricot.to_s
    else
      refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
      childNodes.insert(index, node)
    end
  end

  # True when this node has any child nodes.
  def hasContent
    childNodes.any?
  end
end
|
65
|
+
|
66
|
+
# Element node backed by an ::Hpricot::Elem.
class Element < Node
  def self.hpricot_class
    ::Hpricot::Elem
  end

  # Replace the object built by the superclass with an Elem wrapped
  # around a start tag called name.
  def initialize(name)
    super(name)

    start_tag = ::Hpricot::STag.new(name)
    @hpricot = ::Hpricot::Elem.new(start_tag)
  end

  # The element name, read from the wrapped start tag.
  def name
    @hpricot.stag.name
  end

  # Return a new node of the same class and name carrying a copy of this
  # element's attributes.
  def cloneNode
    copy = self.class.new(name)
    attributes.each do |key, value|
      copy.hpricot[key] = value
    end
    copy
  end

  # A call to Hpricot::Elem#raw_attributes is built dynamically,
  # so alterations to the returned value (a hash) will be lost.
  #
  # AttributeProxy works around this by forwarding :[]= calls
  # to the raw_attributes accessor on the element start tag.
  #
  class AttributeProxy
    def initialize(hpricot)
      @hpricot = hpricot
    end

    # Write straight into the start tag's attribute store.
    def []=(k, v)
      store = @hpricot.stag.send(stag_attributes_method)
      store[k] = v
    end

    def stag_attributes_method
      # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
      return :raw_attributes if @hpricot.stag.respond_to?(:raw_attributes)
      :attributes
    end

    # Everything else is answered by the element's attributes object.
    def method_missing(*args, &block)
      @hpricot.attributes.send(*args, &block)
    end
  end

  # A fresh AttributeProxy over the wrapped element.
  def attributes
    AttributeProxy.new(@hpricot)
  end

  # Copy each name/value pair from attrs onto the wrapped element.
  def attributes=(attrs)
    attrs.each do |key, value|
      @hpricot[key] = value
    end
  end

  # Render this element, its attributes (except xmlns) and its children
  # as indented "\n|"-prefixed lines.
  def printTree(indent=0)
    out = "\n|#{' ' * indent}<#{name}>"
    indent += 2
    attributes.each do |key, value|
      next if key == 'xmlns'
      out += "\n|#{' ' * indent}#{key}=\"#{value}\""
    end
    childNodes.each do |child|
      out = out + child.printTree(indent)
    end
    out
  end
end
|
131
|
+
|
132
|
+
# Document root node backed by an ::Hpricot::Doc.
class Document < Node
  def self.hpricot_class
    ::Hpricot::Doc
  end

  # The document has no name, so nil is handed to the superclass.
  def initialize
    super(nil)
  end

  # Render '#document' followed by every child's tree, each child being
  # indented two columns deeper than indent.
  def printTree(indent=0)
    out = '#document'
    childNodes.each do |child|
      out = out + child.printTree(indent + 2)
    end
    out
  end
end
|
145
|
+
|
146
|
+
# Doctype node backed by an ::Hpricot::DocType.
class DocumentType < Node
  # public_id and system_id are answered by the wrapped doctype.
  def_delegators :@hpricot, :public_id, :system_id

  def self.hpricot_class
    ::Hpricot::DocType
  end

  # name, public_id and system_id are passed on to ::Hpricot::DocType.new.
  def initialize(name, public_id, system_id)
    begin
      # The superclass initializer builds hpricot_class.new(name); the
      # ArgumentError that call raises for a doctype (it needs 3
      # arguments) is deliberately ignored and the real object is built
      # below.
      super(name)
    rescue ArgumentError # needs 3...
    end

    @hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
  end

  # Render the doctype as an indented "\n|"-prefixed line. The target is
  # included only when it is present and non-empty.
  def printTree(indent=0)
    target = hpricot.target
    # String#any? only exists where String is Enumerable (Ruby 1.8);
    # checking for emptiness gives the same answer on every Ruby version.
    if target and !target.empty?
      "\n|#{' ' * indent}<!DOCTYPE #{target}>"
    else
      "\n|#{' ' * indent}<!DOCTYPE >"
    end
  end
end
|
170
|
+
|
171
|
+
# Fragment container: an Element created with an empty name.
class DocumentFragment < Element
  def initialize
    super('')
  end

  # Render only the children, each indented two columns deeper than
  # indent; the fragment itself contributes no line.
  def printTree(indent=0)
    out = ''
    childNodes.each do |child|
      out = out + child.printTree(indent + 2)
    end
    out
  end
end
|
180
|
+
|
181
|
+
# Text node backed by an ::Hpricot::Text.
class TextNode < Node
  # Only the wrapped text object is set up here; the superclass
  # initializer is not invoked.
  def initialize(data)
    @hpricot = ::Hpricot::Text.new(data)
  end

  # Render the text content in double quotes on an indented
  # "\n|"-prefixed line.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}\"#{hpricot.content}\""
  end
end
|
190
|
+
|
191
|
+
# Comment node backed by an ::Hpricot::Comment.
class CommentNode < Node
  def self.hpricot_class
    ::Hpricot::Comment
  end

  # Render the comment content between <!-- and --> on an indented
  # "\n|"-prefixed line.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}<!-- #{hpricot.content} -->"
  end
end
|
200
|
+
|
201
|
+
# Tree builder that produces Hpricot-backed nodes.
class TreeBuilder < Base::TreeBuilder
  # Select the node classes defined in this module. The superclass
  # initializer is not invoked here.
  def initialize
    @documentClass = Document
    @doctypeClass  = DocumentType
    @elementClass  = Element
    @commentClass  = CommentNode
    @fragmentClass = DocumentFragment
  end

  # Build a doctype node from all three values at once and append it to
  # the document.
  def insertDoctype(name, public_id, system_id)
    @document.appendChild(@doctypeClass.new(name, public_id, system_id))
  end

  # Serialize node by asking it for its printTree output.
  def testSerializer(node)
    node.printTree
  end

  # Return the Hpricot object wrapped by the document node.
  def get_document
    @document.hpricot
  end

  # Build the fragment via the superclass, keep it as the current
  # document, and return the children of its wrapped Hpricot object.
  def get_fragment
    fragment = super
    @document = fragment
    fragment.hpricot.children
  end
end
|
228
|
+
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|