feedtools 0.2.26 → 0.2.27
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,209 @@
|
|
1
|
+
require 'html5/treebuilders/base'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module HTML5
  module TreeBuilders
    module REXML

      # Tree-builder node backed by a REXML object.
      #
      # Each node keeps the REXML object it wraps in +rxobj+; +name+ and
      # +attributes+ are delegated to that object. Subclasses provide the
      # REXML class to instantiate through a class-level +rxclass+ method.
      class Node < Base::Node
        extend Forwardable
        def_delegators :@rxobj, :name, :attributes
        attr_accessor :rxobj

        # Creates the wrapper and a fresh +self.class.rxclass+ instance for +name+.
        def initialize(name)
          super(name)
          @rxobj = self.class.rxclass.new(name)
        end

        # Appends +node+ as the last child. A TextNode appended directly after
        # another TextNode is merged into it instead of being added separately.
        def appendChild(node)
          if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
            merge_text_into(childNodes.last, node)
          else
            childNodes.push(node)
            rxobj.add(node.rxobj)
          end
          node.parent = self
        end

        # Detaches +node+ from both the wrapper tree and the REXML tree.
        def removeChild(node)
          childNodes.delete(node)
          rxobj.delete(node.rxobj)
          node.parent = nil
        end

        # Inserts +data+ as text, before the child +before+ when given,
        # otherwise at the end.
        def insertText(data, before = nil)
          if before
            insertBefore(TextNode.new(data), before)
          else
            appendChild(TextNode.new(data))
          end
        end

        # Inserts +node+ immediately before the existing child +refNode+.
        # A TextNode inserted right after another TextNode is merged into it.
        def insertBefore(node, refNode)
          index = childNodes.index(refNode)
          if node.kind_of?(TextNode) && index > 0 && childNodes[index - 1].kind_of?(TextNode)
            merge_text_into(childNodes[index - 1], node)
          else
            childNodes.insert(index, node)
            refNode.rxobj.parent.insert_before(refNode.rxobj, node.rxobj)
          end
          # Record the parent, as appendChild does; previously it was left unset here.
          node.parent = self
        end

        # True when this node has at least one child.
        def hasContent
          childNodes.length > 0
        end

        private

        # Appends the serialized text of +addition+ to the text node +target+
        # and flags the combined REXML text as raw again.
        def merge_text_into(target, addition)
          target.rxobj.value = target.rxobj.to_s + addition.rxobj.to_s
          target.rxobj.raw = true
        end
      end

      # Element node wrapping a ::REXML::Element.
      class Element < Node
        def self.rxclass
          ::REXML::Element
        end

        def initialize(name)
          super(name)
        end

        # Returns a new node of the same class with the same name and
        # attributes; children are not copied.
        def cloneNode
          copy = self.class.new(name)
          attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
          copy
        end

        # Copies every name/value pair of +value+ onto the REXML element.
        def attributes=(value)
          value.each { |attr_name, attr_value| rxobj.attributes[attr_name] = attr_value }
        end

        # Renders this element, its attributes (except +xmlns+) and its
        # children in the "\n|<indent>..." test-serialization format.
        def printTree(indent = 0)
          tree = "\n|#{' ' * indent}<#{name}>"
          indent += 2
          attributes.each do |attr_name, attr_value|
            next if attr_name == 'xmlns'
            tree += "\n|#{' ' * indent}#{attr_name}=\"#{attr_value}\""
          end
          childNodes.each { |child| tree += child.printTree(indent) }
          tree
        end
      end

      # Document node wrapping a ::REXML::Document.
      class Document < Node
        def self.rxclass
          ::REXML::Document
        end

        def initialize
          super(nil)
        end

        # ryansking: not sure why this was here. removing it doesn't cause any tests to fail
        # def appendChild node
        #   if node.kind_of? Element and node.name == 'html'
        #     node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
        #   end
        #   super node
        # end

        # Renders "#document" followed by every child, indented two further columns.
        def printTree(indent = 0)
          tree = "#document"
          childNodes.each { |child| tree += child.printTree(indent + 2) }
          tree
        end
      end

      # Doctype node wrapping a ::REXML::DocType; +public_id+ and +system_id+
      # are read from the REXML object's +public+ and +system+.
      class DocumentType < Node
        def_delegator :@rxobj, :public, :public_id

        def_delegator :@rxobj, :system, :system_id

        def self.rxclass
          ::REXML::DocType
        end

        # Builds the doctype as PUBLIC when +public_id+ is given, as SYSTEM
        # when only +system_id+ is given, and from the bare name otherwise.
        def initialize(name, public_id, system_id)
          super(name)
          if public_id
            @rxobj = ::REXML::DocType.new([name, ::REXML::DocType::PUBLIC, public_id, system_id])
          elsif system_id
            @rxobj = ::REXML::DocType.new([name, ::REXML::DocType::SYSTEM, nil, system_id])
          else
            @rxobj = ::REXML::DocType.new(name)
          end
        end

        def printTree(indent = 0)
          "\n|#{' ' * indent}<!DOCTYPE #{name}>"
        end
      end

      # Nameless element used as the root of a parsed fragment.
      class DocumentFragment < Element
        def initialize
          super(nil)
        end

        # Renders only the children, indented two further columns.
        def printTree(indent = 0)
          tree = ""
          childNodes.each { |child| tree += child.printTree(indent + 2) }
          tree
        end
      end

      # Text node wrapping a ::REXML::Text created in raw mode.
      class TextNode < Node
        # The REXML text is created with raw=true, so the markup characters in
        # +data+ must be escaped here first. '&' is replaced first so the
        # entities introduced for '<' and '>' are not escaped a second time.
        def initialize(data)
          raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
          @rxobj = ::REXML::Text.new(raw, true, nil, true)
        end

        def printTree(indent = 0)
          "\n|#{' ' * indent}\"#{rxobj.value}\""
        end
      end

      # Comment node wrapping a ::REXML::Comment.
      class CommentNode < Node
        def self.rxclass
          ::REXML::Comment
        end

        def printTree(indent = 0)
          "\n|#{' ' * indent}<!-- #{rxobj.string} -->"
        end
      end

      # Tree builder that produces REXML-backed nodes.
      class TreeBuilder < Base::TreeBuilder
        # Registers the node classes defined in this module.
        def initialize
          @documentClass = Document
          @doctypeClass  = DocumentType
          @elementClass  = Element
          @commentClass  = CommentNode
          @fragmentClass = DocumentFragment
        end

        # Creates a doctype node and appends it to the document.
        def insertDoctype(name, public_id, system_id)
          doctype = @doctypeClass.new(name, public_id, system_id)
          @document.appendChild(doctype)
        end

        def testSerializer(node)
          node.printTree
        end

        # Returns the underlying ::REXML::Document.
        def get_document
          @document.rxobj
        end

        # Stores the fragment returned by the base class as the document and
        # returns the REXML children of its root.
        def get_fragment
          @document = super
          @document.rxobj.children
        end
      end

    end
  end
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
require 'html5/treebuilders/base'
|
2
|
+
|
3
|
+
module HTML5
  module TreeBuilders
    module SimpleTree

      # Node representing an item in the tree.
      class Node < Base::Node
        # name - The tag name associated with the node
        attr_accessor :name

        # The value of the current node (applies to text nodes and
        # comments)
        attr_accessor :value

        # a hash holding name, value pairs for attributes of the node
        attr_accessor :attributes

        def initialize(name)
          super
          @name = name
          @value = nil
          @attributes = {}
        end

        # Appends +node+ as the last child. A TextNode appended directly after
        # another TextNode is merged into it by concatenating the values.
        def appendChild(node)
          if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
            childNodes.last.value += node.value
          else
            childNodes << node
          end
          node.parent = self
        end

        # Removes +node+ from the children and clears its parent.
        def removeChild(node)
          childNodes.delete(node)
          node.parent = nil
        end

        # Returns a new node of the same class with the same name, attributes
        # and value; children are not copied.
        def cloneNode
          copy = self.class.new(name)
          attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
          copy.value = value
          copy
        end

        # Inserts +data+ as text, before the child +before+ when given,
        # otherwise at the end.
        def insertText(data, before = nil)
          if before
            insertBefore(TextNode.new(data), before)
          else
            appendChild(TextNode.new(data))
          end
        end

        # Inserts +node+ immediately before the existing child +refNode+.
        # A TextNode inserted right after another TextNode is merged into it.
        def insertBefore(node, refNode)
          index = childNodes.index(refNode)
          if node.kind_of?(TextNode) && index > 0 && childNodes[index - 1].kind_of?(TextNode)
            childNodes[index - 1].value += node.value
          else
            childNodes.insert(index, node)
          end
          # Record the parent, as appendChild does; previously it was left unset here.
          node.parent = self
        end

        # Renders this node (via +to_s+) and its children in the
        # "\n|<indent>..." test-serialization format.
        def printTree(indent = 0)
          tree = "\n|%s%s" % [' ' * indent, self.to_s]
          childNodes.each { |child| tree += child.printTree(indent + 2) }
          tree
        end

        # True when this node has at least one child.
        def hasContent
          childNodes.length > 0
        end
      end

      # Element node, rendered as "<name>".
      class Element < Node
        def to_s
          "<#{name}>"
        end

        # Like Node#printTree, but lists the attributes between the element
        # line and the children, all indented two further columns.
        def printTree(indent = 0)
          tree = "\n|%s%s" % [' ' * indent, self.to_s]
          indent += 2
          attributes.each do |attr_name, attr_value|
            tree += "\n|%s%s=\"%s\"" % [' ' * indent, attr_name, attr_value]
          end
          childNodes.each { |child| tree += child.printTree(indent) }
          tree
        end
      end

      # Document root, rendered as "#document".
      class Document < Node
        def to_s
          "#document"
        end

        def initialize
          super(nil)
        end

        # Renders "#document" followed by every child, indented two further columns.
        def printTree(indent = 0)
          tree = to_s
          childNodes.each { |child| tree += child.printTree(indent + 2) }
          tree
        end
      end

      # Doctype node; +public_id+ and +system_id+ start out as nil.
      class DocumentType < Node
        attr_accessor :public_id, :system_id

        def to_s
          "<!DOCTYPE #{name}>"
        end

        def initialize(name)
          super(name)
          @public_id = nil
          @system_id = nil
        end
      end

      # Nameless element used as the root of a parsed fragment.
      class DocumentFragment < Element
        def initialize
          super(nil)
        end

        # Renders only the children, indented two further columns.
        def printTree(indent = 0)
          tree = ""
          childNodes.each { |child| tree += child.printTree(indent + 2) }
          tree
        end
      end

      # Text node; the text is kept in +value+.
      class TextNode < Node
        def initialize(value)
          super(nil)
          @value = value
        end

        def to_s
          '"%s"' % value
        end
      end

      # Comment node; the comment body is kept in +value+.
      class CommentNode < Node
        def initialize(value)
          super(nil)
          @value = value
        end

        def to_s
          "<!-- %s -->" % value
        end
      end

      # Tree builder that produces SimpleTree nodes.
      class TreeBuilder < Base::TreeBuilder
        # Registers the node classes defined in this module.
        def initialize
          @documentClass = Document
          @doctypeClass  = DocumentType
          @elementClass  = Element
          @commentClass  = CommentNode
          @fragmentClass = DocumentFragment
        end

        def testSerializer(node)
          node.printTree
        end

        # Stores the fragment returned by the base class as the document and
        # returns it.
        def get_fragment
          @document = super
          @document
        end
      end

    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'html5/treewalkers/base'
|
2
|
+
|
3
|
+
module HTML5
  module TreeWalkers

    class << self
      # Looks up a tree walker class by name (case-insensitive; any object
      # responding to +to_s+ is accepted). The implementation file is required
      # on demand. Raises a RuntimeError for names other than 'simpletree',
      # 'rexml' and 'hpricot'.
      def [](name)
        key = name.to_s.downcase
        if key == 'simpletree'
          require 'html5/treewalkers/simpletree'
          SimpleTree::TreeWalker
        elsif key == 'rexml'
          require 'html5/treewalkers/rexml'
          REXML::TreeWalker
        elsif key == 'hpricot'
          require 'html5/treewalkers/hpricot'
          Hpricot::TreeWalker
        else
          raise "Unknown TreeWalker #{name}"
        end
      end

      alias :get_tree_walker :[]
    end
  end
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'html5/constants'
|
2
|
+
module HTML5
  module TreeWalkers

    # Factory methods for the token hashes produced by tree walkers.
    module TokenConstructor
      # Builds an error token carrying +msg+.
      def error(msg)
        {:type => "SerializeError", :data => msg}
      end

      # Converts an attribute collection into an array via +to_a+.
      def normalize_attrs(attrs)
        attrs.to_a
      end

      # Builds the token for a void element.
      #
      # When +has_children+ is true and a block is given, a "Void element has
      # children" error token is yielded to the block before the EmptyTag
      # token is returned. Without a block only the EmptyTag token is
      # produced, exactly as before.
      def empty_tag(name, attrs, has_children=false)
        yield error(_("Void element has children")) if has_children && block_given?
        {:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
      end

      def start_tag(name, attrs)
        {:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
      end

      def end_tag(name)
        {:type => :EndTag, :name => name, :data => []}
      end

      # Splits +data+ into tokens and yields them in order: a leading run of
      # space characters (if any) as :SpaceCharacters, the remainder as
      # :Characters, and a trailing run of space characters (if any) as
      # :SpaceCharacters. Input consisting only of space characters yields a
      # single :SpaceCharacters token.
      def text(data)
        space_run = "[#{SPACE_CHARACTERS.join('')}]+"

        leading = data[/\A#{space_run}/m]
        if leading
          yield({:type => :SpaceCharacters, :data => leading})
          data = data[leading.length .. -1]
          return if data.empty?
        end

        trailing = data[/#{space_run}\Z/m]
        if trailing
          yield({:type => :Characters, :data => data[0 ... -trailing.length]})
          yield({:type => :SpaceCharacters, :data => trailing})
        else
          yield({:type => :Characters, :data => data})
        end
      end

      def comment(data)
        {:type => :Comment, :data => data}
      end

      def doctype(name, public_id, system_id, correct=nil)
        {:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
      end

      # Builds an error token for a node type the walker does not recognise.
      def unknown(nodeType)
        error(_("Unknown node type: ") + nodeType.to_s)
      end

      # Message hook; returns +str+ unchanged.
      def _(str)
        str
      end
    end

    # Abstract tree walker: subclasses implement +each+ to yield tokens for
    # the tree passed to +new+.
    class Base
      include TokenConstructor

      def initialize(tree)
        @tree = tree
      end

      # Yields every token of the tree; must be provided by subclasses.
      def each
        raise NotImplementedError
      end

      # Same as +each+. Implemented as a delegating method rather than with
      # +alias+, because an alias would stay bound to Base#each and raise
      # NotImplementedError even when a subclass overrides +each+.
      def walk(&block)
        each(&block)
      end

      # Collects every token produced by +each+ into an array.
      def to_ary
        tokens = []
        each { |token| tokens << token }
        tokens
      end
    end

    # Walks a tree iteratively using four navigation hooks that subclasses
    # must implement: +node_details+, +first_child+, +next_sibling+ and
    # +parent+.
    class NonRecursiveTreeWalker < TreeWalkers::Base
      # Must return an array whose first item is the node kind (:DOCTYPE,
      # :TEXT, :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT or nil to
      # skip the node), followed by the kind-specific details consumed by +each+.
      def node_details(node)
        raise NotImplementedError
      end

      def first_child(node)
        raise NotImplementedError
      end

      def next_sibling(node)
        raise NotImplementedError
      end

      def parent(node)
        raise NotImplementedError
      end

      # Yields the tokens for the whole tree in document order.
      def each
        current_node = @tree
        until current_node.nil?
          details = node_details(current_node)
          has_children = false

          case details.shift
          when :DOCTYPE
            yield doctype(*details)

          when :TEXT
            text(*details) { |token| yield token }

          when :ELEMENT
            name, attributes, has_children = details
            if VOID_ELEMENTS.include?(name)
              # Forward the "void element has children" error token instead of
              # silently dropping it; it is emitted just before the EmptyTag.
              token = empty_tag(name, attributes.to_a, has_children) { |err| yield err }
              yield token
              # Children of a void element are never visited.
              has_children = false
            else
              yield start_tag(name, attributes.to_a)
            end

          when :COMMENT
            yield comment(details[0])

          when :DOCUMENT, :DOCUMENT_FRAGMENT
            has_children = true

          when nil
            # ignore (REXML::XMLDecl is an example)

          else
            yield unknown(details[0])
          end

          child = has_children ? first_child(current_node) : nil
          unless child.nil?
            current_node = child
            next
          end

          # Nothing to descend into: emit end tags while climbing until a
          # next sibling is found or the root has been closed.
          until current_node.nil?
            closing = node_details(current_node)
            if closing[0] == :ELEMENT
              closing_name = closing[1]
              yield end_tag(closing_name) if !VOID_ELEMENTS.include?(closing_name)
            end

            if @tree == current_node
              current_node = nil
            else
              sibling = next_sibling(current_node)
              unless sibling.nil?
                current_node = sibling
                break
              end

              current_node = parent(current_node)
            end
          end
        end
      end
    end

  end
end
|