feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,209 @@
1
+ require 'html5/treebuilders/base'
2
+ require 'rexml/document'
3
+ require 'forwardable'
4
+
5
+ module HTML5
6
+ module TreeBuilders
7
+ module REXML
8
+
9
# Common behaviour for every node of the REXML-backed tree builder.
#
# Each node wraps a REXML object (+rxobj+) and mirrors every structural
# change made to its inherited +childNodes+ list onto that object.
# +name+ and +attributes+ are forwarded to the wrapped object.
class Node < Base::Node
  extend Forwardable
  def_delegators :@rxobj, :name, :attributes
  attr_accessor :rxobj

  # Creates the node and the REXML object it wraps; the REXML class is
  # supplied by the concrete subclass through +rxclass+.
  def initialize(name)
    super(name)
    @rxobj = self.class.rxclass.new(name)
  end

  # Appends +node+ as the last child. A text node appended directly after
  # another text node is merged into it instead of being added separately.
  def appendChild(node)
    if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
      previous = childNodes.last.rxobj
      previous.value = previous.to_s + node.rxobj.to_s
      # The text was escaped when its TextNode was built; keep it raw.
      previous.raw = true
    else
      childNodes.push(node)
      rxobj.add(node.rxobj)
    end
    node.parent = self
  end

  # Detaches +node+ from this node and from the wrapped REXML object.
  def removeChild(node)
    childNodes.delete(node)
    rxobj.delete(node.rxobj)
    node.parent = nil
  end

  # Inserts +data+ as text, either in front of the child +before+ or, when
  # +before+ is nil, at the end of the child list.
  def insertText(data, before = nil)
    if before
      insertBefore(TextNode.new(data), before)
    else
      appendChild(TextNode.new(data))
    end
  end

  # Inserts +node+ immediately in front of the existing child +refNode+.
  # Text inserted right after a text node is merged into that node.
  #
  # The parent link is set here exactly as appendChild does; previously it
  # was left untouched, so an inserted node did not know its parent.
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 && childNodes[index - 1].kind_of?(TextNode)
      previous = childNodes[index - 1].rxobj
      previous.value = previous.to_s + node.rxobj.to_s
      previous.raw = true
    else
      childNodes.insert(index, node)
      refNode.rxobj.parent.insert_before(refNode.rxobj, node.rxobj)
    end
    node.parent = self
  end

  # True when the node has at least one child.
  def hasContent
    childNodes.length > 0
  end
end
59
+
60
# Element node backed by a ::REXML::Element.
class Element < Node
  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Element
  end

  def initialize(name)
    super(name)
  end

  # Returns a new node of the same class with the same name and a copy of
  # the attributes; children are not copied.
  def cloneNode
    copy = self.class.new(name)
    attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
    copy
  end

  # Copies every name/value pair of +value+ onto the wrapped element.
  def attributes=(value)
    value.each { |attr_name, attr_value| rxobj.attributes[attr_name] = attr_value }
  end

  # Renders this element, its attributes (except +xmlns+) and its subtree
  # in the indented "|"-prefixed format, one entry per line.
  def printTree(indent = 0)
    tree = "\n|#{' ' * indent}<#{name}>"
    indent += 2
    padding = ' ' * indent
    attributes.each do |attr_name, attr_value|
      next if attr_name == 'xmlns'
      tree += "\n|#{padding}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.each { |child| tree += child.printTree(indent) }
    tree
  end
end
92
+
93
# Document root backed by a ::REXML::Document.
class Document < Node
  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Document
  end

  # A document has no name, so nil is handed to Node#initialize.
  def initialize
    super(nil)
  end

  # ryansking: not sure why this was here. Removing it doesn't cause any
  # tests to fail.
  # def appendChild node
  #   if node.kind_of? Element and node.name == 'html'
  #     node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
  #   end
  #   super node
  # end

  # Renders "#document" followed by every child subtree, each child
  # indented two columns deeper than +indent+.
  def printTree(indent = 0)
    childNodes.inject("#document") do |tree, child|
      tree + child.printTree(indent + 2)
    end
  end
end
118
+
119
# Doctype node backed by a ::REXML::DocType.
class DocumentType < Node
  # Expose the wrapped DocType's identifiers under the tree builder's names.
  def_delegator :@rxobj, :public, :public_id
  def_delegator :@rxobj, :system, :system_id

  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::DocType
  end

  # Builds the wrapped DocType. Node#initialize first creates a plain one
  # from +name+; it is then replaced by a PUBLIC doctype when +public_id+
  # is given, a SYSTEM doctype when only +system_id+ is given, and a
  # name-only doctype otherwise.
  def initialize(name, public_id, system_id)
    super(name)
    @rxobj =
      if public_id
        ::REXML::DocType.new([name, ::REXML::DocType::PUBLIC, public_id, system_id])
      elsif system_id
        ::REXML::DocType.new([name, ::REXML::DocType::SYSTEM, nil, system_id])
      else
        ::REXML::DocType.new(name)
      end
  end

  # Renders the doctype as one indented "|"-prefixed line.
  def printTree(indent = 0)
    padding = ' ' * indent
    "\n|#{padding}<!DOCTYPE #{name}>"
  end
end
143
+
144
# Nameless element used as the container for a parsed fragment.
class DocumentFragment < Element
  # A fragment has no tag name, so nil is handed to Element#initialize.
  def initialize
    super(nil)
  end

  # Renders only the children (each two columns deeper than +indent+);
  # the fragment itself contributes no line to the output.
  def printTree(indent = 0)
    childNodes.inject("") do |tree, child|
      tree + child.printTree(indent + 2)
    end
  end
end
157
+
158
# Text node backed by a ::REXML::Text.
class TextNode < Node
  # Escapes "&", "<" and ">" in +data+ (in that order, so the ampersands
  # introduced by the later replacements are not escaped again) and wraps
  # the result in a raw REXML text object. Node#initialize is not invoked.
  def initialize(data)
    replacements = [['&', '&amp;'], ['<', '&lt;'], ['>', '&gt;']]
    raw = replacements.inject(data) do |escaped, (char, entity)|
      escaped.gsub(char, entity)
    end
    @rxobj = ::REXML::Text.new(raw, true, nil, true)
  end

  # Renders the text value, quoted, as one indented "|"-prefixed line.
  def printTree(indent = 0)
    padding = ' ' * indent
    "\n|#{padding}\"#{rxobj.value}\""
  end
end
168
+
169
# Comment node backed by a ::REXML::Comment.
class CommentNode < Node
  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Comment
  end

  # Renders the comment text as one indented "|"-prefixed line.
  def printTree(indent = 0)
    padding = ' ' * indent
    "\n|#{padding}<!-- #{rxobj.string} -->"
  end
end
178
+
179
# Tree builder that produces REXML-backed nodes.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes of this module with the builder.
  def initialize
    @fragmentClass = DocumentFragment
    @commentClass  = CommentNode
    @elementClass  = Element
    @doctypeClass  = DocumentType
    @documentClass = Document
  end

  # Creates a doctype node from the three identifiers and appends it to
  # the current document.
  def insertDoctype(name, public_id, system_id)
    @document.appendChild(@doctypeClass.new(name, public_id, system_id))
  end

  # Returns the printTree rendering of +node+.
  def testSerializer(node)
    node.printTree
  end

  # Returns the REXML object wrapped by the current document.
  def get_document
    @document.rxobj
  end

  # Stores the fragment returned by the base class as the current document
  # and returns the children of its wrapped REXML object.
  def get_fragment
    fragment = super
    @document = fragment
    fragment.rxobj.children
  end
end
206
+
207
+ end
208
+ end
209
+ end
@@ -0,0 +1,185 @@
1
+ require 'html5/treebuilders/base'
2
+
3
+ module HTML5
4
+ module TreeBuilders
5
+ module SimpleTree
6
+
7
# Node representing an item in the plain-Ruby ("simple") tree.
class Node < Base::Node
  # The tag name associated with the node.
  attr_accessor :name

  # The value of the current node (applies to text nodes and comments).
  attr_accessor :value

  # A hash holding name/value pairs for the attributes of the node.
  attr_accessor :attributes

  # Creates a node called +name+ with no value and no attributes.
  def initialize(name)
    super
    @name = name
    @value = nil
    @attributes = {}
  end

  # Appends +node+ as the last child. A text node appended directly after
  # another text node is merged into it instead of being added separately.
  def appendChild(node)
    if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
      childNodes.last.value += node.value
    else
      childNodes << node
    end
    node.parent = self
  end

  # Detaches +node+ from this node.
  def removeChild(node)
    childNodes.delete(node)
    node.parent = nil
  end

  # Returns a new node of the same class with the same name, value and a
  # copy of the attributes; children are not copied.
  def cloneNode
    copy = self.class.new(name)
    attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
    copy.value = value
    copy
  end

  # Inserts +data+ as text, either in front of the child +before+ or, when
  # +before+ is nil, at the end of the child list.
  def insertText(data, before = nil)
    if before
      insertBefore(TextNode.new(data), before)
    else
      appendChild(TextNode.new(data))
    end
  end

  # Inserts +node+ immediately in front of the existing child +refNode+.
  # Text inserted right after a text node is merged into that node.
  #
  # The parent link is set here exactly as appendChild does; previously it
  # was left untouched, so an inserted node did not know its parent.
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 && childNodes[index - 1].kind_of?(TextNode)
      childNodes[index - 1].value += node.value
    else
      childNodes.insert(index, node)
    end
    node.parent = self
  end

  # Renders this node (via to_s) and its subtree in the indented
  # "|"-prefixed format, children two columns deeper than their parent.
  def printTree(indent = 0)
    tree = "\n|%s%s" % [' ' * indent, self.to_s]
    childNodes.each { |child| tree += child.printTree(indent + 2) }
    tree
  end

  # True when the node has at least one child.
  def hasContent
    childNodes.length > 0
  end
end
77
+
78
# Element node of the simple tree.
class Element < Node
  # The tag name wrapped in angle brackets.
  def to_s
    "<#{name}>"
  end

  # Renders the element, then one line per attribute, then every child
  # subtree; attributes and children sit two columns deeper than the tag.
  def printTree(indent = 0)
    tree = "\n|#{' ' * indent}#{self}"
    indent += 2
    padding = ' ' * indent
    attributes.each do |attr_name, attr_value|
      tree += "\n|#{padding}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.each { |child| tree += child.printTree(indent) }
    tree
  end
end
95
+
96
# Root node of the simple tree.
class Document < Node
  # A document has no name, so nil is handed to Node#initialize.
  def initialize
    super(nil)
  end

  def to_s
    "#document"
  end

  # Renders "#document" followed by every child subtree, each child
  # indented two columns deeper than +indent+.
  def printTree(indent = 0)
    childNodes.inject(to_s) do |tree, child|
      tree + child.printTree(indent + 2)
    end
  end
end
113
+
114
# Doctype node of the simple tree.
class DocumentType < Node
  # Public and system identifiers; both start out as nil and are meant to
  # be assigned through these accessors.
  attr_accessor :public_id, :system_id

  # Creates a doctype called +name+ with no identifiers set.
  def initialize(name)
    super(name)
    @system_id = nil
    @public_id = nil
  end

  # The doctype declaration showing only its name.
  def to_s
    "<!DOCTYPE #{name}>"
  end
end
127
+
128
# Nameless element used as the container for a parsed fragment.
class DocumentFragment < Element
  # A fragment has no tag name, so nil is handed to the superclass.
  def initialize
    super(nil)
  end

  # Renders only the children (each two columns deeper than +indent+);
  # the fragment itself contributes no line to the output.
  def printTree(indent = 0)
    childNodes.inject("") do |tree, child|
      tree + child.printTree(indent + 2)
    end
  end
end
141
+
142
# Text node of the simple tree; the text is kept in +value+.
class TextNode < Node
  # Creates a nameless node whose value is the given text.
  def initialize(value)
    super(nil)
    @value = value
  end

  # The text wrapped in double quotes.
  def to_s
    '"%s"' % value
  end
end
152
+
153
# Comment node of the simple tree; the comment text is kept in +value+.
class CommentNode < Node
  # Creates a nameless node whose value is the given comment text.
  def initialize(value)
    super(nil)
    @value = value
  end

  # The comment text wrapped in comment delimiters.
  def to_s
    "<!-- %s -->" % value
  end
end
163
+
164
# Tree builder that produces simple-tree nodes.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes of this module with the builder.
  def initialize
    @fragmentClass = DocumentFragment
    @commentClass  = CommentNode
    @elementClass  = Element
    @doctypeClass  = DocumentType
    @documentClass = Document
  end

  # Returns the printTree rendering of +node+.
  def testSerializer(node)
    node.printTree
  end

  # Stores the fragment returned by the base class as the current document
  # and returns it.
  def get_fragment
    fragment = super
    @document = fragment
    fragment
  end
end
182
+
183
+ end
184
+ end
185
+ end
@@ -0,0 +1,26 @@
1
+ require 'html5/treewalkers/base'
2
+
3
module HTML5
  module TreeWalkers

    class << self
      # Looks up a tree walker class by name (case-insensitive; any object
      # responding to to_s is accepted). The implementation file is
      # required only when its walker is requested.
      #
      # Raises a RuntimeError for an unrecognised name.
      def [](name)
        key = name.to_s.downcase
        if key == 'simpletree'
          require 'html5/treewalkers/simpletree'
          SimpleTree::TreeWalker
        elsif key == 'rexml'
          require 'html5/treewalkers/rexml'
          REXML::TreeWalker
        elsif key == 'hpricot'
          require 'html5/treewalkers/hpricot'
          Hpricot::TreeWalker
        else
          raise "Unknown TreeWalker #{name}"
        end
      end

      # Longer, descriptive name for the same lookup.
      alias_method :get_tree_walker, :[]
    end
  end
end
@@ -0,0 +1,162 @@
1
+ require 'html5/constants'
2
+ module HTML5
3
+ module TreeWalkers
4
+
5
# Factory methods for the token hashes that tree walkers emit.
# Every token is a Hash with a :type key plus type-specific keys.
module TokenConstructor
  # Error token carrying +msg+.
  def error(msg)
    {:type => "SerializeError", :data => msg}
  end

  # Converts an attribute collection to an array via to_a.
  def normalize_attrs(attrs)
    attrs.to_a
  end

  # Token for a void element.
  #
  # When +has_children+ is true and a block is given, an error token is
  # yielded to the block before the EmptyTag token is returned. Previously
  # that error token was built and immediately thrown away, so the check
  # had no effect. Without a block the behaviour is unchanged: only the
  # EmptyTag token is returned.
  def empty_tag(name, attrs, has_children=false)
    yield error(_("Void element has children")) if has_children && block_given?
    {:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
  end

  # Token opening the element +name+ with the given attributes.
  def start_tag(name, attrs)
    {:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
  end

  # Token closing the element +name+.
  def end_tag(name)
    {:type => :EndTag, :name => name, :data => []}
  end

  # Splits +data+ into leading whitespace, content and trailing whitespace
  # and yields a token for each part: SpaceCharacters for the whitespace
  # runs and Characters for what lies between. Nothing further is yielded
  # when the data consists solely of whitespace.
  def text(data)
    space_run = "[#{SPACE_CHARACTERS.join('')}]+"

    if data =~ /\A(#{space_run})/m
      leading = $1
      yield({:type => :SpaceCharacters, :data => leading})
      data = data[leading.length .. -1]
      return if data.empty?
    end

    if data =~ /(#{space_run})\Z/m
      trailing = $1
      yield({:type => :Characters, :data => data[0 ... -trailing.length]})
      yield({:type => :SpaceCharacters, :data => trailing})
    else
      yield({:type => :Characters, :data => data})
    end
  end

  # Comment token carrying +data+.
  def comment(data)
    {:type => :Comment, :data => data}
  end

  # Doctype token.
  def doctype(name, public_id, system_id, correct=nil)
    {:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
  end

  # Error token reporting a node type the walker does not recognise.
  def unknown(nodeType)
    error(_("Unknown node type: ") + nodeType.to_s)
  end

  # Wraps message strings; currently returns +str+ unchanged.
  def _(str)
    str
  end
end
58
+
59
# Abstract tree walker: holds the tree to walk and leaves the traversal
# (+each+) to subclasses.
class Base
  include TokenConstructor

  # +tree+ is the root node the walker will traverse.
  def initialize(tree)
    @tree = tree
  end

  # Yields every token of the tree in order. Must be overridden.
  def each
    raise NotImplementedError
  end

  # Same as +each+.
  #
  # This is a real method rather than `alias walk each`: an alias made
  # here is bound to Base#each, so +walk+ would raise NotImplementedError
  # even on subclasses that override +each+.
  def walk(&block)
    each(&block)
  end

  # Collects every token produced by +each+ into an array.
  def to_ary
    tokens = []
    each { |token| tokens << token }
    tokens
  end
end
80
+
81
# Tree walker that traverses the tree iteratively, in document order,
# using four navigation primitives that subclasses must implement.
class NonRecursiveTreeWalker < TreeWalkers::Base
  # Returns an array whose first element is the node type symbol
  # (:DOCTYPE, :TEXT, :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT,
  # nil to skip the node, or anything else for an unknown node) followed
  # by the type-specific details consumed in +each+.
  def node_details(node)
    raise NotImplementedError
  end

  # Returns the first child of +node+, or nil.
  def first_child(node)
    raise NotImplementedError
  end

  # Returns the sibling following +node+, or nil.
  def next_sibling(node)
    raise NotImplementedError
  end

  # Returns the parent of +node+.
  def parent(node)
    raise NotImplementedError
  end

  # Yields the token stream for the tree rooted at @tree.
  #
  # "No node" is tested with nil? rather than `!= nil`. The latter goes
  # through the node's own ==, and node classes may define equality by
  # content (REXML::Text is Comparable on its string value), so an empty
  # text node compared equal to nil and ended the walk or skipped the
  # remaining siblings.
  def each
    current_node = @tree
    until current_node.nil?
      details = node_details(current_node)
      has_children = false

      case details.shift
      when :DOCTYPE
        yield doctype(*details)

      when :TEXT
        text(*details) { |token| yield token }

      when :ELEMENT
        name, attributes, has_children = details
        if VOID_ELEMENTS.include?(name)
          yield empty_tag(name, attributes.to_a, has_children)
          # Never descend into a void element, even if it has children.
          has_children = false
        else
          yield start_tag(name, attributes.to_a)
        end

      when :COMMENT
        yield comment(details[0])

      when :DOCUMENT, :DOCUMENT_FRAGMENT
        has_children = true

      when nil
        # ignore (REXML::XMLDecl is an example)

      else
        yield unknown(details[0])
      end

      child = has_children ? first_child(current_node) : nil
      if !child.nil?
        current_node = child
      else
        # No children to visit: close this node, then move to the next
        # sibling, climbing (and closing ancestors) until one is found or
        # the root has been closed.
        until current_node.nil?
          details = node_details(current_node)
          if details.shift == :ELEMENT
            name, attributes, has_children = details
            yield end_tag(name) if !VOID_ELEMENTS.include?(name)
          end

          if @tree == current_node
            current_node = nil
          else
            sibling = next_sibling(current_node)
            if !sibling.nil?
              current_node = sibling
              break
            end

            current_node = parent(current_node)
          end
        end
      end
    end
  end
end
160
+
161
+ end
162
+ end