feedtools 0.2.26 → 0.2.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,209 @@
1
+ require 'html5/treebuilders/base'
2
+ require 'rexml/document'
3
+ require 'forwardable'
4
+
5
+ module HTML5
6
+ module TreeBuilders
7
+ module REXML
8
+
9
# Common behaviour for every node produced by the REXML tree builder.
#
# Each instance wraps a REXML object (+rxobj+); structural changes are
# applied both to the wrapper's +childNodes+ list and to the wrapped REXML
# tree. +childNodes+ and +parent+ are not defined in this class; they are
# presumably supplied by Base::Node -- confirm in html5/treebuilders/base.
class Node < Base::Node
  extend Forwardable
  def_delegators :@rxobj, :name, :attributes
  attr_accessor :rxobj

  # Creates the wrapper together with the REXML object it wraps.
  #
  # name - passed to the superclass constructor and to the constructor of
  #        the class returned by +self.class.rxclass+.
  def initialize(name)
    super name
    @rxobj = self.class.rxclass.new name
  end

  # Appends +node+ as the last child.
  #
  # A TextNode appended right after another TextNode is folded into the
  # existing one instead of being added as a separate child. In both cases
  # +node.parent+ is set to this node.
  def appendChild(node)
    last_child = childNodes.last
    if node.kind_of?(TextNode) && last_child.kind_of?(TextNode)
      absorb_text(last_child, node)
    else
      childNodes.push node
      rxobj.add node.rxobj
    end
    node.parent = self
  end

  # Detaches +node+ from this node and from the wrapped REXML tree, and
  # clears its parent reference.
  def removeChild(node)
    childNodes.delete node
    rxobj.delete node.rxobj
    node.parent = nil
  end

  # Inserts +data+ as text, before the child +before+ when one is given
  # and at the end of the child list otherwise.
  def insertText(data, before = nil)
    text_node = TextNode.new(data)
    if before
      insertBefore text_node, before
    else
      appendChild text_node
    end
  end

  # Inserts +node+ immediately before the existing child +refNode+.
  #
  # A TextNode inserted right after another TextNode is folded into that
  # preceding node. As in appendChild, +node.parent+ is set to this node in
  # both cases; the original implementation left it untouched.
  #
  # Raises ArgumentError when +refNode+ is not one of this node's children
  # (this used to surface as a NoMethodError or TypeError on a nil index).
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    raise ArgumentError, 'refNode is not a child of this node' if index.nil?

    previous = index > 0 ? childNodes[index - 1] : nil
    if node.kind_of?(TextNode) && previous.kind_of?(TextNode)
      absorb_text(previous, node)
    else
      childNodes.insert index, node
      refNode.rxobj.parent.insert_before(refNode.rxobj, node.rxobj)
    end
    node.parent = self
  end

  # True when this node has at least one child.
  def hasContent
    (childNodes.length > 0)
  end

  private

  # Appends the serialized text of +addition+ to +target+'s REXML text
  # object and flags the result as raw.
  def absorb_text(target, addition)
    target.rxobj.value = target.rxobj.to_s + addition.rxobj.to_s
    target.rxobj.raw = true
  end
end
59
+
60
# Element node; the wrapped object is a ::REXML::Element.
class Element < Node
  # REXML class used for the wrapped object of this node type.
  def self.rxclass
    ::REXML::Element
  end

  # Returns a new node of the same class and name carrying a copy of this
  # element's attributes. Children are not copied.
  def cloneNode
    copy = self.class.new(name)
    attributes.each do |attr_name, attr_value|
      copy.attributes[attr_name] = attr_value
    end
    copy
  end

  # Sets every name/value pair of +value+ on the wrapped REXML element.
  # Attributes already present and not named in +value+ are left alone.
  def attributes=(value)
    value.each do |attr_name, attr_value|
      rxobj.attributes[attr_name] = attr_value
    end
  end

  # Renders this element, its attributes (except 'xmlns') and its children,
  # one per line, each line prefixed with "|" and indented by +indent+.
  def printTree(indent = 0)
    tree = "\n|#{' ' * indent}<#{name}>"
    inner = indent + 2
    attributes.each do |attr_name, attr_value|
      next if attr_name == 'xmlns'
      tree += "\n|#{' ' * inner}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.each do |child|
      tree += child.printTree(inner)
    end
    tree
  end
end
92
+
93
# Document root node; the wrapped object is a ::REXML::Document.
class Document < Node
  # REXML class used for the wrapped object of this node type.
  def self.rxclass
    ::REXML::Document
  end

  # A document has no name, so nil is handed to the Node constructor.
  def initialize
    super(nil)
  end

  # ryansking: not sure why the override below was here; removing it does
  # not cause any tests to fail.
  # def appendChild node
  #   if node.kind_of? Element and node.name == 'html'
  #     node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
  #   end
  #   super node
  # end

  # Renders "#document" followed by every child rendered two columns
  # deeper than +indent+.
  def printTree(indent = 0)
    rendered = "#document"
    childNodes.each do |child|
      rendered += child.printTree(indent + 2)
    end
    rendered
  end
end
118
+
119
# Doctype node; the wrapped object is a ::REXML::DocType.
class DocumentType < Node
  # public_id / system_id read the wrapped DocType's +public+ / +system+.
  def_delegator :@rxobj, :public, :public_id
  def_delegator :@rxobj, :system, :system_id

  # REXML class used for the wrapped object of this node type.
  def self.rxclass
    ::REXML::DocType
  end

  # Builds the doctype.
  #
  # name      - doctype name.
  # public_id - public identifier; when truthy a PUBLIC doctype is built.
  # system_id - system identifier; used alone it yields a SYSTEM doctype.
  #
  # The Node constructor first creates a plain DocType from +name+; it is
  # then replaced by one that reflects the identifiers.
  def initialize(name, public_id, system_id)
    super(name)
    @rxobj =
      if public_id
        ::REXML::DocType.new [name, ::REXML::DocType::PUBLIC, public_id, system_id]
      elsif system_id
        ::REXML::DocType.new [name, ::REXML::DocType::SYSTEM, nil, system_id]
      else
        ::REXML::DocType.new name
      end
  end

  # Renders the doctype on its own "|"-prefixed line; only the name is
  # shown, not the identifiers.
  def printTree(indent = 0)
    padding = ' ' * indent
    "\n|#{padding}<!DOCTYPE #{name}>"
  end
end
143
+
144
# Container for a parsed fragment: an Element constructed without a name.
class DocumentFragment < Element
  # Fragments carry no tag name, so nil is passed up the constructor chain.
  def initialize
    super(nil)
  end

  # Renders only the children (each two columns deeper than +indent+); the
  # fragment itself contributes no line to the output.
  def printTree(indent = 0)
    rendered = ""
    childNodes.each do |child|
      rendered += child.printTree(indent + 2)
    end
    rendered
  end
end
157
+
158
# Text node; the wrapped object is a ::REXML::Text.
#
# NOTE(review): this constructor does not call super, so whatever state
# Base::Node#initialize would set up is absent on text nodes -- confirm
# this is intentional.
class TextNode < Node
  # Escapes '&', '<' and '>' in +data+ and stores the result in a REXML
  # text object created with respect_whitespace and raw both set to true.
  def initialize(data)
    escaped = data.gsub(/[&<>]/) do |char|
      case char
      when '&' then '&amp;'
      when '<' then '&lt;'
      else          '&gt;'
      end
    end
    @rxobj = ::REXML::Text.new(escaped, true, nil, true)
  end

  # Renders the text value in double quotes on its own "|"-prefixed line.
  def printTree(indent = 0)
    padding = ' ' * indent
    "\n|#{padding}\"#{rxobj.value}\""
  end
end
168
+
169
# Comment node; the wrapped object is a ::REXML::Comment.
class CommentNode < Node
  # REXML class used for the wrapped object of this node type.
  def self.rxclass
    ::REXML::Comment
  end

  # Renders the comment text between "<!-- " and " -->" on its own
  # "|"-prefixed line.
  def printTree(indent = 0)
    padding = ' ' * indent
    "\n|#{padding}<!-- #{rxobj.string} -->"
  end
end
178
+
179
# Tree builder that produces REXML-backed nodes.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes of this module. The superclass constructor
  # is not invoked here.
  def initialize
    @documentClass = Document
    @doctypeClass  = DocumentType
    @elementClass  = Element
    @commentClass  = CommentNode
    @fragmentClass = DocumentFragment
  end

  # Creates a doctype node from the three identifiers and appends it to
  # the document.
  def insertDoctype(name, public_id, system_id)
    @document.appendChild(@doctypeClass.new(name, public_id, system_id))
  end

  # Returns the printTree rendering of +node+.
  def testSerializer(node)
    node.printTree
  end

  # Returns the underlying REXML object of the built document.
  def get_document
    @document.rxobj
  end

  # Stores the superclass's fragment as the current document and returns
  # the children of its underlying REXML object.
  def get_fragment
    @document = super
    @document.rxobj.children
  end
end
206
+
207
+ end
208
+ end
209
+ end
@@ -0,0 +1,185 @@
1
+ require 'html5/treebuilders/base'
2
+
3
+ module HTML5
4
+ module TreeBuilders
5
+ module SimpleTree
6
+
7
# Node representing an item in the simple tree.
#
# +childNodes+ and +parent+ are not defined in this class; they are
# presumably supplied by Base::Node -- confirm in html5/treebuilders/base.
class Node < Base::Node
  # The tag name associated with the node.
  attr_accessor :name

  # The value of the current node (applies to text nodes and comments).
  attr_accessor :value

  # A hash holding name => value pairs for the attributes of the node.
  attr_accessor :attributes

  # Initializes the node with +name+, no value and no attributes.
  def initialize(name)
    super
    @name = name
    @value = nil
    @attributes = {}
  end

  # Appends +node+ as the last child.
  #
  # A TextNode appended right after another TextNode has its value added to
  # the existing one instead of becoming a separate child. In both cases
  # +node.parent+ is set to this node.
  def appendChild(node)
    last_child = childNodes.last
    if node.kind_of?(TextNode) && last_child.kind_of?(TextNode)
      last_child.value += node.value
    else
      childNodes << node
    end
    node.parent = self
  end

  # Removes +node+ from the child list and clears its parent reference.
  def removeChild(node)
    childNodes.delete node
    node.parent = nil
  end

  # Returns a new node of the same class with the same name, a copy of the
  # attributes and the same value. Children are not copied.
  def cloneNode
    copy = self.class.new(name)
    attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
    copy.value = value
    copy
  end

  # Inserts +data+ as text, before the child +before+ when one is given
  # and at the end of the child list otherwise.
  def insertText(data, before = nil)
    text_node = TextNode.new(data)
    if before
      insertBefore text_node, before
    else
      appendChild text_node
    end
  end

  # Inserts +node+ immediately before the existing child +refNode+.
  #
  # A TextNode inserted right after another TextNode has its value added to
  # that preceding node. As in appendChild, +node.parent+ is set to this
  # node in both cases; the original implementation left it untouched.
  #
  # Raises ArgumentError when +refNode+ is not one of this node's children
  # (this used to surface as a NoMethodError or TypeError on a nil index).
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    raise ArgumentError, 'refNode is not a child of this node' if index.nil?

    previous = index > 0 ? childNodes[index - 1] : nil
    if node.kind_of?(TextNode) && previous.kind_of?(TextNode)
      previous.value += node.value
    else
      childNodes.insert index, node
    end
    node.parent = self
  end

  # Renders this node (via to_s) and all children, one per line, each line
  # prefixed with "|" and indented by +indent+ columns.
  def printTree(indent = 0)
    tree = "\n|%s%s" % [' ' * indent, self.to_s]
    childNodes.each do |child|
      tree += child.printTree(indent + 2)
    end
    tree
  end

  # True when this node has at least one child.
  def hasContent
    childNodes.length > 0
  end
end
77
+
78
# Element node of the simple tree.
class Element < Node
  # The tag name wrapped in angle brackets.
  def to_s
    "<#{name}>"
  end

  # Renders the element, then one line per attribute, then the children;
  # attributes and children are indented two columns deeper than +indent+.
  def printTree(indent = 0)
    rendered = "\n|#{' ' * indent}#{self.to_s}"
    inner = indent + 2
    attributes.each do |attr_name, attr_value|
      rendered += "\n|#{' ' * inner}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.each do |child|
      rendered += child.printTree(inner)
    end
    rendered
  end
end
95
+
96
# Root node of the simple tree.
class Document < Node
  # Fixed label used when printing the tree.
  def to_s
    "#document"
  end

  # A document has no name, so nil is handed to the Node constructor.
  def initialize
    super(nil)
  end

  # Renders the label followed by every child rendered two columns deeper
  # than +indent+. Unlike other nodes, no "\n|" prefix precedes the label.
  def printTree(indent = 0)
    rendered = to_s
    childNodes.each do |child|
      rendered += child.printTree(indent + 2)
    end
    rendered
  end
end
113
+
114
# Doctype node of the simple tree.
class DocumentType < Node
  # Public and system identifiers; both start out as nil and are expected
  # to be assigned after construction.
  attr_accessor :public_id, :system_id

  # Only the name is shown, not the identifiers.
  def to_s
    "<!DOCTYPE #{name}>"
  end

  # name - doctype name, handed to the Node constructor.
  def initialize(name)
    super(name)
    @public_id = @system_id = nil
  end
end
127
+
128
# Container for a parsed fragment: an Element constructed without a name.
class DocumentFragment < Element
  # Fragments carry no tag name, so nil is passed up the constructor chain.
  def initialize
    super(nil)
  end

  # Renders only the children (each two columns deeper than +indent+); the
  # fragment itself contributes no line to the output.
  def printTree(indent = 0)
    rendered = ""
    childNodes.each do |child|
      rendered += child.printTree(indent + 2)
    end
    rendered
  end
end
141
+
142
# Text node of the simple tree; it has no name, only a value.
class TextNode < Node
  # value - the text content of the node.
  def initialize(value)
    super(nil)
    @value = value
  end

  # The text wrapped in double quotes.
  def to_s
    "\"#{value}\""
  end
end
152
+
153
# Comment node of the simple tree; it has no name, only a value.
class CommentNode < Node
  # value - the comment text.
  def initialize(value)
    super(nil)
    @value = value
  end

  # The comment text between "<!-- " and " -->".
  def to_s
    "<!-- #{value} -->"
  end
end
163
+
164
# Tree builder that produces simple-tree nodes.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes of this module. The superclass constructor
  # is not invoked here.
  def initialize
    @documentClass = Document
    @doctypeClass  = DocumentType
    @elementClass  = Element
    @commentClass  = CommentNode
    @fragmentClass = DocumentFragment
  end

  # Returns the printTree rendering of +node+.
  def testSerializer(node)
    node.printTree
  end

  # Stores the superclass's fragment as the current document and returns it.
  def get_fragment
    @document = super
  end
end
182
+
183
+ end
184
+ end
185
+ end
@@ -0,0 +1,26 @@
1
+ require 'html5/treewalkers/base'
2
+
3
module HTML5
  module TreeWalkers

    class << self
      # Returns the TreeWalker class for the tree implementation +name+.
      #
      # name - String or Symbol, matched case-insensitively against
      #        'simpletree', 'rexml' and 'hpricot'.
      #
      # The implementation's source file is required only when it is asked
      # for. Raises RuntimeError for any other name.
      def [](name)
        key = name.to_s.downcase
        if key == 'simpletree'
          require 'html5/treewalkers/simpletree'
          SimpleTree::TreeWalker
        elsif key == 'rexml'
          require 'html5/treewalkers/rexml'
          REXML::TreeWalker
        elsif key == 'hpricot'
          require 'html5/treewalkers/hpricot'
          Hpricot::TreeWalker
        else
          raise "Unknown TreeWalker #{name}"
        end
      end

      # Named alternative to the [] lookup.
      alias :get_tree_walker :[]
    end
  end
end
@@ -0,0 +1,162 @@
1
+ require 'html5/constants'
2
module HTML5
  module TreeWalkers

    # Builders for the token hashes a tree walker emits. Every token is a
    # Hash with a :type key; the remaining keys depend on the type.
    module TokenConstructor
      # Error token carrying +msg+. Note that the type is the String
      # "SerializeError", not a Symbol like the other token types.
      def error(msg)
        {:type => "SerializeError", :data => msg}
      end

      # Converts an attribute collection into an array of pairs via to_a.
      def normalize_attrs(attrs)
        attrs.to_a
      end

      # Token for a void element.
      #
      # has_children is accepted for interface compatibility only. This
      # method returns a single token, so it cannot also deliver the
      # "Void element has children" error; the previous body built that
      # error token here and silently dropped it. Callers that want the
      # error must emit it themselves.
      def empty_tag(name, attrs, has_children=false)
        {:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
      end

      # Token opening the element +name+ with attributes +attrs+.
      def start_tag(name, attrs)
        {:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
      end

      # Token closing the element +name+.
      def end_tag(name)
        {:type => :EndTag, :name => name, :data => []}
      end

      # Splits +data+ into leading whitespace, content and trailing
      # whitespace, yielding a :SpaceCharacters or :Characters token for
      # each part that is present. Data consisting only of whitespace
      # yields a single :SpaceCharacters token. Which characters count as
      # whitespace is defined by the SPACE_CHARACTERS constant.
      def text(data)
        if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
          leading = $1
          yield({:type => :SpaceCharacters, :data => leading})
          data = data[leading.length .. -1]
          return if data.empty?
        end

        if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
          trailing = $1
          yield({:type => :Characters, :data => data[0 ... -trailing.length]})
          yield({:type => :SpaceCharacters, :data => trailing})
        else
          yield({:type => :Characters, :data => data})
        end
      end

      # Token for a comment with text +data+.
      def comment(data)
        {:type => :Comment, :data => data}
      end

      # Token for a doctype declaration.
      def doctype(name, public_id, system_id, correct=nil)
        {:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
      end

      # Error token reporting a node type the walker does not know.
      def unknown(nodeType)
        error(_("Unknown node type: ") + nodeType.to_s)
      end

      # Message hook; returns +str+ unchanged.
      def _(str)
        str
      end
    end

    # Abstract tree walker. Subclasses implement +each+, which yields the
    # tokens describing the tree given to the constructor.
    class Base
      include TokenConstructor

      # tree - root node of the tree to walk.
      def initialize(tree)
        @tree = tree
      end

      # Yields each token of the tree. Must be implemented by subclasses.
      def each
        raise NotImplementedError
      end

      # Same as +each+.
      #
      # This is a real method rather than `alias walk each`: an alias made
      # here stays bound to the abstract Base#each, so +walk+ would raise
      # NotImplementedError even on subclasses that implement +each+.
      def walk(&block)
        each(&block)
      end

      # Collects every token yielded by +each+ into an Array.
      def to_ary
        tokens = []
        each { |token| tokens << token }
        tokens
      end
    end

    # Walks a tree iteratively using four navigation hooks that subclasses
    # provide for their tree implementation.
    class NonRecursiveTreeWalker < TreeWalkers::Base
      # Returns an Array whose first element is the node kind (:DOCTYPE,
      # :TEXT, :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT, nil to
      # skip the node, or anything else for an unknown node) followed by
      # the kind-specific details consumed in +each+.
      def node_details(node)
        raise NotImplementedError
      end

      # Returns the first child of +node+, or nil.
      def first_child(node)
        raise NotImplementedError
      end

      # Returns the sibling following +node+, or nil.
      def next_sibling(node)
        raise NotImplementedError
      end

      # Returns the parent of +node+.
      def parent(node)
        raise NotImplementedError
      end

      # Yields the tokens of the tree in document order.
      #
      # A void element yields an :EmptyTag token and its children are not
      # visited; when it does have children, a "Void element has children"
      # error token is yielded right after the :EmptyTag token (this error
      # was previously lost).
      def each
        current_node = @tree
        until current_node.nil?
          details = node_details(current_node)
          has_children = false

          case details.shift
          when :DOCTYPE
            yield doctype(*details)

          when :TEXT
            text(*details) {|token| yield token}

          when :ELEMENT
            name, attributes, has_children = details
            if VOID_ELEMENTS.include?(name)
              yield empty_tag(name, attributes.to_a, has_children)
              yield error(_("Void element has children")) if has_children
              has_children = false
            else
              yield start_tag(name, attributes.to_a)
            end

          when :COMMENT
            yield comment(details[0])

          when :DOCUMENT, :DOCUMENT_FRAGMENT
            has_children = true

          when nil
            # ignore (REXML::XMLDecl is an example)

          else
            yield unknown(details[0])
          end

          child = has_children ? first_child(current_node) : nil
          if !child.nil?
            current_node = child
          else
            # No children to descend into: close elements while climbing
            # until a following sibling is found or the root is reached.
            until current_node.nil?
              details = node_details(current_node)
              if details.shift == :ELEMENT
                name, attributes, has_children = details
                yield end_tag(name) if !VOID_ELEMENTS.include?(name)
              end

              if @tree == current_node
                current_node = nil
              else
                sibling = next_sibling(current_node)
                if !sibling.nil?
                  current_node = sibling
                  break
                end

                current_node = parent(current_node)
              end
            end
          end
        end
      end
    end

  end
end