feedtools 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/CHANGELOG +11 -0
  2. data/lib/feed_tools.rb +2496 -810
  3. data/lib/feed_tools/vendor/builder.rb +2 -0
  4. data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
  5. data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
  6. data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
  7. data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
  8. data/lib/feed_tools/vendor/htree.rb +97 -0
  9. data/lib/feed_tools/vendor/htree/container.rb +10 -0
  10. data/lib/feed_tools/vendor/htree/context.rb +67 -0
  11. data/lib/feed_tools/vendor/htree/display.rb +27 -0
  12. data/lib/feed_tools/vendor/htree/doc.rb +149 -0
  13. data/lib/feed_tools/vendor/htree/elem.rb +262 -0
  14. data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
  15. data/lib/feed_tools/vendor/htree/equality.rb +218 -0
  16. data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
  17. data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
  18. data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
  19. data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
  20. data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
  21. data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
  22. data/lib/feed_tools/vendor/htree/loc.rb +367 -0
  23. data/lib/feed_tools/vendor/htree/modules.rb +48 -0
  24. data/lib/feed_tools/vendor/htree/name.rb +124 -0
  25. data/lib/feed_tools/vendor/htree/output.rb +207 -0
  26. data/lib/feed_tools/vendor/htree/parse.rb +407 -0
  27. data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
  28. data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
  29. data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
  30. data/lib/feed_tools/vendor/htree/scan.rb +166 -0
  31. data/lib/feed_tools/vendor/htree/tag.rb +111 -0
  32. data/lib/feed_tools/vendor/htree/template.rb +909 -0
  33. data/lib/feed_tools/vendor/htree/text.rb +115 -0
  34. data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
  35. data/rakefile +1 -1
  36. data/test/rss_test.rb +97 -0
  37. metadata +30 -1
@@ -0,0 +1,108 @@
1
+ # :stopdoc:
2
+ require 'pp'
3
+ require 'htree/doc'
4
+ require 'htree/elem'
5
+ require 'htree/leaf'
6
+ require 'htree/tag'
7
+ require 'htree/output'
8
+ require 'htree/raw_string'
9
+
10
+ module HTree
11
class Doc
  # Pretty-prints the document through the PP object +q+ as an object group
  # (<tt>#<ClassName ...></tt>) that holds every child node, each one
  # preceded by a breakable space.
  def pretty_print(q)
    q.object_group(self) do
      @children.each do |child|
        q.breakable
        q.pp child
      end
    end
  end
  alias inspect pretty_print_inspect
end
17
+
18
class Elem
  # Pretty-prints the element through the PP object +q+.
  #
  # An empty element is shown as <tt>{emptyelem STAG}</tt>.  Any other
  # element is shown as <tt>{elem STAG CHILD... ETAG}</tt>, where the end tag
  # is printed only when one is present.
  def pretty_print(q)
    opener = @empty ? '{emptyelem' : "{elem"
    q.group(1, opener, '}') do
      q.breakable
      q.pp @stag
      unless @empty
        @children.each do |child|
          q.breakable
          q.pp child
        end
        if @etag
          q.breakable
          q.pp @etag
        end
      end
    end
  end
  alias inspect pretty_print_inspect
end
36
+
37
module Leaf
  # Pretty-prints a leaf node through the PP object +q+ as
  # <tt>{kind ...}</tt>, where +kind+ is the downcased class name with any
  # namespace prefix removed.
  #
  # When the leaf has a raw string, that string is printed line by line
  # (each line keeps its line terminator and is shown in inspected form).
  # Otherwise, if the object responds to +display_xml+, the result of
  # <tt>display_xml('')</tt> is printed as-is.
  def pretty_print(q)
    q.group(1, '{', '}') do
      q.text self.class.name.sub(/.*::/,'').downcase
      raw = @raw_string
      if raw
        raw.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) do |line|
          q.breakable
          q.pp line
        end
      elsif self.respond_to? :display_xml
        q.breakable
        q.text self.display_xml('')
      end
    end
  end
  alias inspect pretty_print_inspect
end
54
+
55
class Name
  # Returns a compact textual form of the name:
  #
  # * <tt>xmlns</tt> or <tt>xmlns:LOCAL</tt> for namespace declarations,
  # * the bare local name when there is no (or an empty) namespace URI,
  # * <tt>PREFIX{URI}LOCAL</tt> when a namespace prefix is set,
  # * <tt>-{URI}LOCAL</tt> when the prefix is exactly +false+,
  # * <tt>{URI}LOCAL</tt> otherwise.
  def inspect
    return(@local_name ? "xmlns:#{@local_name}" : "xmlns") if xmlns?
    return @local_name if !@namespace_uri || @namespace_uri.empty?

    braced = "{#{@namespace_uri}}#{@local_name}"
    if @namespace_prefix
      "#{@namespace_prefix}#{braced}"
    elsif @namespace_prefix == false
      "-#{braced}"
    else
      braced
    end
  end
end
70
+
71
class STag
  # Pretty-prints the start tag through the PP object +q+ as
  # <tt><NAME ATTR="VALUE" ...></tt>.  Names are shown in their +inspect+
  # form and attribute values via +to_attvalue_content+.
  def pretty_print(q)
    q.group(1, '<', '>') do
      q.text @name.inspect
      @attributes.each do |attr_name, attr_text|
        q.breakable
        rendered = "#{attr_name.inspect}=\"#{attr_text.to_attvalue_content}\""
        q.text rendered
      end
    end
  end
  alias inspect pretty_print_inspect
end
84
+
85
class ETag
  # Pretty-prints the end tag through the PP object +q+ as
  # <tt></QUALIFIED_NAME></tt>.
  def pretty_print(q)
    q.group(1, '</', '>') do
      q.text @qualified_name
    end
  end
  alias inspect pretty_print_inspect
end
93
+
94
class BogusETag
  # Pretty-prints the bogus end tag through the PP object +q+ as
  # <tt>{bogusetag ...}</tt>.
  #
  # When a raw string is available it is printed verbatim after a breakable
  # space.  Otherwise the wrapped end tag is printed.
  def pretty_print(q)
    q.group(1, '{', '}') {
      q.text self.class.name.sub(/.*::/,'').downcase
      if rs = @raw_string
        q.breakable
        q.text rs
      else
        # BogusETag#initialize stores the name inside a wrapped ETag (@etag)
        # and never sets @qualified_name on itself, so reading
        # @qualified_name here always printed "</>".  Delegating to the
        # ETag's own pretty printer yields "</name>".
        q.pp @etag
      end
    }
  end
end
107
+ end
108
+ # :startdoc:
@@ -0,0 +1,94 @@
1
+ # :stopdoc:
2
+ require 'htree/modules'
3
+ require 'htree/raw_string'
4
+
5
+ module HTree # :nodoc:
6
class XMLDecl # :nodoc:
  attr_reader :version, :encoding, :standalone

  # Creates an XML declaration.
  #
  # +version+ must consist of letters, digits, "_", ".", ":" or "-".
  # +encoding+, when given, must start with a letter and continue with
  # letters, digits, ".", "_" or "-".  +standalone+ must be +nil+, +true+
  # or +false+.  HTree::Error is raised when any argument is invalid.
  def initialize(version, encoding=nil, standalone=nil)
    init_raw_string
    unless /\A[a-zA-Z0-9_.:-]+\z/ =~ version
      raise HTree::Error, "invalid version in XML declaration: #{version.inspect}"
    end
    if encoding
      unless /\A[A-Za-z][A-Za-z0-9._-]*\z/ =~ encoding
        raise HTree::Error, "invalid encoding in XML declaration: #{encoding.inspect}"
      end
    end
    standalone_ok = standalone == nil || standalone == true || standalone == false
    if !standalone_ok
      raise HTree::Error, "invalid standalone document declaration in XML declaration: #{standalone.inspect}"
    end
    @version = version
    @encoding = encoding
    @standalone = standalone
  end
end
24
+
25
class DocType # :nodoc:
  attr_reader :root_element_name, :public_identifier, :system_identifier

  # Creates a document type declaration.
  #
  # +public_identifier+, when given, may only contain the characters
  # accepted by the pattern below.  +system_identifier+, when given, is
  # rejected only if it contains both a double quote and a single quote.
  # HTree::Error is raised for an invalid identifier.
  def initialize(root_element_name, public_identifier=nil, system_identifier=nil)
    init_raw_string
    if public_identifier
      unless /\A[ \x0d\x0aa-zA-Z0-9\-'()+,.\/:=?;!*\#@$_%]*\z/ =~ public_identifier
        raise HTree::Error, "invalid public identifier in document type declaration: #{public_identifier.inspect}"
      end
    end
    has_both_quotes = system_identifier && /"/ =~ system_identifier && /'/ =~ system_identifier
    if has_both_quotes
      raise HTree::Error, "invalid system identifier in document type declaration: #{system_identifier.inspect}"
    end

    @root_element_name = root_element_name
    @public_identifier = public_identifier
    @system_identifier = system_identifier
  end
end
41
+
42
class ProcIns # :nodoc:
  class << self
    # Keep the plain constructor reachable as +new!+; it validates the
    # content strictly instead of repairing it.
    alias new! new
  end

  # Lenient constructor: any "?>" inside +content+ is rewritten to "? >"
  # before the instance is built, so the strict check in +initialize+
  # passes.
  def self.new(target, content)
    repaired = content ? content.gsub(/\?>/, '? >') : content
    new!(target, repaired)
  end

  # Strict initializer: raises HTree::Error when +content+ contains "?>".
  def initialize(target, content) # :notnew:
    init_raw_string
    raise HTree::Error, "invalid processing instruction content: #{content.inspect}" if content && /\?>/ =~ content
    @target = target
    @content = content
  end
  attr_reader :target, :content
end
64
+
65
class Comment # :nodoc:
  class << self
    # Keep the plain constructor reachable as +new!+; it validates the
    # content strictly instead of repairing it.
    alias new! new
  end

  # Lenient constructor: a space is inserted between consecutive hyphens and
  # after a trailing hyphen, so the strict check in +initialize+ passes.
  def self.new(content)
    spaced = content.gsub(/-(-+)/) { '-' + ' -' * $1.length }
    new!(spaced.sub(/-\z/, '- '))
  end

  # Strict initializer: raises HTree::Error when +content+ contains "--" or
  # ends with "-".
  def initialize(content) # :notnew:
    init_raw_string
    raise HTree::Error, "invalid comment content: #{content.inspect}" if /--/ =~ content || /-\z/ =~ content
    @content = content
  end
  attr_reader :content
end
86
+
87
# Leaf node that wraps an end tag; the tag is built from +qualified_name+
# and kept in @etag.
class BogusETag # :nodoc:
  # Initializes the raw-string state and stores a new ETag for
  # +qualified_name+.
  def initialize(qualified_name)
    init_raw_string
    @etag = ETag.new(qualified_name)
  end
end
93
+ end
94
+ # :startdoc:
@@ -0,0 +1,367 @@
1
+ # :stopdoc:
2
+ require 'htree/modules'
3
+ require 'htree/elem'
4
+ require 'htree/inspect'
5
+
6
+ module HTree # :nodoc:
7
module Node
  # creates a location object which points to self.
  def make_loc
    self.class::Loc.new(nil, nil, self)
  end

  # return self.
  def to_node
    self
  end

  # +subst+ substitutes several subtrees at once.
  #
  # +pairs+ maps locations (objects responding to +index_list+) to their
  # replacement: a single value, an array of values, or +nil+ to remove.
  #
  #   t = HTree('<r><x/><y/><z/></r>')
  #   l = t.make_loc
  #   t2 = t.subst({
  #     l.get_subnode(0, 'k') => 'v',
  #     l.get_subnode(0, -1) => HTree('<a/>'),
  #     l.get_subnode(0, 1) => nil,
  #     l.get_subnode(0, 2, 0) => HTree('<b/>'),
  #   })
  #   pp t2
  #   # =>
  #   #<HTree::Doc
  #    {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
  #
  # Raises ArgumentError when a location lies under another substituted
  # location, or when the top node itself would be replaced by anything
  # other than exactly one node.
  def subst(pairs)
    # Normalize: each key becomes its index list relative to self, each
    # value becomes an array.
    pairs = pairs.map {|key, val|
      key = key.index_list(self)
      unless Array === val
        val = [val]
      end
      [key, val]
    }

    # An empty index list addresses self.
    pairs_empty_key, pairs_nonempty_key =
      pairs.partition {|key, val| key.empty? }
    if !pairs_empty_key.empty?
      if !pairs_nonempty_key.empty?
        raise ArgumentError, "cannot substitute a node under substituting tree."
      end
      result = []
      pairs_empty_key.each {|key, val| result.concat val }
      result.compact!
      if result.length == 1
        return result[0]
      else
        # The message used to interpolate an undefined local (+nodes+),
        # which raised NameError instead of this ArgumentError.
        raise ArgumentError, "cannot substitute top node by multiple nodes: #{result.inspect}"
      end
    end
    if pairs_nonempty_key.empty?
      return self
    end

    subst_internal(pairs)
  end

  # Applies the normalized +pairs+ ([index_list, values]) below self.
  # The last entry of each index list is popped (mutating the list) and
  # used as the subnode index; pairs sharing it are handled together,
  # recursing into that subnode while index entries remain.
  def subst_internal(pairs) # :nodoc:
    subnode_pairs = {}
    pairs.each {|key, val|
      k = key.pop
      (subnode_pairs[k] ||= []) << [key, val]
    }
    subnode_pairs = subnode_pairs.map {|k, subpairs|
      s = get_subnode(k)
      subpairs_empty_key, subpairs_nonempty_key =
        subpairs.partition {|key, val| key.empty? }
      if !subpairs_empty_key.empty?
        if !subpairs_nonempty_key.empty?
          raise ArgumentError, "cannot substitute a node under substituting tree."
        end
        r = []
        subpairs_empty_key.each {|key, val| r.concat val }
        [k, r.compact]
      elsif subpairs_nonempty_key.empty?
        [k, s]
      else
        [k, s.subst_internal(subpairs)]
      end
    }
    subst_subnode(subnode_pairs)
  end
end
89
+
90
# node_test_string returns the node test used for this kind of node when a
# location path is rendered.

class Doc
  def node_test_string
    'doc()'
  end
end

class Elem
  # An element is identified by the qualified name of its start tag.
  def node_test_string
    @stag.element_name.qualified_name
  end
end

class Text
  def node_test_string
    'text()'
  end
end

class BogusETag
  def node_test_string
    'bogus-etag()'
  end
end

class XMLDecl
  def node_test_string
    'xml-declaration()'
  end
end

class DocType
  def node_test_string
    'doctype()'
  end
end

class ProcIns
  def node_test_string
    'processing-instruction()'
  end
end

class Comment
  def node_test_string
    'comment()'
  end
end
98
+
99
module Container
  # Returns the location step (a path fragment) for the child at +index+.
  #
  # An out-of-range index yields <tt>*[INDEX]</tt>.  Otherwise the step is
  # the child's node test, followed by <tt>[N]</tt> (1-based position among
  # the children with the same node test) when that node test occurs more
  # than once.  The steps for all children are computed on first use and
  # cached in @loc_step_children; a copy of the cached string is returned.
  def find_loc_step(index)
    return "*[#{index}]" if index < 0 || @children.length <= index
    return @loc_step_children[index].dup if defined? @loc_step_children

    # First pass: record each child's node test with its running position.
    occurrences = Hash.new(0)
    numbered = @children.map {|child|
      node_test = child.node_test_string
      [node_test, occurrences[node_test] += 1]
    }

    # Second pass: only ambiguous node tests get a positional suffix.
    @loc_step_children = numbered.map {|node_test, position|
      occurrences[node_test] == 1 ? node_test : "#{node_test}[#{position}]"
    }

    @loc_step_children[index].dup
  end
end
130
+
131
class Elem
  # Returns the location step for +index+.
  #
  # Integer indexes are handled by the inherited implementation.  A String
  # is parsed as an attribute name; the resulting (or directly given) Name
  # yields <tt>@QUALIFIED_NAME</tt>.  Anything else raises TypeError.
  def find_loc_step(index)
    return super if Integer === index

    attr_name = String === index ? Name.parse_attribute_name(index, DefaultContext) : index
    raise TypeError, "invalid index: #{attr_name.inspect}" unless Name === attr_name
    "@#{attr_name.qualified_name}"
  end
end
143
+ end
144
+
145
class HTree::Location # :nodoc:
  # Builds a location for +node+.
  #
  # With a +parent+ location the node is looked up again through
  # <tt>parent.node.get_subnode(index)</tt> and must be the very same object
  # as +node+, otherwise ArgumentError is raised.  Without a parent the
  # location is a top location and +index+ is not stored.
  #
  # ArgumentError is also raised when a node is present and this object's
  # class is not the +Loc+ class of the node's class.
  def initialize(parent, index, node) # :nodoc:
    if parent
      @parent = parent
      @index = index
      @node = parent.node.get_subnode(index)
      raise ArgumentError, "unexpected node" unless @node.equal?(node)
    else
      @parent = nil
      @index = nil
      @node = node
    end
    if @node && self.class != @node.class::Loc
      raise ArgumentError, "invalid location class: #{self.class} should be #{node.class::Loc}"
    end
    @subloc = {}
  end
  attr_reader :parent, :index, :node
  alias to_node node

  # return self.
  def make_loc
    self
  end

  # +top+ returns the originator location.
  #
  #   t = HTree('<a><b><c><d>')
  #   l = t.make_loc.get_subnode(0, 0, 0, 0)
  #   p l, l.top
  #   # =>
  #   #<HTree::Location: doc()/a/b/c/d>
  #   #<HTree::Location: doc()>
  def top
    cursor = self
    cursor = cursor.parent while cursor.parent
    cursor
  end

  # +subst_itself+ substitutes the node pointed by the location.
  # It returns the location of substituted node.
  #
  #   t1 = HTree('<a><b><c><d>')
  #   p t1
  #   l1 = t1.make_loc.get_subnode(0, 0, 0, 0)
  #   p l1
  #   l2 = l1.subst_itself(HTree('<z/>'))
  #   p l2
  #   t2 = l2.top.to_node
  #   p t2
  #   # =>
  #   #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <d>}}}}>
  #   #<HTree::Location: doc()/a/b/c/d>
  #   #<HTree::Location: doc()/a/b/c/z>
  #   #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <z>}}}}>
  #
  def subst_itself(node)
    return node.make_loc unless @parent

    # When this location points at no existing node and has an integer
    # index, the index at which the new node is looked up afterwards is
    # clamped to the range 0..children.length of the parent.
    new_index = @index
    if !@node && Integer === @index
      if @index < 0
        new_index = 0
      else
        sibling_count = @parent.to_node.children.length
        new_index = sibling_count if sibling_count < @index
      end
    end

    replaced_parent = @parent.to_node.subst_subnode({@index=>node})
    @parent.subst_itself(replaced_parent).get_subnode(new_index)
  end

  # +subst+ substitutes several subtrees at once.
  #
  #   t = HTree('<r><x/><y/><z/></r>')
  #   l = t.make_loc
  #   l2 = l.subst({
  #     l.root.get_subnode('k') => 'v',
  #     l.root.get_subnode(-1) => HTree('<a/>'),
  #     l.find_element('y') => nil,
  #     l.find_element('z').get_subnode(0) => HTree('<b/>'),
  #   })
  #   pp l2, l2.to_node
  #   # =>
  #   #<HTree::Doc::Loc: doc()>
  #   #<HTree::Doc
  #    {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
  def subst(pairs)
    subst_itself(@node.subst(pairs))
  end

  # +loc_list+ returns an array containing from location's root to itself.
  #
  #   t = HTree('<a><b><c>')
  #   l = t.make_loc.get_subnode(0, 0, 0)
  #   pp l, l.loc_list
  #   # =>
  #   #<HTree::Location: doc()/a/b/c>
  #   [#<HTree::Location: doc()>,
  #    #<HTree::Location: doc()/a>,
  #    #<HTree::Location: doc()/a/b>,
  #    #<HTree::Location: doc()/a/b/c>]
  #
  def loc_list
    chain = []
    cursor = self
    while cursor
      chain.unshift cursor
      cursor = cursor.parent
    end
    chain
  end

  # +path+ returns the path of the location.
  #
  #   l = HTree.parse("<a><b>x</b><b/><a/>").make_loc
  #   l = l.get_subnode(0, 0, 0)
  #   p l.path # => "doc()/a/b[1]/text()"
  def path
    loc_list.each_with_object('') {|loc, out|
      up = loc.parent
      if up
        out << '/' << up.node.find_loc_step(loc.index)
      else
        out << loc.node.node_test_string
      end
    }
  end

  # Returns the indexes leading from this location up to +node+, innermost
  # index first.  Raises ArgumentError when +node+ is not this location's
  # node or the node of one of its ancestors.
  def index_list(node) # :nodoc:
    indexes = []
    cursor = self
    loop do
      return indexes if cursor.to_node.equal?(node)
      up = cursor.parent
      break unless up
      indexes << cursor.index
      cursor = up
    end
    raise ArgumentError, "the location is not under the node: #{self.path}"
  end

  # Pretty-prints the location through the PP object +q+ as
  # <tt>#<ClassName: PATH></tt>, allowing a line break before every step
  # that follows a "/".
  def pretty_print(q)
    q.group(1, "#<#{self.class.name}", '>') do
      q.text ':'
      q.breakable
      loc_list.each do |loc|
        up = loc.parent
        if up
          q.text '/'
          q.group { q.breakable '' }
          q.text up.node.find_loc_step(loc.index)
        else
          q.text loc.node.node_test_string
        end
      end
    end
  end
  alias inspect pretty_print_inspect
end
310
+
311
module HTree::Container::Loc # :nodoc:
  # Returns the location of the subnode indexed by _index_, creating it on
  # first request and caching it in @subloc.  When the node has no such
  # subnode, a plain HTree::Location is created instead of a node-specific
  # +Loc+.
  def get_subnode_internal(index) # :nodoc:
    return @subloc[index] if @subloc.include? index

    node = @node.get_subnode(index)
    loc_class = node ? node.class::Loc : HTree::Location
    @subloc[index] = loc_class.new(self, index, node)
  end

  # +subst_subnode+ returns the location which refers the substituted tree.
  #   loc.subst_subnode(pairs) -> loc
  #
  #   t = HTree('<a><b><c>')
  #   l = t.make_loc.get_subnode(0, 0)
  #   l = l.subst_subnode({0=>HTree('<z/>')})
  #   pp t, l.top.to_node
  #   # =>
  #   #<HTree::Doc {elem <a> {elem <b> {emptyelem <c>}}}>
  #   #<HTree::Doc {elem <a> {elem <b> {emptyelem <z>}}}>
  #
  def subst_subnode(pairs)
    replaced = @node.subst_subnode(pairs)
    self.subst_itself(replaced)
  end

  # +children+ returns an array of child locations.
  def children
    Array.new(@node.children.length) {|i| get_subnode(i) }
  end
end
344
+
345
class HTree::Elem::Loc # :nodoc:
  # Returns the context of the underlying element node.
  def context
    @node.context
  end

  # +element_name+ returns the name of the element name as a Name object.
  def element_name
    @node.element_name
  end

  # Forwards to the underlying element node.
  def empty_element?
    @node.empty_element?
  end

  # +each_attribute+ iterates over each attributes, yielding the attribute
  # name together with the location of that attribute (not its text).
  def each_attribute
    @node.each_attribute do |attr_name, _attr_text|
      yield attr_name, get_subnode(attr_name)
    end
  end
end
361
+
362
class HTree::Text::Loc # :nodoc:
  # The methods below simply forward to the underlying text node.

  def to_s
    @node.to_s
  end

  def strip
    @node.strip
  end

  def empty?
    @node.empty?
  end
end
367
+ # :startdoc: