feedtools 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/CHANGELOG +11 -0
  2. data/lib/feed_tools.rb +2496 -810
  3. data/lib/feed_tools/vendor/builder.rb +2 -0
  4. data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
  5. data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
  6. data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
  7. data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
  8. data/lib/feed_tools/vendor/htree.rb +97 -0
  9. data/lib/feed_tools/vendor/htree/container.rb +10 -0
  10. data/lib/feed_tools/vendor/htree/context.rb +67 -0
  11. data/lib/feed_tools/vendor/htree/display.rb +27 -0
  12. data/lib/feed_tools/vendor/htree/doc.rb +149 -0
  13. data/lib/feed_tools/vendor/htree/elem.rb +262 -0
  14. data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
  15. data/lib/feed_tools/vendor/htree/equality.rb +218 -0
  16. data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
  17. data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
  18. data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
  19. data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
  20. data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
  21. data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
  22. data/lib/feed_tools/vendor/htree/loc.rb +367 -0
  23. data/lib/feed_tools/vendor/htree/modules.rb +48 -0
  24. data/lib/feed_tools/vendor/htree/name.rb +124 -0
  25. data/lib/feed_tools/vendor/htree/output.rb +207 -0
  26. data/lib/feed_tools/vendor/htree/parse.rb +407 -0
  27. data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
  28. data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
  29. data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
  30. data/lib/feed_tools/vendor/htree/scan.rb +166 -0
  31. data/lib/feed_tools/vendor/htree/tag.rb +111 -0
  32. data/lib/feed_tools/vendor/htree/template.rb +909 -0
  33. data/lib/feed_tools/vendor/htree/text.rb +115 -0
  34. data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
  35. data/rakefile +1 -1
  36. data/test/rss_test.rb +97 -0
  37. metadata +30 -1
@@ -0,0 +1,108 @@
1
+ # :stopdoc:
2
+ require 'pp'
3
+ require 'htree/doc'
4
+ require 'htree/elem'
5
+ require 'htree/leaf'
6
+ require 'htree/tag'
7
+ require 'htree/output'
8
+ require 'htree/raw_string'
9
+
10
+ module HTree
11
class Doc
  # Pretty-prints the document as an object group (class name followed by
  # every entry of @children, each preceded by a breakable separator).
  def pretty_print(q)
    q.object_group(self) do
      @children.each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end
  alias inspect pretty_print_inspect
end
17
+
18
class Elem
  # Pretty-prints the element.
  #
  # An element flagged @empty is shown as "{emptyelem <stag>}"; any other
  # element is shown as "{elem <stag> children... [etag]}", where the end tag
  # is only printed when @etag is set.
  def pretty_print(q)
    emit = lambda do |part|
      q.breakable
      q.pp(part)
    end

    if @empty
      q.group(1, '{emptyelem', '}') { emit.call(@stag) }
    else
      q.group(1, '{elem', '}') do
        emit.call(@stag)
        @children.each { |child| emit.call(child) }
        emit.call(@etag) if @etag
      end
    end
  end
  alias inspect pretty_print_inspect
end
36
+
37
module Leaf
  # Pretty-prints a leaf node as "{<downcased class name> ...}".
  #
  # When @raw_string is set, it is split into lines (line terminators kept)
  # and each line is printed with pp. Otherwise, if the object responds to
  # display_xml, the result of display_xml('') is printed as plain text.
  def pretty_print(q)
    q.group(1, '{', '}') do
      q.text(self.class.name.sub(/.*::/,'').downcase)
      raw = @raw_string
      if raw
        raw.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) do |line|
          q.breakable
          q.pp(line)
        end
      elsif respond_to?(:display_xml)
        q.breakable
        q.text(display_xml(''))
      end
    end
  end
  alias inspect pretty_print_inspect
end
54
+
55
class Name
  # Returns a compact textual form of the name:
  #
  # * "xmlns" / "xmlns:local" when xmlns? is true,
  # * the bare local name when there is no (or an empty) namespace URI,
  # * "prefix{uri}local" when a namespace prefix is set,
  # * "-{uri}local" when the prefix is exactly false,
  # * "{uri}local" otherwise.
  def inspect
    if xmlns?
      return @local_name ? "xmlns:#{@local_name}" : "xmlns"
    end
    return @local_name if !@namespace_uri || @namespace_uri.empty?

    expanded = "{#{@namespace_uri}}#{@local_name}"
    case @namespace_prefix
    when false then "-#{expanded}"
    when nil then expanded
    else "#{@namespace_prefix}#{expanded}"
    end
  end
end
70
+
71
class STag
  # Pretty-prints the start tag as "<name attr="value" ...>", using the
  # inspect form of each name and to_attvalue_content of each attribute text.
  def pretty_print(q)
    q.group(1, '<', '>') do
      q.text(@name.inspect)

      @attributes.each do |attr_name, attr_text|
        q.breakable
        q.text("#{attr_name.inspect}=\"#{attr_text.to_attvalue_content}\"")
      end
    end
  end
  alias inspect pretty_print_inspect
end
84
+
85
class ETag
  # Pretty-prints the end tag as "</qualified_name>".
  def pretty_print(q)
    q.group(1, '</', '>') do
      q.text(@qualified_name)
    end
  end
  alias inspect pretty_print_inspect
end
93
+
94
class BogusETag
  # Pretty-prints a stray end tag as "{bogusetag ...}".
  #
  # The raw source text is shown when @raw_string is set; otherwise an end
  # tag is synthesised from @qualified_name (without a preceding separator).
  #
  # NOTE(review): the BogusETag#initialize shown alongside this file only
  # assigns @etag, so @qualified_name may be nil here unless it is set
  # elsewhere -- confirm before relying on the fallback output.
  def pretty_print(q)
    q.group(1, '{', '}') do
      q.text(self.class.name.sub(/.*::/,'').downcase)
      raw = @raw_string
      if raw
        q.breakable
        q.text(raw)
      else
        q.text("</#{@qualified_name}>")
      end
    end
  end
end
107
+ end
108
+ # :startdoc:
@@ -0,0 +1,94 @@
1
+ # :stopdoc:
2
+ require 'htree/modules'
3
+ require 'htree/raw_string'
4
+
5
+ module HTree # :nodoc:
6
class XMLDecl # :nodoc:
  attr_reader :version, :encoding, :standalone

  # Builds an XML declaration node.
  #
  # version    - must consist of letters, digits and "_.:-" only.
  # encoding   - optional; must start with a letter followed by letters,
  #              digits, ".", "_" or "-".
  # standalone - must be nil, true or false.
  #
  # Raises HTree::Error when any argument fails its check.
  def initialize(version, encoding=nil, standalone=nil)
    init_raw_string

    unless /\A[a-zA-Z0-9_.:-]+\z/ =~ version
      raise HTree::Error, "invalid version in XML declaration: #{version.inspect}"
    end

    unless !encoding || /\A[A-Za-z][A-Za-z0-9._-]*\z/ =~ encoding
      raise HTree::Error, "invalid encoding in XML declaration: #{encoding.inspect}"
    end

    unless [nil, true, false].any? { |allowed| standalone == allowed }
      raise HTree::Error, "invalid standalone document declaration in XML declaration: #{standalone.inspect}"
    end

    @version = version
    @encoding = encoding
    @standalone = standalone
  end
end
24
+
25
class DocType # :nodoc:
  attr_reader :root_element_name, :public_identifier, :system_identifier

  # Builds a document type declaration node.
  #
  # A public identifier, when given, may only contain the characters allowed
  # by the pattern below. A system identifier, when given, is rejected only
  # if it contains both a double quote and a single quote.
  #
  # Raises HTree::Error when either identifier is invalid.
  def initialize(root_element_name, public_identifier=nil, system_identifier=nil)
    init_raw_string

    unless !public_identifier || /\A[ \x0d\x0aa-zA-Z0-9\-'()+,.\/:=?;!*\#@$_%]*\z/ =~ public_identifier
      raise HTree::Error, "invalid public identifier in document type declaration: #{public_identifier.inspect}"
    end

    has_both_quotes = system_identifier && /"/ =~ system_identifier && /'/ =~ system_identifier
    if has_both_quotes
      raise HTree::Error, "invalid system identifier in document type declaration: #{system_identifier.inspect}"
    end

    @root_element_name = root_element_name
    @public_identifier = public_identifier
    @system_identifier = system_identifier
  end
end
41
+
42
class ProcIns # :nodoc:
  attr_reader :target, :content

  class << self
    # Keep the strict, unmodified constructor reachable as ProcIns.new!.
    alias new! new
  end

  # Lenient constructor: rewrites every "?>" in the content to "? >" so the
  # strict check in initialize always passes, then delegates to new!.
  def self.new(target, content)
    safe_content = content
    safe_content = content.gsub(/\?>/, '? >') if content
    new!(target, safe_content)
  end

  # Strict constructor body (reached directly through new!).
  # Raises HTree::Error when the content contains "?>".
  def initialize(target, content) # :notnew:
    init_raw_string
    unless !content || /\?>/ !~ content
      raise HTree::Error, "invalid processing instruction content: #{content.inspect}"
    end
    @target = target
    @content = content
  end
end
64
+
65
class Comment # :nodoc:
  attr_reader :content

  class << self
    # Keep the strict, unmodified constructor reachable as Comment.new!.
    alias new! new
  end

  # Lenient constructor: separates every run of consecutive hyphens with
  # spaces and pads a trailing hyphen with a space, so the strict check in
  # initialize always passes, then delegates to new!.
  def self.new(content)
    spaced = content.gsub(/-(-+)/) { '-' + ' -' * Regexp.last_match(1).length }
    new!(spaced.sub(/-\z/, '- '))
  end

  # Strict constructor body (reached directly through new!).
  # Raises HTree::Error when the content contains "--" or ends with "-".
  def initialize(content) # :notnew:
    init_raw_string
    if /--/ =~ content || /-\z/ =~ content
      raise HTree::Error, "invalid comment content: #{content.inspect}"
    end
    @content = content
  end
end
86
+
87
class BogusETag # :nodoc:
  # Wraps an end tag that has no matching start tag: the qualified name is
  # stored as an ETag object in @etag.
  def initialize(qualified_name)
    init_raw_string()
    end_tag = ETag.new(qualified_name)
    @etag = end_tag
  end
end
93
+ end
94
+ # :startdoc:
@@ -0,0 +1,367 @@
1
+ # :stopdoc:
2
+ require 'htree/modules'
3
+ require 'htree/elem'
4
+ require 'htree/inspect'
5
+
6
+ module HTree # :nodoc:
7
+ module Node
8
# Creates a root location object (no parent, no index) that points to self.
# The location class is looked up as the Loc constant of the receiver's class.
def make_loc
  loc_class = self.class::Loc
  loc_class.new(nil, nil, self)
end

# Returns self; a node is already its own node.
def to_node
  self
end
17
+
18
+ # +subst+ substitutes several subtrees at once.
19
+ #
20
+ # t = HTree('<r><x/><y/><z/></r>')
21
+ # l = t.make_loc
22
+ # t2 = t.subst({
23
+ # l.get_subnode(0, 'k') => 'v',
24
+ # l.get_subnode(0, -1) => HTree('<a/>'),
25
+ # l.get_subnode(0, 1) => nil,
26
+ # l.get_subnode(0, 2, 0) => HTree('<b/>'),
27
+ # })
28
+ # pp t2
29
+ # # =>
30
+ # #<HTree::Doc
31
+ # {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
32
# pairs maps location objects (anything answering index_list(node)) to a
# replacement node, nil, or an array of replacement nodes.
#
# Returns the substituted tree, or self when pairs is empty.
# Raises ArgumentError when the receiver itself is replaced together with
# something below it, or when the receiver is not replaced by exactly one node.
def subst(pairs)
  # Normalise each entry to [index path relative to self, array of replacements].
  pairs = pairs.map do |key, val|
    [key.index_list(self), Array === val ? val : [val]]
  end

  pairs_empty_key, pairs_nonempty_key =
    pairs.partition { |key, _val| key.empty? }

  unless pairs_empty_key.empty?
    # An empty path addresses self, which cannot be combined with edits
    # somewhere underneath self.
    unless pairs_nonempty_key.empty?
      raise ArgumentError, "cannot substitute a node under substituting tree."
    end

    result = pairs_empty_key.flat_map { |_key, val| val }.compact
    return result[0] if result.length == 1

    # The message used to interpolate an undefined local (`nodes`), which
    # raised NameError instead of the intended ArgumentError.
    raise ArgumentError, "cannot substitute top node by multiple nodes: #{result.inspect}"
  end

  return self if pairs_nonempty_key.empty?

  subst_internal(pairs)
end
62
+
63
# Recursive worker for subst.
#
# Each key in pairs is an index path whose LAST element is the step to take
# from this node; the step is popped off (mutating the path) and entries are
# grouped by it. A group whose remaining paths are all empty replaces that
# subnode directly; otherwise the subnode is asked to substitute recursively.
def subst_internal(pairs) # :nodoc:
  by_step = {}
  pairs.each do |path, replacements|
    step = path.pop
    by_step[step] ||= []
    by_step[step] << [path, replacements]
  end

  subnode_pairs = by_step.map do |step, subpairs|
    child = get_subnode(step)
    at_child, below_child = subpairs.partition { |path, _replacements| path.empty? }

    if at_child.empty?
      [step, below_child.empty? ? child : child.subst_internal(subpairs)]
    else
      unless below_child.empty?
        raise ArgumentError, "cannot substitute a node under substituting tree."
      end
      [step, at_child.flat_map { |_path, replacements| replacements }.compact]
    end
  end

  subst_subnode(subnode_pairs)
end
88
+ end
89
+
90
# node_test_string gives the node-test text used when building location
# paths: a fixed token per node class, or the qualified element name.
class Doc
  def node_test_string
    'doc()'
  end
end

class Elem
  def node_test_string
    @stag.element_name.qualified_name
  end
end

class Text
  def node_test_string
    'text()'
  end
end

class BogusETag
  def node_test_string
    'bogus-etag()'
  end
end

class XMLDecl
  def node_test_string
    'xml-declaration()'
  end
end

class DocType
  def node_test_string
    'doctype()'
  end
end

class ProcIns
  def node_test_string
    'processing-instruction()'
  end
end

class Comment
  def node_test_string
    'comment()'
  end
end
98
+
99
module Container
  # Returns the location-path step for the child at +index+.
  #
  # Out-of-range indexes yield "*[index]". Otherwise the step is the child's
  # node test, suffixed with "[n]" (1-based position among siblings sharing
  # that node test) whenever more than one sibling shares it. The steps for
  # all children are computed once and cached in @loc_step_children; a copy
  # of the cached string is returned.
  def find_loc_step(index)
    return "*[#{index}]" if index < 0 || @children.length <= index

    unless defined? @loc_step_children
      totals = Hash.new(0)
      numbered = @children.map do |child|
        node_test = child.node_test_string
        totals[node_test] += 1
        [node_test, totals[node_test]]
      end

      @loc_step_children = numbered.map do |node_test, position|
        totals[node_test] == 1 ? node_test : "#{node_test}[#{position}]"
      end
    end

    @loc_step_children[index].dup
  end
end
130
+
131
class Elem
  # Returns the location-path step for a subnode of this element.
  #
  # Integer indexes are child positions and are delegated to super. A String
  # is parsed as an attribute name with Name.parse_attribute_name; the
  # resulting (or directly supplied) Name yields "@qualified_name".
  #
  # Raises TypeError for anything that does not end up being a Name.
  def find_loc_step(index)
    return super if Integer === index

    attr_name = String === index ? Name.parse_attribute_name(index, DefaultContext) : index
    raise TypeError, "invalid index: #{attr_name.inspect}" unless Name === attr_name

    "@#{attr_name.qualified_name}"
  end
end
143
+ end
144
+
145
+ class HTree::Location # :nodoc:
146
# Builds a location.
#
# With a parent, the node is re-read from the parent's node via
# get_subnode(index) and must be the very same object as +node+. Without a
# parent the location is a root pointing at +node+. When a node is present,
# the location's class must be that node class's Loc constant.
#
# Raises ArgumentError on a node mismatch or a wrong location class.
def initialize(parent, index, node) # :nodoc:
  if parent
    resolved = parent.node.get_subnode(index)
    @parent = parent
    @index = index
    @node = resolved
    raise ArgumentError, "unexpected node" unless resolved.equal?(node)
  else
    @parent = @index = nil
    @node = node
  end

  if @node
    expected_class = @node.class::Loc
    if self.class != expected_class
      raise ArgumentError, "invalid location class: #{self.class} should be #{expected_class}"
    end
  end

  # Per-location cache of child locations, keyed by index.
  @subloc = {}
end
164
+ attr_reader :parent, :index, :node
165
+ alias to_node node
166
+
167
# A location is already a location, so this simply returns the receiver.
def make_loc
  self
end
171
+
172
+ # +top+ returns the originator location.
173
+ #
174
+ # t = HTree('<a><b><c><d>')
175
+ # l = t.make_loc.get_subnode(0, 0, 0, 0)
176
+ # p l, l.top
177
+ # # =>
178
+ # #<HTree::Location: doc()/a/b/c/d>
179
+ # #<HTree::Location: doc()>
180
# Follows the parent links upwards and returns the first location that has
# no parent.
def top
  cursor = self
  loop do
    above = cursor.parent
    return cursor unless above
    cursor = above
  end
end
187
+
188
+ # +subst_itself+ substitutes the node pointed by the location.
189
+ # It returns the location of substituted node.
190
+ #
191
+ # t1 = HTree('<a><b><c><d>')
192
+ # p t1
193
+ # l1 = t1.make_loc.get_subnode(0, 0, 0, 0)
194
+ # p l1
195
+ # l2 = l1.subst_itself(HTree('<z/>'))
196
+ # p l2
197
+ # t2 = l2.top.to_node
198
+ # p t2
199
+ # # =>
200
+ # #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <d>}}}}>
201
+ # #<HTree::Location: doc()/a/b/c/d>
202
+ # #<HTree::Location: doc()/a/b/c/z>
203
+ # #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <z>}}}}>
204
+ #
205
# Replaces the node this location points to and returns the location of the
# replacement inside the rebuilt tree.
#
# A root location just returns node.make_loc. Otherwise the parent's node is
# rebuilt with subst_subnode, the parent location substitutes itself with
# it, and the child is looked up again. When this location pointed at no
# node and used an integer index, a negative index is looked up at 0 and an
# index beyond the parent's child count is looked up at that count.
def subst_itself(node)
  return node.make_loc unless @parent

  lookup_index = @index
  if !@node && Integer === @index
    if @index < 0
      lookup_index = 0
    else
      sibling_count = @parent.to_node.children.length
      lookup_index = sibling_count if sibling_count < @index
    end
  end

  rebuilt_parent_node = @parent.to_node.subst_subnode({@index=>node})
  @parent.subst_itself(rebuilt_parent_node).get_subnode(lookup_index)
end
222
+
223
+ # +subst+ substitutes several subtrees at once.
224
+ #
225
+ # t = HTree('<r><x/><y/><z/></r>')
226
+ # l = t.make_loc
227
+ # l2 = l.subst({
228
+ # l.root.get_subnode('k') => 'v',
229
+ # l.root.get_subnode(-1) => HTree('<a/>'),
230
+ # l.find_element('y') => nil,
231
+ # l.find_element('z').get_subnode(0) => HTree('<b/>'),
232
+ # })
233
+ # pp l2, l2.to_node
234
+ # # =>
235
+ # #<HTree::Doc::Loc: doc()>
236
+ # #<HTree::Doc
237
+ # {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
238
# Applies +pairs+ to the pointed node (via its own subst) and substitutes
# this location with the result, returning the new location.
def subst(pairs)
  replaced_node = @node.subst(pairs)
  subst_itself(replaced_node)
end
241
+
242
+ # +loc_list+ returns an array containing from location's root to itself.
243
+ #
244
+ # t = HTree('<a><b><c>')
245
+ # l = t.make_loc.get_subnode(0, 0, 0)
246
+ # pp l, l.loc_list
247
+ # # =>
248
+ # #<HTree::Location: doc()/a/b/c>
249
+ # [#<HTree::Location: doc()>,
250
+ # #<HTree::Location: doc()/a>,
251
+ # #<HTree::Location: doc()/a/b>,
252
+ # #<HTree::Location: doc()/a/b/c>]
253
+ #
254
# Returns the chain of locations from the topmost ancestor down to self.
def loc_list
  chain = []
  cursor = self
  while cursor
    chain << cursor
    cursor = cursor.parent
  end
  chain.reverse!
  chain
end
263
+
264
+ # +path+ returns the path of the location.
265
+ #
266
+ # l = HTree.parse("<a><b>x</b><b/><a/>").make_loc
267
+ # l = l.get_subnode(0, 0, 0)
268
+ # p l.path # => "doc()/a/b[1]/text()"
269
# Builds the textual path of this location: the root's node test followed by
# "/step" for every descendant, where each step comes from the parent node's
# find_loc_step for the child's index.
def path
  loc_list.inject('') do |text, loc|
    owner = loc.parent
    if owner
      text << '/' << owner.node.find_loc_step(loc.index)
    else
      text << loc.node.node_test_string
    end
  end
end
280
+
281
# Collects the indexes leading from this location up to the location whose
# node is +node+ (identity comparison). The list starts with this location's
# own index, i.e. it is ordered leaf-first.
#
# Raises ArgumentError when +node+ is not this location's node or the node
# of one of its ancestors.
def index_list(node) # :nodoc:
  indexes = []
  cursor = self
  loop do
    return indexes if cursor.to_node.equal?(node)
    above = cursor.parent
    break unless above
    indexes << cursor.index
    cursor = above
  end
  raise ArgumentError, "the location is not under the node: #{self.path}"
end
292
+
293
# Pretty-prints the location as "#<ClassName: path>", emitting the same
# steps as the textual path but with an optional line break after each "/".
def pretty_print(q)
  q.group(1, "#<#{self.class.name}", '>') do
    q.text(':')
    q.breakable
    loc_list.each do |loc|
      owner = loc.parent
      if owner
        q.text('/')
        q.group { q.breakable '' }
        q.text(owner.node.find_loc_step(loc.index))
      else
        q.text(loc.node.node_test_string)
      end
    end
  end
end
308
+ alias inspect pretty_print_inspect
309
+ end
310
+
311
module HTree::Container::Loc # :nodoc:
  # +get_subnode_internal+ returns a location object which points to the
  # subnode indexed by _index_. Results are cached per index in @subloc.
  # An existing subnode gets a location of its class's Loc constant; a
  # missing subnode gets a plain HTree::Location.
  def get_subnode_internal(index) # :nodoc:
    return @subloc[index] if @subloc.key?(index)

    subnode = @node.get_subnode(index)
    loc_class = subnode ? subnode.class::Loc : HTree::Location
    @subloc[index] = loc_class.new(self, index, subnode)
  end

  # +subst_subnode+ returns the location which refers the substituted tree.
  #   loc.subst_subnode(pairs) -> loc
  #
  #   t = HTree('<a><b><c>')
  #   l = t.make_loc.get_subnode(0, 0)
  #   l = l.subst_subnode({0=>HTree('<z/>')})
  #   pp t, l.top.to_node
  #   # =>
  #   #<HTree::Doc {elem <a> {elem <b> {emptyelem <c>}}}>
  #   #<HTree::Doc {elem <a> {elem <b> {emptyelem <z>}}}>
  #
  def subst_subnode(pairs)
    rebuilt_node = @node.subst_subnode(pairs)
    subst_itself(rebuilt_node)
  end

  # +children+ returns an array of child locations, one per child of the
  # pointed node, in order.
  def children
    Array.new(@node.children.length) { |i| get_subnode(i) }
  end
end
344
+
345
class HTree::Elem::Loc # :nodoc:
  # Delegates to the pointed element's context.
  def context
    @node.context
  end

  # +element_name+ returns the name of the element as reported by the
  # pointed element.
  def element_name
    @node.element_name
  end

  # Delegates to the pointed element's empty_element?.
  def empty_element?
    @node.empty_element?
  end

  # +each_attribute+ iterates over each attribute, yielding the attribute
  # name together with the location of that attribute (not its text).
  def each_attribute
    @node.each_attribute do |attr_name, _attr_text|
      yield attr_name, get_subnode(attr_name)
    end
  end
end
361
+
362
class HTree::Text::Loc # :nodoc:
  # Each method below simply forwards to the pointed text node.

  def to_s
    @node.to_s
  end

  def strip
    @node.strip
  end

  def empty?
    @node.empty?
  end
end
367
+ # :startdoc: