feedtools 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/CHANGELOG +11 -0
  2. data/lib/feed_tools.rb +2496 -810
  3. data/lib/feed_tools/vendor/builder.rb +2 -0
  4. data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
  5. data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
  6. data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
  7. data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
  8. data/lib/feed_tools/vendor/htree.rb +97 -0
  9. data/lib/feed_tools/vendor/htree/container.rb +10 -0
  10. data/lib/feed_tools/vendor/htree/context.rb +67 -0
  11. data/lib/feed_tools/vendor/htree/display.rb +27 -0
  12. data/lib/feed_tools/vendor/htree/doc.rb +149 -0
  13. data/lib/feed_tools/vendor/htree/elem.rb +262 -0
  14. data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
  15. data/lib/feed_tools/vendor/htree/equality.rb +218 -0
  16. data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
  17. data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
  18. data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
  19. data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
  20. data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
  21. data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
  22. data/lib/feed_tools/vendor/htree/loc.rb +367 -0
  23. data/lib/feed_tools/vendor/htree/modules.rb +48 -0
  24. data/lib/feed_tools/vendor/htree/name.rb +124 -0
  25. data/lib/feed_tools/vendor/htree/output.rb +207 -0
  26. data/lib/feed_tools/vendor/htree/parse.rb +407 -0
  27. data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
  28. data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
  29. data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
  30. data/lib/feed_tools/vendor/htree/scan.rb +166 -0
  31. data/lib/feed_tools/vendor/htree/tag.rb +111 -0
  32. data/lib/feed_tools/vendor/htree/template.rb +909 -0
  33. data/lib/feed_tools/vendor/htree/text.rb +115 -0
  34. data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
  35. data/rakefile +1 -1
  36. data/test/rss_test.rb +97 -0
  37. metadata +30 -1
@@ -0,0 +1,163 @@
1
+ # :stopdoc:
2
+ require 'iconv'
3
+
4
+ module HTree
5
+ class Encoder
6
+ # HTree::Encoder.internal_charset returns the MIME charset corresponding to $KCODE.
7
+ #
8
+ # - 'ISO-8859-1' when $KCODE=='NONE'
9
+ # - 'UTF-8' when $KCODE=='UTF8'
10
+ # - 'EUC-JP' when $KCODE=='EUC'
11
+ # - 'Shift_JIS' when $KCODE=='SJIS'
12
+ #
13
+ # This mapping ignores, at least, EUC-KR and the various single-byte charsets other than ISO-8859-1.
14
+ # This should be fixed when Ruby is m17nized.
15
+ def Encoder.internal_charset
16
+ KcodeCharset[$KCODE]
17
+ end
18
+
19
+ def initialize(output_encoding, internal_encoding=HTree::Encoder.internal_charset)
20
+ @buf = ''
21
+ @internal_encoding = internal_encoding
22
+ @output_encoding = output_encoding
23
+ @ic = Iconv.new(output_encoding, @internal_encoding)
24
+ @charpat = FirstCharPattern[internal_encoding]
25
+
26
+ @subcharset_list = SubCharset[output_encoding] || []
27
+ @subcharset_ic = {}
28
+ @subcharset_list.each {|subcharset|
29
+ @subcharset_ic[subcharset] = Iconv.new(subcharset, @internal_encoding)
30
+ }
31
+ end
32
+
33
+ def output_string(internal_str, external_str=@ic.iconv(internal_str))
34
+ @buf << external_str
35
+ @subcharset_ic.reject! {|subcharset, ic|
36
+ begin
37
+ ic.iconv(internal_str) != external_str
38
+ rescue Iconv::Failure
39
+ true
40
+ end
41
+ }
42
+ nil
43
+ end
44
+
45
+ def output_text(string)
46
+ begin
47
+ output_string string, @ic.iconv(string)
48
+ rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => e
49
+ output_string string[0, string.length - e.failed.length], e.success
50
+ unless @charpat =~ e.failed
51
+ # xxx: should be configurable?
52
+ #raise ArgumentError, "cannot extract first character: #{e.failed.dump}"
53
+ string = e.failed[1, e.failed.length-1]
54
+ output_string '?'
55
+ retry
56
+ end
57
+ char = $&
58
+ rest = $'
59
+ begin
60
+ ucode = Iconv.conv("UTF-8", @internal_encoding, char).unpack("U")[0]
61
+ char = "&##{ucode};"
62
+ rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
63
+ # xxx: should be configurable?
64
+ char = '?'
65
+ end
66
+ output_string char
67
+ string = rest
68
+ retry
69
+ end
70
+ end
71
+
72
+ ChRef = {
73
+ '&' => '&amp;',
74
+ '<' => '&lt;',
75
+ '>' => '&gt;',
76
+ '"' => '&quot;',
77
+ }
78
+
79
+ def output_dynamic_text(string)
80
+ if string.respond_to? :rcdata
81
+ output_text(string.rcdata.gsub(/[<>]/) { ChRef[$&] })
82
+ else
83
+ output_text(string.to_s.gsub(/[&<>]/) { ChRef[$&] })
84
+ end
85
+ end
86
+
87
+ def output_dynamic_attvalue(string)
88
+ if string.respond_to? :rcdata
89
+ output_text(string.rcdata.gsub(/[<>"]/) { ChRef[$&] })
90
+ else
91
+ output_text(string.to_s.gsub(/[&<>"]/) { ChRef[$&] })
92
+ end
93
+ end
94
+
95
+ def finish
96
+ external_str = @ic.close
97
+ @buf << external_str
98
+ @subcharset_ic.reject! {|subcharset, ic|
99
+ begin
100
+ ic.close != external_str
101
+ rescue Iconv::Failure
102
+ true
103
+ end
104
+ }
105
+ @buf
106
+ end
107
+
108
+ def finish_with_xmldecl
109
+ content = finish
110
+ xmldecl = Iconv.conv(@output_encoding, 'US-ASCII',
111
+ "<?xml version=\"1.0\" encoding=\"#{minimal_charset}\"?>")
112
+ xmldecl + content
113
+ end
114
+
115
+ def minimal_charset
116
+ @subcharset_list.each {|subcharset|
117
+ if @subcharset_ic.include? subcharset
118
+ return subcharset
119
+ end
120
+ }
121
+ @output_encoding
122
+ end
123
+
124
+ KcodeCharset = {
125
+ 'EUC' => 'EUC-JP',
126
+ 'SJIS' => 'Shift_JIS',
127
+ 'UTF8' => 'UTF-8',
128
+ 'NONE' => 'ISO-8859-1',
129
+ }
130
+
131
+ FirstCharPattern = {
132
+ 'EUC-JP' => /\A(?:
133
+ [\x00-\x7f]
134
+ |[\xa1-\xfe][\xa1-\xfe]
135
+ |\x8e[\xa1-\xfe]
136
+ |\x8f[\xa1-\xfe][\xa1-\xfe])/nx,
137
+ 'Shift_JIS' => /\A(?:
138
+ [\x00-\x7f]
139
+ |[\x81-\x9f][\x40-\x7e\x80-\xfc]
140
+ |[\xa1-\xdf]
141
+ |[\xe0-\xfc][\x40-\x7e\x80-\xfc])/nx,
142
+ 'UTF-8' => /\A(?:
143
+ [\x00-\x7f]
144
+ |[\xc0-\xdf][\x80-\xbf]
145
+ |[\xe0-\xef][\x80-\xbf][\x80-\xbf]
146
+ |[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]
147
+ |[\xf8-\xfb][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf]
148
+ |[\xfc-\xfd][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf])/nx,
149
+ 'ISO-8859-1' => /\A[\x00-\xff]/n
150
+ }
151
+
152
+ SubCharset = {
153
+ 'ISO-2022-JP-2' => ['US-ASCII', 'ISO-2022-JP'],
154
+ 'ISO-2022-JP-3' => ['US-ASCII', 'ISO-2022-JP'],
155
+ 'UTF-16BE' => [],
156
+ 'UTF-16LE' => [],
157
+ 'UTF-16' => [],
158
+ }
159
+ SubCharset.default = ['US-ASCII']
160
+
161
+ end
162
+ end
163
+ # :startdoc:
@@ -0,0 +1,218 @@
1
+ # :stopdoc:
2
+ require 'htree/doc'
3
+ require 'htree/elem'
4
+ require 'htree/leaf'
5
+ require 'htree/tag'
6
+ require 'htree/raw_string'
7
+ require 'htree/context'
8
+
9
+ module HTree
10
+ # compare tree structures.
11
+ def ==(other)
12
+ check_equality(self, other, :usual_equal_object)
13
+ end
14
+ alias eql? ==
15
+
16
+ # hash value for the tree structure.
17
+ def hash
18
+ return @hash_code if defined? @hash_code
19
+ @hash_code = usual_equal_object.hash
20
+ end
21
+
22
+ def usual_equal_object
23
+ return @usual_equal_object if defined? @usual_equal_object
24
+ @usual_equal_object = make_usual_equal_object
25
+ end
26
+
27
+ def make_usual_equal_object
28
+ raise NotImplementedError
29
+ end
30
+
31
+ def exact_equal_object
32
+ return @exact_equal_object if defined? @exact_equal_object
33
+ @exact_equal_object = make_exact_equal_object
34
+ end
35
+
36
+ def make_exact_equal_object
37
+ raise NotImplementedError
38
+ end
39
+
40
+ def exact_equal?(other)
41
+ check_equality(self, other, :exact_equal_object)
42
+ end
43
+
44
# Structurally compares obj1 and obj2.
#
# obj1, obj2          - the values to compare.
# equal_object_method - Symbol naming the method that yields an object's
#                       comparison key (e.g. :usual_equal_object).
#
# Rules, in order:
# * objects of different classes are never equal;
# * Arrays are equal when they have the same length and every pair of
#   elements is equal under these same rules;
# * an object responding to +equal_object_method+ is compared through the
#   value that method returns;
# * anything else falls back to plain ==.
#
# Returns true or false for the Array branch, otherwise whatever the
# recursive call or == returns.
def check_equality(obj1, obj2, equal_object_method)
  return false unless obj1.class == obj2.class

  if obj1.class == Array
    return false unless obj1.length == obj2.length

    pairs = obj1.zip(obj2)
    # Cheap shallow pass first: reject on any element class mismatch before
    # paying for the recursive comparison of earlier elements.
    return false unless pairs.all? { |left, right| left.class == right.class }

    pairs.all? { |left, right| check_equality(left, right, equal_object_method) }
  elsif obj1.respond_to?(equal_object_method)
    check_equality(obj1.send(equal_object_method),
                   obj2.send(equal_object_method),
                   equal_object_method)
  else
    obj1 == obj2
  end
end
63
+
64
+ class Doc
65
+ alias exact_equal_object children
66
+ alias usual_equal_object children
67
+ end
68
+
69
+ class Elem
70
+ def make_exact_equal_object
71
+ [@stag, @children, @empty, @etag]
72
+ end
73
+
74
+ def make_usual_equal_object
75
+ [@stag, @children]
76
+ end
77
+ end
78
+
79
+ class Name
80
+ def make_exact_equal_object
81
+ [@namespace_prefix, @namespace_uri, @local_name]
82
+ end
83
+
84
+ def make_usual_equal_object
85
+ xmlns? ? @local_name : [@namespace_uri, @local_name]
86
+ end
87
+ end
88
+
89
module Util
  module_function

  # Three-way comparison in which nil sorts before every non-nil value.
  #
  # a, b - values to compare; either may be nil.
  #
  # Returns 0 when both are nil, -1 when only +a+ is nil, 1 when only +b+
  # is nil, and otherwise the result of a <=> b.
  #
  # nil? is used instead of "== nil" so the check never dispatches to an
  # operand's own == implementation.
  def cmp_with_nil(a, b)
    return(b.nil? ? 0 : -1) if a.nil?
    return 1 if b.nil?

    a <=> b
  end
end
107
+
108
+ class Context
109
+ def make_exact_equal_object
110
+ @namespaces.keys.sort {|prefix1, prefix2|
111
+ Util.cmp_with_nil(prefix1, prefix2)
112
+ }.map {|prefix| [prefix, @namespaces[prefix]] }
113
+ end
114
+
115
+ # make_usual_equal_object is never reached through STag#make_usual_equal_object.
116
+ # Would raising NotImplementedError be more suitable?
117
+ alias make_usual_equal_object make_exact_equal_object
118
+ end
119
+
120
+ class STag
121
+ def make_exact_equal_object
122
+ [@raw_string,
123
+ @name,
124
+ @attributes.sort {|(n1,t1), (n2, t2)|
125
+ Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
126
+ Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
127
+ Util.cmp_with_nil(n1.local_name, n2.local_name)
128
+ },
129
+ @inherited_context
130
+ ]
131
+ end
132
+
133
+ def make_usual_equal_object
134
+ [@name,
135
+ @attributes.find_all {|n,t| !n.xmlns? }.sort {|(n1,t1), (n2, t2)|
136
+ Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
137
+ Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
138
+ Util.cmp_with_nil(n1.local_name, n2.local_name)
139
+ }
140
+ ]
141
+ end
142
+
143
+ end
144
+
145
+ class ETag
146
+ def make_exact_equal_object
147
+ [@raw_string, @qualified_name]
148
+ end
149
+
150
+ alias usual_equal_object qualified_name
151
+ end
152
+
153
+ class Text
154
+ def make_exact_equal_object
155
+ [@raw_string, @rcdata]
156
+ end
157
+
158
+ def make_usual_equal_object
159
+ @normalized_rcdata
160
+ end
161
+ end
162
+
163
+ class XMLDecl
164
+ def make_exact_equal_object
165
+ [@raw_string, @version, @encoding, @standalone]
166
+ end
167
+
168
+ def make_usual_equal_object
169
+ [@version, @encoding, @standalone]
170
+ end
171
+ end
172
+
173
+ class DocType
174
+ def make_exact_equal_object
175
+ [@raw_string, @root_element_name, @system_identifier, @public_identifier]
176
+ end
177
+
178
+ def make_usual_equal_object
179
+ [@root_element_name, @system_identifier, @public_identifier]
180
+ end
181
+ end
182
+
183
+ class ProcIns
184
+ def make_exact_equal_object
185
+ [@raw_string, @target, @content]
186
+ end
187
+
188
+ def make_usual_equal_object
189
+ [@target, @content]
190
+ end
191
+ end
192
+
193
+ class Comment
194
+ def make_exact_equal_object
195
+ [@raw_string, @content]
196
+ end
197
+
198
+ alias usual_equal_object content
199
+ end
200
+
201
+ class BogusETag
202
+ def make_exact_equal_object
203
+ [@etag]
204
+ end
205
+
206
+ alias usual_equal_object make_exact_equal_object
207
+ end
208
+
209
+ class Location
210
+ def make_exact_equal_object
211
+ [@parent, @index, @node]
212
+ end
213
+
214
+ alias usual_equal_object make_exact_equal_object
215
+ end
216
+
217
+ end
218
+ # :startdoc:
@@ -0,0 +1,37 @@
1
+ # :stopdoc:
2
+ require 'htree/text'
3
+ require 'htree/doc'
4
+ require 'htree/elem'
5
+
6
+ module HTree
7
+ module Node
8
+ def extract_text
9
+ raise NotImplementedError
10
+ end
11
+ end
12
+
13
+ class Location
14
+ def extract_text
15
+ to_node.extract_text
16
+ end
17
+ end
18
+
19
+ module Container
20
+ def extract_text
21
+ Text.concat(*@children.map {|n| n.extract_text })
22
+ end
23
+ end
24
+
25
+ module Leaf
26
+ def extract_text
27
+ Text.new('')
28
+ end
29
+ end
30
+
31
+ class Text
32
+ def extract_text
33
+ self
34
+ end
35
+ end
36
+ end
37
+ # :startdoc:
@@ -0,0 +1,33 @@
1
+ # :stopdoc:
2
+ require 'htree/modules'
3
+
4
module HTree
  # Runs the given block with a string cache available in
  # Thread.current[:htree_frozen_string_hash].
  #
  # When a cache is already installed (a nested call) the block simply runs
  # and the existing cache is left in place. Otherwise a fresh Hash is
  # installed for the duration of the block and the slot is reset to nil
  # afterwards, even if the block raises.
  #
  # Returns the value of the block.
  def self.with_frozen_string_hash
    return yield if Thread.current[:htree_frozen_string_hash]

    begin
      Thread.current[:htree_frozen_string_hash] = {}
      yield
    ensure
      Thread.current[:htree_frozen_string_hash] = nil
    end
  end

  # Returns a frozen string with the same content as +str+.
  #
  # str - the String to freeze; it is never frozen or modified in place.
  #
  # Inside HTree.with_frozen_string_hash the result is cached, so equal
  # strings yield the same frozen object. An already-frozen +str+ is used
  # as is instead of being duplicated.
  def self.frozen_string(str)
    cache = Thread.current[:htree_frozen_string_hash]
    if cache
      cached = cache[str]
      return cached if cached
    end

    str = str.dup.freeze unless str.frozen?
    # Store the frozen string as both key and value: a frozen String key is
    # not copied by Hash#[]=, so only one object is retained per entry.
    cache[str] = str if cache
    str
  end
end
33
+ # :startdoc: