feedtools 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/CHANGELOG +11 -0
  2. data/lib/feed_tools.rb +2496 -810
  3. data/lib/feed_tools/vendor/builder.rb +2 -0
  4. data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
  5. data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
  6. data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
  7. data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
  8. data/lib/feed_tools/vendor/htree.rb +97 -0
  9. data/lib/feed_tools/vendor/htree/container.rb +10 -0
  10. data/lib/feed_tools/vendor/htree/context.rb +67 -0
  11. data/lib/feed_tools/vendor/htree/display.rb +27 -0
  12. data/lib/feed_tools/vendor/htree/doc.rb +149 -0
  13. data/lib/feed_tools/vendor/htree/elem.rb +262 -0
  14. data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
  15. data/lib/feed_tools/vendor/htree/equality.rb +218 -0
  16. data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
  17. data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
  18. data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
  19. data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
  20. data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
  21. data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
  22. data/lib/feed_tools/vendor/htree/loc.rb +367 -0
  23. data/lib/feed_tools/vendor/htree/modules.rb +48 -0
  24. data/lib/feed_tools/vendor/htree/name.rb +124 -0
  25. data/lib/feed_tools/vendor/htree/output.rb +207 -0
  26. data/lib/feed_tools/vendor/htree/parse.rb +407 -0
  27. data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
  28. data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
  29. data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
  30. data/lib/feed_tools/vendor/htree/scan.rb +166 -0
  31. data/lib/feed_tools/vendor/htree/tag.rb +111 -0
  32. data/lib/feed_tools/vendor/htree/template.rb +909 -0
  33. data/lib/feed_tools/vendor/htree/text.rb +115 -0
  34. data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
  35. data/rakefile +1 -1
  36. data/test/rss_test.rb +97 -0
  37. metadata +30 -1
@@ -0,0 +1,163 @@
1
+ # :stopdoc:
2
+ require 'iconv'
3
+
4
+ module HTree
5
+ class Encoder
6
# Returns the MIME charset name that KcodeCharset associates with the
# current value of $KCODE:
#
#   $KCODE   charset
#   'NONE'   'ISO-8859-1'
#   'UTF8'   'UTF-8'
#   'EUC'    'EUC-JP'
#   'SJIS'   'Shift_JIS'
#
# This mapping ignores EUC-KR and, at least, every single-byte charset
# other than ISO-8859-1.
# It should be fixed when Ruby is m17nized.
def self.internal_charset
  kcode = $KCODE
  KcodeCharset[kcode]
end
18
+
19
# Builds an encoder that converts text from internal_encoding into
# output_encoding.
#
# output_encoding::   charset name used as the Iconv conversion target.
# internal_encoding:: charset name of the strings that will be fed in;
#                     defaults to HTree::Encoder.internal_charset.
#
# Besides the main converter, one Iconv converter is opened for every
# charset SubCharset lists for output_encoding.  output_string and finish
# discard the ones whose result differs from the main converter's, and
# minimal_charset reports the first survivor.
def initialize(output_encoding, internal_encoding=HTree::Encoder.internal_charset)
  @buf = ''
  @output_encoding = output_encoding
  @internal_encoding = internal_encoding
  @ic = Iconv.new(output_encoding, internal_encoding)
  # Pattern matching one leading character of the internal encoding
  # (nil when FirstCharPattern has no entry for it).
  @charpat = FirstCharPattern[internal_encoding]

  @subcharset_list = SubCharset[output_encoding] || []
  @subcharset_ic = @subcharset_list.inject({}) do |converters, name|
    converters[name] = Iconv.new(name, internal_encoding)
    converters
  end
end
32
+
33
# Appends external_str (the converted form of internal_str) to the output
# buffer and drops every sub-charset converter that either fails on
# internal_str or produces something other than external_str.
#
# internal_str:: the text in the internal encoding.
# external_str:: the same text already converted; computed with the main
#                converter when omitted.
#
# Always returns nil.
def output_string(internal_str, external_str=@ic.iconv(internal_str))
  @buf << external_str
  @subcharset_ic.delete_if do |_name, converter|
    begin
      converter.iconv(internal_str) != external_str
    rescue Iconv::Failure
      # This sub-charset cannot represent the text at all.
      true
    end
  end
  nil
end
44
+
45
# Converts string with the main converter and emits it via output_string.
#
# When Iconv reports an illegal sequence or an invalid character, the
# part converted so far is emitted, the offending first character of the
# unconverted remainder is replaced, and conversion is retried on what
# follows it:
#
# - if @charpat matches the remainder, the matched character is emitted
#   as a decimal numeric character reference ("&#NNN;"), or as '?' when
#   its code point cannot be obtained through a UTF-8 conversion;
# - otherwise one leading element of the remainder is skipped and '?' is
#   emitted in its place.
def output_text(string)
  output_string string, @ic.iconv(string)
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => failure
  unconverted = failure.failed
  output_string string[0, string.length - unconverted.length], failure.success
  if @charpat =~ unconverted
    first_char = $&
    string = $'
    begin
      codepoint = Iconv.conv("UTF-8", @internal_encoding, first_char).unpack("U")[0]
      replacement = "&##{codepoint};"
    rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
      # xxx: should be configurable?
      replacement = '?'
    end
    output_string replacement
  else
    # xxx: should be configurable?
    #raise ArgumentError, "cannot extract first character: #{unconverted.dump}"
    string = unconverted[1, unconverted.length-1]
    output_string '?'
  end
  retry
end
71
+
72
# Markup-significant characters and the character references the
# output_dynamic_* methods substitute for them.  Frozen because it is a
# shared lookup table that is only ever read.
ChRef = {
  '&' => '&amp;',
  '<' => '&lt;',
  '>' => '&gt;',
  '"' => '&quot;',
}.freeze
78
+
79
# Emits string as element content through output_text.
#
# An object responding to rcdata contributes its rcdata with only '<' and
# '>' replaced by character references; anything else is converted with
# to_s and has '&', '<' and '>' replaced.
def output_dynamic_text(string)
  source, unsafe =
    if string.respond_to?(:rcdata)
      [string.rcdata, /[<>]/]
    else
      [string.to_s, /[&<>]/]
    end
  output_text(source.gsub(unsafe) {|ch| ChRef[ch] })
end
86
+
87
# Emits string as an attribute value through output_text.
#
# An object responding to rcdata contributes its rcdata with '<', '>' and
# '"' replaced by character references; anything else is converted with
# to_s and has '&', '<', '>' and '"' replaced.
def output_dynamic_attvalue(string)
  source, unsafe =
    if string.respond_to?(:rcdata)
      [string.rcdata, /[<>"]/]
    else
      [string.to_s, /[&<>"]/]
    end
  output_text(source.gsub(unsafe) {|ch| ChRef[ch] })
end
94
+
95
# Closes the main converter, appends whatever it returns on close to the
# buffer, and drops every sub-charset converter whose own close either
# fails or returns something different.
#
# Returns the accumulated output buffer.
def finish
  trailer = @ic.close
  @buf << trailer
  @subcharset_ic.delete_if do |_name, converter|
    begin
      converter.close != trailer
    rescue Iconv::Failure
      true
    end
  end
  @buf
end
107
+
108
# Finishes the conversion (see finish) and returns the output prefixed
# with an XML declaration.  The declaration names minimal_charset as the
# encoding and is itself converted from US-ASCII to the output encoding.
def finish_with_xmldecl
  # finish must run first: it prunes the sub-charset converters that
  # minimal_charset consults.
  body = finish
  declaration = "<?xml version=\"1.0\" encoding=\"#{minimal_charset}\"?>"
  Iconv.conv(@output_encoding, 'US-ASCII', declaration) + body
end
114
+
115
# Returns the first charset in @subcharset_list whose converter is still
# registered in @subcharset_ic, or the output encoding when none is left.
def minimal_charset
  surviving = @subcharset_list.find {|name| @subcharset_ic.include?(name) }
  surviving || @output_encoding
end
123
+
124
# $KCODE value => MIME charset name (used by Encoder.internal_charset).
KcodeCharset = {
  'EUC' => 'EUC-JP',
  'SJIS' => 'Shift_JIS',
  'UTF8' => 'UTF-8',
  'NONE' => 'ISO-8859-1',
}.freeze

# Charset name => byte-oriented pattern matching exactly one character at
# the start of a string in that charset.  output_text uses it to split
# off the character Iconv could not convert.
FirstCharPattern = {
  'EUC-JP' => /\A(?:
     [\x00-\x7f]
    |[\xa1-\xfe][\xa1-\xfe]
    |\x8e[\xa1-\xfe]
    |\x8f[\xa1-\xfe][\xa1-\xfe])/nx,
  'Shift_JIS' => /\A(?:
     [\x00-\x7f]
    |[\x81-\x9f][\x40-\x7e\x80-\xfc]
    |[\xa1-\xdf]
    |[\xe0-\xfc][\x40-\x7e\x80-\xfc])/nx,
  'UTF-8' => /\A(?:
     [\x00-\x7f]
    |[\xc0-\xdf][\x80-\xbf]
    |[\xe0-\xef][\x80-\xbf][\x80-\xbf]
    |[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]
    |[\xf8-\xfb][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf]
    |[\xfc-\xfd][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf])/nx,
  'ISO-8859-1' => /\A[\x00-\xff]/n
}.freeze

# Output charset => charsets tried as smaller alternatives, in order of
# preference (see minimal_charset).  Charsets without an entry fall back
# to the default, ['US-ASCII'].
SubCharset = {
  'ISO-2022-JP-2' => ['US-ASCII', 'ISO-2022-JP'],
  'ISO-2022-JP-3' => ['US-ASCII', 'ISO-2022-JP'],
  'UTF-16BE' => [],
  'UTF-16LE' => [],
  'UTF-16' => [],
}
# The default must be installed before the table is frozen.
SubCharset.default = ['US-ASCII']
SubCharset.freeze
160
+
161
+ end
162
+ end
163
+ # :startdoc:
@@ -0,0 +1,218 @@
1
+ # :stopdoc:
2
+ require 'htree/doc'
3
+ require 'htree/elem'
4
+ require 'htree/leaf'
5
+ require 'htree/tag'
6
+ require 'htree/raw_string'
7
+ require 'htree/context'
8
+
9
+ module HTree
10
# Compares tree structures: delegates to check_equality using each side's
# usual_equal_object as the comparison key.  eql? is the same method.
def ==(other)
  key_method = :usual_equal_object
  check_equality(self, other, key_method)
end
alias eql? ==
15
+
16
# Hash value for the tree structure: the hash of usual_equal_object,
# computed once and cached in @hash_code.
def hash
  unless defined? @hash_code
    @hash_code = usual_equal_object.hash
  end
  @hash_code
end
21
+
22
# Returns the comparison key used by == and hash.  It is built by
# make_usual_equal_object on first use and cached in @usual_equal_object
# (a nil or false key is cached too).
def usual_equal_object
  if defined? @usual_equal_object
    @usual_equal_object
  else
    @usual_equal_object = make_usual_equal_object
  end
end
26
+
27
# Builds the comparison key used by == and hash.  This default only
# signals that the including class did not provide its own version.
#
# Raises NotImplementedError, naming the receiver's class.
def make_usual_equal_object
  raise NotImplementedError,
        "#{self.class} does not implement make_usual_equal_object"
end
30
+
31
# Returns the comparison key used by exact_equal?.  It is built by
# make_exact_equal_object on first use and cached in @exact_equal_object
# (a nil or false key is cached too).
def exact_equal_object
  if defined? @exact_equal_object
    @exact_equal_object
  else
    @exact_equal_object = make_exact_equal_object
  end
end
35
+
36
# Builds the comparison key used by exact_equal?.  This default only
# signals that the including class did not provide its own version.
#
# Raises NotImplementedError, naming the receiver's class.
def make_exact_equal_object
  raise NotImplementedError,
        "#{self.class} does not implement make_exact_equal_object"
end
39
+
40
# Like ==, but compares through each side's exact_equal_object.
def exact_equal?(other)
  key_method = :exact_equal_object
  check_equality(self, other, key_method)
end
43
+
44
# Recursively compares obj1 and obj2.
#
# obj1, obj2::          the values to compare.
# equal_object_method:: name of the method that turns a node into its
#                       comparison key (:usual_equal_object or
#                       :exact_equal_object).
#
# Values of different classes are never equal.  Arrays are compared
# element by element, after a cheap pass that only checks element
# classes.  Objects responding to equal_object_method are compared
# through the keys that method returns.  Everything else uses ==.
def check_equality(obj1, obj2, equal_object_method)
  return false unless obj1.class == obj2.class

  if obj1.class == Array
    return false unless obj1.length == obj2.length
    pairs = obj1.zip(obj2)
    return false unless pairs.all? {|left, right| left.class == right.class }
    pairs.all? {|left, right| check_equality(left, right, equal_object_method) }
  elsif obj1.respond_to? equal_object_method
    key1 = obj1.send(equal_object_method)
    key2 = obj2.send(equal_object_method)
    check_equality(key1, key2, equal_object_method)
  else
    obj1 == obj2
  end
end
63
+
64
class Doc
  # A document uses its children as the comparison key in both the exact
  # and the usual mode.
  alias_method :exact_equal_object, :children
  alias_method :usual_equal_object, :children
end
68
+
69
class Elem
  # Exact key: start tag, children, the @empty value and end tag.
  def make_exact_equal_object
    [
      @stag,
      @children,
      @empty,
      @etag
    ]
  end

  # Usual key: start tag and children only.
  def make_usual_equal_object
    [
      @stag,
      @children
    ]
  end
end
78
+
79
class Name
  # Exact key: namespace prefix, namespace URI and local name.
  def make_exact_equal_object
    [@namespace_prefix, @namespace_uri, @local_name]
  end

  # Usual key: the prefix is ignored.  When xmlns? is true only the local
  # name is used; otherwise the namespace URI and the local name.
  def make_usual_equal_object
    if xmlns?
      @local_name
    else
      [@namespace_uri, @local_name]
    end
  end
end
88
+
89
module Util
  module_function

  # Three-way comparison in which nil sorts before everything else.
  # Returns 0 when both are nil, -1 when only a is nil, 1 when only b is
  # nil, and a <=> b otherwise.
  def cmp_with_nil(a, b)
    if a == nil
      b == nil ? 0 : -1
    elsif b == nil
      1
    else
      a <=> b
    end
  end
end
107
+
108
class Context
  # Exact key: the [prefix, value] pairs of @namespaces, ordered by prefix
  # with a nil prefix first.
  def make_exact_equal_object
    ordered_prefixes = @namespaces.keys.sort do |prefix1, prefix2|
      Util.cmp_with_nil(prefix1, prefix2)
    end
    ordered_prefixes.map {|prefix| [prefix, @namespaces[prefix]] }
  end

  # make_usual_equal_object is not used through STag#make_usual_equal_object
  # Would NotImplementedError be more suitable?
  alias make_usual_equal_object make_exact_equal_object
end
119
+
120
class STag
  # Exact key: raw string, name, all attributes and the inherited context.
  # Attributes are ordered by namespace prefix, then namespace URI, then
  # local name (nil first in each case).
  def make_exact_equal_object
    ordered = @attributes.sort do |(n1, _t1), (n2, _t2)|
      Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
      Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
      Util.cmp_with_nil(n1.local_name, n2.local_name)
    end
    [@raw_string, @name, ordered, @inherited_context]
  end

  # Usual key: name plus the attributes whose name is not xmlns?, in the
  # same order as in the exact key.
  def make_usual_equal_object
    plain = @attributes.find_all {|n, _t| !n.xmlns? }
    ordered = plain.sort do |(n1, _t1), (n2, _t2)|
      Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
      Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
      Util.cmp_with_nil(n1.local_name, n2.local_name)
    end
    [@name, ordered]
  end
end
144
+
145
class ETag
  # Exact key: raw string and qualified name.
  def make_exact_equal_object
    [
      @raw_string,
      @qualified_name
    ]
  end

  # The usual key is simply the qualified name.
  alias_method :usual_equal_object, :qualified_name
end
152
+
153
class Text
  # Exact key: raw string and rcdata.
  def make_exact_equal_object
    [
      @raw_string,
      @rcdata
    ]
  end

  # Usual key: the value held in @normalized_rcdata.
  def make_usual_equal_object
    normalized = @normalized_rcdata
    normalized
  end
end
162
+
163
class XMLDecl
  # Exact key: raw string followed by version, encoding and standalone.
  def make_exact_equal_object
    [
      @raw_string,
      @version,
      @encoding,
      @standalone
    ]
  end

  # Usual key: version, encoding and standalone.
  def make_usual_equal_object
    [
      @version,
      @encoding,
      @standalone
    ]
  end
end
172
+
173
class DocType
  # Exact key: raw string followed by root element name, system identifier
  # and public identifier.
  def make_exact_equal_object
    [
      @raw_string,
      @root_element_name,
      @system_identifier,
      @public_identifier
    ]
  end

  # Usual key: root element name, system identifier and public identifier.
  def make_usual_equal_object
    [
      @root_element_name,
      @system_identifier,
      @public_identifier
    ]
  end
end
182
+
183
class ProcIns
  # Exact key: raw string, target and content.
  def make_exact_equal_object
    [
      @raw_string,
      @target,
      @content
    ]
  end

  # Usual key: target and content.
  def make_usual_equal_object
    [
      @target,
      @content
    ]
  end
end
192
+
193
class Comment
  # Exact key: raw string and content.
  def make_exact_equal_object
    [
      @raw_string,
      @content
    ]
  end

  # The usual key is simply the content.
  alias_method :usual_equal_object, :content
end
200
+
201
class BogusETag
  # Exact key: a one-element array holding @etag.
  def make_exact_equal_object
    Array[@etag]
  end

  # The usual key is the same as the exact one.
  alias_method :usual_equal_object, :make_exact_equal_object
end
208
+
209
class Location
  # Exact key: parent, index and node.
  def make_exact_equal_object
    [
      @parent,
      @index,
      @node
    ]
  end

  # The usual key is the same as the exact one.
  alias_method :usual_equal_object, :make_exact_equal_object
end
216
+
217
+ end
218
+ # :startdoc:
@@ -0,0 +1,37 @@
1
+ # :stopdoc:
2
+ require 'htree/text'
3
+ require 'htree/doc'
4
+ require 'htree/elem'
5
+
6
module HTree
  module Node
    # Returns the text contained in the node.  This default only signals
    # that the including class did not provide its own version.
    #
    # Raises NotImplementedError, naming the receiver's class.
    def extract_text
      raise NotImplementedError,
            "#{self.class} does not implement extract_text"
    end
  end

  class Location
    # Delegates to the node returned by to_node.
    def extract_text
      to_node.extract_text
    end
  end

  module Container
    # Concatenates (via Text.concat) the text extracted from each child.
    def extract_text
      parts = @children.map {|child| child.extract_text }
      Text.concat(*parts)
    end
  end

  module Leaf
    # A leaf contributes no text: returns a Text built from ''.
    def extract_text
      Text.new('')
    end
  end

  class Text
    # A text node is its own extracted text.
    def extract_text
      self
    end
  end
end
37
+ # :startdoc:
@@ -0,0 +1,33 @@
1
+ # :stopdoc:
2
+ require 'htree/modules'
3
+
4
module HTree
  # Runs the block with a per-thread string table installed, so that
  # HTree.frozen_string returns one shared frozen object per distinct
  # string content for the duration of the block.
  #
  # A nested call reuses the table installed by the outer call.  The
  # outermost call removes the table again when the block exits, even if
  # it raises.  Returns the value of the block.
  def HTree.with_frozen_string_hash
    if Thread.current[:htree_frozen_string_hash]
      yield
    else
      begin
        Thread.current[:htree_frozen_string_hash] = {}
        yield
      ensure
        Thread.current[:htree_frozen_string_hash] = nil
      end
    end
  end

  # Returns a frozen string with the same content as str.
  #
  # An already frozen str is returned as-is (no copy); an unfrozen one is
  # duplicated and the copy frozen, leaving str itself untouched.  Inside
  # HTree.with_frozen_string_hash, the first frozen string seen for a
  # given content is remembered and returned for every later request with
  # equal content.
  def HTree.frozen_string(str)
    table = Thread.current[:htree_frozen_string_hash]
    if table
      known = table[str]
      return known if known
      # Freeze before storing: a frozen String key is kept by the Hash
      # without a further copy, so key and value are the same object.
      str = str.dup.freeze unless str.frozen?
      table[str] = str
    else
      str = str.dup.freeze unless str.frozen?
      str
    end
  end
end
33
+ # :startdoc: