feedtools 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +11 -0
- data/lib/feed_tools.rb +2496 -810
- data/lib/feed_tools/vendor/builder.rb +2 -0
- data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
- data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
- data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
- data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
- data/lib/feed_tools/vendor/htree.rb +97 -0
- data/lib/feed_tools/vendor/htree/container.rb +10 -0
- data/lib/feed_tools/vendor/htree/context.rb +67 -0
- data/lib/feed_tools/vendor/htree/display.rb +27 -0
- data/lib/feed_tools/vendor/htree/doc.rb +149 -0
- data/lib/feed_tools/vendor/htree/elem.rb +262 -0
- data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
- data/lib/feed_tools/vendor/htree/equality.rb +218 -0
- data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
- data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
- data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
- data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
- data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
- data/lib/feed_tools/vendor/htree/loc.rb +367 -0
- data/lib/feed_tools/vendor/htree/modules.rb +48 -0
- data/lib/feed_tools/vendor/htree/name.rb +124 -0
- data/lib/feed_tools/vendor/htree/output.rb +207 -0
- data/lib/feed_tools/vendor/htree/parse.rb +407 -0
- data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
- data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
- data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
- data/lib/feed_tools/vendor/htree/scan.rb +166 -0
- data/lib/feed_tools/vendor/htree/tag.rb +111 -0
- data/lib/feed_tools/vendor/htree/template.rb +909 -0
- data/lib/feed_tools/vendor/htree/text.rb +115 -0
- data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
- data/rakefile +1 -1
- data/test/rss_test.rb +97 -0
- metadata +30 -1
@@ -0,0 +1,163 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
require 'iconv'
|
3
|
+
|
4
|
+
module HTree
  # Converts text from an internal charset to an output charset with Iconv,
  # accumulating the converted result in a buffer.
  #
  # While text is written, the encoder also converts it with every candidate
  # "subcharset" of the output encoding (see SubCharset). A candidate is
  # dropped as soon as its conversion fails or differs from the real output,
  # so #minimal_charset can report the first candidate that still represents
  # everything written so far.
  class Encoder
    # HTree::Encoder.internal_charset returns the MIME charset corresponding to $KCODE.
    #
    # - 'ISO-8859-1' when $KCODE=='NONE'
    # - 'UTF-8' when $KCODE=='UTF8'
    # - 'EUC-JP' when $KCODE=='EUC'
    # - 'Shift_JIS' when $KCODE=='SJIS'
    #
    # This mapping ignores EUC-KR and the various single-byte charsets other
    # than ISO-8859-1, at least.
    # This should be fixed when Ruby is m17nized.
    def Encoder.internal_charset
      KcodeCharset[$KCODE]
    end

    # output_encoding::   charset name the buffer is written in.
    # internal_encoding:: charset name of the strings handed to this encoder;
    #                     defaults to Encoder.internal_charset.
    def initialize(output_encoding, internal_encoding=HTree::Encoder.internal_charset)
      @buf = ''
      @internal_encoding = internal_encoding
      @output_encoding = output_encoding
      @ic = Iconv.new(output_encoding, @internal_encoding)
      # Pattern matching one leading character of internal text; nil when the
      # internal encoding has no entry in FirstCharPattern.
      @charpat = FirstCharPattern[internal_encoding]

      # One converter per candidate subcharset; entries are removed by
      # output_string and finish once a candidate is ruled out.
      @subcharset_list = SubCharset[output_encoding] || []
      @subcharset_ic = {}
      @subcharset_list.each {|subcharset|
        @subcharset_ic[subcharset] = Iconv.new(subcharset, @internal_encoding)
      }
    end

    # Appends external_str (the converted form of internal_str) to the buffer
    # and drops every subcharset whose own conversion of internal_str fails
    # or differs from external_str. Always returns nil.
    def output_string(internal_str, external_str=@ic.iconv(internal_str))
      @buf << external_str
      @subcharset_ic.reject! {|subcharset, ic|
        begin
          ic.iconv(internal_str) != external_str
        rescue Iconv::Failure
          true
        end
      }
      nil
    end

    # Writes string to the buffer. When Iconv rejects part of it, the part
    # converted so far is written, the offending character is replaced, and
    # the conversion is retried on the remainder:
    #
    # - if @charpat cannot isolate a leading character of the failed text,
    #   one position is skipped and '?' is written;
    # - otherwise the character is written as a numeric character reference
    #   (&#NNN;) built from its UTF-8 code point, or as '?' when even that
    #   conversion fails.
    def output_text(string)
      begin
        output_string string, @ic.iconv(string)
      rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => e
        # e.success holds what was converted before the failure, e.failed the
        # unconverted tail of the internal string.
        output_string string[0, string.length - e.failed.length], e.success
        unless @charpat =~ e.failed
          # xxx: should be configurable?
          #raise ArgumentError, "cannot extract first character: #{e.failed.dump}"
          string = e.failed[1, e.failed.length-1]
          output_string '?'
          retry
        end
        # $& / $' refer to the @charpat match above: the first character and
        # everything after it.
        char = $&
        rest = $'
        begin
          ucode = Iconv.conv("UTF-8", @internal_encoding, char).unpack("U")[0]
          char = "&##{ucode};"
        rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
          # xxx: should be configurable?
          char = '?'
        end
        output_string char
        string = rest
        retry
      end
    end

    # Markup-significant characters and the character references that
    # replace them in generated text and attribute values.
    ChRef = {
      '&' => '&amp;',
      '<' => '&lt;',
      '>' => '&gt;',
      '"' => '&quot;',
    }

    # Writes string as element content. Objects responding to #rcdata have
    # only '<' and '>' escaped in their rcdata ('&' is left alone); anything
    # else is converted with to_s and has '&', '<' and '>' escaped.
    def output_dynamic_text(string)
      if string.respond_to? :rcdata
        output_text(string.rcdata.gsub(/[<>]/) { ChRef[$&] })
      else
        output_text(string.to_s.gsub(/[&<>]/) { ChRef[$&] })
      end
    end

    # Same as output_dynamic_text, but for attribute values: the double
    # quote is escaped as well.
    def output_dynamic_attvalue(string)
      if string.respond_to? :rcdata
        output_text(string.rcdata.gsub(/[<>"]/) { ChRef[$&] })
      else
        output_text(string.to_s.gsub(/[&<>"]/) { ChRef[$&] })
      end
    end

    # Closes the main converter, appends whatever it returns to the buffer,
    # closes the remaining subcharset converters (dropping those that fail
    # or return something different), and returns the buffer.
    def finish
      external_str = @ic.close
      @buf << external_str
      @subcharset_ic.reject! {|subcharset, ic|
        begin
          ic.close != external_str
        rescue Iconv::Failure
          true
        end
      }
      @buf
    end

    # Like finish, but the returned string is prefixed with an XML
    # declaration naming minimal_charset. finish runs first so that the
    # charset reflects everything that was written.
    def finish_with_xmldecl
      content = finish
      xmldecl = Iconv.conv(@output_encoding, 'US-ASCII',
        "<?xml version=\"1.0\" encoding=\"#{minimal_charset}\"?>")
      xmldecl + content
    end

    # Returns the first subcharset, in SubCharset order, that has not been
    # ruled out so far; falls back to the output encoding itself.
    def minimal_charset
      @subcharset_list.each {|subcharset|
        if @subcharset_ic.include? subcharset
          return subcharset
        end
      }
      @output_encoding
    end

    # $KCODE value => MIME charset name.
    KcodeCharset = {
      'EUC' => 'EUC-JP',
      'SJIS' => 'Shift_JIS',
      'UTF8' => 'UTF-8',
      'NONE' => 'ISO-8859-1',
    }

    # Charset name => pattern matching a single character at the start of a
    # string in that charset.
    FirstCharPattern = {
      'EUC-JP' => /\A(?:
         [\x00-\x7f]
        |[\xa1-\xfe][\xa1-\xfe]
        |\x8e[\xa1-\xfe]
        |\x8f[\xa1-\xfe][\xa1-\xfe])/nx,
      'Shift_JIS' => /\A(?:
         [\x00-\x7f]
        |[\x81-\x9f][\x40-\x7e\x80-\xfc]
        |[\xa1-\xdf]
        |[\xe0-\xfc][\x40-\x7e\x80-\xfc])/nx,
      'UTF-8' => /\A(?:
         [\x00-\x7f]
        |[\xc0-\xdf][\x80-\xbf]
        |[\xe0-\xef][\x80-\xbf][\x80-\xbf]
        |[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]
        |[\xf8-\xfb][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf]
        |[\xfc-\xfd][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf])/nx,
      'ISO-8859-1' => /\A[\x00-\xff]/n
    }

    # Output charset => candidate subcharsets, tried in this order by
    # minimal_charset. Charsets without an entry get ['US-ASCII'].
    SubCharset = {
      'ISO-2022-JP-2' => ['US-ASCII', 'ISO-2022-JP'],
      'ISO-2022-JP-3' => ['US-ASCII', 'ISO-2022-JP'],
      'UTF-16BE' => [],
      'UTF-16LE' => [],
      'UTF-16' => [],
    }
    SubCharset.default = ['US-ASCII']

  end
end
|
163
|
+
# :startdoc:
|
@@ -0,0 +1,218 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
require 'htree/doc'
|
3
|
+
require 'htree/elem'
|
4
|
+
require 'htree/leaf'
|
5
|
+
require 'htree/tag'
|
6
|
+
require 'htree/raw_string'
|
7
|
+
require 'htree/context'
|
8
|
+
|
9
|
+
module HTree
  # Tree comparison: two objects are == when their "usual" comparison keys
  # agree (see check_equality).
  def ==(other)
    check_equality(self, other, :usual_equal_object)
  end
  alias eql? ==

  # Hash value of the tree structure, taken from the usual comparison key
  # and cached in @hash_code.
  def hash
    if defined?(@hash_code)
      @hash_code
    else
      @hash_code = usual_equal_object.hash
    end
  end

  # Cached result of make_usual_equal_object.
  def usual_equal_object
    if defined?(@usual_equal_object)
      @usual_equal_object
    else
      @usual_equal_object = make_usual_equal_object
    end
  end

  # Hook for including classes: builds the key used by == / eql? / hash.
  def make_usual_equal_object
    raise NotImplementedError
  end

  # Cached result of make_exact_equal_object.
  def exact_equal_object
    if defined?(@exact_equal_object)
      @exact_equal_object
    else
      @exact_equal_object = make_exact_equal_object
    end
  end

  # Hook for including classes: builds the key used by exact_equal?.
  def make_exact_equal_object
    raise NotImplementedError
  end

  # Like ==, but compares the "exact" comparison keys.
  def exact_equal?(other)
    check_equality(self, other, :exact_equal_object)
  end

  # Recursively compares obj1 and obj2.
  #
  # Objects of different classes are never equal. Arrays are compared
  # element by element (all element classes are checked before any element
  # is compared in depth). Objects responding to equal_object_method are
  # compared through the keys that method returns. Everything else falls
  # back to ==.
  def check_equality(obj1, obj2, equal_object_method)
    return false if obj1.class != obj2.class
    if obj1.class == Array
      return false if obj1.length != obj2.length
      positions = 0...obj1.length
      return false unless positions.all? {|i| obj1[i].class == obj2[i].class }
      positions.all? {|i| check_equality(obj1[i], obj2[i], equal_object_method) }
    elsif obj1.respond_to? equal_object_method
      key1 = obj1.send(equal_object_method)
      key2 = obj2.send(equal_object_method)
      check_equality(key1, key2, equal_object_method)
    else
      obj1 == obj2
    end
  end

  # A document is compared by its children in both modes.
  class Doc
    alias exact_equal_object children
    alias usual_equal_object children
  end

  class Elem
    # Exact mode also looks at @empty and the end tag.
    def make_exact_equal_object
      [@stag, @children, @empty, @etag]
    end

    # Usual mode looks at the start tag and the children only.
    def make_usual_equal_object
      [@stag, @children]
    end
  end

  class Name
    def make_exact_equal_object
      [@namespace_prefix, @namespace_uri, @local_name]
    end

    # The prefix is ignored; for names where xmlns? is true only the local
    # name is used.
    def make_usual_equal_object
      if xmlns?
        @local_name
      else
        [@namespace_uri, @local_name]
      end
    end
  end

  module Util
    module_function

    # <=> that also accepts nil on either side; nil sorts before any other
    # value and equals itself.
    def cmp_with_nil(a, b)
      return(b == nil ? 0 : -1) if a == nil
      return 1 if b == nil
      a <=> b
    end
  end

  class Context
    # [prefix, uri] pairs of @namespaces, ordered by prefix (nil first).
    def make_exact_equal_object
      ordered_prefixes = @namespaces.keys.sort {|left, right|
        Util.cmp_with_nil(left, right)
      }
      ordered_prefixes.map {|prefix| [prefix, @namespaces[prefix]] }
    end

    # make_usual_equal_object is not used through STag#make_usual_equal_object
    # NotImplementedError is suitable?
    alias make_usual_equal_object make_exact_equal_object
  end

  class STag
    # Raw string, name, attributes ordered by (prefix, uri, local name) and
    # the inherited context.
    def make_exact_equal_object
      ordered_attributes = @attributes.sort {|(name_a, text_a), (name_b, text_b)|
        Util.cmp_with_nil(name_a.namespace_prefix, name_b.namespace_prefix).nonzero? ||
        Util.cmp_with_nil(name_a.namespace_uri, name_b.namespace_uri).nonzero? ||
        Util.cmp_with_nil(name_a.local_name, name_b.local_name)
      }
      [@raw_string, @name, ordered_attributes, @inherited_context]
    end

    # Name plus the attributes whose name is not an xmlns declaration,
    # ordered by (prefix, uri, local name).
    def make_usual_equal_object
      plain_attributes = @attributes.reject {|name, text| name.xmlns? }
      ordered_attributes = plain_attributes.sort {|(name_a, text_a), (name_b, text_b)|
        Util.cmp_with_nil(name_a.namespace_prefix, name_b.namespace_prefix).nonzero? ||
        Util.cmp_with_nil(name_a.namespace_uri, name_b.namespace_uri).nonzero? ||
        Util.cmp_with_nil(name_a.local_name, name_b.local_name)
      }
      [@name, ordered_attributes]
    end

  end

  class ETag
    def make_exact_equal_object
      [@raw_string, @qualified_name]
    end

    # Usual mode compares end tags by qualified name.
    alias usual_equal_object qualified_name
  end

  class Text
    def make_exact_equal_object
      [@raw_string, @rcdata]
    end

    def make_usual_equal_object
      @normalized_rcdata
    end
  end

  class XMLDecl
    def make_exact_equal_object
      [@raw_string, @version, @encoding, @standalone]
    end

    def make_usual_equal_object
      [@version, @encoding, @standalone]
    end
  end

  class DocType
    def make_exact_equal_object
      [@raw_string, @root_element_name, @system_identifier, @public_identifier]
    end

    def make_usual_equal_object
      [@root_element_name, @system_identifier, @public_identifier]
    end
  end

  class ProcIns
    def make_exact_equal_object
      [@raw_string, @target, @content]
    end

    def make_usual_equal_object
      [@target, @content]
    end
  end

  class Comment
    def make_exact_equal_object
      [@raw_string, @content]
    end

    # Usual mode compares comments by content.
    alias usual_equal_object content
  end

  class BogusETag
    def make_exact_equal_object
      [@etag]
    end

    # Both modes use the same (uncached) key.
    alias usual_equal_object make_exact_equal_object
  end

  class Location
    def make_exact_equal_object
      [@parent, @index, @node]
    end

    # Both modes use the same (uncached) key.
    alias usual_equal_object make_exact_equal_object
  end

end
|
218
|
+
# :startdoc:
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
require 'htree/text'
|
3
|
+
require 'htree/doc'
|
4
|
+
require 'htree/elem'
|
5
|
+
|
6
|
+
module HTree
  module Node
    # Placeholder; the reopened modules and classes below supply the real
    # implementations.
    def extract_text
      raise NotImplementedError
    end
  end

  class Location
    # Delegates to the node returned by to_node.
    def extract_text
      node = to_node
      node.extract_text
    end
  end

  module Container
    # Collects the extracted text of every child and joins the pieces with
    # Text.concat.
    def extract_text
      pieces = @children.map {|child| child.extract_text }
      Text.concat(*pieces)
    end
  end

  module Leaf
    # The default for a leaf is an empty Text.
    def extract_text
      Text.new('')
    end
  end

  class Text
    # A Text is its own extracted text.
    def extract_text
      self
    end
  end
end
|
37
|
+
# :startdoc:
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
require 'htree/modules'
|
3
|
+
|
4
|
+
module HTree
  # Runs the block with a per-thread table that HTree.frozen_string uses to
  # share equal strings. If a table is already installed for the current
  # thread (a nested call), the block simply runs with it; otherwise a fresh
  # table is installed and removed again when the block finishes, even if
  # it raises.
  def HTree.with_frozen_string_hash
    if Thread.current[:htree_frozen_string_hash]
      yield
    else
      begin
        Thread.current[:htree_frozen_string_hash] = {}
        yield
      ensure
        Thread.current[:htree_frozen_string_hash] = nil
      end
    end
  end

  # Returns a frozen string equal to str without modifying str itself.
  #
  # An already frozen str is returned as is; otherwise a frozen copy is
  # made. Inside HTree.with_frozen_string_hash the result is remembered in
  # the per-thread table, so later calls with an equal string return the
  # very same object.
  def HTree.frozen_string(str)
    if h = Thread.current[:htree_frozen_string_hash]
      if s = h[str]
        s
      else
        # Freeze before storing so the table only ever holds frozen strings
        # and an already frozen argument is not copied needlessly.
        str = str.dup.freeze unless str.frozen?
        h[str] = str
      end
    else
      str = str.dup.freeze unless str.frozen?
      str
    end
  end
end
|
33
|
+
# :startdoc:
|