ruby-feedparser 0.7 → 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +7 -7
- data/lib/feedparser/feedparser.rb +77 -8
- data/lib/feedparser/html-output.rb +4 -4
- data/lib/feedparser/html2text-parser.rb +24 -10
- data/lib/feedparser/sgml-parser.rb +3 -4
- data/lib/feedparser/text-output.rb +3 -4
- data/test/tc_feed_parse.rb +48 -1
- data/test/tc_html2text_parser.rb +43 -0
- data/test/tc_htmloutput.rb +13 -13
- data/test/tc_parser.rb +13 -13
- data/test/tc_sgml_parser.rb +22 -0
- data/test/tc_textoutput.rb +13 -13
- data/test/tc_textwrappedoutput.rb +13 -13
- metadata +48 -39
data/Rakefile
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'rake/testtask'
|
2
|
-
require '
|
3
|
-
require '
|
2
|
+
require 'rdoc/task'
|
3
|
+
require 'rubygems/package_task'
|
4
4
|
require 'rake'
|
5
5
|
require 'find'
|
6
6
|
|
7
7
|
# Globals
|
8
8
|
PKG_NAME = 'ruby-feedparser'
|
9
|
-
PKG_VERSION = '
|
9
|
+
PKG_VERSION = `ruby -Ilib -rfeedparser/feedparser -e 'puts FeedParser::VERSION'`.strip
|
10
10
|
|
11
11
|
PKG_FILES = [ 'ChangeLog', 'README', 'COPYING', 'LICENSE', 'setup.rb', 'Rakefile']
|
12
12
|
Find.find('lib/', 'test/', 'tools/') do |f|
|
@@ -19,7 +19,7 @@ end
|
|
19
19
|
|
20
20
|
PKG_FILES.reject! { |f| f =~ /^test\/(source|.*_output)\// }
|
21
21
|
|
22
|
-
task :default => [:
|
22
|
+
task :default => [:test]
|
23
23
|
|
24
24
|
Rake::TestTask.new do |t|
|
25
25
|
t.libs << "test"
|
@@ -61,8 +61,6 @@ end
|
|
61
61
|
|
62
62
|
# "Gem" part of the Rakefile
|
63
63
|
begin
|
64
|
-
require 'rake/gempackagetask'
|
65
|
-
|
66
64
|
spec = Gem::Specification.new do |s|
|
67
65
|
s.platform = Gem::Platform::RUBY
|
68
66
|
s.summary = "Ruby library to parse ATOM and RSS feeds"
|
@@ -73,9 +71,11 @@ begin
|
|
73
71
|
s.autorequire = 'feedparser'
|
74
72
|
s.files = PKG_FILES
|
75
73
|
s.description = "Ruby library to parse ATOM and RSS feeds"
|
74
|
+
s.authors = ['Lucas Nussbaum']
|
75
|
+
s.add_runtime_dependency 'magic'
|
76
76
|
end
|
77
77
|
|
78
|
-
|
78
|
+
Gem::PackageTask.new(spec) do |pkg|
|
79
79
|
pkg.need_zip = true
|
80
80
|
pkg.need_tar = true
|
81
81
|
end
|
@@ -4,14 +4,43 @@ require 'feedparser/textconverters'
|
|
4
4
|
require 'feedparser/rexml_patch'
|
5
5
|
require 'feedparser/text-output'
|
6
6
|
require 'base64'
|
7
|
+
require 'magic'
|
8
|
+
require 'uri'
|
7
9
|
|
8
10
|
module FeedParser
|
9
11
|
|
10
|
-
VERSION = "0.
|
12
|
+
VERSION = "0.9.3"
|
11
13
|
|
12
14
|
class UnknownFeedTypeException < RuntimeError
|
13
15
|
end
|
14
16
|
|
17
|
+
def self.recode(str)
|
18
|
+
encoding = nil
|
19
|
+
begin
|
20
|
+
encoding = Magic.guess_string_mime_encoding(str)
|
21
|
+
rescue Magic::Exception
|
22
|
+
# this happens when magic does not find any content at all, e.g. with
|
23
|
+
# strings that contain only whitespace. In this case it *should* be safe
|
24
|
+
# to assume UTF-8
|
25
|
+
encoding = Encoding::UTF_8
|
26
|
+
end
|
27
|
+
if encoding == 'unknown-8bit'
|
28
|
+
# find first substring with a valid encoding that is not us-ascii
|
29
|
+
length = 1 # has to start at 1, magic requires at least 2 bytes
|
30
|
+
while length < str.length && ['us-ascii', 'unknown-8bit'].include?(encoding)
|
31
|
+
encoding = Magic.guess_string_mime_encoding(str[0..length])
|
32
|
+
length = length + 1
|
33
|
+
end
|
34
|
+
# need to remove iso-8859-1 control characters
|
35
|
+
if encoding == 'iso-8859-1'
|
36
|
+
str = str.bytes.select { |c| c < 128 || c > 159 }.map(&:chr).join
|
37
|
+
end
|
38
|
+
end
|
39
|
+
str.force_encoding(encoding)
|
40
|
+
str = str.chars.select { |c| c.valid_encoding? }.join
|
41
|
+
str.encode('UTF-8')
|
42
|
+
end
|
43
|
+
|
15
44
|
# an RSS/Atom feed
|
16
45
|
class Feed
|
17
46
|
attr_reader :type, :title, :link, :description, :creator, :encoding, :items
|
@@ -20,13 +49,16 @@ module FeedParser
|
|
20
49
|
attr_reader :xml
|
21
50
|
|
22
51
|
# parse str to build a Feed
|
23
|
-
def initialize(str = nil)
|
52
|
+
def initialize(str = nil, uri = nil)
|
24
53
|
parse(str) if str
|
54
|
+
parse_origin(uri) if uri
|
25
55
|
end
|
26
56
|
|
27
57
|
# Determines all the fields using a string containing an
|
28
58
|
# XML document
|
29
59
|
def parse(str)
|
60
|
+
str = FeedParser.recode(str)
|
61
|
+
|
30
62
|
# Dirty hack: some feeds contain the & char. It must be changed to &amp;
|
31
63
|
str.gsub!(/&(\s+)/, '&amp;\1')
|
32
64
|
doc = REXML::Document.new(str)
|
@@ -34,6 +66,7 @@ module FeedParser
|
|
34
66
|
# get feed info
|
35
67
|
@encoding = doc.encoding
|
36
68
|
@title,@link,@description,@creator = nil
|
69
|
+
@title = ""
|
37
70
|
@items = []
|
38
71
|
if doc.root.elements['channel'] || doc.root.elements['rss:channel']
|
39
72
|
@type = "rss"
|
@@ -108,19 +141,28 @@ module FeedParser
|
|
108
141
|
s += "Type: #{@type}\n"
|
109
142
|
s += "Encoding: #{@encoding}\n"
|
110
143
|
s += "Title: #{@title}\n"
|
111
|
-
s += "Link: #{
|
144
|
+
s += "Link: #{link}\n"
|
112
145
|
s += "Description: #{@description}\n"
|
113
146
|
s += "Creator: #{@creator}\n"
|
114
147
|
s += "\n"
|
115
148
|
@items.each { |i| s += i.to_s(localtime) }
|
116
149
|
s
|
117
150
|
end
|
151
|
+
|
152
|
+
def parse_origin(uri)
|
153
|
+
uri = URI.parse(uri)
|
154
|
+
if uri.hostname && uri.scheme
|
155
|
+
@origin = "#{uri.scheme}://#{uri.hostname}"
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
attr_reader :origin
|
118
160
|
end
|
119
161
|
|
120
162
|
# an Item from a feed
|
121
163
|
class FeedItem
|
122
|
-
attr_accessor :title, :
|
123
|
-
:cacheditem
|
164
|
+
attr_accessor :title, :content, :date, :creators, :subject,
|
165
|
+
:cacheditem, :links
|
124
166
|
|
125
167
|
# The item's categories/tags. An array of strings.
|
126
168
|
attr_accessor :categories
|
@@ -137,9 +179,12 @@ module FeedParser
|
|
137
179
|
@xml = item
|
138
180
|
@feed = feed
|
139
181
|
@title, @link, @content, @date, @subject = nil
|
182
|
+
@links = []
|
140
183
|
@creators = []
|
141
184
|
@categories = []
|
142
185
|
@enclosures = []
|
186
|
+
|
187
|
+
@title = ""
|
143
188
|
parse(item) if item
|
144
189
|
end
|
145
190
|
|
@@ -160,7 +205,7 @@ module FeedParser
|
|
160
205
|
|
161
206
|
def to_s(localtime = true)
|
162
207
|
s = "--------------------------------\n" +
|
163
|
-
"Title: #{@title}\nLink: #{
|
208
|
+
"Title: #{@title}\nLink: #{link}\n"
|
164
209
|
if localtime or @date.nil?
|
165
210
|
s += "Date: #{@date.to_s}\n"
|
166
211
|
else
|
@@ -181,6 +226,22 @@ module FeedParser
|
|
181
226
|
end
|
182
227
|
return s
|
183
228
|
end
|
229
|
+
|
230
|
+
attr_writer :link
|
231
|
+
|
232
|
+
def link
|
233
|
+
if @link
|
234
|
+
uri = URI.parse(URI.escape(@link))
|
235
|
+
if uri.hostname && uri.scheme
|
236
|
+
@link
|
237
|
+
elsif feed && feed.origin
|
238
|
+
[feed.origin, @link].compact.join
|
239
|
+
else
|
240
|
+
@link
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
184
245
|
end
|
185
246
|
|
186
247
|
class RSSItem < FeedItem
|
@@ -199,7 +260,7 @@ module FeedParser
|
|
199
260
|
(e = item.elements['guid'] || item.elements['rss:guid'] and
|
200
261
|
not (e.attribute('isPermaLink') and
|
201
262
|
e.attribute('isPermaLink').value == 'false'))
|
202
|
-
|
263
|
+
self.link = e.text.rmWhiteSpace!
|
203
264
|
end
|
204
265
|
# Content
|
205
266
|
if (e = item.elements['content:encoded']) ||
|
@@ -261,8 +322,16 @@ module FeedParser
|
|
261
322
|
end
|
262
323
|
# Link
|
263
324
|
item.each_element('link') do |e|
|
325
|
+
|
264
326
|
if (h = e.attribute('href')) && h.value
|
265
|
-
|
327
|
+
self.link = h.value
|
328
|
+
|
329
|
+
if e.attribute('type')
|
330
|
+
@links << {:href => h.value, :type => e.attribute('type').value}
|
331
|
+
else
|
332
|
+
@links << {:href => h.value, :type => ''}
|
333
|
+
end
|
334
|
+
|
266
335
|
end
|
267
336
|
end
|
268
337
|
# Content
|
@@ -80,13 +80,13 @@ module FeedParser
|
|
80
80
|
s += (headline % ["Feed:", r])
|
81
81
|
|
82
82
|
r = ""
|
83
|
-
r += "<a href=\"#{
|
83
|
+
r += "<a href=\"#{link}\">" if link
|
84
84
|
if @title
|
85
85
|
r += "<b>#{@title.escape_html}</b>\n"
|
86
|
-
elsif
|
87
|
-
r += "<b>#{
|
86
|
+
elsif link
|
87
|
+
r += "<b>#{link.escape_html}</b>\n"
|
88
88
|
end
|
89
|
-
r += "</a>\n" if
|
89
|
+
r += "</a>\n" if link
|
90
90
|
s += (headline % ["Item:", r])
|
91
91
|
s += "</table></td></tr></table>\n"
|
92
92
|
s += "\n"
|
@@ -11,16 +11,16 @@ module FeedParser
|
|
11
11
|
@pre = false
|
12
12
|
@href = nil
|
13
13
|
@links = []
|
14
|
+
@curlink = []
|
14
15
|
@imgs = []
|
15
|
-
@img_index = '
|
16
|
+
@img_index = 'A'
|
16
17
|
super(verbose)
|
17
18
|
end
|
18
19
|
|
19
20
|
def next_img_index
|
20
|
-
|
21
|
-
@img_index =
|
22
|
-
|
23
|
-
return @img_index
|
21
|
+
idx = @img_index
|
22
|
+
@img_index = @img_index.next
|
23
|
+
idx
|
24
24
|
end
|
25
25
|
|
26
26
|
def handle_data(data)
|
@@ -29,7 +29,8 @@ module FeedParser
|
|
29
29
|
data.gsub!(/\n/, ' ')
|
30
30
|
data.gsub!(/( )+/, ' ')
|
31
31
|
end
|
32
|
-
|
32
|
+
data = FeedParser.recode(data)
|
33
|
+
@savedata << data.encode(Encoding::UTF_8)
|
33
34
|
end
|
34
35
|
|
35
36
|
def unknown_starttag(tag, attrs)
|
@@ -70,7 +71,14 @@ module FeedParser
|
|
70
71
|
end
|
71
72
|
end
|
72
73
|
if @href
|
73
|
-
@
|
74
|
+
@href.gsub!(/^("|'|)(.*)("|')$/,'\2')
|
75
|
+
@curlink = @links.find_index(@href)
|
76
|
+
if @curlink.nil?
|
77
|
+
@links << @href
|
78
|
+
@curlink = @links.length
|
79
|
+
else
|
80
|
+
@curlink += 1
|
81
|
+
end
|
74
82
|
end
|
75
83
|
when 'img'
|
76
84
|
# find src in args
|
@@ -81,8 +89,14 @@ module FeedParser
|
|
81
89
|
end
|
82
90
|
end
|
83
91
|
if src
|
84
|
-
|
85
|
-
@imgs
|
92
|
+
src.gsub!(/^("|'|)(.*)("|')$/,'\2')
|
93
|
+
i = @imgs.index { |e| e[1] == src }
|
94
|
+
if i.nil?
|
95
|
+
idx = next_img_index
|
96
|
+
@imgs << [ idx, src ]
|
97
|
+
else
|
98
|
+
idx = @imgs[i][0]
|
99
|
+
end
|
86
100
|
@savedata << "[#{idx}]"
|
87
101
|
end
|
88
102
|
else
|
@@ -125,7 +139,7 @@ module FeedParser
|
|
125
139
|
@pre = false
|
126
140
|
when 'a'
|
127
141
|
if @href
|
128
|
-
@savedata << "[#{@
|
142
|
+
@savedata << "[#{@curlink}]"
|
129
143
|
@href = nil
|
130
144
|
end
|
131
145
|
end
|
@@ -293,12 +293,11 @@ module FeedParser
|
|
293
293
|
end
|
294
294
|
|
295
295
|
def handle_charref(name)
|
296
|
-
|
297
|
-
if !(0 <= n && n <= 255)
|
296
|
+
if name =~ /[0-9]+/
|
298
297
|
unknown_charref(name)
|
299
|
-
|
298
|
+
else
|
299
|
+
handle_data(name)
|
300
300
|
end
|
301
|
-
handle_data(n.chr)
|
302
301
|
end
|
303
302
|
|
304
303
|
def handle_entityref(name)
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'feedparser'
|
2
1
|
require 'feedparser/html2text-parser'
|
3
2
|
require 'feedparser/filesizes'
|
4
3
|
|
@@ -61,7 +60,7 @@ module FeedParser
|
|
61
60
|
if header
|
62
61
|
s += "Item: "
|
63
62
|
s += @title if @title
|
64
|
-
s += "\n<#{
|
63
|
+
s += "\n<#{link}>" if link
|
65
64
|
if @date
|
66
65
|
if localtime
|
67
66
|
s += "\nDate: #{@date.to_s}"
|
@@ -71,7 +70,7 @@ module FeedParser
|
|
71
70
|
end
|
72
71
|
s += "\n"
|
73
72
|
else
|
74
|
-
s += "<#{
|
73
|
+
s += "<#{link}>\n\n" if link
|
75
74
|
end
|
76
75
|
s += "#{@content.html2text(wrapto).chomp}\n" if @content
|
77
76
|
if @enclosures and @enclosures.length > 0
|
@@ -89,7 +88,7 @@ module FeedParser
|
|
89
88
|
if not header
|
90
89
|
s += "\nItem: "
|
91
90
|
s += @title if @title
|
92
|
-
s += "\n<#{
|
91
|
+
s += "\n<#{link}>" if link
|
93
92
|
if @date
|
94
93
|
if localtime
|
95
94
|
s += "\nDate: #{@date.to_s}"
|
data/test/tc_feed_parse.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# encoding: UTF-8
|
2
2
|
|
3
3
|
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
4
|
|
@@ -114,4 +114,51 @@ class FeedParserTest < Test::Unit::TestCase
|
|
114
114
|
# the third one should be removed because an enclosure should have an url, or it's useless.
|
115
115
|
assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
|
116
116
|
end
|
117
|
+
|
118
|
+
def test_recode_utf8
|
119
|
+
assert_equal 'UTF-8', FeedParser.recode("áéíóú").encoding.name
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_recode_iso88519
|
123
|
+
assert_equal 'UTF-8', FeedParser.recode("áéíóú".encode('iso-8859-1')).encoding.name
|
124
|
+
end
|
125
|
+
|
126
|
+
def test_recode_utf8_mixed_with_ASCIIBIT
|
127
|
+
recoded = FeedParser.recode("áé\x8Díóú")
|
128
|
+
assert_equal'UTF-8', recoded.encoding.name
|
129
|
+
assert_equal 'áéíóú', recoded
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_recode_unicode_char
|
133
|
+
assert_equal "1280×1024", FeedParser.recode("1280×1024")
|
134
|
+
end
|
135
|
+
|
136
|
+
def test_almost_valid_iso88591
|
137
|
+
input = "Codifica\xE7\xE3o \x96 quase v\xE1lida"
|
138
|
+
assert_equal "Codificação quase válida", FeedParser.recode(input)
|
139
|
+
end
|
140
|
+
|
141
|
+
def test_feed_origin
|
142
|
+
feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
|
143
|
+
assert_equal "http://foo.com", feed.origin
|
144
|
+
end
|
145
|
+
|
146
|
+
def test_item_origin
|
147
|
+
feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
|
148
|
+
item = FeedParser::FeedItem.new(nil, feed)
|
149
|
+
item.link = '/foo/bar'
|
150
|
+
assert_equal 'http://foo.com/foo/bar', item.link
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_item_origin_no_link
|
154
|
+
item = FeedParser::FeedItem.new(nil, nil)
|
155
|
+
assert_nil item.link
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_item_no_feed
|
159
|
+
item = FeedParser::FeedItem.new(nil, nil)
|
160
|
+
item.link = '/foo/bar'
|
161
|
+
assert_equal '/foo/bar', item.link
|
162
|
+
end
|
163
|
+
|
117
164
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
require 'feedparser/feedparser'
|
6
|
+
|
7
|
+
class Html2TextParserTest < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def test_next_img_index
|
10
|
+
parser = FeedParser::HTML2TextParser.new
|
11
|
+
assert_equal 'A', parser.next_img_index
|
12
|
+
assert_equal 'B', parser.next_img_index
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_numerical_entity
|
16
|
+
parser = FeedParser::HTML2TextParser.new
|
17
|
+
parser.feed('1280&#215;1024')
|
18
|
+
parser.close
|
19
|
+
assert_equal "1280×1024", parser.savedata
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_numerical_entity_large_known
|
23
|
+
parser = FeedParser::HTML2TextParser.new
|
24
|
+
parser.feed('&#8594;')
|
25
|
+
parser.close
|
26
|
+
assert_equal "→", parser.savedata
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_numerical_entity_large
|
30
|
+
parser = FeedParser::HTML2TextParser.new
|
31
|
+
parser.feed('&#10000;')
|
32
|
+
parser.close
|
33
|
+
assert_equal "✐", parser.savedata
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_non_numerical_entity
|
37
|
+
parser = FeedParser::HTML2TextParser.new
|
38
|
+
parser.feed('HTML&amp;CO')
|
39
|
+
parser.close
|
40
|
+
assert_equal "HTML&CO", parser.savedata
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
data/test/tc_htmloutput.rb
CHANGED
@@ -19,12 +19,10 @@ class HTMLOutputTest < Test::Unit::TestCase
|
|
19
19
|
else
|
20
20
|
raise 'source directory not found.'
|
21
21
|
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
27
|
-
puts "Checking #{f}"
|
22
|
+
Dir.foreach(SRCDIR) do |f|
|
23
|
+
next if f !~ /.xml$/
|
24
|
+
testname = 'test_' + File.basename(f).gsub(/\W/, '_')
|
25
|
+
define_method(testname) do
|
28
26
|
str = File::read(SRCDIR + '/' + f)
|
29
27
|
chan = FeedParser::Feed::new(str)
|
30
28
|
chanstr = chan.to_html(false)
|
@@ -34,19 +32,21 @@ class HTMLOutputTest < Test::Unit::TestCase
|
|
34
32
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
35
33
|
fd.print(chanstr)
|
36
34
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
35
|
+
assert(
|
36
|
+
false,
|
37
|
+
[
|
38
|
+
"Test failed for #{f}.",
|
39
|
+
" Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
|
40
|
+
" Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
|
41
|
+
].join("\n")
|
42
|
+
)
|
41
43
|
end
|
42
44
|
else
|
43
|
-
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
44
45
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
45
46
|
f.print(chanstr)
|
46
47
|
end
|
47
|
-
|
48
|
+
assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
|
48
49
|
end
|
49
50
|
end
|
50
|
-
assert(allok)
|
51
51
|
end
|
52
52
|
end
|
data/test/tc_parser.rb
CHANGED
@@ -15,12 +15,10 @@ class ParserTest < Test::Unit::TestCase
|
|
15
15
|
else
|
16
16
|
raise 'source directory not found.'
|
17
17
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
-
puts "Checking #{f}"
|
18
|
+
Dir.foreach(SRCDIR) do |f|
|
19
|
+
next if f !~ /.xml$/
|
20
|
+
testname = 'test_' + File.basename(f).gsub(/\W/, '_')
|
21
|
+
define_method(testname) do
|
24
22
|
str = File::read(SRCDIR + '/' + f)
|
25
23
|
chan = FeedParser::Feed::new(str)
|
26
24
|
chanstr = chan.to_s(false)
|
@@ -30,19 +28,21 @@ class ParserTest < Test::Unit::TestCase
|
|
30
28
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
29
|
fd.print(chanstr)
|
32
30
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
assert(
|
32
|
+
false,
|
33
|
+
[
|
34
|
+
"Test failed for #{f}.",
|
35
|
+
" Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
|
36
|
+
" Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
|
37
|
+
].join("\n")
|
38
|
+
)
|
37
39
|
end
|
38
40
|
else
|
39
|
-
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
41
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
42
|
f.print(chanstr)
|
42
43
|
end
|
43
|
-
|
44
|
+
assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
|
44
45
|
end
|
45
46
|
end
|
46
|
-
assert(allok)
|
47
47
|
end
|
48
48
|
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'mocha/setup'
|
5
|
+
|
6
|
+
require 'feedparser/sgml-parser'
|
7
|
+
|
8
|
+
class SGMLParserTest < Test::Unit::TestCase
|
9
|
+
|
10
|
+
def test_numerical_charref
|
11
|
+
parser = FeedParser::SGMLParser.new
|
12
|
+
parser.expects(:unknown_charref).with('215')
|
13
|
+
parser.handle_charref('215')
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_non_numerical_charref
|
17
|
+
parser = FeedParser::SGMLParser.new
|
18
|
+
parser.expects(:handle_data).with('amp')
|
19
|
+
parser.handle_charref('amp')
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
data/test/tc_textoutput.rb
CHANGED
@@ -15,12 +15,10 @@ class TextOutputTest < Test::Unit::TestCase
|
|
15
15
|
else
|
16
16
|
raise 'source directory not found.'
|
17
17
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
-
puts "Checking #{f}"
|
18
|
+
Dir.foreach(SRCDIR) do |f|
|
19
|
+
next if f !~ /.xml$/
|
20
|
+
testname = 'test_' + File.basename(f).gsub(/\W/, '_')
|
21
|
+
define_method(testname) do
|
24
22
|
str = File::read(SRCDIR + '/' + f)
|
25
23
|
chan = FeedParser::Feed::new(str)
|
26
24
|
chanstr = chan.to_text(false) # localtime set to false
|
@@ -30,19 +28,21 @@ class TextOutputTest < Test::Unit::TestCase
|
|
30
28
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
29
|
fd.print(chanstr)
|
32
30
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
assert(
|
32
|
+
false,
|
33
|
+
[
|
34
|
+
"Test failed for #{f}.",
|
35
|
+
" Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
|
36
|
+
" Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
|
37
|
+
].join("\n")
|
38
|
+
)
|
37
39
|
end
|
38
40
|
else
|
39
|
-
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
41
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
42
|
f.print(chanstr)
|
42
43
|
end
|
43
|
-
|
44
|
+
assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
|
44
45
|
end
|
45
46
|
end
|
46
|
-
assert(allok)
|
47
47
|
end
|
48
48
|
end
|
@@ -15,12 +15,10 @@ class TextWrappedOutputTest < Test::Unit::TestCase
|
|
15
15
|
else
|
16
16
|
raise 'source directory not found.'
|
17
17
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
-
puts "Checking #{f}"
|
18
|
+
Dir.foreach(SRCDIR) do |f|
|
19
|
+
next if f !~ /.xml$/
|
20
|
+
testname = 'test_' + File.basename(f).gsub(/\W/, '_')
|
21
|
+
define_method(testname) do
|
24
22
|
str = File::read(SRCDIR + '/' + f)
|
25
23
|
chan = FeedParser::Feed::new(str)
|
26
24
|
chanstr = chan.to_text(false, 72) # localtime set to false
|
@@ -30,19 +28,21 @@ class TextWrappedOutputTest < Test::Unit::TestCase
|
|
30
28
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
29
|
fd.print(chanstr)
|
32
30
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
assert(
|
32
|
+
false,
|
33
|
+
[
|
34
|
+
"Test failed for #{f}.",
|
35
|
+
" Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
|
36
|
+
" Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
|
37
|
+
].join("\n")
|
38
|
+
)
|
37
39
|
end
|
38
40
|
else
|
39
|
-
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
41
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
42
|
f.print(chanstr)
|
42
43
|
end
|
43
|
-
|
44
|
+
assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
|
44
45
|
end
|
45
46
|
end
|
46
|
-
assert(allok)
|
47
47
|
end
|
48
48
|
end
|
metadata
CHANGED
@@ -1,76 +1,85 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-feedparser
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.3
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
-
|
7
|
+
authors:
|
8
|
+
- Lucas Nussbaum
|
8
9
|
autorequire: feedparser
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
date: 2014-02-05 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: magic
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
16
30
|
description: Ruby library to parse ATOM and RSS feeds
|
17
31
|
email:
|
18
32
|
executables: []
|
19
|
-
|
20
33
|
extensions: []
|
21
|
-
|
22
34
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
files:
|
35
|
+
files:
|
25
36
|
- ChangeLog
|
26
37
|
- README
|
27
38
|
- COPYING
|
28
39
|
- LICENSE
|
29
40
|
- setup.rb
|
30
41
|
- Rakefile
|
31
|
-
- lib/feedparser/
|
42
|
+
- lib/feedparser/feedparser.rb
|
32
43
|
- lib/feedparser/filesizes.rb
|
33
44
|
- lib/feedparser/html-output.rb
|
34
|
-
- lib/feedparser/rexml_patch.rb
|
35
45
|
- lib/feedparser/html2text-parser.rb
|
36
|
-
- lib/feedparser/
|
37
|
-
- lib/feedparser/feedparser.rb
|
46
|
+
- lib/feedparser/rexml_patch.rb
|
38
47
|
- lib/feedparser/sgml-parser.rb
|
48
|
+
- lib/feedparser/text-output.rb
|
49
|
+
- lib/feedparser/textconverters.rb
|
39
50
|
- lib/feedparser.rb
|
40
51
|
- test/tc_feed_parse.rb
|
41
|
-
- test/
|
52
|
+
- test/tc_html2text_parser.rb
|
42
53
|
- test/tc_htmloutput.rb
|
54
|
+
- test/tc_parser.rb
|
55
|
+
- test/tc_sgml_parser.rb
|
56
|
+
- test/tc_textoutput.rb
|
43
57
|
- test/tc_textwrappedoutput.rb
|
44
58
|
- test/ts_feedparser.rb
|
45
|
-
- test/tc_parser.rb
|
46
59
|
- tools/doctoweb.bash
|
47
|
-
has_rdoc: true
|
48
60
|
homepage:
|
49
61
|
licenses: []
|
50
|
-
|
51
62
|
post_install_message:
|
52
63
|
rdoc_options: []
|
53
|
-
|
54
|
-
require_paths:
|
64
|
+
require_paths:
|
55
65
|
- lib
|
56
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
requirements:
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ! '>='
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements:
|
69
79
|
- none
|
70
80
|
rubyforge_project:
|
71
|
-
rubygems_version: 1.
|
81
|
+
rubygems_version: 1.8.23
|
72
82
|
signing_key:
|
73
83
|
specification_version: 3
|
74
84
|
summary: Ruby library to parse ATOM and RSS feeds
|
75
85
|
test_files: []
|
76
|
-
|