ruby-feedparser 0.7 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +7 -7
- data/lib/feedparser/feedparser.rb +77 -8
- data/lib/feedparser/html-output.rb +4 -4
- data/lib/feedparser/html2text-parser.rb +24 -10
- data/lib/feedparser/sgml-parser.rb +3 -4
- data/lib/feedparser/text-output.rb +3 -4
- data/test/tc_feed_parse.rb +48 -1
- data/test/tc_html2text_parser.rb +43 -0
- data/test/tc_htmloutput.rb +13 -13
- data/test/tc_parser.rb +13 -13
- data/test/tc_sgml_parser.rb +22 -0
- data/test/tc_textoutput.rb +13 -13
- data/test/tc_textwrappedoutput.rb +13 -13
- metadata +48 -39
data/Rakefile
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'rake/testtask'
|
2
|
-
require '
|
3
|
-
require '
|
2
|
+
require 'rdoc/task'
|
3
|
+
require 'rubygems/package_task'
|
4
4
|
require 'rake'
|
5
5
|
require 'find'
|
6
6
|
|
7
7
|
# Globals
|
8
8
|
PKG_NAME = 'ruby-feedparser'
|
9
|
-
PKG_VERSION = '
|
9
|
+
PKG_VERSION = `ruby -Ilib -rfeedparser/feedparser -e 'puts FeedParser::VERSION'`.strip
|
10
10
|
|
11
11
|
PKG_FILES = [ 'ChangeLog', 'README', 'COPYING', 'LICENSE', 'setup.rb', 'Rakefile']
|
12
12
|
Find.find('lib/', 'test/', 'tools/') do |f|
|
@@ -19,7 +19,7 @@ end
|
|
19
19
|
|
20
20
|
PKG_FILES.reject! { |f| f =~ /^test\/(source|.*_output)\// }
|
21
21
|
|
22
|
-
task :default => [:
|
22
|
+
task :default => [:test]
|
23
23
|
|
24
24
|
Rake::TestTask.new do |t|
|
25
25
|
t.libs << "test"
|
@@ -61,8 +61,6 @@ end
|
|
61
61
|
|
62
62
|
# "Gem" part of the Rakefile
|
63
63
|
begin
|
64
|
-
require 'rake/gempackagetask'
|
65
|
-
|
66
64
|
spec = Gem::Specification.new do |s|
|
67
65
|
s.platform = Gem::Platform::RUBY
|
68
66
|
s.summary = "Ruby library to parse ATOM and RSS feeds"
|
@@ -73,9 +71,11 @@ begin
|
|
73
71
|
s.autorequire = 'feedparser'
|
74
72
|
s.files = PKG_FILES
|
75
73
|
s.description = "Ruby library to parse ATOM and RSS feeds"
|
74
|
+
s.authors = ['Lucas Nussbaum']
|
75
|
+
s.add_runtime_dependency 'magic'
|
76
76
|
end
|
77
77
|
|
78
|
-
|
78
|
+
Gem::PackageTask.new(spec) do |pkg|
|
79
79
|
pkg.need_zip = true
|
80
80
|
pkg.need_tar = true
|
81
81
|
end
|
@@ -4,14 +4,43 @@ require 'feedparser/textconverters'
|
|
4
4
|
require 'feedparser/rexml_patch'
|
5
5
|
require 'feedparser/text-output'
|
6
6
|
require 'base64'
|
7
|
+
require 'magic'
|
8
|
+
require 'uri'
|
7
9
|
|
8
10
|
module FeedParser
|
9
11
|
|
10
|
-
VERSION = "0.
|
12
|
+
VERSION = "0.9.3"
|
11
13
|
|
12
14
|
class UnknownFeedTypeException < RuntimeError
|
13
15
|
end
|
14
16
|
|
17
|
+
def self.recode(str)
|
18
|
+
encoding = nil
|
19
|
+
begin
|
20
|
+
encoding = Magic.guess_string_mime_encoding(str)
|
21
|
+
rescue Magic::Exception
|
22
|
+
# this happens when magic does not find any content at all, e.g. with
|
23
|
+
# strings that contain only whitespace. In these case it *should* be safe
|
24
|
+
# to assume UTF-8
|
25
|
+
encoding = Encoding::UTF_8
|
26
|
+
end
|
27
|
+
if encoding == 'unknown-8bit'
|
28
|
+
# find first substring with a valid encoding that is not us-ascii
|
29
|
+
length = 1 # has to start at 1, magic requires at least 2 bytes
|
30
|
+
while length < str.length && ['us-ascii', 'unknown-8bit'].include?(encoding)
|
31
|
+
encoding = Magic.guess_string_mime_encoding(str[0..length])
|
32
|
+
length = length + 1
|
33
|
+
end
|
34
|
+
# need to remove iso-8859-1 control characters
|
35
|
+
if encoding == 'iso-8859-1'
|
36
|
+
str = str.bytes.select { |c| c < 128 || c > 159 }.map(&:chr).join
|
37
|
+
end
|
38
|
+
end
|
39
|
+
str.force_encoding(encoding)
|
40
|
+
str = str.chars.select { |c| c.valid_encoding? }.join
|
41
|
+
str.encode('UTF-8')
|
42
|
+
end
|
43
|
+
|
15
44
|
# an RSS/Atom feed
|
16
45
|
class Feed
|
17
46
|
attr_reader :type, :title, :link, :description, :creator, :encoding, :items
|
@@ -20,13 +49,16 @@ module FeedParser
|
|
20
49
|
attr_reader :xml
|
21
50
|
|
22
51
|
# parse str to build a Feed
|
23
|
-
def initialize(str = nil)
|
52
|
+
def initialize(str = nil, uri = nil)
|
24
53
|
parse(str) if str
|
54
|
+
parse_origin(uri) if uri
|
25
55
|
end
|
26
56
|
|
27
57
|
# Determines all the fields using a string containing an
|
28
58
|
# XML document
|
29
59
|
def parse(str)
|
60
|
+
str = FeedParser.recode(str)
|
61
|
+
|
30
62
|
# Dirty hack: some feeds contain the & char. It must be changed to &amp;
|
31
63
|
str.gsub!(/&(\s+)/, '&amp;\1')
|
32
64
|
doc = REXML::Document.new(str)
|
@@ -34,6 +66,7 @@ module FeedParser
|
|
34
66
|
# get feed info
|
35
67
|
@encoding = doc.encoding
|
36
68
|
@title,@link,@description,@creator = nil
|
69
|
+
@title = ""
|
37
70
|
@items = []
|
38
71
|
if doc.root.elements['channel'] || doc.root.elements['rss:channel']
|
39
72
|
@type = "rss"
|
@@ -108,19 +141,28 @@ module FeedParser
|
|
108
141
|
s += "Type: #{@type}\n"
|
109
142
|
s += "Encoding: #{@encoding}\n"
|
110
143
|
s += "Title: #{@title}\n"
|
111
|
-
s += "Link: #{
|
144
|
+
s += "Link: #{link}\n"
|
112
145
|
s += "Description: #{@description}\n"
|
113
146
|
s += "Creator: #{@creator}\n"
|
114
147
|
s += "\n"
|
115
148
|
@items.each { |i| s += i.to_s(localtime) }
|
116
149
|
s
|
117
150
|
end
|
151
|
+
|
152
|
+
def parse_origin(uri)
|
153
|
+
uri = URI.parse(uri)
|
154
|
+
if uri.hostname && uri.scheme
|
155
|
+
@origin = "#{uri.scheme}://#{uri.hostname}"
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
attr_reader :origin
|
118
160
|
end
|
119
161
|
|
120
162
|
# an Item from a feed
|
121
163
|
class FeedItem
|
122
|
-
attr_accessor :title, :
|
123
|
-
:cacheditem
|
164
|
+
attr_accessor :title, :content, :date, :creators, :subject,
|
165
|
+
:cacheditem, :links
|
124
166
|
|
125
167
|
# The item's categories/tags. An array of strings.
|
126
168
|
attr_accessor :categories
|
@@ -137,9 +179,12 @@ module FeedParser
|
|
137
179
|
@xml = item
|
138
180
|
@feed = feed
|
139
181
|
@title, @link, @content, @date, @subject = nil
|
182
|
+
@links = []
|
140
183
|
@creators = []
|
141
184
|
@categories = []
|
142
185
|
@enclosures = []
|
186
|
+
|
187
|
+
@title = ""
|
143
188
|
parse(item) if item
|
144
189
|
end
|
145
190
|
|
@@ -160,7 +205,7 @@ module FeedParser
|
|
160
205
|
|
161
206
|
def to_s(localtime = true)
|
162
207
|
s = "--------------------------------\n" +
|
163
|
-
"Title: #{@title}\nLink: #{
|
208
|
+
"Title: #{@title}\nLink: #{link}\n"
|
164
209
|
if localtime or @date.nil?
|
165
210
|
s += "Date: #{@date.to_s}\n"
|
166
211
|
else
|
@@ -181,6 +226,22 @@ module FeedParser
|
|
181
226
|
end
|
182
227
|
return s
|
183
228
|
end
|
229
|
+
|
230
|
+
attr_writer :link
|
231
|
+
|
232
|
+
def link
|
233
|
+
if @link
|
234
|
+
uri = URI.parse(URI.escape(@link))
|
235
|
+
if uri.hostname && uri.scheme
|
236
|
+
@link
|
237
|
+
elsif feed && feed.origin
|
238
|
+
[feed.origin, @link].compact.join
|
239
|
+
else
|
240
|
+
@link
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
184
245
|
end
|
185
246
|
|
186
247
|
class RSSItem < FeedItem
|
@@ -199,7 +260,7 @@ module FeedParser
|
|
199
260
|
(e = item.elements['guid'] || item.elements['rss:guid'] and
|
200
261
|
not (e.attribute('isPermaLink') and
|
201
262
|
e.attribute('isPermaLink').value == 'false'))
|
202
|
-
|
263
|
+
self.link = e.text.rmWhiteSpace!
|
203
264
|
end
|
204
265
|
# Content
|
205
266
|
if (e = item.elements['content:encoded']) ||
|
@@ -261,8 +322,16 @@ module FeedParser
|
|
261
322
|
end
|
262
323
|
# Link
|
263
324
|
item.each_element('link') do |e|
|
325
|
+
|
264
326
|
if (h = e.attribute('href')) && h.value
|
265
|
-
|
327
|
+
self.link = h.value
|
328
|
+
|
329
|
+
if e.attribute('type')
|
330
|
+
@links << {:href => h.value, :type => e.attribute('type').value}
|
331
|
+
else
|
332
|
+
@links << {:href => h.value, :type => ''}
|
333
|
+
end
|
334
|
+
|
266
335
|
end
|
267
336
|
end
|
268
337
|
# Content
|
@@ -80,13 +80,13 @@ module FeedParser
|
|
80
80
|
s += (headline % ["Feed:", r])
|
81
81
|
|
82
82
|
r = ""
|
83
|
-
r += "<a href=\"#{
|
83
|
+
r += "<a href=\"#{link}\">" if link
|
84
84
|
if @title
|
85
85
|
r += "<b>#{@title.escape_html}</b>\n"
|
86
|
-
elsif
|
87
|
-
r += "<b>#{
|
86
|
+
elsif link
|
87
|
+
r += "<b>#{link.escape_html}</b>\n"
|
88
88
|
end
|
89
|
-
r += "</a>\n" if
|
89
|
+
r += "</a>\n" if link
|
90
90
|
s += (headline % ["Item:", r])
|
91
91
|
s += "</table></td></tr></table>\n"
|
92
92
|
s += "\n"
|
@@ -11,16 +11,16 @@ module FeedParser
|
|
11
11
|
@pre = false
|
12
12
|
@href = nil
|
13
13
|
@links = []
|
14
|
+
@curlink = []
|
14
15
|
@imgs = []
|
15
|
-
@img_index = '
|
16
|
+
@img_index = 'A'
|
16
17
|
super(verbose)
|
17
18
|
end
|
18
19
|
|
19
20
|
def next_img_index
|
20
|
-
|
21
|
-
@img_index =
|
22
|
-
|
23
|
-
return @img_index
|
21
|
+
idx = @img_index
|
22
|
+
@img_index = @img_index.next
|
23
|
+
idx
|
24
24
|
end
|
25
25
|
|
26
26
|
def handle_data(data)
|
@@ -29,7 +29,8 @@ module FeedParser
|
|
29
29
|
data.gsub!(/\n/, ' ')
|
30
30
|
data.gsub!(/( )+/, ' ')
|
31
31
|
end
|
32
|
-
|
32
|
+
data = FeedParser.recode(data)
|
33
|
+
@savedata << data.encode(Encoding::UTF_8)
|
33
34
|
end
|
34
35
|
|
35
36
|
def unknown_starttag(tag, attrs)
|
@@ -70,7 +71,14 @@ module FeedParser
|
|
70
71
|
end
|
71
72
|
end
|
72
73
|
if @href
|
73
|
-
@
|
74
|
+
@href.gsub!(/^("|'|)(.*)("|')$/,'\2')
|
75
|
+
@curlink = @links.find_index(@href)
|
76
|
+
if @curlink.nil?
|
77
|
+
@links << @href
|
78
|
+
@curlink = @links.length
|
79
|
+
else
|
80
|
+
@curlink += 1
|
81
|
+
end
|
74
82
|
end
|
75
83
|
when 'img'
|
76
84
|
# find src in args
|
@@ -81,8 +89,14 @@ module FeedParser
|
|
81
89
|
end
|
82
90
|
end
|
83
91
|
if src
|
84
|
-
|
85
|
-
@imgs
|
92
|
+
src.gsub!(/^("|'|)(.*)("|')$/,'\2')
|
93
|
+
i = @imgs.index { |e| e[1] == src }
|
94
|
+
if i.nil?
|
95
|
+
idx = next_img_index
|
96
|
+
@imgs << [ idx, src ]
|
97
|
+
else
|
98
|
+
idx = @imgs[i][0]
|
99
|
+
end
|
86
100
|
@savedata << "[#{idx}]"
|
87
101
|
end
|
88
102
|
else
|
@@ -125,7 +139,7 @@ module FeedParser
|
|
125
139
|
@pre = false
|
126
140
|
when 'a'
|
127
141
|
if @href
|
128
|
-
@savedata << "[#{@
|
142
|
+
@savedata << "[#{@curlink}]"
|
129
143
|
@href = nil
|
130
144
|
end
|
131
145
|
end
|
@@ -293,12 +293,11 @@ module FeedParser
|
|
293
293
|
end
|
294
294
|
|
295
295
|
def handle_charref(name)
|
296
|
-
|
297
|
-
if !(0 <= n && n <= 255)
|
296
|
+
if name =~ /[0-9]+/
|
298
297
|
unknown_charref(name)
|
299
|
-
|
298
|
+
else
|
299
|
+
handle_data(name)
|
300
300
|
end
|
301
|
-
handle_data(n.chr)
|
302
301
|
end
|
303
302
|
|
304
303
|
def handle_entityref(name)
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'feedparser'
|
2
1
|
require 'feedparser/html2text-parser'
|
3
2
|
require 'feedparser/filesizes'
|
4
3
|
|
@@ -61,7 +60,7 @@ module FeedParser
|
|
61
60
|
if header
|
62
61
|
s += "Item: "
|
63
62
|
s += @title if @title
|
64
|
-
s += "\n<#{
|
63
|
+
s += "\n<#{link}>" if link
|
65
64
|
if @date
|
66
65
|
if localtime
|
67
66
|
s += "\nDate: #{@date.to_s}"
|
@@ -71,7 +70,7 @@ module FeedParser
|
|
71
70
|
end
|
72
71
|
s += "\n"
|
73
72
|
else
|
74
|
-
s += "<#{
|
73
|
+
s += "<#{link}>\n\n" if link
|
75
74
|
end
|
76
75
|
s += "#{@content.html2text(wrapto).chomp}\n" if @content
|
77
76
|
if @enclosures and @enclosures.length > 0
|
@@ -89,7 +88,7 @@ module FeedParser
|
|
89
88
|
if not header
|
90
89
|
s += "\nItem: "
|
91
90
|
s += @title if @title
|
92
|
-
s += "\n<#{
|
91
|
+
s += "\n<#{link}>" if link
|
93
92
|
if @date
|
94
93
|
if localtime
|
95
94
|
s += "\nDate: #{@date.to_s}"
|
data/test/tc_feed_parse.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# encoding: UTF-8
|
2
2
|
|
3
3
|
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
4
|
|
@@ -114,4 +114,51 @@ class FeedParserTest < Test::Unit::TestCase
|
|
114
114
|
# the third one should be removed because an enclosure should have an url, or it's useless.
|
115
115
|
assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
|
116
116
|
end
|
117
|
+
|
118
|
+
def test_recode_utf8
|
119
|
+
assert_equal 'UTF-8', FeedParser.recode("áéíóú").encoding.name
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_recode_iso88519
|
123
|
+
assert_equal 'UTF-8', FeedParser.recode("áéíóú".encode('iso-8859-1')).encoding.name
|
124
|
+
end
|
125
|
+
|
126
|
+
def test_recode_utf8_mixed_with_ASCIIBIT
|
127
|
+
recoded = FeedParser.recode("áé\x8Díóú")
|
128
|
+
assert_equal'UTF-8', recoded.encoding.name
|
129
|
+
assert_equal 'áéíóú', recoded
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_recode_unicode_char
|
133
|
+
assert_equal "1280×1024", FeedParser.recode("1280×1024")
|
134
|
+
end
|
135
|
+
|
136
|
+
def test_almost_valid_iso88591
|
137
|
+
input = "Codifica\xE7\xE3o \x96 quase v\xE1lida"
|
138
|
+
assert_equal "Codificação quase válida", FeedParser.recode(input)
|
139
|
+
end
|
140
|
+
|
141
|
+
def test_feed_origin
|
142
|
+
feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
|
143
|
+
assert_equal "http://foo.com", feed.origin
|
144
|
+
end
|
145
|
+
|
146
|
+
def test_item_origin
|
147
|
+
feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
|
148
|
+
item = FeedParser::FeedItem.new(nil, feed)
|
149
|
+
item.link = '/foo/bar'
|
150
|
+
assert_equal 'http://foo.com/foo/bar', item.link
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_item_origin_no_link
|
154
|
+
item = FeedParser::FeedItem.new(nil, nil)
|
155
|
+
assert_nil item.link
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_item_no_feed
|
159
|
+
item = FeedParser::FeedItem.new(nil, nil)
|
160
|
+
item.link = '/foo/bar'
|
161
|
+
assert_equal '/foo/bar', item.link
|
162
|
+
end
|
163
|
+
|
117
164
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
require 'feedparser/feedparser'
|
6
|
+
|
7
|
+
class Html2TextParserTest < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def test_next_img_index
|
10
|
+
parser = FeedParser::HTML2TextParser.new
|
11
|
+
assert_equal 'A', parser.next_img_index
|
12
|
+
assert_equal 'B', parser.next_img_index
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_numerical_entity
|
16
|
+
parser = FeedParser::HTML2TextParser.new
|
17
|
+
parser.feed('1280&#215;1024')
|
18
|
+
parser.close
|
19
|
+
assert_equal "1280×1024", parser.savedata
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_numerical_entity_large_known
|
23
|
+
parser = FeedParser::HTML2TextParser.new
|
24
|
+
parser.feed('&#8594;')
|
25
|
+
parser.close
|
26
|
+
assert_equal "→", parser.savedata
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_numerical_entity_large
|
30
|
+
parser = FeedParser::HTML2TextParser.new
|
31
|
+
parser.feed('&#10000;')
|
32
|
+
parser.close
|
33
|
+
assert_equal "✐", parser.savedata
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_non_numerical_entity
|
37
|
+
parser = FeedParser::HTML2TextParser.new
|
38
|
+
parser.feed('HTML&amp;CO')
|
39
|
+
parser.close
|
40
|
+
assert_equal "HTML&CO", parser.savedata
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
data/test/tc_htmloutput.rb
CHANGED
@@ -19,12 +19,10 @@ class HTMLOutputTest < Test::Unit::TestCase
|
|
19
19
|
else
|
20
20
|
raise 'source directory not found.'
|
21
21
|
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
27
|
-
puts "Checking #{f}"
|
22
|
+
Dir.foreach(SRCDIR) do |f|
|
23
|
+
next if f !~ /.xml$/
|
24
|
+
testname = 'test_' + File.basename(f).gsub(/\W/, '_')
|
25
|
+
define_method(testname) do
|
28
26
|
str = File::read(SRCDIR + '/' + f)
|
29
27
|
chan = FeedParser::Feed::new(str)
|
30
28
|
chanstr = chan.to_html(false)
|
@@ -34,19 +32,21 @@ class HTMLOutputTest < Test::Unit::TestCase
|
|
34
32
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
35
33
|
fd.print(chanstr)
|
36
34
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
35
|
+
assert(
|
36
|
+
false,
|
37
|
+
[
|
38
|
+
"Test failed for #{f}.",
|
39
|
+
" Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
|
40
|
+
" Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
|
41
|
+
].join("\n")
|
42
|
+
)
|
41
43
|
end
|
42
44
|
else
|
43
|
-
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
44
45
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
45
46
|
f.print(chanstr)
|
46
47
|
end
|
47
|
-
|
48
|
+
assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
|
48
49
|
end
|
49
50
|
end
|
50
|
-
assert(allok)
|
51
51
|
end
|
52
52
|
end
|
data/test/tc_parser.rb
CHANGED
@@ -15,12 +15,10 @@ class ParserTest < Test::Unit::TestCase
|
|
15
15
|
else
|
16
16
|
raise 'source directory not found.'
|
17
17
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
-
puts "Checking #{f}"
|
18
|
+
Dir.foreach(SRCDIR) do |f|
|
19
|
+
next if f !~ /.xml$/
|
20
|
+
testname = 'test_' + File.basename(f).gsub(/\W/, '_')
|
21
|
+
define_method(testname) do
|
24
22
|
str = File::read(SRCDIR + '/' + f)
|
25
23
|
chan = FeedParser::Feed::new(str)
|
26
24
|
chanstr = chan.to_s(false)
|
@@ -30,19 +28,21 @@ class ParserTest < Test::Unit::TestCase
|
|
30
28
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
29
|
fd.print(chanstr)
|
32
30
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
assert(
|
32
|
+
false,
|
33
|
+
[
|
34
|
+
"Test failed for #{f}.",
|
35
|
+
" Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
|
36
|
+
" Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
|
37
|
+
].join("\n")
|
38
|
+
)
|
37
39
|
end
|
38
40
|
else
|
39
|
-
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
41
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
42
|
f.print(chanstr)
|
42
43
|
end
|
43
|
-
|
44
|
+
assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
|
44
45
|
end
|
45
46
|
end
|
46
|
-
assert(allok)
|
47
47
|
end
|
48
48
|
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'mocha/setup'
|
5
|
+
|
6
|
+
require 'feedparser/sgml-parser'
|
7
|
+
|
8
|
+
class SGMLParserTest < Test::Unit::TestCase
|
9
|
+
|
10
|
+
def test_numerical_charref
|
11
|
+
parser = FeedParser::SGMLParser.new
|
12
|
+
parser.expects(:unknown_charref).with('215')
|
13
|
+
parser.handle_charref('215')
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_non_numerical_charref
|
17
|
+
parser = FeedParser::SGMLParser.new
|
18
|
+
parser.expects(:handle_data).with('amp')
|
19
|
+
parser.handle_charref('amp')
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
data/test/tc_textoutput.rb
CHANGED
@@ -15,12 +15,10 @@ class TextOutputTest < Test::Unit::TestCase
|
|
15
15
|
else
|
16
16
|
raise 'source directory not found.'
|
17
17
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
-
puts "Checking #{f}"
|
18
|
+
Dir.foreach(SRCDIR) do |f|
|
19
|
+
next if f !~ /.xml$/
|
20
|
+
testname = 'test_' + File.basename(f).gsub(/\W/, '_')
|
21
|
+
define_method(testname) do
|
24
22
|
str = File::read(SRCDIR + '/' + f)
|
25
23
|
chan = FeedParser::Feed::new(str)
|
26
24
|
chanstr = chan.to_text(false) # localtime set to false
|
@@ -30,19 +28,21 @@ class TextOutputTest < Test::Unit::TestCase
|
|
30
28
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
29
|
fd.print(chanstr)
|
32
30
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
assert(
|
32
|
+
false,
|
33
|
+
[
|
34
|
+
"Test failed for #{f}.",
|
35
|
+
" Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
|
36
|
+
" Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
|
37
|
+
].join("\n")
|
38
|
+
)
|
37
39
|
end
|
38
40
|
else
|
39
|
-
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
41
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
42
|
f.print(chanstr)
|
42
43
|
end
|
43
|
-
|
44
|
+
assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
|
44
45
|
end
|
45
46
|
end
|
46
|
-
assert(allok)
|
47
47
|
end
|
48
48
|
end
|
@@ -15,12 +15,10 @@ class TextWrappedOutputTest < Test::Unit::TestCase
|
|
15
15
|
else
|
16
16
|
raise 'source directory not found.'
|
17
17
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
-
puts "Checking #{f}"
|
18
|
+
Dir.foreach(SRCDIR) do |f|
|
19
|
+
next if f !~ /.xml$/
|
20
|
+
testname = 'test_' + File.basename(f).gsub(/\W/, '_')
|
21
|
+
define_method(testname) do
|
24
22
|
str = File::read(SRCDIR + '/' + f)
|
25
23
|
chan = FeedParser::Feed::new(str)
|
26
24
|
chanstr = chan.to_text(false, 72) # localtime set to false
|
@@ -30,19 +28,21 @@ class TextWrappedOutputTest < Test::Unit::TestCase
|
|
30
28
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
29
|
fd.print(chanstr)
|
32
30
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
assert(
|
32
|
+
false,
|
33
|
+
[
|
34
|
+
"Test failed for #{f}.",
|
35
|
+
" Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
|
36
|
+
" Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
|
37
|
+
].join("\n")
|
38
|
+
)
|
37
39
|
end
|
38
40
|
else
|
39
|
-
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
41
|
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
42
|
f.print(chanstr)
|
42
43
|
end
|
43
|
-
|
44
|
+
assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
|
44
45
|
end
|
45
46
|
end
|
46
|
-
assert(allok)
|
47
47
|
end
|
48
48
|
end
|
metadata
CHANGED
@@ -1,76 +1,85 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-feedparser
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.3
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
-
|
7
|
+
authors:
|
8
|
+
- Lucas Nussbaum
|
8
9
|
autorequire: feedparser
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
date: 2014-02-05 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: magic
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
16
30
|
description: Ruby library to parse ATOM and RSS feeds
|
17
31
|
email:
|
18
32
|
executables: []
|
19
|
-
|
20
33
|
extensions: []
|
21
|
-
|
22
34
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
files:
|
35
|
+
files:
|
25
36
|
- ChangeLog
|
26
37
|
- README
|
27
38
|
- COPYING
|
28
39
|
- LICENSE
|
29
40
|
- setup.rb
|
30
41
|
- Rakefile
|
31
|
-
- lib/feedparser/
|
42
|
+
- lib/feedparser/feedparser.rb
|
32
43
|
- lib/feedparser/filesizes.rb
|
33
44
|
- lib/feedparser/html-output.rb
|
34
|
-
- lib/feedparser/rexml_patch.rb
|
35
45
|
- lib/feedparser/html2text-parser.rb
|
36
|
-
- lib/feedparser/
|
37
|
-
- lib/feedparser/feedparser.rb
|
46
|
+
- lib/feedparser/rexml_patch.rb
|
38
47
|
- lib/feedparser/sgml-parser.rb
|
48
|
+
- lib/feedparser/text-output.rb
|
49
|
+
- lib/feedparser/textconverters.rb
|
39
50
|
- lib/feedparser.rb
|
40
51
|
- test/tc_feed_parse.rb
|
41
|
-
- test/
|
52
|
+
- test/tc_html2text_parser.rb
|
42
53
|
- test/tc_htmloutput.rb
|
54
|
+
- test/tc_parser.rb
|
55
|
+
- test/tc_sgml_parser.rb
|
56
|
+
- test/tc_textoutput.rb
|
43
57
|
- test/tc_textwrappedoutput.rb
|
44
58
|
- test/ts_feedparser.rb
|
45
|
-
- test/tc_parser.rb
|
46
59
|
- tools/doctoweb.bash
|
47
|
-
has_rdoc: true
|
48
60
|
homepage:
|
49
61
|
licenses: []
|
50
|
-
|
51
62
|
post_install_message:
|
52
63
|
rdoc_options: []
|
53
|
-
|
54
|
-
require_paths:
|
64
|
+
require_paths:
|
55
65
|
- lib
|
56
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
requirements:
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ! '>='
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements:
|
69
79
|
- none
|
70
80
|
rubyforge_project:
|
71
|
-
rubygems_version: 1.
|
81
|
+
rubygems_version: 1.8.23
|
72
82
|
signing_key:
|
73
83
|
specification_version: 3
|
74
84
|
summary: Ruby library to parse ATOM and RSS feeds
|
75
85
|
test_files: []
|
76
|
-
|