ruby-feedparser 0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +340 -0
- data/ChangeLog +59 -0
- data/LICENSE +60 -0
- data/README +14 -0
- data/Rakefile +84 -0
- data/lib/feedparser.rb +28 -0
- data/lib/feedparser/feedparser.rb +343 -0
- data/lib/feedparser/filesizes.rb +14 -0
- data/lib/feedparser/html-output.rb +126 -0
- data/lib/feedparser/html2text-parser.rb +413 -0
- data/lib/feedparser/rexml_patch.rb +28 -0
- data/lib/feedparser/sgml-parser.rb +332 -0
- data/lib/feedparser/text-output.rb +108 -0
- data/lib/feedparser/textconverters.rb +120 -0
- data/setup.rb +1586 -0
- data/test/tc_feed_parse.rb +117 -0
- data/test/tc_htmloutput.rb +52 -0
- data/test/tc_parser.rb +48 -0
- data/test/tc_textoutput.rb +48 -0
- data/test/tc_textwrappedoutput.rb +48 -0
- data/test/ts_feedparser.rb +12 -0
- data/tools/doctoweb.bash +30 -0
- metadata +76 -0
@@ -0,0 +1,117 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'feedparser'
|
7
|
+
|
8
|
+
# This class includes some basic tests of the parser. More detailed testing is
|
9
|
+
# done by tc_parser.rb
|
10
|
+
class FeedParserTest < Test::Unit::TestCase
|
11
|
+
# From http://my.netscape.com/publish/formats/rss-spec-0.91.html
|
12
|
+
def test_parse_rss091_1
|
13
|
+
ch = FeedParser::Feed::new <<-EOF
|
14
|
+
<?xml version="1.0"?>
|
15
|
+
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
|
16
|
+
<rss version="0.91">
|
17
|
+
<channel>
|
18
|
+
<language>en</language>
|
19
|
+
<description>News and commentary from the cross-platform scripting community.</description>
|
20
|
+
<link>http://www.scripting.com/</link>
|
21
|
+
<title>Scripting News</title>
|
22
|
+
<image>
|
23
|
+
<link>http://www.scripting.com/</link>
|
24
|
+
<title>Scripting News</title>
|
25
|
+
<url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
|
26
|
+
</image>
|
27
|
+
</channel>
|
28
|
+
</rss>
|
29
|
+
EOF
|
30
|
+
assert_equal('Scripting News', ch.title)
|
31
|
+
assert_equal('http://www.scripting.com/', ch.link)
|
32
|
+
assert_equal('News and commentary from the cross-platform scripting community.', ch.description)
|
33
|
+
assert_equal([], ch.items)
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_parse_rss091_complete
|
37
|
+
ch = FeedParser::Feed::new <<-EOF
|
38
|
+
<?xml version="1.0"?>
|
39
|
+
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
|
40
|
+
<rss version="0.91">
|
41
|
+
<channel>
|
42
|
+
<copyright>Copyright 1997-1999 UserLand Software, Inc.</copyright>
|
43
|
+
<pubDate>Thu, 08 Jul 1999 07:00:00 GMT</pubDate>
|
44
|
+
<lastBuildDate>Thu, 08 Jul 1999 16:20:26 GMT</lastBuildDate>
|
45
|
+
<docs>http://my.userland.com/stories/storyReader$11</docs>
|
46
|
+
<description>News and commentary from the cross-platform scripting community.</description>
|
47
|
+
<link>http://www.scripting.com/</link>
|
48
|
+
<title>Scripting News</title>
|
49
|
+
<image>
|
50
|
+
<link>http://www.scripting.com/</link>
|
51
|
+
<title>Scripting News</title>
|
52
|
+
<url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
|
53
|
+
<height>40</height>
|
54
|
+
<width>78</width>
|
55
|
+
<description>What is this used for?</description>
|
56
|
+
</image>
|
57
|
+
<managingEditor>dave@userland.com (Dave Winer)</managingEditor>
|
58
|
+
<webMaster>dave@userland.com (Dave Winer)</webMaster>
|
59
|
+
<language>en-us</language>
|
60
|
+
<skipHours>
|
61
|
+
<hour>6</hour><hour>7</hour><hour>8</hour><hour>9</hour><hour>10</hour><hour>11</hour>
|
62
|
+
</skipHours>
|
63
|
+
<skipDays>
|
64
|
+
<day>Sunday</day>
|
65
|
+
</skipDays>
|
66
|
+
<rating>(PICS-1.1 "http://www.rsac.org/ratingsv01.html" l gen true comment "RSACi North America Server" for "http://www.rsac.org" on "1996.04.16T08:15-0500" r (n 0 s 0 v 0 l 0))</rating>
|
67
|
+
<item>
|
68
|
+
<title>stuff</title>
|
69
|
+
<link>http://bar</link>
|
70
|
+
<description>This is an article about some stuff</description>
|
71
|
+
</item>
|
72
|
+
<item>
|
73
|
+
<title>second item's title</title>
|
74
|
+
<link>http://link2</link>
|
75
|
+
<description>aa bb cc
|
76
|
+
dd ee ff</description>
|
77
|
+
</item>
|
78
|
+
<textinput>
|
79
|
+
<title>Search Now!</title>
|
80
|
+
<description>Enter your search <terms></description>
|
81
|
+
<name>find</name>
|
82
|
+
<link>http://my.site.com/search.cgi</link>
|
83
|
+
</textinput>
|
84
|
+
</channel>
|
85
|
+
</rss>
|
86
|
+
EOF
|
87
|
+
assert_equal('Scripting News', ch.title)
|
88
|
+
assert_equal('http://www.scripting.com/', ch.link)
|
89
|
+
assert_equal('News and commentary from the cross-platform scripting community.', ch.description)
|
90
|
+
assert_equal(2, ch.items.length)
|
91
|
+
assert_equal('http://bar', ch.items[0].link)
|
92
|
+
assert_equal('<p>This is an article about some stuff</p>', ch.items[0].content)
|
93
|
+
assert_equal('stuff', ch.items[0].title)
|
94
|
+
assert_equal('http://link2', ch.items[1].link)
|
95
|
+
assert_equal("<p>aa bb cc\n dd ee ff</p>", ch.items[1].content)
|
96
|
+
assert_equal('second item\'s title', ch.items[1].title)
|
97
|
+
end
|
98
|
+
|
99
|
+
def test_enclosures
|
100
|
+
ch = FeedParser::Feed::new <<-EOF
|
101
|
+
<?xml version="1.0"?>
|
102
|
+
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
|
103
|
+
<rss version="0.91">
|
104
|
+
<channel>
|
105
|
+
<item>
|
106
|
+
<enclosure url="url1" length="1" type="type1"/>
|
107
|
+
<enclosure url="url2" type="type2"/>
|
108
|
+
<enclosure length="3" type="type3"/>
|
109
|
+
<enclosure url="url1" length="1"/>
|
110
|
+
</item>
|
111
|
+
</channel>
|
112
|
+
</rss>
|
113
|
+
EOF
|
114
|
+
# the third one should be removed because an enclosure should have an url, or it's useless.
|
115
|
+
assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
|
116
|
+
end
|
117
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'test')
|
5
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
6
|
+
$:.unshift File.join(File.dirname(__FILE__), 'test')
|
7
|
+
|
8
|
+
require 'test/unit'
|
9
|
+
require 'feedparser'
|
10
|
+
require 'feedparser/html-output'
|
11
|
+
|
12
|
+
class HTMLOutputTest < Test::Unit::TestCase
|
13
|
+
if File::directory?('test/source')
|
14
|
+
SRCDIR = 'test/source'
|
15
|
+
DSTDIR = 'test/html_output'
|
16
|
+
elsif File::directory?('source')
|
17
|
+
SRCDIR = 'source'
|
18
|
+
DSTDIR = 'html_output'
|
19
|
+
else
|
20
|
+
raise 'source directory not found.'
|
21
|
+
end
|
22
|
+
def test_parser
|
23
|
+
allok = true
|
24
|
+
Dir.foreach(SRCDIR) do |f|
|
25
|
+
next if f !~ /.xml$/
|
26
|
+
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
27
|
+
puts "Checking #{f}"
|
28
|
+
str = File::read(SRCDIR + '/' + f)
|
29
|
+
chan = FeedParser::Feed::new(str)
|
30
|
+
chanstr = chan.to_html(false)
|
31
|
+
if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
|
32
|
+
output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
|
33
|
+
if output != chanstr
|
34
|
+
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
35
|
+
fd.print(chanstr)
|
36
|
+
end
|
37
|
+
puts "Test failed for #{f}."
|
38
|
+
puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
|
39
|
+
puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
|
40
|
+
allok = false
|
41
|
+
end
|
42
|
+
else
|
43
|
+
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
44
|
+
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
45
|
+
f.print(chanstr)
|
46
|
+
end
|
47
|
+
allok = false
|
48
|
+
end
|
49
|
+
end
|
50
|
+
assert(allok)
|
51
|
+
end
|
52
|
+
end
|
data/test/tc_parser.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'feedparser'
|
7
|
+
|
8
|
+
class ParserTest < Test::Unit::TestCase
|
9
|
+
if File::directory?('test/source')
|
10
|
+
SRCDIR = 'test/source'
|
11
|
+
DSTDIR = 'test/parser_output'
|
12
|
+
elsif File::directory?('source')
|
13
|
+
SRCDIR = 'source'
|
14
|
+
DSTDIR = 'parser_output'
|
15
|
+
else
|
16
|
+
raise 'source directory not found.'
|
17
|
+
end
|
18
|
+
def test_parser
|
19
|
+
allok = true
|
20
|
+
Dir.foreach(SRCDIR) do |f|
|
21
|
+
next if f !~ /.xml$/
|
22
|
+
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
+
puts "Checking #{f}"
|
24
|
+
str = File::read(SRCDIR + '/' + f)
|
25
|
+
chan = FeedParser::Feed::new(str)
|
26
|
+
chanstr = chan.to_s(false)
|
27
|
+
if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
|
28
|
+
output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
|
29
|
+
if output != chanstr
|
30
|
+
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
|
+
fd.print(chanstr)
|
32
|
+
end
|
33
|
+
puts "Test failed for #{f}."
|
34
|
+
puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
|
35
|
+
puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
|
36
|
+
allok = false
|
37
|
+
end
|
38
|
+
else
|
39
|
+
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
|
+
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
|
+
f.print(chanstr)
|
42
|
+
end
|
43
|
+
allok = false
|
44
|
+
end
|
45
|
+
end
|
46
|
+
assert(allok)
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'feedparser'
|
7
|
+
|
8
|
+
class TextOutputTest < Test::Unit::TestCase
|
9
|
+
if File::directory?('test/source')
|
10
|
+
SRCDIR = 'test/source'
|
11
|
+
DSTDIR = 'test/text_output'
|
12
|
+
elsif File::directory?('source')
|
13
|
+
SRCDIR = 'source'
|
14
|
+
DSTDIR = 'text_output'
|
15
|
+
else
|
16
|
+
raise 'source directory not found.'
|
17
|
+
end
|
18
|
+
def test_parser
|
19
|
+
allok = true
|
20
|
+
Dir.foreach(SRCDIR) do |f|
|
21
|
+
next if f !~ /.xml$/
|
22
|
+
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
+
puts "Checking #{f}"
|
24
|
+
str = File::read(SRCDIR + '/' + f)
|
25
|
+
chan = FeedParser::Feed::new(str)
|
26
|
+
chanstr = chan.to_text(false) # localtime set to false
|
27
|
+
if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
|
28
|
+
output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
|
29
|
+
if output != chanstr
|
30
|
+
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
|
+
fd.print(chanstr)
|
32
|
+
end
|
33
|
+
puts "Test failed for #{f}."
|
34
|
+
puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
|
35
|
+
puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
|
36
|
+
allok = false
|
37
|
+
end
|
38
|
+
else
|
39
|
+
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
|
+
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
|
+
f.print(chanstr)
|
42
|
+
end
|
43
|
+
allok = false
|
44
|
+
end
|
45
|
+
end
|
46
|
+
assert(allok)
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'feedparser'
|
7
|
+
|
8
|
+
class TextWrappedOutputTest < Test::Unit::TestCase
|
9
|
+
if File::directory?('test/source')
|
10
|
+
SRCDIR = 'test/source'
|
11
|
+
DSTDIR = 'test/textwrapped_output'
|
12
|
+
elsif File::directory?('source')
|
13
|
+
SRCDIR = 'source'
|
14
|
+
DSTDIR = 'textwrapped_output'
|
15
|
+
else
|
16
|
+
raise 'source directory not found.'
|
17
|
+
end
|
18
|
+
def test_parser
|
19
|
+
allok = true
|
20
|
+
Dir.foreach(SRCDIR) do |f|
|
21
|
+
next if f !~ /.xml$/
|
22
|
+
next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
|
23
|
+
puts "Checking #{f}"
|
24
|
+
str = File::read(SRCDIR + '/' + f)
|
25
|
+
chan = FeedParser::Feed::new(str)
|
26
|
+
chanstr = chan.to_text(false, 72) # localtime set to false
|
27
|
+
if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
|
28
|
+
output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
|
29
|
+
if output != chanstr
|
30
|
+
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
|
31
|
+
fd.print(chanstr)
|
32
|
+
end
|
33
|
+
puts "Test failed for #{f}."
|
34
|
+
puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
|
35
|
+
puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
|
36
|
+
allok = false
|
37
|
+
end
|
38
|
+
else
|
39
|
+
puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
|
40
|
+
File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
|
41
|
+
f.print(chanstr)
|
42
|
+
end
|
43
|
+
allok = false
|
44
|
+
end
|
45
|
+
end
|
46
|
+
assert(allok)
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'test')
|
5
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
6
|
+
$:.unshift File.join(File.dirname(__FILE__), 'test')
|
7
|
+
|
8
|
+
require 'tc_feed_parse'
|
9
|
+
require 'tc_htmloutput'
|
10
|
+
require 'tc_parser'
|
11
|
+
require 'tc_textoutput'
|
12
|
+
require 'tc_textwrappedoutput'
|
data/tools/doctoweb.bash
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
if [ -z $CVSDIR ]; then
|
4
|
+
CVSDIR=$HOME/dev/ruby-feedparser/website
|
5
|
+
fi
|
6
|
+
|
7
|
+
TARGET=$CVSDIR/rdoc
|
8
|
+
|
9
|
+
echo "Copying rdoc documentation to $TARGET."
|
10
|
+
|
11
|
+
if [ ! -d $TARGET ]; then
|
12
|
+
echo "$TARGET doesn't exist, exiting."
|
13
|
+
exit 1
|
14
|
+
fi
|
15
|
+
rsync -a rdoc/ $TARGET/
|
16
|
+
|
17
|
+
echo "###########################################################"
|
18
|
+
echo "CVS status :"
|
19
|
+
cd $TARGET
|
20
|
+
svn st
|
21
|
+
echo "CVS Adding files."
|
22
|
+
while [ $(svn st | grep "^? " | wc -l) -gt 0 ]; do
|
23
|
+
svn add $(svn st | grep "^? " | awk '{print $2}')
|
24
|
+
done
|
25
|
+
echo "###########################################################"
|
26
|
+
echo "CVS status after adding missing files:"
|
27
|
+
svn st
|
28
|
+
echo "Commit changes now with"
|
29
|
+
echo "# (cd $TARGET && svn commit -m \"rdoc update\")"
|
30
|
+
exit 0
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby-feedparser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.7"
|
5
|
+
platform: ruby
|
6
|
+
authors: []
|
7
|
+
|
8
|
+
autorequire: feedparser
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-07-27 00:00:00 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Ruby library to parse ATOM and RSS feeds
|
17
|
+
email:
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- ChangeLog
|
26
|
+
- README
|
27
|
+
- COPYING
|
28
|
+
- LICENSE
|
29
|
+
- setup.rb
|
30
|
+
- Rakefile
|
31
|
+
- lib/feedparser/text-output.rb
|
32
|
+
- lib/feedparser/filesizes.rb
|
33
|
+
- lib/feedparser/html-output.rb
|
34
|
+
- lib/feedparser/rexml_patch.rb
|
35
|
+
- lib/feedparser/html2text-parser.rb
|
36
|
+
- lib/feedparser/textconverters.rb
|
37
|
+
- lib/feedparser/feedparser.rb
|
38
|
+
- lib/feedparser/sgml-parser.rb
|
39
|
+
- lib/feedparser.rb
|
40
|
+
- test/tc_feed_parse.rb
|
41
|
+
- test/tc_textoutput.rb
|
42
|
+
- test/tc_htmloutput.rb
|
43
|
+
- test/tc_textwrappedoutput.rb
|
44
|
+
- test/ts_feedparser.rb
|
45
|
+
- test/tc_parser.rb
|
46
|
+
- tools/doctoweb.bash
|
47
|
+
has_rdoc: true
|
48
|
+
homepage:
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
version:
|
68
|
+
requirements:
|
69
|
+
- none
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.3.4
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: Ruby library to parse ATOM and RSS feeds
|
75
|
+
test_files: []
|
76
|
+
|