ruby-feedparser 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +340 -0
- data/ChangeLog +59 -0
- data/LICENSE +60 -0
- data/README +14 -0
- data/Rakefile +84 -0
- data/lib/feedparser.rb +28 -0
- data/lib/feedparser/feedparser.rb +343 -0
- data/lib/feedparser/filesizes.rb +14 -0
- data/lib/feedparser/html-output.rb +126 -0
- data/lib/feedparser/html2text-parser.rb +413 -0
- data/lib/feedparser/rexml_patch.rb +28 -0
- data/lib/feedparser/sgml-parser.rb +332 -0
- data/lib/feedparser/text-output.rb +108 -0
- data/lib/feedparser/textconverters.rb +120 -0
- data/setup.rb +1586 -0
- data/test/tc_feed_parse.rb +117 -0
- data/test/tc_htmloutput.rb +52 -0
- data/test/tc_parser.rb +48 -0
- data/test/tc_textoutput.rb +48 -0
- data/test/tc_textwrappedoutput.rb +48 -0
- data/test/ts_feedparser.rb +12 -0
- data/tools/doctoweb.bash +30 -0
- metadata +76 -0
@@ -0,0 +1,117 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'feedparser'
|
7
|
+
|
8
|
+
# This class includes some basic tests of the parser. More detailed testing is
|
9
|
+
# done by tc_parser.rb
|
10
|
+
# Basic smoke tests for FeedParser::Feed, driven by small inline RSS 0.91
# documents. Each test builds a feed from a heredoc and checks the values
# exposed by the parsed channel and its items.
class FeedParserTest < Test::Unit::TestCase
  # Minimal channel without any item.
  # From http://my.netscape.com/publish/formats/rss-spec-0.91.html
  def test_parse_rss091_1
    ch = FeedParser::Feed::new <<-EOF
<?xml version="1.0"?>
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
<channel>
<language>en</language>
<description>News and commentary from the cross-platform scripting community.</description>
<link>http://www.scripting.com/</link>
<title>Scripting News</title>
<image>
<link>http://www.scripting.com/</link>
<title>Scripting News</title>
<url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
</image>
</channel>
</rss>
    EOF
    assert_equal('Scripting News', ch.title)
    assert_equal('http://www.scripting.com/', ch.link)
    assert_equal('News and commentary from the cross-platform scripting community.', ch.description)
    assert_equal([], ch.items)
  end

  # Channel using most optional RSS 0.91 elements plus two items.
  def test_parse_rss091_complete
    # The angle brackets around "terms" in the textinput description are
    # written as entities: a raw <terms> tag would never be closed and would
    # make the whole document malformed XML.
    ch = FeedParser::Feed::new <<-EOF
<?xml version="1.0"?>
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
<channel>
<copyright>Copyright 1997-1999 UserLand Software, Inc.</copyright>
<pubDate>Thu, 08 Jul 1999 07:00:00 GMT</pubDate>
<lastBuildDate>Thu, 08 Jul 1999 16:20:26 GMT</lastBuildDate>
<docs>http://my.userland.com/stories/storyReader$11</docs>
<description>News and commentary from the cross-platform scripting community.</description>
<link>http://www.scripting.com/</link>
<title>Scripting News</title>
<image>
<link>http://www.scripting.com/</link>
<title>Scripting News</title>
<url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
<height>40</height>
<width>78</width>
<description>What is this used for?</description>
</image>
<managingEditor>dave@userland.com (Dave Winer)</managingEditor>
<webMaster>dave@userland.com (Dave Winer)</webMaster>
<language>en-us</language>
<skipHours>
<hour>6</hour><hour>7</hour><hour>8</hour><hour>9</hour><hour>10</hour><hour>11</hour>
</skipHours>
<skipDays>
<day>Sunday</day>
</skipDays>
<rating>(PICS-1.1 "http://www.rsac.org/ratingsv01.html" l gen true comment "RSACi North America Server" for "http://www.rsac.org" on "1996.04.16T08:15-0500" r (n 0 s 0 v 0 l 0))</rating>
<item>
<title>stuff</title>
<link>http://bar</link>
<description>This is an article about some stuff</description>
</item>
<item>
<title>second item's title</title>
<link>http://link2</link>
<description>aa bb cc
 dd ee ff</description>
</item>
<textinput>
<title>Search Now!</title>
<description>Enter your search &lt;terms&gt;</description>
<name>find</name>
<link>http://my.site.com/search.cgi</link>
</textinput>
</channel>
</rss>
    EOF
    assert_equal('Scripting News', ch.title)
    assert_equal('http://www.scripting.com/', ch.link)
    assert_equal('News and commentary from the cross-platform scripting community.', ch.description)
    assert_equal(2, ch.items.length)
    assert_equal('http://bar', ch.items[0].link)
    assert_equal('<p>This is an article about some stuff</p>', ch.items[0].content)
    assert_equal('stuff', ch.items[0].title)
    assert_equal('http://link2', ch.items[1].link)
    # The second description spans two lines; the continuation line in the
    # fixture starts with the single space expected here.
    assert_equal("<p>aa bb cc\n dd ee ff</p>", ch.items[1].content)
    assert_equal('second item\'s title', ch.items[1].title)
  end

  # Enclosures are expected as [url, length, type] triples, with nil for a
  # missing attribute.
  def test_enclosures
    ch = FeedParser::Feed::new <<-EOF
<?xml version="1.0"?>
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
<channel>
<item>
<enclosure url="url1" length="1" type="type1"/>
<enclosure url="url2" type="type2"/>
<enclosure length="3" type="type3"/>
<enclosure url="url1" length="1"/>
</item>
</channel>
</rss>
    EOF
    # The third one should be removed because an enclosure without a URL is useless.
    assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'test')
|
5
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
6
|
+
$:.unshift File.join(File.dirname(__FILE__), 'test')
|
7
|
+
|
8
|
+
require 'test/unit'
|
9
|
+
require 'feedparser'
|
10
|
+
require 'feedparser/html-output'
|
11
|
+
|
12
|
+
# Golden-file test for the HTML output: every "*.xml" feed found in SRCDIR is
# parsed, rendered with to_html, and compared with the stored
# "<name>.output" file in DSTDIR.
class HTMLOutputTest < Test::Unit::TestCase
  # Pick the fixture directories depending on whether the current directory
  # is the project root or the test directory itself.
  if File.directory?('test/source')
    SRCDIR = 'test/source'
    DSTDIR = 'test/html_output'
  elsif File.directory?('source')
    SRCDIR = 'source'
    DSTDIR = 'html_output'
  else
    raise 'source directory not found.'
  end

  # Compares every source feed with its reference output. On a mismatch the
  # fresh rendering is saved next to the reference as "<name>.output.new";
  # when the reference is missing it is created. Both cases fail the test.
  # Set the SOURCE environment variable to a file name to check only that feed.
  def test_parser
    allok = true
    Dir.foreach(SRCDIR) do |f|
      # The dot is escaped so that only names really ending in ".xml" match.
      next if f !~ /\.xml\z/
      next if ENV['SOURCE'] && ENV['SOURCE'] != f
      puts "Checking #{f}"
      str = File.read(SRCDIR + '/' + f)
      chan = FeedParser::Feed.new(str)
      # NOTE(review): the meaning of the `false` argument is defined by
      # to_html, which is not visible from this file.
      chanstr = chan.to_html(false)
      reference = DSTDIR + '/' + f.sub(/\.xml\z/, '.output')
      if File.exist?(reference)
        if File.read(reference) != chanstr
          File.open(reference + '.new', "w") { |fd| fd.print(chanstr) }
          puts "Test failed for #{f}."
          puts " Check: diff -u #{reference}{,.new}"
          puts " Commit: mv -f #{reference}{.new,}"
          allok = false
        end
      else
        puts "Missing #{reference}. Writing it, but check manually!"
        # Use a distinct block variable: reusing `f` here would shadow (and,
        # on Ruby 1.8, overwrite) the file name of the enclosing loop.
        File.open(reference, "w") { |fd| fd.print(chanstr) }
        allok = false
      end
    end
    assert(allok)
  end
end
|
data/test/tc_parser.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'feedparser'
|
7
|
+
|
8
|
+
# Golden-file test for the parser: every "*.xml" feed found in SRCDIR is
# parsed, dumped with to_s, and compared with the stored "<name>.output"
# file in DSTDIR.
class ParserTest < Test::Unit::TestCase
  # Pick the fixture directories depending on whether the current directory
  # is the project root or the test directory itself.
  if File.directory?('test/source')
    SRCDIR = 'test/source'
    DSTDIR = 'test/parser_output'
  elsif File.directory?('source')
    SRCDIR = 'source'
    DSTDIR = 'parser_output'
  else
    raise 'source directory not found.'
  end

  # Compares every source feed with its reference output. On a mismatch the
  # fresh dump is saved next to the reference as "<name>.output.new"; when
  # the reference is missing it is created. Both cases fail the test.
  # Set the SOURCE environment variable to a file name to check only that feed.
  def test_parser
    allok = true
    Dir.foreach(SRCDIR) do |f|
      # The dot is escaped so that only names really ending in ".xml" match.
      next if f !~ /\.xml\z/
      next if ENV['SOURCE'] && ENV['SOURCE'] != f
      puts "Checking #{f}"
      str = File.read(SRCDIR + '/' + f)
      chan = FeedParser::Feed.new(str)
      # NOTE(review): the meaning of the `false` argument is defined by
      # Feed#to_s, which is not visible from this file.
      chanstr = chan.to_s(false)
      reference = DSTDIR + '/' + f.sub(/\.xml\z/, '.output')
      if File.exist?(reference)
        if File.read(reference) != chanstr
          File.open(reference + '.new', "w") { |fd| fd.print(chanstr) }
          puts "Test failed for #{f}."
          puts " Check: diff -u #{reference}{,.new}"
          puts " Commit: mv -f #{reference}{.new,}"
          allok = false
        end
      else
        puts "Missing #{reference}. Writing it, but check manually!"
        # Use a distinct block variable: reusing `f` here would shadow (and,
        # on Ruby 1.8, overwrite) the file name of the enclosing loop.
        File.open(reference, "w") { |fd| fd.print(chanstr) }
        allok = false
      end
    end
    assert(allok)
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'feedparser'
|
7
|
+
|
8
|
+
# Golden-file test for the plain-text output: every "*.xml" feed found in
# SRCDIR is parsed, rendered with to_text, and compared with the stored
# "<name>.output" file in DSTDIR.
class TextOutputTest < Test::Unit::TestCase
  # Pick the fixture directories depending on whether the current directory
  # is the project root or the test directory itself.
  if File.directory?('test/source')
    SRCDIR = 'test/source'
    DSTDIR = 'test/text_output'
  elsif File.directory?('source')
    SRCDIR = 'source'
    DSTDIR = 'text_output'
  else
    raise 'source directory not found.'
  end

  # Compares every source feed with its reference output. On a mismatch the
  # fresh rendering is saved next to the reference as "<name>.output.new";
  # when the reference is missing it is created. Both cases fail the test.
  # Set the SOURCE environment variable to a file name to check only that feed.
  def test_parser
    allok = true
    Dir.foreach(SRCDIR) do |f|
      # The dot is escaped so that only names really ending in ".xml" match.
      next if f !~ /\.xml\z/
      next if ENV['SOURCE'] && ENV['SOURCE'] != f
      puts "Checking #{f}"
      str = File.read(SRCDIR + '/' + f)
      chan = FeedParser::Feed.new(str)
      chanstr = chan.to_text(false) # localtime set to false
      reference = DSTDIR + '/' + f.sub(/\.xml\z/, '.output')
      if File.exist?(reference)
        if File.read(reference) != chanstr
          File.open(reference + '.new', "w") { |fd| fd.print(chanstr) }
          puts "Test failed for #{f}."
          puts " Check: diff -u #{reference}{,.new}"
          puts " Commit: mv -f #{reference}{.new,}"
          allok = false
        end
      else
        puts "Missing #{reference}. Writing it, but check manually!"
        # Use a distinct block variable: reusing `f` here would shadow (and,
        # on Ruby 1.8, overwrite) the file name of the enclosing loop.
        File.open(reference, "w") { |fd| fd.print(chanstr) }
        allok = false
      end
    end
    assert(allok)
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'feedparser'
|
7
|
+
|
8
|
+
# Golden-file test for the wrapped plain-text output: every "*.xml" feed
# found in SRCDIR is parsed, rendered with to_text(false, 72), and compared
# with the stored "<name>.output" file in DSTDIR.
class TextWrappedOutputTest < Test::Unit::TestCase
  # Pick the fixture directories depending on whether the current directory
  # is the project root or the test directory itself.
  if File.directory?('test/source')
    SRCDIR = 'test/source'
    DSTDIR = 'test/textwrapped_output'
  elsif File.directory?('source')
    SRCDIR = 'source'
    DSTDIR = 'textwrapped_output'
  else
    raise 'source directory not found.'
  end

  # Compares every source feed with its reference output. On a mismatch the
  # fresh rendering is saved next to the reference as "<name>.output.new";
  # when the reference is missing it is created. Both cases fail the test.
  # Set the SOURCE environment variable to a file name to check only that feed.
  def test_parser
    allok = true
    Dir.foreach(SRCDIR) do |f|
      # The dot is escaped so that only names really ending in ".xml" match.
      next if f !~ /\.xml\z/
      next if ENV['SOURCE'] && ENV['SOURCE'] != f
      puts "Checking #{f}"
      str = File.read(SRCDIR + '/' + f)
      chan = FeedParser::Feed.new(str)
      # NOTE(review): 72 is presumably the wrap width in columns, judging by
      # the name of this test class; confirm against to_text.
      chanstr = chan.to_text(false, 72) # localtime set to false
      reference = DSTDIR + '/' + f.sub(/\.xml\z/, '.output')
      if File.exist?(reference)
        if File.read(reference) != chanstr
          File.open(reference + '.new', "w") { |fd| fd.print(chanstr) }
          puts "Test failed for #{f}."
          puts " Check: diff -u #{reference}{,.new}"
          puts " Commit: mv -f #{reference}{.new,}"
          allok = false
        end
      else
        puts "Missing #{reference}. Writing it, but check manually!"
        # Use a distinct block variable: reusing `f` here would shadow (and,
        # on Ruby 1.8, overwrite) the file name of the enclosing loop.
        File.open(reference, "w") { |fd| fd.print(chanstr) }
        allok = false
      end
    end
    assert(allok)
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'test')
|
5
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
6
|
+
$:.unshift File.join(File.dirname(__FILE__), 'test')
|
7
|
+
|
8
|
+
require 'tc_feed_parse'
|
9
|
+
require 'tc_htmloutput'
|
10
|
+
require 'tc_parser'
|
11
|
+
require 'tc_textoutput'
|
12
|
+
require 'tc_textwrappedoutput'
|
data/tools/doctoweb.bash
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/bin/bash
#
# Copies the generated rdoc/ tree into the website working copy
# ($CVSDIR/rdoc), schedules any unversioned files for addition with svn, and
# prints the command to run to commit the result. Nothing is committed here.
#
# CVSDIR may be set in the environment to override the default location.

# Quoted so that an empty or space-containing value does not break the test.
if [ -z "$CVSDIR" ]; then
  CVSDIR=$HOME/dev/ruby-feedparser/website
fi

TARGET=$CVSDIR/rdoc

echo "Copying rdoc documentation to $TARGET."

if [ ! -d "$TARGET" ]; then
  echo "$TARGET doesn't exist, exiting."
  exit 1
fi
rsync -a rdoc/ "$TARGET/"

echo "###########################################################"
echo "CVS status :"
# Stop if the directory cannot be entered; otherwise the svn commands below
# would run against whatever the current directory happens to be.
cd "$TARGET" || exit 1
svn st
echo "CVS Adding files."
# Repeat until svn reports no more unversioned ("?") entries.
while [ "$(svn st | grep -c "^? ")" -gt 0 ]; do
  # The file list is deliberately left unquoted so each name is a separate
  # argument. Leave the loop if svn add fails, to avoid spinning forever.
  svn add $(svn st | grep "^? " | awk '{print $2}') || break
done
echo "###########################################################"
echo "CVS status after adding missing files:"
svn st
echo "Commit changes now with"
echo "# (cd $TARGET && svn commit -m \"rdoc update\")"
exit 0
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby-feedparser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.7"
|
5
|
+
platform: ruby
|
6
|
+
authors: []
|
7
|
+
|
8
|
+
autorequire: feedparser
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-07-27 00:00:00 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Ruby library to parse ATOM and RSS feeds
|
17
|
+
email:
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- ChangeLog
|
26
|
+
- README
|
27
|
+
- COPYING
|
28
|
+
- LICENSE
|
29
|
+
- setup.rb
|
30
|
+
- Rakefile
|
31
|
+
- lib/feedparser/text-output.rb
|
32
|
+
- lib/feedparser/filesizes.rb
|
33
|
+
- lib/feedparser/html-output.rb
|
34
|
+
- lib/feedparser/rexml_patch.rb
|
35
|
+
- lib/feedparser/html2text-parser.rb
|
36
|
+
- lib/feedparser/textconverters.rb
|
37
|
+
- lib/feedparser/feedparser.rb
|
38
|
+
- lib/feedparser/sgml-parser.rb
|
39
|
+
- lib/feedparser.rb
|
40
|
+
- test/tc_feed_parse.rb
|
41
|
+
- test/tc_textoutput.rb
|
42
|
+
- test/tc_htmloutput.rb
|
43
|
+
- test/tc_textwrappedoutput.rb
|
44
|
+
- test/ts_feedparser.rb
|
45
|
+
- test/tc_parser.rb
|
46
|
+
- tools/doctoweb.bash
|
47
|
+
has_rdoc: true
|
48
|
+
homepage:
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
version:
|
68
|
+
requirements:
|
69
|
+
- none
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.3.4
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: Ruby library to parse ATOM and RSS feeds
|
75
|
+
test_files: []
|
76
|
+
|