cardmagic-simple-rss 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/install.rb ADDED
@@ -0,0 +1,40 @@
# Installs everything under ./lib into Ruby's site library directory.
#
# this was adapted from rdoc's install.rb by way of Log4r
#
# Uses only the standard library: RbConfig to locate the target directory
# and FileUtils to create directories and copy files. (The older `ftools`
# library and the top-level `Config` constant are not available on current
# Ruby versions.)

require 'rbconfig'
require 'fileutils'

config = RbConfig::CONFIG

$sitedir = config["sitelibdir"]
unless $sitedir
  # No sitelibdir configured: derive one from libdir and the Ruby version,
  # preferring a site_ruby entry that is already on the load path.
  version = config["MAJOR"] + "." + config["MINOR"]
  $libdir = File.join(config["libdir"], "ruby", version)
  $sitedir = $:.find { |x| x =~ /site_ruby/ }
  if !$sitedir
    $sitedir = File.join($libdir, "site_ruby")
  elsif !$sitedir.include?(version)
    # Plain substring test; using a String on the right of !~ raises TypeError.
    $sitedir = File.join($sitedir, version)
  end
end

# Make sure the destination exists before copying anything into it.
FileUtils.mkdir_p($sitedir)

Dir.chdir("lib")

Dir.glob("**/*").each do |f|
  # Only regular files are copied; directories are created on demand below.
  next if File.directory?(f)

  target = File.join($sitedir, *f.split(/\//))
  FileUtils.mkdir_p(File.dirname(target))
  FileUtils.install(f, target, :mode => 0644, :verbose => true)
end
data/lib/simple-rss.rb ADDED
@@ -0,0 +1,135 @@
1
+ require 'cgi'
2
+ require 'time'
3
+
# A liberal, regular-expression based reader for RSS and Atom feeds.
#
# The source is never parsed as XML. Instead, each tag listed in
# SimpleRSS.feed_tags is searched for in the part of the document that
# precedes the first item/entry (falling back to the whole document), and
# each tag listed in SimpleRSS.item_tags is searched for inside every
# item/entry element. Feed-level values are exposed through readers defined
# on the class (e.g. +title+, +dc_date+); items are Hashes keyed by the
# cleaned tag name that also answer unknown methods with a key lookup.
class SimpleRSS
  VERSION = "1.1"

  attr_reader :items, :source
  alias :entries :items

  # Tags looked up at feed/channel level.
  @@feed_tags = [
    :id,
    :title, :subtitle, :link,
    :description,
    :author, :webMaster, :managingEditor, :contributor,
    :pubDate, :lastBuildDate, :updated, :'dc:date',
    :generator, :language, :docs, :cloud,
    :ttl, :skipHours, :skipDays,
    :image, :logo, :icon, :rating,
    :rights, :copyright,
    :textInput, :'feedburner:browserFriendly',
    :'itunes:author', :'itunes:category'
  ]

  # Tags looked up inside every item/entry.
  @@item_tags = [
    :id,
    :title, :link,
    :author, :contributor,
    :description, :summary, :content, :'content:encoded', :comments,
    :pubDate, :published, :updated, :expirationDate, :modified, :'dc:date',
    :category, :guid,
    :'trackback:ping', :'trackback:about',
    :'dc:creator', :'dc:title', :'dc:subject', :'dc:rights', :'dc:publisher'
  ]

  # source:: an object responding to +read+ (its contents are read once) or
  #          anything else, which is converted with +to_s+.
  #
  # Raises SimpleRSSError when the text has no channel or feed element.
  def initialize(source)
    @source = source.respond_to?(:read) ? source.read : source.to_s
    @items = []

    parse
  end

  # The feed object doubles as its own channel.
  def channel() self end
  alias :feed :channel

  class << self
    def feed_tags
      @@feed_tags
    end
    def feed_tags=(ft)
      @@feed_tags = ft
    end

    def item_tags
      @@item_tags
    end
    def item_tags=(it)
      @@item_tags = it
    end

    # Builds a SimpleRSS from +source+. The remaining parameters are accepted
    # for compatibility with Ruby's standard RSS parser and are ignored.
    def parse(source, do_validate=true, ignore_unknown_element=true, parser_class=false)
      new source
    end
  end

  private

  # Populates the feed-level readers and @items from @source.
  def parse
    raise SimpleRSSError, "Poorly formatted feed" unless @source =~ %r{<(channel|feed).*?>.*?</(channel|feed)>}mi

    # Everything before the first item/entry holds the feed's own metadata.
    feed_content = $1 if @source =~ %r{(.*?)<(rss:|atom:)?(item|entry).*?>.*?</(rss:|atom:)?(item|entry)>}mi

    @@feed_tags.each do |tag|
      # Prefer the feed header; fall back to the whole document.
      attrs, content = find_tag(feed_content, tag) || find_tag(@source, tag)
      next unless attrs

      reader = clean_tag(tag)
      instance_variable_set("@#{reader}", clean_content(tag, attrs, content))
      # The reader is defined on the class, so it exists for every instance.
      self.class.class_eval { attr_reader reader }
    end

    @source.scan( %r{<(rss:|atom:)?(item|entry)([\s][^>]*)?>(.*?)</(rss:|atom:)?(item|entry)>}mi ) do |match|
      body = match[3]
      item = {}
      @@item_tags.each do |tag|
        attrs, content = find_tag(body, tag)
        item[clean_tag(tag)] = clean_content(tag, attrs, content) if attrs
      end
      # Lets callers write item.title instead of item[:title].
      def item.method_missing(name, *args) self[name] end
      @items << item
    end
  end

  # Finds the first occurrence of +tag+ (optionally prefixed with rss: or
  # atom:) in +text+.
  #
  # Returns [attrs, content] for <tag attrs>content</tag>, [attrs, nil] for a
  # self-closing <tag attrs/>, and nil when +text+ is nil or nothing matches.
  # +attrs+ is always a String (possibly empty) when a match is found.
  def find_tag(text, tag)
    return nil unless text

    name = Regexp.escape(tag.to_s)
    # The lookahead demands that the tag name ends right here, so that
    # :content does not match <content:encoded ...>.
    if text =~ %r{<(?:rss:|atom:)?#{name}(?=[\s/>])(.*?)>(.*?)</(?:rss:|atom:)?#{name}>}mi
      [$1, $2]
    elsif text =~ %r{<(?:rss:|atom:)?#{name}(?=[\s/>])(.*?)/\s*>}mi
      [$1, nil]
    end
  end

  # Converts the raw text of +tag+ into its final value:
  # * date tags become Time objects when Time.parse accepts them, otherwise
  #   the unescaped text is returned;
  # * author-like tags have single-line markup removed;
  # * any other tag with no content but an href attribute yields that href;
  # * everything else is returned unescaped.
  def clean_content(tag, attrs, content)
    content = content.to_s
    case tag
    when :pubDate, :lastBuildDate, :published, :updated, :expirationDate, :modified, :'dc:date'
      begin
        Time.parse(content)
      rescue StandardError
        unescape(content)
      end
    when :author, :contributor, :skipHours, :skipDays
      unescape(content.gsub(/<.*?>/,''))
    else
      if content.empty? && "#{attrs} " =~ /href=['"]?([^'"]*)['" ]/mi
        $1.strip
      else
        unescape(content)
      end
    end
  end

  # :'dc:date' => :dc_date, so the tag can serve as a method or Hash key.
  def clean_tag(tag)
    tag.to_s.gsub(':','_').intern
  end

  # URL-decodes +content+ when it looks percent-encoded, then removes CDATA
  # markers and surrounding whitespace. The markers and whitespace are
  # removed in every case, not only for percent-encoded text.
  def unescape(content)
    text = if content =~ /([^-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]%)/n
      CGI.unescape(content)
    else
      content
    end
    text.gsub(/(<!\[CDATA\[|\]\]>)/,'').strip
  end
end
133
+
# Error raised by SimpleRSS#parse ("Poorly formatted feed") when the source
# contains no channel or feed element.
class SimpleRSSError < StandardError; end
@@ -0,0 +1,12 @@
# Gem specification for simple-rss 1.1.
Gem::Specification.new do |spec|
  # The summary and the description carry the same text.
  blurb = "A simple, flexible, extensible, and liberal RSS and Atom reader for Ruby. It is designed to be backwards compatible with the standard RSS parser, but will never do RSS generation."

  spec.name        = "simple-rss"
  spec.version     = "1.1"
  spec.date        = "2008-07-15"
  spec.summary     = blurb
  spec.email       = "lucas@rufy.com"
  spec.homepage    = "http://github.com/cardmagic/simple-rss"
  spec.description = blurb.dup
  spec.has_rdoc    = true
  spec.authors     = ["Lucas Carlson"]
  spec.files       = %w[
    install.rb
    lib
    lib/simple-rss.rb
    LICENSE
    Rakefile
    README
    simple-rss.gemspec
    test
    test/base
    test/base/base_test.rb
    test/data
    test/data/atom.xml
    test/data/not-rss.xml
    test/data/rss09.rdf
    test/data/rss20.xml
    test/test_helper.rb
  ]
end
@@ -0,0 +1,51 @@
1
+ require File.dirname(__FILE__) + '/../test_helper'
# Exercises SimpleRSS against the bundled RSS 0.9, RSS 2.0, Atom and
# non-feed fixtures found in ../data.
class BaseTest < Test::Unit::TestCase
  DATA_DIR = File.dirname(__FILE__) + '/../data'

  def setup
    # The extra arguments are ignored by SimpleRSS.parse; they are passed to
    # show that the standard RSS parser's call signature is accepted.
    @rss09 = parse_fixture('rss09.rdf', true)
    @rss20 = parse_fixture('rss20.xml', false)
    @atom = parse_fixture('atom.xml')
  end

  def test_channel
    assert_equal @rss09, @rss09.channel
    assert_equal @rss20, @rss20.channel
    assert_equal @atom, @atom.feed
  end

  def test_items
    assert_kind_of Array, @rss09.items
    assert_kind_of Array, @rss20.items
    assert_kind_of Array, @atom.entries
  end

  def test_rss09
    assert_equal 10, @rss09.items.size
    assert_equal "Slashdot", @rss09.title
    assert_equal "http://slashdot.org/", @rss09.channel.link
    assert_equal "http://books.slashdot.org/article.pl?sid=05/08/29/1319236&amp;from=rss", @rss09.items.first.link
    assert_equal "http://books.slashdot.org/article.pl?sid=05/08/29/1319236&amp;from=rss", @rss09.items.first[:link]
    assert_equal Time.parse("Wed Aug 24 13:33:34 UTC 2005"), @rss20.items.first.pubDate
    assert_equal Time.parse("Fri Sep 09 02:52:31 PDT 2005"), @rss09.channel.dc_date
  end

  def test_rss20
    assert_equal 10, @rss20.items.size
    assert_equal "Technoblog", @rss20.title
    assert_equal "http://tech.rufy.com", @rss20.channel.link
    assert_equal "http://feeds.feedburner.com/rufytech?m=68", @rss20.items.first.link
    assert_equal "http://feeds.feedburner.com/rufytech?m=68", @rss20.items.first[:link]
    assert_equal "This is an XML content feed. It is intended to be viewed in a newsreader or syndicated to another site.", @rss20.channel.feedburner_browserFriendly
  end

  def test_atom
    assert_equal 1, @atom.entries.size
    assert_equal "dive into mark", @atom.title
    assert_equal "http://example.org/", @atom.feed.link
    assert_equal "http://example.org/2005/04/02/atom", @atom.entries.first.link
    assert_equal "http://example.org/2005/04/02/atom", @atom.entries.first[:link]
  end

  def test_bad_feed
    assert_raise(SimpleRSSError) { parse_fixture('not-rss.xml') }
  end

  private

  # Parses the fixture called +name+ and closes the file afterwards.
  # Any extra arguments are forwarded to SimpleRSS.parse.
  def parse_fixture(name, *args)
    File.open(File.join(DATA_DIR, name)) { |io| SimpleRSS.parse(io, *args) }
  end
end
@@ -0,0 +1,45 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <feed xmlns="http://www.w3.org/2005/Atom">
3
+ <rss:title type="text">dive into mark</title>
4
+ <subtitle type="html">
5
+ A &lt;em&gt;lot&lt;/em&gt; of effort
6
+ went into making this effortless
7
+ </subtitle>
8
+ <updated>2005-07-31T12:29:29Z</updated>
9
+ <id>tag:example.org,2003:3</id>
10
+ <link rel="alternate" type="text/html"
11
+ hreflang="en" href="http://example.org/"/>
12
+ <link rel="self" type="application/atom+xml"
13
+ href="http://example.org/feed.atom"/>
14
+ <rights>Copyright (c) 2003, Mark Pilgrim</rights>
15
+ <generator uri="http://www.example.com/" version="1.0">
16
+ Example Toolkit
17
+ </generator>
18
+ <entry>
19
+ <title>Atom draft-07 snapshot</title>
20
+ <link rel="alternate" type="text/html"
21
+ href="http://example.org/2005/04/02/atom"/>
22
+ <link rel="enclosure" type="audio/mpeg" length="1337"
23
+ href="http://example.org/audio/ph34r_my_podcast.mp3"/>
24
+ <id>tag:example.org,2003:3.2397</id>
25
+ <updated>2005-07-31T12:29:29Z</updated>
26
+ <published>2003-12-13T08:29:29-04:00</published>
27
+ <author>
28
+ <name>Mark Pilgrim</name>
29
+ <uri>http://example.org/</uri>
30
+ <email>f8dy@example.com</email>
31
+ </author>
32
+ <contributor>
33
+ <name>Sam Ruby</name>
34
+ </contributor>
35
+ <contributor>
36
+ <name>Joe Gregorio</name>
37
+ </contributor>
38
+ <content type="xhtml" xml:lang="en"
39
+ xml:base="http://diveintomark.org/">
40
+ <div xmlns="http://www.w3.org/1999/xhtml">
41
+ <p><i>[Update: The Atom draft is finished.]</i></p>
42
+ </div>
43
+ </content>
44
+ </entry>
45
+ </feed>
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>This ain't RSS!</title>
4
+ </head>
5
+ <body>
6
+ No, this is HTML, not RSS.
7
+ </body>
8
+ </html>
@@ -0,0 +1,79 @@
1
+ <?xml version="1.0" encoding="ISO-8859-1"?>
2
+
3
+ <rdf:RDF
4
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
5
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
6
+ xmlns="http://my.netscape.com/rdf/simple/0.9/">
7
+
8
+ <channel>
9
+ <title>Slashdot</title>
10
+ <link>http://slashdot.org/</link>
11
+ <description>News for nerds, stuff that matters</description>
12
+ <dc:date>2005-09-09T02:52:31-07:00</dc:date>
13
+ </channel>
14
+
15
+ <image>
16
+ <title>Slashdot</title>
17
+ <url>http://images.slashdot.org/topics/topicslashdot.gif</url>
18
+ <link>http://slashdot.org/</link>
19
+ </image>
20
+
21
+ <item>
22
+ <title>JBoss - A Developer's Notebook</title>
23
+ <link>http://books.slashdot.org/article.pl?sid=05/08/29/1319236&amp;from=rss</link>
24
+ <dc:date>2005-09-09T02:52:31-07:00</dc:date>
25
+ </item>
26
+
27
+ <item>
28
+ <title>Apple Hedges Its Bet on New Intel Chips</title>
29
+ <link>http://hardware.slashdot.org/article.pl?sid=05/08/29/1314219&amp;from=rss</link>
30
+ </item>
31
+
32
+ <item>
33
+ <title>Beowulf Pioneer Lured From Cal Tech to LSU</title>
34
+ <link>http://slashdot.org/article.pl?sid=05/08/29/1035240&amp;from=rss</link>
35
+ </item>
36
+
37
+ <item>
38
+ <title>Google Talk Claims Openness, Lacks S2S Support</title>
39
+ <link>http://it.slashdot.org/article.pl?sid=05/08/29/1022242&amp;from=rss</link>
40
+ </item>
41
+
42
+ <item>
43
+ <title>The End of the Bar Code</title>
44
+ <link>http://slashdot.org/article.pl?sid=05/08/29/1020220&amp;from=rss</link>
45
+ </item>
46
+
47
+ <item>
48
+ <title>2.6.13 Linux Kernel Released</title>
49
+ <link>http://linux.slashdot.org/article.pl?sid=05/08/29/0334205&amp;from=rss</link>
50
+ </item>
51
+
52
+ <item>
53
+ <title>HOWTO: The Anti-Printer</title>
54
+ <link>http://hardware.slashdot.org/article.pl?sid=05/08/29/1016204&amp;from=rss</link>
55
+ </item>
56
+
57
+ <item>
58
+ <title>OSDL Skeptical Of Joint Study with Microsoft</title>
59
+ <link>http://linux.slashdot.org/article.pl?sid=05/08/29/0625224&amp;from=rss</link>
60
+ </item>
61
+
62
+ <item>
63
+ <title>New Mad Cow Test on the Horizon?</title>
64
+ <link>http://science.slashdot.org/article.pl?sid=05/08/29/0619259&amp;from=rss</link>
65
+ </item>
66
+
67
+ <item>
68
+ <title>Coffee A Health Drink?</title>
69
+ <link>http://science.slashdot.org/article.pl?sid=05/08/29/0342207&amp;from=rss</link>
70
+ </item>
71
+
72
+ <textinput>
73
+ <title>Search Slashdot</title>
74
+ <description>Search Slashdot stories</description>
75
+ <name>query</name>
76
+ <link>http://slashdot.org/search.pl</link>
77
+ </textinput>
78
+
79
+ </rdf:RDF>