RubyGems - yyyc514-syndication - Versions diffs - 0.6.1.1 - Mend

yyyc514-syndication 0.6.1.1

Files changed (24) hide show

data/CHANGES +10 -0
data/DEVELOPER +5 -0
data/IMPLEMENTATION +55 -0
data/README +228 -0
data/examples/apple.rb +24 -0
data/examples/google.rb +23 -0
data/examples/yahoo.rb +21 -0
data/lib/syndication/atom.rb +531 -0
data/lib/syndication/common.rb +289 -0
data/lib/syndication/content.rb +44 -0
data/lib/syndication/dublincore.rb +98 -0
data/lib/syndication/feedburner.rb +18 -0
data/lib/syndication/google.rb +58 -0
data/lib/syndication/podcast.rb +90 -0
data/lib/syndication/rss.rb +332 -0
data/lib/syndication/syndication.rb +49 -0
data/lib/syndication/tagsoup.rb +51 -0
data/rakefile +60 -0
data/test/atomtest.rb +190 -0
data/test/feedburntest.rb +79 -0
data/test/google.rb +91 -0
data/test/rsstest.rb +422 -0
data/test/tagsouptest.rb +86 -0
metadata +83 -0

data/test/feedburntest.rb ADDED Viewed

@@ -0,0 +1,79 @@
+# Copyright � mathew <meta@pobox.com> 2006.
+# Licensed under the same terms as Ruby.
+require 'syndication/rss'
+require 'test/unit'
+require 'syndication/dublincore'
+require 'syndication/content'
+require 'syndication/podcast'
+require 'syndication/feedburner'
+module Syndication
+# This class contains the unit tests for the Syndication module.
+class Tests < Test::Unit::TestCase
+    # A set of minimal assertions that can be applied to every well-formed parsed
+    # feed.
+    def baseline_rss_assertions(feed)
+      assert_not_nil(feed)
+      assert_kind_of(Syndication::RSS::Feed, feed)
+      loi = feed.items
+      assert_not_nil(loi)
+      assert_kind_of(Array, loi)
+      assert(loi.length >= 1)
+      assert_not_nil(loi[0])
+      assert_not_nil(loi[0].description)
+    end
+    def test_feedburner
+      xml = <<-EOF
+    <?xml version="1.0" encoding="UTF-8"?>
+    <?xml-stylesheet href="http://feeds.sfgate.com/~d/styles/rss2full.xsl" type="text/xsl" media="screen"?>
+    <?xml-stylesheet href="http://feeds.sfgate.com/~d/styles/itemcontent.css" type="text/css" media="screen"?>
+    <rss xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0">
+    <channel>
+        <title>SFGate: Top News Stories</title>
+        <link>http://www.sfgate.com/</link>
+        <description>Top news stories. From SFGate.com: the Bay Area's home page, online home of the San Francisco Chronicle and much more.</description>
+        <language>en-us</language>
+        <copyright>Copyright 2006 Hearst Communications, Inc.</copyright>
+        <managingEditor>ed@sfgate.com (SFGate Editorial staff)</managingEditor>
+        <webMaster>support@sfgate.com (SFGate technical support)</webMaster>
+        <lastBuildDate>Sun, 09 Jul 2006 14:21:10 PDT</lastBuildDate>
+        <category>News</category>
+        <category>Newspapers</category>
+        <category>San Francisco</category>
+        <category>San Francisco Bay Area</category>
+        <docs>http://blogs.law.harvard.edu/tech/rss</docs>
+        <image>
+            <url>http://www.sfgate.com/templates/types/syndication/pages/rss/graphics/sfgate_logo.png</url>
+            <title>SFGate: Top News Stories</title>
+            <link>http://www.sfgate.com/</link>
+        </image>
+        <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" href="http://www.sfgate.com/rss/feeds/news.xml" type="application/rss+xml" /><feedburner:browserFriendly>This is an RSS feed, but with the headlines made visible. Choose one of the buttons to add this feed to your favorite RSS reader.</feedburner:browserFriendly>
+        <item>
+            <title><![CDATA[Italy Beats France for 4th World Cup Title]]></title>
+            <link>http://feeds.sfgate.com/sfgate/rss/feeds/news?m=4300</link>
+            <description>Italy let France do nearly anything it wanted Sunday, except win the World Cup. That belongs to the Azzurri, 5-3 in a shootout after a 1-1 draw. Outplayed for an hour and into extra time, the Italians won it after French captain Zinedine Zidane was...&lt;img src="http://feeds.sfgate.com/sfgate/rss/feeds/news?g=4300"/&gt;</description>
+            <author><![CDATA[By BARRY WILNER, AP Sports Writer]]></author>
+            <pubDate>Sun, 09 Jul 2006 14:14:59 PDT</pubDate>
+            <guid isPermaLink="false">/n/a/2006/07/09/sports/s134544D82.DTL</guid>
+            <feedburner:origLink>http://www.sfgate.com/cgi-bin/article.cgi?f=/n/a/2006/07/09/sports/s134544D82.DTL&amp;feed=rss.news</feedburner:origLink>
+            </item>
+    </channel>
+    </rss>
+    EOF
+      f = Syndication::RSS::Parser.new.parse(xml)
+      il = f.items
+      assert_not_nil(il)
+      assert(il.length == 1)
+      i = il.first
+      assert_not_nil(i.feedburner_origlink)
+      assert(i.feedburner_origlink == "http://www.sfgate.com/cgi-bin/article.cgi?f=/n/a/2006/07/09/sports/s134544D82.DTL&feed=rss.news")
+    end
+  end
+end

data/test/google.rb ADDED Viewed

@@ -0,0 +1,91 @@
+# Copyright © mathew <meta@pobox.com> 2005.
+# Licensed under the same terms as Ruby.
+require 'syndication/atom'
+require 'syndication/google'
+require 'test/unit'
+require 'pp'
+module Syndication
+  # This class contains the unit tests for the Syndication module.
+  class Tests < Test::Unit::TestCase
+    # A set of minimal assertions that can be applied to every well-formed parsed
+    # feed.
+    def baseline_assertions(feed)
+      assert_not_nil(feed, 'Parser returned nil')
+      assert_kind_of(Syndication::Atom::Feed, feed)
+      assert_not_nil(feed.title, 'Feed#title was nil')
+      assert_not_nil(feed.id, 'Feed#id was nil')
+      assert_not_nil(feed.updated, 'Feed#updated was nil')
+      assert_kind_of(DateTime, feed.updated)
+      assert(feed.entries.length > 0, 'No entries in feed')
+      for entry in feed.entries
+        assert_not_nil(entry.title, 'Entry#title was nil')
+        assert_not_nil(entry.id, 'Entry#id was nil')
+        assert(entry.links.length > 0, 'No links in entry')
+        assert_not_nil(entry.links[0], 'Entry#links[0] was nil')
+        assert_not_nil(entry.updated, 'Entry#updated was nil')
+        assert_kind_of(DateTime, entry.updated)
+      end
+    end
+    # Minimal test
+    def test_atom_google
+      xml = <<EOF
+<feed xmlns='http://www.w3.org/2005/Atom'
+    xmlns:gd='http://schemas.google.com/g/2005'>
+  <id>http://www.google.com/calendar/feeds/jo@gmail.com/private-magicCookie/full</id>
+  <updated>2006-03-29T07:35:59.000Z</updated>
+  <title type='text'>Jo March</title>
+  <subtitle type='text'>This is my main calendar.</subtitle>
+  <link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml'
+    href='http://www.google.com/calendar/feeds/jo@gmail.com/private-magicCookie/full'></link>
+  <link rel='self' type='application/atom+xml'
+    href='http://www.google.com/calendar/feeds/jo@gmail.com/private-magicCookie/full'></link>
+  <author>
+    <name>Jo March</name>
+    <email>jo@gmail.com</email>
+  </author>
+  <generator version='1.0' uri='http://www.google.com/calendar/'>CL2</generator>
+  <gd:where valueString='California'></gd:where>
+  <entry>
+    <id>http://www.google.com/calendar/feeds/jo@gmail.com/private-magicCookie/full/entryID</id>
+    <published>2006-03-30T22:00:00.000Z</published>
+    <updated>2006-03-28T05:47:31.000Z</updated>
+    <category scheme='http://schemas.google.com/g/2005#kind'
+      term='http://schemas.google.com/g/2005#event'></category>
+    <title type='text'>Lunch with Darcy</title>
+    <content type='text'>Lunch to discuss future plans.</content>
+    <link rel='alternate' type='text/html'
+      href='http://www.google.com/calendar/event?eid=aTJxcnNqbW9tcTJnaTE5cnMybmEwaW04bXMgbWFyY2guam9AZ21haWwuY29t'
+      title='alternate'></link>
+    <link rel='self' type='application/atom+xml'
+      href='http://www.google.com/calendar/feeds/jo@gmail.com/private-magicCookie/full/entryID'></link>
+    <author>
+      <name>Jo March</name>
+      <email>jo@gmail.com</email>
+    </author>
+    <gd:transparency
+      value='http://schemas.google.com/g/2005#event.opaque'></gd:transparency>
+    <gd:eventStatus
+      value='http://schemas.google.com/g/2005#event.confirmed'></gd:eventStatus>
+    <gd:comments>
+      <gd:feedLink
+        href='http://www.google.com/calendar/feeds/jo@gmail.com/private-magicCookie/full/entryID/comments/'></gd:feedLink>
+    </gd:comments>
+    <gd:when startTime='2006-03-30T22:00:00.000Z'
+      endTime='2006-03-30T23:00:00.000Z'></gd:when>
+    <gd:where></gd:where>
+  </entry>
+</feed>
+EOF
+      f = Syndication::Atom::Parser.new.parse(xml)
+      baseline_assertions(f)
+      entry = f.entries.first
+      assert(entry.gd_when.to_s == "2006-03-30T22:00:00Z2006-03-30T23:00:00Z")
+    end
+  end
+end

data/test/rsstest.rb ADDED Viewed

@@ -0,0 +1,422 @@
+# Copyright � mathew <meta@pobox.com> 2005.
+# Licensed under the same terms as Ruby.
+#
+# $Header: /var/cvs/syndication/syndication/test/rsstest.rb,v 1.4 2005/10/23 23:00:59 meta Exp $
+require 'syndication/rss'
+require 'test/unit'
+require 'syndication/dublincore'
+require 'syndication/content'
+require 'syndication/podcast'
+module Syndication
+# This class contains the unit tests for the Syndication module.
+class Tests < Test::Unit::TestCase
+  # A set of minimal assertions that can be applied to every well-formed parsed
+  # feed.
+  def baseline_rss_assertions(feed)
+    assert_not_nil(feed)
+    assert_kind_of(Syndication::RSS::Feed, feed)
+    loi = feed.items
+    assert_not_nil(loi)
+    assert_kind_of(Array, loi)
+    assert(loi.length >= 1)
+    assert_not_nil(loi[0])
+    assert_not_nil(loi[0].description)
+  end
+  # Test a minimal well-formed RSS2.0 feed
+  def test_rss2_wf_minimal
+    xml = <<-EOF
+    <rss version="2.0">
+      <channel>
+        <title>I like coffee</title>
+        <link>http://www.coffeegeek.com/</link>
+        <description>Hand over the latte &amp; nobody gets hurt.</description>
+      </channel>
+      <item>
+        <description>A day without coffee is incomplete.</description>
+      </item>
+    </rss>
+    EOF
+    f = Syndication::RSS::Parser.new.parse(xml)
+    baseline_rss_assertions(f)
+    assert(f.channel.title == 'I like coffee')
+    assert(f.channel.link == 'http://www.coffeegeek.com/')
+    assert(f.channel.description == 'Hand over the latte & nobody gets hurt.')
+    assert(f.items.first.description == 'A day without coffee is incomplete.')
+    c = f.channel
+    assert_not_nil(c)
+    assert_kind_of(Syndication::RSS::Channel, c)
+    assert_not_nil(c.title)
+    assert_not_nil(c.link)
+    assert_not_nil(c.description)
+  end
+  # Test a minimal well-formed RSS1.0 feed
+  def test_rss1_wf_minimal
+    xml = <<-EOF
+    <?xml version="1.0"?>
+    <rdf:RDF
+      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+      xmlns="http://purl.org/rss/1.0/">
+    <channel rdf:about="http://www.otternet.com/">
+      <title>OtterNet</title>
+      <link>http://www.otternet.com/</link>
+      <description>Otternet has pages &amp; pages of information about otters.</description>
+    </channel>
+    <item rdf:about="http://www.otternet.com/species/seaotter.htm">
+      <title>The Sea Otter</title>
+      <link>http://www.otternet.com/species/seaotter.htm</link>
+      <description>The enticingly cute enhydra lontris.</description>
+    </item>
+    </rdf:RDF>
+    EOF
+    f = Syndication::RSS::Parser.new.parse(xml)
+    baseline_rss_assertions(f)
+    assert(f.channel.title == 'OtterNet')
+    assert(f.channel.link == 'http://www.otternet.com/')
+    assert(f.channel.description == 'Otternet has pages & pages of information about otters.')
+    assert(f.items.first.title == 'The Sea Otter')
+    assert(f.items.first.link == 'http://www.otternet.com/species/seaotter.htm')
+    assert(f.items.first.description == 'The enticingly cute enhydra lontris.')
+    c = f.channel
+    assert_not_nil(c)
+    assert_kind_of(Syndication::RSS::Channel, c)
+    assert_not_nil(c.title)
+    assert_not_nil(c.link)
+    assert_not_nil(c.description)
+  end
+  # Test a well-formed RSS2 feed with every element possible and more than
+  # one item
+  def test_rss2_wf_full
+    xml = <<-EOF
+    <rss version="2">
+      <channel>
+        <title>Example Feed</title>
+        <link>http://www.example.com/</link>
+        <description>This is merely an example.</description>
+        <language>en-us</language>
+        <copyright>Copyright 2004 The Example Corporation.</copyright>
+        <managingEditor>editor@example.com</managingEditor>
+        <webMaster>webmaster@example.com</webMaster>
+        <pubDate>Sat, 07 Sep 2002 00:01:02 EDT</pubDate>
+        <lastBuildDate>Sat, 7 Sep 02 13:14:15 -0600</lastBuildDate>
+        <category>examples</category>
+        <category>boring</category>
+        <generator>vim of course</generator>
+        <docs>http://blogs.law.harvard.edu/tech/rss</docs>
+        <cloud domain="rpc.sys.com" port="80" path="/RPC2" registerProcedure="pingMe" protocol="soap"/>
+        <ttl>90</ttl>
+        <image>
+          <title>Example Inc</title>
+          <url>http://www.example.com/images/logo.jpg</url>
+          <link>http://www.example.com</link>
+          <width>42</width>
+          <height>23</height>
+        </image>
+        <rating>(PICS-1.1 "http://www.icra.org/ratingsv02.html" l gen true r (cz 1 lz 1 nz 1 oz 1 vz 1) "http://www.rsac.org/ratingsv01.html" l gen true r (n 0 s 0 v 0 l 0) "http://www.classify.org/safesurf/" l gen true r (SS~~000 1))</rating>
+        <textInput>
+          <title>Submit</title>
+          <description>Enter keywords</description>
+          <name>SearchKeywords</name>
+          <link>http://www.example.com/cgi-bin/search.pl</link>
+        </textInput>
+        <skipHours>
+          <hour>0</hour>
+          <hour>23</hour>
+        </skipHours>
+        <skipDays>
+          <day>Monday</day>
+          <day>Sunday</day>
+        </skipDays>
+        <item>
+          <title>Our stock price shot up</title>
+          <link>http://www.example.com/news/2.html</link>
+          <description>We were hyped in the press!</description>
+        </item>
+        <item>
+          <title>A dull example of little value.</title>
+          <link>http://www.example.com/news/1.html</link>
+          <description>If this was any less interesting, it would be amazing.</description>
+          <author>fred@example.com</author>
+          <pubDate>Sat, 07 Sep 2002 00:01:02 EDT</pubDate>
+          <category>dull</category>
+          <category>amazingly</category>
+          <comments>http://www.example.com/news/comments/1.html</comments>
+          <enclosure url="http://www.example.com/mp3/advertisement.mp3" length="123987" type="audio/mpeg" />
+          <guid>4asd98dgf9a74@example.com</guid>
+          <source url="http://www.example.com/news.xml">Example News</source>
+        </item>
+      </channel>
+    </rss>
+    EOF
+    f = Syndication::RSS::Parser.new.parse(xml)
+    baseline_rss_assertions(f)
+    for elem in %w(title link description language copyright managingeditor webmaster pubdate lastbuilddate category generator docs cloud ttl textinput rating skiphours skipdays)
+      assert_not_nil(f.channel.send(elem), "feed.channel.#{elem} is nil, it shouldn't be")
+      assert(f.channel.send(elem).to_s.length > 0)
+    end
+    items = f.items
+    assert(items.length == 2)
+    i = items.last
+    for elem in %w(title link description author pubdate category comments enclosure guid source)
+      assert_not_nil(i.send(elem), "feed.channel.item[1].#{elem} is nil, it shouldn't be")
+    end
+    cats = i.category
+    assert(cats.length == 2)
+    assert(cats.first == 'dull')
+    assert(cats.last == 'amazingly')
+    assert(f.channel.skiphours.length == 2)
+    assert(f.channel.skiphours.first == 0)
+    assert(f.channel.skiphours.last == 23)
+    assert(f.channel.pubdate.kind_of?(DateTime))
+    assert(f.channel.lastbuilddate.kind_of?(DateTime))
+    assert(f.channel.pubdate.mday == 7)
+    assert(f.channel.pubdate.month == 9)
+    assert(f.channel.lastbuilddate.mday == 7)
+    assert(f.channel.lastbuilddate.month == 9)
+    c = f.channel
+    assert_not_nil(c)
+    assert_kind_of(Syndication::RSS::Channel, c)
+    assert_not_nil(c.title)
+    assert_not_nil(c.link)
+    assert_not_nil(c.description)
+  end
+  # Test a well-formed RSS 1.0 feed with every element possible, more
+  # than one item, and rdf:resource links in the channel
+  def test_rss1_wf_full
+    xml = <<-EOF
+    <rdf:RDF
+      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+      xmlns="http://purl.org/rss/1.0/">
+      <channel>
+        <title>Example Dot Org</title>
+        <link>http://www.example.org</link>
+        <description>the Example Organization web site</description>
+        <image rdf:resource="http://www.example.org/images/logo.gif"/>
+        <items>
+          <rdf:Seq>
+            <rdf:li resource="http://www.example.org/items/1"/>
+            <rdf:li resource="http://www.example.org/items/2"/>
+          </rdf:Seq>
+        </items>
+        <textinput rdf:resource="http://www.example.org/cgi-bin/input.pl"/>
+      </channel>
+      <textinput rdf:about="http://www.example.org/cgi-bin/input.pl">
+        <title>Search example.org</title>
+        <description>Search the example.org web site</description>
+        <name>query</name>
+        <link>http://www.example.org/cgi-bin/input.pl</link>
+      </textinput>
+      <image rdf:about="http://www.example.org/images/logo.gif">
+        <title>Example.org logo</title>
+        <link>http://www.example.org/</link>
+        <url>http://www.example.org/images/logo.gif</url>
+      </image>
+      <item rdf:about="http://www.example.org/items/1">
+        <title>Welcome</title>
+        <link>http://www.example.org/items/1</link>
+        <description>Welcome to our new news feed</description>
+      </item>
+      <item rdf:about="http://www.example.org/items/2">
+        <title>New Status Update</title>
+        <link>http://www.example.org/items/1</link>
+        <description>News about the Example project</description>
+      </item>
+    </rdf:RDF>
+    EOF
+    f = Syndication::RSS::Parser.new.parse(xml)
+    baseline_rss_assertions(f)
+    for elem in %w(title link description textinput)
+      assert_not_nil(f.channel.send(elem), "feed.channel.#{elem} is nil, it shouldn't be")
+      assert(f.channel.send(elem).to_s.length > 0)
+    end
+    il = f.items
+    assert(il.length == 2)
+    i = il.last
+    assert(i.link == 'http://www.example.org/items/1')
+    assert(i.title == 'New Status Update')
+    assert(i.description == 'News about the Example project')
+    assert(f.textinput.title == 'Search example.org')
+    f.channel.image.strip
+    assert(f.image.url == 'http://www.example.org/images/logo.gif')
+    c = f.channel
+    assert_not_nil(c)
+    assert_kind_of(Syndication::RSS::Channel, c)
+    assert_not_nil(c.title)
+    assert_not_nil(c.link)
+    assert_not_nil(c.description)
+  end
+  # Test HTML encoded content in RSS 1.0 and namespace remapping
+  def test_rss1_content
+    xml = <<-EOF
+    <?xml version="1.0"?>
+    <rdf:RDF
+      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+      xmlns:html="http://purl.org/rss/1.0/modules/content/"
+      xmlns="http://purl.org/rss/1.0/">
+    <channel rdf:about="http://www.otternet.com/">
+      <title>OtterNet</title>
+      <link>http://www.otternet.com/</link>
+      <description>Otternet has dozens of pages of information about otters.</description>
+      <content:encoded><![CDATA[<p><cite>OtterNet</cite> has <em>dozens</em> of pages of information about otters.</p>]]></content:encoded>
+    </channel>
+    <item rdf:about="http://www.otternet.com/species/seaotter.htm">
+      <title>The Sea Otter</title>
+      <link>http://www.otternet.com/species/seaotter.htm</link>
+      <description>The enticingly cute enhydra lontris.</description>
+      <html:encoded>The enticingly cute &lt;i&gt;enhydra lontris&lt;/i&gt;</html:encoded>
+    </item>
+    <item rdf:about="http://www.ruby-lang.org/">
+      <title>Ruby</title>
+      <link>http://www.ruby-lang.org/</link>
+      <description>There's this language called Ruby, you may have heard of it.</description>
+      <html:encoded>There's this language called &lt;strong&gt;Ruby&lt;/strong&gt;, you &lt;em&gt;may&lt;/em&gt; have heard of it.</html:encoded>
+    </item>
+    </rdf:RDF>
+    EOF
+    f = Syndication::RSS::Parser.new.parse(xml)
+    baseline_rss_assertions(f)
+    il = f.items
+    assert(il.length == 2)
+    i1 = il.first
+    i2 = il.last
+    assert_not_nil(i1.content_encoded, "content_encoded nil, shouldn't be")
+    assert_not_nil(i2.content_encoded, "content_encoded nil, shouldn't be")
+    assert(i1.content_encoded == 'The enticingly cute <i>enhydra lontris</i>')
+    assert(i1.content_decoded == 'The enticingly cute <i>enhydra lontris</i>')
+    assert(i2.content_decoded == "There's this language called <strong>Ruby</strong>, you <em>may</em> have heard of it.")
+    c = f.channel
+    assert(c.content_encoded == '<![CDATA[<p><cite>OtterNet</cite> has <em>dozens</em> of pages of information about otters.</p>]]>')
+    assert(c.content_decoded == '<p><cite>OtterNet</cite> has <em>dozens</em> of pages of information about otters.</p>')
+    assert_not_nil(c)
+    assert_kind_of(Syndication::RSS::Channel, c)
+    assert_not_nil(c.title)
+    assert_not_nil(c.link)
+    assert_not_nil(c.description)
+  end
+  # Test iTunes-specific duration parsing
+  def test_itunes
+    i = Syndication::RSS::Item.new(nil)
+    i.itunes_duration = "12:34:56"
+    assert(i.itunes_duration == 45296, "Duration computed incorrectly")
+    i.itunes_duration = "5:43:21"
+    assert(i.itunes_duration == 20601, "Duration computed incorrectly")
+    i.itunes_duration = "20:01"
+    assert(i.itunes_duration == 1201, "Duration computed incorrectly")
+    i.itunes_duration = "3:52"
+    assert(i.itunes_duration == 232, "Duration computed incorrectly")
+  end
+  # Test a well-formed RSS2 feed with every element possible and more than
+  # one item, with all kinds of stuff CDATA escaped.
+  def test_rss2_wf_full_cdata
+    xml = <<-EOF
+    <rss version="2">
+      <channel>
+        <title><![CDATA[Example Feed]]></title>
+        <link>http://www.example.com/</link>
+        <description><![CDATA[This is merely an example.]]></description>
+        <language>en-us</language>
+        <copyright>Copyright 2004 The Example Corporation.</copyright>
+        <managingEditor>editor@example.com</managingEditor>
+        <webMaster>webmaster@example.com</webMaster>
+        <pubDate>Sat, 07 Sep 2002 00:01:02 EDT</pubDate>
+        <lastBuildDate>Sat, 7 Sep 02 13:14:15 -0600</lastBuildDate>
+        <category>examples</category>
+        <category>boring</category>
+        <generator>vim of course</generator>
+        <docs>http://blogs.law.harvard.edu/tech/rss</docs>
+        <cloud domain="rpc.sys.com" port="80" path="/RPC2" registerProcedure="pingMe" protocol="soap"/>
+        <ttl>90</ttl>
+        <image>
+          <title><![CDATA[Example Inc]]></title>
+          <url>http://www.example.com/images/logo.jpg</url>
+          <link>http://www.example.com</link>
+          <width>42</width>
+          <height>23</height>
+        </image>
+        <rating>(PICS-1.1 "http://www.icra.org/ratingsv02.html" l gen true r (cz 1 lz 1 nz 1 oz 1 vz 1) "http://www.rsac.org/ratingsv01.html" l gen true r (n 0 s 0 v 0 l 0) "http://www.classify.org/safesurf/" l gen true r (SS~~000 1))</rating>
+        <textInput>
+          <title>Submit</title>
+          <description>Enter keywords</description>
+          <name>SearchKeywords</name>
+          <link>http://www.example.com/cgi-bin/search.pl</link>
+        </textInput>
+        <skipHours>
+          <hour>0</hour>
+          <hour>23</hour>
+        </skipHours>
+        <skipDays>
+          <day>Monday</day>
+          <day>Sunday</day>
+        </skipDays>
+        <item>
+          <title>Our stock price shot up</title>
+          <link>http://www.example.com/news/2.html</link>
+          <description>We were hyped in the press!</description>
+        </item>
+        <item>
+          <title><![CDATA[Unencoded < > and & are allowed.]]></title>
+          <link><![CDATA[http://www.example.com/news/1.html]]></link>
+          <description><![CDATA[If this was any less interesting, it would be amazing.]]></description>
+          <author><![CDATA[fred@example.com]]></author>
+          <pubDate>Sat, 07 Sep 2002 00:01:02 EDT</pubDate>
+          <category>dull</category>
+          <category>amazingly</category>
+          <comments>http://www.example.com/news/comments/1.html</comments>
+          <enclosure url="http://www.example.com/mp3/advertisement.mp3" length="123987" type="audio/mpeg" />
+          <guid>4asd98dgf9a74@example.com</guid>
+          <source url="http://www.example.com/news.xml">Example News</source>
+        </item>
+      </channel>
+    </rss>
+    EOF
+    f = Syndication::RSS::Parser.new.parse(xml)
+    baseline_rss_assertions(f)
+    for elem in %w(title link description language copyright managingeditor webmaster pubdate lastbuilddate category generator docs cloud ttl textinput rating skiphours skipdays)
+      assert_not_nil(f.channel.send(elem), "feed.channel.#{elem} is nil, it shouldn't be")
+      assert(f.channel.send(elem).to_s.length > 0)
+    end
+    # Check CDATA is decoded properly
+    assert(f.channel.title == 'Example Feed')
+    assert(f.channel.description == 'This is merely an example.')
+    items = f.items
+    assert(items.length == 2)
+    i = items.last
+    for elem in %w(title link description author pubdate category comments enclosure guid source)
+      assert_not_nil(i.send(elem), "feed.channel.item[1].#{elem} is nil, it shouldn't be")
+    end
+    cats = i.category
+    assert(i.title == 'Unencoded < > and & are allowed.')
+    assert(i.link == 'http://www.example.com/news/1.html')
+    assert(cats.length == 2)
+    assert(cats.first == 'dull')
+    assert(cats.last == 'amazingly')
+    assert(f.channel.skiphours.length == 2)
+    assert(f.channel.skiphours.first == 0)
+    assert(f.channel.skiphours.last == 23)
+    assert(f.channel.pubdate.kind_of?(DateTime))
+    assert(f.channel.lastbuilddate.kind_of?(DateTime))
+    assert(f.channel.pubdate.mday == 7)
+    assert(f.channel.pubdate.month == 9)
+    assert(f.channel.lastbuilddate.mday == 7)
+    assert(f.channel.lastbuilddate.month == 9)
+    c = f.channel
+    assert_not_nil(c)
+    assert_kind_of(Syndication::RSS::Channel, c)
+    assert_not_nil(c.title)
+    assert_not_nil(c.link)
+    assert_not_nil(c.description)
+  end
+end
+end

data/test/tagsouptest.rb ADDED Viewed

@@ -0,0 +1,86 @@
+# Copyright � mathew <meta@pobox.com> 2005.
+# Licensed under the same terms as Ruby.
+#
+# $Header: /var/cvs/syndication/syndication/test/tagsouptest.rb,v 1.2 2005/10/17 20:06:51 meta Exp $
+require 'syndication/tagsoup'
+require 'test/unit'
+require 'rexml/document'
+module Syndication
+  # This class contains the unit tests for the Syndication module.
+  class Tests < Test::Unit::TestCase
+    def tag_start(x, pairs)
+      @events << "tag_start(#{x.strip})"
+      lst = nil
+      if pairs
+        for p in pairs
+          if lst
+            lst = lst + ","
+          else
+            lst = ""
+          end
+          lst << "#{p[0]}=#{p[1]}"
+        end
+        @events << "attrs(#{lst})"
+      end
+    end
+    def tag_end(x)
+      @events << "tag_end(#{x.strip})"
+    end
+    def text(x)
+      @events << "text(#{x.strip})"
+    end
+    # Minimal test
+    def test_tagsoup
+      xml = <<-EOF
+<a>
+<b>one
+<c></c></b>
+<d arg1="alpha">two</d>
+<e arg2='beta'>
+three&lt;four&#99;&trade;
+</e>
+</a>
+<feed xmlns="http://www.w3.org/2005/Atom">
+<title>One good turn usually gets most of the blanket.</title>
+<updated>2005-08-20T21:14:38Z</updated>
+<id>urn:uuid:035d3aa3022c1b1b2a17e37ae2dcc376</id>
+<entry>
+<title>Quidquid latine dictum sit, altum viditur.</title>
+<link href="http://example.com/05/08/20/2114.html"/>
+<id>urn:uuid:89d96d76a99426264f6f1f520c1b93c2</id>
+<updated>2005-08-20T21:14:38Z</updated>
+</entry>
+</feed>
+      EOF
+      @events = Array.new
+      Syndication::TagSoup.parse_stream(xml, self)
+      @tagsoup = @events
+      @events = Array.new
+      REXML::Document.parse_stream(xml, self)
+      @rexml = @events
+      #puts "REXML\n-----"
+      #pp @rexml
+      #puts "\nTAGSOUP\n-------"
+      #pp @tagsoup
+      errs = false
+      for tsevt in @tagsoup
+        rxevt = @rexml.shift
+        if rxevt
+          if tsevt.to_s != rxevt.to_s
+            errs = true
+            #puts "TagSoup: [#{tsevt}]\nREXML: [#{rxevt}]"
+          end
+        end
+      end
+      assert(!errs, "TagSoup and REXML parse results didn't match")
+    end
+  end
+end