RubyGems - syndication - Versions diffs - 0.4.0 - Mend

syndication 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

data/IMPLEMENTATION +33 -0
data/README +208 -0
data/examples/yahoo.rb +21 -0
data/lib/syndication/atom.rb +479 -0
data/lib/syndication/common.rb +267 -0
data/lib/syndication/content.rb +37 -0
data/lib/syndication/dublincore.rb +92 -0
data/lib/syndication/podcast.rb +85 -0
data/lib/syndication/rss.rb +326 -0
data/lib/syndication/syndication.rb +45 -0
data/test/atomtest.rb +186 -0
data/test/rsstest.rb +314 -0
metadata +55 -0

data/lib/syndication/rss.rb ADDED

@@ -0,0 +1,326 @@
+# This module provides classes and methods for parsing RSS web syndication
+# feeds.
+#
+# Copyright © mathew <meta@pobox.com> 2005.
+# Licensed under the same terms as Ruby.
+require 'uri'
+require 'rexml/parsers/streamparser'
+require 'rexml/streamlistener'
+require 'rexml/document'
+require 'date'
+require 'syndication/common'
+module Syndication
+  class Container
+    # This method is used by objects in RSS feeds that accept
+    # <category> elements
+    def store_category(cat)
+      if cat.kind_of?(String)
+        if !@category
+          @category = Array.new
+        end
+        @category << cat
+      end
+    end
+  end
+  # RSS is a method of syndicating web site content.
+  #
+  # There are nine different versions of RSS; see
+  # <URL:http://diveintomark.org/archives/2004/02/04/incompatible-rss>
+  #
+  # This code attempts to parse all of them, and provide the same API via
+  # the same data model regardless of the particular flavor of RSS fed in.
+  #
+  # One thing to be aware of is that RSS 0.9x and 2.0x have no mechanism for
+  # indicating the type of text in a description, whether plain text or HTML.
+  # As a result, this library leaves it to you to write code to 'sniff'
+  # the data returned and decide whether you think it looks like text or HTML.
+  #
+  # RSS 1.0 solves the problem via the content module, which is supported
+  # via Syndication::Content. Atom solves the problem too.
+module RSS
+  # Represents an individual story or entry in an RSS feed.
+  class Item < Container
+    # The title of the item as a String.
+    attr_accessor :title
+    # The URL of the item as a String.
+    attr_accessor :link
+    # A textual description of the item as a String.
+    attr_accessor :description
+    # E-mail address of item author.
+    attr_accessor :author
+    # One or more categories for the item, as an Array of Strings.
+    attr_reader :category
+    alias category= store_category
+    # URL for feedback on this item as a String.
+    attr_accessor :comments
+    # A media object attached to the item, as a Syndication::Enclosure.
+    attr_accessor :enclosure
+    # A globally unique identifier for this item, a String.
+    attr_accessor :guid
+    # The publication date for this item. Accepts anything DateTime can
+    # parse, which includes RFC822-style dates as specified by the RSS
+    # standards.
+    attr_writer :pubdate
+    # An RSS channel this item was copied from, used to give credit for
+    # copied links. A URL String.
+    attr_accessor :source
+    # Publication date as a DateTime if possible; if it won't parse,
+    # returns the original string.
+    def pubdate
+      parse_date(@pubdate)
+    end
+  end
+  # Used to represent graphical images provided in an RSS feed, with the
+  # intent that they be used to represent the channel in a graphical user
+  # interface, or on a web page.
+  #
+  # Typically found via Syndication::Channel#image
+  class Image < Container
+    # URL of image.
+    attr_accessor :url
+    # Title of image for use as ALT text.
+    attr_accessor :title
+    # Link to use when image is clicked on.
+    attr_accessor :link
+    # Width of image in pixels, as an integer.
+    attr_reader :width
+    # Height of image in pixels, as an integer.
+    attr_reader :height
+    # Set width in pixels.
+    def width=(x)
+      if x.kind_of?(String)
+        @width = x.to_i
+      end
+    end
+    # Set height in pixels.
+    def height=(x)
+      if x.kind_of?(String)
+        @height = x.to_i
+      end
+    end
+  end
+  # Represents a text input box to be used in association with an RSS feed, for
+  # example a search box or e-mail subscription input box.
+  #
+  # Typically found via Syndication::Channel#textinput method.
+  class TextInput < Container
+    # Label for Submit button in text input area.
+    attr_accessor :title
+    # Label to explain purpose of text input area.
+    attr_accessor :description
+    # Name of text object in input area, for form submission.
+    attr_accessor :name
+    # URL to submit data to via HTTP POST.
+    attr_accessor :link end
+  # Represents metadata about an RSS feed as a whole.
+  # Typically found via the Syndication::RSS::Feed#channel method.
+  class Channel < Container
+    # The title of the channel.
+    attr_accessor :title
+    # The URL of the web site this is a channel for.
+    attr_accessor :link
+    # A textual description of the channel.
+    attr_accessor :description
+    # Copyright statement for channel.
+    attr_accessor :copyright
+    # ISO code for the language the channel is written in.
+    attr_accessor :language
+    # E-mail address of person responsible for editorial content.
+    attr_accessor :managingeditor
+    # E-mail address of person responsible for technical issues with feed.
+    attr_accessor :webmaster
+    # Publication date of content in channel.
+    attr_writer :pubdate
+    # Last time content in channel changed.
+    attr_writer :lastbuilddate
+    # The graphical image to represent the channel, as a
+    # Syndication::Image object.
+    attr_accessor :image
+    # One or more categories for the channel, as an Array of Strings.
+    attr_accessor :category
+    alias category= store_category
+    # The software that generated the channel.
+    attr_accessor :generator
+    # The URL of some documentation on what the RSS format is.
+    attr_accessor :docs
+    # Time to live for this copy of the channel.
+    attr_accessor :ttl
+    # rssCloud interface (for Radio UserLand).
+    attr_accessor :cloud
+    # PICS rating for channel.
+    attr_accessor :rating
+    # The TextInput area as a Syndication::TextInput object.
+    attr_accessor :textinput
+    # Hours when the feed can be skipped (because it will not have new content).
+    # Returned as an Array of values in the range 0..23 (even if parsing the
+    # UserLand variant of RSS 0.91).
+    attr_reader :skiphours
+    # Full names (in English) of days when the feed can be skipped.
+    attr_reader :skipdays
+    # Publication date of content in channel, as a DateTime object if it
+    # can be parsed by DateTime; otherwise, as a String.
+    def pubdate
+      return parse_date(@pubdate)
+    end
+    # Last time content in channel changed, as a DateTime object if it
+    # can be parsed by DateTime; otherwise, as a String.
+    def lastbuilddate
+      return parse_date(@lastbuilddate)
+    end
+    # Add an hour to the list of hours to skip.
+    #
+    # The <hour> element in fact comes inside <skipHours>, but we don't enforce
+    # that; we just make the Channel recognize it and store the values.
+    def hour=(hr)
+      if hr.kind_of?(String)
+        if !@skiphours
+          @skiphours = Array.new
+        end
+        h = hr.to_i
+        @skiphours << (h == 24 ? 0 : h)
+      end
+    end
+    # Add a day name to the list of days to skip.
+    #
+    # The <day> element in fact comes inside <skipDays>, but we don't enforce
+    # that; we just make the Channel recognize it and store the values.
+    def day=(dayname)
+      if dayname.kind_of?(String)
+        if !@skipdays
+          @skipdays = Array.new
+        end
+        @skipdays << dayname
+      end
+    end
+  end
+  # The <cloud> element is very rarely used. It was added to the RSS standards
+  # to support the rssCloud protocol of Radio UserLand.
+  class Cloud < Container
+    # The hostname to connect to.
+    attr_accessor :domain
+    # The TCP/IP port number.
+    attr_reader :port
+    # The request path.
+    attr_accessor :path
+    # The registration method.
+    attr_accessor :registerprocedure
+    # The protocol to use.
+    attr_accessor :protocol
+    # Set port number
+    def port=(x)
+      @port = x.to_i
+    end
+    def initialize(parent, tag, attrs = nil)
+      @tag = tag
+      @parent = parent
+      if attrs
+        attrs.each_pair {|key, value|
+          self.store(key, value)
+        }
+      end
+    end
+  end
+  # Represents a multimedia enclosure in an RSS item.
+  # Typically found as Syndication::Item#enclosure
+  class Enclosure < Container
+    # The URL to the multimedia file.
+    attr_accessor :url
+    # The MIME type of the file.
+    attr_accessor :type
+    # The length of the file, in bytes.
+    attr_reader :length
+    # Set length in bytes.
+    def length=(x)
+      @length = x.to_i
+    end
+    def initialize(parent, tag, attrs = nil)
+      @tag = tag
+      @parent = parent
+      if attrs
+        attrs.each_pair {|key, value|
+          self.store(key, value)
+        }
+      end
+    end
+  end
+  # Represents a parsed RSS feed, as returned by Syndication::RSS::Parser.
+  class Feed < Container
+    # The Channel metadata and contents of the feed as a
+    # Syndication::Channel object
+    attr_accessor :channel
+    # The items in the feed as an Array of Syndication::Item objects.
+    attr_reader :items
+    # The text input area as a Syndication::TextInput object.
+    attr_accessor :textinput
+    # The image for the feed, as a Syndication::Image object.
+    attr_accessor :image
+    # Add an item to the feed.
+    def item=(obj)
+      if !@items
+        @items = Array.new
+      end
+      @items.push(obj)
+    end
+  end
+  # A parser for RSS feeds.
+  # See Syndication::Parser in common.rb for the abstract class this
+  # specializes.
+  class Parser < AbstractParser
+    include REXML::StreamListener
+    #:stopdoc:
+    # A hash of tags which require the creation of new objects, and the class
+    # to use for creating the object.
+    CLASS_FOR_TAG = {
+      'item' => Item,
+      'entry' => Item,
+      'image' => Image,
+      'channel' => Channel,
+      'cloud' => Cloud,
+      'textinput' => TextInput,
+      'textInput' => TextInput,
+      'enclosure' => Enclosure
+    }
+    #:startdoc:
+    # Reset the parser ready to parse a new feed.
+    def reset
+      # Set up an empty RSS::Feed object and make it the current object
+      @parsetree = Feed.new(nil)
+      # Set up the class-for-tag hash
+      @class_for_tag = CLASS_FOR_TAG
+      # Everything else is common to both kinds of parser
+      super
+    end
+    # The most recently parsed feed as a Syndication::RSS::Feed object.
+    def feed
+      return @parsetree
+    end
+  end
+end
+end

data/lib/syndication/syndication.rb ADDED

@@ -0,0 +1,45 @@
+require 'date'
+module Syndication
+  # Mixin for RSS 1.0 syndication data (draft standard for RSS 1.0).
+  #
+  # If you require 'syndication/syndication' these methods are added to the
+  # Syndication::Channel class.
+  #
+  # Access methods are named after the XML elements, prefixed with sy_.
+  #
+  module Syndication
+    # The period over which the channel is updated. Allowed values are
+    # 'hourly', 'daily', 'weekly', 'monthly', 'yearly'. If omitted, 'daily'
+    # is assumed.
+    attr_accessor :sy_updateperiod
+    # Frequency of updates, in relation to sy_updateperiod. Indicates how many
+    # times in each sy_updateperiod the channel is updated. For example,
+    # sy_updateperiod = 'daily' and sy_updatefrequency = 4 means four times
+    # per day.
+    attr_accessor :sy_updatefrequency
+    # Base date used to calculate publishing times. When combined with
+    # sy_updateperiod and sy_updatefrequency, the publishing schedule can
+    # be derived. Returned as a DateTime if possible, otherwise as a String.
+    attr_reader :sy_updatebase
+    def sy_updatebase=(x)
+      d = DateTime.parse(x)
+      if d
+        @sy_updatebase = d
+      else
+        @sy_updatebase = x
+      end
+    end
+  end
+  #:enddoc:
+  # Opens (or creates) the Channel class of the enclosing Syndication module
+  # and mixes the sy_* accessors of the Syndication mixin into it.
+  # NOTE(review): rss.rb defines its Channel inside Syndication::RSS, whereas
+  # this reopens Syndication::Channel -- confirm that this is the class the
+  # RSS parser actually instantiates.
+  class Channel
+    include Syndication
+  end
+end

data/test/atomtest.rb ADDED

@@ -0,0 +1,186 @@
+require 'syndication/atom'
+require 'test/unit'
+module Syndication
+  # This class contains the unit tests for the Syndication module.
+  class Tests < Test::Unit::TestCase
+    # A set of minimal assertions that can be applied to every well-formed parsed
+    # feed.
+    def baseline_assertions(feed)
+      assert_not_nil(feed, 'Parser returned nil')
+      assert_kind_of(Syndication::Atom::Feed, feed)
+      assert_not_nil(feed.title, 'Feed#title was nil')
+      assert_not_nil(feed.id, 'Feed#id was nil')
+      assert_not_nil(feed.updated, 'Feed#updated was nil')
+      assert_kind_of(DateTime, feed.updated)
+      assert(feed.entries.length > 0, 'No entries in feed')
+      for entry in feed.entries
+        assert_not_nil(entry.title, 'Entry#title was nil')
+        assert_not_nil(entry.id, 'Entry#id was nil')
+        assert(entry.links.length > 0, 'No links in entry')
+        assert_not_nil(entry.links[0], 'Entry#links[0] was nil')
+        assert_not_nil(entry.updated, 'Entry#updated was nil')
+        assert_kind_of(DateTime, entry.updated)
+      end
+    end
+    # Minimal test
+    def test_atom_minimal
+      xml = <<-EOF
+    <?xml version="1.0" encoding="utf-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom">
+      <title>One good turn usually gets most of the blanket.</title>
+      <updated>2005-08-20T21:14:38Z</updated>
+      <id>urn:uuid:035d3aa3022c1b1b2a17e37ae2dcc376</id>
+      <entry>
+        <title>Quidquid latine dictum sit, altum viditur.</title>
+        <link href="http://example.com/05/08/20/2114.html"/>
+        <id>urn:uuid:89d96d76a99426264f6f1f520c1b93c2</id>
+        <updated>2005-08-20T21:14:38Z</updated>
+      </entry>
+    </feed>
+      EOF
+      f = Syndication::Atom::Parser.new.parse(xml)
+      baseline_assertions(f)
+      assert(f.title.txt == 'One good turn usually gets most of the blanket.')
+      assert(f.updated.strftime('%F %T') == '2005-08-20 21:14:38')
+      assert(f.entries.length == 1, 'Wrong number of entries in feed')
+      assert(f.id == 'urn:uuid:035d3aa3022c1b1b2a17e37ae2dcc376')
+      e = f.entries[0]
+      assert(e.title.txt == 'Quidquid latine dictum sit, altum viditur.')
+      assert(e.links.length == 1, 'Wrong number of links in entry')
+      l = e.links[0]
+      assert(l.href == 'http://example.com/05/08/20/2114.html')
+      assert(e.id == 'urn:uuid:89d96d76a99426264f6f1f520c1b93c2')
+      assert(e.updated.strftime('%F %T') == '2005-08-20 21:14:38')
+    end
+    # Test a well-formed Atom feed with all possible elements
+    def test_atom_wf_full
+      xml = <<-EOF
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+  <title type="text">It is the quality rather than the quantity that matters.</title>
+  <updated>2005-08-20T21:43:44Z</updated>
+  <id>urn:uuid:dc03a676cc5f04b9f0c728592270c8b7</id>
+  <author>
+    <name>mathew</name>
+    <email>meta@pobox.com</email>
+    <uri>http://www.pobox.com/~meta/</uri>
+  </author>
+  <category term="test"/>
+  <category term="Ruby"/>
+  <contributor>
+    <name>Phil Space</name>
+    <email>space@example.com</email>
+  </contributor>
+  <contributor>
+    <name>Anne Example</name>
+    <email>anne@example.com</email>
+  </contributor>
+  <generator uri="http://example.com/ruby/syndication" version="1.0">
+    Ruby Syndication Library
+  </generator>
+  <icon>http://www.example.com/goatseicon.gif</icon>
+  <link rel="self" type="application/ruby" href="file://atom.rb"/>
+  <logo>http://www.example.com/goatse.jpg</logo>
+  <rights>Copyright (c) meta@pobox.com 2005</rights>
+  <subtitle type="xhtml">
+    <div xmlns="http://www.w3.org/1999/xhtml">
+      <p>This is <b>XHTML</b> content.</p>
+    </div>
+  </subtitle>
+  <entry>
+    <title>Cleanliness is next to impossible.</title>
+    <summary type="xhtml">
+      <xhtml:div xmlns:xhtml="http://www.w3.org/1999/xhtml">
+        This is <xhtml:b>XHTML</xhtml:b> content.
+      </xhtml:div>
+    </summary>
+    <link href="http://example.com/05/08/20/2143.html"/>
+    <id>urn:uuid:380b651e97c2e6ecc68eaa66c90939b6</id>
+    <published>1978-03-12T10:22:11Z</published>
+    <updated>2005-08-20T21:43:44Z</updated>
+    <author>
+      <name>Stu Dapples</name>
+      <email>stu@example.com</email>
+    </author>
+    <category term="fortune"/>
+    <category term="aphorism"/>
+    <content type="text">
+      Cleanliness of code is certainly next to impossible if you have to parse
+      Atom feeds with all their features.
+    </content>
+    <contributor>
+      <name>Ben Dover</name>
+    </contributor>
+    <contributor>
+      <name>Eileen Dover</name>
+    </contributor>
+    <rights>This test entry is in the public domain.</rights>
+  </entry>
+  <entry>
+    <title type="html">&lt;b>WE HAVE TACOS&lt;/b></title>
+    <link href="http://www.pobox.com/~meta/"/>
+    <id>urn:uuid:13be6c856fac98d9a7fd144b61dee06d</id>
+    <updated>2004-12-23T21:22:23-06:00</updated>
+    <source>
+      <author><name>Rick O'Shea</name></author>
+      <category term="example"/>
+      <contributor><name>Hugh Cares</name></contributor>
+      <generator uri="http://www.pobox.com/~meta/" version="1">
+        Typed in by hand by some poor guy.
+      </generator>
+      <icon>http://www.example.com/icon2.png</icon>
+      <id>urn:uuid:1234decafbad7890deadbeef5678304</id>
+      <link rel="alternate" type="text/html"
+        href="http://www.pobox.com/~meta/"/>
+      <logo>http://www.example.com/logo.svg</logo>
+      <rights>Some rights reserved, some not</rights>
+      <title>More example stuff</title>
+      <subtitle>MAKE IT STOP!</subtitle>
+      <updated>2005-08-20T22:11-05:00</updated>
+    </source>
+  </entry>
+</feed>
+      EOF
+      f = Syndication::Atom::Parser.new.parse(xml)
+      baseline_assertions(f)
+      assert(f.categories.length == 2)
+      assert(f.contributors.length == 2)
+      assert(f.contributors[0].name == 'Phil Space', "Feed#contributors name didn't match")
+      assert(f.contributors[1].name == 'Anne Example', "Feed#contributors name didn't match")
+      assert(f.categories[0].term = 'test', "Feed#categories didn't match")
+      assert(f.categories[1].term = 'Ruby', "Feed#categories didn't match")
+      assert(f.title.txt == 'It is the quality rather than the quantity that matters.')
+      assert(f.updated == DateTime.parse('2005-08-20 21:43:44Z'), 'Feed#updated incorrectly parsed')
+      assert(f.author.name == 'mathew')
+      assert(f.author.email == 'meta@pobox.com')
+      assert(f.author.uri == 'http://www.pobox.com/~meta/')
+      assert(f.generator == 'Ruby Syndication Library')
+      assert(f.icon == 'http://www.example.com/goatseicon.gif')
+      assert(f.links.length == 1)
+      assert(f.links[0].rel == 'self')
+      assert(f.links[0].href == 'file://atom.rb')
+      assert(f.links[0].type == 'application/ruby')
+      assert(f.logo == 'http://www.example.com/goatse.jpg')
+      assert(f.rights == 'Copyright (c) meta@pobox.com 2005')
+      assert(f.subtitle.xhtml == '<p>This is <b>XHTML</b> content.</p>')
+      assert(f.entries.length == 2)
+      e1 = f.entries[0]
+      assert(e1.summary.xhtml == 'This is <b>XHTML</b> content.')
+      assert(e1.categories.length == 2)
+      assert(e1.categories[0].term == 'fortune')
+      assert(e1.categories[1].term == 'aphorism')
+      e2 = f.entries[1]
+      assert(e2.title.html == '<b>WE HAVE TACOS</b>')
+      s = e2.source
+      assert(s.kind_of?(Syndication::Atom::Feed))
+      assert(s.title.txt == 'More example stuff')
+      assert(s.updated == DateTime.parse('2005-08-20 22:11:00-0500'))
+    end
+  end
+end