RubyGems - feedtools - Versions diffs - 0.1.0 - Mend

feedtools 0.1.0

Files changed (11) hide show

data/CHANGELOG +4 -0
data/README +13 -0
data/install.rb +30 -0
data/lib/feed_tools.rb +1126 -0
data/lib/feed_tools/vendor/builder.rb +13 -0
data/lib/feed_tools/vendor/builder/blankslate.rb +53 -0
data/lib/feed_tools/vendor/builder/xmlbase.rb +143 -0
data/lib/feed_tools/vendor/builder/xmlevents.rb +63 -0
data/lib/feed_tools/vendor/builder/xmlmarkup.rb +297 -0
data/rakefile +111 -0
metadata +62 -0

data/CHANGELOG ADDED

@@ -0,0 +1,4 @@
+== FeedTools 0.1.0
+ * basic support for rss, atom, cdf
+ * basic caching using active record
+ * support for etags

data/README ADDED

@@ -0,0 +1,13 @@
+ FeedTools was designed to be a simple XML feed parser, generator, and translator with a built-in
+ caching system.
+== Example
+  slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
+  slashdot_feed.title
+  => "Slashdot"
+  slashdot_feed.description
+  => "News for nerds, stuff that matters"
+  slashdot_feed.link
+  => "http://slashdot.org/"
+  slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
+  => "43,37,28,23,11,3,1"

data/install.rb ADDED

@@ -0,0 +1,30 @@
+require 'rbconfig'
+require 'find'
+require 'ftools'
+include Config
+# this was adapted from rdoc's install.rb by ways of Log4r
+$sitedir = CONFIG["sitelibdir"]
+unless $sitedir
+  version = CONFIG["MAJOR"] + "." + CONFIG["MINOR"]
+  $libdir = File.join(CONFIG["libdir"], "ruby", version)
+  $sitedir = $:.find {|x| x =~ /site_ruby/ }
+  if !$sitedir
+    $sitedir = File.join($libdir, "site_ruby")
+  elsif $sitedir !~ Regexp.quote(version)
+    $sitedir = File.join($sitedir, version)
+  end
+end
+# the acual gruntwork
+Dir.chdir("lib")
+Find.find("feed_tools", "feed_tools.rb") { |f|
+  if f[-3..-1] == ".rb"
+    File::install(f, File.join($sitedir, *f.split(/\//)), 0644, true)
+  else
+    File::makedirs(File.join($sitedir, *f.split(/\//)))
+  end
+}

data/lib/feed_tools.rb ADDED

@@ -0,0 +1,1126 @@
+#--
+# Copyright (c) 2005 Robert Aman
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#++
+FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] || ENV['RAILS_ENV'] || 'production'
+$:.unshift(File.dirname(__FILE__))
+$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
+begin
+  require 'active_record'
+rescue LoadError
+  require 'rubygems'
+  require_gem 'activerecord'
+end
+begin
+  require 'rubygems'
+  require 'builder'
+rescue LoadError
+  # RubyGems is not available, use included Builder
+  $:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
+  require 'feed_tools/vendor/builder'
+end
+require 'open-uri'
+require 'time'
+require 'rexml/document'
+require 'yaml'
+require 'cgi'
+module FeedTools
+  class Feed < ActiveRecord::Base
+    include REXML
+    has_many :feed_items_unsorted, :class_name => "FeedItem"
+    # Creates a feed that starts out not live and without a memoized item list, then hands
+    # over to the superclass initializer.
+    def initialize
+      # becomes true once load_remote_feed has run
+      @live = false
+      # filled in later by feed_items or parse_feed
+      @feed_items_unsorted = nil
+      super
+    end
+    # Loads the feed specified by the url, pulling the data from the cache if it hasn't expired
+    # Be aware that this method translates from the feed: and rss: pseudo-protocols to the
+    # http: protocol as needed.  This means that if you pass in a feed url that looks like
+    # 'feed://www.anywhere.com/feed.xml' it will end up being stored in the cache as
+    # 'http://www.anywhere.com/feed.xml' instead.  This does affect the usage of methods like
+    # find_by_url, but otherwise should be fairly transparent.
+    def Feed.open(url)
+      # deal with all of the ugly possibilities involved in the rss: and feed: pseudo-protocols
+      if (url =~ /feed:/) == 0
+        url = url.gsub(/feed:\/\/http:\/\/\//, "http://")
+        url = url.gsub(/feed:\/\/http:\/\//, "http://")
+        url = url.gsub(/feed:http:\/\/\//, "http://")
+        url = url.gsub(/feed:http:\/\//, "http://")
+        url = url.gsub(/feed:\/\/\//, "http://")
+        url = url.gsub(/feed:\/\//, "http://")
+        url = url.gsub(/feed:\//, "http://")
+        url = url.gsub(/feed:/, "http://")
+      end
+      if (url =~ /rss:/) == 0
+        url = url.gsub(/rss:\/\/http:\/\/\//, "http://")
+        url = url.gsub(/rss:\/\/http:\/\//, "http://")
+        url = url.gsub(/rss:http:\/\/\//, "http://")
+        url = url.gsub(/rss:http:\/\//, "http://")
+        url = url.gsub(/rss:\/\/\//, "http://")
+        url = url.gsub(/rss:\/\//, "http://")
+        url = url.gsub(/rss:\//, "http://")
+        url = url.gsub(/rss:/, "http://")
+      end
+      feed = nil
+      begin
+        feed = Feed.find_by_url(url)
+      rescue ActiveRecord::StatementInvalid
+        # make sure that the necessary tables are present and recover if possible
+        FeedTools::Feed.prepare_connection
+        unless FeedTools::Feed.cache_exists?
+          FeedTools::Feed.create_cache
+        end
+        feed = Feed.find_by_url(url)
+      end
+      unless feed.nil?
+        feed.update_if_needed
+      else
+        feed = Feed.new
+        feed.url = url
+        feed.load_remote_feed
+      end
+      return feed
+    end
+    # Checks if the feed has expired and updates if it has
+    def update_if_needed
+      if expired?
+        load_remote_feed
+      end
+    end
+    # Verifies that the table structure exists
+    def Feed.cache_exists?
+      return Feed.table_exists? && FeedItem.table_exists?
+    end
+    # Verifies that the required fields exist; additional ones added by the user are fine
+    def Feed.table_exists?
+      begin
+        connection.execute "select id, url, link, image_link, title, description, " +
+          "tags, last_updated, etag, time_to_live from feeds limit 1"
+      rescue ActiveRecord::StatementInvalid
+        return false
+      end
+      return true
+    end
+    # Generates the table structure if necessary
+    def Feed.create_cache
+      unless Feed.cache_exists?
+        feed_items_mysql = <<-SQL_END
+          CREATE TABLE `feed_items` (
+            `id`            int(6) unsigned NOT NULL auto_increment,
+            `feed_id`       int(6) unsigned NOT NULL default '0',
+            `link`          varchar(255) default NULL,
+            `title`         varchar(255) default NULL,
+            `author`        varchar(255) default NULL,
+            `description`   text default NULL,
+            `time`          datetime NOT NULL default '0000-00-00 00:00:00',
+            `tags`          varchar(255) default NULL,
+            PRIMARY KEY  (`id`)
+          ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+        SQL_END
+        feed_items_sqlite = <<-SQL_END
+          CREATE TABLE 'feed_items' (
+            'id'            INTEGER PRIMARY KEY NOT NULL,
+            'feed_id'       INTEGER NOT NULL,
+            'link'          VARCHAR(255) DEFAULT NULL,
+            'title'         VARCHAR(255) DEFAULT NULL,
+            'author'        VARCHAR(255) DEFAULT NULL,
+            'description'   TEXT DEFAULT NULL,
+            'time'          DATETIME DEFAULT NULL,
+            'tags'          VARCHAR(255) DEFAULT NULL
+          );
+        SQL_END
+        feed_items_psql = <<-SQL_END
+          CREATE TABLE feed_items (
+            id            SERIAL PRIMARY KEY NOT NULL,
+            feed_id       int REFERENCES feeds,
+            link          varchar(255) default NULL,
+            title         varchar(255) default NULL,
+            author        varchar(255) default NULL,
+            description   text default NULL,
+            time          datetime default NULL,
+            tags          varchar(255) default NULL
+          );
+        SQL_END
+        unless FeedItem.table_exists?
+          table_creation_sql = nil
+          if configurations["adapter"] == "mysql"
+            table_creation_sql = feed_items_mysql
+          elsif configurations["adapter"] == "sqlite"
+            table_creation_sql = feed_items_sqlite
+          elsif configurations["adapter"] == "postgresql"
+            table_creation_sql = feeds_psql
+          end
+          if table_creation_sql.nil?
+            raise "Could not build feed_items table."
+          else
+            connection.execute table_creation_sql
+          end
+        end
+        feeds_mysql = <<-SQL_END
+          CREATE TABLE `feeds` (
+            `id`            int(6) unsigned NOT NULL auto_increment,
+            `url`           varchar(255) NOT NULL default '',
+            `link`          varchar(255) NOT NULL default '',
+            `image_link`    varchar(255) default NULL,
+            `title`         varchar(255) default NULL,
+            `description`   text default NULL,
+            `tags`          varchar(255) default NULL,
+            `last_updated`  datetime default NULL,
+            `etag`          varchar(255) default NULL,
+            `time_to_live`  int(4) default NULL,
+            PRIMARY KEY  (`id`)
+          ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+        SQL_END
+        feeds_sqlite = <<-SQL_END
+          CREATE TABLE 'feeds' (
+            'id'            INTEGER PRIMARY KEY NOT NULL,
+            'url'           VARCHAR(255) DEFAULT NULL,
+            'link'          VARCHAR(255) DEFAULT NULL,
+            'image_link'    VARCHAR(255) DEFAULT NULL,
+            'title'         VARCHAR(255) DEFAULT NULL,
+            'description'   TEXT DEFAULT NULL,
+            'tags'          VARCHAR(255) DEFAULT NULL,
+            'last_updated'  DATETIME DEFAULT NULL,
+            'etag'          VARCHAR(255) DEFAULT NULL,
+            'time_to_live'  INTEGER DEFAULT NULL
+          );
+        SQL_END
+        feeds_psql = <<-SQL_END
+          CREATE TABLE feeds (
+            id            SERIAL PRIMARY KEY NOT NULL,
+            url           varchar(255) default NULL,
+            link          varchar(255) default NULL,
+            image_link    varchar(255) default NULL,
+            title         varchar(255) default NULL,
+            description   text default NULL,
+            tags          varchar(255) default NULL,
+            last_updated  datetime default NULL,
+            etag          varchar(255) default NULL,
+            time_to_live  int default NULL
+          );
+        SQL_END
+        unless Feed.table_exists?
+          table_creation_sql = nil
+          if configurations["adapter"] == "mysql"
+            table_creation_sql = feeds_mysql
+          elsif configurations["adapter"] == "sqlite"
+            table_creation_sql = feeds_sqlite
+          elsif configurations["adapter"] == "postgresql"
+            table_creation_sql = feeds_psql
+          end
+          if table_creation_sql.nil?
+            raise "Could not build feed_items table."
+          else
+            connection.execute table_creation_sql
+          end
+        end
+      end
+    end
+    # Removes all feed items and all feeds from the cache.
+    # This could obviously be a very dangerous operation if you use the cache tables for more
+    # than simply caching the feeds.
+    def Feed.clear_cache
+      # the items go first, then the feeds they belong to
+      FeedItem.delete_all
+      Feed.delete_all
+    end
+    # Removes all feed items from the cache and sets last_updated to NULL for all feeds, which
+    # makes expired? true for each of them the next time it is checked.
+    # This is probably much safer than the clear_cache method, since the feed rows are kept.
+    def Feed.expire_cache
+      FeedItem.delete_all
+      Feed.update_all("last_updated = NULL")
+    end
+    # Removes all feed items older than the specified number of seconds
+    def Feed.purge_cache(purge_time=1.week)
+      purge_date = (Time.now - purge_time).strftime("%Y-%m-%d %H:%M:%S")
+      FeedItem.delete_all("time < '#{purge_date}'")
+    end
+    # If ActiveRecord is not already connected, attempts to find a configuration file and use
+    # it to open a connection for ActiveRecord.
+    # This method is probably unnecessary for anything but testing and debugging purposes.
+    def Feed.prepare_connection
+      begin
+        ActiveRecord::Base.connection
+      rescue
+        possible_config_files = [
+          "./config/database.yml",
+          "./database.yml"
+        ]
+        database_config_file = nil
+        for file in possible_config_files
+          if File.exists? file
+            database_config_file = file
+            break
+          end
+        end
+        database_config_hash = File.open(database_config_file) do |file|
+          config_hash = YAML::load(file)
+          unless config_hash[FEED_TOOLS_ENV].nil?
+            config_hash = config_hash[FEED_TOOLS_ENV]
+          end
+          config_hash
+        end
+        ActiveRecord::Base.configurations = database_config_hash
+        ActiveRecord::Base.establish_connection(database_config_hash)
+        ActiveRecord::Base.connection
+      end
+    end
+    # Reports whether parsed feeds should be saved to the cache; parse_feed checks this
+    # before it calls save.  This implementation always answers true.
+    def Feed.cache_enabled?
+      return true
+    end
+    def title
+      return (self["title"] or "Untitled Feed")
+    end
+    # Optional feed attribute.
+    # If you want to use it, the database table needs to have a language field added, otherwise
+    # it will just default to "en-US".
+    def language
+      begin
+        return (self["language"] or "en-US")
+      rescue
+        return "en-US"
+      end
+    end
+    def live?
+      if @live
+        return true
+      else
+        return false
+      end
+    end
+    def expired?
+      return last_updated == nil || (last_updated + time_to_live) < Time.now
+    end
+    # Forces this feed to expire: deletes its cached feed items, drops the memoized item
+    # list, backdates last_updated and saves the feed.
+    def expire
+      FeedItem.delete_all("feed_id = '#{self.id}'")
+      @feed_items_unsorted = nil
+      # a date in 1980 puts last_updated far in the past, so expired? becomes true
+      self.last_updated = Time.mktime(1980)
+      self.save
+    end
+    # The ammount of time in seconds between the last time the feed was updated and the next
+    # valid time to retrieve a remote feed.
+    def time_to_live
+      return self['time_to_live'].nil? ? 1.hour : self['time_to_live'].hour
+    end
+    def tag_list
+      return tags.nil? ? nil : tags[1..-2].split("|")
+    end
+    def tag_list=(new_tag_list)
+      self.tags = "|" + (new_tag_list.map { |x| x.strip }).join("|") + "|"
+    end
+    def tag_string
+      return (tags.nil? ? nil : tags[1..-2]).split("|").join(", ")
+    end
+    def tag_string=(new_tag_string)
+      self.tags = "|" + (new_tag_string.split(",").map { |x| x.strip }).join("|") + "|"
+    end
+    # Returns a list of the feed_items, sorted by date
+    def feed_items
+      begin
+        if @feed_items_unsorted.nil?
+          @feed_items_unsorted = feed_items_unsorted
+        end
+        return @feed_items_unsorted.sort do |a,b|
+          b.time <=> a.time
+        end
+      rescue
+        unless @feed_items_unsorted.nil?
+          return @feed_items_unsorted
+        else
+          return feed_items_unsorted
+        end
+      end
+    end
+    # Attempts to load the feed from the remote location.  Requires the url to be set.
+    # If an etag has been set, attempts to use it to prevent unnecessary reloading of identical
+    # content.
+    def load_remote_feed
+      @live = true
+      self.last_updated = Time.now
+      if (etag != nil)
+        # TODO: verify that the etag code works as intended
+        # -> may need to check what gets returned when the
+        # etag is matched
+        # =================================================
+        open(url, "If-None-Match" => @etag ) do |http|
+          etag = http.meta['etag']
+          parse_feed(http.read)
+        end
+      else
+        open(url) do |http|
+          etag = http.meta['etag']
+          parse_feed(http.read)
+        end
+      end
+    end
+    # Hook that parse_feed calls with the raw feed data once the standard attributes have
+    # been assigned and before the feed is saved.  This default implementation does nothing
+    # and returns nil; presumably subclasses override it to read additional fields.
+    def parse_feed_hook(feed_data)
+      return nil
+    end
+    def parse_feed(feed_data)
+      root_node = Document.new(feed_data).root
+      metadata_node = XPath.first(root_node, "channel")
+      if metadata_node == nil
+        metadata_node = root_node
+      end
+      # get the feed title
+      title = XPath.first(metadata_node, "title/text()").to_s
+      # is the title escaped?
+      if XPath.first(metadata_node, "title/@mode").to_s == "escaped"
+        title = CGI.unescapeHTML(title)
+      end
+      # get the feed link
+      link = XPath.first(metadata_node, "link[@rel='alternate' @type='text/html']/@href").to_s
+      if link == ""
+        link = XPath.first(metadata_node, "link[@rel='alternate']/@href").to_s
+      end
+      if link == ""
+        link = XPath.first(metadata_node, "link/@href").to_s
+      end
+      if link == ""
+        link = XPath.first(metadata_node, "link/text()").to_s
+      end
+      if link == ""
+        # The ordering here is somewhat incorrect, but the more correct ordering would
+        # introduce much more serious problems, so I've chosen to go with the lesser of two
+        # evils.  (The completely correct implementation would require a vestigial 'base' method
+        # on the Feed class to fully support CDF files.  This method will support almost all CDF
+        # files without any unnecessary methods.)  But given that this only exists to support
+        # CDF files, it's not a big deal.  It's not like CDF files really exist in the wild.
+        # (The assumption this ordering makes is that the 'base' attribute points to a valid
+        # location, hopefully the same as the 'href' location.  Chances are pretty good that this
+        # is true.)
+        link = XPath.first(metadata_node, "@base").to_s
+      end
+      if link == ""
+        link = XPath.first(metadata_node, "@href").to_s
+      end
+      # get the feed description
+      description = XPath.first(metadata_node, "description/text()").to_s
+      if description != ""
+        if XPath.first(metadata_node, "description/@encoding").to_s != ""
+          description = "[Embedded data objects are not supported.]"
+        else
+          description = CGI.unescapeHTML(description)
+        end
+      end
+      if description == ""
+        description = XPath.first(metadata_node, "tagline/text()").to_s
+        if description != "" && XPath.first(metadata_node, "tagline/@mode").to_s == "escaped"
+          description = CGI.unescapeHTML(description)
+        end
+      end
+      if description == "" && XPath.first(metadata_node, "tagline") == nil
+        description = XPath.first(metadata_node, "info/text()").to_s
+        if description != "" && XPath.first(metadata_node, "info/@mode").to_s == "escaped"
+          description = CGI.unescapeHTML(description)
+        end
+      end
+      if description == ""
+        description = CGI.unescapeHTML(XPath.first(metadata_node, "abstract/text()").to_s)
+      end
+      # get the image link
+      image_link = XPath.first(metadata_node, "image/url/text()").to_s
+      if image_link == ""
+        image_link = XPath.first(metadata_node, "image/@rdf:resource").to_s
+      end
+      if image_link == ""
+        image_link = XPath.first(metadata_node, "link[@type='image/jpeg']/@href").to_s
+      end
+      if image_link == ""
+        image_link = XPath.first(metadata_node, "link[@type='image/gif']/@href").to_s
+      end
+      if image_link == ""
+        image_link = XPath.first(metadata_node, "link[@type='image/png']/@href").to_s
+      end
+      if image_link == ""
+        image_link = XPath.first(metadata_node, "logo[@style='image']/@href").to_s
+      end
+      if image_link == ""
+        image_link = XPath.first(metadata_node, "logo/@href").to_s
+      end
+      # get the feed time to live (expressed in hours)
+      feed_time_to_live = nil
+      update_frequency = XPath.first(metadata_node, "syn:updateFrequency/text()").to_s
+      if update_frequency != ""
+        update_period = XPath.first(metadata_node, "syn:updatePeriod/text()").to_s
+        if update_period == "daily"
+          feed_time_to_live = update_frequency.to_i * 24
+        elsif update_period == "weekly"
+          feed_time_to_live = update_frequency.to_i * 24 * 7
+        elsif update_period == "monthly"
+          feed_time_to_live = update_frequency.to_i * 24 * 30
+        elsif update_period == "yearly"
+          feed_time_to_live = update_frequency.to_i * 24 * 365
+        else
+          # hourly
+          feed_time_to_live = update_frequency.to_i
+        end
+      end
+      if feed_time_to_live == nil
+        # expressed in minutes
+        update_frequency = XPath.first(metadata_node, "ttl/text()").to_s
+        if update_frequency != ""
+          feed_time_to_live = (update_frequency.to_i / 60)
+        end
+      end
+      # TODO: handle time_to_live for CDF files
+      # =======================================
+      # get the feed items
+      items = XPath.match(root_node, "item")
+      if items == nil || items == []
+        items = XPath.match(metadata_node, "item")
+      end
+      if items == nil || items == []
+        items = XPath.match(metadata_node, "entry")
+      end
+      # set all of the properties
+      if title != ""
+        self.title = title
+      else
+        self.title = nil
+      end
+      if link != ""
+        self.link = link
+      else
+        self.link = nil
+      end
+      if description != ""
+        self.description = description
+      else
+        self.description = nil
+      end
+      if image_link != ""
+        self.image_link = image_link
+      else
+        self.image_link = nil
+      end
+      if feed_time_to_live != nil
+        self.time_to_live = feed_time_to_live
+      else
+        self.time_to_live = nil
+      end
+      parse_feed_hook(feed_data)
+      if Feed.cache_enabled?
+        save
+      end
+      # check and make sure we don't have any cached feed_items with a nil link
+      # if we do, we need to start from scratch to avoid duplicates
+      for item_link in feed_items.map { |item| item.link }
+        if item_link.nil?
+          FeedItem.delete_all("feed_id = '#{self.id}'")
+          break
+        end
+      end
+      # parse the feed items
+      @feed_items_unsorted = []
+      if items != nil
+        for item_node in items
+          @feed_items_unsorted << handle_feed_item(item_node.to_s)
+        end
+      end
+      return self
+    end
+    # Locates the feed item in the database based on the supplied item xml data.
+    def find_feed_item_by_data(item_data)
+      item_node = Document.new(item_data).root
+      # get the link
+      item_link = XPath.first(item_node, "link[@rel='alternate']/@href").to_s
+      if item_link == ""
+        item_link = XPath.first(item_node, "link/@href").to_s
+      end
+      if item_link == ""
+        item_link = XPath.first(item_node, "link/text()").to_s
+      end
+      if item_link == ""
+        item_link = XPath.first(item_node, "@rdf:about").to_s
+      end
+      if item_link == ""
+        item_link = XPath.first(item_node, "guid/text()").to_s
+      end
+      item_title = XPath.first(item_node, "title/text()").to_s
+      feed_item = FeedItem.find_by_feed_id_and_link(self.id, item_link)
+      unless feed_item.nil?
+        # Some blogging tools alter the title of an item when the number of comments change (for
+        # example, TextPattern) and many email feed dumps use the same link for multiple
+        # items (for example, GMail).  We try to take both of these cases into account here.
+        existing_title = feed_item.title
+        item_title = item_title.gsub(/\[\d*\]/,"").strip
+        existing_title = existing_title.gsub(/\[\d*\]/,"").strip
+        item_title = item_title.gsub(/\(\d*\)/,"").strip
+        existing_title = existing_title.gsub(/\(\d*\)/,"").strip
+        item_title = item_title.gsub(/\{\d*\}/,"").strip
+        existing_title = existing_title.gsub(/\{\d*\}/,"").strip
+        if existing_title != item_title
+          feed_item = nil
+        end
+      end
+      return feed_item
+    end
+    def handle_feed_item(item_data)
+      feed_item = find_feed_item_by_data(item_data)
+      if feed_item.nil?
+        feed_item = FeedItem.new
+      end
+      feed_item.feed = self
+      feed_item.parse_item(item_data)
+      return feed_item
+    end
+    # Hook that build_feed calls, for every output format, after the standard feed level
+    # elements have been written and before the items are written.  It receives the feed
+    # type, the version and the xml builder in use.  This default implementation does nothing
+    # and returns nil; presumably subclasses override it to emit additional elements.
+    def build_feed_hook(feed_type, version, xml_builder)
+      return nil
+    end
+    # Generates xml for this feed and its items with the given builder.
+    #
+    # feed_type::   "rss" or "atom"; any other value falls through every branch and the
+    #               method returns nil.
+    # version::     the format version as a number.  The default of 0.0 stands for "rss" 1.0
+    #               or "atom" 0.3.  For "rss", the versions 0.9, 1.0 and 1.1 produce the
+    #               RDF-based format and every other version the plain rss format.
+    # xml_builder:: the Builder::XmlMarkup (or compatible) object that receives the output.
+    #
+    # Returns whatever the outermost builder call returns.  Raises a RuntimeError when an
+    # RDF-based feed is requested and one of the items has a nil link.
+    def build_feed(feed_type, version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
+      # replace the "no version given" placeholder by the default version of the format
+      if feed_type == "rss" && version == 0.0
+        version = 1.0
+      elsif feed_type == "atom" && version == 0.0
+        version = 0.3
+      end
+      if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
+        # RDF-based rss format
+        # NOTE(review): no xmlns declarations for the rdf:, dc: and syn: prefixes are written
+        # here -- confirm whether consumers of the output need them.
+        # NOTE(review): CGI.escapeHTML(link) is called without a nil check, unlike the other
+        # uses of link below -- confirm that link is always set on this path.
+        return xml_builder.tag!("rdf:RDF") do
+          xml_builder.channel("rdf:about" => CGI.escapeHTML(link)) do
+            # title, link and description are always written, as empty elements if need be
+            unless title.nil? || title == ""
+              xml_builder.title(title)
+            else
+              xml_builder.title
+            end
+            unless link.nil? || link == ""
+              xml_builder.link(link)
+            else
+              xml_builder.link
+            end
+            unless image_link.nil? || image_link == ""
+              xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
+            end
+            unless description.nil? || description == ""
+              xml_builder.description(description)
+            else
+              xml_builder.description
+            end
+            unless language.nil? || language == ""
+              xml_builder.tag!("dc:language", language)
+            end
+            # time_to_live is in seconds, so the update frequency is written in hours
+            xml_builder.tag!("syn:updatePeriod", "hourly")
+            xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
+            xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
+            # the channel only lists the item links; the items themselves follow further down
+            xml_builder.items do
+              xml_builder.tag!("rdf:Seq") do
+                unless feed_items.nil?
+                  for item in feed_items
+                    if item.link.nil?
+                      raise "Cannot generate an rdf-based feed with a nil item link field."
+                    end
+                    xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
+                  end
+                end
+              end
+            end
+            build_feed_hook(feed_type, version, xml_builder)
+          end
+          # the image gets its own top level element next to the channel
+          unless image_link.nil? || image_link == ""
+            xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
+              unless title.nil? || title == ""
+                xml_builder.title(title)
+              else
+                xml_builder.title
+              end
+              unless image_link.nil? || image_link == ""
+                xml_builder.url(image_link)
+              end
+              unless link.nil? || link == ""
+                xml_builder.link(link)
+              else
+                xml_builder.link
+              end
+            end
+          end
+          unless feed_items.nil?
+            for item in feed_items
+              item.build_feed_item(feed_type, version, xml_builder)
+            end
+          end
+        end
+      elsif feed_type == "rss"
+        # normal rss format
+        # NOTE(review): the elements are written directly inside <rss>, without a <channel>
+        # element around them -- confirm against the targeted rss versions.
+        return xml_builder.rss("version" => version.to_s) do
+          unless title.nil? || title == ""
+            xml_builder.title(title)
+          end
+          unless link.nil? || link == ""
+            xml_builder.link(link)
+          end
+          unless description.nil? || description == ""
+            xml_builder.description(description)
+          end
+          # time_to_live is in seconds, the ttl element is written in minutes
+          xml_builder.ttl((time_to_live / 1.minute).to_s)
+          xml_builder.generator("http://www.sporkmonger.com/projects/feedtools")
+          build_feed_hook(feed_type, version, xml_builder)
+          unless feed_items.nil?
+            for item in feed_items
+              item.build_feed_item(feed_type, version, xml_builder)
+            end
+          end
+        end
+      elsif feed_type == "atom"
+        # normal atom format
+        return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
+            "version" => version.to_s,
+            "xml:lang" => language) do
+          unless title.nil? || title == ""
+            xml_builder.title(title,
+                "mode" => "escaped",
+                "type" => "text/html")
+          end
+          unless link.nil? || link == ""
+            xml_builder.link("href" => link,
+                "rel" => "alternate",
+                "type" => "text/html",
+                "title" => title)
+          end
+          # the feed description is written as the atom tagline
+          unless description.nil? || description == ""
+            xml_builder.tagline(description,
+                "mode" => "escaped",
+                "type" => "text/html")
+          end
+          xml_builder.generator("FeedTools",
+              "url" => "http://www.sporkmonger.com/projects/feedtools")
+          build_feed_hook(feed_type, version, xml_builder)
+          unless feed_items.nil?
+            for item in feed_items
+              item.build_feed_item(feed_type, version, xml_builder)
+            end
+          end
+        end
+      end
+    end
+    # Saves the current state of the feed to the database unless the feed lacks a remote location
+    def save
+      unless url.nil? || url == ""
+        super
+      end
+    end
+  end
+  class FeedItem < ActiveRecord::Base
+    include REXML
+    # Verifies that the required fields exist; additional ones added by the user are fine
+    def FeedItem.table_exists?
+      begin
+        connection.execute "select id, feed_id, link, title, author, description, " +
+          "time, tags from feed_items limit 1"
+      rescue ActiveRecord::StatementInvalid
+        return false
+      end
+      return true
+    end
+    def feed
+      if @feed != nil
+        return @feed
+      elsif @feed_id != nil
+        @feed = Feed.find_by_id(self.feed_id)
+        return @feed
+      else
+        return nil
+      end
+    end
+    # Attaches this item to new_feed: the feed's id is stored in feed_id and the feed object
+    # itself is remembered in @feed, so that feed can return it without a lookup.
+    def feed=(new_feed)
+      self.feed_id = new_feed.id
+      @feed = new_feed
+    end
+    def title
+      return (self['title'] or "Untitled Entry")
+    end
+    def tag_list
+      return tags.nil? ? nil : tags[1..-2].split("|")
+    end
+    def tag_list=(new_tag_list)
+      self.tags = "|" + (new_tag_list.map { |x| x.strip }).join("|") + "|"
+    end
+    def tag_string
+      return (tags.nil? ? nil : tags[1..-2]).split("|").join(", ")
+    end
+    def tag_string=(new_tag_string)
+      self.tags = "|" + (new_tag_string.split(",").map { |x| x.strip }).join("|") + "|"
+    end
+    # Hook that receives the xml data of a single feed item.  This default implementation
+    # does nothing and returns nil.
+    # NOTE(review): the call site is not visible in this part of the file; presumably
+    # parse_item invokes it so that subclasses can read additional fields -- confirm.
+    def parse_feed_item_hook(item_data)
+      return nil
+    end
+    def parse_item(item_data)
+      item_node = Document.new(item_data).root
+      # get the feed base, in case the feed items use relative paths
+      base = feed.link
+      # get the link
+      link = XPath.first(item_node, "link[@rel='alternate']/@href").to_s
+      if link == ""
+        link = XPath.first(item_node, "link/@href").to_s
+      end
+      if link == ""
+        link = XPath.first(item_node, "link/text()").to_s
+      end
+      if link == ""
+        link = XPath.first(item_node, "@rdf:about").to_s
+      end
+      if link == ""
+        link = XPath.first(item_node, "guid/text()").to_s
+      end
+      if link != ""
+        link = CGI.unescapeHTML(link)
+      end
+      if link != "" && (link =~ /http:\/\//) != 0 && (link =~ /https:\/\//) != 0
+        # ensure that we don't end up with 'http://www.foobar.com//path/to/entry'
+        # future-proofed this so that it doesn't break when Ruby 1.9/2.0 starts
+        # returning single character Strings instead of FixNums
+        if (base[-1] == 47 && link[0] == 47) || (base[-1] == "/" && link[0] == "/")
+          link = link[1..-1]
+        end
+        # prepend the base to the link since they seem to have used a relative path
+        link = base + link
+      end
+      title = XPath.first(item_node, "title/text()").to_s
+      if title != ""
+        # some blogging tools (notably TextPattern I believe) include the number of
+        # comments in a post in the title... this is ugly, so we're gonna strip them out
+        title = title.gsub(/\[\d*\]/,"").strip
+      end
+      # get the item author
+      author = CGI.unescapeHTML(XPath.first(item_node, "author/name/text()").to_s)
+      if author == ""
+        author = CGI.unescapeHTML(XPath.first(item_node, "dc:creator/text()").to_s)
+      end
+      if author == ""
+        author = CGI.unescapeHTML(XPath.first(item_node, "author/text()").to_s)
+      end
+      # get the item content
+      description = ""
+      body = XPath.first(item_node, "xhtml:body")
+      if body == nil
+        body = XPath.first(item_node, "body")
+      end
+      if body != nil
+        description = body.inner_xml
+      end
+      if description == ""
+        description = CGI.unescapeHTML(XPath.first(item_node, "content:encoded/text()").to_s)
+      end
+      if description == ""
+        description = XPath.first(item_node, "description/text()").to_s
+        if description != ""
+          if XPath.first(item_node, "description/@encoding").to_s != ""
+            description = "[Embedded data objects are not supported.]"
+          else
+            description = CGI.unescapeHTML(description)
+          end
+        end
+      end
+      if description == ""
+        description = XPath.first(item_node,"content/text()").to_s
+        if description != "" && XPath.first(item_node, "content/@mode").to_s == "escaped"
+          description = CGI.unescapeHTML(description)
+        end
+      end
+      # get the item time
+      time = XPath.first(item_node, "pubDate/text()").to_s
+      if time == ""
+        time = XPath.first(item_node, "dc:date/text()").to_s
+      end
+      if time == ""
+        time = XPath.first(item_node, "issued/text()").to_s
+      end
+      # get the item tags
+      tags_array = []
+      if tags_array == nil || tags_array.size == 0
+        tags_array = []
+        tag_list = XPath.match(item_node, "dc:subject/rdf:Bag/rdf:li/text()")
+        if tag_list.size > 1
+          for tag in tag_list
+            tags_array << tag.to_s.downcase.strip
+          end
+        end
+      end
+      if tags_array == nil || tags_array.size == 0
+        tags_array = []
+        tag_list = XPath.match(item_node, "category/text()")
+        if tag_list.size > 1
+          for tag in tag_list
+            tags_array << tag.to_s.downcase.strip
+          end
+        end
+      end
+      if tags_array == nil || tags_array.size == 0
+        tags_array = []
+        tag_list = XPath.match(item_node, "dc:subject/text()")
+        if tag_list.size > 1
+          for tag in tag_list
+            tags_array << tag.to_s.downcase.strip
+          end
+        end
+      end
+      if tags_array == nil || tags_array.size == 0
+        tags_array = XPath.first(item_node,
+                           "category/text()").to_s.downcase.split(" ")
+      end
+      if tags_array == nil || tags_array.size == 0
+        begin
+          tags_array = XPath.first(item_node,
+                             "dc:subject/text()").to_s.downcase.split(" ")
+        rescue
+          tags_array = []
+        end
+      end
+      if tags_array == nil || tags_array.size == 0
+        tags_array = []
+        rdf_bag = XPath.match(item_node,
+                              "taxo:topics/rdf:Bag/rdf:li")
+        if rdf_bag != nil && rdf_bag.size > 0
+          for tag_node in rdf_bag
+            begin
+              tag_url = XPath.first(tag_node, "@resource").to_s
+              tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
+              if tag_match.size > 0
+                tags_array << tag_match.first.last.downcase.strip
+              end
+            rescue
+            end
+          end
+        end
+      end
+      # set all of the properties
+      if link != ""
+        self.link = link
+      else
+        self.link = nil
+      end
+      if title != ""
+        self.title = title
+      end
+      if description != ""
+        self.description = description.strip
+      end
+      if time != ""
+        self.time = Time.parse(time) rescue Time.now
+      elsif @time == nil
+        self.time = Time.now
+      end
+      if tags_array.size > 0
+        self.tag_list = tags_array
+      end
+      parse_feed_item_hook(item_data)
+      if Feed.cache_enabled?
+        save
+      end
+      return self
+    end
+    def build_feed_item_hook(feed_type, version, xml_builder)
+    end
+    def build_feed_item(feed_type, version, xml_builder=Builder::XmlMarkup.new(:indent => 2))
+      if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
+        # RDF-based rss format
+        if link.nil?
+          raise "Cannot generate an rdf-based feed item with a nil link field."
+        end
+        return xml_builder.item("rdf:about" => CGI.escapeHTML(link)) do
+          unless title.nil? || title == ""
+            xml_builder.title(title)
+          else
+            xml_builder.title
+          end
+          unless link.nil? || link == ""
+            xml_builder.link(link)
+          else
+            xml_builder.link
+          end
+          unless description.nil? || description == ""
+            xml_builder.description(description)
+          else
+            xml_builder.description
+          end
+          unless time.nil?
+            xml_builder.tag!("dc:date", time.iso8601)
+          end
+          unless tags.nil?
+            xml_builder.tag!("dc:subject") do
+              xml_builder.tag!("rdf:Bag") do
+                for tag in tag_list
+                  xml_builder.tag!("rdf:li", tag)
+                end
+              end
+            end
+          end
+        end
+      elsif feed_type == "rss"
+        # normal rss format
+        return xml_builder.item do
+          unless title.nil? || title == ""
+            xml_builder.title(title)
+          end
+          unless link.nil? || link == ""
+            xml_builder.link(link)
+          end
+          unless description.nil? || description == ""
+            xml_builder.description(description)
+          end
+          unless time.nil?
+            xml_builder.pubDate(time.rfc822)
+          end
+          unless tags.nil?
+            xml_builder.tag!("dc:subject") do
+              xml_builder.tag!("rdf:Bag") do
+                for tag in tag_list
+                  xml_builder.tag!("rdf:li", tag)
+                end
+              end
+            end
+          end
+        end
+      elsif feed_type == "atom"
+        # normal atom format
+        return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
+          unless title.nil? || title == ""
+            xml_builder.title(title,
+                "mode" => "escaped",
+                "type" => "text/html")
+          end
+          unless link.nil? || link == ""
+            xml_builder.link("href" => link,
+                "rel" => "alternate",
+                "type" => "text/html",
+                "title" => title)
+          end
+          unless description.nil? || description == ""
+            xml_builder.content(description,
+                "mode" => "escaped",
+                "type" => "text/html")
+          end
+          unless time.nil?
+            xml_builder.issued(time.iso8601)
+          end
+          unless tags.nil?
+            for tag in tag_list
+              xml_builder.category(tag)
+            end
+          end
+        end
+      end
+    end
+    # Saves the current state of the feed item to the database unless the feed lacks
+    # a remote location
+    def save
+      unless feed.nil? || feed.url.nil? || feed.url == ""
+        super
+      end
+    end
+  end
+end
+module REXML
+  class Element
+    # small extension to REXML to simplify parsing of xhtml feed items
+    def inner_xml
+      result = ""
+      each_child do |child|
+        result << child.to_s
+      end
+      return result
+    end
+  end
+end
+begin
+  FeedTools::Feed.prepare_connection
+  unless FeedTools::Feed.cache_exists?
+    FeedTools::Feed.create_cache
+  end
+rescue
+  # Nothing can be done until someone sets up the database connection.
+  # We'll just assume for now that the user will take care of that.
+end