feedtools 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. data/CHANGELOG +11 -0
  2. data/lib/feed_tools.rb +2496 -810
  3. data/lib/feed_tools/vendor/builder.rb +2 -0
  4. data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
  5. data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
  6. data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
  7. data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
  8. data/lib/feed_tools/vendor/htree.rb +97 -0
  9. data/lib/feed_tools/vendor/htree/container.rb +10 -0
  10. data/lib/feed_tools/vendor/htree/context.rb +67 -0
  11. data/lib/feed_tools/vendor/htree/display.rb +27 -0
  12. data/lib/feed_tools/vendor/htree/doc.rb +149 -0
  13. data/lib/feed_tools/vendor/htree/elem.rb +262 -0
  14. data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
  15. data/lib/feed_tools/vendor/htree/equality.rb +218 -0
  16. data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
  17. data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
  18. data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
  19. data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
  20. data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
  21. data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
  22. data/lib/feed_tools/vendor/htree/loc.rb +367 -0
  23. data/lib/feed_tools/vendor/htree/modules.rb +48 -0
  24. data/lib/feed_tools/vendor/htree/name.rb +124 -0
  25. data/lib/feed_tools/vendor/htree/output.rb +207 -0
  26. data/lib/feed_tools/vendor/htree/parse.rb +407 -0
  27. data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
  28. data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
  29. data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
  30. data/lib/feed_tools/vendor/htree/scan.rb +166 -0
  31. data/lib/feed_tools/vendor/htree/tag.rb +111 -0
  32. data/lib/feed_tools/vendor/htree/template.rb +909 -0
  33. data/lib/feed_tools/vendor/htree/text.rb +115 -0
  34. data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
  35. data/rakefile +1 -1
  36. data/test/rss_test.rb +97 -0
  37. metadata +30 -1
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
+ == FeedTools 0.2.0
+ * more complete support for rss, atom, cdf
+ * modular caching mechanism
+ * lazy parsing
+ * HTML sanitization of possibly dangerous fields
+ * HTML tidy support
+ * support for podcasts and vidlogs
+ * corrected handling of http redirection
+ * made http header information available
+ * file: protocol support
+ * custom parsing can be done using the find_node and find_all_nodes methods
  == FeedTools 0.1.0
  * basic support for rss, atom, cdf
  * basic caching using active record

data/lib/feed_tools.rb CHANGED
@@ -21,987 +21,2669 @@
21
21
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] || ENV['RAILS_ENV'] || 'production'
24
+ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
25
+ ENV['RAILS_ENV'] ||
26
+ 'production' # :nodoc:
27
+
28
+ FEED_TOOLS_VERSION = "0.2.0"
25
29
 
26
30
  $:.unshift(File.dirname(__FILE__))
27
31
  $:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
32
+ $:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
33
+
34
+ require 'rubygems'
35
+ require 'active_record'
28
36
 
29
37
  begin
30
- require 'active_record'
38
+ require 'builder'
31
39
  rescue LoadError
32
- require 'rubygems'
33
- require_gem 'activerecord'
40
+ # RubyGems version is not available, use included Builder
41
+ require 'feed_tools/vendor/builder'
34
42
  end
35
43
 
36
44
  begin
37
- require 'rubygems'
38
- require 'builder'
45
+ require 'tidy'
39
46
  rescue LoadError
40
- # RubyGems is not available, use included Builder
41
- $:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
42
- require 'feed_tools/vendor/builder'
47
+ # Ignore the error for now.
43
48
  end
44
49
 
45
- require 'open-uri'
46
- require 'time'
50
+ require 'feed_tools/vendor/htree'
51
+
52
+ require 'net/http'
53
+ require 'net/https'
54
+ require 'net/ftp'
55
+
47
56
  require 'rexml/document'
48
- require 'yaml'
57
+
58
+ require 'iconv'
59
+ require 'uri'
60
+ require 'time'
49
61
  require 'cgi'
62
+ require 'pp'
63
+ require 'yaml'
50
64
 
65
+ #= feed_tools.rb
66
+ #
67
+ # FeedTools was designed to be a simple XML feed parser, generator, and translator with a built-in
68
+ # caching system.
69
+ #
70
+ #== Example
71
+ # slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
72
+ # slashdot_feed.title
73
+ # => "Slashdot"
74
+ # slashdot_feed.description
75
+ # => "News for nerds, stuff that matters"
76
+ # slashdot_feed.link
77
+ # => "http://slashdot.org/"
78
+ # slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
79
+ # => "43,37,28,23,11,3,1"
51
80
  module FeedTools
52
- class Feed < ActiveRecord::Base
53
- include REXML
54
81
 
55
- has_many :feed_items_unsorted, :class_name => "FeedItem"
56
-
57
- def initialize
58
- @live = false
59
- @feed_items_unsorted = nil
60
- super
61
- end
62
-
63
- # Loads the feed specified by the url, pulling the data from the cache if it hasn't expired
64
- # Be aware that this method translates from the feed: and rss: pseudo-protocols to the
65
- # http: protocol as needed. This means that if you pass in a feed url that looks like
66
- # 'feed://www.anywhere.com/feed.xml' it will end up being stored in the cache as
67
- # 'http://www.anywhere.com/feed.xml' instead. This does affect the usage of methods like
68
- # find_by_url, but otherwise should be fairly transparent.
69
- def Feed.open(url)
70
- # deal with all of the ugly possibilities involved in the rss: and feed: pseudo-protocols
71
- if (url =~ /feed:/) == 0
72
- url = url.gsub(/feed:\/\/http:\/\/\//, "http://")
73
- url = url.gsub(/feed:\/\/http:\/\//, "http://")
74
- url = url.gsub(/feed:http:\/\/\//, "http://")
75
- url = url.gsub(/feed:http:\/\//, "http://")
76
- url = url.gsub(/feed:\/\/\//, "http://")
77
- url = url.gsub(/feed:\/\//, "http://")
78
- url = url.gsub(/feed:\//, "http://")
79
- url = url.gsub(/feed:/, "http://")
80
- end
81
- if (url =~ /rss:/) == 0
82
- url = url.gsub(/rss:\/\/http:\/\/\//, "http://")
83
- url = url.gsub(/rss:\/\/http:\/\//, "http://")
84
- url = url.gsub(/rss:http:\/\/\//, "http://")
85
- url = url.gsub(/rss:http:\/\//, "http://")
86
- url = url.gsub(/rss:\/\/\//, "http://")
87
- url = url.gsub(/rss:\/\//, "http://")
88
- url = url.gsub(/rss:\//, "http://")
89
- url = url.gsub(/rss:/, "http://")
90
- end
91
-
92
- feed = nil
82
+ # The default caching mechanism for the FeedTools module
83
+ class DatabaseFeedCache < ActiveRecord::Base
84
+ # Overrides the default table name to use the "feeds" table.
85
+ def self.table_name() "feeds" end
86
+
87
+ # If ActiveRecord is not already connected, attempts to find a configuration file and use
88
+ # it to open a connection for ActiveRecord.
89
+ # This method is probably unnecessary for anything but testing and debugging purposes.
90
+ # In a Rails environment, the connection will already have been established
91
+ # and this method will simply do nothing.
92
+ #
93
+ # This method should not raise any exceptions because it's designed to be run only when
94
+ # the module is first loaded. If it fails, the user should get an exception when they
95
+ # try to perform some action that makes use of the caching functionality, and not until.
96
+ def DatabaseFeedCache.initialize_cache
97
+ # Establish a connection if we don't already have one
93
98
  begin
94
- feed = Feed.find_by_url(url)
95
- rescue ActiveRecord::StatementInvalid
96
- # make sure that the necessary tables are present and recover if possible
97
- FeedTools::Feed.prepare_connection
98
- unless FeedTools::Feed.cache_exists?
99
- FeedTools::Feed.create_cache
99
+ ActiveRecord::Base.connection
100
+ rescue
101
+ begin
102
+ possible_config_files = [
103
+ "./config/database.yml",
104
+ "../database.yml",
105
+ "./database.yml"
106
+ ]
107
+ database_config_file = nil
108
+ for file in possible_config_files
109
+ if File.exists? file
110
+ database_config_file = file
111
+ break
112
+ end
113
+ end
114
+ database_config_hash = File.open(database_config_file) do |file|
115
+ config_hash = YAML::load(file)
116
+ unless config_hash[FEED_TOOLS_ENV].nil?
117
+ config_hash = config_hash[FEED_TOOLS_ENV]
118
+ end
119
+ config_hash
120
+ end
121
+ ActiveRecord::Base.configurations = database_config_hash
122
+ ActiveRecord::Base.establish_connection(database_config_hash)
123
+ ActiveRecord::Base.connection
124
+ rescue
100
125
  end
101
- feed = Feed.find_by_url(url)
102
126
  end
103
- unless feed.nil?
104
- feed.update_if_needed
105
- else
106
- feed = Feed.new
107
- feed.url = url
108
- feed.load_remote_feed
127
+ # Verify that the necessary database tables are in place
128
+ # and if they're missing, create them
129
+ unless DatabaseFeedCache.table_exists?
130
+ DatabaseFeedCache.create_table
109
131
  end
110
- return feed
111
- end
112
-
113
- # Checks if the feed has expired and updates if it has
114
- def update_if_needed
115
- if expired?
116
- load_remote_feed
117
- end
118
- end
119
-
120
- # Verifies that the table structure exists
121
- def Feed.cache_exists?
122
- return Feed.table_exists? && FeedItem.table_exists?
132
+ return nil
123
133
  end
124
-
125
- # Verifies that the required fields exist; additional ones added by the user are fine
126
- def Feed.table_exists?
134
+
135
+ # True if the appropriate database table already exists
136
+ def DatabaseFeedCache.table_exists?
127
137
  begin
128
- connection.execute "select id, url, link, image_link, title, description, " +
129
- "tags, last_updated, etag, time_to_live from feeds limit 1"
138
+ ActiveRecord::Base.connection.execute "select id, url, title, " +
139
+ "link, xml_data, http_headers, last_retrieved " +
140
+ "from feeds limit 1"
130
141
  rescue ActiveRecord::StatementInvalid
131
142
  return false
143
+ rescue
144
+ return false
132
145
  end
133
146
  return true
134
147
  end
135
-
136
- # Generates the table structure if necessary
137
- def Feed.create_cache
138
- unless Feed.cache_exists?
139
- feed_items_mysql = <<-SQL_END
140
- CREATE TABLE `feed_items` (
141
- `id` int(6) unsigned NOT NULL auto_increment,
142
- `feed_id` int(6) unsigned NOT NULL default '0',
143
- `link` varchar(255) default NULL,
144
- `title` varchar(255) default NULL,
145
- `author` varchar(255) default NULL,
146
- `description` text default NULL,
147
- `time` datetime NOT NULL default '0000-00-00 00:00:00',
148
- `tags` varchar(255) default NULL,
149
- PRIMARY KEY (`id`)
150
- ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
151
- SQL_END
152
- feed_items_sqlite = <<-SQL_END
153
- CREATE TABLE 'feed_items' (
154
- 'id' INTEGER PRIMARY KEY NOT NULL,
155
- 'feed_id' INTEGER NOT NULL,
156
- 'link' VARCHAR(255) DEFAULT NULL,
157
- 'title' VARCHAR(255) DEFAULT NULL,
158
- 'author' VARCHAR(255) DEFAULT NULL,
159
- 'description' TEXT DEFAULT NULL,
160
- 'time' DATETIME DEFAULT NULL,
161
- 'tags' VARCHAR(255) DEFAULT NULL
162
- );
163
- SQL_END
164
- feed_items_psql = <<-SQL_END
165
- CREATE TABLE feed_items (
166
- id SERIAL PRIMARY KEY NOT NULL,
167
- feed_id int REFERENCES feeds,
168
- link varchar(255) default NULL,
169
- title varchar(255) default NULL,
170
- author varchar(255) default NULL,
171
- description text default NULL,
172
- time datetime default NULL,
173
- tags varchar(255) default NULL
174
- );
175
- SQL_END
176
- unless FeedItem.table_exists?
177
- table_creation_sql = nil
178
- if configurations["adapter"] == "mysql"
179
- table_creation_sql = feed_items_mysql
180
- elsif configurations["adapter"] == "sqlite"
181
- table_creation_sql = feed_items_sqlite
182
- elsif configurations["adapter"] == "postgresql"
183
- table_creation_sql = feeds_psql
184
- end
185
- if table_creation_sql.nil?
186
- raise "Could not build feed_items table."
187
- else
188
- connection.execute table_creation_sql
189
- end
190
- end
148
+
149
+ # Creates the appropriate database table
150
+ def DatabaseFeedCache.create_table
151
+ unless DatabaseFeedCache.table_exists?
191
152
  feeds_mysql = <<-SQL_END
192
153
  CREATE TABLE `feeds` (
193
- `id` int(6) unsigned NOT NULL auto_increment,
194
- `url` varchar(255) NOT NULL default '',
195
- `link` varchar(255) NOT NULL default '',
196
- `image_link` varchar(255) default NULL,
197
- `title` varchar(255) default NULL,
198
- `description` text default NULL,
199
- `tags` varchar(255) default NULL,
200
- `last_updated` datetime default NULL,
201
- `etag` varchar(255) default NULL,
202
- `time_to_live` int(4) default NULL,
154
+ `id` int(10) unsigned NOT NULL auto_increment,
155
+ `url` varchar(255) default NULL,
156
+ `title` varchar(255) default NULL,
157
+ `link` varchar(255) default NULL,
158
+ `xml_data` longtext default NULL,
159
+ `http_headers` text default NULL,
160
+ `last_retrieved` datetime default NULL,
203
161
  PRIMARY KEY (`id`)
204
162
  ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
205
163
  SQL_END
206
164
  feeds_sqlite = <<-SQL_END
207
165
  CREATE TABLE 'feeds' (
208
- 'id' INTEGER PRIMARY KEY NOT NULL,
209
- 'url' VARCHAR(255) DEFAULT NULL,
210
- 'link' VARCHAR(255) DEFAULT NULL,
211
- 'image_link' VARCHAR(255) DEFAULT NULL,
212
- 'title' VARCHAR(255) DEFAULT NULL,
213
- 'description' TEXT DEFAULT NULL,
214
- 'tags' VARCHAR(255) DEFAULT NULL,
215
- 'last_updated' DATETIME DEFAULT NULL,
216
- 'etag' VARCHAR(255) DEFAULT NULL,
217
- 'time_to_live' INTEGER DEFAULT NULL
166
+ 'id' INTEGER PRIMARY KEY NOT NULL,
167
+ 'url' VARCHAR(255) DEFAULT NULL,
168
+ 'title' VARCHAR(255) DEFAULT NULL,
169
+ 'link' VARCHAR(255) DEFAULT NULL,
170
+ 'image_link' VARCHAR(255) DEFAULT NULL,
171
+ 'xml_data' TEXT DEFAULT NULL,
172
+ 'http_headers' TEXT DEFAULT NULL,
173
+ 'last_retrieved' DATETIME DEFAULT NULL,
218
174
  );
219
175
  SQL_END
220
176
  feeds_psql = <<-SQL_END
221
177
  CREATE TABLE feeds (
222
- id SERIAL PRIMARY KEY NOT NULL,
223
- url varchar(255) default NULL,
224
- link varchar(255) default NULL,
225
- image_link varchar(255) default NULL,
226
- title varchar(255) default NULL,
227
- description text default NULL,
228
- tags varchar(255) default NULL,
229
- last_updated datetime default NULL,
230
- etag varchar(255) default NULL,
231
- time_to_live int default NULL
178
+ id SERIAL PRIMARY KEY NOT NULL,
179
+ url varchar(255) default NULL,
180
+ title varchar(255) default NULL,
181
+ link varchar(255) default NULL,
182
+ xml_data text default NULL,
183
+ http_headers text default NULL,
184
+ last_retrieved datetime default NULL,
232
185
  );
233
186
  SQL_END
234
- unless Feed.table_exists?
235
- table_creation_sql = nil
236
- if configurations["adapter"] == "mysql"
237
- table_creation_sql = feeds_mysql
238
- elsif configurations["adapter"] == "sqlite"
239
- table_creation_sql = feeds_sqlite
240
- elsif configurations["adapter"] == "postgresql"
241
- table_creation_sql = feeds_psql
242
- end
243
- if table_creation_sql.nil?
244
- raise "Could not build feed_items table."
245
- else
246
- connection.execute table_creation_sql
247
- end
187
+ table_creation_sql = nil
188
+ if configurations["adapter"] == "mysql"
189
+ table_creation_sql = feeds_mysql
190
+ elsif configurations["adapter"] == "sqlite"
191
+ table_creation_sql = feeds_sqlite
192
+ elsif configurations["adapter"] == "postgresql"
193
+ table_creation_sql = feeds_psql
194
+ end
195
+ if table_creation_sql.nil?
196
+ raise "Could not build feed_items table."
197
+ else
198
+ connection.execute table_creation_sql
248
199
  end
249
200
  end
250
201
  end
251
-
252
- # Removes all feed entries from the cache
253
- # This could obviously be a very dangerous operation if you use the cache for more than simply
254
- # caching the feeds.
255
- def Feed.clear_cache
256
- FeedItem.delete_all
257
- Feed.delete_all
202
+ end
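
Outside of Rails, the default cache can be bootstrapped by hand along the lines sketched below (the sqlite settings are illustrative assumptions, not part of this release; in a Rails app the connection already exists and initialize_cache is effectively a no-op):

  require 'rubygems'
  require 'feed_tools'

  # ./database.yml is one of the locations initialize_cache searches;
  # FEED_TOOLS_ENV defaults to "production".
  #
  #   production:
  #     adapter: sqlite
  #     database: feed_cache.db
  #
  # Opens the ActiveRecord connection and creates the "feeds" table
  # if it is missing.
  FeedTools::DatabaseFeedCache.initialize_cache
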
203
+
204
+ # Error raised when a feed cannot be retrieved
205
+ class FeedAccessError < StandardError
206
+ end
207
+
208
+ # Quick method of enabling small classes to have their attributes
209
+ # accessible as a dictionary.
210
+ module AttributeDictionary # :nodoc:
211
+ # Access the attributes as a dictionary
212
+ def [](key)
213
+ # Assignment, and destructive methods should not be
214
+ # accessed like this.
215
+ return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
216
+ return nil unless self.method(key).arity == 0
217
+ return self.send(key)
258
218
  end
259
-
260
- # Removes all feed items from the cache and resets the last updated time for all feeds
261
- # This is probably much safer than the clear_cache method
262
- def Feed.expire_cache
263
- FeedItem.delete_all
264
- Feed.update_all("last_updated = NULL")
219
+
220
+ # Access the attributes as a dictionary
221
+ def []=(key, value)
222
+ # Assignment, and destructive methods should not be
223
+ # accessed like this.
224
+ return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
225
+ return nil unless self.method(key + "=").arity == 1
226
+ return self.send(key + "=", value)
265
227
  end
266
-
267
- # Removes all feed items older than the specified number of seconds
268
- def Feed.purge_cache(purge_time=1.week)
269
- purge_date = (Time.now - purge_time).strftime("%Y-%m-%d %H:%M:%S")
270
- FeedItem.delete_all("time < '#{purge_date}'")
228
+ end
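
In practice the mixin just lets readers be called hash-style (the return values follow the Slashdot example from the header comment):

  feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
  feed.title       # => "Slashdot"
  feed["title"]    # => "Slashdot" -- same reader, dictionary-style
  feed["title="]   # => nil, assignment and bang methods are refused here
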
229
+
230
+ @feed_cache = DatabaseFeedCache
231
+ @user_agent = "FeedTools/#{FEED_TOOLS_VERSION} " +
232
+ "+http://www.sporkmonger.com/projects/feedtools/"
233
+
234
+ # Returns the current caching mechanism.
235
+ def FeedTools.feed_cache
236
+ return @feed_cache
237
+ end
238
+
239
+ # Sets the current caching mechanism. If set to nil, disables caching.
240
+ # Default is the DatabaseFeedCache class.
241
+ #
242
+ # Objects of this class must accept the following messages:
243
+ # url
244
+ # url=
245
+ # title
246
+ # title=
247
+ # link
248
+ # link=
249
+ # xml_data
250
+ # xml_data=
251
+ # etag
252
+ # etag=
253
+ # last_modified
254
+ # last_modified=
255
+ # save
256
+ #
257
+ # Additionally, the class itself must accept the following messages:
258
+ # find_by_id
259
+ # find_by_url
260
+ # initialize_cache
261
+ def FeedTools.feed_cache=(new_feed_cache)
262
+ # TODO: ensure that the feed cache class actually does those things.
263
+ # ==================================================================
264
+ @feed_cache = new_feed_cache
265
+ end
266
+
267
+ # Returns the currently used user agent string.
268
+ def FeedTools.user_agent
269
+ return @user_agent
270
+ end
271
+
272
+ # Sets the user agent string to send in the http headers.
273
+ def FeedTools.user_agent=(new_user_agent)
274
+ @user_agent = new_user_agent
275
+ end
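
For example (hypothetical agent string):

  FeedTools.user_agent = "MyAggregator/1.0 +http://aggregator.example/"
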
276
+
277
+ # Returns true if the html tidy module can be used.
278
+ #
279
+ # Obviously, you need the tidy gem installed in order to run with html
280
+ # tidy features turned on.
281
+ #
282
+ # This method does a fairly complicated, and probably unnecessarily
283
+ # desperate search for the libtidy library. If you want this thing to
284
+ # execute fast, the best thing to do is to set Tidy.path ahead of time.
285
+ # If Tidy.path is set, this method doesn't do much. If it's not set,
286
+ # it will do it's darnedest to find the libtidy library. If you set
287
+ # the LIBTIDYPATH environment variable to the libtidy library, it should
288
+ # be able to find it.
289
+ #
290
+ # Once the library is located, this method will run much faster.
291
+ def FeedTools.tidy_enabled?
292
+ # This is an override variable to keep tidy from being used even if it
293
+ # is available.
294
+ if @force_tidy_enabled == false
295
+ return false
271
296
  end
272
-
273
- # If ActiveRecord is not already connected, attempts to find a configuration file and use
274
- # it to open a connection for ActiveRecord.
275
- # This method is probably unnecessary for anything but testing and debugging purposes.
276
- def Feed.prepare_connection
297
+ if @tidy_enabled.nil? || @tidy_enabled == false
298
+ @tidy_enabled = false
277
299
  begin
278
- ActiveRecord::Base.connection
279
- rescue
280
- possible_config_files = [
281
- "./config/database.yml",
282
- "./database.yml"
283
- ]
284
- database_config_file = nil
285
- for file in possible_config_files
286
- if File.exists? file
287
- database_config_file = file
288
- break
289
- end
290
- end
291
- database_config_hash = File.open(database_config_file) do |file|
292
- config_hash = YAML::load(file)
293
- unless config_hash[FEED_TOOLS_ENV].nil?
294
- config_hash = config_hash[FEED_TOOLS_ENV]
295
- end
296
- config_hash
297
- end
298
- ActiveRecord::Base.configurations = database_config_hash
299
- ActiveRecord::Base.establish_connection(database_config_hash)
300
- ActiveRecord::Base.connection
300
+ require 'tidy'
301
+ if Tidy.path.nil?
302
+ # *Shrug*, just brute force it, I guess. There's a lot of places
303
+ # this thing might be hiding in, depending on platform and general
304
+ # sanity of the person who installed the thing. Most of these are
305
+ # probably unlikely, but it's not like checking unlikely locations
306
+ # hurts. Much. Especially if you actually find it.
307
+ libtidy_locations = [
308
+ '/usr/local/lib/libtidy.dylib',
309
+ '/opt/local/lib/libtidy.dylib',
310
+ '/usr/lib/libtidy.dylib',
311
+ '/usr/local/lib/tidylib.dylib',
312
+ '/opt/local/lib/tidylib.dylib',
313
+ '/usr/lib/tidylib.dylib',
314
+ '/usr/local/lib/tidy.dylib',
315
+ '/opt/local/lib/tidy.dylib',
316
+ '/usr/lib/tidy.dylib',
317
+ '/usr/local/lib/libtidy.so',
318
+ '/opt/local/lib/libtidy.so',
319
+ '/usr/lib/libtidy.so',
320
+ '/usr/local/lib/tidylib.so',
321
+ '/opt/local/lib/tidylib.so',
322
+ '/usr/lib/tidylib.so',
323
+ '/usr/local/lib/tidy.so',
324
+ '/opt/local/lib/tidy.so',
325
+ '/usr/lib/tidy.so',
326
+ 'C:\Program Files\Tidy\tidy.dll',
327
+ 'C:\Tidy\tidy.dll',
328
+ '/usr/local/lib',
329
+ '/opt/local/lib',
330
+ '/usr/lib'
331
+ ]
332
+ # We just made this thing up, but if someone sets it, we'll
333
+ # go ahead and check it
334
+ unless ENV['LIBTIDYPATH'].nil?
335
+ libtidy_locations =
336
+ libtidy_locations.reverse.push(ENV['LIBTIDYPATH'])
337
+ end
338
+ for path in libtidy_locations
339
+ if File.exists? path
340
+ if File.ftype(path) == "file"
341
+ Tidy.path = path
342
+ @tidy_enabled = true
343
+ break
344
+ elsif File.ftype(path) == "directory"
345
+ # Ok, now perhaps we're getting a bit more desperate
346
+ lib_paths =
347
+ `find #{path} -name '*tidy*' | grep '\\.\\(so\\|dylib\\)$'`
348
+ # If there's more than one, grab the first one and
349
+ # hope for the best, and if it doesn't work, then blame the
350
+ # user for not specifying more accurately.
351
+ tidy_path = lib_paths.split("\n").first
352
+ unless tidy_path.nil?
353
+ Tidy.path = tidy_path
354
+ @tidy_enabled = true
355
+ break
356
+ end
357
+ end
358
+ end
359
+ end
360
+ # Still couldn't find it.
361
+ unless @tidy_enabled
362
+ @tidy_enabled = false
363
+ end
364
+ else
365
+ @tidy_enabled = true
366
+ end
367
+ rescue LoadError
368
+ # Tidy not installed, disable features that rely on tidy.
369
+ @tidy_enabled = false
301
370
  end
302
371
  end
303
-
304
- def Feed.cache_enabled?
305
- return true
372
+ return @tidy_enabled
373
+ end
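
As the comment above suggests, the search can be avoided by pointing the tidy gem at the library up front (the path is an assumption for a typical Linux install):

  require 'tidy'

  Tidy.path = '/usr/lib/libtidy.so'  # or export LIBTIDYPATH and let the search find it
  FeedTools.tidy_enabled?            # => true, without the filesystem hunt
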
374
+
375
+ # Turns html tidy support on or off. Be aware, that setting this to true
376
+ # does not mean tidy will be enabled. It simply means that tidy will be
377
+ # enabled if it is available to be enabled.
378
+ def FeedTools.tidy_enabled=(new_tidy_enabled)
379
+ @force_tidy_enabled = new_tidy_enabled
380
+ end
381
+
382
+ # Attempts to ensure that the passed url is valid and sane. Accepts very, very ugly urls
383
+ # and makes every effort to figure out what it was supposed to be. Also translates from
384
+ # the feed: and rss: pseudo-protocols to the http: protocol.
385
+ def FeedTools.normalize_url(url)
386
+ if url.nil? || url == ""
387
+ return nil
306
388
  end
389
+ normalized_url = url
307
390
 
308
- def title
309
- return (self["title"] or "Untitled Feed")
391
+ # if a url begins with the '/' character, it only makes sense that they
392
+ # meant to be using a file:// url. Fix it for them.
393
+ if normalized_url.length > 0 && normalized_url[0..0] == "/"
394
+ normalized_url = "file://" + normalized_url
310
395
  end
311
396
 
312
- # Optional feed attribute.
313
- # If you want to use it, the database table needs to have a language field added, otherwise
314
- # it will just default to "en-US".
315
- def language
316
- begin
317
- return (self["language"] or "en-US")
318
- rescue
319
- return "en-US"
320
- end
397
+ # if a url begins with javascript:, it's quite possibly an attempt at
398
+ # doing something malicious. Let's keep that from getting anywhere,
399
+ # shall we?
400
+ if (normalized_url.downcase =~ /javascript:/) != nil
401
+ return "#"
321
402
  end
322
403
 
323
- def live?
324
- if @live
325
- return true
326
- else
327
- return false
328
- end
329
- end
404
+ # deal with all of the many ugly possibilities involved in the rss:
405
+ # and feed: pseudo-protocols (incidentally, whose crazy idea was this
406
+ # mess?)
407
+ normalized_url.gsub!(/^http:\/*(feed:\/*)?/, "http://")
408
+ normalized_url.gsub!(/^http:\/*(rss:\/*)?/, "http://")
409
+ normalized_url.gsub!(/^feed:\/*(http:\/*)?/, "http://")
410
+ normalized_url.gsub!(/^rss:\/*(http:\/*)?/, "http://")
411
+ normalized_url.gsub!(/^file:\/*/, "file:///")
412
+ normalized_url.gsub!(/^https:\/*/, "https://")
413
+ # fix (very) bad urls (usually of the user-entered sort)
414
+ normalized_url.gsub!(/^http:\/*(http:\/*)*/, "http://")
415
+ if (normalized_url =~ /^file:/) == 0
416
+ # fix bad Windows-based entries
417
+ normalized_url.gsub!(/file:\/\/\/([a-zA-Z]):/, 'file:///\1|')
330
418
 
331
- def expired?
332
- return last_updated == nil || (last_updated + time_to_live) < Time.now
419
+ # maybe this is too aggressive?
420
+ normalized_url.gsub!(/\\/, '/')
421
+ return normalized_url
422
+ else
423
+ if (normalized_url =~ /https?:\/\//) == nil
424
+ normalized_url = "http://" + normalized_url
425
+ end
426
+ if normalized_url == "http://"
427
+ return nil
428
+ end
429
+ begin
430
+ feed_uri = URI.parse(normalized_url)
431
+ if feed_uri.scheme == nil
432
+ feed_uri.scheme = "http"
433
+ end
434
+ if feed_uri.path == nil || feed_uri.path == ""
435
+ feed_uri.path = "/"
436
+ end
437
+ if (feed_uri.path =~ /^[\/]+/) == 0
438
+ feed_uri.path.gsub!(/^[\/]+/, "/")
439
+ end
440
+ return feed_uri.to_s
441
+ rescue URI::InvalidURIError
442
+ return normalized_url
443
+ end
333
444
  end
445
+ end
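
A few mappings that follow from the rewrites above:

  FeedTools.normalize_url("feed://example.com/index.xml")
  # => "http://example.com/index.xml"
  FeedTools.normalize_url("example.com/index.xml")
  # => "http://example.com/index.xml"
  FeedTools.normalize_url("javascript:alert(1)")
  # => "#"
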
334
446
 
335
- # Forces this feed to expire.
336
- def expire
337
- FeedItem.delete_all("feed_id = '#{self.id}'")
338
- @feed_items_unsorted = nil
339
- self.last_updated = Time.mktime(1980)
340
- self.save
447
+ # Returns true if the parameter appears to be a valid url
448
+ def FeedTools.is_url?(url)
449
+ return false if url.nil?
450
+ begin
451
+ uri = URI.parse(url)
452
+ rescue URI::InvalidURIError
453
+ return false
341
454
  end
455
+ return true
456
+ end
342
457
 
343
- # The ammount of time in seconds between the last time the feed was updated and the next
344
- # valid time to retrieve a remote feed.
345
- def time_to_live
346
- return self['time_to_live'].nil? ? 1.hour : self['time_to_live'].hour
347
- end
458
+ # Removes all html tags from the html formatted text.
459
+ def FeedTools.strip_html(html)
460
+ # TODO: do this properly
461
+ # ======================
462
+ stripped_html = html.gsub(/<\/?[^>]+>/, "")
463
+ return stripped_html
464
+ end
348
465
 
349
- def tag_list
350
- return tags.nil? ? nil : tags[1..-2].split("|")
466
+ # Tidies up the html
467
+ def FeedTools.tidy_html(html)
468
+ if FeedTools.tidy_enabled?
469
+ is_fragment = true
470
+ if (html.strip =~ /<html>(.|\n)*<body>/) != nil ||
471
+ (html.strip =~ /<\/body>(.|\n)*<\/html>$/) != nil
472
+ is_fragment = false
473
+ end
474
+ if (html.strip =~ /<\?xml(.|\n)*\?>/) != nil
475
+ is_fragment = false
476
+ end
477
+ tidy_html = Tidy.open(:show_warnings=>false) do |tidy|
478
+ tidy.options.output_xml = true
479
+ tidy.options.indent = false
480
+ tidy.options.wrap_attributes = true
481
+ tidy.options.logical_emphasis = true
482
+ tidy.options.doctype = "omit"
483
+ xml = tidy.clean(html)
484
+ xml
485
+ end
486
+ if is_fragment
487
+ # Tidy puts <html>...<body>[our html]</body>...</html> in.
488
+ # We don't want this.
489
+ tidy_html.strip!
490
+ tidy_html.gsub!(/^<html>(.|\n)*<body>/, "")
491
+ tidy_html.gsub!(/<\/body>(.|\n)*<\/html>$/, "")
492
+ tidy_html.strip!
493
+ end
494
+ else
495
+ tidy_html = html
351
496
  end
497
+ return tidy_html
498
+ end
352
499
 
353
- def tag_list=(new_tag_list)
354
- self.tags = "|" + (new_tag_list.map { |x| x.strip }).join("|") + "|"
355
- end
500
+ # Removes all dangerous html tags from the html formatted text.
501
+ # If mode is set to :escape, dangerous and unknown elements will
502
+ # be escaped. If mode is set to :strip, dangerous and unknown
503
+ # elements and all children will be removed entirely.
504
+ # Dangerous or unknown attributes are always removed.
505
+ def FeedTools.sanitize_html(html, mode=:escape)
506
+
507
+ # Lists borrowed from Mark Pilgrim's feedparser
508
+ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
509
+ 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
510
+ 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl',
511
+ 'dt', 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4',
512
+ 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend',
513
+ 'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's',
514
+ 'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
515
+ 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt',
516
+ 'u', 'ul', 'var']
356
517
 
357
- def tag_string
358
- return (tags.nil? ? nil : tags[1..-2]).split("|").join(", ")
359
- end
360
-
361
- def tag_string=(new_tag_string)
362
- self.tags = "|" + (new_tag_string.split(",").map { |x| x.strip }).join("|") + "|"
363
- end
518
+ acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
519
+ 'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
520
+ 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
521
+ 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
522
+ 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
523
+ 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
524
+ 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
525
+ 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
526
+ 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
527
+ 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
528
+ 'type', 'usemap', 'valign', 'value', 'vspace', 'width']
364
529
 
365
- # Returns a list of the feed_items, sorted by date
366
- def feed_items
367
- begin
368
- if @feed_items_unsorted.nil?
369
- @feed_items_unsorted = feed_items_unsorted
370
- end
371
- return @feed_items_unsorted.sort do |a,b|
372
- b.time <=> a.time
373
- end
374
- rescue
375
- unless @feed_items_unsorted.nil?
376
- return @feed_items_unsorted
377
- else
378
- return feed_items_unsorted
530
+ # Stupid hack to pass this unit test:
531
+ # http://feedparser.org/tests/wellformed/rss/
532
+ # item_description_not_a_doctype.xml
533
+ html.gsub!(/<!'/, "&lt;!'")
534
+
535
+ # The closer we are to proper xhtml, the more accurate the
536
+ # sanitization will be.
537
+ html = FeedTools.tidy_html(html)
538
+
539
+ # Hackity hack. But it works, and it seems plenty fast enough.
540
+ html_doc = HTree.parse_xml("<root>" + html + "</root>").to_rexml
541
+
542
+ sanitize_node = lambda do |html_node|
543
+ if html_node.respond_to? :children
544
+ for child in html_node.children
545
+ if child.kind_of? REXML::Element
546
+ unless acceptable_elements.include? child.name
547
+ if mode == :strip
548
+ html_node.delete_element(child)
549
+ else
550
+ new_child = REXML::Text.new(CGI.escapeHTML(child.to_s))
551
+ html_node.insert_after(child, new_child)
552
+ html_node.delete_element(child)
553
+ end
554
+ end
555
+ for attribute in child.attributes.keys
556
+ unless acceptable_attributes.include? attribute
557
+ child.delete_attribute(attribute)
558
+ end
559
+ end
560
+ end
561
+ sanitize_node.call(child)
379
562
  end
380
563
  end
564
+ html_node
381
565
  end
566
+ sanitize_node.call(html_doc.root)
567
+ return html_doc.root.inner_xml
568
+ end
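
Roughly what the two modes do to a hostile fragment (illustrative input; exact output depends on whether tidy is enabled):

  html = "<p>Hello<script>alert('xss')</script></p>"

  FeedTools.sanitize_html(html, :escape)
  # => "<p>Hello&lt;script&gt;alert('xss')&lt;/script&gt;</p>" (approximately)
  FeedTools.sanitize_html(html, :strip)
  # => "<p>Hello</p>"
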
569
+
570
+ class Feed
571
+ include REXML
572
+ include AttributeDictionary
382
573
 
383
- # Attempts to load the feed from the remote location. Requires the url to be set.
384
- # If an etag has been set, attempts to use it to prevent unnecessary reloading of identical
385
- # content.
386
- def load_remote_feed
387
- @live = true
388
- self.last_updated = Time.now
389
- if (etag != nil)
390
- # TODO: verify that the etag code works as intended
391
- # -> may need to check what gets returned when the
392
- # etag is matched
393
- # =================================================
394
- open(url, "If-None-Match" => @etag ) do |http|
395
- etag = http.meta['etag']
396
- parse_feed(http.read)
397
- end
574
+ # Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
575
+ def Feed.open(url)
576
+ # clean up the url
577
+ url = FeedTools.normalize_url(url)
578
+
579
+ # create and load the new feed
580
+ feed = Feed.new
581
+ feed.url = url
582
+ feed.update
583
+ return feed
584
+ end
585
+
586
+ # Loads the feed from the remote url if the feed has expired from the cache or cannot be
587
+ # retrieved from the cache for some reason.
588
+ def update
589
+ if self.http_headers.nil? && !(self.cache_object.nil?) &&
590
+ !(self.cache_object.http_headers.nil?)
591
+ @http_headers = YAML.load(self.cache_object.http_headers)
592
+ end
593
+ if expired?
594
+ load_remote_feed
398
595
  else
399
- open(url) do |http|
400
- etag = http.meta['etag']
401
- parse_feed(http.read)
402
- end
596
+ @live = false
403
597
  end
404
598
  end
405
-
406
- def parse_feed_hook(feed_data)
407
- return nil
408
- end
409
-
410
- def parse_feed(feed_data)
411
- root_node = Document.new(feed_data).root
412
- metadata_node = XPath.first(root_node, "channel")
413
- if metadata_node == nil
414
- metadata_node = root_node
415
- end
416
599
 
417
- # get the feed title
418
- title = XPath.first(metadata_node, "title/text()").to_s
600
+ # Attempts to load the feed from the remote location. Requires the url
601
+ # field to be set. If an etag or the last_modified date has been set,
602
+ # attempts to use them to prevent unnecessary reloading of identical
603
+ # content.
604
+ def load_remote_feed
605
+ @live = true
606
+ if self.http_headers.nil? && !(self.cache_object.nil?) &&
607
+ !(self.cache_object.http_headers.nil?)
608
+ @http_headers = YAML.load(self.cache_object.http_headers)
609
+ end
419
610
 
420
- # is the title escaped?
421
- if XPath.first(metadata_node, "title/@mode").to_s == "escaped"
422
- title = CGI.unescapeHTML(title)
611
+ if (self.url =~ /^feed:/) == 0
612
+ # Woah, Nelly, how'd that happen? You should've already been
613
+ # corrected. So let's fix that url. And please,
614
+ # just use less crappy browsers instead of badly defined
615
+ # pseudo-protocol hacks.
616
+ self.url = FeedTools.normalize_url(self.url)
423
617
  end
424
-
425
- # get the feed link
426
- link = XPath.first(metadata_node, "link[@rel='alternate' @type='text/html']/@href").to_s
427
- if link == ""
428
- link = XPath.first(metadata_node, "link[@rel='alternate']/@href").to_s
429
- end
430
- if link == ""
431
- link = XPath.first(metadata_node, "link/@href").to_s
432
- end
433
- if link == ""
434
- link = XPath.first(metadata_node, "link/text()").to_s
435
- end
436
- if link == ""
437
- # The ordering here is somewhat incorrect, but the more correct ordering would
438
- # introduce much more serious problems, so I've chosen to go with the lesser of two
439
- # evils. (The completely correct implementation would require a vestigial 'base' method
440
- # on the Feed class to fully support CDF files. This method will support almost all CDF
441
- # files without any unnecessary methods.) But given that this only exists to support
442
- # CDF files, it's not a big deal. It's not like CDF files really exist in the wild.
443
- # (The assumption this ordering makes is that the 'base' attribute points to a valid
444
- # location, hopefully the same as the 'href' location. Chances are pretty good that this
445
- # is true.)
446
- link = XPath.first(metadata_node, "@base").to_s
447
- end
448
- if link == ""
449
- link = XPath.first(metadata_node, "@href").to_s
618
+
619
+ # Find out what method we're going to be using to obtain this feed.
620
+ uri = URI.parse(self.url)
621
+ retrieval_method = "http"
622
+ case uri.scheme
623
+ when "http"
624
+ retrieval_method = "http"
625
+ when "ftp"
626
+ retrieval_method = "ftp"
627
+ when "file"
628
+ retrieval_method = "file"
629
+ when nil
630
+ raise FeedAccessError,
631
+ "No protocol was specified in the url."
632
+ else
633
+ raise FeedAccessError,
634
+ "Cannot retrieve feed using unrecognized protocol: " + uri.scheme
450
635
  end
451
636
 
452
- # get the feed description
453
- description = XPath.first(metadata_node, "description/text()").to_s
454
- if description != ""
455
- if XPath.first(metadata_node, "description/@encoding").to_s != ""
456
- description = "[Embedded data objects are not supported.]"
457
- else
458
- description = CGI.unescapeHTML(description)
637
+ # No need for http headers unless we're actually doing http
638
+ if retrieval_method == "http"
639
+ # Set up the appropriate http headers
640
+ headers = {}
641
+ unless self.http_headers.nil?
642
+ headers["If-None-Match"] =
643
+ self.http_headers['etag'] unless self.http_headers['etag'].nil?
644
+ headers["If-Modified-Since"] =
645
+ self.http_headers['last-modified'] unless
646
+ self.http_headers['last-modified'].nil?
647
+ end
648
+ headers["User-Agent"] =
649
+ FeedTools.user_agent unless FeedTools.user_agent.nil?
650
+
651
+ # The http feed access method
652
+ def http_fetch(feed_url, http_headers, redirect_limit = 10,
653
+ response_chain = []) # :nodoc:
654
+ raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
655
+ feed_uri = nil
656
+ begin
657
+ feed_uri = URI.parse(feed_url)
658
+ rescue URI::InvalidURIError
659
+ # Uh, maybe try to fix it?
660
+ feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
661
+ end
662
+
663
+ # Borrowed from open-uri:
664
+ # According to RFC2616 14.23, Host: request-header field should be
665
+ # set to an origin server.
666
+ # But net/http wrongly set a proxy server if an absolute URI is
667
+ # specified as a request URI.
668
+ # So override it here explicitly.
669
+ http_headers['Host'] = feed_uri.host
670
+ http_headers['Host'] += ":#{feed_uri.port}" if feed_uri.port
671
+
672
+ Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
673
+ response = http.request_get(feed_uri.path, http_headers)
674
+
675
+ case response
676
+ when Net::HTTPSuccess
677
+ # We've reached the final destination, process all previous
678
+ # redirections, and see if we need to update the url.
679
+ for redirected_response in response_chain
680
+ if redirected_response.last.code.to_i == 301
681
+ self.url = redirected_response.first
682
+ else
683
+ # Jump out as soon as we hit anything that isn't a
684
+ # permanently moved redirection.
685
+ break
686
+ end
687
+ end
688
+ return response
689
+ when Net::HTTPRedirection
690
+ if response.code.to_i == 304
691
+ response.error!
692
+ else
693
+ if response['Location'].nil?
694
+ raise FeedAccessError,
695
+ "No location to redirect to supplied: " + response.code
696
+ end
697
+ response_chain << [feed_url, response]
698
+ new_location = response['location']
699
+ if response_chain.assoc(new_location) != nil
700
+ raise FeedAccessError, "Redirection loop detected."
701
+ end
702
+ # TODO: deal with stupid people using relative urls
703
+ # in Location header
704
+ # =================================================
705
+ http_fetch(new_location, http_headers,
706
+ redirect_limit - 1, response_chain)
707
+ end
708
+ else
709
+ response.error!
710
+ end
711
+ end
712
+ end
713
+
714
+ begin
715
+ @http_response = http_fetch(self.url, headers)
716
+ @http_headers = {}
717
+ self.http_response.each_header do |header|
718
+ self.http_headers[header.first.downcase] = header.last
719
+ end
720
+ self.last_retrieved = Time.now
721
+ self.xml_data = self.http_response.body
722
+ rescue FeedAccessError
723
+ @live = false
724
+ if self.xml_data.nil?
725
+ raise
726
+ end
727
+ rescue Timeout::Error
728
+ # if we time out, do nothing, it should fall back to the xml_data
729
+ # stored in the cache.
730
+ @live = false
731
+ if self.xml_data.nil?
732
+ raise
733
+ end
734
+ rescue Errno::ECONNRESET
735
+ # if the connection gets reset by peer, oh well, fall back to the
736
+ # xml_data stored in the cache
737
+ @live = false
738
+ if self.xml_data.nil?
739
+ raise
740
+ end
741
+ rescue => error
742
+ # heck, if anything at all bad happens, fall back to the xml_data
743
+ # stored in the cache.
744
+
745
+ # If we can, get the HTTPResponse...
746
+ @http_response = nil
747
+ if error.respond_to?(:each_header)
748
+ @http_response = error
749
+ end
750
+ if error.respond_to?(:response) &&
751
+ error.response.respond_to?(:each_header)
752
+ @http_response = error.response
753
+ end
754
+ if @http_response != nil
755
+ @http_headers = {}
756
+ self.http_response.each_header do |header|
757
+ self.http_headers[header.first] = header.last
758
+ end
759
+ if self.http_response.code.to_i == 304
760
+ self.last_retrieved = Time.now
761
+ end
762
+ end
763
+ @live = false
764
+ if self.xml_data.nil?
765
+ raise
766
+ end
767
+ end
768
+ elsif retrieval_method == "https"
769
+ # Not supported... yet
770
+ elsif retrieval_method == "ftp"
771
+ # Not supported... yet
772
+ # Technically, CDF feeds are supposed to be able to be accessed directly
773
+ # from an ftp server. This is silly, but we'll humor Microsoft.
774
+ #
775
+ # Eventually.
776
+ elsif retrieval_method == "file"
777
+ # Now that we've gone to all that trouble to ensure the url begins
778
+ # with 'file://', strip the 'file://' off the front of the url.
779
+ file_name = self.url.gsub(/^file:\/\//, "")
780
+ begin
781
+ open(file_name) do |file|
782
+ @http_response = nil
783
+ @http_headers = {}
784
+ self.last_retrieved = Time.now
785
+ self.xml_data = file.read
786
+ end
787
+ rescue
788
+ @live = false
789
+ # In this case, pulling from the cache is probably not going
790
+ # to help at all, and the user should probably be immediately
791
+ # apprised of the problem. Raise the exception.
792
+ raise
459
793
  end
460
794
  end
461
- if description == ""
462
- description = XPath.first(metadata_node, "tagline/text()").to_s
463
- if description != "" && XPath.first(metadata_node, "tagline/@mode").to_s == "escaped"
464
- description = CGI.unescapeHTML(description)
795
+ unless self.cache_object.nil?
796
+ begin
797
+ self.save
798
+ rescue
465
799
  end
466
800
  end
467
- if description == "" && XPath.first(metadata_node, "tagline") == nil
468
- description = XPath.first(metadata_node, "info/text()").to_s
469
- if description != "" && XPath.first(metadata_node, "info/@mode").to_s == "escaped"
470
- description = CGI.unescapeHTML(description)
801
+ end
802
+
803
+ # Returns the relevant information from an http request.
804
+ def http_response
805
+ return @http_response
806
+ end
807
+
808
+ # Returns a hash of the http headers from the response.
809
+ def http_headers
810
+ return @http_headers
811
+ end
812
+
813
+ # Returns the feed's raw xml data.
814
+ def xml_data
815
+ if @xml_data.nil?
816
+ unless self.cache_object.nil?
817
+ @xml_data = self.cache_object.xml_data
471
818
  end
472
819
  end
473
- if description == ""
474
- description = CGI.unescapeHTML(XPath.first(metadata_node, "abstract/text()").to_s)
820
+ return @xml_data
821
+ end
822
+
823
+ # Sets the feed's xml data.
824
+ def xml_data=(new_xml_data)
825
+ @xml_data = new_xml_data
826
+ unless self.cache_object.nil?
827
+ self.cache_object.xml_data = new_xml_data
475
828
  end
829
+ end
476
830
 
477
- # get the image link
478
- image_link = XPath.first(metadata_node, "image/url/text()").to_s
479
- if image_link == ""
480
- image_link = XPath.first(metadata_node, "image/@rdf:resource").to_s
831
+ # Returns a REXML Document of the xml_data
832
+ def xml
833
+ if @xml_doc.nil?
834
+ begin
835
+ @xml_doc = Document.new(xml_data)
836
+ rescue
837
+ # Something failed, attempt to repair the xml with htree.
838
+ @xml_doc = HTree.parse(xml_data).to_rexml
839
+ end
481
840
  end
482
- if image_link == ""
483
- image_link = XPath.first(metadata_node, "link[@type='image/jpeg']/@href").to_s
841
+ return @xml_doc
842
+ end
843
+
844
+ # Returns the first node within the channel_node that matches the xpath query.
845
+ def find_node(xpath)
846
+ return XPath.first(channel_node, xpath)
847
+ end
848
+
849
+ # Returns all nodes within the channel_node that match the xpath query.
850
+ def find_all_nodes(xpath)
851
+ return XPath.match(channel_node, xpath)
852
+ end
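
And the custom-parsing hook from the CHANGELOG in use (dc:creator is just an assumed extension element):

  feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')

  # first match, relative to the channel element
  feed.find_node("dc:creator/text()").to_s

  # every match; items sit under <channel> in RSS 2.0
  feed.find_all_nodes("item/title/text()").map { |node| node.to_s }
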
853
+
854
+ # Returns the root node of the feed.
855
+ def root_node
856
+ if @root_node.nil?
857
+ @root_node = xml.root
484
858
  end
485
- if image_link == ""
486
- image_link = XPath.first(metadata_node, "link[@type='image/gif']/@href").to_s
859
+ return @root_node
860
+ end
861
+
862
+ # Returns the channel node of the feed.
863
+ def channel_node
864
+ if @channel_node.nil?
865
+ @channel_node = XPath.first(root_node, "channel")
866
+ if @channel_node == nil
867
+ @channel_node = XPath.first(root_node, "feedinfo")
868
+ end
869
+ if @channel_node == nil
870
+ @channel_node = root_node
871
+ end
487
872
  end
488
- if image_link == ""
489
- image_link = XPath.first(metadata_node, "link[@type='image/png']/@href").to_s
873
+ return @channel_node
874
+ end
875
+
876
+ # The cache object that handles the feed persistence.
877
+ def cache_object
878
+ unless FeedTools.feed_cache.nil?
879
+ if @cache_object.nil?
880
+ begin
881
+ if @id != nil
882
+ @cache_object = FeedTools.feed_cache.find_by_id(@id)
883
+ elsif @url != nil
884
+ @cache_object = FeedTools.feed_cache.find_by_url(@url)
885
+ end
886
+ if @cache_object.nil?
887
+ @cache_object = FeedTools.feed_cache.new
888
+ end
889
+ rescue
890
+ end
891
+ end
490
892
  end
491
- if image_link == ""
492
- image_link = XPath.first(metadata_node, "logo[@style='image']/@href").to_s
893
+ return @cache_object
894
+ end
895
+
896
+ # Sets the cache object for this feed.
897
+ #
898
+ # This can be any object, but it must accept the following messages:
899
+ # url
900
+ # url=
901
+ # title
902
+ # title=
903
+ # link
904
+ # link=
905
+ # xml_data
906
+ # xml_data=
907
+ # etag
908
+ # etag=
909
+ # last_modified
910
+ # last_modified=
911
+ # save
912
+ def cache_object=(new_cache_object)
913
+ @cache_object = new_cache_object
914
+ end
915
+
916
+ # Returns the feed's unique id
917
+ def id
918
+ if @id.nil?
919
+ @id = XPath.first(root_node, "id/text()").to_s
920
+ if @id == ""
921
+ @id = XPath.first(root_node, "guid/text()").to_s
922
+ end
923
+ @id = nil if @id == ""
493
924
  end
494
- if image_link == ""
495
- image_link = XPath.first(metadata_node, "logo/@href").to_s
925
+ return @id
926
+ end
927
+
928
+ # Sets the feed's unique id
929
+ def id=(new_id)
930
+ @id = new_id
931
+ end
932
+
933
+ # Returns the feed url.
934
+ def url
935
+ if @url.nil? && self.xml_data != nil
936
+ @url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
937
+ @url = nil if @url == ""
496
938
  end
939
+ return @url
940
+ end
497
941
 
498
- # get the feed time to live (expressed in hours)
499
- feed_time_to_live = nil
500
- update_frequency = XPath.first(metadata_node, "syn:updateFrequency/text()").to_s
501
- if update_frequency != ""
502
- update_period = XPath.first(metadata_node, "syn:updatePeriod/text()").to_s
503
- if update_period == "daily"
504
- feed_time_to_live = update_frequency.to_i * 24
505
- elsif update_period == "weekly"
506
- feed_time_to_live = update_frequency.to_i * 24 * 7
507
- elsif update_period == "monthly"
508
- feed_time_to_live = update_frequency.to_i * 24 * 30
509
- elsif update_period == "yearly"
510
- feed_time_to_live = update_frequency.to_i * 24 * 365
942
+ # Sets the feed url and prepares the cache_object if necessary.
943
+ def url=(new_url)
944
+ @url = FeedTools.normalize_url(new_url)
945
+ self.cache_object.url = new_url unless self.cache_object.nil?
946
+ end
947
+
948
+ # Returns the feed title
949
+ def title
950
+ if @title.nil?
951
+ if XPath.first(channel_node, "title/@type").to_s == "xhtml" ||
952
+ XPath.first(channel_node, "title/@mode").to_s == "xhtml"
953
+ @title = XPath.first(channel_node, "title").inner_xml
954
+ elsif XPath.first(channel_node, "title/@type").to_s == "escaped" ||
955
+ XPath.first(channel_node, "title/@mode").to_s == "escaped"
956
+ @title = CGI.unescapeHTML(
957
+ XPath.first(channel_node, "title/text()").to_s)
511
958
  else
512
- # hourly
513
- feed_time_to_live = update_frequency.to_i
959
+ @title = CGI.unescapeHTML(
960
+ XPath.first(channel_node, "title/text()").to_s)
514
961
  end
515
- end
516
- if feed_time_to_live == nil
517
- # expressed in minutes
518
- update_frequency = XPath.first(metadata_node, "ttl/text()").to_s
519
- if update_frequency != ""
520
- feed_time_to_live = (update_frequency.to_i / 60)
962
+ unless @title.nil?
963
+ @title = CGI.unescapeHTML(FeedTools.sanitize_html(@title, :strip))
521
964
  end
965
+ if @title != "" && !(@title.nil?)
966
+ @title = FeedTools.strip_html(@title).strip
967
+ end
968
+ @title.gsub!(/\n/, " ")
969
+ @title = nil if @title == ""
970
+ self.cache_object.title = @title unless self.cache_object.nil?
522
971
  end
523
-
524
- # TODO: handle time_to_live for CDF files
525
- # =======================================
526
-
527
- # get the feed items
528
- items = XPath.match(root_node, "item")
529
- if items == nil || items == []
530
- items = XPath.match(metadata_node, "item")
972
+ return @title
973
+ end
974
+
975
+ # Sets the feed title
976
+ def title=(new_title)
977
+ @title = new_title
978
+ self.cache_object.title = new_title unless self.cache_object.nil?
979
+ end
980
+
981
+ # Returns the feed description
982
+ def description
983
+ if @description.nil?
984
+ # get the feed description from the xml document
985
+ @description = XPath.first(channel_node, "description/text()").to_s
986
+ if @description != ""
987
+ if XPath.first(channel_node, "description/@encoding").to_s != ""
988
+ @description = "[Embedded data objects are not supported.]"
989
+ else
990
+ @description = CGI.unescapeHTML(description)
991
+ end
992
+ end
993
+ if @description == ""
994
+ @description = XPath.first(channel_node, "subtitle/text()").to_s
995
+ if @description != "" &&
996
+ XPath.first(channel_node, "subtitle/@mode").to_s == "escaped"
997
+ @description = CGI.unescapeHTML(description)
998
+ end
999
+ end
1000
+ if @description == ""
1001
+ @description = XPath.first(channel_node, "tagline/text()").to_s
1002
+ if @description != "" &&
1003
+ XPath.first(channel_node, "tagline/@mode").to_s == "escaped"
1004
+ @description = CGI.unescapeHTML(description)
1005
+ end
1006
+ end
1007
+ if @description == "" && XPath.first(channel_node, "tagline") == nil
1008
+ @description = XPath.first(channel_node, "info/text()").to_s
1009
+ if @description != "" &&
1010
+ XPath.first(channel_node, "info/@mode").to_s == "escaped"
1011
+ @description = CGI.unescapeHTML(description)
1012
+ end
1013
+ end
1014
+ if @description == ""
1015
+ @description = CGI.unescapeHTML(
1016
+ XPath.first(channel_node, "abstract/text()").to_s)
1017
+ end
1018
+ if @description == ""
1019
+ @description = CGI.unescapeHTML(
1020
+ XPath.first(channel_node, "summary/text()").to_s)
1021
+ end
1022
+ if @description == ""
1023
+ # I don't think this is valid for anyone to do, but this is probably
1024
+ # what they meant if they do it.
1025
+ @description = CGI.unescapeHTML(
1026
+ XPath.first(channel_node, "content:encoded/text()").to_s)
1027
+ if @description != ""
1028
+ @bozo = true
1029
+ end
1030
+ end
1031
+ if @description == ""
1032
+ begin
1033
+ @description = XPath.first(channel_node, "description").inner_xml
1034
+ rescue
1035
+ end
1036
+ end
1037
+ if @description == ""
1038
+ @description = self.itunes_summary
1039
+ @description = "" if @description.nil?
1040
+ end
1041
+ if @description == ""
1042
+ @description = self.itunes_subtitle
1043
+ @description = "" if @description.nil?
1044
+ end
1045
+
1046
+ @description =
1047
+ FeedTools.sanitize_html(@description) unless @description.nil?
1048
+ # If it started with a bunch of divs, hack them right off. We can put
1049
+ # them back later if they're needed.
1050
+ @description.gsub!(/^(<div[^>]*>)*/, "")
1051
+ @description.gsub!(/(<\/div>)*$/, "")
1052
+
1053
+ @description.gsub!(/\n/, " ") if @description.size < 80
1054
+ @description = @description.strip unless @description.nil?
1055
+ @description = nil if @description == ""
1056
+ end
1057
+ return @description
1058
+ end
1059
+
1060
+ # Sets the feed description
1061
+ def description=(new_description)
1062
+ @description = new_description
1063
+ end
1064
+
1065
+ # Returns the contents of the itunes:summary element
1066
+ def itunes_summary
1067
+ if @itunes_summary.nil?
1068
+ @itunes_summary = CGI.unescapeHTML(XPath.first(root_node,
1069
+ "itunes:summary/text()").to_s)
1070
+ if @itunes_summary == ""
1071
+ @itunes_summary = nil
1072
+ end
1073
+ @itunes_summary =
1074
+ FeedTools.sanitize_html(@itunes_summary) unless @itunes_summary.nil?
1075
+ end
1076
+ return @itunes_summary
1077
+ end
1078
+
1079
+ # Sets the contents of the itunes:summary element
1080
+ def itunes_summary=(new_itunes_summary)
1081
+ @itunes_summary = new_itunes_summary
1082
+ end
1083
+
1084
+ # Returns the contents of the itunes:subtitle element
1085
+ def itunes_subtitle
1086
+ if @itunes_subtitle.nil?
1087
+ @itunes_subtitle = CGI.unescapeHTML(XPath.first(root_node,
1088
+ "itunes:subtitle/text()").to_s)
1089
+ if @itunes_subtitle == ""
1090
+ @itunes_subtitle = nil
1091
+ end
1092
+ unless @itunes_subtitle.nil?
1093
+ @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
1094
+ end
1095
+ end
1096
+ return @itunes_subtitle
1097
+ end
1098
+
1099
+ # Sets the contents of the itunes:subtitle element
1100
+ def itunes_subtitle=(new_itunes_subtitle)
1101
+ @itunes_subtitle = new_itunes_subtitle
1102
+ end
1103
+
1104
+ # Returns the feed link
1105
+ def link
1106
+ if @link.nil?
1107
+ # get the feed link from the xml document
1108
+ @link = XPath.first(channel_node, "link[@rel='alternate' @type='text/html']/@href").to_s
1109
+ if @link == ""
1110
+ @link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
1111
+ end
1112
+ if @link == ""
1113
+ @link = XPath.first(channel_node, "link/@href").to_s
1114
+ end
1115
+ if @link == ""
1116
+ @link = XPath.first(channel_node, "link/text()").to_s
1117
+ end
1118
+ if @link == ""
1119
+ @link = XPath.first(channel_node, "@href").to_s
1120
+ end
1121
+ if @link == ""
1122
+ if FeedTools.is_url? self.guid
1123
+ @link = self.guid
1124
+ end
1125
+ end
1126
+ if @link == ""
1127
+ # Technically, we shouldn't use the base attribute for this, but if the href attribute
1128
+ # is missing, it's already a given that we're looking at a messed up CDF file. We can
1129
+ # always pray it's correct.
1130
+ @link = XPath.first(channel_node, "@base").to_s
1131
+ end
1132
+ @link = FeedTools.normalize_url(@link)
1133
+ unless self.cache_object.nil?
1134
+ self.cache_object.link = @link
1135
+ end
1136
+ end
1137
+ return @link
1138
+ end
1139
+
1140
+ # Sets the feed link
1141
+ def link=(new_link)
1142
+ @link = new_link
1143
+ unless self.cache_object.nil?
1144
+ self.cache_object.link = new_link
1145
+ end
1146
+ end
1147
+
1148
+ # Returns the feed image link
1149
+ def image_link
1150
+ if @image_link.nil?
1151
+ # get the feed image link from the xml document
1152
+ @image_link = XPath.first(channel_node, "image/url/text()").to_s
1153
+ if @image_link == ""
1154
+ @image_link = XPath.first(channel_node, "image/@rdf:resource").to_s
1155
+ end
1156
+ if @image_link == ""
1157
+ @image_link = XPath.first(channel_node, "link[@type='image/jpeg']/@href").to_s
1158
+ end
1159
+ if @image_link == ""
1160
+ @image_link = XPath.first(channel_node, "link[@type='image/gif']/@href").to_s
1161
+ end
1162
+ if @image_link == ""
1163
+ @image_link = XPath.first(channel_node, "link[@type='image/png']/@href").to_s
1164
+ end
1165
+ if @image_link == ""
1166
+ @image_link = XPath.first(channel_node, "logo[@style='image']/@href").to_s
1167
+ end
1168
+ if @image_link == ""
1169
+ @image_link = XPath.first(channel_node, "logo/@href").to_s
1170
+ end
1171
+ @image_link = FeedTools.normalize_url(@image_link)
1172
+ end
1173
+ return @image_link
1174
+ end
1175
+
1176
+ # Sets the feed image link
1177
+ def image_link=(new_image_link)
1178
+ @image_link = new_image_link
1179
+ end
1180
+
1181
+ # Returns the url to the icon file for this feed.
1182
+ #
1183
+ # This method uses the url from the link field in order to avoid grabbing
1184
+ # the favicon for services like feedburner.
1185
+ def icon_link
1186
+ if @icon_link.nil?
1187
+ @icon_link = XPath.first(channel_node,
1188
+ "link[@rel='icon']/@href").to_s
1189
+ if @icon_link == ""
1190
+ @icon_link = XPath.first(channel_node,
1191
+ "link[@rel='shortcut icon']/@href").to_s
1192
+ end
1193
+ if @icon_link == ""
1194
+ @icon_link = XPath.first(channel_node,
1195
+ "link[@type='image/x-icon']/@href").to_s
1196
+ end
1197
+ if @icon_link == ""
1198
+ @icon_link = XPath.first(channel_node,
1199
+ "icon/@href").to_s
1200
+ end
1201
+ if @icon_link == ""
1202
+ @icon_link = XPath.first(channel_node,
1203
+ "icon/text()").to_s
1204
+ end
1205
+ if @icon_link == ""
1206
+ link_uri = URI.parse(FeedTools.normalize_url(self.link))
1207
+ @icon_link =
1208
+ link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1209
+ end
1210
+ end
1211
+ return @icon_link
1212
+ end
1213
+
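Editorial usage sketch (not part of the shipped source; the URL and host are hypothetical): when a feed declares no icon, shortcut icon, or icon element, icon_link is derived from the host of the feed's link field.

    feed = FeedTools::Feed.open("http://example.com/index.xml")
    feed.icon_link  # => "http://example.com/favicon.ico" when no icon links are present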
1214
+ # Returns the number of seconds before the feed should expire
1215
+ def time_to_live
1216
+ if @time_to_live.nil?
1217
+ # get the feed time to live from the xml document
1218
+ update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
1219
+ if update_frequency != ""
1220
+ update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
1221
+ if update_period == "daily"
1222
+ @time_to_live = update_frequency.to_i * 24
1223
+ elsif update_period == "weekly"
1224
+ @time_to_live = update_frequency.to_i * 24 * 7
1225
+ elsif update_period == "monthly"
1226
+ @time_to_live = update_frequency.to_i * 24 * 30
1227
+ elsif update_period == "yearly"
1228
+ @time_to_live = update_frequency.to_i * 24 * 365
1229
+ else
1230
+ # hourly
1231
+ @time_to_live = update_frequency.to_i
1232
+ end
1233
+ end
1234
+ end
1235
+ if @time_to_live.nil?
1236
+ # expressed in minutes
1237
+ update_frequency = XPath.first(channel_node, "ttl/text()").to_s
1238
+ if update_frequency != ""
1239
+ @time_to_live = (update_frequency.to_i / 60)
1240
+ end
1241
+ end
1242
+ if @time_to_live.nil?
1243
+ @time_to_live = 0
1244
+ update_frequency_days = XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1245
+ update_frequency_hours = XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1246
+ update_frequency_minutes = XPath.first(channel_node, "schedule/intervaltime/@min").to_s
1247
+ update_frequency_seconds = XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
1248
+ if update_frequency_days != ""
1249
+ @time_to_live = @time_to_live + update_frequency_days.to_i * 24
1250
+ end
1251
+ if update_frequency_hours != ""
1252
+ @time_to_live = @time_to_live + update_frequency_hours.to_i * 1
1253
+ end
1254
+ if update_frequency_minutes != ""
1255
+ @time_to_live = @time_to_live + update_frequency_minutes.to_i / 60
1256
+ end
1257
+ if update_frequency_seconds != ""
1258
+ @time_to_live = @time_to_live + update_frequency_seconds.to_i / 3600
1259
+ end
1260
+ if @time_to_live == 0
1261
+ @time_to_live = nil
1262
+ end
1263
+ end
1264
+ if @time_to_live.nil? || @time_to_live == 0
1265
+ # Default to one hour
1266
+ @time_to_live = 1
1267
+ end
1268
+ @time_to_live = @time_to_live.round
1269
+ return @time_to_live.hour
1270
+ end
1271
+
1272
+ # Sets the feed time to live
1273
+ def time_to_live=(new_time_to_live)
1274
+ @time_to_live = (new_time_to_live / 3600).round
1275
+ @time_to_live = 1 if @time_to_live < 1
1276
+ end
1277
+
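Editorial sketch of the unit handling (assumes the usual FeedTools::Feed.open entry point): the reader normalizes the various syn/ttl/schedule elements to seconds via ActiveSupport durations, and the writer expects seconds as well.

    feed.time_to_live          # => 3600 for an hourly feed
    feed.time_to_live = 7200   # cache for two hours
    feed.ttl                   # same value via the alias defined further down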
1278
+ # Returns the feed language
1279
+ def language
1280
+ if @language.nil?
1281
+ @language = XPath.first(channel_node, "language/text()").to_s
1282
+ if @language == ""
1283
+ @language = XPath.first(channel_node, "dc:language/text()").to_s
1284
+ end
1285
+ if @language == ""
1286
+ @language = XPath.first(channel_node, "xml:lang/text()").to_s
1287
+ end
1288
+ if @language == ""
1289
+ @language = XPath.first(root_node, "xml:lang/text()").to_s
1290
+ end
1291
+ if @language == ""
1292
+ @language = "en-us"
1293
+ end
1294
+ @language = @language.downcase
531
1295
  end
532
- if items == nil || items == []
533
- items = XPath.match(metadata_node, "entry")
1296
+ return @language
1297
+ end
1298
+
1299
+ # Sets the feed language
1300
+ def language=(new_language)
1301
+ @language = new_language
1302
+ end
1303
+
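For example (editorial sketch), a feed that declares no language or dc:language element at all reports the downcased default:

    feed.language  # => "en-us"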
1304
+ # Returns true if this feed contains explicit material.
1305
+ def explicit
1306
+ if @explicit.nil?
1307
+ if XPath.first(channel_node,
1308
+ "media:adult/text()").to_s.downcase == "true" ||
1309
+ XPath.first(channel_node,
1310
+ "itunes:explicit/text()").to_s.downcase == "yes" ||
1311
+ XPath.first(channel_node,
1312
+ "itunes:explicit/text()").to_s.downcase == "true"
1313
+ @explicit = true
1314
+ else
1315
+ @explicit = false
1316
+ end
1317
+ end
1318
+ return @explicit
1319
+ end
1320
+
1321
+ # Sets whether or not the feed contains explicit material
1322
+ def explicit=(new_explicit)
1323
+ @explicit = (new_explicit ? true : false)
1324
+ end
1325
+
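Editorial sketch: a channel carrying <itunes:explicit>yes</itunes:explicit> or <media:adult>true</media:adult> is reported as explicit, and the writer coerces its argument to a strict boolean.

    feed.explicit         # => true for such a channel
    feed.explicit = nil   # stored as false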
1326
+ # Returns the feed items
1327
+ def items
1328
+ if @items.nil?
1329
+ raw_items = XPath.match(root_node, "item")
1330
+ if raw_items == nil || raw_items == []
1331
+ raw_items = XPath.match(channel_node, "item")
1332
+ end
1333
+ if raw_items == nil || raw_items == []
1334
+ raw_items = XPath.match(channel_node, "entry")
1335
+ end
1336
+
1337
+ # create the individual feed items
1338
+ @items = []
1339
+ if raw_items != nil
1340
+ for item_node in raw_items
1341
+ new_item = FeedItem.new
1342
+ new_item.xml_data = item_node.to_s
1343
+ new_item.feed = self
1344
+ @items << new_item
1345
+ end
1346
+ end
534
1347
  end
535
1348
 
536
- # set all of the properties
537
- if title != ""
538
- self.title = title
539
- else
540
- self.title = nil
1349
+ # Sort the items
1350
+ @items = @items.sort do |a,b|
1351
+ (b.time or Time.mktime(1970)) <=> (a.time or Time.mktime(1970))
541
1352
  end
542
- if link != ""
543
- self.link = link
544
- else
545
- self.link = nil
1353
+ return @items
1354
+ end
1355
+
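Editorial sketch (hypothetical URL, assuming the usual FeedTools::Feed.open entry point): items come back sorted newest first, and each FeedItem also exposes find_node and find_all_nodes for custom parsing of elements the library does not map.

    feed = FeedTools::Feed.open("http://example.com/index.xml")
    feed.items.each do |item|
      puts "#{item.time}  #{item.title}  #{item.link}"
      rights = item.find_node("dc:rights/text()").to_s  # arbitrary xpath against the entry
    end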
1356
+ # The time that the feed was last requested from the remote server. Nil if it has
1357
+ # never been pulled, or if it was created from scratch.
1358
+ def last_retrieved
1359
+ unless self.cache_object.nil?
1360
+ @last_retrieved = self.cache_object.last_retrieved
546
1361
  end
547
- if description != ""
548
- self.description = description
549
- else
550
- self.description = nil
1362
+ return @last_retrieved
1363
+ end
1364
+
1365
+ # Sets the time that the feed was last retrieved from the remote server.
1366
+ def last_retrieved=(new_last_retrieved)
1367
+ @last_retrieved = new_last_retrieved
1368
+ unless self.cache_object.nil?
1369
+ self.cache_object.last_retrieved = new_last_retrieved
551
1370
  end
552
- if image_link != ""
553
- self.image_link = image_link
554
- else
555
- self.image_link = nil
1371
+ end
1372
+
1373
+ # True if this feed contains audio content enclosures
1374
+ def podcast?
1375
+ podcast = false
1376
+ self.items.each do |item|
1377
+ item.enclosures.each do |enclosure|
1378
+ podcast = true if enclosure.audio?
1379
+ end
556
1380
  end
557
- if feed_time_to_live != nil
558
- self.time_to_live = feed_time_to_live
1381
+ return podcast
1382
+ end
1383
+
1384
+ # True if this feed contains video content enclosures
1385
+ def vidlog?
1386
+ vidlog = false
1387
+ self.items.each do |item|
1388
+ item.enclosures.each do |enclosure|
1389
+ vidlog = true if enclosure.video?
1390
+ end
1391
+ end
1392
+ return vidlog
1393
+ end
1394
+
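Editorial sketch: both predicates simply scan the enclosures of every item in the feed.

    feed.podcast?  # => true if any enclosure carries an audio MIME type or file extension
    feed.vidlog?   # => true if any enclosure looks like video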
1395
+ # True if the feed was not last retrieved from the cache.
1396
+ def live?
1397
+ return @live
1398
+ end
1399
+
1400
+ # True if the feed has expired and must be reacquired from the remote server.
1401
+ def expired?
1402
+ return self.last_retrieved == nil || (self.last_retrieved + self.time_to_live) < Time.now
1403
+ end
1404
+
1405
+ # Forces this feed to expire.
1406
+ def expire
1407
+ self.last_retrieved = Time.mktime(1970)
1408
+ self.save
1409
+ end
1410
+
1411
+ # A hook method that is called during the feed generation process. Overriding this method
1412
+ # will enable additional content to be inserted into the feed.
1413
+ def build_xml_hook(feed_type, version, xml_builder)
1414
+ return nil
1415
+ end
1416
+
1417
+ # Generates xml based on the content of the feed
1418
+ def build_xml(feed_type="rss", version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
1419
+ if feed_type == "rss" && version == 0.0
1420
+ version = 1.0
1421
+ elsif feed_type == "atom" && version == 0.0
1422
+ version = 0.3
1423
+ end
1424
+ if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
1425
+ # RDF-based rss format
1426
+ return xml_builder.tag!("rdf:RDF") do
1427
+ xml_builder.channel("rdf:about" => CGI.escapeHTML(link)) do
1428
+ unless title.nil? || title == ""
1429
+ xml_builder.title(title)
1430
+ else
1431
+ xml_builder.title
1432
+ end
1433
+ unless link.nil? || link == ""
1434
+ xml_builder.link(link)
1435
+ else
1436
+ xml_builder.link
1437
+ end
1438
+ unless image_link.nil? || image_link == ""
1439
+ xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
1440
+ end
1441
+ unless description.nil? || description == ""
1442
+ xml_builder.description(description)
1443
+ else
1444
+ xml_builder.description
1445
+ end
1446
+ unless language.nil? || language == ""
1447
+ xml_builder.tag!("dc:language", language)
1448
+ end
1449
+ xml_builder.tag!("syn:updatePeriod", "hourly")
1450
+ xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
1451
+ xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
1452
+ xml_builder.items do
1453
+ xml_builder.tag!("rdf:Seq") do
1454
+ unless items.nil?
1455
+ for item in items
1456
+ if item.link.nil?
1457
+ raise "Cannot generate an rdf-based feed with a nil item link field."
1458
+ end
1459
+ xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
1460
+ end
1461
+ end
1462
+ end
1463
+ end
1464
+ build_xml_hook(feed_type, version, xml_builder)
1465
+ end
1466
+ unless image_link.nil? || image_link == ""
1467
+ xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
1468
+ unless title.nil? || title == ""
1469
+ xml_builder.title(title)
1470
+ else
1471
+ xml_builder.title
1472
+ end
1473
+ unless image_link.nil? || image_link == ""
1474
+ xml_builder.url(image_link)
1475
+ end
1476
+ unless link.nil? || link == ""
1477
+ xml_builder.link(link)
1478
+ else
1479
+ xml_builder.link
1480
+ end
1481
+ end
1482
+ end
1483
+ unless items.nil?
1484
+ for item in items
1485
+ item.build_xml(feed_type, version, xml_builder)
1486
+ end
1487
+ end
1488
+ end
1489
+ elsif feed_type == "rss"
1490
+ # normal rss format
1491
+ return xml_builder.rss("version" => version.to_s) do
1492
+ unless title.nil? || title == ""
1493
+ xml_builder.title(title)
1494
+ end
1495
+ unless link.nil? || link == ""
1496
+ xml_builder.link(link)
1497
+ end
1498
+ unless description.nil? || description == ""
1499
+ xml_builder.description(description)
1500
+ end
1501
+ xml_builder.ttl((time_to_live / 1.minute).to_s)
1502
+ xml_builder.generator("http://www.sporkmonger.com/projects/feedtools")
1503
+ build_xml_hook(feed_type, version, xml_builder)
1504
+ unless items.nil?
1505
+ for item in items
1506
+ item.build_xml(feed_type, version, xml_builder)
1507
+ end
1508
+ end
1509
+ end
1510
+ elsif feed_type == "atom"
1511
+ # normal atom format
1512
+ return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
1513
+ "version" => version.to_s,
1514
+ "xml:lang" => language) do
1515
+ unless title.nil? || title == ""
1516
+ xml_builder.title(title,
1517
+ "mode" => "escaped",
1518
+ "type" => "text/html")
1519
+ end
1520
+ unless link.nil? || link == ""
1521
+ xml_builder.link("href" => link,
1522
+ "rel" => "alternate",
1523
+ "type" => "text/html",
1524
+ "title" => title)
1525
+ end
1526
+ unless description.nil? || description == ""
1527
+ xml_builder.tagline(description,
1528
+ "mode" => "escaped",
1529
+ "type" => "text/html")
1530
+ end
1531
+ xml_builder.generator("FeedTools",
1532
+ "url" => "http://www.sporkmonger.com/projects/feedtools")
1533
+ build_xml_hook(feed_type, version, xml_builder)
1534
+ unless items.nil?
1535
+ for item in items
1536
+ item.build_xml(feed_type, version, xml_builder)
1537
+ end
1538
+ end
1539
+ end
1540
+ end
1541
+ end
1542
+
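Editorial sketch: build_xml can re-emit the parsed feed in another format; the version argument defaults to 1.0 for "rss" and 0.3 for "atom", and the Builder output string is returned.

    rdf_xml  = feed.build_xml("rss", 1.0)
    atom_xml = feed.build_xml("atom")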
1543
+ # Persists the current feed state to the cache.
1544
+ def save
1545
+ if FeedTools.feed_cache.nil?
1546
+ raise "Caching is currently disabled. Cannot save to cache."
1547
+ elsif self.url.nil?
1548
+ raise "The url field must be set to save to the cache."
1549
+ elsif self.xml_data.nil?
1550
+ raise "The xml_data field must be set to save to the cache."
1551
+ elsif self.cache_object.nil?
1552
+ raise "The cache_object is currently nil. Cannot save to cache."
559
1553
  else
560
- self.time_to_live = nil
1554
+ self.cache_object.url = self.url
1555
+ self.cache_object.title = self.title
1556
+ self.cache_object.link = self.link
1557
+ self.cache_object.xml_data = self.xml_data
1558
+ unless self.http_response.nil?
1559
+ self.cache_object.http_headers = self.http_headers.to_yaml
1560
+ end
1561
+ self.cache_object.last_retrieved = self.last_retrieved
1562
+ self.cache_object.save
1563
+ end
1564
+ end
1565
+
1566
+ alias_method :tagline, :description
1567
+ alias_method :tagline=, :description=
1568
+ alias_method :subtitle, :description
1569
+ alias_method :subtitle=, :description=
1570
+ alias_method :abstract, :description
1571
+ alias_method :abstract=, :description=
1572
+ alias_method :content, :description
1573
+ alias_method :content=, :description=
1574
+ alias_method :ttl, :time_to_live
1575
+ alias_method :ttl=, :time_to_live=
1576
+ alias_method :guid, :id
1577
+ alias_method :guid=, :id=
1578
+ alias_method :entries, :items
1579
+
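Editorial note: the aliases above make the accessors interchangeable, for example:

    feed.entries == feed.items         # => true, same underlying method
    feed.ttl     == feed.time_to_live  # => true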
1580
+ # passes missing methods to the cache_object
1581
+ def method_missing(msg, *params)
1582
+ if self.cache_object.nil?
1583
+ raise NoMethodError, "Invalid method #{msg.to_s}"
1584
+ end
1585
+ return self.cache_object.send(msg, *params)
1586
+ end
1587
+
1588
+ # passes missing methods to the FeedTools.feed_cache
1589
+ def Feed.method_missing(msg, *params)
1590
+ if FeedTools.feed_cache.nil?
1591
+ raise NoMethodError, "Invalid method Feed.#{msg.to_s}"
1592
+ end
1593
+ result = FeedTools.feed_cache.send(msg, *params)
1594
+ if result.kind_of? FeedTools.feed_cache
1595
+ result = Feed.open(result.url)
1596
+ end
1597
+ return result
1598
+ end
1599
+ end
1600
+
1601
+ class FeedItem
1602
+ include REXML
1603
+ include AttributeDictionary
1604
+
1605
+ # This class stores information about a feed item's file enclosures.
1606
+ class Enclosure
1607
+ include AttributeDictionary
1608
+
1609
+ # The url for the enclosure
1610
+ attr_accessor :url
1611
+ # The MIME type of the file referenced by the enclosure
1612
+ attr_accessor :type
1613
+ # The size of the file referenced by the enclosure
1614
+ attr_accessor :file_size
1615
+ # The total play time of the file referenced by the enclosure
1616
+ attr_accessor :duration
1617
+ # The height in pixels of the enclosed media
1618
+ attr_accessor :height
1619
+ # The width in pixels of the enclosed media
1620
+ attr_accessor :width
1621
+ # The bitrate of the enclosed media
1622
+ attr_accessor :bitrate
1623
+ # The framerate of the enclosed media
1624
+ attr_accessor :framerate
1625
+ # The thumbnail for this enclosure
1626
+ attr_accessor :thumbnail
1627
+ # The categories for this enclosure
1628
+ attr_accessor :categories
1629
+ # A hash of the enclosed file
1630
+ attr_accessor :hash
1631
+ # A website containing some kind of media player instead of a direct
1632
+ # link to the media file.
1633
+ attr_accessor :player
1634
+ # A list of credits for the enclosed media
1635
+ attr_accessor :credits
1636
+ # A text rendition of the enclosed media
1637
+ attr_accessor :text
1638
+ # A list of alternate versions of the enclosed media file
1639
+ attr_accessor :versions
1640
+ # The default version of the enclosed media file
1641
+ attr_accessor :default_version
1642
+
1643
+ # Returns true if this is the default enclosure
1644
+ def is_default?
1645
+ return @is_default
1646
+ end
1647
+
1648
+ # Sets whether this is the default enclosure for the media group
1649
+ def is_default=(new_is_default)
1650
+ @is_default = new_is_default
561
1651
  end
562
1652
 
563
- parse_feed_hook(feed_data)
564
- if Feed.cache_enabled?
565
- save
1653
+ # Returns true if the enclosure contains explicit material
1654
+ def explicit?
1655
+ return @explicit
1656
+ end
1657
+
1658
+ # Sets the explicit attribute on the enclosure
1659
+ def explicit=(new_explicit)
1660
+ @explicit = new_explicit
1661
+ end
1662
+
1663
+ # Determines if the object is a sample, or the full version of the
1664
+ # object, or if it is a stream.
1665
+ # Possible values are 'sample', 'full', 'nonstop'.
1666
+ def expression
1667
+ return @expression
1668
+ end
1669
+
1670
+ # Sets the expression attribute on the enclosure.
1671
+ # Allowed values are 'sample', 'full', 'nonstop'.
1672
+ def expression=(new_expression)
1673
+ unless ['sample', 'full', 'nonstop'].include? new_expression.downcase
1674
+ raise ArgumentError,
1675
+ "Permitted values are 'sample', 'full', 'nonstop'."
1676
+ end
1677
+ @expression = new_expression.downcase
566
1678
  end
567
1679
 
568
- # check and make sure we don't have any cached feed_items with a nil link
569
- # if we do, we need to start from scratch to avoid duplicates
570
- for item_link in feed_items.map { |item| item.link }
571
- if item_link.nil?
572
- FeedItem.delete_all("feed_id = '#{self.id}'")
573
- break
1680
+ # Returns true if this enclosure contains audio content
1681
+ def audio?
1682
+ unless self.type.nil?
1683
+ return true if (self.type =~ /^audio/) != nil
1684
+ end
1685
+ # TODO: create a more complete list
1686
+ # =================================
1687
+ audio_extensions = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma']
1688
+ audio_extensions.each do |extension|
1689
+ if (url =~ /#{extension}$/) != nil
1690
+ return true
1691
+ end
574
1692
  end
1693
+ return false
575
1694
  end
576
1695
 
577
- # parse the feed items
578
- @feed_items_unsorted = []
579
- if items != nil
580
- for item_node in items
581
- @feed_items_unsorted << handle_feed_item(item_node.to_s)
1696
+ # Returns true if this enclosure contains video content
1697
+ def video?
1698
+ unless self.type.nil?
1699
+ return true if (self.type =~ /^video/) != nil
1700
+ return true if self.type == "image/mov"
1701
+ end
1702
+ # TODO: create a more complete list
1703
+ # =================================
1704
+ video_extensions = ['mov', 'mp4', 'avi', 'wmv', 'asf']
1705
+ video_extensions.each do |extension|
1706
+ if (url =~ /#{extension}$/) != nil
1707
+ return true
1708
+ end
582
1709
  end
1710
+ return false
583
1711
  end
584
- return self
585
1712
  end
586
-
587
- # Locates the feed item in the database based on the supplied item xml data.
588
- def find_feed_item_by_data(item_data)
589
- item_node = Document.new(item_data).root
1713
+ EnclosureCategory = Struct.new( "EnclosureCategory", :category, :scheme, :label )
1714
+ EnclosureHash = Struct.new( "EnclosureHash", :hash, :type )
1715
+ EnclosurePlayer = Struct.new( "EnclosurePlayer", :url, :height, :width )
1716
+ EnclosureCredit = Struct.new( "EnclosureCredit", :name, :role )
1717
+ EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height, :width )
590
1718
 
591
- # get the link
592
- item_link = XPath.first(item_node, "link[@rel='alternate']/@href").to_s
593
- if item_link == ""
594
- item_link = XPath.first(item_node, "link/@href").to_s
1719
+ # Returns the parent feed of this feed item
1720
+ def feed
1721
+ return @feed
1722
+ end
1723
+
1724
+ # Sets the parent feed of this feed item
1725
+ def feed=(new_feed)
1726
+ @feed = new_feed
1727
+ end
1728
+
1729
+ # Returns the feed item's raw xml data.
1730
+ def xml_data
1731
+ return @xml_data
1732
+ end
1733
+
1734
+ # Sets the feed item's xml data.
1735
+ def xml_data=(new_xml_data)
1736
+ @xml_data = new_xml_data
1737
+ end
1738
+
1739
+ # Returns a REXML Document of the xml_data
1740
+ def xml
1741
+ if @xml_doc.nil?
1742
+ @xml_doc = Document.new(xml_data)
1743
+ end
1744
+ return @xml_doc
1745
+ end
1746
+
1747
+ # Returns the first node within the root_node that matches the xpath query.
1748
+ def find_node(xpath)
1749
+ return XPath.first(root_node, xpath)
1750
+ end
1751
+
1752
+ # Returns all nodes within the root_node that match the xpath query.
1753
+ def find_all_nodes(xpath)
1754
+ return XPath.match(root_node, xpath)
1755
+ end
1756
+
1757
+ # Returns the root node of the feed item.
1758
+ def root_node
1759
+ if @root_node.nil?
1760
+ @root_node = xml.root
1761
+ end
1762
+ return @root_node
1763
+ end
1764
+
1765
+ # Returns the feed item title
1766
+ def title
1767
+ if @title.nil?
1768
+ if XPath.first(root_node, "title/@type").to_s == "xhtml" ||
1769
+ XPath.first(root_node, "title/@mode").to_s == "xhtml"
1770
+ @title = XPath.first(root_node, "title").inner_xml
1771
+ elsif XPath.first(root_node, "title/@type").to_s == "escaped" ||
1772
+ XPath.first(root_node, "title/@mode").to_s == "escaped"
1773
+ @title = CGI.unescapeHTML(
1774
+ XPath.first(root_node, "title/text()").to_s)
1775
+ else
1776
+ @title = CGI.unescapeHTML(
1777
+ XPath.first(root_node, "title/text()").to_s)
1778
+ end
1779
+ unless @title.nil?
1780
+ @title = CGI.unescapeHTML(FeedTools.sanitize_html(@title, :strip))
1781
+ end
1782
+ if @title != ""
1783
+ # Some blogging tools include the number of comments in a post
1784
+ # in the title... this is supremely ugly, and breaks any
1785
+ # applications which expect the title to be static, so we're
1786
+ # gonna strip them out.
1787
+ #
1788
+ # If for some incredibly weird reason you need the actual
1789
+ # unstripped title, just use find_node("title/text()").to_s
1790
+ @title = FeedTools.strip_html(
1791
+ @title.strip.gsub(/\[\d*\]$/, "")).strip
1792
+ @title.gsub!(/\n/, " ")
1793
+ end
1794
+ @title = nil if @title == ""
1795
+ end
1796
+ return @title
1797
+ end
1798
+
1799
+ # Sets the feed item title
1800
+ def title=(new_title)
1801
+ @title = new_title
1802
+ end
1803
+
1804
+ # Returns the feed item description
1805
+ def description
1806
+ if @description.nil?
1807
+ # get the item content
1808
+ @description = ""
1809
+ body_node = XPath.first(root_node, "xhtml:body")
1810
+ if body_node == nil
1811
+ body_node = XPath.first(root_node, "body")
1812
+ end
1813
+ if body_node != nil
1814
+ @description = body_node.inner_xml
1815
+ end
1816
+ if @description == ""
1817
+ @description =
1818
+ CGI.unescapeHTML(XPath.first(root_node, "content:encoded/text()").to_s)
1819
+ end
1820
+ if @description == ""
1821
+ begin
1822
+ @description = XPath.first(root_node, "description").cdatas.first.to_s
1823
+ rescue
1824
+ @description = ""
1825
+ end
1826
+ if @description == ""
1827
+ @description = XPath.first(root_node, "description/text()").to_s
1828
+ end
1829
+ if @description != ""
1830
+ if XPath.first(root_node, "description/@encoding").to_s != ""
1831
+ # Not supported... yet.
1832
+ @description = "[Embedded data objects are not supported.]"
1833
+ else
1834
+ @description = CGI.unescapeHTML(@description)
1835
+ end
1836
+ end
1837
+ end
1838
+ if @description == ""
1839
+ @description = XPath.first(root_node, "content/text()").to_s
1840
+ if @description != "" &&
1841
+ (XPath.first(root_node, "content/@mode").to_s == "escaped" ||
1842
+ XPath.first(root_node, "content/@type").to_s == "escaped")
1843
+ @description = CGI.unescapeHTML(@description)
1844
+ end
1845
+ if XPath.first(root_node, "content/@mode").to_s == "xhtml" ||
1846
+ XPath.first(root_node, "content/@type").to_s == "xhtml"
1847
+ @description = XPath.first(root_node, "content").inner_xml
1848
+ end
1849
+ end
1850
+ if @description == ""
1851
+ begin
1852
+ @description = XPath.first(root_node, "description").inner_xml
1853
+ rescue
1854
+ end
1855
+ end
1856
+ if @description == ""
1857
+ @description = self.itunes_summary
1858
+ @description = "" if @description.nil?
1859
+ end
1860
+ if @description == ""
1861
+ @description = self.itunes_subtitle
1862
+ @description = "" if @description.nil?
1863
+ end
1864
+ if @description == ""
1865
+ @description = self.media_text
1866
+ @description = "" if @description.nil?
1867
+ end
1868
+
1869
+ unless @description.nil?
1870
+ @description = FeedTools.sanitize_html(@description)
1871
+ end
1872
+
1873
+ # If it started with a bunch of divs, hack them right off. We can put
1874
+ # them back later if they're needed.
1875
+ @description.gsub!(/^(<div[^>]*>)*/, "")
1876
+ @description.gsub!(/(<\/div>)*$/, "")
1877
+
1878
+ @description.gsub!(/\n/, " ") if @description.size < 80
1879
+ @description = @description.strip unless @description.nil?
1880
+ @description = nil if @description == ""
595
1881
  end
596
- if item_link == ""
597
- item_link = XPath.first(item_node, "link/text()").to_s
1882
+ return @description
1883
+ end
1884
+
1885
+ # Sets the feed item description
1886
+ def description=(new_description)
1887
+ @description = new_description
1888
+ end
1889
+
1890
+ # Returns the feed item link
1891
+ def link
1892
+ if @link.nil?
1893
+ @link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s
1894
+ if @link == ""
1895
+ @link = XPath.first(root_node, "link/@href").to_s
1896
+ end
1897
+ if @link == ""
1898
+ @link = XPath.first(root_node, "link/text()").to_s
1899
+ end
1900
+ if @link == ""
1901
+ @link = XPath.first(root_node, "@rdf:about").to_s
1902
+ end
1903
+ if @link == ""
1904
+ @link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
1905
+ end
1906
+ if @link == ""
1907
+ if FeedTools.is_url? self.guid
1908
+ @link = self.guid
1909
+ end
1910
+ end
1911
+ if @link != ""
1912
+ @link = CGI.unescapeHTML(@link)
1913
+ end
1914
+ if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
1915
+ if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
1916
+ @link = @link[1..-1]
1917
+ end
1918
+ # prepend the base to the link since they seem to have used a relative path
1919
+ @link = feed.base + @link
1920
+ end
1921
+ @link = FeedTools.normalize_url(@link)
598
1922
  end
599
- if item_link == ""
600
- item_link = XPath.first(item_node, "@rdf:about").to_s
1923
+ return @link
1924
+ end
1925
+
1926
+ # Sets the feed item link
1927
+ def link=(new_link)
1928
+ @link = new_link
1929
+ end
1930
+
1931
+ # Returns the feed comment link
1932
+ def comment_link
1933
+ if @comment_link.nil?
1934
+ # get the feed comment link from the xml document
1935
+ @comment_link = XPath.first(root_node, "comments/text()").to_s
1936
+ if @comment_link == ""
1937
+ @comment_link = self.link
1938
+ end
1939
+ @comment_link = FeedTools.normalize_url(@comment_link)
601
1940
  end
602
- if item_link == ""
603
- item_link = XPath.first(item_node, "guid/text()").to_s
1941
+ return @comment_link
1942
+ end
1943
+
1944
+ # Sets the feed comment link
1945
+ def comment_link=(new_comment_link)
1946
+ @comment_link = new_comment_link
1947
+ end
1948
+
1949
+ # Returns the feed image link
1950
+ def image_link
1951
+ if @image_link.nil?
1952
+ # get the feed image link from the xml document
1953
+ if @image_link == ""
1954
+ @image_link = XPath.first(root_node, "link[@type='image/jpeg']/@href").to_s
1955
+ end
1956
+ if @image_link == ""
1957
+ @image_link = XPath.first(root_node, "link[@type='image/gif']/@href").to_s
1958
+ end
1959
+ if @image_link == ""
1960
+ @image_link = XPath.first(root_node, "link[@type='image/png']/@href").to_s
1961
+ end
1962
+ # The following two should technically never occur, but have been included
1963
+ # simply because I've seen both occurring in the wild at least once.
1964
+ if @image_link == ""
1965
+ @image_link = XPath.first(root_node, "image/url/text()").to_s
1966
+ end
1967
+ if @image_link == ""
1968
+ @image_link = XPath.first(root_node, "image/@rdf:resource").to_s
1969
+ end
1970
+ if @image_link == ""
1971
+ # If there's only a media thumbnail, we can just borrow it. Technically, this isn't
1972
+ # ideal, but chances are very good that anything that makes use of this image is
1973
+ # simply not going to care anyhow.
1974
+ @image_link = XPath.first(root_node, "media:thumbnail/@url").to_s
1975
+ if @image_link == ""
1976
+ @media_image_link = @image_link
1977
+ end
1978
+ end
1979
+ if @image_link == ""
1980
+ # If there's only an itunes image, we can just borrow it. See comment above regarding
1981
+ # less-than-ideal-ness.
1982
+ if @itunes_image_link == ""
1983
+ @image_link = XPath.first(root_node, "itunes:image/@href").to_s
1984
+ if @image_link == ""
1985
+ @image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
1986
+ end
1987
+ @itunes_image_link = @image_link
1988
+ else
1989
+ @image_link = @itunes_image_link
1990
+ end
1991
+ end
1992
+ @image_link = FeedTools.normalize_url(@image_link)
1993
+ end
1994
+ return @image_link
1995
+ end
1996
+
1997
+ # Sets the feed image link
1998
+ def image_link=(new_image_link)
1999
+ @image_link = new_image_link
2000
+ end
2001
+
2002
+ # Returns the feed item itunes image link
2003
+ #
2004
+ # If it's not present, falls back to the normal image link.
2005
+ # Technically, the itunes spec says that the image needs to be
2006
+ # square and larger than 300x300, but hey, if there's an image
2007
+ # to be had, it's better than none at all.
2008
+ def itunes_image_link
2009
+ if @itunes_image_link.nil?
2010
+ # get the feed item itunes image link from the xml document
2011
+ @itunes_image_link = XPath.first(root_node, "itunes:image/@href").to_s
2012
+ if @itunes_image_link == ""
2013
+ @itunes_image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
2014
+ end
2015
+ if @itunes_image_link == ""
2016
+ @itunes_image_link = self.image_link
2017
+ end
2018
+ @itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
2019
+ end
2020
+ return @itunes_image_link
2021
+ end
2022
+
2023
+ # Sets the feed item itunes image link
2024
+ def itunes_image_link=(new_itunes_image_link)
2025
+ @itunes_image_link = new_itunes_image_link
2026
+ end
2027
+
2028
+ # Returns the feed item media thumbnail link
2029
+ #
2030
+ # If it's not present, falls back to the normal image link.
2031
+ def media_thumbnail_link
2032
+ if @media_thumbnail_link.nil?
2033
+ # get the feed item itunes image link from the xml document
2034
+ @media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
2035
+ if @media_thumbnail_link == ""
2036
+ @media_thumbnail_link = image_link
2037
+ end
2038
+ @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
604
2039
  end
605
- item_title = XPath.first(item_node, "title/text()").to_s
2040
+ return @media_thumbnail_link
2041
+ end
2042
+
2043
+ # Sets the feed item media thumbnail url
2044
+ def media_thumbnail_link=(new_media_thumbnail_link)
2045
+ @media_thumbnail_link = new_media_thumbnail_link
2046
+ end
606
2047
 
607
- feed_item = FeedItem.find_by_feed_id_and_link(self.id, item_link)
608
- unless feed_item.nil?
609
- # Some blogging tools alter the title of an item when the number of comments change (for
610
- # example, TextPattern) and many email feed dumps use the same link for multiple
611
- # items (for example, GMail). We try to take both of these cases into account here.
612
- existing_title = feed_item.title
613
- item_title = item_title.gsub(/\[\d*\]/,"").strip
614
- existing_title = existing_title.gsub(/\[\d*\]/,"").strip
615
- item_title = item_title.gsub(/\(\d*\)/,"").strip
616
- existing_title = existing_title.gsub(/\(\d*\)/,"").strip
617
- item_title = item_title.gsub(/\{\d*\}/,"").strip
618
- existing_title = existing_title.gsub(/\{\d*\}/,"").strip
619
- if existing_title != item_title
620
- feed_item = nil
2048
+ # Returns the feed items's unique id
2049
+ def id
2050
+ if @id.nil?
2051
+ @id = XPath.first(root_node, "id/text()").to_s
2052
+ if @id == ""
2053
+ @id = XPath.first(root_node, "guid/text()").to_s
621
2054
  end
2055
+ @id = nil if @id == ""
622
2056
  end
623
- return feed_item
2057
+ return @id
624
2058
  end
625
2059
 
626
- def handle_feed_item(item_data)
627
- feed_item = find_feed_item_by_data(item_data)
628
- if feed_item.nil?
629
- feed_item = FeedItem.new
630
- end
631
- feed_item.feed = self
632
- feed_item.parse_item(item_data)
633
- return feed_item
634
- end
635
-
636
- def build_feed_hook(feed_type, version, xml_builder)
637
- return nil
2060
+ # Sets the feed item's unique id
2061
+ def id=(new_id)
2062
+ @id = new_id
638
2063
  end
639
-
640
- def build_feed(feed_type, version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
641
- if feed_type == "rss" && version == 0.0
642
- version = 1.0
643
- elsif feed_type == "atom" && version == 0.0
644
- version = 0.3
645
- end
646
- if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
647
- # RDF-based rss format
648
- return xml_builder.tag!("rdf:RDF") do
649
- xml_builder.channel("rdf:about" => CGI.escapeHTML(link)) do
650
- unless title.nil? || title == ""
651
- xml_builder.title(title)
652
- else
653
- xml_builder.title
2064
+
2065
+ # Returns all feed item enclosures
2066
+ def enclosures
2067
+ if @enclosures.nil?
2068
+ @enclosures = []
2069
+
2070
+ # First, load up all the different possible sources of enclosures
2071
+ rss_enclosures = XPath.match(root_node, "enclosure")
2072
+ atom_enclosures = XPath.match(root_node, "link[@rel='enclosure']")
2073
+ media_content_enclosures = XPath.match(root_node, "media:content")
2074
+ media_group_enclosures = XPath.match(root_node, "media:group")
2075
+
2076
+ # Parse RSS-type enclosures. Thanks to a few buggy enclosures implementations,
2077
+ # sometimes these also manage to show up in atom files.
2078
+ for enclosure_node in rss_enclosures
2079
+ enclosure = Enclosure.new
2080
+ enclosure.url = CGI.unescapeHTML(enclosure_node.attributes["url"].to_s)
2081
+ enclosure.type = enclosure_node.attributes["type"].to_s
2082
+ enclosure.file_size = enclosure_node.attributes["length"].to_i
2083
+ enclosure.credits = []
2084
+ enclosure.explicit = false
2085
+ @enclosures << enclosure
2086
+ end
2087
+
2088
+ # Parse atom-type enclosures. If there are repeats of the same enclosure object,
2089
+ # we merge the two together.
2090
+ for enclosure_node in atom_enclosures
2091
+ enclosure_url = CGI.unescapeHTML(enclosure_node.attributes["href"].to_s)
2092
+ enclosure = nil
2093
+ new_enclosure = false
2094
+ for existing_enclosure in @enclosures
2095
+ if existing_enclosure.url == enclosure_url
2096
+ enclosure = existing_enclosure
2097
+ break
654
2098
  end
655
- unless link.nil? || link == ""
656
- xml_builder.link(link)
657
- else
658
- xml_builder.link
2099
+ end
2100
+ if enclosure.nil?
2101
+ new_enclosure = true
2102
+ enclosure = Enclosure.new
2103
+ end
2104
+ enclosure.url = enclosure_url
2105
+ enclosure.type = enclosure_node.attributes["type"].to_s
2106
+ enclosure.file_size = enclosure_node.attributes["length"].to_i
2107
+ enclosure.credits = []
2108
+ enclosure.explicit = false
2109
+ if new_enclosure
2110
+ @enclosures << enclosure
2111
+ end
2112
+ end
2113
+
2114
+ # Creates an anonymous method to parse content objects from the media module. We
2115
+ # do this to avoid excessive duplication of code since we have to do identical
2116
+ # processing for content objects within group objects.
2117
+ parse_media_content = lambda do |media_content_nodes|
2118
+ affected_enclosures = []
2119
+ for enclosure_node in media_content_nodes
2120
+ enclosure_url = CGI.unescapeHTML(enclosure_node.attributes["url"].to_s)
2121
+ enclosure = nil
2122
+ new_enclosure = false
2123
+ for existing_enclosure in @enclosures
2124
+ if existing_enclosure.url == enclosure_url
2125
+ enclosure = existing_enclosure
2126
+ break
2127
+ end
659
2128
  end
660
- unless image_link.nil? || image_link == ""
661
- xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
2129
+ if enclosure.nil?
2130
+ new_enclosure = true
2131
+ enclosure = Enclosure.new
662
2132
  end
663
- unless description.nil? || description == ""
664
- xml_builder.description(description)
665
- else
666
- xml_builder.description
2133
+ enclosure.url = enclosure_url
2134
+ enclosure.type = enclosure_node.attributes["type"].to_s
2135
+ enclosure.file_size = enclosure_node.attributes["fileSize"].to_i
2136
+ enclosure.duration = enclosure_node.attributes["duration"].to_s
2137
+ enclosure.height = enclosure_node.attributes["height"].to_i
2138
+ enclosure.width = enclosure_node.attributes["width"].to_i
2139
+ enclosure.bitrate = enclosure_node.attributes["bitrate"].to_i
2140
+ enclosure.framerate = enclosure_node.attributes["framerate"].to_i
2141
+ enclosure.expression = enclosure_node.attributes["expression"].to_s
2142
+ enclosure.is_default =
2143
+ (enclosure_node.attributes["isDefault"].to_s.downcase == "true")
2144
+ if XPath.first(enclosure_node, "media:thumbnail/@url").to_s != ""
2145
+ enclosure.thumbnail = EnclosureThumbnail.new(
2146
+ CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@url").to_s),
2147
+ CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@height").to_s),
2148
+ CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@width").to_s)
2149
+ )
2150
+ if enclosure.thumbnail.height == ""
2151
+ enclosure.thumbnail.height = nil
2152
+ end
2153
+ if enclosure.thumbnail.width == ""
2154
+ enclosure.thumbnail.width = nil
2155
+ end
667
2156
  end
668
- unless language.nil? || language == ""
669
- xml_builder.tag!("dc:language", language)
2157
+ enclosure.categories = []
2158
+ for category in XPath.match(enclosure_node, "media:category")
2159
+ enclosure.categories << EnclosureCategory.new(
2160
+ CGI.unescapeHTML(category.text),
2161
+ CGI.unescapeHTML(category.attributes["scheme"].to_s),
2162
+ CGI.unescapeHTML(category.attributes["label"].to_s)
2163
+ )
2164
+ if enclosure.categories.last.scheme == ""
2165
+ enclosure.categories.last.scheme = nil
2166
+ end
2167
+ if enclosure.categories.last.label == ""
2168
+ enclosure.categories.last.label = nil
2169
+ end
670
2170
  end
671
- xml_builder.tag!("syn:updatePeriod", "hourly")
672
- xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
673
- xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
674
- xml_builder.items do
675
- xml_builder.tag!("rdf:Seq") do
676
- unless feed_items.nil?
677
- for item in feed_items
678
- if item.link.nil?
679
- raise "Cannot generate an rdf-based feed with a nil item link field."
680
- end
681
- xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
682
- end
683
- end
2171
+ if XPath.first(enclosure_node, "media:hash/text()").to_s != ""
2172
+ enclosure.hash = EnclosureHash.new(
2173
+ FeedTools.sanitize_html(CGI.unescapeHTML(XPath.first(
2174
+ enclosure_node, "media:hash/text()").to_s), :strip),
2175
+ "md5"
2176
+ )
2177
+ end
2178
+ if XPath.first(enclosure_node, "media:player/@url").to_s != ""
2179
+ enclosure.player = EnclosurePlayer.new(
2180
+ CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@url").to_s),
2181
+ CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@height").to_s),
2182
+ CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@width").to_s)
2183
+ )
2184
+ if enclosure.player.height == ""
2185
+ enclosure.player.height = nil
2186
+ end
2187
+ if enclosure.player.width == ""
2188
+ enclosure.player.width = nil
2189
+ end
2190
+ end
2191
+ enclosure.credits = []
2192
+ for credit in XPath.match(enclosure_node, "media:credit")
2193
+ enclosure.credits << EnclosureCredit.new(
2194
+ CGI.unescapeHTML(CGI.unescapeHTML(credit.text)),
2195
+ CGI.unescapeHTML(credit.attributes["role"].to_s.downcase)
2196
+ )
2197
+ if enclosure.credits.last.role == ""
2198
+ enclosure.credits.last.role = nil
684
2199
  end
685
2200
  end
686
- build_feed_hook(feed_type, version, xml_builder)
2201
+ enclosure.explicit = (XPath.first(enclosure_node,
2202
+ "media:adult/text()").to_s.downcase == "true")
2203
+ if XPath.first(enclosure_node, "media:text/text()").to_s != ""
2204
+ enclosure.text = CGI.unescapeHTML(XPath.first(enclosure_node,
2205
+ "media:text/text()").to_s)
2206
+ end
2207
+ affected_enclosures << enclosure
2208
+ if new_enclosure
2209
+ @enclosures << enclosure
2210
+ end
687
2211
  end
688
- unless image_link.nil? || image_link == ""
689
- xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
690
- unless title.nil? || title == ""
691
- xml_builder.title(title)
692
- else
693
- xml_builder.title
2212
+ affected_enclosures
2213
+ end
2214
+
2215
+ # Parse the independant content objects.
2216
+ parse_media_content.call(media_content_enclosures)
2217
+
2218
+ media_groups = []
2219
+
2220
+ # Parse the group objects.
2221
+ for media_group in media_group_enclosures
2222
+ group_media_content_enclosures =
2223
+ XPath.match(media_group, "media:content")
2224
+
2225
+ # Parse the content objects within the group objects.
2226
+ affected_enclosures =
2227
+ parse_media_content.call(group_media_content_enclosures)
2228
+
2229
+ # Now make sure that content objects inherit certain properties from
2230
+ # the group objects.
2231
+ for enclosure in affected_enclosures
2232
+ if enclosure.thumbnail.nil? &&
2233
+ XPath.first(media_group, "media:thumbnail/@url").to_s != ""
2234
+ enclosure.thumbnail = EnclosureThumbnail.new(
2235
+ CGI.unescapeHTML(
2236
+ XPath.first(media_group, "media:thumbnail/@url").to_s),
2237
+ CGI.unescapeHTML(
2238
+ XPath.first(media_group, "media:thumbnail/@height").to_s),
2239
+ CGI.unescapeHTML(
2240
+ XPath.first(media_group, "media:thumbnail/@width").to_s)
2241
+ )
2242
+ if enclosure.thumbnail.height == ""
2243
+ enclosure.thumbnail.height = nil
694
2244
  end
695
- unless image_link.nil? || image_link == ""
696
- xml_builder.url(image_link)
2245
+ if enclosure.thumbnail.width == ""
2246
+ enclosure.thumbnail.width = nil
697
2247
  end
698
- unless link.nil? || link == ""
699
- xml_builder.link(link)
700
- else
701
- xml_builder.link
2248
+ end
2249
+ if (enclosure.categories.nil? || enclosure.categories.size == 0)
2250
+ enclosure.categories = []
2251
+ for category in XPath.match(media_group, "media:category")
2252
+ enclosure.categories << EnclosureCategory.new(
2253
+ CGI.unescapeHTML(category.text),
2254
+ CGI.unescapeHTML(category.attributes["scheme"].to_s),
2255
+ CGI.unescapeHTML(category.attributes["label"].to_s)
2256
+ )
2257
+ if enclosure.categories.last.scheme == ""
2258
+ enclosure.categories.last.scheme = nil
2259
+ end
2260
+ if enclosure.categories.last.label == ""
2261
+ enclosure.categories.last.label = nil
2262
+ end
702
2263
  end
703
2264
  end
704
- end
705
- unless feed_items.nil?
706
- for item in feed_items
707
- item.build_feed_item(feed_type, version, xml_builder)
2265
+ if enclosure.hash.nil? &&
2266
+ XPath.first(media_group, "media:hash/text()").to_s != ""
2267
+ enclosure.hash = EnclosureHash.new(
2268
+ CGI.unescapeHTML(XPath.first(media_group, "media:hash/text()").to_s),
2269
+ "md5"
2270
+ )
2271
+ end
2272
+ if enclosure.player.nil? &&
2273
+ XPath.first(media_group, "media:player/@url").to_s != ""
2274
+ enclosure.player = EnclosurePlayer.new(
2275
+ CGI.unescapeHTML(XPath.first(media_group, "media:player/@url").to_s),
2276
+ CGI.unescapeHTML(XPath.first(media_group, "media:player/@height").to_s),
2277
+ CGI.unescapeHTML(XPath.first(media_group, "media:player/@width").to_s)
2278
+ )
2279
+ if enclosure.player.height == ""
2280
+ enclosure.player.height = nil
2281
+ end
2282
+ if enclosure.player.width == ""
2283
+ enclosure.player.width = nil
2284
+ end
2285
+ end
2286
+ if enclosure.credits.nil? || enclosure.credits.size == 0
2287
+ enclosure.credits = []
2288
+ for credit in XPath.match(media_group, "media:credit")
2289
+ enclosure.credits << EnclosureCredit.new(
2290
+ CGI.unescapeHTML(CGI.unescapeHTML(credit.text)),
2291
+ CGI.unescapeHTML(credit.attributes["role"].to_s.downcase)
2292
+ )
2293
+ if enclosure.credits.last.role == ""
2294
+ enclosure.credits.last.role = nil
2295
+ end
2296
+ end
2297
+ end
2298
+ if enclosure.explicit?.nil?
2299
+ enclosure.explicit = (XPath.first(media_group,
2300
+ "media:adult/text()").to_s.downcase == "true") ? true : false
2301
+ end
2302
+ if enclosure.text.nil? &&
2303
+ XPath.first(media_group, "media:text/text()").to_s != ""
2304
+ enclosure.text = FeedTools.sanitize_html(CGI.unescapeHTML(
2305
+ XPath.first(media_group, "media:text/text()").to_s), :strip)
708
2306
  end
709
2307
  end
2308
+
2309
+ # Keep track of the media groups
2310
+ media_groups << affected_enclosures
710
2311
  end
711
- elsif feed_type == "rss"
712
- # normal rss format
713
- return xml_builder.rss("version" => version.to_s) do
714
- unless title.nil? || title == ""
715
- xml_builder.title(title)
716
- end
717
- unless link.nil? || link == ""
718
- xml_builder.link(link)
2312
+
2313
+ # Now we need to inherit any relevant item level information.
2314
+ if self.explicit?
2315
+ for enclosure in @enclosures
2316
+ enclosure.explicit = true
719
2317
  end
720
- unless description.nil? || description == ""
721
- xml_builder.description(description)
2318
+ end
2319
+
2320
+ # Add all the itunes categories
2321
+ for itunes_category in XPath.match(root_node, "itunes:category")
2322
+ genre = "Podcasts"
2323
+ category = itunes_category.attributes["text"].to_s
2324
+ subcategory = XPath.first(itunes_category, "itunes:category/@text").to_s
2325
+ category_path = genre
2326
+ if category != ""
2327
+ category_path << "/" + category
722
2328
  end
723
- xml_builder.ttl((time_to_live / 1.minute).to_s)
724
- xml_builder.generator("http://www.sporkmonger.com/projects/feedtools")
725
- build_feed_hook(feed_type, version, xml_builder)
726
- unless feed_items.nil?
727
- for item in feed_items
728
- item.build_feed_item(feed_type, version, xml_builder)
2329
+ if subcategory != ""
2330
+ category_path << "/" + subcategory
2331
+ end
2332
+ for enclosure in @enclosures
2333
+ if enclosure.categories.nil?
2334
+ enclosure.categories = []
729
2335
  end
2336
+ enclosure.categories << EnclosureCategory.new(
2337
+ CGI.unescapeHTML(category_path),
2338
+ CGI.unescapeHTML("http://www.apple.com/itunes/store/"),
2339
+ CGI.unescapeHTML("iTunes Music Store Categories")
2340
+ )
730
2341
  end
731
2342
  end
732
- elsif feed_type == "atom"
733
- # normal atom format
734
- return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
735
- "version" => version.to_s,
736
- "xml:lang" => language) do
737
- unless title.nil? || title == ""
738
- xml_builder.title(title,
739
- "mode" => "escaped",
740
- "type" => "text/html")
2343
+
2344
+ for enclosure in @enclosures
2345
+ # Clean up any of those attributes that incorrectly have ""
2346
+ # or 0 as their values
2347
+ if enclosure.type == ""
2348
+ enclosure.type = nil
741
2349
  end
742
- unless link.nil? || link == ""
743
- xml_builder.link("href" => link,
744
- "rel" => "alternate",
745
- "type" => "text/html",
746
- "title" => title)
2350
+ if enclosure.file_size == 0
2351
+ enclosure.file_size = nil
747
2352
  end
748
- unless description.nil? || description == ""
749
- xml_builder.tagline(description,
750
- "mode" => "escaped",
751
- "type" => "text/html")
2353
+ if enclosure.duration == 0
2354
+ enclosure.duration = nil
752
2355
  end
753
- xml_builder.generator("FeedTools",
754
- "url" => "http://www.sporkmonger.com/projects/feedtools")
755
- build_feed_hook(feed_type, version, xml_builder)
756
- unless feed_items.nil?
757
- for item in feed_items
758
- item.build_feed_item(feed_type, version, xml_builder)
2356
+ if enclosure.height == 0
2357
+ enclosure.height = nil
2358
+ end
2359
+ if enclosure.width == 0
2360
+ enclosure.width = nil
2361
+ end
2362
+ if enclosure.bitrate == 0
2363
+ enclosure.bitrate = nil
2364
+ end
2365
+ if enclosure.framerate == 0
2366
+ enclosure.framerate = nil
2367
+ end
2368
+ if enclosure.expression == "" || enclosure.expression.nil?
2369
+ enclosure.expression = "full"
2370
+ end
2371
+
2372
+ # If an enclosure is missing the text field, fall back on the itunes:summary field
2373
+ if enclosure.text.nil? || enclosure.text = ""
2374
+ enclosure.text = self.itunes_summary
2375
+ end
2376
+
2377
+ # Make sure we don't have duplicate categories
2378
+ unless enclosure.categories.nil?
2379
+ enclosure.categories.uniq!
2380
+ end
2381
+ end
2382
+
2383
+ # And finally, now things get complicated. This is where we make
2384
+ # sure that the enclosures method only returns either default
2385
+ # enclosures or enclosures with only one version. Any enclosures
2386
+ # that are wrapped in a media:group will be placed in the appropriate
2387
+ # versions field.
2388
+ affected_enclosure_urls = []
2389
+ for media_group in media_groups
2390
+ affected_enclosure_urls =
2391
+ affected_enclosure_urls | (media_group.map do |enclosure|
2392
+ enclosure.url
2393
+ end)
2394
+ end
2395
+ @enclosures.delete_if do |enclosure|
2396
+ (affected_enclosure_urls.include? enclosure.url)
2397
+ end
2398
+ for media_group in media_groups
2399
+ default_enclosure = nil
2400
+ for enclosure in media_group
2401
+ if enclosure.is_default?
2402
+ default_enclosure = enclosure
759
2403
  end
760
2404
  end
2405
+ for enclosure in media_group
2406
+ enclosure.default_version = default_enclosure
2407
+ enclosure.versions = media_group.clone
2408
+ enclosure.versions.delete(enclosure)
2409
+ end
2410
+ @enclosures << default_enclosure
761
2411
  end
762
2412
  end
763
- end
764
-
765
- # Saves the current state of the feed to the database unless the feed lacks a remote location
766
- def save
767
- unless url.nil? || url == ""
768
- super
769
- end
770
- end
771
- end
772
2413
 
773
- class FeedItem < ActiveRecord::Base
774
- include REXML
775
-
776
- # Verifies that the required fields exist; additional ones added by the user are fine
777
- def FeedItem.table_exists?
778
- begin
779
- connection.execute "select id, feed_id, link, title, author, description, " +
780
- "time, tags from feed_items limit 1"
781
- rescue ActiveRecord::StatementInvalid
782
- return false
2414
+ # If we have a single enclosure, it's safe to inherit the itunes:duration field
2415
+ # if it's missing.
2416
+ if @enclosures.size == 1
2417
+ if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
2418
+ @enclosures.first.duration = self.duration
2419
+ end
783
2420
  end
784
- return true
2421
+
2422
+ return @enclosures
785
2423
  end
786
2424
 
787
- def feed
788
- if @feed != nil
789
- return @feed
790
- elsif @feed_id != nil
791
- @feed = Feed.find_by_id(self.feed_id)
792
- return @feed
793
- else
794
- return nil
795
- end
2425
+ def enclosures=(new_enclosures)
2426
+ @enclosures = new_enclosures
796
2427
  end
797
2428
 
798
- def feed=(new_feed)
799
- self.feed_id = new_feed.id
800
- @feed = new_feed
801
- end
802
-
803
- def title
804
- return (self['title'] or "Untitled Entry")
2429
+ # Returns the feed item author
2430
+ def author_name
2431
+ # TODO: make this not suck, actually ensure we're looking at a name
2432
+ # and not an email address.
2433
+ # Also, factor in itunes module.
2434
+ # =================================================================
2435
+ if @author_name.nil?
2436
+ @author_name = CGI.unescapeHTML(XPath.first(root_node, "author/name/text()").to_s)
2437
+ if @author_name == ""
2438
+ @author_name = CGI.unescapeHTML(XPath.first(root_node, "dc:creator/text()").to_s)
2439
+ end
2440
+ if @author_name == ""
2441
+ @author_name = CGI.unescapeHTML(XPath.first(root_node, "author/text()").to_s)
2442
+ end
2443
+ end
2444
+ return @author_name
805
2445
  end
806
-
807
- def tag_list
808
- return tags.nil? ? nil : tags[1..-2].split("|")
2446
+
2447
+ # Sets the feed item author
2448
+ def author_name=(new_author_name)
2449
+ @author_name = new_author_name
809
2450
  end
810
-
811
- def tag_list=(new_tag_list)
812
- self.tags = "|" + (new_tag_list.map { |x| x.strip }).join("|") + "|"
2451
+
2452
+ # Returns the contents of the itunes:summary element
2453
+ def itunes_summary
2454
+ if @itunes_summary.nil?
2455
+ @itunes_summary = CGI.unescapeHTML(XPath.first(root_node,
2456
+ "itunes:summary/text()").to_s)
2457
+ if @itunes_summary == ""
2458
+ @itunes_summary = nil
2459
+ end
2460
+ unless @itunes_summary.nil?
2461
+ @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
2462
+ end
2463
+ end
2464
+ return @itunes_summary
813
2465
  end
814
2466
 
815
- def tag_string
816
- return (tags.nil? ? nil : tags[1..-2]).split("|").join(", ")
2467
+ # Sets the contents of the itunes:summary element
2468
+ def itunes_summary=(new_itunes_summary)
2469
+ @itunes_summary = new_itunes_summary
817
2470
  end
818
2471
 
819
- def tag_string=(new_tag_string)
820
- self.tags = "|" + (new_tag_string.split(",").map { |x| x.strip }).join("|") + "|"
2472
+ # Returns the contents of the itunes:subtitle element
2473
+ def itunes_subtitle
2474
+ if @itunes_subtitle.nil?
2475
+ @itunes_subtitle = CGI.unescapeHTML(XPath.first(root_node,
2476
+ "itunes:subtitle/text()").to_s)
2477
+ if @itunes_subtitle == ""
2478
+ @itunes_subtitle = nil
2479
+ end
2480
+ unless @itunes_subtitle.nil?
2481
+ @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
2482
+ end
2483
+ end
2484
+ return @itunes_subtitle
821
2485
  end
822
2486
 
823
- def parse_feed_item_hook(item_data)
824
- return nil
2487
+ # Sets the contents of the itunes:subtitle element
2488
+ def itunes_subtitle=(new_itunes_subtitle)
2489
+ @itunes_subtitle = new_itunes_subtitle
825
2490
  end
826
2491
 
827
- def parse_item(item_data)
828
- item_node = Document.new(item_data).root
829
-
830
- # get the feed base, in case the feed items use relative paths
831
- base = feed.link
832
-
833
- # get the link
834
- link = XPath.first(item_node, "link[@rel='alternate']/@href").to_s
835
- if link == ""
836
- link = XPath.first(item_node, "link/@href").to_s
837
- end
838
- if link == ""
839
- link = XPath.first(item_node, "link/text()").to_s
840
- end
841
- if link == ""
842
- link = XPath.first(item_node, "@rdf:about").to_s
843
- end
844
- if link == ""
845
- link = XPath.first(item_node, "guid/text()").to_s
846
- end
847
- if link != ""
848
- link = CGI.unescapeHTML(link)
849
- end
850
- if link != "" && (link =~ /http:\/\//) != 0 && (link =~ /https:\/\//) != 0
851
- # ensure that we don't end up with 'http://www.foobar.com//path/to/entry'
852
- # future-proofed this so that it doesn't break when Ruby 1.9/2.0 starts
853
- # returning single character Strings instead of FixNums
854
- if (base[-1] == 47 && link[0] == 47) || (base[-1] == "/" && link[0] == "/")
855
- link = link[1..-1]
2492
+ # Returns the contents of the media:text element
2493
+ def media_text
2494
+ if @media_text.nil?
2495
+ @media_text = CGI.unescapeHTML(XPath.first(root_node,
2496
+ "itunes:subtitle/text()").to_s)
2497
+ if @media_text == ""
2498
+ @media_text = nil
2499
+ end
2500
+ unless @media_text.nil?
2501
+ @media_text = FeedTools.sanitize_html(@media_text)
856
2502
  end
857
- # prepend the base to the link since they seem to have used a relative path
858
- link = base + link
859
- end
860
-
861
- title = XPath.first(item_node, "title/text()").to_s
862
- if title != ""
863
- # some blogging tools (notably TextPattern I believe) include the number of
864
- # comments in a post in the title... this is ugly, so we're gonna strip them out
865
- title = title.gsub(/\[\d*\]/,"").strip
866
- end
867
-
868
- # get the item author
869
- author = CGI.unescapeHTML(XPath.first(item_node, "author/name/text()").to_s)
870
- if author == ""
871
- author = CGI.unescapeHTML(XPath.first(item_node, "dc:creator/text()").to_s)
872
- end
873
- if author == ""
874
- author = CGI.unescapeHTML(XPath.first(item_node, "author/text()").to_s)
875
2503
  end
2504
+ return @media_text
2505
+ end
876
2506
 
877
- # get the item content
878
- description = ""
879
- body = XPath.first(item_node, "xhtml:body")
880
- if body == nil
881
- body = XPath.first(item_node, "body")
882
- end
883
- if body != nil
884
- description = body.inner_xml
885
- end
886
- if description == ""
887
- description = CGI.unescapeHTML(XPath.first(item_node, "content:encoded/text()").to_s)
2507
+ # Sets the contents of the media:text element
2508
+ def media_text=(new_media_text)
2509
+ @media_text = new_media_text
2510
+ end
2511
+
2512
+ # Returns the contents of the itunes:author element
2513
+ #
2514
+ # This inherits from any incorrectly placed channel-level itunes:author
2515
+ # elements. They're actually amazingly common. People don't read specs.
2516
+ def itunes_author
2517
+ if @itunes_author.nil?
2518
+ @itunes_author = CGI.unescapeHTML(XPath.first(root_node,
2519
+ "itunes:author/text()").to_s)
2520
+ if @itunes_author == ""
2521
+ @itunes_author = CGI.unescapeHTML(XPath.first(feed.channel_node,
2522
+ "itunes:author/text()").to_s)
2523
+ end
2524
+ if @itunes_author == ""
2525
+ @itunes_author = nil
2526
+ end
888
2527
  end
889
- if description == ""
890
- description = XPath.first(item_node, "description/text()").to_s
891
- if description != ""
892
- if XPath.first(item_node, "description/@encoding").to_s != ""
893
- description = "[Embedded data objects are not supported.]"
894
- else
895
- description = CGI.unescapeHTML(description)
2528
+ return @itunes_author
2529
+ end
2530
+
2531
+ # Sets the contents of the itunes:author element
2532
+ def itunes_author=(new_itunes_author)
2533
+ @itunes_author = new_itunes_author
2534
+ end
2535
+
2536
+ # Returns the number of seconds that the associated media runs for
2537
+ def duration
2538
+ if @duration.nil?
2539
+ itunes_duration = CGI.unescapeHTML(XPath.first(root_node,
2540
+ "itunes:duration/text()").to_s)
2541
+ if itunes_duration != ""
2542
+ hms = itunes_duration.split(":").map { |x| x.to_i }
2543
+ if hms.size == 3
2544
+ @duration = hms[0].hour + hms[1].minute + hms[2]
2545
+ elsif hms.size == 2
2546
+ @duration = hms[0].minute + hms[1]
2547
+ elsif hms.size == 1
2548
+ @duration = hms[0]
896
2549
  end
897
2550
  end
898
2551
  end
899
- if description == ""
900
- description = XPath.first(item_node,"content/text()").to_s
901
- if description != "" && XPath.first(item_node, "content/@mode").to_s == "escaped"
902
- description = CGI.unescapeHTML(description)
2552
+ return @duration
2553
+ end
2554
+
2555
+ # Sets the number of seconds that the associated media runs for
2556
+ def duration=(new_duration)
2557
+ @duration = new_duration
2558
+ end
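The duration reader above turns an itunes:duration string into seconds, leaning on ActiveSupport's Fixnum#hour and #minute extensions (active_record is required at the top of the file). A plain-Ruby sketch of the same arithmetic, with made-up sample values:

  def duration_in_seconds(itunes_duration)
    hms = itunes_duration.split(":").map { |x| x.to_i }
    case hms.size
    when 3 then hms[0] * 3600 + hms[1] * 60 + hms[2]
    when 2 then hms[0] * 60 + hms[1]
    when 1 then hms[0]
    end
  end

  duration_in_seconds("1:02:30")  # => 3750
  duration_in_seconds("12:07")    # => 727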
2559
+
2560
+ # Sets the itunes:summary
2561
+ def itunes_summary=(new_itunes_summary)
2562
+ @itunes_summary = new_itunes_summary
+ end
2563
+
2564
+ # Returns the feed item time
2565
+ def time
2566
+ if @time.nil?
2567
+ time_string = XPath.first(root_node, "pubDate/text()").to_s
2568
+ if time_string == ""
2569
+ time_string = XPath.first(root_node, "dc:date/text()").to_s
2570
+ end
2571
+ if time_string == ""
2572
+ time_string = XPath.first(root_node, "issued/text()").to_s
2573
+ end
2574
+ if time_string != ""
2575
+ @time = Time.parse(time_string) rescue Time.now
2576
+ else
2577
+ @time = Time.now
903
2578
  end
904
2579
  end
905
-
906
- # get the item time
907
- time = XPath.first(item_node, "pubDate/text()").to_s
908
- if time == ""
909
- time = XPath.first(item_node, "dc:date/text()").to_s
910
- end
911
- if time == ""
912
- time = XPath.first(item_node, "issued/text()").to_s
913
- end
914
-
915
- # get the item tags
916
- tags_array = []
917
- if tags_array == nil || tags_array.size == 0
918
- tags_array = []
919
- tag_list = XPath.match(item_node, "dc:subject/rdf:Bag/rdf:li/text()")
920
- if tag_list.size > 1
921
- for tag in tag_list
922
- tags_array << tag.to_s.downcase.strip
2580
+ return @time
2581
+ end
2582
+
2583
+ # Sets the feed item time
2584
+ def time=(new_time)
2585
+ @time = new_time
2586
+ end
2587
+
2588
+ # Returns the feed item tags
2589
+ def tags
2590
+ # TODO: support the rel="tag" microformat
2591
+ # =======================================
2592
+ if @tags.nil?
2593
+ @tags = []
2594
+ if @tags.nil? || @tags.size == 0
2595
+ @tags = []
2596
+ tag_list = XPath.match(root_node, "dc:subject/rdf:Bag/rdf:li/text()")
2597
+ if tag_list.size > 1
2598
+ for tag in tag_list
2599
+ @tags << tag.to_s.downcase.strip
2600
+ end
923
2601
  end
924
2602
  end
925
- end
926
- if tags_array == nil || tags_array.size == 0
927
- tags_array = []
928
- tag_list = XPath.match(item_node, "category/text()")
929
- if tag_list.size > 1
2603
+ if @tags.nil? || @tags.size == 0
2604
+ # messy effort to find ourselves some tags, mainly for del.icio.us
2605
+ @tags = []
2606
+ rdf_bag = XPath.match(root_node, "taxo:topics/rdf:Bag/rdf:li")
2607
+ if rdf_bag != nil && rdf_bag.size > 0
2608
+ for tag_node in rdf_bag
2609
+ begin
2610
+ tag_url = XPath.first(tag_node, "@resource").to_s
2611
+ tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
2612
+ if tag_match.size > 0
2613
+ @tags << tag_match.first.last.downcase.strip
2614
+ end
2615
+ rescue
2616
+ end
2617
+ end
2618
+ end
2619
+ end
2620
+ if @tags.nil? || @tags.size == 0
2621
+ @tags = []
2622
+ tag_list = XPath.match(root_node, "category/text()")
930
2623
  for tag in tag_list
931
- tags_array << tag.to_s.downcase.strip
2624
+ @tags << tag.to_s.downcase.strip
932
2625
  end
933
2626
  end
934
- end
935
- if tags_array == nil || tags_array.size == 0
936
- tags_array = []
937
- tag_list = XPath.match(item_node, "dc:subject/text()")
938
- if tag_list.size > 1
2627
+ if @tags.nil? || @tags.size == 0
2628
+ @tags = []
2629
+ tag_list = XPath.match(root_node, "dc:subject/text()")
939
2630
  for tag in tag_list
940
- tags_array << tag.to_s.downcase.strip
2631
+ @tags << tag.to_s.downcase.strip
941
2632
  end
942
2633
  end
943
- end
944
- if tags_array == nil || tags_array.size == 0
945
- tags_array = XPath.first(item_node,
946
- "category/text()").to_s.downcase.split(" ")
947
- end
948
- if tags_array == nil || tags_array.size == 0
949
- begin
950
- tags_array = XPath.first(item_node,
951
- "dc:subject/text()").to_s.downcase.split(" ")
952
- rescue
953
- tags_array = []
954
- end
955
- end
956
- if tags_array == nil || tags_array.size == 0
957
- tags_array = []
958
- rdf_bag = XPath.match(item_node,
959
- "taxo:topics/rdf:Bag/rdf:li")
960
- if rdf_bag != nil && rdf_bag.size > 0
961
- for tag_node in rdf_bag
962
- begin
963
- tag_url = XPath.first(tag_node, "@resource").to_s
964
- tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
965
- if tag_match.size > 0
966
- tags_array << tag_match.first.last.downcase.strip
967
- end
968
- rescue
969
- end
2634
+ if @tags.nil? || @tags.size == 0
2635
+ begin
2636
+ @tags = XPath.first(root_node, "itunes:keywords/text()").to_s.downcase.split(" ")
2637
+ rescue
2638
+ @tags = []
970
2639
  end
971
2640
  end
2641
+ if @tags.nil?
2642
+ @tags = []
2643
+ end
2644
+ @tags.uniq!
972
2645
  end
2646
+ return @tags
2647
+ end
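The del.icio.us branch above recovers tag names from taxo:topics resource URLs with the /\/(tag|tags)\/(\w+)/ pattern. A small sketch with an assumed URL (not taken from any real feed):

  tag_url = "http://del.icio.us/tag/Ruby"
  tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
  tag_match.first.last.downcase.strip  # => "ruby"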
973
2648
 
974
- # set all of the properties
975
- if link != ""
976
- self.link = link
977
- else
978
- self.link = nil
979
- end
980
- if title != ""
981
- self.title = title
982
- end
983
- if description != ""
984
- self.description = description.strip
985
- end
986
- if time != ""
987
- self.time = Time.parse(time) rescue Time.now
988
- elsif @time == nil
989
- self.time = Time.now
990
- end
991
- if tags_array.size > 0
992
- self.tag_list = tags_array
993
- end
994
- parse_feed_item_hook(item_data)
995
- if Feed.cache_enabled?
996
- save
2649
+ # Sets the feed item tags
2650
+ def tags=(new_tags)
2651
+ @tags = new_tags
2652
+ end
2653
+
2654
+ # Returns true if this feed item contains explicit material. If the whole
2655
+ # feed has been marked as explicit, this will return true even if the item
2656
+ # isn't explicitly marked as explicit.
2657
+ def explicit?
2658
+ if @explicit.nil?
2659
+ if XPath.first(root_node,
2660
+ "media:adult/text()").to_s.downcase == "true" ||
2661
+ XPath.first(root_node,
2662
+ "itunes:explicit/text()").to_s.downcase == "yes" ||
2663
+ XPath.first(root_node,
2664
+ "itunes:explicit/text()").to_s.downcase == "true" ||
2665
+ feed.explicit
2666
+ @explicit = true
2667
+ else
2668
+ @explicit = false
2669
+ end
997
2670
  end
998
- return self
2671
+ return @explicit
999
2672
  end
1000
2673
 
1001
- def build_feed_item_hook(feed_type, version, xml_builder)
2674
+ # Sets whether or not the feed item contains explicit material
2675
+ def explicit=(new_explicit)
2676
+ @explicit = (new_explicit ? true : false)
1002
2677
  end
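explicit? above treats media:adult == "true", itunes:explicit == "yes" or "true", or an explicit parent feed as explicit, and caches the answer; explicit= simply overrides that cache. A hedged usage sketch, where item stands in for a parsed feed item:

  item.explicit?         # lazily derived from media:adult / itunes:explicit, or the feed
  item.explicit = true   # manual override, coerced to true/false
  item.explicit?         # => true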
1003
2678
 
1004
- def build_feed_item(feed_type, version, xml_builder=Builder::XmlMarkup.new(:indent => 2))
2679
+ # A hook method that is called during the feed generation process. Overriding this method
2680
+ # will enable additional content to be inserted into the feed.
2681
+ def build_xml_hook(feed_type, version, xml_builder)
2682
+ return nil
2683
+ end
2684
+
2685
+ # Generates xml based on the content of the feed item
2686
+ def build_xml(feed_type="rss", version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
1005
2687
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
1006
2688
  # RDF-based rss format
1007
2689
  if link.nil?
@@ -1026,15 +2708,17 @@ module FeedTools
1026
2708
  unless time.nil?
1027
2709
  xml_builder.tag!("dc:date", time.iso8601)
1028
2710
  end
1029
- unless tags.nil?
2711
+ unless tags.nil? || tags.size == 0
1030
2712
  xml_builder.tag!("dc:subject") do
1031
2713
  xml_builder.tag!("rdf:Bag") do
1032
- for tag in tag_list
2714
+ for tag in tags
1033
2715
  xml_builder.tag!("rdf:li", tag)
1034
2716
  end
1035
2717
  end
1036
2718
  end
2719
+ xml_builder.tag!("itunes:keywords", tags.join(" "))
1037
2720
  end
2721
+ build_xml_hook(feed_type, version, xml_builder)
1038
2722
  end
1039
2723
  elsif feed_type == "rss"
1040
2724
  # normal rss format
@@ -1051,15 +2735,17 @@ module FeedTools
1051
2735
  unless time.nil?
1052
2736
  xml_builder.pubDate(time.rfc822)
1053
2737
  end
1054
- unless tags.nil?
2738
+ unless tags.nil? || tags.size == 0
1055
2739
  xml_builder.tag!("dc:subject") do
1056
2740
  xml_builder.tag!("rdf:Bag") do
1057
- for tag in tag_list
2741
+ for tag in tags
1058
2742
  xml_builder.tag!("rdf:li", tag)
1059
2743
  end
1060
2744
  end
1061
2745
  end
2746
+ xml_builder.tag!("itunes:keywords", tags.join(" "))
1062
2747
  end
2748
+ build_xml_hook(feed_type, version, xml_builder)
1063
2749
  end
1064
2750
  elsif feed_type == "atom"
1065
2751
  # normal atom format
@@ -1083,31 +2769,34 @@ module FeedTools
1083
2769
  unless time.nil?
1084
2770
  xml_builder.issued(time.iso8601)
1085
2771
  end
1086
- unless tags.nil?
1087
- for tag in tag_list
2772
+ unless tags.nil? || tags.size == 0
2773
+ for tag in tags
1088
2774
  xml_builder.category(tag)
1089
2775
  end
1090
2776
  end
2777
+ build_xml_hook(feed_type, version, xml_builder)
1091
2778
  end
1092
2779
  end
1093
2780
  end
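Because build_xml hands the live Builder object to build_xml_hook in every format branch, extra elements can be appended from a subclass. A sketch under the assumption that the item class shown here is exposed as FeedTools::FeedItem (its name does not appear in this hunk) and that the example namespace is declared elsewhere:

  class CustomFeedItem < FeedTools::FeedItem
    # Append a hypothetical element after the standard ones.
    def build_xml_hook(feed_type, version, xml_builder)
      xml_builder.tag!("example:custom", "extra data") if feed_type == "rss"
      return nil
    end
  end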
1094
-
1095
- # Saves the current state of the feed item to the database unless the feed lacks
1096
- # a remote location
1097
- def save
1098
- unless feed.nil? || feed.url.nil? || feed.url == ""
1099
- super
1100
- end
1101
- end
2781
+
2782
+ alias_method :tagline, :description
2783
+ alias_method :tagline=, :description=
2784
+ alias_method :subtitle, :description
2785
+ alias_method :subtitle=, :description=
2786
+ alias_method :abstract, :description
2787
+ alias_method :abstract=, :description=
2788
+ alias_method :content, :description
2789
+ alias_method :content=, :description=
2790
+ alias_method :guid, :id
2791
+ alias_method :guid=, :id=
1102
2792
  end
1103
2793
  end
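The alias_method block near the end of the class keeps older accessor names working: tagline, subtitle, abstract, and content all resolve to description, and guid resolves to id. Assumed usage, with item standing in for a parsed feed item:

  item.description = "<p>Hello</p>"
  item.content          # => "<p>Hello</p>" via the alias
  item.guid == item.id  # => true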
1104
2794
 
1105
- module REXML
1106
- class Element
1107
- # small extension to REXML to simplify parsing of xhtml feed items
1108
- def inner_xml
2795
+ module REXML #:nodoc:
2796
+ class Element #:nodoc:
2797
+ def inner_xml #:nodoc:
1109
2798
  result = ""
1110
- each_child do |child|
2799
+ self.each_child do |child|
1111
2800
  result << child.to_s
1112
2801
  end
1113
2802
  return result
@@ -1116,11 +2805,8 @@ module REXML
1116
2805
  end
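The inner_xml extension above concatenates each child's serialized form, which is how xhtml:body and escaped atom content are captured as markup rather than flattened to text. A small sketch (works once this file has been loaded, since inner_xml is added here):

  require 'rexml/document'

  doc = REXML::Document.new("<body><p>Hi <b>there</b></p></body>")
  doc.root.inner_xml  # => "<p>Hi <b>there</b></p>"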
1117
2806
 
1118
2807
  begin
1119
- FeedTools::Feed.prepare_connection
1120
- unless FeedTools::Feed.cache_exists?
1121
- FeedTools::Feed.create_cache
2808
+ unless FeedTools.feed_cache.nil?
2809
+ FeedTools.feed_cache.initialize_cache
1122
2810
  end
1123
2811
  rescue
1124
- # Nothing can be done until someone sets up the database connection.
1125
- # We'll just assume for now that the user will take care of that.
1126
2812
  end