RubyGems - feed-normalizer - Versions diffs - 1.4.0 → 1.5.0 - Mend

feed-normalizer 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

data/History.txt +10 -0
data/Rakefile +2 -2
data/lib/html-cleaner.rb +3 -2
data/lib/parsers/rss.rb +15 -3
data/lib/parsers/simple-rss.rb +43 -4
data/lib/structures.rb +76 -4
data/test/data/atom03.xml +1 -1
data/test/data/atom10.xml +2 -2
data/test/data/rss20.xml +13 -2
data/test/data/rss20diff.xml +11 -0
data/test/data/rss20diff_short.xml +11 -0
data/test/test_feednormalizer.rb +69 -9
metadata +4 -4

data/History.txt CHANGED Viewed

@@ -1,3 +1,13 @@
+1.5.0
+ * Add support for new fields:
+   * Atom 0.3: issued is now available through entry.date_published.
+   * RSS: feed.skip_hours, feed.skip_days, feed.ttl [joshpeek]
+   * All: entry.last_updated, this is an alias to entry.date_published for RSS.
+ * Rewrite relative links in content [joshpeek]
+ * Handle CDATA sections consistently across all formats. [sam.lown]
+ * Prevent SimpleRSS from doing its own escaping. [reported by: paul.stadig, lionel.bouton]
+ * Reparse Time classes [reported by: sam.lown]
 1.4.0
  * Support content:encoded. Accessible via Entry#content.

data/Rakefile CHANGED Viewed

@@ -1,11 +1,11 @@
 require 'hoe'
-Hoe.new("feed-normalizer", "1.4.0") do |s|
+Hoe.new("feed-normalizer", "1.5.0") do |s|
   s.author = "Andrew A. Smith"
   s.email = "andy@tinnedfruit.org"
   s.url = "http://feed-normalizer.rubyforge.org/"
   s.summary = "Extensible Ruby wrapper for Atom and RSS parsers"
-  s.description = s.paragraphs_of('Readme.txt', 1..2).join("\n\n")
+  s.description = s.paragraphs_of('README.txt', 1..2).join("\n\n")
   s.changes = s.paragraphs_of('History.txt', 0..1).join("\n\n")
   s.extra_deps << ["simple-rss", ">= 1.1"]
   s.extra_deps << ["hpricot", ">= 0.6"]

data/lib/html-cleaner.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'rubygems'
 require 'hpricot'
 require 'cgi'
@@ -99,10 +100,10 @@ module FeedNormalizer
         doc = Hpricot(str, :xhtml_strict => true)
         doc = subtree(doc, :body)
-        out = ""
+        out = []
         doc.traverse_text {|t| out << add_entities(t.to_html)}
-        return out
+        return out.join
       end
       # Returns true if the given string contains a suspicious URL,

data/lib/parsers/rss.rb CHANGED Viewed

@@ -42,7 +42,8 @@ module FeedNormalizer
         :copyright => :copyright,
         :authors => :managingEditor,
         :last_updated => [:lastBuildDate, :pubDate, :dc_date],
-        :id => :guid
+        :id => :guid,
+        :ttl => :ttl
       }
       # make two passes, to catch all possible root elements
@@ -51,6 +52,8 @@ module FeedNormalizer
       # custom channel elements
       feed.image = rss.image ? rss.image.url : nil
+      feed.skip_hours = skip(rss, :skipHours)
+      feed.skip_days = skip(rss, :skipDays)
       # item elements
       item_mapping = {
@@ -59,7 +62,8 @@ module FeedNormalizer
         :description => :description,
         :content => [:content_encoded, :description],
         :title => :title,
-        :authors => [:author, :dc_creator]
+        :authors => [:author, :dc_creator],
+        :last_updated => [:pubDate, :dc_date] # This is effectively an alias for date_published for this parser.
       }
       rss.items.each do |rss_item|
@@ -79,6 +83,14 @@ module FeedNormalizer
       feed
     end
+    def self.skip(parser, attribute)
+      attributes = case attribute
+        when :skipHours: :hours
+        when :skipDays: :days
+      end
+      channel = parser.channel
+      channel.respond_to?(attribute) && channel.send(attribute).send(attributes).map { |e| e.content }
+    end
   end
 end

data/lib/parsers/simple-rss.rb CHANGED Viewed

@@ -1,5 +1,42 @@
 require 'simple-rss'
+# Monkey patches for outstanding issues logged in the simple-rss project.
+#   * Add support for issued time field:
+#     http://rubyforge.org/tracker/index.php?func=detail&aid=13980&group_id=893&atid=3517
+#   * The '+' symbol is lost when escaping fields.
+#     http://rubyforge.org/tracker/index.php?func=detail&aid=10852&group_id=893&atid=3517
+#
+class SimpleRSS
+  @@item_tags << :issued
+  undef clean_content
+  def clean_content(tag, attrs, content)
+    content = content.to_s
+    case tag
+      when :pubDate, :lastBuildDate, :published, :updated, :expirationDate, :modified, :'dc:date', :issued
+        Time.parse(content) rescue unescape(content)
+      when :author, :contributor, :skipHours, :skipDays
+        unescape(content.gsub(/<.*?>/,''))
+      else
+        content.empty? && "#{attrs} " =~ /href=['"]?([^'"]*)['" ]/mi ? $1.strip : unescape(content)
+    end
+  end
+  undef unescape
+  def unescape(s)
+   if s =~ /^(<!\[CDATA\[|\]\]>)/
+     # Raw HTML is inside the CDATA, so just remove the CDATA wrapper.
+     s.gsub(/(<!\[CDATA\[|\]\]>)/,'').strip
+   elsif s =~ /[<>]/
+     # Already looks like HTML.
+     s
+   else
+     # Make it HTML.
+     FeedNormalizer::HtmlCleaner.unescapeHTML(s)
+   end
+ end
+end
 module FeedNormalizer
   # The SimpleRSS parser can handle both RSS and Atom feeds.
@@ -38,7 +75,8 @@ module FeedNormalizer
         :copyright => [:copyright, :rights],
         :authors => [:author, :webMaster, :managingEditor, :contributor],
         :urls => :link,
-        :description => [:description, :subtitle]
+        :description => [:description, :subtitle],
+        :ttl => :ttl
       }
       map_functions!(feed_mapping, atomrss, feed)
@@ -50,13 +88,14 @@ module FeedNormalizer
       # entry elements
       entry_mapping = {
-        :date_published => [:pubDate, :published, :dc_date],
+        :date_published => [:pubDate, :published, :dc_date, :issued],
         :urls => :link,
         :description => [:description, :summary],
         :content => [:content, :content_encoded, :description],
         :title => :title,
         :authors => [:author, :contributor, :dc_creator],
-        :categories => :category
+        :categories => :category,
+        :last_updated => [:updated, :dc_date, :pubDate]
       }
       atomrss.entries.each do |atomrss_entry|
@@ -76,7 +115,7 @@ module FeedNormalizer
     def self.image(parser)
       if parser.respond_to?(:image) && parser.image
         if parser.image =~ /<url>/ # RSS image contains an <url> spec
-          parser.image.scan(/<url>(.*)<\/url>/).to_s
+          parser.image.scan(/<url>(.*?)<\/url>/).to_s
         else
           parser.image # Atom contains just the url
         end

data/lib/structures.rb CHANGED Viewed

@@ -115,13 +115,55 @@ module FeedNormalizer
     end
   end
+  module TimeFix
+    # Reparse any Time instances, due to RSS::Parser's redefinition of
+    # certain aspects of the Time class that creates unexpected behaviour
+    # when extending the Time class, as some common third party libraries do.
+    # See http://code.google.com/p/feed-normalizer/issues/detail?id=13.
+    def reparse(obj)
+      @parsed ||= false
+      return obj if @parsed
+      if obj.is_a?(Time)
+        @parsed = true
+        Time.at(obj) rescue obj
+      end
+    end
+  end
+  module RewriteRelativeLinks
+    def rewrite_relative_links(text, url)
+      if host = url_host(url)
+        text.to_s.gsub(/(href|src)=('|")\//, '\1=\2http://' + host + '/')
+      else
+        text
+      end
+    end
+    private
+      def url_host(url)
+        URI.parse(url).host rescue nil
+      end
+  end
   # Represents a feed item entry.
+  # Available fields are:
+  #  * content
+  #  * description
+  #  * title
+  #  * date_published
+  #  * urls / url
+  #  * id
+  #  * authors / author
+  #  * copyright
+  #  * categories
   class Entry
-    include Singular, ElementEquality, ElementCleaner
+    include Singular, ElementEquality, ElementCleaner, TimeFix, RewriteRelativeLinks
     HTML_ELEMENTS = [:content, :description, :title]
-    SIMPLE_ELEMENTS = [:date_published, :urls, :id, :authors, :copyright, :categories]
+    SIMPLE_ELEMENTS = [:date_published, :urls, :id, :authors, :copyright, :categories, :last_updated]
     BLENDED_ELEMENTS = []
     ELEMENTS = HTML_ELEMENTS + SIMPLE_ELEMENTS + BLENDED_ELEMENTS
@@ -132,19 +174,41 @@ module FeedNormalizer
       @urls = []
       @authors = []
       @categories = []
+      @date_published, @content = nil
+    end
+    undef date_published
+    def date_published
+      @date_published = reparse(@date_published)
+    end
+    undef content
+    def content
+      @content = rewrite_relative_links(@content, url)
     end
   end
   # Represents the root element of a feed.
+  # Available fields are:
+  #  * title
+  #  * description
+  #  * id
+  #  * last_updated
+  #  * copyright
+  #  * authors / author
+  #  * urls / url
+  #  * image
+  #  * generator
+  #  * items / channel
   class Feed
-    include Singular, ElementEquality, ElementCleaner
+    include Singular, ElementEquality, ElementCleaner, TimeFix
     # Elements that can contain HTML fragments.
     HTML_ELEMENTS = [:title, :description]
     # Elements that contain 'plain' Strings, with HTML escaped.
-    SIMPLE_ELEMENTS = [:id, :last_updated, :copyright, :authors, :urls, :image, :generator]
+    SIMPLE_ELEMENTS = [:id, :last_updated, :copyright, :authors, :urls, :image, :generator, :ttl, :skip_hours, :skip_days]
     # Elements that contain both HTML and escaped HTML.
     BLENDED_ELEMENTS = [:items]
@@ -160,8 +224,16 @@ module FeedNormalizer
       # set up associations (i.e. arrays where needed)
       @urls = []
       @authors = []
+      @skip_hours = []
+      @skip_days = []
       @items = []
       @parser = wrapper.parser.to_s
+      @last_updated = nil
+    end
+    undef last_updated
+    def last_updated
+      @last_updated = reparse(@last_updated)
     end
     def channel() self end

data/test/data/atom03.xml CHANGED Viewed

@@ -24,7 +24,7 @@
 Kmart has the Levi Strauss Signature Girl&#8217;s Low Rise Flare Jean for $10 after $5 instant savings (ends 9/2)
 Slim fit through hip and thigh, with zip-fly with button-through closure. Machine washable 99% Cotton/1% Spandex
 ]]></summary>
-    <content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24557/levi-strauss-signature-girls-low-rise-slim-fit-flare-jeans-10/"><![CDATA[<p><a href="http://clickserve.cc-dt.com/link/tplclick?lid=41000000011334249&#038;pubid=21000000000053626" target=_"blank"><img  src="http://images.kmart.com/assets/images/product/productDetail/9990000058546711.jpg" width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a><br />
+    <content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24557/levi-strauss-signature-girls-low-rise-slim-fit-flare-jeans-10/"><![CDATA[<p><a href="/link/tplclick?lid=41000000011334249&#038;pubid=21000000000053626" target=_"blank"><img  src="/assets/images/product/productDetail/9990000058546711.jpg" width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a><br />
 <strong>Kmart has the <a href="http://clickserve.cc-dt.com/link/tplclick?lid=41000000011334249&#038;pubid=21000000000053626" target=_"blank">Levi Strauss Signature Girl&#8217;s Low Rise Flare Jean</a> for $10 after $5 instant savings (ends 9/2)</strong></p>
 <p>Slim fit through hip and thigh, with zip-fly with button-through closure. Machine washable 99% Cotton/1% Spandex</p>
 ]]></content>

data/test/data/atom10.xml CHANGED Viewed

@@ -17,8 +17,8 @@
     <link href="http://habtm.com/articles/2006/08/16/a-forum-on-rails" rel="alternate" type="text/html"/>
     <category term="rails" scheme="http://habtm.com/articles/category/rails" label="rails"/>
     <category term="ruby" scheme="http://habtm.com/articles/category/ruby" label="ruby"/>
-    <summary type="html">&lt;p&gt;Josh Goebel and I took an evening to bang out a little project: &lt;a href="http://beast.caboo.se/"&gt;Beast&lt;/a&gt;.  It&amp;#8217;s our minimal no-fluff Rails forum.  It&amp;#8217;s no beast of an application either, clocking in at 285 LOC and a 1:1.5 test ratio.  &lt;a href="http://svn.techno-weenie.net/projects/beast/"&gt;Check it out&lt;/a&gt;!&lt;/p&gt;</summary>
-    <content type="html">&lt;p&gt;Josh Goebel and I took an evening to bang out a little project: &lt;a href="http://beast.caboo.se/"&gt;Beast&lt;/a&gt;.  It&amp;#8217;s our minimal no-fluff Rails forum.  It&amp;#8217;s no beast of an application either, clocking in at 285 LOC and a 1:1.5 test ratio.  &lt;a href="http://svn.techno-weenie.net/projects/beast/"&gt;Check it out&lt;/a&gt;!&lt;/p&gt;</content>
+    <summary type="html">&lt;plaintext&gt;&lt;p&gt;Josh Goebel and I took an evening to bang out a little project: &lt;a href="http://beast.caboo.se/"&gt;Beast&lt;/a&gt;.  It&amp;#8217;s our minimal no-fluff Rails forum.  It&amp;#8217;s no beast of an application either, clocking in at 285 LOC and a 1:1.5 test ratio.  &lt;a href="http://svn.techno-weenie.net/projects/beast/"&gt;Check it out&lt;/a&gt;!&lt;/p&gt;</summary>
+    <content type="html">&lt;plaintext&gt;&lt;p&gt;Josh Goebel and I took an evening to bang out a little project: &lt;a href="http://beast.caboo.se/"&gt;Beast&lt;/a&gt;.  It&amp;#8217;s our minimal no-fluff Rails forum.  It&amp;#8217;s no beast of an application either, clocking in at 285 LOC and a 1:1.5 test ratio.  &lt;a href="http://svn.techno-weenie.net/projects/beast/"&gt;Check it out&lt;/a&gt;!&lt;/p&gt;</content>
   </entry>
   <entry>
     <author>

data/test/data/rss20.xml CHANGED Viewed

@@ -10,6 +10,17 @@
     <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
     <docs>http://www.bbc.co.uk/syndication/</docs>
     <ttl>15</ttl>
+    <skipHours>
+       <hour>6</hour>
+       <hour>7</hour>
+       <hour>8</hour>
+       <hour>9</hour>
+       <hour>10</hour>
+       <hour>11</hour>
+    </skipHours>
+    <skipDays>
+       <day>Sunday</day>
+    </skipDays>
     <image>
       <title>BBC News</title>
@@ -19,7 +30,7 @@
     <item>
       <title>Concerns over security software</title>
-      <description>BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.</description>
+      <description><![CDATA[BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.]]></description>
       <content:encoded><![CDATA[<p>test1</p>]]></content:encoded>
       <link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
       <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
@@ -40,7 +51,7 @@
     <item>
       <title>MP3 player court order overturned</title>
-      <description>SanDisk puts its MP3 players back on display at a German electronics show after overturning a court injunction.</description>
+      <description>&lt;b&gt;SanDisk&lt;/b&gt; puts its MP3 players back on display at a German electronics show after overturning a court injunction.</description>
       <content:encoded><![CDATA[<p>test3</p>]]></content:encoded>
       <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5326660.stm</link>
       <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5326660.stm</guid>

data/test/data/rss20diff.xml CHANGED Viewed

@@ -10,6 +10,17 @@
     <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
     <docs>http://www.bbc.co.uk/syndication/</docs>
     <ttl>15</ttl>
+    <skipHours>
+       <hour>6</hour>
+       <hour>7</hour>
+       <hour>8</hour>
+       <hour>9</hour>
+       <hour>10</hour>
+       <hour>11</hour>
+    </skipHours>
+    <skipDays>
+       <day>Sunday</day>
+    </skipDays>
     <image>
       <title>BBC News</title>

data/test/data/rss20diff_short.xml CHANGED Viewed

@@ -10,6 +10,17 @@
     <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
     <docs>http://www.bbc.co.uk/syndication/</docs>
     <ttl>15</ttl>
+    <skipHours>
+       <hour>6</hour>
+       <hour>7</hour>
+       <hour>8</hour>
+       <hour>9</hour>
+       <hour>10</hour>
+       <hour>11</hour>
+    </skipHours>
+    <skipDays>
+       <day>Sunday</day>
+    </skipDays>
     <image>
       <title>BBC News</title>

data/test/test_feednormalizer.rb CHANGED Viewed

@@ -1,4 +1,6 @@
 require 'test/unit'
+$:.unshift(File.dirname(__FILE__))
+$:.unshift(File.dirname(__FILE__) + '/../lib')
 require 'feed-normalizer'
 require 'yaml'
@@ -66,8 +68,11 @@ class FeedNormalizerTest < Test::Unit::TestCase
     assert_equal "BBC News | Technology | UK Edition", feed.title
     assert_equal ["http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm"], feed.urls
+    assert_equal 15, feed.ttl
+    assert_equal [6, 7, 8, 9, 10, 11], feed.skip_hours
+    assert_equal ["Sunday"], feed.skip_days
     assert_equal "MP3 player court order overturned", feed.entries.last.title
-    assert_equal "SanDisk puts its MP3 players back on display at a German electronics show after overturning a court injunction.", feed.entries.last.description
+    assert_equal "<b>SanDisk</b> puts its MP3 players back on display at a German electronics show after overturning a court injunction.", feed.entries.last.description
     assert_match(/test\d/, feed.entries.last.content)
     assert_instance_of Time, feed.entries.last.date_published
   end
@@ -77,6 +82,9 @@ class FeedNormalizerTest < Test::Unit::TestCase
     assert_equal "~:caboose", feed.title
     assert_equal "http://habtm.com/xml/atom10/feed.xml", feed.url
+    assert_equal nil, feed.ttl
+    assert_equal [], feed.skip_hours
+    assert_equal [], feed.skip_days
     assert_equal "Starfish - Easy Distribution of Site Maintenance", feed.entries.last.title
     assert_equal "urn:uuid:6c028f36-f87a-4f53-b7e3-1f943d2341f0", feed.entries.last.id
@@ -134,9 +142,11 @@ class FeedNormalizerTest < Test::Unit::TestCase
   def test_clean
     feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10])
-    assert feed.entries.first.content !~ /\<p\>/
+    assert_match(/<plaintext>/, feed.entries.first.content)
+    assert_match(/<plaintext>/, feed.entries.first.description)
     feed.clean!
-    assert feed.entries.first.content =~ /\<p\>/
+    assert_no_match(/<plaintext>/, feed.entries.first.content)
+    assert_no_match(/<plaintext>/, feed.entries.first.description)
   end
   def test_malformed_feed
@@ -145,25 +155,21 @@ class FeedNormalizerTest < Test::Unit::TestCase
   def test_dublin_core_date_ruby_rss
     feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rdf10], :force_parser => RubyRssParser, :try_others => false)
-    assert_equal 'RSS::Parser', feed.parser
     assert_instance_of Time, feed.entries.first.date_published
   end
   def test_dublin_core_date_simple_rss
     feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rdf10], :force_parser => SimpleRssParser, :try_others => false)
-    assert_equal 'SimpleRSS', feed.parser
     assert_instance_of Time, feed.entries.first.date_published
   end
   def test_dublin_core_creator_ruby_rss
     feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rdf10], :force_parser => RubyRssParser, :try_others => false)
-    assert_equal 'RSS::Parser', feed.parser
     assert_equal 'Jeff Hecht', feed.entries.last.author
   end
   def test_dublin_core_creator_simple_rss
     feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rdf10], :force_parser => SimpleRssParser, :try_others => false)
-    assert_equal 'SimpleRSS', feed.parser
     assert_equal 'Jeff Hecht', feed.entries.last.author
   end
@@ -191,7 +197,7 @@ class FeedNormalizerTest < Test::Unit::TestCase
     feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => SimpleRssParser, :try_others => false)
     feed.entries.each_with_index do |e, i|
-      assert_match(/test#{i+1}/, e.content)
+      assert_equal("<p>test#{i+1}</p>", e.content)
     end
   end
@@ -199,9 +205,63 @@ class FeedNormalizerTest < Test::Unit::TestCase
     feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => RubyRssParser, :try_others => false)
     feed.entries.each_with_index do |e, i|
-      assert_match(/test#{i+1}/, e.content)
+      assert_equal("<p>test#{i+1}</p>", e.content)
     end
   end
+  def test_atom_content_contains_pluses
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10], :force_parser => SimpleRssParser, :try_others => false)
+    assert_equal 2, feed.entries.last.content.scan(/\+/).size
+  end
+  # http://code.google.com/p/feed-normalizer/issues/detail?id=13
+  def test_times_are_reparsed
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => RubyRssParser, :try_others => false)
+    Time.class_eval "alias :old_to_s :to_s; def to_s(x=1); old_to_s; end"
+    assert_equal "Sat Sep 09 07:57:06 -0700 2006", feed.last_updated.to_s(:foo)
+    assert_equal "Sat Sep 09 05:45:35 -0700 2006", feed.entries.first.date_published.to_s(:foo)
+  end
+  def test_atom03_has_issued
+    SimpleRSS.class_eval "@@item_tags.delete(:issued)"
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom03], :force_parser => SimpleRssParser, :try_others => false)
+    assert_nil feed.entries.first.date_published
+    SimpleRSS.class_eval "@@item_tags << :issued"
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom03], :force_parser => SimpleRssParser, :try_others => false)
+    assert_equal "Tue Aug 29 02:31:03 UTC 2006", feed.entries.first.date_published.to_s
+  end
+  def test_html_should_be_escaped_by_default
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => RubyRssParser, :try_others => false)
+    assert_match "<b>SanDisk</b>", feed.items.last.description
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => SimpleRssParser, :try_others => false)
+    assert_match "<b>SanDisk</b>", feed.items.last.description
+  end
+  def test_relative_links_and_images_should_be_rewritten_with_url_base
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom03])
+    assert_match '<a href="http://www.cheapstingybargains.com/link/tplclick?lid=41000000011334249&#038;pubid=21000000000053626"' +
+      ' target=_"blank"><img  src="http://www.cheapstingybargains.com/assets/images/product/productDetail/9990000058546711.jpg"' +
+      ' width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a>',
+      feed.items.first.content
+  end
+  def test_last_updated_simple_rss
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10], :force_parser => SimpleRssParser, :try_others => false)
+    assert_equal Time.parse("Wed Aug 16 09:59:44 -0700 2006"), feed.entries.first.last_updated
+  end
+  def test_last_updated_ruby_rss
+    feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => RubyRssParser, :try_others => false)
+    assert_equal feed.entries.first.date_published, feed.entries.first.last_updated
+  end
 end

metadata CHANGED Viewed

@@ -1,10 +1,10 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.9.2
+rubygems_version: 0.9.4
 specification_version: 1
 name: feed-normalizer
 version: !ruby/object:Gem::Version
-  version: 1.4.0
-date: 2007-07-10 00:00:00 -07:00
+  version: 1.5.0
+date: 2008-02-05 00:00:00 -08:00
 summary: Extensible Ruby wrapper for Atom and RSS parsers
 require_paths:
 - lib
@@ -90,5 +90,5 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.2.1
+        version: 1.5.0
     version: