RubyGems - rfeedreader - Versions diffs - 0.9.20 → 1.0.0 - Mend

rfeedreader 0.9.20 → 1.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

data/History.txt CHANGED Viewed

@@ -1,3 +1,12 @@
+== 1.0.0 2007-11-08
+* Added feed.contains_entries?
+* Solved truncated UTF-8
+* Solved truncated html in title
+* Solved empty titles, CDATA issue
+* Solved multiple space inside description
+* Solved Hpricot issue with Nil and inner_text
 == 0.9.20 2007-10-29
 * Update license

data/lib/rfeedreader.rb CHANGED Viewed

@@ -13,33 +13,32 @@ module Rfeedreader
   module_function
   class TextyHelper
-    def TextyHelper.clean(html, length = 45)
-      return html if html.empty?
-      if html.index("<")
-        html.gsub!(/(<[^>]*>)|\n|\t/s) {" "}
+    def TextyHelper.clean(text, length = 45)
+      return text if text.empty?
+      if text.index("<")
+        # Strip html tags, tabs and new lines
+        text.gsub!(/(<[^>]*>)/s, " ")
         # strip any comments, and if they have a newline at the end (ie. line with
         # only a comment) strip that too
-        truncate(html.gsub(/<!--(.*?)-->[\n]?/m, ""), length)
-      else
-        truncate(html, length) # already plain text
-      end
+        text.gsub!(/<!--(.*?)-->[\n]?/m, "")
+      end
+      text.gsub!(/\s{2,}|\n|\t/, ' ')
+      truncate(HTMLEntities.encode_entities(text, :named, :decimal), length)
     end
-    def TextyHelper.truncate(text, length = 45, truncate_string = "...")
-      if text.nil? then
-         return
-      end
+    def TextyHelper.truncate(text="", length = 45, truncate_string = "...")
+      return if text.empty?
       l = length - truncate_string.length
-      if text.length > length
-        text = text[0...l]
-        # Avoid html entity truncation
-        if text =~ /(&#\d+[^;])$/
-          text.delete!($1)
-        end
-        text = text + truncate_string
-      end
-      text
+      truncated_text = text[0...l]
+      # Avoid html entity truncation
+      truncated_text.gsub!(/(&\S+[^;])$/,  '')
+      truncated_text << truncate_string if text.length > length
+      return truncated_text
     end
     def TextyHelper.convertEncoding(text, encoding='utf-8')
@@ -154,6 +153,10 @@ module Rfeedreader
       end
     end
+    def contains_entries?
+      return @entries.size > 0
+    end
     protected
     def read_charset(hpricot_doc)
@@ -164,20 +167,28 @@ module Rfeedreader
     end
     def read_title(hpricot_doc)
-      @title = (hpricot_doc/"//title:first").text
+      begin
+        @title = (hpricot_doc/"//title:first").text
+      rescue
+        @title = ""
+      end
     end
     def read_link(hpricot_doc)
-      @link = (hpricot_doc/"link").first.inner_text
-      if @link.empty?
-        element = (hpricot_doc/"link[@rel=alternate]").first
-        @link = element[:href] unless element.nil?
-      end
-      if @link.empty?
-        element = (hpricot_doc/"link").first
-        @link = element[:href] unless element.nil?
+      begin
+        @link = (hpricot_doc/"link").first.inner_text
+        if @link.empty?
+          element = (hpricot_doc/"link[@rel=alternate]").first
+          @link = element[:href] unless element.nil?
+        end
+        if @link.empty?
+          element = (hpricot_doc/"link").first
+          @link = element[:href] unless element.nil?
+        end
+      rescue
+        @link = ""
       end
     end
@@ -213,7 +224,13 @@ module Rfeedreader
     end
     def read_title
-      @title = TextyHelper.convertEncoding(TextyHelper.clean((@hpricot_item/:title).to_s), @charset)
+      preformatted_title = (@hpricot_item/:title).text
+      if preformatted_title.index("CDATA")
+        preformatted_title.gsub!(/<\/*title>/, '')
+        preformatted_title.gsub!(/<\!\[CDATA\[/, '')
+        preformatted_title.gsub!(/\]\]>/, '')
+      end
+      @title = TextyHelper.convertEncoding(TextyHelper.clean(preformatted_title), @charset)
     end
     def read_description
@@ -229,15 +246,13 @@ module Rfeedreader
       unless @description.empty?
         @description = TextyHelper.clean(@description, 200)
-        @description = HTMLEntities.encode_entities(@description, :named, :decimal)
         @description = TextyHelper.convertEncoding(@description, @charset)
         @description.gsub!("&#10;", "")
         @description.gsub!("&#13;", "")
         @description.strip!
-        @description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]')
+        @description.gsub!(/((https?):\/\/([^\/]+)\/(\S*))/, '[<a href=\'\1\'>link</a>]')
         @description.strip!
       end
     end

data/lib/rfeedreader/version.rb CHANGED Viewed

@@ -1,8 +1,8 @@
 module Rfeedreader #:nodoc:
   module VERSION #:nodoc:
-    MAJOR = 0
-    MINOR = 9
-    TINY  = 20
+    MAJOR = 1
+    MINOR = 0
+    TINY  = 0
     STRING = [MAJOR, MINOR, TINY].join('.')
   end

data/test/test_helper.rb CHANGED Viewed

@@ -10,7 +10,28 @@ end
 def read_first(feed_url)
   puts "Read first from #{feed_url}"
   feed = Rfeedreader.read_first feed_url
-  assert_not_nil feed
-  assert_equal 1, feed.entries.size
-  feed.display_entries
+  unless feed.nil?
+    feed.display_entries
+  else
+    puts "+++WARNING+++ nil feed"
+  end
+end
+def read_opml(filename)
+  puts "Read OPML from #{filename}"
+  doc = Hpricot(open(filename))
+  feeds = (doc/"outline[@htmlurl]")
+  nb_feeds = feeds.size
+  current_feed = 1
+  feeds.each do |url|
+    if current_feed > 117
+      puts "Feed #{current_feed}/#{nb_feeds}"
+      unless url[:xmlurl].nil?
+        read_first(url[:xmlurl])
+      else
+        read_first(url[:htmlurl])
+      end
+    end
+    current_feed += 1
+  end
 end

data/test/test_rfeedreader.rb CHANGED Viewed

@@ -32,13 +32,19 @@ class TestRfeedreader < Test::Unit::TestCase
   def test_read_teketen_problem
     #
     feed = Rfeedreader.read("http://www.eitb24.com/rss/rss-eitb24-kultura-eu.xml")
-    puts feed
-    feed.display_entries
+    assert_not_nil feed
+    unless feed.nil?
+      puts feed
+      feed.display_entries
+    end
     # 412 problem in rfeedfinder
     feed = Rfeedreader.read("http://www.arteleku.net/4.1/blog/laburrak/?feed=rss2")
-    puts feed
-    feed.display_entries
+    assert_not_nil feed
+    unless feed.nil?
+      puts feed
+      feed.display_entries
+    end
   end
   def test_read_from_feevy
@@ -177,4 +183,37 @@ class TestRfeedreader < Test::Unit::TestCase
   def test_inquirer
     read_first "http://theinquirer.es/feed/"
   end
+  def test_imified
+    read_first "http://feeds.feedburner.com/imified"
+  end
+  def test_pere_opml
+    read_opml File.dirname(__FILE__) + '/pere.opml'
+  end
+  def test_encoding_with_amp
+    read_first " http://abladias.blogspot.com/feeds/posts/default"
+  end
+  def test_lot_of_space
+    read_first "http://igandekoa.wordpress.com/feed/"
+  end
+  def test_wrongly_formatted_link
+    read_first "http://snippets.dzone.com/rss/tag/R"
+  end
+  def test_title_truncate
+    read_first "http://corankeando.zoomblog.com/rss.xml"
+  end
+  def test_bad_title_encoding
+    read_first "http://www.esperanto.de/dej/aktualajhoj/rss.php?lingvo=eo"
+  end
+  def test_unrecognized_feed
+    read_first "http://www.gobmenorca.com/noticies/RSS"
+    #read_first "http://www.liberafolio.org/search_rss?SearchableText=&Title=&Description=&portal_type:list=News+Item&portal_type:list=Link&portal_type:list=Document&Creator=&submit=Search&sort_on=created&sort_order=reverse&review_s"
+  end
 end

data/website/index.html CHANGED Viewed

@@ -33,7 +33,7 @@
     <h1>rfeedreader</h1>
     <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
       <p>Get Version</p>
-      <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.20</a>
+      <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">1.0.0</a>
     </div>
     <h2>What</h2>
@@ -81,7 +81,7 @@ Rfeedfinder.read('http://blog.alexgirard.com/feed/', nb_posts=3)
 	<h2>License</h2>
-	<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
+	<p>This code is free to use under the terms of the Creative Commons <span class="caps">GNU GPL</span>.</p>
 	<h2>Contact</h2>
@@ -89,7 +89,7 @@ Rfeedfinder.read('http://blog.alexgirard.com/feed/', nb_posts=3)
 	<p>Comments are welcome. Send an email to <a href="mailto:alx.girard@gmail.com">Alexandre Girard</a>.</p>
     <p class="coda">
-      <a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 1st September 2007<br>
+      <a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 29th October 2007<br>
       Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
     </p>
 </div>

data/website/index.txt CHANGED Viewed

@@ -36,7 +36,7 @@ The trunk repository is <code>svn://rubyforge.org/var/svn/rfeedreader/trunk</cod
 h2. License
-This code is free to use under the terms of the MIT license.
+This code is free to use under the terms of the Creative Commons GNU GPL.
 h2. Contact

metadata CHANGED Viewed

@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
 specification_version: 1
 name: rfeedreader
 version: !ruby/object:Gem::Version
-  version: 0.9.20
-date: 2007-10-29 00:00:00 +01:00
+  version: 1.0.0
+date: 2007-11-09 00:00:00 +01:00
 summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
 require_paths:
 - lib