RubyGems - rfeedreader - Versions diffs - 0.9.1 → 0.9.2 - Mend

rfeedreader 0.9.1 → 0.9.2

Files changed (6) hide show

data/History.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-== 0.9.1 2007-09-01
+== 0.9.2 2007-09-01
 * Correct lib dependencies

data/Rakefile CHANGED Viewed

@@ -11,8 +11,6 @@ require 'hoe'
 include FileUtils
 require File.join(File.dirname(__FILE__), 'lib', 'rfeedreader', 'version')
-require File.join(File.dirname(__FILE__), 'lib', 'rfeedreader', 'texty_helper')
-require File.join(File.dirname(__FILE__), 'lib', 'rfeedreader', 'html_entities')
 AUTHOR = 'Alexandre Girard'  # can also be an array of Authors
 EMAIL = "alx.girard@gmail.com"
@@ -73,7 +71,9 @@ hoe = Hoe.new(GEM_NAME, VERS) do |p|
   # == Optional
   p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
-  p.extra_deps = [['rfeedfinder', '>=0.9.0']]     # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
+  p.extra_deps = [['rfeedfinder',   '>=0.9.0'],
+                  ['htmlentities',  '>=4.0.0'],
+                  ['hpricot',       '>=0.6']]     # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
   #p.spec_extras = {}    # A hash of extra values to set in the gemspec.
 end

data/lib/rfeedreader/version.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Rfeedreader #:nodoc:
   # Gem version broken into its numeric components. STRING joins MAJOR, MINOR
   # and TINY with dots; the Rakefile requires this file (see its hunk above).
   module VERSION #:nodoc:
     MAJOR = 0
     MINOR = 9
-    TINY  = 1
+    TINY  = 2
     STRING = [MAJOR, MINOR, TINY].join('.')
   end

data/lib/rfeedreader.rb CHANGED Viewed

@@ -1,15 +1,106 @@
-require 'net/http'
 require 'rubygems'
-require 'open-uri'
 require 'hpricot'
-require 'timeout'
+require 'htmlentities'
+require 'iconv'
+require 'net/http'
+require 'open-uri'
 require 'rfeedfinder'
-require 'rfeedreader/texty_helper'
-require 'rfeedreader/html_entities'
+require 'timeout'
 module Rfeedreader
   module_function
+  class TextyHelper
+    def TextyHelper.clean(html, length = 45)
+      return html if html.empty?
+      if html.index("<")
+        html.gsub!(/(<[^>]*>)|\n|\t/s) {" "}
+        # strip any comments, and if they have a newline at the end (ie. line with
+        # only a comment) strip that too
+        truncate(html.gsub(/<!--(.*?)-->[\n]?/m, ""), length)
+      else
+        truncate(html, length) # already plain text
+      end
+    end
+    def TextyHelper.truncate(text, length = 45, truncate_string = "...")
+      if text.nil? then
+         return
+      end
+      l = length - truncate_string.length
+      if text.length > length
+        text = text[0...l]
+        # Avoid html entity truncation
+        if text =~ /(&#\d+[^;])$/
+          text.delete!($1)
+        end
+        text = text + truncate_string
+      end
+      text
+    end
+    def TextyHelper.convertEncoding(text, encoding='utf-8')
+      # Pre-process encoding
+      unless text.nil?
+        if encoding == 'utf-8'
+          # Some strange caracters to handle
+          text.gsub!("\342\200\042", "&ndash;")   # en-dash
+          text.gsub!("\342\200\041", "&mdash;")   # em-dash
+          text.gsub!("\342\200\174", "&hellip;")  # elipse
+          text.gsub!("\342\200\176", "&lsquo;")   # single quote
+          text.gsub!("\342\200\177", "&rsquo;")   # single quote
+          text.gsub!("\342\200\230", "&rsquo;")   # single quote
+          text.gsub!("\342\200\231", "&rsquo;")   # single quote
+          text.gsub!("\342\200\234", "&ldquo;")   # Double quote, right
+          text.gsub!("\342\200\235", "&rdquo;")   # Double quote, left
+          text.gsub!("\342\200\242", ".")
+          text.gsub!("\342\202\254", "&euro;");   # Euro symbol
+          text.gsub!(/\S\200\S/, " ")             # every strange character send to the moon
+          text.gsub!("\176", "\'")  # single quote
+          text.gsub!("\177", "\'")  # single quote
+          text.gsub!("\205", "-")		# ISO-Latin1 horizontal elipses (0x85)
+          text.gsub!("\221", "\'")	# ISO-Latin1 left single-quote
+          text.gsub!("\222", "\'")	# ISO-Latin1 right single-quote
+          text.gsub!("\223", "\"")	# ISO-Latin1 left double-quote
+          text.gsub!("\224", "\"")	# ISO-Latin1 right double-quote
+          text.gsub!("\225", "\*")	# ISO-Latin1 bullet
+          text.gsub!("\226", "-")		# ISO-Latin1 en-dash (0x96)
+          text.gsub!("\227", "-")		# ISO-Latin1 em-dash (0x97)
+          text.gsub!("\230", "\'")  # single quote
+          text.gsub!("\231", "\'")  # single quote
+          text.gsub!("\233", ">")		# ISO-Latin1 single right angle quote
+          text.gsub!("\234", "\"")  # Double quote
+          text.gsub!("\235", "\"")  # Double quote
+          text.gsub!("\240", " ")		# ISO-Latin1 nonbreaking space
+          text.gsub!("\246", "\|")	# ISO-Latin1 broken vertical bar
+          text.gsub!("\255", "")	  # ISO-Latin1 soft hyphen (0xAD)
+          text.gsub!("\264", "\'")	# ISO-Latin1 spacing acute
+          text.gsub!("\267", "\*")	# ISO-Latin1 middle dot (0xB7)
+          ic = Iconv.new('UTF-8//IGNORE', 'UTF-8')
+          text = ic.iconv(text + ' ')[0..-2]
+        elsif encoding == 'iso-8859-15'
+          text.gsub!("&#8217;", "'") # Long horizontal bar
+        end
+      end
+      begin
+        text = Iconv.new('iso-8859-1', encoding).iconv(text)
+        # Post-process encoding
+        unless text.nil? or text.empty? or text.kind_of? ArgumentError
+          text.gsub!(/[\240-\377]/) { |c| "&#%d;" % c[0] }
+          if encoding == 'iso-8859-15'
+            text.gsub!("&#8217;", "'")
+          end
+        end
+      rescue  => err
+        puts "Error while encoding: #{err} #{err.class}"
+      end
+      return text
+    end
+  end
   class Feed
     attr_accessor :title, :link, :charset, :entries
@@ -100,7 +191,7 @@ module Rfeedreader
     end
     # Takes the text of the :title search on @hpricot_item, passes it through
     # TextyHelper.convertEncoding together with @charset, and stores the
     # lower-cased result in @title. The removed/added pair below differs only
     # in calling the class method with "." instead of "::".
     def read_title
-      @title = TextyHelper::convertEncoding((@hpricot_item/:title).text, @charset).downcase
+      @title = TextyHelper.convertEncoding((@hpricot_item/:title).text, @charset).downcase
     end
     def read_description
@@ -110,14 +201,19 @@ module Rfeedreader
       @description = (@hpricot_item/"description|summary|[@type='text']").text if @description.empty?
       unless @description.empty?
+        @description = TextyHelper.clean(@description, 200)
         @description = HTMLEntities.encode_entities(@description, :named, :decimal)
+        @description = TextyHelper.convertEncoding(@description, @charset)
         @description.gsub!("&#10;", "")
         @description.gsub!("&#13;", "")
         @description.strip!
-        @description = TextyHelper::clean(TextyHelper::convertEncoding(@description, @charset), 200)
         @description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]')
         @description.strip!
       end
     end

data/website/index.html CHANGED Viewed

@@ -33,7 +33,7 @@
     <h1>rfeedreader</h1>
     <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
       <p>Get Version</p>
-      <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.1</a>
+      <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.2</a>
     </div>
     <h2>What</h2>

metadata CHANGED Viewed

@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
 specification_version: 1
 name: rfeedreader
 version: !ruby/object:Gem::Version
-  version: 0.9.1
+  version: 0.9.2
 date: 2007-09-01 00:00:00 +02:00
 summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
 require_paths:
@@ -73,3 +73,21 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 0.9.0
     version:
+- !ruby/object:Gem::Dependency
+  name: htmlentities
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Version::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 4.0.0
+    version:
+- !ruby/object:Gem::Dependency
+  name: hpricot
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Version::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0.6"
+    version: