RubyGems - ruby-feedparser - Versions diffs - 0.7 → 0.9.7 - Mend

ruby-feedparser 0.7 → 0.9.7

Files changed (20) hide show

checksums.yaml +7 -0
data/{ChangeLog → ChangeLog.md} +23 -16
data/Rakefile +16 -8
data/lib/feedparser/feedparser.rb +85 -10
data/lib/feedparser/html-output.rb +4 -4
data/lib/feedparser/html2text-parser.rb +24 -10
data/lib/feedparser/rexml_patch.rb +3 -0
data/lib/feedparser/sgml-parser.rb +3 -4
data/lib/feedparser/text-output.rb +3 -4
data/lib/feedparser/textconverters.rb +1 -1
data/lib/feedparser/version.rb +3 -0
data/test/tc_feed_parse.rb +52 -1
data/test/tc_feeditem.rb +47 -0
data/test/tc_html2text_parser.rb +43 -0
data/test/tc_htmloutput.rb +13 -13
data/test/tc_parser.rb +13 -13
data/test/tc_sgml_parser.rb +22 -0
data/test/tc_textoutput.rb +13 -13
data/test/tc_textwrappedoutput.rb +13 -13
metadata +49 -43

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: e198cf8ee7423ff4edf5ba4367ac809ba1fe2a9d6361fcf53d12b984aa138228
+  data.tar.gz: bbbd8c024c4e85c991ae2ceae4494e24d8b0865d2fe6a4df2646007e798e96ac
+SHA512:
+  metadata.gz: ac90154cfa40180e03d4b7b1d631186c6db1d70d79bdbb7f4edb4c54a66eddab3085e04480ed965b7c2e055770976873224b9d12f20be3d282817d6cd34245be
+  data.tar.gz: 4f658dc07c1d692b44f9abd0d400449cb0e1aa3d1cda8c782b052e45b193a8ad462ce7e2d8f951b3d15b301764e651f3781da93cf07f28c1ea8438ab87a1c989

data/{ChangeLog → ChangeLog.md} RENAMED Viewed

@@ -1,22 +1,29 @@
-Ruby-Feedparser 0.7 (27/07/2009)
-================================
+# 0.9.4 (25/03/2016)
+Bug fixes:
+* feedparser: relax exception check for Magic errors; by Eric Wong
+* Always sort author list to avoid unnecessary invalidation of caches; by Sébastien Dailly
+# 0.7 (27/07/2009)
 * Handled several creators per feed item
 * Fix bug with urls into tag attributes
 * Better item categories support
 * Reworked text output formatting
 * Ignore &shy;, as some blog software (dotclear2) misuse it.
-Ruby-Feedparser 0.6 (23/07/2008)
-================================
-* Moved to_human_readable from class Fixnum to class Integer.
+# 0.6 (23/07/2008)
+* Moved `to_human_readable` from class Fixnum to class Integer.
 * Correctly parse http://www.tbray.org/ongoing/ongoing.atom. Thanks
   to Janico Greifenberg for reporting this.
 * String#html2text now takes an additional wrapto parameter, allowing
   to wrap the text to a specified number of chars. Thanks to
   Maxime Petazzoni for the patch.
-Ruby-Feedparser 0.5 (26/10/2007)
-================================
+# 0.5 (26/10/2007)
 * Fixed a bug with items with both non-escaped and escaped HTML. Reported,
   then patch provided by Gregory Hartman <gghartma@cs.cmu.edu>.
 * In Atom feeds, use the date provided in <updated>, and use it in
@@ -27,33 +34,33 @@ Ruby-Feedparser 0.5 (26/10/2007)
 * Make checks for HTML tags case-insensitive. Broke Dilbert feeds!!
   Reported by Michal Čihař. Closes gna bug #10199.
-Ruby-Feedparser 0.4 (01/05/2007)
-================================
+# 0.4 (01/05/2007)
 * Fixed a problem with html entities in the items' titles.
 * Date was not fetched for blogspot's atom feeds.
   Patch from Jason Ling <jason.ling@jeyel.com>.
 * Tests are now timezone-friendly. (closes GNA bug #8145).
 * Much nicer text output.
-Ruby-Feedparser 0.3 (01/12/2006)
-================================
+# 0.3 (01/12/2006)
 * Much nicer HTML output
 * Fixed a problem with some feeds with broken enclosures (without url)
-* Now automatically fixes non-absolute <a href> or <img src>
+* Now automatically fixes non-absolute `<a href>` or `<img src>`
 * Fixed small parser bugs
 * Now displays enclosures in the text and html outputs. Ready for
   podcasting :-)
 * Now escape title, creator, subject and category internally. This minor
   fix avoids &amp; stuff in the titles, for example.
-Ruby-Feedparser 0.2 (05/06/2006)
-================================
+# 0.2 (05/06/2006)
 * Fixed a problem when parsing some ATOM feeds with <link> without type
   attribute. (Thanks Michal Cihar !)
 * FeedParser::Feed and FeedParser::FeedItem now have an xml attribute to
   get the related REXML::Element.
 * <enclosure/> support in RSS.
-Ruby-Feedparser 0.1 (24/11/2005)
-================================
+# 0.1 (24/11/2005)
 * first public release.

data/Rakefile CHANGED Viewed

@@ -1,14 +1,15 @@
 require 'rake/testtask'
-require 'rake/rdoctask'
-require 'rake/packagetask'
+require 'rdoc/task'
+require 'rubygems/package_task'
 require 'rake'
 require 'find'
+require_relative 'lib/feedparser/version.rb'
 # Globals
 PKG_NAME = 'ruby-feedparser'
-PKG_VERSION = '0.7'
+PKG_VERSION = FeedParser::VERSION
-PKG_FILES = [ 'ChangeLog', 'README', 'COPYING', 'LICENSE', 'setup.rb', 'Rakefile']
+PKG_FILES = [ 'ChangeLog.md', 'README', 'COPYING', 'LICENSE', 'setup.rb', 'Rakefile']
 Find.find('lib/', 'test/', 'tools/') do |f|
 	if FileTest.directory?(f) and f =~ /\.svn/
 		Find.prune
@@ -19,7 +20,7 @@ end
 PKG_FILES.reject! { |f| f =~ /^test\/(source|.*_output)\// }
-task :default => [:package]
+task :default => [:test]
 Rake::TestTask.new do |t|
 	t.libs << "test"
@@ -61,8 +62,6 @@ end
 # "Gem" part of the Rakefile
 begin
-	require 'rake/gempackagetask'
 	spec = Gem::Specification.new do |s|
 		s.platform = Gem::Platform::RUBY
 		s.summary = "Ruby library to parse ATOM and RSS feeds"
@@ -73,12 +72,21 @@ begin
 		s.autorequire = 'feedparser'
 		s.files = PKG_FILES
 		s.description = "Ruby library to parse ATOM and RSS feeds"
+		s.authors = ['Lucas Nussbaum']
+		s.add_runtime_dependency 'magic'
 	end
-	Rake::GemPackageTask.new(spec) do |pkg|
+	Gem::PackageTask.new(spec) do |pkg|
 		pkg.need_zip = true
 		pkg.need_tar = true
 	end
 rescue LoadError
   puts "Will not generate gem."
 end
+task :release => :repackage do
+  sh 'git', 'tag', 'v' + PKG_VERSION
+  sh 'git', 'push'
+  sh 'git', 'push', '--tags'
+  sh 'gem', 'push', "pkg/#{PKG_NAME}-#{PKG_VERSION}.gem"
+end

data/lib/feedparser/feedparser.rb CHANGED Viewed

@@ -1,17 +1,47 @@
+require 'cgi'
 require 'rexml/document'
 require 'time'
 require 'feedparser/textconverters'
 require 'feedparser/rexml_patch'
 require 'feedparser/text-output'
+require 'feedparser/version'
 require 'base64'
+require 'magic'
+require 'uri'
 module FeedParser
-  VERSION = "0.7"
   class UnknownFeedTypeException < RuntimeError
   end
+  def self.recode(str)
+    encoding = nil
+    begin
+      encoding = Magic.guess_string_mime_encoding(str)
+    rescue => e
+      raise unless e.class.to_s =~ /\AMagic::(?:Exception|Error)\z/
+      # this happens when magic does not find any content at all, e.g. with
+      # strings that contain only whitespace. In these cases it *should* be safe
+      # to assume UTF-8
+      encoding = Encoding::UTF_8
+    end
+    if encoding == 'unknown-8bit'
+      # find first substring with a valid encoding that is not us-ascii
+      length = 1 # has to start at 1, magic requires at least 2 bytes
+      while length < str.length && ['us-ascii', 'unknown-8bit'].include?(encoding)
+        encoding = Magic.guess_string_mime_encoding(str[0..length])
+        length = length + 1
+      end
+      # need to remove iso-8859-1 control characters
+      if encoding == 'iso-8859-1'
+        str = str.bytes.select { |c| c < 128 || c > 159 }.map(&:chr).join
+      end
+    end
+    str.force_encoding(encoding)
+    str = str.chars.select { |c| c.valid_encoding? }.join
+    str.encode('UTF-8')
+  end
   # an RSS/Atom feed
   class Feed
     attr_reader :type, :title, :link, :description, :creator, :encoding, :items
@@ -20,13 +50,16 @@ module FeedParser
     attr_reader :xml
     # parse str to build a Feed
-    def initialize(str = nil)
+    def initialize(str = nil, uri = nil)
       parse(str) if str
+      parse_origin(uri) if uri
     end
     # Determines all the fields using a string containing an
     # XML document
     def parse(str)
+      str = FeedParser.recode(str)
       # Dirty hack: some feeds contain the & char. It must be changed to &amp;
       str.gsub!(/&(\s+)/, '&amp;\1')
       doc = REXML::Document.new(str)
@@ -34,6 +67,7 @@ module FeedParser
       # get feed info
       @encoding = doc.encoding
       @title,@link,@description,@creator = nil
+      @title = ""
       @items = []
       if doc.root.elements['channel'] || doc.root.elements['rss:channel']
         @type = "rss"
@@ -108,19 +142,28 @@ module FeedParser
       s += "Type: #{@type}\n"
       s += "Encoding: #{@encoding}\n"
       s += "Title: #{@title}\n"
-      s += "Link: #{@link}\n"
+      s += "Link: #{link}\n"
       s += "Description: #{@description}\n"
       s += "Creator: #{@creator}\n"
       s += "\n"
       @items.each { |i| s += i.to_s(localtime) }
       s
     end
+    def parse_origin(uri)
+      uri = URI.parse(uri)
+      if uri.hostname && uri.scheme
+        @origin = "#{uri.scheme}://#{uri.hostname}"
+      end
+    end
+    attr_reader :origin
   end
   # an Item from a feed
   class FeedItem
-    attr_accessor :title, :link, :content, :date, :creators, :subject,
-                  :cacheditem
+    attr_accessor :title, :content, :date, :creators, :subject,
+                  :cacheditem, :links
     # The item's categories/tags. An array of strings.
     attr_accessor :categories
@@ -137,9 +180,12 @@ module FeedParser
       @xml = item
       @feed = feed
       @title, @link, @content, @date, @subject = nil
+      @links = []
       @creators = []
       @categories = []
       @enclosures = []
+      @title = ""
       parse(item) if item
     end
@@ -154,13 +200,14 @@ module FeedParser
       when 1
         return creators[0]
       else
-        return creators[0...-1].join(", ")+" and "+creators[-1]
+        sorted_creators = creators.sort
+        return sorted_creators[0...-1].join(", ") + " and " + sorted_creators[-1]
       end
     end
     def to_s(localtime = true)
       s = "--------------------------------\n" +
-        "Title: #{@title}\nLink: #{@link}\n"
+        "Title: #{@title}\nLink: #{link}\n"
       if localtime or @date.nil?
         s += "Date: #{@date.to_s}\n"
       else
@@ -181,6 +228,26 @@ module FeedParser
       end
       return s
     end
+    attr_writer :link
+    def link
+      if @link
+        begin
+          uri = URI.parse(@link)
+        rescue URI::InvalidURIError
+          return @link
+        end
+        if uri.hostname && uri.scheme
+          @link
+        elsif feed && feed.origin
+          [feed.origin, @link].compact.join
+        else
+          @link
+        end
+      end
+    end
   end
   class RSSItem < FeedItem
@@ -199,7 +266,7 @@ module FeedParser
           (e = item.elements['guid'] || item.elements['rss:guid'] and
           not (e.attribute('isPermaLink') and
           e.attribute('isPermaLink').value == 'false'))
-        @link = e.text.rmWhiteSpace!
+        self.link = e.text.rmWhiteSpace!
       end
       # Content
       if (e = item.elements['content:encoded']) ||
@@ -261,8 +328,16 @@ module FeedParser
       end
       # Link
       item.each_element('link') do |e|
         if (h = e.attribute('href')) && h.value
-          @link = h.value
+          self.link = h.value
+          if e.attribute('type')
+            @links << {:href => h.value, :type => e.attribute('type').value}
+          else
+            @links << {:href => h.value, :type => ''}
+          end
         end
       end
       # Content

data/lib/feedparser/html-output.rb CHANGED Viewed

@@ -80,13 +80,13 @@ module FeedParser
       s += (headline % ["Feed:", r])
       r = ""
-      r += "<a href=\"#{@link}\">" if @link
+      r += "<a href=\"#{link}\">" if link
       if @title
         r += "<b>#{@title.escape_html}</b>\n"
-      elsif @link
-        r += "<b>#{@link.escape_html}</b>\n"
+      elsif link
+        r += "<b>#{link.escape_html}</b>\n"
       end
-      r += "</a>\n" if @link
+      r += "</a>\n" if link
       s += (headline % ["Item:", r])
       s += "</table></td></tr></table>\n"
       s += "\n"

data/lib/feedparser/html2text-parser.rb CHANGED Viewed

@@ -11,16 +11,16 @@ module FeedParser
       @pre = false
       @href = nil
       @links = []
+      @curlink = []
       @imgs = []
-      @img_index = '@'
+      @img_index = 'A'
       super(verbose)
     end
     def next_img_index
-      n = @img_index[0] + 1
-      @img_index = " "
-      @img_index[0] = n
-      return @img_index
+      idx = @img_index
+      @img_index = @img_index.next
+      idx
     end
     def handle_data(data)
@@ -29,7 +29,8 @@ module FeedParser
         data.gsub!(/\n/, ' ')
         data.gsub!(/( )+/, ' ')
       end
-      @savedata << data
+      data = FeedParser.recode(data)
+      @savedata << data.encode(Encoding::UTF_8)
     end
     def unknown_starttag(tag, attrs)
@@ -70,7 +71,14 @@ module FeedParser
           end
         end
         if @href
-          @links << @href.gsub(/^("|'|)(.*)("|')$/,'\2')
+          @href.gsub!(/^("|'|)(.*)("|')$/,'\2')
+          @curlink = @links.find_index(@href)
+          if @curlink.nil?
+            @links << @href
+            @curlink = @links.length
+          else
+            @curlink += 1
+          end
         end
       when 'img'
         # find src in args
@@ -81,8 +89,14 @@ module FeedParser
           end
         end
         if src
-          idx = next_img_index
-          @imgs << [ idx, src.gsub(/^("|'|)(.*)("|')$/,'\2') ]
+          src.gsub!(/^("|'|)(.*)("|')$/,'\2')
+          i = @imgs.index { |e| e[1] == src }
+          if i.nil?
+            idx = next_img_index
+            @imgs << [ idx, src ]
+          else
+            idx = @imgs[i][0]
+          end
           @savedata << "[#{idx}]"
         end
       else
@@ -125,7 +139,7 @@ module FeedParser
         @pre = false
       when 'a'
         if @href
-          @savedata << "[#{@links.length}]"
+          @savedata << "[#{@curlink}]"
           @href = nil
         end
       end

data/lib/feedparser/rexml_patch.rb CHANGED Viewed

@@ -6,14 +6,17 @@ require 'feedparser/textconverters'
 # With those changes, it uses unpack/pack with some error handling
 module REXML
   module Encoding
+    alias rexml_decode decode
     def decode(str)
       return str.toUTF8(@encoding)
     end
+    alias rexml_encode encode
     def encode(str)
       return str
     end
+    alias rexml_encoding= encoding=
     def encoding=(enc)
       return if defined? @encoding and enc == @encoding
       @encoding = enc || 'utf-8'

data/lib/feedparser/sgml-parser.rb CHANGED Viewed

@@ -293,12 +293,11 @@ module FeedParser
     end
     def handle_charref(name)
-      n = name.to_i
-      if !(0 <= n && n <= 255)
+      if name =~ /[0-9]+/
         unknown_charref(name)
-        return
+      else
+        handle_data(name)
       end
-      handle_data(n.chr)
     end
     def handle_entityref(name)

data/lib/feedparser/text-output.rb CHANGED Viewed

@@ -1,4 +1,3 @@
-require 'feedparser'
 require 'feedparser/html2text-parser'
 require 'feedparser/filesizes'
@@ -61,7 +60,7 @@ module FeedParser
       if header
         s += "Item: "
         s += @title if @title
-        s += "\n<#{@link}>" if @link
+        s += "\n<#{link}>" if link
         if @date
           if localtime
             s += "\nDate: #{@date.to_s}"
@@ -71,7 +70,7 @@ module FeedParser
         end
         s += "\n"
       else
-        s += "<#{@link}>\n\n" if @link
+        s += "<#{link}>\n\n" if link
       end
       s += "#{@content.html2text(wrapto).chomp}\n" if @content
       if @enclosures and @enclosures.length > 0
@@ -89,7 +88,7 @@ module FeedParser
       if not header
         s += "\nItem: "
         s += @title if @title
-        s += "\n<#{@link}>" if @link
+        s += "\n<#{link}>" if link
         if @date
           if localtime
             s += "\nDate: #{@date.to_s}"

data/lib/feedparser/textconverters.rb CHANGED Viewed

@@ -59,7 +59,7 @@ end
       text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
       text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
       # uris
-      text.gsub!(/([^'"])(#{URI::regexp(['http','ftp','https'])})/,
+      text.gsub!(/([^'"])(#{URI::DEFAULT_PARSER.make_regexp(['http','ftp','https'])})/,
           '\1<a href="\2">\2</a>')
     end
     # Handle broken hrefs in <a> and <img>

data/lib/feedparser/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module FeedParser
+  VERSION = "0.9.7"
+end

data/test/tc_feed_parse.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-#!/usr/bin/ruby -w
+# encoding: UTF-8
 $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
@@ -114,4 +114,55 @@ class FeedParserTest < Test::Unit::TestCase
     # the third one should be removed because an enclosure should have an url, or it's useless.
     assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
   end
+  def test_recode_utf8
+    assert_equal 'UTF-8', FeedParser.recode("áéíóú").encoding.name
+  end
+  def test_recode_blank
+    assert_equal 'UTF-8', FeedParser.recode('').encoding.name
+  end
+  def test_recode_iso88519
+    assert_equal 'UTF-8', FeedParser.recode("áéíóú".encode('iso-8859-1')).encoding.name
+  end
+  def test_recode_utf8_mixed_with_ASCIIBIT
+    recoded = FeedParser.recode("áé\x8Díóú")
+    assert_equal'UTF-8', recoded.encoding.name
+    assert_equal 'áéíóú', recoded
+  end
+  def test_recode_unicode_char
+    assert_equal "1280×1024", FeedParser.recode("1280×1024")
+  end
+  def test_almost_valid_iso88591
+    input = "Codifica\xE7\xE3o \x96 quase v\xE1lida"
+    assert_equal "Codificação  quase válida", FeedParser.recode(input)
+  end
+  def test_feed_origin
+    feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
+    assert_equal "http://foo.com", feed.origin
+  end
+  def test_item_origin
+    feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
+    item = FeedParser::FeedItem.new(nil, feed)
+    item.link = '/foo/bar'
+    assert_equal 'http://foo.com/foo/bar', item.link
+  end
+  def test_item_origin_no_link
+    item = FeedParser::FeedItem.new(nil, nil)
+    assert_nil item.link
+  end
+  def test_item_no_feed
+    item = FeedParser::FeedItem.new(nil, nil)
+    item.link = '/foo/bar'
+    assert_equal '/foo/bar', item.link
+  end
 end

data/test/tc_feeditem.rb ADDED Viewed

@@ -0,0 +1,47 @@
+require 'feedparser/feedparser'
+require 'test/unit'
+class FeedItemTest < Test::Unit::TestCase
+  def setup
+    @item = FeedParser::FeedItem.new(nil, nil)
+  end
+  ########################################################################
+  def test_link_no_link
+    assert @item.link.nil?
+  end
+  def test_link_basic
+    @item.instance_variable_set('@link', 'https://www.example.com/')
+    assert_equal "https://www.example.com/", @item.link
+  end
+  def test_link_path_only
+    @item.instance_variable_set('@link', '/foo/bar/')
+    assert_equal "/foo/bar/", @item.link
+  end
+  def test_link_path_only_with_feed_origin
+    @item.instance_variable_set('@link', '/foo/bar/')
+    feed = FeedParser::Feed.new
+    feed.instance_variable_set('@origin', 'https://www.exampleorigin.com')
+    @item.instance_variable_set('@feed', feed)
+    assert_equal "https://www.exampleorigin.com/foo/bar/", @item.link
+  end
+  def test_link_full_link_with_feed_origin
+    @item.instance_variable_set('@link', 'https://www.exampleorigin.com/foo/bar/')
+    feed = FeedParser::Feed.new
+    feed.instance_variable_set('@origin', 'https://www.exampleorigin.com')
+    @item.instance_variable_set('@feed', feed)
+    assert_equal "https://www.exampleorigin.com/foo/bar/", @item.link
+  end
+  def test_link_with_non_ascii
+    @item.instance_variable_set('@link', 'https://www.example.people/☭/')
+    assert_equal "https://www.example.people/☭/", @item.link
+  end
+end

data/test/tc_html2text_parser.rb ADDED Viewed

@@ -0,0 +1,43 @@
+# encoding: UTF-8
+require 'test/unit'
+require 'feedparser/feedparser'
+class Html2TextParserTest < Test::Unit::TestCase
+  def test_next_img_index
+    parser = FeedParser::HTML2TextParser.new
+    assert_equal 'A', parser.next_img_index
+    assert_equal 'B', parser.next_img_index
+  end
+  def test_numerical_entity
+    parser = FeedParser::HTML2TextParser.new
+    parser.feed('1280&#215;1024')
+    parser.close
+    assert_equal "1280×1024", parser.savedata
+  end
+  def test_numerical_entity_large_known
+    parser = FeedParser::HTML2TextParser.new
+    parser.feed('&#8594;')
+    parser.close
+    assert_equal "→", parser.savedata
+  end
+  def test_numerical_entity_large
+    parser = FeedParser::HTML2TextParser.new
+    parser.feed('&#10000;')
+    parser.close
+    assert_equal "✐", parser.savedata
+  end
+  def test_non_numerical_entity
+    parser = FeedParser::HTML2TextParser.new
+    parser.feed('HTML&amp;CO')
+    parser.close
+    assert_equal "HTML&CO", parser.savedata
+  end
+end

data/test/tc_htmloutput.rb CHANGED Viewed

@@ -19,12 +19,10 @@ class HTMLOutputTest < Test::Unit::TestCase
   else
     raise 'source directory not found.'
   end
-  def test_parser
-    allok = true
-    Dir.foreach(SRCDIR) do |f|
-      next if f !~ /.xml$/
-      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
-      puts "Checking #{f}"
+  Dir.foreach(SRCDIR) do |f|
+    next if f !~ /.xml$/
+    testname = 'test_' + File.basename(f).gsub(/\W/, '_')
+    define_method(testname) do
       str = File::read(SRCDIR + '/' + f)
       chan = FeedParser::Feed::new(str)
       chanstr = chan.to_html(false)
@@ -34,19 +32,21 @@ class HTMLOutputTest < Test::Unit::TestCase
           File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
             fd.print(chanstr)
           end
-          puts "Test failed for #{f}."
-          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
-          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
-          allok = false
+          assert(
+            false,
+            [
+              "Test failed for #{f}.",
+              "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
+              "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
+            ].join("\n")
+          )
         end
       else
-        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
         File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
           f.print(chanstr)
         end
-        allok = false
+        assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
       end
     end
-    assert(allok)
   end
 end

data/test/tc_parser.rb CHANGED Viewed

@@ -15,12 +15,10 @@ class ParserTest < Test::Unit::TestCase
   else
     raise 'source directory not found.'
   end
-  def test_parser
-    allok = true
-    Dir.foreach(SRCDIR) do |f|
-      next if f !~ /.xml$/
-      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
-      puts "Checking #{f}"
+  Dir.foreach(SRCDIR) do |f|
+    next if f !~ /.xml$/
+    testname = 'test_' + File.basename(f).gsub(/\W/, '_')
+    define_method(testname) do
       str = File::read(SRCDIR + '/' + f)
       chan = FeedParser::Feed::new(str)
       chanstr = chan.to_s(false)
@@ -30,19 +28,21 @@ class ParserTest < Test::Unit::TestCase
           File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
             fd.print(chanstr)
           end
-          puts "Test failed for #{f}."
-          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
-          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
-          allok = false
+          assert(
+            false,
+            [
+              "Test failed for #{f}.",
+              "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
+              "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
+            ].join("\n")
+          )
         end
       else
-        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
         File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
           f.print(chanstr)
         end
-        allok = false
+        assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
       end
     end
-    assert(allok)
   end
 end

data/test/tc_sgml_parser.rb ADDED Viewed

@@ -0,0 +1,22 @@
+# encoding: UTF-8
+require 'test/unit'
+require 'mocha/setup'
+require 'feedparser/sgml-parser'
+class SGMLParserTest < Test::Unit::TestCase
+  def test_numerical_charref
+    parser = FeedParser::SGMLParser.new
+    parser.expects(:unknown_charref).with('215')
+    parser.handle_charref('215')
+  end
+  def test_non_numerical_charref
+    parser = FeedParser::SGMLParser.new
+    parser.expects(:handle_data).with('amp')
+    parser.handle_charref('amp')
+  end
+end

data/test/tc_textoutput.rb CHANGED Viewed

@@ -15,12 +15,10 @@ class TextOutputTest < Test::Unit::TestCase
   else
     raise 'source directory not found.'
   end
-  def test_parser
-    allok = true
-    Dir.foreach(SRCDIR) do |f|
-      next if f !~ /.xml$/
-      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
-      puts "Checking #{f}"
+  Dir.foreach(SRCDIR) do |f|
+    next if f !~ /.xml$/
+    testname = 'test_' + File.basename(f).gsub(/\W/, '_')
+    define_method(testname) do
       str = File::read(SRCDIR + '/' + f)
       chan = FeedParser::Feed::new(str)
       chanstr = chan.to_text(false) # localtime set to false
@@ -30,19 +28,21 @@ class TextOutputTest < Test::Unit::TestCase
           File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
             fd.print(chanstr)
           end
-          puts "Test failed for #{f}."
-          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
-          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
-          allok = false
+          assert(
+            false,
+            [
+              "Test failed for #{f}.",
+              "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
+              "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
+            ].join("\n")
+          )
         end
       else
-        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
         File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
           f.print(chanstr)
         end
-        allok = false
+        assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
       end
     end
-    assert(allok)
   end
 end

data/test/tc_textwrappedoutput.rb CHANGED Viewed

@@ -15,12 +15,10 @@ class TextWrappedOutputTest < Test::Unit::TestCase
   else
     raise 'source directory not found.'
   end
-  def test_parser
-    allok = true
-    Dir.foreach(SRCDIR) do |f|
-      next if f !~ /.xml$/
-      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
-      puts "Checking #{f}"
+  Dir.foreach(SRCDIR) do |f|
+    next if f !~ /.xml$/
+    testname = 'test_' + File.basename(f).gsub(/\W/, '_')
+    define_method(testname) do
       str = File::read(SRCDIR + '/' + f)
       chan = FeedParser::Feed::new(str)
       chanstr = chan.to_text(false, 72) # localtime set to false
@@ -30,19 +28,21 @@ class TextWrappedOutputTest < Test::Unit::TestCase
           File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
             fd.print(chanstr)
           end
-          puts "Test failed for #{f}."
-          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
-          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
-          allok = false
+          assert(
+            false,
+            [
+              "Test failed for #{f}.",
+              "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
+              "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
+            ].join("\n")
+          )
         end
       else
-        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
         File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
           f.print(chanstr)
         end
-        allok = false
+        assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
       end
     end
-    assert(allok)
   end
 end

metadata CHANGED Viewed

@@ -1,76 +1,82 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: ruby-feedparser
-version: !ruby/object:Gem::Version
-  version: "0.7"
+version: !ruby/object:Gem::Version
+  version: 0.9.7
 platform: ruby
-authors: []
+authors:
+- Lucas Nussbaum
 autorequire: feedparser
 bindir: bin
 cert_chain: []
-date: 2009-07-27 00:00:00 +02:00
-default_executable:
-dependencies: []
+date: 2021-02-06 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: magic
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Ruby library to parse ATOM and RSS feeds
 email:
 executables: []
 extensions: []
 extra_rdoc_files: []
-files:
-- ChangeLog
-- README
+files:
 - COPYING
+- ChangeLog.md
 - LICENSE
-- setup.rb
+- README
 - Rakefile
-- lib/feedparser/text-output.rb
+- lib/feedparser.rb
+- lib/feedparser/feedparser.rb
 - lib/feedparser/filesizes.rb
 - lib/feedparser/html-output.rb
-- lib/feedparser/rexml_patch.rb
 - lib/feedparser/html2text-parser.rb
-- lib/feedparser/textconverters.rb
-- lib/feedparser/feedparser.rb
+- lib/feedparser/rexml_patch.rb
 - lib/feedparser/sgml-parser.rb
-- lib/feedparser.rb
+- lib/feedparser/text-output.rb
+- lib/feedparser/textconverters.rb
+- lib/feedparser/version.rb
+- setup.rb
 - test/tc_feed_parse.rb
-- test/tc_textoutput.rb
+- test/tc_feeditem.rb
+- test/tc_html2text_parser.rb
 - test/tc_htmloutput.rb
+- test/tc_parser.rb
+- test/tc_sgml_parser.rb
+- test/tc_textoutput.rb
 - test/tc_textwrappedoutput.rb
 - test/ts_feedparser.rb
-- test/tc_parser.rb
 - tools/doctoweb.bash
-has_rdoc: true
 homepage:
 licenses: []
+metadata: {}
 post_install_message:
 rdoc_options: []
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
-  requirements:
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
   - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-  version:
-required_rubygems_version: !ruby/object:Gem::Requirement
-  requirements:
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
   - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-  version:
-requirements:
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements:
 - none
-rubyforge_project:
-rubygems_version: 1.3.4
+rubygems_version: 3.2.5
 signing_key:
-specification_version: 3
+specification_version: 4
 summary: Ruby library to parse ATOM and RSS feeds
 test_files: []