RubyGems - ruby-feedparser - Versions diffs - 0.7 - Mend

ruby-feedparser 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

data/COPYING +340 -0
data/ChangeLog +59 -0
data/LICENSE +60 -0
data/README +14 -0
data/Rakefile +84 -0
data/lib/feedparser.rb +28 -0
data/lib/feedparser/feedparser.rb +343 -0
data/lib/feedparser/filesizes.rb +14 -0
data/lib/feedparser/html-output.rb +126 -0
data/lib/feedparser/html2text-parser.rb +413 -0
data/lib/feedparser/rexml_patch.rb +28 -0
data/lib/feedparser/sgml-parser.rb +332 -0
data/lib/feedparser/text-output.rb +108 -0
data/lib/feedparser/textconverters.rb +120 -0
data/setup.rb +1586 -0
data/test/tc_feed_parse.rb +117 -0
data/test/tc_htmloutput.rb +52 -0
data/test/tc_parser.rb +48 -0
data/test/tc_textoutput.rb +48 -0
data/test/tc_textwrappedoutput.rb +48 -0
data/test/ts_feedparser.rb +12 -0
data/tools/doctoweb.bash +30 -0
metadata +76 -0

data/test/tc_feed_parse.rb ADDED Viewed

@@ -0,0 +1,117 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'test/unit'
+require 'feedparser'
+# Basic tests of the parser, using small RSS 0.91 documents embedded below.
+# More detailed testing is done by tc_parser.rb.
+class FeedParserTest < Test::Unit::TestCase
+  # Channel without any item. Feed taken from http://my.netscape.com/publish/formats/rss-spec-0.91.html
+  def test_parse_rss091_1
+    ch = FeedParser::Feed::new <<-EOF
+<?xml version="1.0"?>
+<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
+<rss version="0.91">
+  <channel>
+    <language>en</language>
+    <description>News and commentary from the cross-platform scripting community.</description>
+    <link>http://www.scripting.com/</link>
+    <title>Scripting News</title>
+    <image>
+      <link>http://www.scripting.com/</link>
+      <title>Scripting News</title>
+      <url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
+    </image>
+  </channel>
+</rss>
+    EOF
+    assert_equal('Scripting News', ch.title)
+    assert_equal('http://www.scripting.com/', ch.link)
+    assert_equal('News and commentary from the cross-platform scripting community.', ch.description)
+    assert_equal([], ch.items) # the channel above contains no <item>
+  end
+  def test_parse_rss091_complete # channel with image, skipHours/skipDays, rating, two items and a textinput
+    ch = FeedParser::Feed::new <<-EOF
+<?xml version="1.0"?>
+<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
+<rss version="0.91">
+<channel>
+<copyright>Copyright 1997-1999 UserLand Software, Inc.</copyright>
+<pubDate>Thu, 08 Jul 1999 07:00:00 GMT</pubDate>
+<lastBuildDate>Thu, 08 Jul 1999 16:20:26 GMT</lastBuildDate>
+<docs>http://my.userland.com/stories/storyReader$11</docs>
+<description>News and commentary from the cross-platform scripting community.</description>
+<link>http://www.scripting.com/</link>
+<title>Scripting News</title>
+<image>
+  <link>http://www.scripting.com/</link>
+  <title>Scripting News</title>
+  <url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
+  <height>40</height>
+  <width>78</width>
+  <description>What is this used for?</description>
+</image>
+<managingEditor>dave@userland.com (Dave Winer)</managingEditor>
+<webMaster>dave@userland.com (Dave Winer)</webMaster>
+<language>en-us</language>
+<skipHours>
+  <hour>6</hour><hour>7</hour><hour>8</hour><hour>9</hour><hour>10</hour><hour>11</hour>
+</skipHours>
+<skipDays>
+  <day>Sunday</day>
+</skipDays>
+<rating>(PICS-1.1 "http://www.rsac.org/ratingsv01.html" l gen true comment "RSACi North America Server" for "http://www.rsac.org" on "1996.04.16T08:15-0500" r (n 0 s 0 v 0 l 0))</rating>
+<item>
+  <title>stuff</title>
+  <link>http://bar</link>
+  <description>This is an article about some stuff</description>
+</item>
+<item>
+  <title>second item's title</title>
+  <link>http://link2</link>
+  <description>aa bb cc
+  dd ee ff</description>
+</item>
+<textinput>
+  <title>Search Now!</title>
+  <description>Enter your search &lt;terms&gt;</description>
+  <name>find</name>
+  <link>http://my.site.com/search.cgi</link>
+  </textinput>
+</channel>
+</rss>
+    EOF
+    assert_equal('Scripting News', ch.title)
+    assert_equal('http://www.scripting.com/', ch.link)
+    assert_equal('News and commentary from the cross-platform scripting community.', ch.description)
+    assert_equal(2, ch.items.length)
+    assert_equal('http://bar', ch.items[0].link)
+    assert_equal('<p>This is an article about some stuff</p>', ch.items[0].content) # content is the description wrapped in <p>
+    assert_equal('stuff', ch.items[0].title)
+    assert_equal('http://link2', ch.items[1].link)
+    assert_equal("<p>aa bb cc\n  dd ee ff</p>", ch.items[1].content) # the line break and indentation inside the description are kept
+    assert_equal('second item\'s title', ch.items[1].title)
+  end
+  def test_enclosures # one item carrying four <enclosure> elements with different attribute sets
+    ch = FeedParser::Feed::new <<-EOF
+<?xml version="1.0"?>
+<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
+<rss version="0.91">
+<channel>
+<item>
+  <enclosure url="url1" length="1" type="type1"/>
+  <enclosure url="url2" type="type2"/>
+  <enclosure length="3" type="type3"/>
+  <enclosure url="url1" length="1"/>
+</item>
+</channel>
+</rss>
+    EOF
+    # The third enclosure has no url, so it is useless and must be dropped. Each remaining entry is [url, length, type], with nil for a missing attribute.
+    assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
+  end
+end

data/test/tc_htmloutput.rb ADDED Viewed

@@ -0,0 +1,52 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+$:.unshift File.join(File.dirname(__FILE__), '..', 'test')
+$:.unshift File.join(File.dirname(__FILE__), 'lib')
+$:.unshift File.join(File.dirname(__FILE__), 'test')
+require 'test/unit'
+require 'feedparser'
+require 'feedparser/html-output'
+class HTMLOutputTest < Test::Unit::TestCase
+  if File::directory?('test/source')
+    SRCDIR = 'test/source'
+    DSTDIR = 'test/html_output'
+  elsif File::directory?('source')
+    SRCDIR = 'source'
+    DSTDIR = 'html_output'
+  else
+    raise 'source directory not found.'
+  end
+  def test_parser
+    allok = true
+    Dir.foreach(SRCDIR) do |f|
+      next if f !~ /.xml$/
+      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
+      puts "Checking #{f}"
+      str = File::read(SRCDIR + '/' + f)
+      chan = FeedParser::Feed::new(str)
+      chanstr = chan.to_html(false)
+      if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        if output != chanstr
+          File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
+            fd.print(chanstr)
+          end
+          puts "Test failed for #{f}."
+          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
+          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
+          allok = false
+        end
+      else
+        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
+        File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
+          f.print(chanstr)
+        end
+        allok = false
+      end
+    end
+    assert(allok)
+  end
+end

data/test/tc_parser.rb ADDED Viewed

@@ -0,0 +1,48 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'test/unit'
+require 'feedparser'
+class ParserTest < Test::Unit::TestCase
+  if File::directory?('test/source')
+    SRCDIR = 'test/source'
+    DSTDIR = 'test/parser_output'
+  elsif File::directory?('source')
+    SRCDIR = 'source'
+    DSTDIR = 'parser_output'
+  else
+    raise 'source directory not found.'
+  end
+  def test_parser
+    allok = true
+    Dir.foreach(SRCDIR) do |f|
+      next if f !~ /.xml$/
+      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
+      puts "Checking #{f}"
+      str = File::read(SRCDIR + '/' + f)
+      chan = FeedParser::Feed::new(str)
+      chanstr = chan.to_s(false)
+      if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        if output != chanstr
+          File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
+            fd.print(chanstr)
+          end
+          puts "Test failed for #{f}."
+          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
+          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
+          allok = false
+        end
+      else
+        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
+        File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
+          f.print(chanstr)
+        end
+        allok = false
+      end
+    end
+    assert(allok)
+  end
+end

data/test/tc_textoutput.rb ADDED Viewed

@@ -0,0 +1,48 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'test/unit'
+require 'feedparser'
+class TextOutputTest < Test::Unit::TestCase
+  if File::directory?('test/source')
+    SRCDIR = 'test/source'
+    DSTDIR = 'test/text_output'
+  elsif File::directory?('source')
+    SRCDIR = 'source'
+    DSTDIR = 'text_output'
+  else
+    raise 'source directory not found.'
+  end
+  def test_parser
+    allok = true
+    Dir.foreach(SRCDIR) do |f|
+      next if f !~ /.xml$/
+      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
+      puts "Checking #{f}"
+      str = File::read(SRCDIR + '/' + f)
+      chan = FeedParser::Feed::new(str)
+      chanstr = chan.to_text(false) # localtime set to false
+      if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        if output != chanstr
+          File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
+            fd.print(chanstr)
+          end
+          puts "Test failed for #{f}."
+          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
+          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
+          allok = false
+        end
+      else
+        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
+        File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
+          f.print(chanstr)
+        end
+        allok = false
+      end
+    end
+    assert(allok)
+  end
+end

data/test/tc_textwrappedoutput.rb ADDED Viewed

@@ -0,0 +1,48 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'test/unit'
+require 'feedparser'
+class TextWrappedOutputTest < Test::Unit::TestCase
+  if File::directory?('test/source')
+    SRCDIR = 'test/source'
+    DSTDIR = 'test/textwrapped_output'
+  elsif File::directory?('source')
+    SRCDIR = 'source'
+    DSTDIR = 'textwrapped_output'
+  else
+    raise 'source directory not found.'
+  end
+  def test_parser
+    allok = true
+    Dir.foreach(SRCDIR) do |f|
+      next if f !~ /.xml$/
+      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
+      puts "Checking #{f}"
+      str = File::read(SRCDIR + '/' + f)
+      chan = FeedParser::Feed::new(str)
+      chanstr = chan.to_text(false, 72) # localtime set to false
+      if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        if output != chanstr
+          File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
+            fd.print(chanstr)
+          end
+          puts "Test failed for #{f}."
+          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
+          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
+          allok = false
+        end
+      else
+        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
+        File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
+          f.print(chanstr)
+        end
+        allok = false
+      end
+    end
+    assert(allok)
+  end
+end

data/test/ts_feedparser.rb ADDED Viewed

@@ -0,0 +1,12 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+$:.unshift File.join(File.dirname(__FILE__), '..', 'test')
+$:.unshift File.join(File.dirname(__FILE__), 'lib')
+$:.unshift File.join(File.dirname(__FILE__), 'test')
+require 'tc_feed_parse'
+require 'tc_htmloutput'
+require 'tc_parser'
+require 'tc_textoutput'
+require 'tc_textwrappedoutput'

data/tools/doctoweb.bash ADDED Viewed

@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copies the generated rdoc tree into the website working copy and schedules
+# every unversioned file there for addition. The commit is left to the user.
+#
+# CVSDIR may be set in the environment to point at the website checkout.
+# The name is historical: the commands below use svn.
+if [ -z "$CVSDIR" ]; then
+	CVSDIR="$HOME/dev/ruby-feedparser/website"
+fi
+TARGET="$CVSDIR/rdoc"
+echo "Copying rdoc documentation to $TARGET."
+if [ ! -d "$TARGET" ]; then
+	echo "$TARGET doesn't exist, exiting."
+	exit 1
+fi
+rsync -a rdoc/ "$TARGET/"
+echo "###########################################################"
+echo "SVN status :"
+# Stop if the directory cannot be entered, so svn never runs somewhere else.
+cd "$TARGET" || exit 1
+svn st
+echo "SVN Adding files."
+# Repeat until svn reports no unversioned ("?") entries.
+while [ "$(svn st | grep -c "^? ")" -gt 0 ]; do
+	# Strip the status column and read one path per line, so that paths
+	# containing spaces are passed to svn add intact.
+	svn st | grep "^? " | sed 's/^?[[:space:]]*//' | while IFS= read -r path; do
+		svn add "$path" || exit 1
+	done || { echo "svn add failed, exiting."; exit 1; }
+done
+echo "###########################################################"
+echo "SVN status after adding missing files:"
+svn st
+echo "Commit changes now with"
+echo "# (cd $TARGET && svn commit -m \"rdoc update\")"
+exit 0

metadata ADDED Viewed

@@ -0,0 +1,76 @@
+--- !ruby/object:Gem::Specification
+name: ruby-feedparser
+version: !ruby/object:Gem::Version
+  version: "0.7"
+platform: ruby
+authors: []
+autorequire: feedparser
+bindir: bin
+cert_chain: []
+date: 2009-07-27 00:00:00 +02:00
+default_executable:
+dependencies: []
+description: Ruby library to parse ATOM and RSS feeds
+email:
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ChangeLog
+- README
+- COPYING
+- LICENSE
+- setup.rb
+- Rakefile
+- lib/feedparser/text-output.rb
+- lib/feedparser/filesizes.rb
+- lib/feedparser/html-output.rb
+- lib/feedparser/rexml_patch.rb
+- lib/feedparser/html2text-parser.rb
+- lib/feedparser/textconverters.rb
+- lib/feedparser/feedparser.rb
+- lib/feedparser/sgml-parser.rb
+- lib/feedparser.rb
+- test/tc_feed_parse.rb
+- test/tc_textoutput.rb
+- test/tc_htmloutput.rb
+- test/tc_textwrappedoutput.rb
+- test/ts_feedparser.rb
+- test/tc_parser.rb
+- tools/doctoweb.bash
+has_rdoc: true
+homepage:
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements:
+- none
+rubyforge_project:
+rubygems_version: 1.3.4
+signing_key:
+specification_version: 3
+summary: Ruby library to parse ATOM and RSS feeds
+test_files: []