RubyGems - penso-feedparser - Versions diffs - 0.8 - Mend

penso-feedparser 0.8

Files changed (23) hide show

data/COPYING +340 -0
data/ChangeLog +59 -0
data/LICENSE +60 -0
data/README +14 -0
data/Rakefile +85 -0
data/lib/feedparser.rb +28 -0
data/lib/feedparser/feedparser.rb +372 -0
data/lib/feedparser/filesizes.rb +14 -0
data/lib/feedparser/html-output.rb +126 -0
data/lib/feedparser/html2text-parser.rb +413 -0
data/lib/feedparser/rexml_patch.rb +28 -0
data/lib/feedparser/sgml-parser.rb +332 -0
data/lib/feedparser/text-output.rb +108 -0
data/lib/feedparser/textconverters.rb +120 -0
data/setup.rb +1586 -0
data/test/tc_feed_parse.rb +117 -0
data/test/tc_htmloutput.rb +52 -0
data/test/tc_parser.rb +48 -0
data/test/tc_textoutput.rb +48 -0
data/test/tc_textwrappedoutput.rb +48 -0
data/test/ts_feedparser.rb +12 -0
data/tools/doctoweb.bash +30 -0
metadata +85 -0

data/test/tc_feed_parse.rb ADDED Viewed

@@ -0,0 +1,117 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'test/unit'
+require 'feedparser'
+# This class includes some basic tests of the parser. More detailed testing is
+# done by tc_parser.rb.
+class FeedParserTest < Test::Unit::TestCase
+  # From http://my.netscape.com/publish/formats/rss-spec-0.91.html
+  def test_parse_rss091_1
+    ch = FeedParser::Feed::new <<-EOF
+<?xml version="1.0"?>
+<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
+<rss version="0.91">
+  <channel>
+    <language>en</language>
+    <description>News and commentary from the cross-platform scripting community.</description>
+    <link>http://www.scripting.com/</link>
+    <title>Scripting News</title>
+    <image>
+      <link>http://www.scripting.com/</link>
+      <title>Scripting News</title>
+      <url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
+    </image>
+  </channel>
+</rss>
+    EOF
+    assert_equal('Scripting News', ch.title)
+    assert_equal('http://www.scripting.com/', ch.link)
+    assert_equal('News and commentary from the cross-platform scripting community.', ch.description)
+    assert_equal([], ch.items)
+  end
+  def test_parse_rss091_complete
+    ch = FeedParser::Feed::new <<-EOF
+<?xml version="1.0"?>
+<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
+<rss version="0.91">
+<channel>
+<copyright>Copyright 1997-1999 UserLand Software, Inc.</copyright>
+<pubDate>Thu, 08 Jul 1999 07:00:00 GMT</pubDate>
+<lastBuildDate>Thu, 08 Jul 1999 16:20:26 GMT</lastBuildDate>
+<docs>http://my.userland.com/stories/storyReader$11</docs>
+<description>News and commentary from the cross-platform scripting community.</description>
+<link>http://www.scripting.com/</link>
+<title>Scripting News</title>
+<image>
+  <link>http://www.scripting.com/</link>
+  <title>Scripting News</title>
+  <url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
+  <height>40</height>
+  <width>78</width>
+  <description>What is this used for?</description>
+</image>
+<managingEditor>dave@userland.com (Dave Winer)</managingEditor>
+<webMaster>dave@userland.com (Dave Winer)</webMaster>
+<language>en-us</language>
+<skipHours>
+  <hour>6</hour><hour>7</hour><hour>8</hour><hour>9</hour><hour>10</hour><hour>11</hour>
+</skipHours>
+<skipDays>
+  <day>Sunday</day>
+</skipDays>
+<rating>(PICS-1.1 "http://www.rsac.org/ratingsv01.html" l gen true comment "RSACi North America Server" for "http://www.rsac.org" on "1996.04.16T08:15-0500" r (n 0 s 0 v 0 l 0))</rating>
+<item>
+  <title>stuff</title>
+  <link>http://bar</link>
+  <description>This is an article about some stuff</description>
+</item>
+<item>
+  <title>second item's title</title>
+  <link>http://link2</link>
+  <description>aa bb cc
+  dd ee ff</description>
+</item>
+<textinput>
+  <title>Search Now!</title>
+  <description>Enter your search &lt;terms&gt;</description>
+  <name>find</name>
+  <link>http://my.site.com/search.cgi</link>
+  </textinput>
+</channel>
+</rss>
+    EOF
+    assert_equal('Scripting News', ch.title)
+    assert_equal('http://www.scripting.com/', ch.link)
+    assert_equal('News and commentary from the cross-platform scripting community.', ch.description)
+    assert_equal(2, ch.items.length)
+    assert_equal('http://bar', ch.items[0].link)
+    assert_equal('<p>This is an article about some stuff</p>', ch.items[0].content)
+    assert_equal('stuff', ch.items[0].title)
+    assert_equal('http://link2', ch.items[1].link)
+    assert_equal("<p>aa bb cc\n  dd ee ff</p>", ch.items[1].content)
+    assert_equal('second item\'s title', ch.items[1].title)
+  end
+  def test_enclosures
+    ch = FeedParser::Feed::new <<-EOF
+<?xml version="1.0"?>
+<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
+<rss version="0.91">
+<channel>
+<item>
+  <enclosure url="url1" length="1" type="type1"/>
+  <enclosure url="url2" type="type2"/>
+  <enclosure length="3" type="type3"/>
+  <enclosure url="url1" length="1"/>
+</item>
+</channel>
+</rss>
+    EOF
+    # the third one should be removed because an enclosure should have an url, or it's useless.
+    assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
+  end
+end

data/test/tc_htmloutput.rb ADDED Viewed

@@ -0,0 +1,52 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+$:.unshift File.join(File.dirname(__FILE__), '..', 'test')
+$:.unshift File.join(File.dirname(__FILE__), 'lib')
+$:.unshift File.join(File.dirname(__FILE__), 'test')
+require 'test/unit'
+require 'feedparser'
+require 'feedparser/html-output'
+class HTMLOutputTest < Test::Unit::TestCase
+  if File::directory?('test/source')
+    SRCDIR = 'test/source'
+    DSTDIR = 'test/html_output'
+  elsif File::directory?('source')
+    SRCDIR = 'source'
+    DSTDIR = 'html_output'
+  else
+    raise 'source directory not found.'
+  end
+  def test_parser
+    allok = true
+    Dir.foreach(SRCDIR) do |f|
+      next if f !~ /.xml$/
+      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
+      puts "Checking #{f}"
+      str = File::read(SRCDIR + '/' + f)
+      chan = FeedParser::Feed::new(str)
+      chanstr = chan.to_html(false)
+      if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        if output != chanstr
+          File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
+            fd.print(chanstr)
+          end
+          puts "Test failed for #{f}."
+          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
+          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
+          allok = false
+        end
+      else
+        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
+        File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
+          f.print(chanstr)
+        end
+        allok = false
+      end
+    end
+    assert(allok)
+  end
+end

data/test/tc_parser.rb ADDED Viewed

@@ -0,0 +1,48 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'test/unit'
+require 'feedparser'
+class ParserTest < Test::Unit::TestCase
+  if File::directory?('test/source')
+    SRCDIR = 'test/source'
+    DSTDIR = 'test/parser_output'
+  elsif File::directory?('source')
+    SRCDIR = 'source'
+    DSTDIR = 'parser_output'
+  else
+    raise 'source directory not found.'
+  end
+  def test_parser
+    allok = true
+    Dir.foreach(SRCDIR) do |f|
+      next if f !~ /.xml$/
+      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
+      puts "Checking #{f}"
+      str = File::read(SRCDIR + '/' + f)
+      chan = FeedParser::Feed::new(str)
+      chanstr = chan.to_s(false)
+      if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        if output != chanstr
+          File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
+            fd.print(chanstr)
+          end
+          puts "Test failed for #{f}."
+          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
+          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
+          allok = false
+        end
+      else
+        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
+        File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
+          f.print(chanstr)
+        end
+        allok = false
+      end
+    end
+    assert(allok)
+  end
+end

data/test/tc_textoutput.rb ADDED Viewed

@@ -0,0 +1,48 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'test/unit'
+require 'feedparser'
+class TextOutputTest < Test::Unit::TestCase
+  if File::directory?('test/source')
+    SRCDIR = 'test/source'
+    DSTDIR = 'test/text_output'
+  elsif File::directory?('source')
+    SRCDIR = 'source'
+    DSTDIR = 'text_output'
+  else
+    raise 'source directory not found.'
+  end
+  def test_parser
+    allok = true
+    Dir.foreach(SRCDIR) do |f|
+      next if f !~ /.xml$/
+      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
+      puts "Checking #{f}"
+      str = File::read(SRCDIR + '/' + f)
+      chan = FeedParser::Feed::new(str)
+      chanstr = chan.to_text(false) # localtime set to false
+      if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        if output != chanstr
+          File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
+            fd.print(chanstr)
+          end
+          puts "Test failed for #{f}."
+          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
+          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
+          allok = false
+        end
+      else
+        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
+        File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
+          f.print(chanstr)
+        end
+        allok = false
+      end
+    end
+    assert(allok)
+  end
+end

data/test/tc_textwrappedoutput.rb ADDED Viewed

@@ -0,0 +1,48 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+require 'test/unit'
+require 'feedparser'
+class TextWrappedOutputTest < Test::Unit::TestCase
+  if File::directory?('test/source')
+    SRCDIR = 'test/source'
+    DSTDIR = 'test/textwrapped_output'
+  elsif File::directory?('source')
+    SRCDIR = 'source'
+    DSTDIR = 'textwrapped_output'
+  else
+    raise 'source directory not found.'
+  end
+  def test_parser
+    allok = true
+    Dir.foreach(SRCDIR) do |f|
+      next if f !~ /.xml$/
+      next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
+      puts "Checking #{f}"
+      str = File::read(SRCDIR + '/' + f)
+      chan = FeedParser::Feed::new(str)
+      chanstr = chan.to_text(false, 72) # localtime set to false
+      if File::exist?(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        output = File::read(DSTDIR + '/' + f.gsub(/.xml$/, '.output'))
+        if output != chanstr
+          File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
+            fd.print(chanstr)
+          end
+          puts "Test failed for #{f}."
+          puts "  Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
+          puts "  Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
+          allok = false
+        end
+      else
+        puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
+        File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
+          f.print(chanstr)
+        end
+        allok = false
+      end
+    end
+    assert(allok)
+  end
+end

data/test/ts_feedparser.rb ADDED Viewed

@@ -0,0 +1,12 @@
+#!/usr/bin/ruby -w
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+$:.unshift File.join(File.dirname(__FILE__), '..', 'test')
+$:.unshift File.join(File.dirname(__FILE__), 'lib')
+$:.unshift File.join(File.dirname(__FILE__), 'test')
+require 'tc_feed_parse'
+require 'tc_htmloutput'
+require 'tc_parser'
+require 'tc_textoutput'
+require 'tc_textwrappedoutput'

data/tools/doctoweb.bash ADDED Viewed

@@ -0,0 +1,30 @@
+#!/bin/bash
+if [ -z $CVSDIR ]; then
+	CVSDIR=$HOME/dev/ruby-feedparser/website
+fi
+TARGET=$CVSDIR/rdoc
+echo "Copying rdoc documentation to $TARGET."
+if [ ! -d $TARGET ]; then
+	echo "$TARGET doesn't exist, exiting."
+	exit 1
+fi
+rsync -a rdoc/ $TARGET/
+echo "###########################################################"
+echo "CVS status :"
+cd $TARGET
+svn st
+echo "CVS Adding files."
+while [ $(svn st | grep "^? " | wc -l) -gt 0 ]; do
+	svn add $(svn st | grep "^? " | awk '{print $2}')
+done
+echo "###########################################################"
+echo "CVS status after adding missing files:"
+svn st
+echo "Commit changes now with"
+echo "# (cd $TARGET && svn commit -m \"rdoc update\")"
+exit 0

metadata ADDED Viewed

@@ -0,0 +1,85 @@
+--- !ruby/object:Gem::Specification
+name: penso-feedparser
+version: !ruby/object:Gem::Version
+  hash: 27
+  prerelease:
+  segments:
+  - 0
+  - 8
+  version: "0.8"
+platform: ruby
+authors:
+- gna, penso
+autorequire: feedparser
+bindir: bin
+cert_chain: []
+date: 2011-12-20 00:00:00 Z
+dependencies: []
+description: Ruby library to parse ATOM and RSS feeds
+email:
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ChangeLog
+- README
+- COPYING
+- LICENSE
+- setup.rb
+- Rakefile
+- lib/feedparser.rb
+- lib/feedparser/filesizes.rb
+- lib/feedparser/sgml-parser.rb
+- lib/feedparser/html2text-parser.rb
+- lib/feedparser/rexml_patch.rb
+- lib/feedparser/feedparser.rb
+- lib/feedparser/html-output.rb
+- lib/feedparser/text-output.rb
+- lib/feedparser/textconverters.rb
+- test/tc_feed_parse.rb
+- test/tc_textwrappedoutput.rb
+- test/tc_htmloutput.rb
+- test/ts_feedparser.rb
+- test/tc_textoutput.rb
+- test/tc_parser.rb
+- tools/doctoweb.bash
+homepage:
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+requirements:
+- none
+rubyforge_project:
+rubygems_version: 1.8.6
+signing_key:
+specification_version: 3
+summary: Ruby library to parse ATOM and RSS feeds
+test_files: []