RubyGems - rfeedfinder - Versions diffs - 0.9.0 - Mend

rfeedfinder 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

data/History.txt +4 -0
data/License.txt +20 -0
data/Manifest.txt +16 -0
data/README.txt +3 -0
data/Rakefile +131 -0
data/lib/rfeedfinder/version.rb +9 -0
data/lib/rfeedfinder.rb +232 -0
data/scripts/txt2html +67 -0
data/setup.rb +1585 -0
data/test/test_helper.rb +10 -0
data/test/test_rfeedfinder.rb +102 -0
data/website/index.html +150 -0
data/website/index.txt +45 -0
data/website/javascripts/rounded_corners_lite.inc.js +285 -0
data/website/stylesheets/screen.css +138 -0
data/website/template.rhtml +48 -0
metadata +67 -0

data/History.txt ADDED Viewed

@@ -0,0 +1,4 @@
+== 0.0.1 2007-08-08
+* 1 major enhancement:
+  * Initial release

data/License.txt ADDED Viewed

@@ -0,0 +1,20 @@
+Copyright (c) 2007 FIXME full name
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/Manifest.txt ADDED Viewed

@@ -0,0 +1,16 @@
+History.txt
+License.txt
+Manifest.txt
+README.txt
+Rakefile
+lib/rfeedfinder.rb
+lib/rfeedfinder/version.rb
+scripts/txt2html
+setup.rb
+test/test_helper.rb
+test/test_rfeedfinder.rb
+website/index.html
+website/index.txt
+website/javascripts/rounded_corners_lite.inc.js
+website/stylesheets/screen.css
+website/template.rhtml

data/README.txt ADDED Viewed

@@ -0,0 +1,3 @@
+README for rfeedfinder
+======================

data/Rakefile ADDED Viewed

@@ -0,0 +1,131 @@
+require 'rubygems'
+require 'rake'
+require 'rake/clean'
+require 'rake/testtask'
+require 'rake/packagetask'
+require 'rake/gempackagetask'
+require 'rake/rdoctask'
+require 'rake/contrib/rubyforgepublisher'
+require 'fileutils'
+require 'hoe'
+include FileUtils
+require File.join(File.dirname(__FILE__), 'lib', 'rfeedfinder', 'version')
+AUTHOR = 'Alexandre Girard'  # can also be an array of Authors
+EMAIL = "alx.girard@gmail.com"
+DESCRIPTION = "rFeedFinder uses RSS autodiscovery, Atom autodiscovery, spidering, URL correction, and Web service queries -- whatever it takes -- to find the feed."
+GEM_NAME = 'rfeedfinder' # what ppl will type to install your gem
+@config_file = "~/.rubyforge/user-config.yml"
+@config = nil
+def rubyforge_username
+  unless @config
+    begin
+      @config = YAML.load(File.read(File.expand_path(@config_file)))
+    rescue
+      puts <<-EOS
+ERROR: No rubyforge config file found: #{@config_file}"
+Run 'rubyforge setup' to prepare your env for access to Rubyforge
+ - See http://newgem.rubyforge.org/rubyforge.html for more details
+      EOS
+      exit
+    end
+  end
+  @rubyforge_username ||= @config["username"]
+end
+# RubyForge project identifier and the URLs derived from it.
+RUBYFORGE_PROJECT = 'rfeedfinder' # The unix name for your project
+HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
+DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
+NAME = "rfeedfinder"
+# Optional revision suffix for VERS; nil means nothing is appended.
+REV = nil
+# UNCOMMENT IF REQUIRED:
+# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
+# Release version: the library version string plus ".REV" when REV is set.
+VERS = Rfeedfinder::VERSION::STRING + (REV ? ".#{REV}" : "")
+# Additional file patterns registered with rake/clean's CLEAN list.
+CLEAN.include ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']
+# RDoc command-line options; not referenced by any task defined in this Rakefile.
+RDOC_OPTS = ['--quiet', '--title', 'rfeedfinder documentation',
+    "--opname", "index.html",
+    "--line-numbers",
+    "--main", "README",
+    "--inline-source"]
+class Hoe
+  def extra_deps
+    @extra_deps.reject { |x| Array(x).first == 'hoe' }
+  end
+end
+# Generate all the Rake tasks
+# Run 'rake -T' to see list of generated tasks (from gem root directory)
+# The block copies the metadata constants defined above onto the Hoe spec.
+hoe = Hoe.new(GEM_NAME, VERS) do |p|
+  p.author = AUTHOR
+  p.description = DESCRIPTION
+  p.email = EMAIL
+  p.summary = DESCRIPTION
+  p.url = HOMEPATH
+  p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
+  p.test_globs = ["test/**/test_*.rb"]
+  p.clean_globs |= CLEAN  #An array of file patterns to delete on clean.
+  # == Optional
+  # Release notes: the first two paragraphs of History.txt.
+  p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
+  #p.extra_deps = []     # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
+  #p.spec_extras = {}    # A hash of extra values to set in the gemspec.
+end
+# Same History.txt excerpt, reused by the :deploy task as the suggested tag comment.
+CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")
+# Project path segment: just the project name when it equals the gem name,
+# otherwise "project/gem".
+PATH    = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
+# Remote rdoc directory: PATH with the leading project name removed, plus 'rdoc'.
+hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
+desc 'Generate website files'
+task :website_generate do
+  # Render every website/**/*.txt through scripts/txt2html into a sibling .html file.
+  Dir['website/**/*.txt'].each do |txt|
+    sh %{ ruby scripts/txt2html #{txt} > #{txt.gsub(/txt$/,'html')} }
+  end
+end
+desc 'Upload website files to rubyforge'
+task :website_upload do
+  # rsync the local website/ directory to the project's directory on rubyforge.org,
+  # logging in as the user returned by rubyforge_username.
+  host = "#{rubyforge_username}@rubyforge.org"
+  remote_dir = "/var/www/gforge-projects/#{PATH}/"
+  local_dir = 'website'
+  sh %{rsync -aCv #{local_dir}/ #{host}:#{remote_dir}}
+end
+desc 'Generate and upload website files'
+task :website => [:website_generate, :website_upload, :publish_docs]
+desc 'Release the website and new gem version'
+task :deploy => [:check_version, :website, :release] do
+  # After the prerequisites have run, print a reminder with the svn command
+  # that tags this release and a suggested commit comment.
+  puts "Remember to create SVN tag:"
+  puts "svn copy svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk " +
+    "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
+  puts "Suggested comment:"
+  puts "Tagging release #{CHANGES}"
+end
+desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
+task :local_deploy => [:website_generate, :install_gem]
+task :check_version do
+  unless ENV['VERSION']
+    puts 'Must pass a VERSION=x.y.z release version'
+    exit
+  end
+  unless ENV['VERSION'] == VERS
+    puts "Please update your version.rb to match the release version, currently #{VERS}"
+    exit
+  end
+end
+# Catch-all rule: a task name containing "test:<pattern>" runs
+# test/test_rfeedfinder.rb restricted to test methods matching /<pattern>/.
+rule "" do |t|
+  # test:file:method
+  if /test:(.*)(:([^.]+))?$/.match(t.name)
+    # Only the first colon-separated argument after "test" is used.
+    # NOTE(review): the header comment suggests a file:method form, but the test
+    # file name is fixed below and any further arguments are ignored - confirm intent.
+    arguments = t.name.split(":")[1..-1]
+    test_name = arguments.first
+    run_file_name = "test_rfeedfinder.rb"
+    sh "ruby -Ilib:test test/#{run_file_name} -n /#{test_name}/"
+  end
+end

data/lib/rfeedfinder/version.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module Rfeedfinder #:nodoc:
+  module VERSION #:nodoc:
+    MAJOR = 0
+    MINOR = 9
+    TINY  = 0
+    STRING = [MAJOR, MINOR, TINY].join('.')
+  end
+end

data/lib/rfeedfinder.rb ADDED Viewed

@@ -0,0 +1,232 @@
+require File.dirname(__FILE__) + '/rfeedfinder/version.rb'
+require 'net/http'
+require 'rubygems'
+require 'open-uri'
+require 'hpricot'
+require 'timeout'
+module Rfeedfinder
+  module_function
+  def makeFullURI(uri)
+    uri = uri.strip.sub(/^feed(.*)/, 'http\1').downcase
+    if /^http|https/.match(uri)
+      return uri
+    else
+      return "http://" << uri
+    end
+  end
+  def getLinks(data, baseuri)
+    return searchLinks(data, baseuri, "[@rel=alternate]&[@type=xml]&[@href=http]")
+  end
+  def getALinks(data, baseuri)
+    return searchLinks(data, baseuri, "a")
+  end
+  def getFrameLinks(data, baseuri)
+    links = searchLinks(data, baseuri, "frame")
+    links += searchLinks(data, baseuri, "FRAME")
+    return links
+  end
+  def searchLinks(data, baseuri, regexp)
+    links = []
+    data.search(regexp).map!{|link|
+      if !link.to_s.strip.empty?
+        uri = link[:href].to_s
+        uri = link[:src].to_s if uri.empty?
+        uri = link[:SRC].to_s if uri.empty?
+        if !uri.strip.empty? and uri !~ /^javascript/
+          uri = URI.join(baseuri, uri).to_s if uri =~ /^\//
+          links << uri
+        end
+      end
+    }
+    #links.each{|link| puts "searchLinks: #{link}"}
+    return links.uniq
+  end
+  def getLocalLinks(links, baseuri)
+    locallinks = []
+    links.each do |link|
+      locallinks << URI.join(baseuri, link).to_s if link =~ /^\//
+    end
+    links = links.select{|link| link !~ /^\//} #remove local links from link array
+    return [links, locallinks]
+  end
+  def isFeedLink?(link)
+    return link.downcase =~ /\.rss$|\.rdf$|\.xml$|\.atom$/
+  end
+  def isXMLRelatedLink?(link)
+    return link.downcase =~ /rss|rdf|xml|atom/
+  end
+  def tryBrokenRedirect(data)
+    newuris = (data/:newLocation)
+    if !newuris.empty?
+      return newuris[0].strip
+    end
+  end
+  def isFeedData?(data)
+    # if no html tag and rss, rdf or feed tag, it's a feed
+    return ((data/:html).empty? and (!(data/:rss).nil? or !(data/:rdf).nil? or !(data/:feed).nil?))
+  end
+  def isFeed?(uri)
+    uri.gsub!(/\/\/www\d\./, "//www.")
+    begin
+      protocol = URI.split(uri)
+      return false if !protocol[0].index(/^[http|https]/)
+    rescue
+      # URI error
+      return false
+    end
+    begin
+      html = Net::HTTP.get(URI.parse(uri))
+      data = Hpricot(open(fulluri), :xml => true)
+      return isFeedData?(data)
+    rescue Timeout::Error
+      return false
+    rescue
+      return false
+    end
+  end
+  def getFeedsFromSyndic8(uri)
+    feeds = []
+    begin
+      server = Syndic8.new
+      feedids = server.find_feeds(uri)
+      infolist = server.feed_info(feedids, ['headlines_rank','status','dataurl'])
+      infolist.sort_by{|feedInfo| feedInfo[:headlines_rank]}
+      infolist.each do |feed|
+        feeds << feed[:dataurl] if feed[:status]=='Syndicated'
+      end
+    rescue
+    end
+    return feeds
+  end
+  def feeds(uri, all=false, querySyndic8=false, _recurs=nil)
+    _recurs = [uri] if _recurs.nil?
+    fulluri = makeFullURI(uri)
+    begin
+      html = Net::HTTP.get(URI.parse(fulluri))
+      data = Hpricot(open(fulluri), :xml => true)
+    rescue Timeout::Error
+      return []
+    rescue => err
+      puts "Error while opening #{fulluri} with Hpricot: " << $!
+      return []
+    end
+    # is this already a feed?
+    return [fulluri] if isFeedData?(data)
+    #verify redirection
+    newuri = tryBrokenRedirect(data)
+    if !newuri.nil? and !newuri.empty?
+      unless _recurs.include?(newuri)
+        _recurs << newuri
+        return feeds(newuri, all=all, querySyndic8=querySyndic8, _recurs=_recurs)
+      end
+    end
+    #verify frameset
+    frames = getFrameLinks(data, fulluri)
+    frames.each {|newuri|
+      if !newuri.nil? and !newuri.empty?
+        unless _recurs.include?(newuri)
+          _recurs << newuri
+          return feeds(newuri, all=all, querySyndic8=querySyndic8, _recurs=_recurs)
+        end
+      end
+    }
+    # nope, it's a page, try LINK tags first
+    outfeeds = getLinks(data, fulluri)
+    outfeeds.select {|link| isFeed?(link)}
+    #_debuglog('found %s feeds through LINK tags' % len(outfeeds))
+    if outfeeds.empty?
+      # no LINK tags, look for regular <A> links that point to feeds
+      begin
+        links = getALinks(data, fulluri)
+      rescue
+        links = []
+      end
+      # Get local links
+      links, locallinks = getLocalLinks(links, fulluri)
+      # look for obvious feed links on the same server
+      selected_feeds = locallinks.select{|link| isFeedLink?(link) and isFeed?(link)}
+      outfeeds << selected_feeds unless selected_feeds.empty?
+      # outfeeds.each{|link| puts "1 #{link}"}
+      # look harder for feed links on the same server
+      selected_feeds = locallinks.select{|link| isXMLRelatedLink?(link) and isFeed?(link)} if outfeeds.empty?
+      outfeeds << selected_feeds unless selected_feeds.empty?
+      # outfeeds.each{|link| puts "2 #{link}"}
+      # look for obvious feed links on another server
+      selected_feeds = links.select {|link| isFeedLink?(link) and isFeed?(link)} if outfeeds.empty?
+      outfeeds << selected_feeds unless selected_feeds.empty?
+      # outfeeds.each{|link| puts "3 #{link}"}
+      # look harder for feed links on another server
+      selected_feeds = links.select {|link| isXMLRelatedLink?(link) and isFeed?(link)} if outfeeds.empty?
+      outfeeds << selected_feeds unless selected_feeds.empty?
+      # outfeeds.each{|link| puts "4 #{link}"}
+    end
+    if outfeeds.empty?
+      # no A tags, guessing
+      # filenames used by popular software:
+      guesses = ['atom.xml', # blogger, TypePad
+        'feed/', # wordpress
+        'feeds/posts/default', # blogspot
+        'feed/main/rss20', # fotolog
+        'index.atom', # MT, apparently
+        'index.rdf', # MT
+        'rss.xml', # Dave Winer/Manila
+        'index.xml', # MT
+        'index.rss'] # Slash
+      guesses.each { |guess|
+        uri = URI.join(fulluri, guess).to_s
+        outfeeds << uri if isFeed?(uri)
+      }
+    end
+    # try with adding ending slash
+    if outfeeds.empty? and fulluri !~ /\/$/
+      outfeeds = feeds(fulluri + "/", all=all, querySyndic8=querySyndic8, _recurs=_recurs)
+    end
+    # still no luck, search Syndic8 for feeds (requires xmlrpclib)
+    #_debuglog('still no luck, searching Syndic8')
+    outfeeds << getFeedsFromSyndic8(uri) if querySyndic8 and outfeeds.empty?
+    #outfeeds = list(set(outfeeds)) if hasattr(__builtins__, 'set') or __builtins__.has_key('set')
+    return outfeeds.flatten
+  end
+  def feed(uri)
+    #todo: give preference to certain feed formats
+    feedlist = feeds(uri)
+    unless feedlist.empty?
+      return feedlist[0]
+    else
+      return nil
+    end
+  end
+end
+require 'rfeedfinder/version'

data/scripts/txt2html ADDED Viewed

@@ -0,0 +1,67 @@
+#!/usr/bin/env ruby
+require 'rubygems'
+require 'redcloth'
+require 'syntax/convertors/html'
+require 'erb'
+require File.dirname(__FILE__) + '/../lib/rfeedfinder/version.rb'
+version  = Rfeedfinder::VERSION::STRING
+download = 'http://rubyforge.org/projects/rfeedfinder'
+class Fixnum
+  def ordinal
+    # teens
+    return 'th' if (10..19).include?(self % 100)
+    # others
+    case self % 10
+    when 1: return 'st'
+    when 2: return 'nd'
+    when 3: return 'rd'
+    else    return 'th'
+    end
+  end
+end
+class Time
+  def pretty
+    return "#{mday}#{mday.ordinal} #{strftime('%B')} #{year}"
+  end
+end
+def convert_syntax(syntax, source)
+  return Syntax::Convertors::HTML.for_syntax(syntax).convert(source).gsub(%r!^<pre>|</pre>$!,'')
+end
+if ARGV.length >= 1
+  src, template = ARGV
+  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'
+else
+  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
+  exit!
+end
+template = ERB.new(File.open(template).read)
+title = nil
+body = nil
+File.open(src) do |fsrc|
+  title_text = fsrc.readline
+  body_text = fsrc.read
+  syntax_items = []
+  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</>!m){
+    ident = syntax_items.length
+    element, syntax, source = $1, $2, $3
+    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
+    "syntax-temp-#{ident}"
+  }
+  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
+  body = RedCloth.new(body_text).to_html
+  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
+end
+stat = File.stat(src)
+created = stat.ctime
+modified = stat.mtime
+$stdout << template.result(binding)