RubyGems - rfeedfinder - Versions diffs - 0.9.0 - Mend

rfeedfinder 0.9.0

Files changed (17) hide show

data/History.txt +4 -0
data/License.txt +20 -0
data/Manifest.txt +16 -0
data/README.txt +3 -0
data/Rakefile +131 -0
data/lib/rfeedfinder/version.rb +9 -0
data/lib/rfeedfinder.rb +232 -0
data/scripts/txt2html +67 -0
data/setup.rb +1585 -0
data/test/test_helper.rb +10 -0
data/test/test_rfeedfinder.rb +102 -0
data/website/index.html +150 -0
data/website/index.txt +45 -0
data/website/javascripts/rounded_corners_lite.inc.js +285 -0
data/website/stylesheets/screen.css +138 -0
data/website/template.rhtml +48 -0
metadata +67 -0

data/History.txt ADDED Viewed

@@ -0,0 +1,4 @@
+== 0.0.1 2007-08-08
+* 1 major enhancement:
+  * Initial release

data/License.txt ADDED Viewed

@@ -0,0 +1,20 @@
+Copyright (c) 2007 Alexandre Girard
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/Manifest.txt ADDED Viewed

@@ -0,0 +1,16 @@
+History.txt
+License.txt
+Manifest.txt
+README.txt
+Rakefile
+lib/rfeedfinder.rb
+lib/rfeedfinder/version.rb
+scripts/txt2html
+setup.rb
+test/test_helper.rb
+test/test_rfeedfinder.rb
+website/index.html
+website/index.txt
+website/javascripts/rounded_corners_lite.inc.js
+website/stylesheets/screen.css
+website/template.rhtml

data/README.txt ADDED Viewed

@@ -0,0 +1,3 @@
+README for rfeedfinder
+======================

data/Rakefile ADDED Viewed

@@ -0,0 +1,131 @@
+require 'rubygems'
+require 'rake'
+require 'rake/clean'
+require 'rake/testtask'
+require 'rake/packagetask'
+require 'rake/gempackagetask'
+require 'rake/rdoctask'
+require 'rake/contrib/rubyforgepublisher'
+require 'fileutils'
+require 'hoe'
+include FileUtils
+require File.join(File.dirname(__FILE__), 'lib', 'rfeedfinder', 'version')
+AUTHOR = 'Alexandre Girard'  # can also be an array of Authors
+EMAIL = "alx.girard@gmail.com"
+DESCRIPTION = "rFeedFinder uses RSS autodiscovery, Atom autodiscovery, spidering, URL correction, and Web service queries -- whatever it takes -- to find the feed."
+GEM_NAME = 'rfeedfinder' # what ppl will type to install your gem
+@config_file = "~/.rubyforge/user-config.yml"
+@config = nil
+def rubyforge_username
+  unless @config
+    begin
+      @config = YAML.load(File.read(File.expand_path(@config_file)))
+    rescue
+      puts <<-EOS
+ERROR: No rubyforge config file found: #{@config_file}"
+Run 'rubyforge setup' to prepare your env for access to Rubyforge
+ - See http://newgem.rubyforge.org/rubyforge.html for more details
+      EOS
+      exit
+    end
+  end
+  @rubyforge_username ||= @config["username"]
+end
+# RubyForge project identity and the URLs derived from it.
+RUBYFORGE_PROJECT = 'rfeedfinder' # The unix name for your project
+HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
+DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
+NAME = "rfeedfinder"
+# Optional revision suffix; while it is nil, VERS is exactly the version.rb string.
+REV = nil
+# UNCOMMENT IF REQUIRED:
+# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
+VERS = Rfeedfinder::VERSION::STRING + (REV ? ".#{REV}" : "")
+# Extra file patterns added to rake/clean's CLEAN list (also handed to Hoe's
+# clean_globs below): editor swap files, built gems, .config, .DS_Store.
+CLEAN.include ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']
+# RDoc command-line options. Not referenced elsewhere in the visible part of
+# this Rakefile.
+RDOC_OPTS = ['--quiet', '--title', 'rfeedfinder documentation',
+    "--opname", "index.html",
+    "--line-numbers",
+    "--main", "README",
+    "--inline-source"]
+class Hoe
+  def extra_deps
+    @extra_deps.reject { |x| Array(x).first == 'hoe' }
+  end
+end
+# Generate all the Rake tasks
+# Run 'rake -T' to see list of generated tasks (from gem root directory)
+hoe = Hoe.new(GEM_NAME, VERS) do |p|
+  p.author = AUTHOR
+  p.description = DESCRIPTION
+  p.email = EMAIL
+  # The same text is used for both the long description and the summary.
+  p.summary = DESCRIPTION
+  p.url = HOMEPATH
+  p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
+  p.test_globs = ["test/**/test_*.rb"]
+  p.clean_globs |= CLEAN  #An array of file patterns to delete on clean.
+  # == Optional
+  # Release notes: the first two paragraphs of History.txt.
+  p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
+  #p.extra_deps = []     # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
+  #p.spec_extras = {}    # A hash of extra values to set in the gemspec.
+end
+# Same History.txt excerpt as p.changes above; printed by the :deploy task as
+# the suggested tag comment.
+CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")
+# Remote path component: just the project name when gem and project share a
+# name, otherwise "project/gem".
+PATH    = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
+# RDoc upload directory: PATH with the leading project name removed, plus 'rdoc'.
+hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
+desc 'Generate website files'
+task :website_generate do
+  Dir['website/**/*.txt'].each do |txt|
+    sh %{ ruby scripts/txt2html #{txt} > #{txt.gsub(/txt$/,'html')} }
+  end
+end
+desc 'Upload website files to rubyforge'
+task :website_upload do
+  host = "#{rubyforge_username}@rubyforge.org"
+  remote_dir = "/var/www/gforge-projects/#{PATH}/"
+  local_dir = 'website'
+  sh %{rsync -aCv #{local_dir}/ #{host}:#{remote_dir}}
+end
+# Aggregate task: regenerate the site, upload it, then run :publish_docs
+# (:publish_docs is not defined in this file -- presumably provided by Hoe).
+desc 'Generate and upload website files'
+task :website => [:website_generate, :website_upload, :publish_docs]
+# Full release. After the prerequisites have run, the task body only PRINTS the
+# svn command for tagging the release and a suggested comment; it does not run it.
+desc 'Release the website and new gem version'
+task :deploy => [:check_version, :website, :release] do
+  puts "Remember to create SVN tag:"
+  puts "svn copy svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk " +
+    "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
+  puts "Suggested comment:"
+  puts "Tagging release #{CHANGES}"
+end
+# Local variant: regenerate the site and run :install_gem (not defined in this
+# file -- presumably provided by Hoe); nothing is uploaded.
+desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
+task :local_deploy => [:website_generate, :install_gem]
+task :check_version do
+  unless ENV['VERSION']
+    puts 'Must pass a VERSION=x.y.z release version'
+    exit
+  end
+  unless ENV['VERSION'] == VERS
+    puts "Please update your version.rb to match the release version, currently #{VERS}"
+    exit
+  end
+end
+rule "" do |t|
+  # test:file:method
+  if /test:(.*)(:([^.]+))?$/.match(t.name)
+    arguments = t.name.split(":")[1..-1]
+    test_name = arguments.first
+    run_file_name = "test_rfeedfinder.rb"
+    sh "ruby -Ilib:test test/#{run_file_name} -n /#{test_name}/"
+  end
+end

data/lib/rfeedfinder/version.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module Rfeedfinder #:nodoc:
+  module VERSION #:nodoc:
+    MAJOR = 0
+    MINOR = 9
+    TINY  = 0
+    STRING = [MAJOR, MINOR, TINY].join('.')
+  end
+end

data/lib/rfeedfinder.rb ADDED Viewed

@@ -0,0 +1,232 @@
+require File.dirname(__FILE__) + '/rfeedfinder/version.rb'
+require 'net/http'
+require 'rubygems'
+require 'open-uri'
+require 'hpricot'
+require 'timeout'
+module Rfeedfinder
+  module_function
+  def makeFullURI(uri)
+    uri = uri.strip.sub(/^feed(.*)/, 'http\1').downcase
+    if /^http|https/.match(uri)
+      return uri
+    else
+      return "http://" << uri
+    end
+  end
+  def getLinks(data, baseuri)
+    return searchLinks(data, baseuri, "[@rel=alternate]&[@type=xml]&[@href=http]")
+  end
+  def getALinks(data, baseuri)
+    return searchLinks(data, baseuri, "a")
+  end
+  def getFrameLinks(data, baseuri)
+    links = searchLinks(data, baseuri, "frame")
+    links += searchLinks(data, baseuri, "FRAME")
+    return links
+  end
+  def searchLinks(data, baseuri, regexp)
+    links = []
+    data.search(regexp).map!{|link|
+      if !link.to_s.strip.empty?
+        uri = link[:href].to_s
+        uri = link[:src].to_s if uri.empty?
+        uri = link[:SRC].to_s if uri.empty?
+        if !uri.strip.empty? and uri !~ /^javascript/
+          uri = URI.join(baseuri, uri).to_s if uri =~ /^\//
+          links << uri
+        end
+      end
+    }
+    #links.each{|link| puts "searchLinks: #{link}"}
+    return links.uniq
+  end
+  def getLocalLinks(links, baseuri)
+    locallinks = []
+    links.each do |link|
+      locallinks << URI.join(baseuri, link).to_s if link =~ /^\//
+    end
+    links = links.select{|link| link !~ /^\//} #remove local links from link array
+    return [links, locallinks]
+  end
+  def isFeedLink?(link)
+    return link.downcase =~ /\.rss$|\.rdf$|\.xml$|\.atom$/
+  end
+  def isXMLRelatedLink?(link)
+    return link.downcase =~ /rss|rdf|xml|atom/
+  end
+  def tryBrokenRedirect(data)
+    newuris = (data/:newLocation)
+    if !newuris.empty?
+      return newuris[0].strip
+    end
+  end
+  def isFeedData?(data)
+    # if no html tag and rss, rdf or feed tag, it's a feed
+    return ((data/:html).empty? and (!(data/:rss).nil? or !(data/:rdf).nil? or !(data/:feed).nil?))
+  end
+  def isFeed?(uri)
+    uri.gsub!(/\/\/www\d\./, "//www.")
+    begin
+      protocol = URI.split(uri)
+      return false if !protocol[0].index(/^[http|https]/)
+    rescue
+      # URI error
+      return false
+    end
+    begin
+      html = Net::HTTP.get(URI.parse(uri))
+      data = Hpricot(open(fulluri), :xml => true)
+      return isFeedData?(data)
+    rescue Timeout::Error
+      return false
+    rescue
+      return false
+    end
+  end
+  def getFeedsFromSyndic8(uri)
+    feeds = []
+    begin
+      server = Syndic8.new
+      feedids = server.find_feeds(uri)
+      infolist = server.feed_info(feedids, ['headlines_rank','status','dataurl'])
+      infolist.sort_by{|feedInfo| feedInfo[:headlines_rank]}
+      infolist.each do |feed|
+        feeds << feed[:dataurl] if feed[:status]=='Syndicated'
+      end
+    rescue
+    end
+    return feeds
+  end
+  def feeds(uri, all=false, querySyndic8=false, _recurs=nil)
+    _recurs = [uri] if _recurs.nil?
+    fulluri = makeFullURI(uri)
+    begin
+      html = Net::HTTP.get(URI.parse(fulluri))
+      data = Hpricot(open(fulluri), :xml => true)
+    rescue Timeout::Error
+      return []
+    rescue => err
+      puts "Error while opening #{fulluri} with Hpricot: " << $!
+      return []
+    end
+    # is this already a feed?
+    return [fulluri] if isFeedData?(data)
+    #verify redirection
+    newuri = tryBrokenRedirect(data)
+    if !newuri.nil? and !newuri.empty?
+      unless _recurs.include?(newuri)
+        _recurs << newuri
+        return feeds(newuri, all=all, querySyndic8=querySyndic8, _recurs=_recurs)
+      end
+    end
+    #verify frameset
+    frames = getFrameLinks(data, fulluri)
+    frames.each {|newuri|
+      if !newuri.nil? and !newuri.empty?
+        unless _recurs.include?(newuri)
+          _recurs << newuri
+          return feeds(newuri, all=all, querySyndic8=querySyndic8, _recurs=_recurs)
+        end
+      end
+    }
+    # nope, it's a page, try LINK tags first
+    outfeeds = getLinks(data, fulluri)
+    outfeeds.select {|link| isFeed?(link)}
+    #_debuglog('found %s feeds through LINK tags' % len(outfeeds))
+    if outfeeds.empty?
+      # no LINK tags, look for regular <A> links that point to feeds
+      begin
+        links = getALinks(data, fulluri)
+      rescue
+        links = []
+      end
+      # Get local links
+      links, locallinks = getLocalLinks(links, fulluri)
+      # look for obvious feed links on the same server
+      selected_feeds = locallinks.select{|link| isFeedLink?(link) and isFeed?(link)}
+      outfeeds << selected_feeds unless selected_feeds.empty?
+      # outfeeds.each{|link| puts "1 #{link}"}
+      # look harder for feed links on the same server
+      selected_feeds = locallinks.select{|link| isXMLRelatedLink?(link) and isFeed?(link)} if outfeeds.empty?
+      outfeeds << selected_feeds unless selected_feeds.empty?
+      # outfeeds.each{|link| puts "2 #{link}"}
+      # look for obvious feed links on another server
+      selected_feeds = links.select {|link| isFeedLink?(link) and isFeed?(link)} if outfeeds.empty?
+      outfeeds << selected_feeds unless selected_feeds.empty?
+      # outfeeds.each{|link| puts "3 #{link}"}
+      # look harder for feed links on another server
+      selected_feeds = links.select {|link| isXMLRelatedLink?(link) and isFeed?(link)} if outfeeds.empty?
+      outfeeds << selected_feeds unless selected_feeds.empty?
+      # outfeeds.each{|link| puts "4 #{link}"}
+    end
+    if outfeeds.empty?
+      # no A tags, guessing
+      # filenames used by popular software:
+      guesses = ['atom.xml', # blogger, TypePad
+        'feed/', # wordpress
+        'feeds/posts/default', # blogspot
+        'feed/main/rss20', # fotolog
+        'index.atom', # MT, apparently
+        'index.rdf', # MT
+        'rss.xml', # Dave Winer/Manila
+        'index.xml', # MT
+        'index.rss'] # Slash
+      guesses.each { |guess|
+        uri = URI.join(fulluri, guess).to_s
+        outfeeds << uri if isFeed?(uri)
+      }
+    end
+    # try with adding ending slash
+    if outfeeds.empty? and fulluri !~ /\/$/
+      outfeeds = feeds(fulluri + "/", all=all, querySyndic8=querySyndic8, _recurs=_recurs)
+    end
+    # still no luck, search Syndic8 for feeds (requires xmlrpclib)
+    #_debuglog('still no luck, searching Syndic8')
+    outfeeds << getFeedsFromSyndic8(uri) if querySyndic8 and outfeeds.empty?
+    #outfeeds = list(set(outfeeds)) if hasattr(__builtins__, 'set') or __builtins__.has_key('set')
+    return outfeeds.flatten
+  end
+  def feed(uri)
+    #todo: give preference to certain feed formats
+    feedlist = feeds(uri)
+    unless feedlist.empty?
+      return feedlist[0]
+    else
+      return nil
+    end
+  end
+end
+require 'rfeedfinder/version'

data/scripts/txt2html ADDED Viewed

@@ -0,0 +1,67 @@
+#!/usr/bin/env ruby
+require 'rubygems'
+require 'redcloth'
+require 'syntax/convertors/html'
+require 'erb'
+require File.dirname(__FILE__) + '/../lib/rfeedfinder/version.rb'
+version  = Rfeedfinder::VERSION::STRING
+download = 'http://rubyforge.org/projects/rfeedfinder'
+class Fixnum
+  def ordinal
+    # teens
+    return 'th' if (10..19).include?(self % 100)
+    # others
+    case self % 10
+    when 1: return 'st'
+    when 2: return 'nd'
+    when 3: return 'rd'
+    else    return 'th'
+    end
+  end
+end
+class Time
+  def pretty
+    return "#{mday}#{mday.ordinal} #{strftime('%B')} #{year}"
+  end
+end
+def convert_syntax(syntax, source)
+  return Syntax::Convertors::HTML.for_syntax(syntax).convert(source).gsub(%r!^<pre>|</pre>$!,'')
+end
+if ARGV.length >= 1
+  src, template = ARGV
+  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'
+else
+  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
+  exit!
+end
+template = ERB.new(File.open(template).read)
+title = nil
+body = nil
+File.open(src) do |fsrc|
+  title_text = fsrc.readline
+  body_text = fsrc.read
+  syntax_items = []
+  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</>!m){
+    ident = syntax_items.length
+    element, syntax, source = $1, $2, $3
+    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
+    "syntax-temp-#{ident}"
+  }
+  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
+  body = RedCloth.new(body_text).to_html
+  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
+end
+stat = File.stat(src)
+created = stat.ctime
+modified = stat.mtime
+$stdout << template.result(binding)