rfeedfinder 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 2007-08-08
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/License.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2007 Alexandre Girard
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Manifest.txt ADDED
@@ -0,0 +1,16 @@
1
+ History.txt
2
+ License.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ lib/rfeedfinder.rb
7
+ lib/rfeedfinder/version.rb
8
+ scripts/txt2html
9
+ setup.rb
10
+ test/test_helper.rb
11
+ test/test_rfeedfinder.rb
12
+ website/index.html
13
+ website/index.txt
14
+ website/javascripts/rounded_corners_lite.inc.js
15
+ website/stylesheets/screen.css
16
+ website/template.rhtml
data/README.txt ADDED
@@ -0,0 +1,3 @@
1
+ README for rfeedfinder
2
+ ======================
3
+
data/Rakefile ADDED
@@ -0,0 +1,131 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/testtask'
5
+ require 'rake/packagetask'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/rdoctask'
8
+ require 'rake/contrib/rubyforgepublisher'
9
+ require 'fileutils'
10
+ require 'hoe'
11
+ include FileUtils
12
+ require File.join(File.dirname(__FILE__), 'lib', 'rfeedfinder', 'version')
13
+
14
AUTHOR = 'Alexandre Girard'  # can also be an array of Authors
EMAIL = "alx.girard@gmail.com"
DESCRIPTION = "rFeedFinder uses RSS autodiscovery, Atom autodiscovery, spidering, URL correction, and Web service queries -- whatever it takes -- to find the feed."
GEM_NAME = 'rfeedfinder' # what ppl will type to install your gem

# YAML is needed to read the rubyforge config below; require it explicitly
# instead of relying on another library having loaded it.
require 'yaml'

# Location of the per-user rubyforge settings and the lazily loaded contents.
@config_file = "~/.rubyforge/user-config.yml"
@config = nil

# Returns the "username" entry of the rubyforge user config file.
#
# The file is read and parsed on first use and cached in @config; the
# username itself is memoized in @rubyforge_username.
#
# If the config cannot be read or parsed, an explanatory message is written
# to stderr and the process terminates with a non-zero exit status.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue StandardError
      # abort prints to stderr and exits with status 1, so a failed lookup
      # is not reported as success to the calling shell.
      abort <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
 - See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
    end
  end
  @rubyforge_username ||= @config["username"]
end
36
+
37
# The unix name for your project
RUBYFORGE_PROJECT = 'rfeedfinder'
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"

NAME = "rfeedfinder"

# Optional revision suffix for the version string; nil means "no suffix".
REV = nil
# UNCOMMENT IF REQUIRED:
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil

# Gem version: the library version, followed by ".REV" when REV is set.
VERS = [Rfeedfinder::VERSION::STRING, REV].compact.join('.')

# Extra file patterns removed by the clean task.
CLEAN.include(%w[**/.*.sw? *.gem .config **/.DS_Store])

# Command-line options handed to rdoc.
RDOC_OPTS = [
  '--quiet',
  '--title', 'rfeedfinder documentation',
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source"
]
52
+
53
# Reopens Hoe so that the generated gem does not list hoe itself as a
# dependency.
class Hoe
  # Returns @extra_deps without any entry whose name is 'hoe'. Entries may
  # be plain names or [name, version] pairs; Array() normalizes both forms.
  def extra_deps
    @extra_deps.select { |dependency| Array(dependency).first != 'hoe' }
  end
end
58
+
59
+ # Generate all the Rake tasks
60
+ # Run 'rake -T' to see list of generated tasks (from gem root directory)
61
# Configure Hoe, which generates the gem's Rake tasks.
# Run 'rake -T' to see list of generated tasks (from gem root directory)
hoe = Hoe.new(GEM_NAME, VERS) do |spec|
  spec.author = AUTHOR
  spec.description = DESCRIPTION
  spec.email = EMAIL
  spec.summary = DESCRIPTION
  spec.url = HOMEPATH
  if RUBYFORGE_PROJECT
    spec.rubyforge_name = RUBYFORGE_PROJECT
  end
  spec.test_globs = ["test/**/test_*.rb"]
  # An array of file patterns to delete on clean.
  spec.clean_globs = spec.clean_globs | CLEAN

  # == Optional
  spec.changes = spec.paragraphs_of("History.txt", 0..1).join("\n\n")
  #p.extra_deps = []     # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
  #p.spec_extras = {}    # A hash of extra values to set in the gemspec.
end

# First two paragraphs of the history file, reused in the release message.
CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")

# Path of this gem below the rubyforge project: just the project name when
# gem and project share a name, "project/gem" otherwise.
PATH =
  if RUBYFORGE_PROJECT == GEM_NAME
    RUBYFORGE_PROJECT
  else
    "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
  end

# rdoc is published relative to the project directory, so strip the leading
# project name from PATH.
rdoc_parent = PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,'')
hoe.remote_rdoc_dir = File.join(rdoc_parent, 'rdoc')
80
+
81
desc 'Generate website files'
task :website_generate do
  # Render every website/**/*.txt source to an .html file next to it.
  Dir.glob('website/**/*.txt').each do |source|
    target = source.gsub(/txt$/,'html')
    sh " ruby scripts/txt2html #{source} > #{target} "
  end
end

desc 'Upload website files to rubyforge'
task :website_upload do
  local_dir = 'website'
  destination = "#{rubyforge_username}@rubyforge.org:/var/www/gforge-projects/#{PATH}/"
  sh "rsync -aCv #{local_dir}/ #{destination}"
end

desc 'Generate and upload website files'
task :website => [:website_generate, :website_upload, :publish_docs]

desc 'Release the website and new gem version'
task :deploy => [:check_version, :website, :release] do
  # Print the svn command the releaser should run to tag this version.
  repository = "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}"
  puts "Remember to create SVN tag:"
  puts "svn copy #{repository}/trunk #{repository}/tags/REL-#{VERS} "
  puts "Suggested comment:"
  puts "Tagging release #{CHANGES}"
end

desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
task :local_deploy => [:website_generate, :install_gem]
110
+
111
# Release guard: requires a VERSION=x.y.z environment variable that matches
# VERS. On failure the message goes to stderr and rake exits with a non-zero
# status (abort), so a rejected release is not reported as success.
task :check_version do
  abort 'Must pass a VERSION=x.y.z release version' unless ENV['VERSION']
  unless ENV['VERSION'] == VERS
    abort "Please update your version.rb to match the release version, currently #{VERS}"
  end
end
121
+
122
# Catch-all rule: a task name of the form "test:<pattern>" runs the
# rfeedfinder test file, restricted with -n to tests matching <pattern>.
rule "" do |t|
  # test:file:method
  next unless /test:(.*)(:([^.]+))?$/.match(t.name)

  # Only the segment right after "test" is used, as the -n name filter.
  _prefix, test_name = t.name.split(":")
  run_file_name = "test_rfeedfinder.rb"

  sh "ruby -Ilib:test test/#{run_file_name} -n /#{test_name}/"
end
data/lib/rfeedfinder/version.rb ADDED
@@ -0,0 +1,9 @@
1
module Rfeedfinder #:nodoc:
  # Version components of the gem and the dotted string built from them.
  module VERSION #:nodoc:
    MAJOR, MINOR, TINY = 0, 9, 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
data/lib/rfeedfinder.rb ADDED
@@ -0,0 +1,232 @@
1
+ require File.dirname(__FILE__) + '/rfeedfinder/version.rb'
2
+ require 'net/http'
3
+ require 'rubygems'
4
+ require 'open-uri'
5
+ require 'hpricot'
6
+ require 'timeout'
7
+
8
# Locates the syndication feed(s) of a web page: the page itself, redirect
# and frame targets, LINK tags, A links, well-known feed file names and,
# optionally, the Syndic8 service are tried in that order.
#
# All methods are module functions: call them as Rfeedfinder.feed(uri).
module Rfeedfinder

  module_function

  # Normalizes user input into an http(s) URI string.
  #
  # * surrounding whitespace is removed
  # * "feed://host/..." becomes "http://host/..." and "feed:http://..."
  #   loses its "feed:" prefix
  # * input without an http(s):// prefix gets "http://" prepended
  # * scheme and authority are lower-cased; path and query keep their case
  #   because they may be case-sensitive on the server
  #
  # Returns a new String; the argument is not modified.
  def makeFullURI(uri)
    uri = uri.strip
    uri = uri.sub(/\Afeed:(?=https?:\/\/)/i, '').sub(/\Afeed:\/\//i, 'http://')
    uri = "http://#{uri}" unless uri =~ /\Ahttps?:\/\//i
    uri.sub(/\Ahttps?:\/\/[^\/?#]*/i) { |prefix| prefix.downcase }
  end

  # Returns the URIs of elements selected by the feed-autodiscovery selector.
  # NOTE(review): the selector is passed to Hpricot unchanged; confirm that
  # it matches <link rel="alternate" type="...xml" href="http..."> as intended.
  def getLinks(data, baseuri)
    searchLinks(data, baseuri, "[@rel=alternate]&[@type=xml]&[@href=http]")
  end

  # Returns the URIs referenced by the document's "a" elements.
  def getALinks(data, baseuri)
    searchLinks(data, baseuri, "a")
  end

  # Returns the URIs referenced by "frame" and "FRAME" elements.
  def getFrameLinks(data, baseuri)
    searchLinks(data, baseuri, "frame") + searchLinks(data, baseuri, "FRAME")
  end

  # Collects the target URI of every element matching +regexp+ (a selector
  # handed to data.search).
  #
  # The href attribute is preferred, then src, then SRC. Blank targets and
  # targets starting with "javascript" are skipped; targets starting with
  # "/" are resolved against +baseuri+. Returns the URIs without duplicates.
  def searchLinks(data, baseuri, regexp)
    links = []
    data.search(regexp).each do |link|
      next if link.to_s.strip.empty?

      uri = link[:href].to_s
      uri = link[:src].to_s if uri.empty?
      uri = link[:SRC].to_s if uri.empty?
      next if uri.strip.empty? || uri =~ /^javascript/

      uri = URI.join(baseuri, uri).to_s if uri =~ /^\//
      links << uri
    end
    links.uniq
  end

  # Splits +links+ into [other_links, local_links]: links starting with "/"
  # are considered local and are returned resolved against +baseuri+.
  def getLocalLinks(links, baseuri)
    local, other = links.partition { |link| link =~ /^\// }
    [other, local.map { |link| URI.join(baseuri, link).to_s }]
  end

  # Truthy (match index) when the link ends in .rss, .rdf, .xml or .atom,
  # nil otherwise. Case-insensitive.
  def isFeedLink?(link)
    link.downcase =~ /\.rss$|\.rdf$|\.xml$|\.atom$/
  end

  # Truthy (match index) when the link contains rss, rdf, xml or atom
  # anywhere, nil otherwise. Case-insensitive.
  def isXMLRelatedLink?(link)
    link.downcase =~ /rss|rdf|xml|atom/
  end

  # Returns the stripped text of the document's first newLocation element,
  # or nil when there is none.
  def tryBrokenRedirect(data)
    newuris = (data/:newLocation)
    return nil if newuris.empty?
    # The search yields elements, not strings, so take the element's text.
    newuris[0].inner_text.strip
  end

  # Decides whether a parsed document is a feed.
  def isFeedData?(data)
    # if no html tag and rss, rdf or feed tag, it's a feed
    # NOTE(review): the three nil? checks look like they can never fail
    # (the search results are also asked for empty? above, i.e. they are
    # collections), which would make this "no html tag" only. Switching to
    # empty? needs a check of how "rdf:RDF" roots are matched -- confirm.
    return ((data/:html).empty? && (!(data/:rss).nil? || !(data/:rdf).nil? || !(data/:feed).nil?))
  end

  # Fetches +uri+ and reports whether the response is a feed.
  #
  # A "wwwN." host prefix is rewritten to "www." first (on a copy; the
  # argument is left untouched). Returns false for unparseable URIs, for
  # schemes other than http/https, and when fetching or parsing fails.
  def isFeed?(uri)
    uri = uri.gsub(/\/\/www\d\./, "//www.")
    begin
      scheme = URI.split(uri)[0]
    rescue URI::InvalidURIError
      return false
    end
    return false unless %w[http https].include?(scheme.to_s.downcase)

    begin
      isFeedData?(Hpricot(open(uri), :xml => true))
    rescue Timeout::Error, StandardError
      # Timeout::Error is listed explicitly because it is not a
      # StandardError on every Ruby version.
      false
    end
  end

  # Queries Syndic8 for feeds of +uri+ and returns the data URLs of those
  # with status 'Syndicated', ordered by headlines rank.
  #
  # Best effort: any error (including the Syndic8 client not being loaded;
  # this file does not require it) yields an empty list.
  def getFeedsFromSyndic8(uri)
    found = []
    begin
      server = Syndic8.new
      feedids = server.find_feeds(uri)
      infolist = server.feed_info(feedids, ['headlines_rank','status','dataurl'])
      infolist = infolist.sort_by { |info| info[:headlines_rank] }
      infolist.each do |info|
        found << info[:dataurl] if info[:status] == 'Syndicated'
      end
    rescue
      # deliberately ignored, see above
    end
    found
  end

  # Returns the feed URIs found for +uri+ as an Array of Strings (empty when
  # nothing is found or the page cannot be fetched).
  #
  # uri          - page or feed address; normalized with makeFullURI
  # all          - accepted for interface compatibility; not used by this
  #                implementation
  # querySyndic8 - when true, ask Syndic8 as a last resort
  # _recurs      - internal: URIs already followed, to avoid loops
  def feeds(uri, all=false, querySyndic8=false, _recurs=nil)
    _recurs = [uri] if _recurs.nil?
    fulluri = makeFullURI(uri)

    begin
      data = Hpricot(open(fulluri), :xml => true)
    rescue Timeout::Error
      return []
    rescue => err
      warn "Error while opening #{fulluri} with Hpricot: #{err.message}"
      return []
    end

    # is this already a feed?
    return [fulluri] if isFeedData?(data)

    # verify redirection
    newuri = tryBrokenRedirect(data)
    if newuri && !newuri.empty? && !_recurs.include?(newuri)
      _recurs << newuri
      return feeds(newuri, all, querySyndic8, _recurs)
    end

    # verify frameset: follow the first frame that has not been visited yet
    getFrameLinks(data, fulluri).each do |frameuri|
      next if frameuri.nil? || frameuri.empty? || _recurs.include?(frameuri)
      _recurs << frameuri
      return feeds(frameuri, all, querySyndic8, _recurs)
    end

    # nope, it's a page, try LINK tags first
    outfeeds = getLinks(data, fulluri).select { |link| isFeed?(link) }

    if outfeeds.empty?
      # no LINK tags, look for regular <A> links that point to feeds
      begin
        links = getALinks(data, fulluri)
      rescue
        links = []
      end

      links, locallinks = getLocalLinks(links, fulluri)

      # Same server before other servers, obvious feed names before a
      # looser match; stop at the first stage that yields a verified feed.
      stages = [[locallinks, :isFeedLink?],
                [locallinks, :isXMLRelatedLink?],
                [links, :isFeedLink?],
                [links, :isXMLRelatedLink?]]
      stages.each do |candidates, looks_like_feed|
        outfeeds = candidates.select { |link| send(looks_like_feed, link) && isFeed?(link) }
        break unless outfeeds.empty?
      end
    end

    if outfeeds.empty?
      # no A tags, guessing
      # filenames used by popular software:
      guesses = ['atom.xml',            # blogger, TypePad
                 'feed/',               # wordpress
                 'feeds/posts/default', # blogspot
                 'feed/main/rss20',     # fotolog
                 'index.atom',          # MT, apparently
                 'index.rdf',           # MT
                 'rss.xml',             # Dave Winer/Manila
                 'index.xml',           # MT
                 'index.rss']           # Slash

      guesses.each do |guess|
        # separate local name: the uri parameter is still needed below
        candidate = URI.join(fulluri, guess).to_s
        outfeeds << candidate if isFeed?(candidate)
      end
    end

    # try with adding ending slash
    if outfeeds.empty? && fulluri !~ /\/$/
      outfeeds = feeds(fulluri + "/", all, querySyndic8, _recurs)
    end

    # still no luck, search Syndic8 for feeds
    outfeeds.concat(getFeedsFromSyndic8(uri)) if querySyndic8 && outfeeds.empty?

    outfeeds.flatten.uniq
  end

  # Returns the first feed URI found for +uri+, or nil when there is none.
  def feed(uri)
    #todo: give preference to certain feed formats
    feeds(uri).first
  end
end
231
+
232
+ require 'rfeedfinder/version'
data/scripts/txt2html ADDED
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'redcloth'
5
+ require 'syntax/convertors/html'
6
+ require 'erb'
7
+ require File.dirname(__FILE__) + '/../lib/rfeedfinder/version.rb'
8
+
9
+ version = Rfeedfinder::VERSION::STRING
10
+ download = 'http://rubyforge.org/projects/rfeedfinder'
11
+
12
# Adds an English ordinal suffix helper to integers.
#
# Defined on Integer rather than Fixnum: Fixnum no longer exists on current
# Ruby, and where it does exist it inherits from Integer.
class Integer
  # Returns the ordinal suffix for this number: 'st', 'nd', 'rd' or 'th'.
  def ordinal
    # teens (10..19 within any hundred) always take 'th'
    return 'th' if (10..19).include?(self % 100)
    # others
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
25
+
26
# Adds a human-readable date format to Time.
class Time
  # Formats the date as "<day><ordinal suffix> <month name> <year>".
  def pretty
    day_with_suffix = "#{mday}#{mday.ordinal}"
    [day_with_suffix, strftime('%B'), year].join(' ')
  end
end
31
+
32
# Converts +source+ to highlighted HTML using the Syntax convertor for
# +syntax+, with a <pre> at the start of a line and a </pre> at the end of a
# line removed from the result.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = convertor.convert(source)
  highlighted.gsub(%r!^<pre>|</pre>$!,'')
end
35
+
36
# Command line: source.txt [template.rhtml]; the rendered page is written
# to stdout.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'
else
  # stdout is normally redirected into the generated page, so the usage
  # message goes to stderr and the script exits with a failure status.
  $stderr.puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit 1
end

# File.read closes the template file; File.open(...).read left it open.
template = ERB.new(File.read(template))

# title, body, created and modified stay top-level locals because the
# template is rendered with this binding below.
title = nil
body = nil
File.open(src) do |fsrc|
  # The first line of the source is the page title, the rest is the body.
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Replace every <pre|code syntax="..."> element with a placeholder and
  # keep its highlighted form aside. \1 makes the closing tag match the
  # opening one.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted snippets back in place of their numbered
  # placeholders (\d+ captures the placeholder index).
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)