rfeedfinder 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 2007-08-08
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/License.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2007 Alexandre Girard
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Manifest.txt ADDED
@@ -0,0 +1,16 @@
1
+ History.txt
2
+ License.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ lib/rfeedfinder.rb
7
+ lib/rfeedfinder/version.rb
8
+ scripts/txt2html
9
+ setup.rb
10
+ test/test_helper.rb
11
+ test/test_rfeedfinder.rb
12
+ website/index.html
13
+ website/index.txt
14
+ website/javascripts/rounded_corners_lite.inc.js
15
+ website/stylesheets/screen.css
16
+ website/template.rhtml
data/README.txt ADDED
@@ -0,0 +1,3 @@
1
+ README for rfeedfinder
2
+ ======================
3
+
data/Rakefile ADDED
@@ -0,0 +1,131 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/testtask'
5
+ require 'rake/packagetask'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/rdoctask'
8
+ require 'rake/contrib/rubyforgepublisher'
9
+ require 'fileutils'
10
+ require 'hoe'
11
+ include FileUtils
12
+ require File.join(File.dirname(__FILE__), 'lib', 'rfeedfinder', 'version')
13
+
14
require 'yaml' # YAML.load is used below; do not rely on another library having loaded it

# Gem metadata used by the Hoe configuration later in this Rakefile.
AUTHOR = 'Alexandre Girard' # can also be an array of Authors
EMAIL = "alx.girard@gmail.com"
DESCRIPTION = "rFeedFinder uses RSS autodiscovery, Atom autodiscovery, spidering, URL correction, and Web service queries -- whatever it takes -- to find the feed."
GEM_NAME = 'rfeedfinder' # what ppl will type to install your gem

# Path of the RubyForge user configuration and its lazily loaded contents.
@config_file = "~/.rubyforge/user-config.yml"
@config = nil

# Returns the "username" entry of the RubyForge user configuration file.
#
# The file named by @config_file is read and parsed on first use and kept in
# @config; the resulting user name is memoised in @rubyforge_username.
# If the file cannot be read or parsed, an explanatory message is printed and
# the process exits with a non-zero status so that callers see the failure.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
 - See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      exit 1
    end
  end
  @rubyforge_username ||= @config["username"]
end
36
+
37
# RubyForge project identity and the URLs derived from it.
RUBYFORGE_PROJECT = 'rfeedfinder' # The unix name for your project
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"

NAME = "rfeedfinder"
REV = nil
# UNCOMMENT IF REQUIRED:
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil

# Release version: the library version, suffixed with ".REV" when a revision is set.
revision_suffix = REV ? ".#{REV}" : ""
VERS = Rfeedfinder::VERSION::STRING + revision_suffix

# Extra file patterns removed by the clean task.
CLEAN.include(['**/.*.sw?', '*.gem', '.config', '**/.DS_Store'])

# Options handed to rdoc.
RDOC_OPTS = [
  '--quiet',
  '--title', 'rfeedfinder documentation',
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source"
]
52
+
53
# Reopens Hoe to filter 'hoe' out of the dependency list it reports.
class Hoe
  # Returns @extra_deps without the entries named 'hoe'.
  # An entry may be a bare name or a [name, version] pair; Array() lets both
  # forms be compared through their first element.
  def extra_deps
    @extra_deps.select do |dependency|
      Array(dependency).first != 'hoe'
    end
  end
end
58
+
59
# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
hoe = Hoe.new(GEM_NAME, VERS) do |spec|
  spec.author = AUTHOR
  spec.description = DESCRIPTION
  spec.email = EMAIL
  spec.summary = DESCRIPTION
  spec.url = HOMEPATH
  spec.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
  spec.test_globs = ["test/**/test_*.rb"]
  spec.clean_globs |= CLEAN #An array of file patterns to delete on clean.

  # == Optional
  # Changelog text: the first two paragraphs of History.txt.
  spec.changes = spec.paragraphs_of("History.txt", 0..1).join("\n\n")
  #spec.extra_deps = [] # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
  #spec.spec_extras = {} # A hash of extra values to set in the gemspec.
end

# Same changelog excerpt, reused by the deploy task's suggested commit comment.
CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")

# Project path on RubyForge: just the project name when the gem shares it,
# otherwise "project/gem".
PATH = if RUBYFORGE_PROJECT == GEM_NAME
  RUBYFORGE_PROJECT
else
  "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
end

# The rdoc directory is expressed relative to the project root.
path_below_project = PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,'')
hoe.remote_rdoc_dir = File.join(path_below_project, 'rdoc')
80
+
81
# Renders every website/**/*.txt file to a sibling .html file via scripts/txt2html.
desc 'Generate website files'
task :website_generate do
  Dir['website/**/*.txt'].each do |txt|
    html = txt.sub(/txt\z/, 'html')
    sh %{ ruby scripts/txt2html #{txt} > #{html} }
  end
end

# Copies the local website directory to the project's RubyForge web space with rsync.
desc 'Upload website files to rubyforge'
task :website_upload do
  host = "#{rubyforge_username}@rubyforge.org"
  remote_dir = "/var/www/gforge-projects/#{PATH}/"
  local_dir = 'website'
  sh %{rsync -aCv #{local_dir}/ #{host}:#{remote_dir}}
end

desc 'Generate and upload website files'
task :website => [:website_generate, :website_upload, :publish_docs]

# Runs the version check, the website tasks and the release, then prints a
# reminder of the SVN tag command and a suggested commit comment.
desc 'Release the website and new gem version'
task :deploy => [:check_version, :website, :release] do
  puts "Remember to create SVN tag:"
  puts "svn copy svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk " +
    "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
  puts "Suggested comment:"
  puts "Tagging release #{CHANGES}"
end

desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
task :local_deploy => [:website_generate, :install_gem]

# Guards a release: VERSION=x.y.z must be given and must equal VERS.
# A failed check aborts with a non-zero exit status so that scripts driving
# rake do not mistake it for success.
task :check_version do
  abort 'Must pass a VERSION=x.y.z release version' unless ENV['VERSION']
  unless ENV['VERSION'] == VERS
    abort "Please update your version.rb to match the release version, currently #{VERS}"
  end
end

# Catch-all rule for task names containing "test:<name>".
# The segment after the first colon is used as the -n /pattern/ test-name
# filter; the file that is run is always test/test_rfeedfinder.rb.
rule "" do |t|
  # test:file:method
  if /test:(.*)(:([^.]+))?$/.match(t.name)
    test_name = t.name.split(":")[1]
    run_file_name = "test_rfeedfinder.rb"

    sh "ruby -Ilib:test test/#{run_file_name} -n /#{test_name}/"
  end
end
@@ -0,0 +1,9 @@
1
module Rfeedfinder #:nodoc:
  # Version components of the gem and their dotted string form.
  module VERSION #:nodoc:
    MAJOR, MINOR, TINY = 0, 9, 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
@@ -0,0 +1,232 @@
1
+ require File.dirname(__FILE__) + '/rfeedfinder/version.rb'
2
+ require 'net/http'
3
+ require 'rubygems'
4
+ require 'open-uri'
5
+ require 'hpricot'
6
+ require 'timeout'
7
+
8
# Feed discovery helpers: given a page URI, locate the URI(s) of its feed(s).
# All methods are module functions (Rfeedfinder.feed, Rfeedfinder.feeds, ...).
module Rfeedfinder

  module_function

  # Normalises user input into an absolute http(s) URI string.
  #
  # The input is stripped and lower-cased. A leading "feed://" becomes
  # "http://", a "feed:" prefix in front of a full http(s) URI is dropped, and
  # anything that does not already start with "http://" or "https://" gets
  # "http://" prepended. Host names that merely start with "feed" or "http"
  # (feedburner.com, httpd.apache.org) are left intact.
  def makeFullURI(uri)
    uri = uri.strip.downcase
    uri = uri.sub(/\Afeed:\/\//, 'http://').sub(/\Afeed:(?=https?:\/\/)/, '')
    return uri if uri =~ /\Ahttps?:\/\//
    "http://" + uri
  end

  # Returns the link targets matched by the autodiscovery selector below.
  # NOTE(review): the selector string is passed to Hpricot unchanged; confirm
  # it matches <link rel="alternate"> elements as intended.
  def getLinks(data, baseuri)
    return searchLinks(data, baseuri, "[@rel=alternate]&[@type=xml]&[@href=http]")
  end

  # Returns the targets of all "a" elements found in +data+.
  def getALinks(data, baseuri)
    return searchLinks(data, baseuri, "a")
  end

  # Returns the targets of "frame" and "FRAME" elements found in +data+.
  def getFrameLinks(data, baseuri)
    links = searchLinks(data, baseuri, "frame")
    links += searchLinks(data, baseuri, "FRAME")
    return links
  end

  # Collects the unique link targets of every element matching +regexp+
  # (a selector handed to data.search).
  #
  # For each element the target is its :href, else :src, else :SRC attribute.
  # Blank targets and targets starting with "javascript" are skipped.
  # Targets starting with "/" are resolved against +baseuri+; all other
  # targets are returned as found.
  def searchLinks(data, baseuri, regexp)
    links = []
    data.search(regexp).each do |link|
      next if link.to_s.strip.empty?
      uri = link[:href].to_s
      uri = link[:src].to_s if uri.empty?
      uri = link[:SRC].to_s if uri.empty?
      next if uri.strip.empty? || uri =~ /^javascript/
      uri = URI.join(baseuri, uri).to_s if uri =~ /^\//
      links << uri
    end
    #links.each{|link| puts "searchLinks: #{link}"}
    return links.uniq
  end

  # Splits +links+ into [other_links, local_links].
  # Links starting with "/" are considered local and are returned resolved
  # against +baseuri+; every other link is returned untouched.
  def getLocalLinks(links, baseuri)
    local, others = links.partition { |link| link =~ /^\// }
    return [others, local.map { |link| URI.join(baseuri, link).to_s }]
  end

  # Truthy when +link+ ends in .rss, .rdf, .xml or .atom (case-insensitive).
  def isFeedLink?(link)
    return link.downcase =~ /\.rss$|\.rdf$|\.xml$|\.atom$/
  end

  # Truthy when +link+ contains rss, rdf, xml or atom anywhere (case-insensitive).
  def isXMLRelatedLink?(link)
    return link.downcase =~ /rss|rdf|xml|atom/
  end

  # Returns the stripped first newLocation entry of +data+, or nil if there is none.
  # NOTE(review): assumes the matched element responds to #strip - confirm
  # against the Hpricot element API.
  def tryBrokenRedirect(data)
    newuris = (data/:newLocation)
    if !newuris.empty?
      return newuris[0].strip
    end
  end

  # True when +data+ has no html element and the rss/rdf/feed lookups are non-nil.
  # NOTE(review): if data/:tag always returns a collection (never nil), the
  # second half of the condition is always true and this reduces to "no html
  # element". Left unchanged because switching to empty? could stop detecting
  # namespaced roots such as rdf:RDF - confirm before tightening.
  def isFeedData?(data)
    # if no html tag and rss, rdf or feed tag, it's a feed
    return ((data/:html).empty? && (!(data/:rss).nil? || !(data/:rdf).nil? || !(data/:feed).nil?))
  end

  # Downloads +uri+ and returns it parsed by Hpricot in XML mode.
  # Uses URI.open where open-uri provides it and falls back to Kernel#open
  # otherwise. The stream is closed once parsing is done.
  def fetchDocument(uri)
    parse = lambda { |io| Hpricot(io, :xml => true) }
    if URI.respond_to?(:open)
      URI.open(uri, &parse)
    else
      open(uri, &parse)
    end
  end

  # Returns true when +uri+ is an http(s) URI whose content looks like a feed.
  #
  # Side effect kept from the original implementation: "//wwwN." in +uri+ is
  # rewritten in place to "//www.", so the caller's string is modified.
  # Any URI, network or parse error yields false.
  def isFeed?(uri)
    uri.gsub!(/\/\/www\d\./, "//www.")
    begin
      scheme = URI.split(uri)[0].to_s
      return false unless scheme =~ /\Ahttps?\z/i
    rescue
      # URI error
      return false
    end
    begin
      return isFeedData?(fetchDocument(uri))
    rescue Timeout::Error
      return false
    rescue
      return false
    end
  end

  # Asks Syndic8 for feeds known for +uri+.
  # Returns the dataurl of every entry whose status is 'Syndicated', ordered
  # by headlines_rank. Best effort: any error (including the Syndic8 class
  # not being loaded) results in the feeds collected so far, usually [].
  def getFeedsFromSyndic8(uri)
    feeds = []
    begin
      server = Syndic8.new
      feedids = server.find_feeds(uri)
      infolist = server.feed_info(feedids, ['headlines_rank','status','dataurl'])
      infolist = infolist.sort_by{|feedInfo| feedInfo[:headlines_rank]}
      infolist.each do |feed|
        feeds << feed[:dataurl] if feed[:status]=='Syndicated'
      end
    rescue
      # deliberate best effort: Syndic8 is an optional last resort
    end
    return feeds
  end

  # Returns an array of feed URIs found for +uri+ (possibly empty).
  #
  # uri          - page or feed address; normalised with makeFullURI.
  # all          - accepted for interface compatibility; not used by this
  #                implementation.
  # querySyndic8 - when true, Syndic8 is queried if nothing else was found.
  # _recurs      - internal list of URIs already visited while following
  #                redirects and frames.
  #
  # Strategy, stopping at the first step that yields something: the document
  # itself is a feed; a newLocation redirect; the first unvisited frame;
  # autodiscovery links; <a> links (obvious feed names first, then anything
  # XML-related, local links before other links); well-known feed file names;
  # the same URI with a trailing slash; Syndic8.
  # NOTE(review): searchLinks already resolves "/"-prefixed links, so the
  # local list produced by getLocalLinks is expected to be empty here.
  def feeds(uri, all=false, querySyndic8=false, _recurs=nil)
    _recurs = [uri] if _recurs.nil?
    fulluri = makeFullURI(uri)

    begin
      data = fetchDocument(fulluri)
    rescue Timeout::Error
      return []
    rescue => err
      puts "Error while opening #{fulluri} with Hpricot: #{err.message}"
      return []
    end

    # is this already a feed?
    return [fulluri] if isFeedData?(data)

    #verify redirection
    newuri = tryBrokenRedirect(data)
    if !newuri.nil? && !newuri.empty? && !_recurs.include?(newuri)
      _recurs << newuri
      return feeds(newuri, all, querySyndic8, _recurs)
    end

    #verify frameset: follow the first frame that has not been visited yet
    getFrameLinks(data, fulluri).each do |frameuri|
      next if frameuri.nil? || frameuri.empty? || _recurs.include?(frameuri)
      _recurs << frameuri
      return feeds(frameuri, all, querySyndic8, _recurs)
    end

    # nope, it's a page, try LINK tags first
    outfeeds = getLinks(data, fulluri).select {|link| isFeed?(link)}

    #_debuglog('found %s feeds through LINK tags' % len(outfeeds))
    if outfeeds.empty?
      # no LINK tags, look for regular <A> links that point to feeds
      begin
        links = getALinks(data, fulluri)
      rescue
        links = []
      end

      # Get local links
      links, locallinks = getLocalLinks(links, fulluri)

      # Each stage only runs while nothing has been found yet:
      # obvious feed links on the same server, then a harder look on the same
      # server, then the same two passes over links to other servers.
      stages = [[locallinks, true], [locallinks, false], [links, true], [links, false]]
      stages.each do |candidates, obvious|
        break unless outfeeds.empty?
        outfeeds.concat(candidates.select { |link|
          (obvious ? isFeedLink?(link) : isXMLRelatedLink?(link)) && isFeed?(link)
        })
      end
    end

    if outfeeds.empty?
      # no A tags, guessing
      # filenames used by popular software:
      guesses = ['atom.xml', # blogger, TypePad
        'feed/', # wordpress
        'feeds/posts/default', # blogspot
        'feed/main/rss20', # fotolog
        'index.atom', # MT, apparently
        'index.rdf', # MT
        'rss.xml', # Dave Winer/Manila
        'index.xml', # MT
        'index.rss'] # Slash

      guesses.each { |guess|
        # separate local so the +uri+ argument stays intact for Syndic8 below
        candidate = URI.join(fulluri, guess).to_s
        outfeeds << candidate if isFeed?(candidate)
      }
    end

    # try with adding ending slash
    if outfeeds.empty? && fulluri !~ /\/$/
      outfeeds = feeds(fulluri + "/", all, querySyndic8, _recurs)
    end

    # still no luck, search Syndic8 for feeds (requires xmlrpclib)
    #_debuglog('still no luck, searching Syndic8')
    outfeeds.concat(getFeedsFromSyndic8(uri)) if querySyndic8 && outfeeds.empty?
    #outfeeds = list(set(outfeeds)) if hasattr(__builtins__, 'set') or __builtins__.has_key('set')
    return outfeeds.flatten
  end

  # Returns the first feed URI found for +uri+, or nil when there is none.
  def feed(uri)
    #todo: give preference to certain feed formats
    feeds(uri).first
  end
end
231
+
232
+ require 'rfeedfinder/version'
data/scripts/txt2html ADDED
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'redcloth'
5
+ require 'syntax/convertors/html'
6
+ require 'erb'
7
+ require File.dirname(__FILE__) + '/../lib/rfeedfinder/version.rb'
8
+
9
+ version = Rfeedfinder::VERSION::STRING
10
+ download = 'http://rubyforge.org/projects/rfeedfinder'
11
+
12
# Adds an English ordinal suffix helper to integers.
# Defined on Integer (the parent of the legacy Fixnum class) so that it works
# both on old interpreters and on those where Fixnum no longer exists.
class Integer
  # Returns the ordinal suffix for this number: 'st', 'nd', 'rd' or 'th'.
  def ordinal
    # teens
    return 'th' if (10..19).include?(self % 100)
    # others
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
25
+
26
class Time
  # Formats the time as "<day><ordinal suffix> <full month name> <year>".
  # Relies on the integer #ordinal helper for the suffix.
  def pretty
    day = mday
    month_name = strftime('%B')
    return [day.to_s + day.ordinal, month_name, year.to_s].join(' ')
  end
end
31
+
32
# Highlights +source+ with the HTML convertor for +syntax+ and removes the
# <pre> at the start of a line and the </pre> at the end of a line.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = convertor.convert(source)
  return highlighted.gsub(%r!^<pre>|</pre>$!,'')
end
35
+
36
# Usage: txt2html source.txt [template.rhtml] > output.html
# The template defaults to website/template.rhtml next to this script.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'

else
  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the template file once it has been read.
template = ERB.new(File.read(template))

title = nil
body = nil
File.open(src) do |fsrc|
  # First line of the source is the page title, the rest is the body.
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Replace every <pre|code syntax="lang">...</pre|code> element with a
  # numbered placeholder and keep its highlighted HTML aside. The \1
  # backreference makes the closing tag match the opening one.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted snippets back in place of their numbered placeholders.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

# The template is evaluated with this script's binding, so the locals defined
# above are visible to it.
$stdout << template.result(binding)