damog-feedbag 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +76 -0
- data/feedbag.rb +137 -0
- metadata +64 -0
data/README
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
Feedbag
|
2
|
+
--------------------------------------------------------
|
3
|
+
Do you want me to drag my sack across your face?
|
4
|
+
- Glenn Quagmire
|
5
|
+
|
6
|
+
Feedbag is a feed auto-discovery Ruby library. You don't need to know
|
7
|
+
more about it.
|
8
|
+
|
9
|
+
Quick synopsis
|
10
|
+
--------------
|
11
|
+
~/axiombox/feedbag :master $ irb
|
12
|
+
>> require "feedbag"
|
13
|
+
=> true
|
14
|
+
>> Feedbag.find "http://log.damog.net/"
|
15
|
+
=> ["http://feeds.feedburner.com/TeoremaDelCerdoInfinito", "http://log.damog.net/comments/feed/"]
|
16
|
+
>>
|
17
|
+
|
18
|
+
Tutorial
|
19
|
+
--------
|
20
|
+
So you want to know more about it.
|
21
|
+
|
22
|
+
OK, if the URL passed to the find method is a feed itself, that only
|
23
|
+
feed URL will be returned.
|
24
|
+
|
25
|
+
>> Feedbag.find "github.com/damog.atom"
|
26
|
+
=> ["http://github.com/damog.atom"]
|
27
|
+
>>
|
28
|
+
|
29
|
+
Otherwise, it will always return LINK feeds first, A (anchor tags) feeds
|
30
|
+
later. Among A feeds, the ones hosted on the same host as the URL will
|
31
|
+
have higher priority:
|
32
|
+
|
33
|
+
>> Feedbag.find "http://ve.planetalinux.org"
|
34
|
+
=> ["http://feedproxy.google.com/PlanetaLinuxVenezuela", "http://rendergraf.wordpress.com/feed/", "http://rootweiller.wordpress.com/feed/", "http://skatox.com/blog/feed/", "http://kodegeek.com/atom.xml", "http://blog.0x29.com.ve/?feed=rss2&cat=8"]
|
35
|
+
>>
|
36
|
+
|
37
|
+
In your application you should only take the very first element of
|
38
|
+
the array, most of the time:
|
39
|
+
|
40
|
+
>> Feedbag.find("planet.debian.org").first(3)
|
41
|
+
=> ["http://planet.debian.org/rss10.xml", "http://planet.debian.org/rss20.xml", "http://planet.debian.org/atom.xml"]
|
42
|
+
>>
|
43
|
+
|
44
|
+
(Try running that same example without the "first" method. That
|
45
|
+
example's host is a blog aggregator, so it has hundreds of feed URLs:
|
46
|
+
|
47
|
+
>> Feedbag.find("planet.debian.org").size
|
48
|
+
=> 104
|
49
|
+
>>
|
50
|
+
|
51
|
+
Feedbag will find them all, but it will return the most important ones
|
52
|
+
as the first elements of the returned array.
|
53
|
+
|
54
|
+
>> Feedbag.find("cnn.com")
|
55
|
+
=> ["http://rss.cnn.com/rss/cnn_topstories.rss", "http://rss.cnn.com/rss/cnn_latest.rss", "http://rss.cnn.com/services/podcasting/robinmeade/rss.xml"]
|
56
|
+
>>
|
57
|
+
|
58
|
+
Why should you use it?
|
59
|
+
----------------------
|
60
|
+
- Because it's cool.
|
61
|
+
- Because it only uses Hpricot as a dependency.
|
62
|
+
- Because it follows modern feed filename conventions (like the ones
|
63
|
+
used by WordPress blogs, or Blogger, etc).
|
64
|
+
- Because it's a single file you can embed easily in your application.
|
65
|
+
- Because it passes most of Mark Pilgrim's Atom auto-discovery test
|
66
|
+
suite. It doesn't pass them all because some of those tests are
|
67
|
+
broken (citation needed).
|
68
|
+
|
69
|
+
Why did you build it?
|
70
|
+
---------------------
|
71
|
+
- Because I liked Benjamin Trott's Feed::Find.
|
72
|
+
- Because I thought it would be good to have Feed::Find's functionality
|
73
|
+
in Ruby.
|
74
|
+
- Because I thought it was going to be easy to maintain.
|
75
|
+
- Because I was going to use it on rFeed.
|
76
|
+
|
data/feedbag.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# Copyright Axiombox (c) 2008
|
4
|
+
# David Moreno <david@axiombox.com> (c) 2008
|
5
|
+
|
6
|
+
# This program is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This program is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
|
20
|
+
# Originally written by David Moreno <david@axiombox.com>
|
21
|
+
# mainly based on Benjamin Trott's Feed::Find
|
22
|
+
|
23
|
+
require "rubygems"
|
24
|
+
require "hpricot"
|
25
|
+
require "open-uri"
|
26
|
+
require "net/http"
|
27
|
+
|
28
|
+
# Feed auto-discovery: given a page URL, collects the feed URLs it advertises
# through <link> tags and through anchors whose href looks like a feed.
#
# Results are accumulated in the global $feeds array, which is reset on every
# call to Feedbag.find. The globals ($feeds, $base_uri) are kept as they were
# so that any code reading them keeps working.
module Feedbag

  # MIME types that identify a feed, both for the fetched document itself and
  # for the "type" attribute of <link> tags.
  @content_types = [
    'application/x.atom+xml',
    'application/atom+xml',
    'application/xml',
    'text/xml',
    'application/rss+xml',
    'application/rdf+xml',
  ].freeze

  $feeds = []
  $base_uri = nil

  # Discovers the feeds advertised by +url+.
  #
  # If the document served at +url+ has a feed content type, that URL is the
  # only result. Otherwise the HTML is scanned: <link> feeds come first, then
  # anchors that look like feeds and contain a slash or the page's host, then
  # every remaining feed-looking anchor.
  #
  # HTTP and socket errors are reported on stderr and whatever was collected
  # so far (usually nothing) is returned.
  #
  # @param url [String] page or feed URL; the scheme may be omitted
  # @return [Array<String>] feed URLs in priority order, without duplicates
  # @raise [URI::InvalidURIError] if +url+ cannot be parsed
  def self.find(url)
    $feeds = []
    $base_uri = nil

    # Keep port and query string: rebuilding the URL from scheme/host/path
    # alone would silently fetch a different resource.
    url_uri = normalize_url(url)
    url = url_uri.to_s
    host = url_uri.host.to_s

    begin
      open_url(url) do |f|
        if @content_types.include?(f.content_type.downcase)
          add_feed(url, nil)
          return $feeds
        end

        doc = Hpricot(f.read)

        base = doc.at("base")
        $base_uri = base && base["href"]

        # first with links
        (doc/"link").each do |l|
          next unless l["rel"] && l["type"] && l["href"]
          next unless @content_types.include?(l["type"].downcase.strip)
          if l["rel"].downcase =~ /alternate/i || l["rel"] == "service.feed"
            add_feed(l["href"], url, $base_uri)
          end
        end

        # then anchors; add_feed de-duplicates, so the second pass only
        # appends the anchors the first pass did not take.
        hrefs = (doc/"a").map { |a| a["href"] }.compact
        hrefs = hrefs.select { |href| looks_like_feed?(href) }

        hrefs.each do |href|
          add_feed(href, url, $base_uri) if href =~ /\// || href.include?(host)
        end
        hrefs.each { |href| add_feed(href, url, $base_uri) }
      end
    rescue OpenURI::HTTPError => the_error
      warn "Error ocurred with `#{url}': #{the_error}"
    rescue SocketError => err
      warn "Socket error ocurred with: `#{url}': #{err}"
    end

    $feeds
  end

  # Tells whether +url+ follows a common feed naming convention: a .rdf, .xml
  # or .rss extension, a feed=rss / feed=atom query parameter, or a path that
  # ends in "atom/" or "feed/".
  #
  # @param url [String]
  # @return [Boolean]
  def self.looks_like_feed?(url)
    url =~ /(\.(rdf|xml|rss)$|feed=(rss|atom)|(atom|feed)\/$)/i ? true : false
  end

  # Resolves +feed_url+ to an absolute URL and appends it to $feeds unless it
  # is already there. A leading "feed:" pseudo-scheme is stripped first.
  #
  # An href that cannot be parsed or resolved is reported on stderr and
  # skipped; it never aborts the process.
  #
  # @param feed_url [String] href as found in the document
  # @param orig_url [String, nil] URL of the document the href came from
  # @param base_uri [String, nil] value of the document's <base href>, if any
  # @return [Array<String>] the current $feeds array
  def self.add_feed(feed_url, orig_url, base_uri = nil)
    url = feed_url.sub(/^feed:/, '').strip

    begin
      if base_uri
        base = URI.parse(base_uri)
        # A relative <base href> is itself relative to the document URL.
        base = URI.parse(orig_url).merge(base) if base.relative? && orig_url
        url = base.merge(url).to_s
      end

      uri = URI.parse(url)
      url = URI.parse(orig_url).merge(uri).to_s if orig_url && !uri.absolute?
    rescue URI::Error
      warn "Error with `#{url}'"
      return $feeds
    end

    $feeds.push(url) unless $feeds.include?(url)
    $feeds
  end

  # Performs a GET on +uri+ and tells whether the response is a success.
  # Currently not called by find; +orig_url+ is accepted but unused.
  #
  # @param uri [URI::HTTP]
  # @param orig_url [String]
  # @return [Boolean]
  def self._is_http_valid(uri, orig_url)
    Net::HTTP.get_response(uri).is_a?(Net::HTTPSuccess)
  end

  # Parses +url+, assuming "http://" when no "scheme://" prefix is present.
  def self.normalize_url(url)
    url = url.strip
    url = "http://#{url}" unless url =~ %r{\A[a-z][a-z0-9+.\-]*://}i
    URI.parse(url)
  end

  # Opens +url+ through open-uri. Where URI.open is public it is used,
  # because Kernel#open no longer accepts URLs on Ruby 3.0+; otherwise
  # this falls back to the open-uri patched Kernel#open.
  def self.open_url(url, &block)
    if URI.respond_to?(:open)
      URI.open(url, &block)
    else
      open(url, &block)
    end
  end

  private_class_method :normalize_url, :open_url
end
|
137
|
+
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: damog-feedbag
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.2"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Axiombox
|
8
|
+
- David Moreno
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2008-12-25 00:00:00 -08:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: hpricot
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
description: Ruby's favorite feed auto-discovery tool
|
26
|
+
email: david@axiombox.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README
|
33
|
+
files:
|
34
|
+
- feedbag.rb
|
35
|
+
- README
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: http://axiombox.com/feedbag
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options:
|
40
|
+
- --main
|
41
|
+
- README
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
requirements: []
|
57
|
+
|
58
|
+
rubyforge_project: feedbag
|
59
|
+
rubygems_version: 1.2.0
|
60
|
+
signing_key:
|
61
|
+
specification_version: 2
|
62
|
+
summary: Ruby's favorite feed auto-discovery tool
|
63
|
+
test_files: []
|
64
|
+
|