muck-feedbag 0.1.0 → 0.5.100
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +2 -1
- data/lib/feedbag.rb +30 -35
- metadata +21 -30
- data/.gitignore +0 -30
- data/ChangeLog +0 -20
- data/TODO +0 -1
- data/VERSION +0 -1
- data/feedbag.gemspec +0 -20
- data/index.html +0 -115
- data/rails/init.rb +0 -1
- data/test/atom_autodiscovery_test.rb +0 -46
data/README.markdown
CHANGED
data/lib/feedbag.rb
CHANGED
@@ -20,7 +20,6 @@ require "rubygems"
|
|
20
20
|
require "hpricot"
|
21
21
|
require "open-uri"
|
22
22
|
require "net/http"
|
23
|
-
require 'timeout'
|
24
23
|
|
25
24
|
module Feedbag
|
26
25
|
Feed = Struct.new(:url, :title)
|
@@ -66,8 +65,6 @@ module Feedbag
|
|
66
65
|
end
|
67
66
|
#url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
|
68
67
|
|
69
|
-
return self.add_feed(url, nil) if looks_like_feed? url
|
70
|
-
|
71
68
|
# check if feed_valid is avail
|
72
69
|
unless args[:narrow]
|
73
70
|
begin
|
@@ -87,38 +84,36 @@ module Feedbag
|
|
87
84
|
end
|
88
85
|
|
89
86
|
begin
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
end
|
119
|
-
end
|
87
|
+
open(url) do |f|
|
88
|
+
if @content_types.include?(f.content_type.downcase)
|
89
|
+
return self.add_feed(url, nil)
|
90
|
+
end
|
91
|
+
|
92
|
+
doc = Hpricot(f.read)
|
93
|
+
|
94
|
+
if doc.at("base") and doc.at("base")["href"]
|
95
|
+
$base_uri = doc.at("base")["href"]
|
96
|
+
else
|
97
|
+
$base_uri = nil
|
98
|
+
end
|
99
|
+
|
100
|
+
# first with links
|
101
|
+
(doc/"link").each do |l|
|
102
|
+
next unless l["rel"]
|
103
|
+
if l["type"] and @content_types.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
|
104
|
+
self.add_feed(l["href"], url, $base_uri, l["title"])
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
unless args[:narrow]
|
109
|
+
(doc/"a").each do |a|
|
110
|
+
next unless a["href"]
|
111
|
+
if self.looks_like_feed?(a["href"])
|
112
|
+
self.add_feed(a["href"], url, $base_uri, a["title"] || a.inner_html || a['alt']) # multiple fallbacks, first title, then the tag content, then the alt tag (in case of image)
|
113
|
+
end
|
114
|
+
end
|
120
115
|
end
|
121
|
-
|
116
|
+
end
|
122
117
|
rescue Timeout::Error => err
|
123
118
|
$stderr.puts "Timeout error ocurred with `#{url}: #{err}'"
|
124
119
|
rescue OpenURI::HTTPError => the_error
|
@@ -133,7 +128,7 @@ module Feedbag
|
|
133
128
|
end
|
134
129
|
|
135
130
|
def self.looks_like_feed?(url)
|
136
|
-
if url =~ /((
|
131
|
+
if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
|
137
132
|
true
|
138
133
|
else
|
139
134
|
false
|
metadata
CHANGED
@@ -1,57 +1,48 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: muck-feedbag
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.100
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
8
|
-
-
|
7
|
+
- Axiombox
|
8
|
+
- David Moreno
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-
|
13
|
+
date: 2009-02-10 00:00:00 -08:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name:
|
18
|
-
type: :
|
17
|
+
name: hpricot
|
18
|
+
type: :runtime
|
19
19
|
version_requirement:
|
20
20
|
version_requirements: !ruby/object:Gem::Requirement
|
21
21
|
requirements:
|
22
22
|
- - ">="
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version: "0"
|
24
|
+
version: "0.6"
|
25
25
|
version:
|
26
|
-
description:
|
27
|
-
email:
|
26
|
+
description: Ruby's favorite feed auto-discoverty tool
|
27
|
+
email: david@axiombox.com
|
28
28
|
executables: []
|
29
29
|
|
30
30
|
extensions: []
|
31
31
|
|
32
32
|
extra_rdoc_files:
|
33
|
-
- ChangeLog
|
34
33
|
- README.markdown
|
35
|
-
files:
|
36
|
-
- .gitignore
|
37
34
|
- COPYING
|
38
|
-
|
39
|
-
- README.markdown
|
40
|
-
- TODO
|
41
|
-
- VERSION
|
42
|
-
- benchmark/rfeedfinder_benchmark.rb
|
43
|
-
- feedbag.gemspec
|
44
|
-
- index.html
|
35
|
+
files:
|
45
36
|
- lib/feedbag.rb
|
46
|
-
-
|
47
|
-
-
|
37
|
+
- benchmark/rfeedfinder_benchmark.rb
|
38
|
+
- README.markdown
|
39
|
+
- COPYING
|
48
40
|
has_rdoc: true
|
49
|
-
homepage: http://
|
50
|
-
licenses: []
|
51
|
-
|
41
|
+
homepage: http://axiombox.com/feedbag
|
52
42
|
post_install_message:
|
53
43
|
rdoc_options:
|
54
|
-
- --
|
44
|
+
- --main
|
45
|
+
- README.markdown
|
55
46
|
require_paths:
|
56
47
|
- lib
|
57
48
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -69,9 +60,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
69
60
|
requirements: []
|
70
61
|
|
71
62
|
rubyforge_project:
|
72
|
-
rubygems_version: 1.
|
63
|
+
rubygems_version: 1.2.0
|
73
64
|
signing_key:
|
74
|
-
specification_version:
|
75
|
-
summary:
|
76
|
-
test_files:
|
77
|
-
|
65
|
+
specification_version: 2
|
66
|
+
summary: Ruby's favorite feed auto-discovery tool
|
67
|
+
test_files: []
|
68
|
+
|
data/.gitignore
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
*.swp
|
2
|
-
**/*.pid
|
3
|
-
log/*.log
|
4
|
-
log/*.pid
|
5
|
-
tmp
|
6
|
-
.DS_Store
|
7
|
-
public/cache/**/*
|
8
|
-
public/system/**/*
|
9
|
-
doc/**/*
|
10
|
-
db/*.sqlite3
|
11
|
-
.project
|
12
|
-
.loadpath
|
13
|
-
nbproject/
|
14
|
-
.idea
|
15
|
-
testjour.log
|
16
|
-
*.so
|
17
|
-
*.o
|
18
|
-
Makefile
|
19
|
-
mkmf.log
|
20
|
-
*.bundle
|
21
|
-
conftest
|
22
|
-
content/
|
23
|
-
.idea
|
24
|
-
*.sw?
|
25
|
-
.DS_Store
|
26
|
-
coverage
|
27
|
-
rdoc
|
28
|
-
pkg
|
29
|
-
pkg/*
|
30
|
-
log/*
|
data/ChangeLog
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
* 0.5.99 - Tue May 12 12:52:22 EDT 2009
|
2
|
-
- Added rails/init.rb to load easily on a Rails app.
|
3
|
-
|
4
|
-
* 0.5.13.1 - Wed Apr 22 11:16:19 EDT 2009
|
5
|
-
- Changed args on find() from nil to {}
|
6
|
-
|
7
|
-
* 0.5.13 - Wed Apr 22 11:12:40 EDT 2009
|
8
|
-
- Added :narrow option so find() skips feed_validate and A links.
|
9
|
-
|
10
|
-
* 0.5.12 - Fri Mar 20 12:34:48 EDT 2009
|
11
|
-
- Added support for "feed://" URLs
|
12
|
-
|
13
|
-
* 0.5.11 - Sat Mar 7 17:22:30 EST 2009
|
14
|
-
- Benchmark against Rfeedfinder added.
|
15
|
-
|
16
|
-
* 0.5.10 - Wed Mar 4 13:32:33 EST 2009
|
17
|
-
- Feeds whose URLs contained query string arguments were not being
|
18
|
-
auto-discovered -- fixed
|
19
|
-
|
20
|
-
** For previous changes, see the git log
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.1.0
|
data/feedbag.gemspec
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
Gem::Specification.new do |s|
|
4
|
-
s.name = %q{feedbag}
|
5
|
-
s.version = "0.5.103"
|
6
|
-
s.homepage = "http://axiombox.com/feedbag"
|
7
|
-
#s.rubyforge_project = "feedbag"
|
8
|
-
|
9
|
-
s.authors = ["Axiombox", "David Moreno"]
|
10
|
-
s.date = %q{2009-02-10}
|
11
|
-
s.description = %q{Ruby's favorite feed auto-discoverty tool}
|
12
|
-
s.email = %q{david@axiombox.com}
|
13
|
-
s.extra_rdoc_files = ["README.markdown", "COPYING"]
|
14
|
-
s.files = ["lib/feedbag.rb", "benchmark/rfeedfinder_benchmark.rb"]
|
15
|
-
s.has_rdoc = true
|
16
|
-
s.rdoc_options = ["--main", "README.markdown"]
|
17
|
-
s.summary = %q{Ruby's favorite feed auto-discovery tool}
|
18
|
-
s.add_dependency("hpricot", '>= 0.6')
|
19
|
-
end
|
20
|
-
|
data/index.html
DELETED
@@ -1,115 +0,0 @@
|
|
1
|
-
<h1>Feedbag</h1>
|
2
|
-
|
3
|
-
<blockquote>
|
4
|
-
<p>Do you want me to drag my sack across your face?
|
5
|
-
- Glenn Quagmire</p>
|
6
|
-
</blockquote>
|
7
|
-
|
8
|
-
<p>Feedbag is a feed auto-discovery Ruby library. You don't need to know more about it. It is said to be:</p>
|
9
|
-
|
10
|
-
<blockquote>
|
11
|
-
<p>Ruby's favorite auto-discovery tool/library!</p>
|
12
|
-
</blockquote>
|
13
|
-
|
14
|
-
<h3>Quick synopsis</h3>
|
15
|
-
|
16
|
-
<pre><code>>> require "rubygems"
|
17
|
-
=> true
|
18
|
-
>> require "feedbag"
|
19
|
-
=> true
|
20
|
-
>> Feedbag.find "log.damog.net"
|
21
|
-
=> ["http://feeds.feedburner.com/TeoremaDelCerdoInfinito", "http://log.damog.net/comments/feed/"]
|
22
|
-
</code></pre>
|
23
|
-
|
24
|
-
<h3>Installation</h3>
|
25
|
-
|
26
|
-
<pre><code>$ sudo gem install damog-feedbag -s http://gems.github.com/
|
27
|
-
</code></pre>
|
28
|
-
|
29
|
-
<p>Or just grab feedbag.rb and use it on your own project:</p>
|
30
|
-
|
31
|
-
<pre><code>$ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb
|
32
|
-
</code></pre>
|
33
|
-
|
34
|
-
<h2>Tutorial</h2>
|
35
|
-
|
36
|
-
<p>So you want to know more about it.</p>
|
37
|
-
|
38
|
-
<p>OK, if the URL passed to the find method is a feed itself, that only feed URL will be returned.</p>
|
39
|
-
|
40
|
-
<pre><code>>> Feedbag.find "github.com/damog.atom"
|
41
|
-
=> ["http://github.com/damog.atom"]
|
42
|
-
>>
|
43
|
-
</code></pre>
|
44
|
-
|
45
|
-
<p>Otherwise, it will always return LINK feeds first and A (anchor tag) feeds later. Among the A feeds, the ones hosted on the same host as the given URL will have higher priority:</p>
|
46
|
-
|
47
|
-
<pre><code>>> Feedbag.find "http://ve.planetalinux.org"
|
48
|
-
=> ["http://feedproxy.google.com/PlanetaLinuxVenezuela", "http://rendergraf.wordpress.com/feed/", "http://rootweiller.wordpress.com/feed/", "http://skatox.com/blog/feed/", "http://kodegeek.com/atom.xml", "http://blog.0x29.com.ve/?feed=rss2&cat=8"]
|
49
|
-
>>
|
50
|
-
</code></pre>
|
51
|
-
|
52
|
-
<p>In your application you should, most of the time, take only the very first element of the array:</p>
|
53
|
-
|
54
|
-
<pre><code>>> Feedbag.find("planet.debian.org").first(3)
|
55
|
-
=> ["http://planet.debian.org/rss10.xml", "http://planet.debian.org/rss20.xml", "http://planet.debian.org/atom.xml"]
|
56
|
-
>>
|
57
|
-
</code></pre>
|
58
|
-
|
59
|
-
<p>(Try running that same example without the "first" method. That example's host is a blog aggregator, so it has hundreds of feed URLs:)</p>
|
60
|
-
|
61
|
-
<pre><code>>> Feedbag.find("planet.debian.org").size
|
62
|
-
=> 104
|
63
|
-
>>
|
64
|
-
</code></pre>
|
65
|
-
|
66
|
-
<p>Feedbag will find them all, but it will return the most important ones as the first elements of the returned array.</p>
|
67
|
-
|
68
|
-
<pre><code>>> Feedbag.find("cnn.com")
|
69
|
-
=> ["http://rss.cnn.com/rss/cnn_topstories.rss", "http://rss.cnn.com/rss/cnn_latest.rss", "http://rss.cnn.com/services/podcasting/robinmeade/rss.xml"]
|
70
|
-
>>
|
71
|
-
</code></pre>
|
72
|
-
|
73
|
-
<h3>Why should you use it?</h3>
|
74
|
-
|
75
|
-
<ul>
|
76
|
-
<li>Because it's cool.</li>
|
77
|
-
<li>Because it only uses <a href="https://code.whytheluckystiff.net/hpricot/">Hpricot</a> as dependency.</li>
|
78
|
-
<li>Because it follows modern feed filename conventions (like the ones used by WordPress blogs, Blogger, etc.).</li>
|
79
|
-
<li>Because it's a single file you can embed easily in your application.</li>
|
80
|
-
<li>Because it passes most of Mark Pilgrim's <a href="http://diveintomark.org/tests/client/autodiscovery/">Atom auto-discovery test suite</a>. It doesn't pass them all because some of those tests are broken (citation needed).</li>
|
81
|
-
</ul>
|
82
|
-
|
83
|
-
<h3>Why did I build it?</h3>
|
84
|
-
|
85
|
-
<ul>
|
86
|
-
<li>Because I liked Benjamin Trott's <a href="http://search.cpan.org/~btrott/Feed-Find-0.06/lib/Feed/Find.pm">Feed::Find</a>.</li>
|
87
|
-
<li>Because I thought it would be good to have Feed::Find's functionality in Ruby.</li>
|
88
|
-
<li>Because I thought it was going to be easy to maintain.</li>
|
89
|
-
<li>Because I was going to use it on <a href="http://github.com/damog/rfeed">rFeed</a>.</li>
|
90
|
-
<li>And finally, because I didn't know <a href="http://rfeedfinder.rubyforge.org/">rfeedfinder</a> existed :-)</li>
|
91
|
-
</ul>
|
92
|
-
|
93
|
-
<h3>Bugs</h3>
|
94
|
-
|
95
|
-
<p>Please report bugs to <a href="mailto:rt@support.axiombox.com">rt@support.axiombox.com</a> or directly to the author.</p>
|
96
|
-
|
97
|
-
<h3>Contribute</h3>
|
98
|
-
|
99
|
-
<blockquote>
|
100
|
-
<p>git clone git://github.com/damog/feedbag.git</p>
|
101
|
-
</blockquote>
|
102
|
-
|
103
|
-
<p>...patch, build, hack and make pull requests. I'll be glad.</p>
|
104
|
-
|
105
|
-
<h3>Author</h3>
|
106
|
-
|
107
|
-
<p><a href="http://damog.net/">David Moreno</a> <<a href="mailto:david@axiombox.com">david@axiombox.com</a>>.</p>
|
108
|
-
|
109
|
-
<h3>Copyright</h3>
|
110
|
-
|
111
|
-
<p>This is free software. See <a href="http://github.com/damog/feedbag/master/COPYING">COPYING</a> for more information.</p>
|
112
|
-
|
113
|
-
<h3>Thanks</h3>
|
114
|
-
|
115
|
-
<p><a href="http://maggit.net">Raquel</a>, for making <a href="http://axiombox.com">Axiombox</a> and most of my dreams possible. Also, <a href="http://github.com">GitHub</a> for making a nice code sharing service that doesn't suck.</p>
|
data/rails/init.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require File.join File.dirname(__FILE__), "..", "lib", "feedbag"
|
@@ -1,46 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
require "#{File.dirname(__FILE__)}/../feedbag"
|
4
|
-
require "test/unit"
|
5
|
-
require "open-uri"
|
6
|
-
require "hpricot"
|
7
|
-
require "pp"
|
8
|
-
|
9
|
-
class AtomAutoDiscoveryTest < Test::Unit::TestCase
|
10
|
-
def test_autodisc
|
11
|
-
base_url = "http://diveintomark.org/tests/client/autodiscovery/"
|
12
|
-
url = base_url + "html4-001.html"
|
13
|
-
|
14
|
-
i = 1
|
15
|
-
puts "trying now with #{url}"
|
16
|
-
while(i)
|
17
|
-
puts
|
18
|
-
i = 0 # unless otherwise found
|
19
|
-
|
20
|
-
f = Feedbag.find url
|
21
|
-
|
22
|
-
assert_instance_of Array, f
|
23
|
-
assert f.size == 1, "Feedbag didn't find a feed on #{url} or found more than one"
|
24
|
-
|
25
|
-
puts " found #{f[0]}"
|
26
|
-
feed = Hpricot(open(f[0]))
|
27
|
-
|
28
|
-
(feed/"link").each do |l|
|
29
|
-
next unless l["rel"] == "alternate"
|
30
|
-
assert_equal l["href"], url
|
31
|
-
end
|
32
|
-
|
33
|
-
# ahora me voy al siguiente
|
34
|
-
html = Hpricot(open(url))
|
35
|
-
(html/"link").each do |l|
|
36
|
-
next unless l["rel"] == "next"
|
37
|
-
url = URI.parse(base_url).merge(l["href"]).to_s
|
38
|
-
puts "trying now with #{url}"
|
39
|
-
i = 1
|
40
|
-
end
|
41
|
-
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
|
46
|
-
end
|