muck-feedbag 0.1.0 → 0.5.100

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,7 @@
1
1
  Feedbag
2
2
  =======
3
- Forked version of feedback that returns title and url.
3
+ > Do you want me to drag my sack across your face?
4
+ > - Glenn Quagmire
4
5
 
5
6
  Feedbag is a feed auto-discovery Ruby library. You don't need to know more about it. It is said to be:
6
7
 
@@ -20,7 +20,6 @@ require "rubygems"
20
20
  require "hpricot"
21
21
  require "open-uri"
22
22
  require "net/http"
23
- require 'timeout'
24
23
 
25
24
  module Feedbag
26
25
  Feed = Struct.new(:url, :title)
@@ -66,8 +65,6 @@ module Feedbag
66
65
  end
67
66
  #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
68
67
 
69
- return self.add_feed(url, nil) if looks_like_feed? url
70
-
71
68
  # check if feed_valid is avail
72
69
  unless args[:narrow]
73
70
  begin
@@ -87,38 +84,36 @@ module Feedbag
87
84
  end
88
85
 
89
86
  begin
90
- Timeout::timeout(10) do
91
- open(url) do |f|
92
- if @content_types.include?(f.content_type.downcase)
93
- return self.add_feed(url, nil)
94
- end
95
-
96
- doc = Hpricot(f.read)
97
-
98
- if doc.at("base") and doc.at("base")["href"]
99
- $base_uri = doc.at("base")["href"]
100
- else
101
- $base_uri = nil
102
- end
103
-
104
- # first with links
105
- (doc/"link").each do |l|
106
- next unless l["rel"]
107
- if l["type"] and @content_types.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
108
- self.add_feed(l["href"], url, $base_uri, l["title"])
109
- end
110
- end
111
-
112
- unless args[:narrow]
113
- (doc/"a").each do |a|
114
- next unless a["href"]
115
- if self.looks_like_feed?(a["href"])
116
- self.add_feed(a["href"], url, $base_uri, a["title"] || a.inner_html || a['alt']) # multiple fallbacks, first title, then the tag content, then the alt tag (in case of image)
117
- end
118
- end
119
- end
87
+ open(url) do |f|
88
+ if @content_types.include?(f.content_type.downcase)
89
+ return self.add_feed(url, nil)
90
+ end
91
+
92
+ doc = Hpricot(f.read)
93
+
94
+ if doc.at("base") and doc.at("base")["href"]
95
+ $base_uri = doc.at("base")["href"]
96
+ else
97
+ $base_uri = nil
98
+ end
99
+
100
+ # first with links
101
+ (doc/"link").each do |l|
102
+ next unless l["rel"]
103
+ if l["type"] and @content_types.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
104
+ self.add_feed(l["href"], url, $base_uri, l["title"])
105
+ end
106
+ end
107
+
108
+ unless args[:narrow]
109
+ (doc/"a").each do |a|
110
+ next unless a["href"]
111
+ if self.looks_like_feed?(a["href"])
112
+ self.add_feed(a["href"], url, $base_uri, a["title"] || a.inner_html || a['alt']) # multiple fallbacks, first title, then the tag content, then the alt tag (in case of image)
113
+ end
114
+ end
120
115
  end
121
- end
116
+ end
122
117
  rescue Timeout::Error => err
123
118
  $stderr.puts "Timeout error ocurred with `#{url}: #{err}'"
124
119
  rescue OpenURI::HTTPError => the_error
@@ -133,7 +128,7 @@ module Feedbag
133
128
  end
134
129
 
135
130
  def self.looks_like_feed?(url)
136
- if url =~ /((\.|\/)(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
131
+ if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
137
132
  true
138
133
  else
139
134
  false
metadata CHANGED
@@ -1,57 +1,48 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: muck-feedbag
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.5.100
5
5
  platform: ruby
6
6
  authors:
7
- - Joel Duffin
8
- - Justin Ball
7
+ - Axiombox
8
+ - David Moreno
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-11-10 00:00:00 -07:00
13
+ date: 2009-02-10 00:00:00 -08:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: shoulda
18
- type: :development
17
+ name: hpricot
18
+ type: :runtime
19
19
  version_requirement:
20
20
  version_requirements: !ruby/object:Gem::Requirement
21
21
  requirements:
22
22
  - - ">="
23
23
  - !ruby/object:Gem::Version
24
- version: "0"
24
+ version: "0.6"
25
25
  version:
26
- description: This gem will return title and url for each feed discovered at a given url
27
- email: justin@tatemae.com
26
+ description: Ruby's favorite feed auto-discoverty tool
27
+ email: david@axiombox.com
28
28
  executables: []
29
29
 
30
30
  extensions: []
31
31
 
32
32
  extra_rdoc_files:
33
- - ChangeLog
34
33
  - README.markdown
35
- files:
36
- - .gitignore
37
34
  - COPYING
38
- - ChangeLog
39
- - README.markdown
40
- - TODO
41
- - VERSION
42
- - benchmark/rfeedfinder_benchmark.rb
43
- - feedbag.gemspec
44
- - index.html
35
+ files:
45
36
  - lib/feedbag.rb
46
- - rails/init.rb
47
- - test/atom_autodiscovery_test.rb
37
+ - benchmark/rfeedfinder_benchmark.rb
38
+ - README.markdown
39
+ - COPYING
48
40
  has_rdoc: true
49
- homepage: http://github.com/tatemae/muck-feedbag
50
- licenses: []
51
-
41
+ homepage: http://axiombox.com/feedbag
52
42
  post_install_message:
53
43
  rdoc_options:
54
- - --charset=UTF-8
44
+ - --main
45
+ - README.markdown
55
46
  require_paths:
56
47
  - lib
57
48
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -69,9 +60,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
69
60
  requirements: []
70
61
 
71
62
  rubyforge_project:
72
- rubygems_version: 1.3.5
63
+ rubygems_version: 1.2.0
73
64
  signing_key:
74
- specification_version: 3
75
- summary: Fork of the feedbag gem.
76
- test_files:
77
- - test/atom_autodiscovery_test.rb
65
+ specification_version: 2
66
+ summary: Ruby's favorite feed auto-discovery tool
67
+ test_files: []
68
+
data/.gitignore DELETED
@@ -1,30 +0,0 @@
1
- *.swp
2
- **/*.pid
3
- log/*.log
4
- log/*.pid
5
- tmp
6
- .DS_Store
7
- public/cache/**/*
8
- public/system/**/*
9
- doc/**/*
10
- db/*.sqlite3
11
- .project
12
- .loadpath
13
- nbproject/
14
- .idea
15
- testjour.log
16
- *.so
17
- *.o
18
- Makefile
19
- mkmf.log
20
- *.bundle
21
- conftest
22
- content/
23
- .idea
24
- *.sw?
25
- .DS_Store
26
- coverage
27
- rdoc
28
- pkg
29
- pkg/*
30
- log/*
data/ChangeLog DELETED
@@ -1,20 +0,0 @@
1
- * 0.5.99 - Tue May 12 12:52:22 EDT 2009
2
- - Added rails/init.rb to load easily on a Rails app.
3
-
4
- * 0.5.13.1 - Wed Apr 22 11:16:19 EDT 2009
5
- - Changed args on find() from nil to {}
6
-
7
- * 0.5.13 - Wed Apr 22 11:12:40 EDT 2009
8
- - Added :narrow option so find() skips feed_validate and A links.
9
-
10
- * 0.5.12 - Fri Mar 20 12:34:48 EDT 2009
11
- - Added support for "feed://" URLs
12
-
13
- * 0.5.11 - Sat Mar 7 17:22:30 EST 2009
14
- - Benchmark against Rfeedfinder added.
15
-
16
- * 0.5.10 - Wed Mar 4 13:32:33 EST 2009
17
- - Feeds whose URLs contained query string arguments were not being
18
- auto-discovered -- fixed
19
-
20
- ** For previous changes, see the git log
data/TODO DELETED
@@ -1 +0,0 @@
1
- - Document Feedbag.feed?
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.1.0
@@ -1,20 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- Gem::Specification.new do |s|
4
- s.name = %q{feedbag}
5
- s.version = "0.5.103"
6
- s.homepage = "http://axiombox.com/feedbag"
7
- #s.rubyforge_project = "feedbag"
8
-
9
- s.authors = ["Axiombox", "David Moreno"]
10
- s.date = %q{2009-02-10}
11
- s.description = %q{Ruby's favorite feed auto-discoverty tool}
12
- s.email = %q{david@axiombox.com}
13
- s.extra_rdoc_files = ["README.markdown", "COPYING"]
14
- s.files = ["lib/feedbag.rb", "benchmark/rfeedfinder_benchmark.rb"]
15
- s.has_rdoc = true
16
- s.rdoc_options = ["--main", "README.markdown"]
17
- s.summary = %q{Ruby's favorite feed auto-discovery tool}
18
- s.add_dependency("hpricot", '>= 0.6')
19
- end
20
-
data/index.html DELETED
@@ -1,115 +0,0 @@
1
- <h1>Feedbag</h1>
2
-
3
- <blockquote>
4
- <p>Do you want me to drag my sack across your face?
5
- - Glenn Quagmire</p>
6
- </blockquote>
7
-
8
- <p>Feedbag is a feed auto-discovery Ruby library. You don't need to know more about it. It is said to be:</p>
9
-
10
- <blockquote>
11
- <p>Ruby's favorite auto-discovery tool/library!</p>
12
- </blockquote>
13
-
14
- <h3>Quick synopsis</h3>
15
-
16
- <pre><code>&gt;&gt; require "rubygems"
17
- =&gt; true
18
- &gt;&gt; require "feedbag"
19
- =&gt; true
20
- &gt;&gt; Feedbag.find "log.damog.net"
21
- =&gt; ["http://feeds.feedburner.com/TeoremaDelCerdoInfinito", "http://log.damog.net/comments/feed/"]
22
- </code></pre>
23
-
24
- <h3>Installation</h3>
25
-
26
- <pre><code>$ sudo gem install damog-feedbag -s http://gems.github.com/
27
- </code></pre>
28
-
29
- <p>Or just grab feedbag.rb and use it on your own project:</p>
30
-
31
- <pre><code>$ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb
32
- </code></pre>
33
-
34
- <h2>Tutorial</h2>
35
-
36
- <p>So you want to know more about it.</p>
37
-
38
- <p>OK, if the URL passed to the find method is a feed itself, that only feed URL will be returned.</p>
39
-
40
- <pre><code>&gt;&gt; Feedbag.find "github.com/damog.atom"
41
- =&gt; ["http://github.com/damog.atom"]
42
- &gt;&gt;
43
- </code></pre>
44
-
45
- <p>Otherwise, it will always return LINK feeds first, A (anchor tags) feeds later. Between A feeds, the ones hosted on the same URL's host, will have larger priority:</p>
46
-
47
- <pre><code>&gt;&gt; Feedbag.find "http://ve.planetalinux.org"
48
- =&gt; ["http://feedproxy.google.com/PlanetaLinuxVenezuela", "http://rendergraf.wordpress.com/feed/", "http://rootweiller.wordpress.com/feed/", "http://skatox.com/blog/feed/", "http://kodegeek.com/atom.xml", "http://blog.0x29.com.ve/?feed=rss2&amp;cat=8"]
49
- &gt;&gt;
50
- </code></pre>
51
-
52
- <p>On your application you should only take the very first element of the array, most of the times:</p>
53
-
54
- <pre><code>&gt;&gt; Feedbag.find("planet.debian.org").first(3)
55
- =&gt; ["http://planet.debian.org/rss10.xml", "http://planet.debian.org/rss20.xml", "http://planet.debian.org/atom.xml"]
56
- &gt;&gt;
57
- </code></pre>
58
-
59
- <p>(Try running that same example without the "first" method. That example's host is a blog aggregator, so it has hundreds of feed URLs:)</p>
60
-
61
- <pre><code>&gt;&gt; Feedbag.find("planet.debian.org").size
62
- =&gt; 104
63
- &gt;&gt;
64
- </code></pre>
65
-
66
- <p>Feedbag will find them all, but it will return the most important ones on the first elements on the array returned.</p>
67
-
68
- <pre><code>&gt;&gt; Feedbag.find("cnn.com")
69
- =&gt; ["http://rss.cnn.com/rss/cnn_topstories.rss", "http://rss.cnn.com/rss/cnn_latest.rss", "http://rss.cnn.com/services/podcasting/robinmeade/rss.xml"]
70
- &gt;&gt;
71
- </code></pre>
72
-
73
- <h3>Why should you use it?</h3>
74
-
75
- <ul>
76
- <li>Because it's cool.</li>
77
- <li>Because it only uses <a href="https://code.whytheluckystiff.net/hpricot/">Hpricot</a> as dependency.</li>
78
- <li>Because it follows modern feed filename conventions (like those ones used by WordPress blogs, or Blogger, etc).</li>
79
- <li>Because it's a single file you can embed easily in your application.</li>
80
- <li>Because it passes most of the Mark Pilgrim's <a href="http://diveintomark.org/tests/client/autodiscovery/">Atom auto-discovery test suite</a>. It doesn't pass them all because some of those tests are broken (citation needed).</li>
81
- </ul>
82
-
83
- <h3>Why did I build it?</h3>
84
-
85
- <ul>
86
- <li>Because I liked Benjamin Trott's <a href="http://search.cpan.org/~btrott/Feed-Find-0.06/lib/Feed/Find.pm">Feed::Find</a>.</li>
87
- <li>Because I thought it would be good to have Feed::Find's functionality in Ruby.</li>
88
- <li>Because I thought it was going to be easy to maintain.</li>
89
- <li>Because I was going to use it on <a href="http://github.com/damog/rfeed">rFeed</a>.</li>
90
- <li>And finally, because I didn't know <a href="http://rfeedfinder.rubyforge.org/">rfeedfinder</a> existed :-)</li>
91
- </ul>
92
-
93
- <h3>Bugs</h3>
94
-
95
- <p>Please, report bugs to <a href="rt@support.axiombox.com">rt@support.axiombox.com</a> or directly to the author.</p>
96
-
97
- <h3>Contribute</h3>
98
-
99
- <blockquote>
100
- <p>git clone git://github.com/damog/feedbag.git</p>
101
- </blockquote>
102
-
103
- <p>...patch, build, hack and make pull requests. I'll be glad.</p>
104
-
105
- <h3>Author</h3>
106
-
107
- <p><a href="http://damog.net/">David Moreno</a> &lt;<a href="mailto:david@axiombox.com">david@axiombox.com</a>>.</p>
108
-
109
- <h3>Copyright</h3>
110
-
111
- <p>This is free software. See <a href="http://github.com/damog/feedbag/master/COPYING">COPYING</a> for more information.</p>
112
-
113
- <h3>Thanks</h3>
114
-
115
- <p><a href="http://maggit.net">Raquel</a>, for making <a href="http://axiombox.com">Axiombox</a> and most of my dreams possible. Also, <a href="http://github.com">GitHub</a> for making a nice code sharing service that doesn't suck.</p>
@@ -1 +0,0 @@
1
- require File.join File.dirname(__FILE__), "..", "lib", "feedbag"
@@ -1,46 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require "#{File.dirname(__FILE__)}/../feedbag"
4
- require "test/unit"
5
- require "open-uri"
6
- require "hpricot"
7
- require "pp"
8
-
9
- class AtomAutoDiscoveryTest < Test::Unit::TestCase
10
- def test_autodisc
11
- base_url = "http://diveintomark.org/tests/client/autodiscovery/"
12
- url = base_url + "html4-001.html"
13
-
14
- i = 1
15
- puts "trying now with #{url}"
16
- while(i)
17
- puts
18
- i = 0 # unless otherwise found
19
-
20
- f = Feedbag.find url
21
-
22
- assert_instance_of Array, f
23
- assert f.size == 1, "Feedbag didn't find a feed on #{url} or found more than one"
24
-
25
- puts " found #{f[0]}"
26
- feed = Hpricot(open(f[0]))
27
-
28
- (feed/"link").each do |l|
29
- next unless l["rel"] == "alternate"
30
- assert_equal l["href"], url
31
- end
32
-
33
- # ahora me voy al siguiente
34
- html = Hpricot(open(url))
35
- (html/"link").each do |l|
36
- next unless l["rel"] == "next"
37
- url = URI.parse(base_url).merge(l["href"]).to_s
38
- puts "trying now with #{url}"
39
- i = 1
40
- end
41
-
42
- end
43
- end
44
-
45
-
46
- end