muck-feedbag 0.1.0 → 0.5.100

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  Feedbag
2
2
  =======
3
- Forked version of feedbag that returns title and url.
3
+ > Do you want me to drag my sack across your face?
4
+ > - Glenn Quagmire
4
5
 
5
6
  Feedbag is a feed auto-discovery Ruby library. You don't need to know more about it. It is said to be:
6
7
 
@@ -20,7 +20,6 @@ require "rubygems"
20
20
  require "hpricot"
21
21
  require "open-uri"
22
22
  require "net/http"
23
- require 'timeout'
24
23
 
25
24
  module Feedbag
26
25
  Feed = Struct.new(:url, :title)
@@ -66,8 +65,6 @@ module Feedbag
66
65
  end
67
66
  #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
68
67
 
69
- return self.add_feed(url, nil) if looks_like_feed? url
70
-
71
68
  # check if feed_valid is avail
72
69
  unless args[:narrow]
73
70
  begin
@@ -87,38 +84,36 @@ module Feedbag
87
84
  end
88
85
 
89
86
  begin
90
- Timeout::timeout(10) do
91
- open(url) do |f|
92
- if @content_types.include?(f.content_type.downcase)
93
- return self.add_feed(url, nil)
94
- end
95
-
96
- doc = Hpricot(f.read)
97
-
98
- if doc.at("base") and doc.at("base")["href"]
99
- $base_uri = doc.at("base")["href"]
100
- else
101
- $base_uri = nil
102
- end
103
-
104
- # first with links
105
- (doc/"link").each do |l|
106
- next unless l["rel"]
107
- if l["type"] and @content_types.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
108
- self.add_feed(l["href"], url, $base_uri, l["title"])
109
- end
110
- end
111
-
112
- unless args[:narrow]
113
- (doc/"a").each do |a|
114
- next unless a["href"]
115
- if self.looks_like_feed?(a["href"])
116
- self.add_feed(a["href"], url, $base_uri, a["title"] || a.inner_html || a['alt']) # multiple fallbacks, first title, then the tag content, then the alt tag (in case of image)
117
- end
118
- end
119
- end
87
+ open(url) do |f|
88
+ if @content_types.include?(f.content_type.downcase)
89
+ return self.add_feed(url, nil)
90
+ end
91
+
92
+ doc = Hpricot(f.read)
93
+
94
+ if doc.at("base") and doc.at("base")["href"]
95
+ $base_uri = doc.at("base")["href"]
96
+ else
97
+ $base_uri = nil
98
+ end
99
+
100
+ # first with links
101
+ (doc/"link").each do |l|
102
+ next unless l["rel"]
103
+ if l["type"] and @content_types.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
104
+ self.add_feed(l["href"], url, $base_uri, l["title"])
105
+ end
106
+ end
107
+
108
+ unless args[:narrow]
109
+ (doc/"a").each do |a|
110
+ next unless a["href"]
111
+ if self.looks_like_feed?(a["href"])
112
+ self.add_feed(a["href"], url, $base_uri, a["title"] || a.inner_html || a['alt']) # multiple fallbacks, first title, then the tag content, then the alt tag (in case of image)
113
+ end
114
+ end
120
115
  end
121
- end
116
+ end
122
117
  rescue Timeout::Error => err
123
118
  $stderr.puts "Timeout error ocurred with `#{url}: #{err}'"
124
119
  rescue OpenURI::HTTPError => the_error
@@ -133,7 +128,7 @@ module Feedbag
133
128
  end
134
129
 
135
130
  def self.looks_like_feed?(url)
136
- if url =~ /((\.|\/)(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
131
+ if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
137
132
  true
138
133
  else
139
134
  false
metadata CHANGED
@@ -1,57 +1,48 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: muck-feedbag
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.5.100
5
5
  platform: ruby
6
6
  authors:
7
- - Joel Duffin
8
- - Justin Ball
7
+ - Axiombox
8
+ - David Moreno
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-11-10 00:00:00 -07:00
13
+ date: 2009-02-10 00:00:00 -08:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: shoulda
18
- type: :development
17
+ name: hpricot
18
+ type: :runtime
19
19
  version_requirement:
20
20
  version_requirements: !ruby/object:Gem::Requirement
21
21
  requirements:
22
22
  - - ">="
23
23
  - !ruby/object:Gem::Version
24
- version: "0"
24
+ version: "0.6"
25
25
  version:
26
- description: This gem will return title and url for each feed discovered at a given url
27
- email: justin@tatemae.com
26
+ description: Ruby's favorite feed auto-discovery tool
27
+ email: david@axiombox.com
28
28
  executables: []
29
29
 
30
30
  extensions: []
31
31
 
32
32
  extra_rdoc_files:
33
- - ChangeLog
34
33
  - README.markdown
35
- files:
36
- - .gitignore
37
34
  - COPYING
38
- - ChangeLog
39
- - README.markdown
40
- - TODO
41
- - VERSION
42
- - benchmark/rfeedfinder_benchmark.rb
43
- - feedbag.gemspec
44
- - index.html
35
+ files:
45
36
  - lib/feedbag.rb
46
- - rails/init.rb
47
- - test/atom_autodiscovery_test.rb
37
+ - benchmark/rfeedfinder_benchmark.rb
38
+ - README.markdown
39
+ - COPYING
48
40
  has_rdoc: true
49
- homepage: http://github.com/tatemae/muck-feedbag
50
- licenses: []
51
-
41
+ homepage: http://axiombox.com/feedbag
52
42
  post_install_message:
53
43
  rdoc_options:
54
- - --charset=UTF-8
44
+ - --main
45
+ - README.markdown
55
46
  require_paths:
56
47
  - lib
57
48
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -69,9 +60,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
69
60
  requirements: []
70
61
 
71
62
  rubyforge_project:
72
- rubygems_version: 1.3.5
63
+ rubygems_version: 1.2.0
73
64
  signing_key:
74
- specification_version: 3
75
- summary: Fork of the feedbag gem.
76
- test_files:
77
- - test/atom_autodiscovery_test.rb
65
+ specification_version: 2
66
+ summary: Ruby's favorite feed auto-discovery tool
67
+ test_files: []
68
+
data/.gitignore DELETED
@@ -1,30 +0,0 @@
1
- *.swp
2
- **/*.pid
3
- log/*.log
4
- log/*.pid
5
- tmp
6
- .DS_Store
7
- public/cache/**/*
8
- public/system/**/*
9
- doc/**/*
10
- db/*.sqlite3
11
- .project
12
- .loadpath
13
- nbproject/
14
- .idea
15
- testjour.log
16
- *.so
17
- *.o
18
- Makefile
19
- mkmf.log
20
- *.bundle
21
- conftest
22
- content/
23
- .idea
24
- *.sw?
25
- .DS_Store
26
- coverage
27
- rdoc
28
- pkg
29
- pkg/*
30
- log/*
data/ChangeLog DELETED
@@ -1,20 +0,0 @@
1
- * 0.5.99 - Tue May 12 12:52:22 EDT 2009
2
- - Added rails/init.rb to load easily on a Rails app.
3
-
4
- * 0.5.13.1 - Wed Apr 22 11:16:19 EDT 2009
5
- - Changed args on find() from nil to {}
6
-
7
- * 0.5.13 - Wed Apr 22 11:12:40 EDT 2009
8
- - Added :narrow option so find() skips feed_validate and A links.
9
-
10
- * 0.5.12 - Fri Mar 20 12:34:48 EDT 2009
11
- - Added support for "feed://" URLs
12
-
13
- * 0.5.11 - Sat Mar 7 17:22:30 EST 2009
14
- - Benchmark against Rfeedfinder added.
15
-
16
- * 0.5.10 - Wed Mar 4 13:32:33 EST 2009
17
- - Feeds whose URLs contained query string arguments were not being
18
- auto-discovered -- fixed
19
-
20
- ** For previous changes, see the git log
data/TODO DELETED
@@ -1 +0,0 @@
1
- - Document Feedbag.feed?
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.1.0
@@ -1,20 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- Gem::Specification.new do |s|
4
- s.name = %q{feedbag}
5
- s.version = "0.5.103"
6
- s.homepage = "http://axiombox.com/feedbag"
7
- #s.rubyforge_project = "feedbag"
8
-
9
- s.authors = ["Axiombox", "David Moreno"]
10
- s.date = %q{2009-02-10}
11
- s.description = %q{Ruby's favorite feed auto-discoverty tool}
12
- s.email = %q{david@axiombox.com}
13
- s.extra_rdoc_files = ["README.markdown", "COPYING"]
14
- s.files = ["lib/feedbag.rb", "benchmark/rfeedfinder_benchmark.rb"]
15
- s.has_rdoc = true
16
- s.rdoc_options = ["--main", "README.markdown"]
17
- s.summary = %q{Ruby's favorite feed auto-discovery tool}
18
- s.add_dependency("hpricot", '>= 0.6')
19
- end
20
-
data/index.html DELETED
@@ -1,115 +0,0 @@
1
- <h1>Feedbag</h1>
2
-
3
- <blockquote>
4
- <p>Do you want me to drag my sack across your face?
5
- - Glenn Quagmire</p>
6
- </blockquote>
7
-
8
- <p>Feedbag is a feed auto-discovery Ruby library. You don't need to know more about it. It is said to be:</p>
9
-
10
- <blockquote>
11
- <p>Ruby's favorite auto-discovery tool/library!</p>
12
- </blockquote>
13
-
14
- <h3>Quick synopsis</h3>
15
-
16
- <pre><code>&gt;&gt; require "rubygems"
17
- =&gt; true
18
- &gt;&gt; require "feedbag"
19
- =&gt; true
20
- &gt;&gt; Feedbag.find "log.damog.net"
21
- =&gt; ["http://feeds.feedburner.com/TeoremaDelCerdoInfinito", "http://log.damog.net/comments/feed/"]
22
- </code></pre>
23
-
24
- <h3>Installation</h3>
25
-
26
- <pre><code>$ sudo gem install damog-feedbag -s http://gems.github.com/
27
- </code></pre>
28
-
29
- <p>Or just grab feedbag.rb and use it on your own project:</p>
30
-
31
- <pre><code>$ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb
32
- </code></pre>
33
-
34
- <h2>Tutorial</h2>
35
-
36
- <p>So you want to know more about it.</p>
37
-
38
- <p>OK, if the URL passed to the find method is a feed itself, that only feed URL will be returned.</p>
39
-
40
- <pre><code>&gt;&gt; Feedbag.find "github.com/damog.atom"
41
- =&gt; ["http://github.com/damog.atom"]
42
- &gt;&gt;
43
- </code></pre>
44
-
45
- <p>Otherwise, it will always return LINK feeds first and A (anchor tag) feeds later. Among A feeds, the ones hosted on the same host as the URL have higher priority:</p>
46
-
47
- <pre><code>&gt;&gt; Feedbag.find "http://ve.planetalinux.org"
48
- =&gt; ["http://feedproxy.google.com/PlanetaLinuxVenezuela", "http://rendergraf.wordpress.com/feed/", "http://rootweiller.wordpress.com/feed/", "http://skatox.com/blog/feed/", "http://kodegeek.com/atom.xml", "http://blog.0x29.com.ve/?feed=rss2&amp;cat=8"]
49
- &gt;&gt;
50
- </code></pre>
51
-
52
- <p>In your application you should, most of the time, take only the very first element of the array:</p>
53
-
54
- <pre><code>&gt;&gt; Feedbag.find("planet.debian.org").first(3)
55
- =&gt; ["http://planet.debian.org/rss10.xml", "http://planet.debian.org/rss20.xml", "http://planet.debian.org/atom.xml"]
56
- &gt;&gt;
57
- </code></pre>
58
-
59
- <p>(Try running that same example without the "first" method. That example's host is a blog aggregator, so it has hundreds of feed URLs:)</p>
60
-
61
- <pre><code>&gt;&gt; Feedbag.find("planet.debian.org").size
62
- =&gt; 104
63
- &gt;&gt;
64
- </code></pre>
65
-
66
- <p>Feedbag will find them all, but it will return the most important ones as the first elements of the returned array.</p>
67
-
68
- <pre><code>&gt;&gt; Feedbag.find("cnn.com")
69
- =&gt; ["http://rss.cnn.com/rss/cnn_topstories.rss", "http://rss.cnn.com/rss/cnn_latest.rss", "http://rss.cnn.com/services/podcasting/robinmeade/rss.xml"]
70
- &gt;&gt;
71
- </code></pre>
72
-
73
- <h3>Why should you use it?</h3>
74
-
75
- <ul>
76
- <li>Because it's cool.</li>
77
- <li>Because it only uses <a href="https://code.whytheluckystiff.net/hpricot/">Hpricot</a> as dependency.</li>
78
- <li>Because it follows modern feed filename conventions (like those ones used by WordPress blogs, or Blogger, etc).</li>
79
- <li>Because it's a single file you can embed easily in your application.</li>
80
- <li>Because it passes most of Mark Pilgrim's <a href="http://diveintomark.org/tests/client/autodiscovery/">Atom auto-discovery test suite</a>. It doesn't pass them all because some of those tests are broken (citation needed).</li>
81
- </ul>
82
-
83
- <h3>Why did I build it?</h3>
84
-
85
- <ul>
86
- <li>Because I liked Benjamin Trott's <a href="http://search.cpan.org/~btrott/Feed-Find-0.06/lib/Feed/Find.pm">Feed::Find</a>.</li>
87
- <li>Because I thought it would be good to have Feed::Find's functionality in Ruby.</li>
88
- <li>Because I thought it was going to be easy to maintain.</li>
89
- <li>Because I was going to use it on <a href="http://github.com/damog/rfeed">rFeed</a>.</li>
90
- <li>And finally, because I didn't know <a href="http://rfeedfinder.rubyforge.org/">rfeedfinder</a> existed :-)</li>
91
- </ul>
92
-
93
- <h3>Bugs</h3>
94
-
95
- <p>Please report bugs to <a href="mailto:rt@support.axiombox.com">rt@support.axiombox.com</a> or directly to the author.</p>
96
-
97
- <h3>Contribute</h3>
98
-
99
- <blockquote>
100
- <p>git clone git://github.com/damog/feedbag.git</p>
101
- </blockquote>
102
-
103
- <p>...patch, build, hack and make pull requests. I'll be glad.</p>
104
-
105
- <h3>Author</h3>
106
-
107
- <p><a href="http://damog.net/">David Moreno</a> &lt;<a href="mailto:david@axiombox.com">david@axiombox.com</a>&gt;.</p>
108
-
109
- <h3>Copyright</h3>
110
-
111
- <p>This is free software. See <a href="http://github.com/damog/feedbag/master/COPYING">COPYING</a> for more information.</p>
112
-
113
- <h3>Thanks</h3>
114
-
115
- <p><a href="http://maggit.net">Raquel</a>, for making <a href="http://axiombox.com">Axiombox</a> and most of my dreams possible. Also, <a href="http://github.com">GitHub</a> for making a nice code sharing service that doesn't suck.</p>
@@ -1 +0,0 @@
1
- require File.join File.dirname(__FILE__), "..", "lib", "feedbag"
@@ -1,46 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require "#{File.dirname(__FILE__)}/../feedbag"
4
- require "test/unit"
5
- require "open-uri"
6
- require "hpricot"
7
- require "pp"
8
-
9
- class AtomAutoDiscoveryTest < Test::Unit::TestCase
10
- def test_autodisc
11
- base_url = "http://diveintomark.org/tests/client/autodiscovery/"
12
- url = base_url + "html4-001.html"
13
-
14
- i = 1
15
- puts "trying now with #{url}"
16
- while(i)
17
- puts
18
- i = 0 # unless otherwise found
19
-
20
- f = Feedbag.find url
21
-
22
- assert_instance_of Array, f
23
- assert f.size == 1, "Feedbag didn't find a feed on #{url} or found more than one"
24
-
25
- puts " found #{f[0]}"
26
- feed = Hpricot(open(f[0]))
27
-
28
- (feed/"link").each do |l|
29
- next unless l["rel"] == "alternate"
30
- assert_equal l["href"], url
31
- end
32
-
33
- # ahora me voy al siguiente
34
- html = Hpricot(open(url))
35
- (html/"link").each do |l|
36
- next unless l["rel"] == "next"
37
- url = URI.parse(base_url).merge(l["href"]).to_s
38
- puts "trying now with #{url}"
39
- i = 1
40
- end
41
-
42
- end
43
- end
44
-
45
-
46
- end