feedbag 0.9.2 → 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/README.markdown +8 -9
  3. data/lib/feedbag.rb +171 -164
  4. metadata +44 -19
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 13f1c9e29ccb4e1c9affd106d1a7dacb641ebecf
4
+ data.tar.gz: 0b3ed450ad3fe07bca31cc120429092809d2bbdf
5
+ SHA512:
6
+ metadata.gz: 03c341cc2fca2caf681ab6086b008498622ab639f3fb751ffe12a54c5a34e04f9d1c3c2cc145dcd25681dff02f16bbbe29e8149cd690429ede6159df6fc42ac0
7
+ data.tar.gz: 80b23d8060a4721b904e294196a1359f6d63efdcdac73d5ba715ccd0aff250713919f03fb28e7514d13436342cec6ccf3e1c42cb29cacb94c379dbf753484ad0
@@ -14,24 +14,23 @@ Feedbag is Ruby's favorite auto-discovery tool/library!
14
14
  >> Feedbag.feed?("http://jobs.perl.org/rss/standard.rss")
15
15
  => true
16
16
 
17
- You can also use an installed command line tool for quick queries, if you install the gem:
18
-
19
- $ feedbag http://rubygems.org/profiles/damog
20
- == http://rubygems.org/profiles/damog:
21
- - http://feeds.feedburner.com/gemcutter-latest
22
-
23
-
24
17
  ### Installation
25
18
 
26
- $ sudo gem install feedbag
19
+ $ gem install feedbag
27
20
 
28
21
  Or just grab feedbag.rb and use it on your own project:
29
22
 
30
23
  $ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb
31
24
 
25
+ You can also use the command line tool for quick queries, if you install the gem:
26
+
27
+ $ feedbag http://rubygems.org/profiles/damog
28
+ == http://rubygems.org/profiles/damog:
29
+ - http://feeds.feedburner.com/gemcutter-latest
30
+
32
31
  ### Why should you use it?
33
32
 
34
- - Because it only uses [Hpricot](https://code.whytheluckystiff.net/hpricot/) as dependency.
33
+ - Because it only uses [Nokogiri](http://nokogiri.org/) as a dependency.
35
34
  - Because it follows modern feed filename conventions (like the ones used by WordPress blogs, Blogger, etc.).
36
35
  - Because it's a single file you can embed easily in your application.
37
36
  - Because it's faster than rfeedfinder.
@@ -22,180 +22,187 @@
22
22
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
23
 
24
24
  require "rubygems"
25
- require "hpricot"
25
+ require "nokogiri"
26
26
  require "open-uri"
27
27
  require "net/http"
28
28
 
29
- module Feedbag
30
-
31
- @content_types = [
32
- 'application/x.atom+xml',
33
- 'application/atom+xml',
34
- 'application/xml',
35
- 'text/xml',
36
- 'application/rss+xml',
37
- 'application/rdf+xml',
38
- ]
39
-
40
- $feeds = []
41
- $base_uri = nil
42
-
43
- def self.feed?(url)
44
- # use LWR::Simple.normalize some time
45
- url_uri = URI.parse(url)
46
- url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
47
- url << "?#{url_uri.query}" if url_uri.query
48
-
49
- # hack:
50
- url.sub!(/^feed:\/\//, 'http://')
51
-
52
- res = self.find(url)
53
- if res.size == 1 and res.first == url
54
- return true
55
- else
56
- return false
57
- end
58
- end
59
-
60
- def self.find(url, args = {})
61
- $feeds = []
62
-
63
- url_uri = URI.parse(url)
64
- url = nil
65
- if url_uri.scheme.nil?
66
- url = "http://#{url_uri.to_s}"
67
- elsif url_uri.scheme == "feed"
68
- return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil)
69
- else
70
- url = url_uri.to_s
71
- end
72
- #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
73
-
74
- # check if feed_valid is avail
29
+ class Feedbag
30
+
31
+ CONTENT_TYPES = [
32
+ 'application/x.atom+xml',
33
+ 'application/atom+xml',
34
+ 'application/xml',
35
+ 'text/xml',
36
+ 'application/rss+xml',
37
+ 'application/rdf+xml',
38
+ ].freeze
39
+
40
+ def self.feed?(url)
41
+ new.feed?(url)
42
+ end
43
+
44
+ def self.find(url, args = {})
45
+ new.find(url, args = {})
46
+ end
47
+
48
+ def initialize
49
+ @feeds = []
50
+ end
51
+
52
+ def feed?(url)
53
+ # use LWR::Simple.normalize some time
54
+ url_uri = URI.parse(url)
55
+ url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
56
+ url << "?#{url_uri.query}" if url_uri.query
57
+
58
+ # hack:
59
+ url.sub!(/^feed:\/\//, 'http://')
60
+
61
+ res = Feedbag.find(url)
62
+ if res.size == 1 and res.first == url
63
+ return true
64
+ else
65
+ return false
66
+ end
67
+ end
68
+
69
+ def find(url, args = {})
70
+ url_uri = URI.parse(url)
71
+ url = nil
72
+ if url_uri.scheme.nil?
73
+ url = "http://#{url_uri.to_s}"
74
+ elsif url_uri.scheme == "feed"
75
+ return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil)
76
+ else
77
+ url = url_uri.to_s
78
+ end
79
+ #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
80
+
81
+ # check if feed_valid is avail
75
82
  begin
76
- require "feed_validator"
77
- v = W3C::FeedValidator.new
78
- v.validate_url(url)
79
- return self.add_feed(url, nil) if v.valid?
80
- rescue LoadError
81
- # scoo
82
- rescue REXML::ParseException
83
- # usually indicates timeout
84
- # TODO: actually find out timeout. use Terminator?
85
- # $stderr.puts "Feed looked like feed but might not have passed validation or timed out"
83
+ require "feed_validator"
84
+ v = W3C::FeedValidator.new
85
+ v.validate_url(url)
86
+ return self.add_feed(url, nil) if v.valid?
87
+ rescue LoadError
88
+ # scoo
89
+ rescue REXML::ParseException
90
+ # usually indicates timeout
91
+ # TODO: actually find out timeout. use Terminator?
92
+ # $stderr.puts "Feed looked like feed but might not have passed validation or timed out"
86
93
  rescue => ex
87
- $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}"
88
- end
89
-
90
- begin
91
- html = open(url) do |f|
92
- content_type = f.content_type.downcase
93
- if content_type == "application/octet-stream" # open failed
94
- content_type = f.meta["content-type"].gsub(/;.*$/, '')
95
- end
96
- if @content_types.include?(content_type)
97
- return self.add_feed(url, nil)
98
- end
99
-
100
- doc = Hpricot(f.read)
101
-
102
- if doc.at("base") and doc.at("base")["href"]
103
- $base_uri = doc.at("base")["href"]
104
- else
105
- $base_uri = nil
106
- end
107
-
108
- # first with links
94
+ $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
95
+ end
96
+
97
+ begin
98
+ html = open(url) do |f|
99
+ content_type = f.content_type.downcase
100
+ if content_type == "application/octet-stream" # open failed
101
+ content_type = f.meta["content-type"].gsub(/;.*$/, '')
102
+ end
103
+ if CONTENT_TYPES.include?(content_type)
104
+ return self.add_feed(url, nil)
105
+ end
106
+
107
+ doc = Nokogiri::HTML(f.read)
108
+
109
+ if doc.at("base") and doc.at("base")["href"]
110
+ @base_uri = doc.at("base")["href"]
111
+ else
112
+ @base_uri = nil
113
+ end
114
+
115
+ # first with links
109
116
  (doc/"atom:link").each do |l|
110
- next unless l["rel"]
111
- if l["type"] and @content_types.include?(l["type"].downcase.strip) and l["rel"].downcase == "self"
112
- self.add_feed(l["href"], url, $base_uri)
113
- end
114
- end
115
-
116
- (doc/"link").each do |l|
117
- next unless l["rel"]
118
- if l["type"] and @content_types.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
119
- self.add_feed(l["href"], url, $base_uri)
120
- end
121
- end
122
-
123
- (doc/"a").each do |a|
124
- next unless a["href"]
125
- if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/)
126
- self.add_feed(a["href"], url, $base_uri)
127
- end
128
- end
129
-
130
- (doc/"a").each do |a|
131
- next unless a["href"]
132
- if self.looks_like_feed?(a["href"])
133
- self.add_feed(a["href"], url, $base_uri)
134
- end
135
- end
117
+ next unless l["rel"]
118
+ if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self"
119
+ self.add_feed(l["href"], url, @base_uri)
120
+ end
121
+ end
122
+
123
+ (doc/"link").each do |l|
124
+ next unless l["rel"]
125
+ if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
126
+ self.add_feed(l["href"], url, @base_uri)
127
+ end
128
+ end
129
+
130
+ (doc/"a").each do |a|
131
+ next unless a["href"]
132
+ if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/)
133
+ self.add_feed(a["href"], url, @base_uri)
134
+ end
135
+ end
136
+
137
+ (doc/"a").each do |a|
138
+ next unless a["href"]
139
+ if self.looks_like_feed?(a["href"])
140
+ self.add_feed(a["href"], url, @base_uri)
141
+ end
142
+ end
136
143
 
137
144
  # Added support for feeds like http://tabtimes.com/tbfeed/mashable/full.xml
138
145
  if url.match(/.xml$/) and doc.root and doc.root["xml:base"] and doc.root["xml:base"].strip == url.strip
139
- self.add_feed(url, nil)
146
+ self.add_feed(url, nil)
140
147
  end
141
- end
142
- rescue Timeout::Error => err
143
- $stderr.puts "Timeout error ocurred with `#{url}: #{err}'"
144
- rescue OpenURI::HTTPError => the_error
145
- $stderr.puts "Error ocurred with `#{url}': #{the_error}"
146
- rescue SocketError => err
147
- $stderr.puts "Socket error ocurred with: `#{url}': #{err}"
148
- rescue => ex
149
- $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}"
150
- ensure
151
- return $feeds
152
- end
153
-
154
- end
155
-
156
- def self.looks_like_feed?(url)
157
- if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
158
- true
159
- else
160
- false
161
- end
162
- end
163
-
164
- def self.add_feed(feed_url, orig_url, base_uri = nil)
165
- # puts "#{feed_url} - #{orig_url}"
166
- url = feed_url.sub(/^feed:/, '').strip
167
-
168
- if base_uri
169
- # url = base_uri + feed_url
170
- url = URI.parse(base_uri).merge(feed_url).to_s
171
- end
172
-
173
- begin
174
- uri = URI.parse(url)
175
- rescue
176
- puts "Error with `#{url}'"
177
- exit 1
178
- end
179
- unless uri.absolute?
180
- orig = URI.parse(orig_url)
181
- url = orig.merge(url).to_s
182
- end
183
-
184
- # verify url is really valid
185
- $feeds.push(url) unless $feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url)
186
- end
187
-
188
- # not used. yet.
189
- def self._is_http_valid(uri, orig_url)
190
- req = Net::HTTP.get_response(uri)
191
- orig_uri = URI.parse(orig_url)
192
- case req
193
- when Net::HTTPSuccess then
194
- return true
195
- else
196
- return false
197
- end
198
- end
148
+ end
149
+ rescue Timeout::Error => err
150
+ $stderr.puts "Timeout error occurred with `#{url}: #{err}'"
151
+ rescue OpenURI::HTTPError => the_error
152
+ $stderr.puts "Error occurred with `#{url}': #{the_error}"
153
+ rescue SocketError => err
154
+ $stderr.puts "Socket error occurred with: `#{url}': #{err}"
155
+ rescue => ex
156
+ $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
157
+ ensure
158
+ return @feeds
159
+ end
160
+
161
+ end
162
+
163
+ def looks_like_feed?(url)
164
+ if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
165
+ true
166
+ else
167
+ false
168
+ end
169
+ end
170
+
171
+ def add_feed(feed_url, orig_url, base_uri = nil)
172
+ # puts "#{feed_url} - #{orig_url}"
173
+ url = feed_url.sub(/^feed:/, '').strip
174
+
175
+ if base_uri
176
+ # url = base_uri + feed_url
177
+ url = URI.parse(base_uri).merge(feed_url).to_s
178
+ end
179
+
180
+ begin
181
+ uri = URI.parse(url)
182
+ rescue
183
+ puts "Error with `#{url}'"
184
+ exit 1
185
+ end
186
+ unless uri.absolute?
187
+ orig = URI.parse(orig_url)
188
+ url = orig.merge(url).to_s
189
+ end
190
+
191
+ # verify url is really valid
192
+ @feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url)
193
+ end
194
+
195
+ # not used. yet.
196
+ def _is_http_valid(uri, orig_url)
197
+ req = Net::HTTP.get_response(uri)
198
+ orig_uri = URI.parse(orig_url)
199
+ case req
200
+ when Net::HTTPSuccess then
201
+ return true
202
+ else
203
+ return false
204
+ end
205
+ end
199
206
  end
200
207
 
201
208
  if __FILE__ == $0
metadata CHANGED
@@ -1,32 +1,58 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedbag
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
5
- prerelease:
4
+ version: 0.9.3
6
5
  platform: ruby
7
6
  authors:
8
7
  - David Moreno
8
+ - Derek Willis
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
  date: 2013-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: hpricot
15
+ name: nokogiri
16
16
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
17
  requirements:
19
- - - ! '>='
18
+ - - ">="
20
19
  - !ruby/object:Gem::Version
21
- version: '0.6'
20
+ version: '0'
22
21
  type: :runtime
23
22
  prerelease: false
24
23
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
24
  requirements:
27
- - - ! '>='
25
+ - - ">="
28
26
  - !ruby/object:Gem::Version
29
- version: '0.6'
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: shoulda
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: mocha
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: 0.12.0
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: 0.12.0
30
56
  description: Ruby's favorite feed auto-discovery tool
31
57
  email: david@axiombox.com
32
58
  executables:
@@ -36,35 +62,34 @@ extra_rdoc_files:
36
62
  - README.markdown
37
63
  - COPYING
38
64
  files:
39
- - lib/feedbag.rb
65
+ - COPYING
66
+ - README.markdown
40
67
  - benchmark/rfeedfinder_benchmark.rb
41
68
  - bin/feedbag
42
- - README.markdown
43
- - COPYING
69
+ - lib/feedbag.rb
44
70
  homepage: http://github.com/damog/feedbag
45
71
  licenses: []
72
+ metadata: {}
46
73
  post_install_message:
47
74
  rdoc_options:
48
- - --main
75
+ - "--main"
49
76
  - README.markdown
50
77
  require_paths:
51
78
  - lib
52
79
  required_ruby_version: !ruby/object:Gem::Requirement
53
- none: false
54
80
  requirements:
55
- - - ! '>='
81
+ - - ">="
56
82
  - !ruby/object:Gem::Version
57
83
  version: '0'
58
84
  required_rubygems_version: !ruby/object:Gem::Requirement
59
- none: false
60
85
  requirements:
61
- - - ! '>='
86
+ - - ">="
62
87
  - !ruby/object:Gem::Version
63
88
  version: '0'
64
89
  requirements: []
65
90
  rubyforge_project: feedbag
66
- rubygems_version: 1.8.23
91
+ rubygems_version: 2.2.2
67
92
  signing_key:
68
- specification_version: 3
93
+ specification_version: 4
69
94
  summary: Ruby's favorite feed auto-discovery tool
70
95
  test_files: []