feedbag 0.9.2 → 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.markdown +8 -9
- data/lib/feedbag.rb +171 -164
- metadata +44 -19
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 13f1c9e29ccb4e1c9affd106d1a7dacb641ebecf
|
4
|
+
data.tar.gz: 0b3ed450ad3fe07bca31cc120429092809d2bbdf
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 03c341cc2fca2caf681ab6086b008498622ab639f3fb751ffe12a54c5a34e04f9d1c3c2cc145dcd25681dff02f16bbbe29e8149cd690429ede6159df6fc42ac0
|
7
|
+
data.tar.gz: 80b23d8060a4721b904e294196a1359f6d63efdcdac73d5ba715ccd0aff250713919f03fb28e7514d13436342cec6ccf3e1c42cb29cacb94c379dbf753484ad0
|
data/README.markdown
CHANGED
@@ -14,24 +14,23 @@ Feedbag is Ruby's favorite auto-discovery tool/library!
|
|
14
14
|
>> Feedbag.feed?("http://jobs.perl.org/rss/standard.rss")
|
15
15
|
=> true
|
16
16
|
|
17
|
-
You can also use an installed command line tool for quick queries, if you install the gem:
|
18
|
-
|
19
|
-
$ feedbag http://rubygems.org/profiles/damog
|
20
|
-
== http://rubygems.org/profiles/damog:
|
21
|
-
- http://feeds.feedburner.com/gemcutter-latest
|
22
|
-
|
23
|
-
|
24
17
|
### Installation
|
25
18
|
|
26
|
-
$
|
19
|
+
$ gem install feedbag
|
27
20
|
|
28
21
|
Or just grab feedbag.rb and use it on your own project:
|
29
22
|
|
30
23
|
$ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb
|
31
24
|
|
25
|
+
You can also use the command line tool for quick queries, if you install the gem:
|
26
|
+
|
27
|
+
$ feedbag http://rubygems.org/profiles/damog
|
28
|
+
== http://rubygems.org/profiles/damog:
|
29
|
+
- http://feeds.feedburner.com/gemcutter-latest
|
30
|
+
|
32
31
|
### Why should you use it?
|
33
32
|
|
34
|
-
- Because it only uses [
|
33
|
+
- Because it only uses [Nokogiri](http://nokogiri.org/) as dependency.
|
35
34
|
- Because it follows modern feed filename conventions (like those ones used by WordPress blogs, or Blogger, etc).
|
36
35
|
- Because it's a single file you can embed easily in your application.
|
37
36
|
- Because it's faster than rfeedfinder.
|
data/lib/feedbag.rb
CHANGED
@@ -22,180 +22,187 @@
|
|
22
22
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
23
|
|
24
24
|
require "rubygems"
|
25
|
-
require "
|
25
|
+
require "nokogiri"
|
26
26
|
require "open-uri"
|
27
27
|
require "net/http"
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
29
|
+
class Feedbag
|
30
|
+
|
31
|
+
CONTENT_TYPES = [
|
32
|
+
'application/x.atom+xml',
|
33
|
+
'application/atom+xml',
|
34
|
+
'application/xml',
|
35
|
+
'text/xml',
|
36
|
+
'application/rss+xml',
|
37
|
+
'application/rdf+xml',
|
38
|
+
].freeze
|
39
|
+
|
40
|
+
def self.feed?(url)
|
41
|
+
new.feed?(url)
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.find(url, args = {})
|
45
|
+
new.find(url, args = {})
|
46
|
+
end
|
47
|
+
|
48
|
+
def initialize
|
49
|
+
@feeds = []
|
50
|
+
end
|
51
|
+
|
52
|
+
def feed?(url)
|
53
|
+
# use LWR::Simple.normalize some time
|
54
|
+
url_uri = URI.parse(url)
|
55
|
+
url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
|
56
|
+
url << "?#{url_uri.query}" if url_uri.query
|
57
|
+
|
58
|
+
# hack:
|
59
|
+
url.sub!(/^feed:\/\//, 'http://')
|
60
|
+
|
61
|
+
res = Feedbag.find(url)
|
62
|
+
if res.size == 1 and res.first == url
|
63
|
+
return true
|
64
|
+
else
|
65
|
+
return false
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def find(url, args = {})
|
70
|
+
url_uri = URI.parse(url)
|
71
|
+
url = nil
|
72
|
+
if url_uri.scheme.nil?
|
73
|
+
url = "http://#{url_uri.to_s}"
|
74
|
+
elsif url_uri.scheme == "feed"
|
75
|
+
return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil)
|
76
|
+
else
|
77
|
+
url = url_uri.to_s
|
78
|
+
end
|
79
|
+
#url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
|
80
|
+
|
81
|
+
# check if feed_valid is avail
|
75
82
|
begin
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
83
|
+
require "feed_validator"
|
84
|
+
v = W3C::FeedValidator.new
|
85
|
+
v.validate_url(url)
|
86
|
+
return self.add_feed(url, nil) if v.valid?
|
87
|
+
rescue LoadError
|
88
|
+
# scoo
|
89
|
+
rescue REXML::ParseException
|
90
|
+
# usually indicates timeout
|
91
|
+
# TODO: actually find out timeout. use Terminator?
|
92
|
+
# $stderr.puts "Feed looked like feed but might not have passed validation or timed out"
|
86
93
|
rescue => ex
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
94
|
+
$stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
|
95
|
+
end
|
96
|
+
|
97
|
+
begin
|
98
|
+
html = open(url) do |f|
|
99
|
+
content_type = f.content_type.downcase
|
100
|
+
if content_type == "application/octet-stream" # open failed
|
101
|
+
content_type = f.meta["content-type"].gsub(/;.*$/, '')
|
102
|
+
end
|
103
|
+
if CONTENT_TYPES.include?(content_type)
|
104
|
+
return self.add_feed(url, nil)
|
105
|
+
end
|
106
|
+
|
107
|
+
doc = Nokogiri::HTML(f.read)
|
108
|
+
|
109
|
+
if doc.at("base") and doc.at("base")["href"]
|
110
|
+
@base_uri = doc.at("base")["href"]
|
111
|
+
else
|
112
|
+
@base_uri = nil
|
113
|
+
end
|
114
|
+
|
115
|
+
# first with links
|
109
116
|
(doc/"atom:link").each do |l|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
117
|
+
next unless l["rel"]
|
118
|
+
if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self"
|
119
|
+
self.add_feed(l["href"], url, @base_uri)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
(doc/"link").each do |l|
|
124
|
+
next unless l["rel"]
|
125
|
+
if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
|
126
|
+
self.add_feed(l["href"], url, @base_uri)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
(doc/"a").each do |a|
|
131
|
+
next unless a["href"]
|
132
|
+
if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/)
|
133
|
+
self.add_feed(a["href"], url, @base_uri)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
(doc/"a").each do |a|
|
138
|
+
next unless a["href"]
|
139
|
+
if self.looks_like_feed?(a["href"])
|
140
|
+
self.add_feed(a["href"], url, @base_uri)
|
141
|
+
end
|
142
|
+
end
|
136
143
|
|
137
144
|
# Added support for feeds like http://tabtimes.com/tbfeed/mashable/full.xml
|
138
145
|
if url.match(/.xml$/) and doc.root and doc.root["xml:base"] and doc.root["xml:base"].strip == url.strip
|
139
|
-
|
146
|
+
self.add_feed(url, nil)
|
140
147
|
end
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
148
|
+
end
|
149
|
+
rescue Timeout::Error => err
|
150
|
+
$stderr.puts "Timeout error occurred with `#{url}: #{err}'"
|
151
|
+
rescue OpenURI::HTTPError => the_error
|
152
|
+
$stderr.puts "Error occurred with `#{url}': #{the_error}"
|
153
|
+
rescue SocketError => err
|
154
|
+
$stderr.puts "Socket error occurred with: `#{url}': #{err}"
|
155
|
+
rescue => ex
|
156
|
+
$stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
|
157
|
+
ensure
|
158
|
+
return @feeds
|
159
|
+
end
|
160
|
+
|
161
|
+
end
|
162
|
+
|
163
|
+
def looks_like_feed?(url)
|
164
|
+
if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
|
165
|
+
true
|
166
|
+
else
|
167
|
+
false
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def add_feed(feed_url, orig_url, base_uri = nil)
|
172
|
+
# puts "#{feed_url} - #{orig_url}"
|
173
|
+
url = feed_url.sub(/^feed:/, '').strip
|
174
|
+
|
175
|
+
if base_uri
|
176
|
+
# url = base_uri + feed_url
|
177
|
+
url = URI.parse(base_uri).merge(feed_url).to_s
|
178
|
+
end
|
179
|
+
|
180
|
+
begin
|
181
|
+
uri = URI.parse(url)
|
182
|
+
rescue
|
183
|
+
puts "Error with `#{url}'"
|
184
|
+
exit 1
|
185
|
+
end
|
186
|
+
unless uri.absolute?
|
187
|
+
orig = URI.parse(orig_url)
|
188
|
+
url = orig.merge(url).to_s
|
189
|
+
end
|
190
|
+
|
191
|
+
# verify url is really valid
|
192
|
+
@feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url)
|
193
|
+
end
|
194
|
+
|
195
|
+
# not used. yet.
|
196
|
+
def _is_http_valid(uri, orig_url)
|
197
|
+
req = Net::HTTP.get_response(uri)
|
198
|
+
orig_uri = URI.parse(orig_url)
|
199
|
+
case req
|
200
|
+
when Net::HTTPSuccess then
|
201
|
+
return true
|
202
|
+
else
|
203
|
+
return false
|
204
|
+
end
|
205
|
+
end
|
199
206
|
end
|
200
207
|
|
201
208
|
if __FILE__ == $0
|
metadata
CHANGED
@@ -1,32 +1,58 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedbag
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.2
|
5
|
-
prerelease:
|
4
|
+
version: 0.9.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- David Moreno
|
8
|
+
- Derek Willis
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
date: 2013-12-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: nokogiri
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
17
|
requirements:
|
19
|
-
- -
|
18
|
+
- - ">="
|
20
19
|
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
20
|
+
version: '0'
|
22
21
|
type: :runtime
|
23
22
|
prerelease: false
|
24
23
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
24
|
requirements:
|
27
|
-
- -
|
25
|
+
- - ">="
|
28
26
|
- !ruby/object:Gem::Version
|
29
|
-
version: '0'
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: shoulda
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: mocha
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 0.12.0
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 0.12.0
|
30
56
|
description: Ruby's favorite feed auto-discovery tool
|
31
57
|
email: david@axiombox.com
|
32
58
|
executables:
|
@@ -36,35 +62,34 @@ extra_rdoc_files:
|
|
36
62
|
- README.markdown
|
37
63
|
- COPYING
|
38
64
|
files:
|
39
|
-
-
|
65
|
+
- COPYING
|
66
|
+
- README.markdown
|
40
67
|
- benchmark/rfeedfinder_benchmark.rb
|
41
68
|
- bin/feedbag
|
42
|
-
-
|
43
|
-
- COPYING
|
69
|
+
- lib/feedbag.rb
|
44
70
|
homepage: http://github.com/damog/feedbag
|
45
71
|
licenses: []
|
72
|
+
metadata: {}
|
46
73
|
post_install_message:
|
47
74
|
rdoc_options:
|
48
|
-
- --main
|
75
|
+
- "--main"
|
49
76
|
- README.markdown
|
50
77
|
require_paths:
|
51
78
|
- lib
|
52
79
|
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
-
none: false
|
54
80
|
requirements:
|
55
|
-
- -
|
81
|
+
- - ">="
|
56
82
|
- !ruby/object:Gem::Version
|
57
83
|
version: '0'
|
58
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
|
-
none: false
|
60
85
|
requirements:
|
61
|
-
- -
|
86
|
+
- - ">="
|
62
87
|
- !ruby/object:Gem::Version
|
63
88
|
version: '0'
|
64
89
|
requirements: []
|
65
90
|
rubyforge_project: feedbag
|
66
|
-
rubygems_version:
|
91
|
+
rubygems_version: 2.2.2
|
67
92
|
signing_key:
|
68
|
-
specification_version:
|
93
|
+
specification_version: 4
|
69
94
|
summary: Ruby's favorite feed auto-discovery tool
|
70
95
|
test_files: []
|