feedbag 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.markdown +8 -9
- data/lib/feedbag.rb +171 -164
- metadata +44 -19
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 13f1c9e29ccb4e1c9affd106d1a7dacb641ebecf
|
4
|
+
data.tar.gz: 0b3ed450ad3fe07bca31cc120429092809d2bbdf
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 03c341cc2fca2caf681ab6086b008498622ab639f3fb751ffe12a54c5a34e04f9d1c3c2cc145dcd25681dff02f16bbbe29e8149cd690429ede6159df6fc42ac0
|
7
|
+
data.tar.gz: 80b23d8060a4721b904e294196a1359f6d63efdcdac73d5ba715ccd0aff250713919f03fb28e7514d13436342cec6ccf3e1c42cb29cacb94c379dbf753484ad0
|
data/README.markdown
CHANGED
@@ -14,24 +14,23 @@ Feedbag is Ruby's favorite auto-discovery tool/library!
|
|
14
14
|
>> Feedbag.feed?("http://jobs.perl.org/rss/standard.rss")
|
15
15
|
=> true
|
16
16
|
|
17
|
-
You can also use an installed command line tool for quick queries, if you install the gem:
|
18
|
-
|
19
|
-
$ feedbag http://rubygems.org/profiles/damog
|
20
|
-
== http://rubygems.org/profiles/damog:
|
21
|
-
- http://feeds.feedburner.com/gemcutter-latest
|
22
|
-
|
23
|
-
|
24
17
|
### Installation
|
25
18
|
|
26
|
-
$
|
19
|
+
$ gem install feedbag
|
27
20
|
|
28
21
|
Or just grab feedbag.rb and use it on your own project:
|
29
22
|
|
30
23
|
$ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb
|
31
24
|
|
25
|
+
You can also use the command line tool for quick queries, if you install the gem:
|
26
|
+
|
27
|
+
$ feedbag http://rubygems.org/profiles/damog
|
28
|
+
== http://rubygems.org/profiles/damog:
|
29
|
+
- http://feeds.feedburner.com/gemcutter-latest
|
30
|
+
|
32
31
|
### Why should you use it?
|
33
32
|
|
34
|
-
- Because it only uses [
|
33
|
+
- Because it only uses [Nokogiri](http://nokogiri.org/) as dependency.
|
35
34
|
- Because it follows modern feed filename conventions (like those ones used by WordPress blogs, or Blogger, etc).
|
36
35
|
- Because it's a single file you can embed easily in your application.
|
37
36
|
- Because it's faster than rfeedfinder.
|
data/lib/feedbag.rb
CHANGED
@@ -22,180 +22,187 @@
|
|
22
22
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
23
|
|
24
24
|
require "rubygems"
|
25
|
-
require "
|
25
|
+
require "nokogiri"
|
26
26
|
require "open-uri"
|
27
27
|
require "net/http"
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
29
|
+
class Feedbag
|
30
|
+
|
31
|
+
CONTENT_TYPES = [
|
32
|
+
'application/x.atom+xml',
|
33
|
+
'application/atom+xml',
|
34
|
+
'application/xml',
|
35
|
+
'text/xml',
|
36
|
+
'application/rss+xml',
|
37
|
+
'application/rdf+xml',
|
38
|
+
].freeze
|
39
|
+
|
40
|
+
def self.feed?(url)
|
41
|
+
new.feed?(url)
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.find(url, args = {})
|
45
|
+
new.find(url, args = {})
|
46
|
+
end
|
47
|
+
|
48
|
+
def initialize
|
49
|
+
@feeds = []
|
50
|
+
end
|
51
|
+
|
52
|
+
def feed?(url)
|
53
|
+
# use LWR::Simple.normalize some time
|
54
|
+
url_uri = URI.parse(url)
|
55
|
+
url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
|
56
|
+
url << "?#{url_uri.query}" if url_uri.query
|
57
|
+
|
58
|
+
# hack:
|
59
|
+
url.sub!(/^feed:\/\//, 'http://')
|
60
|
+
|
61
|
+
res = Feedbag.find(url)
|
62
|
+
if res.size == 1 and res.first == url
|
63
|
+
return true
|
64
|
+
else
|
65
|
+
return false
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def find(url, args = {})
|
70
|
+
url_uri = URI.parse(url)
|
71
|
+
url = nil
|
72
|
+
if url_uri.scheme.nil?
|
73
|
+
url = "http://#{url_uri.to_s}"
|
74
|
+
elsif url_uri.scheme == "feed"
|
75
|
+
return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil)
|
76
|
+
else
|
77
|
+
url = url_uri.to_s
|
78
|
+
end
|
79
|
+
#url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
|
80
|
+
|
81
|
+
# check if feed_valid is avail
|
75
82
|
begin
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
83
|
+
require "feed_validator"
|
84
|
+
v = W3C::FeedValidator.new
|
85
|
+
v.validate_url(url)
|
86
|
+
return self.add_feed(url, nil) if v.valid?
|
87
|
+
rescue LoadError
|
88
|
+
# scoo
|
89
|
+
rescue REXML::ParseException
|
90
|
+
# usually indicates timeout
|
91
|
+
# TODO: actually find out timeout. use Terminator?
|
92
|
+
# $stderr.puts "Feed looked like feed but might not have passed validation or timed out"
|
86
93
|
rescue => ex
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
94
|
+
$stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
|
95
|
+
end
|
96
|
+
|
97
|
+
begin
|
98
|
+
html = open(url) do |f|
|
99
|
+
content_type = f.content_type.downcase
|
100
|
+
if content_type == "application/octet-stream" # open failed
|
101
|
+
content_type = f.meta["content-type"].gsub(/;.*$/, '')
|
102
|
+
end
|
103
|
+
if CONTENT_TYPES.include?(content_type)
|
104
|
+
return self.add_feed(url, nil)
|
105
|
+
end
|
106
|
+
|
107
|
+
doc = Nokogiri::HTML(f.read)
|
108
|
+
|
109
|
+
if doc.at("base") and doc.at("base")["href"]
|
110
|
+
@base_uri = doc.at("base")["href"]
|
111
|
+
else
|
112
|
+
@base_uri = nil
|
113
|
+
end
|
114
|
+
|
115
|
+
# first with links
|
109
116
|
(doc/"atom:link").each do |l|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
117
|
+
next unless l["rel"]
|
118
|
+
if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self"
|
119
|
+
self.add_feed(l["href"], url, @base_uri)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
(doc/"link").each do |l|
|
124
|
+
next unless l["rel"]
|
125
|
+
if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
|
126
|
+
self.add_feed(l["href"], url, @base_uri)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
(doc/"a").each do |a|
|
131
|
+
next unless a["href"]
|
132
|
+
if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/)
|
133
|
+
self.add_feed(a["href"], url, @base_uri)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
(doc/"a").each do |a|
|
138
|
+
next unless a["href"]
|
139
|
+
if self.looks_like_feed?(a["href"])
|
140
|
+
self.add_feed(a["href"], url, @base_uri)
|
141
|
+
end
|
142
|
+
end
|
136
143
|
|
137
144
|
# Added support for feeds like http://tabtimes.com/tbfeed/mashable/full.xml
|
138
145
|
if url.match(/.xml$/) and doc.root and doc.root["xml:base"] and doc.root["xml:base"].strip == url.strip
|
139
|
-
|
146
|
+
self.add_feed(url, nil)
|
140
147
|
end
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
148
|
+
end
|
149
|
+
rescue Timeout::Error => err
|
150
|
+
$stderr.puts "Timeout error occurred with `#{url}: #{err}'"
|
151
|
+
rescue OpenURI::HTTPError => the_error
|
152
|
+
$stderr.puts "Error occurred with `#{url}': #{the_error}"
|
153
|
+
rescue SocketError => err
|
154
|
+
$stderr.puts "Socket error occurred with: `#{url}': #{err}"
|
155
|
+
rescue => ex
|
156
|
+
$stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
|
157
|
+
ensure
|
158
|
+
return @feeds
|
159
|
+
end
|
160
|
+
|
161
|
+
end
|
162
|
+
|
163
|
+
def looks_like_feed?(url)
|
164
|
+
if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
|
165
|
+
true
|
166
|
+
else
|
167
|
+
false
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def add_feed(feed_url, orig_url, base_uri = nil)
|
172
|
+
# puts "#{feed_url} - #{orig_url}"
|
173
|
+
url = feed_url.sub(/^feed:/, '').strip
|
174
|
+
|
175
|
+
if base_uri
|
176
|
+
# url = base_uri + feed_url
|
177
|
+
url = URI.parse(base_uri).merge(feed_url).to_s
|
178
|
+
end
|
179
|
+
|
180
|
+
begin
|
181
|
+
uri = URI.parse(url)
|
182
|
+
rescue
|
183
|
+
puts "Error with `#{url}'"
|
184
|
+
exit 1
|
185
|
+
end
|
186
|
+
unless uri.absolute?
|
187
|
+
orig = URI.parse(orig_url)
|
188
|
+
url = orig.merge(url).to_s
|
189
|
+
end
|
190
|
+
|
191
|
+
# verify url is really valid
|
192
|
+
@feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url)
|
193
|
+
end
|
194
|
+
|
195
|
+
# not used. yet.
|
196
|
+
def _is_http_valid(uri, orig_url)
|
197
|
+
req = Net::HTTP.get_response(uri)
|
198
|
+
orig_uri = URI.parse(orig_url)
|
199
|
+
case req
|
200
|
+
when Net::HTTPSuccess then
|
201
|
+
return true
|
202
|
+
else
|
203
|
+
return false
|
204
|
+
end
|
205
|
+
end
|
199
206
|
end
|
200
207
|
|
201
208
|
if __FILE__ == $0
|
metadata
CHANGED
@@ -1,32 +1,58 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedbag
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
5
|
-
prerelease:
|
4
|
+
version: 0.9.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- David Moreno
|
8
|
+
- Derek Willis
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
date: 2013-12-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: nokogiri
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
17
|
requirements:
|
19
|
-
- -
|
18
|
+
- - ">="
|
20
19
|
- !ruby/object:Gem::Version
|
21
|
-
version: '0
|
20
|
+
version: '0'
|
22
21
|
type: :runtime
|
23
22
|
prerelease: false
|
24
23
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
24
|
requirements:
|
27
|
-
- -
|
25
|
+
- - ">="
|
28
26
|
- !ruby/object:Gem::Version
|
29
|
-
version: '0
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: shoulda
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: mocha
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 0.12.0
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 0.12.0
|
30
56
|
description: Ruby's favorite feed auto-discovery tool
|
31
57
|
email: david@axiombox.com
|
32
58
|
executables:
|
@@ -36,35 +62,34 @@ extra_rdoc_files:
|
|
36
62
|
- README.markdown
|
37
63
|
- COPYING
|
38
64
|
files:
|
39
|
-
-
|
65
|
+
- COPYING
|
66
|
+
- README.markdown
|
40
67
|
- benchmark/rfeedfinder_benchmark.rb
|
41
68
|
- bin/feedbag
|
42
|
-
-
|
43
|
-
- COPYING
|
69
|
+
- lib/feedbag.rb
|
44
70
|
homepage: http://github.com/damog/feedbag
|
45
71
|
licenses: []
|
72
|
+
metadata: {}
|
46
73
|
post_install_message:
|
47
74
|
rdoc_options:
|
48
|
-
- --main
|
75
|
+
- "--main"
|
49
76
|
- README.markdown
|
50
77
|
require_paths:
|
51
78
|
- lib
|
52
79
|
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
-
none: false
|
54
80
|
requirements:
|
55
|
-
- -
|
81
|
+
- - ">="
|
56
82
|
- !ruby/object:Gem::Version
|
57
83
|
version: '0'
|
58
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
|
-
none: false
|
60
85
|
requirements:
|
61
|
-
- -
|
86
|
+
- - ">="
|
62
87
|
- !ruby/object:Gem::Version
|
63
88
|
version: '0'
|
64
89
|
requirements: []
|
65
90
|
rubyforge_project: feedbag
|
66
|
-
rubygems_version:
|
91
|
+
rubygems_version: 2.2.2
|
67
92
|
signing_key:
|
68
|
-
specification_version:
|
93
|
+
specification_version: 4
|
69
94
|
summary: Ruby's favorite feed auto-discovery tool
|
70
95
|
test_files: []
|