feedbag 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. checksums.yaml +7 -0
  2. data/README.markdown +8 -9
  3. data/lib/feedbag.rb +171 -164
  4. metadata +44 -19
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 13f1c9e29ccb4e1c9affd106d1a7dacb641ebecf
4
+ data.tar.gz: 0b3ed450ad3fe07bca31cc120429092809d2bbdf
5
+ SHA512:
6
+ metadata.gz: 03c341cc2fca2caf681ab6086b008498622ab639f3fb751ffe12a54c5a34e04f9d1c3c2cc145dcd25681dff02f16bbbe29e8149cd690429ede6159df6fc42ac0
7
+ data.tar.gz: 80b23d8060a4721b904e294196a1359f6d63efdcdac73d5ba715ccd0aff250713919f03fb28e7514d13436342cec6ccf3e1c42cb29cacb94c379dbf753484ad0
@@ -14,24 +14,23 @@ Feedbag is Ruby's favorite auto-discovery tool/library!
14
14
  >> Feedbag.feed?("http://jobs.perl.org/rss/standard.rss")
15
15
  => true
16
16
 
17
- You can also use an installed command line tool for quick queries, if you install the gem:
18
-
19
- $ feedbag http://rubygems.org/profiles/damog
20
- == http://rubygems.org/profiles/damog:
21
- - http://feeds.feedburner.com/gemcutter-latest
22
-
23
-
24
17
  ### Installation
25
18
 
26
- $ sudo gem install feedbag
19
+ $ gem install feedbag
27
20
 
28
21
  Or just grab feedbag.rb and use it on your own project:
29
22
 
30
23
  $ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb
31
24
 
25
+ You can also use the command line tool for quick queries, if you install the gem:
26
+
27
+ $ feedbag http://rubygems.org/profiles/damog
28
+ == http://rubygems.org/profiles/damog:
29
+ - http://feeds.feedburner.com/gemcutter-latest
30
+
32
31
  ### Why should you use it?
33
32
 
34
- - Because it only uses [Hpricot](https://code.whytheluckystiff.net/hpricot/) as dependency.
33
+ - Because it only uses [Nokogiri](http://nokogiri.org/) as dependency.
35
34
  - Because it follows modern feed filename conventions (like those ones used by WordPress blogs, or Blogger, etc).
36
35
  - Because it's a single file you can embed easily in your application.
37
36
  - Because it's faster than rfeedfinder.
@@ -22,180 +22,187 @@
22
22
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
23
 
24
24
  require "rubygems"
25
- require "hpricot"
25
+ require "nokogiri"
26
26
  require "open-uri"
27
27
  require "net/http"
28
28
 
29
- module Feedbag
30
-
31
- @content_types = [
32
- 'application/x.atom+xml',
33
- 'application/atom+xml',
34
- 'application/xml',
35
- 'text/xml',
36
- 'application/rss+xml',
37
- 'application/rdf+xml',
38
- ]
39
-
40
- $feeds = []
41
- $base_uri = nil
42
-
43
- def self.feed?(url)
44
- # use LWR::Simple.normalize some time
45
- url_uri = URI.parse(url)
46
- url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
47
- url << "?#{url_uri.query}" if url_uri.query
48
-
49
- # hack:
50
- url.sub!(/^feed:\/\//, 'http://')
51
-
52
- res = self.find(url)
53
- if res.size == 1 and res.first == url
54
- return true
55
- else
56
- return false
57
- end
58
- end
59
-
60
- def self.find(url, args = {})
61
- $feeds = []
62
-
63
- url_uri = URI.parse(url)
64
- url = nil
65
- if url_uri.scheme.nil?
66
- url = "http://#{url_uri.to_s}"
67
- elsif url_uri.scheme == "feed"
68
- return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil)
69
- else
70
- url = url_uri.to_s
71
- end
72
- #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
73
-
74
- # check if feed_valid is avail
29
+ class Feedbag
30
+
31
+ CONTENT_TYPES = [
32
+ 'application/x.atom+xml',
33
+ 'application/atom+xml',
34
+ 'application/xml',
35
+ 'text/xml',
36
+ 'application/rss+xml',
37
+ 'application/rdf+xml',
38
+ ].freeze
39
+
40
+ def self.feed?(url)
41
+ new.feed?(url)
42
+ end
43
+
44
+ def self.find(url, args = {})
45
+ new.find(url, args = {})
46
+ end
47
+
48
+ def initialize
49
+ @feeds = []
50
+ end
51
+
52
+ def feed?(url)
53
+ # use LWR::Simple.normalize some time
54
+ url_uri = URI.parse(url)
55
+ url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
56
+ url << "?#{url_uri.query}" if url_uri.query
57
+
58
+ # hack:
59
+ url.sub!(/^feed:\/\//, 'http://')
60
+
61
+ res = Feedbag.find(url)
62
+ if res.size == 1 and res.first == url
63
+ return true
64
+ else
65
+ return false
66
+ end
67
+ end
68
+
69
+ def find(url, args = {})
70
+ url_uri = URI.parse(url)
71
+ url = nil
72
+ if url_uri.scheme.nil?
73
+ url = "http://#{url_uri.to_s}"
74
+ elsif url_uri.scheme == "feed"
75
+ return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil)
76
+ else
77
+ url = url_uri.to_s
78
+ end
79
+ #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
80
+
81
+ # check if feed_valid is avail
75
82
  begin
76
- require "feed_validator"
77
- v = W3C::FeedValidator.new
78
- v.validate_url(url)
79
- return self.add_feed(url, nil) if v.valid?
80
- rescue LoadError
81
- # scoo
82
- rescue REXML::ParseException
83
- # usually indicates timeout
84
- # TODO: actually find out timeout. use Terminator?
85
- # $stderr.puts "Feed looked like feed but might not have passed validation or timed out"
83
+ require "feed_validator"
84
+ v = W3C::FeedValidator.new
85
+ v.validate_url(url)
86
+ return self.add_feed(url, nil) if v.valid?
87
+ rescue LoadError
88
+ # scoo
89
+ rescue REXML::ParseException
90
+ # usually indicates timeout
91
+ # TODO: actually find out timeout. use Terminator?
92
+ # $stderr.puts "Feed looked like feed but might not have passed validation or timed out"
86
93
  rescue => ex
87
- $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}"
88
- end
89
-
90
- begin
91
- html = open(url) do |f|
92
- content_type = f.content_type.downcase
93
- if content_type == "application/octet-stream" # open failed
94
- content_type = f.meta["content-type"].gsub(/;.*$/, '')
95
- end
96
- if @content_types.include?(content_type)
97
- return self.add_feed(url, nil)
98
- end
99
-
100
- doc = Hpricot(f.read)
101
-
102
- if doc.at("base") and doc.at("base")["href"]
103
- $base_uri = doc.at("base")["href"]
104
- else
105
- $base_uri = nil
106
- end
107
-
108
- # first with links
94
+ $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
95
+ end
96
+
97
+ begin
98
+ html = open(url) do |f|
99
+ content_type = f.content_type.downcase
100
+ if content_type == "application/octet-stream" # open failed
101
+ content_type = f.meta["content-type"].gsub(/;.*$/, '')
102
+ end
103
+ if CONTENT_TYPES.include?(content_type)
104
+ return self.add_feed(url, nil)
105
+ end
106
+
107
+ doc = Nokogiri::HTML(f.read)
108
+
109
+ if doc.at("base") and doc.at("base")["href"]
110
+ @base_uri = doc.at("base")["href"]
111
+ else
112
+ @base_uri = nil
113
+ end
114
+
115
+ # first with links
109
116
  (doc/"atom:link").each do |l|
110
- next unless l["rel"]
111
- if l["type"] and @content_types.include?(l["type"].downcase.strip) and l["rel"].downcase == "self"
112
- self.add_feed(l["href"], url, $base_uri)
113
- end
114
- end
115
-
116
- (doc/"link").each do |l|
117
- next unless l["rel"]
118
- if l["type"] and @content_types.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
119
- self.add_feed(l["href"], url, $base_uri)
120
- end
121
- end
122
-
123
- (doc/"a").each do |a|
124
- next unless a["href"]
125
- if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/)
126
- self.add_feed(a["href"], url, $base_uri)
127
- end
128
- end
129
-
130
- (doc/"a").each do |a|
131
- next unless a["href"]
132
- if self.looks_like_feed?(a["href"])
133
- self.add_feed(a["href"], url, $base_uri)
134
- end
135
- end
117
+ next unless l["rel"]
118
+ if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self"
119
+ self.add_feed(l["href"], url, @base_uri)
120
+ end
121
+ end
122
+
123
+ (doc/"link").each do |l|
124
+ next unless l["rel"]
125
+ if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
126
+ self.add_feed(l["href"], url, @base_uri)
127
+ end
128
+ end
129
+
130
+ (doc/"a").each do |a|
131
+ next unless a["href"]
132
+ if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/)
133
+ self.add_feed(a["href"], url, @base_uri)
134
+ end
135
+ end
136
+
137
+ (doc/"a").each do |a|
138
+ next unless a["href"]
139
+ if self.looks_like_feed?(a["href"])
140
+ self.add_feed(a["href"], url, @base_uri)
141
+ end
142
+ end
136
143
 
137
144
  # Added support for feeds like http://tabtimes.com/tbfeed/mashable/full.xml
138
145
  if url.match(/.xml$/) and doc.root and doc.root["xml:base"] and doc.root["xml:base"].strip == url.strip
139
- self.add_feed(url, nil)
146
+ self.add_feed(url, nil)
140
147
  end
141
- end
142
- rescue Timeout::Error => err
143
- $stderr.puts "Timeout error ocurred with `#{url}: #{err}'"
144
- rescue OpenURI::HTTPError => the_error
145
- $stderr.puts "Error ocurred with `#{url}': #{the_error}"
146
- rescue SocketError => err
147
- $stderr.puts "Socket error ocurred with: `#{url}': #{err}"
148
- rescue => ex
149
- $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}"
150
- ensure
151
- return $feeds
152
- end
153
-
154
- end
155
-
156
- def self.looks_like_feed?(url)
157
- if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
158
- true
159
- else
160
- false
161
- end
162
- end
163
-
164
- def self.add_feed(feed_url, orig_url, base_uri = nil)
165
- # puts "#{feed_url} - #{orig_url}"
166
- url = feed_url.sub(/^feed:/, '').strip
167
-
168
- if base_uri
169
- # url = base_uri + feed_url
170
- url = URI.parse(base_uri).merge(feed_url).to_s
171
- end
172
-
173
- begin
174
- uri = URI.parse(url)
175
- rescue
176
- puts "Error with `#{url}'"
177
- exit 1
178
- end
179
- unless uri.absolute?
180
- orig = URI.parse(orig_url)
181
- url = orig.merge(url).to_s
182
- end
183
-
184
- # verify url is really valid
185
- $feeds.push(url) unless $feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url)
186
- end
187
-
188
- # not used. yet.
189
- def self._is_http_valid(uri, orig_url)
190
- req = Net::HTTP.get_response(uri)
191
- orig_uri = URI.parse(orig_url)
192
- case req
193
- when Net::HTTPSuccess then
194
- return true
195
- else
196
- return false
197
- end
198
- end
148
+ end
149
+ rescue Timeout::Error => err
150
+ $stderr.puts "Timeout error occurred with `#{url}: #{err}'"
151
+ rescue OpenURI::HTTPError => the_error
152
+ $stderr.puts "Error occurred with `#{url}': #{the_error}"
153
+ rescue SocketError => err
154
+ $stderr.puts "Socket error occurred with: `#{url}': #{err}"
155
+ rescue => ex
156
+ $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
157
+ ensure
158
+ return @feeds
159
+ end
160
+
161
+ end
162
+
163
+ def looks_like_feed?(url)
164
+ if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
165
+ true
166
+ else
167
+ false
168
+ end
169
+ end
170
+
171
+ def add_feed(feed_url, orig_url, base_uri = nil)
172
+ # puts "#{feed_url} - #{orig_url}"
173
+ url = feed_url.sub(/^feed:/, '').strip
174
+
175
+ if base_uri
176
+ # url = base_uri + feed_url
177
+ url = URI.parse(base_uri).merge(feed_url).to_s
178
+ end
179
+
180
+ begin
181
+ uri = URI.parse(url)
182
+ rescue
183
+ puts "Error with `#{url}'"
184
+ exit 1
185
+ end
186
+ unless uri.absolute?
187
+ orig = URI.parse(orig_url)
188
+ url = orig.merge(url).to_s
189
+ end
190
+
191
+ # verify url is really valid
192
+ @feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url)
193
+ end
194
+
195
+ # not used. yet.
196
+ def _is_http_valid(uri, orig_url)
197
+ req = Net::HTTP.get_response(uri)
198
+ orig_uri = URI.parse(orig_url)
199
+ case req
200
+ when Net::HTTPSuccess then
201
+ return true
202
+ else
203
+ return false
204
+ end
205
+ end
199
206
  end
200
207
 
201
208
  if __FILE__ == $0
metadata CHANGED
@@ -1,32 +1,58 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedbag
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
5
- prerelease:
4
+ version: 0.9.3
6
5
  platform: ruby
7
6
  authors:
8
7
  - David Moreno
8
+ - Derek Willis
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
  date: 2013-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: hpricot
15
+ name: nokogiri
16
16
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
17
  requirements:
19
- - - ! '>='
18
+ - - ">="
20
19
  - !ruby/object:Gem::Version
21
- version: '0.6'
20
+ version: '0'
22
21
  type: :runtime
23
22
  prerelease: false
24
23
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
24
  requirements:
27
- - - ! '>='
25
+ - - ">="
28
26
  - !ruby/object:Gem::Version
29
- version: '0.6'
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: shoulda
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: mocha
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: 0.12.0
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: 0.12.0
30
56
  description: Ruby's favorite feed auto-discoverty tool
31
57
  email: david@axiombox.com
32
58
  executables:
@@ -36,35 +62,34 @@ extra_rdoc_files:
36
62
  - README.markdown
37
63
  - COPYING
38
64
  files:
39
- - lib/feedbag.rb
65
+ - COPYING
66
+ - README.markdown
40
67
  - benchmark/rfeedfinder_benchmark.rb
41
68
  - bin/feedbag
42
- - README.markdown
43
- - COPYING
69
+ - lib/feedbag.rb
44
70
  homepage: http://github.com/damog/feedbag
45
71
  licenses: []
72
+ metadata: {}
46
73
  post_install_message:
47
74
  rdoc_options:
48
- - --main
75
+ - "--main"
49
76
  - README.markdown
50
77
  require_paths:
51
78
  - lib
52
79
  required_ruby_version: !ruby/object:Gem::Requirement
53
- none: false
54
80
  requirements:
55
- - - ! '>='
81
+ - - ">="
56
82
  - !ruby/object:Gem::Version
57
83
  version: '0'
58
84
  required_rubygems_version: !ruby/object:Gem::Requirement
59
- none: false
60
85
  requirements:
61
- - - ! '>='
86
+ - - ">="
62
87
  - !ruby/object:Gem::Version
63
88
  version: '0'
64
89
  requirements: []
65
90
  rubyforge_project: feedbag
66
- rubygems_version: 1.8.23
91
+ rubygems_version: 2.2.2
67
92
  signing_key:
68
- specification_version: 3
93
+ specification_version: 4
69
94
  summary: Ruby's favorite feed auto-discovery tool
70
95
  test_files: []