logophobia-feedbag 0.5.102 → 0.5.103
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- data/lib/feedbag.rb +32 -29
- metadata +1 -1
data/lib/feedbag.rb
CHANGED
|
@@ -20,6 +20,7 @@ require "rubygems"
|
|
|
20
20
|
require "hpricot"
|
|
21
21
|
require "open-uri"
|
|
22
22
|
require "net/http"
|
|
23
|
+
require 'timeout'
|
|
23
24
|
|
|
24
25
|
module Feedbag
|
|
25
26
|
Feed = Struct.new(:url, :title)
|
|
@@ -86,36 +87,38 @@ module Feedbag
|
|
|
86
87
|
end
|
|
87
88
|
|
|
88
89
|
begin
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
90
|
+
Timeout::timeout(10) do
|
|
91
|
+
open(url) do |f|
|
|
92
|
+
if @content_types.include?(f.content_type.downcase)
|
|
93
|
+
return self.add_feed(url, nil)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
doc = Hpricot(f.read)
|
|
97
|
+
|
|
98
|
+
if doc.at("base") and doc.at("base")["href"]
|
|
99
|
+
$base_uri = doc.at("base")["href"]
|
|
100
|
+
else
|
|
101
|
+
$base_uri = nil
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# first with links
|
|
105
|
+
(doc/"link").each do |l|
|
|
106
|
+
next unless l["rel"]
|
|
107
|
+
if l["type"] and @content_types.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
|
|
108
|
+
self.add_feed(l["href"], url, $base_uri, l["title"])
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
unless args[:narrow]
|
|
113
|
+
(doc/"a").each do |a|
|
|
114
|
+
next unless a["href"]
|
|
115
|
+
if self.looks_like_feed?(a["href"])
|
|
116
|
+
self.add_feed(a["href"], url, $base_uri, a["title"] || a.inner_html || a['alt']) # multiple fallbacks, first title, then the tag content, then the alt tag (in case of image)
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
117
120
|
end
|
|
118
|
-
|
|
121
|
+
end
|
|
119
122
|
rescue Timeout::Error => err
|
|
120
123
|
$stderr.puts "Timeout error ocurred with `#{url}: #{err}'"
|
|
121
124
|
rescue OpenURI::HTTPError => the_error
|