gozap_rss 0.0.4 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/gozap_rss/chouti_rss.rb +41 -17
- data/lib/gozap_rss/gozap_ext.rb +3 -2
- data/lib/gozap_rss/version.rb +1 -1
- data/lib/gozap_rss.rb +0 -1
- data/spec/alpha.xml +738 -0
- data/spec/gozap_rss_spec.rb +11 -1
- data/spec/netease_top.xml +449 -0
- metadata +5 -3
data/lib/gozap_rss/chouti_rss.rb
CHANGED
@@ -2,6 +2,26 @@
|
|
2
2
|
|
3
3
|
module GozapRss
|
4
4
|
|
5
|
+
class Error < StandardError; end
|
6
|
+
|
7
|
+
class GozapHttpTimeOutError < Error
|
8
|
+
def initialize(url)
|
9
|
+
super("http get #{url} get timeout")
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class GozapHttpStatusError < Error
|
14
|
+
def initialize(url, code, msg)
|
15
|
+
super("http get #{url} return response_code is #{code} and error is #{msg}")
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
class GozapHttpReceiveError < Error
|
20
|
+
def initialize(url, msg)
|
21
|
+
super("http get #{url} error is #{msg}")
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
5
25
|
|
6
26
|
class ChoutiRssBase
|
7
27
|
def self.logger
|
@@ -41,12 +61,13 @@ module GozapRss
|
|
41
61
|
|
42
62
|
class ChoutiRss < ChoutiRssBase
|
43
63
|
|
44
|
-
attr_reader :
|
64
|
+
attr_reader :items
|
45
65
|
|
46
66
|
def initialize uri
|
47
67
|
@http_headers_option = {"User-Agent" => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
|
48
68
|
@url = uri
|
49
|
-
@
|
69
|
+
@items = []
|
70
|
+
@ttl = 120
|
50
71
|
content = get_feed_content uri
|
51
72
|
parse_rss(content)
|
52
73
|
end
|
@@ -60,14 +81,11 @@ module GozapRss
|
|
60
81
|
rss = RSS::Parser.parse(content, false)
|
61
82
|
@title = rss.channel.title.to_s.html_format
|
62
83
|
@description = rss.channel.description.to_s.html_format
|
63
|
-
@
|
64
|
-
@ttl = rss.channel.ttl.to_i == 0 ? 10 * 60 : rss.channel.ttl.to_i * 60
|
65
|
-
@rss_items = []
|
84
|
+
@ttl = rss.channel.ttl.to_i * 60 if (rss.channel.respond_to?(:ttl) && rss.channel.ttl.to_i > 0)
|
66
85
|
rss.items.each do |item|
|
67
86
|
rss_item = ChoutiRssItem.new(item)
|
68
|
-
@
|
87
|
+
@items << rss_item if rss_item
|
69
88
|
end
|
70
|
-
@rss_items.sort! { |a, b| b.pub_date <=>a.pub_date }
|
71
89
|
rescue Exception => e
|
72
90
|
logger_exception e
|
73
91
|
end
|
@@ -79,26 +97,35 @@ module GozapRss
|
|
79
97
|
def get_feed_content uri
|
80
98
|
|
81
99
|
content = ""
|
100
|
+
@retry = 3
|
82
101
|
begin
|
83
102
|
response = Typhoeus::Request.get(uri,
|
84
103
|
:headers['User-Agent'] => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7"\
|
85
104
|
"(KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
|
86
|
-
:timeout =>
|
105
|
+
:timeout => 30000,
|
87
106
|
:max_redirects => 3,
|
88
107
|
:follow_location => true
|
89
108
|
)
|
90
109
|
|
91
110
|
if response.success? || (response.code < 400 && response.code >= 301)
|
92
111
|
content = response.body
|
93
|
-
isutf8 = Kconv.isutf8(content)
|
94
|
-
content =
|
112
|
+
#isutf8 = Kconv.isutf8(content)
|
113
|
+
#content = content.encode("UTF-8", "GB2312") unless isutf8
|
95
114
|
elsif response.timed_out?
|
96
|
-
|
115
|
+
raise GozapHttpTimeOutError.new(uri)
|
97
116
|
elsif response.code == 0
|
98
|
-
|
117
|
+
raise GozapHttpReceiveError.new(uri,response.curl_error_message)
|
99
118
|
else
|
100
|
-
|
119
|
+
raise GozapHttpStatusError.new(uri, response.code, response.curl_error_message)
|
101
120
|
end
|
121
|
+
rescue GozapHttpTimeOutError => e
|
122
|
+
logger_exception e
|
123
|
+
logger.info "#{@retry} #{uri}"
|
124
|
+
retry if (@retry -= 1) > 0
|
125
|
+
rescue GozapHttpReceiveError => e
|
126
|
+
logger_exception e
|
127
|
+
logger.info "retry #{@retry} #{uri}"
|
128
|
+
retry if (@retry -= 1) > 0
|
102
129
|
rescue Exception => e
|
103
130
|
logger_exception e
|
104
131
|
end
|
@@ -109,16 +136,13 @@ module GozapRss
|
|
109
136
|
|
110
137
|
|
111
138
|
class ChoutiRssItem < ChoutiRssBase
|
112
|
-
attr_reader :url_md5
|
113
139
|
|
114
140
|
def initialize item
|
115
141
|
@title = item.title.to_s.html_format
|
116
|
-
@pub_date = item.pubDate || item.lastBuildDate
|
117
142
|
@description = item.description.to_s.html_format
|
118
143
|
@url = item.link.to_s.strip
|
119
|
-
@url_md5 = Digest::MD5.hexdigest(@url)
|
120
144
|
unless validate
|
121
|
-
logger.error "parser item error -- title=>#{@title},
|
145
|
+
logger.error "parser item error -- title=>#{@title}, url=>#{@url}"
|
122
146
|
return nil
|
123
147
|
end
|
124
148
|
self
|
data/lib/gozap_rss/gozap_ext.rb
CHANGED
data/lib/gozap_rss/version.rb
CHANGED