gozap_rss 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/gozap_rss/chouti_rss.rb +41 -17
- data/lib/gozap_rss/gozap_ext.rb +3 -2
- data/lib/gozap_rss/version.rb +1 -1
- data/lib/gozap_rss.rb +0 -1
- data/spec/alpha.xml +738 -0
- data/spec/gozap_rss_spec.rb +11 -1
- data/spec/netease_top.xml +449 -0
- metadata +5 -3
data/lib/gozap_rss/chouti_rss.rb
CHANGED
@@ -2,6 +2,26 @@
|
|
2
2
|
|
3
3
|
module GozapRss
|
4
4
|
|
5
|
+
class Error < StandardError; end
|
6
|
+
|
7
|
+
class GozapHttpTimeOutError < Error
|
8
|
+
def initialize(url)
|
9
|
+
super("http get #{url} get timeout")
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class GozapHttpStatusError < Error
|
14
|
+
def initialize(url, code, msg)
|
15
|
+
super("http get #{url} return response_code is #{code} and error is #{msg}")
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
class GozapHttpReceiveError < Error
|
20
|
+
def initialize(url, msg)
|
21
|
+
super("http get #{url} error is #{msg}")
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
5
25
|
|
6
26
|
class ChoutiRssBase
|
7
27
|
def self.logger
|
@@ -41,12 +61,13 @@ module GozapRss
|
|
41
61
|
|
42
62
|
class ChoutiRss < ChoutiRssBase
|
43
63
|
|
44
|
-
attr_reader :
|
64
|
+
attr_reader :items
|
45
65
|
|
46
66
|
def initialize uri
|
47
67
|
@http_headers_option = {"User-Agent" => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
|
48
68
|
@url = uri
|
49
|
-
@
|
69
|
+
@items = []
|
70
|
+
@ttl = 120
|
50
71
|
content = get_feed_content uri
|
51
72
|
parse_rss(content)
|
52
73
|
end
|
@@ -60,14 +81,11 @@ module GozapRss
|
|
60
81
|
rss = RSS::Parser.parse(content, false)
|
61
82
|
@title = rss.channel.title.to_s.html_format
|
62
83
|
@description = rss.channel.description.to_s.html_format
|
63
|
-
@
|
64
|
-
@ttl = rss.channel.ttl.to_i == 0 ? 10 * 60 : rss.channel.ttl.to_i * 60
|
65
|
-
@rss_items = []
|
84
|
+
@ttl = rss.channel.ttl.to_i * 60 if (rss.channel.respond_to?(:ttl) && rss.channel.ttl.to_i > 0)
|
66
85
|
rss.items.each do |item|
|
67
86
|
rss_item = ChoutiRssItem.new(item)
|
68
|
-
@
|
87
|
+
@items << rss_item if rss_item
|
69
88
|
end
|
70
|
-
@rss_items.sort! { |a, b| b.pub_date <=>a.pub_date }
|
71
89
|
rescue Exception => e
|
72
90
|
logger_exception e
|
73
91
|
end
|
@@ -79,26 +97,35 @@ module GozapRss
|
|
79
97
|
def get_feed_content uri
|
80
98
|
|
81
99
|
content = ""
|
100
|
+
@retry = 3
|
82
101
|
begin
|
83
102
|
response = Typhoeus::Request.get(uri,
|
84
103
|
:headers['User-Agent'] => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7"\
|
85
104
|
"(KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
|
86
|
-
:timeout =>
|
105
|
+
:timeout => 30000,
|
87
106
|
:max_redirects => 3,
|
88
107
|
:follow_location => true
|
89
108
|
)
|
90
109
|
|
91
110
|
if response.success? || (response.code < 400 && response.code >= 301)
|
92
111
|
content = response.body
|
93
|
-
isutf8 = Kconv.isutf8(content)
|
94
|
-
content =
|
112
|
+
#isutf8 = Kconv.isutf8(content)
|
113
|
+
#content = content.encode("UTF-8", "GB2312") unless isutf8
|
95
114
|
elsif response.timed_out?
|
96
|
-
|
115
|
+
raise GozapHttpTimeOutError.new(uri)
|
97
116
|
elsif response.code == 0
|
98
|
-
|
117
|
+
raise GozapHttpReceiveError.new(uri,response.curl_error_message)
|
99
118
|
else
|
100
|
-
|
119
|
+
rails GozapHttpStatusError.new(uri, response.code, response.curl_error_message)
|
101
120
|
end
|
121
|
+
rescue GozapHttpTimeOutError => e
|
122
|
+
logger_exception e
|
123
|
+
logger.info "#{@retry} #{uri}"
|
124
|
+
retry if (@retry -= 1) > 0
|
125
|
+
rescue GozapHttpReceiveError => e
|
126
|
+
logger_exception e
|
127
|
+
logger.info "retry #{@retry} #{uri}"
|
128
|
+
retry if (@retry -= 1) > 0
|
102
129
|
rescue Exception => e
|
103
130
|
logger_exception e
|
104
131
|
end
|
@@ -109,16 +136,13 @@ module GozapRss
|
|
109
136
|
|
110
137
|
|
111
138
|
class ChoutiRssItem < ChoutiRssBase
|
112
|
-
attr_reader :url_md5
|
113
139
|
|
114
140
|
def initialize item
|
115
141
|
@title = item.title.to_s.html_format
|
116
|
-
@pub_date = item.pubDate || item.lastBuildDate
|
117
142
|
@description = item.description.to_s.html_format
|
118
143
|
@url = item.link.to_s.strip
|
119
|
-
@url_md5 = Digest::MD5.hexdigest(@url)
|
120
144
|
unless validate
|
121
|
-
logger.error "parser item error -- title=>#{@title},
|
145
|
+
logger.error "parser item error -- title=>#{@title}, url=>#{@url}"
|
122
146
|
return nil
|
123
147
|
end
|
124
148
|
self
|
data/lib/gozap_rss/gozap_ext.rb
CHANGED
data/lib/gozap_rss/version.rb
CHANGED