gozap_rss 0.0.4 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,6 +2,26 @@
2
2
 
3
3
  module GozapRss
4
4
 
5
+ class Error < StandardError; end
6
+
7
+ class GozapHttpTimeOutError < Error
8
+ def initialize(url)
9
+ super("http get #{url} get timeout")
10
+ end
11
+ end
12
+
13
+ class GozapHttpStatusError < Error
14
+ def initialize(url, code, msg)
15
+ super("http get #{url} return response_code is #{code} and error is #{msg}")
16
+ end
17
+ end
18
+
19
+ class GozapHttpReceiveError < Error
20
+ def initialize(url, msg)
21
+ super("http get #{url} error is #{msg}")
22
+ end
23
+ end
24
+
5
25
 
6
26
  class ChoutiRssBase
7
27
  def self.logger
@@ -41,12 +61,13 @@ module GozapRss
41
61
 
42
62
  class ChoutiRss < ChoutiRssBase
43
63
 
44
- attr_reader :rss_items
64
+ attr_reader :items
45
65
 
46
66
  def initialize uri
47
67
  @http_headers_option = {"User-Agent" => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
48
68
  @url = uri
49
- @rss_items = []
69
+ @items = []
70
+ @ttl = 120
50
71
  content = get_feed_content uri
51
72
  parse_rss(content)
52
73
  end
@@ -60,14 +81,11 @@ module GozapRss
60
81
  rss = RSS::Parser.parse(content, false)
61
82
  @title = rss.channel.title.to_s.html_format
62
83
  @description = rss.channel.description.to_s.html_format
63
- @pub_date = rss.channel.pubDate
64
- @ttl = rss.channel.ttl.to_i == 0 ? 10 * 60 : rss.channel.ttl.to_i * 60
65
- @rss_items = []
84
+ @ttl = rss.channel.ttl.to_i * 60 if (rss.channel.respond_to?(:ttl) && rss.channel.ttl.to_i > 0)
66
85
  rss.items.each do |item|
67
86
  rss_item = ChoutiRssItem.new(item)
68
- @rss_items << rss_item if rss_item
87
+ @items << rss_item if rss_item
69
88
  end
70
- @rss_items.sort! { |a, b| b.pub_date <=>a.pub_date }
71
89
  rescue Exception => e
72
90
  logger_exception e
73
91
  end
@@ -79,26 +97,35 @@ module GozapRss
79
97
  def get_feed_content uri
80
98
 
81
99
  content = ""
100
+ @retry = 3
82
101
  begin
83
102
  response = Typhoeus::Request.get(uri,
84
103
  :headers['User-Agent'] => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7"\
85
104
  "(KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
86
- :timeout => 120000,
105
+ :timeout => 30000,
87
106
  :max_redirects => 3,
88
107
  :follow_location => true
89
108
  )
90
109
 
91
110
  if response.success? || (response.code < 400 && response.code >= 301)
92
111
  content = response.body
93
- isutf8 = Kconv.isutf8(content)
94
- content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
112
+ #isutf8 = Kconv.isutf8(content)
113
+ #content = content.encode("UTF-8", "GB2312") unless isutf8
95
114
  elsif response.timed_out?
96
- log_failed response
115
+ raise GozapHttpTimeOutError.new(uri)
97
116
  elsif response.code == 0
98
- log_failed(response)
117
+ raise GozapHttpReceiveError.new(uri,response.curl_error_message)
99
118
  else
100
- log_failed(response)
119
+ rails GozapHttpStatusError.new(uri, response.code, response.curl_error_message)
101
120
  end
121
+ rescue GozapHttpTimeOutError => e
122
+ logger_exception e
123
+ logger.info "#{@retry} #{uri}"
124
+ retry if (@retry -= 1) > 0
125
+ rescue GozapHttpReceiveError => e
126
+ logger_exception e
127
+ logger.info "retry #{@retry} #{uri}"
128
+ retry if (@retry -= 1) > 0
102
129
  rescue Exception => e
103
130
  logger_exception e
104
131
  end
@@ -109,16 +136,13 @@ module GozapRss
109
136
 
110
137
 
111
138
  class ChoutiRssItem < ChoutiRssBase
112
- attr_reader :url_md5
113
139
 
114
140
  def initialize item
115
141
  @title = item.title.to_s.html_format
116
- @pub_date = item.pubDate || item.lastBuildDate
117
142
  @description = item.description.to_s.html_format
118
143
  @url = item.link.to_s.strip
119
- @url_md5 = Digest::MD5.hexdigest(@url)
120
144
  unless validate
121
- logger.error "parser item error -- title=>#{@title}, pub_date=>#{@pub_date} description=>#{@description}, url=>#{@url}"
145
+ logger.error "parser item error -- title=>#{@title}, url=>#{@url}"
122
146
  return nil
123
147
  end
124
148
  self
@@ -1,6 +1,7 @@
1
1
  class String
2
2
  def html_format
3
- str = Sanitize.clean(self);
4
- str.gsub(/[\s]+?/, "")
3
+ #str = Sanitize.clean(self);
4
+ #str.gsub(/[\s]+?/, "")
5
+ self
5
6
  end
6
7
  end
@@ -1,3 +1,3 @@
1
1
  module GozapRss
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.6"
3
3
  end
data/lib/gozap_rss.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'logger'
3
3
  require 'json'
4
- require "mysql2"
5
4
  require 'kconv'
6
5
  require 'iconv'
7
6
  require 'rss/1.0'