gozap_rss 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,26 @@
2
2
 
3
3
  module GozapRss
4
4
 
5
+ class Error < StandardError; end
6
+
7
+ class GozapHttpTimeOutError < Error
8
+ def initialize(url)
9
+ super("http get #{url} get timeout")
10
+ end
11
+ end
12
+
13
+ class GozapHttpStatusError < Error
14
+ def initialize(url, code, msg)
15
+ super("http get #{url} return response_code is #{code} and error is #{msg}")
16
+ end
17
+ end
18
+
19
+ class GozapHttpReceiveError < Error
20
+ def initialize(url, msg)
21
+ super("http get #{url} error is #{msg}")
22
+ end
23
+ end
24
+
5
25
 
6
26
  class ChoutiRssBase
7
27
  def self.logger
@@ -41,12 +61,13 @@ module GozapRss
41
61
 
42
62
  class ChoutiRss < ChoutiRssBase
43
63
 
44
- attr_reader :rss_items
64
+ attr_reader :items
45
65
 
46
66
  def initialize uri
47
67
  @http_headers_option = {"User-Agent" => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
48
68
  @url = uri
49
- @rss_items = []
69
+ @items = []
70
+ @ttl = 120
50
71
  content = get_feed_content uri
51
72
  parse_rss(content)
52
73
  end
@@ -60,14 +81,11 @@ module GozapRss
60
81
  rss = RSS::Parser.parse(content, false)
61
82
  @title = rss.channel.title.to_s.html_format
62
83
  @description = rss.channel.description.to_s.html_format
63
- @pub_date = rss.channel.pubDate
64
- @ttl = rss.channel.ttl.to_i == 0 ? 10 * 60 : rss.channel.ttl.to_i * 60
65
- @rss_items = []
84
+ @ttl = rss.channel.ttl.to_i * 60 if (rss.channel.respond_to?(:ttl) && rss.channel.ttl.to_i > 0)
66
85
  rss.items.each do |item|
67
86
  rss_item = ChoutiRssItem.new(item)
68
- @rss_items << rss_item if rss_item
87
+ @items << rss_item if rss_item
69
88
  end
70
- @rss_items.sort! { |a, b| b.pub_date <=>a.pub_date }
71
89
  rescue Exception => e
72
90
  logger_exception e
73
91
  end
@@ -79,26 +97,35 @@ module GozapRss
79
97
  def get_feed_content uri
80
98
 
81
99
  content = ""
100
+ @retry = 3
82
101
  begin
83
102
  response = Typhoeus::Request.get(uri,
84
103
  :headers['User-Agent'] => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7"\
85
104
  "(KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
86
- :timeout => 120000,
105
+ :timeout => 30000,
87
106
  :max_redirects => 3,
88
107
  :follow_location => true
89
108
  )
90
109
 
91
110
  if response.success? || (response.code < 400 && response.code >= 301)
92
111
  content = response.body
93
- isutf8 = Kconv.isutf8(content)
94
- content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
112
+ #isutf8 = Kconv.isutf8(content)
113
+ #content = content.encode("UTF-8", "GB2312") unless isutf8
95
114
  elsif response.timed_out?
96
- log_failed response
115
+ raise GozapHttpTimeOutError.new(uri)
97
116
  elsif response.code == 0
98
- log_failed(response)
117
+ raise GozapHttpReceiveError.new(uri,response.curl_error_message)
99
118
  else
100
- log_failed(response)
119
+ rails GozapHttpStatusError.new(uri, response.code, response.curl_error_message)
101
120
  end
121
+ rescue GozapHttpTimeOutError => e
122
+ logger_exception e
123
+ logger.info "#{@retry} #{uri}"
124
+ retry if (@retry -= 1) > 0
125
+ rescue GozapHttpReceiveError => e
126
+ logger_exception e
127
+ logger.info "retry #{@retry} #{uri}"
128
+ retry if (@retry -= 1) > 0
102
129
  rescue Exception => e
103
130
  logger_exception e
104
131
  end
@@ -109,16 +136,13 @@ module GozapRss
109
136
 
110
137
 
111
138
  class ChoutiRssItem < ChoutiRssBase
112
- attr_reader :url_md5
113
139
 
114
140
  def initialize item
115
141
  @title = item.title.to_s.html_format
116
- @pub_date = item.pubDate || item.lastBuildDate
117
142
  @description = item.description.to_s.html_format
118
143
  @url = item.link.to_s.strip
119
- @url_md5 = Digest::MD5.hexdigest(@url)
120
144
  unless validate
121
- logger.error "parser item error -- title=>#{@title}, pub_date=>#{@pub_date} description=>#{@description}, url=>#{@url}"
145
+ logger.error "parser item error -- title=>#{@title}, url=>#{@url}"
122
146
  return nil
123
147
  end
124
148
  self
@@ -1,6 +1,7 @@
1
1
  class String
2
2
  def html_format
3
- str = Sanitize.clean(self);
4
- str.gsub(/[\s]+?/, "")
3
+ #str = Sanitize.clean(self);
4
+ #str.gsub(/[\s]+?/, "")
5
+ self
5
6
  end
6
7
  end
@@ -1,3 +1,3 @@
1
1
  module GozapRss
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.6"
3
3
  end
data/lib/gozap_rss.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'logger'
3
3
  require 'json'
4
- require "mysql2"
5
4
  require 'kconv'
6
5
  require 'iconv'
7
6
  require 'rss/1.0'