gozap_rss 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ source "http://rubygems.org"
6
6
  # Add dependencies to develop your gem here.
7
7
  # Include everything needed to run rake, tests, features, etc.
8
8
  gem "logger"
9
-
9
+ gem "typhoeus"
10
10
  gem "sanitize", ">=2.0.3"
11
11
 
12
12
 
data/Gemfile.lock CHANGED
@@ -8,6 +8,7 @@ GEM
8
8
  git (>= 1.2.5)
9
9
  rake
10
10
  logger (1.2.8)
11
+ mime-types (1.17.2)
11
12
  multi_json (1.0.4)
12
13
  nokogiri (1.5.0)
13
14
  rake (0.9.2.2)
@@ -25,6 +26,8 @@ GEM
25
26
  multi_json (~> 1.0.3)
26
27
  simplecov-html (~> 0.5.3)
27
28
  simplecov-html (0.5.3)
29
+ typhoeus (0.3.3)
30
+ mime-types
28
31
 
29
32
  PLATFORMS
30
33
  ruby
@@ -36,3 +39,4 @@ DEPENDENCIES
36
39
  rspec (~> 2.7.0)
37
40
  sanitize (>= 2.0.3)
38
41
  simplecov
42
+ typhoeus
@@ -3,7 +3,6 @@
3
3
  module GozapRss
4
4
 
5
5
 
6
-
7
6
  class ChoutiRssBase
8
7
  def self.logger
9
8
  @logger || GozapRss.logger || Logger.new(STDOUT)
@@ -22,13 +21,18 @@ module GozapRss
22
21
  self.class.logger
23
22
  end
24
23
 
24
+ def log_failed(response)
25
+ msg = "#{response.code} URL: #{response.request.url} PARAMS: #{response.request.params.to_s} in #{response.time}s FAILED : #{response.curl_error_message}
26
+ BODY: #{response.body}"
27
+ logger.error(msg)
28
+ end
25
29
 
26
30
  def logger_exception e
27
31
  self.class.logger_exception e
28
32
  end
29
33
 
30
34
 
31
- attr_reader :url, :description, :title, :pub_date
35
+ attr_reader :url, :description, :title, :pub_date, :ttl
32
36
  attr_accessor :http_headers_option
33
37
 
34
38
 
@@ -40,7 +44,7 @@ module GozapRss
40
44
  attr_reader :rss_items
41
45
 
42
46
  def initialize uri
43
- @http_headers_option = {"User-Agent"=>"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
47
+ @http_headers_option = {"User-Agent" => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
44
48
  @url = uri
45
49
  @rss_items = []
46
50
  content = get_feed_content uri
@@ -48,7 +52,6 @@ module GozapRss
48
52
  end
49
53
 
50
54
 
51
-
52
55
  private
53
56
 
54
57
  def parse_rss content
@@ -58,12 +61,13 @@ module GozapRss
58
61
  @title = rss.channel.title.to_s.html_format
59
62
  @description = rss.channel.description.to_s.html_format
60
63
  @pub_date = rss.channel.pubDate
64
+ @ttl = rss.channel.ttl.to_i == 0 ? 10 * 60 : rss.channel.ttl.to_i * 60
61
65
  @rss_items = []
62
66
  rss.items.each do |item|
63
67
  rss_item = ChoutiRssItem.new(item)
64
68
  @rss_items << rss_item if rss_item
65
69
  end
66
- @rss_items.sort!{|a,b|b.pub_date <=>a.pub_date}
70
+ @rss_items.sort! { |a, b| b.pub_date <=>a.pub_date }
67
71
  rescue Exception => e
68
72
  logger_exception e
69
73
  end
@@ -73,15 +77,29 @@ module GozapRss
73
77
 
74
78
  # Some sites refuse to serve their feeds to RSS robots, so the User-Agent HTTP header is set to disguise the request as coming from a browser.
75
79
  def get_feed_content uri
80
+
76
81
  content = ""
77
82
  begin
78
- open(uri, @http_headers_option) do |s|
79
- content = s.read
83
+ response = Typhoeus::Request.get(uri,
84
+ :headers['User-Agent'] => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7"\
85
+ "(KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
86
+ :timeout => 120000,
87
+ :max_redirects => 3,
88
+ :follow_location => true
89
+ )
90
+
91
+ if response.success? || (response.code < 400 && response.code >= 301)
92
+ content = response.body
93
+ isutf8 = Kconv.isutf8(content)
94
+ content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
95
+ elsif response.timed_out?
96
+ log_failed response
97
+ elsif response.code == 0
98
+ log_failed(response)
99
+ else
100
+ log_failed(response)
80
101
  end
81
-
82
- isutf8 = Kconv.isutf8(content)
83
- content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
84
- rescue Exception=>e
102
+ rescue Exception => e
85
103
  logger_exception e
86
104
  end
87
105
  return content
@@ -95,10 +113,10 @@ module GozapRss
95
113
 
96
114
  def initialize item
97
115
  @title = item.title.to_s.html_format
98
- @pub_date = item.pubDate || item.lastBuildDate
99
- @description = item.description.to_s.html_format
100
- @url = item.link.to_s.strip
101
- @url_md5 = Digest::MD5.hexdigest(@url)
116
+ @pub_date = item.pubDate || item.lastBuildDate
117
+ @description = item.description.to_s.html_format
118
+ @url = item.link.to_s.strip
119
+ @url_md5 = Digest::MD5.hexdigest(@url)
102
120
  unless validate
103
121
  logger.error "parser item error -- title=>#{@title}, pub_date=>#{@pub_date} description=>#{@description}, url=>#{@url}"
104
122
  return nil
@@ -108,8 +126,8 @@ module GozapRss
108
126
 
109
127
  private
110
128
  def validate
111
- !(@url.nil? || @description.nil? || @title.nil? ||
112
- @url.empty? || @description.empty? || @title.empty?)
129
+ !(@url.nil? || @description.nil? || @title.nil? ||
130
+ @url.empty? || @description.empty? || @title.empty?)
113
131
  end
114
132
 
115
133
  end
@@ -1,3 +1,3 @@
1
1
  module GozapRss
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/gozap_rss.rb CHANGED
@@ -9,6 +9,7 @@ require 'rss/2.0'
9
9
  require 'open-uri'
10
10
  require "digest/md5"
11
11
  require "sanitize"
12
+ require "typhoeus"
12
13
 
13
14
 
14
15
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: gozap_rss
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.2
5
+ version: 0.0.3
6
6
  platform: ruby
7
7
  authors:
8
8
  - "\xE7\x8E\x8B\xE6\x98\x8E\xE5\x8D\x8E"
@@ -24,19 +24,30 @@ dependencies:
24
24
  prerelease: false
25
25
  version_requirements: *id001
26
26
  - !ruby/object:Gem::Dependency
27
- name: sanitize
27
+ name: typhoeus
28
28
  requirement: &id002 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 2.0.3
33
+ version: "0"
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: *id002
37
37
  - !ruby/object:Gem::Dependency
38
- name: rspec
38
+ name: sanitize
39
39
  requirement: &id003 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: 2.0.3
45
+ type: :runtime
46
+ prerelease: false
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ requirement: &id004 !ruby/object:Gem::Requirement
40
51
  none: false
41
52
  requirements:
42
53
  - - ~>
@@ -44,10 +55,10 @@ dependencies:
44
55
  version: 2.7.0
45
56
  type: :development
46
57
  prerelease: false
47
- version_requirements: *id003
58
+ version_requirements: *id004
48
59
  - !ruby/object:Gem::Dependency
49
60
  name: bundler
50
- requirement: &id004 !ruby/object:Gem::Requirement
61
+ requirement: &id005 !ruby/object:Gem::Requirement
51
62
  none: false
52
63
  requirements:
53
64
  - - ~>
@@ -55,10 +66,10 @@ dependencies:
55
66
  version: 1.0.0
56
67
  type: :development
57
68
  prerelease: false
58
- version_requirements: *id004
69
+ version_requirements: *id005
59
70
  - !ruby/object:Gem::Dependency
60
71
  name: jeweler
61
- requirement: &id005 !ruby/object:Gem::Requirement
72
+ requirement: &id006 !ruby/object:Gem::Requirement
62
73
  none: false
63
74
  requirements:
64
75
  - - ~>
@@ -66,10 +77,10 @@ dependencies:
66
77
  version: 1.6.4
67
78
  type: :development
68
79
  prerelease: false
69
- version_requirements: *id005
80
+ version_requirements: *id006
70
81
  - !ruby/object:Gem::Dependency
71
82
  name: simplecov
72
- requirement: &id006 !ruby/object:Gem::Requirement
83
+ requirement: &id007 !ruby/object:Gem::Requirement
73
84
  none: false
74
85
  requirements:
75
86
  - - ">="
@@ -77,7 +88,7 @@ dependencies:
77
88
  version: "0"
78
89
  type: :development
79
90
  prerelease: false
80
- version_requirements: *id006
91
+ version_requirements: *id007
81
92
  description: "\xE6\x8A\x93\xE5\x8F\x96RSS\xE6\x9C\x8D\xE5\x8A\xA1\xE7\x9A\x84\xE7\xAE\x80\xE5\x8D\x95\xE5\xBA\x94\xE7\x94\xA8"
82
93
  email: wangmh.bit@gmail.com
83
94
  executables: []
@@ -112,7 +123,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
123
  requirements:
113
124
  - - ">="
114
125
  - !ruby/object:Gem::Version
115
- hash: -1037996241
126
+ hash: -27366127
116
127
  segments:
117
128
  - 0
118
129
  version: "0"