gozap_rss 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ source "http://rubygems.org"
6
6
  # Add dependencies to develop your gem here.
7
7
  # Include everything needed to run rake, tests, features, etc.
8
8
  gem "logger"
9
-
9
+ gem "typhoeus"
10
10
  gem "sanitize", ">=2.0.3"
11
11
 
12
12
 
data/Gemfile.lock CHANGED
@@ -8,6 +8,7 @@ GEM
8
8
  git (>= 1.2.5)
9
9
  rake
10
10
  logger (1.2.8)
11
+ mime-types (1.17.2)
11
12
  multi_json (1.0.4)
12
13
  nokogiri (1.5.0)
13
14
  rake (0.9.2.2)
@@ -25,6 +26,8 @@ GEM
25
26
  multi_json (~> 1.0.3)
26
27
  simplecov-html (~> 0.5.3)
27
28
  simplecov-html (0.5.3)
29
+ typhoeus (0.3.3)
30
+ mime-types
28
31
 
29
32
  PLATFORMS
30
33
  ruby
@@ -36,3 +39,4 @@ DEPENDENCIES
36
39
  rspec (~> 2.7.0)
37
40
  sanitize (>= 2.0.3)
38
41
  simplecov
42
+ typhoeus
@@ -3,7 +3,6 @@
3
3
  module GozapRss
4
4
 
5
5
 
6
-
7
6
  class ChoutiRssBase
8
7
  def self.logger
9
8
  @logger || GozapRss.logger || Logger.new(STDOUT)
@@ -22,13 +21,18 @@ module GozapRss
22
21
  self.class.logger
23
22
  end
24
23
 
24
+ def log_failed(response)
25
+ msg = "#{response.code} URL: #{response.request.url} PARAMS: #{response.request.params.to_s} in #{response.time}s FAILED : #{response.curl_error_message}
26
+ BODY: #{response.body}"
27
+ logger.error(msg)
28
+ end
25
29
 
26
30
  def logger_exception e
27
31
  self.class.logger_exception e
28
32
  end
29
33
 
30
34
 
31
- attr_reader :url, :description, :title, :pub_date
35
+ attr_reader :url, :description, :title, :pub_date, :ttl
32
36
  attr_accessor :http_headers_option
33
37
 
34
38
 
@@ -40,7 +44,7 @@ module GozapRss
40
44
  attr_reader :rss_items
41
45
 
42
46
  def initialize uri
43
- @http_headers_option = {"User-Agent"=>"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
47
+ @http_headers_option = {"User-Agent" => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
44
48
  @url = uri
45
49
  @rss_items = []
46
50
  content = get_feed_content uri
@@ -48,7 +52,6 @@ module GozapRss
48
52
  end
49
53
 
50
54
 
51
-
52
55
  private
53
56
 
54
57
  def parse_rss content
@@ -58,12 +61,13 @@ module GozapRss
58
61
  @title = rss.channel.title.to_s.html_format
59
62
  @description = rss.channel.description.to_s.html_format
60
63
  @pub_date = rss.channel.pubDate
64
+ @ttl = rss.channel.ttl.to_i == 0 ? 10 * 60 : rss.channel.ttl.to_i * 60
61
65
  @rss_items = []
62
66
  rss.items.each do |item|
63
67
  rss_item = ChoutiRssItem.new(item)
64
68
  @rss_items << rss_item if rss_item
65
69
  end
66
- @rss_items.sort!{|a,b|b.pub_date <=>a.pub_date}
70
+ @rss_items.sort! { |a, b| b.pub_date <=>a.pub_date }
67
71
  rescue Exception => e
68
72
  logger_exception e
69
73
  end
@@ -73,15 +77,29 @@ module GozapRss
73
77
 
74
78
  #because some site feed refuse rss robot, so i set the http headers User-Agent to disguise as a browser
75
79
  def get_feed_content uri
80
+
76
81
  content = ""
77
82
  begin
78
- open(uri, @http_headers_option) do |s|
79
- content = s.read
83
+ response = Typhoeus::Request.get(uri,
84
+ :headers['User-Agent'] => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7"\
85
+ "(KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
86
+ :timeout => 120000,
87
+ :max_redirects => 3,
88
+ :follow_location => true
89
+ )
90
+
91
+ if response.success? || (response.code < 400 && response.code >= 301)
92
+ content = response.body
93
+ isutf8 = Kconv.isutf8(content)
94
+ content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
95
+ elsif response.timed_out?
96
+ log_failed response
97
+ elsif response.code == 0
98
+ log_failed(response)
99
+ else
100
+ log_failed(response)
80
101
  end
81
-
82
- isutf8 = Kconv.isutf8(content)
83
- content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
84
- rescue Exception=>e
102
+ rescue Exception => e
85
103
  logger_exception e
86
104
  end
87
105
  return content
@@ -95,10 +113,10 @@ module GozapRss
95
113
 
96
114
  def initialize item
97
115
  @title = item.title.to_s.html_format
98
- @pub_date = item.pubDate || item.lastBuildDate
99
- @description = item.description.to_s.html_format
100
- @url = item.link.to_s.strip
101
- @url_md5 = Digest::MD5.hexdigest(@url)
116
+ @pub_date = item.pubDate || item.lastBuildDate
117
+ @description = item.description.to_s.html_format
118
+ @url = item.link.to_s.strip
119
+ @url_md5 = Digest::MD5.hexdigest(@url)
102
120
  unless validate
103
121
  logger.error "parser item error -- title=>#{@title}, pub_date=>#{@pub_date} description=>#{@description}, url=>#{@url}"
104
122
  return nil
@@ -108,8 +126,8 @@ module GozapRss
108
126
 
109
127
  private
110
128
  def validate
111
- !(@url.nil? || @description.nil? || @title.nil? ||
112
- @url.empty? || @description.empty? || @title.empty?)
129
+ !(@url.nil? || @description.nil? || @title.nil? ||
130
+ @url.empty? || @description.empty? || @title.empty?)
113
131
  end
114
132
 
115
133
  end
@@ -1,3 +1,3 @@
1
1
  module GozapRss
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/gozap_rss.rb CHANGED
@@ -9,6 +9,7 @@ require 'rss/2.0'
9
9
  require 'open-uri'
10
10
  require "digest/md5"
11
11
  require "sanitize"
12
+ require "typhoeus"
12
13
 
13
14
 
14
15
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: gozap_rss
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.2
5
+ version: 0.0.3
6
6
  platform: ruby
7
7
  authors:
8
8
  - "\xE7\x8E\x8B\xE6\x98\x8E\xE5\x8D\x8E"
@@ -24,19 +24,30 @@ dependencies:
24
24
  prerelease: false
25
25
  version_requirements: *id001
26
26
  - !ruby/object:Gem::Dependency
27
- name: sanitize
27
+ name: typhoeus
28
28
  requirement: &id002 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 2.0.3
33
+ version: "0"
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: *id002
37
37
  - !ruby/object:Gem::Dependency
38
- name: rspec
38
+ name: sanitize
39
39
  requirement: &id003 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: 2.0.3
45
+ type: :runtime
46
+ prerelease: false
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ requirement: &id004 !ruby/object:Gem::Requirement
40
51
  none: false
41
52
  requirements:
42
53
  - - ~>
@@ -44,10 +55,10 @@ dependencies:
44
55
  version: 2.7.0
45
56
  type: :development
46
57
  prerelease: false
47
- version_requirements: *id003
58
+ version_requirements: *id004
48
59
  - !ruby/object:Gem::Dependency
49
60
  name: bundler
50
- requirement: &id004 !ruby/object:Gem::Requirement
61
+ requirement: &id005 !ruby/object:Gem::Requirement
51
62
  none: false
52
63
  requirements:
53
64
  - - ~>
@@ -55,10 +66,10 @@ dependencies:
55
66
  version: 1.0.0
56
67
  type: :development
57
68
  prerelease: false
58
- version_requirements: *id004
69
+ version_requirements: *id005
59
70
  - !ruby/object:Gem::Dependency
60
71
  name: jeweler
61
- requirement: &id005 !ruby/object:Gem::Requirement
72
+ requirement: &id006 !ruby/object:Gem::Requirement
62
73
  none: false
63
74
  requirements:
64
75
  - - ~>
@@ -66,10 +77,10 @@ dependencies:
66
77
  version: 1.6.4
67
78
  type: :development
68
79
  prerelease: false
69
- version_requirements: *id005
80
+ version_requirements: *id006
70
81
  - !ruby/object:Gem::Dependency
71
82
  name: simplecov
72
- requirement: &id006 !ruby/object:Gem::Requirement
83
+ requirement: &id007 !ruby/object:Gem::Requirement
73
84
  none: false
74
85
  requirements:
75
86
  - - ">="
@@ -77,7 +88,7 @@ dependencies:
77
88
  version: "0"
78
89
  type: :development
79
90
  prerelease: false
80
- version_requirements: *id006
91
+ version_requirements: *id007
81
92
  description: "\xE6\x8A\x93\xE5\x8F\x96RSS\xE6\x9C\x8D\xE5\x8A\xA1\xE7\x9A\x84\xE7\xAE\x80\xE5\x8D\x95\xE5\xBA\x94\xE7\x94\xA8"
82
93
  email: wangmh.bit@gmail.com
83
94
  executables: []
@@ -112,7 +123,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
123
  requirements:
113
124
  - - ">="
114
125
  - !ruby/object:Gem::Version
115
- hash: -1037996241
126
+ hash: -27366127
116
127
  segments:
117
128
  - 0
118
129
  version: "0"