gozap_rss 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -1
- data/Gemfile.lock +4 -0
- data/lib/gozap_rss/chouti_rss.rb +35 -17
- data/lib/gozap_rss/version.rb +1 -1
- data/lib/gozap_rss.rb +1 -0
- metadata +23 -12
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -8,6 +8,7 @@ GEM
|
|
8
8
|
git (>= 1.2.5)
|
9
9
|
rake
|
10
10
|
logger (1.2.8)
|
11
|
+
mime-types (1.17.2)
|
11
12
|
multi_json (1.0.4)
|
12
13
|
nokogiri (1.5.0)
|
13
14
|
rake (0.9.2.2)
|
@@ -25,6 +26,8 @@ GEM
|
|
25
26
|
multi_json (~> 1.0.3)
|
26
27
|
simplecov-html (~> 0.5.3)
|
27
28
|
simplecov-html (0.5.3)
|
29
|
+
typhoeus (0.3.3)
|
30
|
+
mime-types
|
28
31
|
|
29
32
|
PLATFORMS
|
30
33
|
ruby
|
@@ -36,3 +39,4 @@ DEPENDENCIES
|
|
36
39
|
rspec (~> 2.7.0)
|
37
40
|
sanitize (>= 2.0.3)
|
38
41
|
simplecov
|
42
|
+
typhoeus
|
data/lib/gozap_rss/chouti_rss.rb
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
module GozapRss
|
4
4
|
|
5
5
|
|
6
|
-
|
7
6
|
class ChoutiRssBase
|
8
7
|
def self.logger
|
9
8
|
@logger || GozapRss.logger || Logger.new(STDOUT)
|
@@ -22,13 +21,18 @@ module GozapRss
|
|
22
21
|
self.class.logger
|
23
22
|
end
|
24
23
|
|
24
|
+
def log_failed(response)
|
25
|
+
msg = "#{response.code} URL: #{response.request.url} PARAMS: #{response.request.params.to_s} in #{response.time}s FAILED : #{response.curl_error_message}
|
26
|
+
BODY: #{response.body}"
|
27
|
+
logger.error(msg)
|
28
|
+
end
|
25
29
|
|
26
30
|
def logger_exception e
|
27
31
|
self.class.logger_exception e
|
28
32
|
end
|
29
33
|
|
30
34
|
|
31
|
-
attr_reader :url, :description, :title, :pub_date
|
35
|
+
attr_reader :url, :description, :title, :pub_date, :ttl
|
32
36
|
attr_accessor :http_headers_option
|
33
37
|
|
34
38
|
|
@@ -40,7 +44,7 @@ module GozapRss
|
|
40
44
|
attr_reader :rss_items
|
41
45
|
|
42
46
|
def initialize uri
|
43
|
-
@http_headers_option = {"User-Agent"=>"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
|
47
|
+
@http_headers_option = {"User-Agent" => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
|
44
48
|
@url = uri
|
45
49
|
@rss_items = []
|
46
50
|
content = get_feed_content uri
|
@@ -48,7 +52,6 @@ module GozapRss
|
|
48
52
|
end
|
49
53
|
|
50
54
|
|
51
|
-
|
52
55
|
private
|
53
56
|
|
54
57
|
def parse_rss content
|
@@ -58,12 +61,13 @@ module GozapRss
|
|
58
61
|
@title = rss.channel.title.to_s.html_format
|
59
62
|
@description = rss.channel.description.to_s.html_format
|
60
63
|
@pub_date = rss.channel.pubDate
|
64
|
+
@ttl = rss.channel.ttl.to_i == 0 ? 10 * 60 : rss.channel.ttl.to_i * 60
|
61
65
|
@rss_items = []
|
62
66
|
rss.items.each do |item|
|
63
67
|
rss_item = ChoutiRssItem.new(item)
|
64
68
|
@rss_items << rss_item if rss_item
|
65
69
|
end
|
66
|
-
@rss_items.sort!{|a,b|b.pub_date <=>a.pub_date}
|
70
|
+
@rss_items.sort! { |a, b| b.pub_date <=>a.pub_date }
|
67
71
|
rescue Exception => e
|
68
72
|
logger_exception e
|
69
73
|
end
|
@@ -73,15 +77,29 @@ module GozapRss
|
|
73
77
|
|
74
78
|
#because some site feed refuse rss robot, so i set the http headers User-Agent to disguise as a browser
|
75
79
|
def get_feed_content uri
|
80
|
+
|
76
81
|
content = ""
|
77
82
|
begin
|
78
|
-
|
79
|
-
|
83
|
+
response = Typhoeus::Request.get(uri,
|
84
|
+
:headers['User-Agent'] => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7"\
|
85
|
+
"(KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
|
86
|
+
:timeout => 120000,
|
87
|
+
:max_redirects => 3,
|
88
|
+
:follow_location => true
|
89
|
+
)
|
90
|
+
|
91
|
+
if response.success? || (response.code < 400 && response.code >= 301)
|
92
|
+
content = response.body
|
93
|
+
isutf8 = Kconv.isutf8(content)
|
94
|
+
content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
|
95
|
+
elsif response.timed_out?
|
96
|
+
log_failed response
|
97
|
+
elsif response.code == 0
|
98
|
+
log_failed(response)
|
99
|
+
else
|
100
|
+
log_failed(response)
|
80
101
|
end
|
81
|
-
|
82
|
-
isutf8 = Kconv.isutf8(content)
|
83
|
-
content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
|
84
|
-
rescue Exception=>e
|
102
|
+
rescue Exception => e
|
85
103
|
logger_exception e
|
86
104
|
end
|
87
105
|
return content
|
@@ -95,10 +113,10 @@ module GozapRss
|
|
95
113
|
|
96
114
|
def initialize item
|
97
115
|
@title = item.title.to_s.html_format
|
98
|
-
@pub_date = item.pubDate ||
|
99
|
-
@description
|
100
|
-
@url
|
101
|
-
@url_md5 =
|
116
|
+
@pub_date = item.pubDate || item.lastBuildDate
|
117
|
+
@description = item.description.to_s.html_format
|
118
|
+
@url = item.link.to_s.strip
|
119
|
+
@url_md5 = Digest::MD5.hexdigest(@url)
|
102
120
|
unless validate
|
103
121
|
logger.error "parser item error -- title=>#{@title}, pub_date=>#{@pub_date} description=>#{@description}, url=>#{@url}"
|
104
122
|
return nil
|
@@ -108,8 +126,8 @@ module GozapRss
|
|
108
126
|
|
109
127
|
private
|
110
128
|
def validate
|
111
|
-
!(@url.nil? || @description.nil? ||
|
112
|
-
|
129
|
+
!(@url.nil? || @description.nil? || @title.nil? ||
|
130
|
+
@url.empty? || @description.empty? || @title.empty?)
|
113
131
|
end
|
114
132
|
|
115
133
|
end
|
data/lib/gozap_rss/version.rb
CHANGED
data/lib/gozap_rss.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: gozap_rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.2
|
5
|
+
version: 0.0.3
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- "\xE7\x8E\x8B\xE6\x98\x8E\xE5\x8D\x8E"
|
@@ -24,19 +24,30 @@ dependencies:
|
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: *id001
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
|
-
name: sanitize
|
27
|
+
name: typhoeus
|
28
28
|
requirement: &id002 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.0.3
|
33
|
+
version: "0"
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: *id002
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
|
-
name: rspec
|
38
|
+
name: sanitize
|
39
39
|
requirement: &id003 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 2.0.3
|
45
|
+
type: :runtime
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *id003
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: rspec
|
50
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
40
51
|
none: false
|
41
52
|
requirements:
|
42
53
|
- - ~>
|
@@ -44,10 +55,10 @@ dependencies:
|
|
44
55
|
version: 2.7.0
|
45
56
|
type: :development
|
46
57
|
prerelease: false
|
47
|
-
version_requirements: *id003
|
58
|
+
version_requirements: *id004
|
48
59
|
- !ruby/object:Gem::Dependency
|
49
60
|
name: bundler
|
50
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
61
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
51
62
|
none: false
|
52
63
|
requirements:
|
53
64
|
- - ~>
|
@@ -55,10 +66,10 @@ dependencies:
|
|
55
66
|
version: 1.0.0
|
56
67
|
type: :development
|
57
68
|
prerelease: false
|
58
|
-
version_requirements: *id004
|
69
|
+
version_requirements: *id005
|
59
70
|
- !ruby/object:Gem::Dependency
|
60
71
|
name: jeweler
|
61
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
72
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
62
73
|
none: false
|
63
74
|
requirements:
|
64
75
|
- - ~>
|
@@ -66,10 +77,10 @@ dependencies:
|
|
66
77
|
version: 1.6.4
|
67
78
|
type: :development
|
68
79
|
prerelease: false
|
69
|
-
version_requirements: *id005
|
80
|
+
version_requirements: *id006
|
70
81
|
- !ruby/object:Gem::Dependency
|
71
82
|
name: simplecov
|
72
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
83
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
73
84
|
none: false
|
74
85
|
requirements:
|
75
86
|
- - ">="
|
@@ -77,7 +88,7 @@ dependencies:
|
|
77
88
|
version: "0"
|
78
89
|
type: :development
|
79
90
|
prerelease: false
|
80
|
-
version_requirements: *id006
|
91
|
+
version_requirements: *id007
|
81
92
|
description: "\xE6\x8A\x93\xE5\x8F\x96RSS\xE6\x9C\x8D\xE5\x8A\xA1\xE7\x9A\x84\xE7\xAE\x80\xE5\x8D\x95\xE5\xBA\x94\xE7\x94\xA8"
|
82
93
|
email: wangmh.bit@gmail.com
|
83
94
|
executables: []
|
@@ -112,7 +123,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
112
123
|
requirements:
|
113
124
|
- - ">="
|
114
125
|
- !ruby/object:Gem::Version
|
115
|
-
hash: -
|
126
|
+
hash: -27366127
|
116
127
|
segments:
|
117
128
|
- 0
|
118
129
|
version: "0"
|