gozap_rss 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -1
- data/Gemfile.lock +4 -0
- data/lib/gozap_rss/chouti_rss.rb +35 -17
- data/lib/gozap_rss/version.rb +1 -1
- data/lib/gozap_rss.rb +1 -0
- metadata +23 -12
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -8,6 +8,7 @@ GEM
|
|
8
8
|
git (>= 1.2.5)
|
9
9
|
rake
|
10
10
|
logger (1.2.8)
|
11
|
+
mime-types (1.17.2)
|
11
12
|
multi_json (1.0.4)
|
12
13
|
nokogiri (1.5.0)
|
13
14
|
rake (0.9.2.2)
|
@@ -25,6 +26,8 @@ GEM
|
|
25
26
|
multi_json (~> 1.0.3)
|
26
27
|
simplecov-html (~> 0.5.3)
|
27
28
|
simplecov-html (0.5.3)
|
29
|
+
typhoeus (0.3.3)
|
30
|
+
mime-types
|
28
31
|
|
29
32
|
PLATFORMS
|
30
33
|
ruby
|
@@ -36,3 +39,4 @@ DEPENDENCIES
|
|
36
39
|
rspec (~> 2.7.0)
|
37
40
|
sanitize (>= 2.0.3)
|
38
41
|
simplecov
|
42
|
+
typhoeus
|
data/lib/gozap_rss/chouti_rss.rb
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
module GozapRss
|
4
4
|
|
5
5
|
|
6
|
-
|
7
6
|
class ChoutiRssBase
|
8
7
|
def self.logger
|
9
8
|
@logger || GozapRss.logger || Logger.new(STDOUT)
|
@@ -22,13 +21,18 @@ module GozapRss
|
|
22
21
|
self.class.logger
|
23
22
|
end
|
24
23
|
|
24
|
+
def log_failed(response)
|
25
|
+
msg = "#{response.code} URL: #{response.request.url} PARAMS: #{response.request.params.to_s} in #{response.time}s FAILED : #{response.curl_error_message}
|
26
|
+
BODY: #{response.body}"
|
27
|
+
logger.error(msg)
|
28
|
+
end
|
25
29
|
|
26
30
|
def logger_exception e
|
27
31
|
self.class.logger_exception e
|
28
32
|
end
|
29
33
|
|
30
34
|
|
31
|
-
attr_reader :url, :description, :title, :pub_date
|
35
|
+
attr_reader :url, :description, :title, :pub_date, :ttl
|
32
36
|
attr_accessor :http_headers_option
|
33
37
|
|
34
38
|
|
@@ -40,7 +44,7 @@ module GozapRss
|
|
40
44
|
attr_reader :rss_items
|
41
45
|
|
42
46
|
def initialize uri
|
43
|
-
@http_headers_option = {"User-Agent"=>"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
|
47
|
+
@http_headers_option = {"User-Agent" => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
|
44
48
|
@url = uri
|
45
49
|
@rss_items = []
|
46
50
|
content = get_feed_content uri
|
@@ -48,7 +52,6 @@ module GozapRss
|
|
48
52
|
end
|
49
53
|
|
50
54
|
|
51
|
-
|
52
55
|
private
|
53
56
|
|
54
57
|
def parse_rss content
|
@@ -58,12 +61,13 @@ module GozapRss
|
|
58
61
|
@title = rss.channel.title.to_s.html_format
|
59
62
|
@description = rss.channel.description.to_s.html_format
|
60
63
|
@pub_date = rss.channel.pubDate
|
64
|
+
@ttl = rss.channel.ttl.to_i == 0 ? 10 * 60 : rss.channel.ttl.to_i * 60
|
61
65
|
@rss_items = []
|
62
66
|
rss.items.each do |item|
|
63
67
|
rss_item = ChoutiRssItem.new(item)
|
64
68
|
@rss_items << rss_item if rss_item
|
65
69
|
end
|
66
|
-
@rss_items.sort!{|a,b|b.pub_date <=>a.pub_date}
|
70
|
+
@rss_items.sort! { |a, b| b.pub_date <=>a.pub_date }
|
67
71
|
rescue Exception => e
|
68
72
|
logger_exception e
|
69
73
|
end
|
@@ -73,15 +77,29 @@ module GozapRss
|
|
73
77
|
|
74
78
|
#because some site feed refuse rss robot, so i set the http headers User-Agent to disguise as a browser
|
75
79
|
def get_feed_content uri
|
80
|
+
|
76
81
|
content = ""
|
77
82
|
begin
|
78
|
-
|
79
|
-
|
83
|
+
response = Typhoeus::Request.get(uri,
|
84
|
+
:headers['User-Agent'] => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7"\
|
85
|
+
"(KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
|
86
|
+
:timeout => 120000,
|
87
|
+
:max_redirects => 3,
|
88
|
+
:follow_location => true
|
89
|
+
)
|
90
|
+
|
91
|
+
if response.success? || (response.code < 400 && response.code >= 301)
|
92
|
+
content = response.body
|
93
|
+
isutf8 = Kconv.isutf8(content)
|
94
|
+
content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
|
95
|
+
elsif response.timed_out?
|
96
|
+
log_failed response
|
97
|
+
elsif response.code == 0
|
98
|
+
log_failed(response)
|
99
|
+
else
|
100
|
+
log_failed(response)
|
80
101
|
end
|
81
|
-
|
82
|
-
isutf8 = Kconv.isutf8(content)
|
83
|
-
content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0] unless isutf8
|
84
|
-
rescue Exception=>e
|
102
|
+
rescue Exception => e
|
85
103
|
logger_exception e
|
86
104
|
end
|
87
105
|
return content
|
@@ -95,10 +113,10 @@ module GozapRss
|
|
95
113
|
|
96
114
|
def initialize item
|
97
115
|
@title = item.title.to_s.html_format
|
98
|
-
@pub_date = item.pubDate ||
|
99
|
-
@description
|
100
|
-
@url
|
101
|
-
@url_md5 =
|
116
|
+
@pub_date = item.pubDate || item.lastBuildDate
|
117
|
+
@description = item.description.to_s.html_format
|
118
|
+
@url = item.link.to_s.strip
|
119
|
+
@url_md5 = Digest::MD5.hexdigest(@url)
|
102
120
|
unless validate
|
103
121
|
logger.error "parser item error -- title=>#{@title}, pub_date=>#{@pub_date} description=>#{@description}, url=>#{@url}"
|
104
122
|
return nil
|
@@ -108,8 +126,8 @@ module GozapRss
|
|
108
126
|
|
109
127
|
private
|
110
128
|
def validate
|
111
|
-
!(@url.nil? || @description.nil? ||
|
112
|
-
|
129
|
+
!(@url.nil? || @description.nil? || @title.nil? ||
|
130
|
+
@url.empty? || @description.empty? || @title.empty?)
|
113
131
|
end
|
114
132
|
|
115
133
|
end
|
data/lib/gozap_rss/version.rb
CHANGED
data/lib/gozap_rss.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: gozap_rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.2
|
5
|
+
version: 0.0.3
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- "\xE7\x8E\x8B\xE6\x98\x8E\xE5\x8D\x8E"
|
@@ -24,19 +24,30 @@ dependencies:
|
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: *id001
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
|
-
name: sanitize
|
27
|
+
name: typhoeus
|
28
28
|
requirement: &id002 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.0.3
|
33
|
+
version: "0"
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: *id002
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
|
-
name: rspec
|
38
|
+
name: sanitize
|
39
39
|
requirement: &id003 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 2.0.3
|
45
|
+
type: :runtime
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *id003
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: rspec
|
50
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
40
51
|
none: false
|
41
52
|
requirements:
|
42
53
|
- - ~>
|
@@ -44,10 +55,10 @@ dependencies:
|
|
44
55
|
version: 2.7.0
|
45
56
|
type: :development
|
46
57
|
prerelease: false
|
47
|
-
version_requirements: *id003
|
58
|
+
version_requirements: *id004
|
48
59
|
- !ruby/object:Gem::Dependency
|
49
60
|
name: bundler
|
50
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
61
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
51
62
|
none: false
|
52
63
|
requirements:
|
53
64
|
- - ~>
|
@@ -55,10 +66,10 @@ dependencies:
|
|
55
66
|
version: 1.0.0
|
56
67
|
type: :development
|
57
68
|
prerelease: false
|
58
|
-
version_requirements: *id004
|
69
|
+
version_requirements: *id005
|
59
70
|
- !ruby/object:Gem::Dependency
|
60
71
|
name: jeweler
|
61
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
72
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
62
73
|
none: false
|
63
74
|
requirements:
|
64
75
|
- - ~>
|
@@ -66,10 +77,10 @@ dependencies:
|
|
66
77
|
version: 1.6.4
|
67
78
|
type: :development
|
68
79
|
prerelease: false
|
69
|
-
version_requirements: *id005
|
80
|
+
version_requirements: *id006
|
70
81
|
- !ruby/object:Gem::Dependency
|
71
82
|
name: simplecov
|
72
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
83
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
73
84
|
none: false
|
74
85
|
requirements:
|
75
86
|
- - ">="
|
@@ -77,7 +88,7 @@ dependencies:
|
|
77
88
|
version: "0"
|
78
89
|
type: :development
|
79
90
|
prerelease: false
|
80
|
-
version_requirements: *id006
|
91
|
+
version_requirements: *id007
|
81
92
|
description: "\xE6\x8A\x93\xE5\x8F\x96RSS\xE6\x9C\x8D\xE5\x8A\xA1\xE7\x9A\x84\xE7\xAE\x80\xE5\x8D\x95\xE5\xBA\x94\xE7\x94\xA8"
|
82
93
|
email: wangmh.bit@gmail.com
|
83
94
|
executables: []
|
@@ -112,7 +123,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
112
123
|
requirements:
|
113
124
|
- - ">="
|
114
125
|
- !ruby/object:Gem::Version
|
115
|
-
hash: -
|
126
|
+
hash: -27366127
|
116
127
|
segments:
|
117
128
|
- 0
|
118
129
|
version: "0"
|