grab_epg 0.2.1 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.grabepg.gemspec +1 -1
- data/lib/grab_tvmao.rb +2 -5
- data/lib/grabepg/grab_base.rb +32 -23
- data/lib/grabepg/grab_tvsou.rb +36 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MTI5MTMwMTY1NTRmZjk5NGIwZGM4MTk3NTljNGFjMzAwOWY4NTdiNA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NzNjZTc3YzY1OWZkYTZjOGUzNTVjNzVmZjgzYjg3NjQyZWUzNGFmMg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZGMwNThmMzZlY2FmZmI1ZmQzNjY5ODdkYTI4MTk5MWI2NWZiODBlZjQ1YzNk
|
10
|
+
MzllOGE1YmNkZjRiNjc3MDlhY2FjZjMyNjFiNTcxYjFlZTlmYzgwNmVlMmQx
|
11
|
+
NzUxODIwMGE1MjgxZWM0NWY3ZDlmNWE0YmYyN2U0NTY1YjU3NmQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MTA3MmE5MGZkMzU5YzliYjljNTQ1NzljNWViYTQ5YWU5ZmNlZGQ5OWJmZTUz
|
14
|
+
YmZiN2QyNjUzOTk0OGQwMzM0NmZjOTEwOTI2MzJkZjAxMDg5YzdlNzUxNjM3
|
15
|
+
Y2VkNmQzMGUyMjQ0Nzc5MTZkMGE5NjY4Y2IwZTY2ZGI5Y2MyOTA=
|
data/.grabepg.gemspec
CHANGED
data/lib/grab_tvmao.rb
CHANGED
@@ -264,7 +264,7 @@ module GrabTvmao
|
|
264
264
|
proxy = proxylist[@proxyindex+1]
|
265
265
|
end
|
266
266
|
begin
|
267
|
-
doc = Nokogiri::HTML(open(url,:proxy=>"
|
267
|
+
doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}")) unless proxy.nil?||proxy.empty?
|
268
268
|
doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
|
269
269
|
@no_firest = 0
|
270
270
|
rescue => err
|
@@ -274,15 +274,12 @@ module GrabTvmao
|
|
274
274
|
end
|
275
275
|
|
276
276
|
@no_firest += 1
|
277
|
-
p "*************************Proxy:#{proxy}, url:#{url}"
|
277
|
+
p "*************************Proxy:#{proxy}, url:#{url} Error:#{err.to_s}"
|
278
278
|
#proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
|
279
279
|
get_doc_with_proxy(proxylist,url) if @no_firest<4
|
280
280
|
raise RuntimeError,"Error: #{err.to_s}" unless @no_firest<4
|
281
281
|
end
|
282
282
|
@proxyindex += 1
|
283
|
-
unless doc
|
284
|
-
p "*************************Proxy:#{proxy}, url:#{url}"
|
285
|
-
end
|
286
283
|
doc
|
287
284
|
end
|
288
285
|
|
data/lib/grabepg/grab_base.rb
CHANGED
@@ -119,8 +119,28 @@ module Grabepg
|
|
119
119
|
end
|
120
120
|
|
121
121
|
|
122
|
+
def err_doc_proxy(proxy,proxylist,url="",err="")
|
123
|
+
if proxy.empty?||proxy.nil?
|
124
|
+
proxylist.delete_at[@proxyindex]
|
125
|
+
end
|
126
|
+
|
122
127
|
|
128
|
+
unless @no_firest
|
129
|
+
@no_firest = 0
|
130
|
+
end
|
123
131
|
|
132
|
+
@no_firest += 1
|
133
|
+
p "*************************Proxy:#{proxy}, url:#{url} Error:#{err}"
|
134
|
+
#proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
|
135
|
+
@proxyindex += 1
|
136
|
+
@proxyindex=@proxyindex%@size
|
137
|
+
doc=get_doc_with_proxy(proxylist,url) if @no_firest<4
|
138
|
+
unless @no_firest<4
|
139
|
+
@no_firest=0
|
140
|
+
raise RuntimeError,"Error: #{err}"
|
141
|
+
end
|
142
|
+
doc
|
143
|
+
end
|
124
144
|
|
125
145
|
|
126
146
|
#使用代理获取url的html的doc值
|
@@ -129,6 +149,7 @@ module Grabepg
|
|
129
149
|
unless @proxyindex
|
130
150
|
@proxyindex = 0
|
131
151
|
end
|
152
|
+
@size = proxylist.size
|
132
153
|
@proxyindex=@proxyindex%proxylist.size
|
133
154
|
if(proxylist[@proxyindex])
|
134
155
|
proxy = proxylist[@proxyindex]
|
@@ -136,35 +157,23 @@ module Grabepg
|
|
136
157
|
proxy = proxylist[@proxyindex+1]
|
137
158
|
end
|
138
159
|
begin
|
139
|
-
doc = Nokogiri::HTML(open(url,:proxy=>"
|
140
|
-
|
141
|
-
|
160
|
+
doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}")) unless proxy.nil?||proxy.empty?
|
161
|
+
if doc.nil?
|
162
|
+
doc=err_doc_proxy(proxy,proxylist,url,"doc nil")
|
163
|
+
@no_firest=0
|
142
164
|
end
|
143
165
|
@no_firest = 0
|
144
166
|
rescue => err
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
unless @no_firest
|
151
|
-
@no_firest = 0
|
152
|
-
end
|
153
|
-
|
154
|
-
@no_firest += 1
|
155
|
-
p "*************************Proxy:#{proxy}, url:#{url}"
|
156
|
-
#proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
|
167
|
+
p "IN Rescue"
|
168
|
+
doc=err_doc_proxy(proxy,proxylist,url,err.to_s)
|
169
|
+
@no_firest=0
|
170
|
+
p "Get DOC"
|
157
171
|
@proxyindex += 1
|
158
|
-
|
159
|
-
|
160
|
-
@no_firest=0
|
161
|
-
raise RuntimeError,"Error: #{err.to_s}"
|
162
|
-
end
|
172
|
+
@proxyindex=@proxyindex%@size
|
173
|
+
return doc
|
163
174
|
end
|
164
175
|
@proxyindex += 1
|
165
|
-
|
166
|
-
p "*************************Proxy:#{proxy}, url:#{url}"
|
167
|
-
end
|
176
|
+
@proxyindex=@proxyindex%@size
|
168
177
|
else
|
169
178
|
begin
|
170
179
|
doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
|
data/lib/grabepg/grab_tvsou.rb
CHANGED
@@ -78,6 +78,7 @@ module Grabepg
|
|
78
78
|
|
79
79
|
|
80
80
|
doc = @grabbase.get_doc_with_proxy(@proxy_list,@home_page)
|
81
|
+
begin
|
81
82
|
doc.css("li").each do |li|
|
82
83
|
case ChannelTypeMap[li.get_attribute("class")]
|
83
84
|
when "央视"
|
@@ -90,6 +91,15 @@ module Grabepg
|
|
90
91
|
|
91
92
|
end
|
92
93
|
end
|
94
|
+
@error_num=0
|
95
|
+
rescue
|
96
|
+
unless @error_num
|
97
|
+
@error_num = 0
|
98
|
+
end
|
99
|
+
@error_num+=1
|
100
|
+
raise err.to_s if @error_num==5
|
101
|
+
dispose_home_page
|
102
|
+
end
|
93
103
|
return @channels
|
94
104
|
end
|
95
105
|
|
@@ -123,7 +133,17 @@ module Grabepg
|
|
123
133
|
def dispose_schedule_page(url,start_time,use_time)
|
124
134
|
url = @site +"/"+url
|
125
135
|
urls = url.split("?")
|
136
|
+
begin
|
126
137
|
doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
|
138
|
+
@error_num = 0
|
139
|
+
rescue => err
|
140
|
+
unless @error_num
|
141
|
+
@error_num = 0
|
142
|
+
end
|
143
|
+
@error_num+=1
|
144
|
+
raise err.to_s if @error_num==5
|
145
|
+
dispose_schedule_page(url,start_time,use_time)
|
146
|
+
end
|
127
147
|
_url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
|
128
148
|
_url = urls[0]+_url
|
129
149
|
urls = dispose_href_schedule_data(_url,start_time,use_time)
|
@@ -135,7 +155,9 @@ module Grabepg
|
|
135
155
|
if url
|
136
156
|
doc = @grabbase.get_doc_with_proxy(@proxy_list,url[:url])
|
137
157
|
schedules = []
|
138
|
-
doc.css('div[class="time"]')[0]
|
158
|
+
div = doc.css('div[class="time"]')[0]
|
159
|
+
if div
|
160
|
+
div.css("li[class='gray']").each do |schedule|
|
139
161
|
begin
|
140
162
|
_dispose = schedule.content
|
141
163
|
_dispose_show =schedule.css("span")[0].text
|
@@ -153,6 +175,9 @@ module Grabepg
|
|
153
175
|
end
|
154
176
|
end
|
155
177
|
ret.merge!({url[:date]=>schedules})
|
178
|
+
else
|
179
|
+
p "Error In this url: #{url} couldn't get doc.css('div[class=time]')[0]"
|
180
|
+
end
|
156
181
|
end
|
157
182
|
end
|
158
183
|
return ret
|
@@ -165,11 +190,21 @@ module Grabepg
|
|
165
190
|
#解析节目详情页面
|
166
191
|
def dispose_show_info(url)
|
167
192
|
doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
|
193
|
+
begin
|
168
194
|
show_name = doc.css('div[class="tv_info_top"]')[0].content
|
169
195
|
_doc=doc.css("div[class='tv_info']")
|
170
196
|
img_url = _doc.css("img")[0].get_attribute("src").gsub(" ","")
|
171
197
|
show_info = _doc.css("p")[0].content.gsub("[全文]","")
|
198
|
+
@error_num = 0
|
172
199
|
{show_name:show_name,img_url:img_url,show_info:show_info}
|
200
|
+
rescue => err
|
201
|
+
unless @error_num
|
202
|
+
@error_num = 0
|
203
|
+
end
|
204
|
+
@error_num+=1
|
205
|
+
raise err.to_s if @error_num==5
|
206
|
+
dispose_show_info(url)
|
207
|
+
end
|
173
208
|
end
|
174
209
|
|
175
210
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grab_epg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahazql
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-06-
|
11
|
+
date: 2013-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: ! '"用于抓取EPG信息"'
|
14
14
|
email:
|