grab_epg 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.grabepg.gemspec +1 -1
- data/lib/grabepg.rb +113 -39
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZGM4ZjA4YTk0ODU0ZDBkNjdlZjc1MGFiMDRjZGE5OTFlNTU5Nzc5Nw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MzIwODc1NzdiZDEzYjhiNWI3MDVlNzFkMTUyNTFhMGNiZjEzODM4Yg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZGQ5Nzc1MzdjYTVlODVlZjg2YmM3N2VkOTI5YjAwMzFjYTQ3MTFiMTlhOWI5
|
10
|
+
ZDY3OTgxNjllNDYyOTViZTViYTM0Njc1OGU2YmE2NzcyNTdhMzQ0ZDAzYzcz
|
11
|
+
MmMwYTk1MmFmZDE0YTM5YTBiNTk1NWM1YjAzNGJmNTRjZmExMzc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTA5Y2IyMzFiOTkxMDIwMDJhOTBmMDQ5MGZhMDAxODI2MDcwZDRlM2M2Yzll
|
14
|
+
YTZmMDNjODY3MmQ5MzY2NWJhNjEwNWE5NDgyN2Y1ZDZjZTA2MDQ2YjkzMWFj
|
15
|
+
YjNlNDhlNTY3ZjhkOTkwODIwNzFhOTQ2ZWJlZWIyYzgwODAzMDg=
|
data/.grabepg.gemspec
CHANGED
data/lib/grabepg.rb
CHANGED
@@ -98,7 +98,7 @@ module Grabepg
|
|
98
98
|
@channel = []
|
99
99
|
@site = DEFAULT_SITE
|
100
100
|
channel_list = self.getchannels("/home/zql")
|
101
|
-
proxy_list=get_topfast_list
|
101
|
+
proxy_list=get_topfast_list(5)
|
102
102
|
img_down_path = self.img_down_path
|
103
103
|
p img_down_path
|
104
104
|
channel_urls = channel_list["channel_urls"]
|
@@ -186,10 +186,114 @@ module Grabepg
|
|
186
186
|
doc
|
187
187
|
end
|
188
188
|
|
189
|
+
#获取某天的节目表
|
190
|
+
# Scrapes the programme listing of one channel for a single day.
#
# channel   - channel identifier; only used in the debug output.
# url       - address of the day page to fetch.
# proxylist - proxies handed through to get_doc_with_proxy and
#             get_show_infomation.
#
# Side effect: sets @date to "<week>(<date>)" taken from the page header.
# Callers reset @date to "" beforehand and skip the day when it is still
# empty afterwards, so a page without the expected header/list nodes returns
# [] and leaves @date untouched instead of failing on nil.
#
# Returns an Array of Hashes with the keys "schedule_name", "schedule_logo",
# "schedule_start", "show_infomation_herf", "type" and "name".
def self.get_schedulelist_atday(channel,url,proxylist)
  p "Grab: #{url}"
  doc = get_doc_with_proxy(proxylist,url)

  header = doc.css('div[class="mt10 clear"]')[0]
  listing = doc.css('ul[id="pgrow"]')[0]
  return [] unless header && listing

  # The header text starts with the date followed by the weekday label.
  date, week = header.content.split(" ")
  p "Channel: #{channel} Date: #{date} Week: #{week}"
  @date = "#{week}(#{date})"

  schedule_list = []
  listing.css("li").each do |item|
    # Prefer a direct link; otherwise fall back to the "drama" link with its
    # "/episode/section..." suffix cut off.
    link = item.xpath('a[@href]')[0]
    if link
      schedule_herf = link.get_attribute("href")
    else
      drama = item.css('a[class="drama"]')[0]
      schedule_herf = drama.get_attribute("href").gsub("/episode/section","#%#").split("#%#")[0] if drama
    end

    # Rows need at least "<time> <title>"; anything shorter is skipped.
    fields = item.content.split(" ")
    next unless fields.size > 1
    time, title = fields

    # Defaults are reset per entry so a show without a detail link does not
    # inherit the type of the previous entry.
    show_type = []
    show_name = ""
    show_img = nil
    unless schedule_herf.nil? || schedule_herf.empty?
      show_infomation = get_show_infomation(proxylist,schedule_herf)
      show_type = show_infomation["type"]
      show_name = show_infomation["name"]
      show_img = show_infomation["img"]
    end

    p "Time: #{time} schedule: #{title} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
    schedule_list << {"schedule_name"=>title,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
  end
  schedule_list
end
|
227
|
+
|
228
|
+
# Build the day-page URLs for a specified start day and number of days
|
229
|
+
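# start_time: Integer offset of the first day relative to today; a positive value means that many days after today, a negative value that many days before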
|
230
|
+
# day_num: Integer number of days to fetch, counted from the start day
|
231
|
+
# Builds the list of day-page URLs for a range of days.
#
# url        - channel page path; it is split on "-" and only the first two
#              segments are kept, the third is replaced by "w<day>.html".
# start_time - Integer offset of the first day relative to today (positive:
#              days after today, negative: days before today).
# day_num    - Integer number of days to cover, counted from the start day.
#
# Day indexes are derived from Time.now.wday. A start day that would fall
# below 0 is moved to day 1, and the last day never exceeds today's index + 7.
#
# Returns an Array of URL Strings, one per day (empty when day_num < 1).
#
# NOTE(review): Time#wday is 0 on Sundays, so a run on Sunday with
# start_time 0 produces "w0.html" - confirm the site accepts that index.
# NOTE(review): this is an instance-level method while the visible caller is
# defined with `def self.`; verify how it is dispatched.
def get_assign_date_url(url,start_time,day_num)
  segments = url.split("-")
  prefix = site + (0..1).map { |i| "#{segments[i]}-" }.join

  today = Time.now.wday
  first_day = today + start_time
  first_day = 1 if first_day < 0

  # day_num counts days including the first one, hence the "- 1"; the range
  # below is inclusive on both ends.
  last_day = [first_day + day_num - 1, today + 7].min

  (first_day..last_day).map { |day| "#{prefix}w#{day}.html" }
end
|
258
|
+
|
259
|
+
|
260
|
+
|
261
|
+
#获取指定时间段的节目表
|
262
|
+
# Collects the schedule of one channel for a caller-specified range of days.
#
# channel           - channel identifier, passed to get_schedulelist_atday.
# herf              - channel page path, passed to get_assign_date_url.
# proxylist         - proxies used for the page requests.
# start_num         - Integer offset of the first day relative to today.
# day_num           - number of days to fetch; anything that is not a number
#                     >= 1 falls back to 1.
# img_dir_down_path - directory in which the "schedule_img_down_path" file is
#                     created; defaults to @img_down_dir_path and, when that
#                     is unset, to the directory containing this source file.
#
# Side effects: opens @img_down_file for the duration of the run (presumably
# written to by helpers outside this method - TODO confirm), resets
# @show_schedule and updates @date once per fetched day.
#
# Returns {"channel_schedule" => {"<week>(<date>)" => [entries]},
#          "show_schedule" => @show_schedule}.
def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num,img_dir_down_path=@img_down_dir_path)
  day_num = 1 unless day_num.is_a?(Numeric) && day_num >= 1

  # __FILE__ itself is a file, so its directory is used as the fallback
  # location.
  img_dir_down_path ||= File.dirname(__FILE__)
  @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
  @show_schedule = {}

  channel_schedule = {}
  begin
    get_assign_date_url(herf,start_num,day_num).each do |url|
      # get_schedulelist_atday sets @date on success; an empty value means
      # the day could not be parsed and is left out of the result.
      @date = ""
      schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
      channel_schedule[@date] = schedule_list unless @date.empty?
    end
  ensure
    # Release the handle even when one of the requests raises.
    @img_down_file.close
  end

  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
end
|
289
|
+
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
|
294
|
+
#因原已调用所以保留
|
295
|
+
#获取一周节目表
|
296
|
+
def self.getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
|
193
297
|
p "Day Num is #{day_num}"
|
194
298
|
begin
|
195
299
|
day_num = 1 if day_num<1
|
@@ -223,40 +327,9 @@ module Grabepg
|
|
223
327
|
|
224
328
|
channel_schedule = {}
|
225
329
|
get_week_url.call(herf,day_num).each do |url|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
img_url = _img_url + channel+".jpg"
|
230
|
-
data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
|
231
|
-
date = data[0]
|
232
|
-
week = data[1]
|
233
|
-
p "Channel: #{channel} Date: #{date} Week: #{week}"
|
234
|
-
schedule_list = []
|
235
|
-
doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
|
236
|
-
_herf= schedule.xpath('a[@href]')[0]
|
237
|
-
schedule_herf=_herf.get_attribute("href") if _herf
|
238
|
-
unless _herf
|
239
|
-
drama =schedule.css('a[class="drama"]')[0]
|
240
|
-
if drama
|
241
|
-
_herfs=drama.get_attribute("href").gsub("/episode/section","#%#")
|
242
|
-
schedule_herf = _herfs.split("#%#")[0]
|
243
|
-
end
|
244
|
-
end
|
245
|
-
if schedule.content.split(" ").size>1
|
246
|
-
time = schedule.content.split(" ")[0]
|
247
|
-
schedule = schedule.content.split(" ")[1]
|
248
|
-
show_name = ""
|
249
|
-
unless schedule_herf.nil?||schedule_herf.empty?
|
250
|
-
show_infomation=get_show_infomation(proxylist,schedule_herf)
|
251
|
-
show_type=show_infomation["type"]
|
252
|
-
show_name = show_infomation["name"]
|
253
|
-
show_img = show_infomation["img"]
|
254
|
-
end
|
255
|
-
p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
|
256
|
-
schedule_list << {"schedule_name"=>schedule,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
|
257
|
-
end
|
258
|
-
end
|
259
|
-
channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
|
330
|
+
@date = ""
|
331
|
+
schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
|
332
|
+
channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
|
260
333
|
end
|
261
334
|
@img_down_file.close
|
262
335
|
{"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
|
@@ -333,14 +406,15 @@ module Grabepg
|
|
333
406
|
|
334
407
|
|
335
408
|
#获取指定访问速度的代理服务器
|
336
|
-
|
409
|
+
# use_time: Integer, the slowest acceptable response time in seconds
|
410
|
+
def self.get_topfast_list(use_time)
|
337
411
|
fast_list = []
|
338
412
|
time_use = 0
|
339
413
|
ips_ports = get_proxy_list()
|
340
414
|
ips_ports.each do |ip_port|
|
341
415
|
time_start = Time.now.to_i
|
342
416
|
begin
|
343
|
-
timeout(
|
417
|
+
timeout(use_time) do
|
344
418
|
doc = Nokogiri::HTML(open("http://www.tvmao.com/program",:proxy=> "http://#{ip_port}"))
|
345
419
|
end
|
346
420
|
time_end = Time.now.to_i
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grab_epg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahazql
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-05-
|
11
|
+
date: 2013-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: ! '"用于从TVMAO抓取EPG信息"'
|
14
14
|
email:
|