grab_epg 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.grabepg.gemspec +1 -1
- data/lib/grabepg.rb +113 -39
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZGM4ZjA4YTk0ODU0ZDBkNjdlZjc1MGFiMDRjZGE5OTFlNTU5Nzc5Nw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MzIwODc1NzdiZDEzYjhiNWI3MDVlNzFkMTUyNTFhMGNiZjEzODM4Yg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZGQ5Nzc1MzdjYTVlODVlZjg2YmM3N2VkOTI5YjAwMzFjYTQ3MTFiMTlhOWI5
|
10
|
+
ZDY3OTgxNjllNDYyOTViZTViYTM0Njc1OGU2YmE2NzcyNTdhMzQ0ZDAzYzcz
|
11
|
+
MmMwYTk1MmFmZDE0YTM5YTBiNTk1NWM1YjAzNGJmNTRjZmExMzc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTA5Y2IyMzFiOTkxMDIwMDJhOTBmMDQ5MGZhMDAxODI2MDcwZDRlM2M2Yzll
|
14
|
+
YTZmMDNjODY3MmQ5MzY2NWJhNjEwNWE5NDgyN2Y1ZDZjZTA2MDQ2YjkzMWFj
|
15
|
+
YjNlNDhlNTY3ZjhkOTkwODIwNzFhOTQ2ZWJlZWIyYzgwODAzMDg=
|
data/.grabepg.gemspec
CHANGED
data/lib/grabepg.rb
CHANGED
@@ -98,7 +98,7 @@ module Grabepg
|
|
98
98
|
@channel = []
|
99
99
|
@site = DEFAULT_SITE
|
100
100
|
channel_list = self.getchannels("/home/zql")
|
101
|
-
proxy_list=get_topfast_list
|
101
|
+
proxy_list=get_topfast_list(5)
|
102
102
|
img_down_path = self.img_down_path
|
103
103
|
p img_down_path
|
104
104
|
channel_urls = channel_list["channel_urls"]
|
@@ -186,10 +186,114 @@ module Grabepg
|
|
186
186
|
doc
|
187
187
|
end
|
188
188
|
|
189
|
+
#获取某天的节目表
|
190
|
+
def self.get_schedulelist_atday(channel,url,proxylist)
|
191
|
+
p "Grab: #{url}"
|
192
|
+
doc = get_doc_with_proxy(proxylist,url)
|
193
|
+
show_type = []
|
194
|
+
img_url = _img_url + channel+".jpg"
|
195
|
+
data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
|
196
|
+
date = data[0]
|
197
|
+
week = data[1]
|
198
|
+
p "Channel: #{channel} Date: #{date} Week: #{week}"
|
199
|
+
@date = "#{week}(#{date})"
|
200
|
+
schedule_list = []
|
201
|
+
doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
|
202
|
+
_herf= schedule.xpath('a[@href]')[0]
|
203
|
+
schedule_herf=_herf.get_attribute("href") if _herf
|
204
|
+
unless _herf
|
205
|
+
drama =schedule.css('a[class="drama"]')[0]
|
206
|
+
if drama
|
207
|
+
_herfs=drama.get_attribute("href").gsub("/episode/section","#%#")
|
208
|
+
schedule_herf = _herfs.split("#%#")[0]
|
209
|
+
end
|
210
|
+
end
|
211
|
+
if schedule.content.split(" ").size>1
|
212
|
+
time = schedule.content.split(" ")[0]
|
213
|
+
schedule = schedule.content.split(" ")[1]
|
214
|
+
show_name = ""
|
215
|
+
unless schedule_herf.nil?||schedule_herf.empty?
|
216
|
+
show_infomation=get_show_infomation(proxylist,schedule_herf)
|
217
|
+
show_type=show_infomation["type"]
|
218
|
+
show_name = show_infomation["name"]
|
219
|
+
show_img = show_infomation["img"]
|
220
|
+
end
|
221
|
+
p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
|
222
|
+
schedule_list << {"schedule_name"=>schedule,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
|
223
|
+
end
|
224
|
+
end
|
225
|
+
schedule_list
|
226
|
+
end
|
227
|
+
|
228
|
+
#获取制定时间和长度url
|
229
|
+
#start_time 为int型 开始时间和今天的差值 正数代表之后的第几天 负数代表之前的第几天
|
230
|
+
#day_num 为int型 代表抓取的时间从开始时间计算的多少天
|
231
|
+
def get_assign_date_url(url,start_time,day_num)
|
232
|
+
_url = site
|
233
|
+
urls = []
|
234
|
+
_urls = url.split("-")
|
235
|
+
|
236
|
+
time = Time.now
|
237
|
+
_wday = time.wday
|
238
|
+
wday = _wday + start_time
|
239
|
+
if wday<0
|
240
|
+
wday = 1
|
241
|
+
end
|
242
|
+
|
243
|
+
end_day = wday + day_num
|
244
|
+
|
245
|
+
if end_day>(_wday+7)
|
246
|
+
end_day = _wday + 7
|
247
|
+
end
|
248
|
+
|
249
|
+
0.upto(1).each do |i|
|
250
|
+
_url = _url+"#{_urls[i]}"+"-"
|
251
|
+
end
|
252
|
+
|
253
|
+
wday.upto(end_day).each do |i|
|
254
|
+
urls << _url+"w#{i}.html"
|
255
|
+
end
|
256
|
+
urls
|
257
|
+
end
|
258
|
+
|
259
|
+
|
260
|
+
|
261
|
+
#获取指定时间段的节目表
|
262
|
+
def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num,img_dir_down_path=@img_down_dir_path)
|
263
|
+
begin
|
264
|
+
day_num = 1 if day_num<1
|
265
|
+
rescue
|
266
|
+
day_num = 1
|
267
|
+
end
|
268
|
+
site="http://www.tvmao.com"
|
269
|
+
unless img_dir_down_path
|
270
|
+
img_dir_down_path = __FILE__
|
271
|
+
end
|
272
|
+
@img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
|
189
273
|
|
274
|
+
if(@site)
|
275
|
+
site=@site
|
276
|
+
end
|
277
|
+
_img_url = "http://static.haotv.me/channel/logo/"
|
278
|
+
@show_schedule = {}
|
190
279
|
|
191
|
-
|
192
|
-
|
280
|
+
channel_schedule = {}
|
281
|
+
get_assign_date_url(herf,start_num,day_num).each do |url|
|
282
|
+
@date = ""
|
283
|
+
schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
|
284
|
+
channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
|
285
|
+
end
|
286
|
+
@img_down_file.close
|
287
|
+
{"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
|
288
|
+
end
|
289
|
+
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
|
294
|
+
#因原已调用所以保留
|
295
|
+
#获取一周节目表
|
296
|
+
def self.getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
|
193
297
|
p "Day Num is #{day_num}"
|
194
298
|
begin
|
195
299
|
day_num = 1 if day_num<1
|
@@ -223,40 +327,9 @@ module Grabepg
|
|
223
327
|
|
224
328
|
channel_schedule = {}
|
225
329
|
get_week_url.call(herf,day_num).each do |url|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
img_url = _img_url + channel+".jpg"
|
230
|
-
data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
|
231
|
-
date = data[0]
|
232
|
-
week = data[1]
|
233
|
-
p "Channel: #{channel} Date: #{date} Week: #{week}"
|
234
|
-
schedule_list = []
|
235
|
-
doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
|
236
|
-
_herf= schedule.xpath('a[@href]')[0]
|
237
|
-
schedule_herf=_herf.get_attribute("href") if _herf
|
238
|
-
unless _herf
|
239
|
-
drama =schedule.css('a[class="drama"]')[0]
|
240
|
-
if drama
|
241
|
-
_herfs=drama.get_attribute("href").gsub("/episode/section","#%#")
|
242
|
-
schedule_herf = _herfs.split("#%#")[0]
|
243
|
-
end
|
244
|
-
end
|
245
|
-
if schedule.content.split(" ").size>1
|
246
|
-
time = schedule.content.split(" ")[0]
|
247
|
-
schedule = schedule.content.split(" ")[1]
|
248
|
-
show_name = ""
|
249
|
-
unless schedule_herf.nil?||schedule_herf.empty?
|
250
|
-
show_infomation=get_show_infomation(proxylist,schedule_herf)
|
251
|
-
show_type=show_infomation["type"]
|
252
|
-
show_name = show_infomation["name"]
|
253
|
-
show_img = show_infomation["img"]
|
254
|
-
end
|
255
|
-
p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
|
256
|
-
schedule_list << {"schedule_name"=>schedule,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
|
257
|
-
end
|
258
|
-
end
|
259
|
-
channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
|
330
|
+
@date = ""
|
331
|
+
schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
|
332
|
+
channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
|
260
333
|
end
|
261
334
|
@img_down_file.close
|
262
335
|
{"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
|
@@ -333,14 +406,15 @@ module Grabepg
|
|
333
406
|
|
334
407
|
|
335
408
|
#获取指定访问速度的代理服务器
|
336
|
-
|
409
|
+
#time为最慢速度的时间 int型 代表秒
|
410
|
+
def self.get_topfast_list(use_time)
|
337
411
|
fast_list = []
|
338
412
|
time_use = 0
|
339
413
|
ips_ports = get_proxy_list()
|
340
414
|
ips_ports.each do |ip_port|
|
341
415
|
time_start = Time.now.to_i
|
342
416
|
begin
|
343
|
-
timeout(
|
417
|
+
timeout(use_time) do
|
344
418
|
doc = Nokogiri::HTML(open("http://www.tvmao.com/program",:proxy=> "http://#{ip_port}"))
|
345
419
|
end
|
346
420
|
time_end = Time.now.to_i
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grab_epg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahazql
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-05-
|
11
|
+
date: 2013-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: ! '"用于从TVMAO抓取EPG信息"'
|
14
14
|
email:
|