grab_epg 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +8 -8
  2. data/.grabepg.gemspec +1 -1
  3. data/lib/grabepg.rb +113 -39
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZWZlNWRhZGY1NWRhNWU2NmQyYmQzZWUyNjI0NjcyOGYyZTIyMGI4NA==
4
+ ZGM4ZjA4YTk0ODU0ZDBkNjdlZjc1MGFiMDRjZGE5OTFlNTU5Nzc5Nw==
5
5
  data.tar.gz: !binary |-
6
- MGUwNmQzZjQ1MTMyZTk0YzUxNmY1OTY1ZTAwM2QzMmVlNDQ1Yzg5Yg==
6
+ MzIwODc1NzdiZDEzYjhiNWI3MDVlNzFkMTUyNTFhMGNiZjEzODM4Yg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NDgzMzY4ZTRjNWNjOTBjOGM5YmY3YTQyZDZiZjQ1MjhiMjM3M2EzZTUyOWQx
10
- ZTE4MjNmZjVjMWM3MTExMDI4YmI0MGZiMGFjZmJmNTJjM2IxMDQ4NWJjMWY0
11
- NDRmNzc4ODNmN2ZiZTQ3MmJlNDAyMGQ0MTdlNzYwZmViMmNlNzA=
9
+ ZGQ5Nzc1MzdjYTVlODVlZjg2YmM3N2VkOTI5YjAwMzFjYTQ3MTFiMTlhOWI5
10
+ ZDY3OTgxNjllNDYyOTViZTViYTM0Njc1OGU2YmE2NzcyNTdhMzQ0ZDAzYzcz
11
+ MmMwYTk1MmFmZDE0YTM5YTBiNTk1NWM1YjAzNGJmNTRjZmExMzc=
12
12
  data.tar.gz: !binary |-
13
- MWZlMzUzMzMzYzkwNGQ3NjQ0ZWJiNTJmYjczZjY0M2Q5ZGNjOTVmZmYyYzVm
14
- MjBhNGZkOTE0NGU1ZDEzMzA3YTIzZWZhNjljNmNiY2E0MGNjMzZhYzc4NzMz
15
- MDYyYjQ5ODFkMWZjOWRjNTJkMzZkMzJkNzZhNDc0MDQyMTEwNzI=
13
+ YTA5Y2IyMzFiOTkxMDIwMDJhOTBmMDQ5MGZhMDAxODI2MDcwZDRlM2M2Yzll
14
+ YTZmMDNjODY3MmQ5MzY2NWJhNjEwNWE5NDgyN2Y1ZDZjZTA2MDQ2YjkzMWFj
15
+ YjNlNDhlNTY3ZjhkOTkwODIwNzFhOTQ2ZWJlZWIyYzgwODAzMDg=
data/.grabepg.gemspec CHANGED
@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.1.2"
13
+ gem.version = "0.1.3"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/lib/grabepg.rb CHANGED
@@ -98,7 +98,7 @@ module Grabepg
98
98
  @channel = []
99
99
  @site = DEFAULT_SITE
100
100
  channel_list = self.getchannels("/home/zql")
101
- proxy_list=get_topfast_list
101
+ proxy_list=get_topfast_list(5)
102
102
  img_down_path = self.img_down_path
103
103
  p img_down_path
104
104
  channel_urls = channel_list["channel_urls"]
@@ -186,10 +186,114 @@ module Grabepg
186
186
  doc
187
187
  end
188
188
 
189
+ #获取某天的节目表
190
+ def self.get_schedulelist_atday(channel,url,proxylist)
191
+ p "Grab: #{url}"
192
+ doc = get_doc_with_proxy(proxylist,url)
193
+ show_type = []
194
+ img_url = _img_url + channel+".jpg"
195
+ data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
196
+ date = data[0]
197
+ week = data[1]
198
+ p "Channel: #{channel} Date: #{date} Week: #{week}"
199
+ @date = "#{week}(#{date})"
200
+ schedule_list = []
201
+ doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
202
+ _herf= schedule.xpath('a[@href]')[0]
203
+ schedule_herf=_herf.get_attribute("href") if _herf
204
+ unless _herf
205
+ drama =schedule.css('a[class="drama"]')[0]
206
+ if drama
207
+ _herfs=drama.get_attribute("href").gsub("/episode/section","#%#")
208
+ schedule_herf = _herfs.split("#%#")[0]
209
+ end
210
+ end
211
+ if schedule.content.split(" ").size>1
212
+ time = schedule.content.split(" ")[0]
213
+ schedule = schedule.content.split(" ")[1]
214
+ show_name = ""
215
+ unless schedule_herf.nil?||schedule_herf.empty?
216
+ show_infomation=get_show_infomation(proxylist,schedule_herf)
217
+ show_type=show_infomation["type"]
218
+ show_name = show_infomation["name"]
219
+ show_img = show_infomation["img"]
220
+ end
221
+ p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
222
+ schedule_list << {"schedule_name"=>schedule,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
223
+ end
224
+ end
225
+ schedule_list
226
+ end
227
+
228
+ #获取制定时间和长度url
229
+ #start_time 为int型 开始时间和今天的差值 正数代表之后的第几天 负数代表之前的第几天
230
+ #day_num 为int型 代表抓取的时间从开始时间计算的多少天
231
+ def get_assign_date_url(url,start_time,day_num)
232
+ _url = site
233
+ urls = []
234
+ _urls = url.split("-")
235
+
236
+ time = Time.now
237
+ _wday = time.wday
238
+ wday = _wday + start_time
239
+ if wday<0
240
+ wday = 1
241
+ end
242
+
243
+ end_day = wday + day_num
244
+
245
+ if end_day>(_wday+7)
246
+ end_day = _wday + 7
247
+ end
248
+
249
+ 0.upto(1).each do |i|
250
+ _url = _url+"#{_urls[i]}"+"-"
251
+ end
252
+
253
+ wday.upto(end_day).each do |i|
254
+ urls << _url+"w#{i}.html"
255
+ end
256
+ urls
257
+ end
258
+
259
+
260
+
261
+ #获取指定时间段的节目表
262
+ def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num,img_dir_down_path=@img_down_dir_path)
263
+ begin
264
+ day_num = 1 if day_num<1
265
+ rescue
266
+ day_num = 1
267
+ end
268
+ site="http://www.tvmao.com"
269
+ unless img_dir_down_path
270
+ img_dir_down_path = __FILE__
271
+ end
272
+ @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
189
273
 
274
+ if(@site)
275
+ site=@site
276
+ end
277
+ _img_url = "http://static.haotv.me/channel/logo/"
278
+ @show_schedule = {}
190
279
 
191
- #获取节目表
192
- def self.getschedule(channel,herf,proxylist,day_num=1,img_dir_down_path=@img_down_dir_path)
280
+ channel_schedule = {}
281
+ get_assign_date_url(herf,start_num,day_num).each do |url|
282
+ @date = ""
283
+ schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
284
+ channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
285
+ end
286
+ @img_down_file.close
287
+ {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
288
+ end
289
+
290
+
291
+
292
+
293
+
294
+ #因原已调用所以保留
295
+ #获取一周节目表
296
+ def self.getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
193
297
  p "Day Num is #{day_num}"
194
298
  begin
195
299
  day_num = 1 if day_num<1
@@ -223,40 +327,9 @@ module Grabepg
223
327
 
224
328
  channel_schedule = {}
225
329
  get_week_url.call(herf,day_num).each do |url|
226
- p "Grab: #{url}"
227
- doc = get_doc_with_proxy(proxylist,url)
228
- show_type = []
229
- img_url = _img_url + channel+".jpg"
230
- data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
231
- date = data[0]
232
- week = data[1]
233
- p "Channel: #{channel} Date: #{date} Week: #{week}"
234
- schedule_list = []
235
- doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
236
- _herf= schedule.xpath('a[@href]')[0]
237
- schedule_herf=_herf.get_attribute("href") if _herf
238
- unless _herf
239
- drama =schedule.css('a[class="drama"]')[0]
240
- if drama
241
- _herfs=drama.get_attribute("href").gsub("/episode/section","#%#")
242
- schedule_herf = _herfs.split("#%#")[0]
243
- end
244
- end
245
- if schedule.content.split(" ").size>1
246
- time = schedule.content.split(" ")[0]
247
- schedule = schedule.content.split(" ")[1]
248
- show_name = ""
249
- unless schedule_herf.nil?||schedule_herf.empty?
250
- show_infomation=get_show_infomation(proxylist,schedule_herf)
251
- show_type=show_infomation["type"]
252
- show_name = show_infomation["name"]
253
- show_img = show_infomation["img"]
254
- end
255
- p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
256
- schedule_list << {"schedule_name"=>schedule,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
257
- end
258
- end
259
- channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
330
+ @date = ""
331
+ schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
332
+ channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
260
333
  end
261
334
  @img_down_file.close
262
335
  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
@@ -333,14 +406,15 @@ module Grabepg
333
406
 
334
407
 
335
408
  #获取指定访问速度的代理服务器
336
- def self.get_topfast_list()
409
+ #time为最慢速度的时间 int型 代表秒
410
+ def self.get_topfast_list(use_time)
337
411
  fast_list = []
338
412
  time_use = 0
339
413
  ips_ports = get_proxy_list()
340
414
  ips_ports.each do |ip_port|
341
415
  time_start = Time.now.to_i
342
416
  begin
343
- timeout(5) do
417
+ timeout(use_time) do
344
418
  doc = Nokogiri::HTML(open("http://www.tvmao.com/program",:proxy=> "http://#{ip_port}"))
345
419
  end
346
420
  time_end = Time.now.to_i
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grab_epg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahazql
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-09 00:00:00.000000000 Z
11
+ date: 2013-05-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: ! '"用于从TVMAO抓取EPG信息"'
14
14
  email: