grab_epg 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. checksums.yaml +8 -8
  2. data/.grabepg.gemspec +1 -1
  3. data/lib/grabepg.rb +113 -39
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZWZlNWRhZGY1NWRhNWU2NmQyYmQzZWUyNjI0NjcyOGYyZTIyMGI4NA==
4
+ ZGM4ZjA4YTk0ODU0ZDBkNjdlZjc1MGFiMDRjZGE5OTFlNTU5Nzc5Nw==
5
5
  data.tar.gz: !binary |-
6
- MGUwNmQzZjQ1MTMyZTk0YzUxNmY1OTY1ZTAwM2QzMmVlNDQ1Yzg5Yg==
6
+ MzIwODc1NzdiZDEzYjhiNWI3MDVlNzFkMTUyNTFhMGNiZjEzODM4Yg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NDgzMzY4ZTRjNWNjOTBjOGM5YmY3YTQyZDZiZjQ1MjhiMjM3M2EzZTUyOWQx
10
- ZTE4MjNmZjVjMWM3MTExMDI4YmI0MGZiMGFjZmJmNTJjM2IxMDQ4NWJjMWY0
11
- NDRmNzc4ODNmN2ZiZTQ3MmJlNDAyMGQ0MTdlNzYwZmViMmNlNzA=
9
+ ZGQ5Nzc1MzdjYTVlODVlZjg2YmM3N2VkOTI5YjAwMzFjYTQ3MTFiMTlhOWI5
10
+ ZDY3OTgxNjllNDYyOTViZTViYTM0Njc1OGU2YmE2NzcyNTdhMzQ0ZDAzYzcz
11
+ MmMwYTk1MmFmZDE0YTM5YTBiNTk1NWM1YjAzNGJmNTRjZmExMzc=
12
12
  data.tar.gz: !binary |-
13
- MWZlMzUzMzMzYzkwNGQ3NjQ0ZWJiNTJmYjczZjY0M2Q5ZGNjOTVmZmYyYzVm
14
- MjBhNGZkOTE0NGU1ZDEzMzA3YTIzZWZhNjljNmNiY2E0MGNjMzZhYzc4NzMz
15
- MDYyYjQ5ODFkMWZjOWRjNTJkMzZkMzJkNzZhNDc0MDQyMTEwNzI=
13
+ YTA5Y2IyMzFiOTkxMDIwMDJhOTBmMDQ5MGZhMDAxODI2MDcwZDRlM2M2Yzll
14
+ YTZmMDNjODY3MmQ5MzY2NWJhNjEwNWE5NDgyN2Y1ZDZjZTA2MDQ2YjkzMWFj
15
+ YjNlNDhlNTY3ZjhkOTkwODIwNzFhOTQ2ZWJlZWIyYzgwODAzMDg=
data/.grabepg.gemspec CHANGED
@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.1.2"
13
+ gem.version = "0.1.3"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/lib/grabepg.rb CHANGED
@@ -98,7 +98,7 @@ module Grabepg
98
98
  @channel = []
99
99
  @site = DEFAULT_SITE
100
100
  channel_list = self.getchannels("/home/zql")
101
- proxy_list=get_topfast_list
101
+ proxy_list=get_topfast_list(5)
102
102
  img_down_path = self.img_down_path
103
103
  p img_down_path
104
104
  channel_urls = channel_list["channel_urls"]
@@ -186,10 +186,114 @@ module Grabepg
186
186
  doc
187
187
  end
188
188
 
189
+ #获取某天的节目表
190
+ def self.get_schedulelist_atday(channel,url,proxylist)
191
+ p "Grab: #{url}"
192
+ doc = get_doc_with_proxy(proxylist,url)
193
+ show_type = []
194
+ img_url = _img_url + channel+".jpg"
195
+ data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
196
+ date = data[0]
197
+ week = data[1]
198
+ p "Channel: #{channel} Date: #{date} Week: #{week}"
199
+ @date = "#{week}(#{date})"
200
+ schedule_list = []
201
+ doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
202
+ _herf= schedule.xpath('a[@href]')[0]
203
+ schedule_herf=_herf.get_attribute("href") if _herf
204
+ unless _herf
205
+ drama =schedule.css('a[class="drama"]')[0]
206
+ if drama
207
+ _herfs=drama.get_attribute("href").gsub("/episode/section","#%#")
208
+ schedule_herf = _herfs.split("#%#")[0]
209
+ end
210
+ end
211
+ if schedule.content.split(" ").size>1
212
+ time = schedule.content.split(" ")[0]
213
+ schedule = schedule.content.split(" ")[1]
214
+ show_name = ""
215
+ unless schedule_herf.nil?||schedule_herf.empty?
216
+ show_infomation=get_show_infomation(proxylist,schedule_herf)
217
+ show_type=show_infomation["type"]
218
+ show_name = show_infomation["name"]
219
+ show_img = show_infomation["img"]
220
+ end
221
+ p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
222
+ schedule_list << {"schedule_name"=>schedule,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
223
+ end
224
+ end
225
+ schedule_list
226
+ end
227
+
228
+ #获取制定时间和长度url
229
+ #start_time 为int型 开始时间和今天的差值 正数代表之后的第几天 负数代表之前的第几天
230
+ #day_num 为int型 代表抓取的时间从开始时间计算的多少天
231
+ def get_assign_date_url(url,start_time,day_num)
232
+ _url = site
233
+ urls = []
234
+ _urls = url.split("-")
235
+
236
+ time = Time.now
237
+ _wday = time.wday
238
+ wday = _wday + start_time
239
+ if wday<0
240
+ wday = 1
241
+ end
242
+
243
+ end_day = wday + day_num
244
+
245
+ if end_day>(_wday+7)
246
+ end_day = _wday + 7
247
+ end
248
+
249
+ 0.upto(1).each do |i|
250
+ _url = _url+"#{_urls[i]}"+"-"
251
+ end
252
+
253
+ wday.upto(end_day).each do |i|
254
+ urls << _url+"w#{i}.html"
255
+ end
256
+ urls
257
+ end
258
+
259
+
260
+
261
+ #获取指定时间段的节目表
262
+ def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num,img_dir_down_path=@img_down_dir_path)
263
+ begin
264
+ day_num = 1 if day_num<1
265
+ rescue
266
+ day_num = 1
267
+ end
268
+ site="http://www.tvmao.com"
269
+ unless img_dir_down_path
270
+ img_dir_down_path = __FILE__
271
+ end
272
+ @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
189
273
 
274
+ if(@site)
275
+ site=@site
276
+ end
277
+ _img_url = "http://static.haotv.me/channel/logo/"
278
+ @show_schedule = {}
190
279
 
191
- #获取节目表
192
- def self.getschedule(channel,herf,proxylist,day_num=1,img_dir_down_path=@img_down_dir_path)
280
+ channel_schedule = {}
281
+ get_assign_date_url(herf,start_num,day_num).each do |url|
282
+ @date = ""
283
+ schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
284
+ channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
285
+ end
286
+ @img_down_file.close
287
+ {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
288
+ end
289
+
290
+
291
+
292
+
293
+
294
+ #因原已调用所以保留
295
+ #获取一周节目表
296
+ def self.getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
193
297
  p "Day Num is #{day_num}"
194
298
  begin
195
299
  day_num = 1 if day_num<1
@@ -223,40 +327,9 @@ module Grabepg
223
327
 
224
328
  channel_schedule = {}
225
329
  get_week_url.call(herf,day_num).each do |url|
226
- p "Grab: #{url}"
227
- doc = get_doc_with_proxy(proxylist,url)
228
- show_type = []
229
- img_url = _img_url + channel+".jpg"
230
- data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
231
- date = data[0]
232
- week = data[1]
233
- p "Channel: #{channel} Date: #{date} Week: #{week}"
234
- schedule_list = []
235
- doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
236
- _herf= schedule.xpath('a[@href]')[0]
237
- schedule_herf=_herf.get_attribute("href") if _herf
238
- unless _herf
239
- drama =schedule.css('a[class="drama"]')[0]
240
- if drama
241
- _herfs=drama.get_attribute("href").gsub("/episode/section","#%#")
242
- schedule_herf = _herfs.split("#%#")[0]
243
- end
244
- end
245
- if schedule.content.split(" ").size>1
246
- time = schedule.content.split(" ")[0]
247
- schedule = schedule.content.split(" ")[1]
248
- show_name = ""
249
- unless schedule_herf.nil?||schedule_herf.empty?
250
- show_infomation=get_show_infomation(proxylist,schedule_herf)
251
- show_type=show_infomation["type"]
252
- show_name = show_infomation["name"]
253
- show_img = show_infomation["img"]
254
- end
255
- p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
256
- schedule_list << {"schedule_name"=>schedule,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
257
- end
258
- end
259
- channel_schedule.merge!({"#{week}(#{date})"=>schedule_list})
330
+ @date = ""
331
+ schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
332
+ channel_schedule.merge!({@date=>schedule_list}) unless @date.empty?
260
333
  end
261
334
  @img_down_file.close
262
335
  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
@@ -333,14 +406,15 @@ module Grabepg
333
406
 
334
407
 
335
408
  #获取指定访问速度的代理服务器
336
- def self.get_topfast_list()
409
+ #time为最慢速度的时间 int型 代表秒
410
+ def self.get_topfast_list(use_time)
337
411
  fast_list = []
338
412
  time_use = 0
339
413
  ips_ports = get_proxy_list()
340
414
  ips_ports.each do |ip_port|
341
415
  time_start = Time.now.to_i
342
416
  begin
343
- timeout(5) do
417
+ timeout(use_time) do
344
418
  doc = Nokogiri::HTML(open("http://www.tvmao.com/program",:proxy=> "http://#{ip_port}"))
345
419
  end
346
420
  time_end = Time.now.to_i
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grab_epg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahazql
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-09 00:00:00.000000000 Z
11
+ date: 2013-05-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: ! '"用于从TVMAO抓取EPG信息"'
14
14
  email: