grab_epg 0.1.6 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NmI0ZDZlY2FkNzVhNTNiNTQwYjU0NDNhOTNhYmUyZWUzY2ViN2U5OA==
4
+ MGEyOWI5Y2YwMTY2NzY1OWVmYzYwNTdmNDc4NGY4M2RlMzg2NWIwYg==
5
5
  data.tar.gz: !binary |-
6
- NzBkMjAxZmUwNDE1YzAzNWNmMTc1NjEwNjJlM2NhYjlmZmU2N2MzZg==
6
+ M2Y5NzBiMzA2MTg5MGM4ZjkxMWU0N2I2MWM0OWJjZThjNmE3NDkyNQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MmZiNGVkM2JjMGUwZDNmMDE5OTc2NzE2NjFmMDJmMzgyODU2Nzg1ZjgxZjBl
10
- NmVkMmU0MThhZmY1YjZhOWYxNDBlOWZjZDEwODNlYWI3MWFhNmY2ZDQ1OThl
11
- NTFjM2VmMjgxOTc1MmNkY2EzMmU2YzNjN2JkNjA0M2FlNjllZmE=
9
+ MmI4NjQwYTY3MWJhYWRkMjA1NDk2MWQ4MDdhOTg1N2ZkYmQwOTVkZDdiNjBh
10
+ NmI3M2Y0OTI2ZDcxMzE2YzEyYTMwZGQ3OGMzYzY2YTg1NjViZmMwOTkxOTgx
11
+ NGVmMDUzZmE1OTVlNTc4M2MxMWI3NzlhZDRmMGZjOWJlOWZiYTI=
12
12
  data.tar.gz: !binary |-
13
- ZTBmMzY2MDkyZDEzZDVhMzQ0MDA4MmVjYTBkMGM2YmJhYTlhYzkzOWYyNjJj
14
- ZjAxOTNmZjgwZTljNWI3MWQxMjY3Y2FmZjczODNhYWU4MGQ3YzZhNDRjOWM1
15
- NTJlZWYxNDViNDgwZjMxNDMxZWNhYzY1NzA1OTg5ZGIwY2YyYWI=
13
+ YWJmNDIyNzYyNDExNzMzMjA5NjU3ZDhmYzAzMDQ2ZjYwY2U4MTU0Y2ZjMmQ0
14
+ YzRmMzljMDRmNDJlM2I5ZWI5NTJjMjRkZmZkZmY2YzVmYTMyODJkNTQyZDNj
15
+ ZWM3ZTNhNDFlN2FjYTA1MDE0N2Y3OWU5MGU4NWZlNTQ3MGQzMjU=
data/.grabepg.gemspec CHANGED
@@ -3,13 +3,13 @@
3
3
  Gem::Specification.new do |gem|
4
4
  gem.authors = ["hahazql"]
5
5
  gem.email = ["hahazhouqunli@gmail.com"]
6
- gem.description = %q{"用于从TVMAO抓取EPG信息"}
6
+ gem.description = %q{"用于抓取EPG信息"}
7
7
  gem.summary = %q{"Grab EPG"}
8
8
  gem.homepage = ""
9
9
 
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.1.6"
13
+ gem.version = "0.1.8"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/lib/debug.rb CHANGED
@@ -2,50 +2,11 @@
2
2
 
3
3
  require 'nokogiri'
4
4
  require 'open-uri'
5
- require File.expand_path("../grabepg.rb", __FILE__)
5
+ require File.expand_path("../test/test_grab_tvsou.rb", __FILE__)
6
+ #require 'test/test_grab_tvsou'
6
7
  class Debug
7
8
  # To change this template use File | Settings | File Templates.
8
- proxylist = ["219.234.82.84:24809", "219.234.82.84:17130", "219.234.82.84:23684", "219.234.82.84:18253", "219.234.82.84:33987", "219.234.82.84:17183", "219.234.82.84:13243", "219.234.82.84:16158", "219.234.82.84:14826", "219.234.82.84:8489", "219.234.82.84:22222", "219.234.82.84:6370", "219.234.82.84:7571", "219.234.82.84:33944", "219.234.82.84:9743", "219.234.82.84:8089", "219.234.82.84:20991", "219.234.82.84:34032", "219.234.82.84:9415", "219.234.82.84:26149", "219.234.82.84:11095", "219.234.82.84:21724", "219.234.82.84:9177", "219.234.82.84:34034", "219.234.82.84:17945", "219.234.82.85:32229", "219.234.82.85:28341", "219.234.82.85:36314", "219.234.82.85:30605", "219.234.82.85:23684", "219.234.82.85:34015", "219.234.82.85:33919", "219.234.82.85:30639", "219.234.82.85:33965", "219.234.82.85:37299", "219.234.82.85:20747", "219.234.82.86:6666", "219.234.82.86:34106", "219.234.82.86:25301", "219.234.82.86:32896", "219.234.82.86:23034", "219.234.82.86:22685", "219.234.82.86:13078", "219.234.82.86:38770", "219.234.82.86:28402", "219.234.82.86:18887", "219.234.82.86:6588", "219.234.82.86:7292", "219.234.82.86:24268", "219.234.82.86:16472", "219.234.82.86:32597", "219.234.82.86:31122", "219.234.82.88:8817", "219.234.82.88:8160", "219.234.82.88:9239", "219.234.82.88:6133", "114.141.162.53:8080", "123.125.116.243:17656", "123.125.116.241:29156", "123.125.116.243:6938", "219.234.82.88:29484", "219.234.82.88:8084", "219.234.82.88:32229", "219.234.82.88:22758", "219.234.82.88:5616", "124.225.52.14:8080", "219.234.82.88:30028", "219.234.82.88:23685", "219.234.82.88:29037", "219.234.82.88:8755"]
9
+ #proxylist = ["219.234.82.84:24809", "219.234.82.84:17130", "219.234.82.84:23684", "219.234.82.84:18253", "219.234.82.84:33987", "219.234.82.84:17183", "219.234.82.84:13243", "219.234.82.84:16158", "219.234.82.84:14826", "219.234.82.84:8489", "219.234.82.84:22222", "219.234.82.84:6370", "219.234.82.84:7571", "219.234.82.84:33944", "219.234.82.84:9743", "219.234.82.84:8089", "219.234.82.84:20991", "219.234.82.84:34032", "219.234.82.84:9415", "219.234.82.84:26149", "219.234.82.84:11095", "219.234.82.84:21724", "219.234.82.84:9177", "219.234.82.84:34034", "219.234.82.84:17945", "219.234.82.85:32229", "219.234.82.85:28341", "219.234.82.85:36314", "219.234.82.85:30605", "219.234.82.85:23684", "219.234.82.85:34015", "219.234.82.85:33919", "219.234.82.85:30639", "219.234.82.85:33965", "219.234.82.85:37299", "219.234.82.85:20747", "219.234.82.86:6666", "219.234.82.86:34106", "219.234.82.86:25301", "219.234.82.86:32896", "219.234.82.86:23034", "219.234.82.86:22685", "219.234.82.86:13078", "219.234.82.86:38770", "219.234.82.86:28402", "219.234.82.86:18887", "219.234.82.86:6588", "219.234.82.86:7292", "219.234.82.86:24268", "219.234.82.86:16472", "219.234.82.86:32597", "219.234.82.86:31122", "219.234.82.88:8817", "219.234.82.88:8160", "219.234.82.88:9239", "219.234.82.88:6133", "114.141.162.53:8080", "123.125.116.243:17656", "123.125.116.241:29156", "123.125.116.243:6938", "219.234.82.88:29484", "219.234.82.88:8084", "219.234.82.88:32229", "219.234.82.88:22758", "219.234.82.88:5616", "124.225.52.14:8080", "219.234.82.88:30028", "219.234.82.88:23685", "219.234.82.88:29037", "219.234.82.88:8755"]
9
10
 
10
- def self.test_get_doc_with_proxy(proxylist)
11
- herf = "http://www.tvmao.com/drama/HS5oLCs="
12
- Grabepg.get_doc_with_proxy(proxylist,herf)
13
- end
14
-
15
- def self.test_get_show_infomation(proxylist)
16
- herf = "http://www.tvmao.com/tvcolumn/cVhPLQ=="
17
- Grabepg.get_show_infomation(proxylist,herf)
18
- end
19
-
20
- def self.test_getschedule(proxylist)
21
- channel = "HUNANTV"
22
- herf = "/program/HUNANTV-HUNANTV-w1.html"
23
- Grabepg.getschedule(channel,herf,proxylist,1)
24
- end
25
-
26
- def self.test_get_assign_date_url
27
- herf = "/program/HUNANTV-HUNANTV-w1.html"
28
- Grabepg.get_assign_date_url(herf,7,1)
29
- end
30
-
31
- def self.test_get_show_schedule(proxylist)
32
- herf = "http://www.tvmao.com/tvcolumn/cVhPLQ=="
33
- Grabepg.get_show_schedule(proxylist,herf)
34
- end
35
-
36
- def self.test_get_schedulelist_atday(proxylist)
37
- Grabepg.get_schedulelist_atday("CCTV1"," http://www.tvmao.com/program/CCTV-CCTV1-w1.html",proxylist)
38
- end
39
-
40
- def self.debug_all
41
- Grabepg.start
42
- end
43
-
44
- #Grabepg.start
45
- #p test_get_show_schedule(proxylist)
46
- #p test_getschedule(proxylist)
47
- # p test_get_show_infomation(proxylist)
48
- #p test_get_assign_date_url
49
- # p test_get_schedulelist_atday(proxylist)
50
- p debug_all
11
+ p TestGrabTvsou.start
51
12
  end
data/lib/grab_tvmao.rb ADDED
@@ -0,0 +1,595 @@
1
+ #encoding:utf-8
2
+
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+
6
+ module GrabTvmao
7
+ # To change this template use File | Settings | File Templates.
8
+
9
+
10
+ #图片的获取: Net::HTTP.get(url)
11
+ #图片的文件类型获取:
12
+
13
+ attr_reader :channel #频道列表
14
+ attr_reader :site #网站地址
15
+ attr_reader :proxyindex #代理的索引
16
+ attr_reader :show_schedule #根据节目的时间表
17
+ attr_reader :img_down_path #图片下载路径存放
18
+
19
+ DEFAULT_GrabtvType=["cctv","satellite","digital",]
20
+ DEFAULT_SITE = "http://www.tvmao.com"
21
+
22
+
23
+
24
+
25
+
26
# Convert a weekday number into its Chinese label ("星期" followed by a numeral).
#
# whatday - any object responding to to_i. 1..6 map to 一..六 and 7 maps to 七,
#           as before. 0 is also mapped to 七: Time#wday reports Sunday as 0,
#           which previously matched no branch and produced a bare "星期".
#           (Non-numeric input has to_i == 0 and is therefore labelled like 0.)
#
# Returns the label String; numbers outside 0..7 yield just "星期".
def self.conversion_what_day(whatday)
  # Index 0 and index 7 both describe the seventh day of the week.
  numerals = %w[七 一 二 三 四 五 六 七]
  index = whatday.to_i
  suffix = index.between?(0, 7) ? numerals[index] : ""
  "星期" + suffix
end
48
+
49
# Pad a month/day value to two characters.
#
# num - value converted with to_s.
#
# Returns the string form, prefixed with a single "0" when it is shorter
# than two characters (so 5 becomes "05" and 12 stays "12").
def self.dispose_time(num)
  text = num.to_s
  text.length < 2 ? "0" + text : text
end
57
+
58
# Format a time as "<weekday label>(MM-DD)".
#
# time - object responding to month, day and wday (e.g. a Time).
#
# Returns the String built from conversion_what_day and dispose_time.
def self.get_week_date_time(time)
  month_part = time.month
  day_part = time.day
  weekday_label = conversion_what_day(time.wday)
  "#{weekday_label}(#{dispose_time(month_part)}-#{dispose_time(day_part)})"
end
66
+
67
# Convert a number of days into seconds (days * 60 * 60 * 24), the amount
# get_time_day_prior subtracts from Time.now.
#
# day_num - number of days.
#
# Returns the corresponding number of seconds.
def self.del_day_num(day_num)
  [60, 60, 24].inject(day_num) { |total, factor| total * factor }
end
72
+
73
# Build the "<weekday label>(MM-DD)" string for the day `num` days before now.
#
# num - how many days to go back from Time.now.
#
# Returns the formatted String produced by get_week_date_time.
def self.get_time_day_prior(num)
  get_week_date_time(Time.now - del_day_num(num))
end
79
+
80
# List the date labels of the past week that should be deleted.
#
# Only produces entries when today is Monday (Time#wday == 1); on any other
# day the list is empty. On Monday it holds the labels for 0 through 7 days
# ago, i.e. eight entries including today.
#
# Returns an Array of Strings from get_time_day_prior.
def self.del_time_list
  return [] unless Time.now.wday == 1

  (0..7).map { |days_back| self.get_time_day_prior(days_back) }
end
92
+
93
+
94
+
95
+
96
# Example driver showing how the module is meant to be called.
#
# Fetches the channel list, picks the reachable proxies, then grabs one day
# of schedule (page offset 0, i.e. today's weekday index) for every channel
# from "CCTV16" onwards in the order returned by getchannels; channels listed
# before "CCTV16" are skipped.
#
# The unused string-similarity lambda and the per-channel locals that were
# assigned but never read have been removed; they had no effect on the run.
#
# path - directory where the image URL lists are written. Defaults to the
#        location that used to be hard-coded here.
#
# Returns the channel id => url Hash that was iterated.
def self.start(path = "/home/zql/workspace/New/smart_remote/img_path")
  channel_list = GrabTvmao.getchannels(path)
  channel_urls = channel_list['channel_urls']
  p "Channel img save file,path='#{GrabTvmao.img_down_path}'"

  # The argument of get_topfast_list is the slowest accepted response time, in seconds.
  proxy_list = GrabTvmao.get_topfast_list(5)

  p "************************************"
  p "proxy_list:#{proxy_list}"
  p "************************************"

  bool_start = false
  channel_urls.each do |channel, url|
    bool_start = true if channel == "CCTV16"
    next unless bool_start

    # Arguments: channel id, channel page href, proxies, start offset in
    # days relative to today, number of days to grab.
    GrabTvmao.getScheduleAssignDate(channel, url, proxy_list, 0, 1)
  end
end
203
+
204
# Directory used for the image URL list files.
#
# Nothing in this module assigns @img_down_path; getchannels stores the
# directory in @img_down_dir_path. Fall back to that value so the accessor
# no longer always returns nil.
#
# Returns the path String, or nil when neither variable has been set.
def self.img_down_path
  @img_down_path || @img_down_dir_path
end
207
+
208
+
209
# Fetch the channel table of the site for every type in DEFAULT_GrabtvType.
#
# Resets @channel, @site, @proxyindex and @img_down_dir_path, and writes one
# "<channel_id>:<logo url>" line per channel to the file
# "channel_img_down_path" inside img_dir_path.
#
# Changes from the previous version:
#   * the list file is closed in an ensure block, so it no longer leaks when
#     a page fails to download or parse;
#   * pages are opened through URI.parse(url).open (as gg already does)
#     instead of Kernel#open on a string;
#   * the dump of the whole page text to stdout was removed;
#   * table cells whose link does not contain a channel id are skipped
#     instead of raising or producing nil-keyed entries.
#
# img_dir_path - directory in which the list file is created.
#
# Returns {"channel_info" => {id => info Hash}, "channel_urls" => {id => href}}.
def self.getchannels(img_dir_path)
  @channel = []
  @site = DEFAULT_SITE
  @proxyindex = 0
  @img_down_dir_path = img_dir_path
  @img_down_file = File.new(File.join(img_dir_path, "channel_img_down_path"), 'w+')

  channel_urls = {}
  channel_info = {}
  begin
    DEFAULT_GrabtvType.each do |type|
      url = "#{@site}/program/duration/#{type}/w1.html"
      p url
      doc = Nokogiri::HTML(URI.parse(url).open)
      doc.css('td[class="tdchn"]').each do |td|
        channel_name = td.content
        herf = ""
        # When a cell holds several links the last one wins, as before.
        td.css('a').each { |a| herf = a['href'] }

        # The id is the second "-"-separated token of the third "/" segment
        # of the href.
        file_part = herf.to_s.split("/")[2]
        channel_id = file_part && file_part.split("-")[1]
        next unless channel_id

        # Location of the channel logo.
        img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
        @img_down_file.puts("#{channel_id}:#{img_path}")
        @channel << ({channel_id=>{name:channel_name,herf:herf,type:type}})
        channel_info[channel_id] = {"channel_name"=>channel_name,"channel_type"=>type,"channel_id"=>channel_id,"img_path"=>img_path}
        channel_urls[channel_id] = herf
      end
    end
  ensure
    @img_down_file.close
  end
  p "Channel: #{@channel}"
  {"channel_info"=>channel_info,"channel_urls"=>channel_urls}
end
254
+
255
# Download `url` through one of the given proxies and parse it as HTML.
#
# The proxy is chosen round-robin via @proxyindex, which is advanced after
# every attempt. A failed attempt is retried with the next proxy, up to four
# attempts in total.
#
# Changes from the previous version:
#   * the document obtained by a retry is returned (it used to be discarded,
#     so nil came back even after a successful retry);
#   * an empty or nil proxy list means a direct connection instead of a
#     ZeroDivisionError;
#   * each retry moves on to the next proxy instead of reusing the failing one;
#   * the attempt counter is local, so a failure in one call no longer
#     shortens the retry budget of later calls;
#   * the page is opened through URI.parse(url).open rather than Kernel#open.
#
# proxylist - Array of "host:port" Strings (may be empty or nil).
# url       - absolute URL String.
#
# Returns the Nokogiri document.
# Raises RuntimeError ("Error: <cause>") when all four attempts fail.
def self.get_doc_with_proxy(proxylist, url)
  proxies = proxylist || []
  @proxyindex ||= 0
  max_attempts = 4
  attempts = 0
  proxy = nil

  begin
    @proxyindex %= proxies.size unless proxies.empty?
    proxy = proxies.empty? ? nil : proxies[@proxyindex]
    target = URI.parse(url)
    doc = if proxy.nil? || proxy.empty?
            Nokogiri::HTML(target.open)
          else
            Nokogiri::HTML(target.open(:proxy => "http://#{proxy}"))
          end
    @proxyindex += 1
    doc
  rescue => err
    attempts += 1
    p "*************************Proxy:#{proxy}, url:#{url}"
    @proxyindex += 1
    raise RuntimeError, "Error: #{err.to_s}" unless attempts < max_attempts
    retry
  end
end
288
+
289
# Scrape the programme list of one channel page (one day).
#
# Sets @date to "<week>(<date>)" taken from the page header; callers use it
# as the key for the returned list.
#
# Changes from the previous version:
#   * the genre list is reset for every programme (it used to carry over
#     from the previous programme when an entry had no detail link);
#   * the result of get_show_infomation is only read when it is a Hash
#     (that method may hand back its error message String);
#   * a missing document, header or programme list yields an empty Array
#     (with @date left untouched) instead of a NoMethodError.
#
# channel   - channel id, used for logging and the default logo URL.
# url       - absolute URL of the day page.
# proxylist - Array of "host:port" proxies.
#
# Returns an Array of Hashes with the keys "schedule_name", "schedule_logo",
# "schedule_start", "show_infomation_herf", "type" and "name".
def self.get_schedulelist_atday(channel,url,proxylist)
  p "Grab: #{url}"
  doc = get_doc_with_proxy(proxylist,url)
  schedule_list = []
  return schedule_list unless doc

  header = doc.css('div[class="mt10 clear"]')[0]
  rows = doc.css('ul[id="pgrow"]')[0]
  unless header && rows
    p "Unexpected page layout, url:#{url}"
    return schedule_list
  end

  data = header.content.split(" ")
  date = data[0]
  week = data[1]
  p "Channel: #{channel} Date: #{date} Week: #{week}"
  @date = "#{week}(#{date})"

  # Default logo location, replaced by the image in the page title if present.
  img_url = "http://static.haotv.me/channel/logo/" + channel + ".jpg"
  logo = doc.css("h1[style='float:left']").xpath('img[@src]')[0]
  img_url = logo.get_attribute("src") if logo
  p "**************IMG: #{img_url}"

  rows.css("li").each do |item|
    link = item.xpath('a[@href]')[0]
    schedule_herf = link.get_attribute("href") if link
    unless link
      drama = item.css('a[class="drama"]')[0]
      if drama
        # Keep only the part of the drama link before "/episode/section".
        schedule_herf = drama.get_attribute("href").gsub("/episode/section","#%#").split("#%#")[0]
      end
    end

    fields = item.content.split(" ")
    next unless fields.size > 1

    time = fields[0]
    schedule = fields[1]
    show_type = []
    show_name = ""
    show_img = nil
    unless schedule_herf.nil? || schedule_herf.empty?
      show_infomation = get_show_infomation(proxylist,schedule_herf)
      if show_infomation.is_a?(Hash)
        show_type = show_infomation["type"]
        show_name = show_infomation["name"]
        show_img = show_infomation["img"]
      end
    end
    p "Time: #{time} schedule: #{schedule} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
    schedule_list << {"schedule_name"=>schedule,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
  end
  schedule_list
end
339
+
340
# Build the day-page URLs for a channel over a given span of days.
#
# The page index of the first day is today's Time#wday plus start_time
# (replaced by 1 when that sum is negative); the last index is capped at
# today's wday + 7. Each URL is the site root, the first two "-"-separated
# parts of `url`, and "w<index>.html".
# NOTE(review): on a Sunday with start_time 0 the first index is 0
# ("w0.html") — confirm the site serves that page.
#
# url        - channel href such as "/program/HUNANTV-HUNANTV-w1.html".
# start_time - Integer offset in days from today (positive = later,
#              negative = earlier).
# day_num    - Integer number of days, counted from the start day.
#
# Returns an Array of URL Strings (empty when the span is empty).
def self.get_assign_date_url(url,start_time,day_num)
  base = @site ? @site : "http://www.tvmao.com"
  parts = url.split("-")

  today = Time.now.wday
  first_index = today + start_time
  first_index = 1 if first_index < 0
  last_index = [first_index + day_num - 1, today + 7].min

  prefix = base + (0..1).map { |position| "#{parts[position]}-" }.join
  (first_index..last_index).map { |index| "#{prefix}w#{index}.html" }
end
375
+
376
+
377
+
378
# Grab the schedule of one channel for a chosen span of days.
#
# Changes from the previous version:
#   * when no directory is known, the list file goes into the directory of
#     this source file (the old fallback joined the file name onto __FILE__
#     itself, which is not a directory);
#   * the list file is closed in an ensure block, even when scraping raises;
#   * unused locals were removed.
#
# channel           - channel id.
# herf              - channel page href.
# proxylist         - Array of "host:port" proxies.
# start_num         - Integer offset in days from today for the first day
#                     (positive = later, negative = earlier).
# day_num           - number of days to grab; anything that is not a number
#                     >= 1 is treated as 1.
# img_dir_down_path - directory for the "schedule_img_down_path" file;
#                     defaults to the directory stored by getchannels.
#
# Returns {"channel_schedule" => {"<week>(<date>)" => entries},
#          "show_schedule"    => {show name => airing list}}.
def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
  day_num = 1 unless day_num.is_a?(Numeric) && day_num >= 1
  img_dir_down_path ||= File.dirname(__FILE__)

  @show_schedule = {}
  channel_schedule = {}
  @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
  begin
    get_assign_date_url(herf,start_num,day_num).each do |url|
      # get_schedulelist_atday fills @date; pages that leave it empty are skipped.
      @date = ""
      schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
      channel_schedule[@date] = schedule_list unless @date.empty?
    end
  ensure
    @img_down_file.close
  end
  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
end
406
+
407
+
408
+
409
+
410
+
411
# Grab the schedule of one channel for pages w1..w<day_num>.
# Kept because existing callers still use it.
#
# Changes from the previous version:
#   * when no directory is known, the list file goes into the directory of
#     this source file (the old fallback joined the file name onto __FILE__
#     itself, which is not a directory);
#   * the list file is closed in an ensure block, even when scraping raises;
#   * unused locals were removed.
#
# channel           - channel id.
# herf              - channel page href such as "/program/HUNANTV-HUNANTV-w1.html".
# proxylist         - Array of "host:port" proxies.
# day_num           - number of day pages to grab (default 7); anything that
#                     is not a number >= 1 is treated as 1.
# img_dir_down_path - directory for the "schedule_img_down_path" file;
#                     defaults to the directory stored by getchannels.
#
# Returns {"channel_schedule" => {"<week>(<date>)" => entries},
#          "show_schedule"    => {show name => airing list}}.
def self.getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
  p "Day Num is #{day_num}"
  day_num = 1 unless day_num.is_a?(Numeric) && day_num >= 1
  img_dir_down_path ||= File.dirname(__FILE__)
  site = @site || "http://www.tvmao.com"

  # Site root plus the first two "-"-separated parts of the href.
  parts = herf.split("-")
  prefix = site + (0..1).map { |position| "#{parts[position]}-" }.join
  urls = []
  1.upto(day_num) { |index| urls << "#{prefix}w#{index}.html" }

  @show_schedule = {}
  channel_schedule = {}
  @img_down_file = File.new(File.join(img_dir_down_path,"schedule_img_down_path"),"w+")
  begin
    urls.each do |url|
      # get_schedulelist_atday fills @date; pages that leave it empty are skipped.
      @date = ""
      schedule_list = self.get_schedulelist_atday(channel,url,proxylist)
      channel_schedule[@date] = schedule_list unless @date.empty?
    end
  ensure
    @img_down_file.close
  end
  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
end
454
+
455
+
456
# Fetch name, genres and poster image of one programme.
#
# Reads the programme page and its "/detail" page, collecting every element
# marked itemprop="genre" on both. Also appends "<name>:<image url>" to the
# open image list file and records the programme's airing list in
# @show_schedule the first time a name is seen.
#
# Changes from the previous version:
#   * on failure a Hash with empty values is returned; the old code returned
#     the logged message String, which callers then indexed like a Hash;
#   * the image list line is only written when @img_down_file is open, and
#     @show_schedule is created on demand, so calling this method on its own
#     no longer discards the scraped data.
#
# proxy_list    - Array of "host:port" proxies.
# schedule_herf - site-relative href of the programme page.
#
# Returns {"type" => Array of genre Strings, "name" => String, "img" => String or nil}.
def self.get_show_infomation(proxy_list,schedule_herf)
  # NOTE(review): resetting the index restarts proxy rotation for every
  # programme; kept as it was — confirm this is intended.
  @proxyindex = 0
  @site ||= "http://www.tvmao.com"
  schedule_herf = @site + schedule_herf

  # Genres appear either as <span> or as <a> elements.
  genres_of = lambda { |page|
    page.css('span[itemprop="genre"]').map { |node| node.content } +
      page.css('a[itemprop="genre"]').map { |node| node.content }
  }

  doc = get_doc_with_proxy(proxy_list,schedule_herf)
  name = doc.css('span[itemprop="name"]')[0].content

  # Poster image of the programme, when the page has one.
  poster = doc.css('img[class="tvc"]')[0]
  schedule_img_down_path = poster.get_attribute('src') if poster

  type = genres_of.call(doc)
  type.concat(genres_of.call(get_doc_with_proxy(proxy_list,"#{schedule_herf}/detail")))
  type.uniq!

  if @img_down_file && !@img_down_file.closed?
    @img_down_file.puts("#{name}:#{schedule_img_down_path}")
  end
  @show_schedule ||= {}
  @show_schedule.merge!(name=>get_show_schedule(proxy_list,schedule_herf)) unless @show_schedule.has_key?(name)
  {"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
rescue => e
  p "Error In get_show_infomation msg : #{e.to_s}"
  {"type"=>[],"name"=>"","img"=>nil}
end
499
+
500
# Fetch the airing list of a programme from its "/playingtime" page.
#
# The first row of the listing is skipped; every following row contributes
# one Hash. The time cell is split on whitespace into week, date and time.
#
# proxylist - Array of "host:port" proxies.
# herf      - absolute URL of the programme page.
#
# Returns an Array of Hashes with the keys "week", "date", "time",
# "channel_name" and "show_name".
def self.get_show_schedule(proxylist,herf)
  doc = get_doc_with_proxy(proxylist, herf + "/playingtime")
  rows = doc.css('div[id="epg"]')[0].css("div[class='c1 col']")

  schedule = []
  rows.each_with_index do |epg, position|
    next if position == 0

    time_cell = epg.css('div[class="f1 fld"]')[0].content
    channel_name = epg.css('div[class="f2 fld"]')[0].content
    show_name = epg.css('div[class="f3 fld"]')[0].content
    week, date, clock = time_cell.split(" ").values_at(0, 1, 2)
    schedule << {"week"=>week,"date"=>date,"time"=>clock,"channel_name"=>channel_name,"show_name"=>show_name}
  end
  schedule
end
521
+
522
+
523
+
524
+
525
# Select the proxies that can load the programme page fast enough.
#
# Every proxy from get_proxy_list is asked to fetch
# "http://www.tvmao.com/program"; it is kept when the request completes
# within use_time seconds.
#
# Changes from the previous version:
#   * a proxy answering in under one second is kept (whole-second timing
#     combined with a "> 0" check used to reject the fastest proxies);
#   * use_time alone decides the limit (a hard-coded 8 second cap used to
#     override larger values);
#   * only StandardError subclasses are rescued, so interrupts and exit
#     requests are no longer swallowed;
#   * Timeout.timeout replaces the bare `timeout` call.
#
# use_time - slowest accepted response time, in seconds.
#
# Returns an Array of "host:port" Strings.
def self.get_topfast_list(use_time)
  require 'timeout'

  fast_list = []
  get_proxy_list().each do |ip_port|
    started = Time.now
    begin
      Timeout.timeout(use_time) do
        Nokogiri::HTML(URI.parse("http://www.tvmao.com/program").open(:proxy=> "http://#{ip_port}"))
      end
      p "http://#{ip_port} use_time:#{Time.now - started}"
      fast_list << ip_port
    rescue Timeout::Error, Errno::ETIMEDOUT
      p "Use http://#{ip_port} timeout"
    rescue Errno::ECONNREFUSED
      p "Use http://#{ip_port} Error connection"
    rescue StandardError => e
      p "Use http://#{ip_port} Error:#{e.to_s}"
    end
  end
  fast_list
end
559
+
560
# Collect proxy addresses from the proxycn.cn listing pages.
#
# Uses the "30fastproxy" page and falls back to the "http" page when the
# first yields no rows. Proxies on port 3128 are left out, as before.
#
# Change from the previous version: rows in which the ip or the port cannot
# be extracted are skipped. A missing match used to pass the `ip != ""`
# check and produce entries such as ":8080" or "1.2.3.4:".
#
# Returns an Array of "ip:port" Strings.
def self.get_proxy_list()
  list = gg('http://www.proxycn.cn/html_proxy/30fastproxy-1.html')
  list = gg('http://www.proxycn.cn/html_proxy/http-1.html') if list.count == 0

  regex_port = /(?<=<TD class="list">)[0-9]*?(?=<\/TD>)/
  regex_ip = /(?<=a href\=whois.php\?whois\=)[0-9,.]*/
  ips_ports = []
  list.each do |proxy_txt|
    port = proxy_txt[regex_port].to_s
    ip = proxy_txt[regex_ip].to_s
    next if ip.empty? || port.empty? || port.eql?('3128')

    ips_ports << "#{ip}:#{port}"
  end
  p "Count: #{ips_ports.count}"
  ips_ports
end
580
+
581
# Download a page and pull out its proxy table rows.
#
# url - absolute URL String of a listing page.
#
# Returns an Array with every substring of the page matching
# /<TD class="list">.*<\/TD>/ (the match never spans a line break).
def self.gg(url)
  page = ""
  URI.parse(url).open do |io|
    io.each_line { |line| page = page + line + "\n" }
  end
  page.scan(/<TD class="list">.*<\/TD>/)
end
590
+
591
# Placeholder with no implementation; does nothing and returns nil.
def save_img
  nil
end
594
+
595
+ end