grab_epg 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NmI0ZDZlY2FkNzVhNTNiNTQwYjU0NDNhOTNhYmUyZWUzY2ViN2U5OA==
4
+ MGEyOWI5Y2YwMTY2NzY1OWVmYzYwNTdmNDc4NGY4M2RlMzg2NWIwYg==
5
5
  data.tar.gz: !binary |-
6
- NzBkMjAxZmUwNDE1YzAzNWNmMTc1NjEwNjJlM2NhYjlmZmU2N2MzZg==
6
+ M2Y5NzBiMzA2MTg5MGM4ZjkxMWU0N2I2MWM0OWJjZThjNmE3NDkyNQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MmZiNGVkM2JjMGUwZDNmMDE5OTc2NzE2NjFmMDJmMzgyODU2Nzg1ZjgxZjBl
10
- NmVkMmU0MThhZmY1YjZhOWYxNDBlOWZjZDEwODNlYWI3MWFhNmY2ZDQ1OThl
11
- NTFjM2VmMjgxOTc1MmNkY2EzMmU2YzNjN2JkNjA0M2FlNjllZmE=
9
+ MmI4NjQwYTY3MWJhYWRkMjA1NDk2MWQ4MDdhOTg1N2ZkYmQwOTVkZDdiNjBh
10
+ NmI3M2Y0OTI2ZDcxMzE2YzEyYTMwZGQ3OGMzYzY2YTg1NjViZmMwOTkxOTgx
11
+ NGVmMDUzZmE1OTVlNTc4M2MxMWI3NzlhZDRmMGZjOWJlOWZiYTI=
12
12
  data.tar.gz: !binary |-
13
- ZTBmMzY2MDkyZDEzZDVhMzQ0MDA4MmVjYTBkMGM2YmJhYTlhYzkzOWYyNjJj
14
- ZjAxOTNmZjgwZTljNWI3MWQxMjY3Y2FmZjczODNhYWU4MGQ3YzZhNDRjOWM1
15
- NTJlZWYxNDViNDgwZjMxNDMxZWNhYzY1NzA1OTg5ZGIwY2YyYWI=
13
+ YWJmNDIyNzYyNDExNzMzMjA5NjU3ZDhmYzAzMDQ2ZjYwY2U4MTU0Y2ZjMmQ0
14
+ YzRmMzljMDRmNDJlM2I5ZWI5NTJjMjRkZmZkZmY2YzVmYTMyODJkNTQyZDNj
15
+ ZWM3ZTNhNDFlN2FjYTA1MDE0N2Y3OWU5MGU4NWZlNTQ3MGQzMjU=
data/.grabepg.gemspec CHANGED
@@ -3,13 +3,13 @@
3
3
  Gem::Specification.new do |gem|
4
4
  gem.authors = ["hahazql"]
5
5
  gem.email = ["hahazhouqunli@gmail.com"]
6
- gem.description = %q{"用于从TVMAO抓取EPG信息"}
6
+ gem.description = %q{"用于抓取EPG信息"}
7
7
  gem.summary = %q{"Grab EPG"}
8
8
  gem.homepage = ""
9
9
 
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.1.6"
13
+ gem.version = "0.1.8"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/lib/debug.rb CHANGED
@@ -2,50 +2,11 @@
2
2
 
3
3
  require 'nokogiri'
4
4
  require 'open-uri'
5
- require File.expand_path("../grabepg.rb", __FILE__)
5
+ require File.expand_path("../test/test_grab_tvsou.rb", __FILE__)
6
+ #require 'test/test_grab_tvsou'
6
7
  class Debug
7
8
  # To change this template use File | Settings | File Templates.
8
- proxylist = ["219.234.82.84:24809", "219.234.82.84:17130", "219.234.82.84:23684", "219.234.82.84:18253", "219.234.82.84:33987", "219.234.82.84:17183", "219.234.82.84:13243", "219.234.82.84:16158", "219.234.82.84:14826", "219.234.82.84:8489", "219.234.82.84:22222", "219.234.82.84:6370", "219.234.82.84:7571", "219.234.82.84:33944", "219.234.82.84:9743", "219.234.82.84:8089", "219.234.82.84:20991", "219.234.82.84:34032", "219.234.82.84:9415", "219.234.82.84:26149", "219.234.82.84:11095", "219.234.82.84:21724", "219.234.82.84:9177", "219.234.82.84:34034", "219.234.82.84:17945", "219.234.82.85:32229", "219.234.82.85:28341", "219.234.82.85:36314", "219.234.82.85:30605", "219.234.82.85:23684", "219.234.82.85:34015", "219.234.82.85:33919", "219.234.82.85:30639", "219.234.82.85:33965", "219.234.82.85:37299", "219.234.82.85:20747", "219.234.82.86:6666", "219.234.82.86:34106", "219.234.82.86:25301", "219.234.82.86:32896", "219.234.82.86:23034", "219.234.82.86:22685", "219.234.82.86:13078", "219.234.82.86:38770", "219.234.82.86:28402", "219.234.82.86:18887", "219.234.82.86:6588", "219.234.82.86:7292", "219.234.82.86:24268", "219.234.82.86:16472", "219.234.82.86:32597", "219.234.82.86:31122", "219.234.82.88:8817", "219.234.82.88:8160", "219.234.82.88:9239", "219.234.82.88:6133", "114.141.162.53:8080", "123.125.116.243:17656", "123.125.116.241:29156", "123.125.116.243:6938", "219.234.82.88:29484", "219.234.82.88:8084", "219.234.82.88:32229", "219.234.82.88:22758", "219.234.82.88:5616", "124.225.52.14:8080", "219.234.82.88:30028", "219.234.82.88:23685", "219.234.82.88:29037", "219.234.82.88:8755"]
9
+ #proxylist = ["219.234.82.84:24809", "219.234.82.84:17130", "219.234.82.84:23684", "219.234.82.84:18253", "219.234.82.84:33987", "219.234.82.84:17183", "219.234.82.84:13243", "219.234.82.84:16158", "219.234.82.84:14826", "219.234.82.84:8489", "219.234.82.84:22222", "219.234.82.84:6370", "219.234.82.84:7571", "219.234.82.84:33944", "219.234.82.84:9743", "219.234.82.84:8089", "219.234.82.84:20991", "219.234.82.84:34032", "219.234.82.84:9415", "219.234.82.84:26149", "219.234.82.84:11095", "219.234.82.84:21724", "219.234.82.84:9177", "219.234.82.84:34034", "219.234.82.84:17945", "219.234.82.85:32229", "219.234.82.85:28341", "219.234.82.85:36314", "219.234.82.85:30605", "219.234.82.85:23684", "219.234.82.85:34015", "219.234.82.85:33919", "219.234.82.85:30639", "219.234.82.85:33965", "219.234.82.85:37299", "219.234.82.85:20747", "219.234.82.86:6666", "219.234.82.86:34106", "219.234.82.86:25301", "219.234.82.86:32896", "219.234.82.86:23034", "219.234.82.86:22685", "219.234.82.86:13078", "219.234.82.86:38770", "219.234.82.86:28402", "219.234.82.86:18887", "219.234.82.86:6588", "219.234.82.86:7292", "219.234.82.86:24268", "219.234.82.86:16472", "219.234.82.86:32597", "219.234.82.86:31122", "219.234.82.88:8817", "219.234.82.88:8160", "219.234.82.88:9239", "219.234.82.88:6133", "114.141.162.53:8080", "123.125.116.243:17656", "123.125.116.241:29156", "123.125.116.243:6938", "219.234.82.88:29484", "219.234.82.88:8084", "219.234.82.88:32229", "219.234.82.88:22758", "219.234.82.88:5616", "124.225.52.14:8080", "219.234.82.88:30028", "219.234.82.88:23685", "219.234.82.88:29037", "219.234.82.88:8755"]
9
10
 
10
- def self.test_get_doc_with_proxy(proxylist)
11
- herf = "http://www.tvmao.com/drama/HS5oLCs="
12
- Grabepg.get_doc_with_proxy(proxylist,herf)
13
- end
14
-
15
- def self.test_get_show_infomation(proxylist)
16
- herf = "http://www.tvmao.com/tvcolumn/cVhPLQ=="
17
- Grabepg.get_show_infomation(proxylist,herf)
18
- end
19
-
20
- def self.test_getschedule(proxylist)
21
- channel = "HUNANTV"
22
- herf = "/program/HUNANTV-HUNANTV-w1.html"
23
- Grabepg.getschedule(channel,herf,proxylist,1)
24
- end
25
-
26
- def self.test_get_assign_date_url
27
- herf = "/program/HUNANTV-HUNANTV-w1.html"
28
- Grabepg.get_assign_date_url(herf,7,1)
29
- end
30
-
31
- def self.test_get_show_schedule(proxylist)
32
- herf = "http://www.tvmao.com/tvcolumn/cVhPLQ=="
33
- Grabepg.get_show_schedule(proxylist,herf)
34
- end
35
-
36
- def self.test_get_schedulelist_atday(proxylist)
37
- Grabepg.get_schedulelist_atday("CCTV1"," http://www.tvmao.com/program/CCTV-CCTV1-w1.html",proxylist)
38
- end
39
-
40
- def self.debug_all
41
- Grabepg.start
42
- end
43
-
44
- #Grabepg.start
45
- #p test_get_show_schedule(proxylist)
46
- #p test_getschedule(proxylist)
47
- # p test_get_show_infomation(proxylist)
48
- #p test_get_assign_date_url
49
- # p test_get_schedulelist_atday(proxylist)
50
- p debug_all
11
+ p TestGrabTvsou.start
51
12
  end
data/lib/grab_tvmao.rb ADDED
@@ -0,0 +1,595 @@
1
+ #encoding:utf-8
2
+
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+
6
# Scraper for TV channel lists and programme schedules published on tvmao.com.
#
# All entry points are module-level methods (GrabTvmao.xxx). The module keeps
# scraping state (site, proxy rotation index, current date label, collected
# show schedules, image-list file handle) in module instance variables, so it
# is not safe to drive concurrently from several threads.
module GrabTvmao
  # Timeout.timeout is used by get_topfast_list; required here so the module
  # does not depend on open-uri having lazily loaded it.
  require 'timeout'

  attr_reader :channel        # list of channels
  attr_reader :site           # site base address
  attr_reader :proxyindex     # index of the proxy to use next
  attr_reader :show_schedule  # per-show airing timetable
  attr_reader :img_down_path  # where image download addresses are stored

  DEFAULT_GrabtvType=["cctv","satellite","digital",]
  DEFAULT_SITE = "http://www.tvmao.com"

  # Total number of download attempts made for one URL before giving up.
  MAX_FETCH_ATTEMPTS = 4

  # Suffix appended to "星期" for each weekday number. 0 is Sunday as returned
  # by Time#wday; 7 keeps its historical mapping for callers that pass it.
  WEEKDAY_SUFFIXES = {
    0 => "日", 1 => "一", 2 => "二", 3 => "三",
    4 => "四", 5 => "五", 6 => "六", 7 => "七"
  }

  # Converts a weekday number into its Chinese name.
  #
  # whatday - anything responding to to_i; 0 (Time#wday Sunday) through 7.
  #
  # Returns "星期" plus the weekday suffix, or a bare "星期" for numbers
  # outside 0..7.
  def self.conversion_what_day(whatday)
    "星期" + WEEKDAY_SUFFIXES.fetch(whatday.to_i, "")
  end

  # Left-pads a one-character value with "0" so months/days are two digits.
  #
  # Returns a String.
  def self.dispose_time(num)
    text = num.to_s
    text.length < 2 ? "0" + text : text
  end

  # Formats a Time as "<weekday>(<MM>-<DD>)", e.g. "星期一(06-10)".
  def self.get_week_date_time(time)
    conversion_what_day(time.wday) + "(" + dispose_time(time.month) + "-" + dispose_time(time.day) + ")"
  end

  # Returns the number of seconds contained in day_num days.
  def self.del_day_num(day_num)
    day_num*60*60*24
  end

  # Returns the formatted date label (see get_week_date_time) of the day that
  # lies num days before now.
  def self.get_time_day_prior(num)
    get_week_date_time(Time.now - del_day_num(num))
  end

  # Returns the date labels of today and the seven days before it when today
  # is a Monday, otherwise an empty Array. Intended as the list of last week's
  # dates to delete.
  def self.del_time_list
    return [] unless Time.now.wday == 1
    (0..7).map { |days_back| get_time_day_prior(days_back) }
  end

  # Example driver: grabs the channel list, picks responsive proxies and then
  # fetches today's schedule for every channel from first_channel onwards.
  #
  # path          - directory that receives the image address list files.
  # first_channel - channel id at which grabbing starts; channels listed
  #                 before it are skipped.
  #
  # Returns the Hash of channel id => channel page path.
  def self.start(path = "/home/zql/workspace/New/smart_remote/img_path", first_channel = "CCTV16")
    channel_list = getchannels(path)
    channel_urls = channel_list['channel_urls']
    p "Channel img save file,path='#{img_down_path}'"
    # The argument of get_topfast_list is the slowest accepted response time in seconds.
    proxy_list = get_topfast_list(5)

    p "************************************"
    p "proxy_list:#{proxy_list}"
    p "************************************"

    started = false
    channel_urls.each do |channel, url|
      started = true if channel == first_channel
      next unless started
      # start offset 0 and a length of 1 day: today's schedule only.
      getScheduleAssignDate(channel, url, proxy_list, 0, 1)
    end
  end

  # Returns the path of the file that getchannels wrote the channel logo
  # addresses to, or nil when getchannels has not run yet.
  def self.img_down_path
    @img_down_path
  end

  # Fetches the channel table of the site for every type in DEFAULT_GrabtvType.
  #
  # img_dir_path - directory in which the "channel_img_down_path" file
  #                ("<channel id>:<logo url>" per line) is written.
  #
  # Returns {"channel_info" => {id => info Hash}, "channel_urls" => {id => href}}.
  def self.getchannels(img_dir_path)
    @channel = []
    @site = DEFAULT_SITE
    @proxyindex = 0
    @img_down_dir_path = img_dir_path
    @img_down_path = File.join(img_dir_path, "channel_img_down_path")

    channel_urls = {}
    channel_info = {}

    # Block form guarantees the file is closed even if a download fails.
    File.open(@img_down_path, 'w+') do |img_file|
      DEFAULT_GrabtvType.each do |type|
        url = @site + "/program/duration/#{type}/w1.html"
        p url
        doc = Nokogiri::HTML(URI.parse(url).open)
        p doc.content
        p "*************************************************************"
        doc.css('td[class="tdchn"]').each do |td|
          channel_name = td.content
          anchors = td.css('a').to_a
          herf = anchors.empty? ? "" : anchors.last['href']
          channel_id = channel_id_from(herf)
          # A cell without a usable link cannot be grabbed later; skip it
          # instead of registering a nil channel id.
          next if channel_id.nil?

          # Address of the channel logo.
          img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
          img_file.puts("#{channel_id}:#{img_path}")
          @channel << ({channel_id=>{name:channel_name,herf:herf,type:type}})
          channel_info[channel_id] = {"channel_name"=>channel_name,"channel_type"=>type,"channel_id"=>channel_id,"img_path"=>img_path}
          channel_urls[channel_id] = herf
        end
      end
    end
    p "Channel: #{@channel}"
    {"channel_info"=>channel_info,"channel_urls"=>channel_urls}
  end

  # Downloads url and parses it as HTML, going through the proxies in
  # proxylist in round-robin order. With a nil/empty list, or a blank proxy
  # entry, the page is fetched directly.
  #
  # A failed attempt moves on to the next proxy; after MAX_FETCH_ATTEMPTS
  # failures a RuntimeError carrying the last error message is raised.
  #
  # Returns the parsed Nokogiri document.
  def self.get_doc_with_proxy(proxylist,url)
    proxies = Array(proxylist)
    @proxyindex ||= 0
    attempts = 0
    proxy = nil
    begin
      attempts += 1
      unless proxies.empty?
        proxy = proxies[@proxyindex % proxies.size]
        # Advance before fetching so a retry uses a different proxy.
        @proxyindex = (@proxyindex + 1) % proxies.size
      end
      options = (proxy.nil? || proxy.empty?) ? {} : {:proxy=>"http://#{proxy}"}
      Nokogiri::HTML(URI.parse(url).open(options))
    rescue => err
      p "*************************Proxy:#{proxy}, url:#{url}"
      retry if attempts < MAX_FETCH_ATTEMPTS
      raise RuntimeError,"Error: #{err.to_s}"
    end
  end

  # Grabs the programme list of one channel page (one day).
  #
  # Sets @date to "<week>(<date>)" as read from the page so the caller can
  # key the result. When the page lacks the expected date header or programme
  # list, @date is left untouched and an empty Array is returned.
  #
  # Returns an Array of Hashes with the keys "schedule_name", "schedule_logo",
  # "schedule_start", "show_infomation_herf", "type" and "name".
  def self.get_schedulelist_atday(channel,url,proxylist)
    p "Grab: #{url}"
    doc = get_doc_with_proxy(proxylist,url)

    header = doc.css('div[class="mt10 clear"]')[0]
    listing = doc.css('ul[id="pgrow"]')[0]
    unless header && listing
      p "Unexpected page layout, url:#{url}"
      return []
    end

    date, week = header.content.split(" ")
    p "Channel: #{channel} Date: #{date} Week: #{week}"
    @date = "#{week}(#{date})"

    # Default logo address, overridden by the logo found on the page.
    img_url = "http://static.haotv.me/channel/logo/#{channel}.jpg"
    logo = doc.css("h1[style='float:left']").xpath('img[@src]')[0]
    img_url = logo.get_attribute("src") if logo
    p "**************IMG: #{img_url}"

    schedule_list = []
    listing.css("li").each do |item|
      link = item.xpath('a[@href]')[0]
      schedule_herf = link ? link.get_attribute("href") : drama_href(item)

      fields = item.content.split(" ")
      next unless fields.size > 1
      time = fields[0]
      title = fields[1]

      # Reset per entry so details of the previous show never leak into an
      # entry that has no detail page.
      show_type = []
      show_name = ""
      show_img = nil
      unless schedule_herf.nil? || schedule_herf.empty?
        show_infomation = get_show_infomation(proxylist,schedule_herf)
        show_type = show_infomation["type"]
        show_name = show_infomation["name"]
        show_img = show_infomation["img"]
      end
      p "Time: #{time} schedule: #{title} show_infomation_herf: #{schedule_herf} type: #{show_type} name: #{show_name} img:#{show_img}"
      schedule_list << {"schedule_name"=>title,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
    end
    schedule_list
  end

  # Builds the day page URLs of a channel for a window of days.
  #
  # url        - channel page path such as "/program/HUNANTV-HUNANTV-w1.html".
  # start_time - Integer offset of the first day relative to today
  #              (positive: later, negative: earlier).
  # day_num    - Integer number of days to cover from the first day.
  #
  # Pages are numbered w1 (Monday) .. w7 (Sunday), so Time#wday's Sunday (0)
  # is treated as 7. The first day is clamped to 1 and the last day to one
  # week after today.
  #
  # Returns an Array of URL Strings.
  def self.get_assign_date_url(url,start_time,day_num)
    today = Time.now.wday
    today = 7 if today.zero?

    first_day = [today + start_time, 1].max
    last_day = [first_day + day_num - 1, today + 7].min

    prefix = day_url_prefix(url)
    (first_day..last_day).map { |day| prefix + "w#{day}.html" }
  end

  # Grabs the schedule of a channel for a window of days.
  #
  # channel           - channel id.
  # herf              - channel page path.
  # proxylist         - Array of "ip:port" proxies (may be empty).
  # start_num         - offset of the first day relative to today.
  # day_num           - number of days to grab (values below 1 or non-numeric
  #                     values are treated as 1).
  # img_dir_down_path - directory for the "schedule_img_down_path" file;
  #                     defaults to the directory given to getchannels, or to
  #                     this file's directory when that is unset.
  #
  # Returns {"channel_schedule" => {date label => programme list},
  #          "show_schedule" => {show name => airing timetable}}.
  def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
    day_num = normalize_day_count(day_num)
    grab_schedule_for(channel, proxylist, img_dir_down_path) do
      get_assign_date_url(herf,start_num,day_num)
    end
  end

  # Kept because existing callers use it.
  # Grabs the schedule pages w1..w<day_num> of a channel (a week by default).
  # Parameters and return value are as for getScheduleAssignDate.
  def self.getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
    p "Day Num is #{day_num}"
    day_num = normalize_day_count(day_num)
    grab_schedule_for(channel, proxylist, img_dir_down_path) do
      prefix = day_url_prefix(herf)
      (1..day_num).map { |day| prefix + "w#{day}.html" }
    end
  end

  # Fetches the detail information of one show.
  #
  # proxy_list    - Array of "ip:port" proxies (may be empty).
  # schedule_herf - show page path relative to the site.
  #
  # Side effects: resets the proxy rotation, appends "<name>:<image url>" to
  # the currently open image list file (if any) and records the show's airing
  # timetable in @show_schedule.
  #
  # Returns {"type" => Array of genres, "name" => String, "img" => String or
  # nil}. On any error the message is printed and a Hash with an empty type
  # list, empty name and nil image is returned.
  def self.get_show_infomation(proxy_list,schedule_herf)
    @proxyindex = 0
    @site ||= DEFAULT_SITE
    show_url = @site + schedule_herf
    doc = get_doc_with_proxy(proxy_list,show_url)
    name = doc.css('span[itemprop="name"]')[0].content

    # Poster image of the show, when the page has one.
    poster = doc.css('img[class="tvc"]')[0]
    schedule_img_down_path = poster ? poster.get_attribute('src') : nil

    # Genres are listed on both the overview and the detail page.
    type = genres_in(doc)
    type.concat(genres_in(get_doc_with_proxy(proxy_list,"#{show_url}/detail")))
    type.uniq!

    if @img_down_file && !@img_down_file.closed?
      @img_down_file.puts("#{name}:#{schedule_img_down_path}")
    end
    @show_schedule ||= {}
    @show_schedule[name] = get_show_schedule(proxy_list,show_url) unless @show_schedule.has_key?(name)
    {"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
  rescue => e
    p "Error In get_show_infomation msg : #{e.to_s}"
    {"type"=>[],"name"=>"","img"=>nil}
  end

  # Fetches the airing timetable of a show from "<herf>/playingtime".
  #
  # Returns an Array of Hashes with the keys "week", "date", "time",
  # "channel_name" and "show_name"; empty when the page has no timetable.
  def self.get_show_schedule(proxylist,herf)
    doc = get_doc_with_proxy(proxylist,herf + "/playingtime")
    epg = doc.css('div[id="epg"]')[0]
    return [] unless epg

    # The first row is skipped, as in the original implementation
    # (presumably the table header).
    epg.css("div[class='c1 col']").to_a.drop(1).map do |row|
      week, date, clock = row.css('div[class="f1 fld"]')[0].content.split(" ")
      channel_name = row.css('div[class="f2 fld"]')[0].content
      show_name = row.css('div[class="f3 fld"]')[0].content
      {"week"=>week,"date"=>date,"time"=>clock,"channel_name"=>channel_name,"show_name"=>show_name}
    end
  end

  # Returns the proxies from get_proxy_list that deliver the tvmao programme
  # page within use_time seconds.
  #
  # use_time - slowest accepted response time in seconds.
  def self.get_topfast_list(use_time)
    get_proxy_list.select do |ip_port|
      started = Time.now
      begin
        Timeout.timeout(use_time) do
          Nokogiri::HTML(URI.parse("http://www.tvmao.com/program").open(:proxy=>"http://#{ip_port}"))
        end
        p "http://#{ip_port} use_time:#{(Time.now - started).round(2)}"
        # Finishing inside the timeout is what qualifies a proxy.
        true
      rescue Timeout::Error, StandardError => e
        case e
        when Errno::ETIMEDOUT, Timeout::Error
          p "Use http://#{ip_port} timeout"
        when Errno::ECONNREFUSED
          p "Use http://#{ip_port} Error connection"
        else
          p "Use http://#{ip_port} Error:#{e.to_s}"
        end
        false
      end
    end
  end

  # Scrapes a public proxy list.
  #
  # Rows without an IP or port, and proxies on port 3128, are left out.
  #
  # Returns an Array of "ip:port" Strings.
  def self.get_proxy_list()
    rows = gg('http://www.proxycn.cn/html_proxy/30fastproxy-1.html')
    rows = gg('http://www.proxycn.cn/html_proxy/http-1.html') if rows.empty?

    regex_port = /(?<=<TD class="list">)[0-9]*?(?=<\/TD>)/
    regex_ip = /(?<=a href\=whois.php\?whois\=)[0-9,.]*/

    ips_ports = []
    rows.each do |proxy_txt|
      port = proxy_txt[regex_port].to_s
      ip = proxy_txt[regex_ip].to_s
      next if ip.empty? || port.empty? || port.eql?('3128')
      ips_ports << ip + ":" + port
    end
    p "Count: #{ips_ports.count}"
    ips_ports
  end

  # Downloads url and returns every '<TD class="list">...</TD>' span found in
  # it (one match per source line) as an Array of Strings.
  def self.gg(url)
    html = URI.parse(url).open { |page| page.read }
    html.scan(/<TD class="list">.*<\/TD>/)
  end

  # Placeholder for saving images; intentionally does nothing yet.
  def save_img

  end

  # Extracts the channel id from a channel page path such as
  # "/program/HUNANTV-HUNANTV-w1.html" ("HUNANTV"). Returns nil when the path
  # is blank or does not have that shape.
  def self.channel_id_from(herf)
    return nil if herf.nil? || herf.empty?
    page = herf.split("/")[2]
    page && page.split("-")[1]
  end

  # Returns the show path of a list item's "drama" link, i.e. its href up to
  # "/episode/section", or nil when the item has no such link.
  def self.drama_href(item)
    drama = item.css('a[class="drama"]')[0]
    return nil unless drama
    drama.get_attribute("href").to_s.gsub("/episode/section","#%#").split("#%#")[0]
  end

  # Returns "<site><first two dash-separated parts of url>-", the common
  # prefix of all day pages of a channel.
  def self.day_url_prefix(url)
    parts = url.split("-")
    "#{@site || DEFAULT_SITE}#{parts[0]}-#{parts[1]}-"
  end

  # Returns day_num when it is a number of at least 1, otherwise 1.
  def self.normalize_day_count(day_num)
    (day_num.is_a?(Numeric) && day_num >= 1) ? day_num : 1
  end

  # Collects the text of all genre elements (spans first, then links) of doc.
  def self.genres_in(doc)
    doc.css('span[itemprop="genre"]').map { |node| node.content } +
      doc.css('a[itemprop="genre"]').map { |node| node.content }
  end

  # Shared body of getschedule / getScheduleAssignDate: opens the image list
  # file, grabs every URL produced by the block and keys the programme lists
  # by the date label found on each page.
  def self.grab_schedule_for(channel, proxylist, img_dir_down_path)
    # Fall back to the directory of this file when no directory is known.
    img_dir_down_path ||= File.dirname(__FILE__)
    @show_schedule = {}
    channel_schedule = {}

    # Block form guarantees the file is closed even if grabbing raises.
    File.open(File.join(img_dir_down_path,"schedule_img_down_path"),"w+") do |img_file|
      @img_down_file = img_file
      yield.each do |url|
        @date = ""
        schedule_list = get_schedulelist_atday(channel,url,proxylist)
        channel_schedule[@date] = schedule_list unless @date.empty?
      end
    end
    {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
  end

  private_class_method :channel_id_from, :drama_href, :day_url_prefix,
                       :normalize_day_count, :genres_in, :grab_schedule_for
end