grab_epg 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.grabepg.gemspec +1 -1
- data/lib/debug.rb +12 -2
- data/lib/grabepg.rb +122 -11
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NmI0ZDZlY2FkNzVhNTNiNTQwYjU0NDNhOTNhYmUyZWUzY2ViN2U5OA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NzBkMjAxZmUwNDE1YzAzNWNmMTc1NjEwNjJlM2NhYjlmZmU2N2MzZg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MmZiNGVkM2JjMGUwZDNmMDE5OTc2NzE2NjFmMDJmMzgyODU2Nzg1ZjgxZjBl
|
10
|
+
NmVkMmU0MThhZmY1YjZhOWYxNDBlOWZjZDEwODNlYWI3MWFhNmY2ZDQ1OThl
|
11
|
+
NTFjM2VmMjgxOTc1MmNkY2EzMmU2YzNjN2JkNjA0M2FlNjllZmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZTBmMzY2MDkyZDEzZDVhMzQ0MDA4MmVjYTBkMGM2YmJhYTlhYzkzOWYyNjJj
|
14
|
+
ZjAxOTNmZjgwZTljNWI3MWQxMjY3Y2FmZjczODNhYWU4MGQ3YzZhNDRjOWM1
|
15
|
+
NTJlZWYxNDViNDgwZjMxNDMxZWNhYzY1NzA1OTg5ZGIwY2YyYWI=
|
data/.grabepg.gemspec
CHANGED
data/lib/debug.rb
CHANGED
@@ -5,7 +5,7 @@ require 'open-uri'
|
|
5
5
|
require File.expand_path("../grabepg.rb", __FILE__)
|
6
6
|
class Debug
|
7
7
|
# To change this template use File | Settings | File Templates.
|
8
|
-
proxylist = [""]
|
8
|
+
proxylist = ["219.234.82.84:24809", "219.234.82.84:17130", "219.234.82.84:23684", "219.234.82.84:18253", "219.234.82.84:33987", "219.234.82.84:17183", "219.234.82.84:13243", "219.234.82.84:16158", "219.234.82.84:14826", "219.234.82.84:8489", "219.234.82.84:22222", "219.234.82.84:6370", "219.234.82.84:7571", "219.234.82.84:33944", "219.234.82.84:9743", "219.234.82.84:8089", "219.234.82.84:20991", "219.234.82.84:34032", "219.234.82.84:9415", "219.234.82.84:26149", "219.234.82.84:11095", "219.234.82.84:21724", "219.234.82.84:9177", "219.234.82.84:34034", "219.234.82.84:17945", "219.234.82.85:32229", "219.234.82.85:28341", "219.234.82.85:36314", "219.234.82.85:30605", "219.234.82.85:23684", "219.234.82.85:34015", "219.234.82.85:33919", "219.234.82.85:30639", "219.234.82.85:33965", "219.234.82.85:37299", "219.234.82.85:20747", "219.234.82.86:6666", "219.234.82.86:34106", "219.234.82.86:25301", "219.234.82.86:32896", "219.234.82.86:23034", "219.234.82.86:22685", "219.234.82.86:13078", "219.234.82.86:38770", "219.234.82.86:28402", "219.234.82.86:18887", "219.234.82.86:6588", "219.234.82.86:7292", "219.234.82.86:24268", "219.234.82.86:16472", "219.234.82.86:32597", "219.234.82.86:31122", "219.234.82.88:8817", "219.234.82.88:8160", "219.234.82.88:9239", "219.234.82.88:6133", "114.141.162.53:8080", "123.125.116.243:17656", "123.125.116.241:29156", "123.125.116.243:6938", "219.234.82.88:29484", "219.234.82.88:8084", "219.234.82.88:32229", "219.234.82.88:22758", "219.234.82.88:5616", "124.225.52.14:8080", "219.234.82.88:30028", "219.234.82.88:23685", "219.234.82.88:29037", "219.234.82.88:8755"]
|
9
9
|
|
10
10
|
def self.test_get_doc_with_proxy(proxylist)
|
11
11
|
herf = "http://www.tvmao.com/drama/HS5oLCs="
|
@@ -33,9 +33,19 @@ class Debug
|
|
33
33
|
Grabepg.get_show_schedule(proxylist,herf)
|
34
34
|
end
|
35
35
|
|
36
|
+
def self.test_get_schedulelist_atday(proxylist)
|
37
|
+
Grabepg.get_schedulelist_atday("CCTV1"," http://www.tvmao.com/program/CCTV-CCTV1-w1.html",proxylist)
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.debug_all
|
41
|
+
Grabepg.start
|
42
|
+
end
|
43
|
+
|
36
44
|
#Grabepg.start
|
37
45
|
#p test_get_show_schedule(proxylist)
|
38
46
|
#p test_getschedule(proxylist)
|
39
47
|
# p test_get_show_infomation(proxylist)
|
40
|
-
p test_get_assign_date_url
|
48
|
+
#p test_get_assign_date_url
|
49
|
+
# p test_get_schedulelist_atday(proxylist)
|
50
|
+
p debug_all
|
41
51
|
end
|
data/lib/grabepg.rb
CHANGED
@@ -95,17 +95,111 @@ module Grabepg
|
|
95
95
|
|
96
96
|
#调用此方法的例子
|
97
97
|
def self.start
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
98
|
+
#作用是获取俩个字符串的相似度
|
99
|
+
#get str1 and str2 similarity
|
100
|
+
get_similarity_string = lambda { |str1,str2|
|
101
|
+
_length = 0
|
102
|
+
type = 0
|
103
|
+
if str1.length>str2.length
|
104
|
+
_length=str2.length
|
105
|
+
type = 2
|
106
|
+
else
|
107
|
+
_length=str1.length
|
108
|
+
type =1
|
109
|
+
end
|
110
|
+
_str_list = []
|
111
|
+
_str = ""
|
112
|
+
for i in 0.._length
|
113
|
+
case type
|
114
|
+
when 2
|
115
|
+
n=i
|
116
|
+
0.upto(str1.length-1).each do |j|
|
117
|
+
p "N: #{n}"
|
118
|
+
if(str2[n]==str1[j])
|
119
|
+
_str =_str+str2[n]
|
120
|
+
n = n+1
|
121
|
+
p "Str = #{_str}"
|
122
|
+
else
|
123
|
+
_str_list << _str
|
124
|
+
_str = ""
|
125
|
+
end
|
126
|
+
end
|
127
|
+
when 1
|
128
|
+
n=i
|
129
|
+
0.upto(str2.length-1).each do |j|
|
130
|
+
p "N: #{n}"
|
131
|
+
if(str1[n]==str2[j])
|
132
|
+
_str =_str+str1[n]
|
133
|
+
n=n+1
|
134
|
+
p "Str = #{_str}"
|
135
|
+
else
|
136
|
+
_str_list << _str
|
137
|
+
_str = ""
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
p _str_list
|
143
|
+
_str = ""
|
144
|
+
_str_list.each do |str|
|
145
|
+
if _str.length<str.length
|
146
|
+
_str=str
|
147
|
+
end
|
148
|
+
end
|
149
|
+
_str
|
150
|
+
}
|
151
|
+
|
152
|
+
|
153
|
+
path = "/home/zql/workspace/New/smart_remote/img_path"
|
154
|
+
channel_list = Grabepg.getchannels(path)
|
155
|
+
channel_urls = channel_list['channel_urls']
|
156
|
+
channel_infos = channel_list['channel_info']
|
157
|
+
p "Channel img save file,path='#{Grabepg.img_down_path}'"
|
158
|
+
proxy_list=Grabepg.get_topfast_list(5) #get_topfast_list 参数是代表最慢用时 单位秒
|
159
|
+
|
160
|
+
|
161
|
+
#Use for Test
|
162
|
+
|
163
|
+
p "************************************"
|
164
|
+
p "proxy_list:#{proxy_list}"
|
165
|
+
p "************************************"
|
166
|
+
|
167
|
+
bool_start = false
|
168
|
+
|
169
|
+
|
105
170
|
channel_urls.each do |channel,url|
|
106
|
-
|
171
|
+
|
172
|
+
if(channel=="CCTV16")
|
173
|
+
bool_start = true
|
174
|
+
end
|
175
|
+
|
176
|
+
if bool_start
|
177
|
+
previous_show_name = ""
|
178
|
+
channel_info = channel_infos[channel]
|
179
|
+
channel_name = channel_info["channel_name"]
|
180
|
+
channel_type = channel_info["channel_type"]
|
181
|
+
channel_id = channel_info["channel_id"]
|
182
|
+
channel_img_path = channel_info["img_path"]
|
183
|
+
|
184
|
+
#channel,herf,proxylist,day_num=7
|
185
|
+
|
186
|
+
|
187
|
+
start_time=0
|
188
|
+
use_num =1
|
189
|
+
|
190
|
+
#getScheduleAssignDate参数:
|
191
|
+
# channel 频道
|
192
|
+
# herf 频道地址
|
193
|
+
# proxylist 代理列表
|
194
|
+
# start_num 开始时间 int 为开始时间与今天的差值 正数代表今天之后的第几天 负数代表今天之前的第几天
|
195
|
+
# day_num 抓取的时间段天数
|
196
|
+
# img_dir_down_path 图片网络地址保存路径 有默认值 可不设置
|
197
|
+
schedule_list=Grabepg.getScheduleAssignDate(channel,url,proxy_list,start_time,use_num) #抓取的七天后的1天的数据
|
198
|
+
|
199
|
+
|
200
|
+
end
|
201
|
+
end
|
107
202
|
end
|
108
|
-
end
|
109
203
|
|
110
204
|
def self.img_down_path
|
111
205
|
@img_down_path
|
@@ -174,6 +268,11 @@ module Grabepg
|
|
174
268
|
doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
|
175
269
|
@no_firest = 0
|
176
270
|
rescue => err
|
271
|
+
|
272
|
+
unless @no_firest
|
273
|
+
@no_firest = 0
|
274
|
+
end
|
275
|
+
|
177
276
|
@no_firest += 1
|
178
277
|
p "*************************Proxy:#{proxy}, url:#{url}"
|
179
278
|
proxylist.delete(proxy)
|
@@ -192,13 +291,25 @@ module Grabepg
|
|
192
291
|
p "Grab: #{url}"
|
193
292
|
doc = get_doc_with_proxy(proxylist,url)
|
194
293
|
show_type = []
|
294
|
+
|
295
|
+
|
296
|
+
_img_url = "http://static.haotv.me/channel/logo/"
|
195
297
|
img_url = _img_url + channel+".jpg"
|
298
|
+
|
299
|
+
|
196
300
|
data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
|
197
301
|
date = data[0]
|
198
302
|
week = data[1]
|
199
303
|
p "Channel: #{channel} Date: #{date} Week: #{week}"
|
200
304
|
@date = "#{week}(#{date})"
|
201
305
|
schedule_list = []
|
306
|
+
|
307
|
+
_herf = doc.css("h1[style='float:left']").xpath('img[@src]')[0]
|
308
|
+
img_url = _herf.get_attribute("src") if _herf
|
309
|
+
|
310
|
+
p "**************IMG: #{img_url}"
|
311
|
+
|
312
|
+
|
202
313
|
doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
|
203
314
|
_herf= schedule.xpath('a[@href]')[0]
|
204
315
|
schedule_herf=_herf.get_attribute("href") if _herf
|
@@ -246,7 +357,7 @@ module Grabepg
|
|
246
357
|
wday = 1
|
247
358
|
end
|
248
359
|
|
249
|
-
end_day = wday + day_num
|
360
|
+
end_day = wday + day_num - 1
|
250
361
|
|
251
362
|
if end_day>(_wday+7)
|
252
363
|
end_day = _wday + 7
|
@@ -265,7 +376,7 @@ module Grabepg
|
|
265
376
|
|
266
377
|
|
267
378
|
#获取指定时间段的节目表
|
268
|
-
def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num,img_dir_down_path=@img_down_dir_path)
|
379
|
+
def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
|
269
380
|
begin
|
270
381
|
day_num = 1 if day_num<1
|
271
382
|
rescue
|