grab_epg 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5)
  1. checksums.yaml +8 -8
  2. data/.grabepg.gemspec +1 -1
  3. data/lib/debug.rb +12 -2
  4. data/lib/grabepg.rb +122 -11
  5. metadata +1 -1
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NjYwZWJkYTFmZjM2ODEyNTlmNzdhMmRjMDJiMjczYzIzMGE2MTMyMg==
4
+ NmI0ZDZlY2FkNzVhNTNiNTQwYjU0NDNhOTNhYmUyZWUzY2ViN2U5OA==
5
5
  data.tar.gz: !binary |-
6
- MjIwNmU3YThlM2E5NTRlMzBiYzNiNTdlOGZkYzFkZmFjMGNhMGY0ZA==
6
+ NzBkMjAxZmUwNDE1YzAzNWNmMTc1NjEwNjJlM2NhYjlmZmU2N2MzZg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MTU0MjBiYzNhZWYxOGY0OTEwMTIwMzkwNmQ5ZWFkNzc4MzA4OGQxNzkzMTIw
10
- MzdjZDMxNDNiMzZhOThlODQ0ZDRhZDNiNDkyYzc4N2UwZDFkMGYxYjFhZmJl
11
- NjRmMjdiZTBjMzIxNzYyZjdiMGVhMzNjZjdjNWQzZmFkYjU5MWQ=
9
+ MmZiNGVkM2JjMGUwZDNmMDE5OTc2NzE2NjFmMDJmMzgyODU2Nzg1ZjgxZjBl
10
+ NmVkMmU0MThhZmY1YjZhOWYxNDBlOWZjZDEwODNlYWI3MWFhNmY2ZDQ1OThl
11
+ NTFjM2VmMjgxOTc1MmNkY2EzMmU2YzNjN2JkNjA0M2FlNjllZmE=
12
12
  data.tar.gz: !binary |-
13
- NTRhZTA3YTdlZGIyZDZkZGIxNzViYWI0OTY4N2FhNDAyN2QyM2E3ZjE2Mzkx
14
- ZmY5NjlhNjNjNGM3MzMyNzk1YTY2ZWQ0ZWM2MDhkN2Q1OTlkOWE4OGQ3YjUy
15
- MGYxNTYxNWY3YmE5NWMyMDcwNDEwYWNkNDJkNWM2NDM5MTk3ZGU=
13
+ ZTBmMzY2MDkyZDEzZDVhMzQ0MDA4MmVjYTBkMGM2YmJhYTlhYzkzOWYyNjJj
14
+ ZjAxOTNmZjgwZTljNWI3MWQxMjY3Y2FmZjczODNhYWU4MGQ3YzZhNDRjOWM1
15
+ NTJlZWYxNDViNDgwZjMxNDMxZWNhYzY1NzA1OTg5ZGIwY2YyYWI=
data/.grabepg.gemspec CHANGED
@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.1.5"
13
+ gem.version = "0.1.6"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/lib/debug.rb CHANGED
@@ -5,7 +5,7 @@ require 'open-uri'
5
5
  require File.expand_path("../grabepg.rb", __FILE__)
6
6
  class Debug
7
7
  # To change this template use File | Settings | File Templates.
8
- proxylist = [""]
8
+ proxylist = ["219.234.82.84:24809", "219.234.82.84:17130", "219.234.82.84:23684", "219.234.82.84:18253", "219.234.82.84:33987", "219.234.82.84:17183", "219.234.82.84:13243", "219.234.82.84:16158", "219.234.82.84:14826", "219.234.82.84:8489", "219.234.82.84:22222", "219.234.82.84:6370", "219.234.82.84:7571", "219.234.82.84:33944", "219.234.82.84:9743", "219.234.82.84:8089", "219.234.82.84:20991", "219.234.82.84:34032", "219.234.82.84:9415", "219.234.82.84:26149", "219.234.82.84:11095", "219.234.82.84:21724", "219.234.82.84:9177", "219.234.82.84:34034", "219.234.82.84:17945", "219.234.82.85:32229", "219.234.82.85:28341", "219.234.82.85:36314", "219.234.82.85:30605", "219.234.82.85:23684", "219.234.82.85:34015", "219.234.82.85:33919", "219.234.82.85:30639", "219.234.82.85:33965", "219.234.82.85:37299", "219.234.82.85:20747", "219.234.82.86:6666", "219.234.82.86:34106", "219.234.82.86:25301", "219.234.82.86:32896", "219.234.82.86:23034", "219.234.82.86:22685", "219.234.82.86:13078", "219.234.82.86:38770", "219.234.82.86:28402", "219.234.82.86:18887", "219.234.82.86:6588", "219.234.82.86:7292", "219.234.82.86:24268", "219.234.82.86:16472", "219.234.82.86:32597", "219.234.82.86:31122", "219.234.82.88:8817", "219.234.82.88:8160", "219.234.82.88:9239", "219.234.82.88:6133", "114.141.162.53:8080", "123.125.116.243:17656", "123.125.116.241:29156", "123.125.116.243:6938", "219.234.82.88:29484", "219.234.82.88:8084", "219.234.82.88:32229", "219.234.82.88:22758", "219.234.82.88:5616", "124.225.52.14:8080", "219.234.82.88:30028", "219.234.82.88:23685", "219.234.82.88:29037", "219.234.82.88:8755"]
9
9
 
10
10
  def self.test_get_doc_with_proxy(proxylist)
11
11
  herf = "http://www.tvmao.com/drama/HS5oLCs="
@@ -33,9 +33,19 @@ class Debug
33
33
  Grabepg.get_show_schedule(proxylist,herf)
34
34
  end
35
35
 
36
+ def self.test_get_schedulelist_atday(proxylist)
37
+ Grabepg.get_schedulelist_atday("CCTV1"," http://www.tvmao.com/program/CCTV-CCTV1-w1.html",proxylist)
38
+ end
39
+
40
+ def self.debug_all
41
+ Grabepg.start
42
+ end
43
+
36
44
  #Grabepg.start
37
45
  #p test_get_show_schedule(proxylist)
38
46
  #p test_getschedule(proxylist)
39
47
  # p test_get_show_infomation(proxylist)
40
- p test_get_assign_date_url
48
+ #p test_get_assign_date_url
49
+ # p test_get_schedulelist_atday(proxylist)
50
+ p debug_all
41
51
  end
data/lib/grabepg.rb CHANGED
@@ -95,17 +95,111 @@ module Grabepg
95
95
 
96
96
  #调用此方法的例子
97
97
  def self.start
98
- @channel = []
99
- @site = DEFAULT_SITE
100
- channel_list = self.getchannels("/home/zql")
101
- proxy_list=get_topfast_list(5)
102
- img_down_path = self.img_down_path
103
- p img_down_path
104
- channel_urls = channel_list["channel_urls"]
98
+ #作用是获取俩个字符串的相似度
99
+ #get str1 and str2 similarity
100
+ get_similarity_string = lambda { |str1,str2|
101
+ _length = 0
102
+ type = 0
103
+ if str1.length>str2.length
104
+ _length=str2.length
105
+ type = 2
106
+ else
107
+ _length=str1.length
108
+ type =1
109
+ end
110
+ _str_list = []
111
+ _str = ""
112
+ for i in 0.._length
113
+ case type
114
+ when 2
115
+ n=i
116
+ 0.upto(str1.length-1).each do |j|
117
+ p "N: #{n}"
118
+ if(str2[n]==str1[j])
119
+ _str =_str+str2[n]
120
+ n = n+1
121
+ p "Str = #{_str}"
122
+ else
123
+ _str_list << _str
124
+ _str = ""
125
+ end
126
+ end
127
+ when 1
128
+ n=i
129
+ 0.upto(str2.length-1).each do |j|
130
+ p "N: #{n}"
131
+ if(str1[n]==str2[j])
132
+ _str =_str+str1[n]
133
+ n=n+1
134
+ p "Str = #{_str}"
135
+ else
136
+ _str_list << _str
137
+ _str = ""
138
+ end
139
+ end
140
+ end
141
+ end
142
+ p _str_list
143
+ _str = ""
144
+ _str_list.each do |str|
145
+ if _str.length<str.length
146
+ _str=str
147
+ end
148
+ end
149
+ _str
150
+ }
151
+
152
+
153
+ path = "/home/zql/workspace/New/smart_remote/img_path"
154
+ channel_list = Grabepg.getchannels(path)
155
+ channel_urls = channel_list['channel_urls']
156
+ channel_infos = channel_list['channel_info']
157
+ p "Channel img save file,path='#{Grabepg.img_down_path}'"
158
+ proxy_list=Grabepg.get_topfast_list(5) #get_topfast_list 参数是代表最慢用时 单位秒
159
+
160
+
161
+ #Use for Test
162
+
163
+ p "************************************"
164
+ p "proxy_list:#{proxy_list}"
165
+ p "************************************"
166
+
167
+ bool_start = false
168
+
169
+
105
170
  channel_urls.each do |channel,url|
106
- p "****************************************GetSchedule : #{getschedule(channel,url,proxy_list)}"
171
+
172
+ if(channel=="CCTV16")
173
+ bool_start = true
174
+ end
175
+
176
+ if bool_start
177
+ previous_show_name = ""
178
+ channel_info = channel_infos[channel]
179
+ channel_name = channel_info["channel_name"]
180
+ channel_type = channel_info["channel_type"]
181
+ channel_id = channel_info["channel_id"]
182
+ channel_img_path = channel_info["img_path"]
183
+
184
+ #channel,herf,proxylist,day_num=7
185
+
186
+
187
+ start_time=0
188
+ use_num =1
189
+
190
+ #getScheduleAssignDate参数:
191
+ # channel 频道
192
+ # herf 频道地址
193
+ # proxylist 代理列表
194
+ # start_num 开始时间 int 为开始时间与今天的差值 正数代表今天之后的第几天 负数代表今天之前的第几天
195
+ # day_num 抓取的时间段天数
196
+ # img_dir_down_path 图片网络地址保存路径 有默认值 可不设置
197
+ schedule_list=Grabepg.getScheduleAssignDate(channel,url,proxy_list,start_time,use_num) #抓取的七天后的1天的数据
198
+
199
+
200
+ end
201
+ end
107
202
  end
108
- end
109
203
 
110
204
  def self.img_down_path
111
205
  @img_down_path
@@ -174,6 +268,11 @@ module Grabepg
174
268
  doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
175
269
  @no_firest = 0
176
270
  rescue => err
271
+
272
+ unless @no_firest
273
+ @no_firest = 0
274
+ end
275
+
177
276
  @no_firest += 1
178
277
  p "*************************Proxy:#{proxy}, url:#{url}"
179
278
  proxylist.delete(proxy)
@@ -192,13 +291,25 @@ module Grabepg
192
291
  p "Grab: #{url}"
193
292
  doc = get_doc_with_proxy(proxylist,url)
194
293
  show_type = []
294
+
295
+
296
+ _img_url = "http://static.haotv.me/channel/logo/"
195
297
  img_url = _img_url + channel+".jpg"
298
+
299
+
196
300
  data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
197
301
  date = data[0]
198
302
  week = data[1]
199
303
  p "Channel: #{channel} Date: #{date} Week: #{week}"
200
304
  @date = "#{week}(#{date})"
201
305
  schedule_list = []
306
+
307
+ _herf = doc.css("h1[style='float:left']").xpath('img[@src]')[0]
308
+ img_url = _herf.get_attribute("src") if _herf
309
+
310
+ p "**************IMG: #{img_url}"
311
+
312
+
202
313
  doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
203
314
  _herf= schedule.xpath('a[@href]')[0]
204
315
  schedule_herf=_herf.get_attribute("href") if _herf
@@ -246,7 +357,7 @@ module Grabepg
246
357
  wday = 1
247
358
  end
248
359
 
249
- end_day = wday + day_num
360
+ end_day = wday + day_num - 1
250
361
 
251
362
  if end_day>(_wday+7)
252
363
  end_day = _wday + 7
@@ -265,7 +376,7 @@ module Grabepg
265
376
 
266
377
 
267
378
  #获取指定时间段的节目表
268
- def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num,img_dir_down_path=@img_down_dir_path)
379
+ def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
269
380
  begin
270
381
  day_num = 1 if day_num<1
271
382
  rescue
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grab_epg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahazql