grab_epg 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. checksums.yaml +8 -8
  2. data/.grabepg.gemspec +1 -1
  3. data/lib/debug.rb +12 -2
  4. data/lib/grabepg.rb +122 -11
  5. metadata +1 -1
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NjYwZWJkYTFmZjM2ODEyNTlmNzdhMmRjMDJiMjczYzIzMGE2MTMyMg==
4
+ NmI0ZDZlY2FkNzVhNTNiNTQwYjU0NDNhOTNhYmUyZWUzY2ViN2U5OA==
5
5
  data.tar.gz: !binary |-
6
- MjIwNmU3YThlM2E5NTRlMzBiYzNiNTdlOGZkYzFkZmFjMGNhMGY0ZA==
6
+ NzBkMjAxZmUwNDE1YzAzNWNmMTc1NjEwNjJlM2NhYjlmZmU2N2MzZg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MTU0MjBiYzNhZWYxOGY0OTEwMTIwMzkwNmQ5ZWFkNzc4MzA4OGQxNzkzMTIw
10
- MzdjZDMxNDNiMzZhOThlODQ0ZDRhZDNiNDkyYzc4N2UwZDFkMGYxYjFhZmJl
11
- NjRmMjdiZTBjMzIxNzYyZjdiMGVhMzNjZjdjNWQzZmFkYjU5MWQ=
9
+ MmZiNGVkM2JjMGUwZDNmMDE5OTc2NzE2NjFmMDJmMzgyODU2Nzg1ZjgxZjBl
10
+ NmVkMmU0MThhZmY1YjZhOWYxNDBlOWZjZDEwODNlYWI3MWFhNmY2ZDQ1OThl
11
+ NTFjM2VmMjgxOTc1MmNkY2EzMmU2YzNjN2JkNjA0M2FlNjllZmE=
12
12
  data.tar.gz: !binary |-
13
- NTRhZTA3YTdlZGIyZDZkZGIxNzViYWI0OTY4N2FhNDAyN2QyM2E3ZjE2Mzkx
14
- ZmY5NjlhNjNjNGM3MzMyNzk1YTY2ZWQ0ZWM2MDhkN2Q1OTlkOWE4OGQ3YjUy
15
- MGYxNTYxNWY3YmE5NWMyMDcwNDEwYWNkNDJkNWM2NDM5MTk3ZGU=
13
+ ZTBmMzY2MDkyZDEzZDVhMzQ0MDA4MmVjYTBkMGM2YmJhYTlhYzkzOWYyNjJj
14
+ ZjAxOTNmZjgwZTljNWI3MWQxMjY3Y2FmZjczODNhYWU4MGQ3YzZhNDRjOWM1
15
+ NTJlZWYxNDViNDgwZjMxNDMxZWNhYzY1NzA1OTg5ZGIwY2YyYWI=
data/.grabepg.gemspec CHANGED
@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.1.5"
13
+ gem.version = "0.1.6"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/lib/debug.rb CHANGED
@@ -5,7 +5,7 @@ require 'open-uri'
5
5
  require File.expand_path("../grabepg.rb", __FILE__)
6
6
  class Debug
7
7
  # To change this template use File | Settings | File Templates.
8
- proxylist = [""]
8
+ proxylist = ["219.234.82.84:24809", "219.234.82.84:17130", "219.234.82.84:23684", "219.234.82.84:18253", "219.234.82.84:33987", "219.234.82.84:17183", "219.234.82.84:13243", "219.234.82.84:16158", "219.234.82.84:14826", "219.234.82.84:8489", "219.234.82.84:22222", "219.234.82.84:6370", "219.234.82.84:7571", "219.234.82.84:33944", "219.234.82.84:9743", "219.234.82.84:8089", "219.234.82.84:20991", "219.234.82.84:34032", "219.234.82.84:9415", "219.234.82.84:26149", "219.234.82.84:11095", "219.234.82.84:21724", "219.234.82.84:9177", "219.234.82.84:34034", "219.234.82.84:17945", "219.234.82.85:32229", "219.234.82.85:28341", "219.234.82.85:36314", "219.234.82.85:30605", "219.234.82.85:23684", "219.234.82.85:34015", "219.234.82.85:33919", "219.234.82.85:30639", "219.234.82.85:33965", "219.234.82.85:37299", "219.234.82.85:20747", "219.234.82.86:6666", "219.234.82.86:34106", "219.234.82.86:25301", "219.234.82.86:32896", "219.234.82.86:23034", "219.234.82.86:22685", "219.234.82.86:13078", "219.234.82.86:38770", "219.234.82.86:28402", "219.234.82.86:18887", "219.234.82.86:6588", "219.234.82.86:7292", "219.234.82.86:24268", "219.234.82.86:16472", "219.234.82.86:32597", "219.234.82.86:31122", "219.234.82.88:8817", "219.234.82.88:8160", "219.234.82.88:9239", "219.234.82.88:6133", "114.141.162.53:8080", "123.125.116.243:17656", "123.125.116.241:29156", "123.125.116.243:6938", "219.234.82.88:29484", "219.234.82.88:8084", "219.234.82.88:32229", "219.234.82.88:22758", "219.234.82.88:5616", "124.225.52.14:8080", "219.234.82.88:30028", "219.234.82.88:23685", "219.234.82.88:29037", "219.234.82.88:8755"]
9
9
 
10
10
  def self.test_get_doc_with_proxy(proxylist)
11
11
  herf = "http://www.tvmao.com/drama/HS5oLCs="
@@ -33,9 +33,19 @@ class Debug
33
33
  Grabepg.get_show_schedule(proxylist,herf)
34
34
  end
35
35
 
36
+ def self.test_get_schedulelist_atday(proxylist)
37
+ Grabepg.get_schedulelist_atday("CCTV1"," http://www.tvmao.com/program/CCTV-CCTV1-w1.html",proxylist)
38
+ end
39
+
40
+ def self.debug_all
41
+ Grabepg.start
42
+ end
43
+
36
44
  #Grabepg.start
37
45
  #p test_get_show_schedule(proxylist)
38
46
  #p test_getschedule(proxylist)
39
47
  # p test_get_show_infomation(proxylist)
40
- p test_get_assign_date_url
48
+ #p test_get_assign_date_url
49
+ # p test_get_schedulelist_atday(proxylist)
50
+ p debug_all
41
51
  end
data/lib/grabepg.rb CHANGED
@@ -95,17 +95,111 @@ module Grabepg
95
95
 
96
96
  #调用此方法的例子
97
97
  def self.start
98
- @channel = []
99
- @site = DEFAULT_SITE
100
- channel_list = self.getchannels("/home/zql")
101
- proxy_list=get_topfast_list(5)
102
- img_down_path = self.img_down_path
103
- p img_down_path
104
- channel_urls = channel_list["channel_urls"]
98
+ #作用是获取俩个字符串的相似度
99
+ #get str1 and str2 similarity
100
+ get_similarity_string = lambda { |str1,str2|
101
+ _length = 0
102
+ type = 0
103
+ if str1.length>str2.length
104
+ _length=str2.length
105
+ type = 2
106
+ else
107
+ _length=str1.length
108
+ type =1
109
+ end
110
+ _str_list = []
111
+ _str = ""
112
+ for i in 0.._length
113
+ case type
114
+ when 2
115
+ n=i
116
+ 0.upto(str1.length-1).each do |j|
117
+ p "N: #{n}"
118
+ if(str2[n]==str1[j])
119
+ _str =_str+str2[n]
120
+ n = n+1
121
+ p "Str = #{_str}"
122
+ else
123
+ _str_list << _str
124
+ _str = ""
125
+ end
126
+ end
127
+ when 1
128
+ n=i
129
+ 0.upto(str2.length-1).each do |j|
130
+ p "N: #{n}"
131
+ if(str1[n]==str2[j])
132
+ _str =_str+str1[n]
133
+ n=n+1
134
+ p "Str = #{_str}"
135
+ else
136
+ _str_list << _str
137
+ _str = ""
138
+ end
139
+ end
140
+ end
141
+ end
142
+ p _str_list
143
+ _str = ""
144
+ _str_list.each do |str|
145
+ if _str.length<str.length
146
+ _str=str
147
+ end
148
+ end
149
+ _str
150
+ }
151
+
152
+
153
+ path = "/home/zql/workspace/New/smart_remote/img_path"
154
+ channel_list = Grabepg.getchannels(path)
155
+ channel_urls = channel_list['channel_urls']
156
+ channel_infos = channel_list['channel_info']
157
+ p "Channel img save file,path='#{Grabepg.img_down_path}'"
158
+ proxy_list=Grabepg.get_topfast_list(5) #get_topfast_list 参数是代表最慢用时 单位秒
159
+
160
+
161
+ #Use for Test
162
+
163
+ p "************************************"
164
+ p "proxy_list:#{proxy_list}"
165
+ p "************************************"
166
+
167
+ bool_start = false
168
+
169
+
105
170
  channel_urls.each do |channel,url|
106
- p "****************************************GetSchedule : #{getschedule(channel,url,proxy_list)}"
171
+
172
+ if(channel=="CCTV16")
173
+ bool_start = true
174
+ end
175
+
176
+ if bool_start
177
+ previous_show_name = ""
178
+ channel_info = channel_infos[channel]
179
+ channel_name = channel_info["channel_name"]
180
+ channel_type = channel_info["channel_type"]
181
+ channel_id = channel_info["channel_id"]
182
+ channel_img_path = channel_info["img_path"]
183
+
184
+ #channel,herf,proxylist,day_num=7
185
+
186
+
187
+ start_time=0
188
+ use_num =1
189
+
190
+ #getScheduleAssignDate参数:
191
+ # channel 频道
192
+ # herf 频道地址
193
+ # proxylist 代理列表
194
+ # start_num 开始时间 int 为开始时间与今天的差值 正数代表今天之后的第几天 负数代表今天之前的第几天
195
+ # day_num 抓取的时间段天数
196
+ # img_dir_down_path 图片网络地址保存路径 有默认值 可不设置
197
+ schedule_list=Grabepg.getScheduleAssignDate(channel,url,proxy_list,start_time,use_num) #抓取的七天后的1天的数据
198
+
199
+
200
+ end
201
+ end
107
202
  end
108
- end
109
203
 
110
204
  def self.img_down_path
111
205
  @img_down_path
@@ -174,6 +268,11 @@ module Grabepg
174
268
  doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
175
269
  @no_firest = 0
176
270
  rescue => err
271
+
272
+ unless @no_firest
273
+ @no_firest = 0
274
+ end
275
+
177
276
  @no_firest += 1
178
277
  p "*************************Proxy:#{proxy}, url:#{url}"
179
278
  proxylist.delete(proxy)
@@ -192,13 +291,25 @@ module Grabepg
192
291
  p "Grab: #{url}"
193
292
  doc = get_doc_with_proxy(proxylist,url)
194
293
  show_type = []
294
+
295
+
296
+ _img_url = "http://static.haotv.me/channel/logo/"
195
297
  img_url = _img_url + channel+".jpg"
298
+
299
+
196
300
  data=doc.css('div[class="mt10 clear"]')[0].content.split(" ")
197
301
  date = data[0]
198
302
  week = data[1]
199
303
  p "Channel: #{channel} Date: #{date} Week: #{week}"
200
304
  @date = "#{week}(#{date})"
201
305
  schedule_list = []
306
+
307
+ _herf = doc.css("h1[style='float:left']").xpath('img[@src]')[0]
308
+ img_url = _herf.get_attribute("src") if _herf
309
+
310
+ p "**************IMG: #{img_url}"
311
+
312
+
202
313
  doc.css('ul[id="pgrow"]')[0].css("li").each do |schedule|
203
314
  _herf= schedule.xpath('a[@href]')[0]
204
315
  schedule_herf=_herf.get_attribute("href") if _herf
@@ -246,7 +357,7 @@ module Grabepg
246
357
  wday = 1
247
358
  end
248
359
 
249
- end_day = wday + day_num
360
+ end_day = wday + day_num - 1
250
361
 
251
362
  if end_day>(_wday+7)
252
363
  end_day = _wday + 7
@@ -265,7 +376,7 @@ module Grabepg
265
376
 
266
377
 
267
378
  #获取指定时间段的节目表
268
- def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num,img_dir_down_path=@img_down_dir_path)
379
+ def self.getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
269
380
  begin
270
381
  day_num = 1 if day_num<1
271
382
  rescue
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grab_epg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahazql