grab_epg 0.2.1 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZjJjOTExYzRhNzM5NjA1NjNhNjU1NTU2NjVmOWM3NTEyNDQ2MDZkMw==
4
+ MTI5MTMwMTY1NTRmZjk5NGIwZGM4MTk3NTljNGFjMzAwOWY4NTdiNA==
5
5
  data.tar.gz: !binary |-
6
- ODlhNjQ2NDI0ZmQ1ZDU5M2M4YTA4NTg0ZWQ0ZmU0NWQwMDhjOTlhYg==
6
+ NzNjZTc3YzY1OWZkYTZjOGUzNTVjNzVmZjgzYjg3NjQyZWUzNGFmMg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NDg4MmNiYjYwMDk1NzMzMWIzMzJkNjQ0OGU2MjVhYTE5Y2RlZGE4NmM3YWE3
10
- M2UxNmY2M2ZmZjIzZTY0MDQ0MTBhMTQ4Y2JmMTJhNTg1YTZmZjk5MWVlNjk4
11
- YTdhODQ0NzU0MmU0OTk5MDczMmEzNGYyZTNkNDQzMWVjNjEyMDQ=
9
+ ZGMwNThmMzZlY2FmZmI1ZmQzNjY5ODdkYTI4MTk5MWI2NWZiODBlZjQ1YzNk
10
+ MzllOGE1YmNkZjRiNjc3MDlhY2FjZjMyNjFiNTcxYjFlZTlmYzgwNmVlMmQx
11
+ NzUxODIwMGE1MjgxZWM0NWY3ZDlmNWE0YmYyN2U0NTY1YjU3NmQ=
12
12
  data.tar.gz: !binary |-
13
- MWUxNTFkNjRmZDE0Y2VhYjRhMWRmNjcyMmNlZDc5NWVmYWEzYTg5ZDhkOGY5
14
- YTQ3OTBhMmVmMzY2ZTY5NjIwMjQ0MzM3NDAwNjQyZTdmNDQ0NTc0NGQzZTUx
15
- OTdmMjM1ZTQwNDhjN2VjYTQ5MWJkMWQ0MTFmOWU5NDMzZjllZGE=
13
+ MTA3MmE5MGZkMzU5YzliYjljNTQ1NzljNWViYTQ5YWU5ZmNlZGQ5OWJmZTUz
14
+ YmZiN2QyNjUzOTk0OGQwMzM0NmZjOTEwOTI2MzJkZjAxMDg5YzdlNzUxNjM3
15
+ Y2VkNmQzMGUyMjQ0Nzc5MTZkMGE5NjY4Y2IwZTY2ZGI5Y2MyOTA=
data/.grabepg.gemspec CHANGED
@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.2.1"
13
+ gem.version = "0.2.3"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/lib/grab_tvmao.rb CHANGED
@@ -264,7 +264,7 @@ module GrabTvmao
264
264
  proxy = proxylist[@proxyindex+1]
265
265
  end
266
266
  begin
267
- doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}")) unless proxy.nil?||proxy.empty?
267
+ doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}")) unless proxy.nil?||proxy.empty?
268
268
  doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
269
269
  @no_firest = 0
270
270
  rescue => err
@@ -274,15 +274,12 @@ module GrabTvmao
274
274
  end
275
275
 
276
276
  @no_firest += 1
277
- p "*************************Proxy:#{proxy}, url:#{url}"
277
+ p "*************************Proxy:#{proxy}, url:#{url} Error:#{err.to_s}"
278
278
  #proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
279
279
  get_doc_with_proxy(proxylist,url) if @no_firest<4
280
280
  raise RuntimeError,"Error: #{err.to_s}" unless @no_firest<4
281
281
  end
282
282
  @proxyindex += 1
283
- unless doc
284
- p "*************************Proxy:#{proxy}, url:#{url}"
285
- end
286
283
  doc
287
284
  end
288
285
 
@@ -119,8 +119,28 @@ module Grabepg
119
119
  end
120
120
 
121
121
 
122
+ def err_doc_proxy(proxy,proxylist,url="",err="")
123
+ if proxy.empty?||proxy.nil?
124
+ proxylist.delete_at[@proxyindex]
125
+ end
126
+
122
127
 
128
+ unless @no_firest
129
+ @no_firest = 0
130
+ end
123
131
 
132
+ @no_firest += 1
133
+ p "*************************Proxy:#{proxy}, url:#{url} Error:#{err}"
134
+ #proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
135
+ @proxyindex += 1
136
+ @proxyindex=@proxyindex%@size
137
+ doc=get_doc_with_proxy(proxylist,url) if @no_firest<4
138
+ unless @no_firest<4
139
+ @no_firest=0
140
+ raise RuntimeError,"Error: #{err}"
141
+ end
142
+ doc
143
+ end
124
144
 
125
145
 
126
146
  #使用代理获取url的html的doc值
@@ -129,6 +149,7 @@ module Grabepg
129
149
  unless @proxyindex
130
150
  @proxyindex = 0
131
151
  end
152
+ @size = proxylist.size
132
153
  @proxyindex=@proxyindex%proxylist.size
133
154
  if(proxylist[@proxyindex])
134
155
  proxy = proxylist[@proxyindex]
@@ -136,35 +157,23 @@ module Grabepg
136
157
  proxy = proxylist[@proxyindex+1]
137
158
  end
138
159
  begin
139
- doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}")) unless proxy.nil?||proxy.empty?
140
- unless doc
141
- raise RuntimeError,"DOC is nil"
160
+ doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}")) unless proxy.nil?||proxy.empty?
161
+ if doc.nil?
162
+ doc=err_doc_proxy(proxy,proxylist,url,"doc nil")
163
+ @no_firest=0
142
164
  end
143
165
  @no_firest = 0
144
166
  rescue => err
145
- if proxy.empty?||proxy.nil?
146
- proxylist.delete_at[@proxyindex]
147
- end
148
-
149
-
150
- unless @no_firest
151
- @no_firest = 0
152
- end
153
-
154
- @no_firest += 1
155
- p "*************************Proxy:#{proxy}, url:#{url}"
156
- #proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
167
+ p "IN Rescue"
168
+ doc=err_doc_proxy(proxy,proxylist,url,err.to_s)
169
+ @no_firest=0
170
+ p "Get DOC"
157
171
  @proxyindex += 1
158
- get_doc_with_proxy(proxylist,url) if @no_firest<4
159
- unless @no_firest<4
160
- @no_firest=0
161
- raise RuntimeError,"Error: #{err.to_s}"
162
- end
172
+ @proxyindex=@proxyindex%@size
173
+ return doc
163
174
  end
164
175
  @proxyindex += 1
165
- unless doc
166
- p "*************************Proxy:#{proxy}, url:#{url}"
167
- end
176
+ @proxyindex=@proxyindex%@size
168
177
  else
169
178
  begin
170
179
  doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
@@ -78,6 +78,7 @@ module Grabepg
78
78
 
79
79
 
80
80
  doc = @grabbase.get_doc_with_proxy(@proxy_list,@home_page)
81
+ begin
81
82
  doc.css("li").each do |li|
82
83
  case ChannelTypeMap[li.get_attribute("class")]
83
84
  when "央视"
@@ -90,6 +91,15 @@ module Grabepg
90
91
 
91
92
  end
92
93
  end
94
+ @error_num=0
95
+ rescue
96
+ unless @error_num
97
+ @error_num = 0
98
+ end
99
+ @error_num+=1
100
+ raise err.to_s if @error_num==5
101
+ dispose_home_page
102
+ end
93
103
  return @channels
94
104
  end
95
105
 
@@ -123,7 +133,17 @@ module Grabepg
123
133
  def dispose_schedule_page(url,start_time,use_time)
124
134
  url = @site +"/"+url
125
135
  urls = url.split("?")
136
+ begin
126
137
  doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
138
+ @error_num = 0
139
+ rescue => err
140
+ unless @error_num
141
+ @error_num = 0
142
+ end
143
+ @error_num+=1
144
+ raise err.to_s if @error_num==5
145
+ dispose_schedule_page(url,start_time,use_time)
146
+ end
127
147
  _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
128
148
  _url = urls[0]+_url
129
149
  urls = dispose_href_schedule_data(_url,start_time,use_time)
@@ -135,7 +155,9 @@ module Grabepg
135
155
  if url
136
156
  doc = @grabbase.get_doc_with_proxy(@proxy_list,url[:url])
137
157
  schedules = []
138
- doc.css('div[class="time"]')[0].css("li[class='gray']").each do |schedule|
158
+ div = doc.css('div[class="time"]')[0]
159
+ if div
160
+ div.css("li[class='gray']").each do |schedule|
139
161
  begin
140
162
  _dispose = schedule.content
141
163
  _dispose_show =schedule.css("span")[0].text
@@ -153,6 +175,9 @@ module Grabepg
153
175
  end
154
176
  end
155
177
  ret.merge!({url[:date]=>schedules})
178
+ else
179
+ p "Error In this url: #{url} couldn't get doc.css('div[class=time]')[0]"
180
+ end
156
181
  end
157
182
  end
158
183
  return ret
@@ -165,11 +190,21 @@ module Grabepg
165
190
  #解析节目详情页面
166
191
  def dispose_show_info(url)
167
192
  doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
193
+ begin
168
194
  show_name = doc.css('div[class="tv_info_top"]')[0].content
169
195
  _doc=doc.css("div[class='tv_info']")
170
196
  img_url = _doc.css("img")[0].get_attribute("src").gsub(" ","")
171
197
  show_info = _doc.css("p")[0].content.gsub("[全文]","")
198
+ @error_num = 0
172
199
  {show_name:show_name,img_url:img_url,show_info:show_info}
200
+ rescue => err
201
+ unless @error_num
202
+ @error_num = 0
203
+ end
204
+ @error_num+=1
205
+ raise err.to_s if @error_num==5
206
+ dispose_show_info(url)
207
+ end
173
208
  end
174
209
 
175
210
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grab_epg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahazql
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-06-03 00:00:00.000000000 Z
11
+ date: 2013-06-04 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: ! '"用于抓取EPG信息"'
14
14
  email: