grab_epg 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZjJjOTExYzRhNzM5NjA1NjNhNjU1NTU2NjVmOWM3NTEyNDQ2MDZkMw==
4
+ MTI5MTMwMTY1NTRmZjk5NGIwZGM4MTk3NTljNGFjMzAwOWY4NTdiNA==
5
5
  data.tar.gz: !binary |-
6
- ODlhNjQ2NDI0ZmQ1ZDU5M2M4YTA4NTg0ZWQ0ZmU0NWQwMDhjOTlhYg==
6
+ NzNjZTc3YzY1OWZkYTZjOGUzNTVjNzVmZjgzYjg3NjQyZWUzNGFmMg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NDg4MmNiYjYwMDk1NzMzMWIzMzJkNjQ0OGU2MjVhYTE5Y2RlZGE4NmM3YWE3
10
- M2UxNmY2M2ZmZjIzZTY0MDQ0MTBhMTQ4Y2JmMTJhNTg1YTZmZjk5MWVlNjk4
11
- YTdhODQ0NzU0MmU0OTk5MDczMmEzNGYyZTNkNDQzMWVjNjEyMDQ=
9
+ ZGMwNThmMzZlY2FmZmI1ZmQzNjY5ODdkYTI4MTk5MWI2NWZiODBlZjQ1YzNk
10
+ MzllOGE1YmNkZjRiNjc3MDlhY2FjZjMyNjFiNTcxYjFlZTlmYzgwNmVlMmQx
11
+ NzUxODIwMGE1MjgxZWM0NWY3ZDlmNWE0YmYyN2U0NTY1YjU3NmQ=
12
12
  data.tar.gz: !binary |-
13
- MWUxNTFkNjRmZDE0Y2VhYjRhMWRmNjcyMmNlZDc5NWVmYWEzYTg5ZDhkOGY5
14
- YTQ3OTBhMmVmMzY2ZTY5NjIwMjQ0MzM3NDAwNjQyZTdmNDQ0NTc0NGQzZTUx
15
- OTdmMjM1ZTQwNDhjN2VjYTQ5MWJkMWQ0MTFmOWU5NDMzZjllZGE=
13
+ MTA3MmE5MGZkMzU5YzliYjljNTQ1NzljNWViYTQ5YWU5ZmNlZGQ5OWJmZTUz
14
+ YmZiN2QyNjUzOTk0OGQwMzM0NmZjOTEwOTI2MzJkZjAxMDg5YzdlNzUxNjM3
15
+ Y2VkNmQzMGUyMjQ0Nzc5MTZkMGE5NjY4Y2IwZTY2ZGI5Y2MyOTA=
data/.grabepg.gemspec CHANGED
@@ -10,6 +10,6 @@ Gem::Specification.new do |gem|
10
10
  gem.files = `git ls-files`.split($\)
11
11
  gem.name = "grab_epg"
12
12
  gem.require_paths = ["lib"]
13
- gem.version = "0.2.1"
13
+ gem.version = "0.2.3"
14
14
  gem.homepage = "https://github.com/hahazql/grab_epg"
15
15
  end
data/lib/grab_tvmao.rb CHANGED
@@ -264,7 +264,7 @@ module GrabTvmao
264
264
  proxy = proxylist[@proxyindex+1]
265
265
  end
266
266
  begin
267
- doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}")) unless proxy.nil?||proxy.empty?
267
+ doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}")) unless proxy.nil?||proxy.empty?
268
268
  doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
269
269
  @no_firest = 0
270
270
  rescue => err
@@ -274,15 +274,12 @@ module GrabTvmao
274
274
  end
275
275
 
276
276
  @no_firest += 1
277
- p "*************************Proxy:#{proxy}, url:#{url}"
277
+ p "*************************Proxy:#{proxy}, url:#{url} Error:#{err.to_s}"
278
278
  #proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
279
279
  get_doc_with_proxy(proxylist,url) if @no_firest<4
280
280
  raise RuntimeError,"Error: #{err.to_s}" unless @no_firest<4
281
281
  end
282
282
  @proxyindex += 1
283
- unless doc
284
- p "*************************Proxy:#{proxy}, url:#{url}"
285
- end
286
283
  doc
287
284
  end
288
285
 
@@ -119,8 +119,28 @@ module Grabepg
119
119
  end
120
120
 
121
121
 
122
+ def err_doc_proxy(proxy,proxylist,url="",err="")
123
+ if proxy.empty?||proxy.nil?
124
+ proxylist.delete_at[@proxyindex]
125
+ end
126
+
122
127
 
128
+ unless @no_firest
129
+ @no_firest = 0
130
+ end
123
131
 
132
+ @no_firest += 1
133
+ p "*************************Proxy:#{proxy}, url:#{url} Error:#{err}"
134
+ #proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
135
+ @proxyindex += 1
136
+ @proxyindex=@proxyindex%@size
137
+ doc=get_doc_with_proxy(proxylist,url) if @no_firest<4
138
+ unless @no_firest<4
139
+ @no_firest=0
140
+ raise RuntimeError,"Error: #{err}"
141
+ end
142
+ doc
143
+ end
124
144
 
125
145
 
126
146
  #使用代理获取url的html的doc值
@@ -129,6 +149,7 @@ module Grabepg
129
149
  unless @proxyindex
130
150
  @proxyindex = 0
131
151
  end
152
+ @size = proxylist.size
132
153
  @proxyindex=@proxyindex%proxylist.size
133
154
  if(proxylist[@proxyindex])
134
155
  proxy = proxylist[@proxyindex]
@@ -136,35 +157,23 @@ module Grabepg
136
157
  proxy = proxylist[@proxyindex+1]
137
158
  end
138
159
  begin
139
- doc = Nokogiri::HTML(open(url,:proxy=>"http://#{proxy}")) unless proxy.nil?||proxy.empty?
140
- unless doc
141
- raise RuntimeError,"DOC is nil"
160
+ doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}")) unless proxy.nil?||proxy.empty?
161
+ if doc.nil?
162
+ doc=err_doc_proxy(proxy,proxylist,url,"doc nil")
163
+ @no_firest=0
142
164
  end
143
165
  @no_firest = 0
144
166
  rescue => err
145
- if proxy.empty?||proxy.nil?
146
- proxylist.delete_at[@proxyindex]
147
- end
148
-
149
-
150
- unless @no_firest
151
- @no_firest = 0
152
- end
153
-
154
- @no_firest += 1
155
- p "*************************Proxy:#{proxy}, url:#{url}"
156
- #proxylist.delete(proxy) #删除出错的代理 但如果是此网页错误则会引起BUG待修复
167
+ p "IN Rescue"
168
+ doc=err_doc_proxy(proxy,proxylist,url,err.to_s)
169
+ @no_firest=0
170
+ p "Get DOC"
157
171
  @proxyindex += 1
158
- get_doc_with_proxy(proxylist,url) if @no_firest<4
159
- unless @no_firest<4
160
- @no_firest=0
161
- raise RuntimeError,"Error: #{err.to_s}"
162
- end
172
+ @proxyindex=@proxyindex%@size
173
+ return doc
163
174
  end
164
175
  @proxyindex += 1
165
- unless doc
166
- p "*************************Proxy:#{proxy}, url:#{url}"
167
- end
176
+ @proxyindex=@proxyindex%@size
168
177
  else
169
178
  begin
170
179
  doc = Nokogiri::HTML(open(url)) if proxy.nil?||proxy.empty?
@@ -78,6 +78,7 @@ module Grabepg
78
78
 
79
79
 
80
80
  doc = @grabbase.get_doc_with_proxy(@proxy_list,@home_page)
81
+ begin
81
82
  doc.css("li").each do |li|
82
83
  case ChannelTypeMap[li.get_attribute("class")]
83
84
  when "央视"
@@ -90,6 +91,15 @@ module Grabepg
90
91
 
91
92
  end
92
93
  end
94
+ @error_num=0
95
+ rescue
96
+ unless @error_num
97
+ @error_num = 0
98
+ end
99
+ @error_num+=1
100
+ raise err.to_s if @error_num==5
101
+ dispose_home_page
102
+ end
93
103
  return @channels
94
104
  end
95
105
 
@@ -123,7 +133,17 @@ module Grabepg
123
133
  def dispose_schedule_page(url,start_time,use_time)
124
134
  url = @site +"/"+url
125
135
  urls = url.split("?")
136
+ begin
126
137
  doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
138
+ @error_num = 0
139
+ rescue => err
140
+ unless @error_num
141
+ @error_num = 0
142
+ end
143
+ @error_num+=1
144
+ raise err.to_s if @error_num==5
145
+ dispose_schedule_page(url,start_time,use_time)
146
+ end
127
147
  _url = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
128
148
  _url = urls[0]+_url
129
149
  urls = dispose_href_schedule_data(_url,start_time,use_time)
@@ -135,7 +155,9 @@ module Grabepg
135
155
  if url
136
156
  doc = @grabbase.get_doc_with_proxy(@proxy_list,url[:url])
137
157
  schedules = []
138
- doc.css('div[class="time"]')[0].css("li[class='gray']").each do |schedule|
158
+ div = doc.css('div[class="time"]')[0]
159
+ if div
160
+ div.css("li[class='gray']").each do |schedule|
139
161
  begin
140
162
  _dispose = schedule.content
141
163
  _dispose_show =schedule.css("span")[0].text
@@ -153,6 +175,9 @@ module Grabepg
153
175
  end
154
176
  end
155
177
  ret.merge!({url[:date]=>schedules})
178
+ else
179
+ p "Error In this url: #{url} couldn't get doc.css('div[class=time]')[0]"
180
+ end
156
181
  end
157
182
  end
158
183
  return ret
@@ -165,11 +190,21 @@ module Grabepg
165
190
  #解析节目详情页面
166
191
  def dispose_show_info(url)
167
192
  doc = @grabbase.get_doc_with_proxy(@proxy_list,url)
193
+ begin
168
194
  show_name = doc.css('div[class="tv_info_top"]')[0].content
169
195
  _doc=doc.css("div[class='tv_info']")
170
196
  img_url = _doc.css("img")[0].get_attribute("src").gsub(" ","")
171
197
  show_info = _doc.css("p")[0].content.gsub("[全文]","")
198
+ @error_num = 0
172
199
  {show_name:show_name,img_url:img_url,show_info:show_info}
200
+ rescue => err
201
+ unless @error_num
202
+ @error_num = 0
203
+ end
204
+ @error_num+=1
205
+ raise err.to_s if @error_num==5
206
+ dispose_show_info(url)
207
+ end
173
208
  end
174
209
 
175
210
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grab_epg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahazql
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-06-03 00:00:00.000000000 Z
11
+ date: 2013-06-04 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: ! '"用于抓取EPG信息"'
14
14
  email: