cobweb-launcher 1.2.41__py3-none-any.whl → 1.2.43__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,9 @@ from inspect import isgenerator
6
6
  from typing import Union, Callable, Mapping
7
7
  from urllib.parse import urlparse
8
8
 
9
+ import urllib3
9
10
  from requests import HTTPError, Response as Res
11
+ from requests.exceptions import ChunkedEncodingError
10
12
 
11
13
  from cobweb.constant import DealModel, LogTemplate
12
14
  from cobweb.base import (
@@ -164,29 +166,19 @@ class Crawler(threading.Thread):
164
166
 
165
167
  if not iterator_status:
166
168
  raise ValueError("request/download/parse function yield value error!")
167
- except HTTPError as e:
168
- if isinstance(e.response, Res):
169
+ except Exception as e:
170
+ exception_msg = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
171
+ url = seed.url
172
+ status = str(e)
173
+ if getattr(e, "response", None) and isinstance(e.response, Res):
169
174
  url = e.response.request.url
170
175
  status = e.response.status_code
171
- exception_msg = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
172
- self.loghub_dot.build(
173
- topic=urlparse(url).netloc,
174
- data_size=-1, cost_time=-1,
175
- status=status, url=url,
176
- msg=exception_msg
177
- )
178
- logger.info(LogTemplate.download_exception.format(
179
- detail=seed_detail_log_info,
180
- retry=seed.params.retry,
181
- priority=seed.params.priority,
182
- seed_version=seed.params.seed_version,
183
- identifier=seed.identifier or "",
184
- exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
185
- ))
186
- seed.params.retry += 1
187
- self._set_seed(seed)
188
- time.sleep(self.time_sleep * seed.params.retry)
189
- except Exception as e:
176
+ self.loghub_dot.build(
177
+ topic=urlparse(url).netloc,
178
+ data_size=-1, cost_time=-1,
179
+ status=status, url=url,
180
+ msg=exception_msg
181
+ )
190
182
  logger.info(LogTemplate.download_exception.format(
191
183
  detail=seed_detail_log_info,
192
184
  retry=seed.params.retry,
@@ -196,9 +188,21 @@ class Crawler(threading.Thread):
196
188
  exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
197
189
  ))
198
190
  seed.params.retry += 1
199
- # self._todo.push(seed)
200
191
  self._set_seed(seed)
201
192
  # time.sleep(self.time_sleep * seed.params.retry)
193
+ # except Exception as e:
194
+ # logger.info(LogTemplate.download_exception.format(
195
+ # detail=seed_detail_log_info,
196
+ # retry=seed.params.retry,
197
+ # priority=seed.params.priority,
198
+ # seed_version=seed.params.seed_version,
199
+ # identifier=seed.identifier or "",
200
+ # exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
201
+ # ))
202
+ # seed.params.retry += 1
203
+ # # self._todo.push(seed)
204
+ # self._set_seed(seed)
205
+ # # time.sleep(self.time_sleep * seed.params.retry)
202
206
  finally:
203
207
  time.sleep(0.1)
204
208
  logger.info("spider thread close")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.41
3
+ Version: 1.2.43
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -13,7 +13,7 @@ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
13
13
  cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
14
14
  cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
15
15
  cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
16
- cobweb/crawlers/crawler.py,sha256=UojWdymPCwit0MOkqHsYRoe4hXyHdZhgh7-MBPfrhQo,8373
16
+ cobweb/crawlers/crawler.py,sha256=pEukp5tC-axkzmcagPIpWPgmpxP0NHC1eu8iyJDFegA,8537
17
17
  cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
18
18
  cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
19
19
  cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
@@ -37,8 +37,8 @@ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
37
37
  cobweb/utils/dotting.py,sha256=PgsWdM-724Jy-MZWUsaygNWV-huqLMmdLgop7gaBxlo,872
38
38
  cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
39
39
  cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
40
- cobweb_launcher-1.2.41.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
41
- cobweb_launcher-1.2.41.dist-info/METADATA,sha256=ZuTN2RXJGQB6qWfjgTtcvwoVrjxvS6-ho0z7V9BTR8A,6510
42
- cobweb_launcher-1.2.41.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
- cobweb_launcher-1.2.41.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
44
- cobweb_launcher-1.2.41.dist-info/RECORD,,
40
+ cobweb_launcher-1.2.43.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
41
+ cobweb_launcher-1.2.43.dist-info/METADATA,sha256=GcKa3nUwsRKVxxoe2lKqHylsTYtXHxbveUMAizWtdJc,6510
42
+ cobweb_launcher-1.2.43.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
+ cobweb_launcher-1.2.43.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
44
+ cobweb_launcher-1.2.43.dist-info/RECORD,,