cobweb-launcher 1.2.41__py3-none-any.whl → 1.2.42__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- cobweb/crawlers/crawler.py +13 -10
- {cobweb_launcher-1.2.41.dist-info → cobweb_launcher-1.2.42.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.2.41.dist-info → cobweb_launcher-1.2.42.dist-info}/RECORD +6 -6
- {cobweb_launcher-1.2.41.dist-info → cobweb_launcher-1.2.42.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.41.dist-info → cobweb_launcher-1.2.42.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.2.41.dist-info → cobweb_launcher-1.2.42.dist-info}/top_level.txt +0 -0
cobweb/crawlers/crawler.py
CHANGED
@@ -6,6 +6,7 @@ from inspect import isgenerator
|
|
6
6
|
from typing import Union, Callable, Mapping
|
7
7
|
from urllib.parse import urlparse
|
8
8
|
|
9
|
+
import urllib3
|
9
10
|
from requests import HTTPError, Response as Res
|
10
11
|
|
11
12
|
from cobweb.constant import DealModel, LogTemplate
|
@@ -164,17 +165,19 @@ class Crawler(threading.Thread):
|
|
164
165
|
|
165
166
|
if not iterator_status:
|
166
167
|
raise ValueError("request/download/parse function yield value error!")
|
167
|
-
except HTTPError as e:
|
168
|
-
|
168
|
+
except (HTTPError, urllib3.exceptions.HTTPError, urllib3.exceptions.PoolError) as e:
|
169
|
+
exception_msg = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
170
|
+
url = seed.url
|
171
|
+
status = str(e)
|
172
|
+
if getattr(e, "response", None) and isinstance(e.response, Res):
|
169
173
|
url = e.response.request.url
|
170
174
|
status = e.response.status_code
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
)
|
175
|
+
self.loghub_dot.build(
|
176
|
+
topic=urlparse(url).netloc,
|
177
|
+
data_size=-1, cost_time=-1,
|
178
|
+
status=status, url=url,
|
179
|
+
msg=exception_msg
|
180
|
+
)
|
178
181
|
logger.info(LogTemplate.download_exception.format(
|
179
182
|
detail=seed_detail_log_info,
|
180
183
|
retry=seed.params.retry,
|
@@ -185,7 +188,7 @@ class Crawler(threading.Thread):
|
|
185
188
|
))
|
186
189
|
seed.params.retry += 1
|
187
190
|
self._set_seed(seed)
|
188
|
-
time.sleep(self.time_sleep * seed.params.retry)
|
191
|
+
# time.sleep(self.time_sleep * seed.params.retry)
|
189
192
|
except Exception as e:
|
190
193
|
logger.info(LogTemplate.download_exception.format(
|
191
194
|
detail=seed_detail_log_info,
|
@@ -13,7 +13,7 @@ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
|
13
13
|
cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
|
14
14
|
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
15
15
|
cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
|
16
|
-
cobweb/crawlers/crawler.py,sha256=
|
16
|
+
cobweb/crawlers/crawler.py,sha256=3uHuyFNP_w5WLk1_iBFKFhjDcCM7l89_aiD_pUhWT3s,8520
|
17
17
|
cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
|
18
18
|
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
19
19
|
cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
|
@@ -37,8 +37,8 @@ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
|
37
37
|
cobweb/utils/dotting.py,sha256=PgsWdM-724Jy-MZWUsaygNWV-huqLMmdLgop7gaBxlo,872
|
38
38
|
cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
|
39
39
|
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
40
|
-
cobweb_launcher-1.2.41.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
41
|
-
cobweb_launcher-1.2.
|
42
|
-
cobweb_launcher-1.2.41.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
43
|
-
cobweb_launcher-1.2.41.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
44
|
-
cobweb_launcher-1.2.41.dist-info/RECORD,,
|
40
|
+
cobweb_launcher-1.2.42.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
41
|
+
cobweb_launcher-1.2.42.dist-info/METADATA,sha256=F3sVh5KuEoo8LwY_ael_sOdWe5TiB83QvYahq13NWes,6510
|
42
|
+
cobweb_launcher-1.2.42.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
43
|
+
cobweb_launcher-1.2.42.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
44
|
+
cobweb_launcher-1.2.42.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|