cobweb-launcher 1.2.42__py3-none-any.whl → 1.2.43__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- cobweb/crawlers/crawler.py +15 -14
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/RECORD +6 -6
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/top_level.txt +0 -0
cobweb/crawlers/crawler.py
CHANGED
@@ -8,6 +8,7 @@ from urllib.parse import urlparse
|
|
8
8
|
|
9
9
|
import urllib3
|
10
10
|
from requests import HTTPError, Response as Res
|
11
|
+
from requests.exceptions import ChunkedEncodingError
|
11
12
|
|
12
13
|
from cobweb.constant import DealModel, LogTemplate
|
13
14
|
from cobweb.base import (
|
@@ -165,7 +166,7 @@ class Crawler(threading.Thread):
|
|
165
166
|
|
166
167
|
if not iterator_status:
|
167
168
|
raise ValueError("request/download/parse function yield value error!")
|
168
|
-
except
|
169
|
+
except Exception as e:
|
169
170
|
exception_msg = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
170
171
|
url = seed.url
|
171
172
|
status = str(e)
|
@@ -189,19 +190,19 @@ class Crawler(threading.Thread):
|
|
189
190
|
seed.params.retry += 1
|
190
191
|
self._set_seed(seed)
|
191
192
|
# time.sleep(self.time_sleep * seed.params.retry)
|
192
|
-
except Exception as e:
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
193
|
+
# except Exception as e:
|
194
|
+
# logger.info(LogTemplate.download_exception.format(
|
195
|
+
# detail=seed_detail_log_info,
|
196
|
+
# retry=seed.params.retry,
|
197
|
+
# priority=seed.params.priority,
|
198
|
+
# seed_version=seed.params.seed_version,
|
199
|
+
# identifier=seed.identifier or "",
|
200
|
+
# exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
201
|
+
# ))
|
202
|
+
# seed.params.retry += 1
|
203
|
+
# # self._todo.push(seed)
|
204
|
+
# self._set_seed(seed)
|
205
|
+
# # time.sleep(self.time_sleep * seed.params.retry)
|
205
206
|
finally:
|
206
207
|
time.sleep(0.1)
|
207
208
|
logger.info("spider thread close")
|
@@ -13,7 +13,7 @@ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
|
13
13
|
cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
|
14
14
|
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
15
15
|
cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
|
16
|
-
cobweb/crawlers/crawler.py,sha256=
|
16
|
+
cobweb/crawlers/crawler.py,sha256=pEukp5tC-axkzmcagPIpWPgmpxP0NHC1eu8iyJDFegA,8537
|
17
17
|
cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
|
18
18
|
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
19
19
|
cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
|
@@ -37,8 +37,8 @@ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
|
37
37
|
cobweb/utils/dotting.py,sha256=PgsWdM-724Jy-MZWUsaygNWV-huqLMmdLgop7gaBxlo,872
|
38
38
|
cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
|
39
39
|
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
40
|
-
cobweb_launcher-1.2.42.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
41
|
-
cobweb_launcher-1.2.
|
42
|
-
cobweb_launcher-1.2.42.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
43
|
-
cobweb_launcher-1.2.42.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
44
|
-
cobweb_launcher-1.2.42.dist-info/RECORD,,
|
40
|
+
cobweb_launcher-1.2.43.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
41
|
+
cobweb_launcher-1.2.43.dist-info/METADATA,sha256=GcKa3nUwsRKVxxoe2lKqHylsTYtXHxbveUMAizWtdJc,6510
|
42
|
+
cobweb_launcher-1.2.43.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
43
|
+
cobweb_launcher-1.2.43.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
44
|
+
cobweb_launcher-1.2.43.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|