cobweb-launcher 1.2.42__py3-none-any.whl → 1.2.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/crawlers/crawler.py +15 -14
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/RECORD +6 -6
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.2.42.dist-info → cobweb_launcher-1.2.43.dist-info}/top_level.txt +0 -0
cobweb/crawlers/crawler.py
CHANGED
@@ -8,6 +8,7 @@ from urllib.parse import urlparse
|
|
8
8
|
|
9
9
|
import urllib3
|
10
10
|
from requests import HTTPError, Response as Res
|
11
|
+
from requests.exceptions import ChunkedEncodingError
|
11
12
|
|
12
13
|
from cobweb.constant import DealModel, LogTemplate
|
13
14
|
from cobweb.base import (
|
@@ -165,7 +166,7 @@ class Crawler(threading.Thread):
|
|
165
166
|
|
166
167
|
if not iterator_status:
|
167
168
|
raise ValueError("request/download/parse function yield value error!")
|
168
|
-
except
|
169
|
+
except Exception as e:
|
169
170
|
exception_msg = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
170
171
|
url = seed.url
|
171
172
|
status = str(e)
|
@@ -189,19 +190,19 @@ class Crawler(threading.Thread):
|
|
189
190
|
seed.params.retry += 1
|
190
191
|
self._set_seed(seed)
|
191
192
|
# time.sleep(self.time_sleep * seed.params.retry)
|
192
|
-
except Exception as e:
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
193
|
+
# except Exception as e:
|
194
|
+
# logger.info(LogTemplate.download_exception.format(
|
195
|
+
# detail=seed_detail_log_info,
|
196
|
+
# retry=seed.params.retry,
|
197
|
+
# priority=seed.params.priority,
|
198
|
+
# seed_version=seed.params.seed_version,
|
199
|
+
# identifier=seed.identifier or "",
|
200
|
+
# exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
201
|
+
# ))
|
202
|
+
# seed.params.retry += 1
|
203
|
+
# # self._todo.push(seed)
|
204
|
+
# self._set_seed(seed)
|
205
|
+
# # time.sleep(self.time_sleep * seed.params.retry)
|
205
206
|
finally:
|
206
207
|
time.sleep(0.1)
|
207
208
|
logger.info("spider thread close")
|
@@ -13,7 +13,7 @@ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
|
13
13
|
cobweb/base/seed.py,sha256=Uz_VBRlAxNYQcFHk3tsZFMlU96yPOedHaWGTvk-zKd8,2908
|
14
14
|
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
15
15
|
cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
|
16
|
-
cobweb/crawlers/crawler.py,sha256=
|
16
|
+
cobweb/crawlers/crawler.py,sha256=pEukp5tC-axkzmcagPIpWPgmpxP0NHC1eu8iyJDFegA,8537
|
17
17
|
cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
|
18
18
|
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
19
19
|
cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
|
@@ -37,8 +37,8 @@ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
|
37
37
|
cobweb/utils/dotting.py,sha256=PgsWdM-724Jy-MZWUsaygNWV-huqLMmdLgop7gaBxlo,872
|
38
38
|
cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
|
39
39
|
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
40
|
-
cobweb_launcher-1.2.
|
41
|
-
cobweb_launcher-1.2.
|
42
|
-
cobweb_launcher-1.2.
|
43
|
-
cobweb_launcher-1.2.
|
44
|
-
cobweb_launcher-1.2.
|
40
|
+
cobweb_launcher-1.2.43.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
41
|
+
cobweb_launcher-1.2.43.dist-info/METADATA,sha256=GcKa3nUwsRKVxxoe2lKqHylsTYtXHxbveUMAizWtdJc,6510
|
42
|
+
cobweb_launcher-1.2.43.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
43
|
+
cobweb_launcher-1.2.43.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
44
|
+
cobweb_launcher-1.2.43.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|