cobweb-launcher 1.2.41__tar.gz → 1.2.43__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {cobweb-launcher-1.2.41/cobweb_launcher.egg-info → cobweb-launcher-1.2.43}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/crawlers/crawler.py +26 -22
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/setup.py +1 -1
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/LICENSE +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/README.md +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/base/decorators.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/crawlers/base_crawler.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/crawlers/file_crawler.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/db/api_db.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/launchers/launcher.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/launchers/launcher_air.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/launchers/launcher_api.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/launchers/launcher_pro.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/pipelines/pipeline_console.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/setting.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/utils/bloom.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/utils/dotting.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/setup.cfg +0 -0
- {cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/test/test.py +0 -0
@@ -6,7 +6,9 @@ from inspect import isgenerator
|
|
6
6
|
from typing import Union, Callable, Mapping
|
7
7
|
from urllib.parse import urlparse
|
8
8
|
|
9
|
+
import urllib3
|
9
10
|
from requests import HTTPError, Response as Res
|
11
|
+
from requests.exceptions import ChunkedEncodingError
|
10
12
|
|
11
13
|
from cobweb.constant import DealModel, LogTemplate
|
12
14
|
from cobweb.base import (
|
@@ -164,29 +166,19 @@ class Crawler(threading.Thread):
|
|
164
166
|
|
165
167
|
if not iterator_status:
|
166
168
|
raise ValueError("request/download/parse function yield value error!")
|
167
|
-
except
|
168
|
-
|
169
|
+
except Exception as e:
|
170
|
+
exception_msg = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
171
|
+
url = seed.url
|
172
|
+
status = str(e)
|
173
|
+
if getattr(e, "response", None) and isinstance(e.response, Res):
|
169
174
|
url = e.response.request.url
|
170
175
|
status = e.response.status_code
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
)
|
178
|
-
logger.info(LogTemplate.download_exception.format(
|
179
|
-
detail=seed_detail_log_info,
|
180
|
-
retry=seed.params.retry,
|
181
|
-
priority=seed.params.priority,
|
182
|
-
seed_version=seed.params.seed_version,
|
183
|
-
identifier=seed.identifier or "",
|
184
|
-
exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
185
|
-
))
|
186
|
-
seed.params.retry += 1
|
187
|
-
self._set_seed(seed)
|
188
|
-
time.sleep(self.time_sleep * seed.params.retry)
|
189
|
-
except Exception as e:
|
176
|
+
self.loghub_dot.build(
|
177
|
+
topic=urlparse(url).netloc,
|
178
|
+
data_size=-1, cost_time=-1,
|
179
|
+
status=status, url=url,
|
180
|
+
msg=exception_msg
|
181
|
+
)
|
190
182
|
logger.info(LogTemplate.download_exception.format(
|
191
183
|
detail=seed_detail_log_info,
|
192
184
|
retry=seed.params.retry,
|
@@ -196,9 +188,21 @@ class Crawler(threading.Thread):
|
|
196
188
|
exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
197
189
|
))
|
198
190
|
seed.params.retry += 1
|
199
|
-
# self._todo.push(seed)
|
200
191
|
self._set_seed(seed)
|
201
192
|
# time.sleep(self.time_sleep * seed.params.retry)
|
193
|
+
# except Exception as e:
|
194
|
+
# logger.info(LogTemplate.download_exception.format(
|
195
|
+
# detail=seed_detail_log_info,
|
196
|
+
# retry=seed.params.retry,
|
197
|
+
# priority=seed.params.priority,
|
198
|
+
# seed_version=seed.params.seed_version,
|
199
|
+
# identifier=seed.identifier or "",
|
200
|
+
# exception=''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
201
|
+
# ))
|
202
|
+
# seed.params.retry += 1
|
203
|
+
# # self._todo.push(seed)
|
204
|
+
# self._set_seed(seed)
|
205
|
+
# # time.sleep(self.time_sleep * seed.params.retry)
|
202
206
|
finally:
|
203
207
|
time.sleep(0.1)
|
204
208
|
logger.info("spider thread close")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cobweb-launcher-1.2.41 → cobweb-launcher-1.2.43}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|