cobweb-launcher 1.2.64__py3-none-any.whl → 1.2.66__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/crawlers/crawler.py +8 -1
- cobweb/launchers/launcher.py +2 -0
- cobweb/setting.py +1 -0
- {cobweb_launcher-1.2.64.dist-info → cobweb_launcher-1.2.66.dist-info}/METADATA +1 -1
- {cobweb_launcher-1.2.64.dist-info → cobweb_launcher-1.2.66.dist-info}/RECORD +8 -8
- {cobweb_launcher-1.2.64.dist-info → cobweb_launcher-1.2.66.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.2.64.dist-info → cobweb_launcher-1.2.66.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.2.64.dist-info → cobweb_launcher-1.2.66.dist-info}/top_level.txt +0 -0
cobweb/crawlers/crawler.py
CHANGED
@@ -36,6 +36,7 @@ class Crawler(threading.Thread):
|
|
36
36
|
delete_seed: Callable,
|
37
37
|
upload_data: Callable,
|
38
38
|
custom_func: Union[Mapping[str, Callable]],
|
39
|
+
record_failed: bool,
|
39
40
|
thread_num: int,
|
40
41
|
max_retries: int,
|
41
42
|
time_sleep: int,
|
@@ -50,6 +51,7 @@ class Crawler(threading.Thread):
|
|
50
51
|
self._add_seed = add_seed
|
51
52
|
self._delete_seed = delete_seed
|
52
53
|
self._upload_data = upload_data
|
54
|
+
self._record_failed = record_failed
|
53
55
|
|
54
56
|
for func_name, _callable in custom_func.items():
|
55
57
|
if isinstance(_callable, Callable):
|
@@ -105,7 +107,10 @@ class Crawler(threading.Thread):
|
|
105
107
|
|
106
108
|
elif seed.params.retry > self.max_retries:
|
107
109
|
seed.params.seed_status = DealModel.fail
|
108
|
-
self.
|
110
|
+
if self._record_failed:
|
111
|
+
self.parse(Response(seed, "failed"))
|
112
|
+
else:
|
113
|
+
self._delete_seed(seed)
|
109
114
|
continue
|
110
115
|
|
111
116
|
seed_detail_log_info = LogTemplate.log_info(seed.to_dict)
|
@@ -149,6 +154,7 @@ class Crawler(threading.Thread):
|
|
149
154
|
data_size=int(download_item.response.headers.get("content-length", 0)),
|
150
155
|
cost_time=end_time - start_time, status = 200,
|
151
156
|
url=download_item.response.url,
|
157
|
+
seed=download_item.seed.to_string,
|
152
158
|
proxy_type=seed.params.proxy_type,
|
153
159
|
proxy=seed.params.proxy,
|
154
160
|
project=self.project, task=self.task,
|
@@ -179,6 +185,7 @@ class Crawler(threading.Thread):
|
|
179
185
|
topic=urlparse(url).netloc,
|
180
186
|
data_size=-1, cost_time=-1,
|
181
187
|
status=status, url=url,
|
188
|
+
seed=seed.to_string,
|
182
189
|
proxy_type=seed.params.proxy_type,
|
183
190
|
proxy=seed.params.proxy,
|
184
191
|
project=self.project,
|
cobweb/launchers/launcher.py
CHANGED
@@ -98,6 +98,7 @@ class Launcher(threading.Thread):
|
|
98
98
|
self._upload_queue_max_size = setting.UPLOAD_QUEUE_MAX_SIZE
|
99
99
|
|
100
100
|
self._spider_max_retries = setting.SPIDER_MAX_RETRIES
|
101
|
+
self._record_failed = setting.RECORD_FAILED_SPIDER
|
101
102
|
self._spider_thread_num = setting.SPIDER_THREAD_NUM
|
102
103
|
self._spider_time_sleep = setting.SPIDER_TIME_SLEEP
|
103
104
|
self._spider_max_count = setting.SPIDER_MAX_COUNT
|
@@ -215,6 +216,7 @@ class Launcher(threading.Thread):
|
|
215
216
|
delete_seed=self._delete_seed,
|
216
217
|
upload_data=self._upload_data,
|
217
218
|
custom_func=self.__CUSTOM_FUNC__,
|
219
|
+
record_failed=self._record_failed,
|
218
220
|
thread_num = self._spider_thread_num,
|
219
221
|
max_retries = self._spider_max_retries,
|
220
222
|
time_sleep=self._spider_time_sleep
|
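cobweb/setting.py
CHANGED
(hunk not shown in this extract; +1 -0 per the summary above)
{cobweb_launcher-1.2.64.dist-info → cobweb_launcher-1.2.66.dist-info}/RECORD
CHANGED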
@@ -1,6 +1,6 @@
|
|
1
1
|
cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
|
2
2
|
cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
|
3
|
-
cobweb/setting.py,sha256=
|
3
|
+
cobweb/setting.py,sha256=YlNrQSUgly5ah1-7Mx1RWypA5cPcaEIGV4neNJItGZ4,2302
|
4
4
|
cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
|
5
5
|
cobweb/base/basic.py,sha256=Z56SSLB3I2IGHWCCcSy0Qbfzj8Qbg_po3gP32q1jh4k,7741
|
6
6
|
cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
|
@@ -13,7 +13,7 @@ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
|
13
13
|
cobweb/base/seed.py,sha256=A-F1urjbE5hYNWTCwq3sUV4nrxlK_RGMoCmjBmIwYsI,3158
|
14
14
|
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
15
15
|
cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
|
16
|
-
cobweb/crawlers/crawler.py,sha256=
|
16
|
+
cobweb/crawlers/crawler.py,sha256=ZSau2W7YMR_XiJmHlkmAJbeH2sbWyma_yGEY2h70yVs,9289
|
17
17
|
cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
|
18
18
|
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
19
19
|
cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
|
@@ -22,7 +22,7 @@ cobweb/db/redis_db_new.py,sha256=F09LWVjtC2JFdCaKatZ2bAOLKbsnes85_nZRe2dtSIc,469
|
|
22
22
|
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
23
23
|
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
24
24
|
cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
|
25
|
-
cobweb/launchers/launcher.py,sha256=
|
25
|
+
cobweb/launchers/launcher.py,sha256=m6NKrVxcqEGXctIAAXXGEsdSFAyK0t49AWxQDCKGJiY,8027
|
26
26
|
cobweb/launchers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
|
27
27
|
cobweb/launchers/launcher_api.py,sha256=3-A6k3Igvi-xnvP9M_3NkJKUweDJ_pY10ZrHClteD-g,8628
|
28
28
|
cobweb/launchers/launcher_pro.py,sha256=xM2z3nCTb6gehuD1Ggyvli2knGAPjGczbjOhKWswVpo,8424
|
@@ -38,8 +38,8 @@ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
|
38
38
|
cobweb/utils/dotting.py,sha256=mVICaa26R-dQ4JGmPK-kkR6QjX38QiRewXZnGb2DCIc,1784
|
39
39
|
cobweb/utils/oss.py,sha256=6x_ugXanh1R-6ZylQzUDQh4OeFZHujhWFCOxbzy53JY,3984
|
40
40
|
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
41
|
-
cobweb_launcher-1.2.
|
42
|
-
cobweb_launcher-1.2.
|
43
|
-
cobweb_launcher-1.2.
|
44
|
-
cobweb_launcher-1.2.
|
45
|
-
cobweb_launcher-1.2.
|
41
|
+
cobweb_launcher-1.2.66.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
42
|
+
cobweb_launcher-1.2.66.dist-info/METADATA,sha256=jmTWYE_UNh52CVZ9lV5D6THP36hrkcWAaawHkKvLH_s,6510
|
43
|
+
cobweb_launcher-1.2.66.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
44
|
+
cobweb_launcher-1.2.66.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
45
|
+
cobweb_launcher-1.2.66.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|