cobweb-launcher 1.2.65__py3-none-any.whl → 1.2.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,7 @@ class Crawler(threading.Thread):
36
36
  delete_seed: Callable,
37
37
  upload_data: Callable,
38
38
  custom_func: Union[Mapping[str, Callable]],
39
+ record_failed: bool,
39
40
  thread_num: int,
40
41
  max_retries: int,
41
42
  time_sleep: int,
@@ -50,6 +51,7 @@ class Crawler(threading.Thread):
50
51
  self._add_seed = add_seed
51
52
  self._delete_seed = delete_seed
52
53
  self._upload_data = upload_data
54
+ self._record_failed = record_failed
53
55
 
54
56
  for func_name, _callable in custom_func.items():
55
57
  if isinstance(_callable, Callable):
@@ -105,7 +107,10 @@ class Crawler(threading.Thread):
105
107
 
106
108
  elif seed.params.retry > self.max_retries:
107
109
  seed.params.seed_status = DealModel.fail
108
- self._delete_seed(seed)
110
+ if self._record_failed:
111
+ self.parse(Response(seed, "failed"))
112
+ else:
113
+ self._delete_seed(seed)
109
114
  continue
110
115
 
111
116
  seed_detail_log_info = LogTemplate.log_info(seed.to_dict)
@@ -98,6 +98,7 @@ class Launcher(threading.Thread):
98
98
  self._upload_queue_max_size = setting.UPLOAD_QUEUE_MAX_SIZE
99
99
 
100
100
  self._spider_max_retries = setting.SPIDER_MAX_RETRIES
101
+ self._record_failed = setting.RECORD_FAILED_SPIDER
101
102
  self._spider_thread_num = setting.SPIDER_THREAD_NUM
102
103
  self._spider_time_sleep = setting.SPIDER_TIME_SLEEP
103
104
  self._spider_max_count = setting.SPIDER_MAX_COUNT
@@ -215,6 +216,7 @@ class Launcher(threading.Thread):
215
216
  delete_seed=self._delete_seed,
216
217
  upload_data=self._upload_data,
217
218
  custom_func=self.__CUSTOM_FUNC__,
219
+ record_failed=self._record_failed,
218
220
  thread_num = self._spider_thread_num,
219
221
  max_retries = self._spider_max_retries,
220
222
  time_sleep=self._spider_time_sleep
cobweb/setting.py CHANGED
@@ -58,6 +58,7 @@ DONE_MODEL = 0 # 0:种子消费成功直接从队列移除,失败则添加
58
58
  SPIDER_THREAD_NUM = 10
59
59
  SPIDER_MAX_RETRIES = 5
60
60
  SPIDER_TIME_SLEEP = 10
61
+ RECORD_FAILED_SPIDER = False
61
62
 
62
63
  SPIDER_MAX_COUNT = 1000 # 在规定时间窗口内最大采集数
63
64
  TIME_WINDOW = 60 # 频控固定时间窗口(秒)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.65
3
+ Version: 1.2.66
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -1,6 +1,6 @@
1
1
  cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
2
2
  cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
3
- cobweb/setting.py,sha256=LlWZeZLVMeCd9NRCPkFDf_DK0IUua1L2U_zYFPJpn_k,2273
3
+ cobweb/setting.py,sha256=YlNrQSUgly5ah1-7Mx1RWypA5cPcaEIGV4neNJItGZ4,2302
4
4
  cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
5
5
  cobweb/base/basic.py,sha256=Z56SSLB3I2IGHWCCcSy0Qbfzj8Qbg_po3gP32q1jh4k,7741
6
6
  cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
@@ -13,7 +13,7 @@ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
13
13
  cobweb/base/seed.py,sha256=A-F1urjbE5hYNWTCwq3sUV4nrxlK_RGMoCmjBmIwYsI,3158
14
14
  cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
15
15
  cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
16
- cobweb/crawlers/crawler.py,sha256=yV0R-AoVb7xg722zpIEs9aYFYV2MfZ634qzKJgDXbZo,9089
16
+ cobweb/crawlers/crawler.py,sha256=ZSau2W7YMR_XiJmHlkmAJbeH2sbWyma_yGEY2h70yVs,9289
17
17
  cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
18
18
  cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
19
19
  cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
@@ -22,7 +22,7 @@ cobweb/db/redis_db_new.py,sha256=F09LWVjtC2JFdCaKatZ2bAOLKbsnes85_nZRe2dtSIc,469
22
22
  cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
23
23
  cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
24
24
  cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
25
- cobweb/launchers/launcher.py,sha256=aR1cnQymjQQUJe-W_dRoQTd9t3Qav9QkLDzeWW7i0xE,7921
25
+ cobweb/launchers/launcher.py,sha256=m6NKrVxcqEGXctIAAXXGEsdSFAyK0t49AWxQDCKGJiY,8027
26
26
  cobweb/launchers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
27
27
  cobweb/launchers/launcher_api.py,sha256=3-A6k3Igvi-xnvP9M_3NkJKUweDJ_pY10ZrHClteD-g,8628
28
28
  cobweb/launchers/launcher_pro.py,sha256=xM2z3nCTb6gehuD1Ggyvli2knGAPjGczbjOhKWswVpo,8424
@@ -38,8 +38,8 @@ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
38
38
  cobweb/utils/dotting.py,sha256=mVICaa26R-dQ4JGmPK-kkR6QjX38QiRewXZnGb2DCIc,1784
39
39
  cobweb/utils/oss.py,sha256=6x_ugXanh1R-6ZylQzUDQh4OeFZHujhWFCOxbzy53JY,3984
40
40
  cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
41
- cobweb_launcher-1.2.65.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
42
- cobweb_launcher-1.2.65.dist-info/METADATA,sha256=zFRzApm4_ADTxcg4-18wMQRM6AcxeD0Oww0lJY7_DBk,6510
43
- cobweb_launcher-1.2.65.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
44
- cobweb_launcher-1.2.65.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
45
- cobweb_launcher-1.2.65.dist-info/RECORD,,
41
+ cobweb_launcher-1.2.66.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
42
+ cobweb_launcher-1.2.66.dist-info/METADATA,sha256=jmTWYE_UNh52CVZ9lV5D6THP36hrkcWAaawHkKvLH_s,6510
43
+ cobweb_launcher-1.2.66.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
44
+ cobweb_launcher-1.2.66.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
45
+ cobweb_launcher-1.2.66.dist-info/RECORD,,