cobweb-launcher 1.2.64__py3-none-any.whl → 1.2.66__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -36,6 +36,7 @@ class Crawler(threading.Thread):
36
36
  delete_seed: Callable,
37
37
  upload_data: Callable,
38
38
  custom_func: Union[Mapping[str, Callable]],
39
+ record_failed: bool,
39
40
  thread_num: int,
40
41
  max_retries: int,
41
42
  time_sleep: int,
@@ -50,6 +51,7 @@ class Crawler(threading.Thread):
50
51
  self._add_seed = add_seed
51
52
  self._delete_seed = delete_seed
52
53
  self._upload_data = upload_data
54
+ self._record_failed = record_failed
53
55
 
54
56
  for func_name, _callable in custom_func.items():
55
57
  if isinstance(_callable, Callable):
@@ -105,7 +107,10 @@ class Crawler(threading.Thread):
105
107
 
106
108
  elif seed.params.retry > self.max_retries:
107
109
  seed.params.seed_status = DealModel.fail
108
- self._delete_seed(seed)
110
+ if self._record_failed:
111
+ self.parse(Response(seed, "failed"))
112
+ else:
113
+ self._delete_seed(seed)
109
114
  continue
110
115
 
111
116
  seed_detail_log_info = LogTemplate.log_info(seed.to_dict)
@@ -149,6 +154,7 @@ class Crawler(threading.Thread):
149
154
  data_size=int(download_item.response.headers.get("content-length", 0)),
150
155
  cost_time=end_time - start_time, status = 200,
151
156
  url=download_item.response.url,
157
+ seed=download_item.seed.to_string,
152
158
  proxy_type=seed.params.proxy_type,
153
159
  proxy=seed.params.proxy,
154
160
  project=self.project, task=self.task,
@@ -179,6 +185,7 @@ class Crawler(threading.Thread):
179
185
  topic=urlparse(url).netloc,
180
186
  data_size=-1, cost_time=-1,
181
187
  status=status, url=url,
188
+ seed=seed.to_string,
182
189
  proxy_type=seed.params.proxy_type,
183
190
  proxy=seed.params.proxy,
184
191
  project=self.project,
@@ -98,6 +98,7 @@ class Launcher(threading.Thread):
98
98
  self._upload_queue_max_size = setting.UPLOAD_QUEUE_MAX_SIZE
99
99
 
100
100
  self._spider_max_retries = setting.SPIDER_MAX_RETRIES
101
+ self._record_failed = setting.RECORD_FAILED_SPIDER
101
102
  self._spider_thread_num = setting.SPIDER_THREAD_NUM
102
103
  self._spider_time_sleep = setting.SPIDER_TIME_SLEEP
103
104
  self._spider_max_count = setting.SPIDER_MAX_COUNT
@@ -215,6 +216,7 @@ class Launcher(threading.Thread):
215
216
  delete_seed=self._delete_seed,
216
217
  upload_data=self._upload_data,
217
218
  custom_func=self.__CUSTOM_FUNC__,
219
+ record_failed=self._record_failed,
218
220
  thread_num = self._spider_thread_num,
219
221
  max_retries = self._spider_max_retries,
220
222
  time_sleep=self._spider_time_sleep
cobweb/setting.py CHANGED
@@ -58,6 +58,7 @@ DONE_MODEL = 0 # 0:种子消费成功直接从队列移除,失败则添加
58
58
  SPIDER_THREAD_NUM = 10
59
59
  SPIDER_MAX_RETRIES = 5
60
60
  SPIDER_TIME_SLEEP = 10
61
+ RECORD_FAILED_SPIDER = False
61
62
 
62
63
  SPIDER_MAX_COUNT = 1000 # 在规定时间窗口内最大采集数
63
64
  TIME_WINDOW = 60 # 频控固定时间窗口(秒)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.64
3
+ Version: 1.2.66
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -1,6 +1,6 @@
1
1
  cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
2
2
  cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
3
- cobweb/setting.py,sha256=LlWZeZLVMeCd9NRCPkFDf_DK0IUua1L2U_zYFPJpn_k,2273
3
+ cobweb/setting.py,sha256=YlNrQSUgly5ah1-7Mx1RWypA5cPcaEIGV4neNJItGZ4,2302
4
4
  cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
5
5
  cobweb/base/basic.py,sha256=Z56SSLB3I2IGHWCCcSy0Qbfzj8Qbg_po3gP32q1jh4k,7741
6
6
  cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
@@ -13,7 +13,7 @@ cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
13
13
  cobweb/base/seed.py,sha256=A-F1urjbE5hYNWTCwq3sUV4nrxlK_RGMoCmjBmIwYsI,3158
14
14
  cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
15
15
  cobweb/crawlers/base_crawler.py,sha256=ee_WSDnPQpPTk6wlFuY2UEx5L3hcsAZFcr6i3GLSry8,5751
16
- cobweb/crawlers/crawler.py,sha256=kbpgBllmA2ve3Hp-XvVH89t2q5G39i-m7it5xJ1p1WE,8973
16
+ cobweb/crawlers/crawler.py,sha256=ZSau2W7YMR_XiJmHlkmAJbeH2sbWyma_yGEY2h70yVs,9289
17
17
  cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
18
18
  cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
19
19
  cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
@@ -22,7 +22,7 @@ cobweb/db/redis_db_new.py,sha256=F09LWVjtC2JFdCaKatZ2bAOLKbsnes85_nZRe2dtSIc,469
22
22
  cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
23
23
  cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
24
24
  cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
25
- cobweb/launchers/launcher.py,sha256=aR1cnQymjQQUJe-W_dRoQTd9t3Qav9QkLDzeWW7i0xE,7921
25
+ cobweb/launchers/launcher.py,sha256=m6NKrVxcqEGXctIAAXXGEsdSFAyK0t49AWxQDCKGJiY,8027
26
26
  cobweb/launchers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
27
27
  cobweb/launchers/launcher_api.py,sha256=3-A6k3Igvi-xnvP9M_3NkJKUweDJ_pY10ZrHClteD-g,8628
28
28
  cobweb/launchers/launcher_pro.py,sha256=xM2z3nCTb6gehuD1Ggyvli2knGAPjGczbjOhKWswVpo,8424
@@ -38,8 +38,8 @@ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
38
38
  cobweb/utils/dotting.py,sha256=mVICaa26R-dQ4JGmPK-kkR6QjX38QiRewXZnGb2DCIc,1784
39
39
  cobweb/utils/oss.py,sha256=6x_ugXanh1R-6ZylQzUDQh4OeFZHujhWFCOxbzy53JY,3984
40
40
  cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
41
- cobweb_launcher-1.2.64.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
42
- cobweb_launcher-1.2.64.dist-info/METADATA,sha256=NikLvEH211SWsw5jTUhaLsXROPJWvc2bkEUq3n4wlaM,6510
43
- cobweb_launcher-1.2.64.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
44
- cobweb_launcher-1.2.64.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
45
- cobweb_launcher-1.2.64.dist-info/RECORD,,
41
+ cobweb_launcher-1.2.66.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
42
+ cobweb_launcher-1.2.66.dist-info/METADATA,sha256=jmTWYE_UNh52CVZ9lV5D6THP36hrkcWAaawHkKvLH_s,6510
43
+ cobweb_launcher-1.2.66.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
44
+ cobweb_launcher-1.2.66.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
45
+ cobweb_launcher-1.2.66.dist-info/RECORD,,