cobweb-launcher 1.2.65__tar.gz → 1.2.67__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {cobweb-launcher-1.2.65/cobweb_launcher.egg-info → cobweb-launcher-1.2.67}/PKG-INFO +1 -1
  2. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/crawlers/crawler.py +6 -1
  3. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/launchers/launcher.py +2 -0
  4. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/setting.py +1 -0
  5. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67/cobweb_launcher.egg-info}/PKG-INFO +1 -1
  6. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/setup.py +1 -1
  7. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/LICENSE +0 -0
  8. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/README.md +0 -0
  9. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/__init__.py +0 -0
  10. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/base/__init__.py +0 -0
  11. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/base/common_queue.py +0 -0
  12. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/base/decorators.py +0 -0
  13. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/base/item.py +0 -0
  14. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/base/log.py +0 -0
  15. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/base/request.py +0 -0
  16. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/base/response.py +0 -0
  17. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/base/seed.py +0 -0
  18. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/constant.py +0 -0
  19. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/crawlers/__init__.py +0 -0
  20. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/crawlers/base_crawler.py +0 -0
  21. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/crawlers/file_crawler.py +0 -0
  22. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/db/__init__.py +0 -0
  23. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/db/api_db.py +0 -0
  24. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/db/redis_db.py +0 -0
  25. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/db/redis_db_new.py +0 -0
  26. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/exceptions/__init__.py +0 -0
  27. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/exceptions/oss_db_exception.py +0 -0
  28. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/launchers/__init__.py +0 -0
  29. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/launchers/launcher_air.py +0 -0
  30. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/launchers/launcher_api.py +0 -0
  31. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/launchers/launcher_pro.py +0 -0
  32. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/pipelines/__init__.py +0 -0
  33. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/pipelines/pipeline.py +0 -0
  34. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/pipelines/pipeline_console.py +0 -0
  35. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/pipelines/pipeline_loghub.py +0 -0
  36. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/utils/__init__.py +0 -0
  37. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/utils/bloom.py +0 -0
  38. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/utils/dotting.py +0 -0
  39. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/utils/oss.py +0 -0
  40. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb/utils/tools.py +0 -0
  41. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  42. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  43. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb_launcher.egg-info/requires.txt +0 -0
  44. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/cobweb_launcher.egg-info/top_level.txt +0 -0
  45. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/setup.cfg +0 -0
  46. {cobweb-launcher-1.2.65 → cobweb-launcher-1.2.67}/test/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.65
3
+ Version: 1.2.67
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -36,6 +36,7 @@ class Crawler(threading.Thread):
36
36
  delete_seed: Callable,
37
37
  upload_data: Callable,
38
38
  custom_func: Union[Mapping[str, Callable]],
39
+ record_failed: bool,
39
40
  thread_num: int,
40
41
  max_retries: int,
41
42
  time_sleep: int,
@@ -50,6 +51,7 @@ class Crawler(threading.Thread):
50
51
  self._add_seed = add_seed
51
52
  self._delete_seed = delete_seed
52
53
  self._upload_data = upload_data
54
+ self._record_failed = record_failed
53
55
 
54
56
  for func_name, _callable in custom_func.items():
55
57
  if isinstance(_callable, Callable):
@@ -105,7 +107,10 @@ class Crawler(threading.Thread):
105
107
 
106
108
  elif seed.params.retry > self.max_retries:
107
109
  seed.params.seed_status = DealModel.fail
108
- self._delete_seed(seed)
110
+ if self._record_failed:
111
+ self.parse(Response(seed, "max_retries", fialed=True))
112
+ else:
113
+ self._delete_seed(seed)
109
114
  continue
110
115
 
111
116
  seed_detail_log_info = LogTemplate.log_info(seed.to_dict)
@@ -98,6 +98,7 @@ class Launcher(threading.Thread):
98
98
  self._upload_queue_max_size = setting.UPLOAD_QUEUE_MAX_SIZE
99
99
 
100
100
  self._spider_max_retries = setting.SPIDER_MAX_RETRIES
101
+ self._record_failed = setting.RECORD_FAILED_SPIDER
101
102
  self._spider_thread_num = setting.SPIDER_THREAD_NUM
102
103
  self._spider_time_sleep = setting.SPIDER_TIME_SLEEP
103
104
  self._spider_max_count = setting.SPIDER_MAX_COUNT
@@ -215,6 +216,7 @@ class Launcher(threading.Thread):
215
216
  delete_seed=self._delete_seed,
216
217
  upload_data=self._upload_data,
217
218
  custom_func=self.__CUSTOM_FUNC__,
219
+ record_failed=self._record_failed,
218
220
  thread_num = self._spider_thread_num,
219
221
  max_retries = self._spider_max_retries,
220
222
  time_sleep=self._spider_time_sleep
@@ -58,6 +58,7 @@ DONE_MODEL = 0 # 0:种子消费成功直接从队列移除,失败则添加
58
58
  SPIDER_THREAD_NUM = 10
59
59
  SPIDER_MAX_RETRIES = 5
60
60
  SPIDER_TIME_SLEEP = 10
61
+ RECORD_FAILED_SPIDER = False
61
62
 
62
63
  SPIDER_MAX_COUNT = 1000 # 在规定时间窗口内最大采集数
63
64
  TIME_WINDOW = 60 # 频控固定时间窗口(秒)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.65
3
+ Version: 1.2.67
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="1.2.65",
8
+ version="1.2.67",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",