cobweb-launcher 1.2.64__tar.gz → 1.2.66__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cobweb-launcher might be problematic. Click here for more details.

Files changed (46)
  1. {cobweb-launcher-1.2.64/cobweb_launcher.egg-info → cobweb-launcher-1.2.66}/PKG-INFO +1 -1
  2. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/crawlers/crawler.py +8 -1
  3. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/launchers/launcher.py +2 -0
  4. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/setting.py +1 -0
  5. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66/cobweb_launcher.egg-info}/PKG-INFO +1 -1
  6. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/setup.py +1 -1
  7. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/LICENSE +0 -0
  8. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/README.md +0 -0
  9. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/__init__.py +0 -0
  10. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/base/__init__.py +0 -0
  11. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/base/common_queue.py +0 -0
  12. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/base/decorators.py +0 -0
  13. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/base/item.py +0 -0
  14. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/base/log.py +0 -0
  15. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/base/request.py +0 -0
  16. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/base/response.py +0 -0
  17. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/base/seed.py +0 -0
  18. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/constant.py +0 -0
  19. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/crawlers/__init__.py +0 -0
  20. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/crawlers/base_crawler.py +0 -0
  21. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/crawlers/file_crawler.py +0 -0
  22. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/db/__init__.py +0 -0
  23. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/db/api_db.py +0 -0
  24. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/db/redis_db.py +0 -0
  25. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/db/redis_db_new.py +0 -0
  26. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/exceptions/__init__.py +0 -0
  27. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/exceptions/oss_db_exception.py +0 -0
  28. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/launchers/__init__.py +0 -0
  29. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/launchers/launcher_air.py +0 -0
  30. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/launchers/launcher_api.py +0 -0
  31. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/launchers/launcher_pro.py +0 -0
  32. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/pipelines/__init__.py +0 -0
  33. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/pipelines/pipeline.py +0 -0
  34. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/pipelines/pipeline_console.py +0 -0
  35. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/pipelines/pipeline_loghub.py +0 -0
  36. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/utils/__init__.py +0 -0
  37. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/utils/bloom.py +0 -0
  38. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/utils/dotting.py +0 -0
  39. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/utils/oss.py +0 -0
  40. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb/utils/tools.py +0 -0
  41. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  42. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  43. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb_launcher.egg-info/requires.txt +0 -0
  44. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/cobweb_launcher.egg-info/top_level.txt +0 -0
  45. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/setup.cfg +0 -0
  46. {cobweb-launcher-1.2.64 → cobweb-launcher-1.2.66}/test/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.64
3
+ Version: 1.2.66
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -36,6 +36,7 @@ class Crawler(threading.Thread):
36
36
  delete_seed: Callable,
37
37
  upload_data: Callable,
38
38
  custom_func: Union[Mapping[str, Callable]],
39
+ record_failed: bool,
39
40
  thread_num: int,
40
41
  max_retries: int,
41
42
  time_sleep: int,
@@ -50,6 +51,7 @@ class Crawler(threading.Thread):
50
51
  self._add_seed = add_seed
51
52
  self._delete_seed = delete_seed
52
53
  self._upload_data = upload_data
54
+ self._record_failed = record_failed
53
55
 
54
56
  for func_name, _callable in custom_func.items():
55
57
  if isinstance(_callable, Callable):
@@ -105,7 +107,10 @@ class Crawler(threading.Thread):
105
107
 
106
108
  elif seed.params.retry > self.max_retries:
107
109
  seed.params.seed_status = DealModel.fail
108
- self._delete_seed(seed)
110
+ if self._record_failed:
111
+ self.parse(Response(seed, "failed"))
112
+ else:
113
+ self._delete_seed(seed)
109
114
  continue
110
115
 
111
116
  seed_detail_log_info = LogTemplate.log_info(seed.to_dict)
@@ -149,6 +154,7 @@ class Crawler(threading.Thread):
149
154
  data_size=int(download_item.response.headers.get("content-length", 0)),
150
155
  cost_time=end_time - start_time, status = 200,
151
156
  url=download_item.response.url,
157
+ seed=download_item.seed.to_string,
152
158
  proxy_type=seed.params.proxy_type,
153
159
  proxy=seed.params.proxy,
154
160
  project=self.project, task=self.task,
@@ -179,6 +185,7 @@ class Crawler(threading.Thread):
179
185
  topic=urlparse(url).netloc,
180
186
  data_size=-1, cost_time=-1,
181
187
  status=status, url=url,
188
+ seed=seed.to_string,
182
189
  proxy_type=seed.params.proxy_type,
183
190
  proxy=seed.params.proxy,
184
191
  project=self.project,
@@ -98,6 +98,7 @@ class Launcher(threading.Thread):
98
98
  self._upload_queue_max_size = setting.UPLOAD_QUEUE_MAX_SIZE
99
99
 
100
100
  self._spider_max_retries = setting.SPIDER_MAX_RETRIES
101
+ self._record_failed = setting.RECORD_FAILED_SPIDER
101
102
  self._spider_thread_num = setting.SPIDER_THREAD_NUM
102
103
  self._spider_time_sleep = setting.SPIDER_TIME_SLEEP
103
104
  self._spider_max_count = setting.SPIDER_MAX_COUNT
@@ -215,6 +216,7 @@ class Launcher(threading.Thread):
215
216
  delete_seed=self._delete_seed,
216
217
  upload_data=self._upload_data,
217
218
  custom_func=self.__CUSTOM_FUNC__,
219
+ record_failed=self._record_failed,
218
220
  thread_num = self._spider_thread_num,
219
221
  max_retries = self._spider_max_retries,
220
222
  time_sleep=self._spider_time_sleep
@@ -58,6 +58,7 @@ DONE_MODEL = 0 # 0:种子消费成功直接从队列移除,失败则添加
58
58
  SPIDER_THREAD_NUM = 10
59
59
  SPIDER_MAX_RETRIES = 5
60
60
  SPIDER_TIME_SLEEP = 10
61
+ RECORD_FAILED_SPIDER = False
61
62
 
62
63
  SPIDER_MAX_COUNT = 1000 # 在规定时间窗口内最大采集数
63
64
  TIME_WINDOW = 60 # 频控固定时间窗口(秒)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.64
3
+ Version: 1.2.66
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="1.2.64",
8
+ version="1.2.66",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",