cobweb-launcher 1.1.6__tar.gz → 1.1.10__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cobweb-launcher might be problematic. Click here for more details.

Files changed (37)
  1. {cobweb-launcher-1.1.6/cobweb_launcher.egg-info → cobweb-launcher-1.1.10}/PKG-INFO +1 -1
  2. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/__init__.py +1 -1
  3. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/constant.py +1 -1
  4. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/crawlers/file_crawler.py +6 -4
  5. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/launchers/launcher.py +10 -6
  6. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/launchers/launcher_pro.py +4 -4
  7. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/setting.py +1 -0
  8. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/utils/oss.py +3 -0
  9. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10/cobweb_launcher.egg-info}/PKG-INFO +1 -1
  10. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/setup.py +2 -2
  11. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/LICENSE +0 -0
  12. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/README.md +0 -0
  13. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/base/__init__.py +0 -0
  14. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/base/common_queue.py +0 -0
  15. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/base/decorators.py +0 -0
  16. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/base/item.py +0 -0
  17. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/base/log.py +0 -0
  18. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/base/request.py +0 -0
  19. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/base/response.py +0 -0
  20. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/base/seed.py +0 -0
  21. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/crawlers/__init__.py +0 -0
  22. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/crawlers/base_crawler.py +0 -0
  23. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/db/__init__.py +0 -0
  24. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/db/redis_db.py +0 -0
  25. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/exceptions/__init__.py +0 -0
  26. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/exceptions/oss_db_exception.py +0 -0
  27. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/launchers/__init__.py +0 -0
  28. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/pipelines/__init__.py +0 -0
  29. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/pipelines/base_pipeline.py +0 -0
  30. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/pipelines/loghub_pipeline.py +0 -0
  31. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/utils/__init__.py +0 -0
  32. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb/utils/tools.py +0 -0
  33. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  34. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  35. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb_launcher.egg-info/requires.txt +0 -0
  36. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/cobweb_launcher.egg-info/top_level.txt +0 -0
  37. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.10}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.1.6
3
+ Version: 1.1.10
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -1,2 +1,2 @@
1
1
  from .launchers import Launcher, LauncherPro
2
- from .constant import Crawler
2
+ from .constant import CrawlerModel
@@ -1,5 +1,5 @@
1
1
 
2
- class Crawler:
2
+ class CrawlerModel:
3
3
 
4
4
  default = "cobweb.crawlers.Crawler"
5
5
  file_air = "cobweb.crawlers.FileCrawlerAir"
@@ -17,7 +17,7 @@ class FileCrawlerAir(Crawler):
17
17
  seed_dict = item.seed.to_dict
18
18
  bucket_name = oss_util.bucket
19
19
  try:
20
- key = item.seed.oss_path
20
+ key = item.seed.oss_path or getattr(item, "oss_path", None)
21
21
  if oss_util.exists(key):
22
22
  content_length = oss_util.head(key).content_length
23
23
  yield Response(item.seed, "exists", bucket_name=bucket_name, data_size=content_length, **seed_dict)
@@ -90,14 +90,14 @@ class FileCrawlerAir(Crawler):
90
90
  yield Seed(seed_dict, identifier="merge")
91
91
 
92
92
 
93
- class FileCrawlerPro(Crawler):
93
+ class VideoCrawler(FileCrawlerAir):
94
94
 
95
95
  @staticmethod
96
96
  def download(item: Request) -> Union[Seed, BaseItem, Response, str]:
97
97
  seed_dict = item.seed.to_dict
98
98
  bucket_name = oss_util.bucket
99
99
  try:
100
- key = item.seed.oss_path
100
+ key = item.seed.oss_path or getattr(item, "oss_path", None)
101
101
  if oss_util.exists(key):
102
102
  content_length = oss_util.head(key).content_length
103
103
  yield Response(item.seed, "exists", bucket_name=bucket_name, data_size=content_length, **seed_dict)
@@ -123,12 +123,14 @@ class FileCrawlerPro(Crawler):
123
123
  item.seed, response, filter=True, msg=f"response content type is {content_type}",
124
124
  bucket_name=bucket_name, data_size=content_length, **seed_dict
125
125
  )
126
+ response.close()
126
127
  elif position == 1 and min_upload_size >= content_length > 0:
127
128
  """过小文件标识返回"""
128
129
  yield Response(
129
130
  item.seed, response, filter=True, msg="file size is too small",
130
131
  bucket_name=bucket_name, data_size=content_length, **seed_dict
131
132
  )
133
+ response.close()
132
134
  elif position == 1 and chunk_size > content_length > min_upload_size:
133
135
  """小文件直接下载"""
134
136
  for part_data in response.iter_content(chunk_size):
@@ -153,12 +155,12 @@ class FileCrawlerPro(Crawler):
153
155
  seed_dict['position'] = position
154
156
  seed_dict['start'] = upload_content_length
155
157
 
156
- response.close()
157
158
  if content:
158
159
  oss_util.put_part(key, upload_id, position, content)
159
160
  content_length += len(content)
160
161
  oss_util.merge(key, upload_id)
161
162
  yield Response(item.seed, response, bucket_name=bucket_name, data_size=content_length, **seed_dict)
163
+ response.close()
162
164
 
163
165
  elif item.seed.params.identifier == "merge":
164
166
  oss_util.merge(key, seed_dict["upload_id"])
@@ -34,7 +34,7 @@ class Launcher(threading.Thread):
34
34
  "_delete",
35
35
  ]
36
36
 
37
- def __init__(self, task, project, custom_setting=None):
37
+ def __init__(self, task, project, custom_setting=None, **kwargs):
38
38
  super().__init__()
39
39
  self.task = task
40
40
  self.project = project
@@ -42,10 +42,11 @@ class Launcher(threading.Thread):
42
42
  self._stop = threading.Event() # 结束事件
43
43
  self._pause = threading.Event() # 暂停事件
44
44
 
45
+ _setting = dict()
46
+
45
47
  if custom_setting:
46
- setting_ = dict()
47
48
  if isinstance(custom_setting, dict):
48
- setting_ = custom_setting
49
+ _setting = custom_setting
49
50
  else:
50
51
  if isinstance(custom_setting, str):
51
52
  custom_setting = importlib.import_module(custom_setting)
@@ -53,9 +54,12 @@ class Launcher(threading.Thread):
53
54
  raise Exception
54
55
  for k, v in custom_setting.__dict__.items():
55
56
  if not k.startswith("__") and not inspect.ismodule(v):
56
- setting_[k] = v
57
- for k, v in setting_.items():
58
- setattr(setting, k, v)
57
+ _setting[k] = v
58
+
59
+ _setting.update(**kwargs)
60
+
61
+ for k, v in _setting.items():
62
+ setattr(setting, k.upper(), v)
59
63
 
60
64
  self._Crawler = dynamic_load_class(setting.CRAWLER)
61
65
  self._Pipeline = dynamic_load_class(setting.PIPELINE)
@@ -9,8 +9,8 @@ from cobweb.constant import DealModel, LogTemplate
9
9
 
10
10
  class LauncherPro(Launcher):
11
11
 
12
- def __init__(self, task, project, custom_setting=None):
13
- super().__init__(task, project, custom_setting)
12
+ def __init__(self, task, project, custom_setting=None, **kwargs):
13
+ super().__init__(task, project, custom_setting, **kwargs)
14
14
  self._todo = "{%s:%s}:todo" % (project, task)
15
15
  self._done = "{%s:%s}:done" % (project, task)
16
16
  self._fail = "{%s:%s}:fail" % (project, task)
@@ -147,12 +147,12 @@ class LauncherPro(Launcher):
147
147
  self._pause.clear()
148
148
  self._execute()
149
149
  elif queue_not_empty_count == 0:
150
- pooling_wait_seconds = 3
150
+ pooling_wait_seconds = 5
151
151
  check_emtpy_times += 1
152
152
  else:
153
153
  check_emtpy_times = 0
154
154
 
155
- if not self._db.zcard(self._todo) and check_emtpy_times > 2:
155
+ if not self._db.zcount(self._todo, _min=0, _max="(1000") and check_emtpy_times > 2:
156
156
  check_emtpy_times = 0
157
157
  self.__DOING__ = {}
158
158
  self._pause.set()
@@ -26,6 +26,7 @@ OSS_SECRET_KEY = os.getenv("OSS_SECRET_KEY")
26
26
  OSS_CHUNK_SIZE = 10 * 1024 ** 2
27
27
  OSS_MIN_UPLOAD_SIZE = 1024
28
28
 
29
+
29
30
  # 采集器选择
30
31
  CRAWLER = "cobweb.crawlers.Crawler"
31
32
 
@@ -15,11 +15,13 @@ class OssUtil:
15
15
  secret_key=None,
16
16
  chunk_size=None,
17
17
  min_upload_size=None,
18
+ **kwargs
18
19
  ):
19
20
  self.bucket = bucket or setting.OSS_BUCKET
20
21
  self.endpoint = endpoint or setting.OSS_ENDPOINT
21
22
  self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
22
23
  self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
24
+
23
25
  self._auth = Auth(
24
26
  access_key_id=access_key or setting.OSS_ACCESS_KEY,
25
27
  access_key_secret=secret_key or setting.OSS_SECRET_KEY
@@ -28,6 +30,7 @@ class OssUtil:
28
30
  auth=self._auth,
29
31
  endpoint=self.endpoint,
30
32
  bucket_name=self.bucket,
33
+ **kwargs
31
34
  )
32
35
 
33
36
  def exists(self, key: str) -> bool:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.1.6
3
+ Version: 1.1.10
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="1.1.6",
8
+ version="1.1.10",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",
@@ -22,4 +22,4 @@ setup(
22
22
  "cobweb-launcher, cobweb",
23
23
  ],
24
24
  python_requires=">=3.7",
25
- )
25
+ )