cobweb-launcher 1.3.14__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. cobweb/__init__.py +1 -1
  2. cobweb/base/__init__.py +4 -149
  3. cobweb/base/common_queue.py +0 -13
  4. cobweb/base/request.py +2 -14
  5. cobweb/base/seed.py +16 -12
  6. cobweb/constant.py +0 -16
  7. cobweb/crawlers/crawler.py +3 -85
  8. cobweb/db/redis_db.py +109 -52
  9. cobweb/launchers/__init__.py +8 -2
  10. cobweb/launchers/distributor.py +171 -0
  11. cobweb/launchers/launcher.py +87 -131
  12. cobweb/launchers/uploader.py +65 -0
  13. cobweb/pipelines/pipeline.py +3 -36
  14. cobweb/schedulers/__init__.py +1 -3
  15. cobweb/schedulers/launcher_air.py +93 -0
  16. cobweb/schedulers/launcher_api.py +225 -0
  17. cobweb/schedulers/scheduler.py +85 -0
  18. cobweb/schedulers/scheduler_with_redis.py +177 -0
  19. cobweb/setting.py +15 -32
  20. cobweb/utils/__init__.py +2 -1
  21. cobweb/utils/decorators.py +43 -0
  22. cobweb/utils/dotting.py +55 -0
  23. cobweb/utils/oss.py +28 -9
  24. {cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/METADATA +1 -1
  25. cobweb_launcher-3.1.0.dist-info/RECORD +41 -0
  26. cobweb/base/basic.py +0 -295
  27. cobweb/base/dotting.py +0 -35
  28. cobweb/launchers/launcher_air.py +0 -88
  29. cobweb/launchers/launcher_api.py +0 -88
  30. cobweb/launchers/launcher_pro.py +0 -88
  31. cobweb/schedulers/scheduler_api.py +0 -72
  32. cobweb/schedulers/scheduler_redis.py +0 -72
  33. cobweb_launcher-1.3.14.dist-info/RECORD +0 -40
  34. {cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/LICENSE +0 -0
  35. {cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/WHEEL +0 -0
  36. {cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/top_level.txt +0 -0
cobweb/utils/oss.py CHANGED
@@ -1,9 +1,9 @@
-# from typing import List
+
 from cobweb import setting
 from requests import Response
 from oss2 import Auth, Bucket, models, PartIterator
 from cobweb.exceptions import oss_db_exception
-from cobweb.base import Decorators
+from cobweb.base.decorators import decorator_oss_db
 
 
 class OssUtil:
@@ -23,6 +23,9 @@ class OssUtil:
         self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
         self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
 
+        self.failed_count = 0
+        self._kw = kwargs
+
         self._auth = Auth(
             access_key_id=access_key or setting.OSS_ACCESS_KEY,
             access_key_secret=secret_key or setting.OSS_SECRET_KEY
@@ -31,26 +34,42 @@ class OssUtil:
             auth=self._auth,
             endpoint=self.endpoint,
             bucket_name=self.bucket,
-            **kwargs
+            **self._kw
         )
 
+    def failed(self):
+        self.failed_count += 1
+        if self.failed_count >= 5:
+            self._client = Bucket(
+                auth=self._auth,
+                endpoint=self.endpoint,
+                bucket_name=self.bucket,
+                **self._kw
+            )
+
     def exists(self, key: str) -> bool:
-        return self._client.object_exists(key)
+        try:
+            result = self._client.object_exists(key)
+            self.failed_count = 0
+            return result
+        except Exception as e:
+            self.failed()
+            raise e
 
     def head(self, key: str) -> models.HeadObjectResult:
         return self._client.head_object(key)
 
-    @Decorators.decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
+    @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
     def init_part(self, key) -> models.InitMultipartUploadResult:
         """Initialize a multipart upload."""
         return self._client.init_multipart_upload(key)
 
-    @Decorators.decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
+    @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
     def put(self, key, data) -> models.PutObjectResult:
         """Upload a whole object."""
         return self._client.put_object(key, data)
 
-    @Decorators.decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
+    @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
     def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
         """Upload a single part."""
         return self._client.upload_part(key, upload_id, position, data)
@@ -59,13 +78,13 @@ class OssUtil:
         """List the uploaded parts."""
         return [part_info for part_info in PartIterator(self._client, key, upload_id)]
 
-    @Decorators.decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
+    @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
     def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
         """Complete (merge) a multipart upload."""
         headers = None if parts else {"x-oss-complete-all": "yes"}
         return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)
 
-    @Decorators.decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
+    @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
     def append(self, key, position, data) -> models.AppendObjectResult:
         """Append to an appendable object."""
         return self._client.append_object(key, position, data)
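The decorator previously reached through `cobweb.base.Decorators` is now imported directly as `decorator_oss_db` from a dedicated decorators module whose source is not part of this diff. As a rough sketch only (the retry count and wrapping behaviour below are assumptions, not taken from the package), such a decorator would convert a failing `Bucket` call into the supplied `oss_db_exception` class:

```python
import functools


def decorator_oss_db(exception, retries=3):
    """Hypothetical sketch: retry an OSS call and re-raise the last
    failure as the domain exception passed in by the caller."""
    def decorate(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            last_error = None
            for _ in range(retries):
                try:
                    return func(self, *args, **kwargs)
                except Exception as error:  # broad catch is deliberate in this sketch
                    last_error = error
            raise exception(last_error)
        return wrapper
    return decorate
```

The new `failed_count` bookkeeping in `exists` works along the same defensive lines: every successful call resets the counter, and after five consecutive failures the `Bucket` client is rebuilt from the constructor arguments saved in `self._kw`.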
{cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cobweb-launcher
-Version: 1.3.14
+Version: 3.1.0
 Summary: spider_hole
 Home-page: https://github.com/Juannie-PP/cobweb
 Author: Juannie-PP
cobweb_launcher-3.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,41 @@
+cobweb/__init__.py,sha256=UfNq1asNq7_a8IKf5WCbk0ju5fxT4wElAyaItf_a-d8,67
+cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
+cobweb/setting.py,sha256=yP9ZkVShTY4sCZ4DpzI_hO-FMSf0vGrQvkp7UCmZaa0,2338
+cobweb/base/__init__.py,sha256=epVQttTHQcux9kAtrGuO9HB_wP74L-pi74vhzc4QOCw,224
+cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
+cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
+cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
+cobweb/base/request.py,sha256=tEkgMVUfdQI-kZuzWuiit9P_q4Q9-_RZh9aXXpc0314,2352
+cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
+cobweb/base/seed.py,sha256=A-F1urjbE5hYNWTCwq3sUV4nrxlK_RGMoCmjBmIwYsI,3158
+cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
+cobweb/crawlers/crawler.py,sha256=73WFGVNvIFvH8cP5RfDhAhM-WaFL_mdwnAhNohDnBO0,696
+cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
+cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
+cobweb/db/redis_db.py,sha256=rK6PnKLLVTeg6HMGVtlYl4_b5og3aO2J5jZrqU2Aoso,7721
+cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
+cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
+cobweb/launchers/__init__.py,sha256=XEW4hmBMPKp2bRkHBxxlplXwvKdlLVYBFg3etjtbRXo,222
+cobweb/launchers/distributor.py,sha256=Br5G6Jn6aJeaAX9y3yBfNVRvq2-kIIz9LsjRCXLmzGg,6447
+cobweb/launchers/launcher.py,sha256=WTyv5bgKE_VwZXuWia_F26fH2zi6mps0wsnC16bks9k,5253
+cobweb/launchers/uploader.py,sha256=2zAYMEpfUYo8zCzH6Jhwkj9Y4_aJYVNnx0D5H8srKW4,1806
+cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
+cobweb/pipelines/pipeline.py,sha256=qwoOYMhlAB-MnEmMNpNeauTHoRTOr2wyBDYS4MF6B1c,261
+cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
+cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
+cobweb/schedulers/__init__.py,sha256=LEya11fdAv0X28YzbQTeC1LQZ156Fj4cyEMGqQHUWW0,49
+cobweb/schedulers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
+cobweb/schedulers/launcher_api.py,sha256=3-A6k3Igvi-xnvP9M_3NkJKUweDJ_pY10ZrHClteD-g,8628
+cobweb/schedulers/scheduler.py,sha256=mN9XvaOCzNnBWQfzslTIM860ZGq2gyLtxpbVUd0Slqs,2240
+cobweb/schedulers/scheduler_with_redis.py,sha256=pCimXqqpJ2xI1fs-6Ecy2vs9vHC-PPFX4IBMLDtpbho,6394
+cobweb/utils/__init__.py,sha256=8Bu5iZrIOUMS4jv4hi0inRPtscf6MK0ZFa7gQ7ZFoqw,145
+cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
+cobweb/utils/decorators.py,sha256=066JCY_RNMr2mXkhEv8XTtOOKkv9CFiBm0ZNCcC-2ag,1131
+cobweb/utils/dotting.py,sha256=mVICaa26R-dQ4JGmPK-kkR6QjX38QiRewXZnGb2DCIc,1784
+cobweb/utils/oss.py,sha256=6x_ugXanh1R-6ZylQzUDQh4OeFZHujhWFCOxbzy53JY,3984
+cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
+cobweb_launcher-3.1.0.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
+cobweb_launcher-3.1.0.dist-info/METADATA,sha256=Cl_g5S9zmKx6qhAPllDGYYpow_slpwwXYxJxtZoOzck,6509
+cobweb_launcher-3.1.0.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
+cobweb_launcher-3.1.0.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
+cobweb_launcher-3.1.0.dist-info/RECORD,,
cobweb/base/basic.py DELETED
@@ -1,295 +0,0 @@
-import json
-import random
-import time
-import hashlib
-import requests
-
-
-class Params:
-
-    def __init__(self, retry=None, priority=None, version=None, status=None):
-        self.retry = retry or 0
-        self.priority = priority or 300
-        self.version = version or int(time.time())
-        self.status = status
-
-
-class Seed:
-    __SEED_PARAMS__ = [
-        "retry",
-        "priority",
-        "version",
-        "status"
-    ]
-
-    def __init__(
-            self,
-            seed,
-            sid=None,
-            retry=None,
-            priority=None,
-            version=None,
-            status=None,
-            **kwargs
-    ):
-        if any(isinstance(seed, t) for t in (str, bytes)):
-            try:
-                item = json.loads(seed)
-                self._init_seed(item)
-            except json.JSONDecodeError:
-                self.__setattr__("url", seed)
-        elif isinstance(seed, dict):
-            self._init_seed(seed)
-        else:
-            raise TypeError(Exception(
-                f"seed type error, "
-                f"must be str or dict! "
-                f"seed: {seed}"
-            ))
-
-        seed_params = {
-            "retry": retry,
-            "priority": priority,
-            "version": version,
-            "status": status,
-        }
-
-        if kwargs:
-            # for k, v in kwargs.items():
-            #     if k in seed_params.keys():
-            #         seed_params[k] = v
-            #     else:
-            #         self.__setattr__(k, v)
-            self._init_seed(kwargs)
-            seed_params.update({
-                k: v for k, v in kwargs.items()
-                if k in self.__SEED_PARAMS__
-            })
-        if sid or not getattr(self, "sid", None):
-            self._init_id(sid)
-        self.params = Params(**seed_params)
-
-    def __getattr__(self, name):
-        return None
-
-    def __setitem__(self, key, value):
-        setattr(self, key, value)
-
-    def __getitem__(self, item):
-        return getattr(self, item)
-
-    def __str__(self):
-        return json.dumps(self.__dict__, ensure_ascii=False)
-
-    def __repr__(self):
-        chars = [f"{k}={v}" for k, v in self.__dict__.items()]
-        return f'{self.__class__.__name__}({", ".join(chars)})'
-
-    def _init_seed(self, seed_info: dict):
-        for k, v in seed_info.items():
-            if k not in self.__SEED_PARAMS__:
-                self.__setattr__(k, v)
-
-    def _init_id(self, sid):
-        if not sid:
-            sid = hashlib.md5(self.to_string.encode()).hexdigest()
-        self.__setattr__("sid", sid)
-
-    @property
-    def to_dict(self) -> dict:
-        seed = self.__dict__.copy()
-        if seed.get("params"):
-            del seed["params"]
-        return seed
-
-    @property
-    def to_string(self) -> str:
-        return json.dumps(
-            self.to_dict,
-            ensure_ascii=False,
-            separators=(",", ":")
-        )
-
-    @property
-    def seed(self):
-        return self.to_string
-
-
-class Request:
-    __SEED_PARAMS__ = [
-        "retry",
-        "priority",
-        "version",
-        "status"
-    ]
-
-    __REQUEST_ATTRS__ = {
-        "params",
-        "headers",
-        "cookies",
-        "data",
-        "json",
-        "files",
-        "auth",
-        "timeout",
-        "proxies",
-        "hooks",
-        "stream",
-        "verify",
-        "cert",
-        "allow_redirects",
-    }
-
-    def __init__(
-            self,
-            # url,
-            seed,
-            random_ua=True,
-            check_status_code=True,
-            retry=None,
-            priority=None,
-            version=None,
-            status=None,
-            **kwargs
-    ):
-        # self.url = url
-        self.check_status_code = check_status_code
-        self.request_setting = {}
-
-        seed_params = {
-            "retry": retry,
-            "priority": priority,
-            "version": version,
-            "status": status,
-        }
-
-        if isinstance(seed, Seed):
-            kwargs.update(**seed.to_dict)
-        elif isinstance(seed, str):
-            kwargs.update(**json.loads(seed))
-
-        for k, v in kwargs.items():
-            if k in self.__class__.__REQUEST_ATTRS__:
-                self.request_setting[k] = v
-                continue
-            elif k in self.__SEED_PARAMS__:
-                seed_params[k] = v
-            self.__setattr__(k, v)
-
-        if not getattr(self, "method", None):
-            self.method = "POST" if self.request_setting.get("data") or self.request_setting.get("json") else "GET"
-
-        if random_ua:
-            self._build_header()
-
-        self.params = Params(**seed_params)
-        # self.seed = self.to_string
-
-    @property
-    def _random_ua(self) -> str:
-        v1 = random.randint(4, 15)
-        v2 = random.randint(3, 11)
-        v3 = random.randint(1, 16)
-        v4 = random.randint(533, 605)
-        v5 = random.randint(1000, 6000)
-        v6 = random.randint(10, 80)
-        user_agent = (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_{v1}_{v2}) AppleWebKit/{v4}.{v3} "
-                      f"(KHTML, like Gecko) Chrome/105.0.0.0 Safari/{v4}.{v3} Edg/105.0.{v5}.{v6}")
-        return user_agent
-
-    def _build_header(self) -> dict:
-        if not self.request_setting.get("headers"):
-            self.request_setting["headers"] = {"accept": "*/*", "user-agent": self._random_ua}
-        elif "user-agent" not in [key.lower() for key in self.request_setting["headers"].keys()]:
-            self.request_setting["headers"]["user-agent"] = self._random_ua
-
-    def download(self) -> requests.Response:
-        response = requests.request(self.method, self.url, **self.request_setting)
-        if self.check_status_code:
-            response.raise_for_status()
-        return response
-
-    def __getattr__(self, name):
-        return None
-
-    def __setitem__(self, key, value):
-        setattr(self, key, value)
-
-    def __getitem__(self, item):
-        return getattr(self, item)
-
-    @property
-    def to_dict(self):
-        _dict = self.__dict__.copy()
-        # _dict.pop('seed')
-        _dict.pop('params')
-        _dict.pop('check_status_code')
-        # _dict.pop('request_setting')
-        return _dict
-
-    @property
-    def to_string(self) -> str:
-        return json.dumps(
-            self.to_dict,
-            ensure_ascii=False,
-            separators=(",", ":")
-        )
-
-    @property
-    def seed(self):
-        return self.to_string
-
-
-class Response:
-
-    def __init__(
-            self,
-            seed,
-            response,
-            retry=None,
-            priority=None,
-            version=None,
-            status=None,
-            **kwargs
-    ):
-        self.seed = seed
-        self.response = response
-        seed_params = {
-            "retry": retry,
-            "priority": priority,
-            "version": version,
-            "status": status,
-        }
-        for k, v in kwargs.items():
-            if k in seed_params.keys():
-                seed_params[k] = v
-            else:
-                self.__setattr__(k, v)
-        self.params = Params(**seed_params)
-
-    @property
-    def to_dict(self):
-        _dict = self.__dict__.copy()
-        _dict.pop('seed')
-        _dict.pop('response')
-        _dict.pop('method')
-        _dict.pop('params')
-        _dict.pop('request_setting')
-        return _dict
-
-    @property
-    def to_string(self) -> str:
-        return json.dumps(
-            self.to_dict,
-            ensure_ascii=False,
-            separators=(",", ":")
-        )
-
-    def __getattr__(self, name):
-        return None
-
-    def __setitem__(self, key, value):
-        setattr(self, key, value)
-
-    def __getitem__(self, item):
-        return getattr(self, item)
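In 1.3.14 this single module bundled `Params`, `Seed`, `Request`, and `Response`; 3.1.0 splits them across `cobweb/base/seed.py`, `cobweb/base/request.py`, and `cobweb/base/response.py` (see the file list above). A minimal usage sketch of the removed `Seed` class, assuming the 1.3.14 export from `cobweb.base` and a placeholder URL:

```python
from cobweb.base import Seed  # 1.3.14 export; illustrative only

# A seed can be built from a dict or a JSON string; unknown keys become
# attributes, while retry/priority/version/status are collected into Params.
seed = Seed({"url": "https://example.com/page/1"}, priority=100)

print(seed.sid)              # md5 of the serialized seed, set by _init_id
print(seed.params.priority)  # 100; unset fields fall back to Params defaults
print(seed.to_string)        # compact JSON of the payload, without `params`
```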
cobweb/base/dotting.py DELETED
@@ -1,35 +0,0 @@
-import os
-import json
-from aliyun.log import LogClient, LogItem, PutLogsRequest
-
-
-class LoghubDot:
-
-    def __init__(self):
-        endpoint = os.getenv("DOTTING_ENDPOINT", "")
-        accessKeyId = os.getenv("DOTTING_ACCESS_KEY", "")
-        accessKey = os.getenv("DOTTING_SECRET_KEY", "")
-        self.client = LogClient(endpoint=endpoint, accessKeyId=accessKeyId, accessKey=accessKey) \
-            if endpoint and accessKeyId and accessKey else None
-
-    def build(self, topic, **kwargs):
-        if self.client:
-            temp = {}
-            log_items = []
-            log_item = LogItem()
-            for key, value in kwargs.items():
-                if not isinstance(value, str):
-                    temp[key] = json.dumps(value, ensure_ascii=False)
-                else:
-                    temp[key] = value
-            contents = sorted(temp.items())
-            log_item.set_contents(contents)
-            log_items.append(log_item)
-            request = PutLogsRequest(
-                project="databee-download-log",
-                logstore="download-logging",
-                topic=topic,
-                logitems=log_items,
-                compress=True
-            )
-            self.client.put_logs(request=request)
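`LoghubDot` pushed crawl metrics to Aliyun Log Service and only built a client when all three `DOTTING_*` environment variables were present; its replacement in 3.1.0 lives in `cobweb/utils/dotting.py`. A usage sketch of the removed class (the endpoint, key values, and topic below are placeholders, not real settings):

```python
import os

from cobweb.base.dotting import LoghubDot  # module path as of 1.3.14

# All three variables must be set before LoghubDot() is constructed;
# otherwise self.client stays None and build() silently does nothing.
os.environ["DOTTING_ENDPOINT"] = "cn-hangzhou.log.aliyuncs.com"
os.environ["DOTTING_ACCESS_KEY"] = "<access-key-id>"
os.environ["DOTTING_SECRET_KEY"] = "<access-key-secret>"

dot = LoghubDot()
dot.build(
    topic="my-spider",                      # hypothetical topic
    status=200,                             # non-str values are JSON-encoded
    detail={"url": "https://example.com"},
)
# Entries go to the hard-coded project "databee-download-log",
# logstore "download-logging".
```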
cobweb/launchers/launcher_air.py DELETED
@@ -1,88 +0,0 @@
-# import time
-#
-# from cobweb.base import logger
-# from cobweb.constant import LogTemplate
-# from .launcher import Launcher, check_pause
-#
-#
-# class LauncherAir(Launcher):
-#
-#     # def _scheduler(self):
-#     #     if self.start_seeds:
-#     #         self.__LAUNCHER_QUEUE__['todo'].push(self.start_seeds)
-#
-#     @check_pause
-#     def _insert(self):
-#         seeds = {}
-#         status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
-#         for _ in range(self._new_queue_max_size):
-#             seed = self.__LAUNCHER_QUEUE__['new'].pop()
-#             if not seed:
-#                 break
-#             seeds[seed.to_string] = seed.params.priority
-#         if seeds:
-#             self.__LAUNCHER_QUEUE__['todo'].push(seeds)
-#         if status:
-#             time.sleep(self._new_queue_wait_seconds)
-#
-#     @check_pause
-#     def _delete(self):
-#         seeds = []
-#         status = self.__LAUNCHER_QUEUE__['done'].length < self._done_queue_max_size
-#
-#         for _ in range(self._done_queue_max_size):
-#             seed = self.__LAUNCHER_QUEUE__['done'].pop()
-#             if not seed:
-#                 break
-#             seeds.append(seed.to_string)
-#
-#         if seeds:
-#             self._remove_doing_seeds(seeds)
-#
-#         if status:
-#             time.sleep(self._done_queue_wait_seconds)
-#
-#     def _polling(self):
-#
-#         check_emtpy_times = 0
-#
-#         while not self._stop.is_set():
-#
-#             queue_not_empty_count = 0
-#             pooling_wait_seconds = 30
-#
-#             for q in self.__LAUNCHER_QUEUE__.values():
-#                 if q.length != 0:
-#                     queue_not_empty_count += 1
-#
-#             if queue_not_empty_count == 0:
-#                 pooling_wait_seconds = 3
-#                 if self._pause.is_set():
-#                     check_emtpy_times = 0
-#                     if not self._task_model:
-#                         logger.info("Done! Ready to close thread...")
-#                         self._stop.set()
-#                 elif check_emtpy_times > 2:
-#                     self.__DOING__ = {}
-#                     self._pause.set()
-#                 else:
-#                     logger.info(
-#                         "check whether the task is complete, "
-#                         f"reset times {3 - check_emtpy_times}"
-#                     )
-#                     check_emtpy_times += 1
-#             elif self._pause.is_set():
-#                 self._pause.clear()
-#                 self._execute()
-#             else:
-#                 logger.info(LogTemplate.launcher_air_polling.format(
-#                     task=self.task,
-#                     doing_len=len(self.__DOING__.keys()),
-#                     todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
-#                     done_len=self.__LAUNCHER_QUEUE__['done'].length,
-#                     upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
-#                 ))
-#
-#             time.sleep(pooling_wait_seconds)
-#
-#
cobweb/launchers/launcher_api.py DELETED
@@ -1,88 +0,0 @@
-import time
-
-from cobweb.base import TaskQueue, Decorators, Seed, Request
-from cobweb.schedulers import ApiScheduler
-from .launcher import Launcher
-
-
-class LauncherApi(Launcher):
-
-    def __init__(self, task, project, custom_setting=None, **kwargs):
-        super().__init__(task, project, custom_setting, **kwargs)
-        self._redis_download = "{%s:%s}:download" % (project, task)
-        self._redis_todo = "{%s:%s}:todo" % (project, task)
-        self._scheduler = ApiScheduler(task, project)
-
-    @Decorators.stop
-    def _schedule(self):
-        thread_sleep = self.scheduling_wait_time
-        for q, key, size, item_info, Cls in [
-            (TaskQueue.TODO, self._redis_todo, self.todo_queue_size, self._task_info["todo"], Seed),
-            (TaskQueue.DOWNLOAD, self._redis_download, self.download_queue_size, self._task_info["download"], Request),
-        ]:
-            if q.length < size:
-                for member, priority in self._scheduler.schedule(key, self.scheduling_size):
-                    q.push(Cls(member, priority=priority))
-                    self.add_working_item(key.split(":")[-1], member, priority)
-                thread_sleep = 0.1
-        time.sleep(thread_sleep)
-
-    @Decorators.stop
-    def _heartbeat(self):
-        if self._scheduler.working.is_set():
-            self._scheduler.set_heartbeat()
-        time.sleep(3)
-
-    @Decorators.stop
-    def _reset(self):
-        self._scheduler.reset(
-            keys=[self._redis_todo, self._redis_download],
-            reset_time=self.seed_reset_seconds
-        )
-        time.sleep(30)
-
-    @Decorators.pause
-    def _insert(self):
-        thread_sleep = 0.1
-        for q, key, size in [
-            (TaskQueue.SEED, self._redis_todo, self.seed_queue_size),
-            (TaskQueue.REQUEST, self._redis_download, self.request_queue_size),
-        ]:
-            item_info = {}
-            while (item := q.pop()) and len(item_info.keys()) < self.inserting_size:
-                item_info[item.seed] = item.params.priority
-            if q.length >= size:
-                thread_sleep = self.inserting_wait_time
-            self._scheduler.insert(key, item_info)
-        time.sleep(thread_sleep)
-
-    @Decorators.pause
-    def _refresh(self):
-        self._scheduler.refresh(self._redis_todo, self._task_info["todo"])
-        self._scheduler.refresh(self._redis_download, self._task_info["download"])
-        time.sleep(10)
-
-    @Decorators.pause
-    def _remove(self):
-        thread_sleep = self.removing_wait_time
-        for q, key, size in [
-            (TaskQueue.DELETE, self._redis_todo, self.delete_queue_size),
-            (TaskQueue.DONE, self._redis_download, self.done_queue_size),
-        ]:
-            items = []
-            while (item := q.pop()) and len(items) < self.removing_size:
-                items.append(item)
-            self._scheduler.delete(key, items)
-            self.remove_working_items(key.split(":")[-1], items)
-            if q.length >= size:
-                thread_sleep = 0.1
-        time.sleep(thread_sleep)
-
-    def _init_schedule_thread(self):
-        self._add_thread(func=self._heartbeat)
-        self._add_thread(func=self._reset)
-        self._add_thread(func=self._refresh)
-        self._add_thread(func=self._schedule)
-        self._add_thread(func=self._insert)
-        self._add_thread(func=self._remove)
-        # self._add_thread(func=self._polling)
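Each worker method above is wrapped by `Decorators.stop` or `Decorators.pause`, whose implementation is not included in this diff. A minimal sketch of how such loop decorators could look, assuming `threading.Event` attributes named `_stop` and `_pause` as used by the deleted launchers (everything here is an assumption, not the package's actual code):

```python
import functools
import time


class Decorators:

    @staticmethod
    def stop(func):
        """Run the wrapped worker repeatedly until the launcher's stop event is set."""
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while not self._stop.is_set():
                func(self, *args, **kwargs)
        return wrapper

    @staticmethod
    def pause(func):
        """Like stop(), but idle while the task is paused instead of doing work."""
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while not self._stop.is_set():
                if self._pause.is_set():
                    time.sleep(1)
                    continue
                func(self, *args, **kwargs)
        return wrapper
```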