cobweb-launcher 1.3.14__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. cobweb/__init__.py +1 -1
  2. cobweb/base/__init__.py +4 -149
  3. cobweb/base/common_queue.py +0 -13
  4. cobweb/base/request.py +2 -14
  5. cobweb/base/seed.py +16 -12
  6. cobweb/constant.py +0 -16
  7. cobweb/crawlers/crawler.py +3 -85
  8. cobweb/db/redis_db.py +109 -52
  9. cobweb/launchers/__init__.py +8 -2
  10. cobweb/launchers/distributor.py +171 -0
  11. cobweb/launchers/launcher.py +87 -131
  12. cobweb/launchers/uploader.py +65 -0
  13. cobweb/pipelines/pipeline.py +3 -36
  14. cobweb/schedulers/__init__.py +1 -3
  15. cobweb/schedulers/launcher_air.py +93 -0
  16. cobweb/schedulers/launcher_api.py +225 -0
  17. cobweb/schedulers/scheduler.py +85 -0
  18. cobweb/schedulers/scheduler_with_redis.py +177 -0
  19. cobweb/setting.py +15 -32
  20. cobweb/utils/__init__.py +2 -1
  21. cobweb/utils/decorators.py +43 -0
  22. cobweb/utils/dotting.py +55 -0
  23. cobweb/utils/oss.py +28 -9
  24. {cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/METADATA +1 -1
  25. cobweb_launcher-3.1.0.dist-info/RECORD +41 -0
  26. cobweb/base/basic.py +0 -295
  27. cobweb/base/dotting.py +0 -35
  28. cobweb/launchers/launcher_air.py +0 -88
  29. cobweb/launchers/launcher_api.py +0 -88
  30. cobweb/launchers/launcher_pro.py +0 -88
  31. cobweb/schedulers/scheduler_api.py +0 -72
  32. cobweb/schedulers/scheduler_redis.py +0 -72
  33. cobweb_launcher-1.3.14.dist-info/RECORD +0 -40
  34. {cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/LICENSE +0 -0
  35. {cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/WHEEL +0 -0
  36. {cobweb_launcher-1.3.14.dist-info → cobweb_launcher-3.1.0.dist-info}/top_level.txt +0 -0
@@ -1,88 +0,0 @@
1
- import time
2
-
3
- from cobweb.base import TaskQueue, Decorators, Seed, Request
4
- from cobweb.schedulers import RedisScheduler
5
- from .launcher import Launcher
6
-
7
-
8
- class LauncherPro(Launcher):
9
-
10
- def __init__(self, task, project, custom_setting=None, **kwargs):
11
- super().__init__(task, project, custom_setting, **kwargs)
12
- self._redis_download = "{%s:%s}:download" % (project, task)
13
- self._redis_todo = "{%s:%s}:todo" % (project, task)
14
- self._scheduler = RedisScheduler(task, project)
15
-
16
- @Decorators.stop
17
- def _schedule(self):
18
- thread_sleep = self.scheduling_wait_time
19
- for q, key, size, item_info, Cls in [
20
- (TaskQueue.TODO, self._redis_todo, self.todo_queue_size, self._task_info["todo"], Seed),
21
- (TaskQueue.DOWNLOAD, self._redis_download, self.download_queue_size, self._task_info["download"], Request),
22
- ]:
23
- if q.length < size:
24
- for member, priority in self._scheduler.schedule(key, self.scheduling_size):
25
- q.push(Cls(member, priority=priority))
26
- self.add_working_item(key.split(":")[-1], member, priority)
27
- thread_sleep = 0.1
28
- time.sleep(thread_sleep)
29
-
30
- @Decorators.stop
31
- def _heartbeat(self):
32
- if self._scheduler.working.is_set():
33
- self._scheduler.set_heartbeat()
34
- time.sleep(3)
35
-
36
- @Decorators.stop
37
- def _reset(self):
38
- self._scheduler.reset(
39
- keys=[self._redis_todo, self._redis_download],
40
- reset_time=self.seed_reset_seconds
41
- )
42
- time.sleep(30)
43
-
44
- @Decorators.pause
45
- def _insert(self):
46
- thread_sleep = 0.1
47
- for q, key, size in [
48
- (TaskQueue.SEED, self._redis_todo, self.seed_queue_size),
49
- (TaskQueue.REQUEST, self._redis_download, self.request_queue_size),
50
- ]:
51
- item_info = {}
52
- while (item := q.pop()) and len(item_info.keys()) < self.inserting_size:
53
- item_info[item.seed] = item.params.priority
54
- if q.length >= size:
55
- thread_sleep = self.inserting_wait_time
56
- self._scheduler.insert(key, item_info)
57
- time.sleep(thread_sleep)
58
-
59
- @Decorators.pause
60
- def _refresh(self):
61
- self._scheduler.refresh(self._redis_todo, self._task_info["todo"])
62
- self._scheduler.refresh(self._redis_download, self._task_info["download"])
63
- time.sleep(10)
64
-
65
- @Decorators.pause
66
- def _remove(self):
67
- thread_sleep = self.removing_wait_time
68
- for q, key, size in [
69
- (TaskQueue.DELETE, self._redis_todo, self.delete_queue_size),
70
- (TaskQueue.DONE, self._redis_download, self.done_queue_size),
71
- ]:
72
- items = []
73
- while (item := q.pop()) and len(items) < self.removing_size:
74
- items.append(item)
75
- self._scheduler.delete(key, items)
76
- self.remove_working_items(key.split(":")[-1], items)
77
- if q.length >= size:
78
- thread_sleep = 0.1
79
- time.sleep(thread_sleep)
80
-
81
- def _init_schedule_thread(self):
82
- self._add_thread(func=self._heartbeat)
83
- self._add_thread(func=self._reset)
84
- self._add_thread(func=self._refresh)
85
- self._add_thread(func=self._schedule)
86
- self._add_thread(func=self._insert)
87
- self._add_thread(func=self._remove)
88
- # self._add_thread(func=self._polling)
@@ -1,72 +0,0 @@
1
- import threading
2
- import time
3
-
4
- # from cobweb.base import Seed
5
- from cobweb.db import ApiDB
6
-
7
-
8
- class ApiScheduler:
9
-
10
- def __init__(self, task, project, scheduler_wait_seconds=30):
11
- self._todo_key = "{%s:%s}:todo" % (project, task)
12
- self._download_key = "{%s:%s}:download" % (project, task)
13
- self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
14
- self._speed_control_key = "speed_control:%s_%s" % (project, task)
15
- self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
16
- self._db = ApiDB()
17
-
18
- self.scheduler_wait_seconds = scheduler_wait_seconds
19
- self.working = threading.Event()
20
-
21
- @property
22
- def heartbeat(self):
23
- return self._db.exists(self._heartbeat_key)
24
-
25
- def set_heartbeat(self):
26
- return self._db.setex(self._heartbeat_key, 5)
27
-
28
- def schedule(self, key, count):
29
- if not self._db.zcount(key, 0, "(1000"):
30
- time.sleep(self.scheduler_wait_seconds)
31
- else:
32
- source = int(time.time())
33
- members = self._db.members(key, source, count=count, _min=0, _max="(1000")
34
- for member, priority in members:
35
- # seed = Seed(member, priority=priority)
36
- yield member, priority
37
-
38
- def insert(self, key, items):
39
- if items:
40
- self._db.zadd(key, items, nx=True)
41
-
42
- def reset(self, keys, reset_time=30):
43
- if self._db.lock(self._reset_lock_key, t=120):
44
-
45
- if isinstance(keys, str):
46
- keys = [keys]
47
-
48
- _min = reset_time - int(time.time()) if self.heartbeat else "-inf"
49
-
50
- for key in keys:
51
- if self._db.exists(key):
52
- self._db.members(key, 0, _min=_min, _max="(0")
53
-
54
- if not self.heartbeat:
55
- self.working.set()
56
- time.sleep(10)
57
-
58
- self._db.delete(self._reset_lock_key)
59
-
60
- def refresh(self, key, items: dict[str, int]):
61
- refresh_time = int(time.time())
62
- its = {k: -refresh_time - v / 1000 for k, v in items.items()}
63
- if its:
64
- self._db.zadd(key, item=its, xx=True)
65
-
66
- def delete(self, key, values):
67
- if values:
68
- self._db.zrem(key, *values)
69
-
70
-
71
-
72
-
@@ -1,72 +0,0 @@
1
- import threading
2
- import time
3
-
4
- # from cobweb.base import Seed
5
- from cobweb.db import RedisDB
6
-
7
-
8
- class RedisScheduler:
9
-
10
- def __init__(self, task, project, scheduler_wait_seconds=30):
11
- self._todo_key = "{%s:%s}:todo" % (project, task)
12
- self._download_key = "{%s:%s}:download" % (project, task)
13
- self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
14
- self._speed_control_key = "speed_control:%s_%s" % (project, task)
15
- self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
16
- self._db = RedisDB()
17
-
18
- self.scheduler_wait_seconds = scheduler_wait_seconds
19
- self.working = threading.Event()
20
-
21
- @property
22
- def heartbeat(self):
23
- return self._db.exists(self._heartbeat_key)
24
-
25
- def set_heartbeat(self):
26
- return self._db.setex(self._heartbeat_key, 5)
27
-
28
- def schedule(self, key, count):
29
- if not self._db.zcount(key, 0, "(1000"):
30
- time.sleep(self.scheduler_wait_seconds)
31
- else:
32
- source = int(time.time())
33
- members = self._db.members(key, source, count=count, _min=0, _max="(1000")
34
- for member, priority in members:
35
- # seed = Seed(member, priority=priority)
36
- yield member, priority
37
-
38
- def insert(self, key, items):
39
- if items:
40
- self._db.zadd(key, items, nx=True)
41
-
42
- def reset(self, keys, reset_time=30):
43
- if self._db.lock(self._reset_lock_key, t=120):
44
-
45
- if isinstance(keys, str):
46
- keys = [keys]
47
-
48
- _min = reset_time - int(time.time()) if self.heartbeat else "-inf"
49
-
50
- for key in keys:
51
- if self._db.exists(key):
52
- self._db.members(key, 0, _min=_min, _max="(0")
53
-
54
- if not self.heartbeat:
55
- self.working.set()
56
- time.sleep(10)
57
-
58
- self._db.delete(self._reset_lock_key)
59
-
60
- def refresh(self, key, items: dict[str, int]):
61
- refresh_time = int(time.time())
62
- its = {k: -refresh_time - v / 1000 for k, v in items.items()}
63
- if its:
64
- self._db.zadd(key, item=its, xx=True)
65
-
66
- def delete(self, key, values):
67
- if values:
68
- self._db.zrem(key, *values)
69
-
70
-
71
-
72
-
@@ -1,40 +0,0 @@
1
- cobweb/__init__.py,sha256=oaEfsGUuGP0s39UbFRwrnsjMUeuB6QvQIAwStKFyUTk,83
2
- cobweb/constant.py,sha256=eofONAntk9O6S-cb4KbYGYHL_u7nBlOqqFOw_HzJHAU,3588
3
- cobweb/setting.py,sha256=pY6LKsgWI3164GiGA1z_y26LVf5-3mpiEgmm86mKRdY,3135
4
- cobweb/base/__init__.py,sha256=CgNg7BK8uPICSWrLI9Bi6vNQaquBNY2H31TrDo9-fTI,5245
5
- cobweb/base/basic.py,sha256=eOSHnZT2xR-sOND8J4M3iCJJJUV51QiFi8Yn8JxV7s4,7670
6
- cobweb/base/common_queue.py,sha256=Gor7sR3h1hlZWaI0XcNAbf0S15Ftjr3DFRWNTGL13uU,1137
7
- cobweb/base/dotting.py,sha256=lfFXXqnVP__hxlW3qH5Bnuq69KtnFaQLbcz1M8e2Ajg,1239
8
- cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
9
- cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
10
- cobweb/base/request.py,sha256=acGm3OzxsPed5VUTk7D9eeHZPMh7KUNQRUv44G5znZg,2659
11
- cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
12
- cobweb/base/seed.py,sha256=PN5J4gKPEXylwyQeSGOBfauxHktxFr7RJe8nVX1hBw4,2987
13
- cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
14
- cobweb/crawlers/crawler.py,sha256=A7_qVGXHiULk7PsYIxmYvVCyILdWy4Er8_qmbVDzFzE,3950
15
- cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
16
- cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
17
- cobweb/db/redis_db.py,sha256=FvMzckJtmhwKhZqKoS23iXmJti5P2dnMVD5rJ__5LUw,5139
18
- cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
19
- cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
20
- cobweb/launchers/__init__.py,sha256=m_XNG2bWuMbirPt3d0_s-Ezl1xycfUxeqZnwq_kkfuo,116
21
- cobweb/launchers/launcher.py,sha256=KB7aL38T3uMh1s78HyHLcS0DS8ovx7VuW6JHn5ooec8,7807
22
- cobweb/launchers/launcher_air.py,sha256=yPr395HVIIHAq6lqRcYJu7c0KkfO9V8O-2sn0hC96p0,2990
23
- cobweb/launchers/launcher_api.py,sha256=vpwFxB1azgBk1bS7VhX3jOprQS8fl6Iu_5M-Y3QT67A,3394
24
- cobweb/launchers/launcher_pro.py,sha256=Kl64CQLcUwW9FeaKFg0GIK51fLZaWkoQlQ346zEvJEE,3398
25
- cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
26
- cobweb/pipelines/pipeline.py,sha256=FHY7ZHNZgx-AdbHt1MKHm-w0aigxvyXFV10T0NMdrOE,1381
27
- cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
28
- cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
29
- cobweb/schedulers/__init__.py,sha256=y7Lv_7b0zfTl0OhIONb_8u1K1C9gVlBA-xz_XG_kI9g,85
30
- cobweb/schedulers/scheduler_api.py,sha256=pFEdS1H4zuzxwMhCV-G7CoLz-rEOPv4EVo3xZUXTyDo,2199
31
- cobweb/schedulers/scheduler_redis.py,sha256=E5fjc3nNld8GbUhUGT7uY4smRejj2J2ZIzp2g6lhxFM,2205
32
- cobweb/utils/__init__.py,sha256=YvD4mIDBd9jmGA6WJBcwkgDU2jRFNBCEbarZCSUBAHE,114
33
- cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
34
- cobweb/utils/oss.py,sha256=6Qlhdde7CcwD69bBe2rGWHY3-aptG9NXB_DZLhjgDRQ,3553
35
- cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
36
- cobweb_launcher-1.3.14.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
37
- cobweb_launcher-1.3.14.dist-info/METADATA,sha256=l2r1AYRaNcYTFGxPQoqXTo-5SNt470NZRxzpj_QSj_o,6510
38
- cobweb_launcher-1.3.14.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
39
- cobweb_launcher-1.3.14.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
40
- cobweb_launcher-1.3.14.dist-info/RECORD,,