jettask 0.2.19__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. jettask/__init__.py +10 -3
  2. jettask/cli.py +314 -228
  3. jettask/config/__init__.py +9 -1
  4. jettask/config/config.py +245 -0
  5. jettask/config/env_loader.py +381 -0
  6. jettask/config/lua_scripts.py +158 -0
  7. jettask/config/nacos_config.py +132 -5
  8. jettask/core/__init__.py +1 -1
  9. jettask/core/app.py +1573 -666
  10. jettask/core/app_importer.py +33 -16
  11. jettask/core/container.py +532 -0
  12. jettask/core/task.py +1 -4
  13. jettask/core/unified_manager_base.py +2 -2
  14. jettask/executor/__init__.py +38 -0
  15. jettask/executor/core.py +625 -0
  16. jettask/executor/executor.py +338 -0
  17. jettask/executor/orchestrator.py +290 -0
  18. jettask/executor/process_entry.py +638 -0
  19. jettask/executor/task_executor.py +317 -0
  20. jettask/messaging/__init__.py +68 -0
  21. jettask/messaging/event_pool.py +2188 -0
  22. jettask/messaging/reader.py +519 -0
  23. jettask/messaging/registry.py +266 -0
  24. jettask/messaging/scanner.py +369 -0
  25. jettask/messaging/sender.py +312 -0
  26. jettask/persistence/__init__.py +118 -0
  27. jettask/persistence/backlog_monitor.py +567 -0
  28. jettask/{backend/data_access.py → persistence/base.py} +58 -57
  29. jettask/persistence/consumer.py +315 -0
  30. jettask/{core → persistence}/db_manager.py +23 -22
  31. jettask/persistence/maintenance.py +81 -0
  32. jettask/persistence/message_consumer.py +259 -0
  33. jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
  34. jettask/persistence/offline_recovery.py +196 -0
  35. jettask/persistence/queue_discovery.py +215 -0
  36. jettask/persistence/task_persistence.py +218 -0
  37. jettask/persistence/task_updater.py +583 -0
  38. jettask/scheduler/__init__.py +2 -2
  39. jettask/scheduler/loader.py +6 -5
  40. jettask/scheduler/run_scheduler.py +1 -1
  41. jettask/scheduler/scheduler.py +7 -7
  42. jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
  43. jettask/task/__init__.py +16 -0
  44. jettask/{router.py → task/router.py} +26 -8
  45. jettask/task/task_center/__init__.py +9 -0
  46. jettask/task/task_executor.py +318 -0
  47. jettask/task/task_registry.py +291 -0
  48. jettask/test_connection_monitor.py +73 -0
  49. jettask/utils/__init__.py +31 -1
  50. jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
  51. jettask/utils/db_connector.py +1629 -0
  52. jettask/{db_init.py → utils/db_init.py} +1 -1
  53. jettask/utils/rate_limit/__init__.py +30 -0
  54. jettask/utils/rate_limit/concurrency_limiter.py +665 -0
  55. jettask/utils/rate_limit/config.py +145 -0
  56. jettask/utils/rate_limit/limiter.py +41 -0
  57. jettask/utils/rate_limit/manager.py +269 -0
  58. jettask/utils/rate_limit/qps_limiter.py +154 -0
  59. jettask/utils/rate_limit/task_limiter.py +384 -0
  60. jettask/utils/serializer.py +3 -0
  61. jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
  62. jettask/utils/time_sync.py +173 -0
  63. jettask/webui/__init__.py +27 -0
  64. jettask/{api/v1 → webui/api}/alerts.py +1 -1
  65. jettask/{api/v1 → webui/api}/analytics.py +2 -2
  66. jettask/{api/v1 → webui/api}/namespaces.py +1 -1
  67. jettask/{api/v1 → webui/api}/overview.py +1 -1
  68. jettask/{api/v1 → webui/api}/queues.py +3 -3
  69. jettask/{api/v1 → webui/api}/scheduled.py +1 -1
  70. jettask/{api/v1 → webui/api}/settings.py +1 -1
  71. jettask/{api.py → webui/app.py} +253 -145
  72. jettask/webui/namespace_manager/__init__.py +10 -0
  73. jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
  74. jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
  75. jettask/{run.py → webui/run.py} +2 -2
  76. jettask/{services → webui/services}/__init__.py +1 -3
  77. jettask/{services → webui/services}/overview_service.py +34 -16
  78. jettask/{services → webui/services}/queue_service.py +1 -1
  79. jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
  80. jettask/{services → webui/services}/settings_service.py +1 -1
  81. jettask/worker/__init__.py +53 -0
  82. jettask/worker/lifecycle.py +1507 -0
  83. jettask/worker/manager.py +583 -0
  84. jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
  85. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
  86. jettask-0.2.20.dist-info/RECORD +145 -0
  87. jettask/__main__.py +0 -140
  88. jettask/api/__init__.py +0 -103
  89. jettask/backend/__init__.py +0 -1
  90. jettask/backend/api/__init__.py +0 -3
  91. jettask/backend/api/v1/__init__.py +0 -17
  92. jettask/backend/api/v1/monitoring.py +0 -431
  93. jettask/backend/api/v1/namespaces.py +0 -504
  94. jettask/backend/api/v1/queues.py +0 -342
  95. jettask/backend/api/v1/tasks.py +0 -367
  96. jettask/backend/core/__init__.py +0 -3
  97. jettask/backend/core/cache.py +0 -221
  98. jettask/backend/core/database.py +0 -200
  99. jettask/backend/core/exceptions.py +0 -102
  100. jettask/backend/dependencies.py +0 -261
  101. jettask/backend/init_meta_db.py +0 -158
  102. jettask/backend/main.py +0 -1426
  103. jettask/backend/main_unified.py +0 -78
  104. jettask/backend/main_v2.py +0 -394
  105. jettask/backend/models/__init__.py +0 -3
  106. jettask/backend/models/requests.py +0 -236
  107. jettask/backend/models/responses.py +0 -230
  108. jettask/backend/namespace_api_old.py +0 -267
  109. jettask/backend/services/__init__.py +0 -3
  110. jettask/backend/start.py +0 -42
  111. jettask/backend/unified_api_router.py +0 -1541
  112. jettask/cleanup_deprecated_tables.sql +0 -16
  113. jettask/core/consumer_manager.py +0 -1695
  114. jettask/core/delay_scanner.py +0 -256
  115. jettask/core/event_pool.py +0 -1700
  116. jettask/core/heartbeat_process.py +0 -222
  117. jettask/core/task_batch.py +0 -153
  118. jettask/core/worker_scanner.py +0 -271
  119. jettask/executors/__init__.py +0 -5
  120. jettask/executors/asyncio.py +0 -876
  121. jettask/executors/base.py +0 -30
  122. jettask/executors/common.py +0 -148
  123. jettask/executors/multi_asyncio.py +0 -309
  124. jettask/gradio_app.py +0 -570
  125. jettask/integrated_gradio_app.py +0 -1088
  126. jettask/main.py +0 -0
  127. jettask/monitoring/__init__.py +0 -3
  128. jettask/pg_consumer.py +0 -1896
  129. jettask/run_monitor.py +0 -22
  130. jettask/run_webui.py +0 -148
  131. jettask/scheduler/multi_namespace_scheduler.py +0 -294
  132. jettask/scheduler/unified_manager.py +0 -450
  133. jettask/task_center_client.py +0 -150
  134. jettask/utils/serializer_optimized.py +0 -33
  135. jettask/webui_exceptions.py +0 -67
  136. jettask-0.2.19.dist-info/RECORD +0 -150
  137. /jettask/{constants.py → config/constants.py} +0 -0
  138. /jettask/{backend/config.py → config/task_center.py} +0 -0
  139. /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
  140. /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
  141. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
  142. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
  143. /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
  144. /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
  145. /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
  146. /jettask/{models.py → persistence/models.py} +0 -0
  147. /jettask/scheduler/{manager.py → task_crud.py} +0 -0
  148. /jettask/{schema.sql → schemas/schema.sql} +0 -0
  149. /jettask/{task_center.py → task/task_center/client.py} +0 -0
  150. /jettask/{monitoring → utils}/file_watcher.py +0 -0
  151. /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
  152. /jettask/{api/v1 → webui/api}/__init__.py +0 -0
  153. /jettask/{webui_config.py → webui/config.py} +0 -0
  154. /jettask/{webui_models → webui/models}/__init__.py +0 -0
  155. /jettask/{webui_models → webui/models}/namespace.py +0 -0
  156. /jettask/{services → webui/services}/alert_service.py +0 -0
  157. /jettask/{services → webui/services}/analytics_service.py +0 -0
  158. /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
  159. /jettask/{services → webui/services}/task_service.py +0 -0
  160. /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
  161. /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
  162. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
  163. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
  164. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
  165. {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
--- a/jettask/core/heartbeat_process.py
+++ /dev/null
@@ -1,222 +0,0 @@
-"""
-Standalone heartbeat-reporting process, kept separate so it cannot be blocked by CPU-bound tasks
-"""
-import multiprocessing
-import time
-import os
-import logging
-import signal
-import redis
-from typing import Dict, Set, Optional
-
-logger = logging.getLogger(__name__)
-
-
-class HeartbeatProcess:
-    """Standalone heartbeat-reporting process"""
-
-    def __init__(self, redis_url: str, worker_key: str, consumer_id: str,
-                 heartbeat_interval: float = 5.0, heartbeat_timeout: float = 15.0):
-        self.redis_url = redis_url
-        self.worker_key = worker_key
-        self.consumer_id = consumer_id
-        self.heartbeat_interval = heartbeat_interval
-        self.heartbeat_timeout = heartbeat_timeout
-        self.process: Optional[multiprocessing.Process] = None
-        self.stop_event = multiprocessing.Event()
-        # Track the last heartbeat time in shared memory
-        self.last_heartbeat_time = multiprocessing.Value('d', time.time())
-
-    def start(self):
-        """Start the heartbeat process"""
-        if self.process and self.process.is_alive():
-            logger.warning("Heartbeat process already running")
-            return
-
-        self.stop_event.clear()
-        self.process = multiprocessing.Process(
-            target=self._heartbeat_loop,
-            args=(self.redis_url, self.worker_key, self.consumer_id,
-                  self.heartbeat_interval, self.heartbeat_timeout, self.stop_event,
-                  self.last_heartbeat_time),
-            daemon=True,
-            name=f"heartbeat-{self.consumer_id}"
-        )
-        self.process.start()
-        logger.debug(f"Started heartbeat process for {self.consumer_id}, PID: {self.process.pid}")
-
-    def stop(self):
-        """Stop the heartbeat process"""
-        if not self.process:
-            return
-
-        try:
-            # Check that the process object has the attributes we need
-            if hasattr(self.process, 'is_alive') and callable(self.process.is_alive):
-                if self.process.is_alive():
-                    self.stop_event.set()
-                    if hasattr(self.process, 'terminate'):
-                        self.process.terminate()
-                        self.process.join(timeout=5)
-                        if self.process.is_alive():
-                            logger.warning("Heartbeat process did not stop gracefully, forcing kill")
-                            if hasattr(self.process, 'kill'):
-                                self.process.kill()
-                                self.process.join()
-                logger.debug(f"Stopped heartbeat process for {self.consumer_id}")
-            else:
-                logger.debug(f"Heartbeat process for {self.consumer_id} is not a valid process object")
-        except AttributeError as e:
-            logger.debug(f"Heartbeat process attributes error: {e}")
-        except Exception as e:
-            logger.warning(f"Error stopping heartbeat process: {e}")
-        finally:
-            self.process = None
-
-    def get_last_heartbeat_time(self) -> float:
-        """Return the time of the most recent heartbeat"""
-        with self.last_heartbeat_time.get_lock():
-            return self.last_heartbeat_time.value
-
-    @staticmethod
-    def _heartbeat_loop(redis_url: str, worker_key: str, consumer_id: str,
-                        heartbeat_interval: float, heartbeat_timeout: float,
-                        stop_event: multiprocessing.Event,
-                        last_heartbeat_time: multiprocessing.Value):
-        """Heartbeat loop - runs in a separate process"""
-        # Ignore interrupt signals; the main process handles them
-        signal.signal(signal.SIGINT, signal.SIG_IGN)
-
-        # Create a dedicated Redis connection
-        redis_client = redis.from_url(redis_url, decode_responses=True)
-
-        # Resolve host information
-        import socket
-        try:
-            hostname = socket.gethostname()
-            if not hostname or hostname == 'localhost':
-                hostname = socket.gethostbyname(socket.gethostname())
-        except:
-            hostname = os.environ.get('HOSTNAME', 'unknown')
-
-        logger.debug(f"Heartbeat process started for {consumer_id} in PID {os.getpid()}")
-
-        heartbeat_count = 0
-        last_log_time = time.time()
-
-        while not stop_event.is_set():
-            try:
-                current_time = time.time()
-
-                # Update the heartbeat info
-                redis_client.hset(worker_key, mapping={
-                    'last_heartbeat': str(current_time),
-                    'heartbeat_pid': str(os.getpid()),  # Record the heartbeat process PID
-                    'is_alive': 'true'
-                })
-
-                # Also update the sorted set
-                redis_prefix = worker_key.split(':')[0]  # The prefix (e.g. 'jettask')
-                worker_id = worker_key.split(':')[-1]  # The worker ID
-                redis_client.zadd(f"{redis_prefix}:ACTIVE_WORKERS", {worker_id: current_time})
-
-                # Update the local record of the heartbeat time
-                with last_heartbeat_time.get_lock():
-                    last_heartbeat_time.value = current_time
-
-                heartbeat_count += 1
-
-                # Log once every 30 seconds
-                if current_time - last_log_time >= 30:
-                    logger.debug(f"Heartbeat process for {consumer_id} sent {heartbeat_count} heartbeats")
-                    last_log_time = current_time
-                    heartbeat_count = 0
-
-                # Wait until the next heartbeat
-                stop_event.wait(heartbeat_interval)
-
-            except Exception as e:
-                logger.error(f"Error in heartbeat process for {consumer_id}: {e}")
-                time.sleep(1)  # Back off briefly on errors
-
-        logger.debug(f"Heartbeat process for {consumer_id} exiting")
-
-
-class HeartbeatProcessManager:
-    """Manage the worker's heartbeat process (a single process suffices)"""
-
-    def __init__(self, redis_url: str, consumer_id: str, heartbeat_interval: float = 5.0,
-                 heartbeat_timeout: float = 15.0):
-        self.redis_url = redis_url
-        self.consumer_id = consumer_id
-        self.heartbeat_interval = heartbeat_interval
-        self.heartbeat_timeout = heartbeat_timeout
-        self.heartbeat_process: Optional[HeartbeatProcess] = None
-        self.worker_key: Optional[str] = None
-        self.queues: Set[str] = set()  # Queues this worker consumes
-        self.redis_client = redis.from_url(redis_url, decode_responses=True)
-
-    def add_queue(self, queue: str, worker_key: str):
-        """Add a queue (the heartbeat process is started only on the first call)"""
-        self.queues.add(queue)
-
-        # The heartbeat process only needs to be started once
-        if self.heartbeat_process is not None:
-            logger.debug(f"Heartbeat process already running, added queue {queue}")
-            return
-
-        # First call: start the heartbeat process
-        self.worker_key = worker_key
-        self.heartbeat_process = HeartbeatProcess(
-            redis_url=self.redis_url,
-            worker_key=worker_key,
-            consumer_id=self.consumer_id,
-            heartbeat_interval=self.heartbeat_interval,
-            heartbeat_timeout=self.heartbeat_timeout
-        )
-        self.heartbeat_process.start()
-        logger.debug(f"Started single heartbeat process for worker {self.consumer_id}")
-
-    def remove_queue(self, queue: str):
-        """Remove a queue"""
-        if queue in self.queues:
-            self.queues.remove(queue)
-            logger.debug(f"Removed queue {queue} from heartbeat monitoring")
-
-        # If no queues remain, stop the heartbeat process
-        if not self.queues and self.heartbeat_process:
-            self.heartbeat_process.stop()
-            self.heartbeat_process = None
-            logger.debug("No more queues, stopped heartbeat process")
-
-    def stop_all(self):
-        """Stop the heartbeat process"""
-        if self.heartbeat_process:
-            self.heartbeat_process.stop()
-            self.heartbeat_process = None
-        self.queues.clear()
-
-    def is_healthy(self) -> bool:
-        """Check whether the heartbeat process is healthy"""
-        if not self.heartbeat_process:
-            return len(self.queues) == 0  # Healthy if there are no queues
-
-        if not self.heartbeat_process.process or not self.heartbeat_process.process.is_alive():
-            logger.error(f"Heartbeat process for worker {self.consumer_id} is not alive")
-            return False
-        return True
-
-    def get_last_heartbeat_time(self) -> Optional[float]:
-        """Return the time of the most recent heartbeat"""
-        if self.heartbeat_process:
-            return self.heartbeat_process.get_last_heartbeat_time()
-        return None
-
-    def is_heartbeat_timeout(self) -> bool:
-        """Check whether the heartbeat has timed out"""
-        last_heartbeat = self.get_last_heartbeat_time()
-        if last_heartbeat is None:
-            return False  # Heartbeat process not started
-
-        current_time = time.time()
-        return (current_time - last_heartbeat) > self.heartbeat_timeout
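For context, the manager above was the worker's single entry point for heartbeats. Below is a minimal usage sketch against the deleted API; the Redis URL, queue names, and the `jettask:WORKER:<id>` key layout (matching the key pattern used in the WorkerScanner hunk below) are illustrative assumptions, not taken from jettask's actual callers.

    # Hypothetical driver for the removed HeartbeatProcessManager
    manager = HeartbeatProcessManager(
        redis_url="redis://localhost:6379/0",  # assumed URL
        consumer_id="worker-1",
        heartbeat_interval=5.0,
        heartbeat_timeout=15.0,
    )
    # The first add_queue() starts the background process; later calls only register the queue
    manager.add_queue("default", worker_key="jettask:WORKER:worker-1")
    manager.add_queue("emails", worker_key="jettask:WORKER:worker-1")

    if not manager.is_healthy() or manager.is_heartbeat_timeout():
        manager.stop_all()  # tear down before restarting elsewhere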
--- a/jettask/core/task_batch.py
+++ /dev/null
@@ -1,153 +0,0 @@
-"""
-Builder for sending tasks in batches
-Provides a type-safe interface for bulk task submission
-"""
-from typing import List, Dict, Any, Optional, Union, TYPE_CHECKING
-import asyncio
-from dataclasses import dataclass, field
-
-if TYPE_CHECKING:
-    from .task import Task
-    from .app import JetTaskApp
-
-
-@dataclass
-class TaskMessage:
-    """A single task message"""
-    task_name: str
-    queue: str
-    args: tuple = field(default_factory=tuple)
-    kwargs: dict = field(default_factory=dict)
-    delay: Optional[int] = None
-    timeout: Optional[int] = None
-    max_retries: Optional[int] = None
-    retry_delay: Optional[int] = None
-    scheduled_task_id: Optional[int] = None
-    routing: Optional[dict] = None
-
-    def to_dict(self) -> dict:
-        """Convert to a dict"""
-        data = {
-            'task_name': self.task_name,
-            'args': self.args,
-            'kwargs': self.kwargs,
-        }
-
-        # Only include optional parameters that are not None
-        optional_fields = ['delay', 'timeout', 'max_retries', 'retry_delay',
-                           'scheduled_task_id', 'routing']
-        for field in optional_fields:
-            value = getattr(self, field)
-            if value is not None:
-                data[field] = value
-
-        return data
-
-
-class TaskBatch:
-    """
-    Batch task builder
-
-    Example:
-        batch = task.batch()
-        batch.add(args=(1,), kwargs={'user': 'alice'})
-        batch.add(args=(2,), kwargs={'user': 'bob'}, delay=5)
-        results = await batch.send()
-    """
-
-    def __init__(self, task: 'Task', app: 'JetTaskApp'):
-        self.task = task
-        self.app = app
-        self.messages: List[TaskMessage] = []
-        self._queue = task.queue
-
-    def add(
-        self,
-        args: tuple = None,
-        kwargs: dict = None,
-        queue: str = None,
-        delay: int = None,
-        timeout: int = None,
-        max_retries: int = None,
-        retry_delay: int = None,
-        scheduled_task_id: int = None,
-        routing: dict = None,
-    ) -> 'TaskBatch':
-        """
-        Add a task to the batch
-
-        The signature matches apply_async exactly, so IDE hints carry over
-
-        Args:
-            args: Positional arguments
-            kwargs: Keyword arguments
-            queue: Target queue (defaults to the task's queue)
-            delay: Delay before execution (seconds)
-            timeout: Task timeout (seconds)
-            max_retries: Maximum number of retries
-            retry_delay: Interval between retries (seconds)
-            scheduled_task_id: Scheduled-task ID
-            routing: Routing information
-
-        Returns:
-            self: supports call chaining
-        """
-        message = TaskMessage(
-            task_name=self.task.name,
-            queue=queue or self._queue,
-            args=args or (),
-            kwargs=kwargs or {},
-            delay=delay,
-            timeout=timeout,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            scheduled_task_id=scheduled_task_id,
-            routing=routing,
-        )
-        self.messages.append(message)
-        return self
-
-    async def send(self) -> List[str]:
-        """
-        Send all queued tasks in one batch
-
-        Returns:
-            List[str]: list of task IDs
-        """
-        if not self.messages:
-            return []
-
-        # Convert to the bulk-write format
-        batch_data = []
-        for msg in self.messages:
-            batch_data.append({
-                'queue': msg.queue,
-                'data': msg.to_dict()
-            })
-
-        # Call the app's bulk-write method
-        # (this assumes the app has an internal bulk-write method)
-        result = await self.app._bulk_write_messages(batch_data)
-
-        # Clear the message list so the batch can be reused
-        self.messages.clear()
-
-        return result
-
-    def send_sync(self) -> List[str]:
-        """
-        Synchronous version of the batch send
-
-        Returns:
-            List[str]: list of task IDs
-        """
-        loop = asyncio.get_event_loop()
-        return loop.run_until_complete(self.send())
-
-    def __len__(self) -> int:
-        """Return the number of tasks currently in the batch"""
-        return len(self.messages)
-
-    def clear(self) -> None:
-        """Clear the batch"""
-        self.messages.clear()
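The docstring above already shows the async path; the payload shape that send() handed to the app's internal bulk writer follows directly from to_dict(). A small sketch, where the `send_email` task and the `emails` queue are hypothetical:

    # batch() is the Task helper shown in the class docstring above
    batch = send_email.batch()
    batch.add(args=("alice@example.com",)).add(args=("bob@example.com",), delay=5)

    # Each message became {'queue': ..., 'data': TaskMessage.to_dict()}, e.g.
    # {'queue': 'emails', 'data': {'task_name': 'send_email',
    #                              'args': ('bob@example.com',), 'kwargs': {}, 'delay': 5}}
    task_ids = batch.send_sync()  # blocking wrapper around `await batch.send()`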
--- a/jettask/core/worker_scanner.py
+++ /dev/null
@@ -1,271 +0,0 @@
-"""
-Worker scanner - efficient detection of timed-out workers
-Uses a Redis Sorted Set for O(log N) complexity
-"""
-
-import time
-import logging
-import asyncio
-from typing import Dict, List, Optional
-
-logger = logging.getLogger('app')
-
-
-class WorkerScanner:
-    """
-    Worker scanner optimized with a Redis Sorted Set
-
-    Key optimizations:
-    1. O(log N) timeout-detection complexity
-    2. Automatic consistency maintenance
-    3. Atomic operations keep the data consistent
-    """
-
-    def __init__(self, sync_redis, async_redis, redis_prefix: str = 'jettask',
-                 heartbeat_timeout: float = 3.0):
-        self.redis = sync_redis
-        self.async_redis = async_redis
-        self.redis_prefix = redis_prefix
-        self.heartbeat_timeout = heartbeat_timeout
-        self.active_workers_key = f"{redis_prefix}:ACTIVE_WORKERS"
-
-        # Consistency maintenance
-        self._initialized = False
-        self._last_full_sync = 0
-        self._full_sync_interval = 60  # Full sync every 60 seconds
-        self._scan_counter = 0
-        self._partial_check_interval = 10  # Partial check every 10 scans
-
-    # async def ensure_initialized(self):
-    #     """Ensure the Sorted Set is initialized and stays consistent"""
-    #     current_time = time.time()
-
-    #     # First-time initialization, or periodic full sync
-    #     if not self._initialized or (current_time - self._last_full_sync > self._full_sync_interval):
-    #         await self._full_sync()
-    #         self._initialized = True
-    #         self._last_full_sync = current_time

-    # async def _full_sync(self):
-    #     """Fully synchronize the Hash and the Sorted Set"""
-    #     try:
-    #         logger.debug("Starting full synchronization of ACTIVE_WORKERS")

-    #         # 1. Collect the Hash data of all active workers
-    #         pattern = f"{self.redis_prefix}:WORKER:*"
-    #         hash_workers = {}

-    #         async for key in self.async_redis.scan_iter(match=pattern, count=100):
-    #             if ':HISTORY:' not in key and ':REUSE:' not in key:
-    #                 worker_id = key.split(':')[-1]
-    #                 worker_data = await self.async_redis.hgetall(key)

-    #                 if worker_data and worker_data.get('is_alive', 'true').lower() == 'true':
-    #                     try:
-    #                         heartbeat = float(worker_data.get('last_heartbeat', 0))
-    #                         hash_workers[worker_id] = heartbeat
-    #                     except (ValueError, TypeError):
-    #                         continue

-    #         # 2. Fetch the data held in the Sorted Set
-    #         zset_workers = await self.async_redis.zrange(
-    #             self.active_workers_key, 0, -1, withscores=True
-    #         )
-    #         zset_dict = {worker_id: score for worker_id, score in zset_workers}

-    #         # 3. Compute the differences
-    #         hash_ids = set(hash_workers.keys())
-    #         zset_ids = set(zset_dict.keys())

-    #         to_add = hash_ids - zset_ids  # In the Hash but not the ZSet
-    #         to_remove = zset_ids - hash_ids  # In the ZSet but not the Hash
-    #         to_update = {}  # Inconsistent timestamps

-    #         for worker_id in hash_ids & zset_ids:
-    #             if abs(hash_workers[worker_id] - zset_dict[worker_id]) > 0.1:
-    #                 to_update[worker_id] = hash_workers[worker_id]

-    #         # 4. Repair in bulk
-    #         if to_add or to_remove or to_update:
-    #             pipeline = self.async_redis.pipeline()

-    #             if to_add:
-    #                 members = {worker_id: hash_workers[worker_id] for worker_id in to_add}
-    #                 pipeline.zadd(self.active_workers_key, members)

-    #             if to_remove:
-    #                 pipeline.zrem(self.active_workers_key, *to_remove)

-    #             if to_update:
-    #                 pipeline.zadd(self.active_workers_key, to_update)

-    #             await pipeline.execute()
-    #             logger.info(f"Full sync: +{len(to_add)}, -{len(to_remove)}, ~{len(to_update)}")

-    #     except Exception as e:
-    #         logger.error(f"Full sync failed: {e}")
-
-    async def scan_timeout_workers(self) -> List[Dict]:
-        """
-        Fast scan for timed-out workers - O(log N) complexity
-        Note: each worker's own heartbeat_timeout has to be taken into account
-        """
-        # await self.ensure_initialized()
-
-        # Periodic partial check
-        self._scan_counter += 1
-        if self._scan_counter >= self._partial_check_interval:
-            self._scan_counter = 0
-            asyncio.create_task(self._partial_check())
-
-        current_time = time.time()
-        # Use the largest plausible timeout as the initial filter (to avoid misses)
-        # The real timeout decision below uses each worker's own timeout
-        max_possible_timeout = 300  # 5 minutes, enough to cover most cases
-        cutoff_time = current_time - max_possible_timeout
-
-        # O(log N) fetch of potentially timed-out workers (loose filter)
-        potential_timeout_worker_ids = await self.async_redis.zrangebyscore(
-            self.active_workers_key,
-            min=0,
-            max=current_time - 1  # Not updated for at least 1 second
-        )
-
-        if not potential_timeout_worker_ids:
-            return []
-
-        # Fetch worker details in bulk
-        pipeline = self.async_redis.pipeline()
-        for worker_id in potential_timeout_worker_ids:
-            worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-            pipeline.hgetall(worker_key)
-
-        workers_data = await pipeline.execute()
-
-        # Validate and build the result
-        result = []
-        cleanup_pipeline = self.async_redis.pipeline()
-        need_cleanup = False
-
-        for worker_id, worker_data in zip(potential_timeout_worker_ids, workers_data):
-            if not worker_data:
-                # The Hash no longer exists; clean up the ZSet
-                cleanup_pipeline.zrem(self.active_workers_key, worker_id)
-                need_cleanup = True
-                continue
-
-            # Get this worker's own heartbeat_timeout
-            worker_heartbeat_timeout = float(worker_data.get('heartbeat_timeout', self.heartbeat_timeout))
-
-            # Double-check the actual heartbeat time (using the worker's own timeout)
-            last_heartbeat = float(worker_data.get('last_heartbeat', 0))
-            worker_cutoff_time = current_time - worker_heartbeat_timeout
-
-            if last_heartbeat >= worker_cutoff_time:
-                # Not timed out; refresh the ZSet and skip
-                cleanup_pipeline.zadd(self.active_workers_key, {worker_id: last_heartbeat})
-                need_cleanup = True
-                continue
-
-            # Check the liveness flag
-            is_alive = worker_data.get('is_alive', 'true').lower() == 'true'
-            if not is_alive:
-                # Already offline; clean up the ZSet
-                cleanup_pipeline.zrem(self.active_workers_key, worker_id)
-                need_cleanup = True
-                continue
-
-            # Timeout confirmed (judged against the worker's own timeout)
-            logger.debug(f"Worker {worker_id} timeout: last_heartbeat={last_heartbeat}, "
-                         f"timeout={worker_heartbeat_timeout}s, cutoff={worker_cutoff_time}")
-            worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-            result.append({
-                'worker_key': worker_key,
-                'worker_data': worker_data,
-                'worker_id': worker_id
-            })
-
-        if need_cleanup:
-            await cleanup_pipeline.execute()
-
-        if result:
-            logger.info(f"Found {len(result)} timeout workers")
-
-        return result
-
-    async def update_heartbeat(self, worker_id: str, heartbeat_time: Optional[float] = None):
-        """Atomically update a heartbeat (updates the Hash and the ZSet together)"""
-        if heartbeat_time is None:
-            heartbeat_time = time.time()
-
-        pipeline = self.async_redis.pipeline()
-        worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-
-        # Update both data structures together
-        pipeline.hset(worker_key, 'last_heartbeat', str(heartbeat_time))
-        pipeline.zadd(self.active_workers_key, {worker_id: heartbeat_time})
-
-        await pipeline.execute()
-
-    async def add_worker(self, worker_id: str, worker_data: Dict):
-        """Add a new worker (inserts into the Hash and the ZSet together)"""
-        heartbeat_time = float(worker_data.get('last_heartbeat', time.time()))
-
-        pipeline = self.async_redis.pipeline()
-        worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-
-        # Create the Hash
-        pipeline.hset(worker_key, mapping=worker_data)
-        # Add to the ZSet
-        pipeline.zadd(self.active_workers_key, {worker_id: heartbeat_time})
-
-        await pipeline.execute()
-        logger.debug(f"Added worker {worker_id} to system")
-
-    async def remove_worker(self, worker_id: str):
-        """Remove a worker (updates the Hash and the ZSet together)"""
-        pipeline = self.async_redis.pipeline()
-        worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-
-        # Mark the Hash as offline
-        pipeline.hset(worker_key, 'is_alive', 'false')
-        # Remove from the ZSet
-        pipeline.zrem(self.active_workers_key, worker_id)
-
-        await pipeline.execute()
-        logger.debug(f"Removed worker {worker_id} from active set")
-
-    async def _partial_check(self):
-        """Partial consistency check"""
-        try:
-            # Randomly check 10 workers
-            sample_size = min(10, await self.async_redis.zcard(self.active_workers_key))
-            if sample_size == 0:
-                return
-
-            # Random sample
-            random_workers = await self.async_redis.zrandmember(
-                self.active_workers_key, sample_size, withscores=True
-            )
-
-            for worker_id, zset_score in random_workers:
-                worker_key = f"{self.redis_prefix}:WORKER:{worker_id}"
-                hash_heartbeat = await self.async_redis.hget(worker_key, 'last_heartbeat')
-
-                if not hash_heartbeat:
-                    # The Hash does not exist; delete the ZSet entry
-                    await self.async_redis.zrem(self.active_workers_key, worker_id)
-                    logger.debug(f"Partial check: removed {worker_id}")
-                else:
-                    # Check timestamp consistency
-                    hash_time = float(hash_heartbeat)
-                    if abs(hash_time - zset_score) > 1.0:
-                        await self.async_redis.zadd(self.active_workers_key, {worker_id: hash_time})
-                        logger.debug(f"Partial check: synced {worker_id}")
-
-        except Exception as e:
-            logger.debug(f"Partial check error: {e}")
-
-    async def get_active_count(self) -> int:
-        """Return the number of active workers - O(1)"""
-        return await self.async_redis.zcard(self.active_workers_key)
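The core trick above is storing last-heartbeat timestamps as sorted-set scores, so stale workers come back from a single range query instead of a full key scan. A self-contained sketch of that query using redis-py's asyncio client; the connection URL is an assumption, while the key name mirrors the deleted code:

    import asyncio
    import time
    import redis.asyncio as redis

    async def stale_worker_ids() -> list:
        r = redis.from_url("redis://localhost:6379/0", decode_responses=True)
        # Scores are last-heartbeat timestamps, so ZRANGEBYSCORE returns every
        # worker not refreshed in the last second in O(log N + M); each hit is
        # then verified against its own heartbeat_timeout, as scan_timeout_workers() did.
        return await r.zrangebyscore("jettask:ACTIVE_WORKERS", min=0, max=time.time() - 1)

    print(asyncio.run(stale_worker_ids()))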
--- a/jettask/executors/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .base import BaseExecutor
-from .asyncio import AsyncioExecutor
-from .multi_asyncio import MultiAsyncioExecutor
-
-__all__ = ["BaseExecutor", "AsyncioExecutor", "MultiAsyncioExecutor"]