jettask 0.2.19__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +10 -3
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
- jettask-0.2.20.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.19.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.19.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,665 @@
|
|
1
|
+
"""
|
2
|
+
并发限流器
|
3
|
+
|
4
|
+
基于Redis锁的并发数限流实现
|
5
|
+
支持:
|
6
|
+
1. 并发锁管理
|
7
|
+
2. 自动锁超时清理
|
8
|
+
3. Worker下线时的锁释放
|
9
|
+
"""
|
10
|
+
|
11
|
+
import asyncio
|
12
|
+
import logging
|
13
|
+
import time
|
14
|
+
import traceback
|
15
|
+
import uuid
|
16
|
+
from redis.asyncio import Redis
|
17
|
+
from typing import Dict, Optional, Set, List, Tuple
|
18
|
+
from collections import defaultdict
|
19
|
+
|
20
|
+
from jettask.utils.db_connector import get_sync_redis_client
|
21
|
+
|
22
|
+
# Module logger; every message in this file goes to the logger named 'app'.
logger = logging.getLogger('app')
|
23
|
+
|
24
|
+
|
25
|
+
class ConcurrencyRateLimiter:
    """Concurrency limiter implemented as a Redis-backed distributed semaphore.

    Every limiter built with the same ``redis_prefix`` and ``task_name``
    operates on the same Redis sorted set, so the limit is shared by all
    workers that use that key.

    Mechanics:
    - Each held slot is a member of the sorted set; its score is the time of
      the last acquire or lease renewal.
    - A heartbeat coroutine periodically refreshes the score of the slots
      held by this instance.
    - A cleanup coroutine removes members whose score is older than
      ``timeout`` seconds.
    - Releases and cleanups publish on a Pub/Sub channel so waiters can retry
      without polling Redis; a periodic local trigger is the fallback.
    """

    def __init__(
        self,
        redis_client: Redis,
        task_name: str,
        worker_id: str,
        max_concurrency: int,
        redis_prefix: str = "jettask",
        timeout: float = 5.0,
        cleanup_interval: float = 1.0,
        renewal_interval: float = 1.0
    ):
        """Store the configuration and prepare keys, scripts and local state.

        No Redis command is issued and no background task is started here.

        Args:
            redis_client: Asynchronous Redis client.
            task_name: Task name; part of the Redis key and channel names.
            worker_id: Worker identifier; prefix of every slot id.
            max_concurrency: Maximum number of members allowed in the set.
            redis_prefix: Prefix for Redis keys and channels.
            timeout: Age in seconds after which an un-renewed slot is
                removed by the cleanup coroutine.
            cleanup_interval: Seconds between cleanup passes.
            renewal_interval: Seconds between heartbeat renewals.
        """
        # Local bookkeeping. Nothing below is guarded by an explicit lock.
        self._local_tasks = set()            # slot ids currently held by this instance
        self._unified_heartbeat_task = None  # lease-renewal coroutine
        self._cleanup_task = None            # stale-slot cleanup coroutine
        self._pubsub = None                  # Pub/Sub subscription object
        self._pubsub_listener_task = None    # Pub/Sub listener coroutine
        self._periodic_trigger_task = None   # fallback wake-up coroutine
        self._release_event = asyncio.Event()  # pulsed to wake local waiters
        self._poll_interval = 10.0           # fallback wake-up period (seconds)

        # Caller-supplied configuration.
        self.redis = redis_client
        self.task_name = task_name
        self.worker_id = worker_id
        self.max_concurrency = max_concurrency
        self.redis_prefix = redis_prefix
        self.timeout = timeout
        self.cleanup_interval = cleanup_interval
        self.renewal_interval = renewal_interval

        # Sorted set of held slots (score = last update time) and the
        # channel used to announce freed slots.
        self.semaphore_key = f"{redis_prefix}:RATE_LIMIT:CONCURRENCY:{task_name}"
        self.release_channel = f"{redis_prefix}:RATE_LIMIT:CONCURRENCY_RELEASE:{task_name}"

        # Lua: add the slot if the set is below the limit (no cleanup here).
        self.acquire_script = """
        local semaphore_key = KEYS[1]
        local max_concurrency = tonumber(ARGV[1])
        local current_time = tonumber(ARGV[2])
        local task_id = ARGV[3]

        -- 检查当前并发数(不做清理,由后台任务定期清理)
        local current_count = redis.call('ZCARD', semaphore_key)

        if current_count < max_concurrency then
            -- 未达到限制,添加任务
            redis.call('ZADD', semaphore_key, current_time, task_id)
            return 1
        else
            return 0
        end
        """

        # Lua: remove the slot and publish a notification if it existed.
        self.release_script = """
        local semaphore_key = KEYS[1]
        local release_channel = KEYS[2]
        local task_id = ARGV[1]

        -- 移除任务
        local removed = redis.call('ZREM', semaphore_key, task_id)

        -- 如果成功移除,通知等待者
        if removed > 0 then
            redis.call('PUBLISH', release_channel, '1')
        end

        return removed
        """

        # Lua: drop every slot whose score is at or below the threshold and
        # publish the number removed.
        self.cleanup_script = """
        local semaphore_key = KEYS[1]
        local release_channel = KEYS[2]
        local timeout_threshold = tonumber(ARGV[1])

        -- 清理超时任务
        local removed = redis.call('ZREMRANGEBYSCORE', semaphore_key, '-inf', timeout_threshold)

        -- 如果清理了任务,通知等待者
        if removed > 0 then
            redis.call('PUBLISH', release_channel, tostring(removed))
        end

        return removed
        """

        # Lua: refresh the score of a slot only if it is still in the set.
        self.renewal_script = """
        local semaphore_key = KEYS[1]
        local task_id = ARGV[1]
        local current_time = tonumber(ARGV[2])

        -- 检查任务是否存在
        local exists = redis.call('ZSCORE', semaphore_key, task_id)
        if exists then
            -- 更新时间戳
            redis.call('ZADD', semaphore_key, current_time, task_id)
            return 1
        else
            return 0
        end
        """
|
163
|
+
|
164
|
+
def _trigger_release_signal(self):
    """Pulse the release event: wake current waiters, then reset it."""
    event = self._release_event
    event.set()
    # Reset straight away so the next wait() blocks until the next pulse.
    event.clear()
|
169
|
+
|
170
|
+
async def _pubsub_listener(self):
    """Background coroutine: turn Pub/Sub release messages into local wake-ups.

    Iterates over ``self._pubsub.listen()`` and pulses the release signal for
    every entry whose type is ``'message'``. Cancellation is logged and
    re-raised; any other exception is logged and ends the coroutine.
    """
    try:
        logger.debug(f"[CONCURRENCY] Pub/Sub listener started for {self.release_channel}")
        async for event in self._pubsub.listen():
            if event['type'] != 'message':
                # Not a published payload (e.g. a subscribe confirmation).
                continue
            logger.debug(f"[CONCURRENCY] Received Pub/Sub release notification, triggering signal")
            self._trigger_release_signal()
    except asyncio.CancelledError:
        logger.debug(f"[CONCURRENCY] Pub/Sub listener cancelled")
        raise
    except Exception as exc:
        logger.error(f"[CONCURRENCY] Error in Pub/Sub listener: {exc}")
|
184
|
+
|
185
|
+
async def _periodic_trigger(self):
    """Background coroutine: pulse the release signal every ``_poll_interval`` seconds.

    This is the fallback that lets waiters retry even when no Pub/Sub
    notification reaches this process. Cancellation is logged and re-raised;
    any other exception is logged and ends the coroutine.
    """
    try:
        logger.debug(f"[CONCURRENCY] Periodic trigger started, interval={self._poll_interval}s")
        while True:
            await asyncio.sleep(self._poll_interval)
            logger.debug(f"[CONCURRENCY] Periodic trigger firing, triggering signal")
            self._trigger_release_signal()
    except asyncio.CancelledError:
        logger.debug(f"[CONCURRENCY] Periodic trigger cancelled")
        raise
    except Exception as exc:
        logger.error(f"[CONCURRENCY] Error in periodic trigger: {exc}")
|
198
|
+
|
199
|
+
async def _unified_heartbeat_manager(self):
    """Background coroutine: renew the lease of every locally held slot.

    Every ``renewal_interval`` seconds a snapshot of ``_local_tasks`` is
    taken and the renewal script is run for each id through one pipeline.
    A result of 0 means the slot is no longer in Redis, so the id is dropped
    from ``_local_tasks``. If the pipeline raises, each id is renewed with an
    individual call instead. Cancellation is logged and re-raised.
    """
    try:
        logger.debug(f"[CONCURRENCY] Unified heartbeat manager started, interval={self.renewal_interval}s")
        while True:
            await asyncio.sleep(self.renewal_interval)

            if not self._local_tasks:
                # Nothing is held right now; sleep again.
                continue

            now = time.time()
            # Work on a copy: release() may change the set while we await.
            held_ids = list(self._local_tasks)

            logger.debug(f"[CONCURRENCY] Renewing heartbeat for {len(held_ids)} tasks")

            try:
                # One round trip for all renewals.
                pipe = self.redis.pipeline()
                for held_id in held_ids:
                    pipe.eval(self.renewal_script, 1, self.semaphore_key, held_id, now)
                outcomes = await pipe.execute()

                for held_id, outcome in zip(held_ids, outcomes):
                    if outcome != 0:
                        logger.debug(f"[CONCURRENCY] Renewed lease for task {held_id}")
                        continue
                    # The slot is gone from Redis (possibly timed out).
                    logger.warning(f"[CONCURRENCY] Task {held_id} was cleaned up, removing from local tasks")
                    self._local_tasks.discard(held_id)

            except Exception as batch_error:
                logger.error(f"[CONCURRENCY] Error renewing leases in batch: {batch_error}")
                # Degrade to one call per slot.
                for held_id in held_ids:
                    try:
                        outcome = await self.redis.eval(
                            self.renewal_script, 1, self.semaphore_key, held_id, now
                        )
                        if outcome == 0:
                            logger.warning(f"[CONCURRENCY] Task {held_id} was cleaned up, removing from local tasks")
                            self._local_tasks.discard(held_id)
                    except Exception as single_error:
                        logger.error(f"[CONCURRENCY] Error renewing lease for {held_id}: {single_error}")

    except asyncio.CancelledError:
        logger.debug(f"[CONCURRENCY] Unified heartbeat manager cancelled")
        raise
|
265
|
+
|
266
|
+
async def _ensure_pubsub(self):
    """Subscribe to the release channel and start missing background tasks.

    Safe to call repeatedly: the subscription and each task are only created
    while the corresponding attribute is still ``None``.
    """
    if self._pubsub is None:
        self._pubsub = self.redis.pubsub()
        await self._pubsub.subscribe(self.release_channel)
        logger.debug(f"[CONCURRENCY] Subscribed to channel {self.release_channel}")

    # (attribute holding the task, coroutine function, text logged on start)
    background = (
        ('_pubsub_listener_task', self._pubsub_listener, "Pub/Sub listener task started"),
        ('_periodic_trigger_task', self._periodic_trigger, "Periodic trigger task started"),
        ('_unified_heartbeat_task', self._unified_heartbeat_manager, "Unified heartbeat manager started"),
        ('_cleanup_task', self._periodic_cleanup, "Periodic cleanup task started"),
    )
    for attr, coro_fn, started_msg in background:
        if getattr(self, attr) is None:
            setattr(self, attr, asyncio.create_task(coro_fn()))
            logger.debug(f"[CONCURRENCY] {started_msg}")
|
293
|
+
|
294
|
+
async def _try_acquire_slot(self, task_id: str) -> bool:
    """Make one attempt to take a slot for ``task_id``.

    Runs the acquire script once. On success the id is added to
    ``_local_tasks`` and the heartbeat coroutine is started if it is not
    running yet.

    Args:
        task_id: Slot id to register in the sorted set.

    Returns:
        True if the slot was taken; False if the limit is reached or the
        Redis call raised (the error is logged, not propagated).
    """
    try:
        outcome = await self.redis.eval(
            self.acquire_script,
            1,
            self.semaphore_key,
            self.max_concurrency,
            time.time(),
            task_id
        )

        if outcome != 1:
            return False

        logger.debug(f"[CONCURRENCY] Acquired slot for task_id={task_id}")
        # From now on the heartbeat coroutine renews this id.
        self._local_tasks.add(task_id)

        if self._unified_heartbeat_task is None:
            self._unified_heartbeat_task = asyncio.create_task(self._unified_heartbeat_manager())
            logger.debug(f"[CONCURRENCY] Unified heartbeat manager started on first acquire")

        return True

    except Exception as exc:
        logger.error(f"[CONCURRENCY] Error in _try_acquire_slot: {exc}")
        return False
|
333
|
+
|
334
|
+
async def _periodic_cleanup(self):
    """Background coroutine: periodically remove slots that stopped renewing.

    Every ``cleanup_interval`` seconds the cleanup script removes members
    whose score is at or below ``now - timeout``. Errors from a single pass
    are logged and the loop continues. Cancellation is logged and re-raised.
    """
    try:
        logger.debug(f"[CONCURRENCY] Periodic cleanup task started, interval={self.cleanup_interval}s")
        while True:
            await asyncio.sleep(self.cleanup_interval)

            timeout_threshold = time.time() - self.timeout
            try:
                removed = await self.redis.eval(
                    self.cleanup_script,
                    2,  # number of KEYS arguments
                    self.semaphore_key,
                    self.release_channel,
                    timeout_threshold
                )
                if removed > 0:
                    logger.info(f"[CONCURRENCY] Cleaned up {removed} timeout tasks")
            except Exception as pass_error:
                logger.error(f"[CONCURRENCY] Cleanup error: {pass_error}")
    except asyncio.CancelledError:
        logger.debug(f"[CONCURRENCY] Periodic cleanup task cancelled")
        raise
    except Exception as exc:
        logger.error(f"[CONCURRENCY] Error in periodic cleanup: {exc}")
|
360
|
+
|
361
|
+
async def acquire(self, timeout: Optional[float] = 10.0) -> Optional[str]:
    """Acquire a slot, waiting for release signals if none is free.

    One attempt is made immediately. If it fails, the Pub/Sub subscription
    and background coroutines are started and every release signal triggers
    another attempt until one succeeds or the deadline passes.

    Args:
        timeout: Maximum time to wait in seconds. ``None`` waits
            indefinitely.

    Returns:
        The slot id (``"<worker_id>:<uuid hex>"``) to pass to ``release()``,
        or ``None`` if no slot was obtained before the timeout.
    """
    # The deadline uses the monotonic clock so wall-clock adjustments
    # cannot shorten or extend the wait.
    deadline = None if timeout is None else time.monotonic() + timeout
    task_id = f"{self.worker_id}:{uuid.uuid4().hex}"

    logger.debug(f"[CONCURRENCY] Attempting to acquire, task_id={task_id}")
    # Fast path: no subscription or waiting when a slot is free.
    if await self._try_acquire_slot(task_id):
        logger.debug(f"[WORKER:{self.worker_id}] [CONCURRENCY] Acquired immediately, task_id={task_id}, current_concurrency={len(self._local_tasks)}")
        return task_id

    # Slow path: make sure something will signal us, then wait.
    await self._ensure_pubsub()

    while True:
        logger.debug(f"[CONCURRENCY] Waiting to acquire, task_id={task_id}")

        if deadline is None:
            # No time limit: block until the next signal.
            await self._release_event.wait()
        else:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                logger.warning(f"[CONCURRENCY] Acquire timeout after {timeout}s")
                return None
            try:
                await asyncio.wait_for(self._release_event.wait(), timeout=remaining)
            except asyncio.TimeoutError:
                logger.warning(f"[CONCURRENCY] Acquire timeout after {timeout}s")
                return None

        logger.debug(f"[CONCURRENCY] Received signal, attempting acquire for task_id={task_id}")

        if await self._try_acquire_slot(task_id):
            logger.debug(f"[WORKER:{self.worker_id}] [CONCURRENCY] Acquired after wait, task_id={task_id}, current_concurrency={len(self._local_tasks)}")
            return task_id

        # Another waiter got the freed slot first; wait for the next signal.
        logger.debug(f"[CONCURRENCY] Acquire failed (slot taken by others), waiting for next signal")
|
414
|
+
|
415
|
+
async def try_acquire(self) -> bool:
    """Make a single, non-blocking attempt to take a slot.

    NOTE(review): the generated slot id is not returned, so a caller cannot
    pass it to ``release()``; a slot taken here stays in ``_local_tasks``
    (and keeps being renewed) until ``stop()``. Confirm this is intended.

    Returns:
        True if a slot was taken, False otherwise.
    """
    task_id = f"{self.worker_id}:{uuid.uuid4().hex}"

    if not await self._try_acquire_slot(task_id):
        logger.debug(f"[CONCURRENCY] Try acquire failed, no available slot")
        return False

    logger.debug(f"[CONCURRENCY] Try acquired, task_id={task_id}")
    return True
|
429
|
+
|
430
|
+
async def release(self, task_id: str):
    """Give back a slot and notify waiters.

    The id is removed from ``_local_tasks`` first, which stops heartbeat
    renewals for it; then the release script removes it from Redis and
    publishes a notification. Redis errors are logged, not raised.

    Args:
        task_id: Slot id returned by ``acquire()``.
    """
    # discard() never raises, even for an unknown id.
    self._local_tasks.discard(task_id)

    try:
        await self.redis.eval(
            self.release_script,
            2,  # number of KEYS arguments
            self.semaphore_key,
            self.release_channel,
            task_id
        )
        logger.debug(f"[WORKER:{self.worker_id}] [CONCURRENCY] Released semaphore, task_id={task_id}, remaining_concurrency={len(self._local_tasks)}")
    except Exception as exc:
        logger.error(f"[CONCURRENCY] Error releasing semaphore: {exc}")
|
454
|
+
|
455
|
+
async def update_limit(self, new_limit: int):
    """Change the concurrency limit used by later acquire attempts.

    Only the in-memory value of this instance is changed; nothing is
    written to Redis.

    Args:
        new_limit: New maximum concurrency.
    """
    if self.max_concurrency == new_limit:
        return

    old_limit = self.max_concurrency
    self.max_concurrency = new_limit
    logger.debug(
        f"[WORKER:{self.worker_id}] [CONCURRENCY] Limit changed: {old_limit} → {new_limit}"
    )
|
467
|
+
|
468
|
+
async def get_stats(self) -> dict:
    """Return a snapshot of the limiter state.

    Stale members (score at or below ``now - timeout``) are removed from the
    sorted set before it is counted. If Redis fails, the error is logged and
    ``current_concurrency`` is reported as 0.

    Returns:
        Dict with keys ``mode``, ``concurrency_limit``,
        ``current_concurrency`` and ``local_tasks``.
    """
    def _snapshot(global_count):
        return {
            'mode': 'concurrency',
            'concurrency_limit': self.max_concurrency,
            'current_concurrency': global_count,
            'local_tasks': len(self._local_tasks),
        }

    try:
        stale_before = time.time() - self.timeout
        await self.redis.zremrangebyscore(self.semaphore_key, '-inf', stale_before)
        return _snapshot(await self.redis.zcard(self.semaphore_key))
    except Exception as exc:
        logger.error(f"[CONCURRENCY] Error getting stats: {exc}")
        return _snapshot(0)
|
497
|
+
|
498
|
+
async def stop(self):
    """Stop all background coroutines and give back locally held slots.

    Steps, each isolated so that a failure in one does not skip the rest:
    1. Cancel and await the heartbeat, Pub/Sub listener, periodic trigger
       and periodic cleanup tasks.
    2. Unsubscribe from and close the Pub/Sub object.
    3. Remove every id in ``_local_tasks`` from the Redis sorted set and
       clear the local set.

    Errors are logged; this method does not raise them.
    """
    try:
        # (attribute holding the task, label used in log messages)
        background = (
            ('_unified_heartbeat_task', "Unified heartbeat manager"),
            ('_pubsub_listener_task', "Pub/Sub listener task"),
            ('_periodic_trigger_task', "Periodic trigger task"),
            # Started by _ensure_pubsub(); must be stopped too, otherwise it
            # keeps running (and issuing Redis commands) after stop().
            ('_cleanup_task', "Periodic cleanup task"),
        )
        for attr, label in background:
            task = getattr(self, attr, None)
            if task is None:
                continue
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass
            except Exception as exc:
                # The task had already failed; keep shutting down.
                logger.error(f"[CONCURRENCY] {label} ended with error: {exc}")
            setattr(self, attr, None)
            logger.debug(f"[CONCURRENCY] {label} cancelled")

        # Tear down the subscription; a failure here must not prevent the
        # slot cleanup below.
        if self._pubsub is not None:
            try:
                await self._pubsub.unsubscribe(self.release_channel)
                await self._pubsub.close()
                logger.debug(f"[CONCURRENCY] Cleaned up pubsub for {self.task_name}")
            except Exception as exc:
                logger.error(f"[CONCURRENCY] Error closing pubsub: {exc}")
            finally:
                self._pubsub = None

        # Remove the slots this instance still holds from Redis.
        if self._local_tasks:
            for task_id in list(self._local_tasks):
                try:
                    await self.redis.zrem(self.semaphore_key, task_id)
                except Exception as exc:
                    logger.error(f"[CONCURRENCY] Error removing task {task_id}: {exc}")
            self._local_tasks.clear()
            logger.debug(f"[CONCURRENCY] Cleaned up local tasks for {self.task_name}")

    except Exception as exc:
        logger.error(f"[CONCURRENCY] Error during stop: {exc}")
|
550
|
+
|
551
|
+
async def __aenter__(self):
    """Async context manager entry: acquire a slot.

    Example:
        async with limiter:
            await do_something()

    The acquired slot id is recorded per asyncio task (as a stack, so the
    same task may nest ``async with`` blocks). A single shared attribute
    would be overwritten when several coroutines use the same limiter at
    once, releasing the wrong slot and leaking the other.

    Returns:
        self: the limiter instance.

    Raises:
        TimeoutError: if ``acquire()`` returned ``None``.
    """
    task_id = await self.acquire()
    if task_id is None:
        raise TimeoutError("Failed to acquire concurrency slot")

    # Created lazily so this works without any change to __init__.
    held = getattr(self, '_context_task_ids', None)
    if held is None:
        held = {}
        self._context_task_ids = held
    held.setdefault(asyncio.current_task(), []).append(task_id)
    return self

async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Async context manager exit: release the slot taken by ``__aenter__``.

    Releases the most recent slot recorded for the current asyncio task, if
    any. Entry and exit are expected to run in the same task.

    Returns:
        False, so exceptions raised inside the block are not suppressed.
    """
    held = getattr(self, '_context_task_ids', None)
    owner = asyncio.current_task()
    stack = held.get(owner) if held else None
    if stack:
        task_id = stack.pop()
        if not stack:
            # Drop the bookkeeping before awaiting so a cancelled release
            # cannot leave a stale entry behind.
            del held[owner]
        await self.release(task_id)
    return False
|
575
|
+
|
576
|
+
@classmethod
def cleanup_worker_locks(
    cls,
    redis_url: str,
    redis_prefix: str,
    worker_id: Optional[str] = None,
    task_names: Optional[List[str]] = None,
    stale_after: float = 5.0,
):
    """Synchronously remove concurrency slots, e.g. while a process exits.

    Args:
        redis_url: Redis connection URL.
        redis_prefix: Redis key prefix.
        worker_id: If given, remove exactly the slots whose id starts with
            ``"<worker_id>:"``. Otherwise remove slots whose score is older
            than ``stale_after`` seconds.
        task_names: Task names whose semaphore keys are inspected. Without
            it nothing can be cleaned and 0 is returned.
        stale_after: Age threshold in seconds used when ``worker_id`` is not
            given. Defaults to 5.0.

    Returns:
        Number of slots removed; 0 if ``task_names`` is empty or an
        unexpected error occurred (errors are logged, not raised).
    """
    try:
        if not task_names:
            logger.warning(f"[CONCURRENCY] No task_names provided, cannot cleanup locks. Please provide task_names.")
            return 0

        sync_redis = get_sync_redis_client(redis_url, decode_responses=False)
        try:
            total_cleaned = 0
            # Slot ids have the form "<worker_id>:<uuid hex>".
            owner_prefix = f"{worker_id}:" if worker_id else None

            for task_name in task_names:
                key = f"{redis_prefix}:RATE_LIMIT:CONCURRENCY:{task_name}"

                if not sync_redis.exists(key):
                    continue

                try:
                    if owner_prefix is not None:
                        # Members arrive as bytes (decode_responses=False);
                        # decode only to compare, remove the raw value.
                        to_remove = [
                            member
                            for member in sync_redis.zrange(key, 0, -1)
                            if (
                                member.decode('utf-8') if isinstance(member, bytes) else member
                            ).startswith(owner_prefix)
                        ]
                        if to_remove:
                            removed = sync_redis.zrem(key, *to_remove)
                            total_cleaned += removed
                            logger.debug(f"[CONCURRENCY] Cleaned up {removed} locks for worker {worker_id} from {key}")
                    else:
                        # Owner unknown: drop everything that has not been
                        # renewed within the threshold.
                        timeout_threshold = time.time() - stale_after
                        removed = sync_redis.zremrangebyscore(key, '-inf', timeout_threshold)
                        if removed > 0:
                            total_cleaned += removed
                            logger.debug(f"[CONCURRENCY] Cleaned up {removed} stale locks from {key}")
                except Exception as e:
                    # One bad key must not stop the cleanup of the others.
                    logger.error(f"[CONCURRENCY] Error cleaning {key}: {e}")

            logger.debug(f"[CONCURRENCY] Cleanup completed, total cleaned: {total_cleaned}")
            return total_cleaned
        finally:
            # NOTE(review): if get_sync_redis_client returns a shared
            # singleton, closing it here may affect other users — confirm.
            sync_redis.close()

    except Exception as e:
        logger.error(f"[CONCURRENCY] Error during cleanup_worker_locks: {e}")
        return 0
|
657
|
+
|
658
|
+
|
659
|
+
# ============================================================
|
660
|
+
# 统一的任务限流器
|
661
|
+
# ============================================================
|
662
|
+
|
663
|
+
|
664
|
+
|
665
|
+
# Public API of this module.
__all__ = ['ConcurrencyRateLimiter']
|