jettask 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. jettask/__init__.py +60 -2
  2. jettask/cli.py +314 -228
  3. jettask/config/__init__.py +9 -1
  4. jettask/config/config.py +245 -0
  5. jettask/config/env_loader.py +381 -0
  6. jettask/config/lua_scripts.py +158 -0
  7. jettask/config/nacos_config.py +132 -5
  8. jettask/core/__init__.py +1 -1
  9. jettask/core/app.py +1573 -666
  10. jettask/core/app_importer.py +33 -16
  11. jettask/core/container.py +532 -0
  12. jettask/core/task.py +1 -4
  13. jettask/core/unified_manager_base.py +2 -2
  14. jettask/executor/__init__.py +38 -0
  15. jettask/executor/core.py +625 -0
  16. jettask/executor/executor.py +338 -0
  17. jettask/executor/orchestrator.py +290 -0
  18. jettask/executor/process_entry.py +638 -0
  19. jettask/executor/task_executor.py +317 -0
  20. jettask/messaging/__init__.py +68 -0
  21. jettask/messaging/event_pool.py +2188 -0
  22. jettask/messaging/reader.py +519 -0
  23. jettask/messaging/registry.py +266 -0
  24. jettask/messaging/scanner.py +369 -0
  25. jettask/messaging/sender.py +312 -0
  26. jettask/persistence/__init__.py +118 -0
  27. jettask/persistence/backlog_monitor.py +567 -0
  28. jettask/{backend/data_access.py → persistence/base.py} +58 -57
  29. jettask/persistence/consumer.py +315 -0
  30. jettask/{core → persistence}/db_manager.py +23 -22
  31. jettask/persistence/maintenance.py +81 -0
  32. jettask/persistence/message_consumer.py +259 -0
  33. jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
  34. jettask/persistence/offline_recovery.py +196 -0
  35. jettask/persistence/queue_discovery.py +215 -0
  36. jettask/persistence/task_persistence.py +218 -0
  37. jettask/persistence/task_updater.py +583 -0
  38. jettask/scheduler/__init__.py +2 -2
  39. jettask/scheduler/loader.py +6 -5
  40. jettask/scheduler/run_scheduler.py +1 -1
  41. jettask/scheduler/scheduler.py +7 -7
  42. jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
  43. jettask/task/__init__.py +16 -0
  44. jettask/{router.py → task/router.py} +26 -8
  45. jettask/task/task_center/__init__.py +9 -0
  46. jettask/task/task_executor.py +318 -0
  47. jettask/task/task_registry.py +291 -0
  48. jettask/test_connection_monitor.py +73 -0
  49. jettask/utils/__init__.py +31 -1
  50. jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
  51. jettask/utils/db_connector.py +1629 -0
  52. jettask/{db_init.py → utils/db_init.py} +1 -1
  53. jettask/utils/rate_limit/__init__.py +30 -0
  54. jettask/utils/rate_limit/concurrency_limiter.py +665 -0
  55. jettask/utils/rate_limit/config.py +145 -0
  56. jettask/utils/rate_limit/limiter.py +41 -0
  57. jettask/utils/rate_limit/manager.py +269 -0
  58. jettask/utils/rate_limit/qps_limiter.py +154 -0
  59. jettask/utils/rate_limit/task_limiter.py +384 -0
  60. jettask/utils/serializer.py +3 -0
  61. jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
  62. jettask/utils/time_sync.py +173 -0
  63. jettask/webui/__init__.py +27 -0
  64. jettask/{api/v1 → webui/api}/alerts.py +1 -1
  65. jettask/{api/v1 → webui/api}/analytics.py +2 -2
  66. jettask/{api/v1 → webui/api}/namespaces.py +1 -1
  67. jettask/{api/v1 → webui/api}/overview.py +1 -1
  68. jettask/{api/v1 → webui/api}/queues.py +3 -3
  69. jettask/{api/v1 → webui/api}/scheduled.py +1 -1
  70. jettask/{api/v1 → webui/api}/settings.py +1 -1
  71. jettask/{api.py → webui/app.py} +253 -145
  72. jettask/webui/namespace_manager/__init__.py +10 -0
  73. jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
  74. jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
  75. jettask/{run.py → webui/run.py} +2 -2
  76. jettask/{services → webui/services}/__init__.py +1 -3
  77. jettask/{services → webui/services}/overview_service.py +34 -16
  78. jettask/{services → webui/services}/queue_service.py +1 -1
  79. jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
  80. jettask/{services → webui/services}/settings_service.py +1 -1
  81. jettask/worker/__init__.py +53 -0
  82. jettask/worker/lifecycle.py +1507 -0
  83. jettask/worker/manager.py +583 -0
  84. jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
  85. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
  86. jettask-0.2.20.dist-info/RECORD +145 -0
  87. jettask/__main__.py +0 -140
  88. jettask/api/__init__.py +0 -103
  89. jettask/backend/__init__.py +0 -1
  90. jettask/backend/api/__init__.py +0 -3
  91. jettask/backend/api/v1/__init__.py +0 -17
  92. jettask/backend/api/v1/monitoring.py +0 -431
  93. jettask/backend/api/v1/namespaces.py +0 -504
  94. jettask/backend/api/v1/queues.py +0 -342
  95. jettask/backend/api/v1/tasks.py +0 -367
  96. jettask/backend/core/__init__.py +0 -3
  97. jettask/backend/core/cache.py +0 -221
  98. jettask/backend/core/database.py +0 -200
  99. jettask/backend/core/exceptions.py +0 -102
  100. jettask/backend/dependencies.py +0 -261
  101. jettask/backend/init_meta_db.py +0 -158
  102. jettask/backend/main.py +0 -1426
  103. jettask/backend/main_unified.py +0 -78
  104. jettask/backend/main_v2.py +0 -394
  105. jettask/backend/models/__init__.py +0 -3
  106. jettask/backend/models/requests.py +0 -236
  107. jettask/backend/models/responses.py +0 -230
  108. jettask/backend/namespace_api_old.py +0 -267
  109. jettask/backend/services/__init__.py +0 -3
  110. jettask/backend/start.py +0 -42
  111. jettask/backend/unified_api_router.py +0 -1541
  112. jettask/cleanup_deprecated_tables.sql +0 -16
  113. jettask/core/consumer_manager.py +0 -1695
  114. jettask/core/delay_scanner.py +0 -256
  115. jettask/core/event_pool.py +0 -1700
  116. jettask/core/heartbeat_process.py +0 -222
  117. jettask/core/task_batch.py +0 -153
  118. jettask/core/worker_scanner.py +0 -271
  119. jettask/executors/__init__.py +0 -5
  120. jettask/executors/asyncio.py +0 -876
  121. jettask/executors/base.py +0 -30
  122. jettask/executors/common.py +0 -148
  123. jettask/executors/multi_asyncio.py +0 -309
  124. jettask/gradio_app.py +0 -570
  125. jettask/integrated_gradio_app.py +0 -1088
  126. jettask/main.py +0 -0
  127. jettask/monitoring/__init__.py +0 -3
  128. jettask/pg_consumer.py +0 -1896
  129. jettask/run_monitor.py +0 -22
  130. jettask/run_webui.py +0 -148
  131. jettask/scheduler/multi_namespace_scheduler.py +0 -294
  132. jettask/scheduler/unified_manager.py +0 -450
  133. jettask/task_center_client.py +0 -150
  134. jettask/utils/serializer_optimized.py +0 -33
  135. jettask/webui_exceptions.py +0 -67
  136. jettask-0.2.18.dist-info/RECORD +0 -150
  137. /jettask/{constants.py → config/constants.py} +0 -0
  138. /jettask/{backend/config.py → config/task_center.py} +0 -0
  139. /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
  140. /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
  141. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
  142. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
  143. /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
  144. /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
  145. /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
  146. /jettask/{models.py → persistence/models.py} +0 -0
  147. /jettask/scheduler/{manager.py → task_crud.py} +0 -0
  148. /jettask/{schema.sql → schemas/schema.sql} +0 -0
  149. /jettask/{task_center.py → task/task_center/client.py} +0 -0
  150. /jettask/{monitoring → utils}/file_watcher.py +0 -0
  151. /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
  152. /jettask/{api/v1 → webui/api}/__init__.py +0 -0
  153. /jettask/{webui_config.py → webui/config.py} +0 -0
  154. /jettask/{webui_models → webui/models}/__init__.py +0 -0
  155. /jettask/{webui_models → webui/models}/namespace.py +0 -0
  156. /jettask/{services → webui/services}/alert_service.py +0 -0
  157. /jettask/{services → webui/services}/analytics_service.py +0 -0
  158. /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
  159. /jettask/{services → webui/services}/task_service.py +0 -0
  160. /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
  161. /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
  162. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
  163. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
  164. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
  165. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/executor/core.py (new file)
@@ -0,0 +1,625 @@
+"""
+Executor core logic
+
+Core execution logic extracted from AsyncioExecutor.
+Responsibilities:
+1. Task execution
+2. Pipeline management
+3. Rate limiting
+4. Statistics collection
+"""
+
+import asyncio
+import logging
+import time
+import os
+from enum import Enum
+from typing import Dict, Optional
+from collections import defaultdict, deque
+
+from ..utils.traceback_filter import filter_framework_traceback
+from ..utils.task_logger import TaskContextManager, configure_task_logging
+from ..utils.serializer import dumps_str
+from ..exceptions import RetryableError
+from ..core.enums import TaskStatus
+from ..utils.rate_limit.limiter import RateLimiterManager, ConcurrencyRateLimiter
+
+logger = logging.getLogger('app')
+
+
+class ExecutionMode(Enum):
+    """Execution mode"""
+    SINGLE_PROCESS = "single_process"  # single-process mode
+    MULTI_PROCESS = "multi_process"    # multi-process mode
+    AUTO = "auto"                      # choose automatically
+
+# Lua script: atomically update the maximum value stored in a Redis hash field
+UPDATE_MAX_OFFSET_LUA = """
+local hash_key = KEYS[1]
+local field = KEYS[2]
+local new_value = tonumber(ARGV[1])
+
+local current = redis.call('HGET', hash_key, field)
+if current == false or tonumber(current) < new_value then
+    redis.call('HSET', hash_key, field, new_value)
+    return 1
+else
+    return 0
+end
+"""
+
+
+class ExecutorCore:
+    """
+    Executor core logic
+
+    Core execution logic extracted from AsyncioExecutor.
+    Responsibilities:
+    1. Task execution
+    2. Pipeline management
+    3. Rate limiting
+    4. Statistics collection
+    """
+
+    def __init__(self, app, task_name: str, concurrency: int = 100):
+        """
+        Initialize the executor core.
+
+        Args:
+            app: Application instance
+            task_name: task name
+            concurrency: concurrency level
+        """
+        self.app = app
+        self.task_name = task_name
+        self.concurrency = concurrency
+
+        # Pipeline configuration
+        self.pipeline_config = {
+            'ack': {'max_batch': 1000, 'max_delay': 0.05},
+            'task_info': {'max_batch': 2000, 'max_delay': 0.1},
+            'status': {'max_batch': 1000, 'max_delay': 0.15},
+            'data': {'max_batch': 1000, 'max_delay': 0.15},
+            'stats': {'max_batch': 5000, 'max_delay': 0.2}
+        }
+
+        # Pipeline buffers
+        self.pending_acks = []
+        self.status_updates = []
+        self.data_updates = []
+        self.task_info_updates = {}
+        self.stats_updates = []
+
+        # Pipeline flush time tracking
+        self.last_pipeline_flush = {
+            'ack': time.time(),
+            'task_info': time.time(),
+            'status': time.time(),
+            'data': time.time(),
+            'stats': time.time()
+        }
+
+        # Performance counters
+        self.batch_counter = 0
+        self.pipeline_operation_count = 0
+
+        # Prefixes and caches
+        self.prefix = self.app.ep.redis_prefix or 'jettask'
+        self._status_prefix = self.app._status_prefix
+        self._result_prefix = self.app._result_prefix
+        self._prefixed_queue_cache = {}
+
+        # Pending-count cache
+        self.pending_cache = {}
+        self.pending_cache_expire = 0
+
+        # Rate limiter manager
+        self.rate_limiter_manager = None
+
+        # Configure task logging
+        log_format = os.environ.get('JETTASK_LOG_FORMAT', 'text').lower()
+        if log_format == 'json':
+            configure_task_logging(format='json')
+        else:
+            format_string = os.environ.get('JETTASK_LOG_FORMAT_STRING')
+            if format_string:
+                configure_task_logging(format='text', format_string=format_string)
+
+        logger.debug(f"ExecutorCore initialized for task {task_name}")
+
+    def _get_prefixed_queue_cached(self, queue: str) -> str:
+        """Cache prefixed queue names to avoid repeated string concatenation"""
+        if queue not in self._prefixed_queue_cache:
+            self._prefixed_queue_cache[queue] = self.app.ep.get_prefixed_queue_name(queue)
+        return self._prefixed_queue_cache[queue]
+
+    async def get_pending_count_cached(self, queue: str) -> int:
+        """Get the cached pending count"""
+        current_time = time.time()
+
+        if (current_time - self.pending_cache_expire > 30 or
+                queue not in self.pending_cache):
+            try:
+                pending_info = await self.app.ep.async_redis_client.xpending(queue, queue)
+                self.pending_cache[queue] = pending_info.get("pending", 0)
+                self.pending_cache_expire = current_time
+            except Exception:
+                self.pending_cache[queue] = 0
+
+        return self.pending_cache.get(queue, 0)
+
+    async def _quick_ack(self, queue: str, event_id: str, group_name: str = None,
+                         offset: int = None):
+        """Fast ACK with unified pipeline management"""
+        group_name = group_name or queue
+        self.pending_acks.append((queue, event_id, group_name, offset))
+        current_time = time.time()
+
+        ack_config = self.pipeline_config['ack']
+        time_since_flush = current_time - self.last_pipeline_flush['ack']
+
+        should_flush = (
+            len(self.pending_acks) >= ack_config['max_batch'] or
+            (len(self.pending_acks) >= 50 and time_since_flush >= ack_config['max_delay'])
+        )
+
+        if should_flush:
+            await self._flush_all_buffers()
+
+    async def _flush_all_buffers(self):
+        """Flush all buffers through a single unified pipeline"""
+        pipeline = self.app.ep.async_redis_client.pipeline()
+        operations_count = 0
+
+        # 1. Process ACK operations
+        if self.pending_acks:
+            acks_by_queue_group = defaultdict(lambda: defaultdict(list))
+            max_offsets = {}
+
+            for item in self.pending_acks:
+                if len(item) == 4:
+                    queue, event_id, group_name, offset = item
+                elif len(item) == 3:
+                    queue, event_id, group_name = item
+                    offset = None
+                else:
+                    queue, event_id = item
+                    group_name = queue
+                    offset = None
+
+                prefixed_queue = self._get_prefixed_queue_cached(queue)
+                acks_by_queue_group[prefixed_queue][group_name].append(event_id)
+
+                if group_name and offset is not None:
+                    key = (queue, group_name)
+                    if key not in max_offsets or offset > max_offsets[key]:
+                        max_offsets[key] = offset
+
+            # Apply offset updates
+            if max_offsets:
+                task_offset_key = f"{self.prefix}:TASK_OFFSETS"
+                for (queue, group_name), offset in max_offsets.items():
+                    # Extract task_name from group_name (last segment)
+                    task_name = group_name.split(':')[-1]
+                    # Build the field: queue name (including priority) + task name,
+                    # e.g. robust_bench2:8:benchmark_task
+                    task_field = f"{queue}:{task_name}"
+                    pipeline.eval(UPDATE_MAX_OFFSET_LUA, 2, task_offset_key, task_field, offset)
+                    operations_count += 1
+
+            # Issue the stream ACKs
+            for prefixed_queue, groups in acks_by_queue_group.items():
+                for group_name, event_ids in groups.items():
+                    stream_key = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
+                    group_key = group_name.encode() if isinstance(group_name, str) else group_name
+                    batch_bytes = [b.encode() if isinstance(b, str) else b for b in event_ids]
+
+                    pipeline.xack(stream_key, group_key, *batch_bytes)
+                    operations_count += 1
+
+            self.pending_acks.clear()
+
+        # 2. Process task info updates
+        task_change_events = []
+        if self.task_info_updates:
+            for event_key, updates in self.task_info_updates.items():
+                key = f"{self.prefix}:TASK:{event_key}".encode()
+                if updates:
+                    encoded_updates = {k.encode(): v.encode() if isinstance(v, str) else v
+                                       for k, v in updates.items()}
+                    pipeline.hset(key, mapping=encoded_updates)
+                    pipeline.expire(key, 3600)
+                    operations_count += 2
+
+                    full_task_id = f"{self.prefix}:TASK:{event_key}"
+                    task_change_events.append(full_task_id)
+
+            # Publish change events
+            change_stream_key = f"{self.prefix}:TASK_CHANGES".encode()
+            for task_id in task_change_events:
+                change_data = {b'id': task_id.encode() if isinstance(task_id, str) else task_id}
+                pipeline.xadd(change_stream_key, change_data, maxlen=1000000)
+                operations_count += 1
+
+            self.task_info_updates.clear()
+
+        # 3. Process statistics
+        if hasattr(self, 'stats_updates') and self.stats_updates:
+            for stat_op in self.stats_updates:
+                if 'queue' in stat_op and 'field' in stat_op:
+                    stats_key = f"{self.prefix}:STATS:{stat_op['queue']}".encode()
+                    field = stat_op['field'].encode() if isinstance(stat_op['field'], str) else stat_op['field']
+                    pipeline.hincrby(stats_key, field, stat_op.get('value', 1))
+                    operations_count += 1
+            self.stats_updates.clear()
+
+        # Execute the pipeline
+        if operations_count > 0:
+            try:
+                results = await pipeline.execute()
+
+                if isinstance(results, Exception):
+                    logger.error(f"Pipeline execution error: {results}")
+                else:
+                    for i, result in enumerate(results):
+                        if isinstance(result, Exception):
+                            logger.error(f"Pipeline operation {i} error: {result}")
+
+                logger.debug(f"Unified pipeline executed {operations_count} operations")
+                self.pipeline_operation_count += operations_count
+
+            except Exception as e:
+                logger.error(f"Pipeline flush error: {e}")
+
+        # Update flush timestamps
+        current_time = time.time()
+        for key in self.last_pipeline_flush:
+            self.last_pipeline_flush[key] = current_time
+
+    async def _collect_stats_async(self, queue: str, success: bool,
+                                   processing_time: float, total_latency: float):
+        """High-performance asynchronous statistics collection"""
+        try:
+            if hasattr(self.app, 'consumer_manager') and self.app.consumer_manager:
+                if hasattr(self, 'stats_updates'):
+                    self.stats_updates.append({
+                        'queue': queue,
+                        'field': 'success_count' if success else 'error_count',
+                        'value': 1
+                    })
+                    self.stats_updates.append({
+                        'queue': queue,
+                        'field': 'total_processing_time',
+                        'value': int(processing_time * 1000)
+                    })
+
+                    if len(self.stats_updates) >= self.pipeline_config['stats']['max_batch']:
+                        asyncio.create_task(self._flush_all_buffers())
+        except Exception:
+            pass
+
+    async def execute_task(self, event_id: str, event_data: dict, queue: str,
+                           routing: dict = None, consumer: str = None,
+                           group_name: str = None, **kwargs):
+        """
+        Execute a single task.
+
+        This is the core execution logic extracted from AsyncioExecutor.logic().
+        """
+        status = "success"
+        exception = None
+        error_msg = None
+        ret = None
+        task = None
+        args = ()
+        kwargs_inner = {}
+
+        status_key = f"{event_id}:{group_name}"
+        task_name = event_data.get("_task_name") or event_data.get("name")
+
+        if not task_name:
+            logger.error(f"No _task_name in event_data for event {event_id}")
+            return
+
+        # Set up the task logging context
+        async with TaskContextManager(
+            event_id=event_id,
+            task_name=task_name,
+            queue=queue,
+            worker_id=consumer
+        ):
+            try:
+                # Initialize the execution start time (needed in the finally block)
+                execution_start_time = time.time()
+
+                # Check for recovered messages
+                if kwargs.get('_recovery'):
+                    logger.debug(f"Processing recovered message {event_id}")
+
+                # Check for delayed tasks
+                if event_data.get('is_delayed') and 'execute_at' in event_data:
+                    execute_at = float(event_data['execute_at'])
+                    if execute_at > time.time():
+                        logger.debug(f"Task {event_id} delayed until {execute_at}")
+                        return
+
+                # Get the retry configuration
+                retry_config = event_data.get('retry_config', {})
+                max_retries = retry_config.get('max_retries', 0)
+
+                # Look up the task
+                task = self.app.get_task_by_name(task_name)
+
+                if not task:
+                    exception = f"{task_name=} {queue=} is not bound to any task"
+                    logger.error(exception)
+
+                    offset = self._extract_offset(event_data)
+                    await self._quick_ack(queue, event_id, group_name, offset)
+
+                    current_time = time.time()
+                    trigger_time_float = float(event_data.get('trigger_time', current_time))
+                    duration = current_time - trigger_time_float
+
+                    self.task_info_updates[status_key] = {
+                        "status": TaskStatus.ERROR.value,
+                        "exception": exception,
+                        "started_at": str(current_time),
+                        "completed_at": str(current_time),
+                        "duration": str(duration),
+                        "consumer": consumer,
+                    }
+                    await self._flush_all_buffers()
+                    return
+
+                self.pedding_count = await self.get_pending_count_cached(queue)
+
+                # Get the call arguments
+                args = event_data.get("args", ()) or ()
+                kwargs_inner = event_data.get("kwargs", {}) or {}
+
+                if 'scheduled_task_id' in event_data:
+                    kwargs_inner['__scheduled_task_id'] = event_data['scheduled_task_id']
+
+                # Handle special event types
+                if "event_type" in event_data and "customer_data" in event_data:
+                    args = (event_data["event_type"], event_data["customer_data"])
+                    extra_kwargs = {k: v for k, v in event_data.items()
+                                    if k not in ["event_type", "customer_data", "_broadcast",
+                                                 "_target_tasks", "_timestamp", "trigger_time",
+                                                 "name", "_task_name"]}
+                    kwargs_inner.update(extra_kwargs)
+
+                # Run on_before
+                result = task.on_before(
+                    event_id=event_id,
+                    pedding_count=self.pedding_count,
+                    args=args,
+                    kwargs=kwargs_inner,
+                )
+                if asyncio.iscoroutine(result):
+                    result = await result
+
+                if result and result.reject:
+                    self.task_info_updates[status_key] = {
+                        "status": TaskStatus.REJECTED.value,
+                        "consumer": consumer,
+                        "started_at": str(execution_start_time),
+                        "completed_at": str(time.time()),
+                        "error_msg": "Task rejected by on_before"
+                    }
+                    await self._flush_all_buffers()
+                    return
+
+                # Mark the task as running (execution_start_time was set at the top of the try block)
+                self.task_info_updates[status_key] = {
+                    "status": TaskStatus.RUNNING.value,
+                    "consumer": consumer,
+                    "started_at": str(execution_start_time)
+                }
+
+                # Retry loop
+                current_retry = 0
+                last_exception = None
+
+                while current_retry <= max_retries:
+                    try:
+                        if current_retry > 0:
+                            logger.debug(f"Retry attempt {current_retry}/{max_retries} for task {event_id}")
+
+                        clean_kwargs = {k: v for k, v in kwargs_inner.items()
+                                        if not k.startswith('_') and not k.startswith('__')}
+
+                        task_result = task(event_id, event_data['trigger_time'], *args, **clean_kwargs)
+                        if asyncio.iscoroutine(task_result):
+                            ret = await task_result
+                        else:
+                            ret = task_result
+
+                        result = task.on_success(
+                            event_id=event_id,
+                            args=args,
+                            kwargs=clean_kwargs,
+                            result=ret,
+                        )
+                        if asyncio.iscoroutine(result):
+                            await result
+
+                        # Task succeeded, ACK the message
+                        offset = self._extract_offset(event_data)
+                        # await self.app.ep.async_redis_client.xack(
+                        #     self._get_prefixed_queue_cached(queue), group_name, event_id
+                        # )
+                        # await self.app.ep.async_redis_client.close()
+                        await self._quick_ack(queue, event_id, group_name, offset)
+                        break
+
+                    except SystemExit:
+                        logger.debug('Task interrupted by system exit, leaving message pending for recovery')
+                        status = "interrupted"
+                        exception = "System exit"
+                        error_msg = "Task interrupted by shutdown"
+                        # Do not ACK interrupted tasks; let another worker recover them
+                        # offset = self._extract_offset(event_data)
+                        # await self._quick_ack(queue, event_id, group_name, offset)
+                        break
+
+                    except Exception as e:
+                        last_exception = e
+
+                        # Decide whether to retry
+                        should_retry = False
+                        if current_retry < max_retries:
+                            retry_on_exceptions = retry_config.get('retry_on_exceptions')
+
+                            if retry_on_exceptions:
+                                exc_type_name = type(e).__name__
+                                should_retry = exc_type_name in retry_on_exceptions
+                            else:
+                                should_retry = True
+
+                        if should_retry:
+                            current_retry += 1
+
+                            # Compute the retry delay
+                            if isinstance(e, RetryableError) and e.retry_after is not None:
+                                delay = e.retry_after
+                            else:
+                                retry_backoff = retry_config.get('retry_backoff', True)
+                                if retry_backoff:
+                                    base_delay = 1.0
+                                    delay = min(base_delay * (2 ** (current_retry - 1)),
+                                                retry_config.get('retry_backoff_max', 60))
+                                else:
+                                    delay = 1.0
+
+                            logger.debug(f"Task {event_id} will retry after {delay:.2f}s")
+                            await asyncio.sleep(delay)
+                            continue
+                        else:
+                            logger.error(f'Task execution failed: {str(e)}')
+                            status = "error"
+                            exception = filter_framework_traceback()
+                            error_msg = str(e)
+                            logger.error(exception)
+
+                            offset = self._extract_offset(event_data)
+                            await self._quick_ack(queue, event_id, group_name, offset)
+                            break
+
+                # All retries exhausted
+                if current_retry > max_retries and last_exception:
+                    logger.error(f'Task still failed after {max_retries} retries')
+                    status = "error"
+                    exception = filter_framework_traceback()
+                    error_msg = str(last_exception)
+                    offset = self._extract_offset(event_data)
+                    await self._quick_ack(queue, event_id, group_name, offset)
+
+            finally:
+                # Compute completion time
+                completed_at = time.time()
+                trigger_time_float = float(event_data.get('trigger_time', execution_start_time))
+                execution_time = max(0, completed_at - execution_start_time)
+                total_latency = max(0, completed_at - trigger_time_float)
+
+                # Collect statistics
+                await self._collect_stats_async(
+                    queue=queue,
+                    success=(status == "success"),
+                    processing_time=execution_time,
+                    total_latency=total_latency
+                )
+
+                # Update task info
+                task_info = {
+                    "completed_at": str(completed_at),
+                    "execution_time": execution_time,
+                    "duration": total_latency,
+                    "consumer": consumer,
+                    'status': status
+                }
+
+                if ret is None:
+                    task_info["result"] = "null"
+                else:
+                    task_info["result"] = ret if isinstance(ret, str) else dumps_str(ret)
+
+                if exception:
+                    task_info["exception"] = exception
+                if error_msg:
+                    task_info["error_msg"] = error_msg
+
+                if status_key in self.task_info_updates:
+                    self.task_info_updates[status_key].update(task_info)
+                else:
+                    self.task_info_updates[status_key] = task_info
+
+                # Call on_end
+                if task:
+                    if 'clean_kwargs' not in locals():
+                        clean_kwargs = {k: v for k, v in kwargs_inner.items()
+                                        if not k.startswith('_') and not k.startswith('__')}
+
+                    result = task.on_end(
+                        event_id=event_id,
+                        args=args,
+                        kwargs=clean_kwargs,
+                        result=ret,
+                        pedding_count=self.pedding_count,
+                    )
+                    if asyncio.iscoroutine(result):
+                        await result
+
+                # Handle routing
+                if routing:
+                    agg_key = routing.get("agg_key")
+                    routing_key = routing.get("routing_key")
+                    if routing_key and agg_key:
+                        if queue in self.app.ep.solo_running_state and routing_key in self.app.ep.solo_running_state[queue]:
+                            self.app.ep.solo_running_state[queue][routing_key] -= 1
+                        try:
+                            if result and result.urgent_retry:
+                                self.app.ep.solo_urgent_retry[routing_key] = True
+                        except:
+                            pass
+                        if result and result.delay:
+                            self.app.ep.task_scheduler[queue][routing_key] = time.time() + result.delay
+
+                self.batch_counter -= 1
+
+    def _extract_offset(self, event_data: dict) -> Optional[int]:
+        """Extract the offset from event_data"""
+        if isinstance(event_data, dict):
+            offset = event_data.get('offset')
+            if offset is not None:
+                try:
+                    return int(offset)
+                except (ValueError, TypeError):
+                    pass
+        return None
+
+    async def cleanup(self):
+        """Clean up resources"""
+        logger.debug("ExecutorCore cleaning up...")
+
+        # Stop rate limiters (release all held locks)
+        if self.rate_limiter_manager:
+            try:
+                await self.rate_limiter_manager.stop_all()
+                logger.debug("Rate limiter manager stopped and locks released")
+            except Exception as e:
+                logger.error(f"Error stopping rate limiter manager: {e}")
+
+        # Flush all buffers
+        try:
+            await asyncio.wait_for(self._flush_all_buffers(), timeout=0.5)
+            logger.debug("Buffers flushed successfully")
+        except asyncio.TimeoutError:
+            logger.warning("Buffer flush timeout")
+        except Exception as e:
+            logger.error(f"Error flushing buffers: {e}")
+
+
+
+__all__ = ['ExecutorCore', 'ExecutionMode', 'UPDATE_MAX_OFFSET_LUA']
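
For context on the new `UPDATE_MAX_OFFSET_LUA` script above: it keeps a per-queue/per-task high-water mark in a Redis hash, writing the field only when the incoming offset is larger than the stored one, and because the HGET/compare/HSET sequence runs inside a single Lua call it is atomic even when many executors report offsets concurrently. Below is a minimal standalone sketch of that pattern, not part of jettask itself; it assumes a local Redis and the redis-py 5.x asyncio client, and the key/field names are illustrative only (they mirror the example given in the code comments).

```python
import asyncio
import redis.asyncio as redis

# Same script as in jettask/executor/core.py: update the hash field only if
# the new value is greater than the stored one, atomically.
UPDATE_MAX_OFFSET_LUA = """
local hash_key = KEYS[1]
local field = KEYS[2]
local new_value = tonumber(ARGV[1])

local current = redis.call('HGET', hash_key, field)
if current == false or tonumber(current) < new_value then
    redis.call('HSET', hash_key, field, new_value)
    return 1
else
    return 0
end
"""

async def main():
    client = redis.Redis()
    # Hypothetical key and field names, for illustration only.
    key, field = "jettask:TASK_OFFSETS", "robust_bench2:8:benchmark_task"
    for offset in (10, 7, 42):
        updated = await client.eval(UPDATE_MAX_OFFSET_LUA, 2, key, field, offset)
        print(f"offset={offset} updated={bool(updated)}")  # 7 is ignored
    print(await client.hget(key, field))  # b'42': only larger offsets win
    await client.aclose()

if __name__ == "__main__":
    asyncio.run(main())
```

In `_flush_all_buffers` the same `eval` call is queued on a pipeline alongside the XACK/HSET/XADD operations, so the offset high-water mark is advanced in the same round trip as the batched acknowledgements.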