jettask 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (165)
  1. jettask/__init__.py +60 -2
  2. jettask/cli.py +314 -228
  3. jettask/config/__init__.py +9 -1
  4. jettask/config/config.py +245 -0
  5. jettask/config/env_loader.py +381 -0
  6. jettask/config/lua_scripts.py +158 -0
  7. jettask/config/nacos_config.py +132 -5
  8. jettask/core/__init__.py +1 -1
  9. jettask/core/app.py +1573 -666
  10. jettask/core/app_importer.py +33 -16
  11. jettask/core/container.py +532 -0
  12. jettask/core/task.py +1 -4
  13. jettask/core/unified_manager_base.py +2 -2
  14. jettask/executor/__init__.py +38 -0
  15. jettask/executor/core.py +625 -0
  16. jettask/executor/executor.py +338 -0
  17. jettask/executor/orchestrator.py +290 -0
  18. jettask/executor/process_entry.py +638 -0
  19. jettask/executor/task_executor.py +317 -0
  20. jettask/messaging/__init__.py +68 -0
  21. jettask/messaging/event_pool.py +2188 -0
  22. jettask/messaging/reader.py +519 -0
  23. jettask/messaging/registry.py +266 -0
  24. jettask/messaging/scanner.py +369 -0
  25. jettask/messaging/sender.py +312 -0
  26. jettask/persistence/__init__.py +118 -0
  27. jettask/persistence/backlog_monitor.py +567 -0
  28. jettask/{backend/data_access.py → persistence/base.py} +58 -57
  29. jettask/persistence/consumer.py +315 -0
  30. jettask/{core → persistence}/db_manager.py +23 -22
  31. jettask/persistence/maintenance.py +81 -0
  32. jettask/persistence/message_consumer.py +259 -0
  33. jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
  34. jettask/persistence/offline_recovery.py +196 -0
  35. jettask/persistence/queue_discovery.py +215 -0
  36. jettask/persistence/task_persistence.py +218 -0
  37. jettask/persistence/task_updater.py +583 -0
  38. jettask/scheduler/__init__.py +2 -2
  39. jettask/scheduler/loader.py +6 -5
  40. jettask/scheduler/run_scheduler.py +1 -1
  41. jettask/scheduler/scheduler.py +7 -7
  42. jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
  43. jettask/task/__init__.py +16 -0
  44. jettask/{router.py → task/router.py} +26 -8
  45. jettask/task/task_center/__init__.py +9 -0
  46. jettask/task/task_executor.py +318 -0
  47. jettask/task/task_registry.py +291 -0
  48. jettask/test_connection_monitor.py +73 -0
  49. jettask/utils/__init__.py +31 -1
  50. jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
  51. jettask/utils/db_connector.py +1629 -0
  52. jettask/{db_init.py → utils/db_init.py} +1 -1
  53. jettask/utils/rate_limit/__init__.py +30 -0
  54. jettask/utils/rate_limit/concurrency_limiter.py +665 -0
  55. jettask/utils/rate_limit/config.py +145 -0
  56. jettask/utils/rate_limit/limiter.py +41 -0
  57. jettask/utils/rate_limit/manager.py +269 -0
  58. jettask/utils/rate_limit/qps_limiter.py +154 -0
  59. jettask/utils/rate_limit/task_limiter.py +384 -0
  60. jettask/utils/serializer.py +3 -0
  61. jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
  62. jettask/utils/time_sync.py +173 -0
  63. jettask/webui/__init__.py +27 -0
  64. jettask/{api/v1 → webui/api}/alerts.py +1 -1
  65. jettask/{api/v1 → webui/api}/analytics.py +2 -2
  66. jettask/{api/v1 → webui/api}/namespaces.py +1 -1
  67. jettask/{api/v1 → webui/api}/overview.py +1 -1
  68. jettask/{api/v1 → webui/api}/queues.py +3 -3
  69. jettask/{api/v1 → webui/api}/scheduled.py +1 -1
  70. jettask/{api/v1 → webui/api}/settings.py +1 -1
  71. jettask/{api.py → webui/app.py} +253 -145
  72. jettask/webui/namespace_manager/__init__.py +10 -0
  73. jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
  74. jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
  75. jettask/{run.py → webui/run.py} +2 -2
  76. jettask/{services → webui/services}/__init__.py +1 -3
  77. jettask/{services → webui/services}/overview_service.py +34 -16
  78. jettask/{services → webui/services}/queue_service.py +1 -1
  79. jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
  80. jettask/{services → webui/services}/settings_service.py +1 -1
  81. jettask/worker/__init__.py +53 -0
  82. jettask/worker/lifecycle.py +1507 -0
  83. jettask/worker/manager.py +583 -0
  84. jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
  85. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
  86. jettask-0.2.20.dist-info/RECORD +145 -0
  87. jettask/__main__.py +0 -140
  88. jettask/api/__init__.py +0 -103
  89. jettask/backend/__init__.py +0 -1
  90. jettask/backend/api/__init__.py +0 -3
  91. jettask/backend/api/v1/__init__.py +0 -17
  92. jettask/backend/api/v1/monitoring.py +0 -431
  93. jettask/backend/api/v1/namespaces.py +0 -504
  94. jettask/backend/api/v1/queues.py +0 -342
  95. jettask/backend/api/v1/tasks.py +0 -367
  96. jettask/backend/core/__init__.py +0 -3
  97. jettask/backend/core/cache.py +0 -221
  98. jettask/backend/core/database.py +0 -200
  99. jettask/backend/core/exceptions.py +0 -102
  100. jettask/backend/dependencies.py +0 -261
  101. jettask/backend/init_meta_db.py +0 -158
  102. jettask/backend/main.py +0 -1426
  103. jettask/backend/main_unified.py +0 -78
  104. jettask/backend/main_v2.py +0 -394
  105. jettask/backend/models/__init__.py +0 -3
  106. jettask/backend/models/requests.py +0 -236
  107. jettask/backend/models/responses.py +0 -230
  108. jettask/backend/namespace_api_old.py +0 -267
  109. jettask/backend/services/__init__.py +0 -3
  110. jettask/backend/start.py +0 -42
  111. jettask/backend/unified_api_router.py +0 -1541
  112. jettask/cleanup_deprecated_tables.sql +0 -16
  113. jettask/core/consumer_manager.py +0 -1695
  114. jettask/core/delay_scanner.py +0 -256
  115. jettask/core/event_pool.py +0 -1700
  116. jettask/core/heartbeat_process.py +0 -222
  117. jettask/core/task_batch.py +0 -153
  118. jettask/core/worker_scanner.py +0 -271
  119. jettask/executors/__init__.py +0 -5
  120. jettask/executors/asyncio.py +0 -876
  121. jettask/executors/base.py +0 -30
  122. jettask/executors/common.py +0 -148
  123. jettask/executors/multi_asyncio.py +0 -309
  124. jettask/gradio_app.py +0 -570
  125. jettask/integrated_gradio_app.py +0 -1088
  126. jettask/main.py +0 -0
  127. jettask/monitoring/__init__.py +0 -3
  128. jettask/pg_consumer.py +0 -1896
  129. jettask/run_monitor.py +0 -22
  130. jettask/run_webui.py +0 -148
  131. jettask/scheduler/multi_namespace_scheduler.py +0 -294
  132. jettask/scheduler/unified_manager.py +0 -450
  133. jettask/task_center_client.py +0 -150
  134. jettask/utils/serializer_optimized.py +0 -33
  135. jettask/webui_exceptions.py +0 -67
  136. jettask-0.2.18.dist-info/RECORD +0 -150
  137. /jettask/{constants.py → config/constants.py} +0 -0
  138. /jettask/{backend/config.py → config/task_center.py} +0 -0
  139. /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
  140. /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
  141. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
  142. /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
  143. /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
  144. /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
  145. /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
  146. /jettask/{models.py → persistence/models.py} +0 -0
  147. /jettask/scheduler/{manager.py → task_crud.py} +0 -0
  148. /jettask/{schema.sql → schemas/schema.sql} +0 -0
  149. /jettask/{task_center.py → task/task_center/client.py} +0 -0
  150. /jettask/{monitoring → utils}/file_watcher.py +0 -0
  151. /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
  152. /jettask/{api/v1 → webui/api}/__init__.py +0 -0
  153. /jettask/{webui_config.py → webui/config.py} +0 -0
  154. /jettask/{webui_models → webui/models}/__init__.py +0 -0
  155. /jettask/{webui_models → webui/models}/namespace.py +0 -0
  156. /jettask/{services → webui/services}/alert_service.py +0 -0
  157. /jettask/{services → webui/services}/analytics_service.py +0 -0
  158. /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
  159. /jettask/{services → webui/services}/task_service.py +0 -0
  160. /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
  161. /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
  162. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
  163. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
  164. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
  165. {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/executors/asyncio.py (deleted)
@@ -1,876 +0,0 @@
- import asyncio
- import time
- import logging
- import traceback
- from ..utils.traceback_filter import filter_framework_traceback
- from ..utils.task_logger import TaskContextManager, configure_task_logging
- from ..utils.serializer import dumps_str
- from typing import Optional, Union
- from collections import defaultdict, deque
- import os
- # Group by queue to optimize batch operations
- from collections import defaultdict
- from .base import BaseExecutor
- import random
- from ..exceptions import RetryableError
- from ..core.enums import TaskStatus
-
- logger = logging.getLogger('app')
-
- # Lua script: atomically update the maximum value stored in a Redis hash field
- UPDATE_MAX_OFFSET_LUA = """
- local hash_key = KEYS[1]
- local field = KEYS[2]
- local new_value = tonumber(ARGV[1])
-
- local current = redis.call('HGET', hash_key, field)
- if current == false or tonumber(current) < new_value then
- redis.call('HSET', hash_key, field, new_value)
- return 1
- else
- return 0
- end
- """
-
- # Try to use uvloop for better performance
- try:
- import uvloop
- uvloop.install()
- logger.info("Using uvloop for better performance")
- except ImportError:
- pass
-
-
- class AsyncioExecutor(BaseExecutor):
- """High-performance asyncio executor"""
-
- def __init__(self, event_queue, app, concurrency=100):
- super().__init__(event_queue, app, concurrency)
-
- # Caching for pending count
- self.pending_cache = {}
- self.pending_cache_expire = 0
-
- # Unified pipeline manager configuration
- self.pipeline_config = {
- 'ack': {'max_batch': 1000, 'max_delay': 0.05}, # 50ms
- 'task_info': {'max_batch': 2000, 'max_delay': 0.1}, # 100ms
- 'status': {'max_batch': 1000, 'max_delay': 0.15}, # 150ms
- 'data': {'max_batch': 1000, 'max_delay': 0.15}, # 150ms
- 'stats': {'max_batch': 5000, 'max_delay': 0.2} # 200ms
- }
-
- # Unified pipeline buffers
- self.pending_acks = []
- self.status_updates = []
- self.data_updates = []
- self.task_info_updates = {} # Dict of per-task hash updates
- self.stats_updates = [] # New: statistics buffer
-
- # Pipeline flush-time tracking
- self.last_pipeline_flush = {
- 'ack': time.time(),
- 'task_info': time.time(),
- 'status': time.time(),
- 'data': time.time(),
- 'stats': time.time()
- }
-
- # Settings kept for backward compatibility
- self.ack_buffer_size = self.pipeline_config['ack']['max_batch']
- self.max_ack_buffer_size = 2000
- self.status_batch_size = self.pipeline_config['status']['max_batch']
- self.data_batch_size = self.pipeline_config['data']['max_batch']
-
- # Key prefix
- self.prefix = self.app.ep.redis_prefix or 'jettask'
-
- # Unified pipeline flush strategy
- self.last_flush_time = time.time()
- self.pipeline_operation_count = 0 # Total number of operations
-
- # Configure the task log format (from environment variables)
- log_format = os.environ.get('JETTASK_LOG_FORMAT', 'text').lower()
- if log_format == 'json':
- configure_task_logging(format='json')
- else:
- # A custom text format may be supplied
- format_string = os.environ.get('JETTASK_LOG_FORMAT_STRING')
- if format_string:
- configure_task_logging(format='text', format_string=format_string)
- self.max_flush_interval = 0.05 # 50ms maximum flush interval
- self.min_flush_interval = 0.005 # 5ms minimum flush interval
-
- # Performance optimization 4: precompiled constants and caches
- self._status_prefix = self.app._status_prefix
- self._result_prefix = self.app._result_prefix
- self._prefixed_queue_cache = {} # Cache of prefixed queue names
-
- # High-performance mode is enabled by default
- self._stats_lock = asyncio.Lock()
- self._high_performance_mode = True # Always enabled
-
- def _get_prefixed_queue_cached(self, queue: str) -> str:
- """Cache prefixed queue names to avoid repeated string concatenation"""
- if queue not in self._prefixed_queue_cache:
- self._prefixed_queue_cache[queue] = self.app.ep.get_prefixed_queue_name(queue)
- return self._prefixed_queue_cache[queue]
-
-
- async def get_pending_count_cached(self, queue: str) -> int:
- """Get cached pending count"""
- current_time = time.time()
-
- if (current_time - self.pending_cache_expire > 30 or # Optimization: longer cache lifetime
- queue not in self.pending_cache):
- try:
- pending_info = await self.app.ep.async_redis_client.xpending(queue, queue)
- self.pending_cache[queue] = pending_info.get("pending", 0)
- self.pending_cache_expire = current_time
- except Exception:
- self.pending_cache[queue] = 0
-
- return self.pending_cache.get(queue, 0)
-
- async def _quick_ack(self, queue: str, event_id: str, group_name: str = None, offset: int = None):
- """Quick ACK with unified pipeline management and offset tracking"""
- # If no group_name is provided, fall back to the queue name (legacy behaviour)
- group_name = group_name or queue
- self.pending_acks.append((queue, event_id, group_name, offset))
- current_time = time.time()
-
- # Check whether the unified pipeline needs flushing
- ack_config = self.pipeline_config['ack']
- time_since_flush = current_time - self.last_pipeline_flush['ack']
-
- should_flush = (
- len(self.pending_acks) >= ack_config['max_batch'] or # Batch size reached
- (len(self.pending_acks) >= 50 and # or at least 50 pending and the delay elapsed
- time_since_flush >= ack_config['max_delay']) or
- len(self.pending_acks) >= self.max_ack_buffer_size * 0.1 # or 10% of the maximum buffer
- )
-
- if should_flush:
- await self._flush_all_buffers() # Use the unified flush
-
- async def _flush_all_buffers(self):
- """Unified pipeline flush - submit all operations at once"""
- # Create a single pipeline (the binary client avoids encoding issues)
- pipeline = self.app.ep.async_binary_redis_client.pipeline()
-
- operations_count = 0
-
- # 1. Handle ACK operations (binary client)
- if self.pending_acks:
- acks_by_queue_group = defaultdict(lambda: defaultdict(list))
- offset_updates = [] # Offsets that need updating
-
- # Group by queue + group_name and track the maximum offset per group
- max_offsets = {} # {(queue, group_name): max_offset}
-
- for item in self.pending_acks:
- # print(f'{item=}')
- if len(item) == 4:
- queue, event_id, group_name, offset = item
- elif len(item) == 3:
- queue, event_id, group_name = item
- offset = None
- else:
- queue, event_id = item
- group_name = queue
- offset = None
-
- prefixed_queue = self._get_prefixed_queue_cached(queue)
- acks_by_queue_group[prefixed_queue][group_name].append(event_id)
-
- # Collect offset updates (record only the maximum)
- if group_name and offset is not None:
- key = (queue, group_name)
- if key not in max_offsets or offset > max_offsets[key]:
- max_offsets[key] = offset
-
- # logger.info(f'{max_offsets=}')
- # Apply offset updates (the Lua script guarantees atomicity and the max constraint)
- if max_offsets:
- task_offset_key = f"{self.prefix}:TASK_OFFSETS"
- for (queue, group_name), offset in max_offsets.items():
- task_field = f"{queue}:{group_name}"
-
- # Atomically update the maximum offset via the Lua script
- pipeline.eval(UPDATE_MAX_OFFSET_LUA, 2, task_offset_key, task_field, offset)
- operations_count += 1
-
- # Issue the stream ACKs
- for prefixed_queue, groups in acks_by_queue_group.items():
- for group_name, event_ids in groups.items():
- stream_key = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
- group_key = group_name.encode() if isinstance(group_name, str) else group_name
- batch_bytes = [b.encode() if isinstance(b, str) else b for b in event_ids]
-
- # Add to the unified pipeline
- # logger.info(f'about to ack {batch_bytes=} {stream_key=} {group_key}')
- pipeline.xack(stream_key, group_key, *batch_bytes)
- operations_count += 1
-
- self.pending_acks.clear()
-
- # 2. Handle task info updates (hash)
- task_change_events = [] # Collect the IDs of changed tasks
- if self.task_info_updates:
- for event_key, updates in self.task_info_updates.items():
- # event_key may be "event_id" or "event_id:task_name" (broadcast mode)
- # Key format: jettask:TASK:event_id:group_name
- key = f"{self.prefix}:TASK:{event_key}".encode() # As bytes
- if updates:
- # Encode the updated values as bytes
- encoded_updates = {k.encode(): v.encode() if isinstance(v, str) else v for k, v in updates.items()}
- pipeline.hset(key, mapping=encoded_updates)
- pipeline.expire(key, 3600)
- operations_count += 2
-
- # Collect the changed task ID (with the full key path)
- # event_key may be "event_id" or "event_id:task_name" (broadcast mode)
- # Send the full task_id, e.g. "jettask:TASK:1756956517980-0:jettask:QUEUE:queue_name:task_name"
- full_task_id = f"{self.prefix}:TASK:{event_key}"
- task_change_events.append(full_task_id)
-
- # Publish change events to a dedicated stream
- change_stream_key = f"{self.prefix}:TASK_CHANGES".encode()
- for task_id in task_change_events:
- # Send the full task_id (including the prefix)
- change_data = {
- b'id': task_id.encode() if isinstance(task_id, str) else task_id
- }
- pipeline.xadd(change_stream_key, change_data, maxlen=1000000) # Cap the stream at the most recent 1,000,000 changes
- operations_count += 1
-
- self.task_info_updates.clear()
-
- # 3. Handle statistics (if any)
- if hasattr(self, 'stats_updates') and self.stats_updates:
- # Batch-update the statistics
- for stat_op in self.stats_updates:
- # Apply the statistics operation
- if 'queue' in stat_op and 'field' in stat_op:
- stats_key = f"{self.prefix}:STATS:{stat_op['queue']}".encode() # As bytes
- field = stat_op['field'].encode() if isinstance(stat_op['field'], str) else stat_op['field']
- pipeline.hincrby(stats_key, field, stat_op.get('value', 1))
- operations_count += 1
- self.stats_updates.clear()
-
- # Execute all pipeline operations in one go
- if operations_count > 0:
- try:
- # Execute the unified pipeline
- results = await pipeline.execute()
-
- # Check the results
- if isinstance(results, Exception):
- logger.error(f"Pipeline execution error: {results}")
- else:
- # Inspect each operation's result
- for i, result in enumerate(results):
- if isinstance(result, Exception):
- logger.error(f"Pipeline operation {i} error: {result}")
-
- logger.debug(f"Unified pipeline executed {operations_count} operations")
- self.pipeline_operation_count += operations_count
-
- except Exception as e:
- logger.error(f"Pipeline flush error: {e}")
-
- # Update all flush timestamps
- current_time = time.time()
- for key in self.last_pipeline_flush:
- self.last_pipeline_flush[key] = current_time
- self.last_flush_time = current_time
-
- async def _collect_stats_async(self, queue: str, success: bool, processing_time: float, total_latency: float):
- """High-performance async stats collection - appended to the pipeline buffer"""
- try:
- if hasattr(self.app, 'consumer_manager') and self.app.consumer_manager:
- # Buffer the statistics instead of sending them immediately
- if hasattr(self, 'stats_updates'):
- self.stats_updates.append({
- 'queue': queue,
- 'field': 'success_count' if success else 'error_count',
- 'value': 1
- })
- self.stats_updates.append({
- 'queue': queue,
- 'field': 'total_processing_time',
- 'value': int(processing_time * 1000) # Convert to milliseconds
- })
-
- # Check whether the stats buffer needs flushing
- if len(self.stats_updates) >= self.pipeline_config['stats']['max_batch']:
- asyncio.create_task(self._flush_all_buffers())
- else:
- # Legacy fallback
- asyncio.create_task(self._update_stats_nonblocking(queue, success, processing_time, total_latency))
- except Exception:
- pass # Stats errors must not affect the main flow
-
- async def _update_stats_nonblocking(self, queue: str, success: bool, processing_time: float, total_latency: float):
- """Non-blocking statistics update"""
- try:
- self.app.consumer_manager.task_finished(queue)
- self.app.consumer_manager.update_stats(
- queue=queue,
- success=success,
- processing_time=processing_time,
- total_latency=total_latency
- )
- except Exception as e:
- logger.debug(f"Stats collection error (non-critical): {e}")
-
-
- async def logic(self, semaphore: asyncio.Semaphore, event_id: str, event_data: dict, queue: str, routing: dict = None, consumer: str = None, group_name: str = None, **kwargs):
- """Process a single task"""
- status = "success" # Default status
- exception = None
- error_msg = None
- ret = None
- task = None # Initialize the task variable
- args = () # Initialize positional arguments
- kwargs_inner = {} # Initialize keyword arguments (avoid clashing with the kwargs parameter)
- # print(f'{group_name=}')
- # Initialize status_key early so it is always defined in the finally block
- # Use the supplied group_name, falling back to the queue name
- status_key = f"{event_id}:{group_name}" # Composite key
-
- # Get the task name early so the logging context can be set up
- # Use the _task_name field (set by listen_event_by_task)
- task_name = event_data.get("_task_name") or event_data.get("name")
- # print(f'{event_data=}')
- # If the message carries no task name, log an error and return
- if not task_name:
- logger.error(f"No _task_name in event_data for event {event_id}")
- # Return without processing messages that lack a task name
- return
- # Set up the task logging context - covers the whole task-processing flow
- async with TaskContextManager(
- event_id=event_id,
- task_name=task_name,
- queue=queue,
- worker_id=consumer # Use the consumer as the worker_id
- ):
- try:
- # Check whether this is a recovered message
- if kwargs.get('_recovery'):
- logger.info(f"Processing recovered message {event_id} from {kwargs.get('_claimed_from', 'unknown')}")
- # print(f'{event_data=}')
- # Check whether this is a delayed task
- if event_data.get('is_delayed') and 'execute_at' in event_data:
- execute_at = float(event_data['execute_at'])
- current_time = time.time()
-
- if execute_at > current_time:
- # The task is not due yet, so drop it here
- # Do not ACK the message; it stays in the pending state
- # event_pool checks the zset and reclaims it via XCLAIM once the time is up
- logger.info(f"Task {event_id} delayed until {execute_at}, keeping in pending state")
- return
-
- # Get the retry configuration (from the task decorator or apply_async)
- retry_config = event_data.get('retry_config', {})
- max_retries = retry_config.get('max_retries', 0)
-
-
- # async with semaphore:
- # The task name was already fetched above
-
- if not task_name:
- logger.error(f"No task name found! event_data keys: {list(event_data.keys())}, event_id: {event_id}")
-
- task = self.app.get_task_by_name(task_name)
-
- # status_key was already initialized at the top of the method
-
- if not task:
- exception = f"{task_name=} {queue=} {event_data=} is not bound to any task"
- logger.error(exception)
- # Get the offset from event_data
- offset = None
- if isinstance(event_data, dict):
- offset = event_data.get('offset')
- if offset is not None:
- try:
- offset = int(offset)
- except (ValueError, TypeError):
- offset = None
-
- await self._quick_ack(queue, event_id, group_name, offset)
-
- # Record started_at even when the task does not exist (use the current time)
- current_time = time.time()
- # Recovered messages may lack trigger_time; default to the current time
- trigger_time_float = float(event_data.get('trigger_time', current_time))
- duration = current_time - trigger_time_float
- # Update via hash
- self.task_info_updates[status_key] = {
- "status": TaskStatus.ERROR.value,
- "exception": exception,
- "started_at": str(current_time),
- "completed_at": str(current_time),
- "duration": str(duration),
- "consumer": consumer,
- }
- # Flush via the unified pipeline
- await self._flush_all_buffers()
- return
-
- self.pedding_count = await self.get_pending_count_cached(queue)
-
- # Reset the status to success
- status = "success"
-
- # Get the args (already plain objects, no deserialization needed)
- args = event_data.get("args", ()) or ()
-
- # Handle kwargs uniformly (already plain objects, no deserialization needed)
- kwargs_inner = event_data.get("kwargs", {}) or {}
-
- # If event_data contains scheduled_task_id, add it to kwargs for TaskContext
- if 'scheduled_task_id' in event_data:
- kwargs_inner['__scheduled_task_id'] = event_data['scheduled_task_id']
-
- # Check whether specific fields should be promoted to arguments
- # If the message contains event_type and customer_data, pass them as arguments
- if "event_type" in event_data and "customer_data" in event_data:
- # Pass these fields as positional args; the remaining fields stay in kwargs
- args = (event_data["event_type"], event_data["customer_data"])
- # Keep the other fields in kwargs, excluding those already passed as args
- extra_kwargs = {k: v for k, v in event_data.items()
- if k not in ["event_type", "customer_data", "_broadcast", "_target_tasks", "_timestamp", "trigger_time", "name", "_task_name"]}
- kwargs_inner.update(extra_kwargs)
-
- # Execute lifecycle methods
- result = task.on_before(
- event_id=event_id,
- pedding_count=self.pedding_count,
- args=args,
- kwargs=kwargs_inner,
- )
- if asyncio.iscoroutine(result):
- result = await result
-
- if result and result.reject:
- # The task was rejected; update via hash
- self.task_info_updates[status_key] = {
- "status": TaskStatus.REJECTED.value,
- "consumer": consumer,
- "started_at": str(time.time()),
- "completed_at": str(time.time()),
- "error_msg": "Task rejected by on_before"
- }
- # Flush via the unified pipeline
- await self._flush_all_buffers()
- return
-
- # Mark the task as started
- # if hasattr(self.app, 'consumer_manager') and self.app.consumer_manager:
- # self.app.consumer_manager.task_started(queue)
-
- # Record the time the task actually starts executing (after on_before)
- execution_start_time = time.time()
-
- # Update the running status via hash
- # Write the running status immediately so users can see the task is running
- # running_key = f"{self.prefix}:TASK:{status_key}"
- # Save the start info without setting the status to running, to avoid a race condition
- self.task_info_updates[status_key] = {
- "status": TaskStatus.RUNNING.value,
- "consumer": consumer,
- "started_at": str(execution_start_time)
- }
- # await self.app.ep.async_redis_client.hset(running_key, mapping={
- # "status": TaskStatus.RUNNING.value,
- # "consumer": consumer,
- # "started_at": str(execution_start_time)
- # })
-
- # Run the retry loop inside the worker
- current_retry = 0
- last_exception = None
-
- while current_retry <= max_retries:
- try:
- # Log if this is a retry attempt
- if current_retry > 0:
- logger.info(f"Retry attempt {current_retry}/{max_retries} for task {event_id}")
-
- # Strip internal parameters from kwargs so they are not passed to the user task function
- clean_kwargs = {k: v for k, v in kwargs_inner.items()
- if not k.startswith('_') and not k.startswith('__')}
-
- logger.debug(f"Calling task with clean_kwargs: {clean_kwargs}")
- task_result = task(event_id, event_data['trigger_time'], *args, **clean_kwargs)
- if asyncio.iscoroutine(task_result):
- ret = await task_result
- else:
- ret = task_result
- result = task.on_success(
- event_id=event_id,
- args=args,
- kwargs=clean_kwargs,
- result=ret,
- )
- if asyncio.iscoroutine(result):
- await result
-
- # The task ran successfully, so the message can now be ACKed
- # Get the offset from event_data
- offset = None
- if isinstance(event_data, dict):
- offset = event_data.get('offset')
- if offset is not None:
- try:
- offset = int(offset)
- except (ValueError, TypeError):
- offset = None
-
- await self._quick_ack(queue, event_id, group_name, offset)
-
- # Success: leave the retry loop
- break
-
- except SystemExit:
- # Handle the system exit signal; do not retry
- logger.info('Task interrupted by system exit')
- status = "interrupted"
- exception = "System exit"
- error_msg = "Task interrupted by shutdown"
- # The message still needs to be ACKed on system exit
- # Get the offset from event_data
- offset = None
- if isinstance(event_data, dict):
- offset = event_data.get('offset')
- if offset is not None:
- try:
- offset = int(offset)
- except (ValueError, TypeError):
- offset = None
-
- await self._quick_ack(queue, event_id, group_name, offset)
- break
-
- except Exception as e:
- last_exception = e
-
- # Decide whether to retry
- should_retry = False
- if current_retry < max_retries:
- # Check whether the exception type is retryable
- retry_on_exceptions = retry_config.get('retry_on_exceptions')
-
- if retry_on_exceptions:
- # retry_on_exceptions is a list of exception class names
- exc_type_name = type(e).__name__
- should_retry = exc_type_name in retry_on_exceptions
- else:
- # By default, retry on any exception
- should_retry = True
-
- if should_retry:
- current_retry += 1
-
- # Compute the retry delay
- delay = None
-
- # If this is a RetryableError with retry_after set, use the suggested delay
- if isinstance(e, RetryableError) and e.retry_after is not None:
- delay = e.retry_after
- logger.info(f"Using RetryableError suggested delay: {delay:.1f}s")
- else:
- # Otherwise use the configured retry policy
- retry_backoff = retry_config.get('retry_backoff', True)
-
- if retry_backoff:
- # Exponential backoff: 1s, 2s, 4s, 8s, ...
- base_delay = 1.0
- delay = min(base_delay * (2 ** (current_retry - 1)),
- retry_config.get('retry_backoff_max', 60))
- else:
- # Fixed delay: always 1 second
- delay = 1.0
-
- logger.info(f"Task {event_id} will retry after {delay:.2f} seconds (attempt {current_retry}/{max_retries})")
-
- # Wait inside the worker instead of re-sending the task to the queue
- await asyncio.sleep(delay)
- continue # Proceed to the next retry attempt
- else:
- # No more retries: log the error and stop
- logger.error(f'Task execution failed: {str(e)}')
- status = "error"
- exception = filter_framework_traceback()
- error_msg = str(e)
- logger.error(exception)
- # The task failed and will not be retried, so the message must be ACKed
- # Get the offset from event_data
- offset = None
- if isinstance(event_data, dict):
- offset = event_data.get('offset')
- if offset is not None:
- try:
- offset = int(offset)
- except (ValueError, TypeError):
- offset = None
-
- await self._quick_ack(queue, event_id, group_name, offset)
- break
-
- # If all retries failed
- if current_retry > max_retries and last_exception:
- logger.error(f'Task still failed after {max_retries} retries')
- status = "error"
- exception = filter_framework_traceback()
- error_msg = str(last_exception)
- # The task ultimately failed; the message still needs to be ACKed
- # Get the offset from event_data
- offset = None
- if isinstance(event_data, dict):
- offset = event_data.get('offset')
- if offset is not None:
- try:
- offset = int(offset)
- except (ValueError, TypeError):
- offset = None
-
- await self._quick_ack(queue, event_id, group_name, offset)
-
- # The except block has moved inside the while loop; nothing needed here
- finally:
- # Compute the completion time and durations
- completed_at = time.time()
- # Recovered messages may lack trigger_time; default to the execution start time
- trigger_time_float = float(event_data.get('trigger_time', execution_start_time))
- # Compute both timing metrics, making sure they never go negative
- execution_time = max(0, completed_at - execution_start_time) # Actual execution time
- total_latency = max(0, completed_at - trigger_time_float) # Total latency (including queue wait)
-
- # Collect statistics asynchronously (non-blocking in high-performance mode)
- await self._collect_stats_async(
- queue=queue,
- success=(status == "success"),
- processing_time=execution_time,
- total_latency=total_latency
- )
-
- # Atomically update all fields via the hash
- # Important: write the result before the status so status=success is never visible without its result
- task_info = {
- "completed_at": str(completed_at),
- "execution_time": execution_time,
- "duration": total_latency,
- "consumer": consumer,
- 'status': status
- }
-
- # Write the result first
- if ret is None:
- task_info["result"] = "null" # JSON null
- else:
- task_info["result"] = ret if isinstance(ret, str) else dumps_str(ret)
-
- # Then write the error information (if any)
- if exception:
- task_info["exception"] = exception
- if error_msg:
- task_info["error_msg"] = error_msg
-
-
- # Merge into the buffer
- if status_key in self.task_info_updates:
- # Merge the updates (keep earlier fields such as started_at)
- # Important: the final status must override the earlier running status
- self.task_info_updates[status_key].update(task_info)
- else:
- self.task_info_updates[status_key] = task_info
-
- # Only call on_end when the task exists
- if task:
- # Use clean_kwargs for on_end (create it if it is not defined yet)
- if 'clean_kwargs' not in locals():
- clean_kwargs = {k: v for k, v in kwargs_inner.items()
- if not k.startswith('_') and not k.startswith('__')}
-
- result = task.on_end(
- event_id=event_id,
- args=args,
- kwargs=clean_kwargs,
- result=ret,
- pedding_count=self.pedding_count,
- )
- if asyncio.iscoroutine(result):
- await result
- # Handle routing
- if routing:
- agg_key = routing.get("agg_key")
- routing_key = routing.get("routing_key")
- if routing_key and agg_key:
- # Avoid cross-process locks in a multi-process environment
- # Operate directly, relying on the Python GIL and atomic operations
- if queue in self.app.ep.solo_running_state and routing_key in self.app.ep.solo_running_state[queue]:
- self.app.ep.solo_running_state[queue][routing_key] -= 1
- try:
- if result and result.urgent_retry:
- self.app.ep.solo_urgent_retry[routing_key] = True
- except:
- pass
- if result and result.delay:
- self.app.ep.task_scheduler[queue][routing_key] = time.time() + result.delay
-
- self.batch_counter -= 1
-
- async def loop(self):
- """Optimized main loop with dynamic batching"""
- # semaphore = asyncio.Semaphore(self.concurrency) # Currently unused; kept for later
-
-
- # Dynamic batch processing
- min_batch_size = 10 # Optimization: lower minimum batch size
- max_batch_size = 500 # Optimization: higher maximum batch size
- batch_size = 100
- tasks_batch = []
-
- # Performance tracking
- # last_periodic_flush = time.time() # Superseded by the unified pipeline management
- last_batch_adjust = time.time()
- # last_buffer_check = time.time() # Currently unused
-
- # High-performance buffer monitoring threshold
- max_buffer_size = 5000
-
- try:
- while True:
- # Check whether we should exit
- if hasattr(self.app, '_should_exit') and self.app._should_exit:
- logger.info("AsyncioExecutor detected shutdown signal, exiting...")
- break
-
- # # Dynamically adjust the batch size
- current_time = time.time()
- if current_time - last_batch_adjust > 1.0:
- # Get the queue length based on the queue type
- if isinstance(self.event_queue, deque):
- queue_len = len(self.event_queue)
- elif isinstance(self.event_queue, asyncio.Queue):
- queue_len = self.event_queue.qsize()
- else:
- queue_len = 0
-
- # Optimization: smarter dynamic adjustment
- if queue_len > 5000:
- batch_size = min(max_batch_size, batch_size + 50)
- elif queue_len > 1000:
- batch_size = min(max_batch_size, batch_size + 20)
- elif queue_len < 100:
- batch_size = max(min_batch_size, batch_size - 20)
- elif queue_len < 500:
- batch_size = max(min_batch_size, batch_size - 10)
- last_batch_adjust = current_time
-
- # Fetch an event from the queue
- event = None
- try:
- event = await asyncio.wait_for(self.event_queue.get(), timeout=0.1)
- except asyncio.TimeoutError:
- event = None
-
- if event:
- event.pop("execute_time", None)
- tasks_batch.append(event)
- logger.debug(f"Got event from queue: {event.get('event_id', 'unknown')}")
- # Create coroutine tasks in batches
- if tasks_batch:
- for event in tasks_batch:
- self.batch_counter += 1
- logger.debug(f"Creating task for event: {event.get('event_id', 'unknown')}")
- asyncio.create_task(self.logic(None, **event)) # Pass None for the semaphore parameter for now
-
- tasks_batch.clear()
-
- # Smart buffer management and flushing
- buffer_full = (
- len(self.pending_acks) >= max_buffer_size or
- len(self.status_updates) >= max_buffer_size or
- len(self.data_updates) >= max_buffer_size or
- len(self.task_info_updates) >= max_buffer_size # New: also check the hash buffer
- )
-
- # Flush periodically or when buffers are full - unified pipeline strategy
- should_flush_periodic = False
- has_pending_data = (self.pending_acks or self.status_updates or self.data_updates or self.task_info_updates)
-
- # Check each data type for a due flush
- if has_pending_data:
- for data_type, config in self.pipeline_config.items():
- if data_type == 'ack' and self.pending_acks:
- if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
- should_flush_periodic = True
- break
- elif data_type == 'task_info' and self.task_info_updates:
- if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
- should_flush_periodic = True
- break
- elif data_type == 'status' and self.status_updates:
- if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
- should_flush_periodic = True
- break
- elif data_type == 'data' and self.data_updates:
- if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
- should_flush_periodic = True
- break
- elif data_type == 'stats' and hasattr(self, 'stats_updates') and self.stats_updates:
- if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
- should_flush_periodic = True
- break
-
- if buffer_full or should_flush_periodic:
- asyncio.create_task(self._flush_all_buffers())
- # Flush timestamps are updated inside _flush_all_buffers
-
-
- # Adaptive sleep strategy
- has_events = False
- if isinstance(self.event_queue, deque):
- has_events = bool(self.event_queue)
- elif isinstance(self.event_queue, asyncio.Queue):
- has_events = not self.event_queue.empty()
-
- if has_events:
- await asyncio.sleep(0) # Yield immediately when there is work
- else:
- # Flush the buffers right away if anything is pending
- if (self.pending_acks or self.status_updates or self.data_updates or self.task_info_updates):
- await self._flush_all_buffers()
- await asyncio.sleep(0.001) # Sleep briefly when idle
-
- except KeyboardInterrupt:
- logger.info("AsyncioExecutor received KeyboardInterrupt")
- except Exception as e:
- logger.error(f"AsyncioExecutor loop error: {e}")
- finally:
- # Make sure cleanup always runs
- logger.info("AsyncioExecutor cleaning up...")
-
- # 1. Flush all buffers (with a timeout so shutdown never hangs)
- try:
- await asyncio.wait_for(self._flush_all_buffers(), timeout=2.0)
- logger.info("Buffers flushed successfully")
- except asyncio.TimeoutError:
- logger.warning("Buffer flush timeout, some data may be lost")
- except Exception as e:
- logger.error(f"Error flushing buffers: {e}")
-
- # 2. Mark the worker as offline (the most important cleanup step)
- if self.app.consumer_manager:
- try:
- self.app.consumer_manager.cleanup()
- logger.info("Worker marked as offline")
- except Exception as e:
- logger.error(f"Error marking worker offline: {e}")
-
- logger.info("AsyncioExecutor stopped")