jettask 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jettask/__init__.py +60 -2
- jettask/cli.py +314 -228
- jettask/config/__init__.py +9 -1
- jettask/config/config.py +245 -0
- jettask/config/env_loader.py +381 -0
- jettask/config/lua_scripts.py +158 -0
- jettask/config/nacos_config.py +132 -5
- jettask/core/__init__.py +1 -1
- jettask/core/app.py +1573 -666
- jettask/core/app_importer.py +33 -16
- jettask/core/container.py +532 -0
- jettask/core/task.py +1 -4
- jettask/core/unified_manager_base.py +2 -2
- jettask/executor/__init__.py +38 -0
- jettask/executor/core.py +625 -0
- jettask/executor/executor.py +338 -0
- jettask/executor/orchestrator.py +290 -0
- jettask/executor/process_entry.py +638 -0
- jettask/executor/task_executor.py +317 -0
- jettask/messaging/__init__.py +68 -0
- jettask/messaging/event_pool.py +2188 -0
- jettask/messaging/reader.py +519 -0
- jettask/messaging/registry.py +266 -0
- jettask/messaging/scanner.py +369 -0
- jettask/messaging/sender.py +312 -0
- jettask/persistence/__init__.py +118 -0
- jettask/persistence/backlog_monitor.py +567 -0
- jettask/{backend/data_access.py → persistence/base.py} +58 -57
- jettask/persistence/consumer.py +315 -0
- jettask/{core → persistence}/db_manager.py +23 -22
- jettask/persistence/maintenance.py +81 -0
- jettask/persistence/message_consumer.py +259 -0
- jettask/{backend/namespace_data_access.py → persistence/namespace.py} +66 -98
- jettask/persistence/offline_recovery.py +196 -0
- jettask/persistence/queue_discovery.py +215 -0
- jettask/persistence/task_persistence.py +218 -0
- jettask/persistence/task_updater.py +583 -0
- jettask/scheduler/__init__.py +2 -2
- jettask/scheduler/loader.py +6 -5
- jettask/scheduler/run_scheduler.py +1 -1
- jettask/scheduler/scheduler.py +7 -7
- jettask/scheduler/{unified_scheduler_manager.py → scheduler_coordinator.py} +18 -13
- jettask/task/__init__.py +16 -0
- jettask/{router.py → task/router.py} +26 -8
- jettask/task/task_center/__init__.py +9 -0
- jettask/task/task_executor.py +318 -0
- jettask/task/task_registry.py +291 -0
- jettask/test_connection_monitor.py +73 -0
- jettask/utils/__init__.py +31 -1
- jettask/{monitor/run_backlog_collector.py → utils/backlog_collector.py} +1 -1
- jettask/utils/db_connector.py +1629 -0
- jettask/{db_init.py → utils/db_init.py} +1 -1
- jettask/utils/rate_limit/__init__.py +30 -0
- jettask/utils/rate_limit/concurrency_limiter.py +665 -0
- jettask/utils/rate_limit/config.py +145 -0
- jettask/utils/rate_limit/limiter.py +41 -0
- jettask/utils/rate_limit/manager.py +269 -0
- jettask/utils/rate_limit/qps_limiter.py +154 -0
- jettask/utils/rate_limit/task_limiter.py +384 -0
- jettask/utils/serializer.py +3 -0
- jettask/{monitor/stream_backlog_monitor.py → utils/stream_backlog.py} +14 -6
- jettask/utils/time_sync.py +173 -0
- jettask/webui/__init__.py +27 -0
- jettask/{api/v1 → webui/api}/alerts.py +1 -1
- jettask/{api/v1 → webui/api}/analytics.py +2 -2
- jettask/{api/v1 → webui/api}/namespaces.py +1 -1
- jettask/{api/v1 → webui/api}/overview.py +1 -1
- jettask/{api/v1 → webui/api}/queues.py +3 -3
- jettask/{api/v1 → webui/api}/scheduled.py +1 -1
- jettask/{api/v1 → webui/api}/settings.py +1 -1
- jettask/{api.py → webui/app.py} +253 -145
- jettask/webui/namespace_manager/__init__.py +10 -0
- jettask/{multi_namespace_consumer.py → webui/namespace_manager/multi.py} +69 -22
- jettask/{unified_consumer_manager.py → webui/namespace_manager/unified.py} +1 -1
- jettask/{run.py → webui/run.py} +2 -2
- jettask/{services → webui/services}/__init__.py +1 -3
- jettask/{services → webui/services}/overview_service.py +34 -16
- jettask/{services → webui/services}/queue_service.py +1 -1
- jettask/{backend → webui/services}/queue_stats_v2.py +1 -1
- jettask/{services → webui/services}/settings_service.py +1 -1
- jettask/worker/__init__.py +53 -0
- jettask/worker/lifecycle.py +1507 -0
- jettask/worker/manager.py +583 -0
- jettask/{core/offline_worker_recovery.py → worker/recovery.py} +268 -175
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/METADATA +2 -71
- jettask-0.2.20.dist-info/RECORD +145 -0
- jettask/__main__.py +0 -140
- jettask/api/__init__.py +0 -103
- jettask/backend/__init__.py +0 -1
- jettask/backend/api/__init__.py +0 -3
- jettask/backend/api/v1/__init__.py +0 -17
- jettask/backend/api/v1/monitoring.py +0 -431
- jettask/backend/api/v1/namespaces.py +0 -504
- jettask/backend/api/v1/queues.py +0 -342
- jettask/backend/api/v1/tasks.py +0 -367
- jettask/backend/core/__init__.py +0 -3
- jettask/backend/core/cache.py +0 -221
- jettask/backend/core/database.py +0 -200
- jettask/backend/core/exceptions.py +0 -102
- jettask/backend/dependencies.py +0 -261
- jettask/backend/init_meta_db.py +0 -158
- jettask/backend/main.py +0 -1426
- jettask/backend/main_unified.py +0 -78
- jettask/backend/main_v2.py +0 -394
- jettask/backend/models/__init__.py +0 -3
- jettask/backend/models/requests.py +0 -236
- jettask/backend/models/responses.py +0 -230
- jettask/backend/namespace_api_old.py +0 -267
- jettask/backend/services/__init__.py +0 -3
- jettask/backend/start.py +0 -42
- jettask/backend/unified_api_router.py +0 -1541
- jettask/cleanup_deprecated_tables.sql +0 -16
- jettask/core/consumer_manager.py +0 -1695
- jettask/core/delay_scanner.py +0 -256
- jettask/core/event_pool.py +0 -1700
- jettask/core/heartbeat_process.py +0 -222
- jettask/core/task_batch.py +0 -153
- jettask/core/worker_scanner.py +0 -271
- jettask/executors/__init__.py +0 -5
- jettask/executors/asyncio.py +0 -876
- jettask/executors/base.py +0 -30
- jettask/executors/common.py +0 -148
- jettask/executors/multi_asyncio.py +0 -309
- jettask/gradio_app.py +0 -570
- jettask/integrated_gradio_app.py +0 -1088
- jettask/main.py +0 -0
- jettask/monitoring/__init__.py +0 -3
- jettask/pg_consumer.py +0 -1896
- jettask/run_monitor.py +0 -22
- jettask/run_webui.py +0 -148
- jettask/scheduler/multi_namespace_scheduler.py +0 -294
- jettask/scheduler/unified_manager.py +0 -450
- jettask/task_center_client.py +0 -150
- jettask/utils/serializer_optimized.py +0 -33
- jettask/webui_exceptions.py +0 -67
- jettask-0.2.18.dist-info/RECORD +0 -150
- /jettask/{constants.py → config/constants.py} +0 -0
- /jettask/{backend/config.py → config/task_center.py} +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/pg_consumer_v2.py +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/add_execution_time_field.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_new_tables.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/create_tables_v3.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/migrate_to_new_structure.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql/modify_time_fields.sql +0 -0
- /jettask/{pg_consumer → messaging/pg_consumer}/sql_utils.py +0 -0
- /jettask/{models.py → persistence/models.py} +0 -0
- /jettask/scheduler/{manager.py → task_crud.py} +0 -0
- /jettask/{schema.sql → schemas/schema.sql} +0 -0
- /jettask/{task_center.py → task/task_center/client.py} +0 -0
- /jettask/{monitoring → utils}/file_watcher.py +0 -0
- /jettask/{services/redis_monitor_service.py → utils/redis_monitor.py} +0 -0
- /jettask/{api/v1 → webui/api}/__init__.py +0 -0
- /jettask/{webui_config.py → webui/config.py} +0 -0
- /jettask/{webui_models → webui/models}/__init__.py +0 -0
- /jettask/{webui_models → webui/models}/namespace.py +0 -0
- /jettask/{services → webui/services}/alert_service.py +0 -0
- /jettask/{services → webui/services}/analytics_service.py +0 -0
- /jettask/{services → webui/services}/scheduled_task_service.py +0 -0
- /jettask/{services → webui/services}/task_service.py +0 -0
- /jettask/{webui_sql → webui/sql}/batch_upsert_functions.sql +0 -0
- /jettask/{webui_sql → webui/sql}/verify_database.sql +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/WHEEL +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/entry_points.txt +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/licenses/LICENSE +0 -0
- {jettask-0.2.18.dist-info → jettask-0.2.20.dist-info}/top_level.txt +0 -0
jettask/executors/asyncio.py
DELETED
@@ -1,876 +0,0 @@
import asyncio
import time
import logging
import traceback
from ..utils.traceback_filter import filter_framework_traceback
from ..utils.task_logger import TaskContextManager, configure_task_logging
from ..utils.serializer import dumps_str
from typing import Optional, Union
from collections import defaultdict, deque
import os
# Group tasks by queue to optimize batch operations
from collections import defaultdict
from .base import BaseExecutor
import random
from ..exceptions import RetryableError
from ..core.enums import TaskStatus

logger = logging.getLogger('app')

# Lua script: atomically update the maximum value stored in a Redis hash field
UPDATE_MAX_OFFSET_LUA = """
local hash_key = KEYS[1]
local field = KEYS[2]
local new_value = tonumber(ARGV[1])

local current = redis.call('HGET', hash_key, field)
if current == false or tonumber(current) < new_value then
    redis.call('HSET', hash_key, field, new_value)
    return 1
else
    return 0
end
"""

# Try to use uvloop for better performance
try:
    import uvloop
    uvloop.install()
    logger.info("Using uvloop for better performance")
except ImportError:
    pass


class AsyncioExecutor(BaseExecutor):
    """High-performance asyncio executor"""

    def __init__(self, event_queue, app, concurrency=100):
        super().__init__(event_queue, app, concurrency)

        # Caching for pending count
        self.pending_cache = {}
        self.pending_cache_expire = 0

        # Unified pipeline manager configuration
        self.pipeline_config = {
            'ack': {'max_batch': 1000, 'max_delay': 0.05},        # 50ms
            'task_info': {'max_batch': 2000, 'max_delay': 0.1},   # 100ms
            'status': {'max_batch': 1000, 'max_delay': 0.15},     # 150ms
            'data': {'max_batch': 1000, 'max_delay': 0.15},       # 150ms
            'stats': {'max_batch': 5000, 'max_delay': 0.2}        # 200ms
        }

        # Unified pipeline buffers
        self.pending_acks = []
        self.status_updates = []
        self.data_updates = []
        self.task_info_updates = {}  # dict of per-task hash updates
        self.stats_updates = []      # new: buffer for statistics

        # Pipeline flush-time tracking
        self.last_pipeline_flush = {
            'ack': time.time(),
            'task_info': time.time(),
            'status': time.time(),
            'data': time.time(),
            'stats': time.time()
        }

        # Settings kept for backward compatibility
        self.ack_buffer_size = self.pipeline_config['ack']['max_batch']
        self.max_ack_buffer_size = 2000
        self.status_batch_size = self.pipeline_config['status']['max_batch']
        self.data_batch_size = self.pipeline_config['data']['max_batch']

        # Key prefix
        self.prefix = self.app.ep.redis_prefix or 'jettask'

        # Unified pipeline flush strategy
        self.last_flush_time = time.time()
        self.pipeline_operation_count = 0  # total number of pipeline operations

        # Configure the task log format (from environment variables)
        log_format = os.environ.get('JETTASK_LOG_FORMAT', 'text').lower()
        if log_format == 'json':
            configure_task_logging(format='json')
        else:
            # A custom text format string may be supplied
            format_string = os.environ.get('JETTASK_LOG_FORMAT_STRING')
            if format_string:
                configure_task_logging(format='text', format_string=format_string)
        self.max_flush_interval = 0.05   # maximum flush interval: 50ms
        self.min_flush_interval = 0.005  # minimum flush interval: 5ms

        # Performance optimization 4: precomputed constants and caches
        self._status_prefix = self.app._status_prefix
        self._result_prefix = self.app._result_prefix
        self._prefixed_queue_cache = {}  # cache of prefixed queue names

        # High-performance mode is enabled by default
        self._stats_lock = asyncio.Lock()
        self._high_performance_mode = True  # always enabled

    def _get_prefixed_queue_cached(self, queue: str) -> str:
        """Cache prefixed queue names to avoid repeated string concatenation"""
        if queue not in self._prefixed_queue_cache:
            self._prefixed_queue_cache[queue] = self.app.ep.get_prefixed_queue_name(queue)
        return self._prefixed_queue_cache[queue]

    async def get_pending_count_cached(self, queue: str) -> int:
        """Get cached pending count"""
        current_time = time.time()

        if (current_time - self.pending_cache_expire > 30 or  # optimization: longer cache lifetime
                queue not in self.pending_cache):
            try:
                pending_info = await self.app.ep.async_redis_client.xpending(queue, queue)
                self.pending_cache[queue] = pending_info.get("pending", 0)
                self.pending_cache_expire = current_time
            except Exception:
                self.pending_cache[queue] = 0

        return self.pending_cache.get(queue, 0)

    async def _quick_ack(self, queue: str, event_id: str, group_name: str = None, offset: int = None):
        """Quick ACK with unified pipeline management and offset tracking"""
        # If no group_name is given, fall back to the queue name (legacy behaviour)
        group_name = group_name or queue
        self.pending_acks.append((queue, event_id, group_name, offset))
        current_time = time.time()

        # Check whether the unified pipeline needs to be flushed
        ack_config = self.pipeline_config['ack']
        time_since_flush = current_time - self.last_pipeline_flush['ack']

        should_flush = (
            len(self.pending_acks) >= ack_config['max_batch'] or      # batch size reached
            (len(self.pending_acks) >= 50 and                         # or at least 50 entries and the delay elapsed
             time_since_flush >= ack_config['max_delay']) or
            len(self.pending_acks) >= self.max_ack_buffer_size * 0.1  # 10% of the maximum buffer reached
        )

        if should_flush:
            await self._flush_all_buffers()  # unified flush

    async def _flush_all_buffers(self):
        """Unified pipeline flush: submit all buffered operations in one round trip"""
        # Create a single pipeline (the binary client avoids encoding issues)
        pipeline = self.app.ep.async_binary_redis_client.pipeline()

        operations_count = 0

        # 1. Handle ACK operations (binary client)
        if self.pending_acks:
            acks_by_queue_group = defaultdict(lambda: defaultdict(list))
            offset_updates = []  # offsets that need updating

            # Group by queue + group_name and record the maximum offset per group
            max_offsets = {}  # {(queue, group_name): max_offset}

            for item in self.pending_acks:
                # print(f'{item=}')
                if len(item) == 4:
                    queue, event_id, group_name, offset = item
                elif len(item) == 3:
                    queue, event_id, group_name = item
                    offset = None
                else:
                    queue, event_id = item
                    group_name = queue
                    offset = None

                prefixed_queue = self._get_prefixed_queue_cached(queue)
                acks_by_queue_group[prefixed_queue][group_name].append(event_id)

                # Collect offset updates (keep only the maximum)
                if group_name and offset is not None:
                    key = (queue, group_name)
                    if key not in max_offsets or offset > max_offsets[key]:
                        max_offsets[key] = offset

            # logger.info(f'{max_offsets=}')
            # Apply offset updates (the Lua script guarantees atomicity and the max-value constraint)
            if max_offsets:
                task_offset_key = f"{self.prefix}:TASK_OFFSETS"
                for (queue, group_name), offset in max_offsets.items():
                    task_field = f"{queue}:{group_name}"

                    # Atomically update the maximum offset via the Lua script
                    pipeline.eval(UPDATE_MAX_OFFSET_LUA, 2, task_offset_key, task_field, offset)
                    operations_count += 1

            # Issue the stream ACKs
            for prefixed_queue, groups in acks_by_queue_group.items():
                for group_name, event_ids in groups.items():
                    stream_key = prefixed_queue.encode() if isinstance(prefixed_queue, str) else prefixed_queue
                    group_key = group_name.encode() if isinstance(group_name, str) else group_name
                    batch_bytes = [b.encode() if isinstance(b, str) else b for b in event_ids]

                    # Append to the unified pipeline
                    # logger.info(f'about to ack {batch_bytes=} {stream_key=} {group_key}')
                    pipeline.xack(stream_key, group_key, *batch_bytes)
                    operations_count += 1

            self.pending_acks.clear()

        # 2. Handle task info updates (hashes)
        task_change_events = []  # collect the IDs of changed tasks
        if self.task_info_updates:
            for event_key, updates in self.task_info_updates.items():
                # event_key is either "event_id" or "event_id:task_name" (broadcast mode)
                # Key format: jettask:TASK:event_id:group_name
                key = f"{self.prefix}:TASK:{event_key}".encode()  # as bytes
                if updates:
                    # Encode the updated values as bytes
                    encoded_updates = {k.encode(): v.encode() if isinstance(v, str) else v for k, v in updates.items()}
                    pipeline.hset(key, mapping=encoded_updates)
                    pipeline.expire(key, 3600)
                    operations_count += 2

                    # Collect the changed task ID (full key path)
                    # event_key is either "event_id" or "event_id:task_name" (broadcast mode)
                    # Send the full task_id, e.g. "jettask:TASK:1756956517980-0:jettask:QUEUE:queue_name:task_name"
                    full_task_id = f"{self.prefix}:TASK:{event_key}"
                    task_change_events.append(full_task_id)

            # Publish change events to a dedicated stream
            change_stream_key = f"{self.prefix}:TASK_CHANGES".encode()
            for task_id in task_change_events:
                # Send the full task_id (including the prefix)
                change_data = {
                    b'id': task_id.encode() if isinstance(task_id, str) else task_id
                }
                pipeline.xadd(change_stream_key, change_data, maxlen=1000000)  # keep only the most recent changes
                operations_count += 1

            self.task_info_updates.clear()

        # 3. Handle statistics (if any)
        if hasattr(self, 'stats_updates') and self.stats_updates:
            # Batch-update statistics
            for stat_op in self.stats_updates:
                # Apply the stat operation
                if 'queue' in stat_op and 'field' in stat_op:
                    stats_key = f"{self.prefix}:STATS:{stat_op['queue']}".encode()  # as bytes
                    field = stat_op['field'].encode() if isinstance(stat_op['field'], str) else stat_op['field']
                    pipeline.hincrby(stats_key, field, stat_op.get('value', 1))
                    operations_count += 1
            self.stats_updates.clear()

        # Execute all pipeline operations in one shot
        if operations_count > 0:
            try:
                # Execute the unified pipeline
                results = await pipeline.execute()

                # Inspect the results
                if isinstance(results, Exception):
                    logger.error(f"Pipeline execution error: {results}")
                else:
                    # Check each operation's result
                    for i, result in enumerate(results):
                        if isinstance(result, Exception):
                            logger.error(f"Pipeline operation {i} error: {result}")

                logger.debug(f"Unified pipeline executed {operations_count} operations")
                self.pipeline_operation_count += operations_count

            except Exception as e:
                logger.error(f"Pipeline flush error: {e}")

        # Update all flush timestamps
        current_time = time.time()
        for key in self.last_pipeline_flush:
            self.last_pipeline_flush[key] = current_time
        self.last_flush_time = current_time

    async def _collect_stats_async(self, queue: str, success: bool, processing_time: float, total_latency: float):
        """High-performance async stats collection: append to the pipeline buffer"""
        try:
            if hasattr(self.app, 'consumer_manager') and self.app.consumer_manager:
                # Buffer the statistics instead of sending them immediately
                if hasattr(self, 'stats_updates'):
                    self.stats_updates.append({
                        'queue': queue,
                        'field': 'success_count' if success else 'error_count',
                        'value': 1
                    })
                    self.stats_updates.append({
                        'queue': queue,
                        'field': 'total_processing_time',
                        'value': int(processing_time * 1000)  # converted to milliseconds
                    })

                    # Flush the stats buffer once it is full
                    if len(self.stats_updates) >= self.pipeline_config['stats']['max_batch']:
                        asyncio.create_task(self._flush_all_buffers())
                else:
                    # Legacy path
                    asyncio.create_task(self._update_stats_nonblocking(queue, success, processing_time, total_latency))
        except Exception:
            pass  # stats errors must never affect the main flow

    async def _update_stats_nonblocking(self, queue: str, success: bool, processing_time: float, total_latency: float):
        """Non-blocking stats update"""
        try:
            self.app.consumer_manager.task_finished(queue)
            self.app.consumer_manager.update_stats(
                queue=queue,
                success=success,
                processing_time=processing_time,
                total_latency=total_latency
            )
        except Exception as e:
            logger.debug(f"Stats collection error (non-critical): {e}")

    async def logic(self, semaphore: asyncio.Semaphore, event_id: str, event_data: dict, queue: str, routing: dict = None, consumer: str = None, group_name: str = None, **kwargs):
        """Process a single task"""
        status = "success"  # default status
        exception = None
        error_msg = None
        ret = None
        task = None         # initialize the task variable
        args = ()           # initialize positional arguments
        kwargs_inner = {}   # initialize keyword arguments (avoid clashing with the kwargs parameter)
        # print(f'{group_name=}')
        # Initialize status_key early so it is always defined in the finally block
        # Use the supplied group_name, defaulting to the queue name
        status_key = f"{event_id}:{group_name}"  # composite key

        # Resolve the task name early so the logging context can be set
        # _task_name is set by listen_event_by_task
        task_name = event_data.get("_task_name") or event_data.get("name")
        # print(f'{event_data=}')
        # If the message carries no task name, log an error and return
        if not task_name:
            logger.error(f"No _task_name in event_data for event {event_id}")
            # Return without processing messages that have no task name
            return
        # Set up the task logging context covering the whole processing flow
        async with TaskContextManager(
            event_id=event_id,
            task_name=task_name,
            queue=queue,
            worker_id=consumer  # use the consumer as the worker_id
        ):
            try:
                # Is this a recovered message?
                if kwargs.get('_recovery'):
                    logger.info(f"Processing recovered message {event_id} from {kwargs.get('_claimed_from', 'unknown')}")
                # print(f'{event_data=}')
                # Is this a delayed task?
                if event_data.get('is_delayed') and 'execute_at' in event_data:
                    execute_at = float(event_data['execute_at'])
                    current_time = time.time()

                    if execute_at > current_time:
                        # The task is not due yet, so drop it here.
                        # Do not ACK the message; leave it in the pending state.
                        # event_pool tracks it via a zset and reclaims it with xclaim once it is due.
                        logger.info(f"Task {event_id} delayed until {execute_at}, keeping in pending state")
                        return

                # Retry configuration (from the task decorator or apply_async)
                retry_config = event_data.get('retry_config', {})
                max_retries = retry_config.get('max_retries', 0)

                # async with semaphore:
                # The task name was already resolved above

                if not task_name:
                    logger.error(f"No task name found! event_data keys: {list(event_data.keys())}, event_id: {event_id}")

                task = self.app.get_task_by_name(task_name)

                # status_key was already initialized at the top of the method

                if not task:
                    exception = f"{task_name=} {queue=} {event_data=} is not bound to any task"
                    logger.error(exception)
                    # Get the offset from event_data
                    offset = None
                    if isinstance(event_data, dict):
                        offset = event_data.get('offset')
                        if offset is not None:
                            try:
                                offset = int(offset)
                            except (ValueError, TypeError):
                                offset = None

                    await self._quick_ack(queue, event_id, group_name, offset)

                    # Record started_at even when the task does not exist (use the current time)
                    current_time = time.time()
                    # Recovered messages may lack trigger_time; default to the current time
                    trigger_time_float = float(event_data.get('trigger_time', current_time))
                    duration = current_time - trigger_time_float
                    # Update via the hash buffer
                    self.task_info_updates[status_key] = {
                        "status": TaskStatus.ERROR.value,
                        "exception": exception,
                        "started_at": str(current_time),
                        "completed_at": str(current_time),
                        "duration": str(duration),
                        "consumer": consumer,
                    }
                    # Flush through the unified pipeline
                    await self._flush_all_buffers()
                    return

                self.pedding_count = await self.get_pending_count_cached(queue)

                # Reset status to success (it would otherwise default to error)
                status = "success"

                # Get positional arguments (these are plain objects now, no deserialization needed)
                args = event_data.get("args", ()) or ()

                # Handle kwargs uniformly (plain objects now, no deserialization needed)
                kwargs_inner = event_data.get("kwargs", {}) or {}

                # If event_data carries a scheduled_task_id, add it to kwargs for TaskContext
                if 'scheduled_task_id' in event_data:
                    kwargs_inner['__scheduled_task_id'] = event_data['scheduled_task_id']

                # Check whether specific fields should be extracted as arguments.
                # If the message contains event_type and customer_data, pass them as arguments.
                if "event_type" in event_data and "customer_data" in event_data:
                    # Pass these fields as positional arguments, the rest as kwargs
                    args = (event_data["event_type"], event_data["customer_data"])
                    # Keep the remaining fields in kwargs, excluding the ones already used as args
                    extra_kwargs = {k: v for k, v in event_data.items()
                                    if k not in ["event_type", "customer_data", "_broadcast", "_target_tasks", "_timestamp", "trigger_time", "name", "_task_name"]}
                    kwargs_inner.update(extra_kwargs)

                # Execute lifecycle methods
                result = task.on_before(
                    event_id=event_id,
                    pedding_count=self.pedding_count,
                    args=args,
                    kwargs=kwargs_inner,
                )
                if asyncio.iscoroutine(result):
                    result = await result

                if result and result.reject:
                    # The task was rejected; record it via the hash buffer
                    self.task_info_updates[status_key] = {
                        "status": TaskStatus.REJECTED.value,
                        "consumer": consumer,
                        "started_at": str(time.time()),
                        "completed_at": str(time.time()),
                        "error_msg": "Task rejected by on_before"
                    }
                    # Flush through the unified pipeline
                    await self._flush_all_buffers()
                    return

                # Mark the task as started
                # if hasattr(self.app, 'consumer_manager') and self.app.consumer_manager:
                #     self.app.consumer_manager.task_started(queue)

                # Record the time execution actually starts (after on_before)
                execution_start_time = time.time()

                # Record the running state via the hash buffer
                # so users can see that the task is running.
                # running_key = f"{self.prefix}:TASK:{status_key}"
                # Save the start info without racing the final status write.
                self.task_info_updates[status_key] = {
                    "status": TaskStatus.RUNNING.value,
                    "consumer": consumer,
                    "started_at": str(execution_start_time)
                }
                # await self.app.ep.async_redis_client.hset(running_key, mapping={
                #     "status": TaskStatus.RUNNING.value,
                #     "consumer": consumer,
                #     "started_at": str(execution_start_time)
                # })

                # Retry loop inside the worker
                current_retry = 0
                last_exception = None

                while current_retry <= max_retries:
                    try:
                        # Log retry attempts
                        if current_retry > 0:
                            logger.info(f"Retry attempt {current_retry}/{max_retries} for task {event_id}")

                        # Strip internal parameters from kwargs so they are not passed to the user's task function
                        clean_kwargs = {k: v for k, v in kwargs_inner.items()
                                        if not k.startswith('_') and not k.startswith('__')}

                        logger.debug(f"Calling task with clean_kwargs: {clean_kwargs}")
                        task_result = task(event_id, event_data['trigger_time'], *args, **clean_kwargs)
                        if asyncio.iscoroutine(task_result):
                            ret = await task_result
                        else:
                            ret = task_result
                        result = task.on_success(
                            event_id=event_id,
                            args=args,
                            kwargs=clean_kwargs,
                            result=ret,
                        )
                        if asyncio.iscoroutine(result):
                            await result

                        # The task succeeded, so the message can be ACKed now.
                        # Get the offset from event_data
                        offset = None
                        if isinstance(event_data, dict):
                            offset = event_data.get('offset')
                            if offset is not None:
                                try:
                                    offset = int(offset)
                                except (ValueError, TypeError):
                                    offset = None

                        await self._quick_ack(queue, event_id, group_name, offset)

                        # Success: leave the retry loop
                        break

                    except SystemExit:
                        # Handle the system exit signal; do not retry
                        logger.info('Task interrupted by system exit')
                        status = "interrupted"
                        exception = "System exit"
                        error_msg = "Task interrupted by shutdown"
                        # The message must still be ACKed on system exit.
                        # Get the offset from event_data
                        offset = None
                        if isinstance(event_data, dict):
                            offset = event_data.get('offset')
                            if offset is not None:
                                try:
                                    offset = int(offset)
                                except (ValueError, TypeError):
                                    offset = None

                        await self._quick_ack(queue, event_id, group_name, offset)
                        break

                    except Exception as e:
                        last_exception = e

                        # Should we retry?
                        should_retry = False
                        if current_retry < max_retries:
                            # Check whether this exception type is retryable
                            retry_on_exceptions = retry_config.get('retry_on_exceptions')

                            if retry_on_exceptions:
                                # retry_on_exceptions is a list of exception class names
                                exc_type_name = type(e).__name__
                                should_retry = exc_type_name in retry_on_exceptions
                            else:
                                # By default, retry on every exception
                                should_retry = True

                        if should_retry:
                            current_retry += 1

                            # Compute the retry delay
                            delay = None

                            # If this is a RetryableError with retry_after set, use the suggested delay
                            if isinstance(e, RetryableError) and e.retry_after is not None:
                                delay = e.retry_after
                                logger.info(f"Using RetryableError suggested delay: {delay:.1f}s")
                            else:
                                # Use the configured retry strategy
                                retry_backoff = retry_config.get('retry_backoff', True)

                                if retry_backoff:
                                    # Exponential backoff: 1s, 2s, 4s, 8s, ...
                                    base_delay = 1.0
                                    delay = min(base_delay * (2 ** (current_retry - 1)),
                                                retry_config.get('retry_backoff_max', 60))
                                else:
                                    # Fixed delay: always one second
                                    delay = 1.0

                            logger.info(f"Task {event_id} will retry after {delay:.2f} seconds (attempt {current_retry}/{max_retries})")

                            # Wait inside the worker instead of re-publishing to the queue
                            await asyncio.sleep(delay)
                            continue  # next retry attempt
                        else:
                            # No more retries: record the error and exit
                            logger.error(f'Task execution failed: {str(e)}')
                            status = "error"
                            exception = filter_framework_traceback()
                            error_msg = str(e)
                            logger.error(exception)
                            # The task failed and will not be retried, so the message must be ACKed.
                            # Get the offset from event_data
                            offset = None
                            if isinstance(event_data, dict):
                                offset = event_data.get('offset')
                                if offset is not None:
                                    try:
                                        offset = int(offset)
                                    except (ValueError, TypeError):
                                        offset = None

                            await self._quick_ack(queue, event_id, group_name, offset)
                            break

                # If every retry failed
                if current_retry > max_retries and last_exception:
                    logger.error(f'Task still failed after {max_retries} retries')
                    status = "error"
                    exception = filter_framework_traceback()
                    error_msg = str(last_exception)
                    # The task ultimately failed; the message must still be ACKed.
                    # Get the offset from event_data
                    offset = None
                    if isinstance(event_data, dict):
                        offset = event_data.get('offset')
                        if offset is not None:
                            try:
                                offset = int(offset)
                            except (ValueError, TypeError):
                                offset = None

                    await self._quick_ack(queue, event_id, group_name, offset)

            # The except block has moved inside the while loop and is no longer needed here
            finally:
                # Compute the completion time and elapsed durations
                completed_at = time.time()
                # Recovered messages may lack trigger_time; default to the execution start time
                trigger_time_float = float(event_data.get('trigger_time', execution_start_time))
                # Compute both timing metrics, guarding against negative values
                execution_time = max(0, completed_at - execution_start_time)  # actual execution time
                total_latency = max(0, completed_at - trigger_time_float)     # total latency (including queue wait)

                # Collect statistics asynchronously (non-blocking in high-performance mode)
                await self._collect_stats_async(
                    queue=queue,
                    success=(status == "success"),
                    processing_time=execution_time,
                    total_latency=total_latency
                )

                # Atomically update all task info via the hash buffer.
                # Important: write result before status so status=success is never visible without its result.
                task_info = {
                    "completed_at": str(completed_at),
                    "execution_time": execution_time,
                    "duration": total_latency,
                    "consumer": consumer,
                    'status': status
                }

                # Write the result first
                if ret is None:
                    task_info["result"] = "null"  # JSON null
                else:
                    task_info["result"] = ret if isinstance(ret, str) else dumps_str(ret)

                # Then write the error information (if any)
                if exception:
                    task_info["exception"] = exception
                if error_msg:
                    task_info["error_msg"] = error_msg

                # Merge into the buffer
                if status_key in self.task_info_updates:
                    # Merge the update (preserving started_at and other earlier fields).
                    # Important: the final status must overwrite the earlier running status.
                    self.task_info_updates[status_key].update(task_info)
                else:
                    self.task_info_updates[status_key] = task_info

                # Only call on_end when the task exists
                if task:
                    # Use clean_kwargs for on_end (create it if it is not defined yet)
                    if 'clean_kwargs' not in locals():
                        clean_kwargs = {k: v for k, v in kwargs_inner.items()
                                        if not k.startswith('_') and not k.startswith('__')}

                    result = task.on_end(
                        event_id=event_id,
                        args=args,
                        kwargs=clean_kwargs,
                        result=ret,
                        pedding_count=self.pedding_count,
                    )
                    if asyncio.iscoroutine(result):
                        await result
                # Handle routing
                if routing:
                    agg_key = routing.get("agg_key")
                    routing_key = routing.get("routing_key")
                    if routing_key and agg_key:
                        # Avoid cross-process locks in a multi-process environment;
                        # operate directly, relying on the Python GIL and atomic operations.
                        if queue in self.app.ep.solo_running_state and routing_key in self.app.ep.solo_running_state[queue]:
                            self.app.ep.solo_running_state[queue][routing_key] -= 1
                        try:
                            if result and result.urgent_retry:
                                self.app.ep.solo_urgent_retry[routing_key] = True
                        except:
                            pass
                        if result and result.delay:
                            self.app.ep.task_scheduler[queue][routing_key] = time.time() + result.delay

                self.batch_counter -= 1

    async def loop(self):
        """Optimized main loop with dynamic batching"""
        # semaphore = asyncio.Semaphore(self.concurrency)  # currently unused, kept for later use

        # Dynamic batch processing
        min_batch_size = 10   # optimization: lower minimum batch size
        max_batch_size = 500  # optimization: higher maximum batch size
        batch_size = 100
        tasks_batch = []

        # Performance tracking
        # last_periodic_flush = time.time()  # replaced by the unified pipeline management
        last_batch_adjust = time.time()
        # last_buffer_check = time.time()  # currently unused

        # High-performance buffer monitoring threshold
        max_buffer_size = 5000

        try:
            while True:
                # Check whether we should exit
                if hasattr(self.app, '_should_exit') and self.app._should_exit:
                    logger.info("AsyncioExecutor detected shutdown signal, exiting...")
                    break

                # Dynamically adjust the batch size
                current_time = time.time()
                if current_time - last_batch_adjust > 1.0:
                    # Get the queue length based on the queue type
                    if isinstance(self.event_queue, deque):
                        queue_len = len(self.event_queue)
                    elif isinstance(self.event_queue, asyncio.Queue):
                        queue_len = self.event_queue.qsize()
                    else:
                        queue_len = 0

                    # Optimization: smarter dynamic adjustment
                    if queue_len > 5000:
                        batch_size = min(max_batch_size, batch_size + 50)
                    elif queue_len > 1000:
                        batch_size = min(max_batch_size, batch_size + 20)
                    elif queue_len < 100:
                        batch_size = max(min_batch_size, batch_size - 20)
                    elif queue_len < 500:
                        batch_size = max(min_batch_size, batch_size - 10)
                    last_batch_adjust = current_time

                # Fetch an event from the queue
                event = None
                try:
                    event = await asyncio.wait_for(self.event_queue.get(), timeout=0.1)
                except asyncio.TimeoutError:
                    event = None

                if event:
                    event.pop("execute_time", None)
                    tasks_batch.append(event)
                    logger.debug(f"Got event from queue: {event.get('event_id', 'unknown')}")
                # Create coroutine tasks in batches
                if tasks_batch:
                    for event in tasks_batch:
                        self.batch_counter += 1
                        logger.debug(f"Creating task for event: {event.get('event_id', 'unknown')}")
                        asyncio.create_task(self.logic(None, **event))  # the semaphore argument is None for now

                    tasks_batch.clear()

                # Smart buffer management and flushing
                buffer_full = (
                    len(self.pending_acks) >= max_buffer_size or
                    len(self.status_updates) >= max_buffer_size or
                    len(self.data_updates) >= max_buffer_size or
                    len(self.task_info_updates) >= max_buffer_size  # new: check the hash buffer as well
                )

                # Flush periodically or when a buffer is full, using the unified pipeline strategy
                should_flush_periodic = False
                has_pending_data = (self.pending_acks or self.status_updates or self.data_updates or self.task_info_updates)

                # Check whether any data type needs flushing
                if has_pending_data:
                    for data_type, config in self.pipeline_config.items():
                        if data_type == 'ack' and self.pending_acks:
                            if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
                                should_flush_periodic = True
                                break
                        elif data_type == 'task_info' and self.task_info_updates:
                            if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
                                should_flush_periodic = True
                                break
                        elif data_type == 'status' and self.status_updates:
                            if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
                                should_flush_periodic = True
                                break
                        elif data_type == 'data' and self.data_updates:
                            if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
                                should_flush_periodic = True
                                break
                        elif data_type == 'stats' and hasattr(self, 'stats_updates') and self.stats_updates:
                            if current_time - self.last_pipeline_flush[data_type] >= config['max_delay']:
                                should_flush_periodic = True
                                break

                if buffer_full or should_flush_periodic:
                    asyncio.create_task(self._flush_all_buffers())
                    # Flush timestamps are updated inside _flush_all_buffers

                # Smart sleep strategy
                has_events = False
                if isinstance(self.event_queue, deque):
                    has_events = bool(self.event_queue)
                elif isinstance(self.event_queue, asyncio.Queue):
                    has_events = not self.event_queue.empty()

                if has_events:
                    await asyncio.sleep(0)  # yield immediately when there is work
                else:
                    # Check whether the buffers should be flushed right away
                    if (self.pending_acks or self.status_updates or self.data_updates or self.task_info_updates):
                        await self._flush_all_buffers()
                    await asyncio.sleep(0.001)  # short sleep when idle

        except KeyboardInterrupt:
            logger.info("AsyncioExecutor received KeyboardInterrupt")
        except Exception as e:
            logger.error(f"AsyncioExecutor loop error: {e}")
        finally:
            # Make sure cleanup always runs
            logger.info("AsyncioExecutor cleaning up...")

            # 1. Flush all buffers (with a timeout to avoid hanging)
            try:
                await asyncio.wait_for(self._flush_all_buffers(), timeout=2.0)
                logger.info("Buffers flushed successfully")
            except asyncio.TimeoutError:
                logger.warning("Buffer flush timeout, some data may be lost")
            except Exception as e:
                logger.error(f"Error flushing buffers: {e}")

            # 2. Mark the worker offline (the most important cleanup step)
            if self.app.consumer_manager:
                try:
                    self.app.consumer_manager.cleanup()
                    logger.info("Worker marked as offline")
                except Exception as e:
                    logger.error(f"Error marking worker offline: {e}")

            logger.info("AsyncioExecutor stopped")