triflux 3.3.0-dev.8 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +108 -199
- package/README.md +108 -199
- package/bin/triflux.mjs +2415 -1762
- package/hooks/keyword-rules.json +361 -354
- package/hooks/pipeline-stop.mjs +5 -2
- package/hub/assign-callbacks.mjs +136 -136
- package/hub/bridge.mjs +734 -684
- package/hub/delegator/contracts.mjs +38 -38
- package/hub/delegator/index.mjs +14 -14
- package/hub/delegator/schema/delegator-tools.schema.json +250 -250
- package/hub/delegator/service.mjs +302 -118
- package/hub/delegator/tool-definitions.mjs +35 -35
- package/hub/hitl.mjs +67 -67
- package/hub/paths.mjs +28 -0
- package/hub/pipe.mjs +589 -561
- package/hub/pipeline/state.mjs +23 -0
- package/hub/public/dashboard.html +349 -0
- package/hub/public/tray-icon.ico +0 -0
- package/hub/public/tray-icon.png +0 -0
- package/hub/router.mjs +782 -782
- package/hub/schema.sql +40 -40
- package/hub/server.mjs +810 -637
- package/hub/store.mjs +706 -706
- package/hub/team/cli/commands/attach.mjs +37 -0
- package/hub/team/cli/commands/control.mjs +43 -0
- package/hub/team/cli/commands/debug.mjs +74 -0
- package/hub/team/cli/commands/focus.mjs +53 -0
- package/hub/team/cli/commands/interrupt.mjs +36 -0
- package/hub/team/cli/commands/kill.mjs +37 -0
- package/hub/team/cli/commands/list.mjs +24 -0
- package/hub/team/cli/commands/send.mjs +37 -0
- package/hub/team/cli/commands/start/index.mjs +87 -0
- package/hub/team/cli/commands/start/parse-args.mjs +32 -0
- package/hub/team/cli/commands/start/start-in-process.mjs +40 -0
- package/hub/team/cli/commands/start/start-mux.mjs +73 -0
- package/hub/team/cli/commands/start/start-wt.mjs +69 -0
- package/hub/team/cli/commands/status.mjs +87 -0
- package/hub/team/cli/commands/stop.mjs +31 -0
- package/hub/team/cli/commands/task.mjs +30 -0
- package/hub/team/cli/commands/tasks.mjs +13 -0
- package/hub/team/{cli.mjs → cli/help.mjs} +38 -99
- package/hub/team/cli/index.mjs +39 -0
- package/hub/team/cli/manifest.mjs +28 -0
- package/hub/team/cli/render.mjs +30 -0
- package/hub/team/cli/services/attach-fallback.mjs +54 -0
- package/hub/team/cli/services/hub-client.mjs +171 -0
- package/hub/team/cli/services/member-selector.mjs +30 -0
- package/hub/team/cli/services/native-control.mjs +115 -0
- package/hub/team/cli/services/runtime-mode.mjs +60 -0
- package/hub/team/cli/services/state-store.mjs +34 -0
- package/hub/team/cli/services/task-model.mjs +30 -0
- package/hub/team/native-supervisor.mjs +69 -63
- package/hub/team/native.mjs +367 -367
- package/hub/team/nativeProxy.mjs +217 -173
- package/hub/team/pane.mjs +149 -149
- package/hub/team/psmux.mjs +946 -946
- package/hub/team/session.mjs +608 -608
- package/hub/team/staleState.mjs +369 -299
- package/hub/tools.mjs +107 -107
- package/hub/tray.mjs +332 -0
- package/hub/workers/claude-worker.mjs +446 -446
- package/hub/workers/codex-mcp.mjs +414 -414
- package/hub/workers/delegator-mcp.mjs +1045 -1045
- package/hub/workers/factory.mjs +21 -21
- package/hub/workers/gemini-worker.mjs +349 -349
- package/hub/workers/interface.mjs +41 -41
- package/package.json +61 -60
- package/scripts/__tests__/keyword-detector.test.mjs +234 -234
- package/scripts/hub-ensure.mjs +102 -101
- package/scripts/keyword-detector.mjs +272 -272
- package/scripts/keyword-rules-expander.mjs +521 -521
- package/scripts/lib/keyword-rules.mjs +168 -168
- package/scripts/lib/mcp-filter.mjs +642 -642
- package/scripts/lib/mcp-server-catalog.mjs +118 -118
- package/scripts/mcp-check.mjs +126 -126
- package/scripts/preflight-cache.mjs +19 -0
- package/scripts/run.cjs +62 -62
- package/scripts/setup.mjs +68 -31
- package/scripts/test-tfx-route-no-claude-native.mjs +57 -57
- package/scripts/tfx-route-worker.mjs +161 -161
- package/scripts/tfx-route.sh +1360 -1326
- package/skills/tfx-auto/SKILL.md +196 -196
- package/skills/tfx-auto-codex/SKILL.md +77 -77
- package/skills/tfx-multi/SKILL.md +378 -378
- package/hub/team/cli-team-common.mjs +0 -348
- package/hub/team/cli-team-control.mjs +0 -393
- package/hub/team/cli-team-start.mjs +0 -516
- package/hub/team/cli-team-status.mjs +0 -283
- package/skills/auto-verify/SKILL.md +0 -145
- package/skills/manage-skills/SKILL.md +0 -192
- package/skills/verify-implementation/SKILL.md +0 -138
package/hub/router.mjs
CHANGED
|
@@ -1,782 +1,782 @@
|
|
|
1
|
-
// hub/router.mjs — 실시간 라우팅/수신함 상태 관리자
|
|
2
|
-
// SQLite는 감사 로그만 담당하고, 실제 배달 상태는 메모리에서 관리한다.
|
|
3
|
-
import { EventEmitter, once } from 'node:events';
|
|
4
|
-
import { uuidv7 } from './store.mjs';
|
|
5
|
-
|
|
6
|
-
const ASSIGN_PENDING_STATUSES = new Set(['queued', 'running']);
|
|
7
|
-
|
|
8
|
-
function uniqueStrings(values = []) {
|
|
9
|
-
return Array.from(new Set((values || []).map((value) => String(value || '').trim()).filter(Boolean)));
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
function clampAssignDuration(value, fallback = 600000, min = 1000, max = 86400000) {
|
|
13
|
-
const num = Number(value);
|
|
14
|
-
if (!Number.isFinite(num)) return fallback;
|
|
15
|
-
return Math.max(min, Math.min(Math.trunc(num), max));
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
function normalizeAssignTerminalStatus(input, metadata = {}) {
|
|
19
|
-
const status = String(input || '').trim().toLowerCase();
|
|
20
|
-
const resultTag = String(
|
|
21
|
-
metadata?.result
|
|
22
|
-
?? metadata?.status
|
|
23
|
-
?? metadata?.outcome
|
|
24
|
-
?? '',
|
|
25
|
-
).trim().toLowerCase();
|
|
26
|
-
|
|
27
|
-
if (status === 'queued') return 'queued';
|
|
28
|
-
if (status === 'running' || status === 'in_progress') return 'running';
|
|
29
|
-
if (status === 'timed_out' || status === 'timeout') return 'timed_out';
|
|
30
|
-
if (status === 'failed' || status === 'error') return 'failed';
|
|
31
|
-
if (status === 'succeeded' || status === 'success') return 'succeeded';
|
|
32
|
-
|
|
33
|
-
if (status === 'completed') {
|
|
34
|
-
if (resultTag === 'failed' || resultTag === 'error') return 'failed';
|
|
35
|
-
if (resultTag === 'timed_out' || resultTag === 'timeout') return 'timed_out';
|
|
36
|
-
return 'succeeded';
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
if (resultTag === 'failed' || resultTag === 'error') return 'failed';
|
|
40
|
-
if (resultTag === 'timed_out' || resultTag === 'timeout') return 'timed_out';
|
|
41
|
-
if (resultTag === 'succeeded' || resultTag === 'success') return 'succeeded';
|
|
42
|
-
return 'succeeded';
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
function normalizeAgentTopics(store, agentId, runtimeTopics) {
|
|
46
|
-
const topics = new Set(runtimeTopics || []);
|
|
47
|
-
const persisted = store.getAgent(agentId)?.topics || [];
|
|
48
|
-
for (const topic of persisted) topics.add(topic);
|
|
49
|
-
return Array.from(topics);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* 라우터 생성
|
|
54
|
-
* @param {object} store
|
|
55
|
-
*/
|
|
56
|
-
export function createRouter(store) {
|
|
57
|
-
let sweepTimer = null;
|
|
58
|
-
let staleTimer = null;
|
|
59
|
-
const responseEmitter = new EventEmitter();
|
|
60
|
-
const deliveryEmitter = new EventEmitter();
|
|
61
|
-
responseEmitter.setMaxListeners(200);
|
|
62
|
-
deliveryEmitter.setMaxListeners(200);
|
|
63
|
-
|
|
64
|
-
const runtimeTopics = new Map();
|
|
65
|
-
const queuesByAgent = new Map();
|
|
66
|
-
const liveMessages = new Map();
|
|
67
|
-
const deliveryLatencies = [];
|
|
68
|
-
|
|
69
|
-
function ensureAgentQueue(agentId) {
|
|
70
|
-
let queue = queuesByAgent.get(agentId);
|
|
71
|
-
if (!queue) {
|
|
72
|
-
queue = new Map();
|
|
73
|
-
queuesByAgent.set(agentId, queue);
|
|
74
|
-
}
|
|
75
|
-
return queue;
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
function pruneDeliveryStats(now = Date.now()) {
|
|
79
|
-
while (deliveryLatencies.length && deliveryLatencies[0].at < now - 300000) {
|
|
80
|
-
deliveryLatencies.shift();
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
function upsertRuntimeTopics(agentId, topics, { replace = true } = {}) {
|
|
85
|
-
const normalized = uniqueStrings(topics);
|
|
86
|
-
const current = replace ? new Set() : new Set(runtimeTopics.get(agentId) || []);
|
|
87
|
-
for (const topic of normalized) current.add(topic);
|
|
88
|
-
runtimeTopics.set(agentId, current);
|
|
89
|
-
store.updateAgentTopics(agentId, Array.from(current));
|
|
90
|
-
return Array.from(current);
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
function listRuntimeTopics(agentId) {
|
|
94
|
-
return normalizeAgentTopics(store, agentId, runtimeTopics.get(agentId));
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
function trackMessage(message, recipients) {
|
|
98
|
-
liveMessages.set(message.id, {
|
|
99
|
-
message,
|
|
100
|
-
recipients: new Set(recipients),
|
|
101
|
-
ackedBy: new Set(),
|
|
102
|
-
});
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
function getMessageRecord(messageId) {
|
|
106
|
-
return liveMessages.get(messageId) || null;
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
function removeMessage(messageId) {
|
|
110
|
-
const record = liveMessages.get(messageId);
|
|
111
|
-
if (!record) return;
|
|
112
|
-
for (const agentId of record.recipients) {
|
|
113
|
-
queuesByAgent.get(agentId)?.delete(messageId);
|
|
114
|
-
}
|
|
115
|
-
liveMessages.delete(messageId);
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
function queueMessage(agentId, message) {
|
|
119
|
-
const queue = ensureAgentQueue(agentId);
|
|
120
|
-
queue.set(message.id, {
|
|
121
|
-
message,
|
|
122
|
-
attempts: 0,
|
|
123
|
-
delivered_at_ms: null,
|
|
124
|
-
acked_at_ms: null,
|
|
125
|
-
});
|
|
126
|
-
deliveryEmitter.emit('message', agentId, message);
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
function resolveRecipients(msg) {
|
|
130
|
-
const to = msg.to_agent ?? msg.to;
|
|
131
|
-
if (!to?.startsWith('topic:')) {
|
|
132
|
-
return [to];
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
const topic = to.slice(6);
|
|
136
|
-
const recipients = new Set();
|
|
137
|
-
for (const [agentId, topics] of runtimeTopics) {
|
|
138
|
-
if (topics.has(topic)) recipients.add(agentId);
|
|
139
|
-
}
|
|
140
|
-
for (const agent of store.getAgentsByTopic(topic)) {
|
|
141
|
-
recipients.add(agent.agent_id);
|
|
142
|
-
}
|
|
143
|
-
return Array.from(recipients);
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
function sortedPending(agentId, { max_messages = 20, include_topics = null } = {}) {
|
|
147
|
-
const queue = ensureAgentQueue(agentId);
|
|
148
|
-
const topicFilter = include_topics?.length ? new Set(include_topics) : null;
|
|
149
|
-
const now = Date.now();
|
|
150
|
-
const pending = [];
|
|
151
|
-
|
|
152
|
-
for (const delivery of queue.values()) {
|
|
153
|
-
const { message } = delivery;
|
|
154
|
-
if (delivery.acked_at_ms) continue;
|
|
155
|
-
if (message.expires_at_ms <= now) continue;
|
|
156
|
-
if (topicFilter && !topicFilter.has(message.topic)) continue;
|
|
157
|
-
pending.push(message);
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
pending.sort((a, b) => {
|
|
161
|
-
if (b.priority !== a.priority) return b.priority - a.priority;
|
|
162
|
-
return a.created_at_ms - b.created_at_ms;
|
|
163
|
-
});
|
|
164
|
-
return pending.slice(0, max_messages);
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
function markDelivered(agentId, messageId) {
|
|
168
|
-
const delivery = queuesByAgent.get(agentId)?.get(messageId);
|
|
169
|
-
const record = getMessageRecord(messageId);
|
|
170
|
-
if (!delivery || !record) return false;
|
|
171
|
-
|
|
172
|
-
delivery.attempts += 1;
|
|
173
|
-
if (!delivery.delivered_at_ms) {
|
|
174
|
-
delivery.delivered_at_ms = Date.now();
|
|
175
|
-
record.message.status = 'delivered';
|
|
176
|
-
store.updateMessageStatus(messageId, 'delivered');
|
|
177
|
-
deliveryLatencies.push({
|
|
178
|
-
at: delivery.delivered_at_ms,
|
|
179
|
-
ms: delivery.delivered_at_ms - record.message.created_at_ms,
|
|
180
|
-
});
|
|
181
|
-
pruneDeliveryStats(delivery.delivered_at_ms);
|
|
182
|
-
return true;
|
|
183
|
-
}
|
|
184
|
-
return false;
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
function ackMessages(ids, agentId) {
|
|
188
|
-
const now = Date.now();
|
|
189
|
-
let count = 0;
|
|
190
|
-
|
|
191
|
-
for (const id of ids || []) {
|
|
192
|
-
const delivery = queuesByAgent.get(agentId)?.get(id);
|
|
193
|
-
const record = getMessageRecord(id);
|
|
194
|
-
if (!delivery || !record || delivery.acked_at_ms) continue;
|
|
195
|
-
|
|
196
|
-
delivery.acked_at_ms = now;
|
|
197
|
-
record.ackedBy.add(agentId);
|
|
198
|
-
count += 1;
|
|
199
|
-
|
|
200
|
-
if (record.ackedBy.size >= record.recipients.size) {
|
|
201
|
-
record.message.status = 'acked';
|
|
202
|
-
store.updateMessageStatus(id, 'acked');
|
|
203
|
-
removeMessage(id);
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
return count;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
function dispatchMessage({ type, from, to, topic, priority = 5, ttl_ms = 300000, payload = {}, trace_id, correlation_id }) {
|
|
211
|
-
const msg = store.auditLog({
|
|
212
|
-
type,
|
|
213
|
-
from,
|
|
214
|
-
to,
|
|
215
|
-
topic,
|
|
216
|
-
priority,
|
|
217
|
-
ttl_ms,
|
|
218
|
-
payload,
|
|
219
|
-
trace_id,
|
|
220
|
-
correlation_id,
|
|
221
|
-
});
|
|
222
|
-
const recipients = uniqueStrings(resolveRecipients(msg));
|
|
223
|
-
if (recipients.length) {
|
|
224
|
-
trackMessage(msg, recipients);
|
|
225
|
-
for (const agentId of recipients) {
|
|
226
|
-
queueMessage(agentId, msg);
|
|
227
|
-
}
|
|
228
|
-
msg.status = 'delivered';
|
|
229
|
-
store.updateMessageStatus(msg.id, 'delivered');
|
|
230
|
-
}
|
|
231
|
-
if (msg.type === 'response') {
|
|
232
|
-
responseEmitter.emit(msg.correlation_id, msg.payload);
|
|
233
|
-
}
|
|
234
|
-
return { msg, recipients };
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
function buildAssignSnapshot(job, extra = {}) {
|
|
238
|
-
if (!job) return null;
|
|
239
|
-
return {
|
|
240
|
-
job_id: job.job_id,
|
|
241
|
-
supervisor_agent: job.supervisor_agent,
|
|
242
|
-
worker_agent: job.worker_agent,
|
|
243
|
-
topic: job.topic,
|
|
244
|
-
task: job.task,
|
|
245
|
-
status: job.status,
|
|
246
|
-
attempt: job.attempt,
|
|
247
|
-
retry_count: job.retry_count,
|
|
248
|
-
max_retries: job.max_retries,
|
|
249
|
-
timeout_ms: job.timeout_ms,
|
|
250
|
-
deadline_ms: job.deadline_ms,
|
|
251
|
-
trace_id: job.trace_id,
|
|
252
|
-
correlation_id: job.correlation_id,
|
|
253
|
-
last_message_id: job.last_message_id,
|
|
254
|
-
result: job.result,
|
|
255
|
-
error: job.error,
|
|
256
|
-
updated_at_ms: job.updated_at_ms,
|
|
257
|
-
completed_at_ms: job.completed_at_ms,
|
|
258
|
-
...extra,
|
|
259
|
-
};
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
function notifyAssignSupervisor(job, event, extra = {}) {
|
|
263
|
-
if (!job?.supervisor_agent) return null;
|
|
264
|
-
const { msg } = dispatchMessage({
|
|
265
|
-
type: 'event',
|
|
266
|
-
from: job.worker_agent || 'assign-router',
|
|
267
|
-
to: job.supervisor_agent,
|
|
268
|
-
topic: 'assign.result',
|
|
269
|
-
priority: Math.max(5, job.priority || 5),
|
|
270
|
-
ttl_ms: job.ttl_ms || job.timeout_ms || 600000,
|
|
271
|
-
payload: {
|
|
272
|
-
event,
|
|
273
|
-
...buildAssignSnapshot(job),
|
|
274
|
-
...extra,
|
|
275
|
-
},
|
|
276
|
-
trace_id: job.trace_id,
|
|
277
|
-
correlation_id: job.correlation_id,
|
|
278
|
-
});
|
|
279
|
-
return msg;
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
function dispatchAssignJob(job, reason = 'dispatch') {
|
|
283
|
-
const { msg, recipients } = dispatchMessage({
|
|
284
|
-
type: 'handoff',
|
|
285
|
-
from: job.supervisor_agent,
|
|
286
|
-
to: job.worker_agent,
|
|
287
|
-
topic: job.topic || 'assign.job',
|
|
288
|
-
priority: job.priority || 5,
|
|
289
|
-
ttl_ms: job.ttl_ms || job.timeout_ms || 600000,
|
|
290
|
-
payload: {
|
|
291
|
-
kind: 'assign.job',
|
|
292
|
-
reason,
|
|
293
|
-
assign_job_id: job.job_id,
|
|
294
|
-
attempt: job.attempt,
|
|
295
|
-
retry_count: job.retry_count,
|
|
296
|
-
max_retries: job.max_retries,
|
|
297
|
-
timeout_ms: job.timeout_ms,
|
|
298
|
-
supervisor_agent: job.supervisor_agent,
|
|
299
|
-
worker_agent: job.worker_agent,
|
|
300
|
-
task: job.task,
|
|
301
|
-
payload: job.payload || {},
|
|
302
|
-
},
|
|
303
|
-
trace_id: job.trace_id,
|
|
304
|
-
correlation_id: job.correlation_id,
|
|
305
|
-
});
|
|
306
|
-
|
|
307
|
-
const updated = store.updateAssignStatus(job.job_id, job.status, {
|
|
308
|
-
last_message_id: msg.id,
|
|
309
|
-
});
|
|
310
|
-
return { job: updated || job, recipients, message_id: msg.id };
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
function scheduleAssignRetry(job, reason, error = null, requested_by = 'system') {
|
|
314
|
-
if (!job) {
|
|
315
|
-
return { ok: false, error: { code: 'ASSIGN_NOT_FOUND', message: 'assign job not found' } };
|
|
316
|
-
}
|
|
317
|
-
if (job.retry_count >= job.max_retries) {
|
|
318
|
-
return {
|
|
319
|
-
ok: false,
|
|
320
|
-
error: {
|
|
321
|
-
code: 'ASSIGN_RETRY_EXHAUSTED',
|
|
322
|
-
message: `retry exhausted for ${job.job_id}`,
|
|
323
|
-
},
|
|
324
|
-
};
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
const queued = store.retryAssign(job.job_id, {
|
|
328
|
-
error,
|
|
329
|
-
timeout_ms: job.timeout_ms,
|
|
330
|
-
ttl_ms: job.ttl_ms,
|
|
331
|
-
});
|
|
332
|
-
const dispatched = dispatchAssignJob(queued, 'retry');
|
|
333
|
-
notifyAssignSupervisor(dispatched.job, 'retry_scheduled', {
|
|
334
|
-
retry_reason: reason,
|
|
335
|
-
requested_by,
|
|
336
|
-
});
|
|
337
|
-
return {
|
|
338
|
-
ok: true,
|
|
339
|
-
data: {
|
|
340
|
-
retried: true,
|
|
341
|
-
...buildAssignSnapshot(dispatched.job, {
|
|
342
|
-
retry_reason: reason,
|
|
343
|
-
requested_by,
|
|
344
|
-
}),
|
|
345
|
-
},
|
|
346
|
-
};
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
function handleAssignTimeout(job) {
|
|
350
|
-
const timedOut = store.updateAssignStatus(job.job_id, 'timed_out', {
|
|
351
|
-
error: job.error ?? { message: 'assign job timed out' },
|
|
352
|
-
});
|
|
353
|
-
|
|
354
|
-
if (timedOut.retry_count < timedOut.max_retries) {
|
|
355
|
-
return scheduleAssignRetry(timedOut, 'timed_out', timedOut.error, 'sweeper');
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
notifyAssignSupervisor(timedOut, 'completed', {
|
|
359
|
-
completion_reason: 'timed_out',
|
|
360
|
-
});
|
|
361
|
-
return { ok: true, data: buildAssignSnapshot(timedOut, { completion_reason: 'timed_out' }) };
|
|
362
|
-
}
|
|
363
|
-
|
|
364
|
-
const router = {
|
|
365
|
-
responseEmitter,
|
|
366
|
-
deliveryEmitter,
|
|
367
|
-
|
|
368
|
-
registerAgent(args) {
|
|
369
|
-
const result = store.registerAgent(args);
|
|
370
|
-
upsertRuntimeTopics(args.agent_id, args.topics || [], { replace: true });
|
|
371
|
-
return result;
|
|
372
|
-
},
|
|
373
|
-
|
|
374
|
-
refreshAgentLease(agentId, ttlMs = 30000) {
|
|
375
|
-
return store.refreshLease(agentId, ttlMs);
|
|
376
|
-
},
|
|
377
|
-
|
|
378
|
-
subscribeAgent(agentId, topics, { replace = false } = {}) {
|
|
379
|
-
const nextTopics = upsertRuntimeTopics(agentId, topics, { replace });
|
|
380
|
-
return { agent_id: agentId, topics: nextTopics };
|
|
381
|
-
},
|
|
382
|
-
|
|
383
|
-
getSubscribedTopics(agentId) {
|
|
384
|
-
return listRuntimeTopics(agentId);
|
|
385
|
-
},
|
|
386
|
-
|
|
387
|
-
updateAgentStatus(agentId, status) {
|
|
388
|
-
if (status === 'offline') {
|
|
389
|
-
runtimeTopics.delete(agentId);
|
|
390
|
-
}
|
|
391
|
-
return store.updateAgentStatus(agentId, status);
|
|
392
|
-
},
|
|
393
|
-
|
|
394
|
-
route(msg) {
|
|
395
|
-
const recipients = uniqueStrings(resolveRecipients(msg));
|
|
396
|
-
if (!recipients.length) return 0;
|
|
397
|
-
if (!getMessageRecord(msg.id)) {
|
|
398
|
-
trackMessage(msg, recipients);
|
|
399
|
-
}
|
|
400
|
-
for (const agentId of recipients) {
|
|
401
|
-
queueMessage(agentId, msg);
|
|
402
|
-
}
|
|
403
|
-
store.updateMessageStatus(msg.id, 'delivered');
|
|
404
|
-
return recipients.length;
|
|
405
|
-
},
|
|
406
|
-
|
|
407
|
-
getPendingMessages(agentId, options = {}) {
|
|
408
|
-
return sortedPending(agentId, options);
|
|
409
|
-
},
|
|
410
|
-
|
|
411
|
-
markMessagePushed(agentId, messageId) {
|
|
412
|
-
return markDelivered(agentId, messageId);
|
|
413
|
-
},
|
|
414
|
-
|
|
415
|
-
drainAgent(agentId, { max_messages = 20, include_topics = null, auto_ack = false } = {}) {
|
|
416
|
-
const messages = sortedPending(agentId, { max_messages, include_topics });
|
|
417
|
-
for (const message of messages) {
|
|
418
|
-
markDelivered(agentId, message.id);
|
|
419
|
-
}
|
|
420
|
-
if (auto_ack && messages.length) {
|
|
421
|
-
ackMessages(messages.map((message) => message.id), agentId);
|
|
422
|
-
}
|
|
423
|
-
return messages;
|
|
424
|
-
},
|
|
425
|
-
|
|
426
|
-
ackMessages(ids, agentId) {
|
|
427
|
-
return ackMessages(ids, agentId);
|
|
428
|
-
},
|
|
429
|
-
|
|
430
|
-
async handleAsk({
|
|
431
|
-
from, to, topic, question, context_refs,
|
|
432
|
-
payload = {}, priority = 5, ttl_ms = 300000,
|
|
433
|
-
await_response_ms = 0, trace_id, correlation_id,
|
|
434
|
-
}) {
|
|
435
|
-
const cid = correlation_id || uuidv7();
|
|
436
|
-
const tid = trace_id || uuidv7();
|
|
437
|
-
|
|
438
|
-
const { msg } = dispatchMessage({
|
|
439
|
-
type: 'request',
|
|
440
|
-
from,
|
|
441
|
-
to,
|
|
442
|
-
topic,
|
|
443
|
-
priority,
|
|
444
|
-
ttl_ms,
|
|
445
|
-
payload: { question, context_refs, ...payload },
|
|
446
|
-
correlation_id: cid,
|
|
447
|
-
trace_id: tid,
|
|
448
|
-
});
|
|
449
|
-
|
|
450
|
-
if (await_response_ms <= 0) {
|
|
451
|
-
return {
|
|
452
|
-
ok: true,
|
|
453
|
-
data: { request_message_id: msg.id, correlation_id: cid, trace_id: tid, state: 'queued' },
|
|
454
|
-
};
|
|
455
|
-
}
|
|
456
|
-
|
|
457
|
-
try {
|
|
458
|
-
const [response] = await once(responseEmitter, cid, {
|
|
459
|
-
signal: AbortSignal.timeout(Math.min(await_response_ms, 30000)),
|
|
460
|
-
});
|
|
461
|
-
return {
|
|
462
|
-
ok: true,
|
|
463
|
-
data: { request_message_id: msg.id, correlation_id: cid, trace_id: tid, state: 'answered', response },
|
|
464
|
-
};
|
|
465
|
-
} catch {
|
|
466
|
-
const resp = store.getResponseByCorrelation(cid);
|
|
467
|
-
if (resp) {
|
|
468
|
-
return {
|
|
469
|
-
ok: true,
|
|
470
|
-
data: { request_message_id: msg.id, correlation_id: cid, trace_id: tid, state: 'answered', response: resp.payload },
|
|
471
|
-
};
|
|
472
|
-
}
|
|
473
|
-
return {
|
|
474
|
-
ok: true,
|
|
475
|
-
data: { request_message_id: msg.id, correlation_id: cid, trace_id: tid, state: 'delivered' },
|
|
476
|
-
};
|
|
477
|
-
}
|
|
478
|
-
},
|
|
479
|
-
|
|
480
|
-
handlePublish({
|
|
481
|
-
from, to, topic, priority = 5, ttl_ms = 300000,
|
|
482
|
-
payload = {}, trace_id, correlation_id, message_type,
|
|
483
|
-
}) {
|
|
484
|
-
const type = message_type || (correlation_id ? 'response' : 'event');
|
|
485
|
-
const { msg, recipients } = dispatchMessage({
|
|
486
|
-
type,
|
|
487
|
-
from,
|
|
488
|
-
to,
|
|
489
|
-
topic,
|
|
490
|
-
priority,
|
|
491
|
-
ttl_ms,
|
|
492
|
-
payload,
|
|
493
|
-
trace_id: trace_id || uuidv7(),
|
|
494
|
-
correlation_id: correlation_id || uuidv7(),
|
|
495
|
-
});
|
|
496
|
-
return {
|
|
497
|
-
ok: true,
|
|
498
|
-
data: {
|
|
499
|
-
message_id: msg.id,
|
|
500
|
-
fanout_count: recipients.length,
|
|
501
|
-
expires_at_ms: msg.expires_at_ms,
|
|
502
|
-
},
|
|
503
|
-
};
|
|
504
|
-
},
|
|
505
|
-
|
|
506
|
-
handleHandoff({
|
|
507
|
-
from, to, topic, task, acceptance_criteria, context_refs,
|
|
508
|
-
priority = 5, ttl_ms = 600000, trace_id, correlation_id,
|
|
509
|
-
}) {
|
|
510
|
-
const { msg } = dispatchMessage({
|
|
511
|
-
type: 'handoff',
|
|
512
|
-
from,
|
|
513
|
-
to,
|
|
514
|
-
topic,
|
|
515
|
-
priority,
|
|
516
|
-
ttl_ms,
|
|
517
|
-
payload: { task, acceptance_criteria, context_refs },
|
|
518
|
-
trace_id: trace_id || uuidv7(),
|
|
519
|
-
correlation_id: correlation_id || uuidv7(),
|
|
520
|
-
});
|
|
521
|
-
return {
|
|
522
|
-
ok: true,
|
|
523
|
-
data: { handoff_message_id: msg.id, state: 'queued', assigned_to: to },
|
|
524
|
-
};
|
|
525
|
-
},
|
|
526
|
-
|
|
527
|
-
assignAsync({
|
|
528
|
-
supervisor_agent,
|
|
529
|
-
worker_agent,
|
|
530
|
-
topic = 'assign.job',
|
|
531
|
-
task = '',
|
|
532
|
-
payload = {},
|
|
533
|
-
priority = 5,
|
|
534
|
-
ttl_ms = 600000,
|
|
535
|
-
timeout_ms = 600000,
|
|
536
|
-
max_retries = 0,
|
|
537
|
-
trace_id,
|
|
538
|
-
correlation_id,
|
|
539
|
-
}) {
|
|
540
|
-
const job = store.createAssign({
|
|
541
|
-
supervisor_agent,
|
|
542
|
-
worker_agent,
|
|
543
|
-
topic,
|
|
544
|
-
task,
|
|
545
|
-
payload,
|
|
546
|
-
priority,
|
|
547
|
-
ttl_ms,
|
|
548
|
-
timeout_ms,
|
|
549
|
-
max_retries,
|
|
550
|
-
trace_id,
|
|
551
|
-
correlation_id,
|
|
552
|
-
});
|
|
553
|
-
const dispatched = dispatchAssignJob(job, 'create');
|
|
554
|
-
return {
|
|
555
|
-
ok: true,
|
|
556
|
-
data: {
|
|
557
|
-
assigned_to: worker_agent,
|
|
558
|
-
...buildAssignSnapshot(dispatched.job),
|
|
559
|
-
},
|
|
560
|
-
};
|
|
561
|
-
},
|
|
562
|
-
|
|
563
|
-
reportAssignResult({
|
|
564
|
-
job_id,
|
|
565
|
-
worker_agent,
|
|
566
|
-
status,
|
|
567
|
-
attempt,
|
|
568
|
-
result,
|
|
569
|
-
error,
|
|
570
|
-
payload = {},
|
|
571
|
-
metadata = {},
|
|
572
|
-
}) {
|
|
573
|
-
const job = store.getAssign(job_id);
|
|
574
|
-
if (!job) {
|
|
575
|
-
return {
|
|
576
|
-
ok: false,
|
|
577
|
-
error: { code: 'ASSIGN_NOT_FOUND', message: `assign job not found: ${job_id}` },
|
|
578
|
-
};
|
|
579
|
-
}
|
|
580
|
-
if (worker_agent && worker_agent !== job.worker_agent) {
|
|
581
|
-
return {
|
|
582
|
-
ok: false,
|
|
583
|
-
error: { code: 'ASSIGN_WORKER_MISMATCH', message: `worker mismatch: ${worker_agent}` },
|
|
584
|
-
};
|
|
585
|
-
}
|
|
586
|
-
if (Number.isFinite(Number(attempt)) && Number(attempt) !== job.attempt) {
|
|
587
|
-
return {
|
|
588
|
-
ok: false,
|
|
589
|
-
error: {
|
|
590
|
-
code: 'ASSIGN_ATTEMPT_MISMATCH',
|
|
591
|
-
message: `stale assign result for attempt ${attempt} (current ${job.attempt})`,
|
|
592
|
-
},
|
|
593
|
-
};
|
|
594
|
-
}
|
|
595
|
-
|
|
596
|
-
const mergedMetadata = {
|
|
597
|
-
...(payload?.metadata || {}),
|
|
598
|
-
...(metadata || {}),
|
|
599
|
-
};
|
|
600
|
-
const normalizedStatus = normalizeAssignTerminalStatus(
|
|
601
|
-
status || payload?.status,
|
|
602
|
-
mergedMetadata,
|
|
603
|
-
);
|
|
604
|
-
const nextResult = result ?? (Object.prototype.hasOwnProperty.call(payload || {}, 'result') ? payload.result : payload);
|
|
605
|
-
const nextError = error ?? payload?.error ?? null;
|
|
606
|
-
|
|
607
|
-
if (normalizedStatus === 'running') {
|
|
608
|
-
const running = store.updateAssignStatus(job.job_id, 'running', {
|
|
609
|
-
started_at_ms: job.started_at_ms || Date.now(),
|
|
610
|
-
deadline_ms: Date.now() + clampAssignDuration(job.timeout_ms, job.timeout_ms),
|
|
611
|
-
result: nextResult,
|
|
612
|
-
error: nextError,
|
|
613
|
-
});
|
|
614
|
-
notifyAssignSupervisor(running, 'progress');
|
|
615
|
-
return { ok: true, data: buildAssignSnapshot(running) };
|
|
616
|
-
}
|
|
617
|
-
|
|
618
|
-
const finalized = store.updateAssignStatus(job.job_id, normalizedStatus, {
|
|
619
|
-
result: nextResult,
|
|
620
|
-
error: nextError,
|
|
621
|
-
});
|
|
622
|
-
|
|
623
|
-
if ((normalizedStatus === 'failed' || normalizedStatus === 'timed_out')
|
|
624
|
-
&& finalized.retry_count < finalized.max_retries) {
|
|
625
|
-
return scheduleAssignRetry(finalized, normalizedStatus, nextError, worker_agent || finalized.worker_agent);
|
|
626
|
-
}
|
|
627
|
-
|
|
628
|
-
notifyAssignSupervisor(finalized, 'completed');
|
|
629
|
-
return { ok: true, data: buildAssignSnapshot(finalized) };
|
|
630
|
-
},
|
|
631
|
-
|
|
632
|
-
getAssignStatus({ job_id, ...filters } = {}) {
|
|
633
|
-
if (job_id) {
|
|
634
|
-
const job = store.getAssign(job_id);
|
|
635
|
-
return job
|
|
636
|
-
? { ok: true, data: buildAssignSnapshot(job) }
|
|
637
|
-
: { ok: false, error: { code: 'ASSIGN_NOT_FOUND', message: `assign job not found: ${job_id}` } };
|
|
638
|
-
}
|
|
639
|
-
return {
|
|
640
|
-
ok: true,
|
|
641
|
-
data: {
|
|
642
|
-
assigns: store.listAssigns(filters).map((job) => buildAssignSnapshot(job)),
|
|
643
|
-
},
|
|
644
|
-
};
|
|
645
|
-
},
|
|
646
|
-
|
|
647
|
-
retryAssign(job_id, { reason = 'manual', requested_by = 'manual' } = {}) {
|
|
648
|
-
const job = store.getAssign(job_id);
|
|
649
|
-
if (!job) {
|
|
650
|
-
return {
|
|
651
|
-
ok: false,
|
|
652
|
-
error: { code: 'ASSIGN_NOT_FOUND', message: `assign job not found: ${job_id}` },
|
|
653
|
-
};
|
|
654
|
-
}
|
|
655
|
-
return scheduleAssignRetry(job, reason, job.error, requested_by);
|
|
656
|
-
},
|
|
657
|
-
|
|
658
|
-
sweepExpired() {
|
|
659
|
-
const now = Date.now();
|
|
660
|
-
let expired = 0;
|
|
661
|
-
for (const [messageId, record] of Array.from(liveMessages.entries())) {
|
|
662
|
-
if (record.message.expires_at_ms > now) continue;
|
|
663
|
-
store.moveToDeadLetter(messageId, 'ttl_expired', null);
|
|
664
|
-
removeMessage(messageId);
|
|
665
|
-
expired += 1;
|
|
666
|
-
}
|
|
667
|
-
return { messages: expired };
|
|
668
|
-
},
|
|
669
|
-
|
|
670
|
-
sweepTimedOutAssigns() {
|
|
671
|
-
const expiredAssigns = store.listAssigns({
|
|
672
|
-
statuses: Array.from(ASSIGN_PENDING_STATUSES),
|
|
673
|
-
active_before_ms: Date.now(),
|
|
674
|
-
limit: 100,
|
|
675
|
-
});
|
|
676
|
-
let timed_out = 0;
|
|
677
|
-
let retried = 0;
|
|
678
|
-
|
|
679
|
-
for (const job of expiredAssigns) {
|
|
680
|
-
const result = handleAssignTimeout(job);
|
|
681
|
-
timed_out += 1;
|
|
682
|
-
if (result?.data?.retried) retried += 1;
|
|
683
|
-
}
|
|
684
|
-
|
|
685
|
-
return { timed_out, retried };
|
|
686
|
-
},
|
|
687
|
-
|
|
688
|
-
startSweeper() {
|
|
689
|
-
if (sweepTimer) return;
|
|
690
|
-
sweepTimer = setInterval(() => {
|
|
691
|
-
try {
|
|
692
|
-
router.sweepExpired();
|
|
693
|
-
router.sweepTimedOutAssigns();
|
|
694
|
-
} catch {}
|
|
695
|
-
}, 10000);
|
|
696
|
-
staleTimer = setInterval(() => {
|
|
697
|
-
try { store.sweepStaleAgents(); } catch {}
|
|
698
|
-
}, 120000);
|
|
699
|
-
sweepTimer.unref();
|
|
700
|
-
staleTimer.unref();
|
|
701
|
-
},
|
|
702
|
-
|
|
703
|
-
stopSweeper() {
|
|
704
|
-
if (sweepTimer) { clearInterval(sweepTimer); sweepTimer = null; }
|
|
705
|
-
if (staleTimer) { clearInterval(staleTimer); staleTimer = null; }
|
|
706
|
-
},
|
|
707
|
-
|
|
708
|
-
getQueueDepths() {
|
|
709
|
-
const counts = { urgent: 0, normal: 0, dlq: store.getAuditStats().dlq };
|
|
710
|
-
for (const record of liveMessages.values()) {
|
|
711
|
-
const pending = record.recipients.size > record.ackedBy.size;
|
|
712
|
-
if (!pending) continue;
|
|
713
|
-
if (record.message.priority >= 7) counts.urgent += 1;
|
|
714
|
-
else counts.normal += 1;
|
|
715
|
-
}
|
|
716
|
-
return counts;
|
|
717
|
-
},
|
|
718
|
-
|
|
719
|
-
getDeliveryStats() {
|
|
720
|
-
pruneDeliveryStats();
|
|
721
|
-
if (!deliveryLatencies.length) {
|
|
722
|
-
return { total_deliveries: 0, avg_delivery_ms: 0 };
|
|
723
|
-
}
|
|
724
|
-
const total = deliveryLatencies.reduce((sum, item) => sum + item.ms, 0);
|
|
725
|
-
return {
|
|
726
|
-
total_deliveries: deliveryLatencies.length,
|
|
727
|
-
avg_delivery_ms: Math.round(total / deliveryLatencies.length),
|
|
728
|
-
};
|
|
729
|
-
},
|
|
730
|
-
|
|
731
|
-
getStatus(scope = 'hub', { agent_id, trace_id, include_metrics = true } = {}) {
|
|
732
|
-
const data = {};
|
|
733
|
-
|
|
734
|
-
if (scope === 'hub' || scope === 'queue') {
|
|
735
|
-
data.hub = {
|
|
736
|
-
state: 'healthy',
|
|
737
|
-
uptime_ms: process.uptime() * 1000 | 0,
|
|
738
|
-
realtime_transport: 'named-pipe',
|
|
739
|
-
audit_store: 'sqlite',
|
|
740
|
-
};
|
|
741
|
-
if (include_metrics) {
|
|
742
|
-
const depths = router.getQueueDepths();
|
|
743
|
-
const stats = router.getDeliveryStats();
|
|
744
|
-
const auditStats = store.getAuditStats();
|
|
745
|
-
data.queues = {
|
|
746
|
-
urgent_depth: depths.urgent,
|
|
747
|
-
normal_depth: depths.normal,
|
|
748
|
-
dlq_depth: depths.dlq,
|
|
749
|
-
avg_delivery_ms: stats.avg_delivery_ms,
|
|
750
|
-
};
|
|
751
|
-
data.assigns = {
|
|
752
|
-
queued: auditStats.assign_queued,
|
|
753
|
-
running: auditStats.assign_running,
|
|
754
|
-
failed: auditStats.assign_failed,
|
|
755
|
-
timed_out: auditStats.assign_timed_out,
|
|
756
|
-
};
|
|
757
|
-
}
|
|
758
|
-
}
|
|
759
|
-
|
|
760
|
-
if (scope === 'agent' && agent_id) {
|
|
761
|
-
const agent = store.getAgent(agent_id);
|
|
762
|
-
if (agent) {
|
|
763
|
-
data.agent = {
|
|
764
|
-
agent_id: agent.agent_id,
|
|
765
|
-
status: agent.status,
|
|
766
|
-
pending: sortedPending(agent_id, { max_messages: 1000 }).length,
|
|
767
|
-
last_seen_ms: agent.last_seen_ms,
|
|
768
|
-
topics: listRuntimeTopics(agent_id),
|
|
769
|
-
};
|
|
770
|
-
}
|
|
771
|
-
}
|
|
772
|
-
|
|
773
|
-
if (scope === 'trace' && trace_id) {
|
|
774
|
-
data.trace = store.getMessagesByTrace(trace_id);
|
|
775
|
-
}
|
|
776
|
-
|
|
777
|
-
return { ok: true, data };
|
|
778
|
-
},
|
|
779
|
-
};
|
|
780
|
-
|
|
781
|
-
return router;
|
|
782
|
-
}
|
|
1
|
+
// hub/router.mjs — 실시간 라우팅/수신함 상태 관리자
|
|
2
|
+
// SQLite는 감사 로그만 담당하고, 실제 배달 상태는 메모리에서 관리한다.
|
|
3
|
+
import { EventEmitter, once } from 'node:events';
|
|
4
|
+
import { uuidv7 } from './store.mjs';
|
|
5
|
+
|
|
6
|
+
const ASSIGN_PENDING_STATUSES = new Set(['queued', 'running']);
|
|
7
|
+
|
|
8
|
+
/**
 * Turn a list of values into unique, trimmed, non-empty strings.
 * Falsy entries are treated as '' and dropped; first-occurrence order is kept.
 * @param {Array} [values=[]] nullish input is treated as an empty list
 * @returns {string[]}
 */
function uniqueStrings(values = []) {
  const seen = (values || []).reduce((acc, raw) => {
    const text = String(raw || '').trim();
    if (text) acc.add(text);
    return acc;
  }, new Set());
  return Array.from(seen);
}
|
|
11
|
+
|
|
12
|
+
/**
 * Coerce a duration to an integer number of milliseconds within [min, max].
 * @param {*} value candidate duration; converted with Number()
 * @param {number} [fallback=600000] returned as-is when value is not finite
 * @param {number} [min=1000] lower bound
 * @param {number} [max=86400000] upper bound
 * @returns {number}
 */
function clampAssignDuration(value, fallback = 600000, min = 1000, max = 86400000) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) {
    return fallback;
  }
  const upperBounded = Math.min(Math.trunc(parsed), max);
  return Math.max(min, upperBounded);
}
|
|
17
|
+
|
|
18
|
+
/**
 * Map a reported assign status (plus optional metadata hints) onto one of
 * 'queued', 'running', 'timed_out', 'failed' or 'succeeded'.
 *
 * Known status aliases win outright. For 'completed' or any unrecognized
 * status, the first non-nullish of metadata.result / .status / .outcome is
 * consulted: a failure or timeout tag is honored, anything else resolves to
 * 'succeeded'.
 *
 * @param {*} input reported status; trimmed and lower-cased
 * @param {object|null} [metadata={}] optional hints
 * @returns {string}
 */
function normalizeAssignTerminalStatus(input, metadata = {}) {
  const status = String(input || '').trim().toLowerCase();
  const hint = metadata?.result ?? metadata?.status ?? metadata?.outcome ?? '';
  const resultTag = String(hint).trim().toLowerCase();

  switch (status) {
    case 'queued':
      return 'queued';
    case 'running':
    case 'in_progress':
      return 'running';
    case 'timed_out':
    case 'timeout':
      return 'timed_out';
    case 'failed':
    case 'error':
      return 'failed';
    case 'succeeded':
    case 'success':
      return 'succeeded';
    default:
      break;
  }

  // 'completed' and unknown statuses both defer to the metadata tag.
  switch (resultTag) {
    case 'failed':
    case 'error':
      return 'failed';
    case 'timed_out':
    case 'timeout':
      return 'timed_out';
    default:
      return 'succeeded';
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Merge an agent's runtime topics with the topics stored on its agent record.
 * Runtime topics come first; duplicates are removed.
 * @param {object} store must expose getAgent(agentId)
 * @param {string} agentId
 * @param {Iterable<string>|null|undefined} runtimeTopics
 * @returns {string[]}
 */
function normalizeAgentTopics(store, agentId, runtimeTopics) {
  const runtimeList = [...(runtimeTopics || [])];
  const persistedList = store.getAgent(agentId)?.topics || [];
  return Array.from(new Set([...runtimeList, ...persistedList]));
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* 라우터 생성
|
|
54
|
+
* @param {object} store
|
|
55
|
+
*/
|
|
56
|
+
export function createRouter(store) {
|
|
57
|
+
let sweepTimer = null;
|
|
58
|
+
let staleTimer = null;
|
|
59
|
+
const responseEmitter = new EventEmitter();
|
|
60
|
+
const deliveryEmitter = new EventEmitter();
|
|
61
|
+
responseEmitter.setMaxListeners(200);
|
|
62
|
+
deliveryEmitter.setMaxListeners(200);
|
|
63
|
+
|
|
64
|
+
const runtimeTopics = new Map();
|
|
65
|
+
const queuesByAgent = new Map();
|
|
66
|
+
const liveMessages = new Map();
|
|
67
|
+
const deliveryLatencies = [];
|
|
68
|
+
|
|
69
|
+
/**
 * Return the delivery queue (a Map keyed by message id) for an agent,
 * creating and registering an empty one on first use.
 * @param {string} agentId
 * @returns {Map}
 */
function ensureAgentQueue(agentId) {
  const existing = queuesByAgent.get(agentId);
  if (existing) {
    return existing;
  }
  const created = new Map();
  queuesByAgent.set(agentId, created);
  return created;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Drop leading latency samples whose timestamp is more than 300000 ms
 * (five minutes) older than `now`. Stops at the first sample inside the window.
 * @param {number} [now=Date.now()]
 */
function pruneDeliveryStats(now = Date.now()) {
  const cutoff = now - 300000;
  let staleCount = 0;
  while (staleCount < deliveryLatencies.length && deliveryLatencies[staleCount].at < cutoff) {
    staleCount += 1;
  }
  if (staleCount > 0) {
    deliveryLatencies.splice(0, staleCount);
  }
}
|
|
83
|
+
|
|
84
|
+
/**
 * Set or extend an agent's runtime topic subscriptions and write the
 * resulting list to the store via store.updateAgentTopics.
 * @param {string} agentId
 * @param {Array} topics normalized with uniqueStrings
 * @param {{replace?: boolean}} [options] replace=true (default) discards the
 *   current runtime topics; false merges into them
 * @returns {string[]} the agent's topics after the update
 */
function upsertRuntimeTopics(agentId, topics, { replace = true } = {}) {
  const incoming = uniqueStrings(topics);
  const seed = replace ? [] : runtimeTopics.get(agentId) || [];
  const next = new Set(seed);
  incoming.forEach((topic) => next.add(topic));
  runtimeTopics.set(agentId, next);
  store.updateAgentTopics(agentId, Array.from(next));
  return Array.from(next);
}
|
|
92
|
+
|
|
93
|
+
/**
 * List an agent's topics: its runtime subscriptions merged with whatever
 * normalizeAgentTopics reads from the store.
 * @param {string} agentId
 * @returns {string[]}
 */
function listRuntimeTopics(agentId) {
  const live = runtimeTopics.get(agentId);
  return normalizeAgentTopics(store, agentId, live);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Register a message in the live-message table with its recipient set and an
 * empty ack set. An existing entry with the same id is overwritten.
 * @param {{id: string}} message
 * @param {Iterable<string>} recipients
 */
function trackMessage(message, recipients) {
  const record = {
    message,
    recipients: new Set(recipients),
    ackedBy: new Set(),
  };
  liveMessages.set(message.id, record);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Look up the live tracking record for a message.
 * @param {string} messageId
 * @returns {object|null} the record, or null when the message is not tracked
 */
function getMessageRecord(messageId) {
  const record = liveMessages.get(messageId);
  return record ? record : null;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Forget a message: remove it from every recipient's queue and from the
 * live-message table. Unknown ids are ignored.
 * @param {string} messageId
 */
function removeMessage(messageId) {
  const record = liveMessages.get(messageId);
  if (!record) {
    return;
  }
  record.recipients.forEach((agentId) => {
    const queue = queuesByAgent.get(agentId);
    if (queue != null) {
      queue.delete(messageId);
    }
  });
  liveMessages.delete(messageId);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Place a message in an agent's queue with fresh delivery bookkeeping and
 * emit a 'message' event on the delivery emitter.
 * Re-queuing an id that is already present replaces its bookkeeping entry.
 * @param {string} agentId
 * @param {{id: string}} message
 */
function queueMessage(agentId, message) {
  const queue = ensureAgentQueue(agentId);
  const entry = {
    message,
    attempts: 0,
    delivered_at_ms: null,
    acked_at_ms: null,
  };
  queue.set(message.id, entry);
  deliveryEmitter.emit('message', agentId, message);
}
|
|
128
|
+
|
|
129
|
+
/**
 * Resolve the agents a message should be delivered to.
 *
 * The target is msg.to_agent, falling back to msg.to. A target of the form
 * 'topic:<name>' fans out to every agent subscribed to <name>, either at
 * runtime or according to store.getAgentsByTopic. Any other target is
 * returned as a single direct recipient.
 *
 * @param {{to_agent?: *, to?: *}} msg
 * @returns {Array} recipient ids; empty when the message has no target
 */
function resolveRecipients(msg) {
  const to = msg.to_agent ?? msg.to;

  // No target at all: report zero recipients instead of [undefined].
  if (to == null) {
    return [];
  }
  // Only strings can carry the 'topic:' prefix; calling startsWith on a
  // non-string target (e.g. a numeric id) would throw a TypeError.
  if (typeof to !== 'string' || !to.startsWith('topic:')) {
    return [to];
  }

  const topic = to.slice('topic:'.length);
  const recipients = new Set();
  for (const [agentId, topics] of runtimeTopics) {
    if (topics.has(topic)) recipients.add(agentId);
  }
  for (const agent of store.getAgentsByTopic(topic)) {
    recipients.add(agent.agent_id);
  }
  return Array.from(recipients);
}
|
|
145
|
+
|
|
146
|
+
/**
 * List an agent's pending messages: not yet acked, not expired, and (when
 * include_topics is non-empty) on one of the requested topics.
 * Results are ordered by priority descending, then creation time ascending,
 * and truncated to max_messages.
 * @param {string} agentId
 * @param {{max_messages?: number, include_topics?: string[]|null}} [options]
 * @returns {object[]} message objects
 */
function sortedPending(agentId, { max_messages = 20, include_topics = null } = {}) {
  const queue = ensureAgentQueue(agentId);
  const topicFilter = include_topics?.length ? new Set(include_topics) : null;
  const now = Date.now();

  const isPending = ({ message, acked_at_ms }) => {
    if (acked_at_ms) return false;
    if (message.expires_at_ms <= now) return false;
    return !topicFilter || topicFilter.has(message.topic);
  };

  const pending = Array.from(queue.values())
    .filter(isPending)
    .map(({ message }) => message);

  pending.sort((a, b) => (
    b.priority !== a.priority
      ? b.priority - a.priority
      : a.created_at_ms - b.created_at_ms
  ));
  return pending.slice(0, max_messages);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Record a delivery attempt of a message to an agent.
 * The attempt counter is always incremented. On the first delivery the
 * message is stamped and marked 'delivered' (in memory and in the store) and
 * a latency sample is recorded.
 * @param {string} agentId
 * @param {string} messageId
 * @returns {boolean} true only for the first delivery; false for repeats or
 *   when the message is not queued/tracked
 */
function markDelivered(agentId, messageId) {
  const delivery = queuesByAgent.get(agentId)?.get(messageId);
  const record = getMessageRecord(messageId);
  if (!(delivery && record)) {
    return false;
  }

  delivery.attempts += 1;
  if (delivery.delivered_at_ms) {
    // Already delivered once; only the attempt counter changes.
    return false;
  }

  const deliveredAt = Date.now();
  delivery.delivered_at_ms = deliveredAt;
  record.message.status = 'delivered';
  store.updateMessageStatus(messageId, 'delivered');
  deliveryLatencies.push({
    at: deliveredAt,
    ms: deliveredAt - record.message.created_at_ms,
  });
  pruneDeliveryStats(deliveredAt);
  return true;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Acknowledge messages on behalf of an agent.
 * Ids that are unknown, not queued for this agent, or already acked are
 * skipped. Once every recipient has acked, the message is marked 'acked' in
 * the store and removed from memory.
 * @param {Iterable<string>|null|undefined} ids
 * @param {string} agentId
 * @returns {number} how many messages were newly acknowledged
 */
function ackMessages(ids, agentId) {
  const ackedAt = Date.now();
  let acknowledged = 0;

  for (const messageId of ids || []) {
    const delivery = queuesByAgent.get(agentId)?.get(messageId);
    const record = getMessageRecord(messageId);
    const ackable = Boolean(delivery) && Boolean(record) && !delivery.acked_at_ms;
    if (ackable) {
      delivery.acked_at_ms = ackedAt;
      record.ackedBy.add(agentId);
      acknowledged += 1;

      const fullyAcked = record.ackedBy.size >= record.recipients.size;
      if (fullyAcked) {
        record.message.status = 'acked';
        store.updateMessageStatus(messageId, 'acked');
        removeMessage(messageId);
      }
    }
  }

  return acknowledged;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Write a message to the audit log, queue it for every resolved recipient,
 * and wake any waiter when it is a response.
 *
 * When there is at least one recipient the message is tracked, queued per
 * recipient and its status set to 'delivered'. Messages of type 'response'
 * additionally emit their payload on the response emitter under the
 * correlation id.
 *
 * @param {object} params type, from, to, topic, priority (default 5),
 *   ttl_ms (default 300000), payload (default {}), trace_id, correlation_id
 * @returns {{msg: object, recipients: string[]}} the stored message and the
 *   recipient ids it was queued for
 */
function dispatchMessage({ type, from, to, topic, priority = 5, ttl_ms = 300000, payload = {}, trace_id, correlation_id }) {
  const envelope = { type, from, to, topic, priority, ttl_ms, payload, trace_id, correlation_id };
  const msg = store.auditLog(envelope);
  const recipients = uniqueStrings(resolveRecipients(msg));

  if (recipients.length > 0) {
    trackMessage(msg, recipients);
    recipients.forEach((agentId) => {
      queueMessage(agentId, msg);
    });
    msg.status = 'delivered';
    store.updateMessageStatus(msg.id, 'delivered');
  }

  const isResponse = msg.type === 'response';
  if (isResponse) {
    responseEmitter.emit(msg.correlation_id, msg.payload);
  }
  return { msg, recipients };
}
|
|
236
|
+
|
|
237
|
+
/**
 * Project an assign job onto the fixed set of fields exposed to callers,
 * then overlay `extra` (extra keys win on conflict).
 * @param {object|null|undefined} job
 * @param {object} [extra={}]
 * @returns {object|null} null when job is falsy
 */
function buildAssignSnapshot(job, extra = {}) {
  if (!job) {
    return null;
  }
  const exposedFields = [
    'job_id',
    'supervisor_agent',
    'worker_agent',
    'topic',
    'task',
    'status',
    'attempt',
    'retry_count',
    'max_retries',
    'timeout_ms',
    'deadline_ms',
    'trace_id',
    'correlation_id',
    'last_message_id',
    'result',
    'error',
    'updated_at_ms',
    'completed_at_ms',
  ];
  const snapshot = {};
  for (const field of exposedFields) {
    snapshot[field] = job[field];
  }
  return Object.assign(snapshot, extra);
}
|
|
261
|
+
|
|
262
|
+
/**
 * Send an 'assign.result' event about a job to its supervisor.
 * The payload carries the event name, the job snapshot and any extra fields.
 * Priority is at least 5; TTL falls back from job.ttl_ms to job.timeout_ms
 * to 600000 ms.
 * @param {object|null} job
 * @param {string} event e.g. 'progress', 'completed', 'retry_scheduled'
 * @param {object} [extra={}]
 * @returns {object|null} the dispatched message, or null when the job has no
 *   supervisor
 */
function notifyAssignSupervisor(job, event, extra = {}) {
  if (!job?.supervisor_agent) {
    return null;
  }
  const envelope = {
    type: 'event',
    from: job.worker_agent || 'assign-router',
    to: job.supervisor_agent,
    topic: 'assign.result',
    priority: Math.max(5, job.priority || 5),
    ttl_ms: job.ttl_ms || job.timeout_ms || 600000,
    payload: { event, ...buildAssignSnapshot(job), ...extra },
    trace_id: job.trace_id,
    correlation_id: job.correlation_id,
  };
  const dispatched = dispatchMessage(envelope);
  return dispatched.msg;
}
|
|
281
|
+
|
|
282
|
+
/**
 * Hand an assign job to its worker as a 'handoff' message and record the
 * message id on the job (status is left unchanged).
 * @param {object} job
 * @param {string} [reason='dispatch'] why the job is being sent, e.g.
 *   'create' or 'retry'
 * @returns {{job: object, recipients: string[], message_id: string}} job is
 *   the store's updated row when available, otherwise the input job
 */
function dispatchAssignJob(job, reason = 'dispatch') {
  const handoffPayload = {
    kind: 'assign.job',
    reason,
    assign_job_id: job.job_id,
    attempt: job.attempt,
    retry_count: job.retry_count,
    max_retries: job.max_retries,
    timeout_ms: job.timeout_ms,
    supervisor_agent: job.supervisor_agent,
    worker_agent: job.worker_agent,
    task: job.task,
    payload: job.payload || {},
  };
  const { msg, recipients } = dispatchMessage({
    type: 'handoff',
    from: job.supervisor_agent,
    to: job.worker_agent,
    topic: job.topic || 'assign.job',
    priority: job.priority || 5,
    ttl_ms: job.ttl_ms || job.timeout_ms || 600000,
    payload: handoffPayload,
    trace_id: job.trace_id,
    correlation_id: job.correlation_id,
  });

  const messageId = msg.id;
  const updated = store.updateAssignStatus(job.job_id, job.status, {
    last_message_id: messageId,
  });
  return { job: updated || job, recipients, message_id: messageId };
}
|
|
312
|
+
|
|
313
|
+
/**
 * Re-queue an assign job for another attempt, re-dispatch it to the worker
 * and notify the supervisor with a 'retry_scheduled' event.
 *
 * @param {object|null} job current job row
 * @param {string} reason why the retry is happening (e.g. 'timed_out', 'manual')
 * @param {*} [error=null] error recorded with the retry
 * @param {string} [requested_by='system'] who asked for the retry
 * @returns {{ok: true, data: object}|{ok: false, error: {code: string, message: string}}}
 *   Failure codes: ASSIGN_NOT_FOUND (no job, or the store no longer has it)
 *   and ASSIGN_RETRY_EXHAUSTED (retry budget used up).
 */
function scheduleAssignRetry(job, reason, error = null, requested_by = 'system') {
  if (!job) {
    return { ok: false, error: { code: 'ASSIGN_NOT_FOUND', message: 'assign job not found' } };
  }
  if (job.retry_count >= job.max_retries) {
    return {
      ok: false,
      error: {
        code: 'ASSIGN_RETRY_EXHAUSTED',
        message: `retry exhausted for ${job.job_id}`,
      },
    };
  }

  const queued = store.retryAssign(job.job_id, {
    error,
    timeout_ms: job.timeout_ms,
    ttl_ms: job.ttl_ms,
  });
  // If the store returns nothing (e.g. the row no longer exists), report a
  // normal error result instead of throwing a TypeError during dispatch.
  if (!queued) {
    return {
      ok: false,
      error: { code: 'ASSIGN_NOT_FOUND', message: `assign job not found: ${job.job_id}` },
    };
  }

  const dispatched = dispatchAssignJob(queued, 'retry');
  notifyAssignSupervisor(dispatched.job, 'retry_scheduled', {
    retry_reason: reason,
    requested_by,
  });
  return {
    ok: true,
    data: {
      retried: true,
      ...buildAssignSnapshot(dispatched.job, {
        retry_reason: reason,
        requested_by,
      }),
    },
  };
}
|
|
348
|
+
|
|
349
|
+
/**
 * Mark an assign job as timed out, then either schedule a retry (when retry
 * budget remains) or notify the supervisor that the job completed with
 * completion_reason 'timed_out'.
 *
 * @param {object} job the job whose deadline has passed
 * @returns {{ok: boolean, data?: object, error?: {code: string, message: string}}}
 *   The retry result, a timed-out snapshot, or ASSIGN_NOT_FOUND when the
 *   store no longer returns the job.
 */
function handleAssignTimeout(job) {
  const timedOut = store.updateAssignStatus(job.job_id, 'timed_out', {
    error: job.error ?? { message: 'assign job timed out' },
  });
  // The status update may yield nothing (other call sites already fall back
  // with `updated || job`). Dereferencing it here would throw and, because the
  // sweeper swallows errors, silently abort the rest of the sweep.
  if (!timedOut) {
    return {
      ok: false,
      error: { code: 'ASSIGN_NOT_FOUND', message: `assign job not found: ${job.job_id}` },
    };
  }

  if (timedOut.retry_count < timedOut.max_retries) {
    return scheduleAssignRetry(timedOut, 'timed_out', timedOut.error, 'sweeper');
  }

  notifyAssignSupervisor(timedOut, 'completed', {
    completion_reason: 'timed_out',
  });
  return { ok: true, data: buildAssignSnapshot(timedOut, { completion_reason: 'timed_out' }) };
}
|
|
363
|
+
|
|
364
|
+
const router = {
|
|
365
|
+
responseEmitter,
|
|
366
|
+
deliveryEmitter,
|
|
367
|
+
|
|
368
|
+
registerAgent(args) {
|
|
369
|
+
const result = store.registerAgent(args);
|
|
370
|
+
upsertRuntimeTopics(args.agent_id, args.topics || [], { replace: true });
|
|
371
|
+
return result;
|
|
372
|
+
},
|
|
373
|
+
|
|
374
|
+
refreshAgentLease(agentId, ttlMs = 30000) {
|
|
375
|
+
return store.refreshLease(agentId, ttlMs);
|
|
376
|
+
},
|
|
377
|
+
|
|
378
|
+
subscribeAgent(agentId, topics, { replace = false } = {}) {
|
|
379
|
+
const nextTopics = upsertRuntimeTopics(agentId, topics, { replace });
|
|
380
|
+
return { agent_id: agentId, topics: nextTopics };
|
|
381
|
+
},
|
|
382
|
+
|
|
383
|
+
getSubscribedTopics(agentId) {
|
|
384
|
+
return listRuntimeTopics(agentId);
|
|
385
|
+
},
|
|
386
|
+
|
|
387
|
+
updateAgentStatus(agentId, status) {
|
|
388
|
+
if (status === 'offline') {
|
|
389
|
+
runtimeTopics.delete(agentId);
|
|
390
|
+
}
|
|
391
|
+
return store.updateAgentStatus(agentId, status);
|
|
392
|
+
},
|
|
393
|
+
|
|
394
|
+
route(msg) {
|
|
395
|
+
const recipients = uniqueStrings(resolveRecipients(msg));
|
|
396
|
+
if (!recipients.length) return 0;
|
|
397
|
+
if (!getMessageRecord(msg.id)) {
|
|
398
|
+
trackMessage(msg, recipients);
|
|
399
|
+
}
|
|
400
|
+
for (const agentId of recipients) {
|
|
401
|
+
queueMessage(agentId, msg);
|
|
402
|
+
}
|
|
403
|
+
store.updateMessageStatus(msg.id, 'delivered');
|
|
404
|
+
return recipients.length;
|
|
405
|
+
},
|
|
406
|
+
|
|
407
|
+
getPendingMessages(agentId, options = {}) {
|
|
408
|
+
return sortedPending(agentId, options);
|
|
409
|
+
},
|
|
410
|
+
|
|
411
|
+
markMessagePushed(agentId, messageId) {
|
|
412
|
+
return markDelivered(agentId, messageId);
|
|
413
|
+
},
|
|
414
|
+
|
|
415
|
+
drainAgent(agentId, { max_messages = 20, include_topics = null, auto_ack = false } = {}) {
|
|
416
|
+
const messages = sortedPending(agentId, { max_messages, include_topics });
|
|
417
|
+
for (const message of messages) {
|
|
418
|
+
markDelivered(agentId, message.id);
|
|
419
|
+
}
|
|
420
|
+
if (auto_ack && messages.length) {
|
|
421
|
+
ackMessages(messages.map((message) => message.id), agentId);
|
|
422
|
+
}
|
|
423
|
+
return messages;
|
|
424
|
+
},
|
|
425
|
+
|
|
426
|
+
ackMessages(ids, agentId) {
|
|
427
|
+
return ackMessages(ids, agentId);
|
|
428
|
+
},
|
|
429
|
+
|
|
430
|
+
async handleAsk({
|
|
431
|
+
from, to, topic, question, context_refs,
|
|
432
|
+
payload = {}, priority = 5, ttl_ms = 300000,
|
|
433
|
+
await_response_ms = 0, trace_id, correlation_id,
|
|
434
|
+
}) {
|
|
435
|
+
const cid = correlation_id || uuidv7();
|
|
436
|
+
const tid = trace_id || uuidv7();
|
|
437
|
+
|
|
438
|
+
const { msg } = dispatchMessage({
|
|
439
|
+
type: 'request',
|
|
440
|
+
from,
|
|
441
|
+
to,
|
|
442
|
+
topic,
|
|
443
|
+
priority,
|
|
444
|
+
ttl_ms,
|
|
445
|
+
payload: { question, context_refs, ...payload },
|
|
446
|
+
correlation_id: cid,
|
|
447
|
+
trace_id: tid,
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
if (await_response_ms <= 0) {
|
|
451
|
+
return {
|
|
452
|
+
ok: true,
|
|
453
|
+
data: { request_message_id: msg.id, correlation_id: cid, trace_id: tid, state: 'queued' },
|
|
454
|
+
};
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
try {
|
|
458
|
+
const [response] = await once(responseEmitter, cid, {
|
|
459
|
+
signal: AbortSignal.timeout(Math.min(await_response_ms, 30000)),
|
|
460
|
+
});
|
|
461
|
+
return {
|
|
462
|
+
ok: true,
|
|
463
|
+
data: { request_message_id: msg.id, correlation_id: cid, trace_id: tid, state: 'answered', response },
|
|
464
|
+
};
|
|
465
|
+
} catch {
|
|
466
|
+
const resp = store.getResponseByCorrelation(cid);
|
|
467
|
+
if (resp) {
|
|
468
|
+
return {
|
|
469
|
+
ok: true,
|
|
470
|
+
data: { request_message_id: msg.id, correlation_id: cid, trace_id: tid, state: 'answered', response: resp.payload },
|
|
471
|
+
};
|
|
472
|
+
}
|
|
473
|
+
return {
|
|
474
|
+
ok: true,
|
|
475
|
+
data: { request_message_id: msg.id, correlation_id: cid, trace_id: tid, state: 'delivered' },
|
|
476
|
+
};
|
|
477
|
+
}
|
|
478
|
+
},
|
|
479
|
+
|
|
480
|
+
handlePublish({
|
|
481
|
+
from, to, topic, priority = 5, ttl_ms = 300000,
|
|
482
|
+
payload = {}, trace_id, correlation_id, message_type,
|
|
483
|
+
}) {
|
|
484
|
+
const type = message_type || (correlation_id ? 'response' : 'event');
|
|
485
|
+
const { msg, recipients } = dispatchMessage({
|
|
486
|
+
type,
|
|
487
|
+
from,
|
|
488
|
+
to,
|
|
489
|
+
topic,
|
|
490
|
+
priority,
|
|
491
|
+
ttl_ms,
|
|
492
|
+
payload,
|
|
493
|
+
trace_id: trace_id || uuidv7(),
|
|
494
|
+
correlation_id: correlation_id || uuidv7(),
|
|
495
|
+
});
|
|
496
|
+
return {
|
|
497
|
+
ok: true,
|
|
498
|
+
data: {
|
|
499
|
+
message_id: msg.id,
|
|
500
|
+
fanout_count: recipients.length,
|
|
501
|
+
expires_at_ms: msg.expires_at_ms,
|
|
502
|
+
},
|
|
503
|
+
};
|
|
504
|
+
},
|
|
505
|
+
|
|
506
|
+
handleHandoff({
|
|
507
|
+
from, to, topic, task, acceptance_criteria, context_refs,
|
|
508
|
+
priority = 5, ttl_ms = 600000, trace_id, correlation_id,
|
|
509
|
+
}) {
|
|
510
|
+
const { msg } = dispatchMessage({
|
|
511
|
+
type: 'handoff',
|
|
512
|
+
from,
|
|
513
|
+
to,
|
|
514
|
+
topic,
|
|
515
|
+
priority,
|
|
516
|
+
ttl_ms,
|
|
517
|
+
payload: { task, acceptance_criteria, context_refs },
|
|
518
|
+
trace_id: trace_id || uuidv7(),
|
|
519
|
+
correlation_id: correlation_id || uuidv7(),
|
|
520
|
+
});
|
|
521
|
+
return {
|
|
522
|
+
ok: true,
|
|
523
|
+
data: { handoff_message_id: msg.id, state: 'queued', assigned_to: to },
|
|
524
|
+
};
|
|
525
|
+
},
|
|
526
|
+
|
|
527
|
+
assignAsync({
|
|
528
|
+
supervisor_agent,
|
|
529
|
+
worker_agent,
|
|
530
|
+
topic = 'assign.job',
|
|
531
|
+
task = '',
|
|
532
|
+
payload = {},
|
|
533
|
+
priority = 5,
|
|
534
|
+
ttl_ms = 600000,
|
|
535
|
+
timeout_ms = 600000,
|
|
536
|
+
max_retries = 0,
|
|
537
|
+
trace_id,
|
|
538
|
+
correlation_id,
|
|
539
|
+
}) {
|
|
540
|
+
const job = store.createAssign({
|
|
541
|
+
supervisor_agent,
|
|
542
|
+
worker_agent,
|
|
543
|
+
topic,
|
|
544
|
+
task,
|
|
545
|
+
payload,
|
|
546
|
+
priority,
|
|
547
|
+
ttl_ms,
|
|
548
|
+
timeout_ms,
|
|
549
|
+
max_retries,
|
|
550
|
+
trace_id,
|
|
551
|
+
correlation_id,
|
|
552
|
+
});
|
|
553
|
+
const dispatched = dispatchAssignJob(job, 'create');
|
|
554
|
+
return {
|
|
555
|
+
ok: true,
|
|
556
|
+
data: {
|
|
557
|
+
assigned_to: worker_agent,
|
|
558
|
+
...buildAssignSnapshot(dispatched.job),
|
|
559
|
+
},
|
|
560
|
+
};
|
|
561
|
+
},
|
|
562
|
+
|
|
563
|
+
reportAssignResult({
|
|
564
|
+
job_id,
|
|
565
|
+
worker_agent,
|
|
566
|
+
status,
|
|
567
|
+
attempt,
|
|
568
|
+
result,
|
|
569
|
+
error,
|
|
570
|
+
payload = {},
|
|
571
|
+
metadata = {},
|
|
572
|
+
}) {
|
|
573
|
+
const job = store.getAssign(job_id);
|
|
574
|
+
if (!job) {
|
|
575
|
+
return {
|
|
576
|
+
ok: false,
|
|
577
|
+
error: { code: 'ASSIGN_NOT_FOUND', message: `assign job not found: ${job_id}` },
|
|
578
|
+
};
|
|
579
|
+
}
|
|
580
|
+
if (worker_agent && worker_agent !== job.worker_agent) {
|
|
581
|
+
return {
|
|
582
|
+
ok: false,
|
|
583
|
+
error: { code: 'ASSIGN_WORKER_MISMATCH', message: `worker mismatch: ${worker_agent}` },
|
|
584
|
+
};
|
|
585
|
+
}
|
|
586
|
+
if (Number.isFinite(Number(attempt)) && Number(attempt) !== job.attempt) {
|
|
587
|
+
return {
|
|
588
|
+
ok: false,
|
|
589
|
+
error: {
|
|
590
|
+
code: 'ASSIGN_ATTEMPT_MISMATCH',
|
|
591
|
+
message: `stale assign result for attempt ${attempt} (current ${job.attempt})`,
|
|
592
|
+
},
|
|
593
|
+
};
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
const mergedMetadata = {
|
|
597
|
+
...(payload?.metadata || {}),
|
|
598
|
+
...(metadata || {}),
|
|
599
|
+
};
|
|
600
|
+
const normalizedStatus = normalizeAssignTerminalStatus(
|
|
601
|
+
status || payload?.status,
|
|
602
|
+
mergedMetadata,
|
|
603
|
+
);
|
|
604
|
+
const nextResult = result ?? (Object.prototype.hasOwnProperty.call(payload || {}, 'result') ? payload.result : payload);
|
|
605
|
+
const nextError = error ?? payload?.error ?? null;
|
|
606
|
+
|
|
607
|
+
if (normalizedStatus === 'running') {
|
|
608
|
+
const running = store.updateAssignStatus(job.job_id, 'running', {
|
|
609
|
+
started_at_ms: job.started_at_ms || Date.now(),
|
|
610
|
+
deadline_ms: Date.now() + clampAssignDuration(job.timeout_ms, job.timeout_ms),
|
|
611
|
+
result: nextResult,
|
|
612
|
+
error: nextError,
|
|
613
|
+
});
|
|
614
|
+
notifyAssignSupervisor(running, 'progress');
|
|
615
|
+
return { ok: true, data: buildAssignSnapshot(running) };
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
const finalized = store.updateAssignStatus(job.job_id, normalizedStatus, {
|
|
619
|
+
result: nextResult,
|
|
620
|
+
error: nextError,
|
|
621
|
+
});
|
|
622
|
+
|
|
623
|
+
if ((normalizedStatus === 'failed' || normalizedStatus === 'timed_out')
|
|
624
|
+
&& finalized.retry_count < finalized.max_retries) {
|
|
625
|
+
return scheduleAssignRetry(finalized, normalizedStatus, nextError, worker_agent || finalized.worker_agent);
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
notifyAssignSupervisor(finalized, 'completed');
|
|
629
|
+
return { ok: true, data: buildAssignSnapshot(finalized) };
|
|
630
|
+
},
|
|
631
|
+
|
|
632
|
+
getAssignStatus({ job_id, ...filters } = {}) {
|
|
633
|
+
if (job_id) {
|
|
634
|
+
const job = store.getAssign(job_id);
|
|
635
|
+
return job
|
|
636
|
+
? { ok: true, data: buildAssignSnapshot(job) }
|
|
637
|
+
: { ok: false, error: { code: 'ASSIGN_NOT_FOUND', message: `assign job not found: ${job_id}` } };
|
|
638
|
+
}
|
|
639
|
+
return {
|
|
640
|
+
ok: true,
|
|
641
|
+
data: {
|
|
642
|
+
assigns: store.listAssigns(filters).map((job) => buildAssignSnapshot(job)),
|
|
643
|
+
},
|
|
644
|
+
};
|
|
645
|
+
},
|
|
646
|
+
|
|
647
|
+
retryAssign(job_id, { reason = 'manual', requested_by = 'manual' } = {}) {
|
|
648
|
+
const job = store.getAssign(job_id);
|
|
649
|
+
if (!job) {
|
|
650
|
+
return {
|
|
651
|
+
ok: false,
|
|
652
|
+
error: { code: 'ASSIGN_NOT_FOUND', message: `assign job not found: ${job_id}` },
|
|
653
|
+
};
|
|
654
|
+
}
|
|
655
|
+
return scheduleAssignRetry(job, reason, job.error, requested_by);
|
|
656
|
+
},
|
|
657
|
+
|
|
658
|
+
sweepExpired() {
|
|
659
|
+
const now = Date.now();
|
|
660
|
+
let expired = 0;
|
|
661
|
+
for (const [messageId, record] of Array.from(liveMessages.entries())) {
|
|
662
|
+
if (record.message.expires_at_ms > now) continue;
|
|
663
|
+
store.moveToDeadLetter(messageId, 'ttl_expired', null);
|
|
664
|
+
removeMessage(messageId);
|
|
665
|
+
expired += 1;
|
|
666
|
+
}
|
|
667
|
+
return { messages: expired };
|
|
668
|
+
},
|
|
669
|
+
|
|
670
|
+
sweepTimedOutAssigns() {
|
|
671
|
+
const expiredAssigns = store.listAssigns({
|
|
672
|
+
statuses: Array.from(ASSIGN_PENDING_STATUSES),
|
|
673
|
+
active_before_ms: Date.now(),
|
|
674
|
+
limit: 100,
|
|
675
|
+
});
|
|
676
|
+
let timed_out = 0;
|
|
677
|
+
let retried = 0;
|
|
678
|
+
|
|
679
|
+
for (const job of expiredAssigns) {
|
|
680
|
+
const result = handleAssignTimeout(job);
|
|
681
|
+
timed_out += 1;
|
|
682
|
+
if (result?.data?.retried) retried += 1;
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
return { timed_out, retried };
|
|
686
|
+
},
|
|
687
|
+
|
|
688
|
+
startSweeper() {
|
|
689
|
+
if (sweepTimer) return;
|
|
690
|
+
sweepTimer = setInterval(() => {
|
|
691
|
+
try {
|
|
692
|
+
router.sweepExpired();
|
|
693
|
+
router.sweepTimedOutAssigns();
|
|
694
|
+
} catch {}
|
|
695
|
+
}, 10000);
|
|
696
|
+
staleTimer = setInterval(() => {
|
|
697
|
+
try { store.sweepStaleAgents(); } catch {}
|
|
698
|
+
}, 120000);
|
|
699
|
+
sweepTimer.unref();
|
|
700
|
+
staleTimer.unref();
|
|
701
|
+
},
|
|
702
|
+
|
|
703
|
+
stopSweeper() {
|
|
704
|
+
if (sweepTimer) { clearInterval(sweepTimer); sweepTimer = null; }
|
|
705
|
+
if (staleTimer) { clearInterval(staleTimer); staleTimer = null; }
|
|
706
|
+
},
|
|
707
|
+
|
|
708
|
+
getQueueDepths() {
|
|
709
|
+
const counts = { urgent: 0, normal: 0, dlq: store.getAuditStats().dlq };
|
|
710
|
+
for (const record of liveMessages.values()) {
|
|
711
|
+
const pending = record.recipients.size > record.ackedBy.size;
|
|
712
|
+
if (!pending) continue;
|
|
713
|
+
if (record.message.priority >= 7) counts.urgent += 1;
|
|
714
|
+
else counts.normal += 1;
|
|
715
|
+
}
|
|
716
|
+
return counts;
|
|
717
|
+
},
|
|
718
|
+
|
|
719
|
+
getDeliveryStats() {
|
|
720
|
+
pruneDeliveryStats();
|
|
721
|
+
if (!deliveryLatencies.length) {
|
|
722
|
+
return { total_deliveries: 0, avg_delivery_ms: 0 };
|
|
723
|
+
}
|
|
724
|
+
const total = deliveryLatencies.reduce((sum, item) => sum + item.ms, 0);
|
|
725
|
+
return {
|
|
726
|
+
total_deliveries: deliveryLatencies.length,
|
|
727
|
+
avg_delivery_ms: Math.round(total / deliveryLatencies.length),
|
|
728
|
+
};
|
|
729
|
+
},
|
|
730
|
+
|
|
731
|
+
getStatus(scope = 'hub', { agent_id, trace_id, include_metrics = true } = {}) {
|
|
732
|
+
const data = {};
|
|
733
|
+
|
|
734
|
+
if (scope === 'hub' || scope === 'queue') {
|
|
735
|
+
data.hub = {
|
|
736
|
+
state: 'healthy',
|
|
737
|
+
uptime_ms: process.uptime() * 1000 | 0,
|
|
738
|
+
realtime_transport: 'named-pipe',
|
|
739
|
+
audit_store: 'sqlite',
|
|
740
|
+
};
|
|
741
|
+
if (include_metrics) {
|
|
742
|
+
const depths = router.getQueueDepths();
|
|
743
|
+
const stats = router.getDeliveryStats();
|
|
744
|
+
const auditStats = store.getAuditStats();
|
|
745
|
+
data.queues = {
|
|
746
|
+
urgent_depth: depths.urgent,
|
|
747
|
+
normal_depth: depths.normal,
|
|
748
|
+
dlq_depth: depths.dlq,
|
|
749
|
+
avg_delivery_ms: stats.avg_delivery_ms,
|
|
750
|
+
};
|
|
751
|
+
data.assigns = {
|
|
752
|
+
queued: auditStats.assign_queued,
|
|
753
|
+
running: auditStats.assign_running,
|
|
754
|
+
failed: auditStats.assign_failed,
|
|
755
|
+
timed_out: auditStats.assign_timed_out,
|
|
756
|
+
};
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
if (scope === 'agent' && agent_id) {
|
|
761
|
+
const agent = store.getAgent(agent_id);
|
|
762
|
+
if (agent) {
|
|
763
|
+
data.agent = {
|
|
764
|
+
agent_id: agent.agent_id,
|
|
765
|
+
status: agent.status,
|
|
766
|
+
pending: sortedPending(agent_id, { max_messages: 1000 }).length,
|
|
767
|
+
last_seen_ms: agent.last_seen_ms,
|
|
768
|
+
topics: listRuntimeTopics(agent_id),
|
|
769
|
+
};
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
if (scope === 'trace' && trace_id) {
|
|
774
|
+
data.trace = store.getMessagesByTrace(trace_id);
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
return { ok: true, data };
|
|
778
|
+
},
|
|
779
|
+
};
|
|
780
|
+
|
|
781
|
+
return router;
|
|
782
|
+
}
|