@askexenow/exe-os 0.9.295 → 0.9.296
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deploy/compose/cloudflared/config.yml.example +14 -9
- package/deploy/compose/docker-compose.yml +86 -8
- package/deploy/compose/sso-edge/default.conf.template +87 -0
- package/deploy/compose/sso-edge/entrypoint.sh +23 -0
- package/deploy/compose/sso-edge/sso-redirect.conf +63 -0
- package/deploy/stack-manifests/v0.9.json +1 -1
- package/dist/active-agent-AFX2FODG.js +28 -0
- package/dist/active-agent-E2IJA7YX.js +27 -0
- package/dist/agentic-ontology-A2YUZK5O.js +25 -0
- package/dist/assets/com.askexe.exed.plist +4 -1
- package/dist/backfill-metadata-OC7EOD5U.js +600 -0
- package/dist/behaviors-H5ZOVHDH.js +46 -0
- package/dist/bin/agentic-ontology-backfill.js +5 -5
- package/dist/bin/agentic-reflection-backfill.js +6 -6
- package/dist/bin/agentic-semantic-label.js +5 -5
- package/dist/bin/backfill-conversations.js +6 -6
- package/dist/bin/backfill-responses.js +6 -6
- package/dist/bin/backfill-vectors.js +8 -8
- package/dist/bin/bulk-sync-postgres.js +7 -7
- package/dist/bin/cc-doctor.js +4 -4
- package/dist/bin/cleanup-stale-review-tasks.js +11 -11
- package/dist/bin/cli.js +16 -16
- package/dist/bin/deferred-daemon-restart.js +1 -1
- package/dist/bin/exe-agent-config.js +2 -2
- package/dist/bin/exe-agent.js +4 -4
- package/dist/bin/exe-assign.js +8 -8
- package/dist/bin/exe-boot.js +21 -18
- package/dist/bin/exe-call.js +4 -4
- package/dist/bin/exe-cloud.js +7 -7
- package/dist/bin/exe-dispatch.js +11 -11
- package/dist/bin/exe-doctor.js +3 -2
- package/dist/bin/exe-export-behaviors.js +7 -7
- package/dist/bin/exe-forget.js +6 -6
- package/dist/bin/exe-gateway.js +7 -7
- package/dist/bin/exe-healthcheck.js +6 -4
- package/dist/bin/exe-heartbeat.js +11 -11
- package/dist/bin/exe-kill.js +14 -14
- package/dist/bin/exe-launch-agent.js +18 -18
- package/dist/bin/exe-new-employee.js +6 -6
- package/dist/bin/exe-pending-messages.js +12 -12
- package/dist/bin/exe-pending-notifications.js +11 -11
- package/dist/bin/exe-pending-reviews.js +11 -11
- package/dist/bin/exe-rename.js +4 -4
- package/dist/bin/exe-review.js +13 -13
- package/dist/bin/exe-search.js +5 -5
- package/dist/bin/exe-session-cleanup.js +16 -16
- package/dist/bin/exe-settings.js +39 -9
- package/dist/bin/exe-start-codex.js +11 -11
- package/dist/bin/exe-start-opencode.js +8 -8
- package/dist/bin/exe-status.js +12 -12
- package/dist/bin/exe-team.js +3 -3
- package/dist/bin/git-sweep.js +12 -12
- package/dist/bin/graph-backfill.js +4 -4
- package/dist/bin/graph-export.js +5 -5
- package/dist/bin/import-history.js +7 -7
- package/dist/bin/install-launchd.js +13 -6
- package/dist/bin/install.js +26 -14
- package/dist/bin/intercom-check.js +4 -4
- package/dist/bin/mcp-sessions.js +2 -2
- package/dist/bin/orchestration-metrics.js +4 -4
- package/dist/bin/postgres-agentic-reflection-backfill.js +2 -2
- package/dist/bin/postgres-agentic-semantic-backfill.js +1 -1
- package/dist/bin/scan-tasks.js +11 -11
- package/dist/bin/setup.js +1 -1
- package/dist/bin/shard-migrate.js +4 -4
- package/dist/bin/stack-update.js +2 -2
- package/dist/bin/vps-health-gate.js +1 -1
- package/dist/capability-cards-4USI7CUW.js +89 -0
- package/dist/capacity-monitor-WLCBTEYR.js +51 -0
- package/dist/catchup-brief-ZR3NX6LZ.js +175 -0
- package/dist/chunk-22TVSRQQ.js +226 -0
- package/dist/chunk-2E43UXRH.js +395 -0
- package/dist/chunk-2PIGT6UJ.js +460 -0
- package/dist/chunk-3XTMW2MZ.js +535 -0
- package/dist/chunk-465PQFTH.js +262 -0
- package/dist/chunk-5CCXU2AW.js +129 -0
- package/dist/chunk-5D6MPWR7.js +1094 -0
- package/dist/chunk-5Q4MR6SL.js +123 -0
- package/dist/chunk-6327RBWR.js +345 -0
- package/dist/chunk-6MZZREZY.js +199 -0
- package/dist/chunk-7DI2Q4O5.js +1186 -0
- package/dist/chunk-7PW5VNIY.js +122 -0
- package/dist/chunk-7T7Y56HW.js +43 -0
- package/dist/chunk-7UHCWCLT.js +128 -0
- package/dist/chunk-A2ZUMF6L.js +1350 -0
- package/dist/chunk-AKV44JEH.js +185 -0
- package/dist/chunk-ANHWGX5N.js +735 -0
- package/dist/chunk-BQ3P4TKD.js +97 -0
- package/dist/chunk-BUZMT3KZ.js +604 -0
- package/dist/chunk-C2SBESBO.js +210 -0
- package/dist/chunk-CLSXZUZW.js +51 -0
- package/dist/chunk-CONHLVAR.js +1079 -0
- package/dist/chunk-D3WTZPFX.js +456 -0
- package/dist/chunk-DE6SOIYL.js +197 -0
- package/dist/chunk-EIVNMA3Q.js +284 -0
- package/dist/chunk-EJIF4FNT.js +12 -0
- package/dist/chunk-FDFOW564.js +171 -0
- package/dist/chunk-GZUBJ5EC.js +127 -0
- package/dist/chunk-HGZITN22.js +105 -0
- package/dist/chunk-HSRKDU6X.js +362 -0
- package/dist/chunk-IIEN2PHV.js +85 -0
- package/dist/chunk-JQ56VLMM.js +567 -0
- package/dist/chunk-JVHHXRFY.js +280 -0
- package/dist/chunk-JXCXGZ3S.js +55 -0
- package/dist/chunk-K5ZO532Q.js +4388 -0
- package/dist/chunk-K6CAAMXF.js +97 -0
- package/dist/chunk-KA26YTNU.js +81 -0
- package/dist/chunk-KMUW5C3R.js +381 -0
- package/dist/chunk-KOO3J5PV.js +20 -0
- package/dist/chunk-LSV7OFIH.js +290 -0
- package/dist/chunk-LSVFDVNY.js +1158 -0
- package/dist/chunk-LXDQTW32.js +230 -0
- package/dist/chunk-MEP7OUVZ.js +181 -0
- package/dist/chunk-MN2B2LKS.js +240 -0
- package/dist/chunk-N2EAYPYQ.js +1352 -0
- package/dist/chunk-N7I2A667.js +70 -0
- package/dist/chunk-NLZHVIOP.js +630 -0
- package/dist/chunk-NUH5TRZL.js +227 -0
- package/dist/chunk-OAHEIH3G.js +167 -0
- package/dist/chunk-OBHRQGCK.js +58 -0
- package/dist/chunk-ODFA7B2V.js +54 -0
- package/dist/chunk-OSNUP45F.js +731 -0
- package/dist/chunk-OTPRHBTO.js +33 -0
- package/dist/chunk-P6MUA4QU.js +157 -0
- package/dist/chunk-PGIOFKSK.js +2093 -0
- package/dist/chunk-PSE7VHWK.js +50 -0
- package/dist/chunk-QIFUVZFW.js +331 -0
- package/dist/chunk-RDPXKTVK.js +221 -0
- package/dist/chunk-RKYTYJGB.js +76 -0
- package/dist/chunk-RXLR6EFM.js +348 -0
- package/dist/chunk-SDB67PQJ.js +159 -0
- package/dist/chunk-SF2T7MP3.js +402 -0
- package/dist/chunk-SLU3FRFQ.js +2133 -0
- package/dist/chunk-SNDZJ5IV.js +214 -0
- package/dist/chunk-STEEAABW.js +448 -0
- package/dist/chunk-TUTWNHIQ.js +244 -0
- package/dist/chunk-UDP35QBR.js +30 -0
- package/dist/chunk-UKFHNJBI.js +85 -0
- package/dist/chunk-VC2DTK2X.js +382 -0
- package/dist/chunk-VRRAE5JX.js +836 -0
- package/dist/chunk-VVJTBQPR.js +38 -0
- package/dist/chunk-W3EQ362K.js +581 -0
- package/dist/chunk-WHIXIFHC.js +2242 -0
- package/dist/chunk-WRNGJJNR.js +377 -0
- package/dist/chunk-WUKHLCBE.js +3313 -0
- package/dist/chunk-WVPLHGDG.js +150 -0
- package/dist/chunk-XJZBSTL5.js +204 -0
- package/dist/chunk-Y3PMNUM5.js +304 -0
- package/dist/chunk-YHVS4QOV.js +14597 -0
- package/dist/chunk-YJ2OYAOC.js +668 -0
- package/dist/chunk-YYAD2GXX.js +128 -0
- package/dist/chunk-ZQML7EWE.js +333 -0
- package/dist/co-activation-XJLH46OX.js +74 -0
- package/dist/co-occurrence-GNN2X526.js +95 -0
- package/dist/code-context-index-OCPRLFG5.js +30 -0
- package/dist/core-memory-J4W2IYOF.js +110 -0
- package/dist/crdt-sync-QCBTSHIH.js +33 -0
- package/dist/crm-webhook-EM442VUW.js +10 -0
- package/dist/cto-delegation-gate-MLJMVHBK.js +280 -0
- package/dist/daemon-orchestration-2VNLZVTW.js +139 -0
- package/dist/db-backup-VUGFTPJ4.js +43 -0
- package/dist/doc-graph-extractor-PNRSFPSS.js +133 -0
- package/dist/dreaming-SK5VEQRF.js +34 -0
- package/dist/entity-boost-TQWWJUC2.js +375 -0
- package/dist/exe-drift-N34UPO7S.js +70 -0
- package/dist/exe-export-KACBKGVV.js +77 -0
- package/dist/exe-import-GXGDWACG.js +80 -0
- package/dist/exe-key-XPDOZBWW.js +673 -0
- package/dist/exe-snapshot-32GQKGQ5.js +338 -0
- package/dist/fast-db-init-F3TDD5VV.js +7 -0
- package/dist/gateway/index.js +8 -8
- package/dist/git-staleness-J45WNYRF.js +112 -0
- package/dist/git-task-sweep-BTGVQPFB.js +42 -0
- package/dist/global-procedures-6JCQWU4D.js +22 -0
- package/dist/graph-auto-extract-3ZQNXTPC.js +183 -0
- package/dist/hooks/bug-report-worker.js +13 -13
- package/dist/hooks/codex-stop-task-finalizer.js +13 -13
- package/dist/hooks/commit-complete.js +13 -13
- package/dist/hooks/error-recall.js +6 -6
- package/dist/hooks/exe-heartbeat-hook.js +3 -3
- package/dist/hooks/ingest-worker.js +3 -3
- package/dist/hooks/ingest.js +6 -6
- package/dist/hooks/instructions-loaded.js +4 -4
- package/dist/hooks/manifest.json +20 -20
- package/dist/hooks/notification.js +4 -4
- package/dist/hooks/post-compact.js +12 -12
- package/dist/hooks/post-tool-combined.js +6 -6
- package/dist/hooks/pre-compact.js +16 -16
- package/dist/hooks/pre-tool-use.js +16 -16
- package/dist/hooks/prompt-submit.js +24 -24
- package/dist/hooks/session-end.js +21 -21
- package/dist/hooks/session-start.js +12 -12
- package/dist/hooks/stop.js +19 -19
- package/dist/hooks/subagent-stop.js +12 -12
- package/dist/hooks/summary-worker.js +19 -19
- package/dist/index.js +19 -19
- package/dist/installer-5VPFY7SB.js +298 -0
- package/dist/installer-OENFPMA2.js +344 -0
- package/dist/installer-OIX4QOG5.js +40 -0
- package/dist/lib/cloud-sync.js +7 -7
- package/dist/lib/consolidation.js +6 -5
- package/dist/lib/database.js +2 -2
- package/dist/lib/db-daemon-client.js +2 -2
- package/dist/lib/db.js +2 -2
- package/dist/lib/embed-worker.js +1 -0
- package/dist/lib/embedder.js +7 -3
- package/dist/lib/employee-templates.js +4 -4
- package/dist/lib/employees.js +2 -2
- package/dist/lib/exe-daemon-client.js +2 -2
- package/dist/lib/exe-daemon.js +160 -79
- package/dist/lib/hybrid-search.js +5 -5
- package/dist/lib/identity.js +2 -2
- package/dist/lib/messaging.js +11 -11
- package/dist/lib/reminders.js +3 -3
- package/dist/lib/schedules.js +5 -5
- package/dist/lib/session-registry.js +4 -4
- package/dist/lib/skill-learning.js +6 -6
- package/dist/lib/store.js +4 -4
- package/dist/lib/task-router.js +3 -3
- package/dist/lib/tasks.js +12 -12
- package/dist/lib/tmux-routing.js +12 -10
- package/dist/lib/tmux-transport.js +1 -1
- package/dist/lib/token-spend.js +3 -3
- package/dist/lib/transport.js +2 -2
- package/dist/mcp/register-tools.js +62 -61
- package/dist/mcp/server.js +63 -62
- package/dist/mcp/tools/complete-reminder.js +4 -4
- package/dist/mcp/tools/create-reminder.js +4 -4
- package/dist/mcp/tools/create-task.js +14 -14
- package/dist/mcp/tools/deactivate-behavior.js +7 -7
- package/dist/mcp/tools/list-reminders.js +4 -4
- package/dist/mcp/tools/list-tasks.js +14 -14
- package/dist/mcp/tools/send-message.js +13 -13
- package/dist/mcp/tools/update-task.js +13 -13
- package/dist/mcp-http-config-PQTOLCTP.js +29 -0
- package/dist/memory-cards-4RVDZIY2.js +180 -0
- package/dist/memory-graph-extractor-L6YC7G4M.js +22 -0
- package/dist/memory-poisoning-defense-4YVJYH4G.js +224 -0
- package/dist/memory-queue-client-MVAUOZNJ.js +16 -0
- package/dist/memory-reflection-SHHDQNOH.js +244 -0
- package/dist/message-queue-client-DCKZT6X2.js +92 -0
- package/dist/notifications-JFR3G42W.js +47 -0
- package/dist/orchestration-events-MGCGPTDN.js +27 -0
- package/dist/orchestrator-DAFL2YZB.js +35 -0
- package/dist/pipeline-router-WWSZVPCH.js +15 -0
- package/dist/plan-limits-C7XCSDZC.js +28 -0
- package/dist/project-boot-N3NTBVLE.js +299 -0
- package/dist/projection-worker-MTPAPCWX.js +1084 -0
- package/dist/prospective-memory-BTIVUJSB.js +232 -0
- package/dist/reranker-UA6WVESJ.js +19 -0
- package/dist/retrieval-health-7XNZJEBF.js +12 -0
- package/dist/review-polling-4ALGMXC3.js +126 -0
- package/dist/runtime/index.js +13 -13
- package/dist/self-query-router-MROFQLQB.js +192 -0
- package/dist/session-events-CK44XOU4.js +38 -0
- package/dist/session-kill-telemetry-MT6ITDOG.js +31 -0
- package/dist/session-scope-3XDBWV65.js +88 -0
- package/dist/setup-wizard-X6DOD7MC.js +12 -0
- package/dist/skill-refinement-G2CCY3GM.js +159 -0
- package/dist/stack-update-JF7F56AS.js +84 -0
- package/dist/steward-gate-YF2CYXE7.js +15 -0
- package/dist/task-enforcement-YN6HK7NE.js +506 -0
- package/dist/task-scope-CVK6ISCZ.js +37 -0
- package/dist/tasks-crud-NTNET4JE.js +79 -0
- package/dist/tasks-notify-4LJVFPCV.js +40 -0
- package/dist/tasks-review-3V4WOIRG.js +49 -0
- package/dist/telemetry-upload-5PNUKGTM.js +741 -0
- package/dist/token-budget-E46G7ZAQ.js +86 -0
- package/dist/tool-capability-index-JDSMKJER.js +10 -0
- package/dist/tool-telemetry-J3NLS3LJ.js +17 -0
- package/dist/tui/App.js +18 -18
- package/dist/tui-data-6DOMUUCM.js +260 -0
- package/dist/wiki-acl-5UK37LKF.js +111 -0
- package/dist/worker-gate-FM7AEC7G.js +21 -0
- package/dist/workflow-engine-2EDUHUIY.js +28 -0
- package/dist/worktree-7YKKJIYR.js +28 -0
- package/dist/worktree-sweep-C3ELFGDN.js +21 -0
- package/package.json +1 -1
- package/release-notes.json +23 -23
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getClient
|
|
3
|
+
} from "./chunk-WUKHLCBE.js";
|
|
4
|
+
|
|
5
|
+
// src/lib/orchestration-events.ts
|
|
6
|
+
import crypto from "crypto";
|
|
7
|
+
// Counter names, in declaration order. The same names appear as keys of the
// `counts` object built in getOrchestrationMetrics.
var ORCHESTRATION_COUNTER_KEYS = [
  // nudges, reviews, tmux timeouts, claims, dispatch
  "duplicateNudges", "missedReviews", "tmuxCommandTimeouts", "claimCollisions",
  "crossSessionBlocks", "dispatchUnclaimed",
  // session lifecycle
  "sessionExits", "sessionBorns", "sessionIdleKills", "sessionTtlKills",
  "sessionContextKills", "sessionResumed",
  // daemon / shard health
  "eventLoopBlocks", "shardCircuitOpens",
  // task and message flow
  "taskBlocked", "taskCancelled", "checkpoints", "messagesSent",
  "reviewsCreated", "reviewsApproved", "reviewsRejected", "dispatches",
  "signalsCreated", "signalsConsumed", "workerInstancesSelected",
  "contextPressure", "mcpUnreachable", "dispatchAttempted", "dispatchFailed",
  "reviewStarted", "messagesDelivered", "messagesRead", "signalsStale",
  // tmux nudges and spawns
  "tmuxNudgeAttempted", "tmuxNudgeCompleted", "tmuxSpawnAttempted",
  "tmuxSpawnCompleted",
  // consistency and task-file maintenance
  "consistencyMismatches", "consistencyRepaired", "fileRowsResynced",
  "fileOrphansCleaned",
  // worktrees
  "worktreeCreated", "worktreeReused", "worktreeFailed", "worktreePruned",
  "worktreeSkipped",
  // registry
  "registryMissingTmux", "registryMissingEntry",
  // task groups
  "taskGroupsCreated", "taskGroupBarriersFired", "taskGroupTimeouts",
  "taskGroupPartialFailures"
];

// Names of the latency series (all in milliseconds, per the `Ms` suffix).
var ORCHESTRATION_LATENCY_METRIC_KEYS = [
  "createToClaimMs", "dispatchToClaimMs", "doneToReviewMs",
  "reviewToStartedMs", "reviewToApprovedMs", "signalToConsumedMs",
  "blockedDurationMs", "sessionDurationMs", "daemonTickMs"
];

// Case-insensitive match for payload key names considered sensitive.
// NOTE(review): presumably consulted by sanitizePayload, which is not visible
// in this part of the file — confirm.
var SENSITIVE_PAYLOAD_KEY = /(context|prompt|secret|token|password|passphrase|recovery|raw|key)/i;

// Upper bound on queued best-effort writes; recordOrchestrationEventBestEffort
// drops new events once bestEffortPending reaches this value.
var BEST_EFFORT_QUEUE_LIMIT = 200;

// Clients whose schema setup has finished.
var schemaInitializedClients = /* @__PURE__ */ new WeakSet();

// Clients whose schema setup is still running, mapped to the in-flight promise.
var schemaInitializingClients = /* @__PURE__ */ new WeakMap();

// Tail of the promise chain that serializes best-effort writes.
var bestEffortQueue = Promise.resolve();

// Number of best-effort writes that have been queued but not yet settled.
var bestEffortPending = 0;
|
|
78
|
+
/**
 * Make sure the orchestration_events schema exists for `client`, running the
 * setup at most once per client object.
 *
 * - A client already recorded in `schemaInitializedClients` resolves at once.
 * - A client whose setup is still running shares the in-flight promise.
 * - Otherwise setup starts now; the in-flight entry is removed when it
 *   settles, and the client is marked initialized only on success, so a
 *   failed setup can be retried by a later call.
 *
 * @param {object} [client] - Database client; defaults to `getClient()`.
 * @returns {Promise<void>} Settles when setup finishes; rejects with the
 *   setup error if schema creation fails.
 */
async function ensureOrchestrationEventsSchema(client = getClient()) {
  if (schemaInitializedClients.has(client)) {
    return;
  }

  const inFlight = schemaInitializingClients.get(client);
  if (inFlight) {
    return inFlight;
  }

  const setup = (async () => {
    try {
      await createOrchestrationEventsSchema(client);
      schemaInitializedClients.add(client);
    } finally {
      // Runs on both success and failure; a failure still propagates.
      schemaInitializingClients.delete(client);
    }
  })();

  schemaInitializingClients.set(client, setup);
  return setup;
}
|
|
92
|
+
/**
 * Create the `orchestration_events` table and its four lookup indexes if they
 * do not already exist. Statements run one after another, table first.
 *
 * @param {{ execute: Function }} client - Object exposing
 *   `execute({ sql, args })`; each call is awaited.
 * @returns {Promise<void>}
 */
async function createOrchestrationEventsSchema(client) {
  const tableDdl = `CREATE TABLE IF NOT EXISTS orchestration_events (
id TEXT PRIMARY KEY,
ts TEXT NOT NULL,
event_type TEXT NOT NULL,
source TEXT NOT NULL,
severity TEXT NOT NULL DEFAULT 'info',
task_id TEXT,
agent_id TEXT,
reviewer TEXT,
session_scope TEXT,
project_name TEXT,
instance_id TEXT,
tmux_session TEXT,
runtime TEXT,
correlation_id TEXT,
attempt INTEGER,
duration_ms INTEGER,
result TEXT,
error_code TEXT,
payload_json TEXT,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
)`;

  const indexDdl = [
    "CREATE INDEX IF NOT EXISTS idx_orch_events_ts ON orchestration_events(ts)",
    "CREATE INDEX IF NOT EXISTS idx_orch_events_task ON orchestration_events(task_id, ts)",
    "CREATE INDEX IF NOT EXISTS idx_orch_events_agent ON orchestration_events(agent_id, session_scope, ts)",
    "CREATE INDEX IF NOT EXISTS idx_orch_events_type ON orchestration_events(event_type, ts)"
  ];

  // The table must exist before any index on it is created.
  for (const sql of [tableDdl, ...indexDdl]) {
    await client.execute({ sql, args: [] });
  }
}
|
|
123
|
+
/**
 * Insert one row into `orchestration_events`.
 *
 * The row id is a fresh UUID; `ts` falls back to the current time and
 * `severity` to "info". Every other optional field is stored as NULL when it
 * is null or undefined. The payload goes through `sanitizePayload` before it
 * is stored.
 *
 * Failures (client lookup, schema setup, sanitizing, insert) are caught: the
 * function resolves to `false` and, only when EXE_DEBUG is "1", writes a
 * one-line notice to stderr.
 *
 * @param {object} input - Event fields: `eventType`, `source`, plus optional
 *   `timestamp`, `severity`, `taskId`, `agentId`, `reviewer`, `sessionScope`,
 *   `projectName`, `instanceId`, `tmuxSession`, `runtime`, `correlationId`,
 *   `attempt`, `durationMs`, `result`, `errorCode`, `payload`.
 * @returns {Promise<boolean>} `true` if the row was written, else `false`.
 */
async function recordOrchestrationEvent(input) {
  // Optional fields, in the same order as their columns in the INSERT below.
  const optionalFields = [
    "taskId",
    "agentId",
    "reviewer",
    "sessionScope",
    "projectName",
    "instanceId",
    "tmuxSession",
    "runtime",
    "correlationId",
    "attempt",
    "durationMs",
    "result",
    "errorCode"
  ];
  const insertSql = `INSERT INTO orchestration_events (
id, ts, event_type, source, severity,
task_id, agent_id, reviewer, session_scope, project_name,
instance_id, tmux_session, runtime, correlation_id, attempt,
duration_ms, result, error_code, payload_json
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`;

  try {
    const client = getClient();
    await ensureOrchestrationEventsSchema(client);
    const payloadJson = sanitizePayload(input.payload);

    const args = [
      crypto.randomUUID(),
      input.timestamp ?? (/* @__PURE__ */ new Date()).toISOString(),
      input.eventType,
      input.source,
      input.severity ?? "info",
      ...optionalFields.map((field) => input[field] ?? null),
      payloadJson
    ];

    await client.execute({ sql: insertSql, args });
    return true;
  } catch (err) {
    if (process.env.EXE_DEBUG === "1") {
      const reason = err instanceof Error ? err.message : String(err);
      process.stderr.write(`[orchestration-events] write skipped: ${reason}\n`);
    }
    return false;
  }
}
|
|
168
|
+
/**
 * Fire-and-forget variant of `recordOrchestrationEvent`.
 *
 * Writes are appended to a single promise chain (`bestEffortQueue`) so they
 * run one at a time. When `bestEffortPending` has reached
 * `BEST_EFFORT_QUEUE_LIMIT`, the event is dropped instead of queued. Nothing
 * is returned and nothing is thrown to the caller; drops and failures are
 * reported on stderr only when EXE_DEBUG is "1".
 *
 * @param {object} input - Event fields, passed through unchanged to
 *   `recordOrchestrationEvent`.
 * @returns {void}
 */
function recordOrchestrationEventBestEffort(input) {
  if (bestEffortPending >= BEST_EFFORT_QUEUE_LIMIT) {
    if (process.env.EXE_DEBUG === "1") {
      process.stderr.write(`[orchestration-events] best-effort queue saturated; dropped ${input.eventType}\n`);
    }
    return;
  }

  bestEffortPending += 1;

  // Performs the write and always releases the slot, clamped at zero.
  const writeQueued = async () => {
    try {
      await recordOrchestrationEvent(input);
    } finally {
      bestEffortPending = Math.max(0, bestEffortPending - 1);
    }
  };

  // Keeps the chain alive after a failure so later events still run.
  const reportFailure = (err) => {
    if (process.env.EXE_DEBUG === "1") {
      const reason = err instanceof Error ? err.message : String(err);
      process.stderr.write(`[orchestration-events] best-effort write skipped: ${reason}\n`);
    }
  };

  bestEffortQueue = bestEffortQueue.then(writeQueued).catch(reportFailure);
}
|
|
194
|
+
async function getOrchestrationMetrics(input = {}) {
|
|
195
|
+
const client = getClient();
|
|
196
|
+
await ensureOrchestrationEventsSchema(client);
|
|
197
|
+
const sinceIso = input.sinceIso ?? null;
|
|
198
|
+
const result = sinceIso ? await client.execute({
|
|
199
|
+
sql: `SELECT event_type, ts, task_id, tmux_session, agent_id, reviewer, attempt, duration_ms, result, error_code
|
|
200
|
+
FROM orchestration_events
|
|
201
|
+
WHERE ts >= ?
|
|
202
|
+
ORDER BY ts ASC`,
|
|
203
|
+
args: [sinceIso]
|
|
204
|
+
}) : await client.execute({
|
|
205
|
+
sql: `SELECT event_type, ts, task_id, tmux_session, agent_id, reviewer, attempt, duration_ms, result, error_code
|
|
206
|
+
FROM orchestration_events
|
|
207
|
+
ORDER BY ts ASC`,
|
|
208
|
+
args: []
|
|
209
|
+
});
|
|
210
|
+
const rows = result.rows;
|
|
211
|
+
const taskEvents = /* @__PURE__ */ new Map();
|
|
212
|
+
const sessionEvents = /* @__PURE__ */ new Map();
|
|
213
|
+
const daemonTickDurations = [];
|
|
214
|
+
const resumeTimestamps = /* @__PURE__ */ new Map();
|
|
215
|
+
const reviewLatenciesByReviewer = /* @__PURE__ */ new Map();
|
|
216
|
+
const counts = {
|
|
217
|
+
duplicateNudges: 0,
|
|
218
|
+
missedReviews: 0,
|
|
219
|
+
tmuxCommandTimeouts: 0,
|
|
220
|
+
claimCollisions: 0,
|
|
221
|
+
crossSessionBlocks: 0,
|
|
222
|
+
dispatchUnclaimed: 0,
|
|
223
|
+
sessionExits: 0,
|
|
224
|
+
sessionBorns: 0,
|
|
225
|
+
sessionIdleKills: 0,
|
|
226
|
+
sessionTtlKills: 0,
|
|
227
|
+
sessionContextKills: 0,
|
|
228
|
+
sessionResumed: 0,
|
|
229
|
+
eventLoopBlocks: 0,
|
|
230
|
+
shardCircuitOpens: 0,
|
|
231
|
+
taskBlocked: 0,
|
|
232
|
+
taskCancelled: 0,
|
|
233
|
+
checkpoints: 0,
|
|
234
|
+
messagesSent: 0,
|
|
235
|
+
reviewsCreated: 0,
|
|
236
|
+
reviewsApproved: 0,
|
|
237
|
+
reviewsRejected: 0,
|
|
238
|
+
dispatches: 0,
|
|
239
|
+
signalsCreated: 0,
|
|
240
|
+
signalsConsumed: 0,
|
|
241
|
+
workerInstancesSelected: 0,
|
|
242
|
+
contextPressure: 0,
|
|
243
|
+
mcpUnreachable: 0,
|
|
244
|
+
dispatchAttempted: 0,
|
|
245
|
+
dispatchFailed: 0,
|
|
246
|
+
reviewStarted: 0,
|
|
247
|
+
messagesDelivered: 0,
|
|
248
|
+
messagesRead: 0,
|
|
249
|
+
signalsStale: 0,
|
|
250
|
+
tmuxNudgeAttempted: 0,
|
|
251
|
+
tmuxNudgeCompleted: 0,
|
|
252
|
+
tmuxSpawnAttempted: 0,
|
|
253
|
+
tmuxSpawnCompleted: 0,
|
|
254
|
+
consistencyMismatches: 0,
|
|
255
|
+
consistencyRepaired: 0,
|
|
256
|
+
fileRowsResynced: 0,
|
|
257
|
+
fileOrphansCleaned: 0,
|
|
258
|
+
worktreeCreated: 0,
|
|
259
|
+
worktreeReused: 0,
|
|
260
|
+
worktreeFailed: 0,
|
|
261
|
+
worktreePruned: 0,
|
|
262
|
+
worktreeSkipped: 0,
|
|
263
|
+
registryMissingTmux: 0,
|
|
264
|
+
registryMissingEntry: 0,
|
|
265
|
+
taskGroupsCreated: 0,
|
|
266
|
+
taskGroupBarriersFired: 0,
|
|
267
|
+
taskGroupTimeouts: 0,
|
|
268
|
+
taskGroupPartialFailures: 0
|
|
269
|
+
};
|
|
270
|
+
for (const row of rows) {
|
|
271
|
+
const type = String(row.event_type ?? "");
|
|
272
|
+
const taskId = row.task_id == null ? "" : String(row.task_id);
|
|
273
|
+
const ts = Date.parse(String(row.ts ?? ""));
|
|
274
|
+
const eventCount = eventWeight(row);
|
|
275
|
+
if (type === "nudge.duplicate") counts.duplicateNudges++;
|
|
276
|
+
if (type === "review.missed") counts.missedReviews++;
|
|
277
|
+
if (type === "tmux.command.timeout") counts.tmuxCommandTimeouts++;
|
|
278
|
+
if (type === "claim.collision") counts.claimCollisions++;
|
|
279
|
+
if (type === "session.scope_violation") counts.crossSessionBlocks++;
|
|
280
|
+
if (type === "dispatch.unclaimed") counts.dispatchUnclaimed++;
|
|
281
|
+
if (type === "session.exit") counts.sessionExits++;
|
|
282
|
+
if (type === "session.born") counts.sessionBorns++;
|
|
283
|
+
if (type === "session.idle_kill") counts.sessionIdleKills++;
|
|
284
|
+
if (type === "session.ttl_kill") counts.sessionTtlKills++;
|
|
285
|
+
if (type === "session.context_kill") counts.sessionContextKills++;
|
|
286
|
+
if (type === "session.resumed") counts.sessionResumed++;
|
|
287
|
+
if (type === "daemon.event_loop_blocked") counts.eventLoopBlocks++;
|
|
288
|
+
if (type === "shard.circuit_open") counts.shardCircuitOpens++;
|
|
289
|
+
if (type === "task.blocked") counts.taskBlocked++;
|
|
290
|
+
if (type === "task.cancelled") counts.taskCancelled++;
|
|
291
|
+
if (type === "task.checkpoint") counts.checkpoints++;
|
|
292
|
+
if (type === "message.sent") counts.messagesSent++;
|
|
293
|
+
if (type === "review.created") counts.reviewsCreated++;
|
|
294
|
+
if (type === "review.approved") counts.reviewsApproved++;
|
|
295
|
+
if (type === "review.rejected") counts.reviewsRejected++;
|
|
296
|
+
if (type === "dispatch.completed") counts.dispatches++;
|
|
297
|
+
if (type === "signal.created") counts.signalsCreated++;
|
|
298
|
+
if (type === "signal.consumed") counts.signalsConsumed++;
|
|
299
|
+
if (type === "worker.instance.selected") counts.workerInstancesSelected++;
|
|
300
|
+
if (type === "context.pressure") counts.contextPressure++;
|
|
301
|
+
if (type === "mcp.unreachable") counts.mcpUnreachable++;
|
|
302
|
+
if (type === "dispatch.attempted") counts.dispatchAttempted++;
|
|
303
|
+
if (type === "dispatch.failed") counts.dispatchFailed++;
|
|
304
|
+
if (type === "review.started") counts.reviewStarted++;
|
|
305
|
+
if (type === "message.delivered") counts.messagesDelivered++;
|
|
306
|
+
if (type === "message.read") counts.messagesRead++;
|
|
307
|
+
if (type === "signal.stale") counts.signalsStale++;
|
|
308
|
+
if (type === "tmux.nudge.attempted") counts.tmuxNudgeAttempted++;
|
|
309
|
+
if (type === "tmux.nudge.completed") {
|
|
310
|
+
counts.tmuxNudgeCompleted++;
|
|
311
|
+
if (String(row.result ?? "") === "debounced") counts.duplicateNudges++;
|
|
312
|
+
}
|
|
313
|
+
if (type === "tmux.spawn.attempted") counts.tmuxSpawnAttempted++;
|
|
314
|
+
if (type === "tmux.spawn.completed") counts.tmuxSpawnCompleted++;
|
|
315
|
+
if (type === "consistency.mismatch") counts.consistencyMismatches++;
|
|
316
|
+
if (type === "consistency.repaired") counts.consistencyRepaired++;
|
|
317
|
+
if (type === "task_file.resynced") counts.fileRowsResynced++;
|
|
318
|
+
if (type === "task_file.orphan_cleaned") counts.fileOrphansCleaned++;
|
|
319
|
+
if (type === "worktree.created") counts.worktreeCreated++;
|
|
320
|
+
if (type === "worktree.reused") counts.worktreeReused++;
|
|
321
|
+
if (type === "worktree.failed") counts.worktreeFailed++;
|
|
322
|
+
if (type === "worktree.pruned") counts.worktreePruned++;
|
|
323
|
+
if (type === "worktree.skipped") counts.worktreeSkipped++;
|
|
324
|
+
if (type === "registry.missing_tmux") counts.registryMissingTmux += eventCount;
|
|
325
|
+
if (type === "registry.missing_entry") counts.registryMissingEntry += eventCount;
|
|
326
|
+
if (type === "task_group.created") counts.taskGroupsCreated++;
|
|
327
|
+
if (type === "task_group.barrier_fired") counts.taskGroupBarriersFired++;
|
|
328
|
+
if (type === "task_group.timeout") counts.taskGroupTimeouts++;
|
|
329
|
+
if (type === "task_group.partial_failure") counts.taskGroupPartialFailures++;
|
|
330
|
+
if (type.startsWith("tmux.") && String(row.error_code ?? "").toLowerCase().includes("timeout")) {
|
|
331
|
+
counts.tmuxCommandTimeouts++;
|
|
332
|
+
}
|
|
333
|
+
if (type === "session.resumed" && Number.isFinite(ts)) {
|
|
334
|
+
const agentId = row.agent_id == null ? "" : String(row.agent_id);
|
|
335
|
+
if (agentId) {
|
|
336
|
+
const list = resumeTimestamps.get(agentId) ?? [];
|
|
337
|
+
list.push(ts);
|
|
338
|
+
resumeTimestamps.set(agentId, list);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
if (type === "daemon.tick.completed") {
|
|
342
|
+
const duration = Number(row.duration_ms ?? NaN);
|
|
343
|
+
if (Number.isFinite(duration)) daemonTickDurations.push(duration);
|
|
344
|
+
}
|
|
345
|
+
const tmuxSess = row.tmux_session == null ? "" : String(row.tmux_session);
|
|
346
|
+
if (tmuxSess && Number.isFinite(ts)) {
|
|
347
|
+
if (type === "session.born") sessionEvents.set(tmuxSess, { ...sessionEvents.get(tmuxSess) ?? {}, born: ts });
|
|
348
|
+
if ((type === "session.exit" || type === "session.idle_kill" || type === "session.ttl_kill" || type === "session.context_kill") && !sessionEvents.get(tmuxSess)?.exited) {
|
|
349
|
+
const entry = sessionEvents.get(tmuxSess) ?? {};
|
|
350
|
+
entry.exited = ts;
|
|
351
|
+
const hookDuration = Number(row.duration_ms ?? NaN);
|
|
352
|
+
if (Number.isFinite(hookDuration)) entry.durationMs = hookDuration;
|
|
353
|
+
sessionEvents.set(tmuxSess, entry);
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
if (!taskId || !Number.isFinite(ts)) continue;
|
|
357
|
+
const bucket = taskEvents.get(taskId) ?? {};
|
|
358
|
+
if (type === "task.created" && bucket.created === void 0) bucket.created = ts;
|
|
359
|
+
if (type === "task.claimed" && bucket.claimed === void 0) bucket.claimed = ts;
|
|
360
|
+
if (type === "task.blocked" && bucket.blocked === void 0) bucket.blocked = ts;
|
|
361
|
+
if (type === "task.unblocked" && bucket.unblocked === void 0) bucket.unblocked = ts;
|
|
362
|
+
if (type === "task.done" && bucket.done === void 0) bucket.done = ts;
|
|
363
|
+
if (type === "review.ready" && bucket.reviewReady === void 0) {
|
|
364
|
+
bucket.reviewReady = ts;
|
|
365
|
+
if (row.reviewer != null) bucket.reviewer = String(row.reviewer);
|
|
366
|
+
}
|
|
367
|
+
if (type === "review.started" && bucket.reviewStarted === void 0) bucket.reviewStarted = ts;
|
|
368
|
+
if (type === "review.approved" && bucket.reviewApproved === void 0) bucket.reviewApproved = ts;
|
|
369
|
+
if (type === "review.rejected" && bucket.reviewRejected === void 0) bucket.reviewRejected = ts;
|
|
370
|
+
if (type === "signal.created" && bucket.signalCreated === void 0) bucket.signalCreated = ts;
|
|
371
|
+
if (type === "signal.consumed" && bucket.signalConsumed === void 0) bucket.signalConsumed = ts;
|
|
372
|
+
if (type === "dispatch.attempted" && bucket.dispatched === void 0) bucket.dispatched = ts;
|
|
373
|
+
taskEvents.set(taskId, bucket);
|
|
374
|
+
}
|
|
375
|
+
const createToClaim = [];
|
|
376
|
+
const doneToReview = [];
|
|
377
|
+
const reviewToApproved = [];
|
|
378
|
+
const reviewToStarted = [];
|
|
379
|
+
const signalToConsumed = [];
|
|
380
|
+
const dispatchToClaim = [];
|
|
381
|
+
const blockedDuration = [];
|
|
382
|
+
for (const eventSet of taskEvents.values()) {
|
|
383
|
+
if (eventSet.created !== void 0 && eventSet.claimed !== void 0 && eventSet.claimed >= eventSet.created) {
|
|
384
|
+
createToClaim.push(eventSet.claimed - eventSet.created);
|
|
385
|
+
}
|
|
386
|
+
if (eventSet.done !== void 0 && eventSet.reviewReady !== void 0 && eventSet.reviewReady >= eventSet.done) {
|
|
387
|
+
doneToReview.push(eventSet.reviewReady - eventSet.done);
|
|
388
|
+
}
|
|
389
|
+
if (eventSet.reviewReady !== void 0 && eventSet.reviewApproved !== void 0 && eventSet.reviewApproved >= eventSet.reviewReady) {
|
|
390
|
+
reviewToApproved.push(eventSet.reviewApproved - eventSet.reviewReady);
|
|
391
|
+
}
|
|
392
|
+
if (eventSet.reviewReady !== void 0 && eventSet.reviewStarted !== void 0 && eventSet.reviewStarted >= eventSet.reviewReady) {
|
|
393
|
+
reviewToStarted.push(eventSet.reviewStarted - eventSet.reviewReady);
|
|
394
|
+
}
|
|
395
|
+
if (eventSet.signalCreated !== void 0 && eventSet.signalConsumed !== void 0 && eventSet.signalConsumed >= eventSet.signalCreated) {
|
|
396
|
+
signalToConsumed.push(eventSet.signalConsumed - eventSet.signalCreated);
|
|
397
|
+
}
|
|
398
|
+
if (eventSet.dispatched !== void 0 && eventSet.claimed !== void 0 && eventSet.claimed >= eventSet.dispatched) {
|
|
399
|
+
dispatchToClaim.push(eventSet.claimed - eventSet.dispatched);
|
|
400
|
+
}
|
|
401
|
+
if (eventSet.blocked !== void 0 && eventSet.unblocked !== void 0 && eventSet.unblocked >= eventSet.blocked) {
|
|
402
|
+
blockedDuration.push(eventSet.unblocked - eventSet.blocked);
|
|
403
|
+
}
|
|
404
|
+
if (eventSet.dispatched !== void 0 && eventSet.claimed === void 0 && Date.now() - eventSet.dispatched >= 5 * 6e4) {
|
|
405
|
+
counts.dispatchUnclaimed++;
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
const STORM_WINDOW_MS = 30 * 6e4;
|
|
409
|
+
const STORM_THRESHOLD = 3;
|
|
410
|
+
const resumeStorms = [];
|
|
411
|
+
for (const [agentId, timestamps] of resumeTimestamps) {
|
|
412
|
+
if (timestamps.length < STORM_THRESHOLD) continue;
|
|
413
|
+
const sorted = [...timestamps].sort((a, b) => a - b);
|
|
414
|
+
let maxInWindow = 0;
|
|
415
|
+
for (let i = 0; i < sorted.length; i++) {
|
|
416
|
+
let j = i;
|
|
417
|
+
while (j < sorted.length && sorted[j] - sorted[i] <= STORM_WINDOW_MS) j++;
|
|
418
|
+
maxInWindow = Math.max(maxInWindow, j - i);
|
|
419
|
+
}
|
|
420
|
+
if (maxInWindow >= STORM_THRESHOLD) {
|
|
421
|
+
resumeStorms.push({ agentId, count: maxInWindow, windowMinutes: 30 });
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
for (const row of rows) {
|
|
425
|
+
const type = String(row.event_type ?? "");
|
|
426
|
+
if (type !== "review.approved" && type !== "review.rejected") continue;
|
|
427
|
+
const reviewerStr = row.reviewer == null ? "" : String(row.reviewer);
|
|
428
|
+
const taskId = row.task_id == null ? "" : String(row.task_id);
|
|
429
|
+
if (!reviewerStr || !taskId) continue;
|
|
430
|
+
const bucket = taskEvents.get(taskId);
|
|
431
|
+
if (bucket?.reviewReady !== void 0) {
|
|
432
|
+
const ts = Date.parse(String(row.ts ?? ""));
|
|
433
|
+
if (Number.isFinite(ts) && ts >= bucket.reviewReady) {
|
|
434
|
+
const latencies = reviewLatenciesByReviewer.get(reviewerStr) ?? [];
|
|
435
|
+
latencies.push(ts - bucket.reviewReady);
|
|
436
|
+
reviewLatenciesByReviewer.set(reviewerStr, latencies);
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
const BOTTLENECK_P95_MS = 30 * 6e4;
|
|
441
|
+
const reviewerBottlenecks = [];
|
|
442
|
+
for (const [reviewer, latencies] of reviewLatenciesByReviewer) {
|
|
443
|
+
if (latencies.length < 2) continue;
|
|
444
|
+
const p95 = percentile([...latencies].sort((a, b) => a - b), 95);
|
|
445
|
+
if (p95 > BOTTLENECK_P95_MS) {
|
|
446
|
+
reviewerBottlenecks.push({ reviewer, pendingCount: latencies.length, p95Ms: p95 });
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
const BACKLOG_THRESHOLD_MS = 30 * 6e4;
|
|
450
|
+
const reviewerBacklogMap = /* @__PURE__ */ new Map();
|
|
451
|
+
const nowMs = Date.now();
|
|
452
|
+
for (const eventSet of taskEvents.values()) {
|
|
453
|
+
if (eventSet.reviewReady === void 0 || eventSet.reviewApproved !== void 0 || eventSet.reviewRejected !== void 0) continue;
|
|
454
|
+
const ageMs = nowMs - eventSet.reviewReady;
|
|
455
|
+
if (ageMs < BACKLOG_THRESHOLD_MS) continue;
|
|
456
|
+
const reviewer = eventSet.reviewer ?? "unknown";
|
|
457
|
+
const current = reviewerBacklogMap.get(reviewer) ?? { pendingCount: 0, oldestMs: 0 };
|
|
458
|
+
current.pendingCount++;
|
|
459
|
+
current.oldestMs = Math.max(current.oldestMs, ageMs);
|
|
460
|
+
reviewerBacklogMap.set(reviewer, current);
|
|
461
|
+
}
|
|
462
|
+
const reviewerBacklogs = [...reviewerBacklogMap.entries()].map(([reviewer, value]) => ({ reviewer, ...value })).sort((a, b) => b.oldestMs - a.oldestMs);
|
|
463
|
+
const sessionDurations = [];
|
|
464
|
+
for (const sess of sessionEvents.values()) {
|
|
465
|
+
if (sess.durationMs !== void 0) {
|
|
466
|
+
sessionDurations.push(sess.durationMs);
|
|
467
|
+
} else if (sess.born !== void 0 && sess.exited !== void 0 && sess.exited >= sess.born) {
|
|
468
|
+
sessionDurations.push(sess.exited - sess.born);
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
return {
|
|
472
|
+
sinceIso,
|
|
473
|
+
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
474
|
+
totalEvents: rows.length,
|
|
475
|
+
counts,
|
|
476
|
+
createToClaimMs: percentileMetric(createToClaim),
|
|
477
|
+
doneToReviewMs: percentileMetric(doneToReview),
|
|
478
|
+
reviewToApprovedMs: percentileMetric(reviewToApproved),
|
|
479
|
+
reviewToStartedMs: percentileMetric(reviewToStarted),
|
|
480
|
+
signalToConsumedMs: percentileMetric(signalToConsumed),
|
|
481
|
+
dispatchToClaimMs: percentileMetric(dispatchToClaim),
|
|
482
|
+
blockedDurationMs: percentileMetric(blockedDuration),
|
|
483
|
+
sessionDurationMs: percentileMetric(sessionDurations),
|
|
484
|
+
daemonTickMs: percentileMetric(daemonTickDurations),
|
|
485
|
+
resumeStorms,
|
|
486
|
+
reviewerBottlenecks,
|
|
487
|
+
reviewerBacklogs
|
|
488
|
+
};
|
|
489
|
+
}
|
|
490
|
+
/**
 * Render an orchestration metrics snapshot as a plain-text report with
 * Markdown-style headings.
 *
 * @param {object} metrics - Snapshot object; this function reads
 *   `generatedAt`, `sinceIso`, `totalEvents`, the `*Ms` latency summaries,
 *   the `counts` map, and the `resumeStorms` / `reviewerBottlenecks` /
 *   `reviewerBacklogs` arrays from it.
 * @returns {string} The report lines joined with "\n".
 */
function formatOrchestrationMetrics(metrics) {
  // One "- label: n=..., p50=..." bullet for a latency summary stored on `metrics`.
  const latencyRow = ([label, field]) => `- ${label}: ${formatMetric(metrics[field])}`;
  // One "- label: <value>" bullet for a counter stored on `metrics.counts`.
  const counterRow = ([label, field]) => `- ${label}: ${metrics.counts[field]}`;
  // Comma-separated rendering of a derived list, or "none" when it is empty.
  const listOrNone = (entries, describe) =>
    entries.length === 0 ? "none" : entries.map((entry) => describe(entry)).join(", ");

  const report = [
    "# Orchestration Metrics",
    "",
    `Generated: ${metrics.generatedAt}`,
    `Since: ${metrics.sinceIso ?? "beginning"}`,
    `Total events: ${metrics.totalEvents}`,
    "",
    "## Latency",
    ...[
      ["create\u2192claim", "createToClaimMs"],
      ["dispatch\u2192claim", "dispatchToClaimMs"],
      ["done\u2192review.ready", "doneToReviewMs"],
      ["review.ready\u2192started", "reviewToStartedMs"],
      ["review.ready\u2192approved", "reviewToApprovedMs"],
      ["signal.created\u2192consumed", "signalToConsumedMs"],
      ["task blocked duration", "blockedDurationMs"],
      ["session duration (born\u2192exit)", "sessionDurationMs"],
      ["daemon tick", "daemonTickMs"]
    ].map(latencyRow),
    "",
    "## Task Lifecycle",
    ...[
      ["dispatches", "dispatches"],
      ["checkpoints", "checkpoints"],
      ["reviews created", "reviewsCreated"],
      ["reviews started", "reviewStarted"],
      ["reviews approved", "reviewsApproved"],
      ["reviews rejected", "reviewsRejected"],
      ["tasks blocked", "taskBlocked"],
      ["tasks cancelled", "taskCancelled"],
      ["messages sent", "messagesSent"],
      ["messages delivered", "messagesDelivered"],
      ["messages read", "messagesRead"]
    ].map(counterRow),
    "",
    "## Sessions & Workers",
    ...[
      ["sessions born", "sessionBorns"],
      ["session exits", "sessionExits"],
      ["idle kills", "sessionIdleKills"],
      ["TTL kills", "sessionTtlKills"],
      ["context kills", "sessionContextKills"],
      ["session resumes", "sessionResumed"],
      ["worker instances selected", "workerInstancesSelected"],
      ["tmux spawn attempted", "tmuxSpawnAttempted"],
      ["tmux spawn completed", "tmuxSpawnCompleted"],
      ["worktrees created", "worktreeCreated"],
      ["worktrees reused", "worktreeReused"],
      ["worktrees failed", "worktreeFailed"],
      ["worktrees pruned", "worktreePruned"],
      ["worktrees skipped", "worktreeSkipped"]
    ].map(counterRow),
    "",
    "## Dispatch & Signals",
    ...[
      ["dispatch attempted", "dispatchAttempted"],
      ["dispatch failed", "dispatchFailed"],
      ["tmux nudges attempted", "tmuxNudgeAttempted"],
      ["tmux nudges completed", "tmuxNudgeCompleted"],
      ["signals created", "signalsCreated"],
      ["signals consumed", "signalsConsumed"],
      ["signals stale (cleaned)", "signalsStale"],
      ["context pressure (>70%)", "contextPressure"]
    ].map(counterRow),
    "",
    "## Errors & Anomalies",
    ...[
      ["claim collisions", "claimCollisions"],
      ["duplicate nudges", "duplicateNudges"],
      ["missed reviews", "missedReviews"],
      ["dispatch unclaimed", "dispatchUnclaimed"],
      ["DB/file consistency mismatches", "consistencyMismatches"],
      ["file rows resynced into DB", "fileRowsResynced"],
      ["file orphans cleaned", "fileOrphansCleaned"],
      ["registry missing tmux sessions", "registryMissingTmux"],
      ["registry missing entries", "registryMissingEntry"],
      ["cross-session blocks", "crossSessionBlocks"],
      ["tmux command timeouts", "tmuxCommandTimeouts"],
      ["MCP unreachable", "mcpUnreachable"]
    ].map(counterRow),
    "",
    "## Task Groups (Fan-out/Fan-in)",
    ...[
      ["groups created", "taskGroupsCreated"],
      ["barriers fired", "taskGroupBarriersFired"],
      ["group timeouts", "taskGroupTimeouts"],
      ["partial failures", "taskGroupPartialFailures"]
    ].map(counterRow),
    "",
    "## Daemon Health",
    ...[
      ["event loop blocks", "eventLoopBlocks"],
      ["shard circuit opens", "shardCircuitOpens"]
    ].map(counterRow),
    "",
    "## Derived Analysis",
    `- RESUME storms: ${listOrNone(
      metrics.resumeStorms,
      (storm) => `${storm.agentId} (${storm.count}x in ${storm.windowMinutes}min)`
    )}`,
    `- reviewer bottlenecks: ${listOrNone(
      metrics.reviewerBottlenecks,
      (item) => `${item.reviewer} (p95=${formatMs(item.p95Ms)}, n=${item.pendingCount})`
    )}`,
    `- reviewer backlogs: ${listOrNone(
      metrics.reviewerBacklogs,
      (item) => `${item.reviewer} (${item.pendingCount} pending, oldest=${formatMs(item.oldestMs)})`
    )}`
  ];
  return report.join("\n");
}
|
|
579
|
+
/**
 * Reduce an event payload to a JSON string containing only simple values.
 *
 * Keys matching SENSITIVE_PAYLOAD_KEY are dropped. Values that are
 * null/undefined, strings, numbers, or booleans are kept as-is. Arrays are
 * kept with only their string/number/boolean items, capped at the first 20
 * of those. Any other value (e.g. a nested object) is omitted.
 *
 * @param {object|null|undefined} payload - Raw payload to sanitize.
 * @returns {string|null} JSON text of the retained entries, or null when
 *   the payload is falsy or nothing was retained.
 */
function sanitizePayload(payload) {
  if (!payload) {
    return null;
  }
  const isScalar = (item) => {
    const kind = typeof item;
    return kind === "string" || kind === "number" || kind === "boolean";
  };
  const retained = {};
  Object.entries(payload).forEach(([field, raw]) => {
    if (SENSITIVE_PAYLOAD_KEY.test(field)) {
      return;
    }
    if (Array.isArray(raw)) {
      // Filter first, then cap, so up to 20 scalar items survive.
      retained[field] = raw.filter((item) => isScalar(item)).slice(0, 20);
      return;
    }
    if (raw == null || isScalar(raw)) {
      retained[field] = raw;
    }
  });
  if (Object.keys(retained).length === 0) {
    return null;
  }
  return JSON.stringify(retained);
}
|
|
592
|
+
/**
 * Summarize a list of numeric samples as a count plus p50/p95/p99.
 *
 * @param {number[]} values - Samples; the input array is not modified.
 * @returns {{count: number, p50: number|null, p95: number|null, p99: number|null}}
 *   For an empty input the count is 0 and every percentile is null.
 */
function percentileMetric(values) {
  if (values.length === 0) {
    return { count: 0, p50: null, p95: null, p99: null };
  }
  // Sort a copy numerically so the caller's array keeps its order.
  const ascending = [...values];
  ascending.sort((lhs, rhs) => lhs - rhs);
  const [p50, p95, p99] = [50, 95, 99].map((pct) => percentile(ascending, pct));
  return { count: ascending.length, p50, p95, p99 };
}
|
|
602
|
+
/**
 * Pick the element at the requested percentile of an already-sorted array
 * using a ceil-based rank that is clamped to the array bounds.
 *
 * @param {number[]} sorted - Samples in ascending order.
 * @param {number} percentileValue - Percentile to read, e.g. 50 or 95.
 * @returns {number} The selected element, or 0 when the array is empty or
 *   the selected slot is null/undefined.
 */
function percentile(sorted, percentileValue) {
  const size = sorted.length;
  if (size === 0) {
    return 0;
  }
  const rank = Math.ceil(percentileValue / 100 * size) - 1;
  // Clamp into [0, size - 1] so out-of-range percentiles hit the ends.
  const lowerBounded = Math.max(0, rank);
  const index = Math.min(size - 1, lowerBounded);
  return sorted[index] ?? 0;
}
|
|
607
|
+
/**
 * Derive an integer weight (at least 1) from a row's `attempt` field.
 *
 * @param {object} row - Row object; only `row.attempt` is read.
 * @returns {number} `attempt` coerced to a number and floored, or 1 when it
 *   is missing, not finite, or below 1.
 */
function eventWeight(row) {
  const attempts = Number(row.attempt ?? 1);
  const usable = Number.isFinite(attempts) && attempts >= 1;
  return usable ? Math.floor(attempts) : 1;
}
|
|
612
|
+
/**
 * Format a percentile summary as a single line of text.
 *
 * @param {{count: number, p50: number|null, p95: number|null, p99: number|null}} metric
 *   Summary to render.
 * @returns {string} "n=0" when the summary has no samples, otherwise
 *   "n=<count>, p50=..., p95=..., p99=..." with durations rendered by formatMs.
 */
function formatMetric(metric) {
  const { count, p50, p95, p99 } = metric;
  if (count === 0) {
    return "n=0";
  }
  const parts = [
    `n=${count}`,
    `p50=${formatMs(p50)}`,
    `p95=${formatMs(p95)}`,
    `p99=${formatMs(p99)}`
  ];
  return parts.join(", ");
}
|
|
616
|
+
/**
 * Format a millisecond duration for display.
 *
 * @param {number|null|undefined} value - Duration in milliseconds.
 * @returns {string} "n/a" for null/undefined, "<value>ms" below 1000, and
 *   otherwise seconds rounded to one decimal place with an "s" suffix.
 */
function formatMs(value) {
  if (value === null || value === undefined) {
    return "n/a";
  }
  const isSubSecond = value < 1e3;
  if (isSubSecond) {
    return `${value}ms`;
  }
  // Scale to tenths of a second, round, then scale back to keep one decimal.
  const tenths = Math.round(value / 1e3 * 10);
  return `${tenths / 10}s`;
}
|
|
621
|
+
|
|
622
|
+
export {
|
|
623
|
+
ORCHESTRATION_COUNTER_KEYS,
|
|
624
|
+
ORCHESTRATION_LATENCY_METRIC_KEYS,
|
|
625
|
+
ensureOrchestrationEventsSchema,
|
|
626
|
+
recordOrchestrationEvent,
|
|
627
|
+
recordOrchestrationEventBestEffort,
|
|
628
|
+
getOrchestrationMetrics,
|
|
629
|
+
formatOrchestrationMetrics
|
|
630
|
+
};
|