@virtengine/openfleet 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +914 -0
- package/LICENSE +190 -0
- package/README.md +500 -0
- package/agent-endpoint.mjs +918 -0
- package/agent-hook-bridge.mjs +230 -0
- package/agent-hooks.mjs +1188 -0
- package/agent-pool.mjs +2403 -0
- package/agent-prompts.mjs +689 -0
- package/agent-sdk.mjs +141 -0
- package/anomaly-detector.mjs +1195 -0
- package/autofix.mjs +1294 -0
- package/claude-shell.mjs +708 -0
- package/cli.mjs +906 -0
- package/codex-config.mjs +1274 -0
- package/codex-model-profiles.mjs +135 -0
- package/codex-shell.mjs +762 -0
- package/config-doctor.mjs +613 -0
- package/config.mjs +1720 -0
- package/conflict-resolver.mjs +248 -0
- package/container-runner.mjs +450 -0
- package/copilot-shell.mjs +827 -0
- package/daemon-restart-policy.mjs +56 -0
- package/diff-stats.mjs +282 -0
- package/error-detector.mjs +829 -0
- package/fetch-runtime.mjs +34 -0
- package/fleet-coordinator.mjs +838 -0
- package/get-telegram-chat-id.mjs +71 -0
- package/git-safety.mjs +170 -0
- package/github-reconciler.mjs +403 -0
- package/hook-profiles.mjs +651 -0
- package/kanban-adapter.mjs +4491 -0
- package/lib/logger.mjs +645 -0
- package/maintenance.mjs +828 -0
- package/merge-strategy.mjs +1171 -0
- package/monitor.mjs +12207 -0
- package/openfleet.config.example.json +115 -0
- package/openfleet.schema.json +465 -0
- package/package.json +203 -0
- package/postinstall.mjs +187 -0
- package/pr-cleanup-daemon.mjs +978 -0
- package/preflight.mjs +408 -0
- package/prepublish-check.mjs +90 -0
- package/presence.mjs +328 -0
- package/primary-agent.mjs +282 -0
- package/publish.mjs +151 -0
- package/repo-root.mjs +29 -0
- package/restart-controller.mjs +100 -0
- package/review-agent.mjs +557 -0
- package/rotate-agent-logs.sh +133 -0
- package/sdk-conflict-resolver.mjs +973 -0
- package/session-tracker.mjs +880 -0
- package/setup.mjs +3937 -0
- package/shared-knowledge.mjs +410 -0
- package/shared-state-manager.mjs +841 -0
- package/shared-workspace-cli.mjs +199 -0
- package/shared-workspace-registry.mjs +537 -0
- package/shared-workspaces.json +18 -0
- package/startup-service.mjs +1070 -0
- package/sync-engine.mjs +1063 -0
- package/task-archiver.mjs +801 -0
- package/task-assessment.mjs +550 -0
- package/task-claims.mjs +924 -0
- package/task-complexity.mjs +581 -0
- package/task-executor.mjs +5111 -0
- package/task-store.mjs +753 -0
- package/telegram-bot.mjs +9281 -0
- package/telegram-sentinel.mjs +2010 -0
- package/ui/app.js +867 -0
- package/ui/app.legacy.js +1464 -0
- package/ui/app.monolith.js +2488 -0
- package/ui/components/charts.js +226 -0
- package/ui/components/chat-view.js +567 -0
- package/ui/components/command-palette.js +587 -0
- package/ui/components/diff-viewer.js +190 -0
- package/ui/components/forms.js +327 -0
- package/ui/components/kanban-board.js +451 -0
- package/ui/components/session-list.js +305 -0
- package/ui/components/shared.js +473 -0
- package/ui/index.html +70 -0
- package/ui/modules/api.js +297 -0
- package/ui/modules/icons.js +461 -0
- package/ui/modules/router.js +81 -0
- package/ui/modules/settings-schema.js +261 -0
- package/ui/modules/state.js +679 -0
- package/ui/modules/telegram.js +331 -0
- package/ui/modules/utils.js +270 -0
- package/ui/styles/animations.css +140 -0
- package/ui/styles/base.css +98 -0
- package/ui/styles/components.css +1915 -0
- package/ui/styles/kanban.css +286 -0
- package/ui/styles/layout.css +809 -0
- package/ui/styles/sessions.css +827 -0
- package/ui/styles/variables.css +188 -0
- package/ui/styles.css +141 -0
- package/ui/styles.monolith.css +1046 -0
- package/ui/tabs/agents.js +1417 -0
- package/ui/tabs/chat.js +74 -0
- package/ui/tabs/control.js +887 -0
- package/ui/tabs/dashboard.js +515 -0
- package/ui/tabs/infra.js +537 -0
- package/ui/tabs/logs.js +783 -0
- package/ui/tabs/settings.js +1487 -0
- package/ui/tabs/tasks.js +1385 -0
- package/ui-server.mjs +4073 -0
- package/update-check.mjs +465 -0
- package/utils.mjs +172 -0
- package/ve-kanban.mjs +654 -0
- package/ve-kanban.ps1 +1365 -0
- package/ve-kanban.sh +18 -0
- package/ve-orchestrator.mjs +340 -0
- package/ve-orchestrator.ps1 +6546 -0
- package/ve-orchestrator.sh +18 -0
- package/vibe-kanban-wrapper.mjs +41 -0
- package/vk-error-resolver.mjs +470 -0
- package/vk-log-stream.mjs +914 -0
- package/whatsapp-channel.mjs +520 -0
- package/workspace-monitor.mjs +581 -0
- package/workspace-reaper.mjs +405 -0
- package/workspace-registry.mjs +238 -0
- package/worktree-manager.mjs +1266 -0
|
@@ -0,0 +1,841 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Shared State Manager for Codex Monitor Task Coordination
|
|
3
|
+
*
|
|
4
|
+
* Manages distributed task execution state across multiple agents and workstations.
|
|
5
|
+
* Provides atomic operations for claiming, updating, and releasing task ownership
|
|
6
|
+
* with heartbeat-based liveness detection and conflict resolution.
|
|
7
|
+
*
|
|
8
|
+
* Designed for eventual consistency on distributed filesystems.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { readFile, writeFile, mkdir, rename, unlink } from "node:fs/promises";
|
|
12
|
+
import { existsSync } from "node:fs";
|
|
13
|
+
import { join, dirname } from "node:path";
|
|
14
|
+
import { randomUUID } from "node:crypto";
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* @typedef {Object} EventLogEntry
|
|
18
|
+
* @property {string} timestamp - ISO 8601 timestamp
|
|
19
|
+
* @property {string} event - Event type (claimed/renewed/released/abandoned/failed/conflict)
|
|
20
|
+
* @property {string} ownerId - Owner identifier at time of event
|
|
21
|
+
* @property {string} [details] - Optional event details
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* @typedef {Object} TaskSharedState
|
|
26
|
+
* @property {string} taskId - Unique task identifier
|
|
27
|
+
* @property {string} ownerId - Current owner (workstation ID + agent ID)
|
|
28
|
+
* @property {string} ownerHeartbeat - ISO 8601 timestamp of last heartbeat
|
|
29
|
+
* @property {string} attemptToken - UUID for this attempt
|
|
30
|
+
* @property {string} attemptStarted - ISO 8601 timestamp when attempt began
|
|
31
|
+
* @property {string} attemptStatus - claimed/working/failed/abandoned/complete
|
|
32
|
+
* @property {number} retryCount - Number of previous attempts
|
|
33
|
+
* @property {string} [lastError] - Error message from last failure
|
|
34
|
+
* @property {string} [ignoreReason] - Reason task should be ignored by agents
|
|
35
|
+
* @property {EventLogEntry[]} eventLog - Chronological event history
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* @typedef {Object} SharedStateRegistry
|
|
40
|
+
* @property {string} version - Schema version
|
|
41
|
+
* @property {string} lastUpdated - ISO 8601 timestamp
|
|
42
|
+
* @property {Object.<string, TaskSharedState>} tasks - Map of taskId to state
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
// Schema version written into the `version` field of every newly created registry.
const REGISTRY_VERSION = "1.0.0";
// Heartbeat TTL (seconds) applied to a claim when the caller does not supply one.
const DEFAULT_TTL_SECONDS = 300; // 5 minutes
// Maximum number of entries retained in a task's eventLog; older entries are dropped.
const MAX_EVENT_LOG_ENTRIES = 100;
|
|
48
|
+
|
|
49
|
+
/**
 * Build the location of the shared task-state registry for a repository.
 *
 * @param {string} [repoRoot] - Repository root; defaults to the current working directory
 * @returns {string} Path to `shared-task-states.json` inside `<repoRoot>/.cache/openfleet`
 */
function getRegistryPath(repoRoot = process.cwd()) {
  const cacheDir = join(repoRoot, ".cache", "openfleet");
  return join(cacheDir, "shared-task-states.json");
}
|
|
57
|
+
|
|
58
|
+
/**
 * Make sure the directory that will hold the registry file exists.
 *
 * @param {string} registryPath - Registry file path; only its parent directory is created
 * @returns {Promise<void>}
 */
async function ensureRegistryDir(registryPath) {
  const parentDir = dirname(registryPath);

  if (existsSync(parentDir)) {
    return;
  }

  // recursive: true also creates any missing intermediate directories.
  await mkdir(parentDir, { recursive: true });
}
|
|
69
|
+
|
|
70
|
+
/**
 * Load the registry from disk, recovering from a missing or corrupt file.
 *
 * - Missing file: returns a fresh empty registry.
 * - Parsed JSON without a `version`, or whose `tasks` is not a plain
 *   (non-array) object: logs a warning and returns a fresh empty registry.
 * - Any other failure (unreadable file, invalid JSON, ...): treated as
 *   corruption. The file is renamed to `<path>.corrupt-<epoch-ms>.bak` on a
 *   best-effort basis and a fresh empty registry is returned.
 *
 * This function does not throw for read/parse problems; it always resolves
 * to a usable registry object.
 *
 * @param {string} registryPath - Registry file path
 * @returns {Promise<SharedStateRegistry>}
 */
async function loadRegistry(registryPath) {
  try {
    if (!existsSync(registryPath)) {
      return createEmptyRegistry();
    }

    const content = await readFile(registryPath, "utf-8");
    const registry = JSON.parse(content);

    // Validate structure. Arrays are rejected explicitly: they satisfy
    // `typeof === "object"`, but task entries stored under string ids on an
    // array are dropped by JSON.stringify, so every save would lose state.
    if (
      !registry.version ||
      !registry.tasks ||
      typeof registry.tasks !== "object" ||
      Array.isArray(registry.tasks)
    ) {
      console.warn(
        "[SharedStateManager] Invalid registry structure, resetting",
      );
      return createEmptyRegistry();
    }

    return registry;
  } catch (error) {
    // The file can disappear between existsSync() and readFile() (another
    // process renaming or cleaning it). That is "no registry yet", not
    // corruption, and there is nothing left to back up.
    if (error?.code === "ENOENT") {
      return createEmptyRegistry();
    }

    console.error(
      "[SharedStateManager] Registry corruption detected:",
      error.message,
    );

    // Attempt to backup corrupted file
    try {
      const backupPath = `${registryPath}.corrupt-${Date.now()}.bak`;
      await rename(registryPath, backupPath);
      console.log(
        `[SharedStateManager] Corrupted registry backed up to: ${backupPath}`,
      );
    } catch (backupError) {
      console.error(
        "[SharedStateManager] Failed to backup corrupted registry:",
        backupError.message,
      );
    }

    return createEmptyRegistry();
  }
}
|
|
120
|
+
|
|
121
|
+
/**
 * Produce a brand-new registry with no tasks.
 *
 * @returns {SharedStateRegistry} Registry stamped with the current schema
 *   version and the current time as `lastUpdated`
 */
function createEmptyRegistry() {
  const createdAt = new Date().toISOString();
  return { version: REGISTRY_VERSION, lastUpdated: createdAt, tasks: {} };
}
|
|
132
|
+
|
|
133
|
+
/**
 * Persist the registry using a write-to-temp-then-rename sequence.
 *
 * The registry's `lastUpdated` field is overwritten with the current time
 * before serialization (the passed object is mutated). On any failure the
 * temp file is removed on a best-effort basis and the original error is
 * rethrown.
 *
 * @param {string} registryPath - Registry file path
 * @param {SharedStateRegistry} registry - Registry data
 * @returns {Promise<void>}
 */
async function saveRegistry(registryPath, registry) {
  await ensureRegistryDir(registryPath);

  registry.lastUpdated = new Date().toISOString();

  // A unique suffix keeps concurrent writers from sharing a temp file.
  const stagingPath = `${registryPath}.tmp-${randomUUID()}`;

  try {
    const payload = JSON.stringify(registry, null, 2);
    await writeFile(stagingPath, payload, "utf-8");

    // Swap the fully written file into place.
    await rename(stagingPath, registryPath);
  } catch (error) {
    // Remove the leftover temp file; failures here are deliberately ignored
    // so the original error is the one the caller sees.
    if (existsSync(stagingPath)) {
      await unlink(stagingPath).catch(() => {});
    }
    throw error;
  }
}
|
|
163
|
+
|
|
164
|
+
/**
 * Append an entry to a task's event log, creating the log if needed and
 * trimming it to the newest MAX_EVENT_LOG_ENTRIES entries.
 *
 * @param {TaskSharedState} state - Task state (mutated in place)
 * @param {string} event - Event type
 * @param {string} ownerId - Owner ID recorded on the entry
 * @param {string} [details] - Optional details; omitted from the entry when falsy
 */
function logEvent(state, event, ownerId, details) {
  if (!state.eventLog) {
    state.eventLog = [];
  }

  const entry = { timestamp: new Date().toISOString(), event, ownerId };
  if (details) {
    entry.details = details;
  }
  state.eventLog.push(entry);

  // Keep log bounded: drop the oldest entries beyond the cap.
  const overflow = state.eventLog.length - MAX_EVENT_LOG_ENTRIES;
  if (overflow > 0) {
    state.eventLog = state.eventLog.slice(overflow);
  }
}
|
|
188
|
+
|
|
189
|
+
/**
 * Check if a heartbeat is stale.
 *
 * A heartbeat is stale when it is older than `staleThresholdMs`, or when it
 * cannot be parsed as a date at all (missing field, garbage string).
 *
 * @param {string} heartbeat - ISO timestamp
 * @param {number} staleThresholdMs - Threshold in milliseconds
 * @returns {boolean} true when the heartbeat is too old or unreadable
 */
function isHeartbeatStale(heartbeat, staleThresholdMs) {
  const heartbeatTime = new Date(heartbeat).getTime();

  // An invalid date yields NaN, and every comparison with NaN is false.
  // Without this guard a record with a missing/corrupt heartbeat would look
  // permanently fresh and its claim could never be taken over.
  if (Number.isNaN(heartbeatTime)) {
    return true;
  }

  return Date.now() - heartbeatTime > staleThresholdMs;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Decide who keeps a task when a second owner tries to claim it.
 *
 * The current owner keeps the task while its heartbeat is fresh
 * (first-come-first-served); once the heartbeat is stale the new claimant wins.
 *
 * @param {TaskSharedState} existing - Existing state
 * @param {string} newOwnerId - New claimant
 * @param {number} staleThresholdMs - Heartbeat staleness threshold
 * @returns {{winner: string, reason: string}} - Resolution decision
 */
function resolveConflict(existing, newOwnerId, staleThresholdMs) {
  const ownerIsLive = !isHeartbeatStale(
    existing.ownerHeartbeat,
    staleThresholdMs,
  );

  return ownerIsLive
    ? { winner: existing.ownerId, reason: "existing_owner_active" }
    : { winner: newOwnerId, reason: "existing_owner_stale" };
}
|
|
227
|
+
|
|
228
|
+
/**
 * Claim a task in shared state with heartbeat-based leasing.
 *
 * Outcomes:
 * - The task carries an ignore flag: rejected with `task_ignored: <reason>`;
 *   nothing is written.
 * - No record exists, or the record is in a terminal status
 *   (complete/failed/abandoned/ignored): a fresh claim is written. The event
 *   log and last error of a previous record are carried over.
 * - The record belongs to a different owner: the new claimant takes over if
 *   the owner's heartbeat is stale (measured against the TTL stored on the
 *   record, falling back to `ttlSeconds`); otherwise the claim is rejected
 *   with `conflict: existing_owner_active`. Both cases append a "conflict"
 *   event and rewrite the registry.
 * - The record belongs to the same owner: heartbeat, attempt token and TTL
 *   are refreshed.
 *
 * Errors raised while loading or saving the registry are caught and reported
 * as `{ success: false, reason: "error: ..." }`.
 *
 * @param {string} taskId - Task identifier
 * @param {string} ownerId - Owner identifier (workstationId + agentId)
 * @param {string} attemptToken - Unique attempt token
 * @param {number} [ttlSeconds=300] - Heartbeat TTL in seconds
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<{success: boolean, reason?: string, state?: TaskSharedState}>}
 */
export async function claimTaskInSharedState(
  taskId,
  ownerId,
  attemptToken,
  ttlSeconds = DEFAULT_TTL_SECONDS,
  repoRoot = process.cwd(),
) {
  const registryPath = getRegistryPath(repoRoot);

  try {
    const registry = await loadRegistry(registryPath);
    const existing = registry.tasks[taskId];
    const now = new Date().toISOString();

    // Task has ignore flag - cannot claim
    if (existing?.ignoreReason) {
      return {
        success: false,
        reason: `task_ignored: ${existing.ignoreReason}`,
      };
    }

    // A record without a numeric retryCount (hand-edited or written by other
    // tooling) would otherwise turn the counter into NaN, which is persisted
    // as null and breaks later max-retry comparisons.
    const previousRetries = Number.isFinite(existing?.retryCount)
      ? existing.retryCount
      : 0;

    // No existing claim or terminal status - claim it
    if (
      !existing ||
      existing.attemptStatus === "complete" ||
      existing.attemptStatus === "failed" ||
      existing.attemptStatus === "abandoned" ||
      existing.attemptStatus === "ignored"
    ) {
      const newState = {
        taskId,
        ownerId,
        ownerHeartbeat: now,
        attemptToken,
        attemptStarted: now,
        attemptStatus: "claimed",
        retryCount: existing ? previousRetries + 1 : 0,
        ttlSeconds,
        eventLog: existing?.eventLog || [],
      };

      if (existing?.lastError) {
        newState.lastError = existing.lastError;
      }

      logEvent(newState, "claimed", ownerId);
      registry.tasks[taskId] = newState;
      await saveRegistry(registryPath, registry);

      return { success: true, state: newState };
    }

    // Existing claim - check for conflict
    if (existing.ownerId !== ownerId) {
      // Judge the current owner by the TTL it registered, not the claimant's.
      const existingStaleMs = (existing.ttlSeconds || ttlSeconds) * 1000;
      const resolution = resolveConflict(existing, ownerId, existingStaleMs);

      if (resolution.winner === ownerId) {
        // Take over stale claim
        const newState = {
          ...existing,
          ownerId,
          ownerHeartbeat: now,
          attemptToken,
          attemptStarted: now,
          attemptStatus: "claimed",
          retryCount: previousRetries + 1,
          ttlSeconds,
        };

        logEvent(
          newState,
          "conflict",
          ownerId,
          `takeover: ${resolution.reason}`,
        );
        registry.tasks[taskId] = newState;
        await saveRegistry(registryPath, registry);

        return { success: true, state: newState };
      }

      // Existing owner wins; the rejected attempt is still recorded.
      logEvent(existing, "conflict", ownerId, `rejected: ${resolution.reason}`);
      registry.tasks[taskId] = existing;
      await saveRegistry(registryPath, registry);

      return {
        success: false,
        reason: `conflict: ${resolution.reason}`,
        state: existing,
      };
    }

    // Same owner reclaiming - update heartbeat
    existing.ownerHeartbeat = now;
    existing.attemptToken = attemptToken;
    existing.ttlSeconds = ttlSeconds;
    logEvent(existing, "reclaimed", ownerId);
    registry.tasks[taskId] = existing;
    await saveRegistry(registryPath, registry);

    return { success: true, state: existing };
  } catch (error) {
    console.error("[SharedStateManager] Failed to claim task:", error);
    return {
      success: false,
      reason: `error: ${error.message}`,
    };
  }
}
|
|
356
|
+
|
|
357
|
+
/**
 * Refresh the heartbeat of a claim held by the caller.
 *
 * The renewal is refused when the task is unknown, when the owner or attempt
 * token does not match the stored record, or when the attempt is already
 * complete or failed. A successful renewal also moves the attempt to the
 * "working" status and appends a "renewed" event.
 *
 * @param {string} taskId - Task identifier
 * @param {string} ownerId - Owner identifier
 * @param {string} attemptToken - Attempt token for verification
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<{success: boolean, reason?: string}>}
 */
export async function renewSharedStateHeartbeat(
  taskId,
  ownerId,
  attemptToken,
  repoRoot = process.cwd(),
) {
  const registryPath = getRegistryPath(repoRoot);

  try {
    const registry = await loadRegistry(registryPath);
    const state = registry.tasks[taskId];

    // Work out the first reason (if any) this renewal must be refused.
    let rejection = null;
    if (!state) {
      rejection = "task_not_found";
    } else if (state.ownerId !== ownerId) {
      rejection = "owner_mismatch";
    } else if (state.attemptToken !== attemptToken) {
      rejection = "attempt_token_mismatch";
    } else if (["complete", "failed"].includes(state.attemptStatus)) {
      rejection = `task_already_${state.attemptStatus}`;
    }

    if (rejection !== null) {
      return { success: false, reason: rejection };
    }

    state.ownerHeartbeat = new Date().toISOString();
    state.attemptStatus = "working";
    logEvent(state, "renewed", ownerId);

    await saveRegistry(registryPath, registry);

    return { success: true };
  } catch (error) {
    console.error("[SharedStateManager] Failed to renew heartbeat:", error);
    return { success: false, reason: `error: ${error.message}` };
  }
}
|
|
424
|
+
|
|
425
|
+
/**
 * Release a claim by recording its final status.
 *
 * The stored attempt token must match `attemptToken`. On success the status
 * and heartbeat timestamp are updated, `lastError` is set when an error
 * message is supplied, and a "released" event is appended under the record's
 * current owner.
 *
 * @param {string} taskId - Task identifier
 * @param {string} attemptToken - Attempt token for verification
 * @param {'complete'|'failed'|'abandoned'} status - Final status
 * @param {string} [errorMessage] - Error message if failed
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<{success: boolean, reason?: string}>}
 */
export async function releaseSharedState(
  taskId,
  attemptToken,
  status,
  errorMessage,
  repoRoot = process.cwd(),
) {
  const registryPath = getRegistryPath(repoRoot);

  try {
    const registry = await loadRegistry(registryPath);
    const state = registry.tasks[taskId];

    let rejection = null;
    if (!state) {
      rejection = "task_not_found";
    } else if (state.attemptToken !== attemptToken) {
      rejection = "attempt_token_mismatch";
    }

    if (rejection !== null) {
      return { success: false, reason: rejection };
    }

    const releasedAt = new Date().toISOString();
    state.attemptStatus = status;
    state.ownerHeartbeat = releasedAt;

    if (errorMessage) {
      state.lastError = errorMessage;
    }

    logEvent(state, "released", state.ownerId, `status: ${status}`);

    await saveRegistry(registryPath, registry);

    return { success: true };
  } catch (error) {
    console.error("[SharedStateManager] Failed to release state:", error);
    return { success: false, reason: `error: ${error.message}` };
  }
}
|
|
483
|
+
|
|
484
|
+
/**
 * Look up the stored shared state of a single task.
 *
 * @param {string} taskId - Task identifier
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<TaskSharedState|null>} The task's record, or null when it
 *   is absent or the lookup failed
 */
export async function getSharedState(taskId, repoRoot = process.cwd()) {
  const registryPath = getRegistryPath(repoRoot);

  try {
    const { tasks } = await loadRegistry(registryPath);
    const found = tasks[taskId];
    return found || null;
  } catch (error) {
    console.error("[SharedStateManager] Failed to get shared state:", error);
    return null;
  }
}
|
|
502
|
+
|
|
503
|
+
/**
 * Return every task record in the registry (for monitoring/debugging).
 *
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<Object.<string, TaskSharedState>>} Map of taskId to state;
 *   an empty object when the lookup failed
 */
export async function getAllSharedStates(repoRoot = process.cwd()) {
  const registryPath = getRegistryPath(repoRoot);

  try {
    const { tasks } = await loadRegistry(registryPath);
    return tasks || {};
  } catch (error) {
    console.error(
      "[SharedStateManager] Failed to get all shared states:",
      error,
    );
    return {};
  }
}
|
|
523
|
+
|
|
524
|
+
/**
 * Sweep through tasks and mark stale owners as abandoned.
 *
 * Only records that still represent a live claim are examined. Records that
 * are already complete, failed, abandoned or ignored, and records carrying an
 * ignore flag, are skipped. A single threshold is applied to every task; the
 * per-task `ttlSeconds` is not consulted here.
 *
 * The registry is rewritten only when at least one task was newly abandoned.
 * Errors are logged and reported as an empty result.
 *
 * @param {number} staleThresholdMs - Heartbeat staleness threshold in milliseconds
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<{sweptCount: number, abandonedTasks: string[]}>} Tasks
 *   newly marked abandoned by this sweep
 */
export async function sweepStaleSharedStates(
  staleThresholdMs,
  repoRoot = process.cwd(),
) {
  const registryPath = getRegistryPath(repoRoot);

  // Statuses with no live owner left to expire. "abandoned" must be here:
  // otherwise every sweep re-marks the same records, re-reports them as newly
  // abandoned, floods their bounded event log and rewrites the registry.
  const settledStatuses = ["complete", "failed", "abandoned", "ignored"];

  try {
    const registry = await loadRegistry(registryPath);
    const abandonedTasks = [];

    for (const [taskId, state] of Object.entries(registry.tasks)) {
      if (settledStatuses.includes(state.attemptStatus)) {
        continue;
      }

      // Skip tasks with ignore flag
      if (state.ignoreReason) {
        continue;
      }

      if (isHeartbeatStale(state.ownerHeartbeat, staleThresholdMs)) {
        state.attemptStatus = "abandoned";
        state.lastError = `Heartbeat stale (last: ${state.ownerHeartbeat})`;
        logEvent(state, "abandoned", "system", "stale_heartbeat");

        abandonedTasks.push(taskId);
      }
    }

    const sweptCount = abandonedTasks.length;

    if (sweptCount > 0) {
      await saveRegistry(registryPath, registry);
    }

    return { sweptCount, abandonedTasks };
  } catch (error) {
    console.error("[SharedStateManager] Failed to sweep stale states:", error);
    return { sweptCount: 0, abandonedTasks: [] };
  }
}
|
|
576
|
+
|
|
577
|
+
/**
 * Decide whether a task is eligible for another attempt.
 *
 * Checks, in order: no stored record (retry), ignore flag (no), already
 * complete (no), retry budget exhausted (no), currently claimed/working with
 * a fresh heartbeat (no). Anything else is eligible. If the check itself
 * fails, the task is reported as retryable.
 *
 * @param {string} taskId - Task identifier
 * @param {number} maxRetries - Maximum retry attempts
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<{shouldRetry: boolean, reason: string}>}
 */
export async function shouldRetryTask(
  taskId,
  maxRetries,
  repoRoot = process.cwd(),
) {
  const verdict = (shouldRetry, reason) => ({ shouldRetry, reason });

  try {
    const state = await getSharedState(taskId, repoRoot);

    if (!state) {
      return verdict(true, "no_previous_attempts");
    }
    if (state.ignoreReason) {
      return verdict(false, `ignored: ${state.ignoreReason}`);
    }
    if (state.attemptStatus === "complete") {
      return verdict(false, "already_complete");
    }
    if (state.retryCount >= maxRetries) {
      return verdict(
        false,
        `max_retries_exceeded: ${state.retryCount}/${maxRetries}`,
      );
    }

    // An in-flight attempt blocks a retry only while its owner is still alive.
    const inFlight =
      state.attemptStatus === "claimed" || state.attemptStatus === "working";
    if (inFlight) {
      const ttlMs = (state.ttlSeconds || DEFAULT_TTL_SECONDS) * 1000;
      const ownerAlive = !isHeartbeatStale(state.ownerHeartbeat, ttlMs);
      if (ownerAlive) {
        return verdict(false, "currently_owned_by_active_agent");
      }
    }

    return verdict(true, "eligible_for_retry");
  } catch (error) {
    console.error(
      "[SharedStateManager] Failed to check retry eligibility:",
      error,
    );
    return verdict(true, "error_checking_state");
  }
}
|
|
635
|
+
|
|
636
|
+
/**
 * Flag a task so agents will not claim it.
 *
 * An existing record only gains (or replaces) its `ignoreReason`; an unknown
 * task gets a placeholder record owned by "system" with status "ignored".
 * Either way an "ignored" event is appended and the registry is saved.
 *
 * @param {string} taskId - Task identifier
 * @param {string} reason - Reason for ignoring (e.g., "human_created", "invalid_spec")
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<{success: boolean, reason?: string}>}
 */
export async function setIgnoreFlag(taskId, reason, repoRoot = process.cwd()) {
  const registryPath = getRegistryPath(repoRoot);

  try {
    const registry = await loadRegistry(registryPath);
    const known = registry.tasks[taskId];

    let state;
    if (known) {
      known.ignoreReason = reason;
      state = known;
    } else {
      // Placeholder entry for a task nobody has claimed yet.
      state = {
        taskId,
        ownerId: "system",
        ownerHeartbeat: new Date().toISOString(),
        attemptToken: "N/A",
        attemptStarted: new Date().toISOString(),
        attemptStatus: "ignored",
        retryCount: 0,
        ignoreReason: reason,
        eventLog: [],
      };
    }

    logEvent(state, "ignored", "system", reason);
    registry.tasks[taskId] = state;
    await saveRegistry(registryPath, registry);

    return { success: true };
  } catch (error) {
    console.error("[SharedStateManager] Failed to set ignore flag:", error);
    return { success: false, reason: `error: ${error.message}` };
  }
}
|
|
681
|
+
|
|
682
|
+
/**
 * Remove the ignore flag from a task.
 *
 * Fails with "task_not_found" for an unknown task and "not_ignored" when the
 * record carries no ignore reason. On success the `ignoreReason` property is
 * deleted, an "unignored" event is appended and the registry is saved.
 *
 * @param {string} taskId - Task identifier
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<{success: boolean, reason?: string}>}
 */
export async function clearIgnoreFlag(taskId, repoRoot = process.cwd()) {
  const registryPath = getRegistryPath(repoRoot);

  try {
    const registry = await loadRegistry(registryPath);
    const state = registry.tasks[taskId];

    let rejection = null;
    if (!state) {
      rejection = "task_not_found";
    } else if (!state.ignoreReason) {
      rejection = "not_ignored";
    }

    if (rejection !== null) {
      return { success: false, reason: rejection };
    }

    delete state.ignoreReason;
    logEvent(state, "unignored", "system");
    await saveRegistry(registryPath, registry);

    return { success: true };
  } catch (error) {
    console.error("[SharedStateManager] Failed to clear ignore flag:", error);
    return { success: false, reason: `error: ${error.message}` };
  }
}
|
|
723
|
+
|
|
724
|
+
/**
 * Delete complete/failed task records whose last heartbeat timestamp is older
 * than the retention window.
 *
 * Records in any other status are never removed. The registry is saved only
 * when something was deleted. Errors are logged and reported as an empty
 * result.
 *
 * @param {number} retentionDays - Days to retain completed tasks
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<{cleanedCount: number, cleanedTasks: string[]}>}
 */
export async function cleanupOldStates(
  retentionDays,
  repoRoot = process.cwd(),
) {
  const registryPath = getRegistryPath(repoRoot);
  const retentionMs = retentionDays * 24 * 60 * 60 * 1000;
  const cutoffTime = Date.now() - retentionMs;

  const isExpired = (state) => {
    const finished =
      state.attemptStatus === "complete" || state.attemptStatus === "failed";
    if (!finished) {
      return false;
    }
    // The heartbeat doubles as the record's last-update time.
    return new Date(state.ownerHeartbeat).getTime() < cutoffTime;
  };

  try {
    const registry = await loadRegistry(registryPath);

    const cleanedTasks = Object.entries(registry.tasks)
      .filter(([, state]) => isExpired(state))
      .map(([taskId]) => taskId);

    for (const taskId of cleanedTasks) {
      delete registry.tasks[taskId];
    }

    const cleanedCount = cleanedTasks.length;

    if (cleanedCount > 0) {
      await saveRegistry(registryPath, registry);
      console.log(
        `[SharedStateManager] Cleaned up ${cleanedCount} old task states`,
      );
    }

    return { cleanedCount, cleanedTasks };
  } catch (error) {
    console.error("[SharedStateManager] Failed to cleanup old states:", error);
    return { cleanedCount: 0, cleanedTasks: [] };
  }
}
|
|
775
|
+
|
|
776
|
+
/**
 * Summarize the registry: a total, per-status counts, the number of
 * non-terminal records whose heartbeat is stale, and a per-owner tally.
 *
 * A record with an ignore flag is counted under `ignored` regardless of its
 * status. Staleness is judged against each record's own `ttlSeconds`
 * (falling back to DEFAULT_TTL_SECONDS). On failure, an all-zero summary is
 * returned.
 *
 * @param {string} [repoRoot] - Repository root path
 * @returns {Promise<Object>}
 */
export async function getStateStatistics(repoRoot = process.cwd()) {
  const zeroedStats = () => ({
    total: 0,
    claimed: 0,
    working: 0,
    complete: 0,
    failed: 0,
    abandoned: 0,
    ignored: 0,
    stale: 0,
    byOwner: {},
  });

  // Statuses for which heartbeat staleness is not meaningful.
  const notTrackedForStaleness = ["complete", "failed", "ignored"];

  try {
    const registry = await loadRegistry(getRegistryPath(repoRoot));
    const stats = zeroedStats();

    for (const state of Object.values(registry.tasks)) {
      stats.total++;

      if (state.ignoreReason) {
        stats.ignored++;
      } else {
        stats[state.attemptStatus] = (stats[state.attemptStatus] || 0) + 1;
      }

      if (!notTrackedForStaleness.includes(state.attemptStatus)) {
        const staleMs = (state.ttlSeconds || DEFAULT_TTL_SECONDS) * 1000;
        if (isHeartbeatStale(state.ownerHeartbeat, staleMs)) {
          stats.stale++;
        }
      }

      const owner = state.ownerId;
      if (!stats.byOwner[owner]) {
        stats.byOwner[owner] = 0;
      }
      stats.byOwner[owner]++;
    }

    return stats;
  } catch (error) {
    console.error("[SharedStateManager] Failed to get statistics:", error);
    return zeroedStats();
  }
}
|
|
839
|
+
|
|
840
|
+
// Export constants for external use
|
|
841
|
+
export { REGISTRY_VERSION, DEFAULT_TTL_SECONDS, MAX_EVENT_LOG_ENTRIES };
|