@botbotgo/agent-harness 0.0.104 → 0.0.105

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.103";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.104";
@@ -1 +1 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.103";
1
+ export const AGENT_HARNESS_VERSION = "0.0.104";
@@ -24,18 +24,6 @@ type HealthMonitorConfig = {
24
24
  degradedAbove: number;
25
25
  unhealthyAbove: number;
26
26
  };
27
- checkpointBytes: {
28
- degradedAbove: number;
29
- unhealthyAbove: number;
30
- };
31
- runtimeDbBytes: {
32
- degradedAbove: number;
33
- unhealthyAbove: number;
34
- };
35
- artifactBytes: {
36
- degradedAbove: number;
37
- unhealthyAbove: number;
38
- };
39
27
  };
40
28
  };
41
29
  type HealthMonitorOptions = {
@@ -51,12 +39,9 @@ export declare function readHealthMonitorConfig(workspace: WorkspaceBundle): Hea
51
39
  export declare class HealthMonitor {
52
40
  private readonly options;
53
41
  private readonly config;
54
- private readonly runRoots;
55
- private readonly checkpointDbPaths;
56
42
  private readonly llmSamples;
57
43
  private timer;
58
44
  private latestSnapshot;
59
- private runtimeEventSequence;
60
45
  constructor(options: HealthMonitorOptions);
61
46
  recordLlmSuccess(latencyMs: number, nowMs?: number): void;
62
47
  recordLlmFailure(latencyMs: number, nowMs?: number): void;
@@ -67,15 +52,7 @@ export declare class HealthMonitor {
67
52
  evaluate(nowMs?: number): Promise<RuntimeHealthSnapshot>;
68
53
  private llmStats;
69
54
  private evaluateLlmCheck;
70
- private llmSymptoms;
71
- private evaluatePersistenceCheck;
72
- private evaluateCapacityCheck;
73
- private capacitySymptoms;
74
55
  private evaluateWorkloadCheck;
75
- private workloadSymptoms;
76
56
  private countStuckRuns;
77
- private sumRuntimeDbBytes;
78
- private sumCheckpointDbBytes;
79
- private sumArtifactBytes;
80
57
  }
81
58
  export {};
@@ -1,7 +1,4 @@
1
- import path from "node:path";
2
- import { readdir, stat } from "node:fs/promises";
3
1
  import { getRuntimeDefaults } from "../../../workspace/support/workspace-ref-utils.js";
4
- import { discoverCheckpointMaintenanceTargets } from "../../maintenance/checkpoint-maintenance.js";
5
2
  const DEFAULT_HEALTH_CONFIG = {
6
3
  enabled: false,
7
4
  evaluateIntervalSeconds: 30,
@@ -25,18 +22,6 @@ const DEFAULT_HEALTH_CONFIG = {
25
22
  degradedAbove: 300,
26
23
  unhealthyAbove: 900,
27
24
  },
28
- checkpointBytes: {
29
- degradedAbove: 512 * 1024 * 1024,
30
- unhealthyAbove: 2 * 1024 * 1024 * 1024,
31
- },
32
- runtimeDbBytes: {
33
- degradedAbove: 256 * 1024 * 1024,
34
- unhealthyAbove: 1024 * 1024 * 1024,
35
- },
36
- artifactBytes: {
37
- degradedAbove: 512 * 1024 * 1024,
38
- unhealthyAbove: 2 * 1024 * 1024 * 1024,
39
- },
40
25
  },
41
26
  };
42
27
  function asObject(value) {
@@ -52,23 +37,18 @@ function maxStatus(left, right) {
52
37
  const rank = { healthy: 0, degraded: 1, unhealthy: 2 };
53
38
  return rank[left] >= rank[right] ? left : right;
54
39
  }
55
- function compareStatus(left, right) {
56
- return left === right;
57
- }
58
- function computeP95(values) {
59
- if (values.length === 0) {
60
- return undefined;
61
- }
62
- const sorted = [...values].sort((a, b) => a - b);
63
- const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(sorted.length * 0.95) - 1));
64
- return sorted[index];
65
- }
66
- function isoFromMs(value) {
67
- return new Date(value).toISOString();
68
- }
69
40
  function buildCheck(status, updatedAt, reason) {
70
41
  return reason ? { status, updatedAt, reason } : { status, updatedAt };
71
42
  }
43
+ function createSymptom(code, status, message, timestamp) {
44
+ return {
45
+ code,
46
+ severity: status === "unhealthy" ? "error" : status === "degraded" ? "warn" : "info",
47
+ message,
48
+ firstSeenAt: timestamp,
49
+ lastSeenAt: timestamp,
50
+ };
51
+ }
72
52
  function describeThresholdStatus(value, thresholds) {
73
53
  if (value >= thresholds.unhealthyAbove) {
74
54
  return "unhealthy";
@@ -78,6 +58,14 @@ function describeThresholdStatus(value, thresholds) {
78
58
  }
79
59
  return "healthy";
80
60
  }
61
+ function computeP95(values) {
62
+ if (values.length === 0) {
63
+ return undefined;
64
+ }
65
+ const sorted = [...values].sort((a, b) => a - b);
66
+ const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(sorted.length * 0.95) - 1));
67
+ return sorted[index];
68
+ }
81
69
  export function readHealthMonitorConfig(workspace) {
82
70
  const runtimeDefaults = getRuntimeDefaults(workspace.refs);
83
71
  const observability = asObject(runtimeDefaults?.observability);
@@ -87,9 +75,6 @@ export function readHealthMonitorConfig(workspace) {
87
75
  const llmP95LatencyMs = asObject(thresholds?.llmP95LatencyMs);
88
76
  const pendingApprovals = asObject(thresholds?.pendingApprovals);
89
77
  const stuckRunSeconds = asObject(thresholds?.stuckRunSeconds);
90
- const checkpointBytes = asObject(thresholds?.checkpointBytes);
91
- const runtimeDbBytes = asObject(thresholds?.runtimeDbBytes);
92
- const artifactBytes = asObject(thresholds?.artifactBytes);
93
78
  return {
94
79
  enabled: health?.enabled === true,
95
80
  evaluateIntervalSeconds: readPositiveNumber(health?.evaluateIntervalSeconds, DEFAULT_HEALTH_CONFIG.evaluateIntervalSeconds),
@@ -113,112 +98,18 @@ export function readHealthMonitorConfig(workspace) {
113
98
  degradedAbove: readNonNegativeNumber(stuckRunSeconds?.degradedAbove, DEFAULT_HEALTH_CONFIG.thresholds.stuckRunSeconds.degradedAbove),
114
99
  unhealthyAbove: readNonNegativeNumber(stuckRunSeconds?.unhealthyAbove, DEFAULT_HEALTH_CONFIG.thresholds.stuckRunSeconds.unhealthyAbove),
115
100
  },
116
- checkpointBytes: {
117
- degradedAbove: readNonNegativeNumber(checkpointBytes?.degradedAbove, DEFAULT_HEALTH_CONFIG.thresholds.checkpointBytes.degradedAbove),
118
- unhealthyAbove: readNonNegativeNumber(checkpointBytes?.unhealthyAbove, DEFAULT_HEALTH_CONFIG.thresholds.checkpointBytes.unhealthyAbove),
119
- },
120
- runtimeDbBytes: {
121
- degradedAbove: readNonNegativeNumber(runtimeDbBytes?.degradedAbove, DEFAULT_HEALTH_CONFIG.thresholds.runtimeDbBytes.degradedAbove),
122
- unhealthyAbove: readNonNegativeNumber(runtimeDbBytes?.unhealthyAbove, DEFAULT_HEALTH_CONFIG.thresholds.runtimeDbBytes.unhealthyAbove),
123
- },
124
- artifactBytes: {
125
- degradedAbove: readNonNegativeNumber(artifactBytes?.degradedAbove, DEFAULT_HEALTH_CONFIG.thresholds.artifactBytes.degradedAbove),
126
- unhealthyAbove: readNonNegativeNumber(artifactBytes?.unhealthyAbove, DEFAULT_HEALTH_CONFIG.thresholds.artifactBytes.unhealthyAbove),
127
- },
128
101
  },
129
102
  };
130
103
  }
131
- async function safeFileSize(filePath) {
132
- try {
133
- const stats = await stat(filePath);
134
- return stats.size;
135
- }
136
- catch {
137
- return 0;
138
- }
139
- }
140
- async function directorySize(root) {
141
- try {
142
- const entries = await readdir(root, { withFileTypes: true });
143
- let total = 0;
144
- for (const entry of entries) {
145
- const fullPath = path.join(root, entry.name);
146
- if (entry.isDirectory()) {
147
- total += await directorySize(fullPath);
148
- continue;
149
- }
150
- if (entry.isFile()) {
151
- total += await safeFileSize(fullPath);
152
- }
153
- }
154
- return total;
155
- }
156
- catch {
157
- return 0;
158
- }
159
- }
160
- function evaluateMaintenanceStatus(label, status, updatedAt) {
161
- if (!status) {
162
- return {
163
- check: buildCheck("healthy", updatedAt, `${label} disabled`),
164
- symptoms: [],
165
- };
166
- }
167
- if (status.consecutiveFailures > 0) {
168
- return {
169
- check: buildCheck("degraded", updatedAt, `${label} has ${status.consecutiveFailures} consecutive failure(s)`),
170
- symptoms: [{
171
- code: `${label}.loop.failure`,
172
- severity: "warn",
173
- message: status.lastError ?? `${label} maintenance loop failed`,
174
- firstSeenAt: status.lastFailedAt ?? updatedAt,
175
- lastSeenAt: status.lastFailedAt ?? updatedAt,
176
- }],
177
- };
178
- }
179
- if (status.lastCompletedAt) {
180
- return {
181
- check: buildCheck("healthy", updatedAt, `${label} completed successfully`),
182
- symptoms: [],
183
- };
184
- }
185
- return {
186
- check: buildCheck("healthy", updatedAt, `${label} idle`),
187
- symptoms: [],
188
- };
189
- }
190
- function normalizeSeverity(status) {
191
- if (status === "unhealthy") {
192
- return "error";
193
- }
194
- if (status === "degraded") {
195
- return "warn";
196
- }
197
- return "info";
198
- }
199
- function createSymptom(code, status, message, timestamp) {
200
- return {
201
- code,
202
- severity: normalizeSeverity(status),
203
- message,
204
- firstSeenAt: timestamp,
205
- lastSeenAt: timestamp,
206
- };
207
- }
208
104
  export class HealthMonitor {
209
105
  options;
210
106
  config;
211
- runRoots;
212
- checkpointDbPaths;
213
107
  llmSamples = [];
214
108
  timer = null;
215
109
  latestSnapshot = null;
216
- runtimeEventSequence = 0;
217
110
  constructor(options) {
218
111
  this.options = options;
219
112
  this.config = readHealthMonitorConfig(options.workspace);
220
- this.runRoots = Array.from(new Set(Array.from(options.workspace.bindings.values()).map((binding) => binding.harnessRuntime.runRoot)));
221
- this.checkpointDbPaths = Array.from(new Set(discoverCheckpointMaintenanceTargets(options.workspace).map((target) => target.dbPath)));
222
113
  }
223
114
  recordLlmSuccess(latencyMs, nowMs = Date.now()) {
224
115
  this.recordLlmSample({ timestampMs: nowMs, latencyMs, success: true });
@@ -253,172 +144,96 @@ export class HealthMonitor {
253
144
  return this.evaluate();
254
145
  }
255
146
  async evaluate(nowMs = Date.now()) {
256
- const updatedAt = isoFromMs(nowMs);
257
- const [runs, approvals, runtimeDbBytes, checkpointBytes, artifactBytes] = await Promise.all([
147
+ const updatedAt = new Date(nowMs).toISOString();
148
+ const [runs, approvals] = await Promise.all([
258
149
  this.options.persistence.listRuns(),
259
150
  this.options.persistence.listApprovals(),
260
- this.sumRuntimeDbBytes(),
261
- this.sumCheckpointDbBytes(),
262
- this.sumArtifactBytes(),
263
151
  ]);
264
152
  const pendingApprovals = approvals.filter((approval) => approval.status === "pending").length;
265
153
  const stuckRuns = this.countStuckRuns(runs, nowMs);
266
- const runtimeMaintenance = evaluateMaintenanceStatus("checkpoint", this.options.getCheckpointMaintenanceStatus(), updatedAt);
267
- const recordMaintenance = evaluateMaintenanceStatus("records", this.options.getRuntimeRecordMaintenanceStatus(), updatedAt);
268
- let runtimeCheck = buildCheck("healthy", updatedAt, "runtime loops healthy");
269
- const runtimeSymptoms = [...runtimeMaintenance.symptoms, ...recordMaintenance.symptoms];
270
- runtimeCheck = buildCheck(maxStatus(runtimeMaintenance.check.status, recordMaintenance.check.status), updatedAt, runtimeSymptoms.length > 0 ? runtimeSymptoms.map((symptom) => symptom.message).join("; ") : "runtime loops healthy");
271
154
  const llmCheck = this.evaluateLlmCheck(updatedAt, nowMs);
272
- const persistenceCheck = this.evaluatePersistenceCheck(updatedAt, runtimeDbBytes, runtimeSymptoms);
273
- const capacityCheck = this.evaluateCapacityCheck(updatedAt, checkpointBytes, runtimeDbBytes, artifactBytes);
274
155
  const workloadCheck = this.evaluateWorkloadCheck(updatedAt, pendingApprovals, stuckRuns);
275
- const symptoms = [
276
- ...runtimeSymptoms,
277
- ...this.capacitySymptoms(capacityCheck, checkpointBytes, runtimeDbBytes, artifactBytes, updatedAt),
278
- ...this.workloadSymptoms(workloadCheck, pendingApprovals, stuckRuns, updatedAt),
279
- ...this.llmSymptoms(llmCheck, updatedAt),
280
- ...(persistenceCheck.status === "healthy"
281
- ? []
282
- : [createSymptom("persistence.runtime-db", persistenceCheck.status, persistenceCheck.reason ?? "runtime persistence degraded", updatedAt)]),
283
- ];
284
- const checks = {
285
- runtime: runtimeCheck,
286
- llm: llmCheck,
287
- persistence: persistenceCheck,
288
- capacity: capacityCheck,
289
- workload: workloadCheck,
290
- };
156
+ const runtimeCheck = buildCheck("healthy", updatedAt, "runtime health monitoring enabled");
157
+ const persistenceCheck = buildCheck("healthy", updatedAt, "runtime persistence accessible");
158
+ const capacityCheck = buildCheck("healthy", updatedAt, "capacity checks disabled");
159
+ const symptoms = [];
160
+ if (describeThresholdStatus(pendingApprovals, this.config.thresholds.pendingApprovals) !== "healthy") {
161
+ symptoms.push(createSymptom("workload.pending-approvals", describeThresholdStatus(pendingApprovals, this.config.thresholds.pendingApprovals), `pendingApprovals=${pendingApprovals}`, updatedAt));
162
+ }
163
+ if (stuckRuns > 0) {
164
+ symptoms.push(createSymptom("workload.stuck-runs", workloadCheck.status, `stuckRuns=${stuckRuns}`, updatedAt));
165
+ }
166
+ if (llmCheck.status !== "healthy") {
167
+ symptoms.push(createSymptom("llm.invocations", llmCheck.status, llmCheck.reason ?? "llm health degraded", updatedAt));
168
+ }
291
169
  let overallStatus = "healthy";
292
- for (const check of Object.values(checks)) {
170
+ for (const check of [runtimeCheck, llmCheck, persistenceCheck, capacityCheck, workloadCheck]) {
293
171
  overallStatus = maxStatus(overallStatus, check.status);
294
172
  }
295
173
  const snapshot = {
296
174
  status: overallStatus,
297
175
  updatedAt,
298
- checks,
176
+ checks: {
177
+ runtime: runtimeCheck,
178
+ llm: llmCheck,
179
+ persistence: persistenceCheck,
180
+ capacity: capacityCheck,
181
+ workload: workloadCheck,
182
+ },
299
183
  symptoms,
300
184
  stats: {
301
185
  activeRunSlots: this.options.getActiveRunSlots(),
302
186
  pendingRunSlots: this.options.getPendingRunSlots(),
303
187
  pendingApprovals,
304
188
  stuckRuns,
305
- checkpointBytes,
306
- runtimeDbBytes,
307
- artifactBytes,
308
189
  ...this.llmStats(nowMs),
309
190
  },
310
191
  };
311
- const previous = this.latestSnapshot;
312
- this.latestSnapshot = snapshot;
313
192
  if (this.config.enabled &&
314
193
  this.config.emitEvents &&
315
194
  this.options.publishEvent &&
316
- previous &&
317
- !compareStatus(previous.status, snapshot.status)) {
195
+ this.latestSnapshot &&
196
+ this.latestSnapshot.status !== snapshot.status) {
318
197
  await this.options.publishEvent({
319
- previousStatus: previous.status,
198
+ previousStatus: this.latestSnapshot.status,
320
199
  status: snapshot.status,
321
200
  checks: snapshot.checks,
322
201
  stats: snapshot.stats,
323
202
  });
324
203
  }
204
+ this.latestSnapshot = snapshot;
325
205
  return snapshot;
326
206
  }
327
207
  llmStats(nowMs) {
328
- const oneMinuteWindowMs = 60 * 1000;
329
- const recent = this.llmSamples.filter((sample) => nowMs - sample.timestampMs <= oneMinuteWindowMs);
208
+ const recent = this.llmSamples.filter((sample) => nowMs - sample.timestampMs <= 60 * 1000);
330
209
  const successes = recent.filter((sample) => sample.success).length;
331
- const latencies = recent.map((sample) => sample.latencyMs);
332
210
  return {
333
211
  llmSuccessRate1m: recent.length > 0 ? successes / recent.length : undefined,
334
- llmP95LatencyMs1m: computeP95(latencies),
212
+ llmP95LatencyMs1m: computeP95(recent.map((sample) => sample.latencyMs)),
335
213
  };
336
214
  }
337
215
  evaluateLlmCheck(updatedAt, nowMs) {
338
- const errorWindow = this.config.thresholds.llmErrorRate.windowSeconds * 1000;
339
- const latencyWindow = this.config.thresholds.llmP95LatencyMs.windowSeconds * 1000;
340
- const errorSamples = this.llmSamples.filter((sample) => nowMs - sample.timestampMs <= errorWindow);
341
- const latencySamples = this.llmSamples.filter((sample) => nowMs - sample.timestampMs <= latencyWindow);
216
+ const errorSamples = this.llmSamples.filter((sample) => nowMs - sample.timestampMs <= this.config.thresholds.llmErrorRate.windowSeconds * 1000);
217
+ const latencySamples = this.llmSamples.filter((sample) => nowMs - sample.timestampMs <= this.config.thresholds.llmP95LatencyMs.windowSeconds * 1000);
342
218
  if (errorSamples.length === 0 && latencySamples.length === 0) {
343
219
  return buildCheck("healthy", updatedAt, "no recent llm invocations");
344
220
  }
345
- const errors = errorSamples.filter((sample) => !sample.success).length;
346
- const errorRate = errorSamples.length > 0 ? errors / errorSamples.length : 0;
221
+ const errorRate = errorSamples.length > 0
222
+ ? errorSamples.filter((sample) => !sample.success).length / errorSamples.length
223
+ : 0;
347
224
  const p95Latency = computeP95(latencySamples.map((sample) => sample.latencyMs));
348
- let status = "healthy";
349
- if (errorRate >= this.config.thresholds.llmErrorRate.unhealthyAbove) {
350
- status = "unhealthy";
351
- }
352
- else if (errorRate >= this.config.thresholds.llmErrorRate.degradedAbove) {
353
- status = "degraded";
354
- }
225
+ let status = describeThresholdStatus(errorRate, this.config.thresholds.llmErrorRate);
355
226
  if (p95Latency !== undefined) {
356
- const latencyStatus = describeThresholdStatus(p95Latency, this.config.thresholds.llmP95LatencyMs);
357
- status = maxStatus(status, latencyStatus);
227
+ status = maxStatus(status, describeThresholdStatus(p95Latency, this.config.thresholds.llmP95LatencyMs));
358
228
  }
359
229
  return buildCheck(status, updatedAt, `errorRate=${errorRate.toFixed(2)}${p95Latency !== undefined ? ` p95LatencyMs=${Math.round(p95Latency)}` : ""}`);
360
230
  }
361
- llmSymptoms(check, updatedAt) {
362
- if (check.status === "healthy") {
363
- return [];
364
- }
365
- return [createSymptom("llm.invocations", check.status, check.reason ?? "llm health degraded", updatedAt)];
366
- }
367
- evaluatePersistenceCheck(updatedAt, runtimeDbBytes, runtimeSymptoms) {
368
- if (runtimeSymptoms.length > 0) {
369
- return buildCheck("degraded", updatedAt, runtimeSymptoms.map((symptom) => symptom.message).join("; "));
370
- }
371
- if (runtimeDbBytes === 0) {
372
- return buildCheck("healthy", updatedAt, "runtime sqlite not materialized yet");
373
- }
374
- return buildCheck("healthy", updatedAt, "runtime sqlite accessible");
375
- }
376
- evaluateCapacityCheck(updatedAt, checkpointBytes, runtimeDbBytes, artifactBytes) {
377
- let status = "healthy";
378
- status = maxStatus(status, describeThresholdStatus(checkpointBytes, this.config.thresholds.checkpointBytes));
379
- status = maxStatus(status, describeThresholdStatus(runtimeDbBytes, this.config.thresholds.runtimeDbBytes));
380
- status = maxStatus(status, describeThresholdStatus(artifactBytes, this.config.thresholds.artifactBytes));
381
- return buildCheck(status, updatedAt, `checkpointBytes=${checkpointBytes} runtimeDbBytes=${runtimeDbBytes} artifactBytes=${artifactBytes}`);
382
- }
383
- capacitySymptoms(check, checkpointBytes, runtimeDbBytes, artifactBytes, updatedAt) {
384
- if (check.status === "healthy") {
385
- return [];
386
- }
387
- const symptoms = [];
388
- if (describeThresholdStatus(checkpointBytes, this.config.thresholds.checkpointBytes) !== "healthy") {
389
- symptoms.push(createSymptom("capacity.checkpoints", describeThresholdStatus(checkpointBytes, this.config.thresholds.checkpointBytes), `checkpointBytes=${checkpointBytes}`, updatedAt));
390
- }
391
- if (describeThresholdStatus(runtimeDbBytes, this.config.thresholds.runtimeDbBytes) !== "healthy") {
392
- symptoms.push(createSymptom("capacity.runtime-db", describeThresholdStatus(runtimeDbBytes, this.config.thresholds.runtimeDbBytes), `runtimeDbBytes=${runtimeDbBytes}`, updatedAt));
393
- }
394
- if (describeThresholdStatus(artifactBytes, this.config.thresholds.artifactBytes) !== "healthy") {
395
- symptoms.push(createSymptom("capacity.artifacts", describeThresholdStatus(artifactBytes, this.config.thresholds.artifactBytes), `artifactBytes=${artifactBytes}`, updatedAt));
396
- }
397
- return symptoms;
398
- }
399
231
  evaluateWorkloadCheck(updatedAt, pendingApprovals, stuckRuns) {
400
232
  let status = describeThresholdStatus(pendingApprovals, this.config.thresholds.pendingApprovals);
401
- status = maxStatus(status, describeThresholdStatus(stuckRuns, {
402
- degradedAbove: this.config.thresholds.stuckRunSeconds.degradedAbove > 0 ? 1 : Number.MAX_SAFE_INTEGER,
403
- unhealthyAbove: this.config.thresholds.stuckRunSeconds.unhealthyAbove > 0 ? 1 : Number.MAX_SAFE_INTEGER,
404
- }));
405
- return buildCheck(status, updatedAt, `pendingApprovals=${pendingApprovals} stuckRuns=${stuckRuns}`);
406
- }
407
- workloadSymptoms(check, pendingApprovals, stuckRuns, updatedAt) {
408
- if (check.status === "healthy") {
409
- return [];
410
- }
411
- const symptoms = [];
412
- if (describeThresholdStatus(pendingApprovals, this.config.thresholds.pendingApprovals) !== "healthy") {
413
- symptoms.push(createSymptom("workload.pending-approvals", describeThresholdStatus(pendingApprovals, this.config.thresholds.pendingApprovals), `pendingApprovals=${pendingApprovals}`, updatedAt));
414
- }
415
233
  if (stuckRuns > 0) {
416
- const stuckStatus = stuckRuns >= 1 && this.config.thresholds.stuckRunSeconds.unhealthyAbove <= this.config.thresholds.stuckRunSeconds.degradedAbove
417
- ? "unhealthy"
418
- : "degraded";
419
- symptoms.push(createSymptom("workload.stuck-runs", stuckStatus, `stuckRuns=${stuckRuns}`, updatedAt));
234
+ status = maxStatus(status, stuckRuns >= this.config.thresholds.stuckRunSeconds.unhealthyAbove ? "unhealthy" : "degraded");
420
235
  }
421
- return symptoms;
236
+ return buildCheck(status, updatedAt, `pendingApprovals=${pendingApprovals} stuckRuns=${stuckRuns}`);
422
237
  }
423
238
  countStuckRuns(runs, nowMs) {
424
239
  return runs.filter((run) => {
@@ -429,20 +244,7 @@ export class HealthMonitor {
429
244
  if (!Number.isFinite(updatedAtMs)) {
430
245
  return false;
431
246
  }
432
- const ageSeconds = (nowMs - updatedAtMs) / 1000;
433
- return ageSeconds >= this.config.thresholds.stuckRunSeconds.degradedAbove;
247
+ return (nowMs - updatedAtMs) / 1000 >= this.config.thresholds.stuckRunSeconds.degradedAbove;
434
248
  }).length;
435
249
  }
436
- async sumRuntimeDbBytes() {
437
- const sizes = await Promise.all(this.runRoots.map((runRoot) => safeFileSize(path.join(runRoot, "runtime.sqlite"))));
438
- return sizes.reduce((sum, value) => sum + value, 0);
439
- }
440
- async sumCheckpointDbBytes() {
441
- const sizes = await Promise.all(this.checkpointDbPaths.map((dbPath) => safeFileSize(dbPath)));
442
- return sizes.reduce((sum, value) => sum + value, 0);
443
- }
444
- async sumArtifactBytes() {
445
- const sizes = await Promise.all(this.runRoots.map((runRoot) => directorySize(path.join(runRoot, "threads"))));
446
- return sizes.reduce((sum, value) => sum + value, 0);
447
- }
448
250
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.104",
3
+ "version": "0.0.105",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",