@integrity-labs/agt-cli 0.28.104 → 0.28.105

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -100,7 +100,7 @@ async function spawnPairSession(session) {
100
100
  return { ok: true };
101
101
  } catch {
102
102
  }
103
- const { resolveClaudeBinary } = await import("./persistent-session-6CU3BIHP.js");
103
+ const { resolveClaudeBinary } = await import("./persistent-session-Q65SZNJ6.js");
104
104
  const claudeBin = resolveClaudeBinary();
105
105
  const pairEnv = {
106
106
  ...process.env,
@@ -373,4 +373,4 @@ export {
373
373
  startClaudePair,
374
374
  submitClaudePairCode
375
375
  };
376
- //# sourceMappingURL=claude-pair-runtime-JP3TEGDV.js.map
376
+ //# sourceMappingURL=claude-pair-runtime-WOE2EELU.js.map
@@ -28,7 +28,7 @@ import {
28
28
  requireHost,
29
29
  safeWriteJsonAtomic,
30
30
  setConfigHash
31
- } from "../chunk-DO6ZJ2SC.js";
31
+ } from "../chunk-5EVWGQ5X.js";
32
32
  import {
33
33
  getProjectDir as getProjectDir2,
34
34
  getReadyTasks,
@@ -71,7 +71,7 @@ import {
71
71
  takeZombieDetection,
72
72
  transcriptActivityAgeSeconds,
73
73
  writeEgressAllowlist
74
- } from "../chunk-WNN5WT42.js";
74
+ } from "../chunk-GBE523G5.js";
75
75
  import {
76
76
  FLAGS_SCHEMA_VERSION,
77
77
  FLAG_REGISTRY,
@@ -106,7 +106,7 @@ import {
106
106
  resolveDmTarget,
107
107
  sumTranscriptUsageInWindow,
108
108
  wrapScheduledTaskPrompt
109
- } from "../chunk-5DYG42FL.js";
109
+ } from "../chunk-YUTJO6FU.js";
110
110
  import {
111
111
  parsePsRows,
112
112
  reapOrphanChannelMcps
@@ -2558,7 +2558,9 @@ async function maybeEvaluateConversations(args) {
2558
2558
  );
2559
2559
  pending = resp?.conversations ?? [];
2560
2560
  } catch (err) {
2561
- log2(`[conversation-eval] ${codeName}: pending fetch failed: ${err.message}`);
2561
+ const msg = toHealthErrorMessage(err);
2562
+ log2(`[conversation-eval] ${codeName}: pending fetch failed: ${msg}`);
2563
+ await reportBackendHealth(api2, log2, codeName, { ok: false, error: `pending fetch: ${msg}` });
2562
2564
  return;
2563
2565
  }
2564
2566
  if (pending.length === 0) return;
@@ -2566,22 +2568,24 @@ async function maybeEvaluateConversations(args) {
2566
2568
  const allTurns = readRecentTurns(dir, nowMs);
2567
2569
  if (allTurns.length === 0) {
2568
2570
  for (const conv of pending) {
2569
- await reportSkip(api2, agentId, conv.conversation_id, log2, codeName);
2571
+ await reportSkip(api2, agentId, conv.conversation_id, "no_transcript", log2, codeName);
2570
2572
  }
2571
2573
  return;
2572
2574
  }
2575
+ let backendSucceeded = false;
2576
+ let backendError = null;
2573
2577
  for (const conv of pending) {
2574
2578
  const tokens = channelRefTokens(conv.channel_ref);
2575
2579
  const windowStart = Date.parse(conv.started_at) - WINDOW_PAD_MS;
2576
2580
  const windowEnd = Date.parse(conv.last_message_at) + WINDOW_PAD_MS;
2577
2581
  const turns = reconstructConversation(allTurns, tokens, windowStart, windowEnd);
2578
2582
  if (turns.length === 0) {
2579
- await reportSkip(api2, agentId, conv.conversation_id, log2, codeName);
2583
+ await reportSkip(api2, agentId, conv.conversation_id, "not_reconstructable", log2, codeName);
2580
2584
  continue;
2581
2585
  }
2582
2586
  const transcript = renderTranscript(turns);
2583
2587
  if (!transcript.trim()) {
2584
- await reportSkip(api2, agentId, conv.conversation_id, log2, codeName);
2588
+ await reportSkip(api2, agentId, conv.conversation_id, "empty_transcript", log2, codeName);
2585
2589
  continue;
2586
2590
  }
2587
2591
  let verdict;
@@ -2589,13 +2593,16 @@ async function maybeEvaluateConversations(args) {
2589
2593
  const out = await backend.run(buildEvalPrompt(conv.channel, transcript));
2590
2594
  verdict = parseVerdict(out);
2591
2595
  } catch (err) {
2592
- log2(`[conversation-eval] ${codeName}: scoring failed: ${err.message}`);
2596
+ backendError = toHealthErrorMessage(err);
2597
+ log2(`[conversation-eval] ${codeName}: scoring failed: ${backendError}`);
2593
2598
  continue;
2594
2599
  }
2595
2600
  if (!verdict) {
2601
+ backendError = "unparseable verdict";
2596
2602
  log2(`[conversation-eval] ${codeName}: unparseable verdict for ${conv.conversation_id.slice(0, 8)}`);
2597
2603
  continue;
2598
2604
  }
2605
+ backendSucceeded = true;
2599
2606
  try {
2600
2607
  await api2.post("/host/conversations/evaluation", {
2601
2608
  agent_id: agentId,
@@ -2612,18 +2619,42 @@ async function maybeEvaluateConversations(args) {
2612
2619
  log2(`[conversation-eval] ${codeName}: report failed: ${err.message}`);
2613
2620
  }
2614
2621
  }
2622
+ if (backendSucceeded) {
2623
+ await reportBackendHealth(api2, log2, codeName, { ok: true });
2624
+ } else if (backendError) {
2625
+ await reportBackendHealth(api2, log2, codeName, { ok: false, error: backendError, model: backend.model });
2626
+ }
2615
2627
  }
2616
/**
 * Best-effort report that a pending conversation could not be scored.
 *
 * Posts a `skipped: true` evaluation for the conversation together with the
 * reason it was skipped. A failed POST is logged and swallowed so the caller's
 * loop keeps going; this function never rejects because of the report itself.
 *
 * @param {{ post: Function }} api2 - API client; only `post(path, body)` is used.
 * @param {string} agentId - Sent as `agent_id`.
 * @param {string} conversationId - Sent as `conversation_id`.
 * @param {string} reason - Sent as `failure_reason` (callers in this file pass
 *   "no_transcript", "not_reconstructable" or "empty_transcript").
 * @param {(line: string) => void} log2 - Log sink for the failure message.
 * @param {string} codeName - Agent code name used as the log prefix.
 * @returns {Promise<void>}
 */
async function reportSkip(api2, agentId, conversationId, reason, log2, codeName) {
  try {
    await api2.post("/host/conversations/evaluation", {
      agent_id: agentId,
      conversation_id: conversationId,
      skipped: true,
      // The data-shaped reason this conversation couldn't be scored. The API
      // stores it alongside the eval_attempts bump so the eventual give-up is
      // explainable on the reporting surface.
      failure_reason: reason
    });
  } catch (err) {
    // Anything can be thrown, including null/undefined or a bare string.
    // Reading `.message` unguarded would throw from inside this handler and
    // turn a best-effort report into a rejection.
    const detail = err?.message ?? String(err);
    log2(`[conversation-eval] ${codeName}: skip report failed: ${detail}`);
  }
}
2643
/**
 * Turn a thrown value into a short, single-line, secret-scrubbed string for
 * logs and the eval-backend health report.
 *
 * Source text: an Error's string `message`, or the value itself when a bare
 * string was thrown; anything else uses a generic fallback. The text is then:
 *   1. collapsed to single spaces (stack-ish multi-line messages become one line),
 *   2. scrubbed of `Bearer <token>` values,
 *   3. scrubbed of `api_key` / `api-key` / `apikey` / `token` / `secret` / `key`
 *      query-parameter values,
 *   4. scrubbed of URL userinfo (`scheme://user:pass@`),
 *   5. trimmed and capped at 300 characters.
 *
 * Never returns an empty string. The evaluation loop in this file stores the
 * result in a variable and uses its truthiness to decide whether to send the
 * `ok: false` health report, so an empty message would hide a real failure.
 *
 * @param {unknown} err - Whatever was caught.
 * @returns {string} Non-empty sanitized message, at most 300 characters.
 */
function toHealthErrorMessage(err) {
  const fallback = "unknown backend error";
  let raw = fallback;
  if (err instanceof Error) {
    // `message` is normally a string, but subclasses can overwrite it; guard so
    // the `.replace` chain below cannot throw.
    if (typeof err.message === "string") raw = err.message;
  } else if (typeof err === "string") {
    raw = err;
  }
  const cleaned = raw
    .replace(/\s+/g, " ")
    .replace(/(Bearer\s+)[^\s]+/gi, "$1[redacted]")
    .replace(/([?&](?:api[_-]?key|token|secret|key)=)[^&\s]+/gi, "$1[redacted]")
    .replace(/:\/\/[^/\s]+@/g, "://[redacted]@")
    .trim()
    .slice(0, 300);
  // `new Error()` or a whitespace-only message would otherwise yield "".
  return cleaned || fallback;
}
2647
/**
 * Best-effort report of the evaluation backend's health to the API.
 *
 * Posts `ok`, `error` and `model` from `health` to the eval-backend-health
 * endpoint. `error` and `model` are forwarded as-is and may be undefined.
 * A failed POST is logged and swallowed; this function never rejects because
 * of the report itself.
 *
 * @param {{ post: Function }} api2 - API client; only `post(path, body)` is used.
 * @param {(line: string) => void} log2 - Log sink for the failure message.
 * @param {string} codeName - Agent code name used as the log prefix.
 * @param {{ ok: boolean, error?: string, model?: string }} health - Health snapshot to send.
 * @returns {Promise<void>}
 */
async function reportBackendHealth(api2, log2, codeName, health) {
  try {
    await api2.post("/host/conversations/eval-backend-health", {
      ok: health.ok,
      error: health.error,
      model: health.model
    });
  } catch (err) {
    // Thrown values are not guaranteed to be Error objects; an unguarded
    // `.message` read on null/undefined would throw from inside this handler.
    const detail = err?.message ?? String(err);
    log2(`[conversation-eval] ${codeName}: backend-health report failed: ${detail}`);
  }
}
2627
2658
  function readRecentTurns(dir, nowMs) {
2628
2659
  let entries;
2629
2660
  try {
@@ -6961,7 +6992,7 @@ var cachedMaintenanceWindow = null;
6961
6992
  var lastVersionCheckAt = 0;
6962
6993
  var VERSION_CHECK_INTERVAL_MS = 5 * 60 * 1e3;
6963
6994
  var lastResponsivenessProbeAt = 0;
6964
- var agtCliVersion = true ? "0.28.104" : "dev";
6995
+ var agtCliVersion = true ? "0.28.105" : "dev";
6965
6996
  function resolveBrewPath(execFileSync4) {
6966
6997
  try {
6967
6998
  const out = execFileSync4("which", ["brew"], { timeout: 5e3 }).toString().trim();
@@ -8072,7 +8103,7 @@ async function pollCycle() {
8072
8103
  }
8073
8104
  try {
8074
8105
  const { detectHostSecurity } = await import("../host-security-6PDFG7F5.js");
8075
- const { collectDiagnostics } = await import("../persistent-session-6CU3BIHP.js");
8106
+ const { collectDiagnostics } = await import("../persistent-session-Q65SZNJ6.js");
8076
8107
  const diagCodeNames = [...agentState.persistentSessionAgents];
8077
8108
  const agentDiagnostics = diagCodeNames.length > 0 ? collectDiagnostics(diagCodeNames) : void 0;
8078
8109
  let tailscaleHostname;
@@ -8173,7 +8204,7 @@ async function pollCycle() {
8173
8204
  const {
8174
8205
  collectResponsivenessProbes,
8175
8206
  getResponsivenessIntervalMs
8176
- } = await import("../responsiveness-probe-GLZZZ2OK.js");
8207
+ } = await import("../responsiveness-probe-FS3JNFI2.js");
8177
8208
  const probeIntervalMs = getResponsivenessIntervalMs();
8178
8209
  if (now - lastResponsivenessProbeAt > probeIntervalMs) {
8179
8210
  const probeCodeNames = [...agentState.persistentSessionAgents];
@@ -8205,7 +8236,7 @@ async function pollCycle() {
8205
8236
  collectResponsivenessProbes,
8206
8237
  livePendingInboundOldestAgeSeconds,
8207
8238
  parkPendingInbound
8208
- } = await import("../responsiveness-probe-GLZZZ2OK.js");
8239
+ } = await import("../responsiveness-probe-FS3JNFI2.js");
8209
8240
  const { getProjectDir: wedgeProjectDir } = await import("../claude-scheduler-FATCLHDM.js");
8210
8241
  const wedgeNow = /* @__PURE__ */ new Date();
8211
8242
  const liveAgents = agentState.persistentSessionAgents;
@@ -11733,7 +11764,7 @@ async function processClaudePairSessions(agents) {
11733
11764
  killPairSession,
11734
11765
  pairTmuxSession,
11735
11766
  finalizeClaudePairOnboarding
11736
- } = await import("../claude-pair-runtime-JP3TEGDV.js");
11767
+ } = await import("../claude-pair-runtime-WOE2EELU.js");
11737
11768
  for (const pairId of pendingResp.cancelled_pair_ids ?? []) {
11738
11769
  log(`[claude-pair] sweeping orphan tmux session for pair ${pairId.slice(0, 8)}`);
11739
11770
  const killed = await killPairSession(pairTmuxSession(pairId));