@integrity-labs/agt-cli 0.28.104 → 0.28.106

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ import {
3
3
  formatMissingVar,
4
4
  isClaudeFastMode,
5
5
  probeMcpEnvSubstitution
6
- } from "./chunk-5DYG42FL.js";
6
+ } from "./chunk-D22IMWAZ.js";
7
7
  import {
8
8
  reapOrphanChannelMcps
9
9
  } from "./chunk-XWVM4KPK.js";
@@ -1565,4 +1565,4 @@ export {
1565
1565
  stopAllSessionsAndWait,
1566
1566
  getProjectDir
1567
1567
  };
1568
- //# sourceMappingURL=chunk-WNN5WT42.js.map
1568
+ //# sourceMappingURL=chunk-O4OYAFTZ.js.map
@@ -100,7 +100,7 @@ async function spawnPairSession(session) {
100
100
  return { ok: true };
101
101
  } catch {
102
102
  }
103
- const { resolveClaudeBinary } = await import("./persistent-session-6CU3BIHP.js");
103
+ const { resolveClaudeBinary } = await import("./persistent-session-RMRG5HXI.js");
104
104
  const claudeBin = resolveClaudeBinary();
105
105
  const pairEnv = {
106
106
  ...process.env,
@@ -373,4 +373,4 @@ export {
373
373
  startClaudePair,
374
374
  submitClaudePairCode
375
375
  };
376
- //# sourceMappingURL=claude-pair-runtime-JP3TEGDV.js.map
376
+ //# sourceMappingURL=claude-pair-runtime-6DZEDQAG.js.map
@@ -28,7 +28,7 @@ import {
28
28
  requireHost,
29
29
  safeWriteJsonAtomic,
30
30
  setConfigHash
31
- } from "../chunk-DO6ZJ2SC.js";
31
+ } from "../chunk-5VPPNSKR.js";
32
32
  import {
33
33
  getProjectDir as getProjectDir2,
34
34
  getReadyTasks,
@@ -71,7 +71,7 @@ import {
71
71
  takeZombieDetection,
72
72
  transcriptActivityAgeSeconds,
73
73
  writeEgressAllowlist
74
- } from "../chunk-WNN5WT42.js";
74
+ } from "../chunk-O4OYAFTZ.js";
75
75
  import {
76
76
  FLAGS_SCHEMA_VERSION,
77
77
  FLAG_REGISTRY,
@@ -96,6 +96,7 @@ import {
96
96
  isParseError,
97
97
  isResolveError,
98
98
  isSelfCompletion,
99
+ laneTagFragment,
99
100
  parseDeliveryTarget,
100
101
  parseEnvIntegrations,
101
102
  parseTranscriptUsage,
@@ -106,7 +107,7 @@ import {
106
107
  resolveDmTarget,
107
108
  sumTranscriptUsageInWindow,
108
109
  wrapScheduledTaskPrompt
109
- } from "../chunk-5DYG42FL.js";
110
+ } from "../chunk-D22IMWAZ.js";
110
111
  import {
111
112
  parsePsRows,
112
113
  reapOrphanChannelMcps
@@ -2558,7 +2559,9 @@ async function maybeEvaluateConversations(args) {
2558
2559
  );
2559
2560
  pending = resp?.conversations ?? [];
2560
2561
  } catch (err) {
2561
- log2(`[conversation-eval] ${codeName}: pending fetch failed: ${err.message}`);
2562
+ const msg = toHealthErrorMessage(err);
2563
+ log2(`[conversation-eval] ${codeName}: pending fetch failed: ${msg}`);
2564
+ await reportBackendHealth(api2, log2, codeName, { ok: false, error: `pending fetch: ${msg}` });
2562
2565
  return;
2563
2566
  }
2564
2567
  if (pending.length === 0) return;
@@ -2566,22 +2569,24 @@ async function maybeEvaluateConversations(args) {
2566
2569
  const allTurns = readRecentTurns(dir, nowMs);
2567
2570
  if (allTurns.length === 0) {
2568
2571
  for (const conv of pending) {
2569
- await reportSkip(api2, agentId, conv.conversation_id, log2, codeName);
2572
+ await reportSkip(api2, agentId, conv.conversation_id, "no_transcript", log2, codeName);
2570
2573
  }
2571
2574
  return;
2572
2575
  }
2576
+ let backendSucceeded = false;
2577
+ let backendError = null;
2573
2578
  for (const conv of pending) {
2574
2579
  const tokens = channelRefTokens(conv.channel_ref);
2575
2580
  const windowStart = Date.parse(conv.started_at) - WINDOW_PAD_MS;
2576
2581
  const windowEnd = Date.parse(conv.last_message_at) + WINDOW_PAD_MS;
2577
2582
  const turns = reconstructConversation(allTurns, tokens, windowStart, windowEnd);
2578
2583
  if (turns.length === 0) {
2579
- await reportSkip(api2, agentId, conv.conversation_id, log2, codeName);
2584
+ await reportSkip(api2, agentId, conv.conversation_id, "not_reconstructable", log2, codeName);
2580
2585
  continue;
2581
2586
  }
2582
2587
  const transcript = renderTranscript(turns);
2583
2588
  if (!transcript.trim()) {
2584
- await reportSkip(api2, agentId, conv.conversation_id, log2, codeName);
2589
+ await reportSkip(api2, agentId, conv.conversation_id, "empty_transcript", log2, codeName);
2585
2590
  continue;
2586
2591
  }
2587
2592
  let verdict;
@@ -2589,13 +2594,16 @@ async function maybeEvaluateConversations(args) {
2589
2594
  const out = await backend.run(buildEvalPrompt(conv.channel, transcript));
2590
2595
  verdict = parseVerdict(out);
2591
2596
  } catch (err) {
2592
- log2(`[conversation-eval] ${codeName}: scoring failed: ${err.message}`);
2597
+ backendError = toHealthErrorMessage(err);
2598
+ log2(`[conversation-eval] ${codeName}: scoring failed: ${backendError}`);
2593
2599
  continue;
2594
2600
  }
2595
2601
  if (!verdict) {
2602
+ backendError = "unparseable verdict";
2596
2603
  log2(`[conversation-eval] ${codeName}: unparseable verdict for ${conv.conversation_id.slice(0, 8)}`);
2597
2604
  continue;
2598
2605
  }
2606
+ backendSucceeded = true;
2599
2607
  try {
2600
2608
  await api2.post("/host/conversations/evaluation", {
2601
2609
  agent_id: agentId,
@@ -2612,18 +2620,42 @@ async function maybeEvaluateConversations(args) {
2612
2620
  log2(`[conversation-eval] ${codeName}: report failed: ${err.message}`);
2613
2621
  }
2614
2622
  }
2623
+ if (backendSucceeded) {
2624
+ await reportBackendHealth(api2, log2, codeName, { ok: true });
2625
+ } else if (backendError) {
2626
+ await reportBackendHealth(api2, log2, codeName, { ok: false, error: backendError, model: backend.model });
2627
+ }
2615
2628
  }
2616
- async function reportSkip(api2, agentId, conversationId, log2, codeName) {
2629
+ async function reportSkip(api2, agentId, conversationId, reason, log2, codeName) {
2617
2630
  try {
2618
2631
  await api2.post("/host/conversations/evaluation", {
2619
2632
  agent_id: agentId,
2620
2633
  conversation_id: conversationId,
2621
- skipped: true
2634
+ skipped: true,
2635
+ // The data-shaped reason this conversation couldn't be scored. The API
2636
+ // stores it alongside the eval_attempts bump so the eventual give-up is
2637
+ // explainable on the reporting surface.
2638
+ failure_reason: reason
2622
2639
  });
2623
2640
  } catch (err) {
2624
2641
  log2(`[conversation-eval] ${codeName}: skip report failed: ${err.message}`);
2625
2642
  }
2626
2643
  }
2644
+ function toHealthErrorMessage(err) {
2645
+ const raw = err instanceof Error ? err.message : typeof err === "string" ? err : "unknown backend error";
2646
+ return raw.replace(/\s+/g, " ").replace(/(Bearer\s+)[^\s]+/gi, "$1[redacted]").replace(/([?&](?:api[_-]?key|token|secret|key)=)[^&\s]+/gi, "$1[redacted]").replace(/:\/\/[^/\s]+@/g, "://[redacted]@").trim().slice(0, 300);
2647
+ }
2648
+ async function reportBackendHealth(api2, log2, codeName, health) {
2649
+ try {
2650
+ await api2.post("/host/conversations/eval-backend-health", {
2651
+ ok: health.ok,
2652
+ error: health.error,
2653
+ model: health.model
2654
+ });
2655
+ } catch (err) {
2656
+ log2(`[conversation-eval] ${codeName}: backend-health report failed: ${err.message}`);
2657
+ }
2658
+ }
2627
2659
  function readRecentTurns(dir, nowMs) {
2628
2660
  let entries;
2629
2661
  try {
@@ -6961,7 +6993,7 @@ var cachedMaintenanceWindow = null;
6961
6993
  var lastVersionCheckAt = 0;
6962
6994
  var VERSION_CHECK_INTERVAL_MS = 5 * 60 * 1e3;
6963
6995
  var lastResponsivenessProbeAt = 0;
6964
- var agtCliVersion = true ? "0.28.104" : "dev";
6996
+ var agtCliVersion = true ? "0.28.106" : "dev";
6965
6997
  function resolveBrewPath(execFileSync4) {
6966
6998
  try {
6967
6999
  const out = execFileSync4("which", ["brew"], { timeout: 5e3 }).toString().trim();
@@ -8072,7 +8104,7 @@ async function pollCycle() {
8072
8104
  }
8073
8105
  try {
8074
8106
  const { detectHostSecurity } = await import("../host-security-6PDFG7F5.js");
8075
- const { collectDiagnostics } = await import("../persistent-session-6CU3BIHP.js");
8107
+ const { collectDiagnostics } = await import("../persistent-session-RMRG5HXI.js");
8076
8108
  const diagCodeNames = [...agentState.persistentSessionAgents];
8077
8109
  const agentDiagnostics = diagCodeNames.length > 0 ? collectDiagnostics(diagCodeNames) : void 0;
8078
8110
  let tailscaleHostname;
@@ -8173,7 +8205,7 @@ async function pollCycle() {
8173
8205
  const {
8174
8206
  collectResponsivenessProbes,
8175
8207
  getResponsivenessIntervalMs
8176
- } = await import("../responsiveness-probe-GLZZZ2OK.js");
8208
+ } = await import("../responsiveness-probe-2KVIALGI.js");
8177
8209
  const probeIntervalMs = getResponsivenessIntervalMs();
8178
8210
  if (now - lastResponsivenessProbeAt > probeIntervalMs) {
8179
8211
  const probeCodeNames = [...agentState.persistentSessionAgents];
@@ -8205,7 +8237,7 @@ async function pollCycle() {
8205
8237
  collectResponsivenessProbes,
8206
8238
  livePendingInboundOldestAgeSeconds,
8207
8239
  parkPendingInbound
8208
- } = await import("../responsiveness-probe-GLZZZ2OK.js");
8240
+ } = await import("../responsiveness-probe-2KVIALGI.js");
8209
8241
  const { getProjectDir: wedgeProjectDir } = await import("../claude-scheduler-FATCLHDM.js");
8210
8242
  const wedgeNow = /* @__PURE__ */ new Date();
8211
8243
  const liveAgents = agentState.persistentSessionAgents;
@@ -11314,7 +11346,7 @@ async function processDirectChatMessage(agent, msg) {
11314
11346
  }
11315
11347
  }
11316
11348
  const escapeXml = (value) => value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&apos;");
11317
- const channelEnvelope = `<channel source="direct-chat" session_id="${escapeXml(msg.session_id)}" user="webapp">
11349
+ const channelEnvelope = `<channel source="direct-chat" session_id="${escapeXml(msg.session_id)}" user="webapp" ${laneTagFragment("conversational", true)}>
11318
11350
  ${escapeXml(msg.content)}
11319
11351
  </channel>`;
11320
11352
  const delivered = await injectMessage(
@@ -11733,7 +11765,7 @@ async function processClaudePairSessions(agents) {
11733
11765
  killPairSession,
11734
11766
  pairTmuxSession,
11735
11767
  finalizeClaudePairOnboarding
11736
- } = await import("../claude-pair-runtime-JP3TEGDV.js");
11768
+ } = await import("../claude-pair-runtime-6DZEDQAG.js");
11737
11769
  for (const pairId of pendingResp.cancelled_pair_ids ?? []) {
11738
11770
  log(`[claude-pair] sweeping orphan tmux session for pair ${pairId.slice(0, 8)}`);
11739
11771
  const killed = await killPairSession(pairTmuxSession(pairId));