@integrity-labs/agt-cli 0.28.184 → 0.28.186

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in the public registry.
@@ -3,7 +3,7 @@ import {
3
3
  formatMissingVar,
4
4
  isClaudeFastMode,
5
5
  probeMcpEnvSubstitution
6
- } from "./chunk-K2N44EUC.js";
6
+ } from "./chunk-ERHHYBVS.js";
7
7
  import {
8
8
  reapOrphanChannelMcps
9
9
  } from "./chunk-XWVM4KPK.js";
@@ -1588,4 +1588,4 @@ export {
1588
1588
  stopAllSessionsAndWait,
1589
1589
  getProjectDir
1590
1590
  };
1591
- //# sourceMappingURL=chunk-RLDZLSML.js.map
1591
+ //# sourceMappingURL=chunk-ORO4YLTN.js.map
@@ -100,7 +100,7 @@ async function spawnPairSession(session) {
100
100
  return { ok: true };
101
101
  } catch {
102
102
  }
103
- const { resolveClaudeBinary } = await import("./persistent-session-KBZFQ7SX.js");
103
+ const { resolveClaudeBinary } = await import("./persistent-session-SNLNY2VB.js");
104
104
  const claudeBin = resolveClaudeBinary();
105
105
  const pairEnv = {
106
106
  ...process.env,
@@ -373,4 +373,4 @@ export {
373
373
  startClaudePair,
374
374
  submitClaudePairCode
375
375
  };
376
- //# sourceMappingURL=claude-pair-runtime-COEJFJJX.js.map
376
+ //# sourceMappingURL=claude-pair-runtime-VBTV4KZO.js.map
@@ -28,7 +28,7 @@ import {
28
28
  requireHost,
29
29
  safeWriteJsonAtomic,
30
30
  setConfigHash
31
- } from "../chunk-YYIBKSSD.js";
31
+ } from "../chunk-3UMHVNGB.js";
32
32
  import {
33
33
  getProjectDir as getProjectDir2,
34
34
  getReadyTasks,
@@ -70,7 +70,7 @@ import {
70
70
  takeZombieDetection,
71
71
  transcriptActivityAgeSeconds,
72
72
  writeEgressAllowlist
73
- } from "../chunk-RLDZLSML.js";
73
+ } from "../chunk-ORO4YLTN.js";
74
74
  import {
75
75
  CONVERSATION_FAILURE_CATEGORIES,
76
76
  DEFAULT_FRAMEWORK,
@@ -111,7 +111,7 @@ import {
111
111
  resolveChannels,
112
112
  resolveDmTarget,
113
113
  sumTranscriptUsageInWindow
114
- } from "../chunk-K2N44EUC.js";
114
+ } from "../chunk-ERHHYBVS.js";
115
115
  import {
116
116
  parsePsRows,
117
117
  reapOrphanChannelMcps
@@ -2500,8 +2500,14 @@ ${transcript}
2500
2500
 
2501
2501
  Score the agent's success:
2502
2502
  - score: integer 0-100 (0 = ignored/unhelpful/wrong, 100 = fully resolved the user's need)
2503
- - verdict: "success" (need clearly met), "partial" (some help but incomplete/ambiguous), or "failure" (did not help / made it worse)
2503
+ - verdict: "success" (need clearly met), "partial" (some help but genuinely incomplete), or "failure" (did not help / made it worse)
2504
2504
  - summary: ONE short sentence (max 140 chars) explaining the score. Do NOT quote sensitive user content.
2505
+
2506
+ How to choose the verdict (read carefully \u2014 most conversations are short and one-sided):
2507
+ - A complete, correct, self-contained answer to the user's question is a "success" EVEN IF the user never replied to confirm. End-users rarely send a closing "thanks". Do NOT downgrade a good answer to "partial" merely because the transcript ends after the agent's reply or because there is no explicit confirmation. Absence of confirmation is NOT ambiguity.
2508
+ - Use "partial" ONLY when the agent's own response is genuinely incomplete: it asked the user a clarifying question that was never answered, gave a hedged/half answer, promised a follow-up that is not present, or addressed only part of a multi-part request.
2509
+ - Use "failure" when the agent ignored the user, was wrong, or made things worse.
2510
+ - If the transcript looks truncated or cut off mid-exchange (a reconstruction artifact, not the agent's fault), judge only what the agent actually delivered; do not invent incompleteness from a missing tail.
2505
2511
  - failure_category: ONLY when verdict is "failure", the single best-fitting reason from this exact set: ${buildFailureCategoryPromptLines()}. Use null when verdict is "success" or "partial".
2506
2512
 
2507
2513
  Respond with ONLY a JSON object, no other text:
@@ -6596,7 +6602,7 @@ var agentRestartTimezoneInputs = /* @__PURE__ */ new Map();
6596
6602
  var lastVersionCheckAt = 0;
6597
6603
  var VERSION_CHECK_INTERVAL_MS = 5 * 60 * 1e3;
6598
6604
  var lastResponsivenessProbeAt = 0;
6599
- var agtCliVersion = true ? "0.28.184" : "dev";
6605
+ var agtCliVersion = true ? "0.28.186" : "dev";
6600
6606
  function resolveBrewPath(execFileSync4) {
6601
6607
  try {
6602
6608
  const out = execFileSync4("which", ["brew"], { timeout: 5e3 }).toString().trim();
@@ -7489,7 +7495,7 @@ async function pollCycle() {
7489
7495
  }
7490
7496
  try {
7491
7497
  const { detectHostSecurity } = await import("../host-security-6PDFG7F5.js");
7492
- const { collectDiagnostics } = await import("../persistent-session-KBZFQ7SX.js");
7498
+ const { collectDiagnostics } = await import("../persistent-session-SNLNY2VB.js");
7493
7499
  const diagCodeNames = [...agentState.persistentSessionAgents];
7494
7500
  const agentDiagnostics = diagCodeNames.length > 0 ? collectDiagnostics(diagCodeNames) : void 0;
7495
7501
  let tailscaleHostname;
@@ -7637,7 +7643,7 @@ async function pollCycle() {
7637
7643
  const {
7638
7644
  collectResponsivenessProbes,
7639
7645
  getResponsivenessIntervalMs
7640
- } = await import("../responsiveness-probe-O4F7H7IB.js");
7646
+ } = await import("../responsiveness-probe-6HC3PENG.js");
7641
7647
  const probeIntervalMs = getResponsivenessIntervalMs();
7642
7648
  if (now - lastResponsivenessProbeAt > probeIntervalMs) {
7643
7649
  const probeCodeNames = [...agentState.persistentSessionAgents];
@@ -7669,7 +7675,7 @@ async function pollCycle() {
7669
7675
  collectResponsivenessProbes,
7670
7676
  livePendingInboundOldestAgeSeconds,
7671
7677
  parkPendingInbound
7672
- } = await import("../responsiveness-probe-O4F7H7IB.js");
7678
+ } = await import("../responsiveness-probe-6HC3PENG.js");
7673
7679
  const { getProjectDir: wedgeProjectDir } = await import("../claude-scheduler-FATCLHDM.js");
7674
7680
  const wedgeNow = /* @__PURE__ */ new Date();
7675
7681
  const liveAgents = agentState.persistentSessionAgents;
@@ -10644,7 +10650,7 @@ async function processClaudePairSessions(agents) {
10644
10650
  killPairSession,
10645
10651
  pairTmuxSession,
10646
10652
  finalizeClaudePairOnboarding
10647
- } = await import("../claude-pair-runtime-COEJFJJX.js");
10653
+ } = await import("../claude-pair-runtime-VBTV4KZO.js");
10648
10654
  for (const pairId of pendingResp.cancelled_pair_ids ?? []) {
10649
10655
  log(`[claude-pair] sweeping orphan tmux session for pair ${pairId.slice(0, 8)}`);
10650
10656
  const killed = await killPairSession(pairTmuxSession(pairId));