@ouro.bot/cli 0.1.0-alpha.91 → 0.1.0-alpha.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/changelog.json CHANGED
@@ -1,6 +1,19 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.93",
6
+ "changes": [
7
+ "Final-answer truth checks now require fresh external-state verification (gh pr view, npm view, etc.) before allowing intent=complete when a live obligation is active, preventing stale or guessed merge/publish/deploy claims."
8
+ ]
9
+ },
10
+ {
11
+ "version": "0.1.0-alpha.92",
12
+ "changes": [
13
+ "Final-answer truth checks now reject `intent=complete` when a must-resolve handoff still has an active live-session return obligation and no newer follow-up proves the loop resumed.",
14
+ "This keeps visible operator loops open until the agent actually brings back the external-state update or reports a concrete blocker instead of going dark early."
15
+ ]
16
+ },
4
17
  {
5
18
  "version": "0.1.0-alpha.91",
6
19
  "changes": [
@@ -7,6 +7,7 @@ exports.getModel = getModel;
7
7
  exports.getProvider = getProvider;
8
8
  exports.createSummarize = createSummarize;
9
9
  exports.getProviderDisplayLabel = getProviderDisplayLabel;
10
+ exports.isExternalStateQuery = isExternalStateQuery;
10
11
  exports.getFinalAnswerRetryError = getFinalAnswerRetryError;
11
12
  exports.stripLastToolCalls = stripLastToolCalls;
12
13
  exports.repairOrphanedToolCalls = repairOrphanedToolCalls;
@@ -228,7 +229,14 @@ function parseFinalAnswerPayload(argumentsText) {
228
229
  return {};
229
230
  }
230
231
  }
231
- function getFinalAnswerRetryError(mustResolveBeforeHandoff, intent, sawSteeringFollowUp, delegationDecision, sawSendMessageSelf, sawGoInward, sawQuerySession, innerJob) {
232
+ /** Returns true when a tool call queries external state (GitHub, npm registry). */
233
+ function isExternalStateQuery(toolName, args) {
234
+ if (toolName !== "shell")
235
+ return false;
236
+ const cmd = String(args.command ?? "");
237
+ return /\bgh\s+(pr|run|api|issue)\b/.test(cmd) || /\bnpm\s+(view|info|show)\b/.test(cmd);
238
+ }
239
+ function getFinalAnswerRetryError(mustResolveBeforeHandoff, intent, sawSteeringFollowUp, delegationDecision, sawSendMessageSelf, sawGoInward, sawQuerySession, currentObligation, innerJob, sawExternalStateQuery) {
232
240
  // 1. Delegation adherence: delegate-inward without evidence of inward action
233
241
  if (delegationDecision?.target === "delegate-inward" && !sawSendMessageSelf && !sawGoInward && !sawQuerySession) {
234
242
  (0, runtime_1.emitNervesEvent)({
@@ -254,8 +262,15 @@ function getFinalAnswerRetryError(mustResolveBeforeHandoff, intent, sawSteeringF
254
262
  if (mustResolveBeforeHandoff && intent === "direct_reply" && !sawSteeringFollowUp) {
255
263
  return "your final_answer used intent=direct_reply without a newer steering follow-up. continue the unresolved work, or call final_answer again with intent=complete or blocked when appropriate.";
256
264
  }
257
- // 5. Default malformed fallback
258
- return "your final_answer was incomplete or malformed. call final_answer again with your complete response.";
265
+ // 5. mustResolveBeforeHandoff + complete while a live return loop is still active
266
+ if (mustResolveBeforeHandoff && intent === "complete" && currentObligation && !sawSteeringFollowUp) {
267
+ return "you still owe the live session a visible return on this work. don't end the turn yet — continue until you've brought back the external-state update, or use intent=blocked with the concrete blocker.";
268
+ }
269
+ // 6. External-state grounding: obligation + complete requires fresh external verification
270
+ if (intent === "complete" && currentObligation && !sawExternalStateQuery && !sawSteeringFollowUp) {
271
+ return "you're claiming this work is complete, but the external state hasn't been verified this turn. ground your claim with a fresh check (gh pr view, npm view, gh run view, etc.) before calling final_answer.";
272
+ }
273
+ return null;
259
274
  }
260
275
  // Re-export kick utilities for backward compat
261
276
  var kicks_1 = require("./kicks");
@@ -474,6 +489,7 @@ async function runAgent(messages, callbacks, channel, signal, options) {
474
489
  let sawGoInward = false;
475
490
  let sawQuerySession = false;
476
491
  let sawBridgeManage = false;
492
+ let sawExternalStateQuery = false;
477
493
  // Prevent MaxListenersExceeded warning — each iteration adds a listener
478
494
  try {
479
495
  require("events").setMaxListeners(50, signal);
@@ -582,9 +598,11 @@ async function runAgent(messages, callbacks, channel, signal, options) {
582
598
  // Extract answer from the tool call arguments.
583
599
  // Supports: {"answer":"text","intent":"..."} or "text" (JSON string).
584
600
  const { answer, intent } = parseFinalAnswerPayload(result.toolCalls[0].arguments);
601
+ const retryError = getFinalAnswerRetryError(mustResolveBeforeHandoffActive, intent, sawSteeringFollowUp, options?.delegationDecision, sawSendMessageSelf, sawGoInward, sawQuerySession, options?.currentObligation ?? null, options?.activeWorkFrame?.inner?.job, sawExternalStateQuery);
585
602
  const validDirectReply = mustResolveBeforeHandoffActive && intent === "direct_reply" && sawSteeringFollowUp;
586
603
  const validTerminalIntent = intent === "complete" || intent === "blocked";
587
604
  const validClosure = answer != null
605
+ && !retryError
588
606
  && (!mustResolveBeforeHandoffActive || validDirectReply || validTerminalIntent);
589
607
  if (validClosure) {
590
608
  completion = {
@@ -622,10 +640,9 @@ async function runAgent(messages, callbacks, channel, signal, options) {
622
640
  // malformed. Clear any partial streamed text or noise, then push the
623
641
  // assistant msg + error tool result and let the model try again.
624
642
  callbacks.onClearText?.();
625
- const retryError = getFinalAnswerRetryError(mustResolveBeforeHandoffActive, intent, sawSteeringFollowUp, options?.delegationDecision, sawSendMessageSelf, sawGoInward);
626
643
  messages.push(msg);
627
- messages.push({ role: "tool", tool_call_id: result.toolCalls[0].id, content: retryError });
628
- providerRuntime.appendToolOutput(result.toolCalls[0].id, retryError);
644
+ messages.push({ role: "tool", tool_call_id: result.toolCalls[0].id, content: retryError ?? "your final_answer was incomplete or malformed. call final_answer again with your complete response." });
645
+ providerRuntime.appendToolOutput(result.toolCalls[0].id, retryError ?? "your final_answer was incomplete or malformed. call final_answer again with your complete response.");
629
646
  }
630
647
  continue;
631
648
  }
@@ -779,6 +796,9 @@ async function runAgent(messages, callbacks, channel, signal, options) {
779
796
  /* v8 ignore next -- flag tested via truth-check integration tests @preserve */
780
797
  if (tc.name === "bridge_manage")
781
798
  sawBridgeManage = true;
799
+ /* v8 ignore next -- flag tested via truth-check integration tests @preserve */
800
+ if (isExternalStateQuery(tc.name, args))
801
+ sawExternalStateQuery = true;
782
802
  const argSummary = (0, tools_1.summarizeArgs)(tc.name, args);
783
803
  // Confirmation check for mutate tools
784
804
  if ((0, tools_1.isConfirmationRequired)(tc.name) && !options?.skipConfirmation) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.91",
3
+ "version": "0.1.0-alpha.93",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",