npm - @ouro.bot/cli - Versions diffs - 0.1.0-alpha.347 → 0.1.0-alpha.349 - Mend

@ouro.bot/cli 0.1.0-alpha.347 → 0.1.0-alpha.349

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (9)

package/changelog.json +13 -0
package/dist/heart/core.js +116 -132
package/dist/heart/daemon/cli-exec.js +408 -73
package/dist/heart/daemon/cli-parse.js +90 -1
package/dist/heart/provider-attempt.js +133 -0
package/dist/heart/provider-credential-pool.js +3 -2
package/dist/heart/provider-ping.js +116 -92
package/dist/senses/trust-gate.js +5 -5
package/package.json +1 -1

package/changelog.json CHANGED Viewed

@@ -1,6 +1,19 @@
 {
   "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
   "versions": [
+    {
+      "version": "0.1.0-alpha.349",
+      "changes": [
+        "`ouro use`, `ouro check`, provider-scoped `ouro status --agent`, and machine-pool `ouro auth` now operate on machine-local provider state and credentials, with safe provenance, explicit lane/provider/model repair guidance, and legacy `auth switch`/`config model` compatibility routed through `state/providers.json` instead of mutating synced `agent.json`."
+      ]
+    },
+    {
+      "version": "0.1.0-alpha.348",
+      "changes": [
+        "Provider attempts now share one bounded retry runner across real runtime turns, provider pings, health inventory, working-provider discovery, and GitHub Copilot model validation, retrying every provider failure class before terminal handling while preserving attempt metadata and nerves events.",
+        "Provider checks can now ping the selected lane model instead of drifting to provider defaults, and readiness/model pings use zero-delay retries so health checks stay fast."
+      ]
+    },
     {
       "version": "0.1.0-alpha.347",
       "changes": [

package/dist/heart/core.js CHANGED Viewed

@@ -10,7 +10,6 @@ exports.isExternalStateQuery = isExternalStateQuery;
 exports.getSettleRetryError = getSettleRetryError;
 exports.stripLastToolCalls = stripLastToolCalls;
 exports.repairOrphanedToolCalls = repairOrphanedToolCalls;
-exports.isRetryBlocked = isRetryBlocked;
 exports.runAgent = runAgent;
 const config_1 = require("./config");
 const identity_1 = require("./identity");
@@ -33,6 +32,7 @@ const tool_loop_1 = require("./tool-loop");
 const packets_1 = require("../arc/packets");
 const tool_friction_1 = require("./tool-friction");
 const provider_models_1 = require("./provider-models");
+const provider_attempt_1 = require("./provider-attempt");
 const _providerRuntimes = {
     human: null,
     agent: null,
@@ -366,38 +366,6 @@ function isContextOverflow(err) {
         return true;
     return false;
 }
-// HTTP statuses that will never become retryable on their own — the request is
-// semantically wrong (malformed, unauthorized, missing route, etc.) and the
-// caller has to do something different before it can succeed.
-const NON_RETRYABLE_HTTP_STATUSES = new Set([
-    400, // Bad Request — malformed payload
-    401, // Unauthorized — credentials invalid/expired
-    403, // Forbidden — credentials lack permission
-    404, // Not Found — model/route doesn't exist
-    422, // Unprocessable Entity — semantic validation failure
-]);
-// Provider-classified error categories that we never retry. usage-limit is
-// distinct from rate-limit: rate limits clear in seconds (retryable), usage
-// limits are billing quotas that take hours/days to reset.
-const NON_RETRYABLE_CLASSIFICATIONS = new Set([
-    "auth-failure",
-    "usage-limit",
-]);
-// Default policy: retry every error from the provider, EXCEPT the small set
-// above. The user explicitly requested this — past behavior was to retry only
-// on a known-transient list, which silently dropped real harness/SDK timeouts
-// (e.g. OpenAI SDK's "Request timed out." has no err.code and no status, so
-// the substring matchers missed it).
-function isRetryBlocked(error, classification) {
-    const status = error.status;
-    if (status !== undefined && NON_RETRYABLE_HTTP_STATUSES.has(status))
-        return true;
-    if (NON_RETRYABLE_CLASSIFICATIONS.has(classification))
-        return true;
-    return false;
-}
-const MAX_RETRIES = 3;
-const RETRY_BASE_MS = 2000;
 const RETRY_LABELS = {
     "auth-failure": "auth error",
     "usage-limit": "usage limit",
@@ -406,6 +374,29 @@ const RETRY_LABELS = {
     "network-error": "network error",
     "unknown": "error",
 };
+function waitForProviderRetry(delayMs, signal) {
+    if (!signal) {
+        return new Promise((resolve) => {
+            setTimeout(resolve, delayMs);
+        });
+    }
+    return new Promise((resolve, reject) => {
+        let timer;
+        const onAbort = () => {
+            clearTimeout(timer);
+            reject(new provider_attempt_1.ProviderAttemptAbortError());
+        };
+        timer = setTimeout(() => {
+            signal.removeEventListener("abort", onAbort);
+            resolve();
+        }, delayMs);
+        if (signal.aborted) {
+            onAbort();
+            return;
+        }
+        signal.addEventListener("abort", onAbort, { once: true });
+    });
+}
 function buildAuthFailureGuidance(provider, model, agentName, detail) {
     const mismatch = (0, provider_models_1.getProviderModelMismatchMessage)(provider, model);
     const modelLabel = model
@@ -500,7 +491,6 @@ async function runAgent(messages, callbacks, channel, signal, options) {
     let done = false;
     let lastUsage;
     let overflowRetried = false;
-    let retryCount = 0;
     let outcome = "settled";
     let completion;
     let terminalError;
@@ -515,6 +505,35 @@ async function runAgent(messages, callbacks, channel, signal, options) {
     let sawExternalStateQuery = false;
     const toolLoopState = (0, tool_loop_1.createToolLoopState)();
     const toolFrictionLedger = (0, tool_friction_1.createToolFrictionLedger)();
+    const finishTerminalProviderError = (error, classification) => {
+        terminalError = error;
+        terminalErrorClassification = classification;
+        /* v8 ignore start — auth-failure guidance: tested via provider error classification tests @preserve */
+        if (terminalErrorClassification === "auth-failure") {
+            const agentName = (0, identity_2.getAgentName)();
+            const currentProvider = providerRuntime.id;
+            callbacks.onError(new Error(buildAuthFailureGuidance(currentProvider, providerRuntime.model, agentName, terminalError.message)), "terminal");
+        }
+        else {
+            callbacks.onError(terminalError, "terminal");
+        }
+        /* v8 ignore stop */
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            event: "engine.error",
+            trace_id: traceId,
+            component: "engine",
+            message: terminalError.message,
+            meta: {
+                provider: providerRuntime.id,
+                model: providerRuntime.model,
+                errorClassification: terminalErrorClassification,
+            },
+        });
+        stripLastToolCalls(messages);
+        outcome = "errored";
+        done = true;
+    };
     // Prevent MaxListenersExceeded warning — each iteration adds a listener
     try {
         require("events").setMaxListeners(50, signal);
@@ -581,21 +600,71 @@ async function runAgent(messages, callbacks, channel, signal, options) {
             break;
         }
         try {
-            callbacks.onModelStart();
-            const result = await providerRuntime.streamTurn({
-                messages,
-                activeTools,
-                callbacks,
-                signal,
-                traceId,
-                toolChoiceRequired,
-                reasoningEffort: currentReasoningEffort,
-                eagerSettleStreaming: true,
+            const callProviderTurn = async () => {
+                callbacks.onModelStart();
+                try {
+                    return await providerRuntime.streamTurn({
+                        messages,
+                        activeTools,
+                        callbacks,
+                        signal,
+                        traceId,
+                        toolChoiceRequired,
+                        reasoningEffort: currentReasoningEffort,
+                        eagerSettleStreaming: true,
+                    });
+                }
+                catch (error) {
+                    if (signal?.aborted)
+                        throw new provider_attempt_1.ProviderAttemptAbortError();
+                    throw error;
+                }
+            };
+            const callProviderTurnWithOverflowRecovery = async () => {
+                try {
+                    return await callProviderTurn();
+                }
+                catch (error) {
+                    if (error instanceof provider_attempt_1.ProviderAttemptAbortError)
+                        throw error;
+                    if (isContextOverflow(error) && !overflowRetried) {
+                        overflowRetried = true;
+                        stripLastToolCalls(messages);
+                        const { maxTokens, contextMargin } = (0, config_1.getContextConfig)();
+                        const trimmed = (0, context_1.trimMessages)(messages, maxTokens, contextMargin, maxTokens * 2);
+                        messages.splice(0, messages.length, ...trimmed);
+                        providerRuntime.resetTurnState(messages);
+                        callbacks.onError(new Error("context trimmed, retrying..."), "transient");
+                        return callProviderTurn();
+                    }
+                    throw error;
+                }
+            };
+            const attempt = await (0, provider_attempt_1.runProviderAttempt)({
+                operation: "turn",
+                provider: providerRuntime.id,
+                model: providerRuntime.model,
+                run: callProviderTurnWithOverflowRecovery,
+                classifyError: (error) => providerRuntime.classifyError(error),
+                onRetry: (record, maxAttempts) => {
+                    const delayMs = record.delayMs;
+                    const seconds = delayMs / 1000;
+                    const cause = RETRY_LABELS[record.classification];
+                    callbacks.onError(new Error(`${cause}, retrying in ${seconds}s (${record.attempt}/${maxAttempts})...`), "transient");
+                },
+                sleep: async (delayMs) => {
+                    await waitForProviderRetry(delayMs, signal);
+                    providerRuntime.resetTurnState(messages);
+                },
             });
+            if (!attempt.ok) {
+                finishTerminalProviderError(attempt.error, attempt.classification);
+                continue;
+            }
+            const result = attempt.value;
             // Track usage from the latest API call
             if (result.usage)
                 lastUsage = result.usage;
-            retryCount = 0; // reset on success
             // SHARED: build CC-format assistant message from TurnResult
             const msg = {
                 role: "assistant",
@@ -978,26 +1047,11 @@ async function runAgent(messages, callbacks, channel, signal, options) {
         }
         catch (e) {
             // Abort is not an error — just stop cleanly
-            if (signal?.aborted) {
+            if (e instanceof provider_attempt_1.ProviderAttemptAbortError || signal?.aborted) {
                 stripLastToolCalls(messages);
                 outcome = "aborted";
                 break;
             }
-            // Context overflow: trim aggressively and retry once
-            if (isContextOverflow(e) && !overflowRetried) {
-                overflowRetried = true;
-                stripLastToolCalls(messages);
-                const { maxTokens, contextMargin } = (0, config_1.getContextConfig)();
-                const trimmed = (0, context_1.trimMessages)(messages, maxTokens, contextMargin, maxTokens * 2);
-                messages.splice(0, messages.length, ...trimmed);
-                providerRuntime.resetTurnState(messages);
-                callbacks.onError(new Error("context trimmed, retrying..."), "transient");
-                continue;
-            }
-            // Retry policy: retry every error EXCEPT those on the blocklist
-            // (NON_RETRYABLE_HTTP_STATUSES / NON_RETRYABLE_CLASSIFICATIONS).
-            // The classification still drives the user-facing label and the
-            // auth-failure guidance message below — it just no longer gates retries.
             const errorForClassification = e instanceof Error ? e : /* v8 ignore next -- defensive @preserve */ new Error(String(e));
             let providerClassification;
             try {
@@ -1007,77 +1061,7 @@ async function runAgent(messages, callbacks, channel, signal, options) {
                 /* v8 ignore next -- defensive: classifyError should not throw @preserve */
                 providerClassification = "unknown";
             }
-            const blocked = isRetryBlocked(errorForClassification, providerClassification);
-            const shouldRetry = !blocked && retryCount < MAX_RETRIES;
-            (0, runtime_1.emitNervesEvent)({
-                level: shouldRetry ? "info" : "warn",
-                event: shouldRetry ? "engine.provider_retry" : "engine.provider_retry_skip",
-                component: "engine",
-                message: shouldRetry
-                    ? `provider error is retryable (attempt ${retryCount + 1}/${MAX_RETRIES})`
-                    : blocked
-                        ? `provider error is on retry blocklist`
-                        : `provider error retries exhausted`,
-                meta: {
-                    provider: providerRuntime.id,
-                    model: providerRuntime.model,
-                    retryCount,
-                    maxRetries: MAX_RETRIES,
-                    blocked,
-                    providerClassification,
-                    errorMessage: errorForClassification.message.slice(0, 200),
-                    httpStatus: e.status ?? null,
-                },
-            });
-            if (shouldRetry) {
-                retryCount++;
-                const delay = RETRY_BASE_MS * Math.pow(2, retryCount - 1);
-                const cause = RETRY_LABELS[providerClassification];
-                callbacks.onError(new Error(`${cause}, retrying in ${delay / 1000}s (${retryCount}/${MAX_RETRIES})...`), "transient");
-                // Wait with abort support
-                const aborted = await new Promise((resolve) => {
-                    const timer = setTimeout(() => resolve(false), delay);
-                    if (signal) {
-                        const onAbort = () => { clearTimeout(timer); resolve(true); };
-                        if (signal.aborted) {
-                            clearTimeout(timer);
-                            resolve(true);
-                            return;
-                        }
-                        signal.addEventListener("abort", onAbort, { once: true });
-                    }
-                });
-                if (aborted) {
-                    stripLastToolCalls(messages);
-                    outcome = "aborted";
-                    break;
-                }
-                providerRuntime.resetTurnState(messages);
-                continue;
-            }
-            terminalError = errorForClassification;
-            terminalErrorClassification = providerClassification;
-            /* v8 ignore start — auth-failure guidance: tested via provider error classification tests @preserve */
-            if (terminalErrorClassification === "auth-failure") {
-                const agentName = (0, identity_2.getAgentName)();
-                const currentProvider = providerRuntime.id;
-                callbacks.onError(new Error(buildAuthFailureGuidance(currentProvider, providerRuntime.model, agentName, terminalError.message)), "terminal");
-            }
-            else {
-                callbacks.onError(terminalError, "terminal");
-            }
-            /* v8 ignore stop */
-            (0, runtime_1.emitNervesEvent)({
-                level: "error",
-                event: "engine.error",
-                trace_id: traceId,
-                component: "engine",
-                message: terminalError.message,
-                meta: { errorClassification: terminalErrorClassification },
-            });
-            stripLastToolCalls(messages);
-            outcome = "errored";
-            done = true;
+            finishTerminalProviderError(errorForClassification, providerClassification);
         }
     }
     (0, runtime_1.emitNervesEvent)({