npm - aui-agent-builder - Versions diffs - 0.3.85 → 0.3.87 - Mend

aui-agent-builder 0.3.85 → 0.3.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/dist/api-client/index.d.ts +35 -1
package/dist/api-client/index.d.ts.map +1 -1
package/dist/api-client/index.js +103 -17
package/dist/api-client/index.js.map +1 -1
package/dist/api-client/kb-view-client.d.ts.map +1 -1
package/dist/api-client/kb-view-client.js +39 -4
package/dist/api-client/kb-view-client.js.map +1 -1
package/dist/commands/import-agent.js +5 -4
package/dist/commands/import-agent.js.map +1 -1
package/dist/commands/pull-agent.js +14 -3
package/dist/commands/pull-agent.js.map +1 -1
package/dist/commands/push.d.ts.map +1 -1
package/dist/commands/push.js +449 -92
package/dist/commands/push.js.map +1 -1
package/dist/commands/validate.js +90 -0
package/dist/commands/validate.js.map +1 -1
package/dist/config/index.d.ts +9 -1
package/dist/config/index.d.ts.map +1 -1
package/dist/config/index.js.map +1 -1
package/dist/services/auth.service.d.ts.map +1 -1
package/dist/services/auth.service.js +26 -14
package/dist/services/auth.service.js.map +1 -1
package/dist/utils/fetch-with-timeout.d.ts +57 -0
package/dist/utils/fetch-with-timeout.d.ts.map +1 -0
package/dist/utils/fetch-with-timeout.js +125 -0
package/dist/utils/fetch-with-timeout.js.map +1 -0
package/package.json +1 -1

package/dist/commands/push.js (changed)

@@ -10,6 +10,7 @@ import { AUIClient, applyScopeLevel } from "../api-client/index.js";
 import { findAuiFiles, parseAuiFile } from "../utils/index.js";
 import { validate } from "./validate.js";
 import { getTracer, SpanStatusCode, setUserContext } from "../telemetry.js";
+import { trace } from "@opentelemetry/api";
 import { getItemLevelDiff } from "../utils/git.js";
 import { AuthenticationError, CLIError, ConfigError, ValidationError } from "../errors/index.js";
 import { StatusLine, Spinner, ErrorDisplay, Hint, } from "../ui/components/index.js";
@@ -23,20 +24,54 @@ function log(node) {
 }
 function startSpinner(label) {
     const inst = render(_jsx(Spinner, { label: label }));
+    let unmounted = false;
+    const safeUnmount = () => {
+        if (unmounted)
+            return;
+        unmounted = true;
+        inst.unmount();
+    };
     return {
         succeed(msg) {
-            inst.unmount();
+            safeUnmount();
             log(_jsx(StatusLine, { kind: "success", label: msg }));
         },
         fail(msg) {
-            inst.unmount();
+            safeUnmount();
             log(_jsx(StatusLine, { kind: "error", label: msg }));
         },
         stop() {
-            inst.unmount();
+            safeUnmount();
+        },
+        /**
+         * Internal: unconditionally unmount, no log line. Used by `withSpinner`
+         * to guarantee the spinner stops even when the wrapped body throws an
+         * exception that escapes the surrounding try/catch (which would
+         * otherwise leave a phantom spinner spinning forever).
+         */
+        _forceUnmount() {
+            safeUnmount();
         },
     };
 }
+/**
+ * Wrap a sync- or async-returning callback so the spinner ALWAYS unmounts,
+ * even on uncaught exceptions. The callback can call `.succeed()` / `.fail()`
+ * itself to render a final status line; otherwise the spinner just stops.
+ *
+ * This pattern eliminates the "phantom spinner" foot-gun where an exception
+ * thrown between `startSpinner(...)` and `.succeed/.fail` leaves the Ink
+ * render mounted forever — blocking the chat UI's "Still thinking…" state.
+ */
+async function withSpinner(label, fn) {
+    const spinner = startSpinner(label);
+    try {
+        return await fn(spinner);
+    }
+    finally {
+        spinner._forceUnmount();
+    }
+}
 /**
  * Push local agent configuration to the backend
  */
@@ -90,13 +125,47 @@ async function _push(pushSpan, agentCode, options = {}) {
             log(_jsx(StatusLine, { kind: "info", label: "Validating configuration..." }));
         else
             stderrLog("Validating configuration...");
-        const valid = await validate(projectRoot, { verbose: false });
+        // Wrap the validate call in its own span so a "stuck at validate"
+        // hang shows up clearly in Logfire as `aui.push.preflight.validate`
+        // with status = unset (still running) — instead of the parent
+        // `aui.push` span just sitting there with no clue why.
+        const validateTracer = getTracer();
+        const valid = await validateTracer.startActiveSpan("aui.push.preflight.validate", async (vSpan) => {
+            vSpan.setAttribute("push.preflight.step", "validate");
+            vSpan.setAttribute("push.preflight.skipValidation", false);
+            vSpan.setAttribute("push.preflight.force", options.force === true);
+            try {
+                const ok = await validate(projectRoot, { verbose: false });
+                vSpan.setAttribute("push.preflight.validate.ok", ok);
+                vSpan.setStatus({ code: SpanStatusCode.OK });
+                return ok;
+            }
+            catch (err) {
+                // validate() shouldn't throw under normal conditions, but if a
+                // schema fetch or git call inside it does, surface it here so
+                // we don't lose the error to the parent span's generic handler.
+                const msg = err instanceof Error ? err.message : String(err);
+                vSpan.setStatus({ code: SpanStatusCode.ERROR, message: msg });
+                vSpan.recordException(err instanceof Error ? err : new Error(msg));
+                throw err;
+            }
+            finally {
+                vSpan.end();
+            }
+        });
         if (!valid && !options.force) {
             pushSpan.setAttribute("push.exit_reason", "validation_failed");
+            pushSpan.addEvent("preflight.validation_rejected_push");
             throw new ValidationError("Push aborted due to validation errors.", {
                 suggestion: "Fix the errors above, or use --force to push anyway.",
             });
         }
+        if (!valid && options.force) {
+            pushSpan.addEvent("preflight.validation_failed_but_forced");
+        }
+    }
+    else {
+        pushSpan.addEvent("preflight.validation_skipped");
     }
     if (!json)
         log(_jsx(StatusLine, { kind: "info", label: "Pushing agent changes..." }));
@@ -284,11 +353,78 @@ async function _push(pushSpan, agentCode, options = {}) {
         // If the project has version_id in .auirc or --version-id is passed,
         // we validate it's a draft. If no version context exists, we auto-detect
         // available drafts. Push is rejected if no draft is found.
+        //
+        // Wrapped in an `aui.push.preflight.resolve-version` span so a hang on
+        // listAgents / listVersions / getVersion shows up clearly in Logfire
+        // instead of being lumped under the parent push span. This is the
+        // step that hits agent-management with up to 3 sequential calls.
         let prePushDraft = null;
         if (projectConfig.version_id || options.versionId) {
-            prePushDraft = await resolveVersionDraft(config, projectConfig, session, options.versionId);
+            const resolveTracer = getTracer();
+            prePushDraft = await resolveTracer.startActiveSpan("aui.push.preflight.resolve-version", async (rSpan) => {
+                rSpan.setAttribute("push.preflight.step", "resolve-version");
+                rSpan.setAttribute("push.preflight.has_explicit_version_id", !!options.versionId);
+                rSpan.setAttribute("push.preflight.has_auirc_version_id", !!projectConfig.version_id);
+                if (projectConfig.agent_id) {
+                    rSpan.setAttribute("push.preflight.network_id", projectConfig.agent_id);
+                }
+                try {
+                    const draft = await resolveVersionDraft(config, projectConfig, session, options.versionId);
+                    rSpan.setAttribute("push.preflight.resolved_version_id", draft.versionId);
+                    rSpan.setAttribute("push.preflight.resolved_version_label", draft.label);
+                    rSpan.setAttribute("push.preflight.resolved_agent_id", draft.agentId);
+                    rSpan.setStatus({ code: SpanStatusCode.OK });
+                    return draft;
+                }
+                catch (err) {
+                    const msg = err instanceof Error ? err.message : String(err);
+                    rSpan.setStatus({ code: SpanStatusCode.ERROR, message: msg });
+                    rSpan.recordException(err instanceof Error ? err : new Error(msg));
+                    throw err;
+                }
+                finally {
+                    rSpan.end();
+                }
+            });
             agentSettingsParams.version_id = prePushDraft.versionId;
-            log(_jsx(StatusLine, { kind: "info", label: `Pushing into draft version: ${prePushDraft.label}` }));
+            // Per a117251 (alboim): every agent-settings write body must carry the
+            // agent-management UUID. Setting it on `agentSettingsParams` here means
+            // every subsequent `client.<entity>` call funnels through `versionBody`
+            // and includes `agent_id` automatically.
+            agentSettingsParams.agent_id = prePushDraft.agentId;
+            pushSpan.setAttribute("push.version_id", prePushDraft.versionId);
+            pushSpan.setAttribute("push.version_label", prePushDraft.label);
+            pushSpan.setAttribute("push.agent_management_id", prePushDraft.agentId);
+            // Persist agent_management_id back to .auirc on first push so subsequent
+            // pushes skip the listAgents lookup. Mirrors what
+            // `resolvePushAgentManagementId` does in the legacy branch — keeps both
+            // paths converging on the same .auirc state. Non-fatal if the write
+            // fails (we already have the id in memory for this push).
+            if (!projectConfig.agent_management_id) {
+                try {
+                    saveProjectConfig({ ...projectConfig, agent_management_id: prePushDraft.agentId }, projectRoot);
+                    pushSpan.addEvent("auirc.agent_management_id_persisted_from_draft", {
+                        agent_management_id: prePushDraft.agentId,
+                    });
+                }
+                catch (err) {
+                    if (process.env.AUI_DEBUG) {
+                        console.warn("[debug] failed to persist agent_management_id back to .auirc:", err instanceof Error ? err.message : err);
+                    }
+                }
+            }
+        }
+        else {
+            // Legacy push (no version_id) — still need agent_id on write bodies.
+            // resolvePushAgentManagementId reads from .auirc first (cached by
+            // import-agent / pull-agent), falls back to listAgents lookup + writes
+            // back to .auirc so subsequent pushes skip the lookup.
+            pushSpan.addEvent("preflight.no_draft_version_required", {
+                reason: "legacy push (no version_id in .auirc or --version-id flag)",
+            });
+            const legacyAgentId = await resolvePushAgentManagementId(config, projectConfig, session, projectRoot);
+            agentSettingsParams.agent_id = legacyAgentId;
+            pushSpan.setAttribute("push.agent_management_id", legacyAgentId);
         }
         const pushTasks = buildPushTasks(diff, fileData, projectRoot, getFileDiff);
         pushSpan.setAttribute("push.task_count", pushTasks.length);
@@ -418,7 +554,12 @@ async function _push(pushSpan, agentCode, options = {}) {
         //    JSON envelope, and the non-zero exit code (BFF contract: zero silent
         //    errors anywhere in the push pipeline).
         const kbResult = await pushKnowledgeHubs(projectRoot, projectConfig);
+        pushSpan.setAttribute("push.kb.ok", kbResult.ok);
+        pushSpan.setAttribute("push.kb.failures", kbResult.failures.length);
         if (!kbResult.ok) {
+            pushSpan.addEvent("kb.failures_folded_into_pushFailures", {
+                count: kbResult.failures.length,
+            });
             for (const kbFailure of kbResult.failures) {
                 failed++;
                 pushFailures.push(kbFailure);
@@ -450,10 +591,16 @@ async function _push(pushSpan, agentCode, options = {}) {
                 process.stdout.isTTY === true;
             if (!isInteractive) {
                 failed += authFailedTasks.length;
+                pushSpan.addEvent("auth.fallback.non_interactive_rejected", {
+                    failed_task_count: authFailedTasks.length,
+                });
                 throw new AuthenticationError(`Authentication failed for ${authFailedTasks.length} push task(s); cannot prompt for an API key (non-interactive session).`, {
                     suggestion: "Pass --api-key <key>, set AUI_AGENT_TOOLS_API_KEY, or run `aui login` to refresh credentials.",
                 });
             }
+            pushSpan.addEvent("auth.fallback.api_key_prompted", {
+                failed_task_count: authFailedTasks.length,
+            });
             log(_jsxs(Box, { flexDirection: "column", paddingX: 1, children: [_jsx(StatusLine, { kind: "warning", label: "Authentication failed. Your access token may not have permission." }), _jsx(Hint, { message: "You can provide an API key as a fallback. It will be saved to ~/.aui/agent-settings-key" })] }));
             const { key } = await inquirer.prompt([
                 {
@@ -466,6 +613,9 @@ async function _push(pushSpan, agentCode, options = {}) {
             if (key && key.trim()) {
                 saveAgentSettingsApiKey(key.trim());
                 client.setAgentSettingsApiKey(key.trim());
+                pushSpan.addEvent("auth.fallback.api_key_provided", {
+                    retrying_task_count: authFailedTasks.length,
+                });
                 log(_jsx(StatusLine, { kind: "success", label: "Key saved." }));
                 log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "info", label: `Retrying ${authFailedTasks.length} change(s) with API key...` }) }));
                 authFailed = false;
@@ -495,10 +645,16 @@ async function _push(pushSpan, agentCode, options = {}) {
             }
             else {
                 failed += authFailedTasks.length;
+                pushSpan.addEvent("auth.fallback.api_key_skipped", {
+                    uncovered_task_count: authFailedTasks.length,
+                });
             }
         }
         else if (authFailed && authFailedTasks.length > 0) {
             failed += authFailedTasks.length;
+            pushSpan.addEvent("auth.fallback.saved_key_still_failed", {
+                failed_task_count: authFailedTasks.length,
+            });
             log(_jsx(ErrorDisplay, { error: new AuthenticationError("Auth failed even with saved API key.", {
                     suggestion: "Try: rm ~/.aui/agent-settings-key",
                 }) }));
@@ -532,6 +688,7 @@ async function _push(pushSpan, agentCode, options = {}) {
         if (prePushDraft) {
             const SNAPSHOT_MAX_ATTEMPTS = 4;
             const SNAPSHOT_RETRY_BASE_MS = 1000;
+            const snapshotTracer = getTracer();
             for (let attempt = 1; attempt <= SNAPSHOT_MAX_ATTEMPTS; attempt++) {
                 snapshotAttempts = attempt;
                 const label = attempt === 1
@@ -540,26 +697,65 @@ async function _push(pushSpan, agentCode, options = {}) {
                 if (json)
                     stderrLog(label);
                 const snapshotSpinner = json ? null : startSpinner(label);
+                // Per-attempt span — each snapshot upload is a network call that can
+                // hang for minutes (large multipart upload). Surfacing each attempt
+                // separately in Logfire lets us see retry behavior, attempt latency,
+                // and which attempt finally succeeded. Logfire query:
+                //   `name:"aui.push.task.snapshot" AND attributes."snapshot.attempt":3`
+                // finds every push that needed a third try.
                 let attemptError;
-                try {
-                    const snapshotResult = await pushSnapshot(client, prePushDraft.agentId, prePushDraft.versionId, projectRoot, fileData);
-                    if (snapshotResult.success) {
-                        const okMsg = attempt === 1
-                            ? `Snapshot pushed (${fileData.length} file(s))`
-                            : `Snapshot pushed (${fileData.length} file(s), attempt ${attempt}/${SNAPSHOT_MAX_ATTEMPTS})`;
-                        if (snapshotSpinner)
-                            snapshotSpinner.succeed(okMsg);
-                        else
-                            stderrLog(okMsg);
-                        snapshotSucceeded = true;
-                        snapshotError = undefined;
-                        break;
+                const attemptResolved = await snapshotTracer.startActiveSpan("aui.push.task.snapshot", async (snapSpan) => {
+                    snapSpan.setAttribute("push.task.type", "snapshot");
+                    snapSpan.setAttribute("push.task.label", label);
+                    snapSpan.setAttribute("snapshot.attempt", attempt);
+                    snapSpan.setAttribute("snapshot.max_attempts", SNAPSHOT_MAX_ATTEMPTS);
+                    snapSpan.setAttribute("snapshot.file_count", fileData.length);
+                    snapSpan.setAttribute("snapshot.agent_id", prePushDraft.agentId);
+                    snapSpan.setAttribute("snapshot.version_id", prePushDraft.versionId);
+                    try {
+                        const snapshotResult = await pushSnapshot(client, prePushDraft.agentId, prePushDraft.versionId, projectRoot, fileData);
+                        if (snapshotResult.success) {
+                            snapSpan.setStatus({ code: SpanStatusCode.OK });
+                            snapSpan.setAttribute("snapshot.outcome", "success");
+                            return { ok: true, error: undefined };
+                        }
+                        const errMsg = snapshotResult.error || "Unknown snapshot error";
+                        snapSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
+                        snapSpan.setAttribute("snapshot.outcome", "failed");
+                        snapSpan.setAttribute("push.task.error", errMsg);
+                        if (attempt < SNAPSHOT_MAX_ATTEMPTS) {
+                            snapSpan.addEvent("snapshot.retry_will_follow", {
+                                next_attempt: attempt + 1,
+                                backoff_ms: SNAPSHOT_RETRY_BASE_MS * Math.pow(2, attempt - 1),
+                            });
+                        }
+                        return { ok: false, error: errMsg };
                     }
-                    attemptError = snapshotResult.error || "Unknown snapshot error";
-                }
-                catch (error) {
-                    attemptError = error instanceof Error ? error.message : String(error);
+                    catch (error) {
+                        const errMsg = error instanceof Error ? error.message : String(error);
+                        snapSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
+                        snapSpan.recordException(error instanceof Error ? error : new Error(errMsg));
+                        snapSpan.setAttribute("snapshot.outcome", "exception");
+                        snapSpan.setAttribute("push.task.error", errMsg);
+                        return { ok: false, error: errMsg };
+                    }
+                    finally {
+                        snapSpan.end();
+                    }
+                });
+                if (attemptResolved.ok) {
+                    const okMsg = attempt === 1
+                        ? `Snapshot pushed (${fileData.length} file(s))`
+                        : `Snapshot pushed (${fileData.length} file(s), attempt ${attempt}/${SNAPSHOT_MAX_ATTEMPTS})`;
+                    if (snapshotSpinner)
+                        snapshotSpinner.succeed(okMsg);
+                    else
+                        stderrLog(okMsg);
+                    snapshotSucceeded = true;
+                    snapshotError = undefined;
+                    break;
                 }
+                attemptError = attemptResolved.error;
                 snapshotError = attemptError;
                 const isLast = attempt === SNAPSHOT_MAX_ATTEMPTS;
                 const failMsg = isLast
@@ -617,13 +813,27 @@ async function _push(pushSpan, agentCode, options = {}) {
                 if (filesSafeToCommit.length > 0) {
                     commitBaselineFiles(projectRoot, filesSafeToCommit, `pushed ${succeeded} change(s) (${failedFiles.size} file(s) held back due to per-task failures)`);
                     baselineUpdated = true;
+                    pushSpan.addEvent("baseline.partial_commit", {
+                        committed_files: filesSafeToCommit.length,
+                        held_back_files: failedFiles.size,
+                    });
+                }
+                else {
+                    pushSpan.addEvent("baseline.fully_held_back", {
+                        failed_files: failedFiles.size,
+                    });
                 }
             }
             else if (failed === 0) {
                 commitBaseline(projectRoot, "pushed changes");
                 baselineUpdated = true;
+                pushSpan.addEvent("baseline.full_commit");
             }
         }
+        else {
+            pushSpan.addEvent("baseline.skipped_due_to_snapshot_failure");
+        }
+        pushSpan.setAttribute("push.baseline_updated", baselineUpdated);
         log(_jsx(PushFinalSummary, { succeeded: succeeded, failed: failed, baselineUpdated: baselineUpdated, logDir: logRelPath, memoryPath: memoryPath, snapshotStatus: snapshotStatus, snapshotError: snapshotError }));
         if (failed > 0) {
             log(_jsxs(Box, { flexDirection: "column", paddingX: 1, children: [_jsx(StatusLine, { kind: "warning", label: `${failed} entity change(s) failed to push to DB.` }), pushFailures.map((f) => (_jsxs(Box, { flexDirection: "column", marginLeft: 2, children: [_jsxs(Text, { color: "red", children: ["  ", icons.error, " ", f.label] }), _jsxs(Text, { color: colors.muted, children: ["    Error: ", f.error] }), f.file && _jsxs(Text, { color: colors.muted, children: ["    File: ", f.file] })] }, f.label))), _jsxs(Box, { marginTop: 1, children: [_jsx(Text, { color: colors.info, bold: true, children: "What to do next: " }), _jsxs(Text, { color: colors.muted, children: ["Fix the issues above and re-run ", _jsx(Text, { bold: true, children: "aui push" }), " to retry the failed changes."] })] })] }));
@@ -717,11 +927,13 @@ async function _push(pushSpan, agentCode, options = {}) {
             throw error;
     }
 }
-async function resolveVersionDraft(config, projectConfig, session, explicitVersionId) {
-    // Every error path below MUST throw a typed CLIError (not return null).
-    // Returning null silently exits the CLI with code 0 — the BFF then thinks
-    // the push succeeded when nothing actually happened, and the failure
-    // never reaches Logfire because no exception bubbled to handleError.
+/**
+ * Lookup the agent-management record for the current `.auirc` project
+ * (preferred) or the active session fallback — same precedence as draft
+ * resolution. Each attempt records its error so callers can surface the full
+ * picture instead of silently dropping `agent_id` from request bodies.
+ */
+async function lookupAgentManagementInfoForPush(config, projectConfig, session) {
     const client = new AUIClient({
         baseUrl: config.apiUrl,
         authToken: config.authToken,
@@ -733,55 +945,110 @@ async function resolveVersionDraft(config, projectConfig, session, explicitVersi
     if (key)
         client.setAgentSettingsApiKey(key);
     let agentInfo;
+    const errors = [];
     const agentMgmtId = session.agent_management_id;
-    // Project's network_id (from .auirc) takes priority over session — when
-    // you're inside a project, that's the agent you mean. Session agent may
-    // point at a different agent (e.g. last `aui agents --switch`).
     const projectNetworkId = projectConfig.agent_id;
     const fallbackNetworkId = session.network_id;
     if (projectNetworkId) {
         try {
             const resp = await client.agentManagement.listAgents(client.getOrganizationId(), 1, 50, { network_id: projectNetworkId });
-            agentInfo = resp.items.find((a) => a.scope.network_id === projectNetworkId || a.id === projectNetworkId);
+            agentInfo = resp.items.find((a) => a.scope.network_id === projectNetworkId ||
+                a.id === projectNetworkId);
+            if (!agentInfo) {
+                errors.push(`listAgents(network_id=${projectNetworkId}) returned ${resp.items.length} item(s), none matched.`);
+            }
         }
         catch (err) {
-            // Listing fall-through is fine because the next two branches try other
-            // resolution paths AND a final ConfigError is thrown below if none
-            // succeed. But emit a debug warning so an operator with AUI_DEBUG=1
-            // can see WHICH branch failed and why (zero silent errors policy).
+            // Accumulate into `errors` so the eventual ConfigError can list every
+            // resolution path that failed (alboim's a117251). Also emit AUI_DEBUG
+            // warning for live operator observability (zero silent errors policy).
+            errors.push(`listAgents(network_id=${projectNetworkId}) threw: ${err instanceof Error ? err.message : String(err)}`);
             if (process.env.AUI_DEBUG) {
                 console.warn(`[debug] resolveVersionDraft: listAgents(network_id=${projectNetworkId}) failed:`, err instanceof Error ? err.message : err);
             }
         }
     }
-    // Fall back to session's agent_management_id only when not inside a project
-    if (!agentInfo && !projectNetworkId && agentMgmtId) {
+    // Try the session's agent_management_id even when the project has a network
+    // id — it's a direct getAgent call, no list scan, and it gracefully covers
+    // the case where listAgents fell through above.
+    if (!agentInfo && agentMgmtId) {
         try {
             agentInfo = await client.agentManagement.getAgent(agentMgmtId);
         }
         catch (err) {
+            errors.push(`getAgent(${agentMgmtId}) threw: ${err instanceof Error ? err.message : String(err)}`);
             if (process.env.AUI_DEBUG) {
                 console.warn(`[debug] resolveVersionDraft: getAgent(${agentMgmtId}) failed (stale id?):`, err instanceof Error ? err.message : err);
             }
         }
     }
-    // Last resort: session's network_id
-    if (!agentInfo && fallbackNetworkId) {
+    if (!agentInfo && fallbackNetworkId && fallbackNetworkId !== projectNetworkId) {
         try {
             const resp = await client.agentManagement.listAgents(client.getOrganizationId(), 1, 50, { network_id: fallbackNetworkId });
-            agentInfo = resp.items.find((a) => a.scope.network_id === fallbackNetworkId || a.id === fallbackNetworkId);
+            agentInfo = resp.items.find((a) => a.scope.network_id === fallbackNetworkId ||
+                a.id === fallbackNetworkId);
+            if (!agentInfo) {
+                errors.push(`listAgents(network_id=${fallbackNetworkId}) returned ${resp.items.length} item(s), none matched.`);
+            }
         }
         catch (err) {
+            errors.push(`listAgents(network_id=${fallbackNetworkId}) threw: ${err instanceof Error ? err.message : String(err)}`);
             if (process.env.AUI_DEBUG) {
                 console.warn(`[debug] resolveVersionDraft: listAgents(network_id=${fallbackNetworkId}) failed:`, err instanceof Error ? err.message : err);
             }
         }
     }
+    return { agentInfo, errors };
+}
+/**
+ * Return the agent-management UUID to send as `agent_id` on agent-settings
+ * write bodies. Reads `.auirc` first; falls back to `lookupAgentManagementInfoForPush`
+ * and **persists** the resolved id back to `.auirc` so subsequent pushes don't
+ * pay the lookup cost. Throws `ConfigError` if no id can be resolved — never
+ * silently returns undefined, because that's how entities ended up in the DB
+ * without `agent_id`.
+ */
+async function resolvePushAgentManagementId(config, projectConfig, session, projectRoot) {
+    if (projectConfig.agent_management_id)
+        return projectConfig.agent_management_id;
+    const { agentInfo, errors } = await lookupAgentManagementInfoForPush(config, projectConfig, session);
     if (!agentInfo) {
-        throw new ConfigError("Could not resolve agent for version management.", {
+        const detail = errors.length > 0 ? `\n  - ${errors.join("\n  - ")}` : "";
+        throw new ConfigError(`Could not resolve agent-management id for this project.${detail}`, {
+            suggestion: "Re-run `aui import-agent` (will populate .auirc.agent_management_id) or `aui pull` to back-fill it.",
+        });
+    }
+    // Migrate legacy projects: persist back so the next push skips the lookup.
+    try {
+        saveProjectConfig({ ...projectConfig, agent_management_id: agentInfo.id }, projectRoot);
+    }
+    catch {
+        // .auirc write failure is non-fatal — we already have the id in memory.
+    }
+    return agentInfo.id;
+}
+async function resolveVersionDraft(config, projectConfig, session, explicitVersionId) {
+    // Every error path below MUST throw a typed CLIError (not return null).
+    // Returning null silently exits the CLI with code 0 — the BFF then thinks
+    // the push succeeded when nothing actually happened, and the failure
+    // never reaches Logfire because no exception bubbled to handleError.
+    const { agentInfo, errors: lookupErrors } = await lookupAgentManagementInfoForPush(config, projectConfig, session);
+    if (!agentInfo) {
+        const detail = lookupErrors.length > 0 ? `\n  - ${lookupErrors.join("\n  - ")}` : "";
+        throw new ConfigError(`Could not resolve agent for version management.${detail}`, {
             suggestion: "Run `aui import-agent` to link an agent, or check your session with `aui status`.",
         });
     }
+    const client = new AUIClient({
+        baseUrl: config.apiUrl,
+        authToken: config.authToken,
+        accountId: config.accountId,
+        organizationId: config.organizationId,
+        environment: config.environment,
+    });
+    const key = loadAgentSettingsApiKey();
+    if (key)
+        client.setAgentSettingsApiKey(key);
     // If user passed --version-id, validate it's a draft
     if (explicitVersionId) {
         let ver;
@@ -1029,26 +1296,50 @@ async function pushKnowledgeHubs(projectRoot, projectConfig) {
                     log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Cannot delete "${kbName}" — no knowledge_base_id stored. Push the KB first, then delete.` }) }));
                     continue;
                 }
-                try {
-                    await kbViewClient.deleteKnowledgeBase(kbId, scope, kbName);
-                    log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "success", label: `Deleted: ${kbName}` }) }));
-                }
-                catch (delErr) {
-                    // Per-KB error: count it, keep going so partial work shows up.
-                    if (isNotFoundError(delErr)) {
-                        log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "success", label: `Deleted: ${kbName} (already absent)` }) }));
+                // Per-KB delete in its own span so each one shows up in Logfire as
+                // `aui.push.task.kb-delete` with status, kb name, kb id, and error
+                // body. Same observability shape as agent-settings entity tasks.
+                const kbDelTracer = getTracer();
+                await kbDelTracer.startActiveSpan("aui.push.task.kb-delete", async (span) => {
+                    span.setAttribute("push.task.type", "kb-delete");
+                    span.setAttribute("push.task.label", `Delete knowledge base: ${kbName}`);
+                    span.setAttribute("push.task.file", `knowledge-hubs/${kbDirName}/kb.json`);
+                    span.setAttribute("push.task.kb_id", kbId);
+                    span.setAttribute("push.task.kb_name", kbName);
+                    try {
+                        await kbViewClient.deleteKnowledgeBase(kbId, scope, kbName);
+                        span.setStatus({ code: SpanStatusCode.OK });
+                        log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "success", label: `Deleted: ${kbName}` }) }));
                     }
-                    else {
-                        kbDeleteSucceeded = false;
-                        const errMsg = delErr instanceof Error ? delErr.message : String(delErr);
-                        failures.push({
-                            label: `Delete knowledge base: ${kbName}`,
-                            file: `knowledge-hubs/${kbDirName}/kb.json`,
-                            error: errMsg,
-                        });
-                        log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "error", label: `Failed to delete "${kbName}": ${errMsg}` }) }));
+                    catch (delErr) {
+                        // Per-KB error: count it, keep going so partial work shows up.
+                        if (isNotFoundError(delErr)) {
+                            span.setStatus({ code: SpanStatusCode.OK });
+                            span.addEvent("fallback.delete_404_already_absent");
+                            span.setAttribute("push.task.fallback", "delete_404_already_absent");
+                            log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "success", label: `Deleted: ${kbName} (already absent)` }) }));
+                        }
+                        else {
+                            kbDeleteSucceeded = false;
+                            const errMsg = delErr instanceof Error ? delErr.message : String(delErr);
+                            span.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
+                            span.recordException(delErr instanceof Error ? delErr : new Error(errMsg));
+                            span.setAttribute("push.task.error", errMsg);
+                            if (delErr.statusCode) {
+                                span.setAttribute("push.task.error_status_code", delErr.statusCode);
+                            }
+                            failures.push({
+                                label: `Delete knowledge base: ${kbName}`,
+                                file: `knowledge-hubs/${kbDirName}/kb.json`,
+                                error: errMsg,
+                            });
+                            log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "error", label: `Failed to delete "${kbName}": ${errMsg}` }) }));
+                        }
                     }
-                }
+                    finally {
+                        span.end();
+                    }
+                });
             }
             if (kbDeleteSucceeded) {
                 deleteSpinner.succeed(`${deletedKBDirs.length} knowledge base(s) deleted`);
@@ -1085,41 +1376,64 @@ async function pushKnowledgeHubs(projectRoot, projectConfig) {
                     log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Skipped unsupported file: ${path.basename(skipped)} (only .pdf, .md, .txt, .json)` }) }));
                 }
                 if (supportedFiles.length > 0) {
-                    try {
-                        const importResult = await kbViewClient.importFiles({
-                            files: supportedFiles,
-                            scope,
-                            created_by: userId,
-                            knowledge_base_name: kbData.name,
-                            knowledge_base_description: kbData.description,
-                        });
-                        if (importResult.knowledge_base_id) {
-                            const kbJsonPath = path.join(kbDir, "kb.json");
-                            try {
-                                const raw = JSON.parse(fs.readFileSync(kbJsonPath, "utf-8"));
-                                raw.knowledge_base_id = importResult.knowledge_base_id;
-                                fs.writeFileSync(kbJsonPath, JSON.stringify(raw, null, 2) + "\n");
-                            }
-                            catch (writeErr) {
-                                // kb.json id write fail is non-fatal but tell the user so the
-                                // next push doesn't surprise them with "no knowledge_base_id stored".
-                                if (process.env.AUI_DEBUG) {
-                                    console.warn(`[debug] failed to write knowledge_base_id back to ${kbJsonPath}:`, writeErr);
+                    // Per-KB upload in its own span — Logfire query
+                    // `name:"aui.push.task.kb-upload" AND status_code:ERROR` finds
+                    // every KB push failure across all agents.
+                    const kbUpTracer = getTracer();
+                    await kbUpTracer.startActiveSpan("aui.push.task.kb-upload", async (span) => {
+                        span.setAttribute("push.task.type", "kb-upload");
+                        span.setAttribute("push.task.label", `Push knowledge base: ${kbData.name || kbDirName}`);
+                        span.setAttribute("push.task.file", `knowledge-hubs/${kbDirName}/kb.json`);
+                        span.setAttribute("push.task.kb_name", kbData.name || kbDirName);
+                        span.setAttribute("push.task.file_count", supportedFiles.length);
+                        try {
+                            const importResult = await kbViewClient.importFiles({
+                                files: supportedFiles,
+                                scope,
+                                created_by: userId,
+                                knowledge_base_name: kbData.name,
+                                knowledge_base_description: kbData.description,
+                            });
+                            span.setStatus({ code: SpanStatusCode.OK });
+                            if (importResult.knowledge_base_id) {
+                                span.setAttribute("push.task.kb_id", importResult.knowledge_base_id);
+                                const kbJsonPath = path.join(kbDir, "kb.json");
+                                try {
+                                    const raw = JSON.parse(fs.readFileSync(kbJsonPath, "utf-8"));
+                                    raw.knowledge_base_id = importResult.knowledge_base_id;
+                                    fs.writeFileSync(kbJsonPath, JSON.stringify(raw, null, 2) + "\n");
+                                }
+                                catch (writeErr) {
+                                    // kb.json id write fail is non-fatal but tell the user so the
+                                    // next push doesn't surprise them with "no knowledge_base_id stored".
+                                    span.addEvent("kb_id_writeback_failed");
+                                    if (process.env.AUI_DEBUG) {
+                                        console.warn(`[debug] failed to write knowledge_base_id back to ${kbJsonPath}:`, writeErr);
+                                    }
+                                    log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Could not persist knowledge_base_id back to ${path.basename(kbJsonPath)} — re-import or run \`aui pull\` to recover.` }) }));
                                 }
-                                log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Could not persist knowledge_base_id back to ${path.basename(kbJsonPath)} — re-import or run \`aui pull\` to recover.` }) }));
                             }
                         }
-                    }
-                    catch (uploadErr) {
-                        hadUploadFailure = true;
-                        const errMsg = uploadErr instanceof Error ? uploadErr.message : String(uploadErr);
-                        failures.push({
-                            label: `Push knowledge base: ${kbData.name || kbDirName}`,
-                            file: `knowledge-hubs/${kbDirName}/kb.json`,
-                            error: errMsg,
-                        });
-                        log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "error", label: `Failed to push "${kbData.name || kbDirName}": ${errMsg}` }) }));
-                    }
+                        catch (uploadErr) {
+                            hadUploadFailure = true;
+                            const errMsg = uploadErr instanceof Error ? uploadErr.message : String(uploadErr);
+                            span.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
+                            span.recordException(uploadErr instanceof Error ? uploadErr : new Error(errMsg));
+                            span.setAttribute("push.task.error", errMsg);
+                            if (uploadErr.statusCode) {
+                                span.setAttribute("push.task.error_status_code", uploadErr.statusCode);
+                            }
+                            failures.push({
+                                label: `Push knowledge base: ${kbData.name || kbDirName}`,
+                                file: `knowledge-hubs/${kbDirName}/kb.json`,
+                                error: errMsg,
+                            });
+                            log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "error", label: `Failed to push "${kbData.name || kbDirName}": ${errMsg}` }) }));
+                        }
+                        finally {
+                            span.end();
+                        }
+                    });
                 }
             }
             if (hadUploadFailure) {
@@ -1622,6 +1936,32 @@ function isTransient5xx(err) {
         ?? err.status;
     return code === 500 || code === 502 || code === 503 || code === 504;
 }
+/**
+ * Annotate the active OpenTelemetry span with a fallback decision taken
+ * during a push, so the trace shows which adaptive layer fired.
+ *
+ * Writes three things to the span: an event named `fallback.<kind>` that
+ * carries `detail` as its attributes, the attribute `push.task.fallback`
+ * set to `kind`, and one `push.task.fallback.<key>` attribute per entry
+ * of `detail`. Does nothing when no span is active.
+ *
+ * Kinds recorded by callers in this file: `patch_404_to_post`,
+ * `post_409_to_patch`, `delete_404_already_absent`, `transient_retry`.
+ *
+ * @param kind - Fallback identifier, e.g. `"transient_retry"`.
+ * @param detail - Optional flat map of extra attributes; may be nullish.
+ * @returns Nothing; the span is updated in place.
+ */
+function recordFallbackEvent(kind, detail) {
+    const activeSpan = trace.getActiveSpan();
+    if (activeSpan) {
+        activeSpan.addEvent(`fallback.${kind}`, detail);
+        activeSpan.setAttribute("push.task.fallback", kind);
+        Object.entries(detail ?? {}).forEach(([field, value]) => {
+            activeSpan.setAttribute(`push.task.fallback.${field}`, value);
+        });
+    }
+}
 /**
  * Run one entity-settings write call once, and retry exactly once on a
  * transient 5xx after a 1s back-off. The snapshot upload has its own
@@ -1640,6 +1980,11 @@ async function withTransientRetry(label, fn) {
         if (process.env.AUI_DEBUG) {
             console.log(`[debug] ${label} got ${code}, retrying once after 1000ms`);
         }
+        recordFallbackEvent("transient_retry", {
+            label,
+            status_code: code ?? 0,
+            backoff_ms: 1000,
+        });
         await new Promise((r) => setTimeout(r, 1000));
         return await fn();
     }
@@ -1671,6 +2016,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] patch-tool ${task.toolName}: 404 not found, falling back to POST`);
                         }
+                        recordFallbackEvent("patch_404_to_post", { task_type: "patch-tool", tool: String(task.toolName ?? "") });
                         return client.createTool(params, task.body);
                     }
                     throw err;
@@ -1689,6 +2035,7 @@ async function _executePushTask(client, params, task) {
                         const body = task.body;
                         const toolCode = body.code || "";
                         const toolName = toolCode.toUpperCase().replace(/-/g, "_");
+                        recordFallbackEvent("post_409_to_patch", { task_type: "create-tool", tool: toolName });
                         return client.patchTool(params, toolName, body);
                     }
                     throw err;
@@ -1704,6 +2051,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] delete-tool ${task.toolName}: 404 already absent`);
                         }
+                        recordFallbackEvent("delete_404_already_absent", { task_type: "delete-tool", tool: String(task.toolName ?? "") });
                         return DELETE_ALREADY_ABSENT;
                     }
                     throw err;
@@ -1727,6 +2075,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] create-parameter ${task.itemCode}: 409, falling back to PATCH`);
                         }
+                        recordFallbackEvent("post_409_to_patch", { task_type: "create-parameter", code: String(task.itemCode ?? "") });
                         return client.patchParameter(params, task.itemCode, task.body);
                     }
                     throw err;
@@ -1742,6 +2091,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] patch-parameter ${task.itemCode}: 404 not found, falling back to POST`);
                         }
+                        recordFallbackEvent("patch_404_to_post", { task_type: "patch-parameter", code: String(task.itemCode ?? "") });
                         return client.createParameter(params, task.body);
                     }
                     throw err;
@@ -1757,6 +2107,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] delete-parameter ${task.itemCode}: 404 already absent`);
                         }
+                        recordFallbackEvent("delete_404_already_absent", { task_type: "delete-parameter", code: String(task.itemCode ?? "") });
                         return DELETE_ALREADY_ABSENT;
                     }
                     throw err;
@@ -1772,6 +2123,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] create-entity ${task.itemCode}: 409, falling back to PATCH`);
                         }
+                        recordFallbackEvent("post_409_to_patch", { task_type: "create-entity", code: String(task.itemCode ?? "") });
                         return client.patchEntity(params, task.itemCode, task.body);
                     }
                     throw err;
@@ -1787,6 +2139,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] patch-entity ${task.itemCode}: 404, falling back to POST`);
                         }
+                        recordFallbackEvent("patch_404_to_post", { task_type: "patch-entity", code: String(task.itemCode ?? "") });
                         return client.createEntity(params, task.body);
                     }
                     throw err;
@@ -1802,6 +2155,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] delete-entity ${task.itemCode}: 404 already absent`);
                         }
+                        recordFallbackEvent("delete_404_already_absent", { task_type: "delete-entity", code: String(task.itemCode ?? "") });
                         return DELETE_ALREADY_ABSENT;
                     }
                     throw err;
@@ -1817,6 +2171,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] create-integration ${task.itemCode}: 409, falling back to PATCH`);
                         }
+                        recordFallbackEvent("post_409_to_patch", { task_type: "create-integration", code: String(task.itemCode ?? "") });
                         return client.patchIntegration(params, task.itemCode, task.body);
                     }
                     throw err;
@@ -1832,6 +2187,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] patch-integration ${task.itemCode}: 404 not found, falling back to POST`);
                         }
+                        recordFallbackEvent("patch_404_to_post", { task_type: "patch-integration", code: String(task.itemCode ?? "") });
                         return client.createIntegration(params, task.body);
                     }
                     throw err;
@@ -1847,6 +2203,7 @@ async function _executePushTask(client, params, task) {
                         if (process.env.AUI_DEBUG) {
                             console.log(`[debug] delete-integration ${task.itemCode}: 404 already absent`);
                         }
+                        recordFallbackEvent("delete_404_already_absent", { task_type: "delete-integration", code: String(task.itemCode ?? "") });
                         return DELETE_ALREADY_ABSENT;
                     }
                     throw err;