aui-agent-builder 0.3.83 → 0.3.86

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ import { AUIClient, applyScopeLevel } from "../api-client/index.js";
10
10
  import { findAuiFiles, parseAuiFile } from "../utils/index.js";
11
11
  import { validate } from "./validate.js";
12
12
  import { getTracer, SpanStatusCode, setUserContext } from "../telemetry.js";
13
+ import { trace } from "@opentelemetry/api";
13
14
  import { getItemLevelDiff } from "../utils/git.js";
14
15
  import { AuthenticationError, CLIError, ConfigError, ValidationError } from "../errors/index.js";
15
16
  import { StatusLine, Spinner, ErrorDisplay, Hint, } from "../ui/components/index.js";
@@ -23,20 +24,54 @@ function log(node) {
23
24
  }
24
25
  function startSpinner(label) {
25
26
  const inst = render(_jsx(Spinner, { label: label }));
27
+ let unmounted = false;
28
+ const safeUnmount = () => {
29
+ if (unmounted)
30
+ return;
31
+ unmounted = true;
32
+ inst.unmount();
33
+ };
26
34
  return {
27
35
  succeed(msg) {
28
- inst.unmount();
36
+ safeUnmount();
29
37
  log(_jsx(StatusLine, { kind: "success", label: msg }));
30
38
  },
31
39
  fail(msg) {
32
- inst.unmount();
40
+ safeUnmount();
33
41
  log(_jsx(StatusLine, { kind: "error", label: msg }));
34
42
  },
35
43
  stop() {
36
- inst.unmount();
44
+ safeUnmount();
45
+ },
46
+ /**
47
+ * Internal: unmount with no log line; a no-op if the spinner is already unmounted. Used by `withSpinner`
48
+ * to guarantee the spinner stops even when the wrapped body throws an
49
+ * exception that escapes the surrounding try/catch (which would
50
+ * otherwise leave a phantom spinner spinning forever).
51
+ */
52
+ _forceUnmount() {
53
+ safeUnmount();
37
54
  },
38
55
  };
39
56
  }
57
+ /**
58
+ * Wrap a sync- or async-returning callback so the spinner ALWAYS unmounts,
59
+ * even on uncaught exceptions. The callback can call `.succeed()` / `.fail()`
60
+ * itself to render a final status line; otherwise the spinner just stops.
61
+ *
62
+ * This pattern eliminates the "phantom spinner" foot-gun where an exception
63
+ * thrown between `startSpinner(...)` and `.succeed/.fail` leaves the Ink
64
+ * render mounted forever — leaving the chat UI stuck in its "Still thinking…" state.
65
+ */
66
+ async function withSpinner(label, fn) {
67
+ const spinner = startSpinner(label);
68
+ try {
69
+ return await fn(spinner);
70
+ }
71
+ finally {
72
+ spinner._forceUnmount();
73
+ }
74
+ }
40
75
  /**
41
76
  * Push local agent configuration to the backend
42
77
  */
@@ -90,13 +125,47 @@ async function _push(pushSpan, agentCode, options = {}) {
90
125
  log(_jsx(StatusLine, { kind: "info", label: "Validating configuration..." }));
91
126
  else
92
127
  stderrLog("Validating configuration...");
93
- const valid = await validate(projectRoot, { verbose: false });
128
+ // Wrap the validate call in its own span so a "stuck at validate"
129
+ // hang shows up clearly in Logfire as `aui.push.preflight.validate`
130
+ // with status = unset (still running) — instead of the parent
131
+ // `aui.push` span just sitting there with no clue why.
132
+ const validateTracer = getTracer();
133
+ const valid = await validateTracer.startActiveSpan("aui.push.preflight.validate", async (vSpan) => {
134
+ vSpan.setAttribute("push.preflight.step", "validate");
135
+ vSpan.setAttribute("push.preflight.skipValidation", false);
136
+ vSpan.setAttribute("push.preflight.force", options.force === true);
137
+ try {
138
+ const ok = await validate(projectRoot, { verbose: false });
139
+ vSpan.setAttribute("push.preflight.validate.ok", ok);
140
+ vSpan.setStatus({ code: SpanStatusCode.OK });
141
+ return ok;
142
+ }
143
+ catch (err) {
144
+ // validate() shouldn't throw under normal conditions, but if a
145
+ // schema fetch or git call inside it does, surface it here so
146
+ // we don't lose the error to the parent span's generic handler.
147
+ const msg = err instanceof Error ? err.message : String(err);
148
+ vSpan.setStatus({ code: SpanStatusCode.ERROR, message: msg });
149
+ vSpan.recordException(err instanceof Error ? err : new Error(msg));
150
+ throw err;
151
+ }
152
+ finally {
153
+ vSpan.end();
154
+ }
155
+ });
94
156
  if (!valid && !options.force) {
95
157
  pushSpan.setAttribute("push.exit_reason", "validation_failed");
158
+ pushSpan.addEvent("preflight.validation_rejected_push");
96
159
  throw new ValidationError("Push aborted due to validation errors.", {
97
160
  suggestion: "Fix the errors above, or use --force to push anyway.",
98
161
  });
99
162
  }
163
+ if (!valid && options.force) {
164
+ pushSpan.addEvent("preflight.validation_failed_but_forced");
165
+ }
166
+ }
167
+ else {
168
+ pushSpan.addEvent("preflight.validation_skipped");
100
169
  }
101
170
  if (!json)
102
171
  log(_jsx(StatusLine, { kind: "info", label: "Pushing agent changes..." }));
@@ -237,208 +306,15 @@ async function _push(pushSpan, agentCode, options = {}) {
237
306
  log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "muted", label: "Dry run \u2014 no changes pushed." }) }));
238
307
  return;
239
308
  }
240
- // ─── Push integration upserts before KB ───
241
- // KB uploads need the integration (and its knowledge base) to exist first.
242
- // Only create/patch/replace integrations here — not deletes: entities/workflows
243
- // may still reference an integration until we push entity changes below.
244
- let integrationUpsertsAlreadyPushed = false;
245
- /** When set, reused in the main Agent Settings push — avoids resolving the draft twice. */
246
- let resolvedPushDraftCache = null;
247
- let cachedPushAgentManagementId;
248
- if (diff && diff.hasChanges) {
249
- const intSession = await getValidSession();
250
- if (intSession) {
251
- const intAsp = await resolveAgentSettingsParams(config, projectConfig, intSession, projectRoot);
252
- if (intAsp) {
253
- const intClient = new AUIClient({
254
- baseUrl: config.apiUrl,
255
- authToken: config.authToken,
256
- accountId: config.accountId,
257
- organizationId: config.organizationId,
258
- environment: config.environment,
259
- });
260
- const intPushLogDir = path.join(projectRoot, ".aui", "push-logs");
261
- fs.mkdirSync(intPushLogDir, { recursive: true });
262
- intClient.setPushLogDir(intPushLogDir);
263
- if (options.apiKey) {
264
- saveAgentSettingsApiKey(options.apiKey);
265
- intClient.setAgentSettingsApiKey(options.apiKey);
266
- }
267
- const intSavedKey = loadAgentSettingsApiKey();
268
- if (intSavedKey && !options.apiKey) {
269
- intClient.setAgentSettingsApiKey(intSavedKey);
270
- }
271
- const intPushTasks = buildPushTasks(diff, fileData, projectRoot, getFileDiff);
272
- const integrationUpsertTasksEarly = intPushTasks.filter(isIntegrationUpsertTask);
273
- if (integrationUpsertTasksEarly.length > 0) {
274
- // Same as main push: integrations must carry agent_version_id on the body.
275
- // Without this, pre-KB upserts omit version_id while parameters / scope_entities
276
- // / rules use agentSettingsParams after resolveVersionDraft (and main skips upserts).
277
- if (projectConfig.version_id || options.versionId) {
278
- resolvedPushDraftCache = await resolveVersionDraft(config, projectConfig, intSession, options.versionId);
279
- intAsp.version_id = resolvedPushDraftCache.versionId;
280
- intAsp.agent_id = resolvedPushDraftCache.agentId;
281
- cachedPushAgentManagementId = resolvedPushDraftCache.agentId;
282
- }
283
- else {
284
- if (!cachedPushAgentManagementId) {
285
- cachedPushAgentManagementId = await resolvePushAgentManagementId(config, projectConfig, intSession, projectRoot);
286
- }
287
- intAsp.agent_id = cachedPushAgentManagementId;
288
- }
289
- log(_jsx(Box, { paddingX: 1, children: _jsxs(Text, { color: colors.info, children: [icons.bullet, " Pushing integrations (before KB upload)..."] }) }));
290
- for (const task of integrationUpsertTasksEarly) {
291
- const taskResult = {
292
- label: task.label,
293
- status: "success",
294
- };
295
- try {
296
- await executePushTask(intClient, intAsp, task);
297
- }
298
- catch (err) {
299
- const errMsg = err instanceof Error ? err.message : String(err);
300
- taskResult.status = "failed";
301
- taskResult.error = errMsg;
302
- }
303
- log(_jsx(Box, { paddingX: 2, children: _jsx(PushTaskLine, { result: taskResult }) }));
304
- }
305
- integrationUpsertsAlreadyPushed = true;
306
- }
307
- }
308
- }
309
- }
310
- // ─── Knowledge Hubs Push (full files) ───
311
- const { getKnowledgeHubChanges } = await import("../utils/git.js");
312
- const kbChanges = getKnowledgeHubChanges(projectRoot);
313
- if (kbChanges.length > 0) {
314
- const kbConfig = getConfig();
315
- const kbSession = await getValidSession();
316
- const kbNetworkId = projectConfig.agent_id || kbSession?.network_id;
317
- if (kbNetworkId && kbConfig.authToken) {
318
- const { KBViewClient } = await import("../api-client/kb-view-client.js");
319
- const { buildScope, readKbFolder } = await import("../services/kb-view.service.js");
320
- const { loadAgentSettingsApiKey: loadAsKey } = await import("../config/index.js");
321
- const kbViewClient = new KBViewClient({
322
- authToken: kbConfig.authToken,
323
- apiKey: loadAsKey() || undefined,
324
- organizationId: kbConfig.organizationId || "",
325
- environment: kbConfig.environment || "staging",
326
- });
327
- const kbLogDir = path.join(projectRoot, ".aui", "push-logs");
328
- fs.mkdirSync(kbLogDir, { recursive: true });
329
- kbViewClient.setPushLogDir(kbLogDir);
330
- const scope = buildScope({
331
- networkId: kbNetworkId,
332
- organizationId: projectConfig.organization_id || kbConfig.organizationId || "",
333
- accountId: projectConfig.account_id || kbConfig.accountId || "",
334
- });
335
- const userId = kbSession?.user_id || "cli";
336
- // Collect all changed KB directories (skip root-level files)
337
- const changedKBDirs = new Set();
338
- for (const change of kbChanges) {
339
- if (change.kbDirName) {
340
- changedKBDirs.add(change.kbDirName);
341
- }
342
- }
343
- // Split into existing (will upload) and deleted (will delete from server)
344
- const existingKBDirs = [...changedKBDirs].filter((d) => fs.existsSync(path.join(projectRoot, "knowledge-hubs", d)));
345
- const deletedKBDirs = [...changedKBDirs].filter((d) => !fs.existsSync(path.join(projectRoot, "knowledge-hubs", d)));
346
- // Delete KBs that were removed locally
347
- let kbDeleteSucceeded = true;
348
- if (deletedKBDirs.length > 0) {
349
- const { getBaselineFileContent } = await import("../utils/git.js");
350
- const deleteSpinner = startSpinner(`Deleting ${deletedKBDirs.length} knowledge base(s) from server...`);
351
- try {
352
- for (const kbDirName of deletedKBDirs) {
353
- const baselineKb = getBaselineFileContent(projectRoot, `knowledge-hubs/${kbDirName}/kb.json`);
354
- const kbName = baselineKb?.name || kbDirName;
355
- const kbId = baselineKb?.knowledge_base_id;
356
- if (!kbId) {
357
- log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Cannot delete "${kbName}" — no knowledge_base_id stored. Push the KB first, then delete.` }) }));
358
- continue;
359
- }
360
- await kbViewClient.deleteKnowledgeBase(kbId, scope, kbName);
361
- log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "success", label: `Deleted: ${kbName}` }) }));
362
- }
363
- deleteSpinner.succeed(`${deletedKBDirs.length} knowledge base(s) deleted`);
364
- }
365
- catch (error) {
366
- kbDeleteSucceeded = false;
367
- deleteSpinner.fail("Knowledge base deletion failed");
368
- log(_jsx(ErrorDisplay, { error: error }));
369
- }
370
- }
371
- // Upload full files for each changed KB
372
- let kbUploadSucceeded = false;
373
- if (existingKBDirs.length > 0) {
374
- const kbSpinner = startSpinner(`Pushing ${existingKBDirs.length} knowledge base(s)...`);
375
- try {
376
- for (const kbDirName of existingKBDirs) {
377
- const kbDir = path.join(projectRoot, "knowledge-hubs", kbDirName);
378
- const kbData = readKbFolder(kbDir);
379
- if (!kbData)
380
- continue;
381
- const SUPPORTED_EXTENSIONS = new Set([".pdf", ".md", ".txt", ".json"]);
382
- const supportedFiles = kbData.binaryFiles.filter((f) => SUPPORTED_EXTENSIONS.has(path.extname(f).toLowerCase()));
383
- const skippedFiles = kbData.binaryFiles.filter((f) => !SUPPORTED_EXTENSIONS.has(path.extname(f).toLowerCase()));
384
- for (const skipped of skippedFiles) {
385
- log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Skipped unsupported file: ${path.basename(skipped)} (only .pdf, .md, .txt, .json)` }) }));
386
- }
387
- if (supportedFiles.length > 0) {
388
- const importResult = await kbViewClient.importFiles({
389
- files: supportedFiles,
390
- scope,
391
- created_by: userId,
392
- knowledge_base_name: kbData.name,
393
- knowledge_base_description: kbData.description,
394
- });
395
- if (importResult.knowledge_base_id) {
396
- const kbJsonPath = path.join(kbDir, "kb.json");
397
- try {
398
- const raw = JSON.parse(fs.readFileSync(kbJsonPath, "utf-8"));
399
- raw.knowledge_base_id = importResult.knowledge_base_id;
400
- fs.writeFileSync(kbJsonPath, JSON.stringify(raw, null, 2) + "\n");
401
- }
402
- catch { /* kb.json write failed, non-fatal */ }
403
- }
404
- }
405
- }
406
- kbSpinner.succeed(`Knowledge base(s) pushed`);
407
- kbUploadSucceeded = true;
408
- }
409
- catch (error) {
410
- kbSpinner.fail("Knowledge base push failed");
411
- log(_jsx(ErrorDisplay, { error: error }));
412
- }
413
- }
414
- else {
415
- kbUploadSucceeded = true;
416
- }
417
- const kbPushSucceeded = kbUploadSucceeded && kbDeleteSucceeded;
418
- // Commit KB changes to baseline only if push succeeded
419
- if (kbPushSucceeded) {
420
- const kbFilesToAdd = kbChanges
421
- .filter((c) => c.status !== "deleted")
422
- .map((c) => c.file);
423
- const kbFilesToDelete = kbChanges
424
- .filter((c) => c.status === "deleted")
425
- .map((c) => c.file);
426
- if (kbFilesToAdd.length > 0 || kbFilesToDelete.length > 0) {
427
- const { commitBaselineFiles: commitKBFiles, removeBaselineFiles } = await import("../utils/git.js");
428
- if (kbFilesToDelete.length > 0) {
429
- removeBaselineFiles(projectRoot, kbFilesToDelete);
430
- }
431
- if (kbFilesToAdd.length > 0) {
432
- commitKBFiles(projectRoot, kbFilesToAdd, "pushed knowledge hub changes");
433
- }
434
- else {
435
- commitKBFiles(projectRoot, [], "removed knowledge hub files");
436
- }
437
- }
438
- }
439
- }
440
- }
441
309
  // ─── Agent Config Push ───
310
+ //
311
+ // Knowledge Bases used to be pushed here (BEFORE entity writes) with a
312
+ // special pre-step that pushed integrations even earlier so KB uploads
313
+ // would find their integration. That ordering caused two production
314
+ // bugs: integrations were PATCHed before the parameters they reference
315
+ // existed (CTS-12425), and tools were pushed in parallel with their
316
+ // dependencies (CTS-12426). The KB push has been moved into the unified
317
+ // dependency-ordered flow below — see `pushKnowledgeHubs` invocation.
442
318
  if (!diff || !diff.hasChanges) {
443
319
  pushSpan.setAttribute("push.exit_reason", "no_agent_config_changes");
444
320
  log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "success", label: "No agent config changes to push." }) }));
@@ -477,21 +353,48 @@ async function _push(pushSpan, agentCode, options = {}) {
477
353
  // If the project has version_id in .auirc or --version-id is passed,
478
354
  // we validate it's a draft. If no version context exists, we auto-detect
479
355
  // available drafts. Push is rejected if no draft is found.
356
+ //
357
+ // Wrapped in an `aui.push.preflight.resolve-version` span so a hang on
358
+ // listAgents / listVersions / getVersion shows up clearly in Logfire
359
+ // instead of being lumped under the parent push span. This is the
360
+ // step that hits agent-management with up to 3 sequential calls.
480
361
  let prePushDraft = null;
481
362
  if (projectConfig.version_id || options.versionId) {
482
- prePushDraft =
483
- resolvedPushDraftCache ??
484
- (await resolveVersionDraft(config, projectConfig, session, options.versionId));
363
+ const resolveTracer = getTracer();
364
+ prePushDraft = await resolveTracer.startActiveSpan("aui.push.preflight.resolve-version", async (rSpan) => {
365
+ rSpan.setAttribute("push.preflight.step", "resolve-version");
366
+ rSpan.setAttribute("push.preflight.has_explicit_version_id", !!options.versionId);
367
+ rSpan.setAttribute("push.preflight.has_auirc_version_id", !!projectConfig.version_id);
368
+ if (projectConfig.agent_id) {
369
+ rSpan.setAttribute("push.preflight.network_id", projectConfig.agent_id);
370
+ }
371
+ try {
372
+ const draft = await resolveVersionDraft(config, projectConfig, session, options.versionId);
373
+ rSpan.setAttribute("push.preflight.resolved_version_id", draft.versionId);
374
+ rSpan.setAttribute("push.preflight.resolved_version_label", draft.label);
375
+ rSpan.setAttribute("push.preflight.resolved_agent_id", draft.agentId);
376
+ rSpan.setStatus({ code: SpanStatusCode.OK });
377
+ return draft;
378
+ }
379
+ catch (err) {
380
+ const msg = err instanceof Error ? err.message : String(err);
381
+ rSpan.setStatus({ code: SpanStatusCode.ERROR, message: msg });
382
+ rSpan.recordException(err instanceof Error ? err : new Error(msg));
383
+ throw err;
384
+ }
385
+ finally {
386
+ rSpan.end();
387
+ }
388
+ });
485
389
  agentSettingsParams.version_id = prePushDraft.versionId;
486
- agentSettingsParams.agent_id = prePushDraft.agentId;
487
- cachedPushAgentManagementId = prePushDraft.agentId;
390
+ pushSpan.setAttribute("push.version_id", prePushDraft.versionId);
391
+ pushSpan.setAttribute("push.version_label", prePushDraft.label);
488
392
  log(_jsx(StatusLine, { kind: "info", label: `Pushing into draft version: ${prePushDraft.label}` }));
489
393
  }
490
394
  else {
491
- if (!cachedPushAgentManagementId) {
492
- cachedPushAgentManagementId = await resolvePushAgentManagementId(config, projectConfig, session, projectRoot);
493
- }
494
- agentSettingsParams.agent_id = cachedPushAgentManagementId;
395
+ pushSpan.addEvent("preflight.no_draft_version_required", {
396
+ reason: "legacy push (no version_id in .auirc or --version-id flag)",
397
+ });
495
398
  }
496
399
  const pushTasks = buildPushTasks(diff, fileData, projectRoot, getFileDiff);
497
400
  pushSpan.setAttribute("push.task_count", pushTasks.length);
@@ -526,82 +429,55 @@ async function _push(pushSpan, agentCode, options = {}) {
526
429
  };
527
430
  const agentCodeStr = projectConfig.agent_code || projectConfig.agent_id || "unknown";
528
431
  const agentIdStr = projectConfig.agent_id || "unknown";
529
- const pushStep = async (tasks, label, parallel) => {
432
+ /**
433
+ * Run one push step (a group of related tasks for one entity-type)
434
+ * STRICTLY SEQUENTIALLY. There is intentionally no `parallel` flag — the
435
+ * agent-settings backend has no optimistic locking and concurrent writes
436
+ * to the same agent silently merge / drop unresolvable references / re-
437
+ * sequence array items (see file header doc + CTS-12340 / -12425 / -12426
438
+ * for prior incidents). If you think you need to parallelize, you don't.
439
+ */
440
+ const pushStep = async (tasks, label) => {
530
441
  if (tasks.length === 0)
531
442
  return true;
532
443
  log(_jsx(Box, { paddingX: 1, children: _jsxs(Text, { color: colors.info, children: [icons.bullet, " ", label, "..."] }) }));
533
444
  const stepFailed = [];
534
445
  try {
535
- if (parallel) {
536
- const results = await Promise.allSettled(tasks.map((t) => executePushTask(client, agentSettingsParams, t)));
537
- for (let i = 0; i < results.length; i++) {
538
- const taskResult = {
539
- label: tasks[i].label,
540
- status: "success",
541
- };
542
- if (results[i].status === "fulfilled") {
543
- succeeded++;
544
- if (tasks[i].file)
545
- succeededFiles.push(tasks[i].file);
546
- }
547
- else {
548
- const err = results[i].reason;
549
- if (isAuthError(err)) {
550
- authFailed = true;
551
- authFailedTasks.push(tasks[i]);
552
- taskResult.status = "auth-failed";
553
- }
554
- else {
555
- failed++;
556
- const errMsg = err instanceof Error ? err.message : String(err);
557
- const failure = {
558
- label: tasks[i].label,
559
- file: tasks[i].file,
560
- error: errMsg,
561
- };
562
- pushFailures.push(failure);
563
- stepFailed.push(failure);
564
- taskResult.status = "failed";
565
- taskResult.error = errMsg;
566
- }
446
+ for (const task of tasks) {
447
+ const taskResult = {
448
+ label: task.label,
449
+ status: "success",
450
+ };
451
+ try {
452
+ const result = await executePushTask(client, agentSettingsParams, task);
453
+ succeeded++;
454
+ if (task.file)
455
+ succeededFiles.push(task.file);
456
+ if (isAlreadyAbsentResult(result)) {
457
+ taskResult.label = `${task.label} (already absent)`;
567
458
  }
568
- log(_jsx(Box, { paddingX: 2, children: _jsx(PushTaskLine, { result: taskResult }) }));
569
459
  }
570
- }
571
- else {
572
- for (const task of tasks) {
573
- const taskResult = {
574
- label: task.label,
575
- status: "success",
576
- };
577
- try {
578
- await executePushTask(client, agentSettingsParams, task);
579
- succeeded++;
580
- if (task.file)
581
- succeededFiles.push(task.file);
460
+ catch (err) {
461
+ if (isAuthError(err)) {
462
+ authFailed = true;
463
+ authFailedTasks.push(task);
464
+ taskResult.status = "auth-failed";
582
465
  }
583
- catch (err) {
584
- if (isAuthError(err)) {
585
- authFailed = true;
586
- authFailedTasks.push(task);
587
- taskResult.status = "auth-failed";
588
- }
589
- else {
590
- failed++;
591
- const errMsg = err instanceof Error ? err.message : String(err);
592
- const failure = {
593
- label: task.label,
594
- file: task.file,
595
- error: errMsg,
596
- };
597
- pushFailures.push(failure);
598
- stepFailed.push(failure);
599
- taskResult.status = "failed";
600
- taskResult.error = errMsg;
601
- }
466
+ else {
467
+ failed++;
468
+ const errMsg = err instanceof Error ? err.message : String(err);
469
+ const failure = {
470
+ label: task.label,
471
+ file: task.file,
472
+ error: errMsg,
473
+ };
474
+ pushFailures.push(failure);
475
+ stepFailed.push(failure);
476
+ taskResult.status = "failed";
477
+ taskResult.error = errMsg;
602
478
  }
603
- log(_jsx(Box, { paddingX: 2, children: _jsx(PushTaskLine, { result: taskResult }) }));
604
479
  }
480
+ log(_jsx(Box, { paddingX: 2, children: _jsx(PushTaskLine, { result: taskResult }) }));
605
481
  }
606
482
  return stepFailed.length === 0 && !authFailed;
607
483
  }
@@ -627,17 +503,50 @@ async function _push(pushSpan, agentCode, options = {}) {
627
503
  t.type === "delete-tool");
628
504
  const settingsTasks = pushTasks.filter((t) => t.type === "patch-general-settings");
629
505
  const rulesTasks = pushTasks.filter((t) => t.type === "put-rules");
630
- await pushStep(paramTasks, "Pushing parameters", false);
631
- // Integrations before entities so workflows/tools can safely reference integration codes.
632
- if (!integrationUpsertsAlreadyPushed) {
633
- await pushStep(integrationUpsertTasks, "Pushing integrations", false);
634
- }
635
- await pushStep(entityTasks, "Pushing entities", false);
636
- // Delete integrations after entity changes drop references where needed.
637
- await pushStep(integrationDeleteTasks, "Removing integrations", false);
638
- await pushStep(toolTasks, "Pushing tools", true);
639
- await pushStep(settingsTasks, "Pushing general settings", false);
640
- await pushStep(rulesTasks, "Pushing rules", false);
506
+ // ─── Push order — see file header for rationale ─────────────────────
507
+ //
508
+ // Phase 1: UPSERTS, top-down by dependency (least-dependent first, most-dependent last).
509
+ // Every step is sequential by construction (`pushStep` has no
510
+ // parallel mode). Do not work around this — the agent-settings
511
+ // backend silently merges / drops unresolvable refs on concurrent
512
+ // writes.
513
+ // 1. Parameters — referenced by entities, integrations, tools, rules.
514
+ await pushStep(paramTasks, "Pushing parameters");
515
+ // 2. Entities — reference parameters; referenced by tools, integrations.
516
+ await pushStep(entityTasks, "Pushing entities");
517
+ // 3. Integration upserts — reference parameters / entities. Must be
518
+ // pushed BEFORE knowledge-base uploads (KBs attach to integrations)
519
+ // AND before tools (tools reference integration codes).
520
+ await pushStep(integrationUpsertTasks, "Pushing integrations");
521
+ // 4. Knowledge Bases — reference integrations existing on the platform.
522
+ // KB failures are folded into the same `failed` counter / pushFailures
523
+ // array as agent-settings writes, so they hit the "X failed." line, the
524
+ // JSON envelope, and the non-zero exit code (BFF contract: zero silent
525
+ // errors anywhere in the push pipeline).
526
+ const kbResult = await pushKnowledgeHubs(projectRoot, projectConfig);
527
+ pushSpan.setAttribute("push.kb.ok", kbResult.ok);
528
+ pushSpan.setAttribute("push.kb.failures", kbResult.failures.length);
529
+ if (!kbResult.ok) {
530
+ pushSpan.addEvent("kb.failures_folded_into_pushFailures", {
531
+ count: kbResult.failures.length,
532
+ });
533
+ for (const kbFailure of kbResult.failures) {
534
+ failed++;
535
+ pushFailures.push(kbFailure);
536
+ }
537
+ }
538
+ // 5. Tools — reference parameters, entities, integrations. Sequential:
539
+ // parallel tool pushes caused inter-tool race conditions in
540
+ // production (chain-activation, success-rule re-sequencing).
541
+ await pushStep(toolTasks, "Pushing tools");
542
+ // 6. Rules — reference tools, parameters, entities.
543
+ await pushStep(rulesTasks, "Pushing rules");
544
+ // 7. General settings — mostly standalone but may reference defaults.
545
+ await pushStep(settingsTasks, "Pushing general settings");
546
+ // Phase 2: DELETES, bottom-up. Integrations get deleted last so any
547
+ // tool / entity update above that still referenced them succeeds first.
548
+ await pushStep(integrationDeleteTasks, "Removing integrations");
549
+ // Phase 3: Snapshot — runs at the very end of `_push` (see below).
641
550
  // Auth fallback
642
551
  if (authFailed && authFailedTasks.length > 0 && !savedApiKey) {
643
552
  // The auth fallback prompts for an API key. In a non-TTY environment
@@ -652,10 +561,16 @@ async function _push(pushSpan, agentCode, options = {}) {
652
561
  process.stdout.isTTY === true;
653
562
  if (!isInteractive) {
654
563
  failed += authFailedTasks.length;
564
+ pushSpan.addEvent("auth.fallback.non_interactive_rejected", {
565
+ failed_task_count: authFailedTasks.length,
566
+ });
655
567
  throw new AuthenticationError(`Authentication failed for ${authFailedTasks.length} push task(s); cannot prompt for an API key (non-interactive session).`, {
656
568
  suggestion: "Pass --api-key <key>, set AUI_AGENT_TOOLS_API_KEY, or run `aui login` to refresh credentials.",
657
569
  });
658
570
  }
571
+ pushSpan.addEvent("auth.fallback.api_key_prompted", {
572
+ failed_task_count: authFailedTasks.length,
573
+ });
659
574
  log(_jsxs(Box, { flexDirection: "column", paddingX: 1, children: [_jsx(StatusLine, { kind: "warning", label: "Authentication failed. Your access token may not have permission." }), _jsx(Hint, { message: "You can provide an API key as a fallback. It will be saved to ~/.aui/agent-settings-key" })] }));
660
575
  const { key } = await inquirer.prompt([
661
576
  {
@@ -668,6 +583,9 @@ async function _push(pushSpan, agentCode, options = {}) {
668
583
  if (key && key.trim()) {
669
584
  saveAgentSettingsApiKey(key.trim());
670
585
  client.setAgentSettingsApiKey(key.trim());
586
+ pushSpan.addEvent("auth.fallback.api_key_provided", {
587
+ retrying_task_count: authFailedTasks.length,
588
+ });
671
589
  log(_jsx(StatusLine, { kind: "success", label: "Key saved." }));
672
590
  log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "info", label: `Retrying ${authFailedTasks.length} change(s) with API key...` }) }));
673
591
  authFailed = false;
@@ -697,10 +615,16 @@ async function _push(pushSpan, agentCode, options = {}) {
697
615
  }
698
616
  else {
699
617
  failed += authFailedTasks.length;
618
+ pushSpan.addEvent("auth.fallback.api_key_skipped", {
619
+ uncovered_task_count: authFailedTasks.length,
620
+ });
700
621
  }
701
622
  }
702
623
  else if (authFailed && authFailedTasks.length > 0) {
703
624
  failed += authFailedTasks.length;
625
+ pushSpan.addEvent("auth.fallback.saved_key_still_failed", {
626
+ failed_task_count: authFailedTasks.length,
627
+ });
704
628
  log(_jsx(ErrorDisplay, { error: new AuthenticationError("Auth failed even with saved API key.", {
705
629
  suggestion: "Try: rm ~/.aui/agent-settings-key",
706
630
  }) }));
@@ -734,6 +658,7 @@ async function _push(pushSpan, agentCode, options = {}) {
734
658
  if (prePushDraft) {
735
659
  const SNAPSHOT_MAX_ATTEMPTS = 4;
736
660
  const SNAPSHOT_RETRY_BASE_MS = 1000;
661
+ const snapshotTracer = getTracer();
737
662
  for (let attempt = 1; attempt <= SNAPSHOT_MAX_ATTEMPTS; attempt++) {
738
663
  snapshotAttempts = attempt;
739
664
  const label = attempt === 1
@@ -742,26 +667,65 @@ async function _push(pushSpan, agentCode, options = {}) {
742
667
  if (json)
743
668
  stderrLog(label);
744
669
  const snapshotSpinner = json ? null : startSpinner(label);
670
+ // Per-attempt span — each snapshot upload is a network call that can
671
+ // hang for minutes (large multipart upload). Surfacing each attempt
672
+ // separately in Logfire lets us see retry behavior, attempt latency,
673
+ // and which attempt finally succeeded. Logfire query:
674
+ // `name:"aui.push.task.snapshot" AND attributes."snapshot.attempt":3`
675
+ // finds every push that needed a third try.
745
676
  let attemptError;
746
- try {
747
- const snapshotResult = await pushSnapshot(client, prePushDraft.agentId, prePushDraft.versionId, projectRoot, fileData);
748
- if (snapshotResult.success) {
749
- const okMsg = attempt === 1
750
- ? `Snapshot pushed (${fileData.length} file(s))`
751
- : `Snapshot pushed (${fileData.length} file(s), attempt ${attempt}/${SNAPSHOT_MAX_ATTEMPTS})`;
752
- if (snapshotSpinner)
753
- snapshotSpinner.succeed(okMsg);
754
- else
755
- stderrLog(okMsg);
756
- snapshotSucceeded = true;
757
- snapshotError = undefined;
758
- break;
677
+ const attemptResolved = await snapshotTracer.startActiveSpan("aui.push.task.snapshot", async (snapSpan) => {
678
+ snapSpan.setAttribute("push.task.type", "snapshot");
679
+ snapSpan.setAttribute("push.task.label", label);
680
+ snapSpan.setAttribute("snapshot.attempt", attempt);
681
+ snapSpan.setAttribute("snapshot.max_attempts", SNAPSHOT_MAX_ATTEMPTS);
682
+ snapSpan.setAttribute("snapshot.file_count", fileData.length);
683
+ snapSpan.setAttribute("snapshot.agent_id", prePushDraft.agentId);
684
+ snapSpan.setAttribute("snapshot.version_id", prePushDraft.versionId);
685
+ try {
686
+ const snapshotResult = await pushSnapshot(client, prePushDraft.agentId, prePushDraft.versionId, projectRoot, fileData);
687
+ if (snapshotResult.success) {
688
+ snapSpan.setStatus({ code: SpanStatusCode.OK });
689
+ snapSpan.setAttribute("snapshot.outcome", "success");
690
+ return { ok: true, error: undefined };
691
+ }
692
+ const errMsg = snapshotResult.error || "Unknown snapshot error";
693
+ snapSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
694
+ snapSpan.setAttribute("snapshot.outcome", "failed");
695
+ snapSpan.setAttribute("push.task.error", errMsg);
696
+ if (attempt < SNAPSHOT_MAX_ATTEMPTS) {
697
+ snapSpan.addEvent("snapshot.retry_will_follow", {
698
+ next_attempt: attempt + 1,
699
+ backoff_ms: SNAPSHOT_RETRY_BASE_MS * Math.pow(2, attempt - 1),
700
+ });
701
+ }
702
+ return { ok: false, error: errMsg };
759
703
  }
760
- attemptError = snapshotResult.error || "Unknown snapshot error";
761
- }
762
- catch (error) {
763
- attemptError = error instanceof Error ? error.message : String(error);
704
+ catch (error) {
705
+ const errMsg = error instanceof Error ? error.message : String(error);
706
+ snapSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
707
+ snapSpan.recordException(error instanceof Error ? error : new Error(errMsg));
708
+ snapSpan.setAttribute("snapshot.outcome", "exception");
709
+ snapSpan.setAttribute("push.task.error", errMsg);
710
+ return { ok: false, error: errMsg };
711
+ }
712
+ finally {
713
+ snapSpan.end();
714
+ }
715
+ });
716
+ if (attemptResolved.ok) {
717
+ const okMsg = attempt === 1
718
+ ? `Snapshot pushed (${fileData.length} file(s))`
719
+ : `Snapshot pushed (${fileData.length} file(s), attempt ${attempt}/${SNAPSHOT_MAX_ATTEMPTS})`;
720
+ if (snapshotSpinner)
721
+ snapshotSpinner.succeed(okMsg);
722
+ else
723
+ stderrLog(okMsg);
724
+ snapshotSucceeded = true;
725
+ snapshotError = undefined;
726
+ break;
764
727
  }
728
+ attemptError = attemptResolved.error;
765
729
  snapshotError = attemptError;
766
730
  const isLast = attempt === SNAPSHOT_MAX_ATTEMPTS;
767
731
  const failMsg = isLast
@@ -795,20 +759,51 @@ async function _push(pushSpan, agentCode, options = {}) {
795
759
  // This ensures: if snapshot fails, user re-runs `aui push` to retry both
796
760
  // failed entity pushes AND the snapshot. Local files remain the source
797
761
  // of truth until the server has captured them.
762
+ //
763
+ // CRITICAL (CTS-12340 follow-up): when one file has BOTH succeeded and
764
+ // failed tasks (e.g. integrations.aui.json with a successful DELETE on
765
+ // web-search and a failed POST on search-flights), do NOT commit that
766
+ // file to baseline. If we did, the next push's git diff would treat the
767
+ // failed item as "already on the platform" and emit a PATCH that 404s.
768
+ // The previous behaviour stuck users in an unrecoverable retry loop.
798
769
  let baselineUpdated = false;
799
770
  const canCommitBaseline = !prePushDraft || snapshotSucceeded;
800
771
  if (canCommitBaseline) {
772
+ // A file is committable iff EVERY task that targeted it succeeded.
773
+ // Build the failed-files set from `pushFailures` (which now includes
774
+ // both agent-settings entity failures AND knowledge-hub failures —
775
+ // see the KB push step).
776
+ const failedFiles = new Set();
777
+ for (const f of pushFailures) {
778
+ if (f.file)
779
+ failedFiles.add(f.file);
780
+ }
781
+ const filesSafeToCommit = succeededFiles.filter((f) => !failedFiles.has(f));
801
782
  if (failed > 0 && succeeded > 0) {
802
- if (succeededFiles.length > 0) {
803
- commitBaselineFiles(projectRoot, succeededFiles, `pushed ${succeeded} change(s)`);
783
+ if (filesSafeToCommit.length > 0) {
784
+ commitBaselineFiles(projectRoot, filesSafeToCommit, `pushed ${succeeded} change(s) (${failedFiles.size} file(s) held back due to per-task failures)`);
804
785
  baselineUpdated = true;
786
+ pushSpan.addEvent("baseline.partial_commit", {
787
+ committed_files: filesSafeToCommit.length,
788
+ held_back_files: failedFiles.size,
789
+ });
790
+ }
791
+ else {
792
+ pushSpan.addEvent("baseline.fully_held_back", {
793
+ failed_files: failedFiles.size,
794
+ });
805
795
  }
806
796
  }
807
797
  else if (failed === 0) {
808
798
  commitBaseline(projectRoot, "pushed changes");
809
799
  baselineUpdated = true;
800
+ pushSpan.addEvent("baseline.full_commit");
810
801
  }
811
802
  }
803
+ else {
804
+ pushSpan.addEvent("baseline.skipped_due_to_snapshot_failure");
805
+ }
806
+ pushSpan.setAttribute("push.baseline_updated", baselineUpdated);
812
807
  log(_jsx(PushFinalSummary, { succeeded: succeeded, failed: failed, baselineUpdated: baselineUpdated, logDir: logRelPath, memoryPath: memoryPath, snapshotStatus: snapshotStatus, snapshotError: snapshotError }));
813
808
  if (failed > 0) {
814
809
  log(_jsxs(Box, { flexDirection: "column", paddingX: 1, children: [_jsx(StatusLine, { kind: "warning", label: `${failed} entity change(s) failed to push to DB.` }), pushFailures.map((f) => (_jsxs(Box, { flexDirection: "column", marginLeft: 2, children: [_jsxs(Text, { color: "red", children: [" ", icons.error, " ", f.label] }), _jsxs(Text, { color: colors.muted, children: [" Error: ", f.error] }), f.file && _jsxs(Text, { color: colors.muted, children: [" File: ", f.file] })] }, f.label))), _jsxs(Box, { marginTop: 1, children: [_jsx(Text, { color: colors.info, bold: true, children: "What to do next: " }), _jsxs(Text, { color: colors.muted, children: ["Fix the issues above and re-run ", _jsx(Text, { bold: true, children: "aui push" }), " to retry the failed changes."] })] })] }));
@@ -902,13 +897,11 @@ async function _push(pushSpan, agentCode, options = {}) {
902
897
  throw error;
903
898
  }
904
899
  }
905
- /**
906
- * Lookup the agent-management record for the current `.auirc` project
907
- * (preferred) or the active session fallback — same precedence as draft
908
- * resolution. Each attempt records its error so callers can surface the full
909
- * picture instead of silently dropping `agent_id` from request bodies.
910
- */
911
- async function lookupAgentManagementInfoForPush(config, projectConfig, session) {
900
+ async function resolveVersionDraft(config, projectConfig, session, explicitVersionId) {
901
+ // Every error path below MUST throw a typed CLIError (not return null).
902
+ // Returning null silently exits the CLI with code 0 — the BFF then thinks
903
+ // the push succeeded when nothing actually happened, and the failure
904
+ // never reaches Logfire because no exception bubbled to handleError.
912
905
  const client = new AUIClient({
913
906
  baseUrl: config.apiUrl,
914
907
  authToken: config.authToken,
@@ -920,98 +913,55 @@ async function lookupAgentManagementInfoForPush(config, projectConfig, session)
920
913
  if (key)
921
914
  client.setAgentSettingsApiKey(key);
922
915
  let agentInfo;
923
- const errors = [];
924
916
  const agentMgmtId = session.agent_management_id;
917
+ // Project's network_id (from .auirc) takes priority over session — when
918
+ // you're inside a project, that's the agent you mean. Session agent may
919
+ // point at a different agent (e.g. last `aui agents --switch`).
925
920
  const projectNetworkId = projectConfig.agent_id;
926
921
  const fallbackNetworkId = session.network_id;
927
922
  if (projectNetworkId) {
928
923
  try {
929
924
  const resp = await client.agentManagement.listAgents(client.getOrganizationId(), 1, 50, { network_id: projectNetworkId });
930
- agentInfo = resp.items.find((a) => a.scope.network_id === projectNetworkId ||
931
- a.id === projectNetworkId);
932
- if (!agentInfo) {
933
- errors.push(`listAgents(network_id=${projectNetworkId}) returned ${resp.items.length} item(s), none matched.`);
934
- }
925
+ agentInfo = resp.items.find((a) => a.scope.network_id === projectNetworkId || a.id === projectNetworkId);
935
926
  }
936
927
  catch (err) {
937
- errors.push(`listAgents(network_id=${projectNetworkId}) threw: ${err instanceof Error ? err.message : String(err)}`);
928
+ // Listing fall-through is fine because the next two branches try other
929
+ // resolution paths AND a final ConfigError is thrown below if none
930
+ // succeed. But emit a debug warning so an operator with AUI_DEBUG=1
931
+ // can see WHICH branch failed and why (zero silent errors policy).
932
+ if (process.env.AUI_DEBUG) {
933
+ console.warn(`[debug] resolveVersionDraft: listAgents(network_id=${projectNetworkId}) failed:`, err instanceof Error ? err.message : err);
934
+ }
938
935
  }
939
936
  }
940
- // Try the session's agent_management_id even when the project has a network
941
- // id — it's a direct getAgent call, no list scan, and it gracefully covers
942
- // the case where listAgents fell through above.
943
- if (!agentInfo && agentMgmtId) {
937
+ // Fall back to session's agent_management_id only when not inside a project
938
+ if (!agentInfo && !projectNetworkId && agentMgmtId) {
944
939
  try {
945
940
  agentInfo = await client.agentManagement.getAgent(agentMgmtId);
946
941
  }
947
942
  catch (err) {
948
- errors.push(`getAgent(${agentMgmtId}) threw: ${err instanceof Error ? err.message : String(err)}`);
943
+ if (process.env.AUI_DEBUG) {
944
+ console.warn(`[debug] resolveVersionDraft: getAgent(${agentMgmtId}) failed (stale id?):`, err instanceof Error ? err.message : err);
945
+ }
949
946
  }
950
947
  }
951
- if (!agentInfo && fallbackNetworkId && fallbackNetworkId !== projectNetworkId) {
948
+ // Last resort: session's network_id
949
+ if (!agentInfo && fallbackNetworkId) {
952
950
  try {
953
951
  const resp = await client.agentManagement.listAgents(client.getOrganizationId(), 1, 50, { network_id: fallbackNetworkId });
954
- agentInfo = resp.items.find((a) => a.scope.network_id === fallbackNetworkId ||
955
- a.id === fallbackNetworkId);
956
- if (!agentInfo) {
957
- errors.push(`listAgents(network_id=${fallbackNetworkId}) returned ${resp.items.length} item(s), none matched.`);
958
- }
952
+ agentInfo = resp.items.find((a) => a.scope.network_id === fallbackNetworkId || a.id === fallbackNetworkId);
959
953
  }
960
954
  catch (err) {
961
- errors.push(`listAgents(network_id=${fallbackNetworkId}) threw: ${err instanceof Error ? err.message : String(err)}`);
955
+ if (process.env.AUI_DEBUG) {
956
+ console.warn(`[debug] resolveVersionDraft: listAgents(network_id=${fallbackNetworkId}) failed:`, err instanceof Error ? err.message : err);
957
+ }
962
958
  }
963
959
  }
964
- return { agentInfo, errors };
965
- }
966
- /**
967
- * Return the agent-management UUID to send as `agent_id` on agent-settings
968
- * write bodies. Reads `.auirc` first; falls back to `lookupAgentManagementInfoForPush`
969
- * and **persists** the resolved id back to `.auirc` so subsequent pushes don't
970
- * pay the lookup cost. Throws `ConfigError` if no id can be resolved — never
971
- * silently returns undefined, because that's how entities ended up in the DB
972
- * without `agent_id`.
973
- */
974
- async function resolvePushAgentManagementId(config, projectConfig, session, projectRoot) {
975
- if (projectConfig.agent_management_id)
976
- return projectConfig.agent_management_id;
977
- const { agentInfo, errors } = await lookupAgentManagementInfoForPush(config, projectConfig, session);
978
- if (!agentInfo) {
979
- const detail = errors.length > 0 ? `\n - ${errors.join("\n - ")}` : "";
980
- throw new ConfigError(`Could not resolve agent-management id for this project.${detail}`, {
981
- suggestion: "Re-run `aui import-agent` (will populate .auirc.agent_management_id) or `aui pull` to back-fill it.",
982
- });
983
- }
984
- // Migrate legacy projects: persist back so the next push skips the lookup.
985
- try {
986
- saveProjectConfig({ ...projectConfig, agent_management_id: agentInfo.id }, projectRoot);
987
- }
988
- catch {
989
- // .auirc write failure is non-fatal — we already have the id in memory.
990
- }
991
- return agentInfo.id;
992
- }
993
- async function resolveVersionDraft(config, projectConfig, session, explicitVersionId) {
994
- // Every error path below MUST throw a typed CLIError (not return null).
995
- // Returning null silently exits the CLI with code 0 — the BFF then thinks
996
- // the push succeeded when nothing actually happened, and the failure
997
- // never reaches Logfire because no exception bubbled to handleError.
998
- const { agentInfo, errors: lookupErrors } = await lookupAgentManagementInfoForPush(config, projectConfig, session);
999
960
  if (!agentInfo) {
1000
- const detail = lookupErrors.length > 0 ? `\n - ${lookupErrors.join("\n - ")}` : "";
1001
- throw new ConfigError(`Could not resolve agent for version management.${detail}`, {
961
+ throw new ConfigError("Could not resolve agent for version management.", {
1002
962
  suggestion: "Run `aui import-agent` to link an agent, or check your session with `aui status`.",
1003
963
  });
1004
964
  }
1005
- const client = new AUIClient({
1006
- baseUrl: config.apiUrl,
1007
- authToken: config.authToken,
1008
- accountId: config.accountId,
1009
- organizationId: config.organizationId,
1010
- environment: config.environment,
1011
- });
1012
- const key = loadAgentSettingsApiKey();
1013
- if (key)
1014
- client.setAgentSettingsApiKey(key);
1015
965
  // If user passed --version-id, validate it's a draft
1016
966
  if (explicitVersionId) {
1017
967
  let ver;
@@ -1146,8 +1096,14 @@ async function resolveAgentSettingsParams(config, projectConfig, session, projec
1146
1096
  saveProjectConfig({ ...projectConfig, network_category_id: categoryId }, projectRoot);
1147
1097
  }
1148
1098
  }
1149
- catch {
1150
- // ignore fetch failure
1099
+ catch (err) {
1100
+ // Falls through to the explicit ConfigError below if no categoryId
1101
+ // could be resolved. Surface in AUI_DEBUG so the operator can see
1102
+ // why the auto-fetch failed instead of just the generic "Missing
1103
+ // network_category_id" message.
1104
+ if (process.env.AUI_DEBUG) {
1105
+ console.warn(`[debug] resolveAgentSettingsParams: networks.get(${networkId}) failed:`, err instanceof Error ? err.message : err);
1106
+ }
1151
1107
  }
1152
1108
  }
1153
1109
  if (!categoryId) {
@@ -1182,6 +1138,262 @@ async function resolveAgentSettingsParams(config, projectConfig, session, projec
1182
1138
  }
1183
1139
  return baseParams;
1184
1140
  }
1141
+ // ─── Push Task Classification Helpers ───
1142
/**
 * Tell whether a push task creates or updates an integration.
 *
 * The unified push order handles integration tasks in two phases:
 *   - upserts (POST/PATCH/PUT) go BEFORE knowledge bases and tools, so that
 *     KBs can attach to integrations and tools can reference their codes;
 *   - deletes go AFTER tools / entities, so the last write that mentions an
 *     integration lands before its row is removed.
 *
 * @param {{ type: string }} t - Push task descriptor.
 * @returns {boolean} `true` for `put-integrations`, `create-integration`
 *   and `patch-integration`; `false` for every other task type.
 */
function isIntegrationUpsertTask(t) {
    switch (t.type) {
        case "put-integrations":
        case "create-integration":
        case "patch-integration":
            return true;
        default:
            return false;
    }
}
1154
/**
 * Push local knowledge-hub changes to the KB service.
 *
 * Flow:
 *   1. Ask git which files under `knowledge-hubs/` changed; return early when
 *      nothing did.
 *   2. KB directories that no longer exist on disk are deleted remotely,
 *      using the `knowledge_base_id` stored in the baseline `kb.json`.
 *   3. KB directories that still exist have their supported files imported,
 *      and the returned `knowledge_base_id` is written back to `kb.json`.
 *   4. Only when every delete AND every upload succeeded are the changed
 *      files committed to / removed from the baseline.
 *
 * Each remote delete / upload runs in its own span
 * (`aui.push.task.kb-delete` / `aui.push.task.kb-upload`), and each failure
 * is appended to the returned `failures` list instead of being thrown.
 *
 * @param {string} projectRoot - Absolute path of the project directory.
 * @param {object} projectConfig - Project config; `agent_id`,
 *   `organization_id` and `account_id` are read from it.
 * @returns {Promise<{ ok: boolean, failures: Array<{ label: string, file?: string, error: string }> }>}
 *   `ok` is true only when nothing failed.
 */
async function pushKnowledgeHubs(projectRoot, projectConfig) {
    // One import covers every git helper this function needs.
    const {
        getKnowledgeHubChanges,
        getBaselineFileContent,
        commitBaselineFiles: commitKBFiles,
        removeBaselineFiles,
    } = await import("../utils/git.js");
    const kbChanges = getKnowledgeHubChanges(projectRoot);
    if (kbChanges.length === 0) {
        return { ok: true, failures: [] };
    }
    const kbConfig = getConfig();
    const kbSession = await getValidSession();
    const kbNetworkId = projectConfig.agent_id || kbSession?.network_id;
    if (!kbNetworkId || !kbConfig.authToken) {
        return {
            ok: false,
            failures: [
                {
                    label: "Push knowledge hubs",
                    error: "Cannot push knowledge hubs: missing network_id or auth token. Re-run `aui login` and `aui import-agent`.",
                },
            ],
        };
    }
    const { KBViewClient } = await import("../api-client/kb-view-client.js");
    const { buildScope, readKbFolder } = await import("../services/kb-view.service.js");
    const { loadAgentSettingsApiKey: loadAsKey } = await import("../config/index.js");
    const kbViewClient = new KBViewClient({
        authToken: kbConfig.authToken,
        apiKey: loadAsKey() || undefined,
        organizationId: kbConfig.organizationId || "",
        environment: kbConfig.environment || "staging",
    });
    const kbLogDir = path.join(projectRoot, ".aui", "push-logs");
    fs.mkdirSync(kbLogDir, { recursive: true });
    kbViewClient.setPushLogDir(kbLogDir);
    const scope = buildScope({
        networkId: kbNetworkId,
        organizationId: projectConfig.organization_id || kbConfig.organizationId || "",
        accountId: projectConfig.account_id || kbConfig.accountId || "",
    });
    const userId = kbSession?.user_id || "cli";
    // Loop-invariant: the file types the import call is given.
    const SUPPORTED_EXTENSIONS = new Set([".pdf", ".md", ".txt", ".json"]);
    const isSupportedFile = (f) => SUPPORTED_EXTENSIONS.has(path.extname(f).toLowerCase());

    // Split the touched KB directories into "still on disk" (upload) and
    // "gone from disk" (remote delete) with a single existence check each.
    const changedKBDirs = new Set();
    for (const change of kbChanges) {
        if (change.kbDirName) {
            changedKBDirs.add(change.kbDirName);
        }
    }
    const existingKBDirs = [];
    const deletedKBDirs = [];
    for (const dirName of changedKBDirs) {
        const onDisk = fs.existsSync(path.join(projectRoot, "knowledge-hubs", dirName));
        (onDisk ? existingKBDirs : deletedKBDirs).push(dirName);
    }

    const failures = [];
    let kbDeleteSucceeded = true;
    if (deletedKBDirs.length > 0) {
        const deleteSpinner = startSpinner(`Deleting ${deletedKBDirs.length} knowledge base(s) from server...`);
        // Counts KBs actually removed (or already absent); KBs skipped for a
        // missing id must not be reported as deleted.
        let deletedCount = 0;
        try {
            for (const kbDirName of deletedKBDirs) {
                const kbJsonRelPath = `knowledge-hubs/${kbDirName}/kb.json`;
                const baselineKb = getBaselineFileContent(projectRoot, kbJsonRelPath);
                const kbName = baselineKb?.name || kbDirName;
                const kbId = baselineKb?.knowledge_base_id;
                if (!kbId) {
                    log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Cannot delete "${kbName}" — no knowledge_base_id stored. Push the KB first, then delete.` }) }));
                    continue;
                }
                // One span per KB delete, carrying status, name, id and error.
                await getTracer().startActiveSpan("aui.push.task.kb-delete", async (span) => {
                    span.setAttribute("push.task.type", "kb-delete");
                    span.setAttribute("push.task.label", `Delete knowledge base: ${kbName}`);
                    span.setAttribute("push.task.file", kbJsonRelPath);
                    span.setAttribute("push.task.kb_id", kbId);
                    span.setAttribute("push.task.kb_name", kbName);
                    try {
                        await kbViewClient.deleteKnowledgeBase(kbId, scope, kbName);
                        deletedCount += 1;
                        span.setStatus({ code: SpanStatusCode.OK });
                        log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "success", label: `Deleted: ${kbName}` }) }));
                    }
                    catch (delErr) {
                        if (isNotFoundError(delErr)) {
                            // 404 on delete: the desired end state already holds.
                            deletedCount += 1;
                            span.setStatus({ code: SpanStatusCode.OK });
                            span.addEvent("fallback.delete_404_already_absent");
                            span.setAttribute("push.task.fallback", "delete_404_already_absent");
                            log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "success", label: `Deleted: ${kbName} (already absent)` }) }));
                        }
                        else {
                            // Per-KB failure: record it and keep going with the rest.
                            kbDeleteSucceeded = false;
                            const errMsg = delErr instanceof Error ? delErr.message : String(delErr);
                            span.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
                            span.recordException(delErr instanceof Error ? delErr : new Error(errMsg));
                            span.setAttribute("push.task.error", errMsg);
                            // Optional chaining: a null/undefined throwable must not
                            // blow up the handler and abort the remaining deletes.
                            if (delErr?.statusCode) {
                                span.setAttribute("push.task.error_status_code", delErr.statusCode);
                            }
                            failures.push({
                                label: `Delete knowledge base: ${kbName}`,
                                file: kbJsonRelPath,
                                error: errMsg,
                            });
                            log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "error", label: `Failed to delete "${kbName}": ${errMsg}` }) }));
                        }
                    }
                    finally {
                        span.end();
                    }
                });
            }
            if (kbDeleteSucceeded) {
                deleteSpinner.succeed(`${deletedCount} knowledge base(s) deleted`);
            }
            else {
                deleteSpinner.fail(`Knowledge base deletion completed with errors`);
            }
        }
        catch (error) {
            // Anything that escaped the per-KB handling fails the whole batch.
            kbDeleteSucceeded = false;
            deleteSpinner.fail("Knowledge base deletion failed");
            const errMsg = error instanceof Error ? error.message : String(error);
            failures.push({
                label: "Delete knowledge bases (batch)",
                error: errMsg,
            });
            log(_jsx(ErrorDisplay, { error: error }));
        }
    }

    // With nothing to upload the upload phase is trivially successful.
    let kbUploadSucceeded = existingKBDirs.length === 0;
    if (existingKBDirs.length > 0) {
        const kbSpinner = startSpinner(`Pushing ${existingKBDirs.length} knowledge base(s)...`);
        let hadUploadFailure = false;
        try {
            for (const kbDirName of existingKBDirs) {
                const kbDir = path.join(projectRoot, "knowledge-hubs", kbDirName);
                const kbData = readKbFolder(kbDir);
                // NOTE(review): a falsy result is skipped without a message —
                // confirm that readKbFolder only returns it for non-KB folders.
                if (!kbData) {
                    continue;
                }
                const supportedFiles = kbData.binaryFiles.filter((f) => isSupportedFile(f));
                const skippedFiles = kbData.binaryFiles.filter((f) => !isSupportedFile(f));
                for (const skipped of skippedFiles) {
                    log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Skipped unsupported file: ${path.basename(skipped)} (only .pdf, .md, .txt, .json)` }) }));
                }
                if (supportedFiles.length === 0) {
                    continue;
                }
                const kbLabel = kbData.name || kbDirName;
                const kbJsonRelPath = `knowledge-hubs/${kbDirName}/kb.json`;
                // One span per KB upload, mirroring the delete spans above.
                await getTracer().startActiveSpan("aui.push.task.kb-upload", async (span) => {
                    span.setAttribute("push.task.type", "kb-upload");
                    span.setAttribute("push.task.label", `Push knowledge base: ${kbLabel}`);
                    span.setAttribute("push.task.file", kbJsonRelPath);
                    span.setAttribute("push.task.kb_name", kbLabel);
                    span.setAttribute("push.task.file_count", supportedFiles.length);
                    try {
                        const importResult = await kbViewClient.importFiles({
                            files: supportedFiles,
                            scope,
                            created_by: userId,
                            knowledge_base_name: kbData.name,
                            knowledge_base_description: kbData.description,
                        });
                        span.setStatus({ code: SpanStatusCode.OK });
                        if (importResult.knowledge_base_id) {
                            span.setAttribute("push.task.kb_id", importResult.knowledge_base_id);
                            const kbJsonPath = path.join(kbDir, "kb.json");
                            try {
                                // Persist the server id so a later delete can find it.
                                const raw = JSON.parse(fs.readFileSync(kbJsonPath, "utf-8"));
                                raw.knowledge_base_id = importResult.knowledge_base_id;
                                fs.writeFileSync(kbJsonPath, JSON.stringify(raw, null, 2) + "\n");
                            }
                            catch (writeErr) {
                                // Non-fatal, but surfaced: without the id a later delete
                                // of this KB cannot be sent to the server.
                                span.addEvent("kb_id_writeback_failed");
                                if (process.env.AUI_DEBUG) {
                                    console.warn(`[debug] failed to write knowledge_base_id back to ${kbJsonPath}:`, writeErr);
                                }
                                log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "warning", label: `Could not persist knowledge_base_id back to ${path.basename(kbJsonPath)} — re-import or run \`aui pull\` to recover.` }) }));
                            }
                        }
                    }
                    catch (uploadErr) {
                        hadUploadFailure = true;
                        const errMsg = uploadErr instanceof Error ? uploadErr.message : String(uploadErr);
                        span.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
                        span.recordException(uploadErr instanceof Error ? uploadErr : new Error(errMsg));
                        span.setAttribute("push.task.error", errMsg);
                        // Guarded for the same reason as in the delete handler.
                        if (uploadErr?.statusCode) {
                            span.setAttribute("push.task.error_status_code", uploadErr.statusCode);
                        }
                        failures.push({
                            label: `Push knowledge base: ${kbLabel}`,
                            file: kbJsonRelPath,
                            error: errMsg,
                        });
                        log(_jsx(Box, { paddingX: 1, children: _jsx(StatusLine, { kind: "error", label: `Failed to push "${kbLabel}": ${errMsg}` }) }));
                    }
                    finally {
                        span.end();
                    }
                });
            }
            if (hadUploadFailure) {
                kbSpinner.fail(`Knowledge base push completed with errors`);
                kbUploadSucceeded = false;
            }
            else {
                kbSpinner.succeed(`Knowledge base(s) pushed`);
                kbUploadSucceeded = true;
            }
        }
        catch (error) {
            // kbUploadSucceeded stays false: the batch did not complete.
            kbSpinner.fail("Knowledge base push failed");
            const errMsg = error instanceof Error ? error.message : String(error);
            failures.push({
                label: "Push knowledge bases (batch)",
                error: errMsg,
            });
            log(_jsx(ErrorDisplay, { error: error }));
        }
    }

    // The baseline is only touched when BOTH phases fully succeeded.
    const kbPushSucceeded = kbUploadSucceeded && kbDeleteSucceeded;
    if (kbPushSucceeded) {
        const kbFilesToAdd = kbChanges
            .filter((c) => c.status !== "deleted")
            .map((c) => c.file);
        const kbFilesToDelete = kbChanges
            .filter((c) => c.status === "deleted")
            .map((c) => c.file);
        if (kbFilesToAdd.length > 0 || kbFilesToDelete.length > 0) {
            if (kbFilesToDelete.length > 0) {
                removeBaselineFiles(projectRoot, kbFilesToDelete);
            }
            if (kbFilesToAdd.length > 0) {
                commitKBFiles(projectRoot, kbFilesToAdd, "pushed knowledge hub changes");
            }
            else {
                // Deletions only: still commit, with no files to add.
                commitKBFiles(projectRoot, [], "removed knowledge hub files");
            }
        }
    }
    return { ok: kbPushSucceeded && failures.length === 0, failures };
}
1185
1397
  // ─── Array File Info Helper ───
1186
1398
  function getArrayFileInfoForPush(filePath, dir) {
1187
1399
  try {
@@ -1210,11 +1422,6 @@ function getArrayFileInfoForPush(filePath, dir) {
1210
1422
  return null;
1211
1423
  }
1212
1424
  }
1213
- function isIntegrationUpsertTask(t) {
1214
- return (t.type === "put-integrations" ||
1215
- t.type === "create-integration" ||
1216
- t.type === "patch-integration");
1217
- }
1218
1425
  function writePushMemory(projectRoot, agentCode, agentId, pushTasks, succeededFiles, pushFailures) {
1219
1426
  try {
1220
1427
  const memoryDir = path.join(projectRoot, "memory");
@@ -1304,7 +1511,13 @@ function writePushMemory(projectRoot, agentCode, agentId, pushTasks, succeededFi
1304
1511
  fs.writeFileSync(filePath, lines.join("\n"), "utf-8");
1305
1512
  return path.relative(projectRoot, filePath);
1306
1513
  }
1307
- catch {
1514
+ catch (err) {
1515
+ // Memory file is diagnostic only — its failure shouldn't block the push.
1516
+ // But emit a debug warning so an operator chasing "where's my push memory"
1517
+ // sees what went wrong.
1518
+ if (process.env.AUI_DEBUG) {
1519
+ console.warn("[debug] writePushMemory failed:", err instanceof Error ? err.message : err);
1520
+ }
1308
1521
  return undefined;
1309
1522
  }
1310
1523
  }
@@ -1594,95 +1807,323 @@ async function executePushTask(client, params, task) {
1594
1807
  }
1595
1808
  });
1596
1809
  }
1597
- // Fall back to PATCH only on a 409 Conflict from the create call — the BFF
1598
- // returns 409 specifically for "already exists" conflicts, so any other
1599
- // status code is a real error and must be surfaced (not masked behind a
1600
- // follow-up PATCH that would likely fail with a misleading "not found").
1810
+ // ─── Adaptive fallback matrix (per write task) ────────────────────────────
1811
+ //
1812
+ // Every entity write goes through four layers, applied in this order:
1813
+ //
1814
+ // (a) `withTransientRetry` — retries once on 500/502/503/504 with a 1s
1815
+ // back-off. Per-call, isolated from other
1816
+ // tasks. 4xx is never retried (deterministic).
1817
+ // (b) `POST 409 → PATCH` — the create call hit a row with the same
1818
+ // code; the platform already has it. Convert
1819
+ // to a PATCH and continue. Pre-existing.
1820
+ // (c) `PATCH 404 → POST` — the patch call hit "not found"; baseline
1821
+ // drifted (item never landed on the platform
1822
+ // from a prior partial push). Convert to a
1823
+ // POST so the row reappears. NEW.
1824
+ // (d) `DELETE 404 → success` — the delete target is already gone. The
1825
+ // desired end state is reached. Treat as
1826
+ // success and log "(already absent)" so the
1827
+ // user can see what happened. NEW.
1828
+ //
1829
+ // All four layers are visible in the per-task push log files under
1830
+ // `.aui/push-logs/` so the BFF / agent-builder-bff can audit decisions.
1601
1831
  function isAlreadyExistsConflict(err) {
1602
1832
  if (!err || typeof err !== "object")
1603
1833
  return false;
1604
- const statusCode = err.statusCode
1834
+ const code = err.statusCode
1835
+ ?? err.status;
1836
+ return code === 409;
1837
+ }
1838
/**
 * Tell whether a caught value is an HTTP 404.
 *
 * @param {unknown} err - Caught value; anything that is not a non-null
 *   object yields `false`.
 * @returns {boolean} `true` when `err.statusCode` (or, if that is nullish,
 *   `err.status`) is exactly the number 404.
 */
function isNotFoundError(err) {
    const isObjectLike = Boolean(err) && typeof err === "object";
    if (!isObjectLike) {
        return false;
    }
    return (err.statusCode ?? err.status) === 404;
}
1845
/**
 * Tell whether a caught value carries one of the HTTP statuses
 * 500, 502, 503 or 504.
 *
 * @param {unknown} err - Caught value; anything that is not a non-null
 *   object yields `false`.
 * @returns {boolean} `true` when `err.statusCode` (or, if that is nullish,
 *   `err.status`) is exactly one of those four numbers.
 */
function isTransient5xx(err) {
    const isObjectLike = Boolean(err) && typeof err === "object";
    if (!isObjectLike) {
        return false;
    }
    const status = err.statusCode ?? err.status;
    return [500, 502, 503, 504].includes(status);
}
1852
/**
 * Mark the currently-active span with the adaptive fallback that just fired.
 *
 * On the active span this adds an event named `fallback.<kind>` (with
 * `detail` as its attributes), sets `push.task.fallback` to `kind`, and
 * copies every `detail` entry to a `push.task.fallback.<key>` attribute.
 *
 * Kinds emitted by the push code visible in this file:
 *   - `patch_404_to_post`         — a PATCH hit 404 and was converted to POST.
 *   - `post_409_to_patch`         — a POST hit 409 and was converted to PATCH.
 *   - `transient_retry`           — a 5xx was retried once.
 *   - `delete_404_already_absent` — a DELETE hit 404 and counted as success.
 *
 * Does nothing when no span is active.
 *
 * @param {string} kind - Fallback identifier.
 * @param {Record<string, unknown>} [detail] - Extra attributes to attach.
 * @returns {void}
 */
function recordFallbackEvent(kind, detail) {
    const activeSpan = trace.getActiveSpan();
    if (activeSpan) {
        activeSpan.addEvent(`fallback.${kind}`, detail);
        activeSpan.setAttribute("push.task.fallback", kind);
        Object.entries(detail ?? {}).forEach(([key, value]) => {
            activeSpan.setAttribute(`push.task.fallback.${key}`, value);
        });
    }
}
1878
/**
 * Invoke `fn`, and if it fails with a transient 5xx (as judged by
 * `isTransient5xx`), wait one second and invoke it exactly one more time.
 *
 * Any other failure is rethrown untouched, and so is a failure of the second
 * attempt. The retry is reported through `recordFallbackEvent` as
 * `transient_retry`.
 *
 * @template T
 * @param {string} label - Human-readable description of the call, used in
 *   the debug log line and in the fallback event.
 * @param {() => Promise<T> | T} fn - The write call to run.
 * @returns {Promise<T>} Whatever the successful attempt returned.
 */
async function withTransientRetry(label, fn) {
    const backoffMs = 1000;
    let firstFailure;
    try {
        return await fn();
    }
    catch (err) {
        firstFailure = err;
    }
    // Only 500/502/503/504 are worth a second try; everything else surfaces.
    if (!isTransient5xx(firstFailure)) {
        throw firstFailure;
    }
    const status = firstFailure.statusCode ?? firstFailure.status;
    if (process.env.AUI_DEBUG) {
        console.log(`[debug] ${label} got ${status}, retrying once after 1000ms`);
    }
    recordFallbackEvent("transient_retry", {
        label,
        status_code: status ?? 0,
        backoff_ms: backoffMs,
    });
    await new Promise((resolve) => setTimeout(resolve, backoffMs));
    return await fn();
}
1905
/**
 * Frozen sentinel returned in place of a delete response when the delete
 * call answered 404, i.e. the row was already gone on the platform.
 *
 * It resolves as a normal (successful) result so the task is not counted as
 * failed; the `__aui_already_absent__` flag lets callers recognise it (see
 * `isAlreadyAbsentResult`) and report "(already absent)" rather than a
 * plain success.
 */
const DELETE_ALREADY_ABSENT = Object.freeze({
    __aui_already_absent__: true,
    message: "Already absent on platform — treated as success",
});
1915
/**
 * Tell whether a task result is the "already absent" delete sentinel.
 *
 * @param {unknown} value - Result returned by a push task.
 * @returns {boolean} `true` only for a non-null object whose
 *   `__aui_already_absent__` property is strictly `true`.
 */
function isAlreadyAbsentResult(value) {
    if (!value) {
        return false;
    }
    if (typeof value !== "object") {
        return false;
    }
    return value.__aui_already_absent__ === true;
}
1608
1920
  async function _executePushTask(client, params, task) {
1609
1921
  switch (task.type) {
1610
1922
  case "patch-tool":
1611
- return client.patchTool(params, task.toolName, task.body);
1612
- case "create-tool":
1613
- try {
1614
- return await client.createTool(params, task.body);
1615
- }
1616
- catch (err) {
1617
- if (isAlreadyExistsConflict(err)) {
1618
- if (process.env.AUI_DEBUG) {
1619
- console.log(`[debug] create-tool: already-exists detected, falling back to PATCH`);
1923
+ return withTransientRetry(`PATCH tool ${task.toolName}`, async () => {
1924
+ try {
1925
+ return await client.patchTool(params, task.toolName, task.body);
1926
+ }
1927
+ catch (err) {
1928
+ if (isNotFoundError(err)) {
1929
+ if (process.env.AUI_DEBUG) {
1930
+ console.log(`[debug] patch-tool ${task.toolName}: 404 not found, falling back to POST`);
1931
+ }
1932
+ recordFallbackEvent("patch_404_to_post", { task_type: "patch-tool", tool: String(task.toolName ?? "") });
1933
+ return client.createTool(params, task.body);
1620
1934
  }
1621
- const body = task.body;
1622
- const toolCode = body.code || "";
1623
- const toolName = toolCode.toUpperCase().replace(/-/g, "_");
1624
- return client.patchTool(params, toolName, body);
1935
+ throw err;
1625
1936
  }
1626
- if (process.env.AUI_DEBUG) {
1627
- const statusCode = err.statusCode ?? err.status;
1628
- console.log(`[debug] create-tool failed (${statusCode}); not already-exists, surfacing original`);
1937
+ });
1938
+ case "create-tool":
1939
+ return withTransientRetry(`POST tool ${task.toolName ?? task.itemCode}`, async () => {
1940
+ try {
1941
+ return await client.createTool(params, task.body);
1629
1942
  }
1630
- throw err;
1631
- }
1943
+ catch (err) {
1944
+ if (isAlreadyExistsConflict(err)) {
1945
+ if (process.env.AUI_DEBUG) {
1946
+ console.log(`[debug] create-tool: 409 already-exists, falling back to PATCH`);
1947
+ }
1948
+ const body = task.body;
1949
+ const toolCode = body.code || "";
1950
+ const toolName = toolCode.toUpperCase().replace(/-/g, "_");
1951
+ recordFallbackEvent("post_409_to_patch", { task_type: "create-tool", tool: toolName });
1952
+ return client.patchTool(params, toolName, body);
1953
+ }
1954
+ throw err;
1955
+ }
1956
+ });
1632
1957
  case "delete-tool":
1633
- return client.deleteTool(params, task.toolName);
1958
+ return withTransientRetry(`DELETE tool ${task.toolName}`, async () => {
1959
+ try {
1960
+ return await client.deleteTool(params, task.toolName);
1961
+ }
1962
+ catch (err) {
1963
+ if (isNotFoundError(err)) {
1964
+ if (process.env.AUI_DEBUG) {
1965
+ console.log(`[debug] delete-tool ${task.toolName}: 404 already absent`);
1966
+ }
1967
+ recordFallbackEvent("delete_404_already_absent", { task_type: "delete-tool", tool: String(task.toolName ?? "") });
1968
+ return DELETE_ALREADY_ABSENT;
1969
+ }
1970
+ throw err;
1971
+ }
1972
+ });
1634
1973
  case "patch-general-settings":
1635
- return client.patchGeneralSettings(params, task.body);
1974
+ return withTransientRetry("PATCH general-settings", () => client.patchGeneralSettings(params, task.body));
1636
1975
  case "put-parameters":
1637
- return client.putParameters(params, task.body, task.oldBody);
1976
+ return withTransientRetry("PUT parameters", () => client.putParameters(params, task.body, task.oldBody));
1638
1977
  case "put-entities":
1639
- return client.putEntities(params, task.body, task.oldBody);
1978
+ return withTransientRetry("PUT entities", () => client.putEntities(params, task.body, task.oldBody));
1640
1979
  case "put-integrations":
1641
- return client.putIntegrations(params, task.body, task.oldBody);
1980
+ return withTransientRetry("PUT integrations", () => client.putIntegrations(params, task.body, task.oldBody));
1642
1981
  case "create-parameter":
1643
- try {
1644
- return await client.createParameter(params, task.body);
1645
- }
1646
- catch (err) {
1647
- if (isAlreadyExistsConflict(err)) {
1648
- return client.patchParameter(params, task.itemCode, task.body);
1982
+ return withTransientRetry(`POST param ${task.itemCode}`, async () => {
1983
+ try {
1984
+ return await client.createParameter(params, task.body);
1649
1985
  }
1650
- throw err;
1651
- }
1986
+ catch (err) {
1987
+ if (isAlreadyExistsConflict(err)) {
1988
+ if (process.env.AUI_DEBUG) {
1989
+ console.log(`[debug] create-parameter ${task.itemCode}: 409, falling back to PATCH`);
1990
+ }
1991
+ recordFallbackEvent("post_409_to_patch", { task_type: "create-parameter", code: String(task.itemCode ?? "") });
1992
+ return client.patchParameter(params, task.itemCode, task.body);
1993
+ }
1994
+ throw err;
1995
+ }
1996
+ });
1652
1997
  case "patch-parameter":
1653
- return client.patchParameter(params, task.itemCode, task.body);
1998
+ return withTransientRetry(`PATCH param ${task.itemCode}`, async () => {
1999
+ try {
2000
+ return await client.patchParameter(params, task.itemCode, task.body);
2001
+ }
2002
+ catch (err) {
2003
+ if (isNotFoundError(err)) {
2004
+ if (process.env.AUI_DEBUG) {
2005
+ console.log(`[debug] patch-parameter ${task.itemCode}: 404 not found, falling back to POST`);
2006
+ }
2007
+ recordFallbackEvent("patch_404_to_post", { task_type: "patch-parameter", code: String(task.itemCode ?? "") });
2008
+ return client.createParameter(params, task.body);
2009
+ }
2010
+ throw err;
2011
+ }
2012
+ });
1654
2013
  case "delete-parameter":
1655
- return client.deleteParameter(params, task.itemCode, task.body);
2014
+ return withTransientRetry(`DELETE param ${task.itemCode}`, async () => {
2015
+ try {
2016
+ return await client.deleteParameter(params, task.itemCode, task.body);
2017
+ }
2018
+ catch (err) {
2019
+ if (isNotFoundError(err)) {
2020
+ if (process.env.AUI_DEBUG) {
2021
+ console.log(`[debug] delete-parameter ${task.itemCode}: 404 already absent`);
2022
+ }
2023
+ recordFallbackEvent("delete_404_already_absent", { task_type: "delete-parameter", code: String(task.itemCode ?? "") });
2024
+ return DELETE_ALREADY_ABSENT;
2025
+ }
2026
+ throw err;
2027
+ }
2028
+ });
1656
2029
  case "create-entity":
1657
- try {
1658
- return await client.createEntity(params, task.body);
1659
- }
1660
- catch (err) {
1661
- if (isAlreadyExistsConflict(err)) {
1662
- return client.patchEntity(params, task.itemCode, task.body);
2030
+ return withTransientRetry(`POST entity ${task.itemCode}`, async () => {
2031
+ try {
2032
+ return await client.createEntity(params, task.body);
1663
2033
  }
1664
- throw err;
1665
- }
2034
+ catch (err) {
2035
+ if (isAlreadyExistsConflict(err)) {
2036
+ if (process.env.AUI_DEBUG) {
2037
+ console.log(`[debug] create-entity ${task.itemCode}: 409, falling back to PATCH`);
2038
+ }
2039
+ recordFallbackEvent("post_409_to_patch", { task_type: "create-entity", code: String(task.itemCode ?? "") });
2040
+ return client.patchEntity(params, task.itemCode, task.body);
2041
+ }
2042
+ throw err;
2043
+ }
2044
+ });
1666
2045
  case "patch-entity":
1667
- return client.patchEntity(params, task.itemCode, task.body);
2046
+ return withTransientRetry(`PATCH entity ${task.itemCode}`, async () => {
2047
+ try {
2048
+ return await client.patchEntity(params, task.itemCode, task.body);
2049
+ }
2050
+ catch (err) {
2051
+ if (isNotFoundError(err)) {
2052
+ if (process.env.AUI_DEBUG) {
2053
+ console.log(`[debug] patch-entity ${task.itemCode}: 404, falling back to POST`);
2054
+ }
2055
+ recordFallbackEvent("patch_404_to_post", { task_type: "patch-entity", code: String(task.itemCode ?? "") });
2056
+ return client.createEntity(params, task.body);
2057
+ }
2058
+ throw err;
2059
+ }
2060
+ });
1668
2061
  case "delete-entity":
1669
- return client.deleteEntity(params, task.itemCode);
2062
+ return withTransientRetry(`DELETE entity ${task.itemCode}`, async () => {
2063
+ try {
2064
+ return await client.deleteEntity(params, task.itemCode);
2065
+ }
2066
+ catch (err) {
2067
+ if (isNotFoundError(err)) {
2068
+ if (process.env.AUI_DEBUG) {
2069
+ console.log(`[debug] delete-entity ${task.itemCode}: 404 already absent`);
2070
+ }
2071
+ recordFallbackEvent("delete_404_already_absent", { task_type: "delete-entity", code: String(task.itemCode ?? "") });
2072
+ return DELETE_ALREADY_ABSENT;
2073
+ }
2074
+ throw err;
2075
+ }
2076
+ });
1670
2077
  case "create-integration":
1671
- try {
1672
- return await client.createIntegration(params, task.body);
1673
- }
1674
- catch (err) {
1675
- if (isAlreadyExistsConflict(err)) {
1676
- return client.patchIntegration(params, task.itemCode, task.body);
2078
+ return withTransientRetry(`POST integration ${task.itemCode}`, async () => {
2079
+ try {
2080
+ return await client.createIntegration(params, task.body);
1677
2081
  }
1678
- throw err;
1679
- }
2082
+ catch (err) {
2083
+ if (isAlreadyExistsConflict(err)) {
2084
+ if (process.env.AUI_DEBUG) {
2085
+ console.log(`[debug] create-integration ${task.itemCode}: 409, falling back to PATCH`);
2086
+ }
2087
+ recordFallbackEvent("post_409_to_patch", { task_type: "create-integration", code: String(task.itemCode ?? "") });
2088
+ return client.patchIntegration(params, task.itemCode, task.body);
2089
+ }
2090
+ throw err;
2091
+ }
2092
+ });
1680
2093
  case "patch-integration":
1681
- return client.patchIntegration(params, task.itemCode, task.body);
2094
+ return withTransientRetry(`PATCH integration ${task.itemCode}`, async () => {
2095
+ try {
2096
+ return await client.patchIntegration(params, task.itemCode, task.body);
2097
+ }
2098
+ catch (err) {
2099
+ if (isNotFoundError(err)) {
2100
+ if (process.env.AUI_DEBUG) {
2101
+ console.log(`[debug] patch-integration ${task.itemCode}: 404 not found, falling back to POST`);
2102
+ }
2103
+ recordFallbackEvent("patch_404_to_post", { task_type: "patch-integration", code: String(task.itemCode ?? "") });
2104
+ return client.createIntegration(params, task.body);
2105
+ }
2106
+ throw err;
2107
+ }
2108
+ });
1682
2109
  case "delete-integration":
1683
- return client.deleteIntegration(params, task.itemCode);
2110
+ return withTransientRetry(`DELETE integration ${task.itemCode}`, async () => {
2111
+ try {
2112
+ return await client.deleteIntegration(params, task.itemCode);
2113
+ }
2114
+ catch (err) {
2115
+ if (isNotFoundError(err)) {
2116
+ if (process.env.AUI_DEBUG) {
2117
+ console.log(`[debug] delete-integration ${task.itemCode}: 404 already absent`);
2118
+ }
2119
+ recordFallbackEvent("delete_404_already_absent", { task_type: "delete-integration", code: String(task.itemCode ?? "") });
2120
+ return DELETE_ALREADY_ABSENT;
2121
+ }
2122
+ throw err;
2123
+ }
2124
+ });
1684
2125
  case "put-rules":
1685
- return client.putRules(params, task.body);
2126
+ return withTransientRetry("PUT rules", () => client.putRules(params, task.body));
1686
2127
  default:
1687
2128
  throw new Error(`Unknown push task type: ${task.type}`);
1688
2129
  }