npm - @vellumai/assistant - Versions diffs - 0.7.3 → 0.8.0 - Mend

@vellumai/assistant 0.7.3 → 0.8.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (169)

package/ARCHITECTURE.md +29 -28
package/Dockerfile +1 -0
package/__tests__/permissions/gateway-threshold-reader.test.ts +236 -9
package/bun.lock +3 -0
package/knip.json +1 -0
package/node_modules/@vellumai/ipc-server-utils/bun.lock +24 -0
package/node_modules/@vellumai/ipc-server-utils/package.json +18 -0
package/node_modules/@vellumai/ipc-server-utils/src/index.ts +6 -0
package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.test.ts +430 -0
package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.ts +221 -0
package/node_modules/@vellumai/ipc-server-utils/tsconfig.json +20 -0
package/openapi.yaml +22 -4
package/package.json +3 -1
package/src/__tests__/annotate-risk-options.test.ts +291 -0
package/src/__tests__/approval-cascade.test.ts +8 -16
package/src/__tests__/approval-routes-http.test.ts +6 -0
package/src/__tests__/auto-analysis-end-to-end.test.ts +12 -25
package/src/__tests__/call-constants.test.ts +10 -1
package/src/__tests__/call-controller.test.ts +127 -0
package/src/__tests__/cli-memory-v2-reembed-skills.test.ts +58 -28
package/src/__tests__/config-loader-platform-defaults.test.ts +284 -1
package/src/__tests__/context-search-memory-source.test.ts +3 -26
package/src/__tests__/context-search-pkb-source.test.ts +12 -6
package/src/__tests__/conversation-abort-tool-results.test.ts +1 -6
package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
package/src/__tests__/conversation-agent-loop.test.ts +3 -3
package/src/__tests__/conversation-confirmation-signals.test.ts +5 -13
package/src/__tests__/conversation-init.benchmark.test.ts +1 -1
package/src/__tests__/conversation-process-callsite.test.ts +1 -6
package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -6
package/src/__tests__/conversation-runtime-assembly.test.ts +15 -6
package/src/__tests__/conversation-slash-unknown.test.ts +1 -6
package/src/__tests__/conversation-surfaces-action-delivery.test.ts +170 -9
package/src/__tests__/conversation-surfaces-data-persist.test.ts +73 -1
package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +59 -0
package/src/__tests__/conversation-workspace-injection.test.ts +1 -7
package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -7
package/src/__tests__/filing-service.test.ts +2 -19
package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +10 -26
package/src/__tests__/injector-chain.test.ts +24 -16
package/src/__tests__/injector-pkb-v2-silenced.test.ts +10 -7
package/src/__tests__/lifecycle-memory-v2-seed.test.ts +154 -67
package/src/__tests__/notification-decision-fallback.test.ts +91 -0
package/src/__tests__/notification-decision-strategy.test.ts +22 -0
package/src/__tests__/oauth-cli.test.ts +121 -0
package/src/__tests__/relay-server.test.ts +46 -2
package/src/__tests__/secret-prompt-log-hygiene.test.ts +7 -5
package/src/__tests__/secret-prompter-channel-fallback.test.ts +7 -5
package/src/__tests__/secret-response-routing.test.ts +7 -5
package/src/__tests__/server-history-render.test.ts +82 -0
package/src/__tests__/skill-include-graph.test.ts +31 -0
package/src/__tests__/skill-load-tool.test.ts +44 -16
package/src/__tests__/skills.test.ts +39 -0
package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -42
package/src/__tests__/tool-executor.test.ts +155 -0
package/src/__tests__/voice-session-bridge.test.ts +3 -0
package/src/__tests__/workspace-migration-069-seed-onboarding-threads.test.ts +120 -0
package/src/__tests__/workspace-migration-071-remove-safe-storage-release-note.test.ts +206 -0
package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +15 -27
package/src/agent/loop.ts +11 -0
package/src/approvals/guardian-decision-primitive.ts +0 -13
package/src/approvals/guardian-request-resolvers.ts +4 -32
package/src/calls/call-constants.ts +5 -8
package/src/calls/call-controller.ts +130 -67
package/src/calls/relay-server.ts +7 -1
package/src/calls/voice-session-bridge.ts +1 -1
package/src/cli/commands/memory-v2.ts +7 -7
package/src/cli/commands/oauth/__tests__/connect.test.ts +0 -254
package/src/cli/commands/oauth/connect.ts +10 -52
package/src/config/bundled-skills/app-builder/SKILL.md +1 -3
package/src/config/feature-flag-registry.json +1 -17
package/src/config/loader.ts +72 -19
package/src/config/schemas/memory-v2.ts +1 -1
package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +32 -0
package/src/daemon/conversation-agent-loop-handlers.ts +32 -0
package/src/daemon/conversation-agent-loop.ts +13 -10
package/src/daemon/conversation-lifecycle.ts +22 -8
package/src/daemon/conversation-surfaces.ts +16 -14
package/src/daemon/conversation-tool-setup.ts +9 -5
package/src/daemon/conversation.ts +1 -1
package/src/daemon/handlers/shared.ts +26 -0
package/src/daemon/host-bash-proxy.ts +1 -1
package/src/daemon/host-browser-proxy.ts +1 -1
package/src/daemon/host-cu-proxy.ts +1 -1
package/src/daemon/host-file-proxy.ts +1 -1
package/src/daemon/host-transfer-proxy.ts +2 -2
package/src/daemon/lifecycle.ts +88 -73
package/src/daemon/memory-v2-startup.ts +55 -14
package/src/daemon/message-types/messages.ts +19 -1
package/src/documents/document-store.ts +35 -1
package/src/filing/filing-service.ts +2 -3
package/src/heartbeat/heartbeat-service.ts +1 -1
package/src/ipc/assistant-server.ts +93 -36
package/src/ipc/skill-server.ts +99 -42
package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +10 -57
package/src/memory/context-search/sources/memory-v2.ts +1 -17
package/src/memory/context-search/sources/memory.ts +2 -2
package/src/memory/context-search/sources/pkb.ts +2 -3
package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +104 -61
package/src/memory/graph/__tests__/handle-remember-v2.test.ts +11 -26
package/src/memory/graph/conversation-graph-memory.ts +32 -9
package/src/memory/graph/graph-search.test.ts +6 -5
package/src/memory/graph/graph-search.ts +3 -4
package/src/memory/graph/retriever.test.ts +12 -7
package/src/memory/graph/retriever.ts +4 -5
package/src/memory/graph/tool-handlers.ts +3 -4
package/src/memory/graph/tools.ts +4 -4
package/src/memory/indexer.ts +1 -2
package/src/memory/jobs/__tests__/embed-concept-page.test.ts +116 -0
package/src/memory/jobs/embed-concept-page.ts +223 -87
package/src/memory/jobs-worker.ts +8 -4
package/src/memory/pkb/pkb-search.test.ts +6 -5
package/src/memory/pkb/pkb-search.ts +4 -5
package/src/memory/qdrant-client.ts +3 -0
package/src/memory/search/semantic.ts +4 -5
package/src/memory/v2/__tests__/activation.test.ts +35 -5
package/src/memory/v2/__tests__/consolidation-job.test.ts +21 -32
package/src/memory/v2/__tests__/injection.test.ts +140 -23
package/src/memory/v2/__tests__/qdrant.test.ts +310 -9
package/src/memory/v2/__tests__/sim.test.ts +118 -7
package/src/memory/v2/__tests__/static-context.test.ts +1 -13
package/src/memory/v2/__tests__/sweep-job.test.ts +19 -33
package/src/memory/v2/consolidation-job.ts +7 -8
package/src/memory/v2/injection.ts +32 -12
package/src/memory/v2/page-store.ts +39 -0
package/src/memory/v2/prompts/consolidation.ts +5 -0
package/src/memory/v2/qdrant.ts +209 -48
package/src/memory/v2/sim.ts +67 -26
package/src/memory/v2/static-context.ts +4 -8
package/src/memory/v2/sweep-job.ts +5 -6
package/src/memory/v2/types.ts +7 -0
package/src/notifications/copy-composer.ts +46 -12
package/src/notifications/decision-engine.ts +46 -0
package/src/permissions/gateway-threshold-reader.ts +116 -8
package/src/permissions/prompter.ts +86 -96
package/src/permissions/secret-prompter.ts +31 -31
package/src/plugins/defaults/injectors.ts +1 -2
package/src/proactive-artifact/job.test.ts +51 -4
package/src/proactive-artifact/job.ts +16 -2
package/src/proactive-artifact/message-copy.ts +18 -1
package/src/prompts/templates/SOUL.md +13 -28
package/src/runtime/auth/route-policy.ts +1 -0
package/src/runtime/channel-approvals.ts +3 -2
package/src/runtime/guardian-reply-router.ts +0 -10
package/src/runtime/pending-interactions.ts +19 -15
package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +147 -0
package/src/runtime/routes/approval-routes.ts +7 -3
package/src/runtime/routes/consolidation-routes.ts +8 -9
package/src/runtime/routes/conversation-query-routes.ts +44 -1
package/src/runtime/routes/debug-bash-routes.ts +2 -0
package/src/runtime/routes/filing-routes.ts +2 -3
package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +0 -3
package/src/runtime/routes/memory-item-routes.test.ts +3 -9
package/src/runtime/routes/memory-item-routes.ts +5 -6
package/src/runtime/routes/memory-v2-routes.ts +103 -17
package/src/skills/include-graph.ts +35 -13
package/src/tools/document/document-tool.ts +20 -0
package/src/tools/executor.ts +18 -2
package/src/tools/memory/register.test.ts +7 -5
package/src/tools/permission-checker.ts +15 -0
package/src/tools/skills/load.ts +24 -20
package/src/tools/tool-name-aliases.ts +19 -0
package/src/tools/types.ts +19 -1
package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +4 -62
package/src/workspace/migrations/069-seed-onboarding-threads.ts +28 -0
package/src/workspace/migrations/070-memory-v2-summary-schema-rebuild.ts +31 -0
package/src/workspace/migrations/071-remove-safe-storage-release-note.ts +111 -0
package/src/workspace/migrations/registry.ts +6 -0

package/src/__tests__/conversation-process-callsite.test.ts CHANGED Viewed

@@ -12,14 +12,8 @@
  */
 import { describe, expect, mock, test } from "bun:test";
-import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
 import type { Message, ProviderResponse } from "../providers/types.js";
-// This test exercises v1 conversation routing. The `memory-v2-enabled` flag
-// (registry default `true`) flips memory routing to v2 — disable it here so
-// the v1 paths under test stay active.
-_setOverridesForTesting({ "memory-v2-enabled": false });
 // Use an object wrapper so TypeScript doesn't narrow the captured type to
 // `undefined` based on the initial assignment in the test setup.
 const captured: {
@@ -83,6 +77,7 @@ mock.module("../config/loader.js", () => ({
       pricingOverrides: [],
     },
     rateLimit: { maxRequestsPerMinute: 0 },
+    memory: { v2: { enabled: false } },
     daemon: {
       startupSocketWaitMs: 5000,
       stopTimeoutMs: 5000,

package/src/__tests__/conversation-provider-retry-repair.test.ts CHANGED Viewed

@@ -1,17 +1,11 @@
 import { beforeEach, describe, expect, mock, test } from "bun:test";
 import type { AgentEvent } from "../agent/loop.js";
-import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
 import type { UserMessageAttachment } from "../daemon/message-protocol.js";
 import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
 import type { Message, ProviderResponse } from "../providers/types.js";
 import { ProviderError } from "../util/errors.js";
-// This test exercises v1 conversation routing. The `memory-v2-enabled` flag
-// (registry default `true`) flips memory routing to v2 — disable it here so
-// the v1 paths under test stay active.
-_setOverridesForTesting({ "memory-v2-enabled": false });
 mock.module("../util/logger.js", () => ({
   getLogger: () =>
     new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
@@ -63,6 +57,7 @@ mock.module("../config/loader.js", () => ({
       pricingOverrides: [],
     },
     rateLimit: { maxRequestsPerMinute: 0 },
+    memory: { v2: { enabled: false } },
     services: {
       inference: {
         mode: "your-own",

package/src/__tests__/conversation-runtime-assembly.test.ts CHANGED Viewed

@@ -1,11 +1,20 @@
 import { beforeEach, describe, expect, mock, test } from "bun:test";
-import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
-// This test exercises v1 conversation routing. The `memory-v2-enabled` flag
-// (registry default `true`) flips memory routing to v2 — disable it here so
-// the v1 paths under test stay active.
-_setOverridesForTesting({ "memory-v2-enabled": false });
+// This test exercises v1 PKB injection. `config.memory.v2.enabled` (default
+// `true`) makes the PKB injector go silent — force it off here so the v1
+// injection chain assertions stay meaningful.
+const realLoaderForAssemblyTest = await import("../config/loader.js");
+const realGetConfigForAssemblyTest = realLoaderForAssemblyTest.getConfig;
+mock.module("../config/loader.js", () => ({
+  ...realLoaderForAssemblyTest,
+  getConfig: () => {
+    const real = realGetConfigForAssemblyTest();
+    return {
+      ...real,
+      memory: { ...real.memory, v2: { ...real.memory.v2, enabled: false } },
+    };
+  },
+}));
 // PKB search is mocked so the reminder-hints tests can assert behavior
 // without standing up Qdrant. The mock returns whatever is staged in

package/src/__tests__/conversation-slash-unknown.test.ts CHANGED Viewed

@@ -5,15 +5,9 @@ import type {
   CheckpointDecision,
   CheckpointInfo,
 } from "../agent/loop.js";
-import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
 import type { ServerMessage } from "../daemon/message-protocol.js";
 import type { Message, ProviderResponse } from "../providers/types.js";
-// This test exercises v1 conversation routing. The `memory-v2-enabled` flag
-// (registry default `true`) flips memory routing to v2 — disable it here so
-// the v1 paths under test stay active.
-_setOverridesForTesting({ "memory-v2-enabled": false });
 // ---------------------------------------------------------------------------
 // Mocks — must precede the Conversation import so Bun applies them at load time.
 // ---------------------------------------------------------------------------
@@ -66,6 +60,7 @@ mock.module("../config/loader.js", () => ({
       pricingOverrides: [],
     },
     rateLimit: { maxRequestsPerMinute: 0 },
+    memory: { v2: { enabled: false } },
     daemon: {
       startupSocketWaitMs: 5000,
       stopTimeoutMs: 5000,

package/src/__tests__/conversation-surfaces-action-delivery.test.ts CHANGED Viewed

@@ -1,13 +1,19 @@
-import { describe, expect, test } from "bun:test";
-import {
-  createSurfaceMutex,
-  handleSurfaceAction,
-  type SurfaceConversationContext,
-  surfaceProxyResolver,
-} from "../daemon/conversation-surfaces.js";
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+import type { ServerMessage } from "../daemon/message-protocol.js";
+let broadcastedMessages: ServerMessage[] = [];
+const realEventHub = await import("../runtime/assistant-event-hub.js");
+mock.module("../runtime/assistant-event-hub.js", () => ({
+  ...realEventHub,
+  broadcastMessage: (msg: ServerMessage) => broadcastedMessages.push(msg),
+}));
+const { createSurfaceMutex, handleSurfaceAction, surfaceProxyResolver } =
+  await import("../daemon/conversation-surfaces.js");
+import type { SurfaceConversationContext } from "../daemon/conversation-surfaces.js";
 import type {
-  ServerMessage,
   SurfaceData,
   SurfaceType,
   UiSurfaceShow,
@@ -81,6 +87,10 @@ function makeContext(sent: ServerMessage[] = []): SurfaceConversationContext & {
 }
 describe("surface action delivery to assistant", () => {
+  beforeEach(() => {
+    broadcastedMessages = [];
+  });
   test("table action button click triggers processMessage with action content", async () => {
     const sent: ServerMessage[] = [];
     const ctx = makeContext(sent);
@@ -199,4 +209,155 @@ describe("surface action delivery to assistant", () => {
       "[User action on app:",
     );
   });
+  test("confirmation surface broadcasts ui_surface_complete on action", async () => {
+    const sent: ServerMessage[] = [];
+    const ctx = makeContext(sent);
+    const showResult = await surfaceProxyResolver(ctx, "ui_show", {
+      surface_type: "confirmation",
+      title: "Delete files?",
+      data: {
+        message: "This will permanently delete 3 files.",
+        confirmLabel: "Delete",
+        cancelLabel: "Keep",
+      },
+    });
+    expect(showResult.isError).toBe(false);
+    expect(showResult.yieldToUser).toBe(true);
+    const showMessage = sent.find(
+      (msg): msg is UiSurfaceShow => msg.type === "ui_surface_show",
+    ) as UiSurfaceShow;
+    const surfaceId = showMessage.surfaceId;
+    expect(ctx.pendingSurfaceActions.has(surfaceId)).toBe(true);
+    await handleSurfaceAction(ctx, surfaceId, "confirm", {});
+    const completeMsg = broadcastedMessages.find(
+      (m) =>
+        (m as unknown as Record<string, unknown>).type ===
+          "ui_surface_complete" &&
+        (m as unknown as Record<string, unknown>).surfaceId === surfaceId,
+    ) as unknown as Record<string, unknown> | undefined;
+    expect(completeMsg).toBeDefined();
+    expect(completeMsg?.conversationId).toBe("conv-1");
+    expect(completeMsg?.summary).toContain("Delete");
+  });
+  test("file_upload surface broadcasts ui_surface_complete on action", async () => {
+    const sent: ServerMessage[] = [];
+    const ctx = makeContext(sent);
+    const showResult = await surfaceProxyResolver(ctx, "ui_show", {
+      surface_type: "file_upload",
+      title: "Upload documents",
+      data: { accept: ".pdf,.docx", maxFiles: 5 },
+    });
+    expect(showResult.isError).toBe(false);
+    expect(showResult.yieldToUser).toBe(true);
+    const showMessage = sent.find(
+      (msg): msg is UiSurfaceShow => msg.type === "ui_surface_show",
+    ) as UiSurfaceShow;
+    const surfaceId = showMessage.surfaceId;
+    expect(ctx.pendingSurfaceActions.has(surfaceId)).toBe(true);
+    await handleSurfaceAction(ctx, surfaceId, "submit", {
+      files: [
+        {
+          filename: "doc.pdf",
+          mimeType: "application/pdf",
+          data: "base64encodedcontent",
+        },
+      ],
+    });
+    const completeMsg = broadcastedMessages.find(
+      (m) =>
+        (m as unknown as Record<string, unknown>).type ===
+          "ui_surface_complete" &&
+        (m as unknown as Record<string, unknown>).surfaceId === surfaceId,
+    ) as unknown as Record<string, unknown> | undefined;
+    expect(completeMsg).toBeDefined();
+    expect(completeMsg?.conversationId).toBe("conv-1");
+  });
+  test("file_upload completion event does not include base64 file blobs", async () => {
+    const sent: ServerMessage[] = [];
+    const ctx = makeContext(sent);
+    await surfaceProxyResolver(ctx, "ui_show", {
+      surface_type: "file_upload",
+      title: "Upload",
+      data: { accept: "*" },
+    });
+    const showMessage = sent.find(
+      (msg): msg is UiSurfaceShow => msg.type === "ui_surface_show",
+    ) as UiSurfaceShow;
+    const surfaceId = showMessage.surfaceId;
+    const largeBase64 = "A".repeat(10_000);
+    await handleSurfaceAction(ctx, surfaceId, "submit", {
+      files: [
+        {
+          filename: "big.pdf",
+          mimeType: "application/pdf",
+          data: largeBase64,
+        },
+      ],
+    });
+    const completeMsg = broadcastedMessages.find(
+      (m) =>
+        (m as unknown as Record<string, unknown>).type ===
+          "ui_surface_complete" &&
+        (m as unknown as Record<string, unknown>).surfaceId === surfaceId,
+    ) as unknown as Record<string, unknown> | undefined;
+    expect(completeMsg).toBeDefined();
+    const submittedData = completeMsg?.submittedData as
+      | Record<string, unknown>
+      | undefined;
+    // The files array with base64 blobs should be stripped from the
+    // completion event — only the sanitized payload (without files) is sent.
+    expect(submittedData?.files).toBeUndefined();
+    // The raw base64 content should not appear anywhere in the event
+    expect(JSON.stringify(completeMsg)).not.toContain(largeBase64);
+  });
+  test("table surface does NOT broadcast ui_surface_complete (not one-shot)", async () => {
+    const sent: ServerMessage[] = [];
+    const ctx = makeContext(sent);
+    await surfaceProxyResolver(ctx, "ui_show", {
+      surface_type: "table",
+      title: "Items",
+      data: {
+        columns: [{ id: "name", label: "Name" }],
+        rows: [{ id: "r1", cells: { name: "Item 1" } }],
+      },
+      actions: [{ id: "select", label: "Select" }],
+    });
+    const showMessage = sent.find(
+      (msg): msg is UiSurfaceShow => msg.type === "ui_surface_show",
+    ) as UiSurfaceShow;
+    const surfaceId = showMessage.surfaceId;
+    broadcastedMessages = [];
+    await handleSurfaceAction(ctx, surfaceId, "select", {
+      selectedIds: ["r1"],
+    });
+    const completeMsg = broadcastedMessages.find(
+      (m) =>
+        (m as unknown as Record<string, unknown>).type ===
+        "ui_surface_complete",
+    );
+    expect(completeMsg).toBeUndefined();
+  });
 });

package/src/__tests__/conversation-surfaces-data-persist.test.ts CHANGED Viewed

@@ -1,5 +1,14 @@
 import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+import type { ServerMessage } from "../daemon/message-protocol.js";
+const realEventHub = await import("../runtime/assistant-event-hub.js");
+mock.module("../runtime/assistant-event-hub.js", () => ({
+  ...realEventHub,
+  broadcastMessage: (_msg: ServerMessage) => {},
+}));
 // Mock the persistence layer the surface helpers reach into so we can
 // observe writes without touching SQLite. We swap this out per test by
 // re-assigning the spies recorded on the closure below.
@@ -29,15 +38,16 @@ const {
   flushPendingSurfaceDataPersists,
   createSurfaceMutex,
   flushSurfaceDataPersist,
+  handleSurfaceAction,
   markSurfaceCompleted,
   scheduleSurfaceDataPersist,
+  showStandaloneSurface,
   surfaceProxyResolver,
 } = await import("../daemon/conversation-surfaces.js");
 import type { SurfaceConversationContext } from "../daemon/conversation-surfaces.js";
 import type {
   CardSurfaceData,
-  ServerMessage,
   SurfaceData,
   SurfaceType,
 } from "../daemon/message-protocol.js";
@@ -60,6 +70,8 @@ function makeContext(sent: ServerMessage[] = []): SurfaceConversationContext {
     accumulatedSurfaceState: new Map<string, Record<string, unknown>>(),
     surfaceActionRequestIds: new Set<string>(),
     currentTurnSurfaces: [],
+    pendingStandaloneSurfaces: new Map(),
+    recentlyCompletedStandaloneSurfaces: new Map(),
     isProcessing: () => false,
     enqueueMessage: () => ({ queued: false, requestId: "req-1" }),
     getQueueDepth: () => 0,
@@ -402,3 +414,63 @@ describe("ui_surface_update persistence", () => {
     cancelPendingSurfaceDataPersists("conv-other");
   });
 });
+describe("standalone surface DB persistence", () => {
+  let writes: Array<{ id: string; content: unknown }> = [];
+  beforeEach(() => {
+    writes = [];
+    updateMessageContentSpy = (id: string, content: string) => {
+      writes.push({ id, content: JSON.parse(content) });
+    };
+    getMessagesImpl = () => [];
+    cancelPendingSurfaceDataPersists();
+  });
+  afterEach(() => {
+    cancelPendingSurfaceDataPersists();
+  });
+  test("standalone surface action persists completed state to DB", async () => {
+    const ctx = makeContext();
+    const surfaceId = "standalone-persist-1";
+    seedRows([
+      {
+        id: "msg-standalone",
+        content: [
+          { type: "text", text: "confirm this" },
+          {
+            type: "ui_surface",
+            surfaceId,
+            surfaceType: "confirmation",
+            data: { message: "Proceed?" },
+          },
+        ],
+      },
+    ]);
+    const resultPromise = showStandaloneSurface(
+      ctx,
+      {
+        conversationId: "conv-persist-1",
+        surfaceType: "confirmation",
+        data: { message: "Proceed?" },
+        timeoutMs: 60_000,
+      },
+      surfaceId,
+    );
+    await handleSurfaceAction(ctx, surfaceId, "confirm", {});
+    const result = await resultPromise;
+    expect(result.status).toBe("submitted");
+    expect(writes.length).toBeGreaterThanOrEqual(1);
+    const finalBlocks = writes[writes.length - 1].content as Array<
+      Record<string, unknown>
+    >;
+    const surfaceBlock = finalBlocks.find((b) => b.type === "ui_surface")!;
+    expect(surfaceBlock.completed).toBe(true);
+    expect(surfaceBlock.completionSummary).toBe("Confirmed");
+  });
+});

package/src/__tests__/conversation-tool-setup-app-refresh.test.ts CHANGED Viewed

@@ -264,6 +264,65 @@ describe("session-tool-setup app refresh side effects", () => {
       });
     });
+    test("canonicalizes create_app skill_execute alias before hooks run", async () => {
+      const ctx = makeCtx({ allowedToolNames: new Set(["app_create"]) });
+      const executor = makeFakeExecutor({
+        content: JSON.stringify({ id: "alias-app-1", name: "Alias App" }),
+        isError: false,
+      });
+      const toolFn = createToolExecutor(
+        executor as unknown as ToolExecutor,
+        noopPrompter,
+        noopSecretPrompter,
+        ctx,
+        noopLifecycleHandler,
+      );
+      await toolFn("skill_execute", {
+        tool: "create_app",
+        input: { name: "Alias App" },
+        activity: "Building app",
+      });
+      const calls = executor.execute.mock.calls as unknown[][];
+      expect(calls[0][0]).toBe("app_create");
+      expect(calls[0][1]).toEqual({ name: "Alias App" });
+      expect(broadcastSpy.mock.calls.length).toBeGreaterThanOrEqual(1);
+      expect((broadcastSpy.mock.calls as unknown[][])[0][0]).toEqual({
+        type: "app_files_changed",
+        appId: "alias-app-1",
+      });
+    });
+    test("preserves exact active create_app skill tool when app_create is also active", async () => {
+      const ctx = makeCtx({
+        allowedToolNames: new Set(["create_app", "app_create"]),
+      });
+      const executor = makeFakeExecutor({
+        content: JSON.stringify({ id: "custom-app-1", name: "Custom App" }),
+        isError: false,
+      });
+      const toolFn = createToolExecutor(
+        executor as unknown as ToolExecutor,
+        noopPrompter,
+        noopSecretPrompter,
+        ctx,
+        noopLifecycleHandler,
+      );
+      await toolFn("skill_execute", {
+        tool: "create_app",
+        input: { name: "Custom App" },
+        activity: "Running custom app tool",
+      });
+      const calls = executor.execute.mock.calls as unknown[][];
+      expect(calls[0][0]).toBe("create_app");
+      expect(broadcastSpy).not.toHaveBeenCalled();
+    });
     test("skips side effects when app_create result is an error", async () => {
       const ctx = makeCtx();
       const executor = makeFakeExecutor({ content: "Error", isError: true });

package/src/__tests__/conversation-workspace-injection.test.ts CHANGED Viewed

@@ -1,14 +1,8 @@
 import { beforeEach, describe, expect, mock, test } from "bun:test";
 import type { AgentEvent } from "../agent/loop.js";
-import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
 import type { Message, ProviderResponse } from "../providers/types.js";
-// This test exercises v1 conversation routing. The `memory-v2-enabled` flag
-// (registry default `true`) flips memory routing to v2 — disable it here so
-// the v1 paths under test stay active.
-_setOverridesForTesting({ "memory-v2-enabled": false });
 // ---------------------------------------------------------------------------
 // Track agent loop calls
 // ---------------------------------------------------------------------------
@@ -69,7 +63,7 @@ mock.module("../config/loader.js", () => ({
       pricingOverrides: [],
     },
     rateLimit: { maxRequestsPerMinute: 0 },
-    memory: { enabled: false },
+    memory: { enabled: false, v2: { enabled: false } },
     daemon: {
       startupSocketWaitMs: 5000,
       stopTimeoutMs: 5000,

package/src/__tests__/conversation-workspace-tool-tracking.test.ts CHANGED Viewed

@@ -1,14 +1,8 @@
 import { beforeEach, describe, expect, mock, test } from "bun:test";
 import type { AgentEvent } from "../agent/loop.js";
-import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
 import type { Message, ProviderResponse } from "../providers/types.js";
-// This test exercises v1 conversation routing. The `memory-v2-enabled` flag
-// (registry default `true`) flips memory routing to v2 — disable it here so
-// the v1 paths under test stay active.
-_setOverridesForTesting({ "memory-v2-enabled": false });
 // ---------------------------------------------------------------------------
 // Configurable agent loop behavior
 // ---------------------------------------------------------------------------
@@ -67,7 +61,7 @@ mock.module("../config/loader.js", () => ({
       pricingOverrides: [],
     },
     rateLimit: { maxRequestsPerMinute: 0 },
-    memory: { enabled: false },
+    memory: { enabled: false, v2: { enabled: false } },
     daemon: {
       startupSocketWaitMs: 5000,
       stopTimeoutMs: 5000,

package/src/__tests__/filing-service.test.ts CHANGED Viewed

@@ -34,9 +34,6 @@ mock.module("../config/loader.js", () => ({
   invalidateConfigCache: () => {},
 }));
-const { _setOverridesForTesting } =
-  await import("../config/assistant-feature-flags.js");
 // Mock conversation store
 const createdConversations: Array<{ title: string; conversationType: string }> =
   [];
@@ -128,7 +125,6 @@ describe("FilingService", () => {
     } catch {
       // best-effort
     }
-    _setOverridesForTesting({});
   });
   beforeEach(() => {
@@ -331,8 +327,7 @@ describe("FilingService", () => {
   });
   describe("memory v2 gate", () => {
-    test("start() does not schedule timers when v2 flag and config are both on", () => {
-      _setOverridesForTesting({ "memory-v2-enabled": true });
+    test("start() does not schedule timers when memory.v2.enabled is true", () => {
       mockConfig.memory.v2.enabled = true;
       const service = createService();
@@ -342,24 +337,12 @@ describe("FilingService", () => {
       expect(service.nextCompactionAt).toBeNull();
     });
-    test("start() does not schedule timers when only the flag is on", () => {
-      _setOverridesForTesting({ "memory-v2-enabled": true });
+    test("start() schedules timers when memory.v2.enabled is false (v1 filing runs)", () => {
       mockConfig.memory.v2.enabled = false;
       const service = createService();
       service.start();
-      expect(service.nextRunAt).toBeNull();
-      expect(service.nextCompactionAt).toBeNull();
-    });
-    test("start() schedules timers when only the config is on", () => {
-      _setOverridesForTesting({ "memory-v2-enabled": false });
-      mockConfig.memory.v2.enabled = true;
-      const service = createService();
-      service.start();
       expect(service.nextRunAt).not.toBeNull();
       expect(service.nextCompactionAt).not.toBeNull();
       service.stop();

package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts CHANGED Viewed

@@ -5,17 +5,16 @@
  * One representative call site (the `installSkill` bundled branch) is
  * exercised — all 5 sites share the same delegation to
  * `maybeSeedMemoryV2Skills`, so a single suite covers behavior. Validates:
- *   - flag + config both on → helper invoked after seedSkillGraphNodes
- *     and the seed observed (callOrder picks up "v2")
- *   - flag off → helper still invoked, but the seed short-circuits
- *   - config.memory.v2.enabled off → helper still invoked, seed short-circuits
+ *   - config on → helper invoked after seedSkillGraphNodes and the seed
+ *     observed (callOrder picks up "v2")
+ *   - config off → helper still invoked, but the seed short-circuits
  *
  * The handler delegates to `maybeSeedMemoryV2Skills` from
  * `daemon/memory-v2-startup.ts`. We mock that module directly so the test
  * does not have to drain the dynamic-import microtask chain. The helper's
- * gate semantics (flag + config + rejection swallowing) are covered by
- * `lifecycle-memory-v2-seed.test.ts`; here we only verify that the
- * handler invokes the helper synchronously with the live config.
+ * gate semantics are covered by `lifecycle-memory-v2-seed.test.ts`; here
+ * we only verify that the handler invokes the helper synchronously with
+ * the live config.
  */
 import { beforeEach, describe, expect, mock, test } from "bun:test";
@@ -23,7 +22,7 @@ import { beforeEach, describe, expect, mock, test } from "bun:test";
 // Programmable test state
 // ---------------------------------------------------------------------------
-const flagsState = { flagEnabled: true, configV2Enabled: true };
+const flagsState = { configV2Enabled: true };
 const callOrder: string[] = [];
@@ -55,10 +54,7 @@ mock.module("../config/skills.js", () => ({
 }));
 mock.module("../config/assistant-feature-flags.js", () => ({
-  isAssistantFeatureFlagEnabled: (key: string) => {
-    if (key === "memory-v2-enabled") return flagsState.flagEnabled;
-    return true;
-  },
+  isAssistantFeatureFlagEnabled: () => true,
 }));
 // Stub both `getConfig` and `loadConfig`. `loadConfig` is reached by code
@@ -220,18 +216,17 @@ const { installSkill } = await import("../daemon/handlers/skills.js");
 describe("v2 skill re-seed gating in skill handlers", () => {
   beforeEach(() => {
-    flagsState.flagEnabled = true;
     flagsState.configV2Enabled = true;
     callOrder.length = 0;
     mockSeedSkillGraphNodes.mockClear();
     mockMaybeSeedMemoryV2Skills.mockClear();
     mockMaybeSeedMemoryV2Skills.mockImplementation((config) => {
-      if (!flagsState.flagEnabled || !config.memory.v2.enabled) return;
+      if (!config.memory.v2.enabled) return;
       callOrder.push("v2");
     });
   });
-  test("flag + config both on → maybeSeedMemoryV2Skills invoked after seedSkillGraphNodes", async () => {
+  test("config on → maybeSeedMemoryV2Skills invoked after seedSkillGraphNodes", async () => {
     const result = await installSkill({ slug: "bundled-skill" });
     expect(result.success).toBe(true);
@@ -240,17 +235,6 @@ describe("v2 skill re-seed gating in skill handlers", () => {
     expect(callOrder).toEqual(["v1", "v2"]);
   });
-  test("flag off → seed mock observes the disabled flag and skips", async () => {
-    flagsState.flagEnabled = false;
-    const result = await installSkill({ slug: "bundled-skill" });
-    expect(result.success).toBe(true);
-    expect(mockSeedSkillGraphNodes).toHaveBeenCalledTimes(1);
-    expect(mockMaybeSeedMemoryV2Skills).toHaveBeenCalledTimes(1);
-    expect(callOrder).toEqual(["v1"]);
-  });
   test("config.memory.v2.enabled off → seed mock observes config and skips", async () => {
     flagsState.configV2Enabled = false;