npm - @vellumai/assistant - Versions diffs - 0.10.3 → 0.10.4-staging.1 - Mend

@vellumai/assistant 0.10.3 → 0.10.4-staging.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (239) hide show

package/openapi.yaml +73 -56
package/package.json +1 -1
package/src/__tests__/actor-trust-resolver-address-fallback.test.ts +83 -31
package/src/__tests__/assistant-stream-state.test.ts +3 -76
package/src/__tests__/background-workers-disk-pressure.test.ts +4 -2
package/src/__tests__/channel-approval-routes.test.ts +21 -26
package/src/__tests__/channel-delivery-store.test.ts +28 -0
package/src/__tests__/channel-guardian.test.ts +82 -32
package/src/__tests__/channel-inbound-disk-pressure.test.ts +11 -19
package/src/__tests__/channel-reply-delivery.test.ts +6 -2
package/src/__tests__/compaction-ledger-store.test.ts +128 -0
package/src/__tests__/config-loader-backfill.test.ts +148 -0
package/src/__tests__/consult-deadline.test.ts +60 -0
package/src/__tests__/contact-store-interaction-info.test.ts +156 -0
package/src/__tests__/contact-store-user-file.test.ts +7 -10
package/src/__tests__/contacts-relay-reads.test.ts +6 -9
package/src/__tests__/contacts-write.test.ts +0 -2
package/src/__tests__/conversation-agent-loop-overflow.test.ts +4 -2
package/src/__tests__/conversation-agent-loop.test.ts +98 -7
package/src/__tests__/conversation-attention-telegram.test.ts +9 -11
package/src/__tests__/conversation-error.test.ts +18 -0
package/src/__tests__/conversation-fork-crud.test.ts +354 -24
package/src/__tests__/conversation-title-service.test.ts +222 -201
package/src/__tests__/db-compaction-events-migration.test.ts +129 -0
package/src/__tests__/delete-propagation.test.ts +5 -3
package/src/__tests__/dm-backfill.test.ts +6 -4
package/src/__tests__/emit-signal-routing-intent.test.ts +2 -6
package/src/__tests__/guardian-binding-drift-heal.test.ts +43 -23
package/src/__tests__/guardian-dispatch.test.ts +50 -5
package/src/__tests__/guardian-routing-state.test.ts +6 -10
package/src/__tests__/helpers/channel-test-adapter.ts +45 -12
package/src/__tests__/helpers/create-guardian-binding.ts +15 -23
package/src/__tests__/helpers/mock-logger.ts +1 -0
package/src/__tests__/helpers/seed-contact-channel.ts +96 -0
package/src/__tests__/inbound-invite-redemption.test.ts +87 -10
package/src/__tests__/invite-redemption-service.test.ts +273 -53
package/src/__tests__/invite-routes-http.test.ts +34 -0
package/src/__tests__/invite-service-ipc.test.ts +65 -2
package/src/__tests__/list-messages-page-latest.test.ts +173 -4
package/src/__tests__/mcp-config-secret-boundary.test.ts +3 -0
package/src/__tests__/non-member-access-request.test.ts +15 -13
package/src/__tests__/onboarding-persona-write.test.ts +52 -22
package/src/__tests__/persist-onboarding-artifacts.test.ts +1 -0
package/src/__tests__/persona-resolver.test.ts +75 -45
package/src/__tests__/plugin-bootstrap.test.ts +13 -5
package/src/__tests__/plugin-disabled-state.test.ts +190 -0
package/src/__tests__/provider-usage-tracking.test.ts +1 -1
package/src/__tests__/reaction-intercept-cold-cache-warm.test.ts +135 -0
package/src/__tests__/reaction-intercept-member-verdict-warm.test.ts +158 -0
package/src/__tests__/reaction-persistence.test.ts +51 -4
package/src/__tests__/relay-server.test.ts +88 -31
package/src/__tests__/runtime-attachment-metadata.test.ts +9 -11
package/src/__tests__/settings-routes.test.ts +32 -0
package/src/__tests__/slack-block-formatting.test.ts +1 -38
package/src/__tests__/sse-actor-principal-guardian-source.test.ts +13 -36
package/src/__tests__/stt-hints.test.ts +6 -3
package/src/__tests__/subagent-fork-prompt-role.test.ts +195 -0
package/src/__tests__/subagent-fork-spawn.test.ts +6 -7
package/src/__tests__/subagent-role-registry.test.ts +17 -4
package/src/__tests__/subagent-spawn-and-await.test.ts +546 -0
package/src/__tests__/subagent-tools.test.ts +398 -3
package/src/__tests__/thread-backfill.test.ts +3 -3
package/src/__tests__/tool-preview-lifecycle.test.ts +26 -10
package/src/__tests__/tool-start-timestamp.test.ts +4 -3
package/src/__tests__/trusted-contact-approval-notifier.test.ts +37 -51
package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +2 -2
package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +9 -7
package/src/__tests__/trusted-contact-multichannel.test.ts +16 -7
package/src/__tests__/trusted-contact-verification.test.ts +79 -54
package/src/__tests__/voice-guardian-cold-cache-warm.test.ts +137 -0
package/src/__tests__/voice-invite-redemption.test.ts +183 -20
package/src/__tests__/workspace-migration-102-preserve-heartbeat-enabled-for-existing-workspaces.test.ts +3 -3
package/src/__tests__/workspace-migration-111-prune-seeded-callsite-defaults.test.ts +2 -2
package/src/__tests__/workspace-migration-112-remove-advisor-callsite-override.test.ts +170 -0
package/src/__tests__/workspace-migration-drop-user-md.test.ts +196 -238
package/src/a2a/__tests__/e2e-a2a-channel.test.ts +35 -47
package/src/agent/loop-exclusive-tool.test.ts +19 -15
package/src/agent/loop-native-web-search.test.ts +200 -0
package/src/agent/loop.ts +108 -1
package/src/api/responses/conversation-message.ts +9 -0
package/src/approvals/guardian-request-resolvers.ts +16 -4
package/src/calls/__tests__/relay-setup-router.test.ts +10 -18
package/src/calls/guardian-dispatch.ts +14 -11
package/src/calls/inbound-trust-reader.ts +7 -1
package/src/calls/relay-access-wait.ts +6 -6
package/src/calls/relay-server.ts +22 -2
package/src/calls/relay-setup-router.ts +10 -10
package/src/cli/commands/__tests__/conversations-slack.test.ts +1 -0
package/src/cli/commands/contacts.ts +10 -7
package/src/cli/commands/memory/__tests__/worker.test.ts +147 -17
package/src/cli/commands/memory/worker.ts +97 -30
package/src/cli/commands/plugins.ts +3 -146
package/src/cli/lib/__tests__/list-installed-plugins.test.ts +17 -17
package/src/cli/lib/__tests__/publish-plugin.test.ts +98 -0
package/src/cli/lib/publish-plugin.ts +231 -1
package/src/config/__tests__/sync-gated-profiles.test.ts +5 -7
package/src/config/bundled-skills/subagent/SKILL.md +16 -1
package/src/config/bundled-skills/subagent/TOOLS.json +5 -4
package/src/config/call-site-defaults.ts +0 -6
package/src/config/llm-resolver.ts +0 -3
package/src/config/schemas/call-site-catalog.ts +0 -7
package/src/config/schemas/heartbeat.ts +2 -5
package/src/config/schemas/llm.ts +3 -12
package/src/config/schemas/memory-lifecycle.ts +1 -1
package/src/config/seed-inference-profiles.ts +76 -35
package/src/config/sync-gated-profiles.ts +0 -3
package/src/contacts/__tests__/contacts-write-revoke-relay.test.ts +7 -8
package/src/contacts/__tests__/member-write-relay.test.ts +35 -11
package/src/contacts/contact-store.ts +27 -237
package/src/contacts/contacts-write.ts +18 -58
package/src/contacts/gateway-channel-read.ts +51 -0
package/src/contacts/member-write-relay.ts +25 -31
package/src/contacts/types.ts +3 -15
package/src/daemon/__tests__/conversation-tool-setup.test.ts +0 -44
package/src/daemon/conversation-agent-loop-handlers.ts +29 -10
package/src/daemon/conversation-agent-loop.ts +68 -61
package/src/daemon/conversation-error.ts +7 -10
package/src/daemon/conversation-tool-setup.ts +0 -10
package/src/daemon/conversation.ts +10 -0
package/src/daemon/external-plugins-bootstrap.ts +8 -2
package/src/daemon/handlers/__tests__/config-a2a-accept.test.ts +0 -1
package/src/daemon/handlers/__tests__/config-a2a-complete.test.ts +0 -2
package/src/daemon/handlers/__tests__/config-a2a-redeem.test.ts +0 -2
package/src/daemon/handlers/__tests__/config-channels.test.ts +9 -14
package/src/daemon/handlers/config-channels.ts +14 -29
package/src/daemon/lifecycle.ts +16 -4
package/src/daemon/message-types/surfaces.ts +2 -0
package/src/heartbeat/heartbeat-service.ts +5 -0
package/src/home/relationship-state-writer.ts +5 -0
package/src/memory/__tests__/embedding-cache.test.ts +136 -0
package/src/memory/compaction-ledger-store.ts +107 -0
package/src/memory/conversation-crud.ts +136 -61
package/src/memory/conversation-title-service.ts +173 -24
package/src/memory/embedding-backend.ts +8 -1
package/src/memory/embedding-cache.ts +139 -0
package/src/memory/jobs-worker.ts +75 -29
package/src/memory/memory-retrospective-job.ts +5 -0
package/src/memory/migrations/209-strip-thinking-from-consolidated.ts +27 -5
package/src/memory/migrations/302-create-compaction-events.ts +107 -0
package/src/memory/migrations/303-add-conversation-creation-seq.ts +33 -0
package/src/memory/migrations/__tests__/209-strip-thinking-from-consolidated.test.ts +79 -6
package/src/memory/schema/contacts.ts +6 -2
package/src/memory/schema/conversations.ts +39 -0
package/src/memory/steps.ts +1090 -367
package/src/memory/worker-control.ts +104 -18
package/src/memory/worker-process.ts +17 -0
package/src/messaging/channel-binding-metadata.ts +31 -0
package/src/messaging/channel-binding-schema.ts +51 -0
package/src/messaging/providers/__tests__/callback-routing.test.ts +45 -0
package/src/messaging/providers/__tests__/transport-dispatch.test.ts +195 -0
package/src/messaging/providers/a2a/__tests__/deliver.test.ts +11 -0
package/src/messaging/providers/a2a/deliver.ts +5 -1
package/src/messaging/providers/a2a/transport.ts +10 -0
package/src/messaging/providers/callback-routing.ts +48 -0
package/src/messaging/providers/channel-transport.ts +55 -0
package/src/messaging/providers/index.ts +65 -241
package/src/messaging/providers/slack/binding-metadata.ts +62 -0
package/src/messaging/providers/slack/transport.ts +92 -0
package/src/messaging/providers/telegram-bot/transport.ts +51 -0
package/src/messaging/providers/whatsapp/transport.ts +38 -0
package/src/notifications/__tests__/broadcaster.test.ts +0 -8
package/src/notifications/__tests__/connected-channels.test.ts +8 -36
package/src/notifications/__tests__/destination-resolver.test.ts +12 -117
package/src/notifications/destination-resolver.ts +7 -23
package/src/notifications/emit-signal.ts +5 -11
package/src/plugins/defaults/index.ts +0 -35
package/src/plugins/defaults/memory-v3-shadow/__tests__/dense.test.ts +11 -0
package/src/plugins/defaults/memory-v3-shadow/__tests__/section-dense-store.test.ts +243 -2
package/src/plugins/defaults/memory-v3-shadow/section-dense-store.ts +167 -14
package/src/plugins/disabled-state.ts +31 -0
package/src/plugins/registry.ts +55 -12
package/src/prompts/persona-resolver.ts +43 -11
package/src/providers/call-site-routing.ts +41 -0
package/src/providers/provider-send-message.ts +6 -0
package/src/providers/ratelimit.ts +6 -0
package/src/providers/registry.ts +1 -1
package/src/providers/retry.ts +6 -0
package/src/providers/types.ts +13 -0
package/src/providers/usage-tracking.ts +6 -0
package/src/runtime/__tests__/guardian-vellum-migration.test.ts +30 -27
package/src/runtime/__tests__/local-principal-trust.test.ts +16 -18
package/src/runtime/__tests__/member-verdict-cache.test.ts +119 -0
package/src/runtime/__tests__/trust-verdict-consumer.test.ts +115 -168
package/src/runtime/access-request-helper.ts +1 -2
package/src/runtime/actor-trust-resolver.ts +44 -17
package/src/runtime/anchored-guardian.test.ts +7 -54
package/src/runtime/anchored-guardian.ts +4 -53
package/src/runtime/assistant-stream-state.ts +12 -74
package/src/runtime/channel-reply-delivery.ts +3 -8
package/src/runtime/guardian-vellum-migration.ts +18 -16
package/src/runtime/invite-redemption-service.ts +25 -10
package/src/runtime/local-actor-identity.test.ts +108 -0
package/src/runtime/local-actor-identity.ts +27 -20
package/src/runtime/member-verdict-cache.ts +0 -0
package/src/runtime/routes/__tests__/contact-routes.test.ts +100 -7
package/src/runtime/routes/__tests__/global-search-routes.test.ts +1 -2
package/src/runtime/routes/__tests__/surface-action-routes.test.ts +2 -1
package/src/runtime/routes/contact-routes.ts +40 -25
package/src/runtime/routes/conversation-list-routes.ts +1 -29
package/src/runtime/routes/conversation-routes.ts +27 -7
package/src/runtime/routes/inbound-stages/acl-enforcement.ts +0 -10
package/src/runtime/routes/inbound-stages/background-dispatch.ts +4 -8
package/src/runtime/routes/inbound-stages/reaction-intercept.ts +19 -0
package/src/runtime/routes/settings-routes.ts +8 -3
package/src/runtime/services/conversation-serializer.ts +6 -49
package/src/runtime/slack-block-formatting.ts +0 -15
package/src/runtime/trust-verdict-consumer.ts +36 -41
package/src/subagent/__tests__/consult-prompt.test.ts +35 -0
package/src/{plugins/defaults/advisor/__tests__/transcript.test.ts → subagent/__tests__/consult-transcript.test.ts} +47 -10
package/src/{plugins/defaults/advisor/steering.ts → subagent/consult-prompt.ts} +17 -39
package/src/{plugins/defaults/advisor/transcript.ts → subagent/consult-transcript.ts} +18 -8
package/src/subagent/index.ts +1 -1
package/src/subagent/manager.ts +245 -33
package/src/subagent/types.ts +8 -1
package/src/tools/registry.ts +10 -3
package/src/tools/subagent/consult-deadline.ts +49 -0
package/src/tools/subagent/spawn.ts +234 -5
package/src/util/logger.ts +9 -0
package/src/util/platform.ts +14 -0
package/src/workspace/migrations/031-drop-user-md.ts +232 -148
package/src/workspace/migrations/112-remove-advisor-callsite-override.ts +64 -0
package/src/workspace/migrations/registry.ts +2 -0
package/src/plugins/defaults/advisor/__tests__/advisor-gate.test.ts +0 -56
package/src/plugins/defaults/advisor/__tests__/advisor-state-store.test.ts +0 -43
package/src/plugins/defaults/advisor/__tests__/agent-loop-integration.test.ts +0 -137
package/src/plugins/defaults/advisor/__tests__/consult.test.ts +0 -314
package/src/plugins/defaults/advisor/__tests__/context-pack-gating.test.ts +0 -106
package/src/plugins/defaults/advisor/__tests__/context-pack.test.ts +0 -60
package/src/plugins/defaults/advisor/__tests__/hooks.test.ts +0 -138
package/src/plugins/defaults/advisor/advisor-gate.ts +0 -29
package/src/plugins/defaults/advisor/advisor-state-store.ts +0 -94
package/src/plugins/defaults/advisor/config.ts +0 -21
package/src/plugins/defaults/advisor/consult.ts +0 -197
package/src/plugins/defaults/advisor/context-pack.ts +0 -288
package/src/plugins/defaults/advisor/hooks/post-model-call.ts +0 -34
package/src/plugins/defaults/advisor/hooks/pre-model-call.ts +0 -30
package/src/plugins/defaults/advisor/hooks/user-prompt-submit.ts +0 -19
package/src/plugins/defaults/advisor/package.json +0 -14
package/src/plugins/defaults/advisor/tools/advisor.ts +0 -92

package/src/memory/conversation-title-service.ts CHANGED Viewed

@@ -7,9 +7,14 @@
  * overwritten, never user-provided custom titles.
  */
-import { getConfiguredProvider } from "../providers/provider-send-message.js";
-import type { Provider } from "../providers/types.js";
-import { runBtwSidechain } from "../runtime/btw-sidechain.js";
+import {
+  createTimeout,
+  extractAllText,
+  extractToolUse,
+  getConfiguredProvider,
+  userMessage as buildUserMessage,
+} from "../providers/provider-send-message.js";
+import type { Provider, ToolDefinition } from "../providers/types.js";
 import { publishConversationTitleChanged } from "../runtime/sync/resource-sync-events.js";
 import { getLogger } from "../util/logger.js";
 import { Mutex } from "../util/mutex.js";
@@ -171,16 +176,7 @@ export async function generateAndPersistConversationTitle(
   }
   const prompt = buildTitlePrompt(context, userMessage, assistantResponse);
-  const result = await runBtwSidechain({
-    content: prompt,
-    provider,
-    systemPrompt: buildTitleSystemPrompt(),
-    tools: [],
-    callSite: "conversationTitle",
-    signal,
-    timeoutMs: 15_000,
-  });
-  const title = normalizeTitle(result.text);
+  const title = await generateTitleViaLLM(provider, prompt, signal);
   if (title) {
     // Re-check replaceability before persisting (race guard)
     const current = getConversation(conversationId);
@@ -318,16 +314,7 @@ export async function regenerateConversationTitle(
   if (!/\n(?:User|Assistant): /.test(prompt)) {
     return { title: conversation.title ?? UNTITLED_FALLBACK, updated: false };
   }
-  const result = await runBtwSidechain({
-    content: prompt,
-    provider,
-    systemPrompt: buildTitleSystemPrompt(),
-    tools: [],
-    callSite: "conversationTitle",
-    signal,
-    timeoutMs: 15_000,
-  });
-  const title = normalizeTitle(result.text);
+  const title = await generateTitleViaLLM(provider, prompt, signal);
   if (title) {
     // Re-check isAutoTitle before persisting (race guard against manual rename)
     const current = getConversation(conversationId);
@@ -396,6 +383,81 @@ function buildTitleSystemPrompt(): string {
   ].join("\n");
 }
+const TITLE_TOOL_NAME = "record_conversation_title";
+/**
+ * Tool the title model is forced to call. Constraining the output to a single
+ * `title` argument keeps weak/fast models (e.g. Haiku-class title models) from
+ * "thinking aloud" or continuing the conversation in the response text —
+ * failure modes that otherwise get captured verbatim as the title
+ * (e.g. "I need to generate a…", "I'll work through these files…").
+ */
+function buildTitleTool(): ToolDefinition {
+  return {
+    name: TITLE_TOOL_NAME,
+    description:
+      "Record the conversation's title. Call this exactly once with a short noun phrase naming the TOPIC — never a sentence, a reply, or any preamble.",
+    input_schema: {
+      type: "object",
+      properties: {
+        title: {
+          type: "string",
+          description:
+            "2–5 words, 40 characters max. A scannable sidebar label naming the topic (e.g. 'Auth Middleware Rewrite', 'Docker Volume Mounts'). No quotes, markdown, or trailing punctuation.",
+        },
+      },
+      required: ["title"],
+    },
+  };
+}
+/**
+ * Run the title LLM call with a forced tool so the model returns a structured
+ * `{ title }` rather than free text. Returns a normalized title, or "" when the
+ * model declines or misbehaves — callers fall back to a deterministic title.
+ *
+ * Forcing the tool is the primary guard against prose leakage; `normalizeTitle`
+ * is the backstop for the text-fallback path and for any provider that ignores
+ * forced `tool_choice`.
+ */
+async function generateTitleViaLLM(
+  provider: Provider,
+  prompt: string,
+  signal?: AbortSignal,
+): Promise<string> {
+  const { signal: timeoutSignal, cleanup } = createTimeout(15_000);
+  const combinedSignal = signal
+    ? AbortSignal.any([signal, timeoutSignal])
+    : timeoutSignal;
+  try {
+    const response = await provider.sendMessage([buildUserMessage(prompt)], {
+      tools: [buildTitleTool()],
+      systemPrompt: buildTitleSystemPrompt(),
+      config: {
+        max_tokens: 256,
+        callSite: "conversationTitle",
+        tool_choice: { type: "tool", name: TITLE_TOOL_NAME },
+        disableCache: true,
+      },
+      signal: combinedSignal,
+    });
+    const toolBlock = extractToolUse(response);
+    const titleInput = toolBlock?.input as { title?: unknown } | undefined;
+    if (
+      toolBlock?.name === TITLE_TOOL_NAME &&
+      typeof titleInput?.title === "string"
+    ) {
+      return normalizeTitle(titleInput.title);
+    }
+    // Provider ignored the forced tool (or the model emitted prose instead of
+    // calling it). Fall back to the response text — `normalizeTitle`'s prose
+    // guard rejects a ramble while keeping a compliant plain-text title.
+    return normalizeTitle(extractAllText(response));
+  } finally {
+    cleanup();
+  }
+}
 function buildTitlePrompt(
   context?: TitleContext,
   userMessage?: string,
@@ -503,13 +565,100 @@ function truncateTitle(title: string): string {
 function normalizeTitle(raw: string): string {
   let title = raw.trim().replace(/^["']|["']$/g, "");
   title = stripMarkdown(title);
-  title = stripThinkingTags(title);
+  title = stripThinkingTags(title).trim();
+  if (!title) return "";
+  // Reject outputs that are the model reasoning aloud or continuing the
+  // conversation instead of naming it (e.g. "I need to generate a…", "I'll
+  // work through these files…"). Callers fall back to a deterministic title.
+  if (looksLikeLeakedProse(title)) {
+    return "";
+  }
   if (META_FAILURE_TITLES.has(title.toLowerCase())) {
     return "";
   }
   return truncateTitle(title);
 }
+/** Reasoning/sentence openers that never start a legitimate topic title. */
+const LEAKED_PROSE_PREFIXES = [
+  "i need to",
+  "i needed to",
+  "i should",
+  "i will",
+  "i'll",
+  "i can ",
+  "i can't",
+  "i cannot",
+  "i'm ",
+  "i am ",
+  "i've ",
+  "i have ",
+  "i'd ",
+  "i would",
+  "let me",
+  "looking at",
+  "based on",
+  "given the",
+  "to generate",
+  "to summarize",
+  "to title",
+  // Subject-led reasoning openers. A bare noun phrase ("The User Interface
+  // Redesign", "The Conversation API") is a valid title, so each subject only
+  // counts as leaked prose when a verb or possessive follows it — marking the
+  // output as a sentence rather than a topic.
+  "the user wants",
+  "the user asked",
+  "the user is",
+  "the user wanted",
+  "the user needs",
+  "the user said",
+  "the user has",
+  "the user would",
+  "the user's request",
+  "the conversation is",
+  "this conversation is",
+  "the conversation appears",
+  "the conversation seems",
+  "the conversation covers",
+  "the conversation discusses",
+  "the assistant should",
+  "the assistant is",
+  "the assistant wants",
+  "the assistant needs",
+  "the title should",
+  "the title is",
+  "the title would",
+  "the title for",
+  "here's ",
+  "here is ",
+  "here are ",
+  "sure,",
+  "okay,",
+  "ok,",
+];
+/**
+ * Heuristic guard for title outputs that are clearly prose — the model
+ * reasoning aloud or replying to the conversation rather than naming it. A real
+ * title is a single-line short noun phrase, so we reject multi-line output,
+ * embedded transcript markers, leading reasoning openers, and sentence-shaped
+ * clauses. Deliberately tight: a false reject only costs a deterministic
+ * fallback title, while a false accept persists a broken one.
+ */
+function looksLikeLeakedProse(title: string): boolean {
+  if (/\n/.test(title)) return true;
+  if (/\b(?:user|assistant)\s*:/i.test(title)) return true;
+  const lower = title.toLowerCase();
+  if (LEAKED_PROSE_PREFIXES.some((prefix) => lower.startsWith(prefix))) {
+    return true;
+  }
+  // Sentence-shaped: terminal punctuation on a multi-word clause.
+  if (/[.?!]$/.test(title) && title.split(/\s+/).length > 5) {
+    return true;
+  }
+  return false;
+}
 /** Strip thinking tags so they don't bleed into generated titles. */
 function stripThinkingTags(text: string): string {
   return text

package/src/memory/embedding-backend.ts CHANGED Viewed

@@ -262,7 +262,14 @@ function getCached(
   return backendCache.get(cacheKey(provider, model, extras));
 }
-function geminiCacheExtras(config: AssistantConfig): string[] {
+/**
+ * The Gemini embedding options that change the output vector for identical
+ * input — task type and output dimensionality — rendered as stable cache-key
+ * fragments. Empty for a default Gemini config and for every non-Gemini
+ * provider. Part of the in-memory vector-cache identity here, and reused by the
+ * v3 section dense store so its persistent cache shares the same identity.
+ */
+export function geminiCacheExtras(config: AssistantConfig): string[] {
   const extras: string[] = [];
   if (config.memory.embeddings.geminiTaskType) {
     extras.push(`task=${config.memory.embeddings.geminiTaskType}`);

package/src/memory/embedding-cache.ts ADDED Viewed

@@ -0,0 +1,139 @@
+// ---------------------------------------------------------------------------
+// Shared dense-embedding cache over the `memory_embeddings` SQLite table
+// ---------------------------------------------------------------------------
+//
+// A read/write pair that caches one dense vector keyed on
+// `(targetType, targetId, provider, model)` alongside the content hash it was
+// embedded from, so callers can skip the embedding-backend round-trip when an
+// input's text is unchanged. The `embed_concept_page` job pioneered this
+// pattern for whole-page bodies; this module factors out the generic mechanics
+// — dim-match gating, legacy-null-hash handling, blob encode/decode, and the
+// upsert on the unique key — so other embedders (e.g. the v3 section dense
+// store) reuse one implementation instead of duplicating it.
+import { randomUUID } from "node:crypto";
+import { and, eq } from "drizzle-orm";
+import { getLogger } from "../util/logger.js";
+import type { getDb } from "./db-connection.js";
+import { blobToVector, vectorToBlob } from "./job-utils.js";
+import { memoryEmbeddings } from "./schema.js";
+const log = getLogger("memory-embedding-cache");
+type MemoryDb = ReturnType<typeof getDb>;
+/** Lookup key for {@link readEmbeddingCache}. */
+export interface EmbeddingCacheKey {
+  targetType: string;
+  targetId: string;
+  provider: string;
+  model: string;
+  /** Configured embedding dimension; a row at a different size is a miss. */
+  expectedDim: number;
+}
+/** A cached dense vector plus the content hash it was embedded from. */
+export interface EmbeddingCacheEntry {
+  dense: number[];
+  contentHash: string;
+}
+/**
+ * Look up a cached dense vector keyed on `(targetType, targetId, provider,
+ * model)`. Returns the row only when the persisted dimensions match
+ * `expectedDim` — a stale row from a previous `vectorSize` is treated as a miss
+ * so the caller re-embeds. A row with a null `contentHash` (legacy/corrupt) is
+ * likewise a miss rather than a key the caller could misalign against.
+ */
+export function readEmbeddingCache(
+  db: MemoryDb,
+  key: EmbeddingCacheKey,
+): EmbeddingCacheEntry | null {
+  const row = db
+    .select({
+      vectorBlob: memoryEmbeddings.vectorBlob,
+      vectorJson: memoryEmbeddings.vectorJson,
+      dimensions: memoryEmbeddings.dimensions,
+      contentHash: memoryEmbeddings.contentHash,
+    })
+    .from(memoryEmbeddings)
+    .where(
+      and(
+        eq(memoryEmbeddings.targetType, key.targetType),
+        eq(memoryEmbeddings.targetId, key.targetId),
+        eq(memoryEmbeddings.provider, key.provider),
+        eq(memoryEmbeddings.model, key.model),
+      ),
+    )
+    .get();
+  if (!row || row.dimensions !== key.expectedDim) return null;
+  if (row.contentHash === null) return null;
+  const dense = row.vectorBlob
+    ? blobToVector(row.vectorBlob as Buffer)
+    : (JSON.parse(row.vectorJson!) as number[]);
+  return { dense, contentHash: row.contentHash };
+}
+/** Parameters for {@link writeEmbeddingCache}. */
+export interface EmbeddingCacheWrite {
+  targetType: string;
+  targetId: string;
+  dense: number[];
+  contentHash: string;
+  provider: string;
+  model: string;
+  now: number;
+}
+/**
+ * Persist a freshly embedded dense vector, upserting on the
+ * `(targetType, targetId, provider, model)` unique key. Best-effort: a write
+ * failure is logged and swallowed so the caller's downstream write still runs.
+ */
+export function writeEmbeddingCache(
+  db: MemoryDb,
+  params: EmbeddingCacheWrite,
+): void {
+  const { targetType, targetId, dense, contentHash, provider, model, now } =
+    params;
+  try {
+    const blobValue = vectorToBlob(dense);
+    db.insert(memoryEmbeddings)
+      .values({
+        id: randomUUID(),
+        targetType,
+        targetId,
+        provider,
+        model,
+        dimensions: dense.length,
+        vectorBlob: blobValue,
+        vectorJson: null,
+        contentHash,
+        createdAt: now,
+        updatedAt: now,
+      })
+      .onConflictDoUpdate({
+        target: [
+          memoryEmbeddings.targetType,
+          memoryEmbeddings.targetId,
+          memoryEmbeddings.provider,
+          memoryEmbeddings.model,
+        ],
+        set: {
+          vectorBlob: blobValue,
+          vectorJson: null,
+          dimensions: dense.length,
+          contentHash,
+          updatedAt: now,
+        },
+      })
+      .run();
+  } catch (err) {
+    log.warn(
+      { err, targetType, targetId },
+      "Failed to write embedding cache row",
+    );
+  }
+}

package/src/memory/jobs-worker.ts CHANGED Viewed

@@ -93,7 +93,11 @@ import {
   memoryV2ConsolidateJob,
 } from "./v2/consolidation-job.js";
 import { memoryV2SweepJob } from "./v2/sweep-job.js";
-import { spawnMemoryWorkerProcess } from "./worker-control.js";
+import {
+  removeSyncRunnerMarker,
+  spawnMemoryWorkerProcess,
+  writeSyncRunnerMarker,
+} from "./worker-control.js";
 const log = getLogger("memory-jobs-worker");
@@ -163,23 +167,33 @@ export interface MemoryJobsWorker {
 }
 /**
- * Start the memory jobs worker using the configured implementation.
+ * Start the daemon's memory jobs worker supervisor.
  *
- * `memory.worker.enabled` selects between two implementations:
- *   - enabled: spawn the worker as a separate OS process (the same path as
- *     `assistant memory worker start`), keeping long-running jobs off the
- *     caller's event loop.
- *   - disabled (default): run the worker in-process on the caller's event
- *     loop.
+ * The daemon always runs the in-process supervisor returned here. The
+ * supervisor owns the synchronous in-process runner and reconciles to
+ * `memory.worker.enabled` on every poll, re-reading the flag from disk so a
+ * runtime change takes effect without a restart:
+ *   - flag off (default): drain the queue in-process and publish the
+ *     sync-runner marker so `status` reports the synchronous runner as running.
+ *   - flag on: stand down (the out-of-process worker owns the queue) and clear
+ *     the marker.
+ * Gating on the flag — rather than on the worker process actually being present
+ * — keeps exactly one drainer active and avoids a boot race: when the flag is
+ * on the supervisor never processes, so it can't claim jobs that the spawning
+ * worker's startup recovery would then reset out from under it.
  *
- * The flag is read here so callers don't branch on it themselves. It only
- * governs which implementation starts; shutdown stops whichever worker is
- * actually running (see daemon/shutdown-handlers.ts), so the handle returned
- * for the out-of-process implementation has a no-op `stop()`.
+ * `memory.worker.enabled` is also the persisted boot preference: when set, the
+ * out-of-process worker is spawned here at startup so it is running
+ * immediately. The CLI `memory worker start`/`stop` commands flip the flag (and
+ * spawn/stop the worker process), so the supervisor switches the running daemon
+ * between synchronous and out-of-process modes within one poll. When the flag
+ * is on but no worker process is running, neither drainer processes — `status`
+ * surfaces this (worker not running, synchronous runner not running).
  *
  * This dispatcher must not be used as the standalone worker process's entry —
- * that would recurse and fork-bomb. `worker-process.ts` calls
- * {@link startInProcessMemoryJobsWorker} directly.
+ * that would recurse and fork-bomb, and the flag-on worker process would stand
+ * itself down. `worker-process.ts` calls {@link startInProcessMemoryJobsWorker}
+ * directly with no options.
  */
 export function startMemoryJobsWorker(): MemoryJobsWorker {
   if (getConfig().memory.worker?.enabled === true) {
@@ -195,31 +209,31 @@ export function startMemoryJobsWorker(): MemoryJobsWorker {
       .catch((err) =>
         log.warn(
           { err },
-          "Failed to start memory worker process — memory jobs will not be processed",
+          "Failed to start memory worker process — the in-process supervisor will drain the queue instead",
         ),
       );
-    return {
-      async runOnce(): Promise<number> {
-        return 0;
-      },
-      // No-op: shutdown always stops the worker process via the live-state
-      // PID probe in daemon/shutdown-handlers.ts, since it can't know whether
-      // the process was started here or out of band (e.g. `assistant memory
-      // worker start`) after boot.
-      stop(): void {},
-    };
   }
-  return startInProcessMemoryJobsWorker();
+  return startInProcessMemoryJobsWorker({ standDownForWorkerProcess: true });
 }
 /**
  * Run the memory jobs worker in-process on the caller's event loop: poll for
  * claimable jobs with adaptive backoff until {@link MemoryJobsWorker.stop} is
- * called. This is the worker loop itself — used directly by the daemon (when
- * `memory.worker.enabled` is off) and by the standalone worker process.
+ * called. This is the worker loop itself — used by the daemon supervisor (with
+ * `standDownForWorkerProcess`) and by the standalone worker process (without).
+ *
+ * When `standDownForWorkerProcess` is set the loop acts as the daemon's
+ * synchronous-runner supervisor: each tick it skips processing while
+ * `memory.worker.enabled` is on (clearing the sync-runner marker), and
+ * publishes the marker while it owns processing. The standalone worker process
+ * must NOT set this — it runs precisely when the flag is on and would otherwise
+ * stand itself down forever.
  */
-export function startInProcessMemoryJobsWorker(): MemoryJobsWorker {
+export function startInProcessMemoryJobsWorker(
+  opts: { standDownForWorkerProcess?: boolean } = {},
+): MemoryJobsWorker {
+  const standDownForWorkerProcess = opts.standDownForWorkerProcess === true;
   const recovered = resetRunningJobsToPending();
   if (recovered > 0) {
     log.info({ recovered }, "Recovered stale running memory jobs");
@@ -242,11 +256,39 @@ export function startInProcessMemoryJobsWorker(): MemoryJobsWorker {
   let tickRunning = false;
   let timer: ReturnType<typeof setTimeout>;
   let currentIntervalMs = POLL_INTERVAL_MIN_MS;
+  // Tracks whether this supervisor currently owns processing (and so has
+  // published the sync-runner marker). Only meaningful when
+  // `standDownForWorkerProcess` is set.
+  let syncRunnerMarked = false;
   const tick = async () => {
     if (stopped || tickRunning) return;
     tickRunning = true;
     try {
+      if (
+        standDownForWorkerProcess &&
+        getConfig().memory.worker?.enabled === true
+      ) {
+        // The out-of-process worker owns the queue — stand the synchronous
+        // runner down so jobs aren't processed twice, and retract the marker.
+        if (syncRunnerMarked) {
+          removeSyncRunnerMarker();
+          syncRunnerMarked = false;
+        }
+        // Switching modes is a rare operator action, so poll at the slow cap
+        // while standing down: it still picks up a `memory worker stop` (which
+        // flips the flag back off) within one interval, without waking every
+        // couple of seconds for the whole time the worker owns the queue.
+        currentIntervalMs = POLL_INTERVAL_MAX_MS;
+        return;
+      }
+      if (standDownForWorkerProcess && !syncRunnerMarked) {
+        // The flag is off — this in-process runner owns processing. Publish the
+        // marker so `memory worker status` reports the synchronous runner as
+        // running.
+        writeSyncRunnerMarker(process.pid);
+        syncRunnerMarked = true;
+      }
       const processed = await runMemoryJobsOnce({
         enableScheduledCleanup: true,
       });
@@ -295,6 +337,10 @@ export function startInProcessMemoryJobsWorker(): MemoryJobsWorker {
     stop(): void {
       stopped = true;
       clearTimeout(timer);
+      if (syncRunnerMarked) {
+        removeSyncRunnerMarker();
+        syncRunnerMarked = false;
+      }
     },
   };
 }

package/src/memory/memory-retrospective-job.ts CHANGED Viewed

@@ -43,6 +43,7 @@ import {
   parseInterfaceId,
 } from "../channels/types.js";
 import type { AssistantConfig } from "../config/types.js";
+import { getGuardianDelivery } from "../contacts/guardian-delivery-reader.js";
 import { extractTurnContextTimestamp } from "../context/compactor.js";
 import {
   formatLocalTimestamp,
@@ -290,6 +291,10 @@ export async function runForkBasedRetrospective(
   // parity — the fork always runs execution gate mode below, so the source's
   // full tool surface stays on the wire while the allowlist holds at
   // execution time.
+  // Warm the vellum guardian-delivery cache so the sync slug resolution inside
+  // resolveSourceParityPins (resolveUserSlug(undefined)) hits a fresh key
+  // instead of falling back to "default" on a cold/TTL-expired cache.
+  await getGuardianDelivery({ channelTypes: ["vellum"] });
   const { personaOverride, toolContextPin } = resolveSourceParityPins(
     sourceConversation,
     newMessages,

package/src/memory/migrations/209-strip-thinking-from-consolidated.ts CHANGED Viewed

@@ -18,11 +18,30 @@ const WATERMARK_KEY = "migration_209_strip_thinking_watermark";
 /**
  * Number of `rowid` values swept per `runAsyncSqlite` dispatch. Each window is
- * one off-thread subprocess transaction, so the size bounds both the WAL growth
- * per statement and how long a single write lock is held, while keeping the
- * number of subprocess spawns low on a large table.
+ * one off-thread subprocess transaction. The size stays well below the row
+ * count of a typical `messages` table so the whole table is never swept in a
+ * single subprocess: a window must finish inside {@link WINDOW_TIMEOUT_MS} for
+ * the per-window watermark to advance, and only an advancing watermark lets an
+ * interrupted run resume from the last completed window instead of re-running
+ * the same window forever. A smaller window also bounds WAL growth per statement
+ * and how long a single write lock is held, at the cost of more (cheap)
+ * subprocess spawns.
+ *
+ * Exported for the regression test that asserts the table is swept across
+ * multiple bounded windows rather than one table-sized sweep.
+ */
+export const ROWID_WINDOW = 2_000;
+/**
+ * Per-window wall-clock cap for the sweep subprocess. Set well above the time a
+ * {@link ROWID_WINDOW}-sized window needs even on a multi-GB table with large
+ * content blobs, so it trips only on a genuinely stuck subprocess (e.g. one
+ * blocked on a stale write lock) rather than on legitimately slow progress.
+ * Far below `runAsyncSqlite`'s one-hour whole-process default so a stuck window
+ * surfaces in minutes and the runner retries from the last completed window on
+ * the next boot.
  */
-const ROWID_WINDOW = 100_000;
+export const WINDOW_TIMEOUT_MS = 15 * 60 * 1000;
 /** SQL predicate: this `json_each` element is an internal reasoning block. */
 const IS_THINKING = `json_extract(value, '$.type') IN ('thinking', 'redacted_thinking')`;
@@ -127,7 +146,10 @@ export async function migrateStripThinkingFromConsolidated(
   while (lo < maxRow) {
     const hi = Math.min(lo + ROWID_WINDOW, maxRow);
-    const res = await runAsyncSqlite(windowSql(lo, hi), { dbPath });
+    const res = await runAsyncSqlite(windowSql(lo, hi), {
+      dbPath,
+      timeoutMs: WINDOW_TIMEOUT_MS,
+    });
     if (!res.ok) {
       // Leave the watermark at the last completed window; throwing reports the
       // step failed so the runner retries it (from the watermark) next boot