@vellumai/assistant 0.10.3 → 0.10.4-staging.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/openapi.yaml +73 -56
  2. package/package.json +1 -1
  3. package/src/__tests__/actor-trust-resolver-address-fallback.test.ts +83 -31
  4. package/src/__tests__/assistant-stream-state.test.ts +3 -76
  5. package/src/__tests__/background-workers-disk-pressure.test.ts +4 -2
  6. package/src/__tests__/channel-approval-routes.test.ts +21 -26
  7. package/src/__tests__/channel-delivery-store.test.ts +28 -0
  8. package/src/__tests__/channel-guardian.test.ts +82 -32
  9. package/src/__tests__/channel-inbound-disk-pressure.test.ts +11 -19
  10. package/src/__tests__/channel-reply-delivery.test.ts +6 -2
  11. package/src/__tests__/compaction-ledger-store.test.ts +128 -0
  12. package/src/__tests__/config-loader-backfill.test.ts +148 -0
  13. package/src/__tests__/consult-deadline.test.ts +60 -0
  14. package/src/__tests__/contact-store-interaction-info.test.ts +156 -0
  15. package/src/__tests__/contact-store-user-file.test.ts +7 -10
  16. package/src/__tests__/contacts-relay-reads.test.ts +6 -9
  17. package/src/__tests__/contacts-write.test.ts +0 -2
  18. package/src/__tests__/conversation-agent-loop-overflow.test.ts +4 -2
  19. package/src/__tests__/conversation-agent-loop.test.ts +98 -7
  20. package/src/__tests__/conversation-attention-telegram.test.ts +9 -11
  21. package/src/__tests__/conversation-error.test.ts +18 -0
  22. package/src/__tests__/conversation-fork-crud.test.ts +354 -24
  23. package/src/__tests__/conversation-title-service.test.ts +222 -201
  24. package/src/__tests__/db-compaction-events-migration.test.ts +129 -0
  25. package/src/__tests__/delete-propagation.test.ts +5 -3
  26. package/src/__tests__/dm-backfill.test.ts +6 -4
  27. package/src/__tests__/emit-signal-routing-intent.test.ts +2 -6
  28. package/src/__tests__/guardian-binding-drift-heal.test.ts +43 -23
  29. package/src/__tests__/guardian-dispatch.test.ts +50 -5
  30. package/src/__tests__/guardian-routing-state.test.ts +6 -10
  31. package/src/__tests__/helpers/channel-test-adapter.ts +45 -12
  32. package/src/__tests__/helpers/create-guardian-binding.ts +15 -23
  33. package/src/__tests__/helpers/mock-logger.ts +1 -0
  34. package/src/__tests__/helpers/seed-contact-channel.ts +96 -0
  35. package/src/__tests__/inbound-invite-redemption.test.ts +87 -10
  36. package/src/__tests__/invite-redemption-service.test.ts +273 -53
  37. package/src/__tests__/invite-routes-http.test.ts +34 -0
  38. package/src/__tests__/invite-service-ipc.test.ts +65 -2
  39. package/src/__tests__/list-messages-page-latest.test.ts +173 -4
  40. package/src/__tests__/mcp-config-secret-boundary.test.ts +3 -0
  41. package/src/__tests__/non-member-access-request.test.ts +15 -13
  42. package/src/__tests__/onboarding-persona-write.test.ts +52 -22
  43. package/src/__tests__/persist-onboarding-artifacts.test.ts +1 -0
  44. package/src/__tests__/persona-resolver.test.ts +75 -45
  45. package/src/__tests__/plugin-bootstrap.test.ts +13 -5
  46. package/src/__tests__/plugin-disabled-state.test.ts +190 -0
  47. package/src/__tests__/provider-usage-tracking.test.ts +1 -1
  48. package/src/__tests__/reaction-intercept-cold-cache-warm.test.ts +135 -0
  49. package/src/__tests__/reaction-intercept-member-verdict-warm.test.ts +158 -0
  50. package/src/__tests__/reaction-persistence.test.ts +51 -4
  51. package/src/__tests__/relay-server.test.ts +88 -31
  52. package/src/__tests__/runtime-attachment-metadata.test.ts +9 -11
  53. package/src/__tests__/settings-routes.test.ts +32 -0
  54. package/src/__tests__/slack-block-formatting.test.ts +1 -38
  55. package/src/__tests__/sse-actor-principal-guardian-source.test.ts +13 -36
  56. package/src/__tests__/stt-hints.test.ts +6 -3
  57. package/src/__tests__/subagent-fork-prompt-role.test.ts +195 -0
  58. package/src/__tests__/subagent-fork-spawn.test.ts +6 -7
  59. package/src/__tests__/subagent-role-registry.test.ts +17 -4
  60. package/src/__tests__/subagent-spawn-and-await.test.ts +546 -0
  61. package/src/__tests__/subagent-tools.test.ts +398 -3
  62. package/src/__tests__/thread-backfill.test.ts +3 -3
  63. package/src/__tests__/tool-preview-lifecycle.test.ts +26 -10
  64. package/src/__tests__/tool-start-timestamp.test.ts +4 -3
  65. package/src/__tests__/trusted-contact-approval-notifier.test.ts +37 -51
  66. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +2 -2
  67. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +9 -7
  68. package/src/__tests__/trusted-contact-multichannel.test.ts +16 -7
  69. package/src/__tests__/trusted-contact-verification.test.ts +79 -54
  70. package/src/__tests__/voice-guardian-cold-cache-warm.test.ts +137 -0
  71. package/src/__tests__/voice-invite-redemption.test.ts +183 -20
  72. package/src/__tests__/workspace-migration-102-preserve-heartbeat-enabled-for-existing-workspaces.test.ts +3 -3
  73. package/src/__tests__/workspace-migration-111-prune-seeded-callsite-defaults.test.ts +2 -2
  74. package/src/__tests__/workspace-migration-112-remove-advisor-callsite-override.test.ts +170 -0
  75. package/src/__tests__/workspace-migration-drop-user-md.test.ts +196 -238
  76. package/src/a2a/__tests__/e2e-a2a-channel.test.ts +35 -47
  77. package/src/agent/loop-exclusive-tool.test.ts +19 -15
  78. package/src/agent/loop-native-web-search.test.ts +200 -0
  79. package/src/agent/loop.ts +108 -1
  80. package/src/api/responses/conversation-message.ts +9 -0
  81. package/src/approvals/guardian-request-resolvers.ts +16 -4
  82. package/src/calls/__tests__/relay-setup-router.test.ts +10 -18
  83. package/src/calls/guardian-dispatch.ts +14 -11
  84. package/src/calls/inbound-trust-reader.ts +7 -1
  85. package/src/calls/relay-access-wait.ts +6 -6
  86. package/src/calls/relay-server.ts +22 -2
  87. package/src/calls/relay-setup-router.ts +10 -10
  88. package/src/cli/commands/__tests__/conversations-slack.test.ts +1 -0
  89. package/src/cli/commands/contacts.ts +10 -7
  90. package/src/cli/commands/memory/__tests__/worker.test.ts +147 -17
  91. package/src/cli/commands/memory/worker.ts +97 -30
  92. package/src/cli/commands/plugins.ts +3 -146
  93. package/src/cli/lib/__tests__/list-installed-plugins.test.ts +17 -17
  94. package/src/cli/lib/__tests__/publish-plugin.test.ts +98 -0
  95. package/src/cli/lib/publish-plugin.ts +231 -1
  96. package/src/config/__tests__/sync-gated-profiles.test.ts +5 -7
  97. package/src/config/bundled-skills/subagent/SKILL.md +16 -1
  98. package/src/config/bundled-skills/subagent/TOOLS.json +5 -4
  99. package/src/config/call-site-defaults.ts +0 -6
  100. package/src/config/llm-resolver.ts +0 -3
  101. package/src/config/schemas/call-site-catalog.ts +0 -7
  102. package/src/config/schemas/heartbeat.ts +2 -5
  103. package/src/config/schemas/llm.ts +3 -12
  104. package/src/config/schemas/memory-lifecycle.ts +1 -1
  105. package/src/config/seed-inference-profiles.ts +76 -35
  106. package/src/config/sync-gated-profiles.ts +0 -3
  107. package/src/contacts/__tests__/contacts-write-revoke-relay.test.ts +7 -8
  108. package/src/contacts/__tests__/member-write-relay.test.ts +35 -11
  109. package/src/contacts/contact-store.ts +27 -237
  110. package/src/contacts/contacts-write.ts +18 -58
  111. package/src/contacts/gateway-channel-read.ts +51 -0
  112. package/src/contacts/member-write-relay.ts +25 -31
  113. package/src/contacts/types.ts +3 -15
  114. package/src/daemon/__tests__/conversation-tool-setup.test.ts +0 -44
  115. package/src/daemon/conversation-agent-loop-handlers.ts +29 -10
  116. package/src/daemon/conversation-agent-loop.ts +68 -61
  117. package/src/daemon/conversation-error.ts +7 -10
  118. package/src/daemon/conversation-tool-setup.ts +0 -10
  119. package/src/daemon/conversation.ts +10 -0
  120. package/src/daemon/external-plugins-bootstrap.ts +8 -2
  121. package/src/daemon/handlers/__tests__/config-a2a-accept.test.ts +0 -1
  122. package/src/daemon/handlers/__tests__/config-a2a-complete.test.ts +0 -2
  123. package/src/daemon/handlers/__tests__/config-a2a-redeem.test.ts +0 -2
  124. package/src/daemon/handlers/__tests__/config-channels.test.ts +9 -14
  125. package/src/daemon/handlers/config-channels.ts +14 -29
  126. package/src/daemon/lifecycle.ts +16 -4
  127. package/src/daemon/message-types/surfaces.ts +2 -0
  128. package/src/heartbeat/heartbeat-service.ts +5 -0
  129. package/src/home/relationship-state-writer.ts +5 -0
  130. package/src/memory/__tests__/embedding-cache.test.ts +136 -0
  131. package/src/memory/compaction-ledger-store.ts +107 -0
  132. package/src/memory/conversation-crud.ts +136 -61
  133. package/src/memory/conversation-title-service.ts +173 -24
  134. package/src/memory/embedding-backend.ts +8 -1
  135. package/src/memory/embedding-cache.ts +139 -0
  136. package/src/memory/jobs-worker.ts +75 -29
  137. package/src/memory/memory-retrospective-job.ts +5 -0
  138. package/src/memory/migrations/209-strip-thinking-from-consolidated.ts +27 -5
  139. package/src/memory/migrations/302-create-compaction-events.ts +107 -0
  140. package/src/memory/migrations/303-add-conversation-creation-seq.ts +33 -0
  141. package/src/memory/migrations/__tests__/209-strip-thinking-from-consolidated.test.ts +79 -6
  142. package/src/memory/schema/contacts.ts +6 -2
  143. package/src/memory/schema/conversations.ts +39 -0
  144. package/src/memory/steps.ts +1090 -367
  145. package/src/memory/worker-control.ts +104 -18
  146. package/src/memory/worker-process.ts +17 -0
  147. package/src/messaging/channel-binding-metadata.ts +31 -0
  148. package/src/messaging/channel-binding-schema.ts +51 -0
  149. package/src/messaging/providers/__tests__/callback-routing.test.ts +45 -0
  150. package/src/messaging/providers/__tests__/transport-dispatch.test.ts +195 -0
  151. package/src/messaging/providers/a2a/__tests__/deliver.test.ts +11 -0
  152. package/src/messaging/providers/a2a/deliver.ts +5 -1
  153. package/src/messaging/providers/a2a/transport.ts +10 -0
  154. package/src/messaging/providers/callback-routing.ts +48 -0
  155. package/src/messaging/providers/channel-transport.ts +55 -0
  156. package/src/messaging/providers/index.ts +65 -241
  157. package/src/messaging/providers/slack/binding-metadata.ts +62 -0
  158. package/src/messaging/providers/slack/transport.ts +92 -0
  159. package/src/messaging/providers/telegram-bot/transport.ts +51 -0
  160. package/src/messaging/providers/whatsapp/transport.ts +38 -0
  161. package/src/notifications/__tests__/broadcaster.test.ts +0 -8
  162. package/src/notifications/__tests__/connected-channels.test.ts +8 -36
  163. package/src/notifications/__tests__/destination-resolver.test.ts +12 -117
  164. package/src/notifications/destination-resolver.ts +7 -23
  165. package/src/notifications/emit-signal.ts +5 -11
  166. package/src/plugins/defaults/index.ts +0 -35
  167. package/src/plugins/defaults/memory-v3-shadow/__tests__/dense.test.ts +11 -0
  168. package/src/plugins/defaults/memory-v3-shadow/__tests__/section-dense-store.test.ts +243 -2
  169. package/src/plugins/defaults/memory-v3-shadow/section-dense-store.ts +167 -14
  170. package/src/plugins/disabled-state.ts +31 -0
  171. package/src/plugins/registry.ts +55 -12
  172. package/src/prompts/persona-resolver.ts +43 -11
  173. package/src/providers/call-site-routing.ts +41 -0
  174. package/src/providers/provider-send-message.ts +6 -0
  175. package/src/providers/ratelimit.ts +6 -0
  176. package/src/providers/registry.ts +1 -1
  177. package/src/providers/retry.ts +6 -0
  178. package/src/providers/types.ts +13 -0
  179. package/src/providers/usage-tracking.ts +6 -0
  180. package/src/runtime/__tests__/guardian-vellum-migration.test.ts +30 -27
  181. package/src/runtime/__tests__/local-principal-trust.test.ts +16 -18
  182. package/src/runtime/__tests__/member-verdict-cache.test.ts +119 -0
  183. package/src/runtime/__tests__/trust-verdict-consumer.test.ts +115 -168
  184. package/src/runtime/access-request-helper.ts +1 -2
  185. package/src/runtime/actor-trust-resolver.ts +44 -17
  186. package/src/runtime/anchored-guardian.test.ts +7 -54
  187. package/src/runtime/anchored-guardian.ts +4 -53
  188. package/src/runtime/assistant-stream-state.ts +12 -74
  189. package/src/runtime/channel-reply-delivery.ts +3 -8
  190. package/src/runtime/guardian-vellum-migration.ts +18 -16
  191. package/src/runtime/invite-redemption-service.ts +25 -10
  192. package/src/runtime/local-actor-identity.test.ts +108 -0
  193. package/src/runtime/local-actor-identity.ts +27 -20
  194. package/src/runtime/member-verdict-cache.ts +0 -0
  195. package/src/runtime/routes/__tests__/contact-routes.test.ts +100 -7
  196. package/src/runtime/routes/__tests__/global-search-routes.test.ts +1 -2
  197. package/src/runtime/routes/__tests__/surface-action-routes.test.ts +2 -1
  198. package/src/runtime/routes/contact-routes.ts +40 -25
  199. package/src/runtime/routes/conversation-list-routes.ts +1 -29
  200. package/src/runtime/routes/conversation-routes.ts +27 -7
  201. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +0 -10
  202. package/src/runtime/routes/inbound-stages/background-dispatch.ts +4 -8
  203. package/src/runtime/routes/inbound-stages/reaction-intercept.ts +19 -0
  204. package/src/runtime/routes/settings-routes.ts +8 -3
  205. package/src/runtime/services/conversation-serializer.ts +6 -49
  206. package/src/runtime/slack-block-formatting.ts +0 -15
  207. package/src/runtime/trust-verdict-consumer.ts +36 -41
  208. package/src/subagent/__tests__/consult-prompt.test.ts +35 -0
  209. package/src/{plugins/defaults/advisor/__tests__/transcript.test.ts → subagent/__tests__/consult-transcript.test.ts} +47 -10
  210. package/src/{plugins/defaults/advisor/steering.ts → subagent/consult-prompt.ts} +17 -39
  211. package/src/{plugins/defaults/advisor/transcript.ts → subagent/consult-transcript.ts} +18 -8
  212. package/src/subagent/index.ts +1 -1
  213. package/src/subagent/manager.ts +245 -33
  214. package/src/subagent/types.ts +8 -1
  215. package/src/tools/registry.ts +10 -3
  216. package/src/tools/subagent/consult-deadline.ts +49 -0
  217. package/src/tools/subagent/spawn.ts +234 -5
  218. package/src/util/logger.ts +9 -0
  219. package/src/util/platform.ts +14 -0
  220. package/src/workspace/migrations/031-drop-user-md.ts +232 -148
  221. package/src/workspace/migrations/112-remove-advisor-callsite-override.ts +64 -0
  222. package/src/workspace/migrations/registry.ts +2 -0
  223. package/src/plugins/defaults/advisor/__tests__/advisor-gate.test.ts +0 -56
  224. package/src/plugins/defaults/advisor/__tests__/advisor-state-store.test.ts +0 -43
  225. package/src/plugins/defaults/advisor/__tests__/agent-loop-integration.test.ts +0 -137
  226. package/src/plugins/defaults/advisor/__tests__/consult.test.ts +0 -314
  227. package/src/plugins/defaults/advisor/__tests__/context-pack-gating.test.ts +0 -106
  228. package/src/plugins/defaults/advisor/__tests__/context-pack.test.ts +0 -60
  229. package/src/plugins/defaults/advisor/__tests__/hooks.test.ts +0 -138
  230. package/src/plugins/defaults/advisor/advisor-gate.ts +0 -29
  231. package/src/plugins/defaults/advisor/advisor-state-store.ts +0 -94
  232. package/src/plugins/defaults/advisor/config.ts +0 -21
  233. package/src/plugins/defaults/advisor/consult.ts +0 -197
  234. package/src/plugins/defaults/advisor/context-pack.ts +0 -288
  235. package/src/plugins/defaults/advisor/hooks/post-model-call.ts +0 -34
  236. package/src/plugins/defaults/advisor/hooks/pre-model-call.ts +0 -30
  237. package/src/plugins/defaults/advisor/hooks/user-prompt-submit.ts +0 -19
  238. package/src/plugins/defaults/advisor/package.json +0 -14
  239. package/src/plugins/defaults/advisor/tools/advisor.ts +0 -92
@@ -7,9 +7,14 @@
7
7
  * overwritten, never user-provided custom titles.
8
8
  */
9
9
 
10
- import { getConfiguredProvider } from "../providers/provider-send-message.js";
11
- import type { Provider } from "../providers/types.js";
12
- import { runBtwSidechain } from "../runtime/btw-sidechain.js";
10
+ import {
11
+ createTimeout,
12
+ extractAllText,
13
+ extractToolUse,
14
+ getConfiguredProvider,
15
+ userMessage as buildUserMessage,
16
+ } from "../providers/provider-send-message.js";
17
+ import type { Provider, ToolDefinition } from "../providers/types.js";
13
18
  import { publishConversationTitleChanged } from "../runtime/sync/resource-sync-events.js";
14
19
  import { getLogger } from "../util/logger.js";
15
20
  import { Mutex } from "../util/mutex.js";
@@ -171,16 +176,7 @@ export async function generateAndPersistConversationTitle(
171
176
  }
172
177
 
173
178
  const prompt = buildTitlePrompt(context, userMessage, assistantResponse);
174
- const result = await runBtwSidechain({
175
- content: prompt,
176
- provider,
177
- systemPrompt: buildTitleSystemPrompt(),
178
- tools: [],
179
- callSite: "conversationTitle",
180
- signal,
181
- timeoutMs: 15_000,
182
- });
183
- const title = normalizeTitle(result.text);
179
+ const title = await generateTitleViaLLM(provider, prompt, signal);
184
180
  if (title) {
185
181
  // Re-check replaceability before persisting (race guard)
186
182
  const current = getConversation(conversationId);
@@ -318,16 +314,7 @@ export async function regenerateConversationTitle(
318
314
  if (!/\n(?:User|Assistant): /.test(prompt)) {
319
315
  return { title: conversation.title ?? UNTITLED_FALLBACK, updated: false };
320
316
  }
321
- const result = await runBtwSidechain({
322
- content: prompt,
323
- provider,
324
- systemPrompt: buildTitleSystemPrompt(),
325
- tools: [],
326
- callSite: "conversationTitle",
327
- signal,
328
- timeoutMs: 15_000,
329
- });
330
- const title = normalizeTitle(result.text);
317
+ const title = await generateTitleViaLLM(provider, prompt, signal);
331
318
  if (title) {
332
319
  // Re-check isAutoTitle before persisting (race guard against manual rename)
333
320
  const current = getConversation(conversationId);
@@ -396,6 +383,81 @@ function buildTitleSystemPrompt(): string {
396
383
  ].join("\n");
397
384
  }
398
385
 
386
+ const TITLE_TOOL_NAME = "record_conversation_title";
387
+
388
+ /**
389
+ * Tool the title model is forced to call. Constraining the output to a single
390
+ * `title` argument keeps weak/fast models (e.g. Haiku-class title models) from
391
+ * "thinking aloud" or continuing the conversation in the response text —
392
+ * failure modes that otherwise get captured verbatim as the title
393
+ * (e.g. "I need to generate a…", "I'll work through these files…").
394
+ */
395
+ function buildTitleTool(): ToolDefinition {
396
+ return {
397
+ name: TITLE_TOOL_NAME,
398
+ description:
399
+ "Record the conversation's title. Call this exactly once with a short noun phrase naming the TOPIC — never a sentence, a reply, or any preamble.",
400
+ input_schema: {
401
+ type: "object",
402
+ properties: {
403
+ title: {
404
+ type: "string",
405
+ description:
406
+ "2–5 words, 40 characters max. A scannable sidebar label naming the topic (e.g. 'Auth Middleware Rewrite', 'Docker Volume Mounts'). No quotes, markdown, or trailing punctuation.",
407
+ },
408
+ },
409
+ required: ["title"],
410
+ },
411
+ };
412
+ }
413
+
414
+ /**
415
+ * Run the title LLM call with a forced tool so the model returns a structured
416
+ * `{ title }` rather than free text. Returns a normalized title, or "" when the
417
+ * model declines or misbehaves — callers fall back to a deterministic title.
418
+ *
419
+ * Forcing the tool is the primary guard against prose leakage; `normalizeTitle`
420
+ * is the backstop for the text-fallback path and for any provider that ignores
421
+ * forced `tool_choice`.
422
+ */
423
+ async function generateTitleViaLLM(
424
+ provider: Provider,
425
+ prompt: string,
426
+ signal?: AbortSignal,
427
+ ): Promise<string> {
428
+ const { signal: timeoutSignal, cleanup } = createTimeout(15_000);
429
+ const combinedSignal = signal
430
+ ? AbortSignal.any([signal, timeoutSignal])
431
+ : timeoutSignal;
432
+ try {
433
+ const response = await provider.sendMessage([buildUserMessage(prompt)], {
434
+ tools: [buildTitleTool()],
435
+ systemPrompt: buildTitleSystemPrompt(),
436
+ config: {
437
+ max_tokens: 256,
438
+ callSite: "conversationTitle",
439
+ tool_choice: { type: "tool", name: TITLE_TOOL_NAME },
440
+ disableCache: true,
441
+ },
442
+ signal: combinedSignal,
443
+ });
444
+ const toolBlock = extractToolUse(response);
445
+ const titleInput = toolBlock?.input as { title?: unknown } | undefined;
446
+ if (
447
+ toolBlock?.name === TITLE_TOOL_NAME &&
448
+ typeof titleInput?.title === "string"
449
+ ) {
450
+ return normalizeTitle(titleInput.title);
451
+ }
452
+ // Provider ignored the forced tool (or the model emitted prose instead of
453
+ // calling it). Fall back to the response text — `normalizeTitle`'s prose
454
+ // guard rejects a ramble while keeping a compliant plain-text title.
455
+ return normalizeTitle(extractAllText(response));
456
+ } finally {
457
+ cleanup();
458
+ }
459
+ }
460
+
399
461
  function buildTitlePrompt(
400
462
  context?: TitleContext,
401
463
  userMessage?: string,
@@ -503,13 +565,100 @@ function truncateTitle(title: string): string {
503
565
  function normalizeTitle(raw: string): string {
504
566
  let title = raw.trim().replace(/^["']|["']$/g, "");
505
567
  title = stripMarkdown(title);
506
- title = stripThinkingTags(title);
568
+ title = stripThinkingTags(title).trim();
569
+ if (!title) return "";
570
+ // Reject outputs that are the model reasoning aloud or continuing the
571
+ // conversation instead of naming it (e.g. "I need to generate a…", "I'll
572
+ // work through these files…"). Callers fall back to a deterministic title.
573
+ if (looksLikeLeakedProse(title)) {
574
+ return "";
575
+ }
507
576
  if (META_FAILURE_TITLES.has(title.toLowerCase())) {
508
577
  return "";
509
578
  }
510
579
  return truncateTitle(title);
511
580
  }
512
581
 
582
+ /** Reasoning/sentence openers that never start a legitimate topic title. */
583
+ const LEAKED_PROSE_PREFIXES = [
584
+ "i need to",
585
+ "i needed to",
586
+ "i should",
587
+ "i will",
588
+ "i'll",
589
+ "i can ",
590
+ "i can't",
591
+ "i cannot",
592
+ "i'm ",
593
+ "i am ",
594
+ "i've ",
595
+ "i have ",
596
+ "i'd ",
597
+ "i would",
598
+ "let me",
599
+ "looking at",
600
+ "based on",
601
+ "given the",
602
+ "to generate",
603
+ "to summarize",
604
+ "to title",
605
+ // Subject-led reasoning openers. A bare noun phrase ("The User Interface
606
+ // Redesign", "The Conversation API") is a valid title, so each subject only
607
+ // counts as leaked prose when a verb or possessive follows it — marking the
608
+ // output as a sentence rather than a topic.
609
+ "the user wants",
610
+ "the user asked",
611
+ "the user is",
612
+ "the user wanted",
613
+ "the user needs",
614
+ "the user said",
615
+ "the user has",
616
+ "the user would",
617
+ "the user's request",
618
+ "the conversation is",
619
+ "this conversation is",
620
+ "the conversation appears",
621
+ "the conversation seems",
622
+ "the conversation covers",
623
+ "the conversation discusses",
624
+ "the assistant should",
625
+ "the assistant is",
626
+ "the assistant wants",
627
+ "the assistant needs",
628
+ "the title should",
629
+ "the title is",
630
+ "the title would",
631
+ "the title for",
632
+ "here's ",
633
+ "here is ",
634
+ "here are ",
635
+ "sure,",
636
+ "okay,",
637
+ "ok,",
638
+ ];
639
+
640
+ /**
641
+ * Heuristic guard for title outputs that are clearly prose — the model
642
+ * reasoning aloud or replying to the conversation rather than naming it. A real
643
+ * title is a single-line short noun phrase, so we reject multi-line output,
644
+ * embedded transcript markers, leading reasoning openers, and sentence-shaped
645
+ * clauses. Deliberately tight: a false reject only costs a deterministic
646
+ * fallback title, while a false accept persists a broken one.
647
+ */
648
+ function looksLikeLeakedProse(title: string): boolean {
649
+ if (/\n/.test(title)) return true;
650
+ if (/\b(?:user|assistant)\s*:/i.test(title)) return true;
651
+ const lower = title.toLowerCase();
652
+ if (LEAKED_PROSE_PREFIXES.some((prefix) => lower.startsWith(prefix))) {
653
+ return true;
654
+ }
655
+ // Sentence-shaped: terminal punctuation on a multi-word clause.
656
+ if (/[.?!]$/.test(title) && title.split(/\s+/).length > 5) {
657
+ return true;
658
+ }
659
+ return false;
660
+ }
661
+
513
662
  /** Strip thinking tags so they don't bleed into generated titles. */
514
663
  function stripThinkingTags(text: string): string {
515
664
  return text
@@ -262,7 +262,14 @@ function getCached(
262
262
  return backendCache.get(cacheKey(provider, model, extras));
263
263
  }
264
264
 
265
- function geminiCacheExtras(config: AssistantConfig): string[] {
265
+ /**
266
+ * The Gemini embedding options that change the output vector for identical
267
+ * input — task type and output dimensionality — rendered as stable cache-key
268
+ * fragments. Empty for a default Gemini config and for every non-Gemini
269
+ * provider. Part of the in-memory vector-cache identity here, and reused by the
270
+ * v3 section dense store so its persistent cache shares the same identity.
271
+ */
272
+ export function geminiCacheExtras(config: AssistantConfig): string[] {
266
273
  const extras: string[] = [];
267
274
  if (config.memory.embeddings.geminiTaskType) {
268
275
  extras.push(`task=${config.memory.embeddings.geminiTaskType}`);
@@ -0,0 +1,139 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Shared dense-embedding cache over the `memory_embeddings` SQLite table
3
+ // ---------------------------------------------------------------------------
4
+ //
5
+ // A read/write pair that caches one dense vector keyed on
6
+ // `(targetType, targetId, provider, model)` alongside the content hash it was
7
+ // embedded from, so callers can skip the embedding-backend round-trip when an
8
+ // input's text is unchanged. The `embed_concept_page` job pioneered this
9
+ // pattern for whole-page bodies; this module factors out the generic mechanics
10
+ // — dim-match gating, legacy-null-hash handling, blob encode/decode, and the
11
+ // upsert on the unique key — so other embedders (e.g. the v3 section dense
12
+ // store) reuse one implementation instead of duplicating it.
13
+
14
+ import { randomUUID } from "node:crypto";
15
+
16
+ import { and, eq } from "drizzle-orm";
17
+
18
+ import { getLogger } from "../util/logger.js";
19
+ import type { getDb } from "./db-connection.js";
20
+ import { blobToVector, vectorToBlob } from "./job-utils.js";
21
+ import { memoryEmbeddings } from "./schema.js";
22
+
23
+ const log = getLogger("memory-embedding-cache");
24
+
25
+ type MemoryDb = ReturnType<typeof getDb>;
26
+
27
+ /** Lookup key for {@link readEmbeddingCache}. */
28
+ export interface EmbeddingCacheKey {
29
+ targetType: string;
30
+ targetId: string;
31
+ provider: string;
32
+ model: string;
33
+ /** Configured embedding dimension; a row at a different size is a miss. */
34
+ expectedDim: number;
35
+ }
36
+
37
+ /** A cached dense vector plus the content hash it was embedded from. */
38
+ export interface EmbeddingCacheEntry {
39
+ dense: number[];
40
+ contentHash: string;
41
+ }
42
+
43
+ /**
44
+ * Look up a cached dense vector keyed on `(targetType, targetId, provider,
45
+ * model)`. Returns the row only when the persisted dimensions match
46
+ * `expectedDim` — a stale row from a previous `vectorSize` is treated as a miss
47
+ * so the caller re-embeds. A row with a null `contentHash` (legacy/corrupt) is
48
+ * likewise a miss rather than a key the caller could misalign against.
49
+ */
50
+ export function readEmbeddingCache(
51
+ db: MemoryDb,
52
+ key: EmbeddingCacheKey,
53
+ ): EmbeddingCacheEntry | null {
54
+ const row = db
55
+ .select({
56
+ vectorBlob: memoryEmbeddings.vectorBlob,
57
+ vectorJson: memoryEmbeddings.vectorJson,
58
+ dimensions: memoryEmbeddings.dimensions,
59
+ contentHash: memoryEmbeddings.contentHash,
60
+ })
61
+ .from(memoryEmbeddings)
62
+ .where(
63
+ and(
64
+ eq(memoryEmbeddings.targetType, key.targetType),
65
+ eq(memoryEmbeddings.targetId, key.targetId),
66
+ eq(memoryEmbeddings.provider, key.provider),
67
+ eq(memoryEmbeddings.model, key.model),
68
+ ),
69
+ )
70
+ .get();
71
+ if (!row || row.dimensions !== key.expectedDim) return null;
72
+ if (row.contentHash === null) return null;
73
+ const dense = row.vectorBlob
74
+ ? blobToVector(row.vectorBlob as Buffer)
75
+ : (JSON.parse(row.vectorJson!) as number[]);
76
+ return { dense, contentHash: row.contentHash };
77
+ }
78
+
79
+ /** Parameters for {@link writeEmbeddingCache}. */
80
+ export interface EmbeddingCacheWrite {
81
+ targetType: string;
82
+ targetId: string;
83
+ dense: number[];
84
+ contentHash: string;
85
+ provider: string;
86
+ model: string;
87
+ now: number;
88
+ }
89
+
90
+ /**
91
+ * Persist a freshly embedded dense vector, upserting on the
92
+ * `(targetType, targetId, provider, model)` unique key. Best-effort: a write
93
+ * failure is logged and swallowed so the caller's downstream write still runs.
94
+ */
95
+ export function writeEmbeddingCache(
96
+ db: MemoryDb,
97
+ params: EmbeddingCacheWrite,
98
+ ): void {
99
+ const { targetType, targetId, dense, contentHash, provider, model, now } =
100
+ params;
101
+ try {
102
+ const blobValue = vectorToBlob(dense);
103
+ db.insert(memoryEmbeddings)
104
+ .values({
105
+ id: randomUUID(),
106
+ targetType,
107
+ targetId,
108
+ provider,
109
+ model,
110
+ dimensions: dense.length,
111
+ vectorBlob: blobValue,
112
+ vectorJson: null,
113
+ contentHash,
114
+ createdAt: now,
115
+ updatedAt: now,
116
+ })
117
+ .onConflictDoUpdate({
118
+ target: [
119
+ memoryEmbeddings.targetType,
120
+ memoryEmbeddings.targetId,
121
+ memoryEmbeddings.provider,
122
+ memoryEmbeddings.model,
123
+ ],
124
+ set: {
125
+ vectorBlob: blobValue,
126
+ vectorJson: null,
127
+ dimensions: dense.length,
128
+ contentHash,
129
+ updatedAt: now,
130
+ },
131
+ })
132
+ .run();
133
+ } catch (err) {
134
+ log.warn(
135
+ { err, targetType, targetId },
136
+ "Failed to write embedding cache row",
137
+ );
138
+ }
139
+ }
@@ -93,7 +93,11 @@ import {
93
93
  memoryV2ConsolidateJob,
94
94
  } from "./v2/consolidation-job.js";
95
95
  import { memoryV2SweepJob } from "./v2/sweep-job.js";
96
- import { spawnMemoryWorkerProcess } from "./worker-control.js";
96
+ import {
97
+ removeSyncRunnerMarker,
98
+ spawnMemoryWorkerProcess,
99
+ writeSyncRunnerMarker,
100
+ } from "./worker-control.js";
97
101
 
98
102
  const log = getLogger("memory-jobs-worker");
99
103
 
@@ -163,23 +167,33 @@ export interface MemoryJobsWorker {
163
167
  }
164
168
 
165
169
  /**
166
- * Start the memory jobs worker using the configured implementation.
170
+ * Start the daemon's memory jobs worker supervisor.
167
171
  *
168
- * `memory.worker.enabled` selects between two implementations:
169
- * - enabled: spawn the worker as a separate OS process (the same path as
170
- * `assistant memory worker start`), keeping long-running jobs off the
171
- * caller's event loop.
172
- * - disabled (default): run the worker in-process on the caller's event
173
- * loop.
172
+ * The daemon always runs the in-process supervisor returned here. The
173
+ * supervisor owns the synchronous in-process runner and reconciles to
174
+ * `memory.worker.enabled` on every poll, re-reading the flag from disk so a
175
+ * runtime change takes effect without a restart:
176
+ * - flag off (default): drain the queue in-process and publish the
177
+ * sync-runner marker so `status` reports the synchronous runner as going.
178
+ * - flag on: stand down (the out-of-process worker owns the queue) and clear
179
+ * the marker.
180
+ * Gating on the flag — rather than on the worker process actually being present
181
+ * — keeps exactly one drainer active and avoids a boot race: when the flag is
182
+ * on the supervisor never processes, so it can't claim jobs that the spawning
183
+ * worker's startup recovery would then reset out from under it.
174
184
  *
175
- * The flag is read here so callers don't branch on it themselves. It only
176
- * governs which implementation starts; shutdown stops whichever worker is
177
- * actually running (see daemon/shutdown-handlers.ts), so the handle returned
178
- * for the out-of-process implementation has a no-op `stop()`.
185
+ * `memory.worker.enabled` is also the persisted boot preference: when set, the
186
+ * out-of-process worker is spawned here at startup so it is running
187
+ * immediately. The CLI `memory worker start`/`stop` commands flip the flag (and
188
+ * spawn/stop the worker process), so the supervisor switches the running daemon
189
+ * between synchronous and out-of-process modes within one poll. When the flag
190
+ * is on but no worker process is running, neither drainer processes — `status`
191
+ * surfaces this (worker not running, synchronous runner not running).
179
192
  *
180
193
  * This dispatcher must not be used as the standalone worker process's entry —
181
- * that would recurse and fork-bomb. `worker-process.ts` calls
182
- * {@link startInProcessMemoryJobsWorker} directly.
194
+ * that would recurse and fork-bomb, and the flag-on worker process would stand
195
+ * itself down. `worker-process.ts` calls {@link startInProcessMemoryJobsWorker}
196
+ * directly with no options.
183
197
  */
184
198
  export function startMemoryJobsWorker(): MemoryJobsWorker {
185
199
  if (getConfig().memory.worker?.enabled === true) {
@@ -195,31 +209,31 @@ export function startMemoryJobsWorker(): MemoryJobsWorker {
195
209
  .catch((err) =>
196
210
  log.warn(
197
211
  { err },
198
- "Failed to start memory worker process — memory jobs will not be processed",
212
+ "Failed to start memory worker process — the in-process supervisor will drain the queue instead",
199
213
  ),
200
214
  );
201
- return {
202
- async runOnce(): Promise<number> {
203
- return 0;
204
- },
205
- // No-op: shutdown always stops the worker process via the live-state
206
- // PID probe in daemon/shutdown-handlers.ts, since it can't know whether
207
- // the process was started here or out of band (e.g. `assistant memory
208
- // worker start`) after boot.
209
- stop(): void {},
210
- };
211
215
  }
212
216
 
213
- return startInProcessMemoryJobsWorker();
217
+ return startInProcessMemoryJobsWorker({ standDownForWorkerProcess: true });
214
218
  }
215
219
 
216
220
  /**
217
221
  * Run the memory jobs worker in-process on the caller's event loop: poll for
218
222
  * claimable jobs with adaptive backoff until {@link MemoryJobsWorker.stop} is
219
- * called. This is the worker loop itself — used directly by the daemon (when
220
- * `memory.worker.enabled` is off) and by the standalone worker process.
223
+ * called. This is the worker loop itself — used by the daemon supervisor (with
224
+ * `standDownForWorkerProcess`) and by the standalone worker process (without).
225
+ *
226
+ * When `standDownForWorkerProcess` is set the loop acts as the daemon's
227
+ * synchronous-runner supervisor: each tick it skips processing while
228
+ * `memory.worker.enabled` is on (clearing the sync-runner marker), and
229
+ * publishes the marker while it owns processing. The standalone worker process
230
+ * must NOT set this — it runs precisely when the flag is on and would otherwise
231
+ * stand itself down forever.
221
232
  */
222
- export function startInProcessMemoryJobsWorker(): MemoryJobsWorker {
233
+ export function startInProcessMemoryJobsWorker(
234
+ opts: { standDownForWorkerProcess?: boolean } = {},
235
+ ): MemoryJobsWorker {
236
+ const standDownForWorkerProcess = opts.standDownForWorkerProcess === true;
223
237
  const recovered = resetRunningJobsToPending();
224
238
  if (recovered > 0) {
225
239
  log.info({ recovered }, "Recovered stale running memory jobs");
@@ -242,11 +256,39 @@ export function startInProcessMemoryJobsWorker(): MemoryJobsWorker {
242
256
  let tickRunning = false;
243
257
  let timer: ReturnType<typeof setTimeout>;
244
258
  let currentIntervalMs = POLL_INTERVAL_MIN_MS;
259
+ // Tracks whether this supervisor currently owns processing (and so has
260
+ // published the sync-runner marker). Only meaningful when
261
+ // `standDownForWorkerProcess` is set.
262
+ let syncRunnerMarked = false;
245
263
 
246
264
  const tick = async () => {
247
265
  if (stopped || tickRunning) return;
248
266
  tickRunning = true;
249
267
  try {
268
+ if (
269
+ standDownForWorkerProcess &&
270
+ getConfig().memory.worker?.enabled === true
271
+ ) {
272
+ // The out-of-process worker owns the queue — stand the synchronous
273
+ // runner down so jobs aren't processed twice, and retract the marker.
274
+ if (syncRunnerMarked) {
275
+ removeSyncRunnerMarker();
276
+ syncRunnerMarked = false;
277
+ }
278
+ // Switching modes is a rare operator action, so poll at the slow cap
279
+ // while standing down: it still picks up a `memory worker stop` (which
280
+ // flips the flag back off) within one interval, without waking every
281
+ // couple seconds for the whole time the worker owns the queue.
282
+ currentIntervalMs = POLL_INTERVAL_MAX_MS;
283
+ return;
284
+ }
285
+ if (standDownForWorkerProcess && !syncRunnerMarked) {
286
+ // The flag is off — this in-process runner owns processing. Publish the
287
+ // marker so `memory worker status` reports the synchronous runner as
288
+ // going.
289
+ writeSyncRunnerMarker(process.pid);
290
+ syncRunnerMarked = true;
291
+ }
250
292
  const processed = await runMemoryJobsOnce({
251
293
  enableScheduledCleanup: true,
252
294
  });
@@ -295,6 +337,10 @@ export function startInProcessMemoryJobsWorker(): MemoryJobsWorker {
295
337
  stop(): void {
296
338
  stopped = true;
297
339
  clearTimeout(timer);
340
+ if (syncRunnerMarked) {
341
+ removeSyncRunnerMarker();
342
+ syncRunnerMarked = false;
343
+ }
298
344
  },
299
345
  };
300
346
  }
@@ -43,6 +43,7 @@ import {
43
43
  parseInterfaceId,
44
44
  } from "../channels/types.js";
45
45
  import type { AssistantConfig } from "../config/types.js";
46
+ import { getGuardianDelivery } from "../contacts/guardian-delivery-reader.js";
46
47
  import { extractTurnContextTimestamp } from "../context/compactor.js";
47
48
  import {
48
49
  formatLocalTimestamp,
@@ -290,6 +291,10 @@ export async function runForkBasedRetrospective(
290
291
  // parity — the fork always runs execution gate mode below, so the source's
291
292
  // full tool surface stays on the wire while the allowlist holds at
292
293
  // execution time.
294
+ // Warm the vellum guardian-delivery cache so the sync slug resolution inside
295
+ // resolveSourceParityPins (resolveUserSlug(undefined)) hits a fresh key
296
+ // instead of falling back to "default" on a cold/TTL-expired cache.
297
+ await getGuardianDelivery({ channelTypes: ["vellum"] });
293
298
  const { personaOverride, toolContextPin } = resolveSourceParityPins(
294
299
  sourceConversation,
295
300
  newMessages,
@@ -18,11 +18,30 @@ const WATERMARK_KEY = "migration_209_strip_thinking_watermark";
18
18
 
19
19
  /**
20
20
  * Number of `rowid` values swept per `runAsyncSqlite` dispatch. Each window is
21
- * one off-thread subprocess transaction, so the size bounds both the WAL growth
22
- * per statement and how long a single write lock is held, while keeping the
23
- * number of subprocess spawns low on a large table.
21
+ * one off-thread subprocess transaction. The size stays well below the row
22
+ * count of a typical `messages` table so the whole table is never swept in a
23
+ * single subprocess: a window must finish inside {@link WINDOW_TIMEOUT_MS} for
24
+ * the per-window watermark to advance, and only an advancing watermark lets an
25
+ * interrupted run resume from the last completed window instead of re-running
26
+ * the same window forever. A smaller window also bounds WAL growth per statement
27
+ * and how long a single write lock is held, at the cost of more (cheap)
28
+ * subprocess spawns.
29
+ *
30
+ * Exported for the regression test that asserts the table is swept across
31
+ * multiple bounded windows rather than one table-sized sweep.
32
+ */
33
+ export const ROWID_WINDOW = 2_000;
34
+
35
+ /**
36
+ * Per-window wall-clock cap for the sweep subprocess. Set well above the time a
37
+ * {@link ROWID_WINDOW}-sized window needs even on a multi-GB table with large
38
+ * content blobs, so it trips only on a genuinely stuck subprocess (e.g. one
39
+ * blocked on a stale write lock) rather than on legitimately slow progress.
40
+ * Far below `runAsyncSqlite`'s one-hour whole-process default so a stuck window
41
+ * surfaces in minutes and the runner retries from the last completed window on
42
+ * the next boot.
24
43
  */
25
- const ROWID_WINDOW = 100_000;
44
+ export const WINDOW_TIMEOUT_MS = 15 * 60 * 1000;
26
45
 
27
46
  /** SQL predicate: this `json_each` element is an internal reasoning block. */
28
47
  const IS_THINKING = `json_extract(value, '$.type') IN ('thinking', 'redacted_thinking')`;
@@ -127,7 +146,10 @@ export async function migrateStripThinkingFromConsolidated(
127
146
  while (lo < maxRow) {
128
147
  const hi = Math.min(lo + ROWID_WINDOW, maxRow);
129
148
 
130
- const res = await runAsyncSqlite(windowSql(lo, hi), { dbPath });
149
+ const res = await runAsyncSqlite(windowSql(lo, hi), {
150
+ dbPath,
151
+ timeoutMs: WINDOW_TIMEOUT_MS,
152
+ });
131
153
  if (!res.ok) {
132
154
  // Leave the watermark at the last completed window; throwing reports the
133
155
  // step failed so the runner retries it (from the watermark) next boot