@vellumai/assistant 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/ARCHITECTURE.md +109 -0
  2. package/docs/architecture/memory.md +105 -0
  3. package/docs/skills.md +100 -0
  4. package/package.json +1 -1
  5. package/src/__tests__/archive-recall.test.ts +560 -0
  6. package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
  7. package/src/__tests__/conversation-agent-loop.test.ts +7 -0
  8. package/src/__tests__/conversation-clear-safety.test.ts +259 -0
  9. package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
  10. package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
  11. package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
  12. package/src/__tests__/conversation-wipe.test.ts +226 -0
  13. package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
  14. package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
  15. package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
  16. package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
  17. package/src/__tests__/inline-command-runner.test.ts +311 -0
  18. package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
  19. package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
  20. package/src/__tests__/list-messages-attachments.test.ts +96 -0
  21. package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
  22. package/src/__tests__/memory-brief-time.test.ts +285 -0
  23. package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
  24. package/src/__tests__/memory-chunk-archive.test.ts +400 -0
  25. package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
  26. package/src/__tests__/memory-episode-archive.test.ts +370 -0
  27. package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
  28. package/src/__tests__/memory-observation-archive.test.ts +375 -0
  29. package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
  30. package/src/__tests__/memory-recall-quality.test.ts +2 -2
  31. package/src/__tests__/memory-reducer-job.test.ts +538 -0
  32. package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
  33. package/src/__tests__/memory-reducer-store.test.ts +728 -0
  34. package/src/__tests__/memory-reducer-types.test.ts +707 -0
  35. package/src/__tests__/memory-reducer.test.ts +704 -0
  36. package/src/__tests__/memory-regressions.test.ts +30 -8
  37. package/src/__tests__/memory-simplified-config.test.ts +281 -0
  38. package/src/__tests__/parse-identity-fields.test.ts +129 -0
  39. package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
  40. package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
  41. package/src/__tests__/skill-load-inline-command.test.ts +598 -0
  42. package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
  43. package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
  44. package/src/__tests__/skills-transitive-hash.test.ts +333 -0
  45. package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
  46. package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
  47. package/src/cli/commands/conversations.ts +18 -0
  48. package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
  49. package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
  50. package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
  51. package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
  52. package/src/config/feature-flag-registry.json +16 -0
  53. package/src/config/raw-config-utils.ts +28 -0
  54. package/src/config/schema.ts +12 -0
  55. package/src/config/schemas/memory-simplified.ts +101 -0
  56. package/src/config/schemas/memory.ts +4 -0
  57. package/src/config/skills.ts +50 -4
  58. package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
  59. package/src/daemon/conversation-agent-loop.ts +71 -1
  60. package/src/daemon/conversation-lifecycle.ts +11 -1
  61. package/src/daemon/conversation-memory.ts +117 -0
  62. package/src/daemon/conversation-runtime-assembly.ts +3 -1
  63. package/src/daemon/conversation-surfaces.ts +31 -8
  64. package/src/daemon/conversation.ts +40 -23
  65. package/src/daemon/handlers/config-embeddings.ts +10 -2
  66. package/src/daemon/handlers/config-model.ts +0 -9
  67. package/src/daemon/handlers/conversations.ts +11 -0
  68. package/src/daemon/handlers/identity.ts +12 -1
  69. package/src/daemon/lifecycle.ts +52 -1
  70. package/src/daemon/message-types/conversations.ts +0 -1
  71. package/src/daemon/server.ts +1 -1
  72. package/src/followups/followup-store.ts +47 -1
  73. package/src/memory/archive-recall.ts +516 -0
  74. package/src/memory/archive-store.ts +400 -0
  75. package/src/memory/brief-formatting.ts +33 -0
  76. package/src/memory/brief-open-loops.ts +266 -0
  77. package/src/memory/brief-time.ts +162 -0
  78. package/src/memory/brief.ts +75 -0
  79. package/src/memory/conversation-crud.ts +455 -101
  80. package/src/memory/conversation-key-store.ts +33 -4
  81. package/src/memory/db-init.ts +16 -0
  82. package/src/memory/indexer.ts +106 -15
  83. package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
  84. package/src/memory/job-handlers/conversation-starters.ts +9 -3
  85. package/src/memory/job-handlers/embedding.test.ts +1 -0
  86. package/src/memory/job-handlers/embedding.ts +83 -0
  87. package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
  88. package/src/memory/job-utils.ts +1 -1
  89. package/src/memory/jobs-store.ts +8 -0
  90. package/src/memory/jobs-worker.ts +20 -0
  91. package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
  92. package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
  93. package/src/memory/migrations/141-rename-verification-table.ts +8 -0
  94. package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
  95. package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
  96. package/src/memory/migrations/185-memory-brief-state.ts +52 -0
  97. package/src/memory/migrations/186-memory-archive.ts +109 -0
  98. package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
  99. package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
  100. package/src/memory/migrations/index.ts +4 -0
  101. package/src/memory/qdrant-client.ts +23 -4
  102. package/src/memory/reducer-scheduler.ts +242 -0
  103. package/src/memory/reducer-store.ts +271 -0
  104. package/src/memory/reducer-types.ts +106 -0
  105. package/src/memory/reducer.ts +467 -0
  106. package/src/memory/schema/conversations.ts +3 -0
  107. package/src/memory/schema/index.ts +2 -0
  108. package/src/memory/schema/infrastructure.ts +1 -0
  109. package/src/memory/schema/memory-archive.ts +121 -0
  110. package/src/memory/schema/memory-brief.ts +55 -0
  111. package/src/memory/search/semantic.ts +17 -4
  112. package/src/oauth/oauth-store.ts +3 -1
  113. package/src/permissions/checker.ts +89 -6
  114. package/src/permissions/defaults.ts +14 -0
  115. package/src/runtime/auth/route-policy.ts +10 -1
  116. package/src/runtime/routes/conversation-management-routes.ts +94 -2
  117. package/src/runtime/routes/conversation-query-routes.ts +7 -0
  118. package/src/runtime/routes/conversation-routes.ts +52 -5
  119. package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
  120. package/src/runtime/routes/identity-routes.ts +2 -35
  121. package/src/runtime/routes/llm-context-normalization.ts +14 -1
  122. package/src/runtime/routes/memory-item-routes.ts +90 -5
  123. package/src/runtime/routes/secret-routes.ts +3 -0
  124. package/src/runtime/routes/surface-action-routes.ts +68 -1
  125. package/src/schedule/schedule-store.ts +28 -0
  126. package/src/schedule/scheduler.ts +6 -2
  127. package/src/skills/inline-command-expansions.ts +204 -0
  128. package/src/skills/inline-command-render.ts +127 -0
  129. package/src/skills/inline-command-runner.ts +242 -0
  130. package/src/skills/transitive-version-hash.ts +88 -0
  131. package/src/tasks/task-store.ts +43 -1
  132. package/src/telemetry/usage-telemetry-reporter.ts +1 -1
  133. package/src/tools/filesystem/edit.ts +6 -1
  134. package/src/tools/filesystem/read.ts +6 -1
  135. package/src/tools/filesystem/write.ts +6 -1
  136. package/src/tools/memory/handlers.ts +129 -1
  137. package/src/tools/permission-checker.ts +8 -1
  138. package/src/tools/schedule/create.ts +3 -0
  139. package/src/tools/schedule/list.ts +5 -1
  140. package/src/tools/schedule/update.ts +6 -0
  141. package/src/tools/skills/load.ts +140 -6
  142. package/src/util/platform.ts +18 -0
  143. package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
  144. package/src/workspace/migrations/registry.ts +1 -1
@@ -10,6 +10,7 @@ import {
10
10
  MemorySummarizationConfigSchema,
11
11
  } from "./memory-processing.js";
12
12
  import { MemoryRetrievalConfigSchema } from "./memory-retrieval.js";
13
+ import { MemorySimplifiedConfigSchema } from "./memory-simplified.js";
13
14
  import {
14
15
  MemoryEmbeddingsConfigSchema,
15
16
  MemorySegmentationConfigSchema,
@@ -45,6 +46,9 @@ export const MemoryConfigSchema = z
45
46
  summarization: MemorySummarizationConfigSchema.default(
46
47
  MemorySummarizationConfigSchema.parse({}),
47
48
  ),
49
+ simplified: MemorySimplifiedConfigSchema.default(
50
+ MemorySimplifiedConfigSchema.parse({}),
51
+ ),
48
52
  })
49
53
  .describe(
50
54
  "Long-term memory system — stores, retrieves, and manages persistent knowledge across conversations",
@@ -25,10 +25,15 @@ import {
25
25
  userMessage,
26
26
  } from "../providers/provider-send-message.js";
27
27
  import { parseFrontmatterFields } from "../skills/frontmatter.js";
28
+ import type { InlineCommandExpansion } from "../skills/inline-command-expansions.js";
29
+ import { parseInlineCommandExpansions } from "../skills/inline-command-expansions.js";
28
30
  import { parseToolManifestFile } from "../skills/tool-manifest.js";
29
31
  import { computeSkillVersionHash } from "../skills/version-hash.js";
30
32
  import { getLogger } from "../util/logger.js";
31
- import { getWorkspaceSkillsDir } from "../util/platform.js";
33
+ import {
34
+ getWorkspaceDirDisplay,
35
+ getWorkspaceSkillsDir,
36
+ } from "../util/platform.js";
32
37
  import { isAssistantFeatureFlagEnabled } from "./assistant-feature-flags.js";
33
38
  import { getConfig } from "./loader.js";
34
39
 
@@ -80,6 +85,8 @@ export interface SkillSummary {
80
85
  activationHints?: string[];
81
86
  /** Conditions under which this skill should NOT be loaded. */
82
87
  avoidWhen?: string[];
88
+ /** Parsed inline command expansion descriptors (`!\`command\``) found in the skill body. */
89
+ inlineCommandExpansions?: InlineCommandExpansion[];
83
90
  }
84
91
 
85
92
  export interface SkillDefinition extends SkillSummary {
@@ -198,6 +205,7 @@ interface ParsedFrontmatter {
198
205
  featureFlag?: string;
199
206
  activationHints?: string[];
200
207
  avoidWhen?: string[];
208
+ inlineCommandExpansions?: InlineCommandExpansion[];
201
209
  }
202
210
 
203
211
  function normalizeStringArray(raw: unknown): string[] | undefined {
@@ -302,16 +310,29 @@ function parseFrontmatter(
302
310
  const activationHints = normalizeStringArray(vellum?.["activation-hints"]);
303
311
  const avoidWhen = normalizeStringArray(vellum?.["avoid-when"]);
304
312
 
313
+ const strippedBody = stripCommentLines(body);
314
+
315
+ // Parse inline command expansions from the body (after frontmatter/comment stripping)
316
+ const expansionResult = parseInlineCommandExpansions(strippedBody);
317
+ const inlineCommandExpansions =
318
+ expansionResult.expansions.length > 0
319
+ ? expansionResult.expansions
320
+ : undefined;
321
+
322
+ // Fail closed: if there are malformed tokens, log and exclude from parsed expansions
323
+ // (errors are already logged inside parseInlineCommandExpansions)
324
+
305
325
  return {
306
326
  name,
307
327
  displayName,
308
328
  description,
309
- body: stripCommentLines(body),
329
+ body: strippedBody,
310
330
  emoji,
311
331
  includes,
312
332
  featureFlag,
313
333
  activationHints,
314
334
  avoidWhen,
335
+ inlineCommandExpansions,
315
336
  };
316
337
  }
317
338
 
@@ -466,6 +487,7 @@ function readSkillFromDirectory(
466
487
  featureFlag: parsed.featureFlag,
467
488
  activationHints: parsed.activationHints,
468
489
  avoidWhen: parsed.avoidWhen,
490
+ inlineCommandExpansions: parsed.inlineCommandExpansions,
469
491
  };
470
492
  } catch (err) {
471
493
  log.warn({ err, skillFilePath }, "Failed to read skill file");
@@ -516,6 +538,7 @@ function readBundledSkillFromDirectory(
516
538
  featureFlag: parsed.featureFlag,
517
539
  activationHints: parsed.activationHints,
518
540
  avoidWhen: parsed.avoidWhen,
541
+ inlineCommandExpansions: parsed.inlineCommandExpansions,
519
542
  };
520
543
  } catch (err) {
521
544
  log.warn({ err, skillFilePath }, "Failed to read bundled skill file");
@@ -574,6 +597,7 @@ function loadBundledSkills(): SkillSummary[] {
574
597
  featureFlag: skill.featureFlag,
575
598
  activationHints: skill.activationHints,
576
599
  avoidWhen: skill.avoidWhen,
600
+ inlineCommandExpansions: skill.inlineCommandExpansions,
577
601
  });
578
602
  }
579
603
 
@@ -710,6 +734,7 @@ function skillSummaryFromDefinition(
710
734
  featureFlag: skill.featureFlag,
711
735
  activationHints: skill.activationHints,
712
736
  avoidWhen: skill.avoidWhen,
737
+ inlineCommandExpansions: skill.inlineCommandExpansions,
713
738
  };
714
739
  }
715
740
 
@@ -760,6 +785,7 @@ export function loadSkillCatalog(
760
785
  toolManifest: detectToolManifest(directory),
761
786
  includes: parsed.includes,
762
787
  featureFlag: parsed.featureFlag,
788
+ inlineCommandExpansions: parsed.inlineCommandExpansions,
763
789
  });
764
790
  } catch (err) {
765
791
  log.warn({ err, directory }, "Failed to read skill from extraDirs");
@@ -854,6 +880,7 @@ export function loadSkillCatalog(
854
880
  toolManifest: detectToolManifest(directory),
855
881
  includes: parsed.includes,
856
882
  featureFlag: parsed.featureFlag,
883
+ inlineCommandExpansions: parsed.inlineCommandExpansions,
857
884
  };
858
885
 
859
886
  if (seenIds.has(id)) {
@@ -1001,8 +1028,28 @@ function loadSkillDefinition(skill: SkillSummary): SkillLookupResult {
1001
1028
  }
1002
1029
  // Replace {baseDir} placeholders with the actual skill directory path
1003
1030
  loaded.body = loaded.body.replaceAll("{baseDir}", loaded.directoryPath);
1031
+ // Replace {workspaceDir} placeholders with the runtime workspace display path
1032
+ loaded.body = loaded.body.replaceAll(
1033
+ "{workspaceDir}",
1034
+ getWorkspaceDirDisplay(),
1035
+ );
1004
1036
  // Strip feature-gated sections based on assistant feature flags
1005
1037
  loaded.body = applyFeatureGatedSections(loaded.body);
1038
+
1039
+ // Re-parse inline command expansions after placeholder substitution.
1040
+ // The initial parse (during SKILL.md parsing) produces byte offsets against
1041
+ // the pre-substitution body. Since {baseDir} and {workspaceDir} replacements
1042
+ // change the body length, those offsets become stale. Re-parsing ensures the
1043
+ // offsets match the final body that renderInlineCommands will operate on.
1044
+ if (
1045
+ loaded.inlineCommandExpansions &&
1046
+ loaded.inlineCommandExpansions.length > 0
1047
+ ) {
1048
+ const reparse = parseInlineCommandExpansions(loaded.body);
1049
+ loaded.inlineCommandExpansions =
1050
+ reparse.expansions.length > 0 ? reparse.expansions : undefined;
1051
+ }
1052
+
1006
1053
  return { skill: loaded };
1007
1054
  }
1008
1055
 
@@ -1021,8 +1068,7 @@ export function resolveSkillSelector(
1021
1068
  const catalog = loadSkillCatalog(workspaceSkillsDir);
1022
1069
  if (catalog.length === 0) {
1023
1070
  return {
1024
- error:
1025
- "No skills are available. Configure ~/.vellum/workspace/skills/SKILLS.md or add skill directories.",
1071
+ error: `No skills are available. Configure ${getWorkspaceDirDisplay()}/skills/SKILLS.md or add skill directories.`,
1026
1072
  errorCode: "empty_catalog",
1027
1073
  };
1028
1074
  }
@@ -279,7 +279,12 @@ export function handleToolUse(
279
279
  state.toolCallTimestamps.set(event.id, { startedAt: Date.now() });
280
280
  state.currentToolUseId = event.id;
281
281
  state.currentTurnToolUseIds.push(event.id);
282
- const statusText = `Running ${friendlyToolName(event.name)}`;
282
+ const statusText =
283
+ event.name === "skill_execute" &&
284
+ typeof event.input.activity === "string" &&
285
+ event.input.activity.length > 0
286
+ ? event.input.activity
287
+ : `Running ${friendlyToolName(event.name)}`;
283
288
  deps.ctx.emitActivityState(
284
289
  "tool_running",
285
290
  "tool_use_start",
@@ -398,8 +403,8 @@ export function handleInputJsonDelta(
398
403
  event: Extract<AgentEvent, { type: "input_json_delta" }>,
399
404
  ): void {
400
405
  // Only forward input deltas for app tools — the client only uses this
401
- // stream for app_create/app_update code previews. Non-app tools would
402
- // send large cumulative JSON on every delta with no benefit.
406
+ // stream for app_create code previews. Non-app tools would send large
407
+ // cumulative JSON on every delta with no benefit.
403
408
  if (!APP_TOOL_NAMES.has(event.toolName)) return;
404
409
  deps.onEvent({
405
410
  type: "tool_input_delta",
@@ -33,6 +33,7 @@ import {
33
33
  } from "../instrument.js";
34
34
  import { commitAppTurnChanges } from "../memory/app-git-service.js";
35
35
  import { getApp, listAppFiles, resolveAppDir } from "../memory/app-store.js";
36
+ import { insertCompactionEpisode } from "../memory/archive-store.js";
36
37
  import {
37
38
  addMessage,
38
39
  deleteMessageById,
@@ -208,7 +209,17 @@ export interface AgentLoopConversationContext {
208
209
  currentPage?: string;
209
210
  readonly surfaceState: Map<
210
211
  string,
211
- { surfaceType: SurfaceType; data: SurfaceData; title?: string }
212
+ {
213
+ surfaceType: SurfaceType;
214
+ data: SurfaceData;
215
+ title?: string;
216
+ actions?: Array<{
217
+ id: string;
218
+ label: string;
219
+ style?: string;
220
+ data?: Record<string, unknown>;
221
+ }>;
222
+ }
212
223
  >;
213
224
  pendingSurfaceActions: Map<string, { surfaceType: SurfaceType }>;
214
225
  surfaceActionRequestIds: Set<string>;
@@ -503,6 +514,12 @@ export async function runAgentLoopImpl(
503
514
  compacted.summaryText,
504
515
  ctx.contextCompactedMessageCount,
505
516
  );
517
+ dualWriteCompactionEpisode(
518
+ ctx.conversationId,
519
+ ctx.memoryPolicy.scopeId,
520
+ compacted.summaryText,
521
+ compacted.summaryOutputTokens,
522
+ );
506
523
  onEvent({
507
524
  type: "context_compacted",
508
525
  previousEstimatedInputTokens: compacted.previousEstimatedInputTokens,
@@ -770,6 +787,12 @@ export async function runAgentLoopImpl(
770
787
  step.compactionResult.summaryText,
771
788
  ctx.contextCompactedMessageCount,
772
789
  );
790
+ dualWriteCompactionEpisode(
791
+ ctx.conversationId,
792
+ ctx.memoryPolicy.scopeId,
793
+ step.compactionResult.summaryText,
794
+ step.compactionResult.summaryOutputTokens,
795
+ );
773
796
  onEvent({
774
797
  type: "context_compacted",
775
798
  previousEstimatedInputTokens:
@@ -954,6 +977,12 @@ export async function runAgentLoopImpl(
954
977
  midLoopCompact.summaryText,
955
978
  ctx.contextCompactedMessageCount,
956
979
  );
980
+ dualWriteCompactionEpisode(
981
+ ctx.conversationId,
982
+ ctx.memoryPolicy.scopeId,
983
+ midLoopCompact.summaryText,
984
+ midLoopCompact.summaryOutputTokens,
985
+ );
957
986
  onEvent({
958
987
  type: "context_compacted",
959
988
  previousEstimatedInputTokens:
@@ -1150,6 +1179,12 @@ export async function runAgentLoopImpl(
1150
1179
  step.compactionResult.summaryText,
1151
1180
  ctx.contextCompactedMessageCount,
1152
1181
  );
1182
+ dualWriteCompactionEpisode(
1183
+ ctx.conversationId,
1184
+ ctx.memoryPolicy.scopeId,
1185
+ step.compactionResult.summaryText,
1186
+ step.compactionResult.summaryOutputTokens,
1187
+ );
1153
1188
  onEvent({
1154
1189
  type: "context_compacted",
1155
1190
  previousEstimatedInputTokens:
@@ -1257,6 +1292,12 @@ export async function runAgentLoopImpl(
1257
1292
  emergencyCompact.summaryText,
1258
1293
  ctx.contextCompactedMessageCount,
1259
1294
  );
1295
+ dualWriteCompactionEpisode(
1296
+ ctx.conversationId,
1297
+ ctx.memoryPolicy.scopeId,
1298
+ emergencyCompact.summaryText,
1299
+ emergencyCompact.summaryOutputTokens,
1300
+ );
1260
1301
  onEvent({
1261
1302
  type: "context_compacted",
1262
1303
  previousEstimatedInputTokens:
@@ -1361,6 +1402,12 @@ export async function runAgentLoopImpl(
1361
1402
  emergencyCompact.summaryText,
1362
1403
  ctx.contextCompactedMessageCount,
1363
1404
  );
1405
+ dualWriteCompactionEpisode(
1406
+ ctx.conversationId,
1407
+ ctx.memoryPolicy.scopeId,
1408
+ emergencyCompact.summaryText,
1409
+ emergencyCompact.summaryOutputTokens,
1410
+ );
1364
1411
  onEvent({
1365
1412
  type: "context_compacted",
1366
1413
  previousEstimatedInputTokens:
@@ -1826,3 +1873,26 @@ function collapseRawResponses(rawResponses?: unknown[]): unknown | undefined {
1826
1873
  if (!rawResponses || rawResponses.length === 0) return undefined;
1827
1874
  return rawResponses.length === 1 ? rawResponses[0] : rawResponses;
1828
1875
  }
1876
+
1877
+ /**
1878
+ * Dual-write a compaction summary as an archive episode so it becomes
1879
+ * searchable via vector recall. Called after each successful compaction
1880
+ * that produces a new summary.
1881
+ */
1882
+ function dualWriteCompactionEpisode(
1883
+ conversationId: string,
1884
+ scopeId: string,
1885
+ summaryText: string,
1886
+ summaryOutputTokens: number,
1887
+ ): void {
1888
+ const now = Date.now();
1889
+ insertCompactionEpisode({
1890
+ conversationId,
1891
+ scopeId,
1892
+ title: truncate(summaryText, 120, ""),
1893
+ summary: summaryText,
1894
+ tokenEstimate: summaryOutputTokens,
1895
+ startAt: now,
1896
+ endAt: now,
1897
+ });
1898
+ }
@@ -123,7 +123,17 @@ export interface AbortContext {
123
123
  surfaceActionRequestIds: Set<string>;
124
124
  surfaceState: Map<
125
125
  string,
126
- { surfaceType: SurfaceType; data: SurfaceData; title?: string }
126
+ {
127
+ surfaceType: SurfaceType;
128
+ data: SurfaceData;
129
+ title?: string;
130
+ actions?: Array<{
131
+ id: string;
132
+ label: string;
133
+ style?: string;
134
+ data?: Record<string, unknown>;
135
+ }>;
136
+ }
127
137
  >;
128
138
  accumulatedSurfaceState: Map<string, Record<string, unknown>>;
129
139
  readonly queue: MessageQueue;
@@ -1,5 +1,8 @@
1
1
  import { getConfig } from "../config/loader.js";
2
2
  import { estimatePromptTokens } from "../context/token-estimator.js";
3
+ import { buildArchiveRecall } from "../memory/archive-recall.js";
4
+ import { compileMemoryBrief } from "../memory/brief.js";
5
+ import { getDb } from "../memory/db.js";
3
6
  import { buildMemoryQuery } from "../memory/query-builder.js";
4
7
  import { computeRecallBudget } from "../memory/retrieval-budget.js";
5
8
  import {
@@ -9,8 +12,11 @@ import {
9
12
  import type { ScopePolicyOverride } from "../memory/search/types.js";
10
13
  import type { Message } from "../providers/types.js";
11
14
  import type { Provider } from "../providers/types.js";
15
+ import { getLogger } from "../util/logger.js";
12
16
  import type { ServerMessage } from "./message-protocol.js";
13
17
 
18
+ const log = getLogger("conversation-memory");
19
+
14
20
  export interface MemoryRecallResult {
15
21
  runMessages: Message[];
16
22
  recall: Awaited<ReturnType<typeof buildMemoryRecall>>;
@@ -115,6 +121,14 @@ export async function prepareMemoryContext(
115
121
 
116
122
  const runtimeConfig = getConfig();
117
123
 
124
+ // ── Simplified memory path ──────────────────────────────────────────
125
+ // When `memory.simplified.enabled` is true, inject the brief and
126
+ // optional archive recall instead of the legacy hybrid pipeline.
127
+ if (runtimeConfig.memory?.simplified?.enabled) {
128
+ return prepareSimplifiedMemoryContext(ctx, content, userMessageId, onEvent);
129
+ }
130
+
131
+ // ── Legacy memory path (fallback) ──────────────────────────────────
118
132
  // Memory recall via the V2 hybrid pipeline
119
133
  const recallQuery = buildMemoryQuery(content, ctx.messages);
120
134
  const dynamicBudgetConfig = runtimeConfig.memory?.retrieval?.dynamicBudget;
@@ -207,3 +221,106 @@ export async function prepareMemoryContext(
207
221
  recall,
208
222
  };
209
223
  }
224
+
225
+ // ── Simplified memory injection ─────────────────────────────────────────
226
+
227
+ /**
228
+ * Build simplified memory context for a turn: compiles the `<memory_brief>`
229
+ * block and conditionally appends `<supporting_recall>` from the archive.
230
+ *
231
+ * Non-empty blocks are injected as text content blocks prepended to the
232
+ * last user message, following the same injection pattern as the legacy
233
+ * pipeline. Stripping is handled by `RUNTIME_INJECTION_PREFIXES` which
234
+ * already includes `<memory_brief>`.
235
+ */
236
+ function prepareSimplifiedMemoryContext(
237
+ ctx: MemoryPrepareContext,
238
+ content: string,
239
+ userMessageId: string,
240
+ onEvent: (msg: ServerMessage) => void,
241
+ ): MemoryRecallResult {
242
+ const start = Date.now();
243
+
244
+ // Build a no-op recall result matching the legacy shape.
245
+ const noopRecall = (): Awaited<ReturnType<typeof buildMemoryRecall>> =>
246
+ ({
247
+ enabled: true,
248
+ degraded: false,
249
+ injectedText: "",
250
+ semanticHits: 0,
251
+ recencyHits: 0,
252
+ mergedCount: 0,
253
+ selectedCount: 0,
254
+ injectedTokens: 0,
255
+ latencyMs: 0,
256
+ topCandidates: [],
257
+ tier1Count: 0,
258
+ tier2Count: 0,
259
+ }) as Awaited<ReturnType<typeof buildMemoryRecall>>;
260
+
261
+ try {
262
+ const db = getDb();
263
+
264
+ // Step 1: Build the memory brief
265
+ const briefResult = compileMemoryBrief(db, ctx.scopeId, userMessageId);
266
+
267
+ // Step 2: Conditionally build supporting recall from the archive
268
+ const archiveResult = buildArchiveRecall(ctx.scopeId, content);
269
+
270
+ // Step 3: Assemble the injection blocks (non-empty only)
271
+ const blocks: string[] = [];
272
+ if (briefResult.text.length > 0) {
273
+ blocks.push(briefResult.text);
274
+ }
275
+ if (archiveResult.text.length > 0) {
276
+ blocks.push(archiveResult.text);
277
+ }
278
+
279
+ const latencyMs = Date.now() - start;
280
+
281
+ // Emit memory status for the simplified path
282
+ onEvent({
283
+ type: "memory_status",
284
+ enabled: true,
285
+ degraded: false,
286
+ });
287
+
288
+ // Inject non-empty blocks into the last user message
289
+ let runMessages = ctx.messages;
290
+ if (blocks.length > 0) {
291
+ const injectedText = blocks.join("\n\n");
292
+ const userTail = ctx.messages[ctx.messages.length - 1];
293
+ if (userTail && userTail.role === "user") {
294
+ runMessages = injectMemoryRecallAsUserBlock(ctx.messages, injectedText);
295
+ }
296
+
297
+ log.debug(
298
+ {
299
+ briefLength: briefResult.text.length,
300
+ recallTrigger: archiveResult.trigger,
301
+ recallBullets: archiveResult.bullets.length,
302
+ latencyMs,
303
+ },
304
+ "Simplified memory injection completed",
305
+ );
306
+ }
307
+
308
+ return {
309
+ runMessages,
310
+ recall: {
311
+ ...noopRecall(),
312
+ injectedText: blocks.length > 0 ? blocks.join("\n\n") : "",
313
+ latencyMs,
314
+ },
315
+ };
316
+ } catch (err) {
317
+ log.warn({ err }, "Simplified memory injection failed, returning no-op");
318
+ return {
319
+ runMessages: ctx.messages,
320
+ recall: {
321
+ ...noopRecall(),
322
+ latencyMs: Date.now() - start,
323
+ },
324
+ };
325
+ }
326
+ }
@@ -306,7 +306,7 @@ export function injectActiveSurfaceContext(
306
306
  'PREREQUISITE: If `app_refresh` is not yet available, call `skill_load` with `id: "app-builder"` first to load it.',
307
307
  "",
308
308
  "RULES FOR WORKSPACE MODIFICATION:",
309
- `1. Use \`file_edit\` to make surgical changes to app files. The file path is \`~/.vellum/workspace/data/apps/${slug}/<path>\`.`,
309
+ `1. Use \`file_edit\` to make surgical changes to app files. The file path is \`${getAppDirPath(ctx.appId)}/<path>\`.`,
310
310
  "2. Use `file_write` to create new files or rewrite files.",
311
311
  "3. Use `file_read` to read any file with line numbers before editing.",
312
312
  "4. Use `bash ls` to see all files in the app directory.",
@@ -961,6 +961,8 @@ const RUNTIME_INJECTION_PREFIXES = [
961
961
  "<inbound_actor_context>",
962
962
  "<interface_turn_context>",
963
963
  "<turn_context>",
964
+ "<memory_brief>",
965
+ "<supporting_recall>",
964
966
  "<memory_context __injected>",
965
967
  "<memory_context>", // backward-compat: strip legacy blocks from pre-__injected history
966
968
  "<voice_call_control>",
@@ -172,6 +172,7 @@ export interface SurfaceConversationContext {
172
172
  emit(type: string, message: string, meta?: Record<string, unknown>): void;
173
173
  };
174
174
  sendToClient(msg: ServerMessage): void;
175
+ broadcastToAllClients?(msg: ServerMessage): void;
175
176
  pendingSurfaceActions: Map<string, { surfaceType: SurfaceType }>;
176
177
  lastSurfaceAction: Map<
177
178
  string,
@@ -617,10 +618,18 @@ export function handleSurfaceAction(
617
618
  const prompt =
618
619
  isRelay && typeof data?.prompt === "string" ? data.prompt.trim() : "";
619
620
 
621
+ // Read accumulated state once — used by both relay and custom action paths.
622
+ const accState = ctx.accumulatedSurfaceState.get(surfaceId);
623
+ const hasAccState = accState && Object.keys(accState).length > 0;
624
+
620
625
  let content: string;
621
626
  let displayContent: string | undefined;
622
627
  if (prompt) {
623
628
  content = prompt;
629
+ // Re-append accumulated state so the LLM sees it, matching the pending path.
630
+ if (hasAccState) {
631
+ content += `\n\nAccumulated surface state: ${JSON.stringify(accState)}`;
632
+ }
624
633
  } else {
625
634
  // Custom action from an app (e.g. sendAction('answer_selected', {...}))
626
635
  const summary = actionId
@@ -630,17 +639,20 @@ export function handleSurfaceAction(
630
639
  if (data && Object.keys(data).length > 0) {
631
640
  content += `\n\nAction data: ${JSON.stringify(data)}`;
632
641
  }
633
- const accState = ctx.accumulatedSurfaceState.get(surfaceId);
634
- if (accState && Object.keys(accState).length > 0) {
642
+ if (hasAccState) {
635
643
  content += `\n\nAccumulated surface state: ${JSON.stringify(accState)}`;
636
- ctx.accumulatedSurfaceState.delete(surfaceId);
637
644
  }
638
645
  displayContent = summary;
639
646
  }
640
647
 
641
648
  const requestId = uuid();
642
649
  ctx.surfaceActionRequestIds.add(requestId);
643
- const onEvent = (msg: ServerMessage) => ctx.sendToClient(msg);
650
+ // Use broadcastToAllClients (publishes to the SSE event hub) instead of
651
+ // sendToClient, which is reset to a no-op between HTTP requests. Without
652
+ // this, surface action responses are persisted to DB but never reach the
653
+ // client's SSE stream.
654
+ const emit = ctx.broadcastToAllClients ?? ctx.sendToClient.bind(ctx);
655
+ const onEvent = (msg: ServerMessage) => emit(msg);
644
656
 
645
657
  ctx.traceEmitter.emit("request_received", "Surface action received", {
646
658
  requestId,
@@ -665,10 +677,16 @@ export function handleSurfaceAction(
665
677
  return;
666
678
  }
667
679
 
680
+ // One-shot: clear accumulated state now that the message has been accepted.
681
+ // Deferred until after rejection check so state is preserved for retry on rejection.
682
+ if (hasAccState) {
683
+ ctx.accumulatedSurfaceState.delete(surfaceId);
684
+ }
685
+
668
686
  // Echo the prompt to the client so it appears in the chat UI.
669
687
  // Deferred until after rejection check to avoid ghost messages.
670
688
  if (prompt) {
671
- ctx.sendToClient({
689
+ emit({
672
690
  type: "user_message_echo",
673
691
  text: prompt,
674
692
  conversationId: ctx.conversationId,
@@ -768,11 +786,16 @@ export function handleSurfaceAction(
768
786
  surfaceData,
769
787
  );
770
788
 
789
+ // Use broadcastToAllClients so events reach the SSE hub — sendToClient is
790
+ // reset to a no-op between HTTP requests (see history-restored path for
791
+ // full rationale).
792
+ const emit = ctx.broadcastToAllClients ?? ctx.sendToClient.bind(ctx);
793
+
771
794
  // Forms are one-shot surfaces — auto-complete immediately so the client
772
795
  // transitions from the "Submitting…" spinner to a completion chip without
773
796
  // requiring the LLM to call ui_dismiss.
774
797
  if (pending.surfaceType === "form") {
775
- ctx.sendToClient({
798
+ emit({
776
799
  type: "ui_surface_complete",
777
800
  conversationId: ctx.conversationId,
778
801
  surfaceId,
@@ -833,7 +856,7 @@ export function handleSurfaceAction(
833
856
 
834
857
  const requestId = uuid();
835
858
  ctx.surfaceActionRequestIds.add(requestId);
836
- const onEvent = (msg: ServerMessage) => ctx.sendToClient(msg);
859
+ const onEvent = (msg: ServerMessage) => emit(msg);
837
860
 
838
861
  ctx.traceEmitter.emit("request_received", "Surface action received", {
839
862
  requestId,
@@ -866,7 +889,7 @@ export function handleSurfaceAction(
866
889
  // Echo the user's prompt to the client so it appears in the chat UI.
867
890
  // Deferred until after rejection check to avoid ghost messages.
868
891
  if (shouldRelayPrompt && prompt) {
869
- ctx.sendToClient({
892
+ emit({
870
893
  type: "user_message_echo",
871
894
  text: prompt,
872
895
  conversationId: ctx.conversationId,