@vellumai/assistant 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57):
  1. package/docs/architecture/memory.md +105 -0
  2. package/package.json +1 -1
  3. package/src/__tests__/archive-recall.test.ts +560 -0
  4. package/src/__tests__/conversation-clear-safety.test.ts +259 -0
  5. package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
  6. package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
  7. package/src/__tests__/memory-reducer-job.test.ts +538 -0
  8. package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
  9. package/src/__tests__/memory-reducer-types.test.ts +12 -4
  10. package/src/__tests__/memory-reducer.test.ts +7 -1
  11. package/src/__tests__/memory-regressions.test.ts +24 -4
  12. package/src/__tests__/memory-simplified-config.test.ts +4 -4
  13. package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
  14. package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
  15. package/src/cli/commands/conversations.ts +18 -0
  16. package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
  17. package/src/config/loader.ts +0 -1
  18. package/src/config/schemas/memory-simplified.ts +1 -1
  19. package/src/daemon/conversation-memory.ts +117 -0
  20. package/src/daemon/conversation-runtime-assembly.ts +1 -0
  21. package/src/daemon/handlers/conversations.ts +11 -0
  22. package/src/daemon/lifecycle.ts +44 -1
  23. package/src/memory/archive-recall.ts +516 -0
  24. package/src/memory/brief-time.ts +5 -4
  25. package/src/memory/conversation-crud.ts +210 -0
  26. package/src/memory/conversation-key-store.ts +33 -4
  27. package/src/memory/db-init.ts +4 -0
  28. package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
  29. package/src/memory/job-handlers/conversation-starters.ts +9 -3
  30. package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
  31. package/src/memory/jobs-store.ts +2 -0
  32. package/src/memory/jobs-worker.ts +8 -0
  33. package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
  34. package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
  35. package/src/memory/migrations/141-rename-verification-table.ts +8 -0
  36. package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
  37. package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
  38. package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
  39. package/src/memory/migrations/index.ts +1 -0
  40. package/src/memory/reducer-scheduler.ts +242 -0
  41. package/src/memory/reducer-types.ts +9 -2
  42. package/src/memory/reducer.ts +25 -11
  43. package/src/memory/schema/infrastructure.ts +1 -0
  44. package/src/runtime/auth/route-policy.ts +10 -1
  45. package/src/runtime/routes/conversation-management-routes.ts +88 -2
  46. package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
  47. package/src/runtime/routes/secret-routes.ts +1 -0
  48. package/src/schedule/schedule-store.ts +7 -0
  49. package/src/schedule/scheduler.ts +6 -2
  50. package/src/telemetry/usage-telemetry-reporter.ts +1 -1
  51. package/src/tools/filesystem/edit.ts +6 -1
  52. package/src/tools/filesystem/read.ts +6 -1
  53. package/src/tools/filesystem/write.ts +6 -1
  54. package/src/tools/memory/handlers.ts +129 -1
  55. package/src/tools/schedule/create.ts +3 -0
  56. package/src/tools/schedule/list.ts +5 -1
  57. package/src/tools/schedule/update.ts +6 -0
@@ -1,5 +1,8 @@
1
1
  import { getConfig } from "../config/loader.js";
2
2
  import { estimatePromptTokens } from "../context/token-estimator.js";
3
+ import { buildArchiveRecall } from "../memory/archive-recall.js";
4
+ import { compileMemoryBrief } from "../memory/brief.js";
5
+ import { getDb } from "../memory/db.js";
3
6
  import { buildMemoryQuery } from "../memory/query-builder.js";
4
7
  import { computeRecallBudget } from "../memory/retrieval-budget.js";
5
8
  import {
@@ -9,8 +12,11 @@ import {
9
12
  import type { ScopePolicyOverride } from "../memory/search/types.js";
10
13
  import type { Message } from "../providers/types.js";
11
14
  import type { Provider } from "../providers/types.js";
15
+ import { getLogger } from "../util/logger.js";
12
16
  import type { ServerMessage } from "./message-protocol.js";
13
17
 
18
+ const log = getLogger("conversation-memory");
19
+
14
20
  export interface MemoryRecallResult {
15
21
  runMessages: Message[];
16
22
  recall: Awaited<ReturnType<typeof buildMemoryRecall>>;
@@ -115,6 +121,14 @@ export async function prepareMemoryContext(
115
121
 
116
122
  const runtimeConfig = getConfig();
117
123
 
124
+ // ── Simplified memory path ──────────────────────────────────────────
125
+ // When `memory.simplified.enabled` is true, inject the brief and
126
+ // optional archive recall instead of the legacy hybrid pipeline.
127
+ if (runtimeConfig.memory?.simplified?.enabled) {
128
+ return prepareSimplifiedMemoryContext(ctx, content, userMessageId, onEvent);
129
+ }
130
+
131
+ // ── Legacy memory path (fallback) ──────────────────────────────────
118
132
  // Memory recall via the V2 hybrid pipeline
119
133
  const recallQuery = buildMemoryQuery(content, ctx.messages);
120
134
  const dynamicBudgetConfig = runtimeConfig.memory?.retrieval?.dynamicBudget;
@@ -207,3 +221,106 @@ export async function prepareMemoryContext(
207
221
  recall,
208
222
  };
209
223
  }
224
+
225
+ // ── Simplified memory injection ─────────────────────────────────────────
226
+
227
+ /**
228
+ * Build simplified memory context for a turn: compiles the `<memory_brief>`
229
+ * block and conditionally appends `<supporting_recall>` from the archive.
230
+ *
231
+ * Non-empty blocks are injected as text content blocks prepended to the
232
+ * last user message, following the same injection pattern as the legacy
233
+ * pipeline. Stripping is handled by `RUNTIME_INJECTION_PREFIXES` which
234
+ * already includes `<memory_brief>`.
235
+ */
236
+ function prepareSimplifiedMemoryContext(
237
+ ctx: MemoryPrepareContext,
238
+ content: string,
239
+ userMessageId: string,
240
+ onEvent: (msg: ServerMessage) => void,
241
+ ): MemoryRecallResult {
242
+ const start = Date.now();
243
+
244
+ // Build a no-op recall result matching the legacy shape.
245
+ const noopRecall = (): Awaited<ReturnType<typeof buildMemoryRecall>> =>
246
+ ({
247
+ enabled: true,
248
+ degraded: false,
249
+ injectedText: "",
250
+ semanticHits: 0,
251
+ recencyHits: 0,
252
+ mergedCount: 0,
253
+ selectedCount: 0,
254
+ injectedTokens: 0,
255
+ latencyMs: 0,
256
+ topCandidates: [],
257
+ tier1Count: 0,
258
+ tier2Count: 0,
259
+ }) as Awaited<ReturnType<typeof buildMemoryRecall>>;
260
+
261
+ try {
262
+ const db = getDb();
263
+
264
+ // Step 1: Build the memory brief
265
+ const briefResult = compileMemoryBrief(db, ctx.scopeId, userMessageId);
266
+
267
+ // Step 2: Conditionally build supporting recall from the archive
268
+ const archiveResult = buildArchiveRecall(ctx.scopeId, content);
269
+
270
+ // Step 3: Assemble the injection blocks (non-empty only)
271
+ const blocks: string[] = [];
272
+ if (briefResult.text.length > 0) {
273
+ blocks.push(briefResult.text);
274
+ }
275
+ if (archiveResult.text.length > 0) {
276
+ blocks.push(archiveResult.text);
277
+ }
278
+
279
+ const latencyMs = Date.now() - start;
280
+
281
+ // Emit memory status for the simplified path
282
+ onEvent({
283
+ type: "memory_status",
284
+ enabled: true,
285
+ degraded: false,
286
+ });
287
+
288
+ // Inject non-empty blocks into the last user message
289
+ let runMessages = ctx.messages;
290
+ if (blocks.length > 0) {
291
+ const injectedText = blocks.join("\n\n");
292
+ const userTail = ctx.messages[ctx.messages.length - 1];
293
+ if (userTail && userTail.role === "user") {
294
+ runMessages = injectMemoryRecallAsUserBlock(ctx.messages, injectedText);
295
+ }
296
+
297
+ log.debug(
298
+ {
299
+ briefLength: briefResult.text.length,
300
+ recallTrigger: archiveResult.trigger,
301
+ recallBullets: archiveResult.bullets.length,
302
+ latencyMs,
303
+ },
304
+ "Simplified memory injection completed",
305
+ );
306
+ }
307
+
308
+ return {
309
+ runMessages,
310
+ recall: {
311
+ ...noopRecall(),
312
+ injectedText: blocks.length > 0 ? blocks.join("\n\n") : "",
313
+ latencyMs,
314
+ },
315
+ };
316
+ } catch (err) {
317
+ log.warn({ err }, "Simplified memory injection failed, returning no-op");
318
+ return {
319
+ runMessages: ctx.messages,
320
+ recall: {
321
+ ...noopRecall(),
322
+ latencyMs: Date.now() - start,
323
+ },
324
+ };
325
+ }
326
+ }
@@ -962,6 +962,7 @@ const RUNTIME_INJECTION_PREFIXES = [
962
962
  "<interface_turn_context>",
963
963
  "<turn_context>",
964
964
  "<memory_brief>",
965
+ "<supporting_recall>",
965
966
  "<memory_context __injected>",
966
967
  "<memory_context>", // backward-compat: strip legacy blocks from pre-__injected history
967
968
  "<voice_call_control>",
@@ -23,6 +23,7 @@ import {
23
23
  queueGenerateConversationTitle,
24
24
  UNTITLED_FALLBACK,
25
25
  } from "../../memory/conversation-title-service.js";
26
+ import { reduceBeforeSwitch } from "../../memory/reducer-scheduler.js";
26
27
  import * as pendingInteractions from "../../runtime/pending-interactions.js";
27
28
  import { getSubagentManager } from "../../subagent/index.js";
28
29
  import { truncate } from "../../util/truncate.js";
@@ -233,6 +234,12 @@ export async function handleConversationCreate(
233
234
  conversationType: normalizeConversationType(conversation.conversationType),
234
235
  });
235
236
 
237
+ // Reduce the previous dirty conversation before processing the initial
238
+ // message so its memory is fresh for the next read.
239
+ if (msg.initialMessage) {
240
+ await reduceBeforeSwitch(conversation.id);
241
+ }
242
+
236
243
  // Auto-send the initial message if provided, kick-starting the skill.
237
244
  if (msg.initialMessage) {
238
245
  // Queue title generation eagerly — some processMessage paths (guardian
@@ -343,6 +350,10 @@ export async function switchConversation(
343
350
  return null;
344
351
  }
345
352
 
353
+ // Reduce the previous dirty conversation before switching so its memory
354
+ // is fresh for the next read.
355
+ await reduceBeforeSwitch(conversationId);
356
+
346
357
  // If the target conversation is headless-locked (actively executing a task run),
347
358
  // skip rebinding so tool confirmations stay suppressed.
348
359
  const existingConversation = ctx.conversations.get(conversationId);
@@ -30,6 +30,7 @@ import {
30
30
  getConversationType,
31
31
  getMessages,
32
32
  purgePrivateConversations,
33
+ sweepStaleReducerJobs,
33
34
  } from "../memory/conversation-crud.js";
34
35
  import { resolveConversationId } from "../memory/conversation-key-store.js";
35
36
  import { initializeDb } from "../memory/db.js";
@@ -206,16 +207,40 @@ export async function runDaemon(): Promise<void> {
206
207
  targetId: summaryId,
207
208
  });
208
209
  }
210
+ for (const obsId of deletedMemory.deletedObservationIds) {
211
+ enqueueMemoryJob("delete_qdrant_vectors", {
212
+ targetType: "observation",
213
+ targetId: obsId,
214
+ });
215
+ }
216
+ for (const chunkId of deletedMemory.deletedChunkIds) {
217
+ enqueueMemoryJob("delete_qdrant_vectors", {
218
+ targetType: "chunk",
219
+ targetId: chunkId,
220
+ });
221
+ }
222
+ for (const episodeId of deletedMemory.deletedEpisodeIds) {
223
+ enqueueMemoryJob("delete_qdrant_vectors", {
224
+ targetType: "episode",
225
+ targetId: episodeId,
226
+ });
227
+ }
209
228
  if (
210
229
  deletedMemory.segmentIds.length > 0 ||
211
230
  deletedMemory.orphanedItemIds.length > 0 ||
212
- deletedMemory.deletedSummaryIds.length > 0
231
+ deletedMemory.deletedSummaryIds.length > 0 ||
232
+ deletedMemory.deletedObservationIds.length > 0 ||
233
+ deletedMemory.deletedChunkIds.length > 0 ||
234
+ deletedMemory.deletedEpisodeIds.length > 0
213
235
  ) {
214
236
  log.info(
215
237
  {
216
238
  segments: deletedMemory.segmentIds.length,
217
239
  orphanedItems: deletedMemory.orphanedItemIds.length,
218
240
  deletedSummaries: deletedMemory.deletedSummaryIds.length,
241
+ deletedObservations: deletedMemory.deletedObservationIds.length,
242
+ deletedChunks: deletedMemory.deletedChunkIds.length,
243
+ deletedEpisodes: deletedMemory.deletedEpisodeIds.length,
219
244
  },
220
245
  "Enqueued Qdrant vector cleanup jobs for purged private conversations",
221
246
  );
@@ -240,6 +265,24 @@ export async function runDaemon(): Promise<void> {
240
265
  );
241
266
  }
242
267
 
268
+ // Sweep dirty conversations whose tail messages are already past the
269
+ // idle delay — they should have been reduced while the daemon was down.
270
+ // Enqueue immediate reducer jobs so the memory worker picks them up.
271
+ try {
272
+ const sweepCount = sweepStaleReducerJobs();
273
+ if (sweepCount > 0) {
274
+ log.info(
275
+ { sweepCount },
276
+ `Enqueued reducer jobs for ${sweepCount} stale dirty conversation(s)`,
277
+ );
278
+ }
279
+ } catch (err) {
280
+ log.warn(
281
+ { err },
282
+ "Startup sweep for stale reducer jobs failed — continuing startup",
283
+ );
284
+ }
285
+
243
286
  // Ensure a vellum guardian binding exists so the identity system works
244
287
  // without requiring a manual bootstrap step.
245
288
  try {