@vellumai/assistant 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/architecture/memory.md +105 -0
- package/package.json +1 -1
- package/src/__tests__/archive-recall.test.ts +560 -0
- package/src/__tests__/conversation-clear-safety.test.ts +259 -0
- package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
- package/src/__tests__/memory-reducer-job.test.ts +538 -0
- package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
- package/src/__tests__/memory-reducer-types.test.ts +12 -4
- package/src/__tests__/memory-reducer.test.ts +7 -1
- package/src/__tests__/memory-regressions.test.ts +24 -4
- package/src/__tests__/memory-simplified-config.test.ts +4 -4
- package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
- package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
- package/src/cli/commands/conversations.ts +18 -0
- package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
- package/src/config/loader.ts +0 -1
- package/src/config/schemas/memory-simplified.ts +1 -1
- package/src/daemon/conversation-memory.ts +117 -0
- package/src/daemon/conversation-runtime-assembly.ts +1 -0
- package/src/daemon/handlers/conversations.ts +11 -0
- package/src/daemon/lifecycle.ts +44 -1
- package/src/memory/archive-recall.ts +516 -0
- package/src/memory/brief-time.ts +5 -4
- package/src/memory/conversation-crud.ts +210 -0
- package/src/memory/conversation-key-store.ts +33 -4
- package/src/memory/db-init.ts +4 -0
- package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
- package/src/memory/job-handlers/conversation-starters.ts +9 -3
- package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
- package/src/memory/jobs-store.ts +2 -0
- package/src/memory/jobs-worker.ts +8 -0
- package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
- package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
- package/src/memory/migrations/141-rename-verification-table.ts +8 -0
- package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
- package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
- package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/reducer-scheduler.ts +242 -0
- package/src/memory/reducer-types.ts +9 -2
- package/src/memory/reducer.ts +25 -11
- package/src/memory/schema/infrastructure.ts +1 -0
- package/src/runtime/auth/route-policy.ts +10 -1
- package/src/runtime/routes/conversation-management-routes.ts +88 -2
- package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
- package/src/runtime/routes/secret-routes.ts +1 -0
- package/src/schedule/schedule-store.ts +7 -0
- package/src/schedule/scheduler.ts +6 -2
- package/src/telemetry/usage-telemetry-reporter.ts +1 -1
- package/src/tools/filesystem/edit.ts +6 -1
- package/src/tools/filesystem/read.ts +6 -1
- package/src/tools/filesystem/write.ts +6 -1
- package/src/tools/memory/handlers.ts +129 -1
- package/src/tools/schedule/create.ts +3 -0
- package/src/tools/schedule/list.ts +5 -1
- package/src/tools/schedule/update.ts +6 -0
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { getConfig } from "../config/loader.js";
|
|
2
2
|
import { estimatePromptTokens } from "../context/token-estimator.js";
|
|
3
|
+
import { buildArchiveRecall } from "../memory/archive-recall.js";
|
|
4
|
+
import { compileMemoryBrief } from "../memory/brief.js";
|
|
5
|
+
import { getDb } from "../memory/db.js";
|
|
3
6
|
import { buildMemoryQuery } from "../memory/query-builder.js";
|
|
4
7
|
import { computeRecallBudget } from "../memory/retrieval-budget.js";
|
|
5
8
|
import {
|
|
@@ -9,8 +12,11 @@ import {
|
|
|
9
12
|
import type { ScopePolicyOverride } from "../memory/search/types.js";
|
|
10
13
|
import type { Message } from "../providers/types.js";
|
|
11
14
|
import type { Provider } from "../providers/types.js";
|
|
15
|
+
import { getLogger } from "../util/logger.js";
|
|
12
16
|
import type { ServerMessage } from "./message-protocol.js";
|
|
13
17
|
|
|
18
|
+
// Module-level logger for this file, created under the name "conversation-memory".
const log = getLogger("conversation-memory");
|
|
19
|
+
|
|
14
20
|
export interface MemoryRecallResult {
|
|
15
21
|
runMessages: Message[];
|
|
16
22
|
recall: Awaited<ReturnType<typeof buildMemoryRecall>>;
|
|
@@ -115,6 +121,14 @@ export async function prepareMemoryContext(
|
|
|
115
121
|
|
|
116
122
|
const runtimeConfig = getConfig();
|
|
117
123
|
|
|
124
|
+
// ── Simplified memory path ──────────────────────────────────────────
|
|
125
|
+
// When `memory.simplified.enabled` is true, inject the brief and
|
|
126
|
+
// optional archive recall instead of the legacy hybrid pipeline.
|
|
127
|
+
if (runtimeConfig.memory?.simplified?.enabled) {
|
|
128
|
+
return prepareSimplifiedMemoryContext(ctx, content, userMessageId, onEvent);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// ── Legacy memory path (fallback) ──────────────────────────────────
|
|
118
132
|
// Memory recall via the V2 hybrid pipeline
|
|
119
133
|
const recallQuery = buildMemoryQuery(content, ctx.messages);
|
|
120
134
|
const dynamicBudgetConfig = runtimeConfig.memory?.retrieval?.dynamicBudget;
|
|
@@ -207,3 +221,106 @@ export async function prepareMemoryContext(
|
|
|
207
221
|
recall,
|
|
208
222
|
};
|
|
209
223
|
}
|
|
224
|
+
|
|
225
|
+
// ── Simplified memory injection ─────────────────────────────────────────
|
|
226
|
+
|
|
227
|
+
/**
 * Per-turn memory preparation for the simplified memory path.
 *
 * Compiles the memory brief for the scope, asks the archive for supporting
 * recall based on the turn content, and keeps whichever of the two texts are
 * non-empty. The kept texts are joined with a blank line and, when the
 * conversation currently ends in a user message, handed to
 * `injectMemoryRecallAsUserBlock` to produce the messages for this run.
 * If the last message is not a user message the messages are returned as-is.
 *
 * Both `<memory_brief>` and `<supporting_recall>` appear in
 * `RUNTIME_INJECTION_PREFIXES`, which is the list used for stripping
 * runtime-injected blocks.
 *
 * A `memory_status` event (enabled, not degraded) is emitted on the success
 * path. Any error thrown while building or injecting is logged at warn level
 * and the function falls back to the untouched messages with an empty recall.
 *
 * @param ctx - Preparation context; `scopeId` and `messages` are read here.
 * @param content - Text of the current turn, passed to the archive recall.
 * @param userMessageId - Id forwarded to the memory brief compiler.
 * @param onEvent - Sink for the `memory_status` server message.
 * @returns The messages to run with and a recall record in the legacy shape
 *   (counters zeroed; only `injectedText` and `latencyMs` are populated).
 */
function prepareSimplifiedMemoryContext(
  ctx: MemoryPrepareContext,
  content: string,
  userMessageId: string,
  onEvent: (msg: ServerMessage) => void,
): MemoryRecallResult {
  type LegacyRecall = Awaited<ReturnType<typeof buildMemoryRecall>>;

  const startedAt = Date.now();

  // Zeroed recall record shaped like the legacy pipeline's result; a fresh
  // object is produced on every call so callers never share the array.
  const emptyRecall = (): LegacyRecall =>
    ({
      enabled: true,
      degraded: false,
      injectedText: "",
      semanticHits: 0,
      recencyHits: 0,
      mergedCount: 0,
      selectedCount: 0,
      injectedTokens: 0,
      latencyMs: 0,
      topCandidates: [],
      tier1Count: 0,
      tier2Count: 0,
    }) as LegacyRecall;

  try {
    // Brief first, then archive recall — same order as the sources are listed
    // in the injected text.
    const brief = compileMemoryBrief(getDb(), ctx.scopeId, userMessageId);
    const archive = buildArchiveRecall(ctx.scopeId, content);

    // Only non-empty texts take part in the injection.
    const blocks = [brief.text, archive.text].filter(
      (text) => text.length > 0,
    );

    const latencyMs = Date.now() - startedAt;

    // Report memory status for the simplified path.
    onEvent({
      type: "memory_status",
      enabled: true,
      degraded: false,
    });

    // Joining an empty list yields "", so this doubles as the reported text.
    const combinedText = blocks.join("\n\n");
    const hasBlocks = blocks.length > 0;
    const endsWithUserMessage =
      ctx.messages[ctx.messages.length - 1]?.role === "user";

    const runMessages =
      hasBlocks && endsWithUserMessage
        ? injectMemoryRecallAsUserBlock(ctx.messages, combinedText)
        : ctx.messages;

    if (hasBlocks) {
      log.debug(
        {
          briefLength: brief.text.length,
          recallTrigger: archive.trigger,
          recallBullets: archive.bullets.length,
          latencyMs,
        },
        "Simplified memory injection completed",
      );
    }

    return {
      runMessages,
      recall: { ...emptyRecall(), injectedText: combinedText, latencyMs },
    };
  } catch (err) {
    log.warn({ err }, "Simplified memory injection failed, returning no-op");
    return {
      runMessages: ctx.messages,
      recall: { ...emptyRecall(), latencyMs: Date.now() - startedAt },
    };
  }
}
|
|
@@ -962,6 +962,7 @@ const RUNTIME_INJECTION_PREFIXES = [
|
|
|
962
962
|
"<interface_turn_context>",
|
|
963
963
|
"<turn_context>",
|
|
964
964
|
"<memory_brief>",
|
|
965
|
+
"<supporting_recall>",
|
|
965
966
|
"<memory_context __injected>",
|
|
966
967
|
"<memory_context>", // backward-compat: strip legacy blocks from pre-__injected history
|
|
967
968
|
"<voice_call_control>",
|
|
@@ -23,6 +23,7 @@ import {
|
|
|
23
23
|
queueGenerateConversationTitle,
|
|
24
24
|
UNTITLED_FALLBACK,
|
|
25
25
|
} from "../../memory/conversation-title-service.js";
|
|
26
|
+
import { reduceBeforeSwitch } from "../../memory/reducer-scheduler.js";
|
|
26
27
|
import * as pendingInteractions from "../../runtime/pending-interactions.js";
|
|
27
28
|
import { getSubagentManager } from "../../subagent/index.js";
|
|
28
29
|
import { truncate } from "../../util/truncate.js";
|
|
@@ -233,6 +234,12 @@ export async function handleConversationCreate(
|
|
|
233
234
|
conversationType: normalizeConversationType(conversation.conversationType),
|
|
234
235
|
});
|
|
235
236
|
|
|
237
|
+
// Reduce the previous dirty conversation before processing the initial
|
|
238
|
+
// message so its memory is fresh for the next read.
|
|
239
|
+
if (msg.initialMessage) {
|
|
240
|
+
await reduceBeforeSwitch(conversation.id);
|
|
241
|
+
}
|
|
242
|
+
|
|
236
243
|
// Auto-send the initial message if provided, kick-starting the skill.
|
|
237
244
|
if (msg.initialMessage) {
|
|
238
245
|
// Queue title generation eagerly — some processMessage paths (guardian
|
|
@@ -343,6 +350,10 @@ export async function switchConversation(
|
|
|
343
350
|
return null;
|
|
344
351
|
}
|
|
345
352
|
|
|
353
|
+
// Reduce the previous dirty conversation before switching so its memory
|
|
354
|
+
// is fresh for the next read.
|
|
355
|
+
await reduceBeforeSwitch(conversationId);
|
|
356
|
+
|
|
346
357
|
// If the target conversation is headless-locked (actively executing a task run),
|
|
347
358
|
// skip rebinding so tool confirmations stay suppressed.
|
|
348
359
|
const existingConversation = ctx.conversations.get(conversationId);
|
package/src/daemon/lifecycle.ts
CHANGED
|
@@ -30,6 +30,7 @@ import {
|
|
|
30
30
|
getConversationType,
|
|
31
31
|
getMessages,
|
|
32
32
|
purgePrivateConversations,
|
|
33
|
+
sweepStaleReducerJobs,
|
|
33
34
|
} from "../memory/conversation-crud.js";
|
|
34
35
|
import { resolveConversationId } from "../memory/conversation-key-store.js";
|
|
35
36
|
import { initializeDb } from "../memory/db.js";
|
|
@@ -206,16 +207,40 @@ export async function runDaemon(): Promise<void> {
|
|
|
206
207
|
targetId: summaryId,
|
|
207
208
|
});
|
|
208
209
|
}
|
|
210
|
+
for (const obsId of deletedMemory.deletedObservationIds) {
|
|
211
|
+
enqueueMemoryJob("delete_qdrant_vectors", {
|
|
212
|
+
targetType: "observation",
|
|
213
|
+
targetId: obsId,
|
|
214
|
+
});
|
|
215
|
+
}
|
|
216
|
+
for (const chunkId of deletedMemory.deletedChunkIds) {
|
|
217
|
+
enqueueMemoryJob("delete_qdrant_vectors", {
|
|
218
|
+
targetType: "chunk",
|
|
219
|
+
targetId: chunkId,
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
for (const episodeId of deletedMemory.deletedEpisodeIds) {
|
|
223
|
+
enqueueMemoryJob("delete_qdrant_vectors", {
|
|
224
|
+
targetType: "episode",
|
|
225
|
+
targetId: episodeId,
|
|
226
|
+
});
|
|
227
|
+
}
|
|
209
228
|
if (
|
|
210
229
|
deletedMemory.segmentIds.length > 0 ||
|
|
211
230
|
deletedMemory.orphanedItemIds.length > 0 ||
|
|
212
|
-
deletedMemory.deletedSummaryIds.length > 0
|
|
231
|
+
deletedMemory.deletedSummaryIds.length > 0 ||
|
|
232
|
+
deletedMemory.deletedObservationIds.length > 0 ||
|
|
233
|
+
deletedMemory.deletedChunkIds.length > 0 ||
|
|
234
|
+
deletedMemory.deletedEpisodeIds.length > 0
|
|
213
235
|
) {
|
|
214
236
|
log.info(
|
|
215
237
|
{
|
|
216
238
|
segments: deletedMemory.segmentIds.length,
|
|
217
239
|
orphanedItems: deletedMemory.orphanedItemIds.length,
|
|
218
240
|
deletedSummaries: deletedMemory.deletedSummaryIds.length,
|
|
241
|
+
deletedObservations: deletedMemory.deletedObservationIds.length,
|
|
242
|
+
deletedChunks: deletedMemory.deletedChunkIds.length,
|
|
243
|
+
deletedEpisodes: deletedMemory.deletedEpisodeIds.length,
|
|
219
244
|
},
|
|
220
245
|
"Enqueued Qdrant vector cleanup jobs for purged private conversations",
|
|
221
246
|
);
|
|
@@ -240,6 +265,24 @@ export async function runDaemon(): Promise<void> {
|
|
|
240
265
|
);
|
|
241
266
|
}
|
|
242
267
|
|
|
268
|
+
// Sweep dirty conversations whose tail messages are already past the
|
|
269
|
+
// idle delay — they should have been reduced while the daemon was down.
|
|
270
|
+
// Enqueue immediate reducer jobs so the memory worker picks them up.
|
|
271
|
+
try {
|
|
272
|
+
const sweepCount = sweepStaleReducerJobs();
|
|
273
|
+
if (sweepCount > 0) {
|
|
274
|
+
log.info(
|
|
275
|
+
{ sweepCount },
|
|
276
|
+
`Enqueued reducer jobs for ${sweepCount} stale dirty conversation(s)`,
|
|
277
|
+
);
|
|
278
|
+
}
|
|
279
|
+
} catch (err) {
|
|
280
|
+
log.warn(
|
|
281
|
+
{ err },
|
|
282
|
+
"Startup sweep for stale reducer jobs failed — continuing startup",
|
|
283
|
+
);
|
|
284
|
+
}
|
|
285
|
+
|
|
243
286
|
// Ensure a vellum guardian binding exists so the identity system works
|
|
244
287
|
// without requiring a manual bootstrap step.
|
|
245
288
|
try {
|