@desplega.ai/agent-swarm 1.92.1 → 1.92.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,14 @@ export const argsSchema = z.object({
18
18
  .boolean()
19
19
  .optional()
20
20
  .describe("Include high-frequency tool-triplet candidates for future seed scripts (default true)"),
21
+ includeScriptUsage: z
22
+ .boolean()
23
+ .optional()
24
+ .describe("Include actual script run, creation, and edit metrics (default true)"),
25
+ includeCostAndTokens: z
26
+ .boolean()
27
+ .optional()
28
+ .describe("Include session cost and token metrics with honesty rails (default true)"),
21
29
  includeByAgent: z
22
30
  .boolean()
23
31
  .optional()
@@ -59,6 +67,17 @@ function percent(part: number, total: number): number {
59
67
  return total > 0 ? round1((part / total) * 100) : 0;
60
68
  }
61
69
 
70
/**
 * Rounds a number to four decimal places.
 *
 * @param value - The number to round.
 * @returns `value` scaled by 10,000, rounded with `Math.round`, and scaled back.
 */
function round4(value: number): number {
  const scale = 10000;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
73
+
74
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is not mutated.
 * The computed rank is clamped into the valid index range, so `p <= 0` yields
 * the smallest value and `p >= 100` yields the largest.
 *
 * @param values - Samples to rank; may be unsorted.
 * @param p - Percentile on a 0–100 scale.
 * @returns The selected sample, or `null` when `values` is empty.
 */
function percentile(values: number[], p: number): number | null {
  const count = values.length;
  if (count === 0) {
    return null;
  }
  const ascending = values.slice().sort((x, y) => x - y);
  const rank = Math.ceil((p / 100) * count) - 1;
  const clamped = Math.max(0, Math.min(count - 1, rank));
  return ascending[clamped] ?? null;
}
80
+
62
81
  function extractToolName(content: string): string | null {
63
82
  const match = content.match(/"type"\s*:\s*"tool_use"[\s\S]*?"name"\s*:\s*"([^"]+)"/);
64
83
  return match?.[1] ?? null;
@@ -106,6 +125,220 @@ function cosineSimilarity(a: Float32Array, b: Float32Array): number {
106
125
  return dot / (Math.sqrt(na) * Math.sqrt(nb));
107
126
  }
108
127
 
128
/**
 * Condenses script execution, creation, and edit rows into the `scriptUsage`
 * insight payload.
 *
 * @param rows - One entry per script run; reads `scriptName`, `kind`,
 *   `status`, and `durationMs`.
 * @param creationRows - Pre-aggregated creation counts; reads `scope`,
 *   `isScratch`, and `count`.
 * @param editRows - Pre-aggregated edit counts; reads `scope` and `count`.
 * @param toolRows - Pre-aggregated tool-call counts; reads `tool` and `calls`.
 * @returns An object with `source`, `runs`, `creations`, `edits`, and
 *   `mcpToolCalls`. `runs.perScript` holds at most the 20 scripts with the
 *   most runs. Success rates divide completed runs by ALL runs, including
 *   runs whose status is not terminal.
 */
function summarizeScriptUsage(rows: any[], creationRows: any[], editRows: any[], toolRows: any[]) {
  const failureStatuses = new Set(["failed", "cancelled", "aborted_limit"]);
  const terminalStatuses = new Set(["completed", "failed", "cancelled", "aborted_limit"]);

  type ScriptTally = {
    scriptName: string;
    runs: number;
    completed: number;
    failed: number;
    inline: number;
    workflow: number;
    durations: number[];
  };

  // Single pass over the runs: overall counters and per-script tallies are
  // updated together.
  const tallies = new Map<string, ScriptTally>();
  const allDurations: number[] = [];
  let inlineRuns = 0;
  let workflowRuns = 0;
  let completedRuns = 0;
  let failedRuns = 0;
  let openRuns = 0;

  for (const run of rows) {
    // Runs without a script name are grouped under one shared label.
    const label = String(run.scriptName || "(inline source)");
    let tally = tallies.get(label);
    if (tally === undefined) {
      tally = {
        scriptName: label,
        runs: 0,
        completed: 0,
        failed: 0,
        inline: 0,
        workflow: 0,
        durations: [],
      };
      tallies.set(label, tally);
    }

    tally.runs += 1;
    if (run.kind === "inline") {
      tally.inline += 1;
      inlineRuns += 1;
    }
    if (run.kind === "workflow") {
      tally.workflow += 1;
      workflowRuns += 1;
    }
    if (run.status === "completed") {
      tally.completed += 1;
      completedRuns += 1;
    }
    const statusText = String(run.status);
    if (failureStatuses.has(statusText)) {
      tally.failed += 1;
      failedRuns += 1;
    }
    if (!terminalStatuses.has(statusText)) {
      openRuns += 1;
    }

    // Only strictly positive durations feed the percentile calculations.
    const elapsed = asNumber(run.durationMs);
    if (elapsed > 0) {
      tally.durations.push(elapsed);
      allDurations.push(elapsed);
    }
  }

  const rankedScripts = Array.from(tallies.values(), (tally) => ({
    scriptName: tally.scriptName,
    runs: tally.runs,
    completed: tally.completed,
    failed: tally.failed,
    successRate: percent(tally.completed, tally.runs),
    durationP50Ms: percentile(tally.durations, 50),
    durationP95Ms: percentile(tally.durations, 95),
    inline: tally.inline,
    workflow: tally.workflow,
  }));
  rankedScripts.sort((left, right) => right.runs - left.runs);
  const perScript = rankedScripts.slice(0, 20);

  // Scratch creations are counted apart and never contribute to byScope.
  const creationsByScope: Record<string, number> = {};
  let creations = 0;
  let scratchCreations = 0;
  for (const entry of creationRows) {
    const amount = asNumber(entry.count);
    if (asNumber(entry.isScratch) === 1) {
      scratchCreations += amount;
      continue;
    }
    creations += amount;
    const scope = String(entry.scope || "unknown");
    creationsByScope[scope] = (creationsByScope[scope] ?? 0) + amount;
  }

  const editsByScope: Record<string, number> = {};
  let edits = 0;
  for (const entry of editRows) {
    const amount = asNumber(entry.count);
    edits += amount;
    const scope = String(entry.scope || "unknown");
    editsByScope[scope] = (editsByScope[scope] ?? 0) + amount;
  }

  return {
    source: {
      authoritativeRuns: "script_runs",
      mcpCallSignal: "session_logs tool_use for script tools",
      reconciliation:
        "`script-run` via MCP calls /api/scripts/run, which records kind='inline' rows in script_runs; launch-script-run/workflows record kind='workflow'. session_logs counts agent tool calls and must not be added to script_runs totals.",
    },
    runs: {
      total: rows.length,
      inline: inlineRuns,
      workflow: workflowRuns,
      completed: completedRuns,
      failed: failedRuns,
      runningOrPaused: openRuns,
      successRate: percent(completedRuns, rows.length),
      durationP50Ms: percentile(allDurations, 50),
      durationP95Ms: percentile(allDurations, 95),
      perScript,
    },
    creations: {
      totalNonScratch: creations,
      scratch: scratchCreations,
      byScope: creationsByScope,
    },
    edits: {
      total: edits,
      byScope: editsByScope,
    },
    mcpToolCalls: toolRows.map((entry) => ({ tool: entry.tool, calls: asNumber(entry.calls) })),
  };
}
248
+
249
/**
 * Condenses session cost rows into the `costAndTokens` insight payload.
 *
 * Rows whose `costSource` is "harness" or "pricing-table" are treated as
 * trusted. The headline per-task average divides trusted spend on rows that
 * have a `taskId` by the number of distinct task ids among those rows, so
 * unpriced rows and rows without a task never influence it.
 *
 * @param rows - One entry per session cost record; reads `taskId`,
 *   `totalCostUsd`, `costSource`, the token fields, `numTurns`, `model`,
 *   `agentName`, and `provider`.
 * @returns Totals, trusted/unpriced/non-task breakdowns, token sums, counts
 *   of rows with missing `cacheWriteTokens` / `numTurns`, and groupings by
 *   model, agent, provider, and cost source (each sorted by spend,
 *   descending). `avgCostPerTaskUsd` is `null` when there is no trusted row
 *   with a task id.
 */
function summarizeCostAndTokens(rows: any[]) {
  const trustedSources = new Set(["harness", "pricing-table"]);

  const isTrusted = (row: any): boolean => trustedSources.has(String(row.costSource));
  const isUnpriced = (row: any): boolean => String(row.costSource) === "unpriced";
  const isMissing = (value: unknown): boolean => value === null || value === undefined;

  // Adds up totalCostUsd in row order, starting from zero.
  const spendOf = (subset: any[]): number => {
    let total = 0;
    for (const row of subset) {
      total += asNumber(row.totalCostUsd);
    }
    return total;
  };

  const trustedRows = rows.filter(isTrusted);
  const unpricedRows = rows.filter(isUnpriced);
  const trustedTaskRows = trustedRows.filter((row) => row.taskId);
  const nonTaskRows = rows.filter((row) => !row.taskId);

  const trustedTaskIds = new Set(trustedTaskRows.map((row) => String(row.taskId)));
  const trustedTaskSpend = spendOf(trustedTaskRows);
  const totalSpend = spendOf(rows);
  const trustedSpend = spendOf(trustedRows);

  // Rows with a null/undefined token field are skipped rather than counted as zero.
  const sumToken = (field: string): number => {
    let total = 0;
    for (const row of rows) {
      const raw = row[field];
      if (!isMissing(raw)) {
        total += asNumber(raw);
      }
    }
    return total;
  };

  const unknownCount = (field: string): number => {
    let missing = 0;
    for (const row of rows) {
      if (isMissing(row[field])) {
        missing += 1;
      }
    }
    return missing;
  };

  // Buckets rows by the given field; falsy values fall into "unknown".
  const groupBy = (field: string) => {
    const buckets = new Map<
      string,
      {
        key: string;
        rows: number;
        spendUsd: number;
        trustedSpendUsd: number;
        unpricedRows: number;
      }
    >();

    for (const row of rows) {
      const key = String(row[field] || "unknown");
      let bucket = buckets.get(key);
      if (bucket === undefined) {
        bucket = { key, rows: 0, spendUsd: 0, trustedSpendUsd: 0, unpricedRows: 0 };
        buckets.set(key, bucket);
      }
      const cost = asNumber(row.totalCostUsd);
      bucket.rows += 1;
      bucket.spendUsd += cost;
      if (isTrusted(row)) {
        bucket.trustedSpendUsd += cost;
      }
      if (isUnpriced(row)) {
        bucket.unpricedRows += 1;
      }
    }

    // Amounts are rounded before sorting, so ordering uses the rounded spend.
    const summarized = Array.from(buckets.values(), (bucket) => ({
      key: bucket.key,
      rows: bucket.rows,
      spendUsd: round4(bucket.spendUsd),
      trustedSpendUsd: round4(bucket.trustedSpendUsd),
      unpricedRows: bucket.unpricedRows,
    }));
    return summarized.sort((left, right) => right.spendUsd - left.spendUsd);
  };

  const headlineTaskCount = trustedTaskIds.size;
  const avgCostPerTaskUsd =
    headlineTaskCount > 0 ? round4(trustedTaskSpend / headlineTaskCount) : null;

  return {
    source: {
      table: "session_costs",
      providerDerivation:
        "provider is derived from agents.harness_provider, then agents.provider, because session_costs does not carry a provider column",
      headlineAvgCostRule:
        "avgCostPerTaskUsd excludes unpriced rows and rows with null taskId; null-task sessions are reported separately",
    },
    rows: rows.length,
    taskCountForHeadlineAvg: headlineTaskCount,
    avgCostPerTaskUsd,
    totalSpendUsd: round4(totalSpend),
    trustedSpendUsd: round4(trustedSpend),
    trustedRows: trustedRows.length,
    trustedRowPercent: percent(trustedRows.length, rows.length),
    unpricedRows: unpricedRows.length,
    unpricedSpendUsd: round4(spendOf(unpricedRows)),
    nonTaskSessionRows: nonTaskRows.length,
    nonTaskSessionSpendUsd: round4(spendOf(nonTaskRows)),
    tokenTotals: {
      inputTokens: sumToken("inputTokens"),
      outputTokens: sumToken("outputTokens"),
      cacheReadTokens: sumToken("cacheReadTokens"),
      cacheWriteTokens: sumToken("cacheWriteTokens"),
      reasoningOutputTokens: sumToken("reasoningOutputTokens"),
      thinkingTokens: sumToken("thinkingTokens"),
    },
    unknownCounts: {
      cacheWriteTokens: unknownCount("cacheWriteTokens"),
      numTurns: unknownCount("numTurns"),
    },
    byModel: groupBy("model"),
    byAgent: groupBy("agentName"),
    byProvider: groupBy("provider"),
    byCostSource: groupBy("costSource"),
  };
}
341
+
109
342
  /**
110
343
  * Daily compounding insights — compressed JSON for Phase 0 evolution.
111
344
  *
@@ -121,6 +354,8 @@ export default async function compoundInsights(args: any, ctx: any) {
121
354
  const includeScheduleHealth = parsed.data.includeScheduleHealth !== false;
122
355
  const includeMemoryHealth = parsed.data.includeMemoryHealth !== false;
123
356
  const includeScriptCandidates = parsed.data.includeScriptCandidates !== false;
357
+ const includeScriptUsage = parsed.data.includeScriptUsage !== false;
358
+ const includeCostAndTokens = parsed.data.includeCostAndTokens !== false;
124
359
  const includeByAgent = parsed.data.includeByAgent !== false;
125
360
  const publishPage = parsed.data.publishPage !== false;
126
361
 
@@ -378,6 +613,101 @@ export default async function compoundInsights(args: any, ctx: any) {
378
613
  }));
379
614
  }
380
615
 
616
+ // Actual script usage. Authoritative run counts come from `script_runs`;
617
+ // session_logs tool_use rows are a separate MCP-call signal for reconciliation
618
+ // and are intentionally not added to run totals.
619
+ if (includeScriptUsage) {
620
+ const runRows = rowsToObjects(
621
+ await ctx.swarm.db_query({
622
+ sql: `WITH journal_durations AS (
623
+ SELECT runId, sum(durationMs) AS journalDurationMs
624
+ FROM script_run_journal
625
+ WHERE durationMs IS NOT NULL
626
+ GROUP BY runId
627
+ )
628
+ SELECT sr.scriptName, sr.kind, sr.status, sr.startedAt, sr.finishedAt,
629
+ COALESCE(
630
+ jd.journalDurationMs,
631
+ CASE
632
+ WHEN sr.finishedAt IS NOT NULL
633
+ THEN CAST((julianday(sr.finishedAt) - julianday(sr.startedAt)) * 86400000 AS INTEGER)
634
+ ELSE NULL
635
+ END
636
+ ) AS durationMs
637
+ FROM script_runs sr
638
+ LEFT JOIN journal_durations jd ON jd.runId = sr.id
639
+ WHERE sr.startedAt > ${w}
640
+ ORDER BY sr.startedAt DESC`,
641
+ }),
642
+ );
643
+ const creationRows = rowsToObjects(
644
+ await ctx.swarm.db_query({
645
+ sql: `SELECT scope, isScratch, count(*) AS count
646
+ FROM scripts
647
+ WHERE createdAt > ${w}
648
+ GROUP BY scope, isScratch`,
649
+ }),
650
+ );
651
+ const editRows = rowsToObjects(
652
+ await ctx.swarm.db_query({
653
+ sql: `SELECT s.scope, count(*) AS count
654
+ FROM script_versions sv
655
+ JOIN scripts s ON s.id = sv.scriptId
656
+ WHERE sv.changedAt > ${w} AND sv.version > 1
657
+ GROUP BY s.scope`,
658
+ }),
659
+ );
660
+ const scriptToolRows = rowsToObjects(
661
+ await ctx.swarm.db_query({
662
+ sql: `WITH tu AS (
663
+ SELECT substr(content, instr(content,'"type":"tool_use"')) AS tail,
664
+ json_extract(content, '$.tool_name') as jsonToolName
665
+ FROM session_logs
666
+ WHERE createdAt > ${w}
667
+ AND (content LIKE '%script-run%'
668
+ OR content LIKE '%launch-script-run%'
669
+ OR content LIKE '%get-script-run%'
670
+ OR content LIKE '%list-script-runs%')
671
+ ),
672
+ nm AS (
673
+ SELECT COALESCE(
674
+ jsonToolName,
675
+ CASE
676
+ WHEN instr(tail,'"name":"') > 0
677
+ THEN substr(substr(tail, instr(tail,'"name":"')+8), 1, instr(substr(tail, instr(tail,'"name":"')+8), '"')-1)
678
+ ELSE NULL
679
+ END
680
+ ) AS tool
681
+ FROM tu
682
+ )
683
+ SELECT tool, count(*) AS calls
684
+ FROM nm
685
+ WHERE tool IS NOT NULL AND tool LIKE '%script%'
686
+ GROUP BY tool
687
+ ORDER BY calls DESC`,
688
+ }),
689
+ );
690
+ insights.scriptUsage = summarizeScriptUsage(runRows, creationRows, editRows, scriptToolRows);
691
+ }
692
+
693
+ // Cost and token accounting. `costSource='unpriced'` rows are excluded from
694
+ // the headline per-task average, and null taskId rows are reported separately.
695
+ if (includeCostAndTokens) {
696
+ const costRows = rowsToObjects(
697
+ await ctx.swarm.db_query({
698
+ sql: `SELECT sc.taskId, sc.agentId, COALESCE(a.name, sc.agentId, 'unknown') AS agentName,
699
+ COALESCE(a.harness_provider, a.provider, 'unknown') AS provider,
700
+ sc.totalCostUsd, sc.inputTokens, sc.outputTokens, sc.cacheReadTokens,
701
+ sc.cacheWriteTokens, sc.reasoningOutputTokens, sc.thinkingTokens,
702
+ sc.numTurns, sc.model, sc.costSource
703
+ FROM session_costs sc
704
+ LEFT JOIN agents a ON a.id = sc.agentId
705
+ WHERE sc.createdAt > ${w}`,
706
+ }),
707
+ );
708
+ insights.costAndTokens = summarizeCostAndTokens(costRows);
709
+ }
710
+
381
711
  // Per-agent breakdown — covers every agent that ran a task in the window.
382
712
  if (includeByAgent) {
383
713
  insights.byAgent = rowsToObjects(
@@ -431,6 +761,31 @@ export default async function compoundInsights(args: any, ctx: any) {
431
761
  action: "Consider turning this repeated workflow into a reusable seeded script.",
432
762
  samples: [candidate],
433
763
  }));
764
+ const scriptUsageFindings = insights.scriptUsage
765
+ ? [
766
+ {
767
+ id: "script-usage.actual-runs",
768
+ severity: "low",
769
+ summary: `${insights.scriptUsage.runs.total} actual script run(s): ${insights.scriptUsage.runs.inline} one-off, ${insights.scriptUsage.runs.workflow} recurring/workflow.`,
770
+ action: "Use script_runs as the authoritative run count; use session_logs only as an MCP-call reconciliation signal.",
771
+ samples: [insights.scriptUsage],
772
+ },
773
+ ]
774
+ : [];
775
+ const costFindings = insights.costAndTokens
776
+ ? [
777
+ {
778
+ id: "cost-and-tokens.headline",
779
+ severity:
780
+ insights.costAndTokens.unpricedRows > 0 || insights.costAndTokens.nonTaskSessionRows > 0
781
+ ? "medium"
782
+ : "low",
783
+ summary: `$${insights.costAndTokens.totalSpendUsd} total session spend; avg task cost $${insights.costAndTokens.avgCostPerTaskUsd ?? "n/a"} over trusted task rows.`,
784
+ action: "Keep unpriced and null-task session spend separate from the headline per-task average.",
785
+ samples: [insights.costAndTokens],
786
+ },
787
+ ]
788
+ : [];
434
789
 
435
790
  insights.page = await publishCatalogReportPage(
436
791
  {
@@ -444,6 +799,8 @@ export default async function compoundInsights(args: any, ctx: any) {
444
799
  ["Completed", insights.taskSummary.completed],
445
800
  ["Failed", insights.taskSummary.failed],
446
801
  ["Failure clusters", insights.failureClusters?.length || 0],
802
+ ["Script runs", insights.scriptUsage?.runs?.total ?? 0],
803
+ ["Total spend", insights.costAndTokens?.totalSpendUsd ?? 0],
447
804
  ],
448
805
  sections: [
449
806
  {
@@ -481,6 +838,20 @@ export default async function compoundInsights(args: any, ctx: any) {
481
838
  checks: { candidates: scriptFindings.length },
482
839
  findings: scriptFindings,
483
840
  },
841
+ {
842
+ key: "script-usage",
843
+ goal: "Track actual one-off and recurring script execution without double-counting MCP tool-use logs.",
844
+ findingCount: scriptUsageFindings.length,
845
+ checks: insights.scriptUsage ?? {},
846
+ findings: scriptUsageFindings,
847
+ },
848
+ {
849
+ key: "cost-and-tokens",
850
+ goal: "Track per-task cost and token consumption while separating unpriced and non-task sessions.",
851
+ findingCount: costFindings.length,
852
+ checks: insights.costAndTokens ?? {},
853
+ findings: costFindings,
854
+ },
484
855
  ],
485
856
  appendix: insights,
486
857
  },
package/src/http/index.ts CHANGED
@@ -556,6 +556,15 @@ httpServer
556
556
 
557
557
  // Start expired-memory garbage collector (1-hour tick, immediate first run)
558
558
  startMemoryGc();
559
+
560
+ // Background backfill: re-embed any agent_memory rows with wrong-dimension
561
+ // embeddings (e.g. 1536d instead of 512d). Non-blocking, idempotent, no-op
562
+ // when the DB is clean. See src/be/memory/boot-reembed.ts.
563
+ import("../be/memory/boot-reembed")
564
+ .then(({ runBootReembed }) => runBootReembed())
565
+ .catch((err) => {
566
+ console.error("[boot-reembed] startup backfill failed (non-fatal):", err);
567
+ });
559
568
  })
560
569
  .on("error", (err) => {
561
570
  console.error("HTTP Server Error:", err);
@@ -387,6 +387,8 @@ export async function handleMemory(
387
387
  name: r.name,
388
388
  content: r.content,
389
389
  similarity: r.similarity,
390
+ rawSimilarity: r.rawSimilarity,
391
+ compositeScore: r.compositeScore,
390
392
  source: r.source,
391
393
  scope: r.scope,
392
394
  })),
@@ -442,6 +444,8 @@ export async function handleMemory(
442
444
  scope: r.scope,
443
445
  source: r.source,
444
446
  similarity: r.similarity,
447
+ rawSimilarity: r.rawSimilarity,
448
+ compositeScore: r.compositeScore,
445
449
  createdAt: r.createdAt,
446
450
  accessedAt: r.accessedAt,
447
451
  accessCount: r.accessCount ?? 0,
@@ -2,6 +2,7 @@ import {
2
2
  createTaskExtended,
3
3
  getActiveTaskCount,
4
4
  getAgentById,
5
+ getDependentTasks,
5
6
  getLeadAgent,
6
7
  getTaskAttachments,
7
8
  getTaskById,
@@ -115,6 +116,17 @@ export function createWorkerTaskFollowUp(args: {
115
116
  task_id: task.id,
116
117
  });
117
118
  followUpDescription = failedResult.text;
119
+
120
+ // Enrich with cascade info: list dependents that were cascade-failed.
121
+ const cascadedDeps = getDependentTasks(task.id, { includeTerminal: true }).filter(
122
+ (t) => t.status === "failed" && t.failureReason?.includes("Blocked dependency"),
123
+ );
124
+ if (cascadedDeps.length > 0) {
125
+ const depLines = cascadedDeps.map(
126
+ (d) => `- ${d.id.slice(0, 8)} — "${d.task.slice(0, 100)}" (${d.failureReason})`,
127
+ );
128
+ followUpDescription += `\n\n⚠️ Cascade impact: ${cascadedDeps.length} dependent task(s) were also failed because they depend on this task:\n${depLines.join("\n")}`;
129
+ }
118
130
  }
119
131
 
120
132
  return createTaskExtended(followUpDescription, {
@@ -123,13 +123,13 @@ describe("Memory E2E Lifecycle", () => {
123
123
 
124
124
  describe("reranking affects result order", () => {
125
125
  test("newer memory with same embedding ranks higher", () => {
126
- // Create old memory
126
+ // Use task_completion source so recency decay applies (manual has no decay)
127
127
  const old = store.store({
128
128
  agentId: agentA,
129
129
  scope: "agent",
130
130
  name: "old knowledge",
131
131
  content: "Old deployment docs",
132
- source: "manual",
132
+ source: "task_completion",
133
133
  });
134
134
  store.updateEmbedding(old.id, new Float32Array([0.5, 0.5, 0.0]), "test-model");
135
135
 
@@ -144,7 +144,7 @@ describe("Memory E2E Lifecycle", () => {
144
144
  scope: "agent",
145
145
  name: "fresh knowledge",
146
146
  content: "New deployment docs",
147
- source: "manual",
147
+ source: "task_completion",
148
148
  });
149
149
  store.updateEmbedding(fresh.id, new Float32Array([0.5, 0.5, 0.0]), "test-model");
150
150
 
@@ -302,7 +302,7 @@ describe("Memory E2E Lifecycle", () => {
302
302
  content: "Agent A only",
303
303
  source: "manual",
304
304
  });
305
- store.updateEmbedding(m1.id, new Float32Array([1, 0, 0]), "test-model");
305
+ store.updateEmbedding(m1.id, new Float32Array([1, 0.3, 0.3]), "test-model");
306
306
  agentMemId = m1.id;
307
307
 
308
308
  const m2 = store.store({
@@ -312,7 +312,7 @@ describe("Memory E2E Lifecycle", () => {
312
312
  content: "Visible to all",
313
313
  source: "manual",
314
314
  });
315
- store.updateEmbedding(m2.id, new Float32Array([0, 1, 0]), "test-model");
315
+ store.updateEmbedding(m2.id, new Float32Array([0.3, 1, 0.3]), "test-model");
316
316
  swarmMemId = m2.id;
317
317
 
318
318
  const m3 = store.store({
@@ -322,7 +322,7 @@ describe("Memory E2E Lifecycle", () => {
322
322
  content: "Agent B only",
323
323
  source: "manual",
324
324
  });
325
- store.updateEmbedding(m3.id, new Float32Array([0, 0, 1]), "test-model");
325
+ store.updateEmbedding(m3.id, new Float32Array([0.3, 0.3, 1]), "test-model");
326
326
  otherAgentMemId = m3.id;
327
327
  });
328
328
 
@@ -558,13 +558,12 @@ describe("memory-rater v1.5 — cross-cutting e2e", () => {
558
558
  );
559
559
 
560
560
  // The usefulness factor at Beta(1,1) is exactly 1.0; a memory with no
561
- // ratings should score within numerical noise of similarity * recency *
562
- // access (the original pre-v1.5 formula).
561
+ // ratings should score = similarity * recency * access * sourceQuality * usefulness.
562
+ // For source=manual: sourceQuality=1.5, recency=1.0 (no decay for manual),
563
+ // access=1.0, usefulness=1.0. So score = 0.5 * 1.5 = 0.75.
563
564
  const fresh = buildCandidate(0.5);
564
565
  const score = rerank([fresh], { limit: 1 })[0]!.similarity;
565
- // recency at age = 0 is exactly 1; access_boost at count=0 is exactly 1;
566
- // usefulness at (1,1) is exactly 1. So score === 0.5 to machine precision.
567
- expect(score).toBeCloseTo(0.5, 10);
566
+ expect(score).toBeCloseTo(0.75, 10);
568
567
  });
569
568
  });
570
569