@poncho-ai/harness 0.24.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,363 @@
1
import { mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
import { dirname, resolve } from "node:path";
import { defineTool, type ToolDefinition } from "@poncho-ai/sdk";
import type { StateConfig } from "./state.js";
import {
  ensureAgentIdentity,
  getAgentStoreDirectory,
  slugifyStorageComponent,
  STORAGE_SCHEMA_VERSION,
} from "./agent-identity.js";
import { createRawKVStore, type RawKVStore } from "./kv-store.js";
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // Data model
15
+ // ---------------------------------------------------------------------------
16
+
17
/** Progress state of a todo item; new items default to `"pending"`. */
export type TodoStatus =
  | "pending"
  | "in_progress"
  | "completed";

/** Priority level of a todo item; new items default to `"medium"`. */
export type TodoPriority =
  | "high"
  | "medium"
  | "low";
19
+
20
/** A single todo entry tracked for one conversation. */
export interface TodoItem {
  /** Short identifier used by the update/remove tools to address the item. */
  id: string;
  /** Description of the task. */
  content: string;
  /** Current progress state. */
  status: TodoStatus;
  /** Priority level. */
  priority: TodoPriority;
  /** Creation time as a `Date.now()` millisecond timestamp. */
  createdAt: number;
  /** Time of the last modification as a `Date.now()` millisecond timestamp. */
  updatedAt: number;
}
28
+
29
/** Persistence contract for per-conversation todo lists. */
export interface TodoStore {
  /** Resolves to the todo list stored under `conversationId`. */
  get(conversationId: string): Promise<Array<TodoItem>>;
  /** Stores `todos` as the complete list for `conversationId`. */
  set(conversationId: string, todos: Array<TodoItem>): Promise<void>;
}
33
+
34
+ // ---------------------------------------------------------------------------
35
+ // Helpers
36
+ // ---------------------------------------------------------------------------
37
+
38
/** Every accepted status value; also used as the `enum` list in the tool input schemas. */
const VALID_STATUSES: Array<TodoStatus> = [
  "pending",
  "in_progress",
  "completed",
];

/** Every accepted priority value; also used as the `enum` list in the tool input schemas. */
const VALID_PRIORITIES: Array<TodoPriority> = [
  "high",
  "medium",
  "low",
];

/** Name of the sub-directory (inside the agent store directory) holding the todo files. */
const TODOS_DIRECTORY = "todos";
41
+
42
/**
 * Writes `payload` to `filePath` as pretty-printed JSON by first writing a
 * sibling temporary file and then renaming it over the target.
 *
 * The temporary file name is unique per call, so overlapping writes to the
 * same target never share a temp file (a shared name lets one writer clobber
 * or rename away the other's file). If writing or renaming fails, the
 * temporary file is removed before the error is rethrown.
 *
 * @param filePath Destination path; missing parent directories are created.
 * @param payload Value serialized with `JSON.stringify(payload, null, 2)`.
 * @throws Whatever `mkdir`, `writeFile` or `rename` rejects with.
 */
const writeJsonAtomic = async (filePath: string, payload: unknown): Promise<void> => {
  await mkdir(dirname(filePath), { recursive: true });
  const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
  const tmpPath = `${filePath}.${uniqueSuffix}.tmp`;
  try {
    await writeFile(tmpPath, JSON.stringify(payload, null, 2), "utf8");
    await rename(tmpPath, filePath);
  } catch (error) {
    // Best-effort cleanup: never let a cleanup failure mask the original error.
    await rm(tmpPath, { force: true }).catch(() => undefined);
    throw error;
  }
};
48
+
49
/**
 * Converts an untrusted, already-JSON-parsed value into a list of well-formed
 * todo items.
 *
 * Entries that are not objects, or that lack a string `id` or string
 * `content`, are dropped. For the remaining entries every other field is
 * normalized so the result really satisfies `TodoItem`: an unknown `status`
 * becomes `"pending"`, an unknown `priority` becomes `"medium"`, a
 * non-finite `createdAt` becomes `0`, and a non-finite `updatedAt` falls
 * back to `createdAt`.
 *
 * @param raw Value obtained from `JSON.parse` (or anything else).
 * @returns A new array of normalized items; `[]` when `raw` is not an array.
 */
const parseTodoList = (raw: unknown): TodoItem[] => {
  if (!Array.isArray(raw)) return [];
  const todos: TodoItem[] = [];
  for (const entry of raw as unknown[]) {
    if (typeof entry !== "object" || entry === null) continue;
    const { id, content, status, priority, createdAt, updatedAt } = entry as Record<string, unknown>;
    if (typeof id !== "string" || typeof content !== "string") continue;
    const created = typeof createdAt === "number" && Number.isFinite(createdAt) ? createdAt : 0;
    const updated = typeof updatedAt === "number" && Number.isFinite(updatedAt) ? updatedAt : created;
    todos.push({
      id,
      content,
      status: VALID_STATUSES.includes(status as TodoStatus) ? (status as TodoStatus) : "pending",
      priority: VALID_PRIORITIES.includes(priority as TodoPriority)
        ? (priority as TodoPriority)
        : "medium",
      createdAt: created,
      updatedAt: updated,
    });
  }
  return todos;
};
59
+
60
/**
 * Produces a short (8 character) lowercase-hex identifier for a todo item.
 *
 * Uses the first 8 characters of `crypto.randomUUID()` when it is available.
 * Otherwise falls back to 32 random bits from `Math.random()`. The fallback
 * must not be derived from the leading digits of `Date.now()`: those stay
 * constant for long stretches, which would hand out the same id repeatedly.
 */
const generateId = (): string => {
  const uuid = globalThis.crypto?.randomUUID?.();
  if (typeof uuid === "string") return uuid.slice(0, 8);
  return Math.floor(Math.random() * 0x100000000)
    .toString(16)
    .padStart(8, "0");
};
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // InMemoryTodoStore
65
+ // ---------------------------------------------------------------------------
66
+
67
/**
 * Process-local todo store backed by a `Map`.
 *
 * Lists are stored and returned by reference (no copying), and nothing is
 * persisted outside this instance.
 */
class InMemoryTodoStore implements TodoStore {
  private readonly store: Map<string, TodoItem[]> = new Map();

  /** Resolves to the stored list, or to a new empty array when none exists. */
  async get(conversationId: string): Promise<TodoItem[]> {
    const existing = this.store.get(conversationId);
    if (existing == null) {
      return [];
    }
    return existing;
  }

  /** Replaces the list held for `conversationId` with `todos`. */
  async set(conversationId: string, todos: TodoItem[]): Promise<void> {
    this.store.set(conversationId, todos);
  }
}
78
+
79
+ // ---------------------------------------------------------------------------
80
+ // FileTodoStore — one JSON file per conversation
81
+ // ---------------------------------------------------------------------------
82
+
83
/**
 * Todo store that keeps one JSON file per conversation inside the `todos`
 * sub-directory of the agent store directory.
 */
class FileTodoStore implements TodoStore {
  /** Resolved todos directory; empty until the first call that needs it. */
  private todosDir = "";

  constructor(private readonly workingDir: string) {}

  /** Resolves, creates and remembers the directory that holds the todo files. */
  private async ensureTodosDir(): Promise<string> {
    if (!this.todosDir) {
      const identity = await ensureAgentIdentity(this.workingDir);
      this.todosDir = resolve(getAgentStoreDirectory(identity), TODOS_DIRECTORY);
      await mkdir(this.todosDir, { recursive: true });
    }
    return this.todosDir;
  }

  /** Builds the JSON file path for a conversation from its slugified id. */
  private async filePath(conversationId: string): Promise<string> {
    const fileName = `${slugifyStorageComponent(conversationId)}.json`;
    return resolve(await this.ensureTodosDir(), fileName);
  }

  /**
   * Reads and parses the conversation's file. Any failure along the way
   * (path resolution, reading, JSON parsing) yields an empty list.
   */
  async get(conversationId: string): Promise<TodoItem[]> {
    try {
      const location = await this.filePath(conversationId);
      const text = await readFile(location, "utf8");
      const parsed: unknown = JSON.parse(text);
      return parseTodoList(parsed);
    } catch {
      return [];
    }
  }

  /** Writes the full list to the conversation's file via `writeJsonAtomic`. */
  async set(conversationId: string, todos: TodoItem[]): Promise<void> {
    const location = await this.filePath(conversationId);
    await writeJsonAtomic(location, todos);
  }
}
119
+
120
+ // ---------------------------------------------------------------------------
121
+ // KVBackedTodoStore — wraps any RawKVStore (Upstash, Redis, DynamoDB)
122
+ // ---------------------------------------------------------------------------
123
+
124
/**
 * Todo store on top of a `RawKVStore`. Each conversation's list is stored as
 * a JSON string under `<baseKey>:<slugified conversation id>`.
 *
 * When a KV operation (or the JSON handling around it) throws, the call is
 * served by a private in-memory store instead of failing.
 */
class KVBackedTodoStore implements TodoStore {
  private readonly memoryFallback = new InMemoryTodoStore();

  constructor(
    private readonly kv: RawKVStore,
    private readonly baseKey: string,
    private readonly ttl?: number,
  ) {}

  /** Builds the KV key for a conversation. */
  private keyFor(conversationId: string): string {
    const suffix = slugifyStorageComponent(conversationId);
    return `${this.baseKey}:${suffix}`;
  }

  /**
   * Loads the list from the KV store; a missing/empty value yields `[]`.
   * On any error the in-memory fallback is consulted instead.
   */
  async get(conversationId: string): Promise<TodoItem[]> {
    try {
      const stored = await this.kv.get(this.keyFor(conversationId));
      return stored ? parseTodoList(JSON.parse(stored)) : [];
    } catch {
      return this.memoryFallback.get(conversationId);
    }
  }

  /**
   * Saves the list to the KV store, with a TTL (clamped to at least 1) when a
   * numeric `ttl` was configured. On any error the list is kept in the
   * in-memory fallback instead.
   */
  async set(conversationId: string, todos: TodoItem[]): Promise<void> {
    try {
      const payload = JSON.stringify(todos);
      const key = this.keyFor(conversationId);
      if (typeof this.ttl !== "number") {
        await this.kv.set(key, payload);
        return;
      }
      await this.kv.setWithTtl(key, payload, Math.max(1, this.ttl));
    } catch {
      await this.memoryFallback.set(conversationId, todos);
    }
  }
}
164
+
165
+ // ---------------------------------------------------------------------------
166
+ // Factory
167
+ // ---------------------------------------------------------------------------
168
+
169
/**
 * Chooses the todo store implementation for an agent.
 *
 * - provider `"local"` (also the default when none is configured): file store
 *   rooted at `options.workingDir`, or the current working directory.
 * - provider `"memory"`: in-memory store.
 * - any other provider: KV-backed store when `createRawKVStore(config)`
 *   returns a store, otherwise an in-memory store.
 *
 * @param agentId Used (slugified) as part of the KV base key.
 * @param config State configuration supplying `provider` and `ttl`.
 * @param options Optional overrides; `workingDir` only affects the file store.
 */
export const createTodoStore = (
  agentId: string,
  config?: StateConfig,
  options?: { workingDir?: string },
): TodoStore => {
  const provider = config?.provider ?? "local";
  const ttl = config?.ttl;
  const workingDir = options?.workingDir ?? process.cwd();

  switch (provider) {
    case "local":
      return new FileTodoStore(workingDir);
    case "memory":
      return new InMemoryTodoStore();
    default:
      break;
  }

  const kv = createRawKVStore(config);
  if (!kv) {
    return new InMemoryTodoStore();
  }
  const agentSlug = slugifyStorageComponent(agentId);
  const baseKey = `poncho:${STORAGE_SCHEMA_VERSION}:${agentSlug}:todos`;
  return new KVBackedTodoStore(kv, baseKey, ttl);
};
192
+
193
+ // ---------------------------------------------------------------------------
194
+ // Tool definitions
195
+ // ---------------------------------------------------------------------------
196
+
197
/**
 * Builds the four todo tools (`todo_list`, `todo_add`, `todo_update`,
 * `todo_remove`) on top of `store`.
 *
 * Todos are keyed by `context.conversationId`, falling back to
 * `context.runId` when no conversation id is present.
 *
 * The mutating tools perform a read-modify-write against the store. Those
 * sequences are queued per key so that tool calls issued in parallel for the
 * same conversation are applied one after another instead of overwriting each
 * other's result. The queue only covers calls made through the tools returned
 * by one invocation of this function, within this process.
 *
 * @param store Backing store for the todo lists.
 * @returns Tool definitions in the order list, add, update, remove.
 */
export const createTodoTools = (store: TodoStore): ToolDefinition[] => {
  const resolveKey = (context: { conversationId?: string; runId: string }): string =>
    context.conversationId || context.runId;

  /** Returns `value` when it is a known status, otherwise `undefined`. */
  const asStatus = (value: unknown): TodoStatus | undefined =>
    typeof value === "string" && VALID_STATUSES.includes(value as TodoStatus)
      ? (value as TodoStatus)
      : undefined;

  /** Returns `value` when it is a known priority, otherwise `undefined`. */
  const asPriority = (value: unknown): TodoPriority | undefined =>
    typeof value === "string" && VALID_PRIORITIES.includes(value as TodoPriority)
      ? (value as TodoPriority)
      : undefined;

  /** Extracts the mandatory `id` input or throws the shared error. */
  const requireId = (value: unknown): string => {
    const id = typeof value === "string" ? value : "";
    if (!id) throw new Error("id is required");
    return id;
  };

  // Tail of the pending mutation chain for each key; removed once it drains.
  const queues = new Map<string, Promise<void>>();

  /** Runs `task` after every previously queued task for `key` has settled. */
  const serialized = <T>(key: string, task: () => Promise<T>): Promise<T> => {
    const previous = queues.get(key) ?? Promise.resolve();
    const result = previous.then(task);
    const ignore = (): void => undefined;
    // The tail never rejects, so one failed task cannot block later ones.
    const tail: Promise<void> = result.then(ignore, ignore).then(() => {
      if (queues.get(key) === tail) queues.delete(key);
    });
    queues.set(key, tail);
    return result;
  };

  return [
    defineTool({
      name: "todo_list",
      description:
        "List all todo items for the current conversation. " +
        "Use this to check progress and plan next steps.",
      inputSchema: {
        type: "object",
        properties: {
          status: {
            type: "string",
            enum: VALID_STATUSES,
            description: "Filter by status (omit to list all)",
          },
        },
        additionalProperties: false,
      },
      handler: async (input, context) => {
        const all = await store.get(resolveKey(context));
        // An absent or unrecognized status means "no filter".
        const status = asStatus(input.status);
        const todos = status ? all.filter((t) => t.status === status) : all;
        return { todos, count: todos.length };
      },
    }),

    defineTool({
      name: "todo_add",
      description:
        "Add a new todo item for the current conversation. " +
        "Use proactively for complex multi-step tasks (3+ steps).",
      inputSchema: {
        type: "object",
        properties: {
          content: {
            type: "string",
            description: "Description of the task",
          },
          status: {
            type: "string",
            enum: VALID_STATUSES,
            description: "Initial status (default: pending)",
          },
          priority: {
            type: "string",
            enum: VALID_PRIORITIES,
            description: "Priority level (default: medium)",
          },
        },
        required: ["content"],
        additionalProperties: false,
      },
      handler: async (input, context) => {
        const content = typeof input.content === "string" ? input.content.trim() : "";
        if (!content) throw new Error("content is required");
        const now = Date.now();
        const todo: TodoItem = {
          id: generateId(),
          content,
          // Unrecognized values fall back to the documented defaults.
          status: asStatus(input.status) ?? "pending",
          priority: asPriority(input.priority) ?? "medium",
          createdAt: now,
          updatedAt: now,
        };
        const key = resolveKey(context);
        return serialized(key, async () => {
          const todos = await store.get(key);
          todos.push(todo);
          await store.set(key, todos);
          return { todo, todos };
        });
      },
    }),

    defineTool({
      name: "todo_update",
      description:
        "Update an existing todo item's status, content, or priority. " +
        "Mark tasks in_progress when starting and completed when done.",
      inputSchema: {
        type: "object",
        properties: {
          id: {
            type: "string",
            description: "ID of the todo to update",
          },
          status: {
            type: "string",
            enum: VALID_STATUSES,
            description: "New status",
          },
          content: {
            type: "string",
            description: "New content/description",
          },
          priority: {
            type: "string",
            enum: VALID_PRIORITIES,
            description: "New priority level",
          },
        },
        required: ["id"],
        additionalProperties: false,
      },
      handler: async (input, context) => {
        const id = requireId(input.id);
        const key = resolveKey(context);
        return serialized(key, async () => {
          const todos = await store.get(key);
          const todo = todos.find((t) => t.id === id);
          if (!todo) throw new Error(`Todo with id "${id}" not found`);

          // Only fields carrying a usable value are applied; others are left as-is.
          const status = asStatus(input.status);
          if (status) todo.status = status;
          const content = typeof input.content === "string" ? input.content.trim() : "";
          if (content) todo.content = content;
          const priority = asPriority(input.priority);
          if (priority) todo.priority = priority;

          todo.updatedAt = Date.now();
          await store.set(key, todos);
          return { todo, todos };
        });
      },
    }),

    defineTool({
      name: "todo_remove",
      description: "Remove a todo item by ID.",
      inputSchema: {
        type: "object",
        properties: {
          id: {
            type: "string",
            description: "ID of the todo to remove",
          },
        },
        required: ["id"],
        additionalProperties: false,
      },
      handler: async (input, context) => {
        const id = requireId(input.id);
        const key = resolveKey(context);
        return serialized(key, async () => {
          const todos = await store.get(key);
          const index = todos.findIndex((t) => t.id === id);
          if (index === -1) throw new Error(`Todo with id "${id}" not found`);
          const [removed] = todos.splice(index, 1);
          await store.set(key, todos);
          return { removed, todos };
        });
      },
    }),
  ];
};
@@ -367,7 +367,7 @@ description: Beta skill
367
367
  });
368
368
  });
369
369
 
370
- it("clears active skills when skill metadata changes in development mode", async () => {
370
+ it("preserves active skills when skill metadata changes in development mode", async () => {
371
371
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-skill-refresh-clear-active-"));
372
372
  await writeFile(
373
373
  join(dir, "AGENT.md"),
@@ -405,6 +405,7 @@ description: Alpha skill
405
405
  await activate!.handler({ name: "alpha" }, {} as any);
406
406
  expect(await listActive!.handler({}, {} as any)).toEqual({ activeSkills: ["alpha"] });
407
407
 
408
+ // Update the skill metadata — the skill keeps the same name so it stays active
408
409
  await writeFile(
409
410
  join(dir, "skills", "alpha", "SKILL.md"),
410
411
  `---
@@ -417,7 +418,7 @@ description: Alpha skill updated
417
418
  "utf8",
418
419
  );
419
420
  await (harness as any).refreshSkillsIfChanged();
420
- expect(await listActive!.handler({}, {} as any)).toEqual({ activeSkills: [] });
421
+ expect(await listActive!.handler({}, {} as any)).toEqual({ activeSkills: ["alpha"] });
421
422
  });
422
423
 
423
424
  it("lists skill scripts through list_skill_scripts", async () => {
@@ -910,6 +911,132 @@ allowed-tools:
910
911
  await new Promise<void>((resolveClose) => mcpServer.close(() => resolveClose()));
911
912
  });
912
913
 
914
+ it("agent-level MCP tools persist when a skill is activated (additive)", async () => {
915
+ process.env.LINEAR_TOKEN = "token-123";
916
+ const mcpServer = createServer(async (req, res) => {
917
+ if (req.method === "DELETE") {
918
+ res.statusCode = 200;
919
+ res.end();
920
+ return;
921
+ }
922
+ const chunks: Buffer[] = [];
923
+ for await (const chunk of req) chunks.push(Buffer.from(chunk));
924
+ const body = Buffer.concat(chunks).toString("utf8");
925
+ const payload = body.trim().length > 0 ? (JSON.parse(body) as any) : {};
926
+ if (payload.method === "initialize") {
927
+ res.setHeader("Content-Type", "application/json");
928
+ res.setHeader("Mcp-Session-Id", "sess");
929
+ res.end(
930
+ JSON.stringify({
931
+ jsonrpc: "2.0",
932
+ id: payload.id,
933
+ result: {
934
+ protocolVersion: "2025-03-26",
935
+ capabilities: { tools: { listChanged: true } },
936
+ serverInfo: { name: "remote", version: "1.0.0" },
937
+ },
938
+ }),
939
+ );
940
+ return;
941
+ }
942
+ if (payload.method === "notifications/initialized") {
943
+ res.statusCode = 202;
944
+ res.end();
945
+ return;
946
+ }
947
+ if (payload.method === "tools/list") {
948
+ res.setHeader("Content-Type", "application/json");
949
+ res.end(
950
+ JSON.stringify({
951
+ jsonrpc: "2.0",
952
+ id: payload.id,
953
+ result: {
954
+ tools: [
955
+ { name: "a", inputSchema: { type: "object", properties: {} } },
956
+ { name: "b", inputSchema: { type: "object", properties: {} } },
957
+ ],
958
+ },
959
+ }),
960
+ );
961
+ return;
962
+ }
963
+ if (payload.method === "tools/call") {
964
+ res.setHeader("Content-Type", "application/json");
965
+ res.end(
966
+ JSON.stringify({
967
+ jsonrpc: "2.0",
968
+ id: payload.id,
969
+ result: { result: { ok: true } },
970
+ }),
971
+ );
972
+ return;
973
+ }
974
+ res.statusCode = 404;
975
+ res.end();
976
+ });
977
+ await new Promise<void>((resolveOpen) => mcpServer.listen(0, () => resolveOpen()));
978
+ const address = mcpServer.address();
979
+ if (!address || typeof address === "string") throw new Error("Unexpected address");
980
+ const dir = await mkdtemp(join(tmpdir(), "poncho-harness-additive-mcp-"));
981
+ await writeFile(
982
+ join(dir, "AGENT.md"),
983
+ `---
984
+ name: additive-agent
985
+ model:
986
+ provider: anthropic
987
+ name: claude-opus-4-5
988
+ allowed-tools:
989
+ - mcp:remote/a
990
+ ---
991
+
992
+ # Additive Agent
993
+ `,
994
+ "utf8",
995
+ );
996
+ await writeFile(
997
+ join(dir, "poncho.config.js"),
998
+ `export default {
999
+ mcp: [
1000
+ {
1001
+ name: "remote",
1002
+ url: "http://127.0.0.1:${address.port}/mcp",
1003
+ auth: { type: "bearer", tokenEnv: "LINEAR_TOKEN" }
1004
+ }
1005
+ ]
1006
+ };
1007
+ `,
1008
+ "utf8",
1009
+ );
1010
+ await mkdir(join(dir, "skills", "skill-b"), { recursive: true });
1011
+ await writeFile(
1012
+ join(dir, "skills", "skill-b", "SKILL.md"),
1013
+ `---
1014
+ name: skill-b
1015
+ description: B
1016
+ allowed-tools:
1017
+ - mcp:remote/b
1018
+ ---
1019
+ # B
1020
+ `,
1021
+ "utf8",
1022
+ );
1023
+ const harness = new AgentHarness({ workingDir: dir });
1024
+ await harness.initialize();
1025
+ const toolNames = () => harness.listTools().map((t) => t.name);
1026
+ expect(toolNames()).toContain("remote/a");
1027
+ expect(toolNames()).not.toContain("remote/b");
1028
+ const activate = harness.listTools().find((t) => t.name === "activate_skill")!;
1029
+ const deactivate = harness.listTools().find((t) => t.name === "deactivate_skill")!;
1030
+ await activate.handler({ name: "skill-b" }, {} as any);
1031
+ expect(toolNames()).toContain("remote/a");
1032
+ expect(toolNames()).toContain("remote/b");
1033
+ await deactivate.handler({ name: "skill-b" }, {} as any);
1034
+ expect(toolNames()).toContain("remote/a");
1035
+ expect(toolNames()).not.toContain("remote/b");
1036
+ await harness.shutdown();
1037
+ await new Promise<void>((resolveClose) => mcpServer.close(() => resolveClose()));
1038
+ });
1039
+
913
1040
  it("supports flat tool access config format", async () => {
914
1041
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-flat-tool-access-"));
915
1042
  await writeFile(
@@ -5,7 +5,6 @@ describe("memory store factory", () => {
5
5
  it("uses memory provider by default", async () => {
6
6
  const store = createMemoryStore("agent-test");
7
7
  const updated = await store.updateMainMemory({
8
- mode: "replace",
9
8
  content: "Cesar prefers short bullet points.",
10
9
  });
11
10
  expect(updated.content).toContain("short bullet points");
@@ -13,24 +12,17 @@ describe("memory store factory", () => {
13
12
  expect(fetched.content).toContain("short bullet points");
14
13
  });
15
14
 
16
- it("supports append updates", async () => {
17
- const store = createMemoryStore("agent-append");
18
- await store.updateMainMemory({
19
- mode: "replace",
20
- content: "Initial memory.",
21
- });
22
- const result = await store.updateMainMemory({
23
- mode: "append",
24
- content: "Appended line.",
25
- });
26
- expect(result.content).toContain("Initial memory.");
27
- expect(result.content).toContain("Appended line.");
15
+ it("overwrites previous content on update", async () => {
16
+ const store = createMemoryStore("agent-overwrite");
17
+ await store.updateMainMemory({ content: "First version." });
18
+ const result = await store.updateMainMemory({ content: "Second version." });
19
+ expect(result.content).toBe("Second version.");
20
+ expect(result.content).not.toContain("First version.");
28
21
  });
29
22
 
30
23
  it("falls back gracefully when upstash is not configured", async () => {
31
24
  const store = createMemoryStore("agent-fallback", { provider: "upstash" });
32
25
  const updated = await store.updateMainMemory({
33
- mode: "replace",
34
26
  content: "Fallback path still stores memory",
35
27
  });
36
28
  expect(updated.content).toContain("Fallback path");
@@ -43,8 +35,101 @@ describe("memory tools", () => {
43
35
  const tools = createMemoryTools(store);
44
36
  expect(tools.map((tool) => tool.name)).toEqual([
45
37
  "memory_main_get",
46
- "memory_main_update",
38
+ "memory_main_write",
39
+ "memory_main_edit",
47
40
  "conversation_recall",
48
41
  ]);
49
42
  });
43
+
44
+ describe("memory_main_write", () => {
45
+ it("writes content to memory", async () => {
46
+ const store = createMemoryStore("agent-write");
47
+ const tools = createMemoryTools(store);
48
+ const writeTool = tools.find((t) => t.name === "memory_main_write")!;
49
+ const result = await writeTool.handler(
50
+ { content: "User prefers dark mode." },
51
+ { runId: "r1", agentId: "a1", step: 0, workingDir: ".", parameters: {} },
52
+ );
53
+ expect(result).toEqual({
54
+ ok: true,
55
+ memory: expect.objectContaining({ content: "User prefers dark mode." }),
56
+ });
57
+ });
58
+
59
+ it("errors when content is empty", async () => {
60
+ const store = createMemoryStore("agent-write-empty");
61
+ const tools = createMemoryTools(store);
62
+ const writeTool = tools.find((t) => t.name === "memory_main_write")!;
63
+ await expect(
64
+ writeTool.handler(
65
+ { content: " " },
66
+ { runId: "r1", agentId: "a1", step: 0, workingDir: ".", parameters: {} },
67
+ ),
68
+ ).rejects.toThrow("content is required");
69
+ });
70
+ });
71
+
72
+ describe("memory_main_edit", () => {
73
+ const setupMemory = async () => {
74
+ const store = createMemoryStore("agent-edit-" + Math.random());
75
+ await store.updateMainMemory({
76
+ content: "- prefers dark mode\n- likes TypeScript\n- uses vim",
77
+ });
78
+ const tools = createMemoryTools(store);
79
+ const editTool = tools.find((t) => t.name === "memory_main_edit")!;
80
+ const ctx = { runId: "r1", agentId: "a1", step: 0, workingDir: ".", parameters: {} };
81
+ return { store, editTool, ctx };
82
+ };
83
+
84
+ it("replaces a unique string match in memory", async () => {
85
+ const { store, editTool, ctx } = await setupMemory();
86
+ const result = await editTool.handler(
87
+ { old_str: "likes TypeScript", new_str: "loves TypeScript" },
88
+ ctx,
89
+ );
90
+ expect(result).toEqual({
91
+ ok: true,
92
+ memory: expect.objectContaining({
93
+ content: "- prefers dark mode\n- loves TypeScript\n- uses vim",
94
+ }),
95
+ });
96
+ const fetched = await store.getMainMemory();
97
+ expect(fetched.content).toContain("loves TypeScript");
98
+ });
99
+
100
+ it("deletes matched content when new_str is empty", async () => {
101
+ const { store, editTool, ctx } = await setupMemory();
102
+ await editTool.handler(
103
+ { old_str: "\n- likes TypeScript", new_str: "" },
104
+ ctx,
105
+ );
106
+ const fetched = await store.getMainMemory();
107
+ expect(fetched.content).toBe("- prefers dark mode\n- uses vim");
108
+ });
109
+
110
+ it("errors when old_str is empty", async () => {
111
+ const { editTool, ctx } = await setupMemory();
112
+ await expect(
113
+ editTool.handler({ old_str: "", new_str: "anything" }, ctx),
114
+ ).rejects.toThrow("old_str must not be empty");
115
+ });
116
+
117
+ it("errors when old_str is not found in memory", async () => {
118
+ const { editTool, ctx } = await setupMemory();
119
+ await expect(
120
+ editTool.handler({ old_str: "nonexistent text", new_str: "x" }, ctx),
121
+ ).rejects.toThrow("old_str not found in memory");
122
+ });
123
+
124
+ it("errors when old_str matches multiple locations", async () => {
125
+ const store = createMemoryStore("agent-edit-dup");
126
+ await store.updateMainMemory({ content: "foo bar foo" });
127
+ const tools = createMemoryTools(store);
128
+ const editTool = tools.find((t) => t.name === "memory_main_edit")!;
129
+ const ctx = { runId: "r1", agentId: "a1", step: 0, workingDir: ".", parameters: {} };
130
+ await expect(
131
+ editTool.handler({ old_str: "foo", new_str: "baz" }, ctx),
132
+ ).rejects.toThrow("old_str appears multiple times");
133
+ });
134
+ });
50
135
  });