lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (92)
  1. lm_deluge/__init__.py +25 -2
  2. lm_deluge/api_requests/anthropic.py +92 -17
  3. lm_deluge/api_requests/base.py +47 -11
  4. lm_deluge/api_requests/bedrock.py +7 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +138 -18
  7. lm_deluge/api_requests/openai.py +114 -21
  8. lm_deluge/client.py +282 -49
  9. lm_deluge/config.py +15 -3
  10. lm_deluge/mock_openai.py +643 -0
  11. lm_deluge/models/__init__.py +12 -1
  12. lm_deluge/models/anthropic.py +17 -2
  13. lm_deluge/models/arcee.py +16 -0
  14. lm_deluge/models/deepseek.py +36 -4
  15. lm_deluge/models/google.py +29 -0
  16. lm_deluge/models/grok.py +24 -0
  17. lm_deluge/models/kimi.py +36 -0
  18. lm_deluge/models/minimax.py +10 -0
  19. lm_deluge/models/openai.py +100 -0
  20. lm_deluge/models/openrouter.py +86 -8
  21. lm_deluge/models/together.py +11 -0
  22. lm_deluge/models/zai.py +1 -0
  23. lm_deluge/pipelines/gepa/__init__.py +95 -0
  24. lm_deluge/pipelines/gepa/core.py +354 -0
  25. lm_deluge/pipelines/gepa/docs/samples.py +696 -0
  26. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  27. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  28. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  29. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  30. lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  31. lm_deluge/pipelines/gepa/optimizer.py +435 -0
  32. lm_deluge/pipelines/gepa/proposer.py +235 -0
  33. lm_deluge/pipelines/gepa/util.py +165 -0
  34. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  35. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  36. lm_deluge/prompt.py +224 -40
  37. lm_deluge/request_context.py +7 -2
  38. lm_deluge/tool/__init__.py +1118 -0
  39. lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
  40. lm_deluge/tool/builtin/gemini.py +59 -0
  41. lm_deluge/tool/builtin/openai.py +74 -0
  42. lm_deluge/tool/cua/__init__.py +173 -0
  43. lm_deluge/tool/cua/actions.py +148 -0
  44. lm_deluge/tool/cua/base.py +27 -0
  45. lm_deluge/tool/cua/batch.py +215 -0
  46. lm_deluge/tool/cua/converters.py +466 -0
  47. lm_deluge/tool/cua/kernel.py +702 -0
  48. lm_deluge/tool/cua/trycua.py +989 -0
  49. lm_deluge/tool/prefab/__init__.py +45 -0
  50. lm_deluge/tool/prefab/batch_tool.py +156 -0
  51. lm_deluge/tool/prefab/docs.py +1119 -0
  52. lm_deluge/tool/prefab/email.py +294 -0
  53. lm_deluge/tool/prefab/filesystem.py +1711 -0
  54. lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
  55. lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
  56. lm_deluge/tool/prefab/memory.py +458 -0
  57. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  58. lm_deluge/tool/prefab/otc/executor.py +281 -0
  59. lm_deluge/tool/prefab/otc/parse.py +188 -0
  60. lm_deluge/tool/prefab/random.py +212 -0
  61. lm_deluge/tool/prefab/rlm/__init__.py +296 -0
  62. lm_deluge/tool/prefab/rlm/executor.py +349 -0
  63. lm_deluge/tool/prefab/rlm/parse.py +144 -0
  64. lm_deluge/tool/prefab/sandbox.py +1621 -0
  65. lm_deluge/tool/prefab/sheets.py +385 -0
  66. lm_deluge/tool/prefab/subagents.py +233 -0
  67. lm_deluge/tool/prefab/todos.py +342 -0
  68. lm_deluge/tool/prefab/tool_search.py +169 -0
  69. lm_deluge/tool/prefab/web_search.py +199 -0
  70. lm_deluge/tracker.py +16 -13
  71. lm_deluge/util/schema.py +412 -0
  72. lm_deluge/warnings.py +8 -0
  73. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
  74. lm_deluge-0.0.88.dist-info/RECORD +117 -0
  75. lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
  76. lm_deluge/built_in_tools/openai.py +0 -28
  77. lm_deluge/presets/cerebras.py +0 -17
  78. lm_deluge/presets/meta.py +0 -13
  79. lm_deluge/tool.py +0 -849
  80. lm_deluge-0.0.67.dist-info/RECORD +0 -72
  81. lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
  82. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  83. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  84. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  85. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  86. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
  87. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
  88. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
  89. /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
  90. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
  91. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
  92. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1711 @@
+ from __future__ import annotations
+
+ import io
+ import json
+ import os
+ import random
+ import re
+ import time
+ import zipfile
+ from collections.abc import Callable, Iterable, Sequence
+ from dataclasses import dataclass
+ from functools import partial
+ from pathlib import Path
+ from typing import Any, Dict, Literal, Optional, Protocol
+
+ from pydantic import BaseModel, Field
+
+ from .. import Tool
+
+ FS_DESCRIPTION = """Interact with an isolated virtual filesystem that belongs to this session.
+
+ Paths are always relative to the workspace root and use forward slashes. Use this tool to:
+ - inspect files with optional line ranges
+ - create, overwrite, or append to files
+ - delete files or folders from the workspace
+ - list directory contents
+ - search for text across the workspace using regular expressions
+ - apply OpenAI-style apply_patch operations (create/update/delete)
+
+ This virtual filesystem is safe-by-construction. Paths that try to escape the workspace
+ or reference missing files will raise clear errors."""
+
+
+ class WorkspaceBackend(Protocol):
+     """Abstract filesystem operations used by FilesystemManager."""
+
+     def read_file(self, path: str) -> str: ...
+
+     def write_file(self, path: str, content: str, *, overwrite: bool) -> None: ...
+
+     def append_file(self, path: str, content: str) -> None: ...
+
+     def delete_path(self, path: str) -> None: ...
+
+     def list_dir(self, path: str, recursive: bool) -> list[dict[str, Any]]: ...
+
+     def grep(
+         self, pattern: str, path: str | None, limit: int
+     ) -> list[dict[str, Any]]: ...
+
+
+ def _normalize_path(path: str | None, *, allow_root: bool = False) -> str:
+     """Normalize user-provided paths and prevent directory traversal."""
+     if path is None or path.strip() == "":
+         if allow_root:
+             return "."
+         raise ValueError("Path is required")
+
+     raw = path.strip()
+     if raw.startswith("/"):
+         raw = raw.lstrip("/")
+
+     parts: list[str] = []
+     for part in raw.split("/"):
+         if part in ("", "."):
+             continue
+         if part == "..":
+             if parts:
+                 parts.pop()
+             else:
+                 raise ValueError("Path traversal outside the workspace is not allowed")
+             continue
+         parts.append(part)
+
+     normalized = "/".join(parts)
+     if normalized:
+         return normalized
+     if allow_root:
+         return "."
+     raise ValueError("Path must reference a file inside the workspace")
+
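A quick illustration of the normalization rules above (a sketch only; `_normalize_path` is a private helper, so the import is purely for demonstration):

```python
from lm_deluge.tool.prefab.filesystem import _normalize_path

_normalize_path("./docs//guide.md")    # -> "docs/guide.md"
_normalize_path("/a/b/../c.txt")       # -> "a/c.txt" (".." resolved inside the workspace)
_normalize_path("", allow_root=True)   # -> "."
_normalize_path("../escape.txt")       # raises ValueError (traversal above the root)
```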
+
+ class InMemoryWorkspaceBackend:
+     """Simple backend that stores files in memory."""
+
+     def __init__(self, files: dict[str, str] | None = None):
+         self._files: Dict[str, str] = {}
+         if files:
+             for path, content in files.items():
+                 key = _normalize_path(path)
+                 self._files[key] = content
+
+     def read_file(self, path: str) -> str:
+         key = _normalize_path(path)
+         if key not in self._files:
+             raise FileNotFoundError(f"{key} does not exist")
+         return self._files[key]
+
+     def write_file(self, path: str, content: str, *, overwrite: bool) -> None:
+         key = _normalize_path(path)
+         if not overwrite and key in self._files:
+             raise FileExistsError(f"{key} already exists")
+         self._files[key] = content
+
+     def append_file(self, path: str, content: str) -> None:
+         key = _normalize_path(path)
+         if key in self._files:
+             self._files[key] = f"{self._files[key]}{content}"
+         else:
+             self._files[key] = content
+
+     def delete_path(self, path: str) -> None:
+         key = _normalize_path(path, allow_root=True)
+         if key == ".":
+             self._files.clear()
+             return
+         if key in self._files:
+             del self._files[key]
+             return
+
+         prefix = f"{key}/"
+         targets = [p for p in self._files if p.startswith(prefix)]
+         if not targets:
+             raise FileNotFoundError(f"{key} does not exist")
+         for target in targets:
+             del self._files[target]
+
+     def list_dir(self, path: str, recursive: bool) -> list[dict[str, Any]]:
+         key = _normalize_path(path, allow_root=True)
+         if key != "." and key in self._files and not recursive:
+             # Listing a file path shows metadata for that file.
+             return [self._format_file_entry(key)]
+
+         prefix = "" if key == "." else f"{key}/"
+         entries: list[dict[str, Any]] = []
+
+         if key != "." and not any(
+             p == key or p.startswith(prefix) for p in self._files
+         ):
+             raise FileNotFoundError(f"{key} does not exist")
+
+         if recursive:
+             for file_path in sorted(self._files):
+                 if not (file_path == key or file_path.startswith(prefix)):
+                     continue
+                 entries.append(self._format_file_entry(file_path))
+             return entries
+
+         seen_dirs: set[str] = set()
+         for file_path in sorted(self._files):
+             if not (file_path == key or file_path.startswith(prefix)):
+                 continue
+             remainder = file_path[len(prefix) :]
+             if remainder == "":
+                 entries.append(self._format_file_entry(file_path))
+                 continue
+             head, _, tail = remainder.partition("/")
+             if tail:
+                 dir_path = head if key == "." else f"{key}/{head}"
+                 if dir_path not in seen_dirs:
+                     entries.append(
+                         {"path": dir_path, "type": "directory", "size": None}
+                     )
+                     seen_dirs.add(dir_path)
+             else:
+                 entries.append(self._format_file_entry(file_path))
+         return entries
+
+     def grep(self, pattern: str, path: str | None, limit: int) -> list[dict[str, Any]]:
+         regex = re.compile(pattern)
+         key = _normalize_path(path, allow_root=True) if path is not None else "."
+         prefix = "" if key == "." else f"{key}/"
+         matches: list[dict[str, Any]] = []
+
+         for file_path in sorted(self._files):
+             if not (file_path == key or file_path.startswith(prefix)):
+                 continue
+             content = self._files[file_path]
+             for line_no, line in enumerate(content.splitlines(), start=1):
+                 if regex.search(line):
+                     matches.append(
+                         {"path": file_path, "line": line_no, "text": line.strip()}
+                     )
+                     if len(matches) >= limit:
+                         return matches
+         if (
+             key != "."
+             and key not in self._files
+             and not any(p.startswith(prefix) for p in self._files)
+         ):
+             raise FileNotFoundError(f"{key} does not exist")
+         return matches
+
+     def _format_file_entry(self, path: str) -> dict[str, Any]:
+         content = self._files[path]
+         if content == "":
+             line_count = 0
+         else:
+             line_count = content.count("\n") + (0 if content.endswith("\n") else 1)
+         return {
+             "path": path,
+             "type": "file",
+             "size": len(content),
+             "line_count": max(line_count, 0),
+         }
+
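A minimal usage sketch of the in-memory backend (behavior read directly off the methods above):

```python
backend = InMemoryWorkspaceBackend({"src/app.py": "print('hi')\n"})
backend.write_file("README.md", "# Demo\n", overwrite=True)
backend.append_file("README.md", "More text\n")

backend.list_dir(".", recursive=False)
# -> file entries for README.md plus a {"path": "src", "type": "directory"} entry

backend.grep(r"print", None, 10)
# -> [{"path": "src/app.py", "line": 1, "text": "print('hi')"}]
```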
+
+ FsCommand = Literal[
+     "read_file",
+     "write_file",
+     "delete_path",
+     "list_dir",
+     "grep",
+     "apply_patch",
+ ]
+ ALL_COMMANDS: tuple[FsCommand, ...] = (
+     "read_file",
+     "write_file",
+     "delete_path",
+     "list_dir",
+     "grep",
+     "apply_patch",
+ )
+
+
+ class ApplyPatchOperation(BaseModel):
+     """Subset of OpenAI apply_patch operation payload."""
+
+     type: Literal["create_file", "update_file", "delete_file"] = Field(
+         description="Type of patch operation to perform."
+     )
+     path: str = Field(description="Path to the file being modified.")
+     diff: str | None = Field(
+         default=None,
+         description="V4A diff to apply for create/update operations.",
+     )
+
+     @property
+     def requires_diff(self) -> bool:
+         return self.type in {"create_file", "update_file"}
+
+
+ class FilesystemParams(BaseModel):
+     """Schema describing filesystem tool calls."""
+
+     command: FsCommand = Field(
+         description="Filesystem operation to perform (read_file, write_file, delete_path, list_dir, grep, apply_patch)"
+     )
+     path: Optional[str] = Field(
+         default=None,
+         description="Path to operate on, relative to workspace root. Use '.' for the root directory.",
+     )
+     start_line: Optional[int] = Field(
+         default=None,
+         description="1-based inclusive start line when reading a file. Leave unset to read from the beginning.",
+         ge=1,
+     )
+     end_line: Optional[int] = Field(
+         default=None,
+         description="1-based inclusive end line when reading a file. Leave unset to read through the end.",
+         ge=1,
+     )
+     content: Optional[str] = Field(
+         default=None,
+         description="Content to write when using write_file.",
+     )
+     mode: Optional[Literal["overwrite", "append", "create_if_missing"]] = Field(
+         default="overwrite",
+         description="How to write content. Overwrite replaces the file, append adds to the end, create_if_missing leaves existing files untouched.",
+     )
+     recursive: Optional[bool] = Field(
+         default=None,
+         description="When listing directories, set to true to recurse.",
+     )
+     pattern: Optional[str] = Field(
+         default=None,
+         description="Regular expression pattern to search for when using grep.",
+     )
+     max_results: Optional[int] = Field(
+         default=50,
+         description="Maximum number of grep matches to return.",
+         ge=1,
+     )
+     operation: ApplyPatchOperation | None = Field(
+         default=None,
+         description=(
+             "When using command='apply_patch', include an operation matching the "
+             "OpenAI apply_patch_call payload (type, path, diff)."
+         ),
+     )
+
+
+ class FilesystemManager:
+     """Expose a TodoManager-style tool for interacting with a workspace."""
+
+     def __init__(
+         self,
+         backend: WorkspaceBackend | None = None,
+         *,
+         tool_name: str = "filesystem",
+     ):
+         self.backend = backend or InMemoryWorkspaceBackend()
+         self.tool_name = tool_name
+         self._tool_cache: dict[tuple[str, ...], list[Tool]] = {}
+
+     def _handle_read(
+         self, path: str, start_line: Optional[int], end_line: Optional[int]
+     ) -> dict[str, Any]:
+         content = self.backend.read_file(path)
+         total_lines = len(content.splitlines()) or (0 if content == "" else 1)
+         start = start_line or 1
+         end = end_line or total_lines
+         if end < start:
+             if not (total_lines == 0 and end_line is None and start == 1):
+                 raise ValueError("end_line must be greater than or equal to start_line")
+
+         if start == 1 and end >= total_lines:
+             snippet = content
+         else:
+             lines = content.splitlines()
+             snippet = "\n".join(lines[start - 1 : end])
+
+         return {
+             "path": path,
+             "start_line": start,
+             "end_line": end,
+             "content": snippet,
+             "total_lines": total_lines,
+             "character_count": len(content),
+         }
+
+     def _handle_write(
+         self, path: str, content: str, mode: Optional[str]
+     ) -> dict[str, Any]:
+         write_mode = mode or "overwrite"
+         if write_mode == "overwrite":
+             self.backend.write_file(path, content, overwrite=True)
+         elif write_mode == "append":
+             self.backend.append_file(path, content)
+         elif write_mode == "create_if_missing":
+             try:
+                 self.backend.write_file(path, content, overwrite=False)
+             except FileExistsError:
+                 pass
+         else:
+             raise ValueError(f"Unsupported write mode: {write_mode}")
+         return {"path": path, "status": "ok", "mode": write_mode}
+
+     def _handle_delete(self, path: str) -> dict[str, Any]:
+         self.backend.delete_path(path)
+         return {"path": path, "status": "ok"}
+
+     def _handle_list(
+         self, path: Optional[str], recursive: Optional[bool]
+     ) -> dict[str, Any]:
+         listing = self.backend.list_dir(path or ".", recursive=bool(recursive))
+         return {"path": path or ".", "recursive": bool(recursive), "entries": listing}
+
+     def _handle_grep(
+         self, pattern: str, path: Optional[str], limit: Optional[int]
+     ) -> dict[str, Any]:
+         max_results = limit or 50
+         matches = self.backend.grep(pattern, path=path, limit=max_results)
+         return {
+             "pattern": pattern,
+             "path": path,
+             "max_results": max_results,
+             "matches": matches,
+         }
+
+     def _handle_apply_patch(self, operation: ApplyPatchOperation) -> dict[str, Any]:
+         if operation.requires_diff and not operation.diff:
+             raise ValueError("diff is required for create_file and update_file")
+
+         if operation.type == "delete_file":
+             self.backend.delete_path(operation.path)
+             return {"path": operation.path, "operation": "delete_file"}
+
+         assert operation.diff is not None  # for type checkers
+         if operation.type == "create_file":
+             new_content = apply_diff("", operation.diff, mode="create")
+             self.backend.write_file(operation.path, new_content, overwrite=False)
+             return {"path": operation.path, "operation": "create_file"}
+
+         if operation.type == "update_file":
+             current = self.backend.read_file(operation.path)
+             new_content = apply_diff(current, operation.diff, mode="default")
+             self.backend.write_file(operation.path, new_content, overwrite=True)
+             return {"path": operation.path, "operation": "update_file"}
+
+         raise ValueError(f"Unsupported patch operation: {operation.type}")
+
+     def _filesystem_tool(self, allowed_commands: set[FsCommand], **kwargs: Any) -> str:
+         params = FilesystemParams.model_validate(kwargs)
+
+         try:
+             if params.command not in allowed_commands:
+                 raise ValueError(
+                     f"The '{params.command}' command is disabled for this tool instance"
+                 )
+             if params.command == "read_file":
+                 if not params.path:
+                     raise ValueError("path is required for read_file")
+                 result = self._handle_read(
+                     params.path, params.start_line, params.end_line
+                 )
+             elif params.command == "write_file":
+                 if params.path is None or params.content is None:
+                     raise ValueError("path and content are required for write_file")
+                 result = self._handle_write(params.path, params.content, params.mode)
+             elif params.command == "delete_path":
+                 if not params.path:
+                     raise ValueError("path is required for delete_path")
+                 result = self._handle_delete(params.path)
+             elif params.command == "list_dir":
+                 result = self._handle_list(params.path, params.recursive)
+             elif params.command == "grep":
+                 if not params.pattern:
+                     raise ValueError("pattern is required for grep")
+                 result = self._handle_grep(
+                     params.pattern, params.path, params.max_results
+                 )
+             elif params.command == "apply_patch":
+                 if params.operation is None:
+                     raise ValueError("operation is required for apply_patch")
+                 result = self._handle_apply_patch(params.operation)
+             else:
+                 raise ValueError(f"Unknown command: {params.command}")
+             return json.dumps({"ok": True, "result": result}, indent=2)
+         except Exception as exc:
+             return json.dumps(
+                 {"ok": False, "error": type(exc).__name__, "message": str(exc)},
+                 indent=2,
+             )
+
+     def get_tools(self, *, exclude: Iterable[FsCommand] | None = None) -> list[Tool]:
+         exclude_set = set(exclude or [])
+         unknown = exclude_set.difference(ALL_COMMANDS)
+         if unknown:
+             raise ValueError(f"Unknown commands in exclude list: {sorted(unknown)}")
+
+         allowed = tuple(cmd for cmd in ALL_COMMANDS if cmd not in exclude_set)
+         if not allowed:
+             raise ValueError("Cannot exclude every filesystem command")
+
+         cache_key = allowed
+         if cache_key in self._tool_cache:
+             return self._tool_cache[cache_key]
+
+         allowed_set = set(allowed)
+         schema = FilesystemParams.model_json_schema(ref_template="#/$defs/{model}")
+         if (
+             "properties" in schema
+             and "command" in schema["properties"]
+             and isinstance(schema["properties"]["command"], dict)
+         ):
+             schema["properties"]["command"]["enum"] = list(allowed)
+
+         tool = Tool(
+             name=self.tool_name,
+             description=FS_DESCRIPTION,
+             parameters=schema.get("properties", {}),
+             required=schema.get("required", []),
+             definitions=schema.get("$defs"),
+             run=partial(self._filesystem_tool, allowed_set),  # type: ignore
+         )
+
+         self._tool_cache[cache_key] = [tool]
+         return [tool]
+
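Putting `get_tools` together with the dispatcher above, a sketch of how a restricted tool instance might be built and invoked (assuming `Tool` exposes the bound callable as `run`, as the constructor call suggests):

```python
manager = FilesystemManager(
    backend=InMemoryWorkspaceBackend({"report.md": "draft\n"})
)
# Read-only variant: the excluded commands are also removed from the schema enum.
tools = manager.get_tools(exclude=["write_file", "delete_path", "apply_patch"])
print(tools[0].run(command="read_file", path="report.md"))
# -> {"ok": true, "result": {..., "content": "draft\n", ...}}
print(tools[0].run(command="write_file", path="x.txt", content="y"))
# -> {"ok": false, "error": "ValueError", "message": "The 'write_file' command is disabled ..."}
```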
+     def dump(
+         self,
+         destination: str | os.PathLike[str],
+         *,
+         as_zip: bool = False,
+     ) -> list[str]:
+         """
+         Copy the virtual workspace to the given filesystem path.
+
+         Args:
+             destination: Path to write to. If as_zip=True, this should be a .zip file path.
+             as_zip: If True, write as a zip archive instead of a directory.
+
+         Returns:
+             List of file paths that were written.
+         """
+         entries = self.backend.list_dir(".", recursive=True)
+         written: list[str] = []
+
+         if as_zip:
+             target_path = Path(destination)
+             target_path.parent.mkdir(parents=True, exist_ok=True)
+
+             with zipfile.ZipFile(target_path, "w", zipfile.ZIP_DEFLATED) as zf:
+                 for entry in entries:
+                     if entry.get("type") != "file":
+                         continue
+                     rel_path = entry["path"]
+                     content = self.backend.read_file(rel_path)
+                     zf.writestr(rel_path, content)
+                     written.append(rel_path)
+         else:
+             target_root = Path(destination)
+             if target_root.exists() and not target_root.is_dir():
+                 raise NotADirectoryError(f"{target_root} exists and is not a directory")
+             target_root.mkdir(parents=True, exist_ok=True)
+
+             for entry in entries:
+                 if entry.get("type") != "file":
+                     continue
+                 rel_path = entry["path"]
+                 destination_path = target_root.joinpath(*rel_path.split("/"))
+                 destination_path.parent.mkdir(parents=True, exist_ok=True)
+                 destination_path.write_text(self.backend.read_file(rel_path))
+                 written.append(rel_path)
+
+         return sorted(written)
+
+     @classmethod
+     def from_dir(
+         cls,
+         source: str | os.PathLike[str],
+         *,
+         max_files: int = 100,
+         tool_name: str = "filesystem",
+     ) -> "FilesystemManager":
+         """
+         Create a FilesystemManager pre-populated with files from a directory.
+
+         Args:
+             source: Path to the directory to load files from.
+             max_files: Maximum number of files to load (default 100).
+             tool_name: Name for the filesystem tool.
+
+         Returns:
+             A new FilesystemManager with the files loaded into memory.
+
+         Raises:
+             ValueError: If more than max_files files are found.
+             NotADirectoryError: If source is not a directory.
+         """
+         source_path = Path(source)
+         if not source_path.is_dir():
+             raise NotADirectoryError(f"{source_path} is not a directory")
+
+         files: dict[str, str] = {}
+         file_count = 0
+
+         for file_path in source_path.rglob("*"):
+             if not file_path.is_file():
+                 continue
+
+             file_count += 1
+             if file_count > max_files:
+                 raise ValueError(
+                     f"Directory contains more than {max_files} files. "
+                     f"Increase max_files or use a smaller directory."
+                 )
+
+             rel_path = file_path.relative_to(source_path).as_posix()
+
+             # Try to read as text, skip binary files
+             try:
+                 content = file_path.read_text()
+                 files[rel_path] = content
+             except UnicodeDecodeError:
+                 # Skip binary files
+                 continue
+
+         backend = InMemoryWorkspaceBackend(files)
+         return cls(backend=backend, tool_name=tool_name)
+
+     @classmethod
+     def from_zip(
+         cls,
+         source: str | os.PathLike[str] | io.BytesIO,
+         *,
+         max_files: int = 100,
+         tool_name: str = "filesystem",
+     ) -> "FilesystemManager":
+         """
+         Create a FilesystemManager pre-populated with files from a zip archive.
+
+         Args:
+             source: Path to the zip file, or a BytesIO containing zip data.
+             max_files: Maximum number of files to load (default 100).
+             tool_name: Name for the filesystem tool.
+
+         Returns:
+             A new FilesystemManager with the files loaded into memory.
+
+         Raises:
+             ValueError: If more than max_files files are found.
+             zipfile.BadZipFile: If the source is not a valid zip file.
+         """
+         files: dict[str, str] = {}
+         file_count = 0
+
+         with zipfile.ZipFile(source, "r") as zf:
+             for info in zf.infolist():
+                 # Skip directories
+                 if info.is_dir():
+                     continue
+
+                 file_count += 1
+                 if file_count > max_files:
+                     raise ValueError(
+                         f"Zip archive contains more than {max_files} files. "
+                         f"Increase max_files or use a smaller archive."
+                     )
+
+                 # Normalize path (remove leading slashes, handle Windows paths)
+                 rel_path = info.filename.lstrip("/").replace("\\", "/")
+                 if not rel_path:
+                     continue
+
+                 # Try to read as text, skip binary files
+                 try:
+                     content = zf.read(info.filename).decode("utf-8")
+                     files[rel_path] = content
+                 except UnicodeDecodeError:
+                     # Skip binary files
+                     continue
+
+         backend = InMemoryWorkspaceBackend(files)
+         return cls(backend=backend, tool_name=tool_name)
+
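A round-trip sketch tying `from_dir`, `dump`, and `from_zip` together (paths are illustrative):

```python
manager = FilesystemManager.from_dir("./project", max_files=200)
# ... let the model edit the workspace via the tool ...
manager.dump("./project_out")               # materialize to a directory
manager.dump("./project.zip", as_zip=True)  # or export a zip archive
restored = FilesystemManager.from_zip("./project.zip")
```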
+
+ ApplyDiffMode = Literal["default", "create"]
+
+
+ @dataclass
+ class Chunk:
+     orig_index: int
+     del_lines: list[str]
+     ins_lines: list[str]
+
+
+ @dataclass
+ class ParserState:
+     lines: list[str]
+     index: int = 0
+     fuzz: int = 0
+
+
+ @dataclass
+ class ParsedUpdateDiff:
+     chunks: list[Chunk]
+     fuzz: int
+
+
+ @dataclass
+ class ReadSectionResult:
+     next_context: list[str]
+     section_chunks: list[Chunk]
+     end_index: int
+     eof: bool
+
+
+ END_PATCH = "*** End Patch"
+ END_FILE = "*** End of File"
+ SECTION_TERMINATORS = [
+     END_PATCH,
+     "*** Update File:",
+     "*** Delete File:",
+     "*** Add File:",
+ ]
+ END_SECTION_MARKERS = [*SECTION_TERMINATORS, END_FILE]
+
+
+ def apply_diff(input_text: str, diff: str, mode: ApplyDiffMode = "default") -> str:
+     """Apply a V4A diff to the provided text."""
+
+     diff_lines = _normalize_diff_lines(diff)
+     if mode == "create":
+         return _parse_create_diff(diff_lines)
+
+     parsed = _parse_update_diff(diff_lines, input_text)
+     return _apply_chunks(input_text, parsed.chunks)
+
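A small worked example of `apply_diff`, using the V4A format the parser below accepts (a leading space marks context, `-` deletions, `+` insertions, with optional `@@` anchors):

```python
src = "alpha\nbeta\ngamma"
patch = "@@\n alpha\n-beta\n+BETA\n gamma"
apply_diff(src, patch)                             # -> "alpha\nBETA\ngamma"

apply_diff("", "+line 1\n+line 2", mode="create")  # -> "line 1\nline 2"
```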
+
+
+ def _normalize_diff_lines(diff: str) -> list[str]:
+     lines = [line.rstrip("\r") for line in re.split(r"\r?\n", diff)]
+     if lines and lines[-1] == "":
+         lines.pop()
+     return lines
+
+
+ def _is_done(state: ParserState, prefixes: Sequence[str]) -> bool:
+     if state.index >= len(state.lines):
+         return True
+     if any(state.lines[state.index].startswith(prefix) for prefix in prefixes):
+         return True
+     return False
+
+
+ def _read_str(state: ParserState, prefix: str) -> str:
+     if state.index >= len(state.lines):
+         return ""
+     current = state.lines[state.index]
+     if current.startswith(prefix):
+         state.index += 1
+         return current[len(prefix) :]
+     return ""
+
+
+ def _parse_create_diff(lines: list[str]) -> str:
+     parser = ParserState(lines=[*lines, END_PATCH])
+     output: list[str] = []
+
+     while not _is_done(parser, SECTION_TERMINATORS):
+         if parser.index >= len(parser.lines):
+             break
+         line = parser.lines[parser.index]
+         parser.index += 1
+         if not line.startswith("+"):
+             raise ValueError(f"Invalid Add File Line: {line}")
+         output.append(line[1:])
+
+     return "\n".join(output)
+
+
+ def _parse_update_diff(lines: list[str], input_text: str) -> ParsedUpdateDiff:
+     parser = ParserState(lines=[*lines, END_PATCH])
+     input_lines = input_text.split("\n")
+     chunks: list[Chunk] = []
+     cursor = 0
+
+     while not _is_done(parser, END_SECTION_MARKERS):
+         anchor = _read_str(parser, "@@ ")
+         has_bare_anchor = (
+             anchor == ""
+             and parser.index < len(parser.lines)
+             and parser.lines[parser.index] == "@@"
+         )
+         if has_bare_anchor:
+             parser.index += 1
+
+         if not (anchor or has_bare_anchor or cursor == 0):
+             current_line = (
+                 parser.lines[parser.index] if parser.index < len(parser.lines) else ""
+             )
+             raise ValueError(f"Invalid Line:\n{current_line}")
+
+         if anchor.strip():
+             cursor = _advance_cursor_to_anchor(anchor, input_lines, cursor, parser)
+
+         section = _read_section(parser.lines, parser.index)
+         find_result = _find_context(
+             input_lines, section.next_context, cursor, section.eof
+         )
+         if find_result.new_index == -1:
+             ctx_text = "\n".join(section.next_context)
+             if section.eof:
+                 raise ValueError(f"Invalid EOF Context {cursor}:\n{ctx_text}")
+             raise ValueError(f"Invalid Context {cursor}:\n{ctx_text}")
+
+         cursor = find_result.new_index + len(section.next_context)
+         parser.fuzz += find_result.fuzz
+         parser.index = section.end_index
+
+         for ch in section.section_chunks:
+             chunks.append(
+                 Chunk(
+                     orig_index=ch.orig_index + find_result.new_index,
+                     del_lines=list(ch.del_lines),
+                     ins_lines=list(ch.ins_lines),
+                 )
+             )
+
+     return ParsedUpdateDiff(chunks=chunks, fuzz=parser.fuzz)
+
+
+ def _advance_cursor_to_anchor(
+     anchor: str,
+     input_lines: list[str],
+     cursor: int,
+     parser: ParserState,
+ ) -> int:
+     found = False
+
+     if not any(line == anchor for line in input_lines[:cursor]):
+         for i in range(cursor, len(input_lines)):
+             if input_lines[i] == anchor:
+                 cursor = i + 1
+                 found = True
+                 break
+
+     if not found and not any(
+         line.strip() == anchor.strip() for line in input_lines[:cursor]
+     ):
+         for i in range(cursor, len(input_lines)):
+             if input_lines[i].strip() == anchor.strip():
+                 cursor = i + 1
+                 parser.fuzz += 1
+                 found = True
+                 break
+
+     return cursor
+
+
+ def _read_section(lines: list[str], start_index: int) -> ReadSectionResult:
+     context: list[str] = []
+     del_lines: list[str] = []
+     ins_lines: list[str] = []
+     section_chunks: list[Chunk] = []
+     mode: Literal["keep", "add", "delete"] = "keep"
+     index = start_index
+     orig_index = index
+
+     while index < len(lines):
+         raw = lines[index]
+         if (
+             raw.startswith("@@")
+             or raw.startswith(END_PATCH)
+             or raw.startswith("*** Update File:")
+             or raw.startswith("*** Delete File:")
+             or raw.startswith("*** Add File:")
+             or raw.startswith(END_FILE)
+         ):
+             break
+         if raw == "***":
+             break
+         if raw.startswith("***"):
+             raise ValueError(f"Invalid Line: {raw}")
+
+         index += 1
+         last_mode = mode
+         line = raw if raw else " "
+         prefix = line[0]
+         if prefix == "+":
+             mode = "add"
+         elif prefix == "-":
+             mode = "delete"
+         elif prefix == " ":
+             mode = "keep"
+         else:
+             raise ValueError(f"Invalid Line: {line}")
+
+         line_content = line[1:]
+         switching_to_context = mode == "keep" and last_mode != mode
+         if switching_to_context and (del_lines or ins_lines):
+             section_chunks.append(
+                 Chunk(
+                     orig_index=len(context) - len(del_lines),
+                     del_lines=list(del_lines),
+                     ins_lines=list(ins_lines),
+                 )
+             )
+             del_lines = []
+             ins_lines = []
+
+         if mode == "delete":
+             del_lines.append(line_content)
+             context.append(line_content)
+         elif mode == "add":
+             ins_lines.append(line_content)
+         else:
+             context.append(line_content)
+
+     if del_lines or ins_lines:
+         section_chunks.append(
+             Chunk(
+                 orig_index=len(context) - len(del_lines),
+                 del_lines=list(del_lines),
+                 ins_lines=list(ins_lines),
+             )
+         )
+
+     if index < len(lines) and lines[index] == END_FILE:
+         return ReadSectionResult(context, section_chunks, index + 1, True)
+
+     if index == orig_index:
+         next_line = lines[index] if index < len(lines) else ""
+         raise ValueError(f"Nothing in this section - index={index} {next_line}")
+
+     return ReadSectionResult(context, section_chunks, index, False)
+
+
+ @dataclass
+ class ContextMatch:
+     new_index: int
+     fuzz: int
+
+
+ def _find_context(
+     lines: list[str], context: list[str], start: int, eof: bool
+ ) -> ContextMatch:
+     if eof:
+         end_start = max(0, len(lines) - len(context))
+         end_match = _find_context_core(lines, context, end_start)
+         if end_match.new_index != -1:
+             return end_match
+         fallback = _find_context_core(lines, context, start)
+         return ContextMatch(new_index=fallback.new_index, fuzz=fallback.fuzz + 10000)
+     return _find_context_core(lines, context, start)
+
+
+ def _find_context_core(
+     lines: list[str], context: list[str], start: int
+ ) -> ContextMatch:
+     if not context:
+         return ContextMatch(new_index=start, fuzz=0)
+
+     for i in range(start, len(lines)):
+         if _equals_slice(lines, context, i, lambda value: value):
+             return ContextMatch(new_index=i, fuzz=0)
+     for i in range(start, len(lines)):
+         if _equals_slice(lines, context, i, lambda value: value.rstrip()):
+             return ContextMatch(new_index=i, fuzz=1)
+     for i in range(start, len(lines)):
+         if _equals_slice(lines, context, i, lambda value: value.strip()):
+             return ContextMatch(new_index=i, fuzz=100)
+
+     return ContextMatch(new_index=-1, fuzz=0)
+
+
+ def _equals_slice(
+     source: list[str], target: list[str], start: int, map_fn: Callable[[str], str]
+ ) -> bool:
+     if start + len(target) > len(source):
+         return False
+     for offset, target_value in enumerate(target):
+         if map_fn(source[start + offset]) != map_fn(target_value):
+             return False
+     return True
+
+
+ def _apply_chunks(input_text: str, chunks: list[Chunk]) -> str:
+     orig_lines = input_text.split("\n")
+     dest_lines: list[str] = []
+     cursor = 0
+
+     for chunk in chunks:
+         if chunk.orig_index > len(orig_lines):
+             raise ValueError(
+                 f"apply_diff: chunk.origIndex {chunk.orig_index} > input length {len(orig_lines)}"
+             )
+         if cursor > chunk.orig_index:
+             raise ValueError(
+                 f"apply_diff: overlapping chunk at {chunk.orig_index} (cursor {cursor})"
+             )
+
+         dest_lines.extend(orig_lines[cursor : chunk.orig_index])
+         cursor = chunk.orig_index
+
+         if chunk.ins_lines:
+             dest_lines.extend(chunk.ins_lines)
+
+         cursor += len(chunk.del_lines)
+
+     dest_lines.extend(orig_lines[cursor:])
+     return "\n".join(dest_lines)
+
+
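The three matching passes in `_find_context_core` give exact matches fuzz 0, trailing-whitespace-tolerant matches fuzz 1, and fully stripped matches fuzz 100; `_find_context` adds 10000 when an EOF-anchored section only matches away from the end of the file. A small sketch of the private helper:

```python
lines = ["def f():", "    return 1"]
_find_context_core(lines, ["def f():", "    return 1"], 0)    # ContextMatch(new_index=0, fuzz=0)
_find_context_core(lines, ["def f():", "    return 1  "], 0)  # fuzz=1 (rstrip pass)
_find_context_core(lines, ["def f():", "return 1"], 0)        # fuzz=100 (strip pass)
```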
+ # S3 filesystem description
+ S3_FS_DESCRIPTION = """Interact with a remote S3-backed filesystem that supports safe concurrent access.
+
+ This filesystem is backed by Amazon S3 with optimistic concurrency control, meaning multiple
+ agents can safely read and write to the same workspace without conflicts. If a write conflict
+ occurs (another agent modified the file), the operation will automatically retry.
+
+ Paths are always relative to the workspace root and use forward slashes. Use this tool to:
+ - inspect files with optional line ranges
+ - create, overwrite, or append to files (with automatic conflict resolution)
+ - delete files or folders from the workspace
+ - list directory contents
+ - search for text across the workspace using regular expressions
+
+ This filesystem is safe for distributed use - conflicts are automatically detected and resolved."""
+
+
+ @dataclass
+ class S3FileMetadata:
+     """Metadata for a file stored in S3."""
+
+     key: str
+     etag: str
+     size: int
+     last_modified: float
+
+
+ @dataclass
+ class ConflictError(Exception):
+     """Raised when a write conflict occurs due to concurrent modification."""
+
+     key: str
+     expected_etag: str | None
+     actual_etag: str | None
+     message: str = ""
+
+     def __str__(self) -> str:
+         if self.message:
+             return self.message
+         return f"Conflict writing {self.key}: expected ETag {self.expected_etag}, got {self.actual_etag}"
+
+
+ @dataclass
+ class RetryConfig:
+     """Configuration for retry behavior on conflicts."""
+
+     max_retries: int = 5
+     base_delay: float = 0.1  # seconds
+     max_delay: float = 5.0  # seconds
+     jitter: float = 0.1  # random jitter factor
+
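With the defaults above, the delay before each retry is `base_delay * 2**attempt` capped at `max_delay`, plus up to `jitter * delay` of random extra sleep:

```python
cfg = RetryConfig()
[min(cfg.base_delay * 2**a, cfg.max_delay) for a in range(cfg.max_retries)]
# -> [0.1, 0.2, 0.4, 0.8, 1.6] seconds (each plus up to 10% jitter)
```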
+
+ class S3WorkspaceBackend:
+     """
+     S3 backend with optimistic concurrency control using conditional writes.
+
+     Uses:
+     - If-None-Match: * for create-if-not-exists operations
+     - If-Match: <etag> for update-only-if-unchanged operations
+     """
+
+     def __init__(
+         self,
+         bucket: str,
+         prefix: str = "",
+         s3_client: Any | None = None,
+         retry_config: RetryConfig | None = None,
+     ):
+         self.bucket = bucket
+         self.prefix = prefix.rstrip("/") + "/" if prefix else ""
+         self._client = s3_client
+         self.retry_config = retry_config or RetryConfig()
+         # Local cache of ETags for optimistic locking
+         self._etag_cache: dict[str, str] = {}
+
+     @property
+     def client(self) -> Any:
+         """Lazy initialization of S3 client."""
+         if self._client is None:
+             import boto3
+
+             self._client = boto3.client("s3")
+         return self._client
+
+     def _full_key(self, path: str) -> str:
+         """Convert a normalized path to a full S3 key."""
+         if path == ".":
+             return self.prefix.rstrip("/") if self.prefix else ""
+         return f"{self.prefix}{path}"
+
+     def _strip_prefix(self, key: str) -> str:
+         """Strip the prefix from an S3 key to get the relative path."""
+         if self.prefix and key.startswith(self.prefix):
+             return key[len(self.prefix) :]
+         return key
+
+     def _get_with_etag(self, path: str) -> tuple[str, str]:
+         """Read a file and return (content, etag)."""
+         key = self._full_key(path)
+         try:
+             response = self.client.get_object(Bucket=self.bucket, Key=key)
+             content = response["Body"].read().decode("utf-8")
+             etag = response["ETag"].strip('"')
+             self._etag_cache[key] = etag
+             return content, etag
+         except self.client.exceptions.NoSuchKey:
+             raise FileNotFoundError(f"{path} does not exist")
+
+     def _put_with_condition(
+         self,
+         path: str,
+         content: str,
+         *,
+         if_none_match: bool = False,
+         if_match: str | None = None,
+     ) -> str:
+         """
+         Write a file with conditional headers.
+
+         Args:
+             path: The file path
+             content: The content to write
+             if_none_match: If True, only write if file doesn't exist
+             if_match: If provided, only write if ETag matches
+
+         Returns:
+             The new ETag of the written object
+
+         Raises:
+             ConflictError: If the condition fails
+             FileExistsError: If if_none_match=True and file exists
+         """
+         key = self._full_key(path)
+         kwargs: dict[str, Any] = {
+             "Bucket": self.bucket,
+             "Key": key,
+             "Body": content.encode("utf-8"),
+             "ContentType": "text/plain; charset=utf-8",
+         }
+
+         if if_none_match:
+             kwargs["IfNoneMatch"] = "*"
+         elif if_match:
+             kwargs["IfMatch"] = (
+                 f'"{if_match}"' if not if_match.startswith('"') else if_match
+             )
+
+         try:
+             response = self.client.put_object(**kwargs)
+             new_etag = response["ETag"].strip('"')
+             self._etag_cache[key] = new_etag
+             return new_etag
+         except self.client.exceptions.ClientError as e:
+             error_code = e.response.get("Error", {}).get("Code", "")
+             if error_code == "PreconditionFailed":
+                 if if_none_match:
+                     raise FileExistsError(f"{path} already exists")
+                 raise ConflictError(
+                     key=key,
+                     expected_etag=if_match,
+                     actual_etag=None,
+                     message=f"File {path} was modified by another process",
+                 )
+             raise
+
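The raw S3 calls this method leans on, sketched with plain boto3 (conditional `PutObject` depends on S3's conditional-write support, so this assumes a reasonably current botocore; bucket and key names are placeholders):

```python
import boto3

s3 = boto3.client("s3")
# Create only if the key is absent ("first writer wins"):
s3.put_object(Bucket="my-bucket", Key="state.json", Body=b"{}", IfNoneMatch="*")
# Update only if unchanged since we last read it (optimistic lock):
head = s3.head_object(Bucket="my-bucket", Key="state.json")
s3.put_object(Bucket="my-bucket", Key="state.json", Body=b"{}", IfMatch=head["ETag"])
```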
+     def _retry_with_backoff(
+         self,
+         operation: str,
+         path: str,
+         func: Any,
+     ) -> Any:
+         """Execute a function with retry and exponential backoff on conflicts."""
+         config = self.retry_config
+         last_error: Exception | None = None
+
+         for attempt in range(config.max_retries + 1):
+             try:
+                 return func()
+             except ConflictError as e:
+                 last_error = e
+                 if attempt >= config.max_retries:
+                     break
+                 # Calculate backoff with jitter
+                 delay = min(
+                     config.base_delay * (2**attempt),
+                     config.max_delay,
+                 )
+                 jitter = delay * config.jitter * random.random()
+                 time.sleep(delay + jitter)
+                 # Clear cached ETag to force fresh read
+                 key = self._full_key(path)
+                 self._etag_cache.pop(key, None)
+
+         raise last_error or RuntimeError(f"Retry failed for {operation} on {path}")
+
+     def read_file(self, path: str) -> str:
+         """Read a file from S3."""
+         key = _normalize_path(path)
+         content, _ = self._get_with_etag(key)
+         return content
+
+     def write_file(self, path: str, content: str, *, overwrite: bool) -> None:
+         """
+         Write a file to S3 with optimistic locking.
+
+         If overwrite=False, uses If-None-Match: * to ensure create-only.
+         If overwrite=True, uses If-Match with cached ETag if available,
+         otherwise does unconditional write.
+         """
+         key = _normalize_path(path)
+         s3_key = self._full_key(key)
+
+         if not overwrite:
+             # Create-if-not-exists
+             self._put_with_condition(key, content, if_none_match=True)
+         else:
+             # Check if we have a cached ETag for optimistic locking
+             cached_etag = self._etag_cache.get(s3_key)
+             if cached_etag:
+                 # Use optimistic locking with retry
+                 def _do_write():
+                     nonlocal cached_etag
+                     try:
+                         self._put_with_condition(key, content, if_match=cached_etag)
+                     except ConflictError:
+                         # Refresh ETag and retry
+                         _, cached_etag = self._get_with_etag(key)
+                         raise
+
+                 self._retry_with_backoff("write_file", key, _do_write)
+             else:
+                 # No cached ETag - try to get current state first for safety
+                 try:
+                     _, etag = self._get_with_etag(key)
+                     self._put_with_condition(key, content, if_match=etag)
+                 except FileNotFoundError:
+                     # File doesn't exist, create it
+                     self._put_with_condition(key, content, if_none_match=True)
+
+     def append_file(self, path: str, content: str) -> None:
+         """
+         Append to a file with optimistic locking.
+
+         Uses read-modify-write with If-Match for consistency.
+         """
+         key = _normalize_path(path)
+
+         def _do_append():
+             try:
+                 current, etag = self._get_with_etag(key)
+                 new_content = current + content
+             except FileNotFoundError:
+                 # File doesn't exist, create it
+                 self._put_with_condition(key, content, if_none_match=True)
+                 return
+
+             self._put_with_condition(key, new_content, if_match=etag)
+
+         self._retry_with_backoff("append_file", key, _do_append)
+
+     def delete_path(self, path: str) -> None:
+         """Delete a file or directory from S3."""
+         key = _normalize_path(path, allow_root=True)
+
+         if key == ".":
+             # Delete all files under the prefix
+             paginator = self.client.get_paginator("list_objects_v2")
+             prefix = self.prefix if self.prefix else ""
+             for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix):
+                 for obj in page.get("Contents", []):
+                     self.client.delete_object(Bucket=self.bucket, Key=obj["Key"])
+                     self._etag_cache.pop(obj["Key"], None)
+             return
+
+         s3_key = self._full_key(key)
+
+         # Try to delete as a single file first
+         try:
+             self.client.head_object(Bucket=self.bucket, Key=s3_key)
+             self.client.delete_object(Bucket=self.bucket, Key=s3_key)
+             self._etag_cache.pop(s3_key, None)
+             return
+         except self.client.exceptions.ClientError:
+             pass
+
+         # Try as a directory prefix
+         prefix = f"{s3_key}/"
+         paginator = self.client.get_paginator("list_objects_v2")
+         deleted_any = False
+
+         for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix):
+             for obj in page.get("Contents", []):
+                 self.client.delete_object(Bucket=self.bucket, Key=obj["Key"])
+                 self._etag_cache.pop(obj["Key"], None)
+                 deleted_any = True
+
+         if not deleted_any:
+             raise FileNotFoundError(f"{key} does not exist")
+
+     def list_dir(self, path: str, recursive: bool) -> list[dict[str, Any]]:
+         """List directory contents from S3."""
+         key = _normalize_path(path, allow_root=True)
+
+         if key == ".":
+             prefix = self.prefix
+         else:
+             prefix = f"{self._full_key(key)}/"
+
+         entries: list[dict[str, Any]] = []
+         seen_dirs: set[str] = set()
+
+         paginator = self.client.get_paginator("list_objects_v2")
+         kwargs: dict[str, Any] = {"Bucket": self.bucket, "Prefix": prefix}
+
+         if not recursive:
+             kwargs["Delimiter"] = "/"
+
+         for page in paginator.paginate(**kwargs):
+             # Handle common prefixes (directories) in non-recursive mode
+             for common_prefix in page.get("CommonPrefixes", []):
+                 dir_key = common_prefix["Prefix"].rstrip("/")
+                 rel_path = self._strip_prefix(dir_key)
+                 if rel_path and rel_path not in seen_dirs:
+                     entries.append(
+                         {"path": rel_path, "type": "directory", "size": None}
+                     )
+                     seen_dirs.add(rel_path)
+
+             # Handle files
+             for obj in page.get("Contents", []):
+                 obj_key = obj["Key"]
+                 rel_path = self._strip_prefix(obj_key)
+
+                 # Skip the prefix itself if it's an empty marker
+                 if not rel_path or rel_path.endswith("/"):
+                     continue
+
+                 if recursive:
+                     entries.append(self._format_file_entry(rel_path, obj))
+                 else:
+                     # Check if this is a direct child
+                     remainder = rel_path
+                     if key != ".":
+                         # Remove the directory prefix to get relative path
+                         dir_prefix = key + "/"
+                         if rel_path.startswith(dir_prefix):
+                             remainder = rel_path[len(dir_prefix) :]
+                         else:
+                             continue
+
+                     if "/" not in remainder:
+                         entries.append(self._format_file_entry(rel_path, obj))
+
+         # Sort by path for consistent ordering
+         entries.sort(key=lambda e: e["path"])
+
+         if not entries and key != ".":
+             # Check if the path itself exists as a file
+             try:
+                 s3_key = self._full_key(key)
+                 response = self.client.head_object(Bucket=self.bucket, Key=s3_key)
+                 return [
+                     {
+                         "path": key,
+                         "type": "file",
+                         "size": response["ContentLength"],
+                         "etag": response["ETag"].strip('"'),
+                     }
+                 ]
+             except self.client.exceptions.ClientError:
+                 raise FileNotFoundError(f"{key} does not exist")
+
+         return entries
+
+     def grep(self, pattern: str, path: str | None, limit: int) -> list[dict[str, Any]]:
+         """Search for pattern in files."""
+         regex = re.compile(pattern)
+         key = _normalize_path(path, allow_root=True) if path is not None else "."
+
+         matches: list[dict[str, Any]] = []
+         paginator = self.client.get_paginator("list_objects_v2")
+
+         def process_object(obj_key: str, rel_path: str) -> None:
+             if len(matches) >= limit:
+                 return
+             try:
+                 response = self.client.get_object(Bucket=self.bucket, Key=obj_key)
+                 content = response["Body"].read().decode("utf-8")
+
+                 for line_no, line in enumerate(content.splitlines(), start=1):
+                     if regex.search(line):
+                         matches.append(
+                             {
+                                 "path": rel_path,
+                                 "line": line_no,
+                                 "text": line.strip(),
+                             }
+                         )
+                         if len(matches) >= limit:
+                             return
+             except Exception:
+                 # Skip files that can't be read as text
+                 return
+
+         # If a specific path is provided, check if it's a file first
+         if key == ".":
+             prefix = self.prefix
+         else:
+             s3_key = self._full_key(key)
+             try:
+                 self.client.head_object(Bucket=self.bucket, Key=s3_key)
+                 rel_path = self._strip_prefix(s3_key)
+                 process_object(s3_key, rel_path)
+                 return matches
+             except self.client.exceptions.ClientError:
+                 prefix = f"{s3_key}/"
+
+         for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix):
+             for obj in page.get("Contents", []):
+                 if len(matches) >= limit:
+                     return matches
+
+                 obj_key = obj["Key"]
+                 rel_path = self._strip_prefix(obj_key)
+
+                 if not rel_path or rel_path.endswith("/"):
+                     continue
+
+                 process_object(obj_key, rel_path)
+
+         return matches
+
+     def _format_file_entry(self, path: str, obj: dict[str, Any]) -> dict[str, Any]:
+         """Format a file entry from S3 object metadata."""
+         return {
+             "path": path,
+             "type": "file",
+             "size": obj["Size"],
+             "etag": obj["ETag"].strip('"'),
+         }
+
+     def get_file_etag(self, path: str) -> str | None:
+         """Get the cached ETag for a file, or fetch it if not cached."""
+         key = _normalize_path(path)
+         s3_key = self._full_key(key)
+
+         if s3_key in self._etag_cache:
+             return self._etag_cache[s3_key]
+
+         try:
+             response = self.client.head_object(Bucket=self.bucket, Key=s3_key)
+             etag = response["ETag"].strip('"')
+             self._etag_cache[s3_key] = etag
+             return etag
+         except self.client.exceptions.ClientError:
+             return None
+
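Usage sketch for the backend on its own (bucket and prefix are placeholders):

```python
backend = S3WorkspaceBackend(bucket="my-ai-workspace", prefix="shared/")
# Safe under concurrency: if another writer lands between the read and the
# conditional put, the If-Match put raises ConflictError and
# _retry_with_backoff re-runs the read-modify-write.
backend.append_file("log.txt", "worker A finished\n")
```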
+
+ # Command types for the S3 filesystem tool
+ S3FsCommand = Literal[
+     "read_file",
+     "write_file",
+     "delete_path",
+     "list_dir",
+     "grep",
+ ]
+
+ ALL_S3_COMMANDS: tuple[S3FsCommand, ...] = (
+     "read_file",
+     "write_file",
+     "delete_path",
+     "list_dir",
+     "grep",
+ )
+
+
+ class S3FilesystemParams(BaseModel):
+     """Schema describing S3 filesystem tool calls."""
+
+     command: S3FsCommand = Field(
+         description="Filesystem operation to perform (read_file, write_file, delete_path, list_dir, grep)"
+     )
+     path: Optional[str] = Field(
+         default=None,
+         description="Path to operate on, relative to workspace root. Use '.' for the root directory.",
+     )
+     start_line: Optional[int] = Field(
+         default=None,
+         description="1-based inclusive start line when reading a file. Leave unset to read from the beginning.",
+         ge=1,
+     )
+     end_line: Optional[int] = Field(
+         default=None,
+         description="1-based inclusive end line when reading a file. Leave unset to read through the end.",
+         ge=1,
+     )
+     content: Optional[str] = Field(
+         default=None,
+         description="Content to write when using write_file.",
+     )
+     mode: Optional[Literal["overwrite", "append", "create_if_missing"]] = Field(
+         default="overwrite",
+         description="How to write content. Overwrite replaces the file, append adds to the end, create_if_missing leaves existing files untouched.",
+     )
+     recursive: Optional[bool] = Field(
+         default=None,
+         description="When listing directories, set to true to recurse.",
+     )
+     pattern: Optional[str] = Field(
+         default=None,
+         description="Regular expression pattern to search for when using grep.",
+     )
+     max_results: Optional[int] = Field(
+         default=50,
+         description="Maximum number of grep matches to return.",
+         ge=1,
+     )
+
+
+ class S3FilesystemManager:
+     """
+     S3-backed filesystem manager with optimistic concurrency control.
+
+     Uses S3 conditional writes (If-None-Match and If-Match) for safe distributed
+     operations, allowing multiple AI agents to share filesystem state.
+
+     Example:
+         manager = S3FilesystemManager(
+             bucket="my-ai-workspace",
+             prefix="agent-123/",
+         )
+         tools = manager.get_tools()
+     """
+
+     def __init__(
+         self,
+         bucket: str,
+         prefix: str = "",
+         s3_client: Any | None = None,
+         retry_config: RetryConfig | None = None,
+         tool_name: str = "s3_filesystem",
+     ):
+         """
+         Initialize the S3 filesystem manager.
+
+         Args:
+             bucket: The S3 bucket name
+             prefix: Optional prefix for all keys (like a workspace directory)
+             s3_client: Optional pre-configured S3 client
+             retry_config: Configuration for retry behavior on conflicts
+             tool_name: Name for the tool (default: "s3_filesystem")
+         """
+         self.backend = S3WorkspaceBackend(
+             bucket=bucket,
+             prefix=prefix,
+             s3_client=s3_client,
+             retry_config=retry_config,
+         )
+         self.tool_name = tool_name
+         self._tool_cache: dict[tuple[str, ...], list[Tool]] = {}
+
+     def _handle_read(
+         self, path: str, start_line: Optional[int], end_line: Optional[int]
+     ) -> dict[str, Any]:
+         content = self.backend.read_file(path)
+         total_lines = len(content.splitlines()) or (0 if content == "" else 1)
+         start = start_line or 1
+         end = end_line or total_lines
+         if end < start:
+             if not (total_lines == 0 and end_line is None and start == 1):
+                 raise ValueError("end_line must be greater than or equal to start_line")
+
+         if start == 1 and end >= total_lines:
+             snippet = content
+         else:
+             lines = content.splitlines()
+             snippet = "\n".join(lines[start - 1 : end])
+
+         # Include ETag in response for transparency
+         etag = self.backend.get_file_etag(path)
+
+         return {
+             "path": path,
+             "start_line": start,
+             "end_line": end,
+             "content": snippet,
+             "total_lines": total_lines,
+             "character_count": len(content),
+             "etag": etag,
+         }
+
+     def _handle_write(
+         self, path: str, content: str, mode: Optional[str]
+     ) -> dict[str, Any]:
+         write_mode = mode or "overwrite"
+         if write_mode == "overwrite":
+             self.backend.write_file(path, content, overwrite=True)
+         elif write_mode == "append":
+             self.backend.append_file(path, content)
+         elif write_mode == "create_if_missing":
+             try:
+                 self.backend.write_file(path, content, overwrite=False)
+             except FileExistsError:
+                 pass
+         else:
+             raise ValueError(f"Unsupported write mode: {write_mode}")
+
+         # Get new ETag after write
+         etag = self.backend.get_file_etag(path)
+
+         return {"path": path, "status": "ok", "mode": write_mode, "etag": etag}
+
+     def _handle_delete(self, path: str) -> dict[str, Any]:
+         self.backend.delete_path(path)
+         return {"path": path, "status": "ok"}
+
+     def _handle_list(
+         self, path: Optional[str], recursive: Optional[bool]
+     ) -> dict[str, Any]:
+         listing = self.backend.list_dir(path or ".", recursive=bool(recursive))
+         return {"path": path or ".", "recursive": bool(recursive), "entries": listing}
+
+     def _handle_grep(
+         self, pattern: str, path: Optional[str], limit: Optional[int]
+     ) -> dict[str, Any]:
+         max_results = limit or 50
+         matches = self.backend.grep(pattern, path=path, limit=max_results)
+         return {
+             "pattern": pattern,
+             "path": path,
+             "max_results": max_results,
+             "matches": matches,
+         }
+
+     def _filesystem_tool(self, allowed_commands: set[str], **kwargs: Any) -> str:
+         params = S3FilesystemParams.model_validate(kwargs)
+
+         try:
+             if params.command not in allowed_commands:
+                 raise ValueError(
+                     f"The '{params.command}' command is disabled for this tool instance"
+                 )
+             if params.command == "read_file":
+                 if not params.path:
+                     raise ValueError("path is required for read_file")
+                 result = self._handle_read(
+                     params.path, params.start_line, params.end_line
+                 )
+             elif params.command == "write_file":
+                 if params.path is None or params.content is None:
+                     raise ValueError("path and content are required for write_file")
+                 result = self._handle_write(params.path, params.content, params.mode)
+             elif params.command == "delete_path":
+                 if not params.path:
+                     raise ValueError("path is required for delete_path")
+                 result = self._handle_delete(params.path)
+             elif params.command == "list_dir":
+                 result = self._handle_list(params.path, params.recursive)
+             elif params.command == "grep":
+                 if not params.pattern:
+                     raise ValueError("pattern is required for grep")
+                 result = self._handle_grep(
+                     params.pattern, params.path, params.max_results
+                 )
+             else:
+                 raise ValueError(f"Unknown command: {params.command}")
+             return json.dumps({"ok": True, "result": result}, indent=2)
+         except Exception as exc:
+             return json.dumps(
+                 {"ok": False, "error": type(exc).__name__, "message": str(exc)},
+                 indent=2,
+             )
+
+     def get_tools(self, *, exclude: Iterable[S3FsCommand] | None = None) -> list[Tool]:
+         """
+         Get the filesystem tools.
+
+         Args:
+             exclude: Optional list of commands to exclude from the tool
+
+         Returns:
+             List containing the S3 filesystem tool
+         """
+         exclude_set = set(exclude or [])
+         unknown = exclude_set.difference(ALL_S3_COMMANDS)
+         if unknown:
+             raise ValueError(f"Unknown commands in exclude list: {sorted(unknown)}")
+
+         allowed = tuple(cmd for cmd in ALL_S3_COMMANDS if cmd not in exclude_set)
+         if not allowed:
+             raise ValueError("Cannot exclude every filesystem command")
+
+         cache_key = allowed
+         if cache_key in self._tool_cache:
+             return self._tool_cache[cache_key]
+
+         allowed_set = set(allowed)
+         schema = S3FilesystemParams.model_json_schema(ref_template="#/$defs/{model}")
+         if (
+             "properties" in schema
+             and "command" in schema["properties"]
+             and isinstance(schema["properties"]["command"], dict)
+         ):
+             schema["properties"]["command"]["enum"] = list(allowed)
+
+         tool = Tool(
+             name=self.tool_name,
+             description=S3_FS_DESCRIPTION,
+             parameters=schema.get("properties", {}),
+             required=schema.get("required", []),
+             definitions=schema.get("$defs"),
+             run=partial(self._filesystem_tool, allowed_set),
+         )
+
+         self._tool_cache[cache_key] = [tool]
+         return [tool]
+
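As with the in-memory manager, commands can be stripped for least-privilege agents; a sketch:

```python
manager = S3FilesystemManager(bucket="my-ai-workspace", prefix="agent-123/")
# Read-only toolset for a reviewer agent:
tools = manager.get_tools(exclude=["write_file", "delete_path"])
```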
+
+ __all__ = [
+     "FilesystemManager",
+     "FilesystemParams",
+     "InMemoryWorkspaceBackend",
+     "WorkspaceBackend",
+     "S3FilesystemManager",
+     "S3FilesystemParams",
+     "S3WorkspaceBackend",
+     "ConflictError",
+     "RetryConfig",
+ ]
+
+
+ description = """
+ S3-backed remote filesystem tool with optimistic concurrency control.
+
+ Uses S3 conditional writes (If-None-Match and If-Match) for safe distributed
+ operations, allowing multiple AI agents to share filesystem state without conflicts.
+
+ Features:
+ - If-None-Match: * -> Create-if-not-exists (distributed locks, idempotent writes)
+ - If-Match: <etag> -> Update-only-if-unchanged (optimistic locking)
+ - Automatic retry with exponential backoff on conflicts
+ - ETag tracking for all file operations
+
+ Example:
+     from lm_deluge.tool.prefab.filesystem import S3FilesystemManager
+
+     manager = S3FilesystemManager(
+         bucket="my-ai-workspace",
+         prefix="agent-123/",  # Optional: isolate agent's workspace
+     )
+
+     # Get tools for the agent
+     tools = manager.get_tools()
+
+     # The filesystem operations are now safe for concurrent access
+ """