lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
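The largest single addition in this release is the new filesystem prefab tool (lm_deluge/tool/prefab/filesystem.py, diffed below). As orientation before the full diff, here is a minimal usage sketch of the FilesystemManager API that file introduces; the import path is inferred from the new module's location and the calls from its public methods, so treat this as an illustrative sketch rather than documented usage.

# Minimal sketch of the new FilesystemManager prefab (import path assumed from the diff below).
from lm_deluge.tool.prefab.filesystem import FilesystemManager

# Load up to 50 text files from a local directory into the in-memory workspace.
fs = FilesystemManager.from_dir("./docs", max_files=50)

# Expose a single "filesystem" Tool to a model, with destructive commands disabled.
tools = fs.get_tools(exclude=["delete_path", "apply_patch"])

# ... run an agent loop with `tools` so the model can read_file / write_file / grep ...

# Copy whatever the model wrote back out to disk (or pass as_zip=True for an archive).
fs.dump("./docs_out")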
lm_deluge/tool/prefab/filesystem.py (new file)
@@ -0,0 +1,1711 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import random
|
|
7
|
+
import re
|
|
8
|
+
import time
|
|
9
|
+
import zipfile
|
|
10
|
+
from collections.abc import Callable, Iterable, Sequence
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from functools import partial
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Dict, Literal, Optional, Protocol
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
|
|
18
|
+
from .. import Tool
|
|
19
|
+
|
|
20
|
+
FS_DESCRIPTION = """Interact with an isolated virtual filesystem that belongs to this session.
|
|
21
|
+
|
|
22
|
+
Paths are always relative to the workspace root and use forward slashes. Use this tool to:
|
|
23
|
+
- inspect files with optional line ranges
|
|
24
|
+
- create, overwrite, or append to files
|
|
25
|
+
- delete files or folders from the workspace
|
|
26
|
+
- list directory contents
|
|
27
|
+
- search for text across the workspace using regular expressions
|
|
28
|
+
- apply OpenAI-style apply_patch operations (create/update/delete)
|
|
29
|
+
|
|
30
|
+
This virtual filesystem is safe-by-construction. Paths that try to escape the workspace
|
|
31
|
+
or reference missing files will raise clear errors."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class WorkspaceBackend(Protocol):
|
|
35
|
+
"""Abstract filesystem operations used by FilesystemManager."""
|
|
36
|
+
|
|
37
|
+
def read_file(self, path: str) -> str: ...
|
|
38
|
+
|
|
39
|
+
def write_file(self, path: str, content: str, *, overwrite: bool) -> None: ...
|
|
40
|
+
|
|
41
|
+
def append_file(self, path: str, content: str) -> None: ...
|
|
42
|
+
|
|
43
|
+
def delete_path(self, path: str) -> None: ...
|
|
44
|
+
|
|
45
|
+
def list_dir(self, path: str, recursive: bool) -> list[dict[str, Any]]: ...
|
|
46
|
+
|
|
47
|
+
def grep(
|
|
48
|
+
self, pattern: str, path: str | None, limit: int
|
|
49
|
+
) -> list[dict[str, Any]]: ...
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _normalize_path(path: str | None, *, allow_root: bool = False) -> str:
|
|
53
|
+
"""Normalize user-provided paths and prevent directory traversal."""
|
|
54
|
+
if path is None or path.strip() == "":
|
|
55
|
+
if allow_root:
|
|
56
|
+
return "."
|
|
57
|
+
raise ValueError("Path is required")
|
|
58
|
+
|
|
59
|
+
raw = path.strip()
|
|
60
|
+
if raw.startswith("/"):
|
|
61
|
+
raw = raw.lstrip("/")
|
|
62
|
+
|
|
63
|
+
parts: list[str] = []
|
|
64
|
+
for part in raw.split("/"):
|
|
65
|
+
if part in ("", "."):
|
|
66
|
+
continue
|
|
67
|
+
if part == "..":
|
|
68
|
+
if parts:
|
|
69
|
+
parts.pop()
|
|
70
|
+
else:
|
|
71
|
+
raise ValueError("Path traversal outside the workspace is not allowed")
|
|
72
|
+
continue
|
|
73
|
+
parts.append(part)
|
|
74
|
+
|
|
75
|
+
normalized = "/".join(parts)
|
|
76
|
+
if normalized:
|
|
77
|
+
return normalized
|
|
78
|
+
if allow_root:
|
|
79
|
+
return "."
|
|
80
|
+
raise ValueError("Path must reference a file inside the workspace")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class InMemoryWorkspaceBackend:
|
|
84
|
+
"""Simple backend that stores files in memory."""
|
|
85
|
+
|
|
86
|
+
def __init__(self, files: dict[str, str] | None = None):
|
|
87
|
+
self._files: Dict[str, str] = {}
|
|
88
|
+
if files:
|
|
89
|
+
for path, content in files.items():
|
|
90
|
+
key = _normalize_path(path)
|
|
91
|
+
self._files[key] = content
|
|
92
|
+
|
|
93
|
+
def read_file(self, path: str) -> str:
|
|
94
|
+
key = _normalize_path(path)
|
|
95
|
+
if key not in self._files:
|
|
96
|
+
raise FileNotFoundError(f"{key} does not exist")
|
|
97
|
+
return self._files[key]
|
|
98
|
+
|
|
99
|
+
def write_file(self, path: str, content: str, *, overwrite: bool) -> None:
|
|
100
|
+
key = _normalize_path(path)
|
|
101
|
+
if not overwrite and key in self._files:
|
|
102
|
+
raise FileExistsError(f"{key} already exists")
|
|
103
|
+
self._files[key] = content
|
|
104
|
+
|
|
105
|
+
def append_file(self, path: str, content: str) -> None:
|
|
106
|
+
key = _normalize_path(path)
|
|
107
|
+
if key in self._files:
|
|
108
|
+
self._files[key] = f"{self._files[key]}{content}"
|
|
109
|
+
else:
|
|
110
|
+
self._files[key] = content
|
|
111
|
+
|
|
112
|
+
def delete_path(self, path: str) -> None:
|
|
113
|
+
key = _normalize_path(path, allow_root=True)
|
|
114
|
+
if key == ".":
|
|
115
|
+
self._files.clear()
|
|
116
|
+
return
|
|
117
|
+
if key in self._files:
|
|
118
|
+
del self._files[key]
|
|
119
|
+
return
|
|
120
|
+
|
|
121
|
+
prefix = f"{key}/"
|
|
122
|
+
targets = [p for p in self._files if p.startswith(prefix)]
|
|
123
|
+
if not targets:
|
|
124
|
+
raise FileNotFoundError(f"{key} does not exist")
|
|
125
|
+
for target in targets:
|
|
126
|
+
del self._files[target]
|
|
127
|
+
|
|
128
|
+
def list_dir(self, path: str, recursive: bool) -> list[dict[str, Any]]:
|
|
129
|
+
key = _normalize_path(path, allow_root=True)
|
|
130
|
+
if key != "." and key in self._files and not recursive:
|
|
131
|
+
# Listing a file path shows metadata for that file.
|
|
132
|
+
return [self._format_file_entry(key)]
|
|
133
|
+
|
|
134
|
+
prefix = "" if key == "." else f"{key}/"
|
|
135
|
+
entries: list[dict[str, Any]] = []
|
|
136
|
+
|
|
137
|
+
if key != "." and not any(
|
|
138
|
+
p == key or p.startswith(prefix) for p in self._files
|
|
139
|
+
):
|
|
140
|
+
raise FileNotFoundError(f"{key} does not exist")
|
|
141
|
+
|
|
142
|
+
if recursive:
|
|
143
|
+
for file_path in sorted(self._files):
|
|
144
|
+
if not (file_path == key or file_path.startswith(prefix)):
|
|
145
|
+
continue
|
|
146
|
+
entries.append(self._format_file_entry(file_path))
|
|
147
|
+
return entries
|
|
148
|
+
|
|
149
|
+
seen_dirs: set[str] = set()
|
|
150
|
+
for file_path in sorted(self._files):
|
|
151
|
+
if not (file_path == key or file_path.startswith(prefix)):
|
|
152
|
+
continue
|
|
153
|
+
remainder = file_path[len(prefix) :]
|
|
154
|
+
if remainder == "":
|
|
155
|
+
entries.append(self._format_file_entry(file_path))
|
|
156
|
+
continue
|
|
157
|
+
head, _, tail = remainder.partition("/")
|
|
158
|
+
if tail:
|
|
159
|
+
dir_path = head if key == "." else f"{key}/{head}"
|
|
160
|
+
if dir_path not in seen_dirs:
|
|
161
|
+
entries.append(
|
|
162
|
+
{"path": dir_path, "type": "directory", "size": None}
|
|
163
|
+
)
|
|
164
|
+
seen_dirs.add(dir_path)
|
|
165
|
+
else:
|
|
166
|
+
entries.append(self._format_file_entry(file_path))
|
|
167
|
+
return entries
|
|
168
|
+
|
|
169
|
+
def grep(self, pattern: str, path: str | None, limit: int) -> list[dict[str, Any]]:
|
|
170
|
+
regex = re.compile(pattern)
|
|
171
|
+
key = _normalize_path(path, allow_root=True) if path is not None else "."
|
|
172
|
+
prefix = "" if key == "." else f"{key}/"
|
|
173
|
+
matches: list[dict[str, Any]] = []
|
|
174
|
+
|
|
175
|
+
for file_path in sorted(self._files):
|
|
176
|
+
if not (file_path == key or file_path.startswith(prefix)):
|
|
177
|
+
continue
|
|
178
|
+
content = self._files[file_path]
|
|
179
|
+
for line_no, line in enumerate(content.splitlines(), start=1):
|
|
180
|
+
if regex.search(line):
|
|
181
|
+
matches.append(
|
|
182
|
+
{"path": file_path, "line": line_no, "text": line.strip()}
|
|
183
|
+
)
|
|
184
|
+
if len(matches) >= limit:
|
|
185
|
+
return matches
|
|
186
|
+
if (
|
|
187
|
+
key != "."
|
|
188
|
+
and key not in self._files
|
|
189
|
+
and not any(p.startswith(prefix) for p in self._files)
|
|
190
|
+
):
|
|
191
|
+
raise FileNotFoundError(f"{key} does not exist")
|
|
192
|
+
return matches
|
|
193
|
+
|
|
194
|
+
def _format_file_entry(self, path: str) -> dict[str, Any]:
|
|
195
|
+
content = self._files[path]
|
|
196
|
+
if content == "":
|
|
197
|
+
line_count = 0
|
|
198
|
+
else:
|
|
199
|
+
line_count = content.count("\n") + (0 if content.endswith("\n") else 1)
|
|
200
|
+
return {
|
|
201
|
+
"path": path,
|
|
202
|
+
"type": "file",
|
|
203
|
+
"size": len(content),
|
|
204
|
+
"line_count": max(line_count, 0),
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
FsCommand = Literal[
|
|
209
|
+
"read_file",
|
|
210
|
+
"write_file",
|
|
211
|
+
"delete_path",
|
|
212
|
+
"list_dir",
|
|
213
|
+
"grep",
|
|
214
|
+
"apply_patch",
|
|
215
|
+
]
|
|
216
|
+
ALL_COMMANDS: tuple[FsCommand, ...] = (
|
|
217
|
+
"read_file",
|
|
218
|
+
"write_file",
|
|
219
|
+
"delete_path",
|
|
220
|
+
"list_dir",
|
|
221
|
+
"grep",
|
|
222
|
+
"apply_patch",
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class ApplyPatchOperation(BaseModel):
|
|
227
|
+
"""Subset of OpenAI apply_patch operation payload."""
|
|
228
|
+
|
|
229
|
+
type: Literal["create_file", "update_file", "delete_file"] = Field(
|
|
230
|
+
description="Type of patch operation to perform."
|
|
231
|
+
)
|
|
232
|
+
path: str = Field(description="Path to the file being modified.")
|
|
233
|
+
diff: str | None = Field(
|
|
234
|
+
default=None,
|
|
235
|
+
description="V4A diff to apply for create/update operations.",
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
@property
|
|
239
|
+
def requires_diff(self) -> bool:
|
|
240
|
+
return self.type in {"create_file", "update_file"}
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class FilesystemParams(BaseModel):
|
|
244
|
+
"""Schema describing filesystem tool calls."""
|
|
245
|
+
|
|
246
|
+
command: FsCommand = Field(
|
|
247
|
+
description="Filesystem operation to perform (read_file, write_file, delete_path, list_dir, grep)"
|
|
248
|
+
)
|
|
249
|
+
path: Optional[str] = Field(
|
|
250
|
+
default=None,
|
|
251
|
+
description="Path to operate on, relative to workspace root. Use '.' for the root directory.",
|
|
252
|
+
)
|
|
253
|
+
start_line: Optional[int] = Field(
|
|
254
|
+
default=None,
|
|
255
|
+
description="1-based inclusive start line when reading a file. Leave unset to read from the beginning.",
|
|
256
|
+
ge=1,
|
|
257
|
+
)
|
|
258
|
+
end_line: Optional[int] = Field(
|
|
259
|
+
default=None,
|
|
260
|
+
description="1-based inclusive end line when reading a file. Leave unset to read through the end.",
|
|
261
|
+
ge=1,
|
|
262
|
+
)
|
|
263
|
+
content: Optional[str] = Field(
|
|
264
|
+
default=None,
|
|
265
|
+
description="Content to write when using write_file.",
|
|
266
|
+
)
|
|
267
|
+
mode: Optional[Literal["overwrite", "append", "create_if_missing"]] = Field(
|
|
268
|
+
default="overwrite",
|
|
269
|
+
description="How to write content. Overwrite replaces the file, append adds to the end, create_if_missing leaves existing files untouched.",
|
|
270
|
+
)
|
|
271
|
+
recursive: Optional[bool] = Field(
|
|
272
|
+
default=None,
|
|
273
|
+
description="When listing directories, set to true to recurse.",
|
|
274
|
+
)
|
|
275
|
+
pattern: Optional[str] = Field(
|
|
276
|
+
default=None,
|
|
277
|
+
description="Regular expression pattern to search for when using grep.",
|
|
278
|
+
)
|
|
279
|
+
max_results: Optional[int] = Field(
|
|
280
|
+
default=50,
|
|
281
|
+
description="Maximum number of grep matches to return.",
|
|
282
|
+
ge=1,
|
|
283
|
+
)
|
|
284
|
+
operation: ApplyPatchOperation | None = Field(
|
|
285
|
+
default=None,
|
|
286
|
+
description=(
|
|
287
|
+
"When using command='apply_patch', include an operation matching the "
|
|
288
|
+
"OpenAI apply_patch_call payload (type, path, diff)."
|
|
289
|
+
),
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class FilesystemManager:
|
|
294
|
+
"""Expose a TodoManager-style tool for interacting with a workspace."""
|
|
295
|
+
|
|
296
|
+
def __init__(
|
|
297
|
+
self,
|
|
298
|
+
backend: WorkspaceBackend | None = None,
|
|
299
|
+
*,
|
|
300
|
+
tool_name: str = "filesystem",
|
|
301
|
+
):
|
|
302
|
+
self.backend = backend or InMemoryWorkspaceBackend()
|
|
303
|
+
self.tool_name = tool_name
|
|
304
|
+
self._tool_cache: dict[tuple[str, ...], list[Tool]] = {}
|
|
305
|
+
|
|
306
|
+
def _handle_read(
|
|
307
|
+
self, path: str, start_line: Optional[int], end_line: Optional[int]
|
|
308
|
+
) -> dict[str, Any]:
|
|
309
|
+
content = self.backend.read_file(path)
|
|
310
|
+
total_lines = len(content.splitlines()) or (0 if content == "" else 1)
|
|
311
|
+
start = start_line or 1
|
|
312
|
+
end = end_line or total_lines
|
|
313
|
+
if end < start:
|
|
314
|
+
if not (total_lines == 0 and end_line is None and start == 1):
|
|
315
|
+
raise ValueError("end_line must be greater than or equal to start_line")
|
|
316
|
+
|
|
317
|
+
if start == 1 and end >= total_lines:
|
|
318
|
+
snippet = content
|
|
319
|
+
else:
|
|
320
|
+
lines = content.splitlines()
|
|
321
|
+
snippet = "\n".join(lines[start - 1 : end])
|
|
322
|
+
|
|
323
|
+
return {
|
|
324
|
+
"path": path,
|
|
325
|
+
"start_line": start,
|
|
326
|
+
"end_line": end,
|
|
327
|
+
"content": snippet,
|
|
328
|
+
"total_lines": total_lines,
|
|
329
|
+
"character_count": len(content),
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
def _handle_write(
|
|
333
|
+
self, path: str, content: str, mode: Optional[str]
|
|
334
|
+
) -> dict[str, Any]:
|
|
335
|
+
write_mode = mode or "overwrite"
|
|
336
|
+
if write_mode == "overwrite":
|
|
337
|
+
self.backend.write_file(path, content, overwrite=True)
|
|
338
|
+
elif write_mode == "append":
|
|
339
|
+
self.backend.append_file(path, content)
|
|
340
|
+
elif write_mode == "create_if_missing":
|
|
341
|
+
try:
|
|
342
|
+
self.backend.write_file(path, content, overwrite=False)
|
|
343
|
+
except FileExistsError:
|
|
344
|
+
pass
|
|
345
|
+
else:
|
|
346
|
+
raise ValueError(f"Unsupported write mode: {write_mode}")
|
|
347
|
+
return {"path": path, "status": "ok", "mode": write_mode}
|
|
348
|
+
|
|
349
|
+
def _handle_delete(self, path: str) -> dict[str, Any]:
|
|
350
|
+
self.backend.delete_path(path)
|
|
351
|
+
return {"path": path, "status": "ok"}
|
|
352
|
+
|
|
353
|
+
def _handle_list(
|
|
354
|
+
self, path: Optional[str], recursive: Optional[bool]
|
|
355
|
+
) -> dict[str, Any]:
|
|
356
|
+
listing = self.backend.list_dir(path or ".", recursive=bool(recursive))
|
|
357
|
+
return {"path": path or ".", "recursive": bool(recursive), "entries": listing}
|
|
358
|
+
|
|
359
|
+
def _handle_grep(
|
|
360
|
+
self, pattern: str, path: Optional[str], limit: Optional[int]
|
|
361
|
+
) -> dict[str, Any]:
|
|
362
|
+
max_results = limit or 50
|
|
363
|
+
matches = self.backend.grep(pattern, path=path, limit=max_results)
|
|
364
|
+
return {
|
|
365
|
+
"pattern": pattern,
|
|
366
|
+
"path": path,
|
|
367
|
+
"max_results": max_results,
|
|
368
|
+
"matches": matches,
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
def _handle_apply_patch(self, operation: ApplyPatchOperation) -> dict[str, Any]:
|
|
372
|
+
if operation.requires_diff and not operation.diff:
|
|
373
|
+
raise ValueError("diff is required for create_file and update_file")
|
|
374
|
+
|
|
375
|
+
if operation.type == "delete_file":
|
|
376
|
+
self.backend.delete_path(operation.path)
|
|
377
|
+
return {"path": operation.path, "operation": "delete_file"}
|
|
378
|
+
|
|
379
|
+
assert operation.diff is not None # for type checkers
|
|
380
|
+
if operation.type == "create_file":
|
|
381
|
+
new_content = apply_diff("", operation.diff, mode="create")
|
|
382
|
+
self.backend.write_file(operation.path, new_content, overwrite=False)
|
|
383
|
+
return {"path": operation.path, "operation": "create_file"}
|
|
384
|
+
|
|
385
|
+
if operation.type == "update_file":
|
|
386
|
+
current = self.backend.read_file(operation.path)
|
|
387
|
+
new_content = apply_diff(current, operation.diff, mode="default")
|
|
388
|
+
self.backend.write_file(operation.path, new_content, overwrite=True)
|
|
389
|
+
return {"path": operation.path, "operation": "update_file"}
|
|
390
|
+
|
|
391
|
+
raise ValueError(f"Unsupported patch operation: {operation.type}")
|
|
392
|
+
|
|
393
|
+
def _filesystem_tool(self, allowed_commands: set[FsCommand], **kwargs: Any) -> str:
|
|
394
|
+
params = FilesystemParams.model_validate(kwargs)
|
|
395
|
+
|
|
396
|
+
try:
|
|
397
|
+
if params.command not in allowed_commands:
|
|
398
|
+
raise ValueError(
|
|
399
|
+
f"The '{params.command}' command is disabled for this tool instance"
|
|
400
|
+
)
|
|
401
|
+
if params.command == "read_file":
|
|
402
|
+
if not params.path:
|
|
403
|
+
raise ValueError("path is required for read_file")
|
|
404
|
+
result = self._handle_read(
|
|
405
|
+
params.path, params.start_line, params.end_line
|
|
406
|
+
)
|
|
407
|
+
elif params.command == "write_file":
|
|
408
|
+
if params.path is None or params.content is None:
|
|
409
|
+
raise ValueError("path and content are required for write_file")
|
|
410
|
+
result = self._handle_write(params.path, params.content, params.mode)
|
|
411
|
+
elif params.command == "delete_path":
|
|
412
|
+
if not params.path:
|
|
413
|
+
raise ValueError("path is required for delete_path")
|
|
414
|
+
result = self._handle_delete(params.path)
|
|
415
|
+
elif params.command == "list_dir":
|
|
416
|
+
result = self._handle_list(params.path, params.recursive)
|
|
417
|
+
elif params.command == "grep":
|
|
418
|
+
if not params.pattern:
|
|
419
|
+
raise ValueError("pattern is required for grep")
|
|
420
|
+
result = self._handle_grep(
|
|
421
|
+
params.pattern, params.path, params.max_results
|
|
422
|
+
)
|
|
423
|
+
elif params.command == "apply_patch":
|
|
424
|
+
if params.operation is None:
|
|
425
|
+
raise ValueError("operation is required for apply_patch")
|
|
426
|
+
result = self._handle_apply_patch(params.operation)
|
|
427
|
+
else:
|
|
428
|
+
raise ValueError(f"Unknown command: {params.command}")
|
|
429
|
+
return json.dumps({"ok": True, "result": result}, indent=2)
|
|
430
|
+
except Exception as exc:
|
|
431
|
+
return json.dumps(
|
|
432
|
+
{"ok": False, "error": type(exc).__name__, "message": str(exc)},
|
|
433
|
+
indent=2,
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
def get_tools(self, *, exclude: Iterable[FsCommand] | None = None) -> list[Tool]:
|
|
437
|
+
exclude_set = set(exclude or [])
|
|
438
|
+
unknown = exclude_set.difference(ALL_COMMANDS)
|
|
439
|
+
if unknown:
|
|
440
|
+
raise ValueError(f"Unknown commands in exclude list: {sorted(unknown)}")
|
|
441
|
+
|
|
442
|
+
allowed = tuple(cmd for cmd in ALL_COMMANDS if cmd not in exclude_set)
|
|
443
|
+
if not allowed:
|
|
444
|
+
raise ValueError("Cannot exclude every filesystem command")
|
|
445
|
+
|
|
446
|
+
cache_key = allowed
|
|
447
|
+
if cache_key in self._tool_cache:
|
|
448
|
+
return self._tool_cache[cache_key]
|
|
449
|
+
|
|
450
|
+
allowed_set = set(allowed)
|
|
451
|
+
schema = FilesystemParams.model_json_schema(ref_template="#/$defs/{model}")
|
|
452
|
+
if (
|
|
453
|
+
"properties" in schema
|
|
454
|
+
and "command" in schema["properties"]
|
|
455
|
+
and isinstance(schema["properties"]["command"], dict)
|
|
456
|
+
):
|
|
457
|
+
schema["properties"]["command"]["enum"] = list(allowed)
|
|
458
|
+
|
|
459
|
+
tool = Tool(
|
|
460
|
+
name=self.tool_name,
|
|
461
|
+
description=FS_DESCRIPTION,
|
|
462
|
+
parameters=schema.get("properties", {}),
|
|
463
|
+
required=schema.get("required", []),
|
|
464
|
+
definitions=schema.get("$defs"),
|
|
465
|
+
run=partial(self._filesystem_tool, allowed_set), # type: ignore
|
|
466
|
+
)
|
|
467
|
+
|
|
468
|
+
self._tool_cache[cache_key] = [tool]
|
|
469
|
+
return [tool]
|
|
470
|
+
|
|
471
|
+
def dump(
|
|
472
|
+
self,
|
|
473
|
+
destination: str | os.PathLike[str],
|
|
474
|
+
*,
|
|
475
|
+
as_zip: bool = False,
|
|
476
|
+
) -> list[str]:
|
|
477
|
+
"""
|
|
478
|
+
Copy the virtual workspace to the given filesystem path.
|
|
479
|
+
|
|
480
|
+
Args:
|
|
481
|
+
destination: Path to write to. If as_zip=True, this should be a .zip file path.
|
|
482
|
+
as_zip: If True, write as a zip archive instead of a directory.
|
|
483
|
+
|
|
484
|
+
Returns:
|
|
485
|
+
List of file paths that were written.
|
|
486
|
+
"""
|
|
487
|
+
entries = self.backend.list_dir(".", recursive=True)
|
|
488
|
+
written: list[str] = []
|
|
489
|
+
|
|
490
|
+
if as_zip:
|
|
491
|
+
target_path = Path(destination)
|
|
492
|
+
target_path.parent.mkdir(parents=True, exist_ok=True)
|
|
493
|
+
|
|
494
|
+
with zipfile.ZipFile(target_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
|
495
|
+
for entry in entries:
|
|
496
|
+
if entry.get("type") != "file":
|
|
497
|
+
continue
|
|
498
|
+
rel_path = entry["path"]
|
|
499
|
+
content = self.backend.read_file(rel_path)
|
|
500
|
+
zf.writestr(rel_path, content)
|
|
501
|
+
written.append(rel_path)
|
|
502
|
+
else:
|
|
503
|
+
target_root = Path(destination)
|
|
504
|
+
if target_root.exists() and not target_root.is_dir():
|
|
505
|
+
raise NotADirectoryError(f"{target_root} exists and is not a directory")
|
|
506
|
+
target_root.mkdir(parents=True, exist_ok=True)
|
|
507
|
+
|
|
508
|
+
for entry in entries:
|
|
509
|
+
if entry.get("type") != "file":
|
|
510
|
+
continue
|
|
511
|
+
rel_path = entry["path"]
|
|
512
|
+
destination_path = target_root.joinpath(*rel_path.split("/"))
|
|
513
|
+
destination_path.parent.mkdir(parents=True, exist_ok=True)
|
|
514
|
+
destination_path.write_text(self.backend.read_file(rel_path))
|
|
515
|
+
written.append(rel_path)
|
|
516
|
+
|
|
517
|
+
return sorted(written)
|
|
518
|
+
|
|
519
|
+
@classmethod
|
|
520
|
+
def from_dir(
|
|
521
|
+
cls,
|
|
522
|
+
source: str | os.PathLike[str],
|
|
523
|
+
*,
|
|
524
|
+
max_files: int = 100,
|
|
525
|
+
tool_name: str = "filesystem",
|
|
526
|
+
) -> "FilesystemManager":
|
|
527
|
+
"""
|
|
528
|
+
Create a FilesystemManager pre-populated with files from a directory.
|
|
529
|
+
|
|
530
|
+
Args:
|
|
531
|
+
source: Path to the directory to load files from.
|
|
532
|
+
max_files: Maximum number of files to load (default 100).
|
|
533
|
+
tool_name: Name for the filesystem tool.
|
|
534
|
+
|
|
535
|
+
Returns:
|
|
536
|
+
A new FilesystemManager with the files loaded into memory.
|
|
537
|
+
|
|
538
|
+
Raises:
|
|
539
|
+
ValueError: If more than max_files files are found.
|
|
540
|
+
NotADirectoryError: If source is not a directory.
|
|
541
|
+
"""
|
|
542
|
+
source_path = Path(source)
|
|
543
|
+
if not source_path.is_dir():
|
|
544
|
+
raise NotADirectoryError(f"{source_path} is not a directory")
|
|
545
|
+
|
|
546
|
+
files: dict[str, str] = {}
|
|
547
|
+
file_count = 0
|
|
548
|
+
|
|
549
|
+
for file_path in source_path.rglob("*"):
|
|
550
|
+
if not file_path.is_file():
|
|
551
|
+
continue
|
|
552
|
+
|
|
553
|
+
file_count += 1
|
|
554
|
+
if file_count > max_files:
|
|
555
|
+
raise ValueError(
|
|
556
|
+
f"Directory contains more than {max_files} files. "
|
|
557
|
+
f"Increase max_files or use a smaller directory."
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
rel_path = file_path.relative_to(source_path).as_posix()
|
|
561
|
+
|
|
562
|
+
# Try to read as text, skip binary files
|
|
563
|
+
try:
|
|
564
|
+
content = file_path.read_text()
|
|
565
|
+
files[rel_path] = content
|
|
566
|
+
except UnicodeDecodeError:
|
|
567
|
+
# Skip binary files
|
|
568
|
+
continue
|
|
569
|
+
|
|
570
|
+
backend = InMemoryWorkspaceBackend(files)
|
|
571
|
+
return cls(backend=backend, tool_name=tool_name)
|
|
572
|
+
|
|
573
|
+
@classmethod
|
|
574
|
+
def from_zip(
|
|
575
|
+
cls,
|
|
576
|
+
source: str | os.PathLike[str] | io.BytesIO,
|
|
577
|
+
*,
|
|
578
|
+
max_files: int = 100,
|
|
579
|
+
tool_name: str = "filesystem",
|
|
580
|
+
) -> "FilesystemManager":
|
|
581
|
+
"""
|
|
582
|
+
Create a FilesystemManager pre-populated with files from a zip archive.
|
|
583
|
+
|
|
584
|
+
Args:
|
|
585
|
+
source: Path to the zip file, or a BytesIO containing zip data.
|
|
586
|
+
max_files: Maximum number of files to load (default 100).
|
|
587
|
+
tool_name: Name for the filesystem tool.
|
|
588
|
+
|
|
589
|
+
Returns:
|
|
590
|
+
A new FilesystemManager with the files loaded into memory.
|
|
591
|
+
|
|
592
|
+
Raises:
|
|
593
|
+
ValueError: If more than max_files files are found.
|
|
594
|
+
zipfile.BadZipFile: If the source is not a valid zip file.
|
|
595
|
+
"""
|
|
596
|
+
files: dict[str, str] = {}
|
|
597
|
+
file_count = 0
|
|
598
|
+
|
|
599
|
+
with zipfile.ZipFile(source, "r") as zf:
|
|
600
|
+
for info in zf.infolist():
|
|
601
|
+
# Skip directories
|
|
602
|
+
if info.is_dir():
|
|
603
|
+
continue
|
|
604
|
+
|
|
605
|
+
file_count += 1
|
|
606
|
+
if file_count > max_files:
|
|
607
|
+
raise ValueError(
|
|
608
|
+
f"Zip archive contains more than {max_files} files. "
|
|
609
|
+
f"Increase max_files or use a smaller archive."
|
|
610
|
+
)
|
|
611
|
+
|
|
612
|
+
# Normalize path (remove leading slashes, handle Windows paths)
|
|
613
|
+
rel_path = info.filename.lstrip("/").replace("\\", "/")
|
|
614
|
+
if not rel_path:
|
|
615
|
+
continue
|
|
616
|
+
|
|
617
|
+
# Try to read as text, skip binary files
|
|
618
|
+
try:
|
|
619
|
+
content = zf.read(info.filename).decode("utf-8")
|
|
620
|
+
files[rel_path] = content
|
|
621
|
+
except UnicodeDecodeError:
|
|
622
|
+
# Skip binary files
|
|
623
|
+
continue
|
|
624
|
+
|
|
625
|
+
backend = InMemoryWorkspaceBackend(files)
|
|
626
|
+
return cls(backend=backend, tool_name=tool_name)
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
ApplyDiffMode = Literal["default", "create"]
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
@dataclass
|
|
633
|
+
class Chunk:
|
|
634
|
+
orig_index: int
|
|
635
|
+
del_lines: list[str]
|
|
636
|
+
ins_lines: list[str]
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
@dataclass
|
|
640
|
+
class ParserState:
|
|
641
|
+
lines: list[str]
|
|
642
|
+
index: int = 0
|
|
643
|
+
fuzz: int = 0
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
@dataclass
|
|
647
|
+
class ParsedUpdateDiff:
|
|
648
|
+
chunks: list[Chunk]
|
|
649
|
+
fuzz: int
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
@dataclass
|
|
653
|
+
class ReadSectionResult:
|
|
654
|
+
next_context: list[str]
|
|
655
|
+
section_chunks: list[Chunk]
|
|
656
|
+
end_index: int
|
|
657
|
+
eof: bool
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
END_PATCH = "*** End Patch"
|
|
661
|
+
END_FILE = "*** End of File"
|
|
662
|
+
SECTION_TERMINATORS = [
|
|
663
|
+
END_PATCH,
|
|
664
|
+
"*** Update File:",
|
|
665
|
+
"*** Delete File:",
|
|
666
|
+
"*** Add File:",
|
|
667
|
+
]
|
|
668
|
+
END_SECTION_MARKERS = [*SECTION_TERMINATORS, END_FILE]
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def apply_diff(input_text: str, diff: str, mode: ApplyDiffMode = "default") -> str:
|
|
672
|
+
"""Apply a V4A diff to the provided text."""
|
|
673
|
+
|
|
674
|
+
diff_lines = _normalize_diff_lines(diff)
|
|
675
|
+
if mode == "create":
|
|
676
|
+
return _parse_create_diff(diff_lines)
|
|
677
|
+
|
|
678
|
+
parsed = _parse_update_diff(diff_lines, input_text)
|
|
679
|
+
return _apply_chunks(input_text, parsed.chunks)
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def _normalize_diff_lines(diff: str) -> list[str]:
|
|
683
|
+
lines = [line.rstrip("\r") for line in re.split(r"\r?\n", diff)]
|
|
684
|
+
if lines and lines[-1] == "":
|
|
685
|
+
lines.pop()
|
|
686
|
+
return lines
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def _is_done(state: ParserState, prefixes: Sequence[str]) -> bool:
|
|
690
|
+
if state.index >= len(state.lines):
|
|
691
|
+
return True
|
|
692
|
+
if any(state.lines[state.index].startswith(prefix) for prefix in prefixes):
|
|
693
|
+
return True
|
|
694
|
+
return False
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _read_str(state: ParserState, prefix: str) -> str:
|
|
698
|
+
if state.index >= len(state.lines):
|
|
699
|
+
return ""
|
|
700
|
+
current = state.lines[state.index]
|
|
701
|
+
if current.startswith(prefix):
|
|
702
|
+
state.index += 1
|
|
703
|
+
return current[len(prefix) :]
|
|
704
|
+
return ""
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def _parse_create_diff(lines: list[str]) -> str:
|
|
708
|
+
parser = ParserState(lines=[*lines, END_PATCH])
|
|
709
|
+
output: list[str] = []
|
|
710
|
+
|
|
711
|
+
while not _is_done(parser, SECTION_TERMINATORS):
|
|
712
|
+
if parser.index >= len(parser.lines):
|
|
713
|
+
break
|
|
714
|
+
line = parser.lines[parser.index]
|
|
715
|
+
parser.index += 1
|
|
716
|
+
if not line.startswith("+"):
|
|
717
|
+
raise ValueError(f"Invalid Add File Line: {line}")
|
|
718
|
+
output.append(line[1:])
|
|
719
|
+
|
|
720
|
+
return "\n".join(output)
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def _parse_update_diff(lines: list[str], input_text: str) -> ParsedUpdateDiff:
|
|
724
|
+
parser = ParserState(lines=[*lines, END_PATCH])
|
|
725
|
+
input_lines = input_text.split("\n")
|
|
726
|
+
chunks: list[Chunk] = []
|
|
727
|
+
cursor = 0
|
|
728
|
+
|
|
729
|
+
while not _is_done(parser, END_SECTION_MARKERS):
|
|
730
|
+
anchor = _read_str(parser, "@@ ")
|
|
731
|
+
has_bare_anchor = (
|
|
732
|
+
anchor == ""
|
|
733
|
+
and parser.index < len(parser.lines)
|
|
734
|
+
and parser.lines[parser.index] == "@@"
|
|
735
|
+
)
|
|
736
|
+
if has_bare_anchor:
|
|
737
|
+
parser.index += 1
|
|
738
|
+
|
|
739
|
+
if not (anchor or has_bare_anchor or cursor == 0):
|
|
740
|
+
current_line = (
|
|
741
|
+
parser.lines[parser.index] if parser.index < len(parser.lines) else ""
|
|
742
|
+
)
|
|
743
|
+
raise ValueError(f"Invalid Line:\n{current_line}")
|
|
744
|
+
|
|
745
|
+
if anchor.strip():
|
|
746
|
+
cursor = _advance_cursor_to_anchor(anchor, input_lines, cursor, parser)
|
|
747
|
+
|
|
748
|
+
section = _read_section(parser.lines, parser.index)
|
|
749
|
+
find_result = _find_context(
|
|
750
|
+
input_lines, section.next_context, cursor, section.eof
|
|
751
|
+
)
|
|
752
|
+
if find_result.new_index == -1:
|
|
753
|
+
ctx_text = "\n".join(section.next_context)
|
|
754
|
+
if section.eof:
|
|
755
|
+
raise ValueError(f"Invalid EOF Context {cursor}:\n{ctx_text}")
|
|
756
|
+
raise ValueError(f"Invalid Context {cursor}:\n{ctx_text}")
|
|
757
|
+
|
|
758
|
+
cursor = find_result.new_index + len(section.next_context)
|
|
759
|
+
parser.fuzz += find_result.fuzz
|
|
760
|
+
parser.index = section.end_index
|
|
761
|
+
|
|
762
|
+
for ch in section.section_chunks:
|
|
763
|
+
chunks.append(
|
|
764
|
+
Chunk(
|
|
765
|
+
orig_index=ch.orig_index + find_result.new_index,
|
|
766
|
+
del_lines=list(ch.del_lines),
|
|
767
|
+
ins_lines=list(ch.ins_lines),
|
|
768
|
+
)
|
|
769
|
+
)
|
|
770
|
+
|
|
771
|
+
return ParsedUpdateDiff(chunks=chunks, fuzz=parser.fuzz)
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def _advance_cursor_to_anchor(
|
|
775
|
+
anchor: str,
|
|
776
|
+
input_lines: list[str],
|
|
777
|
+
cursor: int,
|
|
778
|
+
parser: ParserState,
|
|
779
|
+
) -> int:
|
|
780
|
+
found = False
|
|
781
|
+
|
|
782
|
+
if not any(line == anchor for line in input_lines[:cursor]):
|
|
783
|
+
for i in range(cursor, len(input_lines)):
|
|
784
|
+
if input_lines[i] == anchor:
|
|
785
|
+
cursor = i + 1
|
|
786
|
+
found = True
|
|
787
|
+
break
|
|
788
|
+
|
|
789
|
+
if not found and not any(
|
|
790
|
+
line.strip() == anchor.strip() for line in input_lines[:cursor]
|
|
791
|
+
):
|
|
792
|
+
for i in range(cursor, len(input_lines)):
|
|
793
|
+
if input_lines[i].strip() == anchor.strip():
|
|
794
|
+
cursor = i + 1
|
|
795
|
+
parser.fuzz += 1
|
|
796
|
+
found = True
|
|
797
|
+
break
|
|
798
|
+
|
|
799
|
+
return cursor
|
|
800
|
+
|
|
801
|
+
|
|
802
|
+
def _read_section(lines: list[str], start_index: int) -> ReadSectionResult:
|
|
803
|
+
context: list[str] = []
|
|
804
|
+
del_lines: list[str] = []
|
|
805
|
+
ins_lines: list[str] = []
|
|
806
|
+
section_chunks: list[Chunk] = []
|
|
807
|
+
mode: Literal["keep", "add", "delete"] = "keep"
|
|
808
|
+
index = start_index
|
|
809
|
+
orig_index = index
|
|
810
|
+
|
|
811
|
+
while index < len(lines):
|
|
812
|
+
raw = lines[index]
|
|
813
|
+
if (
|
|
814
|
+
raw.startswith("@@")
|
|
815
|
+
or raw.startswith(END_PATCH)
|
|
816
|
+
or raw.startswith("*** Update File:")
|
|
817
|
+
or raw.startswith("*** Delete File:")
|
|
818
|
+
or raw.startswith("*** Add File:")
|
|
819
|
+
or raw.startswith(END_FILE)
|
|
820
|
+
):
|
|
821
|
+
break
|
|
822
|
+
if raw == "***":
|
|
823
|
+
break
|
|
824
|
+
if raw.startswith("***"):
|
|
825
|
+
raise ValueError(f"Invalid Line: {raw}")
|
|
826
|
+
|
|
827
|
+
index += 1
|
|
828
|
+
last_mode = mode
|
|
829
|
+
line = raw if raw else " "
|
|
830
|
+
prefix = line[0]
|
|
831
|
+
if prefix == "+":
|
|
832
|
+
mode = "add"
|
|
833
|
+
elif prefix == "-":
|
|
834
|
+
mode = "delete"
|
|
835
|
+
elif prefix == " ":
|
|
836
|
+
mode = "keep"
|
|
837
|
+
else:
|
|
838
|
+
raise ValueError(f"Invalid Line: {line}")
|
|
839
|
+
|
|
840
|
+
line_content = line[1:]
|
|
841
|
+
switching_to_context = mode == "keep" and last_mode != mode
|
|
842
|
+
if switching_to_context and (del_lines or ins_lines):
|
|
843
|
+
section_chunks.append(
|
|
844
|
+
Chunk(
|
|
845
|
+
orig_index=len(context) - len(del_lines),
|
|
846
|
+
del_lines=list(del_lines),
|
|
847
|
+
ins_lines=list(ins_lines),
|
|
848
|
+
)
|
|
849
|
+
)
|
|
850
|
+
del_lines = []
|
|
851
|
+
ins_lines = []
|
|
852
|
+
|
|
853
|
+
if mode == "delete":
|
|
854
|
+
del_lines.append(line_content)
|
|
855
|
+
context.append(line_content)
|
|
856
|
+
elif mode == "add":
|
|
857
|
+
ins_lines.append(line_content)
|
|
858
|
+
else:
|
|
859
|
+
context.append(line_content)
|
|
860
|
+
|
|
861
|
+
if del_lines or ins_lines:
|
|
862
|
+
section_chunks.append(
|
|
863
|
+
Chunk(
|
|
864
|
+
orig_index=len(context) - len(del_lines),
|
|
865
|
+
del_lines=list(del_lines),
|
|
866
|
+
ins_lines=list(ins_lines),
|
|
867
|
+
)
|
|
868
|
+
)
|
|
869
|
+
|
|
870
|
+
if index < len(lines) and lines[index] == END_FILE:
|
|
871
|
+
return ReadSectionResult(context, section_chunks, index + 1, True)
|
|
872
|
+
|
|
873
|
+
if index == orig_index:
|
|
874
|
+
next_line = lines[index] if index < len(lines) else ""
|
|
875
|
+
raise ValueError(f"Nothing in this section - index={index} {next_line}")
|
|
876
|
+
|
|
877
|
+
return ReadSectionResult(context, section_chunks, index, False)
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
@dataclass
|
|
881
|
+
class ContextMatch:
|
|
882
|
+
new_index: int
|
|
883
|
+
fuzz: int
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
def _find_context(
|
|
887
|
+
lines: list[str], context: list[str], start: int, eof: bool
|
|
888
|
+
) -> ContextMatch:
|
|
889
|
+
if eof:
|
|
890
|
+
end_start = max(0, len(lines) - len(context))
|
|
891
|
+
end_match = _find_context_core(lines, context, end_start)
|
|
892
|
+
if end_match.new_index != -1:
|
|
893
|
+
return end_match
|
|
894
|
+
fallback = _find_context_core(lines, context, start)
|
|
895
|
+
return ContextMatch(new_index=fallback.new_index, fuzz=fallback.fuzz + 10000)
|
|
896
|
+
return _find_context_core(lines, context, start)
|
|
897
|
+
|
|
898
|
+
|
|
899
|
+
def _find_context_core(
|
|
900
|
+
lines: list[str], context: list[str], start: int
|
|
901
|
+
) -> ContextMatch:
|
|
902
|
+
if not context:
|
|
903
|
+
return ContextMatch(new_index=start, fuzz=0)
|
|
904
|
+
|
|
905
|
+
for i in range(start, len(lines)):
|
|
906
|
+
if _equals_slice(lines, context, i, lambda value: value):
|
|
907
|
+
return ContextMatch(new_index=i, fuzz=0)
|
|
908
|
+
for i in range(start, len(lines)):
|
|
909
|
+
if _equals_slice(lines, context, i, lambda value: value.rstrip()):
|
|
910
|
+
return ContextMatch(new_index=i, fuzz=1)
|
|
911
|
+
for i in range(start, len(lines)):
|
|
912
|
+
if _equals_slice(lines, context, i, lambda value: value.strip()):
|
|
913
|
+
return ContextMatch(new_index=i, fuzz=100)
|
|
914
|
+
|
|
915
|
+
return ContextMatch(new_index=-1, fuzz=0)
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
def _equals_slice(
|
|
919
|
+
source: list[str], target: list[str], start: int, map_fn: Callable[[str], str]
|
|
920
|
+
) -> bool:
|
|
921
|
+
if start + len(target) > len(source):
|
|
922
|
+
return False
|
|
923
|
+
for offset, target_value in enumerate(target):
|
|
924
|
+
if map_fn(source[start + offset]) != map_fn(target_value):
|
|
925
|
+
return False
|
|
926
|
+
return True
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
def _apply_chunks(input_text: str, chunks: list[Chunk]) -> str:
|
|
930
|
+
orig_lines = input_text.split("\n")
|
|
931
|
+
dest_lines: list[str] = []
|
|
932
|
+
cursor = 0
|
|
933
|
+
|
|
934
|
+
for chunk in chunks:
|
|
935
|
+
if chunk.orig_index > len(orig_lines):
|
|
936
|
+
raise ValueError(
|
|
937
|
+
f"apply_diff: chunk.origIndex {chunk.orig_index} > input length {len(orig_lines)}"
|
|
938
|
+
)
|
|
939
|
+
if cursor > chunk.orig_index:
|
|
940
|
+
raise ValueError(
|
|
941
|
+
f"apply_diff: overlapping chunk at {chunk.orig_index} (cursor {cursor})"
|
|
942
|
+
)
|
|
943
|
+
|
|
944
|
+
dest_lines.extend(orig_lines[cursor : chunk.orig_index])
|
|
945
|
+
cursor = chunk.orig_index
|
|
946
|
+
|
|
947
|
+
if chunk.ins_lines:
|
|
948
|
+
dest_lines.extend(chunk.ins_lines)
|
|
949
|
+
|
|
950
|
+
cursor += len(chunk.del_lines)
|
|
951
|
+
|
|
952
|
+
dest_lines.extend(orig_lines[cursor:])
|
|
953
|
+
return "\n".join(dest_lines)
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
# S3 filesystem description
|
|
957
|
+
S3_FS_DESCRIPTION = """Interact with a remote S3-backed filesystem that supports safe concurrent access.
|
|
958
|
+
|
|
959
|
+
This filesystem is backed by Amazon S3 with optimistic concurrency control, meaning multiple
|
|
960
|
+
agents can safely read and write to the same workspace without conflicts. If a write conflict
|
|
961
|
+
occurs (another agent modified the file), the operation will automatically retry.
|
|
962
|
+
|
|
963
|
+
Paths are always relative to the workspace root and use forward slashes. Use this tool to:
|
|
964
|
+
- inspect files with optional line ranges
|
|
965
|
+
- create, overwrite, or append to files (with automatic conflict resolution)
|
|
966
|
+
- delete files or folders from the workspace
|
|
967
|
+
- list directory contents
|
|
968
|
+
- search for text across the workspace using regular expressions
|
|
969
|
+
|
|
970
|
+
This filesystem is safe for distributed use - conflicts are automatically detected and resolved."""
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
@dataclass
|
|
974
|
+
class S3FileMetadata:
|
|
975
|
+
"""Metadata for a file stored in S3."""
|
|
976
|
+
|
|
977
|
+
key: str
|
|
978
|
+
etag: str
|
|
979
|
+
size: int
|
|
980
|
+
last_modified: float
|
|
981
|
+
|
|
982
|
+
|
|
983
|
+
@dataclass
|
|
984
|
+
class ConflictError(Exception):
|
|
985
|
+
"""Raised when a write conflict occurs due to concurrent modification."""
|
|
986
|
+
|
|
987
|
+
key: str
|
|
988
|
+
expected_etag: str | None
|
|
989
|
+
actual_etag: str | None
|
|
990
|
+
message: str = ""
|
|
991
|
+
|
|
992
|
+
def __str__(self) -> str:
|
|
993
|
+
if self.message:
|
|
994
|
+
return self.message
|
|
995
|
+
return f"Conflict writing {self.key}: expected ETag {self.expected_etag}, got {self.actual_etag}"
|
|
996
|
+
|
|
997
|
+
|
|
998
|
+
@dataclass
|
|
999
|
+
class RetryConfig:
|
|
1000
|
+
"""Configuration for retry behavior on conflicts."""
|
|
1001
|
+
|
|
1002
|
+
max_retries: int = 5
|
|
1003
|
+
base_delay: float = 0.1 # seconds
|
|
1004
|
+
max_delay: float = 5.0 # seconds
|
|
1005
|
+
jitter: float = 0.1 # random jitter factor
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
class S3WorkspaceBackend:
|
|
1009
|
+
"""
|
|
1010
|
+
S3 backend with optimistic concurrency control using conditional writes.
|
|
1011
|
+
|
|
1012
|
+
Uses:
|
|
1013
|
+
- If-None-Match: * for create-if-not-exists operations
|
|
1014
|
+
- If-Match: <etag> for update-only-if-unchanged operations
|
|
1015
|
+
"""
|
|
1016
|
+
|
|
1017
|
+
def __init__(
|
|
1018
|
+
self,
|
|
1019
|
+
bucket: str,
|
|
1020
|
+
prefix: str = "",
|
|
1021
|
+
s3_client: Any | None = None,
|
|
1022
|
+
retry_config: RetryConfig | None = None,
|
|
1023
|
+
):
|
|
1024
|
+
self.bucket = bucket
|
|
1025
|
+
self.prefix = prefix.rstrip("/") + "/" if prefix else ""
|
|
1026
|
+
self._client = s3_client
|
|
1027
|
+
self.retry_config = retry_config or RetryConfig()
|
|
1028
|
+
# Local cache of ETags for optimistic locking
|
|
1029
|
+
self._etag_cache: dict[str, str] = {}
|
|
1030
|
+
|
|
1031
|
+
@property
|
|
1032
|
+
def client(self) -> Any:
|
|
1033
|
+
"""Lazy initialization of S3 client."""
|
|
1034
|
+
if self._client is None:
|
|
1035
|
+
import boto3
|
|
1036
|
+
|
|
1037
|
+
self._client = boto3.client("s3")
|
|
1038
|
+
return self._client
|
|
1039
|
+
|
|
1040
|
+
def _full_key(self, path: str) -> str:
|
|
1041
|
+
"""Convert a normalized path to a full S3 key."""
|
|
1042
|
+
if path == ".":
|
|
1043
|
+
return self.prefix.rstrip("/") if self.prefix else ""
|
|
1044
|
+
return f"{self.prefix}{path}"
|
|
1045
|
+
|
|
1046
|
+
def _strip_prefix(self, key: str) -> str:
|
|
1047
|
+
"""Strip the prefix from an S3 key to get the relative path."""
|
|
1048
|
+
if self.prefix and key.startswith(self.prefix):
|
|
1049
|
+
return key[len(self.prefix) :]
|
|
1050
|
+
return key
|
|
1051
|
+
|
|
1052
|
+
def _get_with_etag(self, path: str) -> tuple[str, str]:
|
|
1053
|
+
"""Read a file and return (content, etag)."""
|
|
1054
|
+
key = self._full_key(path)
|
|
1055
|
+
try:
|
|
1056
|
+
response = self.client.get_object(Bucket=self.bucket, Key=key)
|
|
1057
|
+
content = response["Body"].read().decode("utf-8")
|
|
1058
|
+
etag = response["ETag"].strip('"')
|
|
1059
|
+
self._etag_cache[key] = etag
|
|
1060
|
+
return content, etag
|
|
1061
|
+
except self.client.exceptions.NoSuchKey:
|
|
1062
|
+
raise FileNotFoundError(f"{path} does not exist")
|
|
1063
|
+
|
|
1064
|
+
def _put_with_condition(
|
|
1065
|
+
self,
|
|
1066
|
+
path: str,
|
|
1067
|
+
content: str,
|
|
1068
|
+
*,
|
|
1069
|
+
if_none_match: bool = False,
|
|
1070
|
+
if_match: str | None = None,
|
|
1071
|
+
) -> str:
|
|
1072
|
+
"""
|
|
1073
|
+
Write a file with conditional headers.
|
|
1074
|
+
|
|
1075
|
+
Args:
|
|
1076
|
+
path: The file path
|
|
1077
|
+
content: The content to write
|
|
1078
|
+
if_none_match: If True, only write if file doesn't exist
|
|
1079
|
+
if_match: If provided, only write if ETag matches
|
|
1080
|
+
|
|
1081
|
+
Returns:
|
|
1082
|
+
The new ETag of the written object
|
|
1083
|
+
|
|
1084
|
+
Raises:
|
|
1085
|
+
ConflictError: If the condition fails
|
|
1086
|
+
FileExistsError: If if_none_match=True and file exists
|
|
1087
|
+
"""
|
|
1088
|
+
key = self._full_key(path)
|
|
1089
|
+
kwargs: dict[str, Any] = {
|
|
1090
|
+
"Bucket": self.bucket,
|
|
1091
|
+
"Key": key,
|
|
1092
|
+
"Body": content.encode("utf-8"),
|
|
1093
|
+
"ContentType": "text/plain; charset=utf-8",
|
|
1094
|
+
}
|
|
1095
|
+
|
|
1096
|
+
if if_none_match:
|
|
1097
|
+
kwargs["IfNoneMatch"] = "*"
|
|
1098
|
+
elif if_match:
|
|
1099
|
+
kwargs["IfMatch"] = (
|
|
1100
|
+
f'"{if_match}"' if not if_match.startswith('"') else if_match
|
|
1101
|
+
)
|
|
1102
|
+
|
|
1103
|
+
try:
|
|
1104
|
+
response = self.client.put_object(**kwargs)
|
|
1105
|
+
new_etag = response["ETag"].strip('"')
|
|
1106
|
+
self._etag_cache[key] = new_etag
|
|
1107
|
+
return new_etag
|
|
1108
|
+
except self.client.exceptions.ClientError as e:
|
|
1109
|
+
error_code = e.response.get("Error", {}).get("Code", "")
|
|
1110
|
+
if error_code == "PreconditionFailed":
|
|
1111
|
+
if if_none_match:
|
|
1112
|
+
raise FileExistsError(f"{path} already exists")
|
|
1113
|
+
raise ConflictError(
|
|
1114
|
+
key=key,
|
|
1115
|
+
expected_etag=if_match,
|
|
1116
|
+
actual_etag=None,
|
|
1117
|
+
message=f"File {path} was modified by another process",
|
|
1118
|
+
)
|
|
1119
|
+
raise
|
|
1120
|
+
|
|
1121
|
+
def _retry_with_backoff(
|
|
1122
|
+
self,
|
|
1123
|
+
operation: str,
|
|
1124
|
+
path: str,
|
|
1125
|
+
func: Any,
|
|
1126
|
+
) -> Any:
|
|
1127
|
+
"""Execute a function with retry and exponential backoff on conflicts."""
|
|
1128
|
+
config = self.retry_config
|
|
1129
|
+
last_error: Exception | None = None
|
|
1130
|
+
|
|
1131
|
+
for attempt in range(config.max_retries + 1):
|
|
1132
|
+
try:
|
|
1133
|
+
return func()
|
|
1134
|
+
except ConflictError as e:
|
|
1135
|
+
last_error = e
|
|
1136
|
+
if attempt >= config.max_retries:
|
|
1137
|
+
break
|
|
1138
|
+
# Calculate backoff with jitter
|
|
1139
|
+
delay = min(
|
|
1140
|
+
config.base_delay * (2**attempt),
|
|
1141
|
+
config.max_delay,
|
|
1142
|
+
)
|
|
1143
|
+
jitter = delay * config.jitter * random.random()
|
|
1144
|
+
time.sleep(delay + jitter)
|
|
1145
|
+
# Clear cached ETag to force fresh read
|
|
1146
|
+
key = self._full_key(path)
|
|
1147
|
+
self._etag_cache.pop(key, None)
|
|
1148
|
+
|
|
1149
|
+
raise last_error or RuntimeError(f"Retry failed for {operation} on {path}")
|
|
1150
|
+
|
|
1151
|
+
def read_file(self, path: str) -> str:
|
|
1152
|
+
"""Read a file from S3."""
|
|
1153
|
+
key = _normalize_path(path)
|
|
1154
|
+
content, _ = self._get_with_etag(key)
|
|
1155
|
+
return content
|
|
1156
|
+
|
|
1157
|
+
def write_file(self, path: str, content: str, *, overwrite: bool) -> None:
|
|
1158
|
+
"""
|
|
1159
|
+
Write a file to S3 with optimistic locking.
|
|
1160
|
+
|
|
1161
|
+
If overwrite=False, uses If-None-Match: * to ensure create-only.
|
|
1162
|
+
If overwrite=True, uses If-Match with cached ETag if available,
|
|
1163
|
+
otherwise does unconditional write.
|
|
1164
|
+
"""
|
|
1165
|
+
key = _normalize_path(path)
|
|
1166
|
+
s3_key = self._full_key(key)
|
|
1167
|
+
|
|
1168
|
+
if not overwrite:
|
|
1169
|
+
# Create-if-not-exists
|
|
1170
|
+
self._put_with_condition(key, content, if_none_match=True)
|
|
1171
|
+
else:
|
|
1172
|
+
# Check if we have a cached ETag for optimistic locking
|
|
1173
|
+
cached_etag = self._etag_cache.get(s3_key)
|
|
1174
|
+
if cached_etag:
|
|
1175
|
+
# Use optimistic locking with retry
|
|
1176
|
+
def _do_write():
|
|
1177
|
+
nonlocal cached_etag
|
|
1178
|
+
try:
|
|
1179
|
+
self._put_with_condition(key, content, if_match=cached_etag)
|
|
1180
|
+
except ConflictError:
|
|
1181
|
+
# Refresh ETag and retry
|
|
1182
|
+
_, cached_etag = self._get_with_etag(key)
|
|
1183
|
+
raise
|
|
1184
|
+
|
|
1185
|
+
self._retry_with_backoff("write_file", key, _do_write)
|
|
1186
|
+
else:
|
|
1187
|
+
# No cached ETag - try to get current state first for safety
|
|
1188
|
+
try:
|
|
1189
|
+
_, etag = self._get_with_etag(key)
|
|
1190
|
+
self._put_with_condition(key, content, if_match=etag)
|
|
1191
|
+
except FileNotFoundError:
|
|
1192
|
+
# File doesn't exist, create it
|
|
1193
|
+
self._put_with_condition(key, content, if_none_match=True)
|
|
1194
|
+
|
|
1195
|
+
def append_file(self, path: str, content: str) -> None:
|
|
1196
|
+
"""
|
|
1197
|
+
Append to a file with optimistic locking.
|
|
1198
|
+
|
|
1199
|
+
Uses read-modify-write with If-Match for consistency.
|
|
1200
|
+
"""
|
|
1201
|
+
key = _normalize_path(path)
|
|
1202
|
+
|
|
1203
|
+
def _do_append():
|
|
1204
|
+
try:
|
|
1205
|
+
current, etag = self._get_with_etag(key)
|
|
1206
|
+
new_content = current + content
|
|
1207
|
+
except FileNotFoundError:
|
|
1208
|
+
# File doesn't exist, create it
|
|
1209
|
+
self._put_with_condition(key, content, if_none_match=True)
|
|
1210
|
+
return
|
|
1211
|
+
|
|
1212
|
+
self._put_with_condition(key, new_content, if_match=etag)
|
|
1213
|
+
|
|
1214
|
+
self._retry_with_backoff("append_file", key, _do_append)
|
|
1215
|
+
|
|
1216
|
+
def delete_path(self, path: str) -> None:
|
|
1217
|
+
"""Delete a file or directory from S3."""
|
|
1218
|
+
key = _normalize_path(path, allow_root=True)
|
|
1219
|
+
|
|
1220
|
+
if key == ".":
|
|
1221
|
+
# Delete all files under the prefix
|
|
1222
|
+
paginator = self.client.get_paginator("list_objects_v2")
|
|
1223
|
+
prefix = self.prefix if self.prefix else ""
|
|
1224
|
+
for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix):
|
|
1225
|
+
for obj in page.get("Contents", []):
|
|
1226
|
+
self.client.delete_object(Bucket=self.bucket, Key=obj["Key"])
|
|
1227
|
+
self._etag_cache.pop(obj["Key"], None)
|
|
1228
|
+
return
|
|
1229
|
+
|
|
1230
|
+
s3_key = self._full_key(key)
|
|
1231
|
+
|
|
1232
|
+
# Try to delete as a single file first
|
|
1233
|
+
try:
|
|
1234
|
+
+            self.client.head_object(Bucket=self.bucket, Key=s3_key)
+            self.client.delete_object(Bucket=self.bucket, Key=s3_key)
+            self._etag_cache.pop(s3_key, None)
+            return
+        except self.client.exceptions.ClientError:
+            pass
+
+        # Try as a directory prefix
+        prefix = f"{s3_key}/"
+        paginator = self.client.get_paginator("list_objects_v2")
+        deleted_any = False
+
+        for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix):
+            for obj in page.get("Contents", []):
+                self.client.delete_object(Bucket=self.bucket, Key=obj["Key"])
+                self._etag_cache.pop(obj["Key"], None)
+                deleted_any = True
+
+        if not deleted_any:
+            raise FileNotFoundError(f"{key} does not exist")
+
+    def list_dir(self, path: str, recursive: bool) -> list[dict[str, Any]]:
+        """List directory contents from S3."""
+        key = _normalize_path(path, allow_root=True)
+
+        if key == ".":
+            prefix = self.prefix
+        else:
+            prefix = f"{self._full_key(key)}/"
+
+        entries: list[dict[str, Any]] = []
+        seen_dirs: set[str] = set()
+
+        paginator = self.client.get_paginator("list_objects_v2")
+        kwargs: dict[str, Any] = {"Bucket": self.bucket, "Prefix": prefix}
+
+        if not recursive:
+            kwargs["Delimiter"] = "/"
+
+        for page in paginator.paginate(**kwargs):
+            # Handle common prefixes (directories) in non-recursive mode
+            for common_prefix in page.get("CommonPrefixes", []):
+                dir_key = common_prefix["Prefix"].rstrip("/")
+                rel_path = self._strip_prefix(dir_key)
+                if rel_path and rel_path not in seen_dirs:
+                    entries.append(
+                        {"path": rel_path, "type": "directory", "size": None}
+                    )
+                    seen_dirs.add(rel_path)
+
+            # Handle files
+            for obj in page.get("Contents", []):
+                obj_key = obj["Key"]
+                rel_path = self._strip_prefix(obj_key)
+
+                # Skip the prefix itself if it's an empty marker
+                if not rel_path or rel_path.endswith("/"):
+                    continue
+
+                if recursive:
+                    entries.append(self._format_file_entry(rel_path, obj))
+                else:
+                    # Check if this is a direct child
+                    remainder = rel_path
+                    if key != ".":
+                        # Remove the directory prefix to get relative path
+                        dir_prefix = key + "/"
+                        if rel_path.startswith(dir_prefix):
+                            remainder = rel_path[len(dir_prefix) :]
+                        else:
+                            continue
+
+                    if "/" not in remainder:
+                        entries.append(self._format_file_entry(rel_path, obj))
+
+        # Sort by path for consistent ordering
+        entries.sort(key=lambda e: e["path"])
+
+        if not entries and key != ".":
+            # Check if the path itself exists as a file
+            try:
+                s3_key = self._full_key(key)
+                response = self.client.head_object(Bucket=self.bucket, Key=s3_key)
+                return [
+                    {
+                        "path": key,
+                        "type": "file",
+                        "size": response["ContentLength"],
+                        "etag": response["ETag"].strip('"'),
+                    }
+                ]
+            except self.client.exceptions.ClientError:
+                raise FileNotFoundError(f"{key} does not exist")
+
+        return entries
+
+    def grep(self, pattern: str, path: str | None, limit: int) -> list[dict[str, Any]]:
+        """Search for pattern in files."""
+        regex = re.compile(pattern)
+        key = _normalize_path(path, allow_root=True) if path is not None else "."
+
+        matches: list[dict[str, Any]] = []
+        paginator = self.client.get_paginator("list_objects_v2")
+
+        def process_object(obj_key: str, rel_path: str) -> None:
+            if len(matches) >= limit:
+                return
+            try:
+                response = self.client.get_object(Bucket=self.bucket, Key=obj_key)
+                content = response["Body"].read().decode("utf-8")
+
+                for line_no, line in enumerate(content.splitlines(), start=1):
+                    if regex.search(line):
+                        matches.append(
+                            {
+                                "path": rel_path,
+                                "line": line_no,
+                                "text": line.strip(),
+                            }
+                        )
+                        if len(matches) >= limit:
+                            return
+            except Exception:
+                # Skip files that can't be read as text
+                return
+
+        # If a specific path is provided, check if it's a file first
+        if key == ".":
+            prefix = self.prefix
+        else:
+            s3_key = self._full_key(key)
+            try:
+                self.client.head_object(Bucket=self.bucket, Key=s3_key)
+                rel_path = self._strip_prefix(s3_key)
+                process_object(s3_key, rel_path)
+                return matches
+            except self.client.exceptions.ClientError:
+                prefix = f"{s3_key}/"
+
+        for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix):
+            for obj in page.get("Contents", []):
+                if len(matches) >= limit:
+                    return matches
+
+                obj_key = obj["Key"]
+                rel_path = self._strip_prefix(obj_key)
+
+                if not rel_path or rel_path.endswith("/"):
+                    continue
+
+                process_object(obj_key, rel_path)
+
+        return matches
+
+    def _format_file_entry(self, path: str, obj: dict[str, Any]) -> dict[str, Any]:
+        """Format a file entry from S3 object metadata."""
+        return {
+            "path": path,
+            "type": "file",
+            "size": obj["Size"],
+            "etag": obj["ETag"].strip('"'),
+        }
+
+    def get_file_etag(self, path: str) -> str | None:
+        """Get the cached ETag for a file, or fetch it if not cached."""
+        key = _normalize_path(path)
+        s3_key = self._full_key(key)
+
+        if s3_key in self._etag_cache:
+            return self._etag_cache[s3_key]
+
+        try:
+            response = self.client.head_object(Bucket=self.bucket, Key=s3_key)
+            etag = response["ETag"].strip('"')
+            self._etag_cache[s3_key] = etag
+            return etag
+        except self.client.exceptions.ClientError:
+            return None
+
+
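The ETag caching above underpins the optimistic-concurrency scheme this module describes later in its `description` string (If-None-Match for create-if-absent, If-Match for update-if-unchanged). For orientation only, a minimal sketch of those two primitives against a plain boto3 client; the bucket and key are made-up names, and the IfNoneMatch/IfMatch arguments assume a recent boto3 that exposes S3 conditional writes:

    import boto3
    from botocore.exceptions import ClientError

    s3 = boto3.client("s3")
    bucket, key = "my-ai-workspace", "agent-123/notes.txt"  # illustrative names

    # Create-if-not-exists: the write fails with 412 PreconditionFailed if the key exists.
    try:
        s3.put_object(Bucket=bucket, Key=key, Body=b"v1", IfNoneMatch="*")
    except ClientError as exc:
        if exc.response["Error"]["Code"] == "PreconditionFailed":
            print("someone else created it first")

    # Update-only-if-unchanged: read the current ETag, then write conditioned on it.
    etag = s3.head_object(Bucket=bucket, Key=key)["ETag"].strip('"')
    try:
        s3.put_object(Bucket=bucket, Key=key, Body=b"v2", IfMatch=etag)
    except ClientError as exc:
        if exc.response["Error"]["Code"] == "PreconditionFailed":
            print("object changed since it was read; retry with a fresh ETag")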
+# Command types for the S3 filesystem tool
+S3FsCommand = Literal[
+    "read_file",
+    "write_file",
+    "delete_path",
+    "list_dir",
+    "grep",
+]
+
+ALL_S3_COMMANDS: tuple[S3FsCommand, ...] = (
+    "read_file",
+    "write_file",
+    "delete_path",
+    "list_dir",
+    "grep",
+)
+
+
+class S3FilesystemParams(BaseModel):
+    """Schema describing S3 filesystem tool calls."""
+
+    command: S3FsCommand = Field(
+        description="Filesystem operation to perform (read_file, write_file, delete_path, list_dir, grep)"
+    )
+    path: Optional[str] = Field(
+        default=None,
+        description="Path to operate on, relative to workspace root. Use '.' for the root directory.",
+    )
+    start_line: Optional[int] = Field(
+        default=None,
+        description="1-based inclusive start line when reading a file. Leave unset to read from the beginning.",
+        ge=1,
+    )
+    end_line: Optional[int] = Field(
+        default=None,
+        description="1-based inclusive end line when reading a file. Leave unset to read through the end.",
+        ge=1,
+    )
+    content: Optional[str] = Field(
+        default=None,
+        description="Content to write when using write_file.",
+    )
+    mode: Optional[Literal["overwrite", "append", "create_if_missing"]] = Field(
+        default="overwrite",
+        description="How to write content. Overwrite replaces the file, append adds to the end, create_if_missing leaves existing files untouched.",
+    )
+    recursive: Optional[bool] = Field(
+        default=None,
+        description="When listing directories, set to true to recurse.",
+    )
+    pattern: Optional[str] = Field(
+        default=None,
+        description="Regular expression pattern to search for when using grep.",
+    )
+    max_results: Optional[int] = Field(
+        default=50,
+        description="Maximum number of grep matches to return.",
+        ge=1,
+    )
+
+
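Because S3FilesystemParams is a plain pydantic model, tool-call arguments can be sanity-checked without touching S3 at all. A small sketch; the import path follows this diff's file layout (lm_deluge/tool/prefab/filesystem.py) and is an assumption:

    from lm_deluge.tool.prefab.filesystem import S3FilesystemParams  # assumed import path

    # A grep call as an agent would emit it: only `command` is required.
    params = S3FilesystemParams.model_validate(
        {"command": "grep", "pattern": r"TODO\(", "path": "src", "max_results": 10}
    )
    assert params.mode == "overwrite"  # unrelated fields keep their defaults

    # Constraint violations (start_line has ge=1) are rejected before any S3 call.
    try:
        S3FilesystemParams.model_validate(
            {"command": "read_file", "path": "a.txt", "start_line": 0}
        )
    except ValueError as exc:  # pydantic's ValidationError subclasses ValueError
        print(type(exc).__name__)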
+class S3FilesystemManager:
+    """
+    S3-backed filesystem manager with optimistic concurrency control.
+
+    Uses S3 conditional writes (If-None-Match and If-Match) for safe distributed
+    operations, allowing multiple AI agents to share filesystem state.
+
+    Example:
+        manager = S3FilesystemManager(
+            bucket="my-ai-workspace",
+            prefix="agent-123/",
+        )
+        tools = manager.get_tools()
+    """
+
+    def __init__(
+        self,
+        bucket: str,
+        prefix: str = "",
+        s3_client: Any | None = None,
+        retry_config: RetryConfig | None = None,
+        tool_name: str = "s3_filesystem",
+    ):
+        """
+        Initialize the S3 filesystem manager.
+
+        Args:
+            bucket: The S3 bucket name
+            prefix: Optional prefix for all keys (like a workspace directory)
+            s3_client: Optional pre-configured S3 client
+            retry_config: Configuration for retry behavior on conflicts
+            tool_name: Name for the tool (default: "s3_filesystem")
+        """
+        self.backend = S3WorkspaceBackend(
+            bucket=bucket,
+            prefix=prefix,
+            s3_client=s3_client,
+            retry_config=retry_config,
+        )
+        self.tool_name = tool_name
+        self._tool_cache: dict[tuple[str, ...], list[Tool]] = {}
+
+    def _handle_read(
+        self, path: str, start_line: Optional[int], end_line: Optional[int]
+    ) -> dict[str, Any]:
+        content = self.backend.read_file(path)
+        total_lines = len(content.splitlines()) or (0 if content == "" else 1)
+        start = start_line or 1
+        end = end_line or total_lines
+        if end < start:
+            if not (total_lines == 0 and end_line is None and start == 1):
+                raise ValueError("end_line must be greater than or equal to start_line")
+
+        if start == 1 and end >= total_lines:
+            snippet = content
+        else:
+            lines = content.splitlines()
+            snippet = "\n".join(lines[start - 1 : end])
+
+        # Include ETag in response for transparency
+        etag = self.backend.get_file_etag(path)
+
+        return {
+            "path": path,
+            "start_line": start,
+            "end_line": end,
+            "content": snippet,
+            "total_lines": total_lines,
+            "character_count": len(content),
+            "etag": etag,
+        }
+
+    def _handle_write(
+        self, path: str, content: str, mode: Optional[str]
+    ) -> dict[str, Any]:
+        write_mode = mode or "overwrite"
+        if write_mode == "overwrite":
+            self.backend.write_file(path, content, overwrite=True)
+        elif write_mode == "append":
+            self.backend.append_file(path, content)
+        elif write_mode == "create_if_missing":
+            try:
+                self.backend.write_file(path, content, overwrite=False)
+            except FileExistsError:
+                pass
+        else:
+            raise ValueError(f"Unsupported write mode: {write_mode}")
+
+        # Get new ETag after write
+        etag = self.backend.get_file_etag(path)
+
+        return {"path": path, "status": "ok", "mode": write_mode, "etag": etag}
+
+    def _handle_delete(self, path: str) -> dict[str, Any]:
+        self.backend.delete_path(path)
+        return {"path": path, "status": "ok"}
+
+    def _handle_list(
+        self, path: Optional[str], recursive: Optional[bool]
+    ) -> dict[str, Any]:
+        listing = self.backend.list_dir(path or ".", recursive=bool(recursive))
+        return {"path": path or ".", "recursive": bool(recursive), "entries": listing}
+
+    def _handle_grep(
+        self, pattern: str, path: Optional[str], limit: Optional[int]
+    ) -> dict[str, Any]:
+        max_results = limit or 50
+        matches = self.backend.grep(pattern, path=path, limit=max_results)
+        return {
+            "pattern": pattern,
+            "path": path,
+            "max_results": max_results,
+            "matches": matches,
+        }
+
+    def _filesystem_tool(self, allowed_commands: set[str], **kwargs: Any) -> str:
+        params = S3FilesystemParams.model_validate(kwargs)
+
+        try:
+            if params.command not in allowed_commands:
+                raise ValueError(
+                    f"The '{params.command}' command is disabled for this tool instance"
+                )
+            if params.command == "read_file":
+                if not params.path:
+                    raise ValueError("path is required for read_file")
+                result = self._handle_read(
+                    params.path, params.start_line, params.end_line
+                )
+            elif params.command == "write_file":
+                if params.path is None or params.content is None:
+                    raise ValueError("path and content are required for write_file")
+                result = self._handle_write(params.path, params.content, params.mode)
+            elif params.command == "delete_path":
+                if not params.path:
+                    raise ValueError("path is required for delete_path")
+                result = self._handle_delete(params.path)
+            elif params.command == "list_dir":
+                result = self._handle_list(params.path, params.recursive)
+            elif params.command == "grep":
+                if not params.pattern:
+                    raise ValueError("pattern is required for grep")
+                result = self._handle_grep(
+                    params.pattern, params.path, params.max_results
+                )
+            else:
+                raise ValueError(f"Unknown command: {params.command}")
+            return json.dumps({"ok": True, "result": result}, indent=2)
+        except Exception as exc:
+            return json.dumps(
+                {"ok": False, "error": type(exc).__name__, "message": str(exc)},
+                indent=2,
+            )
+
+    def get_tools(self, *, exclude: Iterable[S3FsCommand] | None = None) -> list[Tool]:
+        """
+        Get the filesystem tools.
+
+        Args:
+            exclude: Optional list of commands to exclude from the tool
+
+        Returns:
+            List containing the S3 filesystem tool
+        """
+        exclude_set = set(exclude or [])
+        unknown = exclude_set.difference(ALL_S3_COMMANDS)
+        if unknown:
+            raise ValueError(f"Unknown commands in exclude list: {sorted(unknown)}")
+
+        allowed = tuple(cmd for cmd in ALL_S3_COMMANDS if cmd not in exclude_set)
+        if not allowed:
+            raise ValueError("Cannot exclude every filesystem command")
+
+        cache_key = allowed
+        if cache_key in self._tool_cache:
+            return self._tool_cache[cache_key]
+
+        allowed_set = {cmd for cmd in allowed}
+        schema = S3FilesystemParams.model_json_schema(ref_template="#/$defs/{model}")
+        if (
+            "properties" in schema
+            and "command" in schema["properties"]
+            and isinstance(schema["properties"]["command"], dict)
+        ):
+            schema["properties"]["command"]["enum"] = list(allowed)
+
+        tool = Tool(
+            name=self.tool_name,
+            description=S3_FS_DESCRIPTION,
+            parameters=schema.get("properties", {}),
+            required=schema.get("required", []),
+            definitions=schema.get("$defs"),
+            run=partial(self._filesystem_tool, allowed_set),
+        )
+
+        self._tool_cache[cache_key] = [tool]
+        return [tool]
+
+
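To make the wiring concrete, a usage sketch built only from the signatures in this file; the import path and the choice to pass an explicit boto3 client are assumptions:

    import boto3

    from lm_deluge.tool.prefab.filesystem import S3FilesystemManager  # assumed import path

    manager = S3FilesystemManager(
        bucket="my-ai-workspace",
        prefix="agent-123/",            # keep each agent in its own key prefix
        s3_client=boto3.client("s3"),   # optional; a pre-configured client per the constructor
    )

    # A read-only variant of the tool: excluding the mutating commands also
    # narrows the `command` enum advertised in the generated JSON schema.
    tools = manager.get_tools(exclude=["write_file", "delete_path"])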
+__all__ = [
+    "FilesystemManager",
+    "FilesystemParams",
+    "InMemoryWorkspaceBackend",
+    "WorkspaceBackend",
+    "S3FilesystemManager",
+    "S3FilesystemParams",
+    "S3WorkspaceBackend",
+    "ConflictError",
+    "RetryConfig",
+]
+
+
+description = """
+S3-backed remote filesystem tool with optimistic concurrency control.
+
+Uses S3 conditional writes (If-None-Match and If-Match) for safe distributed
+operations, allowing multiple AI agents to share filesystem state without conflicts.
+
+Features:
+- If-None-Match: * -> Create-if-not-exists (distributed locks, idempotent writes)
+- If-Match: <etag> -> Update-only-if-unchanged (optimistic locking)
+- Automatic retry with exponential backoff on conflicts
+- ETag tracking for all file operations
+
+Example:
+    from lm_deluge.tool.prefab.s3_filesystem import S3FilesystemManager
+
+    manager = S3FilesystemManager(
+        bucket="my-ai-workspace",
+        prefix="agent-123/",  # Optional: isolate agent's workspace
+    )
+
+    # Get tools for the agent
+    tools = manager.get_tools()
+
+    # The filesystem operations are now safe for concurrent access
+"""
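Finally, the response contract: _filesystem_tool always hands the model a JSON string with an `ok` flag, so failures surface in-band instead of as raised exceptions. A sketch of consuming that envelope, with `raw` standing in for whatever the bound tool callable returned:

    import json

    raw = '{"ok": false, "error": "FileNotFoundError", "message": "notes.txt does not exist"}'

    payload = json.loads(raw)
    if payload["ok"]:
        result = payload["result"]  # e.g. the read_file dict: path, content, total_lines, etag, ...
        print(result["etag"], result["total_lines"])
    else:
        # Errors arrive as data; the agent can retry, create the file, and so on.
        print(f"{payload['error']}: {payload['message']}")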