lm-deluge 0.0.76__py3-none-any.whl → 0.0.78__py3-none-any.whl
- lm_deluge/llm_tools/filesystem.py +821 -0
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.78.dist-info}/METADATA +2 -1
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.78.dist-info}/RECORD +6 -6
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.78.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.78.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.78.dist-info}/top_level.txt +0 -0
`lm_deluge/llm_tools/filesystem.py` is new in 0.0.78 (`@@ -0,0 +1,821 @@`). The file opens with the tool description, the backend protocol, path normalization, and an in-memory backend:

```python
from __future__ import annotations

import json
import os
import re
from collections.abc import Callable, Iterable, Sequence
from dataclasses import dataclass
from functools import partial
from pathlib import Path
from typing import Any, Dict, Literal, Optional, Protocol

from pydantic import BaseModel, Field

from ..tool import Tool

FS_DESCRIPTION = """Interact with an isolated virtual filesystem that belongs to this session.

Paths are always relative to the workspace root and use forward slashes. Use this tool to:
- inspect files with optional line ranges
- create, overwrite, or append to files
- delete files or folders from the workspace
- list directory contents
- search for text across the workspace using regular expressions
- apply OpenAI-style apply_patch operations (create/update/delete)

This virtual filesystem is safe-by-construction. Paths that try to escape the workspace
or reference missing files will raise clear errors."""


class WorkspaceBackend(Protocol):
    """Abstract filesystem operations used by FilesystemManager."""

    def read_file(self, path: str) -> str: ...

    def write_file(self, path: str, content: str, *, overwrite: bool) -> None: ...

    def append_file(self, path: str, content: str) -> None: ...

    def delete_path(self, path: str) -> None: ...

    def list_dir(self, path: str, recursive: bool) -> list[dict[str, Any]]: ...

    def grep(
        self, pattern: str, path: str | None, limit: int
    ) -> list[dict[str, Any]]: ...


def _normalize_path(path: str | None, *, allow_root: bool = False) -> str:
    """Normalize user-provided paths and prevent directory traversal."""
    if path is None or path.strip() == "":
        if allow_root:
            return "."
        raise ValueError("Path is required")

    raw = path.strip()
    if raw.startswith("/"):
        raw = raw.lstrip("/")

    parts: list[str] = []
    for part in raw.split("/"):
        if part in ("", "."):
            continue
        if part == "..":
            if parts:
                parts.pop()
            else:
                raise ValueError("Path traversal outside the workspace is not allowed")
            continue
        parts.append(part)

    normalized = "/".join(parts)
    if normalized:
        return normalized
    if allow_root:
        return "."
    raise ValueError("Path must reference a file inside the workspace")


class InMemoryWorkspaceBackend:
    """Simple backend that stores files in memory."""

    def __init__(self, files: dict[str, str] | None = None):
        self._files: Dict[str, str] = {}
        if files:
            for path, content in files.items():
                key = _normalize_path(path)
                self._files[key] = content

    def read_file(self, path: str) -> str:
        key = _normalize_path(path)
        if key not in self._files:
            raise FileNotFoundError(f"{key} does not exist")
        return self._files[key]

    def write_file(self, path: str, content: str, *, overwrite: bool) -> None:
        key = _normalize_path(path)
        if not overwrite and key in self._files:
            raise FileExistsError(f"{key} already exists")
        self._files[key] = content

    def append_file(self, path: str, content: str) -> None:
        key = _normalize_path(path)
        if key in self._files:
            self._files[key] = f"{self._files[key]}{content}"
        else:
            self._files[key] = content

    def delete_path(self, path: str) -> None:
        key = _normalize_path(path, allow_root=True)
        if key == ".":
            self._files.clear()
            return
        if key in self._files:
            del self._files[key]
            return

        prefix = f"{key}/"
        targets = [p for p in self._files if p.startswith(prefix)]
        if not targets:
            raise FileNotFoundError(f"{key} does not exist")
        for target in targets:
            del self._files[target]

    def list_dir(self, path: str, recursive: bool) -> list[dict[str, Any]]:
        key = _normalize_path(path, allow_root=True)
        if key != "." and key in self._files and not recursive:
            # Listing a file path shows metadata for that file.
            return [self._format_file_entry(key)]

        prefix = "" if key == "." else f"{key}/"
        entries: list[dict[str, Any]] = []

        if key != "." and not any(
            p == key or p.startswith(prefix) for p in self._files
        ):
            raise FileNotFoundError(f"{key} does not exist")

        if recursive:
            for file_path in sorted(self._files):
                if not (file_path == key or file_path.startswith(prefix)):
                    continue
                entries.append(self._format_file_entry(file_path))
            return entries

        seen_dirs: set[str] = set()
        for file_path in sorted(self._files):
            if not (file_path == key or file_path.startswith(prefix)):
                continue
            remainder = file_path[len(prefix) :]
            if remainder == "":
                entries.append(self._format_file_entry(file_path))
                continue
            head, _, tail = remainder.partition("/")
            if tail:
                dir_path = head if key == "." else f"{key}/{head}"
                if dir_path not in seen_dirs:
                    entries.append(
                        {"path": dir_path, "type": "directory", "size": None}
                    )
                    seen_dirs.add(dir_path)
            else:
                entries.append(self._format_file_entry(file_path))
        return entries

    def grep(self, pattern: str, path: str | None, limit: int) -> list[dict[str, Any]]:
        regex = re.compile(pattern)
        key = _normalize_path(path, allow_root=True) if path is not None else "."
        prefix = "" if key == "." else f"{key}/"
        matches: list[dict[str, Any]] = []

        for file_path in sorted(self._files):
            if not (file_path == key or file_path.startswith(prefix)):
                continue
            content = self._files[file_path]
            for line_no, line in enumerate(content.splitlines(), start=1):
                if regex.search(line):
                    matches.append(
                        {"path": file_path, "line": line_no, "text": line.strip()}
                    )
                    if len(matches) >= limit:
                        return matches
        if (
            key != "."
            and key not in self._files
            and not any(p.startswith(prefix) for p in self._files)
        ):
            raise FileNotFoundError(f"{key} does not exist")
        return matches

    def _format_file_entry(self, path: str) -> dict[str, Any]:
        content = self._files[path]
        if content == "":
            line_count = 0
        else:
            line_count = content.count("\n") + (0 if content.endswith("\n") else 1)
        return {
            "path": path,
            "type": "file",
            "size": len(content),
            "line_count": max(line_count, 0),
        }
```
|
|
205
|
+
"read_file",
|
|
206
|
+
"write_file",
|
|
207
|
+
"delete_path",
|
|
208
|
+
"list_dir",
|
|
209
|
+
"grep",
|
|
210
|
+
"apply_patch",
|
|
211
|
+
]
|
|
212
|
+
ALL_COMMANDS: tuple[FsCommand, ...] = (
|
|
213
|
+
"read_file",
|
|
214
|
+
"write_file",
|
|
215
|
+
"delete_path",
|
|
216
|
+
"list_dir",
|
|
217
|
+
"grep",
|
|
218
|
+
"apply_patch",
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class ApplyPatchOperation(BaseModel):
|
|
223
|
+
"""Subset of OpenAI apply_patch operation payload."""
|
|
224
|
+
|
|
225
|
+
type: Literal["create_file", "update_file", "delete_file"] = Field(
|
|
226
|
+
description="Type of patch operation to perform."
|
|
227
|
+
)
|
|
228
|
+
path: str = Field(description="Path to the file being modified.")
|
|
229
|
+
diff: str | None = Field(
|
|
230
|
+
default=None,
|
|
231
|
+
description="V4A diff to apply for create/update operations.",
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
@property
|
|
235
|
+
def requires_diff(self) -> bool:
|
|
236
|
+
return self.type in {"create_file", "update_file"}
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class FilesystemParams(BaseModel):
|
|
240
|
+
"""Schema describing filesystem tool calls."""
|
|
241
|
+
|
|
242
|
+
command: FsCommand = Field(
|
|
243
|
+
description="Filesystem operation to perform (read_file, write_file, delete_path, list_dir, grep)"
|
|
244
|
+
)
|
|
245
|
+
path: Optional[str] = Field(
|
|
246
|
+
default=None,
|
|
247
|
+
description="Path to operate on, relative to workspace root. Use '.' for the root directory.",
|
|
248
|
+
)
|
|
249
|
+
start_line: Optional[int] = Field(
|
|
250
|
+
default=None,
|
|
251
|
+
description="1-based inclusive start line when reading a file. Leave unset to read from the beginning.",
|
|
252
|
+
ge=1,
|
|
253
|
+
)
|
|
254
|
+
end_line: Optional[int] = Field(
|
|
255
|
+
default=None,
|
|
256
|
+
description="1-based inclusive end line when reading a file. Leave unset to read through the end.",
|
|
257
|
+
ge=1,
|
|
258
|
+
)
|
|
259
|
+
content: Optional[str] = Field(
|
|
260
|
+
default=None,
|
|
261
|
+
description="Content to write when using write_file.",
|
|
262
|
+
)
|
|
263
|
+
mode: Optional[Literal["overwrite", "append", "create_if_missing"]] = Field(
|
|
264
|
+
default="overwrite",
|
|
265
|
+
description="How to write content. Overwrite replaces the file, append adds to the end, create_if_missing leaves existing files untouched.",
|
|
266
|
+
)
|
|
267
|
+
recursive: Optional[bool] = Field(
|
|
268
|
+
default=None,
|
|
269
|
+
description="When listing directories, set to true to recurse.",
|
|
270
|
+
)
|
|
271
|
+
pattern: Optional[str] = Field(
|
|
272
|
+
default=None,
|
|
273
|
+
description="Regular expression pattern to search for when using grep.",
|
|
274
|
+
)
|
|
275
|
+
max_results: Optional[int] = Field(
|
|
276
|
+
default=50,
|
|
277
|
+
description="Maximum number of grep matches to return.",
|
|
278
|
+
ge=1,
|
|
279
|
+
)
|
|
280
|
+
operation: ApplyPatchOperation | None = Field(
|
|
281
|
+
default=None,
|
|
282
|
+
description=(
|
|
283
|
+
"When using command='apply_patch', include an operation matching the "
|
|
284
|
+
"OpenAI apply_patch_call payload (type, path, diff)."
|
|
285
|
+
),
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
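Tool calls are plain JSON objects validated against this schema. A minimal sketch of two calls (the field values are illustrative):

```python
from lm_deluge.llm_tools.filesystem import FilesystemParams

# A grep call as it would arrive from the model.
grep_call = FilesystemParams.model_validate(
    {"command": "grep", "path": "src", "pattern": r"TODO|FIXME", "max_results": 10}
)

# An apply_patch call nests an ApplyPatchOperation payload.
patch_call = FilesystemParams.model_validate(
    {
        "command": "apply_patch",
        "operation": {
            "type": "update_file",
            "path": "src/app.py",
            "diff": "@@ def main():\n-    pass\n+    run()",
        },
    }
)
print(grep_call.command, patch_call.operation.type)  # grep update_file
```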
`FilesystemManager` wires the schema into a `Tool` and dispatches validated commands to the backend:

```python
class FilesystemManager:
    """Expose a TodoManager-style tool for interacting with a workspace."""

    def __init__(
        self,
        backend: WorkspaceBackend | None = None,
        *,
        tool_name: str = "filesystem",
    ):
        self.backend = backend or InMemoryWorkspaceBackend()
        self.tool_name = tool_name
        self._tool_cache: dict[tuple[str, ...], list[Tool]] = {}

    def _handle_read(
        self, path: str, start_line: Optional[int], end_line: Optional[int]
    ) -> dict[str, Any]:
        content = self.backend.read_file(path)
        total_lines = len(content.splitlines()) or (0 if content == "" else 1)
        start = start_line or 1
        end = end_line or total_lines
        if end < start:
            if not (total_lines == 0 and end_line is None and start == 1):
                raise ValueError("end_line must be greater than or equal to start_line")

        if start == 1 and end >= total_lines:
            snippet = content
        else:
            lines = content.splitlines()
            snippet = "\n".join(lines[start - 1 : end])

        return {
            "path": path,
            "start_line": start,
            "end_line": end,
            "content": snippet,
            "total_lines": total_lines,
            "character_count": len(content),
        }

    def _handle_write(
        self, path: str, content: str, mode: Optional[str]
    ) -> dict[str, Any]:
        write_mode = mode or "overwrite"
        if write_mode == "overwrite":
            self.backend.write_file(path, content, overwrite=True)
        elif write_mode == "append":
            self.backend.append_file(path, content)
        elif write_mode == "create_if_missing":
            try:
                self.backend.write_file(path, content, overwrite=False)
            except FileExistsError:
                pass
        else:
            raise ValueError(f"Unsupported write mode: {write_mode}")
        return {"path": path, "status": "ok", "mode": write_mode}

    def _handle_delete(self, path: str) -> dict[str, Any]:
        self.backend.delete_path(path)
        return {"path": path, "status": "ok"}

    def _handle_list(
        self, path: Optional[str], recursive: Optional[bool]
    ) -> dict[str, Any]:
        listing = self.backend.list_dir(path or ".", recursive=bool(recursive))
        return {"path": path or ".", "recursive": bool(recursive), "entries": listing}

    def _handle_grep(
        self, pattern: str, path: Optional[str], limit: Optional[int]
    ) -> dict[str, Any]:
        max_results = limit or 50
        matches = self.backend.grep(pattern, path=path, limit=max_results)
        return {
            "pattern": pattern,
            "path": path,
            "max_results": max_results,
            "matches": matches,
        }

    def _handle_apply_patch(self, operation: ApplyPatchOperation) -> dict[str, Any]:
        if operation.requires_diff and not operation.diff:
            raise ValueError("diff is required for create_file and update_file")

        if operation.type == "delete_file":
            self.backend.delete_path(operation.path)
            return {"path": operation.path, "operation": "delete_file"}

        assert operation.diff is not None  # for type checkers
        if operation.type == "create_file":
            new_content = apply_diff("", operation.diff, mode="create")
            self.backend.write_file(operation.path, new_content, overwrite=False)
            return {"path": operation.path, "operation": "create_file"}

        if operation.type == "update_file":
            current = self.backend.read_file(operation.path)
            new_content = apply_diff(current, operation.diff, mode="default")
            self.backend.write_file(operation.path, new_content, overwrite=True)
            return {"path": operation.path, "operation": "update_file"}

        raise ValueError(f"Unsupported patch operation: {operation.type}")

    def _filesystem_tool(self, allowed_commands: set[FsCommand], **kwargs: Any) -> str:
        params = FilesystemParams.model_validate(kwargs)

        try:
            if params.command not in allowed_commands:
                raise ValueError(
                    f"The '{params.command}' command is disabled for this tool instance"
                )
            if params.command == "read_file":
                if not params.path:
                    raise ValueError("path is required for read_file")
                result = self._handle_read(
                    params.path, params.start_line, params.end_line
                )
            elif params.command == "write_file":
                if params.path is None or params.content is None:
                    raise ValueError("path and content are required for write_file")
                result = self._handle_write(params.path, params.content, params.mode)
            elif params.command == "delete_path":
                if not params.path:
                    raise ValueError("path is required for delete_path")
                result = self._handle_delete(params.path)
            elif params.command == "list_dir":
                result = self._handle_list(params.path, params.recursive)
            elif params.command == "grep":
                if not params.pattern:
                    raise ValueError("pattern is required for grep")
                result = self._handle_grep(
                    params.pattern, params.path, params.max_results
                )
            elif params.command == "apply_patch":
                if params.operation is None:
                    raise ValueError("operation is required for apply_patch")
                result = self._handle_apply_patch(params.operation)
            else:
                raise ValueError(f"Unknown command: {params.command}")
            return json.dumps({"ok": True, "result": result}, indent=2)
        except Exception as exc:
            return json.dumps(
                {"ok": False, "error": type(exc).__name__, "message": str(exc)},
                indent=2,
            )

    def get_tools(self, *, exclude: Iterable[FsCommand] | None = None) -> list[Tool]:
        exclude_set = set(exclude or [])
        unknown = exclude_set.difference(ALL_COMMANDS)
        if unknown:
            raise ValueError(f"Unknown commands in exclude list: {sorted(unknown)}")

        allowed = tuple(cmd for cmd in ALL_COMMANDS if cmd not in exclude_set)
        if not allowed:
            raise ValueError("Cannot exclude every filesystem command")

        cache_key = allowed
        if cache_key in self._tool_cache:
            return self._tool_cache[cache_key]

        allowed_set = set(allowed)
        schema = FilesystemParams.model_json_schema(ref_template="#/$defs/{model}")
        if (
            "properties" in schema
            and "command" in schema["properties"]
            and isinstance(schema["properties"]["command"], dict)
        ):
            schema["properties"]["command"]["enum"] = list(allowed)

        tool = Tool(
            name=self.tool_name,
            description=FS_DESCRIPTION,
            parameters=schema.get("properties", {}),
            required=schema.get("required", []),
            definitions=schema.get("$defs"),
            run=partial(self._filesystem_tool, allowed_set),  # type: ignore
        )

        self._tool_cache[cache_key] = [tool]
        return [tool]

    def dump(self, destination: str | os.PathLike[str]) -> list[str]:
        """Copy the virtual workspace to the given filesystem directory."""
        target_root = Path(destination)
        if target_root.exists() and not target_root.is_dir():
            raise NotADirectoryError(f"{target_root} exists and is not a directory")
        target_root.mkdir(parents=True, exist_ok=True)

        entries = self.backend.list_dir(".", recursive=True)
        written: list[str] = []

        for entry in entries:
            if entry.get("type") != "file":
                continue
            rel_path = entry["path"]
            destination_path = target_root.joinpath(*rel_path.split("/"))
            destination_path.parent.mkdir(parents=True, exist_ok=True)
            destination_path.write_text(self.backend.read_file(rel_path))
            written.append(rel_path)

        return sorted(written)
```
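End to end, the manager is the only object a caller needs. A usage sketch, assuming `Tool.run` (defined in `lm_deluge.tool`, not shown in this diff) invokes the stored callable directly:

```python
from lm_deluge.llm_tools.filesystem import FilesystemManager

manager = FilesystemManager()

# get_tools returns a one-element list; `exclude` removes commands from the
# schema enum and from the allowed set enforced at call time.
[fs_tool] = manager.get_tools(exclude=["delete_path"])

print(fs_tool.run(command="write_file", path="notes.txt", content="remember\n"))
print(fs_tool.run(command="read_file", path="notes.txt"))

# Disabled commands fail closed with an {"ok": false, ...} envelope.
print(fs_tool.run(command="delete_path", path="notes.txt"))

# Persist the virtual workspace to a real directory when the session ends.
manager.dump("./session_workspace")
```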
The remainder of the file is a self-contained V4A diff engine backing `apply_patch`:

```python
ApplyDiffMode = Literal["default", "create"]


@dataclass
class Chunk:
    orig_index: int
    del_lines: list[str]
    ins_lines: list[str]


@dataclass
class ParserState:
    lines: list[str]
    index: int = 0
    fuzz: int = 0


@dataclass
class ParsedUpdateDiff:
    chunks: list[Chunk]
    fuzz: int


@dataclass
class ReadSectionResult:
    next_context: list[str]
    section_chunks: list[Chunk]
    end_index: int
    eof: bool


END_PATCH = "*** End Patch"
END_FILE = "*** End of File"
SECTION_TERMINATORS = [
    END_PATCH,
    "*** Update File:",
    "*** Delete File:",
    "*** Add File:",
]
END_SECTION_MARKERS = [*SECTION_TERMINATORS, END_FILE]


def apply_diff(input_text: str, diff: str, mode: ApplyDiffMode = "default") -> str:
    """Apply a V4A diff to the provided text."""

    diff_lines = _normalize_diff_lines(diff)
    if mode == "create":
        return _parse_create_diff(diff_lines)

    parsed = _parse_update_diff(diff_lines, input_text)
    return _apply_chunks(input_text, parsed.chunks)


def _normalize_diff_lines(diff: str) -> list[str]:
    lines = [line.rstrip("\r") for line in re.split(r"\r?\n", diff)]
    if lines and lines[-1] == "":
        lines.pop()
    return lines


def _is_done(state: ParserState, prefixes: Sequence[str]) -> bool:
    if state.index >= len(state.lines):
        return True
    if any(state.lines[state.index].startswith(prefix) for prefix in prefixes):
        return True
    return False


def _read_str(state: ParserState, prefix: str) -> str:
    if state.index >= len(state.lines):
        return ""
    current = state.lines[state.index]
    if current.startswith(prefix):
        state.index += 1
        return current[len(prefix) :]
    return ""


def _parse_create_diff(lines: list[str]) -> str:
    parser = ParserState(lines=[*lines, END_PATCH])
    output: list[str] = []

    while not _is_done(parser, SECTION_TERMINATORS):
        if parser.index >= len(parser.lines):
            break
        line = parser.lines[parser.index]
        parser.index += 1
        if not line.startswith("+"):
            raise ValueError(f"Invalid Add File Line: {line}")
        output.append(line[1:])

    return "\n".join(output)


def _parse_update_diff(lines: list[str], input_text: str) -> ParsedUpdateDiff:
    parser = ParserState(lines=[*lines, END_PATCH])
    input_lines = input_text.split("\n")
    chunks: list[Chunk] = []
    cursor = 0

    while not _is_done(parser, END_SECTION_MARKERS):
        anchor = _read_str(parser, "@@ ")
        has_bare_anchor = (
            anchor == ""
            and parser.index < len(parser.lines)
            and parser.lines[parser.index] == "@@"
        )
        if has_bare_anchor:
            parser.index += 1

        if not (anchor or has_bare_anchor or cursor == 0):
            current_line = (
                parser.lines[parser.index] if parser.index < len(parser.lines) else ""
            )
            raise ValueError(f"Invalid Line:\n{current_line}")

        if anchor.strip():
            cursor = _advance_cursor_to_anchor(anchor, input_lines, cursor, parser)

        section = _read_section(parser.lines, parser.index)
        find_result = _find_context(
            input_lines, section.next_context, cursor, section.eof
        )
        if find_result.new_index == -1:
            ctx_text = "\n".join(section.next_context)
            if section.eof:
                raise ValueError(f"Invalid EOF Context {cursor}:\n{ctx_text}")
            raise ValueError(f"Invalid Context {cursor}:\n{ctx_text}")

        cursor = find_result.new_index + len(section.next_context)
        parser.fuzz += find_result.fuzz
        parser.index = section.end_index

        for ch in section.section_chunks:
            chunks.append(
                Chunk(
                    orig_index=ch.orig_index + find_result.new_index,
                    del_lines=list(ch.del_lines),
                    ins_lines=list(ch.ins_lines),
                )
            )

    return ParsedUpdateDiff(chunks=chunks, fuzz=parser.fuzz)


def _advance_cursor_to_anchor(
    anchor: str,
    input_lines: list[str],
    cursor: int,
    parser: ParserState,
) -> int:
    found = False

    if not any(line == anchor for line in input_lines[:cursor]):
        for i in range(cursor, len(input_lines)):
            if input_lines[i] == anchor:
                cursor = i + 1
                found = True
                break

    if not found and not any(
        line.strip() == anchor.strip() for line in input_lines[:cursor]
    ):
        for i in range(cursor, len(input_lines)):
            if input_lines[i].strip() == anchor.strip():
                cursor = i + 1
                parser.fuzz += 1
                found = True
                break

    return cursor


def _read_section(lines: list[str], start_index: int) -> ReadSectionResult:
    context: list[str] = []
    del_lines: list[str] = []
    ins_lines: list[str] = []
    section_chunks: list[Chunk] = []
    mode: Literal["keep", "add", "delete"] = "keep"
    index = start_index
    orig_index = index

    while index < len(lines):
        raw = lines[index]
        if (
            raw.startswith("@@")
            or raw.startswith(END_PATCH)
            or raw.startswith("*** Update File:")
            or raw.startswith("*** Delete File:")
            or raw.startswith("*** Add File:")
            or raw.startswith(END_FILE)
        ):
            break
        if raw == "***":
            break
        if raw.startswith("***"):
            raise ValueError(f"Invalid Line: {raw}")

        index += 1
        last_mode = mode
        line = raw if raw else " "
        prefix = line[0]
        if prefix == "+":
            mode = "add"
        elif prefix == "-":
            mode = "delete"
        elif prefix == " ":
            mode = "keep"
        else:
            raise ValueError(f"Invalid Line: {line}")

        line_content = line[1:]
        switching_to_context = mode == "keep" and last_mode != mode
        if switching_to_context and (del_lines or ins_lines):
            section_chunks.append(
                Chunk(
                    orig_index=len(context) - len(del_lines),
                    del_lines=list(del_lines),
                    ins_lines=list(ins_lines),
                )
            )
            del_lines = []
            ins_lines = []

        if mode == "delete":
            del_lines.append(line_content)
            context.append(line_content)
        elif mode == "add":
            ins_lines.append(line_content)
        else:
            context.append(line_content)

    if del_lines or ins_lines:
        section_chunks.append(
            Chunk(
                orig_index=len(context) - len(del_lines),
                del_lines=list(del_lines),
                ins_lines=list(ins_lines),
            )
        )

    if index < len(lines) and lines[index] == END_FILE:
        return ReadSectionResult(context, section_chunks, index + 1, True)

    if index == orig_index:
        next_line = lines[index] if index < len(lines) else ""
        raise ValueError(f"Nothing in this section - index={index} {next_line}")

    return ReadSectionResult(context, section_chunks, index, False)


@dataclass
class ContextMatch:
    new_index: int
    fuzz: int


def _find_context(
    lines: list[str], context: list[str], start: int, eof: bool
) -> ContextMatch:
    if eof:
        end_start = max(0, len(lines) - len(context))
        end_match = _find_context_core(lines, context, end_start)
        if end_match.new_index != -1:
            return end_match
        fallback = _find_context_core(lines, context, start)
        return ContextMatch(new_index=fallback.new_index, fuzz=fallback.fuzz + 10000)
    return _find_context_core(lines, context, start)


def _find_context_core(
    lines: list[str], context: list[str], start: int
) -> ContextMatch:
    if not context:
        return ContextMatch(new_index=start, fuzz=0)

    for i in range(start, len(lines)):
        if _equals_slice(lines, context, i, lambda value: value):
            return ContextMatch(new_index=i, fuzz=0)
    for i in range(start, len(lines)):
        if _equals_slice(lines, context, i, lambda value: value.rstrip()):
            return ContextMatch(new_index=i, fuzz=1)
    for i in range(start, len(lines)):
        if _equals_slice(lines, context, i, lambda value: value.strip()):
            return ContextMatch(new_index=i, fuzz=100)

    return ContextMatch(new_index=-1, fuzz=0)


def _equals_slice(
    source: list[str], target: list[str], start: int, map_fn: Callable[[str], str]
) -> bool:
    if start + len(target) > len(source):
        return False
    for offset, target_value in enumerate(target):
        if map_fn(source[start + offset]) != map_fn(target_value):
            return False
    return True


def _apply_chunks(input_text: str, chunks: list[Chunk]) -> str:
    orig_lines = input_text.split("\n")
    dest_lines: list[str] = []
    cursor = 0

    for chunk in chunks:
        if chunk.orig_index > len(orig_lines):
            raise ValueError(
                f"apply_diff: chunk.origIndex {chunk.orig_index} > input length {len(orig_lines)}"
            )
        if cursor > chunk.orig_index:
            raise ValueError(
                f"apply_diff: overlapping chunk at {chunk.orig_index} (cursor {cursor})"
            )

        dest_lines.extend(orig_lines[cursor : chunk.orig_index])
        cursor = chunk.orig_index

        if chunk.ins_lines:
            dest_lines.extend(chunk.ins_lines)

        cursor += len(chunk.del_lines)

    dest_lines.extend(orig_lines[cursor:])
    return "\n".join(dest_lines)


__all__ = [
    "FilesystemManager",
    "FilesystemParams",
    "InMemoryWorkspaceBackend",
    "WorkspaceBackend",
]
```
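The V4A dialect this engine parses is compact: an optional `@@ anchor` line positions a hunk, followed by lines prefixed with a space (context), `-` (delete), or `+` (insert). A minimal sketch using `apply_diff` directly (a module-level helper, though not listed in `__all__`; the inputs are illustrative):

```python
from lm_deluge.llm_tools.filesystem import apply_diff

original = "def greet():\n    print('hi')\n\ndef main():\n    greet()\n"
diff = (
    "@@ def greet():\n"
    "-    print('hi')\n"
    "+    print('hello')\n"
)
# The anchor line locates the hunk, then the -/+ pair rewrites one line.
print(apply_diff(original, diff))

# Create mode treats every line as an insertion.
assert apply_diff("", "+line one\n+line two", mode="create") == "line one\nline two"
```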
`METADATA` picks up the new version number and documents the new helper:

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.76
+Version: 0.0.78
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -301,5 +301,6 @@ The `lm_deluge.llm_tools` package exposes a few helper functions:
 - `extract` – structure text or images into a Pydantic model based on a schema.
 - `translate` – translate a list of strings to English.
 - `score_llm` – simple yes/no style scoring with optional log probability output.
+- `FilesystemManager` – expose a sandboxed read/write filesystem tool (with optional regex search and `apply_patch` support) that agents can call without touching the host machine.
 
 Experimental embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) clients are also provided.
```
`RECORD` updates the entry for the new module:

```diff
@@ -40,7 +40,7 @@ lm_deluge/built_in_tools/anthropic/editor.py,sha256=DyC_DrHVTm1khU9QDL39vBuhu4tO
 lm_deluge/llm_tools/__init__.py,sha256=fMBsM6cGNxmv0YHZHZ79DGrfn3XYmiucvfgtxS47Ii8,433
 lm_deluge/llm_tools/classify.py,sha256=OdMwV5u4XoPlVhjOHX0sng5KPBIKFJmQeOE2fmnPgLU,21
 lm_deluge/llm_tools/extract.py,sha256=p61JW8yv5gQxPp4P8Hkm90ERgfD_Ek5IABzjIIlX-M0,4631
-lm_deluge/llm_tools/filesystem.py,sha256=
+lm_deluge/llm_tools/filesystem.py,sha256=Uy0lQ2Ecx5Cvqv0Sr3r_PEw8gBGZ21VAov5dg2knKfk,27942
 lm_deluge/llm_tools/locate.py,sha256=lYNbKTmy9dTvj0lEQkOQ7yrxyqsgYzjD0C_byJKI_4w,6271
 lm_deluge/llm_tools/ocr.py,sha256=7fDlvs6uUOvbxMasvGGNJx5Fj6biM6z3lijKZaGN26k,23
 lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
```
```diff
@@ -73,8 +73,8 @@ lm_deluge/util/schema.py,sha256=q6uwhA4s1lM2dHT1Kwc46E7OY1VecMOtTEI0PTFn6tA,1320
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
+lm_deluge-0.0.78.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.78.dist-info/METADATA,sha256=VaCsZgSsbc2yr0Wm8Q5Gw_9UKMRm-KkKkVQ7rS_p5-M,13697
+lm_deluge-0.0.78.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.78.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.78.dist-info/RECORD,,
```

WHEEL, licenses/LICENSE, and top_level.txt are unchanged apart from the renamed dist-info directory.