deepagents 0.1.4__py3-none-any.whl → 0.1.5rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,452 @@
+ """FilesystemBackend: Read and write files directly from the filesystem.
+
+ Security and search upgrades:
+ - Secure path resolution with root containment when in virtual_mode (sandboxed to cwd)
+ - Prevent symlink-following on file I/O using O_NOFOLLOW when available
+ - Ripgrep-powered grep with JSON parsing, plus Python fallback with regex
+   and optional glob include filtering, while preserving virtual path behavior
+ """
+
+ import os
+ import re
+ import json
+ import subprocess
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Optional, TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from langchain.tools import ToolRuntime
+
+ from .utils import (
+     check_empty_content,
+     format_content_with_line_numbers,
+     perform_string_replacement,
+     truncate_if_too_long,
+ )
+ import wcmatch.glob as wcglob
+ from deepagents.backends.utils import FileInfo, GrepMatch
+ from deepagents.backends.protocol import WriteResult, EditResult
+
+
+
+ class FilesystemBackend:
+     """Backend that reads and writes files directly from the filesystem.
+
+     Files are accessed using their actual filesystem paths. Relative paths are
+     resolved relative to the current working directory. Content is read/written
+     as plain text, and metadata (timestamps) are derived from filesystem stats.
+     """
+
+     def __init__(
+         self,
+         root_dir: Optional[str | Path] = None,
+         virtual_mode: bool = False,
+         max_file_size_mb: int = 10,
+     ) -> None:
+         """Initialize filesystem backend.
+
+         Args:
+             root_dir: Optional root directory for file operations. If provided,
+                 all file paths will be resolved relative to this directory.
+                 If not provided, uses the current working directory.
+             virtual_mode: If True, treat incoming paths as virtual absolute paths
+                 sandboxed under the root directory.
+             max_file_size_mb: Maximum file size (in megabytes) scanned by the
+                 Python grep fallback.
+         """
+         self.cwd = Path(root_dir) if root_dir else Path.cwd()
+         self.virtual_mode = virtual_mode
+         self.max_file_size_bytes = max_file_size_mb * 1024 * 1024
+
+     def _resolve_path(self, key: str) -> Path:
+         """Resolve a file path with security checks.
+
+         When virtual_mode=True, treat incoming paths as virtual absolute paths under
+         self.cwd, disallow traversal (.., ~) and ensure resolved path stays within root.
+         When virtual_mode=False, preserve legacy behavior: absolute paths are allowed
+         as-is; relative paths resolve under cwd.
+
+         Args:
+             key: File path (absolute, relative, or virtual when virtual_mode=True)
+
+         Returns:
+             Resolved absolute Path object
+         """
+         if self.virtual_mode:
+             vpath = key if key.startswith("/") else "/" + key
+             if ".." in vpath or vpath.startswith("~"):
+                 raise ValueError("Path traversal not allowed")
+             full = (self.cwd / vpath.lstrip("/")).resolve()
+             try:
+                 full.relative_to(self.cwd)
+             except ValueError:
+                 raise ValueError(f"Path outside root directory: {key}") from None
+             return full
+
+         path = Path(key)
+         if path.is_absolute():
+             return path
+         return (self.cwd / path).resolve()
+
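
For context, a minimal sketch of how the two resolution modes above behave (illustrative only; the import path and directory names are assumptions, not taken from the diff):

    # Illustrative only: module path and directories are assumed.
    from deepagents.backends.filesystem import FilesystemBackend  # assumed module path

    sandboxed = FilesystemBackend(root_dir="/data/agent-root", virtual_mode=True)
    print(sandboxed._resolve_path("/notes/todo.txt"))   # /data/agent-root/notes/todo.txt
    print(sandboxed._resolve_path("notes/todo.txt"))    # same file: a leading "/" is implied
    try:
        sandboxed._resolve_path("../etc/passwd")        # ".." is rejected outright
    except ValueError as exc:
        print(exc)                                      # Path traversal not allowed

    legacy = FilesystemBackend(root_dir="/data/agent-root")  # virtual_mode=False
    print(legacy._resolve_path("/etc/hosts"))           # absolute paths pass through as-is
    print(legacy._resolve_path("notes/todo.txt"))       # relative paths resolve under the root
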
+     def ls_info(self, path: str) -> list[FileInfo]:
+         """List files from filesystem.
+
+         Args:
+             path: Absolute directory path to list files from.
+
+         Returns:
+             List of FileInfo-like dicts.
+         """
+         dir_path = self._resolve_path(path)
+         if not dir_path.exists() or not dir_path.is_dir():
+             return []
+
+         results: list[FileInfo] = []
+
+         # Convert cwd to string for comparison
+         cwd_str = str(self.cwd)
+         if not cwd_str.endswith("/"):
+             cwd_str += "/"
+
+         # Walk the directory tree
+         try:
+             for path in dir_path.rglob("*"):
+                 try:
+                     is_file = path.is_file()
+                 except OSError:
+                     continue
+                 if is_file:
+                     abs_path = str(path)
+                     if not self.virtual_mode:
+                         try:
+                             st = path.stat()
+                             results.append({
+                                 "path": abs_path,
+                                 "is_dir": False,
+                                 "size": int(st.st_size),
+                                 "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
+                             })
+                         except OSError:
+                             results.append({"path": abs_path, "is_dir": False})
+                         continue
+                     # Strip the cwd prefix if present
+                     if abs_path.startswith(cwd_str):
+                         relative_path = abs_path[len(cwd_str):]
+                     elif abs_path.startswith(str(self.cwd)):
+                         # Handle case where cwd doesn't end with /
+                         relative_path = abs_path[len(str(self.cwd)):].lstrip("/")
+                     else:
+                         # Path is outside cwd, return as-is or skip
+                         relative_path = abs_path
+
+                     virt_path = "/" + relative_path
+                     try:
+                         st = path.stat()
+                         results.append({
+                             "path": virt_path,
+                             "is_dir": False,
+                             "size": int(st.st_size),
+                             "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
+                         })
+                     except OSError:
+                         results.append({"path": virt_path, "is_dir": False})
+         except (OSError, PermissionError):
+             pass
+
+         # Keep deterministic order by path
+         results.sort(key=lambda x: x.get("path", ""))
+         return results
+
+     # Removed legacy ls() convenience to keep lean surface
+
+     def read(
+         self,
+         file_path: str,
+         offset: int = 0,
+         limit: int = 2000,
+     ) -> str:
+         """Read file content with line numbers.
+
+         Args:
+             file_path: Absolute or relative file path
+             offset: Line offset to start reading from (0-indexed)
+             limit: Maximum number of lines to read
+
+         Returns:
+             Formatted file content with line numbers, or error message.
+         """
+         resolved_path = self._resolve_path(file_path)
+
+         if not resolved_path.exists() or not resolved_path.is_file():
+             return f"Error: File '{file_path}' not found"
+
+         try:
+             # Open with O_NOFOLLOW where available to avoid symlink traversal
+             try:
+                 fd = os.open(resolved_path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+                 with os.fdopen(fd, "r", encoding="utf-8") as f:
+                     content = f.read()
+             except OSError:
+                 # Fallback to normal open if O_NOFOLLOW unsupported or fails
+                 with open(resolved_path, "r", encoding="utf-8") as f:
+                     content = f.read()
+
+             empty_msg = check_empty_content(content)
+             if empty_msg:
+                 return empty_msg
+
+             lines = content.splitlines()
+             start_idx = offset
+             end_idx = min(start_idx + limit, len(lines))
+
+             if start_idx >= len(lines):
+                 return f"Error: Line offset {offset} exceeds file length ({len(lines)} lines)"
+
+             selected_lines = lines[start_idx:end_idx]
+             return format_content_with_line_numbers(selected_lines, start_line=start_idx + 1)
+         except (OSError, UnicodeDecodeError) as e:
+             return f"Error reading file '{file_path}': {e}"
+
+     def write(
+         self,
+         file_path: str,
+         content: str,
+     ) -> WriteResult:
+         """Create a new file with content.
+         Returns WriteResult. External storage sets files_update=None.
+         """
+         resolved_path = self._resolve_path(file_path)
+
+         if resolved_path.exists():
+             return WriteResult(error=f"Cannot write to {file_path} because it already exists. Read and then make an edit, or write to a new path.")
+
+         try:
+             # Create parent directories if needed
+             resolved_path.parent.mkdir(parents=True, exist_ok=True)
+
+             # Prefer O_NOFOLLOW to avoid writing through symlinks
+             flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
+             if hasattr(os, "O_NOFOLLOW"):
+                 flags |= os.O_NOFOLLOW
+             fd = os.open(resolved_path, flags, 0o644)
+             with os.fdopen(fd, "w", encoding="utf-8") as f:
+                 f.write(content)
+
+             return WriteResult(path=file_path, files_update=None)
+         except (OSError, UnicodeEncodeError) as e:
+             return WriteResult(error=f"Error writing file '{file_path}': {e}")
+
+     def edit(
+         self,
+         file_path: str,
+         old_string: str,
+         new_string: str,
+         replace_all: bool = False,
+     ) -> EditResult:
+         """Edit a file by replacing string occurrences.
+         Returns EditResult. External storage sets files_update=None.
+         """
+         resolved_path = self._resolve_path(file_path)
+
+         if not resolved_path.exists() or not resolved_path.is_file():
+             return EditResult(error=f"Error: File '{file_path}' not found")
+
+         try:
+             # Read securely
+             try:
+                 fd = os.open(resolved_path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+                 with os.fdopen(fd, "r", encoding="utf-8") as f:
+                     content = f.read()
+             except OSError:
+                 with open(resolved_path, "r", encoding="utf-8") as f:
+                     content = f.read()
+
+             result = perform_string_replacement(content, old_string, new_string, replace_all)
+
+             if isinstance(result, str):
+                 return EditResult(error=result)
+
+             new_content, occurrences = result
+
+             # Write securely
+             flags = os.O_WRONLY | os.O_TRUNC
+             if hasattr(os, "O_NOFOLLOW"):
+                 flags |= os.O_NOFOLLOW
+             fd = os.open(resolved_path, flags)
+             with os.fdopen(fd, "w", encoding="utf-8") as f:
+                 f.write(new_content)
+
+             return EditResult(path=file_path, files_update=None, occurrences=int(occurrences))
+         except (OSError, UnicodeDecodeError, UnicodeEncodeError) as e:
+             return EditResult(error=f"Error editing file '{file_path}': {e}")
+
+     # Removed legacy grep() convenience to keep lean surface
+
+     def grep_raw(
+         self,
+         pattern: str,
+         path: Optional[str] = None,
+         glob: Optional[str] = None,
+     ) -> list[GrepMatch] | str:
+         # Validate regex
+         try:
+             re.compile(pattern)
+         except re.error as e:
+             return f"Invalid regex pattern: {e}"
+
+         # Resolve base path
+         try:
+             base_full = self._resolve_path(path or ".")
+         except ValueError:
+             return []
+
+         if not base_full.exists():
+             return []
+
+         # Try ripgrep first
+         results = self._ripgrep_search(pattern, base_full, glob)
+         if results is None:
+             results = self._python_search(pattern, base_full, glob)
+
+         matches: list[GrepMatch] = []
+         for fpath, items in results.items():
+             for line_num, line_text in items:
+                 matches.append({"path": fpath, "line": int(line_num), "text": line_text})
+         return matches
+
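
A short sketch of consuming grep_raw (illustrative only; the backend construction and paths are assumptions): matches come back as GrepMatch dicts, while an invalid regex yields an error string rather than an exception.

    # Illustrative only: backend construction and paths are assumed.
    from deepagents.backends.filesystem import FilesystemBackend  # assumed module path

    backend = FilesystemBackend(root_dir="/data/agent-root", virtual_mode=True)
    hits = backend.grep_raw(r"TODO\(", path="/", glob="*.py")
    if isinstance(hits, str):
        print(f"grep failed: {hits}")             # e.g. "Invalid regex pattern: ..."
    else:
        for match in hits:                        # each match: {"path", "line", "text"}
            print(f'{match["path"]}:{match["line"]}: {match["text"]}')
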
+     def _ripgrep_search(
+         self, pattern: str, base_full: Path, include_glob: Optional[str]
+     ) -> Optional[dict[str, list[tuple[int, str]]]]:
+         cmd = ["rg", "--json"]
+         if include_glob:
+             cmd.extend(["--glob", include_glob])
+         cmd.extend(["--", pattern, str(base_full)])
+
+         try:
+             proc = subprocess.run(  # noqa: S603
+                 cmd,
+                 capture_output=True,
+                 text=True,
+                 timeout=30,
+                 check=False,
+             )
+         except (subprocess.TimeoutExpired, FileNotFoundError):
+             return None
+
+         results: dict[str, list[tuple[int, str]]] = {}
+         for line in proc.stdout.splitlines():
+             try:
+                 data = json.loads(line)
+             except json.JSONDecodeError:
+                 continue
+             if data.get("type") != "match":
+                 continue
+             pdata = data.get("data", {})
+             ftext = pdata.get("path", {}).get("text")
+             if not ftext:
+                 continue
+             p = Path(ftext)
+             if self.virtual_mode:
+                 try:
+                     virt = "/" + str(p.resolve().relative_to(self.cwd))
+                 except Exception:
+                     continue
+             else:
+                 virt = str(p)
+             ln = pdata.get("line_number")
+             lt = pdata.get("lines", {}).get("text", "").rstrip("\n")
+             if ln is None:
+                 continue
+             results.setdefault(virt, []).append((int(ln), lt))
+
+         return results
+
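
The loop above consumes ripgrep's --json output, which emits one JSON object per line with events such as "begin", "match", "end", and "summary"; only "match" events are kept. A trimmed example of the event shape being parsed (field values are illustrative):

    # Trimmed example of a ripgrep --json "match" event; values are illustrative.
    import json

    sample = (
        '{"type":"match","data":{"path":{"text":"/data/agent-root/src/app.py"},'
        '"lines":{"text":"    # TODO(alice): handle retries\\n"},"line_number":42,'
        '"absolute_offset":1337,"submatches":[{"match":{"text":"TODO("},"start":6,"end":11}]}}'
    )
    event = json.loads(sample)
    assert event["type"] == "match"
    print(event["data"]["path"]["text"])            # file containing the match
    print(event["data"]["line_number"])             # 1-based line number
    print(event["data"]["lines"]["text"].rstrip())  # matched line text
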
+     def _python_search(
+         self, pattern: str, base_full: Path, include_glob: Optional[str]
+     ) -> dict[str, list[tuple[int, str]]]:
+         try:
+             regex = re.compile(pattern)
+         except re.error:
+             return {}
+
+         results: dict[str, list[tuple[int, str]]] = {}
+         root = base_full if base_full.is_dir() else base_full.parent
+
+         for fp in root.rglob("*"):
+             if not fp.is_file():
+                 continue
+             if include_glob and not wcglob.globmatch(fp.name, include_glob, flags=wcglob.BRACE):
+                 continue
+             try:
+                 if fp.stat().st_size > self.max_file_size_bytes:
+                     continue
+             except OSError:
+                 continue
+             try:
+                 content = fp.read_text()
+             except (UnicodeDecodeError, PermissionError, OSError):
+                 continue
+             for line_num, line in enumerate(content.splitlines(), 1):
+                 if regex.search(line):
+                     if self.virtual_mode:
+                         try:
+                             virt_path = "/" + str(fp.resolve().relative_to(self.cwd))
+                         except Exception:
+                             continue
+                     else:
+                         virt_path = str(fp)
+                     results.setdefault(virt_path, []).append((line_num, line))
+
+         return results
+
+     def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
+         if pattern.startswith("/"):
+             pattern = pattern.lstrip("/")
+
+         search_path = self.cwd if path == "/" else self._resolve_path(path)
+         if not search_path.exists() or not search_path.is_dir():
+             return []
+
+         results: list[FileInfo] = []
+         try:
+             # Use recursive globbing to match files in subdirectories as tests expect
+             for matched_path in search_path.rglob(pattern):
+                 try:
+                     is_file = matched_path.is_file()
+                 except OSError:
+                     continue
+                 if not is_file:
+                     continue
+                 abs_path = str(matched_path)
+                 if not self.virtual_mode:
+                     try:
+                         st = matched_path.stat()
+                         results.append({
+                             "path": abs_path,
+                             "is_dir": False,
+                             "size": int(st.st_size),
+                             "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
+                         })
+                     except OSError:
+                         results.append({"path": abs_path, "is_dir": False})
+                 else:
+                     cwd_str = str(self.cwd)
+                     if not cwd_str.endswith("/"):
+                         cwd_str += "/"
+                     if abs_path.startswith(cwd_str):
+                         relative_path = abs_path[len(cwd_str):]
+                     elif abs_path.startswith(str(self.cwd)):
+                         relative_path = abs_path[len(str(self.cwd)):].lstrip("/")
+                     else:
+                         relative_path = abs_path
+                     virt = "/" + relative_path
+                     try:
+                         st = matched_path.stat()
+                         results.append({
+                             "path": virt,
+                             "is_dir": False,
+                             "size": int(st.st_size),
+                             "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(),
+                         })
+                     except OSError:
+                         results.append({"path": virt, "is_dir": False})
+         except (OSError, ValueError):
+             pass
+
+         results.sort(key=lambda x: x.get("path", ""))
+         return results
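
Taken together, a minimal end-to-end sketch of this backend's surface (illustrative only; the import path is an assumption and the temporary directory stands in for a real project root):

    # Illustrative only: import path assumed; a temp directory stands in for the root.
    import tempfile
    from pathlib import Path
    from deepagents.backends.filesystem import FilesystemBackend  # assumed module path

    root = Path(tempfile.mkdtemp()).resolve()       # resolved so virtual paths stay inside it
    backend = FilesystemBackend(root_dir=root, virtual_mode=True)

    created = backend.write("/notes/plan.md", "# Plan\n- step one\n")
    assert created.error is None and created.files_update is None  # persisted to disk

    duplicate = backend.write("/notes/plan.md", "again")
    assert duplicate.error is not None              # write() refuses to overwrite

    edited = backend.edit("/notes/plan.md", "step one", "step 1")
    print(edited.occurrences)                       # expected: 1

    print(backend.read("/notes/plan.md"))           # content with line numbers
    print(backend.glob_info("*.md"))                # FileInfo dicts with virtual paths
    print(backend.ls_info("/"))                     # recursive listing under the root
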
@@ -0,0 +1,122 @@
+ """Protocol definition for pluggable memory backends.
+
+ This module defines the BackendProtocol that all backend implementations
+ must follow. Backends can store files in different locations (state, filesystem,
+ database, etc.) and provide a uniform interface for file operations.
+ """
+
+ from typing import TYPE_CHECKING, Optional, Protocol, runtime_checkable, Callable, TypeAlias, Any
+ from langchain.tools import ToolRuntime
+ from deepagents.backends.utils import FileInfo, GrepMatch
+
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class WriteResult:
+     """Result from backend write operations.
+     Attributes:
+         error: Error message on failure, None on success.
+         path: Absolute path of written file, None on failure.
+         files_update: State update dict for checkpoint backends, None for external storage.
+             Checkpoint backends populate this with {file_path: file_data} for LangGraph state.
+             External backends set None (already persisted to disk/S3/database/etc).
+     Examples:
+         >>> # Checkpoint storage
+         >>> WriteResult(path="/f.txt", files_update={"/f.txt": {...}})
+         >>> # External storage
+         >>> WriteResult(path="/f.txt", files_update=None)
+         >>> # Error
+         >>> WriteResult(error="File exists")
+     """
+
+     error: str | None = None
+     path: str | None = None
+     files_update: dict[str, Any] | None = None
+
+
+ @dataclass
+ class EditResult:
+     """Result from backend edit operations.
+     Attributes:
+         error: Error message on failure, None on success.
+         path: Absolute path of edited file, None on failure.
+         files_update: State update dict for checkpoint backends, None for external storage.
+             Checkpoint backends populate this with {file_path: file_data} for LangGraph state.
+             External backends set None (already persisted to disk/S3/database/etc).
+         occurrences: Number of replacements made, None on failure.
+     Examples:
+         >>> # Checkpoint storage
+         >>> EditResult(path="/f.txt", files_update={"/f.txt": {...}}, occurrences=1)
+         >>> # External storage
+         >>> EditResult(path="/f.txt", files_update=None, occurrences=2)
+         >>> # Error
+         >>> EditResult(error="File not found")
+     """
+
+     error: str | None = None
+     path: str | None = None
+     files_update: dict[str, Any] | None = None
+     occurrences: int | None = None
+
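
A rough sketch of how a caller might branch on these result dataclasses (the backend argument is any implementation of the protocol defined below; nothing here is prescribed by the package):

    # Rough sketch of consuming WriteResult/EditResult; `backend` is any object
    # exposing the edit() surface defined in this module.
    def apply_edit(backend, file_path: str, old: str, new: str) -> str:
        result = backend.edit(file_path, old, new, replace_all=True)
        if result.error is not None:
            return f"edit failed: {result.error}"
        if result.files_update is not None:
            # Checkpoint-style backend: caller merges {file_path: file_data} into graph state.
            ...
        # External backend (filesystem/S3/database): already persisted, nothing to merge.
        return f"replaced {result.occurrences} occurrence(s) in {result.path}"
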
+ @runtime_checkable
+ class BackendProtocol(Protocol):
+     """Protocol for pluggable memory backends (single, unified).
+
+     Backends can store files in different locations (state, filesystem, database, etc.)
+     and provide a uniform interface for file operations.
+
+     All file data is represented as dicts with the following structure:
+         {
+             "content": list[str],    # Lines of text content
+             "created_at": str,       # ISO format timestamp
+             "modified_at": str,      # ISO format timestamp
+         }
+     """
+
+     def ls_info(self, path: str) -> list["FileInfo"]:
+         """Structured listing with file metadata."""
+         ...
+
+     def read(
+         self,
+         file_path: str,
+         offset: int = 0,
+         limit: int = 2000,
+     ) -> str:
+         """Read file content with line numbers or an error string."""
+         ...
+
+     def grep_raw(
+         self,
+         pattern: str,
+         path: Optional[str] = None,
+         glob: Optional[str] = None,
+     ) -> list["GrepMatch"] | str:
+         """Structured search results or error string for invalid input."""
+         ...
+
+     def glob_info(self, pattern: str, path: str = "/") -> list["FileInfo"]:
+         """Structured glob matching returning FileInfo dicts."""
+         ...
+
+     def write(
+         self,
+         file_path: str,
+         content: str,
+     ) -> WriteResult:
+         """Create a new file. Returns WriteResult; error populated on failure."""
+         ...
+
+     def edit(
+         self,
+         file_path: str,
+         old_string: str,
+         new_string: str,
+         replace_all: bool = False,
+     ) -> EditResult:
+         """Edit a file by replacing string occurrences. Returns EditResult."""
+         ...
+
+
+ BackendFactory: TypeAlias = Callable[[ToolRuntime], BackendProtocol]
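
To make the contract concrete, a minimal in-memory implementation that satisfies BackendProtocol and can serve as a BackendFactory (illustrative only; none of these names ship with the package, and it assumes the definitions from the module above are in scope):

    # Illustrative only: a toy backend, not part of deepagents. Assumes WriteResult,
    # EditResult, BackendProtocol, ToolRuntime, FileInfo, and GrepMatch from above.
    import fnmatch
    import re
    from typing import Optional

    class InMemoryBackend:
        def __init__(self) -> None:
            self._files: dict[str, str] = {}

        def ls_info(self, path: str) -> list[FileInfo]:
            return [{"path": p, "is_dir": False} for p in sorted(self._files) if p.startswith(path)]

        def read(self, file_path: str, offset: int = 0, limit: int = 2000) -> str:
            if file_path not in self._files:
                return f"Error: File '{file_path}' not found"
            lines = self._files[file_path].splitlines()[offset:offset + limit]
            return "\n".join(f"{offset + i + 1}\t{line}" for i, line in enumerate(lines))

        def grep_raw(self, pattern: str, path: Optional[str] = None, glob: Optional[str] = None) -> list[GrepMatch] | str:
            try:
                regex = re.compile(pattern)
            except re.error as e:
                return f"Invalid regex pattern: {e}"
            return [
                {"path": p, "line": n, "text": line}
                for p, text in self._files.items()
                for n, line in enumerate(text.splitlines(), 1)
                if regex.search(line)
            ]

        def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
            return [{"path": p, "is_dir": False} for p in sorted(self._files) if fnmatch.fnmatch(p, pattern)]

        def write(self, file_path: str, content: str) -> WriteResult:
            if file_path in self._files:
                return WriteResult(error=f"Cannot write to {file_path} because it already exists.")
            self._files[file_path] = content
            return WriteResult(path=file_path, files_update=None)

        def edit(self, file_path: str, old_string: str, new_string: str, replace_all: bool = False) -> EditResult:
            text = self._files.get(file_path)
            if text is None:
                return EditResult(error=f"Error: File '{file_path}' not found")
            total = text.count(old_string)
            if total == 0:
                return EditResult(error=f"String not found in '{file_path}'")
            occurrences = total if replace_all else 1
            self._files[file_path] = text.replace(old_string, new_string, -1 if replace_all else 1)
            return EditResult(path=file_path, files_update=None, occurrences=occurrences)

    def make_backend(runtime: ToolRuntime) -> BackendProtocol:
        """A BackendFactory-compatible callable (illustrative)."""
        return InMemoryBackend()

    assert isinstance(InMemoryBackend(), BackendProtocol)  # runtime_checkable structural check
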