kader 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,607 @@
1
+ """FilesystemBackend: Read and write files directly from the filesystem.
2
+
3
+ Security and search upgrades:
4
+ - Secure path resolution with root containment when in virtual_mode (sandboxed to cwd)
5
+ - Prevent symlink-following on file I/O using O_NOFOLLOW when available
6
+ - Ripgrep-powered grep with JSON parsing, plus Python fallback with regex
7
+ and optional glob include filtering, while preserving virtual path behavior
8
+ """
9
+
10
+ import json
11
+ import os
12
+ import re
13
+ import subprocess
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+
17
+ import wcmatch.glob as wcglob
18
+
19
+ from kader.tools.protocol import (
20
+ BackendProtocol,
21
+ EditResult,
22
+ FileDownloadResponse,
23
+ FileInfo,
24
+ FileUploadResponse,
25
+ GrepMatch,
26
+ WriteResult,
27
+ )
28
+ from kader.tools.utils import (
29
+ check_empty_content,
30
+ format_content_with_line_numbers,
31
+ perform_string_replacement,
32
+ )
33
+
34
+
35
+ class FilesystemBackend(BackendProtocol):
36
+ """Backend that reads and writes files directly from the filesystem.
37
+
38
+ Files are accessed using their actual filesystem paths. Relative paths are
39
+ resolved relative to the current working directory. Content is read/written
40
+ as plain text, and metadata (timestamps) are derived from filesystem stats.
41
+ """
42
+
43
def __init__(
    self,
    root_dir: str | Path | None = None,
    virtual_mode: bool = False,
    max_file_size_mb: int = 10,
) -> None:
    """Initialize the filesystem backend.

    Args:
        root_dir: Optional root directory for file operations. Relative
            paths are resolved against it. When omitted (or empty), the
            current working directory is used.
        virtual_mode: When True, ``_resolve_path`` treats incoming paths as
            virtual absolute paths rooted at the root directory and refuses
            anything that would resolve outside of it.
        max_file_size_mb: Size limit in MiB. It is stored in bytes as
            ``max_file_size_bytes``; the pure-Python grep fallback skips
            files larger than this.
    """
    base = Path(root_dir) if root_dir else Path.cwd()
    # Normalise the root in both cases: containment checks compare it against
    # fully resolved paths, so it has to be resolved the same way.
    self.cwd = base.resolve()
    self.virtual_mode = virtual_mode
    self.max_file_size_bytes = max_file_size_mb * 1024 * 1024
59
+
60
def _resolve_path(self, key: str) -> Path:
    """Resolve a file path with security checks.

    When ``virtual_mode`` is True, ``key`` is treated as a virtual absolute
    path under ``self.cwd``: a ``..`` path component or a key starting with
    ``~`` is rejected, and the resolved path must stay inside the root.
    When ``virtual_mode`` is False the legacy behaviour is preserved:
    absolute paths are returned as-is and relative paths resolve under
    ``self.cwd``.

    Args:
        key: File path (absolute, relative, or virtual when virtual_mode=True).

    Returns:
        Resolved absolute Path object.

    Raises:
        ValueError: In virtual mode, when the path attempts traversal or
            resolves outside the root directory.
    """
    if not self.virtual_mode:
        path = Path(key)
        if path.is_absolute():
            return path
        return (self.cwd / path).resolve()

    vpath = key if key.startswith("/") else "/" + key
    # Look at whole components rather than substrings so that ordinary names
    # such as "notes..txt" are not mistaken for traversal. Backslashes are
    # treated as separators too, so "..\\x" is still refused.
    components = vpath.replace("\\", "/").split("/")
    # The "~" test must look at the caller's key: vpath always starts with "/".
    if ".." in components or key.startswith("~"):
        raise ValueError("Path traversal not allowed")

    full = (self.cwd / vpath.lstrip("/")).resolve()
    try:
        # Final containment check; this also catches symlinks leaving the root.
        full.relative_to(self.cwd)
    except ValueError:
        raise ValueError(
            f"Path:{full} outside root directory: {self.cwd}"
        ) from None
    return full
91
+
92
def ls_info(self, path: str) -> list[FileInfo]:
    """List files and directories directly inside a directory (non-recursive).

    Args:
        path: Directory to list; it is passed through ``_resolve_path``.

    Returns:
        FileInfo-like dicts sorted by ``path``. Directories carry a trailing
        ``/`` in their path, ``is_dir=True`` and ``size=0``. ``size`` and
        ``modified_at`` are omitted when the entry cannot be stat'ed. In
        virtual mode paths are reported relative to the root with a leading
        ``/``; otherwise they are absolute filesystem paths. An empty list is
        returned when the path is rejected, missing or not a directory.
    """
    try:
        dir_path = self._resolve_path(path)
    except ValueError:
        return []

    # is_dir() is already False for a missing path.
    if not dir_path.is_dir():
        return []

    results: list[FileInfo] = []
    try:
        for child in dir_path.iterdir():
            try:
                is_dir = child.is_dir()
                is_file = (not is_dir) and child.is_file()
            except OSError:
                continue
            if not (is_dir or is_file):
                # Neither a regular file nor a directory: not listed.
                continue

            if self.virtual_mode:
                try:
                    # Component-wise and separator-independent, unlike
                    # stripping a string prefix.
                    shown = "/" + child.relative_to(self.cwd).as_posix()
                except ValueError:
                    # Outside the root: keep the raw path behind the "/".
                    shown = "/" + str(child)
            else:
                shown = str(child)
            if is_dir:
                shown += "/"

            entry: FileInfo = {"path": shown, "is_dir": is_dir}
            try:
                st = child.stat()
            except OSError:
                # Metadata is best-effort; still report the entry itself.
                pass
            else:
                entry["size"] = 0 if is_dir else int(st.st_size)
                entry["modified_at"] = datetime.fromtimestamp(
                    st.st_mtime
                ).isoformat()
            results.append(entry)
    except OSError:
        # Listing is best-effort: return whatever was collected so far.
        pass

    # Keep deterministic order by path
    results.sort(key=lambda item: item.get("path", ""))
    return results
209
+
210
def read(
    self,
    file_path: str,
    offset: int = 0,
    limit: int = 2000,
) -> str:
    """Read file content with line numbers.

    Args:
        file_path: Absolute or relative file path.
        offset: Line offset to start reading from (0-indexed, non-negative).
        limit: Maximum number of lines to read (non-negative).

    Returns:
        The selected lines as formatted by ``format_content_with_line_numbers``
        (numbering starts at ``offset + 1``), the message produced by
        ``check_empty_content`` when it reports one, or a string starting
        with ``"Error"`` when the request cannot be served.
    """
    # Negative values would silently slice from the end of the file and
    # produce misleading line numbers, so refuse them up front.
    if offset < 0:
        return f"Error: Line offset {offset} must be non-negative"
    if limit < 0:
        return f"Error: Line limit {limit} must be non-negative"

    try:
        resolved_path = self._resolve_path(file_path)
    except ValueError as e:
        return f"Error: {str(e)}"

    # is_file() is already False for a missing path.
    if not resolved_path.is_file():
        return f"Error: File '{file_path}' not found"

    try:
        # Open with O_NOFOLLOW where available to avoid symlink traversal
        fd = os.open(resolved_path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
        with os.fdopen(fd, "r", encoding="utf-8") as f:
            content = f.read()

        empty_msg = check_empty_content(content)
        if empty_msg:
            return empty_msg

        lines = content.splitlines()
        if offset >= len(lines):
            return f"Error: Line offset {offset} exceeds file length ({len(lines)} lines)"

        # Slicing clamps at the end of the list, so no explicit min() is needed.
        selected_lines = lines[offset : offset + limit]
        return format_content_with_line_numbers(
            selected_lines, start_line=offset + 1
        )
    except (OSError, UnicodeDecodeError) as e:
        return f"Error reading file '{file_path}': {e}"
257
+
258
def write(
    self,
    file_path: str,
    content: str,
) -> WriteResult:
    """Create a new file with content.

    Existing files are never overwritten. Parent directories are created as
    needed and the content is written as UTF-8 text.

    Args:
        file_path: Path of the file to create; passed through ``_resolve_path``.
        content: Text to write.

    Returns:
        WriteResult with ``path`` set on success (``files_update`` is None
        for external storage), or with ``error`` set when the path is
        rejected, the file already exists, or the write fails.
    """
    try:
        resolved_path = self._resolve_path(file_path)
    except ValueError as e:
        # Report rejected paths the same way read() does instead of raising.
        return WriteResult(error=f"Error: {e}")

    exists_error = f"Cannot write to {file_path} because it already exists. Read and then make an edit, or write to a new path."
    if resolved_path.exists():
        return WriteResult(error=exists_error)

    try:
        # Create parent directories if needed
        resolved_path.parent.mkdir(parents=True, exist_ok=True)

        # O_EXCL makes "create only if absent" atomic: a file (or symlink)
        # that appears after the exists() check is not truncated.
        flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
        # Prefer O_NOFOLLOW to avoid writing through symlinks
        if hasattr(os, "O_NOFOLLOW"):
            flags |= os.O_NOFOLLOW
        try:
            fd = os.open(resolved_path, flags, 0o644)
        except FileExistsError:
            return WriteResult(error=exists_error)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(content)

        return WriteResult(path=file_path, files_update=None)
    except (OSError, UnicodeEncodeError) as e:
        return WriteResult(error=f"Error writing file '{file_path}': {e}")
288
+
289
def edit(
    self,
    file_path: str,
    old_string: str,
    new_string: str,
    replace_all: bool = False,
) -> EditResult:
    """Edit a file by replacing string occurrences.

    The file is read as UTF-8, transformed by ``perform_string_replacement``
    and written back in place.

    Args:
        file_path: Path of the file to edit; passed through ``_resolve_path``.
        old_string: Text to look for.
        new_string: Replacement text.
        replace_all: Forwarded to ``perform_string_replacement``.

    Returns:
        EditResult with ``path`` and ``occurrences`` on success
        (``files_update`` is None for external storage), or with ``error``
        set when the path is rejected, the file is missing, the replacement
        helper reports a problem (it then returns a string), or I/O fails.
    """
    try:
        resolved_path = self._resolve_path(file_path)
    except ValueError as e:
        # Report rejected paths as an error result instead of raising.
        return EditResult(error=f"Error: {e}")

    # is_file() is already False for a missing path.
    if not resolved_path.is_file():
        return EditResult(error=f"Error: File '{file_path}' not found")

    # O_NOFOLLOW (where available) avoids reading/writing through symlinks.
    nofollow = getattr(os, "O_NOFOLLOW", 0)
    try:
        # Read securely
        fd = os.open(resolved_path, os.O_RDONLY | nofollow)
        with os.fdopen(fd, "r", encoding="utf-8") as f:
            content = f.read()

        result = perform_string_replacement(
            content, old_string, new_string, replace_all
        )

        # A string result is an error message from the helper.
        if isinstance(result, str):
            return EditResult(error=result)

        new_content, occurrences = result

        # Write securely
        fd = os.open(resolved_path, os.O_WRONLY | os.O_TRUNC | nofollow)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(new_content)

        return EditResult(
            path=file_path, files_update=None, occurrences=int(occurrences)
        )
    except (OSError, UnicodeDecodeError, UnicodeEncodeError) as e:
        return EditResult(error=f"Error editing file '{file_path}': {e}")
332
+
333
def grep_raw(
    self,
    pattern: str,
    path: str | None = None,
    glob: str | None = None,
) -> list[GrepMatch] | str:
    """Search for a regex pattern and return the matches as a flat list.

    ripgrep is tried first; when it is unavailable (``_ripgrep_search``
    returns None) the pure-Python search is used instead.

    Args:
        pattern: Regular expression; it must compile with Python's ``re``.
        path: Base path to search; defaults to ``"."``.
        glob: Optional include filter forwarded to the search helpers.

    Returns:
        A list of ``{"path", "line", "text"}`` dicts, an empty list when the
        base path is rejected or missing, or an error string when the
        pattern is not a valid regex.
    """
    # Reject patterns Python cannot compile before doing any filesystem work.
    try:
        re.compile(pattern)
    except re.error as exc:
        return f"Invalid regex pattern: {exc}"

    try:
        search_root = self._resolve_path(path or ".")
    except ValueError:
        return []
    if not search_root.exists():
        return []

    by_file = self._ripgrep_search(pattern, search_root, glob)
    if by_file is None:
        by_file = self._python_search(pattern, search_root, glob)

    # Flatten {file: [(line, text), ...]} into one record per matching line.
    return [
        {"path": file_key, "line": int(line_no), "text": text}
        for file_key, hits in by_file.items()
        for line_no, text in hits
    ]
366
+
367
def _ripgrep_search(
    self, pattern: str, base_full: Path, include_glob: str | None
) -> dict[str, list[tuple[int, str]]] | None:
    """Run ripgrep (``rg --json``) and collect its matches per file.

    Args:
        pattern: Regular expression handed to ripgrep.
        base_full: Resolved file or directory to search.
        include_glob: Optional ``--glob`` filter.

    Returns:
        Mapping of path -> list of ``(line_number, line_text)``, or None when
        ripgrep could not produce a trustworthy result (not installed, timed
        out, or exited with an error) so the caller can fall back to the
        Python search. In virtual mode the keys are ``/``-rooted paths
        relative to ``self.cwd`` and matches outside the root are dropped.
    """
    cmd = ["rg", "--json"]
    if include_glob:
        cmd.extend(["--glob", include_glob])
    # "--" stops option parsing so a pattern starting with "-" is safe.
    cmd.extend(["--", pattern, str(base_full)])

    try:
        proc = subprocess.run(  # noqa: S603
            cmd,
            capture_output=True,
            text=True,
            # ripgrep's JSON output is UTF-8; do not depend on the locale.
            encoding="utf-8",
            errors="replace",
            timeout=30,
            check=False,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing binary as well as one that cannot be run.
        return None

    # ripgrep exits 0 on match and 1 on no match; anything else means the run
    # failed (for example a pattern valid for Python's re but not for
    # ripgrep), so report "unavailable" rather than "no matches".
    if proc.returncode not in (0, 1):
        return None

    results: dict[str, list[tuple[int, str]]] = {}
    for line in proc.stdout.splitlines():
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            continue
        if data.get("type") != "match":
            continue
        pdata = data.get("data", {})
        ftext = pdata.get("path", {}).get("text")
        if not ftext:
            continue
        ln = pdata.get("line_number")
        if ln is None:
            continue
        p = Path(ftext)
        if self.virtual_mode:
            try:
                virt = "/" + p.resolve().relative_to(self.cwd).as_posix()
            except (OSError, ValueError, RuntimeError):
                # Not representable under the virtual root: skip the match.
                continue
        else:
            virt = str(p)
        lt = pdata.get("lines", {}).get("text", "").rstrip("\n")
        results.setdefault(virt, []).append((int(ln), lt))

    return results
413
+
414
def _python_search(
    self, pattern: str, base_full: Path, include_glob: str | None
) -> dict[str, list[tuple[int, str]]]:
    """Pure-Python regex search used when ripgrep is unavailable.

    Args:
        pattern: Regular expression (Python ``re`` syntax).
        base_full: Resolved path to search. A directory is walked
            recursively; a file is searched on its own.
        include_glob: Optional glob matched against each file *name* with
            ``wcmatch`` (brace expansion enabled).

    Returns:
        Mapping of path -> list of ``(line_number, line_text)`` with 1-based
        line numbers. Files larger than ``self.max_file_size_bytes``,
        unreadable files and files that are not valid UTF-8 are skipped. In
        virtual mode keys are ``/``-rooted paths relative to ``self.cwd`` and
        files outside the root are dropped. An invalid pattern yields ``{}``.
    """
    try:
        regex = re.compile(pattern)
    except re.error:
        return {}

    # Searching a single file must not spill over into its siblings.
    candidates = base_full.rglob("*") if base_full.is_dir() else [base_full]

    results: dict[str, list[tuple[int, str]]] = {}
    for fp in candidates:
        if not fp.is_file():
            continue
        if include_glob and not wcglob.globmatch(
            fp.name, include_glob, flags=wcglob.BRACE
        ):
            continue
        try:
            if fp.stat().st_size > self.max_file_size_bytes:
                continue
        except OSError:
            continue
        try:
            # Explicit UTF-8, matching how the rest of the backend reads text.
            content = fp.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            continue

        hits = [
            (line_num, line)
            for line_num, line in enumerate(content.splitlines(), 1)
            if regex.search(line)
        ]
        if not hits:
            continue

        # The key is the same for every hit in a file, so compute it once.
        if self.virtual_mode:
            try:
                key = "/" + fp.resolve().relative_to(self.cwd).as_posix()
            except (OSError, ValueError, RuntimeError):
                continue
        else:
            key = str(fp)
        results.setdefault(key, []).extend(hits)

    return results
453
+
454
def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
    """Find files matching a glob pattern, searching recursively.

    Args:
        pattern: Glob pattern; leading ``/`` characters are ignored.
        path: Directory to search. ``"/"`` means the backend root
            (``self.cwd``); anything else goes through ``_resolve_path``.

    Returns:
        FileInfo-like dicts for matching regular files, sorted by ``path``.
        ``size`` and ``modified_at`` are omitted when a file cannot be
        stat'ed. In virtual mode paths are reported relative to the root
        with a leading ``/``; otherwise they are absolute filesystem paths.
        An empty list is returned when the search path is rejected, missing
        or not a directory.
    """
    pattern = pattern.lstrip("/")

    try:
        search_path = self.cwd if path == "/" else self._resolve_path(path)
    except ValueError:
        # Same contract as ls_info: a rejected path yields no results.
        return []
    if not search_path.is_dir():
        return []

    results: list[FileInfo] = []
    try:
        # Use recursive globbing to match files in subdirectories as tests expect
        for matched in search_path.rglob(pattern):
            try:
                if not matched.is_file():
                    continue
            except OSError:
                continue

            if self.virtual_mode:
                try:
                    # Component-wise and separator-independent, unlike
                    # stripping a string prefix.
                    shown = "/" + matched.relative_to(self.cwd).as_posix()
                except ValueError:
                    # Outside the root: keep the raw path behind the "/".
                    shown = "/" + str(matched)
            else:
                shown = str(matched)

            entry: FileInfo = {"path": shown, "is_dir": False}
            try:
                st = matched.stat()
            except OSError:
                # Metadata is best-effort; still report the file itself.
                pass
            else:
                entry["size"] = int(st.st_size)
                entry["modified_at"] = datetime.fromtimestamp(
                    st.st_mtime
                ).isoformat()
            results.append(entry)
    except (OSError, ValueError):
        # Unusable pattern or unreadable tree: return what was collected.
        pass

    results.sort(key=lambda item: item.get("path", ""))
    return results
518
+
519
def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
    """Write several binary files to the filesystem.

    Each target goes through ``_resolve_path``; missing parent directories
    are created and an existing file at the target is truncated.

    Args:
        files: ``(path, content)`` pairs, with content given as bytes.

    Returns:
        One FileUploadResponse per input, in input order. ``error`` is None
        on success, otherwise ``"file_not_found"``, ``"permission_denied"``
        or ``"invalid_path"`` (rejected path or any other OS error).
    """
    # O_NOFOLLOW (where available) avoids writing through symlinks.
    create_flags = (
        os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_NOFOLLOW", 0)
    )

    responses: list[FileUploadResponse] = []
    for path, content in files:
        try:
            target = self._resolve_path(path)
            target.parent.mkdir(parents=True, exist_ok=True)
            fd = os.open(target, create_flags, 0o644)
            with os.fdopen(fd, "wb") as out:
                out.write(content)
        except FileNotFoundError:
            outcome = "file_not_found"
        except PermissionError:
            outcome = "permission_denied"
        except (ValueError, OSError):
            # ValueError comes from _resolve_path; every remaining OS error
            # is reported under the same code.
            outcome = "invalid_path"
        else:
            outcome = None
        responses.append(FileUploadResponse(path=path, error=outcome))

    return responses
564
+
565
def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
    """Read several files from the filesystem as bytes.

    Args:
        paths: File paths to download; each goes through ``_resolve_path``.

    Returns:
        One FileDownloadResponse per input path, in input order. ``content``
        holds the file bytes on success; otherwise ``error`` is one of
        ``"file_not_found"``, ``"permission_denied"``, ``"is_directory"`` or
        ``"invalid_path"``.

    Raises:
        OSError: Any other OS-level failure is deliberately left to
            propagate.
    """
    # O_NOFOLLOW (where the OS supports it) prevents following a symlink.
    read_flags = os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0)

    responses: list[FileDownloadResponse] = []
    for path in paths:
        try:
            resolved_path = self._resolve_path(path)
            fd = os.open(resolved_path, read_flags)
            try:
                handle = os.fdopen(fd, "rb")
            except BaseException:
                # os.open can succeed on a directory while fdopen then
                # refuses it; the descriptor is still ours to close.
                try:
                    os.close(fd)
                except OSError:
                    pass  # already closed by the failed wrapper
                raise
            with handle:
                content = handle.read()
            responses.append(
                FileDownloadResponse(path=path, content=content, error=None)
            )
        except FileNotFoundError:
            responses.append(
                FileDownloadResponse(
                    path=path, content=None, error="file_not_found"
                )
            )
        except PermissionError:
            responses.append(
                FileDownloadResponse(
                    path=path, content=None, error="permission_denied"
                )
            )
        except IsADirectoryError:
            responses.append(
                FileDownloadResponse(path=path, content=None, error="is_directory")
            )
        except ValueError:
            responses.append(
                FileDownloadResponse(path=path, content=None, error="invalid_path")
            )
        # Let other errors propagate
    return responses