patchpal-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
patchpal/tools.py ADDED
@@ -0,0 +1,1534 @@
+ """Tools with security guardrails for safe code modification."""
+
+ import difflib
+ import logging
+ import mimetypes
+ import os
+ import platform
+ import shutil
+ import subprocess
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Optional
+
+ import requests
+ from bs4 import BeautifulSoup
+
+ from patchpal.permissions import PermissionManager
+
+ try:
+     from ddgs import DDGS
+ except ImportError:
+     # Fall back to the old package name if the new one is not installed
+     from duckduckgo_search import DDGS
+
+ # Import the version for the user agent
+ try:
+     from patchpal import __version__
+ except ImportError:
+     __version__ = "unknown"
+
+ REPO_ROOT = Path(".").resolve()
+
+ # Platform-aware command blocking - a minimal list, since the permission system
+ # covers the rest. Only block privilege-escalation commands specific to each platform.
+ if platform.system() == "Windows":
+     # Windows privilege escalation commands
+     FORBIDDEN = {"runas", "psexec"}  # Run as different user, SysInternals elevated execution
+ else:
+     # Unix/Linux/macOS privilege escalation commands
+     FORBIDDEN = {"sudo", "su"}  # Privilege escalation
+
+ # Sensitive file patterns
+ SENSITIVE_PATTERNS = {
+     ".env",
+     ".env.local",
+     ".env.production",
+     ".env.development",
+     "credentials.json",
+     "secrets.yaml",
+     "secrets.yml",
+     ".aws/credentials",
+     ".ssh/id_rsa",
+     ".ssh/id_ed25519",
+     "config/master.key",
+     "config/credentials.yml.enc",
+     ".npmrc",
+     ".pypirc",
+     "keyring.cfg",
+ }
+
+ # Critical files that should have warnings
+ CRITICAL_FILES = {
+     "package.json",
+     "package-lock.json",
+     "pyproject.toml",
+     "setup.py",
+     "requirements.txt",
+     "Cargo.toml",
+     "Cargo.lock",
+     "Dockerfile",
+     "docker-compose.yml",
+     "Makefile",
+     ".github/workflows",
+ }
+
+ # Configuration
+ MAX_FILE_SIZE = int(os.getenv("PATCHPAL_MAX_FILE_SIZE", 10 * 1024 * 1024))  # 10MB default
+ READ_ONLY_MODE = os.getenv("PATCHPAL_READ_ONLY", "false").lower() == "true"
+ ALLOW_SENSITIVE = os.getenv("PATCHPAL_ALLOW_SENSITIVE", "false").lower() == "true"
+ ENABLE_AUDIT_LOG = os.getenv("PATCHPAL_AUDIT_LOG", "true").lower() == "true"
+ ENABLE_BACKUPS = os.getenv("PATCHPAL_ENABLE_BACKUPS", "false").lower() == "true"
+ MAX_OPERATIONS = int(os.getenv("PATCHPAL_MAX_OPERATIONS", 1000))
+
+ # Web request configuration
+ WEB_REQUEST_TIMEOUT = int(os.getenv("PATCHPAL_WEB_TIMEOUT", 30))  # 30 seconds
+ MAX_WEB_CONTENT_SIZE = int(
+     os.getenv("PATCHPAL_MAX_WEB_SIZE", 5 * 1024 * 1024)
+ )  # 5MB download limit
+ MAX_WEB_CONTENT_CHARS = int(
+     os.getenv("PATCHPAL_MAX_WEB_CHARS", 100_000)
+ )  # 100k chars (~25k tokens) - reduced to prevent context overflow
+ WEB_USER_AGENT = f"PatchPal/{__version__} (AI Code Assistant)"
+
+ # Shell command configuration
+ SHELL_TIMEOUT = int(os.getenv("PATCHPAL_SHELL_TIMEOUT", 30))  # 30 seconds default
+
+
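+ # Illustrative only (not package code): the guardrails above are tuned per
+ # session through environment variables, e.g. in a shell:
+ #
+ #   export PATCHPAL_READ_ONLY=true          # block all file modifications
+ #   export PATCHPAL_MAX_FILE_SIZE=20971520  # raise the file-size cap to 20MB
+ #   export PATCHPAL_SHELL_TIMEOUT=60        # allow shell commands up to 60s
+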
+ # Create the patchpal directory structure in the home directory.
+ # Format: ~/.patchpal/<repo-name>/
+ def _get_patchpal_dir() -> Path:
+     """Get the patchpal directory for this repository."""
+     home = Path.home()
+     patchpal_root = home / ".patchpal"
+
+     # Use the repo name (last part of the path) to create a unique directory
+     repo_name = REPO_ROOT.name
+     repo_dir = patchpal_root / repo_name
+
+     # Create directories if they don't exist
+     repo_dir.mkdir(parents=True, exist_ok=True)
+     (repo_dir / "backups").mkdir(exist_ok=True)
+
+     return repo_dir
+
+
+ PATCHPAL_DIR = _get_patchpal_dir()
+ BACKUP_DIR = PATCHPAL_DIR / "backups"
+ AUDIT_LOG_FILE = PATCHPAL_DIR / "audit.log"
+
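+ # Resulting on-disk layout (illustrative, for a repository named "myrepo"):
+ #   ~/.patchpal/myrepo/backups/   - timestamped copies made before modifications
+ #   ~/.patchpal/myrepo/audit.log  - rotating audit trail of tool operations
+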
+ # Permission manager
+ _permission_manager = None
+
+
+ def _get_permission_manager() -> PermissionManager:
+     """Get or create the global permission manager."""
+     global _permission_manager
+     if _permission_manager is None:
+         _permission_manager = PermissionManager(PATCHPAL_DIR)
+     return _permission_manager
+
+
+ # Audit logging setup with rotation
+ audit_logger = logging.getLogger("patchpal.audit")
+ if ENABLE_AUDIT_LOG and not audit_logger.handlers:
+     from logging.handlers import RotatingFileHandler
+
+     audit_logger.setLevel(logging.INFO)
+     # Rotate at 10MB; keep 3 backup files (up to 40MB total, including the active log)
+     handler = RotatingFileHandler(
+         AUDIT_LOG_FILE,
+         maxBytes=10 * 1024 * 1024,  # 10MB
+         backupCount=3,
+     )
+     handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
+     audit_logger.addHandler(handler)
+
+
+ # Operation counter for resource limits
+ class OperationLimiter:
+     """Track operations to prevent abuse."""
+
+     def __init__(self):
+         self.operations = 0
+         self.max_operations = MAX_OPERATIONS
+
+     def check_limit(self, operation: str):
+         """Check if the operation limit has been exceeded."""
+         self.operations += 1
+         if self.operations > self.max_operations:
+             raise ValueError(
+                 f"Operation limit exceeded ({self.max_operations} operations)\n"
+                 f"This prevents infinite loops. Increase with the PATCHPAL_MAX_OPERATIONS env var."
+             )
+         audit_logger.info(f"Operation {self.operations}/{self.max_operations}: {operation}")
+
+     def reset(self):
+         """Reset the operation counter."""
+         self.operations = 0
+
+
+ # Global operation limiter
+ _operation_limiter = OperationLimiter()
+
+
+ def reset_operation_counter():
+     """Reset the operation counter. Useful for testing or starting new tasks."""
+     _operation_limiter.reset()
+
+
+ def get_operation_count() -> int:
+     """Get the current operation count."""
+     return _operation_limiter.operations
+
+
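+ # Sketch of how the limiter is used by the tools below (hypothetical session):
+ #   reset_operation_counter()                       # start a fresh task
+ #   _operation_limiter.check_limit("read_file(a)")  # increments and logs
+ #   get_operation_count()                           # -> 1
+ # Once MAX_OPERATIONS is exceeded, check_limit raises ValueError.
+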
+ def _format_colored_diff(
+     old_text: str,
+     new_text: str,
+     max_lines: int = 50,
+     file_path: Optional[str] = None,
+     start_line: Optional[int] = None,
+ ) -> str:
+     """Format text changes with colors showing actual differences.
+
+     Args:
+         old_text: Original text
+         new_text: New text
+         max_lines: Maximum diff lines to show (default: 50)
+         file_path: Optional file path to read full content for accurate line numbers
+         start_line: Optional starting line number for context (for edit_file)
+
+     Returns:
+         Formatted string with a colored unified diff with line numbers
+     """
+     # If we have a file path, read the full content to get accurate line numbers
+     if file_path:
+         try:
+             p = Path(file_path)
+             if not p.is_absolute():
+                 p = REPO_ROOT / file_path
+             if p.exists():
+                 full_content = p.read_text(encoding="utf-8", errors="replace")
+                 # Find the position of old_text in the full file
+                 pos = full_content.find(old_text)
+                 if pos != -1:
+                     # Count lines before the match to get the starting line number
+                     start_line = full_content[:pos].count("\n") + 1
+         except Exception:
+             pass  # If reading fails, fall back to relative line numbers
+
+     # Split into lines for diffing
+     old_lines = old_text.splitlines(keepends=True)
+     new_lines = new_text.splitlines(keepends=True)
+
+     # Use SequenceMatcher for a cleaner diff that shows true changes,
+     # instead of a unified diff, which can be confusing with context lines
+     matcher = difflib.SequenceMatcher(None, old_lines, new_lines)
+
+     result = []
+     line_count = 0
+     old_line_num = start_line if start_line else 1
+     new_line_num = start_line if start_line else 1
+
+     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+         if line_count >= max_lines:
+             result.append(" \033[90m... (truncated)\033[0m")
+             break
+
+         if tag == "equal":
+             # Show context lines in gray (only once, not as -/+)
+             for i in range(i1, i2):
+                 if line_count >= max_lines:
+                     break
+                 # Only show a few context lines at boundaries
+                 if i < i1 + 2 or i >= i2 - 2:
+                     result.append(f" \033[90m{old_line_num:4d} {old_lines[i].rstrip()}\033[0m")
+                     line_count += 1
+                 elif i == i1 + 2:
+                     # Show an ellipsis for skipped context
+                     result.append(" \033[90m ...\033[0m")
+                     line_count += 1
+                 old_line_num += 1
+                 new_line_num += 1
+
+         elif tag == "delete":
+             # Lines only in old (removed)
+             for i in range(i1, i2):
+                 if line_count >= max_lines:
+                     break
+                 result.append(f" \033[31m{old_line_num:4d} -{old_lines[i].rstrip()}\033[0m")
+                 old_line_num += 1
+                 line_count += 1
+
+         elif tag == "insert":
+             # Lines only in new (added)
+             for j in range(j1, j2):
+                 if line_count >= max_lines:
+                     break
+                 result.append(f" \033[32m{new_line_num:4d} +{new_lines[j].rstrip()}\033[0m")
+                 new_line_num += 1
+                 line_count += 1
+
+         elif tag == "replace":
+             # Lines changed (show old, then new)
+             for i in range(i1, i2):
+                 if line_count >= max_lines:
+                     break
+                 result.append(f" \033[31m{old_line_num:4d} -{old_lines[i].rstrip()}\033[0m")
+                 old_line_num += 1
+                 line_count += 1
+             for j in range(j1, j2):
+                 if line_count >= max_lines:
+                     break
+                 result.append(f" \033[32m{new_line_num:4d} +{new_lines[j].rstrip()}\033[0m")
+                 new_line_num += 1
+                 line_count += 1
+
+     # If there is no diff output (identical content), show a message
+     if not result:
+         return " \033[90m(no changes)\033[0m"
+
+     return "\n".join(result)
+
+
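+ # Illustrative output for a one-line replacement (ANSI color codes omitted):
+ #     12  unchanged context line
+ #     13 -old version of the line
+ #     13 +new version of the line
+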
+ def _check_git_status() -> dict:
+     """Check git repository status."""
+     try:
+         # Check if we're in a git repo
+         result = subprocess.run(
+             ["git", "rev-parse", "--git-dir"],
+             capture_output=True,
+             text=True,
+             cwd=REPO_ROOT,
+             timeout=5,
+         )
+         if result.returncode != 0:
+             return {"is_repo": False}
+
+         # Get the status
+         result = subprocess.run(
+             ["git", "status", "--porcelain"],
+             capture_output=True,
+             text=True,
+             cwd=REPO_ROOT,
+             timeout=5,
+         )
+
+         return {
+             "is_repo": True,
+             "has_uncommitted": bool(result.stdout.strip()),
+             "changes": result.stdout.strip().split("\n") if result.stdout.strip() else [],
+         }
+     except Exception:
+         # Covers subprocess.TimeoutExpired, FileNotFoundError, and anything else
+         return {"is_repo": False}
+
+
+ def _backup_file(path: Path) -> Optional[Path]:
+     """Create a backup of a file before modification."""
+     if not ENABLE_BACKUPS or not path.exists():
+         return None
+
+     try:
+         BACKUP_DIR.mkdir(exist_ok=True)
+         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+         # Include the path structure in the backup name to handle identical filenames.
+         # Handle both repo-relative and absolute paths.
+         if _is_inside_repo(path):
+             relative = path.relative_to(REPO_ROOT)
+             backup_name = f"{str(relative).replace('/', '_')}.{timestamp}"
+         else:
+             # For files outside the repo, use the absolute path in the backup name
+             backup_name = f"{str(path).replace('/', '_')}.{timestamp}"
+
+         backup_path = BACKUP_DIR / backup_name
+
+         shutil.copy2(path, backup_path)
+         audit_logger.info(f"BACKUP: {path} -> {backup_path}")
+         return backup_path
+     except Exception as e:
+         audit_logger.warning(f"BACKUP FAILED: {path} - {e}")
+         return None
+
+
+ def _is_sensitive_file(path: Path) -> bool:
+     """Check if a file contains sensitive data."""
+     path_str = str(path).lower()
+     return any(pattern in path_str for pattern in SENSITIVE_PATTERNS)
+
+
+ def _is_critical_file(path: Path) -> bool:
+     """Check if a file is critical infrastructure."""
+     path_str = str(path).lower()
+     return any(pattern in path_str for pattern in CRITICAL_FILES)
+
+
+ def _is_binary_file(path: Path) -> bool:
+     """Check if a file is binary."""
+     if not path.exists():
+         return False
+
+     # Check the MIME type first
+     mime_type, _ = mimetypes.guess_type(str(path))
+     if mime_type and not mime_type.startswith("text/"):
+         return True
+
+     # Fallback: check for null bytes in the first 8KB
+     try:
+         with open(path, "rb") as f:
+             chunk = f.read(8192)
+             return b"\x00" in chunk
+     except Exception:
+         return True
+
+
+ def _is_inside_repo(path: Path) -> bool:
+     """Check if a path is inside the repository."""
+     # Compare path components rather than raw string prefixes, so a sibling
+     # directory whose name merely starts with the repo path does not match.
+     try:
+         path.relative_to(REPO_ROOT)
+         return True
+     except ValueError:
+         return False
+
+
+ def _check_path(path: str, must_exist: bool = True) -> Path:
+     """
+     Validate and resolve a path.
+
+     Args:
+         path: Path to validate (relative or absolute)
+         must_exist: Whether the file must exist
+
+     Returns:
+         Resolved Path object
+
+     Raises:
+         ValueError: If path validation fails
+
+     Note:
+         Can access files anywhere on the system (inside or outside the repository).
+         Sensitive files (.env, credentials) are blocked unless PATCHPAL_ALLOW_SENSITIVE is set.
+     """
+     # Resolve the path (handles both absolute and relative paths)
+     path_obj = Path(path)
+     if path_obj.is_absolute():
+         p = path_obj.resolve()
+     else:
+         p = (REPO_ROOT / path).resolve()
+
+     # Check that the file exists when required
+     if must_exist and not p.is_file():
+         raise ValueError(f"File not found: {path}")
+
+     # Check if the file is sensitive (regardless of location)
+     if _is_sensitive_file(p) and not ALLOW_SENSITIVE:
+         raise ValueError(
+             f"Access to sensitive file blocked: {path}\n"
+             f"Set PATCHPAL_ALLOW_SENSITIVE=true to override (not recommended)"
+         )
+
+     return p
+
+
+ def read_file(path: str) -> str:
+     """
+     Read the contents of a file.
+
+     Args:
+         path: Path to the file (relative to the repository root, or absolute)
+
+     Returns:
+         The file contents as a string
+
+     Raises:
+         ValueError: If the file is too large, binary, or sensitive
+     """
+     _operation_limiter.check_limit(f"read_file({path})")
+
+     p = _check_path(path)
+
+     # Check the file size
+     size = p.stat().st_size
+     if size > MAX_FILE_SIZE:
+         raise ValueError(
+             f"File too large: {size:,} bytes (max {MAX_FILE_SIZE:,} bytes)\n"
+             f"Set the PATCHPAL_MAX_FILE_SIZE env var to increase"
+         )
+
+     # Check if binary
+     if _is_binary_file(p):
+         raise ValueError(
+             f"Cannot read binary file: {path}\nType: {mimetypes.guess_type(str(p))[0] or 'unknown'}"
+         )
+
+     content = p.read_text(encoding="utf-8", errors="replace")
+     audit_logger.info(f"READ: {path} ({size} bytes)")
+     return content
+
+
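+ # Usage sketch (hypothetical paths): read_file("README.md") returns the text,
+ # while read_file(".env") raises ValueError unless PATCHPAL_ALLOW_SENSITIVE=true.
+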
+ def list_files() -> list[str]:
+     """
+     List all files in the repository.
+
+     Returns:
+         A list of relative file paths (excludes hidden files; binary filtering
+         is available below but disabled by default)
+     """
+     _operation_limiter.check_limit("list_files()")
+
+     files = []
+     for p in REPO_ROOT.rglob("*"):
+         if not p.is_file():
+             continue
+
+         # Skip hidden files
+         if any(part.startswith(".") for part in p.parts):
+             continue
+
+         # Skip binary files (optional - can be slow on large repos)
+         # if _is_binary_file(p):
+         #     continue
+
+         files.append(str(p.relative_to(REPO_ROOT)))
+
+     audit_logger.info(f"LIST: Found {len(files)} files")
+     return files
+
+
+ def get_file_info(path: str) -> str:
+     """
+     Get metadata for file(s) at the specified path.
+
+     Args:
+         path: Path to a file, directory, or glob pattern (e.g., "tests/*.txt").
+             Can be relative to the repository root or absolute.
+
+     Returns:
+         Formatted string with file metadata (name, size, modified time, type).
+         For multiple files, returns one line per file.
+
+     Raises:
+         ValueError: If no files are found
+     """
+     _operation_limiter.check_limit(f"get_file_info({path[:30]}...)")
+
+     # Handle glob patterns
+     if "*" in path or "?" in path:
+         # It's a glob pattern; use glob to find matching files
+         try:
+             matches = list(REPO_ROOT.glob(path))
+         except Exception as e:
+             raise ValueError(f"Invalid glob pattern: {e}")
+
+         if not matches:
+             return f"No files found matching pattern: {path}"
+
+         # Filter to files only
+         files = [p for p in matches if p.is_file()]
+         if not files:
+             return f"No files found matching pattern: {path}"
+     else:
+         # Single path
+         p = _check_path(path, must_exist=False)
+
+         if not p.exists():
+             return f"Path does not exist: {path}"
+
+         if p.is_file():
+             files = [p]
+         elif p.is_dir():
+             # List all files in the directory (non-recursive)
+             files = [f for f in p.iterdir() if f.is_file() and not f.name.startswith(".")]
+             if not files:
+                 return f"No files found in directory: {path}"
+         else:
+             return f"Path is not a file or directory: {path}"
+
+     # Format the file information
+     results = []
+     for file_path in sorted(files):
+         try:
+             stat = file_path.stat()
+
+             # Try to get a relative path; if that fails (e.g., Windows short names),
+             # fall back to the file name or absolute path
+             try:
+                 relative_path = file_path.relative_to(REPO_ROOT)
+             except ValueError:
+                 # Can't compute the relative path (e.g., Windows short-name mismatch).
+                 # Try to compute it manually by resolving both paths.
+                 try:
+                     resolved_file = file_path.resolve()
+                     resolved_repo = REPO_ROOT.resolve()
+                     relative_path = resolved_file.relative_to(resolved_repo)
+                 except (ValueError, OSError):
+                     # Last resort: just use the file name
+                     relative_path = file_path.name
+
+             # Format the size
+             size = stat.st_size
+             if size < 1024:
+                 size_str = f"{size}B"
+             elif size < 1024 * 1024:
+                 size_str = f"{size / 1024:.1f}KB"
+             else:
+                 size_str = f"{size / (1024 * 1024):.1f}MB"
+
+             # Format the modification time (datetime is imported at module level)
+             mtime = datetime.fromtimestamp(stat.st_mtime).strftime("%Y-%m-%d %H:%M:%S")
+
+             # Detect the file type
+             if _is_binary_file(file_path):
+                 file_type = "binary"
+             else:
+                 mime_type, _ = mimetypes.guess_type(str(file_path))
+                 file_type = mime_type or "text"
+
+             results.append(f"{str(relative_path):<50} {size_str:>10} {mtime} {file_type}")
+
+         except Exception as e:
+             # Get a relative path for the error message (may fail if the path is invalid)
+             try:
+                 relative_path = file_path.relative_to(REPO_ROOT)
+             except Exception:
+                 try:
+                     resolved_file = file_path.resolve()
+                     resolved_repo = REPO_ROOT.resolve()
+                     relative_path = resolved_file.relative_to(resolved_repo)
+                 except Exception:
+                     relative_path = file_path.name
+             results.append(f"{str(relative_path):<50} ERROR: {e}")
+
+     header = f"{'Path':<50} {'Size':>10} {'Modified'} {'Type'}"
+     separator = "-" * 100
+
+     output = f"{header}\n{separator}\n" + "\n".join(results)
+     audit_logger.info(f"FILE_INFO: {path} - {len(files)} file(s)")
+     return output
+
+
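+ # Illustrative output (hypothetical repo contents):
+ #   Path                                                     Size Modified            Type
+ #   ----------------------------------------------------------------------------------------------------
+ #   README.md                                               1.2KB 2024-05-01 12:00:00 text/markdown
+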
+ def find_files(pattern: str, case_sensitive: bool = True) -> str:
+     """
+     Find files by name pattern (glob-style wildcards).
+
+     Args:
+         pattern: Glob pattern (e.g., '*.py', 'test_*.txt', 'src/**/*.js')
+         case_sensitive: Whether to match case-sensitively (default: True)
+
+     Returns:
+         List of matching file paths, one per line
+
+     Examples:
+         find_files("*.py")          # Python files in the repo root
+         find_files("test_*.py")     # Test files in the repo root
+         find_files("**/*.md")       # All markdown files, recursively
+         find_files("*.TXT", False)  # .txt files (case-insensitive)
+     """
+     _operation_limiter.check_limit(f"find_files({pattern})")
+
+     try:
+         # Use glob to find matching files
+         if case_sensitive:
+             matches = list(REPO_ROOT.glob(pattern))
+         else:
+             # Case-insensitive: walk the tree and match with fnmatch
+             import fnmatch
+
+             matches = []
+             for file_path in REPO_ROOT.rglob("*"):
+                 if file_path.is_file():
+                     # Skip hidden files
+                     relative_path = file_path.relative_to(REPO_ROOT)
+                     if any(part.startswith(".") for part in relative_path.parts):
+                         continue
+                     # Check whether it matches the pattern (case-insensitive)
+                     if fnmatch.fnmatch(str(relative_path).lower(), pattern.lower()):
+                         matches.append(file_path)
+
+         # Filter to files only (not directories) and exclude hidden ones
+         files = []
+         for match in matches:
+             if match.is_file():
+                 relative_path = match.relative_to(REPO_ROOT)
+                 # Skip hidden files/directories
+                 if not any(part.startswith(".") for part in relative_path.parts):
+                     files.append(str(relative_path))
+
+         if not files:
+             audit_logger.info(f"FIND_FILES: {pattern} - No matches")
+             return f"No files matching pattern: {pattern}"
+
+         # Sort for consistent output
+         files.sort()
+
+         header = f"Files matching '{pattern}' ({len(files)} found):"
+         separator = "-" * 100
+
+         audit_logger.info(f"FIND_FILES: {pattern} - {len(files)} file(s)")
+         return f"{header}\n{separator}\n" + "\n".join(files)
+
+     except Exception as e:
+         raise ValueError(f"Error finding files: {e}")
+
+
+ def tree(path: str = ".", max_depth: int = 3, show_hidden: bool = False) -> str:
+     """
+     Show a directory tree structure.
+
+     Args:
+         path: Starting directory path (relative to the repo root, or absolute)
+         max_depth: Maximum depth to traverse (default: 3, max: 10)
+         show_hidden: Include hidden files/directories (default: False)
+
+     Returns:
+         Visual tree structure of the directory
+
+     Example output:
+         .
+         ├── patchpal/
+         │   ├── __init__.py
+         │   ├── agent.py
+         │   └── tools.py
+         └── tests/
+             ├── test_agent.py
+             └── test_tools.py
+     """
+     _operation_limiter.check_limit(f"tree({path})")
+
+     # Limit max_depth
+     max_depth = min(max_depth, 10)
+
+     # Resolve the path (handles both absolute and relative paths)
+     path_obj = Path(path)
+     if path_obj.is_absolute():
+         start_path = path_obj.resolve()
+     else:
+         start_path = (REPO_ROOT / path).resolve()
+
+     # Check that the path exists and is a directory
+     if not start_path.exists():
+         raise ValueError(f"Path not found: {path}")
+
+     if not start_path.is_dir():
+         raise ValueError(f"Path is not a directory: {path}")
+
+     def _build_tree(dir_path: Path, prefix: str = "", depth: int = 0) -> list:
+         """Recursively build the tree structure."""
+         if depth >= max_depth:
+             return []
+
+         try:
+             # Get all items in the directory, directories first
+             items = sorted(dir_path.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower()))
+
+             # Filter hidden files if needed
+             if not show_hidden:
+                 items = [item for item in items if not item.name.startswith(".")]
+
+             lines = []
+             for i, item in enumerate(items):
+                 is_last = i == len(items) - 1
+
+                 # Build the tree characters
+                 connector = "└── " if is_last else "├── "
+                 item_name = item.name + "/" if item.is_dir() else item.name
+
+                 lines.append(f"{prefix}{connector}{item_name}")
+
+                 # Recurse into directories
+                 if item.is_dir():
+                     extension = "    " if is_last else "│   "
+                     lines.extend(_build_tree(item, prefix + extension, depth + 1))
+
+             return lines
+
+         except PermissionError:
+             return [f"{prefix}[Permission Denied]"]
+
+     try:
+         # Build the tree.
+         # Show a relative path if inside the repo, an absolute path if outside.
+         if _is_inside_repo(start_path):
+             display_path = (
+                 start_path.relative_to(REPO_ROOT) if start_path != REPO_ROOT else Path(".")
+             )
+         else:
+             display_path = start_path
+
+         result = [str(display_path) + "/"]
+         result.extend(_build_tree(start_path))
+
+         audit_logger.info(f"TREE: {path} (depth={max_depth})")
+         return "\n".join(result)
+
+     except Exception as e:
+         raise ValueError(f"Error generating tree: {e}")
+
+
+ def list_skills() -> str:
+     """
+     List all available skills that can be invoked.
+
+     Skills are reusable workflows stored in:
+     - Personal: ~/.patchpal/skills/
+     - Project: <repo>/.patchpal/skills/
+
+     Returns:
+         Formatted list of available skills with names and descriptions
+     """
+     _operation_limiter.check_limit("list_skills()")
+
+     from patchpal.skills import list_skills as discover_all_skills
+
+     skills = discover_all_skills(repo_root=REPO_ROOT)
+
+     if not skills:
+         return """No skills found.
+
+ To get started:
+ 1. View examples: https://github.com/amaiya/patchpal/tree/main/examples/skills
+ 2. Copy examples to your personal skills directory:
+    mkdir -p ~/.patchpal/skills
+    # Download and copy the commit and review skills from the examples folder
+ 3. Or create your own skill in ~/.patchpal/skills/<skill-name>/SKILL.md
+
+ Skills are markdown files with YAML frontmatter. See the examples for the format."""
+
+     header = f"Available Skills ({len(skills)}):"
+     separator = "-" * 100
+
+     lines = [header, separator]
+     for skill in skills:
+         lines.append(f" /{skill.name}")
+         lines.append(f" {skill.description}")
+         lines.append("")
+
+     lines.append("How to invoke skills:")
+     lines.append(" - User types: /skill_name (e.g., /commit)")
+     lines.append(" - Or just ask naturally and the agent will discover the right skill")
+
+     audit_logger.info(f"LIST_SKILLS: {len(skills)} skill(s)")
+     return "\n".join(lines)
+
+
+ def use_skill(skill_name: str, args: str = "") -> str:
+     """
+     Invoke a skill with optional arguments.
+
+     Args:
+         skill_name: Name of the skill to invoke (without the / prefix)
+         args: Optional arguments to pass to the skill
+
+     Returns:
+         The skill's instructions, formatted with any provided arguments
+
+     Example:
+         use_skill("commit", args="Fix bug in auth")
+     """
+     _operation_limiter.check_limit(f"use_skill({skill_name})")
+
+     from patchpal.skills import get_skill
+
+     skill = get_skill(skill_name, repo_root=REPO_ROOT)
+
+     if not skill:
+         available_skills = list_skills()
+         return f"Skill not found: {skill_name}\n\n{available_skills}"
+
+     # Format the skill instructions with arguments, if provided
+     instructions = skill.instructions
+     if args:
+         instructions = f"{instructions}\n\nArguments: {args}"
+
+     audit_logger.info(f"USE_SKILL: {skill_name} (args={args[:50]})")
+
+     return f"Skill: {skill.name}\n\n{instructions}"
+
+
+ def apply_patch(path: str, new_content: str) -> str:
+     """
+     Apply changes to a file by replacing its contents.
+
+     Args:
+         path: Path to the file (relative to the repository root, or absolute)
+         new_content: The new complete content for the file
+
+     Returns:
+         A confirmation message with the unified diff
+
+     Raises:
+         ValueError: If in read-only mode or the file is too large
+     """
+     _operation_limiter.check_limit(f"apply_patch({path})")
+
+     if READ_ONLY_MODE:
+         raise ValueError(
+             "Cannot modify files in read-only mode\n"
+             "Set PATCHPAL_READ_ONLY=false to allow modifications"
+         )
+
+     p = _check_path(path, must_exist=False)
+
+     # Check the size of the new content
+     new_size = len(new_content.encode("utf-8"))
+     if new_size > MAX_FILE_SIZE:
+         raise ValueError(f"New content too large: {new_size:,} bytes (max {MAX_FILE_SIZE:,} bytes)")
+
+     # Read the old content if the file exists (needed for the diff in the permission prompt)
+     old_content = ""
+     if p.exists():
+         old_content = p.read_text(encoding="utf-8", errors="replace")
+         old = old_content.splitlines(keepends=True)
+     else:
+         old = []
+
+     # Check permission with a colored diff
+     permission_manager = _get_permission_manager()
+     operation = "Create" if not p.exists() else "Update"
+     diff_display = _format_colored_diff(old_content, new_content, file_path=path)
+
+     # Add a warning if writing outside the repository
+     outside_repo_warning = ""
+     if not _is_inside_repo(p):
+         outside_repo_warning = "\n ⚠️ WARNING: Writing file outside repository\n"
+
+     description = f" ● {operation}({path}){outside_repo_warning}\n{diff_display}"
+
+     if not permission_manager.request_permission("apply_patch", description, pattern=path):
+         return "Operation cancelled by user."
+
+     # Check git status for uncommitted changes (only for files inside the repo)
+     git_status = _check_git_status()
+     git_warning = ""
+     if _is_inside_repo(p) and git_status.get("is_repo") and git_status.get("has_uncommitted"):
+         relative_path = str(p.relative_to(REPO_ROOT))
+         if any(relative_path in change for change in git_status.get("changes", [])):
+             git_warning = "\n⚠️ Note: File has uncommitted changes in git\n"
+
+     # Back up the existing file
+     backup_path = None
+     if p.exists():
+         backup_path = _backup_file(p)
+
+     new = new_content.splitlines(keepends=True)
+
+     # Generate the diff
+     diff = difflib.unified_diff(
+         old,
+         new,
+         fromfile=f"{path} (before)",
+         tofile=f"{path} (after)",
+     )
+     diff_str = "".join(diff)
+
+     # Check if this is a critical file
+     warning = ""
+     if _is_critical_file(p):
+         warning = "\n⚠️ WARNING: Modifying critical infrastructure file!\n"
+
+     # Write the new content
+     p.parent.mkdir(parents=True, exist_ok=True)
+     p.write_text(new_content, encoding="utf-8")
+
+     # Audit log
+     audit_logger.info(
+         f"WRITE: {path} ({new_size} bytes)" + (f" [BACKUP: {backup_path}]" if backup_path else "")
+     )
+
+     backup_msg = f"\n[Backup saved: {backup_path}]" if backup_path else ""
+
+     return f"Successfully updated {path}{warning}{git_warning}{backup_msg}\n\nDiff:\n{diff_str}"
+
+
+ def edit_file(path: str, old_string: str, new_string: str) -> str:
+     """
+     Edit a file by replacing an exact string match.
+
+     Args:
+         path: Path to the file (relative to the repository root, or absolute)
+         old_string: The exact string to find and replace
+         new_string: The string to replace it with
+
+     Returns:
+         Confirmation message with the changes made
+
+     Raises:
+         ValueError: If the file is not found, old_string is not found, or there are multiple matches
+     """
+     _operation_limiter.check_limit(f"edit_file({path[:30]}...)")
+
+     if READ_ONLY_MODE:
+         raise ValueError(
+             "Cannot edit files in read-only mode\n"
+             "Set PATCHPAL_READ_ONLY=false to allow modifications"
+         )
+
+     p = _check_path(path, must_exist=True)
+
+     # Read the current content
+     try:
+         content = p.read_text(encoding="utf-8", errors="replace")
+     except Exception as e:
+         raise ValueError(f"Failed to read file: {e}")
+
+     # Check for old_string
+     if old_string not in content:
+         raise ValueError(
+             f"String not found in {path}:\n{old_string[:100]}"
+             + ("..." if len(old_string) > 100 else "")
+         )
+
+     # Count occurrences
+     count = content.count(old_string)
+     if count > 1:
+         raise ValueError(
+             f"String appears {count} times in {path}. "
+             f"Please provide a more specific string to ensure correct replacement.\n"
+             f"First occurrence context:\n{content[max(0, content.find(old_string) - 50) : content.find(old_string) + len(old_string) + 50]}"
+         )
+
+     # Check permission before proceeding
+     permission_manager = _get_permission_manager()
+
+     # Format a colored diff for the permission prompt
+     diff_display = _format_colored_diff(old_string, new_string, file_path=path)
+
+     # Add a warning if writing outside the repository
+     outside_repo_warning = ""
+     if not _is_inside_repo(p):
+         outside_repo_warning = "\n ⚠️ WARNING: Writing file outside repository\n"
+
+     description = f" ● Update({path}){outside_repo_warning}\n{diff_display}"
+
+     if not permission_manager.request_permission("edit_file", description, pattern=path):
+         return "Operation cancelled by user."
+
+     # Back up if enabled
+     backup_path = _backup_file(p)
+
+     # Perform the replacement
+     new_content = content.replace(old_string, new_string)
+
+     # Write the new content
+     p.write_text(new_content, encoding="utf-8")
+
+     # Generate a diff for the specific change
+     old_lines = old_string.split("\n")
+     new_lines = new_string.split("\n")
+     diff = difflib.unified_diff(old_lines, new_lines, fromfile="old", tofile="new", lineterm="")
+     diff_str = "\n".join(diff)
+
+     audit_logger.info(f"EDIT: {path} ({len(old_string)} -> {len(new_string)} chars)")
+
+     backup_msg = f"\n[Backup saved: {backup_path}]" if backup_path else ""
+     return f"Successfully edited {path}{backup_msg}\n\nChange:\n{diff_str}"
+
+
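+ # Usage sketch (illustrative values):
+ #   edit_file("app.py", old_string="timeout = 30", new_string="timeout = 60")
+ # A non-unique old_string raises ValueError, which forces callers to include
+ # enough surrounding context to pin down a single match.
+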
+ def git_status() -> str:
+     """
+     Get the status of the git repository.
+
+     Returns:
+         Formatted git status output showing modified, staged, and untracked files
+
+     Raises:
+         ValueError: If not in a git repository or the git command fails
+     """
+     _operation_limiter.check_limit("git_status()")
+
+     try:
+         # Check if we're in a git repo
+         result = subprocess.run(
+             ["git", "rev-parse", "--git-dir"],
+             capture_output=True,
+             text=True,
+             cwd=REPO_ROOT,
+             timeout=5,
+         )
+         if result.returncode != 0:
+             return "Not a git repository"
+
+         # Get the status in short format
+         result = subprocess.run(
+             ["git", "status", "--short", "--branch"],
+             capture_output=True,
+             text=True,
+             cwd=REPO_ROOT,
+             timeout=10,
+         )
+
+         if result.returncode != 0:
+             raise ValueError(f"Git status failed: {result.stderr}")
+
+         output = result.stdout.strip()
+         if not output:
+             return "Git repository: No changes (working tree clean)"
+
+         audit_logger.info("GIT_STATUS: executed")
+         return f"Git status:\n{output}"
+
+     except subprocess.TimeoutExpired:
+         raise ValueError("Git status timed out")
+     except FileNotFoundError:
+         raise ValueError("Git command not found. Is git installed?")
+     except Exception as e:
+         raise ValueError(f"Git status error: {e}")
+
+
+ def git_diff(path: Optional[str] = None, staged: bool = False) -> str:
+     """
+     Get the git diff for the repository or a specific file.
+
+     Args:
+         path: Optional path to a specific file (relative to the repo root)
+         staged: If True, show staged changes (--cached); otherwise show unstaged changes
+
+     Returns:
+         Git diff output
+
+     Raises:
+         ValueError: If not in a git repository or the git command fails
+     """
+     _operation_limiter.check_limit(f"git_diff({path or 'all'})")
+
+     try:
+         # Check if we're in a git repo
+         result = subprocess.run(
+             ["git", "rev-parse", "--git-dir"],
+             capture_output=True,
+             text=True,
+             cwd=REPO_ROOT,
+             timeout=5,
+         )
+         if result.returncode != 0:
+             return "Not a git repository"
+
+         # Build the git diff command
+         cmd = ["git", "diff"]
+         if staged:
+             cmd.append("--cached")
+
+         if path:
+             # Validate the path
+             p = _check_path(path, must_exist=False)
+             # Git operations only work on repository files
+             if not _is_inside_repo(p):
+                 raise ValueError(
+                     f"Git operations only work on repository files. Path {path} is outside the repository."
+                 )
+             cmd.append(str(p.relative_to(REPO_ROOT)))
+
+         result = subprocess.run(cmd, capture_output=True, text=True, cwd=REPO_ROOT, timeout=30)
+
+         if result.returncode != 0:
+             raise ValueError(f"Git diff failed: {result.stderr}")
+
+         output = result.stdout.strip()
+         if not output:
+             stage_msg = "staged " if staged else ""
+             path_msg = f" for {path}" if path else ""
+             return f"No {stage_msg}changes{path_msg}"
+
+         audit_logger.info(f"GIT_DIFF: {path or 'all'} (staged={staged})")
+         return output
+
+     except subprocess.TimeoutExpired:
+         raise ValueError("Git diff timed out")
+     except FileNotFoundError:
+         raise ValueError("Git command not found. Is git installed?")
+     except Exception as e:
+         raise ValueError(f"Git diff error: {e}")
+
+
+ def git_log(max_count: int = 10, path: Optional[str] = None) -> str:
+     """
+     Get the git commit history.
+
+     Args:
+         max_count: Maximum number of commits to show (default: 10, max: 50)
+         path: Optional path to show the history of a specific file
+
+     Returns:
+         Formatted git log output
+
+     Raises:
+         ValueError: If not in a git repository or the git command fails
+     """
+     _operation_limiter.check_limit(f"git_log({max_count})")
+
+     # Limit max_count
+     max_count = min(max_count, 50)
+
+     try:
+         # Check if we're in a git repo
+         result = subprocess.run(
+             ["git", "rev-parse", "--git-dir"],
+             capture_output=True,
+             text=True,
+             cwd=REPO_ROOT,
+             timeout=5,
+         )
+         if result.returncode != 0:
+             return "Not a git repository"
+
+         # Build the git log command with formatting
+         cmd = [
+             "git",
+             "log",
+             f"-{max_count}",
+             "--pretty=format:%h - %an, %ar : %s",
+             "--abbrev-commit",
+         ]
+
+         if path:
+             # Validate the path
+             p = _check_path(path, must_exist=False)
+             # Git operations only work on repository files
+             if not _is_inside_repo(p):
+                 raise ValueError(
+                     f"Git operations only work on repository files. Path {path} is outside the repository."
+                 )
+             cmd.append("--")
+             cmd.append(str(p.relative_to(REPO_ROOT)))
+
+         result = subprocess.run(cmd, capture_output=True, text=True, cwd=REPO_ROOT, timeout=30)
+
+         if result.returncode != 0:
+             raise ValueError(f"Git log failed: {result.stderr}")
+
+         output = result.stdout.strip()
+         if not output:
+             return "No commits found"
+
+         audit_logger.info(f"GIT_LOG: {max_count} commits" + (f" for {path}" if path else ""))
+         return f"Recent commits:\n{output}"
+
+     except subprocess.TimeoutExpired:
+         raise ValueError("Git log timed out")
+     except FileNotFoundError:
+         raise ValueError("Git command not found. Is git installed?")
+     except Exception as e:
+         raise ValueError(f"Git log error: {e}")
+
+
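+ # Illustrative git_log output, following the pretty format above (hypothetical commits):
+ #   Recent commits:
+ #   a1b2c3d - Jane Doe, 2 days ago : Fix auth timeout
+ #   d4e5f6a - Jane Doe, 3 days ago : Add retry logic
+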
+ def grep_code(
+     pattern: str,
+     file_glob: Optional[str] = None,
+     case_sensitive: bool = True,
+     max_results: int = 100,
+ ) -> str:
+     """
+     Search for a pattern in repository files using grep.
+
+     Args:
+         pattern: Regular expression pattern to search for
+         file_glob: Optional glob pattern to filter files (e.g., "*.py", "src/**/*.js")
+         case_sensitive: Whether the search should be case-sensitive (default: True)
+         max_results: Maximum number of results to return (default: 100)
+
+     Returns:
+         Search results in "file:line:content" format, or a message if nothing matched
+
+     Raises:
+         ValueError: If the pattern is invalid or the search fails
+     """
+     _operation_limiter.check_limit(f"grep_code({pattern[:30]}...)")
+
+     # Try ripgrep first (faster); fall back to grep
+     use_rg = shutil.which("rg") is not None
+
+     try:
+         if use_rg:
+             # Build the ripgrep command
+             cmd = [
+                 "rg",
+                 "--no-heading",  # Don't group results by file
+                 "--line-number",  # Show line numbers
+                 "--color",
+                 "never",  # No color codes
+                 "--max-count",
+                 str(max_results),  # Limit results per file
+             ]
+
+             if not case_sensitive:
+                 cmd.append("--ignore-case")
+
+             # Add the glob pattern if provided
+             if file_glob:
+                 cmd.extend(["--glob", file_glob])
+
+             # Add the search pattern
+             cmd.append(pattern)
+
+         else:
+             # Fall back to grep
+             cmd = [
+                 "grep",
+                 "--recursive",
+                 "--line-number",
+                 "--binary-files=without-match",  # Skip binary files
+             ]
+
+             if not case_sensitive:
+                 cmd.append("--ignore-case")
+
+             # Add the pattern
+             cmd.extend(["--regexp", pattern])
+
+             # Add the file glob if provided (grep uses --include)
+             if file_glob:
+                 cmd.extend(["--include", file_glob])
+
+             # Current directory (the command is executed with cwd=REPO_ROOT)
+             cmd.append(".")
+
+         # Execute the search from the repository root
+         result = subprocess.run(cmd, capture_output=True, text=True, timeout=30, cwd=REPO_ROOT)
+
+         # ripgrep/grep exit codes: 0 = matches found, 1 = no matches (not an error),
+         # 2+ = actual error
+         if result.returncode > 1:
+             # An actual error occurred
+             raise ValueError(f"Search failed: {result.stderr or 'Unknown error'}")
+
+         # Process the output
+         output = result.stdout.strip()
+
+         if not output or result.returncode == 1:
+             audit_logger.info(f"GREP: {pattern} - No matches found")
+             return f"No matches found for pattern: {pattern}"
+
+         # Count and limit the results
+         lines = output.split("\n")
+         total_matches = len(lines)
+
+         if total_matches > max_results:
+             lines = lines[:max_results]
+             output = "\n".join(lines)
+             output += f"\n\n... (showing first {max_results} of {total_matches} matches)"
+
+         audit_logger.info(f"GREP: {pattern} - Found {total_matches} matches")
+         return output
+
+     except subprocess.TimeoutExpired:
+         raise ValueError(
+             "Search timed out after 30 seconds\n"
+             "Try narrowing your search with a file_glob parameter"
+         )
+     except ValueError:
+         # Re-raise ValueError (from our checks above)
+         raise
+     except Exception as e:
+         raise ValueError(f"Search error: {e}")
+
+
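+ # Roughly equivalent shell invocations for grep_code("def main", file_glob="*.py")
+ # (illustrative; built from the flags above):
+ #   rg --no-heading --line-number --color never --max-count 100 --glob '*.py' 'def main'
+ #   grep --recursive --line-number --binary-files=without-match --regexp 'def main' --include '*.py' .
+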
+ def web_fetch(url: str, extract_text: bool = True) -> str:
+     """
+     Fetch content from a URL and optionally extract readable text.
+
+     Args:
+         url: The URL to fetch
+         extract_text: If True, extract readable text from HTML (default: True)
+
+     Returns:
+         The fetched content (text extracted from HTML if extract_text=True)
+
+     Raises:
+         ValueError: If the request fails or the content is too large
+     """
+     _operation_limiter.check_limit(f"web_fetch({url[:50]}...)")
+
+     # Validate the URL format
+     if not url.startswith(("http://", "https://")):
+         raise ValueError("URL must start with http:// or https://")
+
+     try:
+         # Make the request with a timeout
+         response = requests.get(
+             url,
+             timeout=WEB_REQUEST_TIMEOUT,
+             headers={"User-Agent": WEB_USER_AGENT},
+             stream=True,  # Stream so we can check the size first
+         )
+         response.raise_for_status()
+
+         # Check the declared content size
+         content_length = response.headers.get("Content-Length")
+         if content_length and int(content_length) > MAX_WEB_CONTENT_SIZE:
+             raise ValueError(
+                 f"Content too large: {int(content_length):,} bytes "
+                 f"(max {MAX_WEB_CONTENT_SIZE:,} bytes)"
+             )
+
+         # Read the content with a size limit
+         content = b""
+         for chunk in response.iter_content(chunk_size=8192):
+             content += chunk
+             if len(content) > MAX_WEB_CONTENT_SIZE:
+                 raise ValueError(f"Content exceeds size limit ({MAX_WEB_CONTENT_SIZE:,} bytes)")
+
+         # Decode the content
+         text_content = content.decode(response.encoding or "utf-8", errors="replace")
+
+         # Extract readable text from HTML if requested
+         if extract_text and "html" in response.headers.get("Content-Type", "").lower():
+             soup = BeautifulSoup(text_content, "html.parser")
+
+             # Remove script, style, and page-chrome elements
+             for element in soup(["script", "style", "nav", "footer", "header"]):
+                 element.decompose()
+
+             # Get the text
+             text = soup.get_text()
+
+             # Clean up whitespace (split on double spaces, keeping phrases intact)
+             lines = (line.strip() for line in text.splitlines())
+             chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+             text_content = "\n".join(chunk for chunk in chunks if chunk)
+
+         # Truncate if the content exceeds the character limit, to prevent context window overflow
+         if len(text_content) > MAX_WEB_CONTENT_CHARS:
+             truncated_content = text_content[:MAX_WEB_CONTENT_CHARS]
+             warning_msg = (
+                 f"\n\n[WARNING: Content truncated from {len(text_content):,} to "
+                 f"{MAX_WEB_CONTENT_CHARS:,} characters to prevent context window overflow. "
+                 f"Set the PATCHPAL_MAX_WEB_CHARS environment variable to adjust the limit.]"
+             )
+             audit_logger.info(
+                 f"WEB_FETCH: {url} ({len(text_content)} chars, truncated to {MAX_WEB_CONTENT_CHARS})"
+             )
+             return truncated_content + warning_msg
+
+         audit_logger.info(f"WEB_FETCH: {url} ({len(text_content)} chars)")
+         return text_content
+
+     except requests.Timeout:
+         raise ValueError(f"Request timed out after {WEB_REQUEST_TIMEOUT} seconds")
+     except requests.RequestException as e:
+         raise ValueError(f"Failed to fetch URL: {e}")
+     except Exception as e:
+         raise ValueError(f"Error processing content: {e}")
+
+
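+ # Usage sketch (hypothetical URL): web_fetch("https://example.com/docs") returns
+ # readable text with scripts/styles stripped; anything past PATCHPAL_MAX_WEB_CHARS
+ # is cut and flagged with an inline truncation warning.
+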
+ def web_search(query: str, max_results: int = 5) -> str:
+     """
+     Search the web using DuckDuckGo and return results.
+
+     Args:
+         query: The search query
+         max_results: Maximum number of results to return (default: 5, max: 10)
+
+     Returns:
+         Formatted search results with titles, URLs, and snippets
+
+     Raises:
+         ValueError: If the search fails
+     """
+     _operation_limiter.check_limit(f"web_search({query[:30]}...)")
+
+     # Limit max_results
+     max_results = min(max_results, 10)
+
+     try:
+         # Perform the search using DuckDuckGo
+         with DDGS() as ddgs:
+             results = list(ddgs.text(query, max_results=max_results))
+
+         if not results:
+             audit_logger.info(f"WEB_SEARCH: {query} - No results")
+             return f"No search results found for: {query}"
+
+         # Format the results
+         formatted_results = [f"Search results for: {query}\n"]
+         for i, result in enumerate(results, 1):
+             title = result.get("title", "No title")
+             url = result.get("href", "No URL")
+             snippet = result.get("body", "No description")
+
+             formatted_results.append(f"\n{i}. {title}\n URL: {url}\n {snippet}")
+
+         output = "\n".join(formatted_results)
+         audit_logger.info(f"WEB_SEARCH: {query} - Found {len(results)} results")
+         return output
+
+     except Exception as e:
+         error_msg = str(e)
+
+         # Provide helpful error messages for common issues
+         if "CERTIFICATE_VERIFY_FAILED" in error_msg or "TLS handshake failed" in error_msg:
+             return (
+                 "Web search unavailable: SSL certificate verification failed.\n"
+                 "This may be due to:\n"
+                 "- A corporate proxy/firewall blocking requests\n"
+                 "- Network configuration issues\n"
+                 "- VPN interference\n\n"
+                 "Consider using web_fetch with a specific URL if you have one."
+             )
+         elif "RuntimeError" in error_msg or "error sending request" in error_msg:
+             return (
+                 "Web search unavailable: Network connection failed.\n"
+                 "Please check your internet connection and try again."
+             )
+         else:
+             raise ValueError(f"Web search failed: {e}")
+
+
+ def run_shell(cmd: str) -> str:
+     """
+     Run a safe shell command in the repository.
+
+     Args:
+         cmd: The shell command to execute
+
+     Returns:
+         Combined stdout and stderr output
+
+     Raises:
+         ValueError: If the command contains forbidden operations or times out
+     """
+     # Check permission before proceeding
+     permission_manager = _get_permission_manager()
+     description = f" {cmd}"
+     pattern = cmd.split()[0] if cmd.split() else None
+     if not permission_manager.request_permission("run_shell", description, pattern=pattern):
+         return "Operation cancelled by user."
+
+     _operation_limiter.check_limit(f"run_shell({cmd[:50]}...)")
+
+     # Basic token-based blocking
+     if any(tok in FORBIDDEN for tok in cmd.split()):
+         raise ValueError(
+             f"Blocked dangerous command: {cmd}\nForbidden operations: {', '.join(FORBIDDEN)}"
+         )
+
+     # Additional pattern-based blocking
+     dangerous_patterns = [
+         "> /dev/",  # Writing to devices
+         "rm -rf /",  # Recursive delete from the root
+         "| dd",  # Piping to dd
+         "--force",  # Force flags are often dangerous
+     ]
+
+     for pattern in dangerous_patterns:
+         if pattern in cmd:
+             raise ValueError(f"Blocked dangerous pattern in command: {pattern}")
+
+     audit_logger.info(f"SHELL: {cmd}")
+
+     try:
+         result = subprocess.run(
+             cmd,
+             shell=True,
+             capture_output=True,
+             cwd=REPO_ROOT,
+             timeout=SHELL_TIMEOUT,
+         )
+     except subprocess.TimeoutExpired:
+         # Surface timeouts as ValueError, consistent with the other tools
+         raise ValueError(f"Command timed out after {SHELL_TIMEOUT} seconds")
+
+     # Decode output with error handling for problematic characters.
+     # Use utf-8 on all platforms with 'replace' to handle encoding issues.
+     stdout = result.stdout.decode("utf-8", errors="replace") if result.stdout else ""
+     stderr = result.stderr.decode("utf-8", errors="replace") if result.stderr else ""
+
+     return stdout + stderr
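+
+ # Behavior sketch (illustrative commands):
+ #   run_shell("pytest -q")      # prompts for permission, then runs with SHELL_TIMEOUT
+ #   run_shell("sudo rm -rf /")  # raises ValueError: both "sudo" and "rm -rf /" are blocked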