crackerjack 0.39.11__py3-none-any.whl → 0.40.1__py3-none-any.whl

This diff shows the content of package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.

Potentially problematic release.


This version of crackerjack might be problematic.

@@ -0,0 +1,520 @@
+ """Safe file modification service with comprehensive security checks.
+
+ This module provides SafeFileModifier, a service for safely modifying files
+ with automatic backups, validation, and atomic operations.
+
+ Security Features:
+ - Symlink detection and blocking (both direct and in path chain)
+ - Path traversal prevention (must be within project)
+ - File size limits (10MB default, configurable)
+ - Forbidden file patterns (.env, .git/*, *.key, etc.)
+ - Atomic write operations using tempfile
+ - Permission preservation
+ - Automatic rollback on errors
+
+ Features:
+ - Automatic backup creation with timestamps
+ - Diff generation for review
+ - Dry-run mode for previewing changes
+ - Rollback on errors
+ - Validation of file existence and permissions
+ """
+
+ import difflib
+ import os
+ import shutil
+ import tempfile
+ import typing as t
+ from contextlib import suppress
+ from datetime import datetime
+ from pathlib import Path
+
+ from loguru import logger
+
+
+ class SafeFileModifier:
+     """Safely modify files with backups and validation.
+
+     Features:
+     - Automatic backup creation with timestamps
+     - Diff generation for review
+     - Dry-run mode for previewing changes
+     - Rollback on errors
+     - Validation of file existence and permissions
+     - Atomic file operations to prevent partial writes
+     - Symlink protection to prevent following malicious links
+     - File size limits to prevent DoS attacks
+
+     Security:
+     - All file operations use atomic writes (write to temp, then rename)
+     - Symlinks are detected and blocked (both direct and in path chain)
+     - Path traversal attacks are prevented
+     - File size limits enforced
+     - Forbidden file patterns blocked (.env, .git/*, etc.)
+
+     Example:
+         >>> modifier = SafeFileModifier()
+         >>> result = await modifier.apply_fix(
+         ...     file_path="myfile.py",
+         ...     fixed_content="print('hello')",
+         ...     dry_run=False,
+         ... )
+         >>> if result["success"]:
+         ...     print(f"Applied fix, backup at: {result['backup_path']}")
+     """
+
+     # Forbidden file patterns for security
+     FORBIDDEN_PATTERNS = [
+         ".env*",  # .env, .env.local, .env.production, .env.test, etc.
+         ".git/*",
+         "*.key",
+         "*.pem",
+         "*.crt",
+         "*_rsa",
+         "*_dsa",
+         "*_ed25519",
+         "*.p12",
+         "*.pfx",
+         "id_rsa*",
+         "*.secret",
+         "secrets.*",
+         ".ssh/*",
+     ]
+
+     def __init__(
+         self,
+         backup_dir: Path | None = None,
+         max_file_size: int = 10_485_760,  # 10MB default
+     ):
+         """Initialize SafeFileModifier.
+
+         Args:
+             backup_dir: Directory for backups (default: .backups)
+             max_file_size: Maximum file size in bytes (default: 10MB)
+         """
+         self._backup_dir = backup_dir or Path(".backups")
+         self._max_file_size = max_file_size
+         self._ensure_backup_dir()
+
+     def _ensure_backup_dir(self) -> None:
+         """Create backup directory if it doesn't exist."""
+         if not self._backup_dir.exists():
+             self._backup_dir.mkdir(parents=True, exist_ok=True)
+             logger.debug(f"Created backup directory: {self._backup_dir}")
+
+     async def apply_fix(
+         self,
+         file_path: str,
+         fixed_content: str,
+         dry_run: bool = False,
+         create_backup: bool = True,
+     ) -> dict[str, t.Any]:
+         """Apply code fix with safety checks.
+
+         This is the main public API for applying fixes to files.
+         Performs comprehensive validation, creates backups, and applies
+         changes atomically.
+
+         Args:
+             file_path: Path to file to modify
+             fixed_content: New content to write
+             dry_run: If True, only generate diff without modifying
+             create_backup: If True, create backup before modifying
+
+         Returns:
+             Dictionary with keys:
+             - success: bool - Whether operation succeeded
+             - diff: str - Unified diff of changes
+             - backup_path: str | None - Path to backup file
+             - dry_run: bool - Whether this was a dry run
+             - message: str - Human-readable message
+             - error: str - Error message if failed
+
+         Example:
+             >>> result = await modifier.apply_fix(
+             ...     "test.py", "print('fixed')", dry_run=True
+             ... )
+             >>> print(result["diff"])
+         """
+         return await self._apply_fix(file_path, fixed_content, dry_run, create_backup)
+
+     async def _apply_fix(
+         self,
+         file_path: str,
+         fixed_content: str,
+         dry_run: bool,
+         create_backup: bool,
+     ) -> dict[str, str | bool | None]:
+         """Internal implementation of fix application with atomic writes.
+
+         Security features:
+         1. Validates file path (symlinks, traversal, size)
+         2. Validates content size
+         3. Creates backup before modification
+         4. Uses atomic write (temp file + rename)
+         5. Preserves permissions
+         6. Rollback on errors
+
+         Args:
+             file_path: Path to file to modify
+             fixed_content: New content to write
+             dry_run: If True, only generate diff
+             create_backup: If True, create backup
+
+         Returns:
+             Result dictionary with success status and details
+         """
+         path = Path(file_path)
+
+         # Validation
+         result = self._validate_fix_inputs(path, fixed_content)
+         if not result["success"]:
+             return result
+
+         # Read original content
+         result = self._read_original_content(path)
+         if not result["success"]:
+             return result
+         original_content = result["content"]
+
+         # Generate diff
+         diff = self._generate_diff(original_content, fixed_content, file_path)
+
+         # Dry-run mode - just return diff
+         if dry_run:
+             return self._create_dry_run_result(diff)
+
+         # Create backup if requested
+         result = self._handle_backup(path, original_content, create_backup, diff)
+         if not result["success"]:
+             return result
+         backup_path = result.get("backup_path")
+
+         # Apply the fix atomically
+         return self._atomic_write_fix(path, fixed_content, diff, backup_path, file_path)
+
+     def _validate_fix_inputs(
+         self, path: Path, fixed_content: str
+     ) -> dict[str, str | bool | None]:
+         """Validate file path and content size."""
+         validation_result = self._validate_file_path(path)
+         if not validation_result["valid"]:
+             return {
+                 "success": False,
+                 "error": validation_result["error"],
+                 "diff": "",
+                 "backup_path": None,
+             }
+
+         if len(fixed_content) > self._max_file_size:
+             return {
+                 "success": False,
+                 "error": f"Content size {len(fixed_content)} exceeds limit {self._max_file_size}",
+                 "diff": "",
+                 "backup_path": None,
+             }
+
+         return {"success": True}
+
+     def _read_original_content(self, path: Path) -> dict[str, str | bool | None]:
+         """Read original file content with error handling."""
+         try:
+             original_content = path.read_text(encoding="utf-8")
+             return {"success": True, "content": original_content}
+         except UnicodeDecodeError:
+             return {
+                 "success": False,
+                 "error": f"File is not valid UTF-8: {path}",
+                 "diff": "",
+                 "backup_path": None,
+             }
+         except Exception as e:
+             return {
+                 "success": False,
+                 "error": f"Failed to read file: {e}",
+                 "diff": "",
+                 "backup_path": None,
+             }
+
+     def _create_dry_run_result(self, diff: str) -> dict[str, str | bool | None]:
+         """Create result dictionary for dry-run mode."""
+         return {
+             "success": True,
+             "diff": diff,
+             "backup_path": None,
+             "dry_run": True,
+             "message": "Dry-run: Changes not applied",
+         }
+
+     def _handle_backup(
+         self, path: Path, original_content: str, create_backup: bool, diff: str
+     ) -> dict[str, str | bool | Path | None]:
+         """Create backup if requested."""
+         if not create_backup:
+             return {"success": True, "backup_path": None}
+
+         try:
+             backup_path = self._create_backup(path, original_content)
+             return {"success": True, "backup_path": backup_path}
+         except Exception as e:
+             logger.error(f"Failed to create backup: {e}")
+             return {
+                 "success": False,
+                 "error": f"Backup creation failed: {e}",
+                 "diff": diff,
+                 "backup_path": None,
+             }
+
+     def _atomic_write_fix(
+         self,
+         path: Path,
+         fixed_content: str,
+         diff: str,
+         backup_path: Path | None,
+         file_path: str,
+     ) -> dict[str, str | bool | None]:
+         """Write fix atomically with rollback on error."""
+         try:
+             temp_fd, temp_path_str = tempfile.mkstemp(
+                 dir=path.parent,
+                 prefix=f".{path.name}.",
+                 suffix=".tmp",
+             )
+
+             try:
+                 with os.fdopen(temp_fd, "w", encoding="utf-8") as f:
+                     f.write(fixed_content)
+                     f.flush()
+                     os.fsync(f.fileno())
+
+                 original_stat = path.stat()
+                 os.chmod(temp_path_str, original_stat.st_mode)
+                 shutil.move(temp_path_str, path)
+
+                 logger.info(f"Successfully applied fix to {file_path}")
+
+                 return {
+                     "success": True,
+                     "diff": diff,
+                     "backup_path": str(backup_path) if backup_path else None,
+                     "dry_run": False,
+                     "message": f"Fix applied successfully to {file_path}",
+                 }
+
+             except Exception:
+                 with suppress(Exception):
+                     Path(temp_path_str).unlink()
+                 raise
+
+         except Exception as e:
+             if backup_path:
+                 logger.warning(f"Fix failed, restoring from backup: {e}")
+                 try:
+                     self._restore_backup(path, backup_path)
+                 except Exception as restore_error:
+                     logger.error(f"Rollback failed: {restore_error}")
+                     return {
+                         "success": False,
+                         "error": f"Failed to write file AND rollback failed: {e} (rollback: {restore_error})",
+                         "diff": diff,
+                         "backup_path": str(backup_path) if backup_path else None,
+                     }
+
+             return {
+                 "success": False,
+                 "error": f"Failed to write file: {e}",
+                 "diff": diff,
+                 "backup_path": str(backup_path) if backup_path else None,
+             }
+
+     def _validate_file_path(self, path: Path) -> dict[str, bool | str]:
+         """Validate file path before modification with comprehensive security checks.
+
+         Security checks:
+         1. File existence and type validation
+         2. Symlink detection (blocks symlinks to prevent malicious redirects)
+         3. Path traversal prevention (must be within project)
+         4. File size limits
+         5. Permission checks
+         6. Forbidden file pattern checks
+         7. Path chain symlink validation
+
+         Args:
+             path: Path to validate
+
+         Returns:
+             Dictionary with:
+             - valid: bool - Whether path is valid
+             - error: str - Error message if invalid
+
+         Example:
+             >>> result = modifier._validate_file_path(Path("test.py"))
+             >>> if not result["valid"]:
+             ...     print(result["error"])
+         """
+         # Must exist
+         if not path.exists():
+             return {
+                 "valid": False,
+                 "error": f"File does not exist: {path}",
+             }
+
+         # SECURITY: Block symlinks to prevent following malicious links
+         if path.is_symlink():
+             return {
+                 "valid": False,
+                 "error": f"Symlinks are not allowed for security reasons: {path}",
+             }
+
+         # Must be a file (not directory)
+         if not path.is_file():
+             return {
+                 "valid": False,
+                 "error": f"Path is not a file: {path}",
+             }
+
+         # SECURITY: Check forbidden file patterns
+         file_str = str(path)
+         from fnmatch import fnmatch
+
+         for pattern in self.FORBIDDEN_PATTERNS:
+             if fnmatch(file_str, pattern) or fnmatch(path.name, pattern):
+                 return {
+                     "valid": False,
+                     "error": f"File matches forbidden pattern '{pattern}': {path}",
+                 }
+
+         # SECURITY: Check file size before processing
+         try:
+             file_size = path.stat().st_size
+             if file_size > self._max_file_size:
+                 return {
+                     "valid": False,
+                     "error": f"File size {file_size} exceeds limit {self._max_file_size}",
+                 }
+         except Exception as e:
+             return {
+                 "valid": False,
+                 "error": f"Failed to check file size: {e}",
+             }
+
+         # Must be writable
+         if not os.access(path, os.W_OK):
+             return {
+                 "valid": False,
+                 "error": f"File is not writable: {path}",
+             }
+
+         # SECURITY: Prevent path traversal attacks
+         try:
+             resolved_path = path.resolve()
+             project_root = Path.cwd().resolve()
+
+             # Ensure the resolved path is within the project directory
+             resolved_path.relative_to(project_root)
+
+         except ValueError:
+             return {
+                 "valid": False,
+                 "error": f"File path outside project directory: {path}",
+             }
+
+         # SECURITY: Additional check - ensure no symlinks in the path chain
+         current = path
+         while current != current.parent:
+             if current.is_symlink():
+                 return {
+                     "valid": False,
+                     "error": f"Symlink in path chain not allowed: {current}",
+                 }
+             current = current.parent
+
+         return {"valid": True, "error": ""}
+
+     def _create_backup(
+         self,
+         file_path: Path,
+         content: str,
+     ) -> Path:
+         """Create timestamped backup file.
+
+         Backup naming: .backups/<filename>_<timestamp>.bak
+         Example: .backups/myfile.py_20250103_143022.bak
+
+         Args:
+             file_path: Original file path
+             content: Content to backup
+
+         Returns:
+             Path to backup file
+
+         Raises:
+             IOError: If backup creation fails
+         """
+         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+         backup_name = f"{file_path.name}_{timestamp}.bak"
+         backup_path = self._backup_dir / backup_name
+
+         # Write backup
+         backup_path.write_text(content, encoding="utf-8")
+
+         logger.debug(f"Created backup: {backup_path}")
+
+         return backup_path
+
+     def _restore_backup(
+         self,
+         file_path: Path,
+         backup_path: Path,
+     ) -> None:
+         """Restore file from backup.
+
+         Args:
+             file_path: File to restore
+             backup_path: Backup file to restore from
+
+         Raises:
+             IOError: If restoration fails
+         """
+         try:
+             backup_content = backup_path.read_text(encoding="utf-8")
+             file_path.write_text(backup_content, encoding="utf-8")
+
+             logger.info(f"Restored {file_path} from backup")
+
+         except Exception as e:
+             logger.error(f"Failed to restore backup: {e}")
+             raise
+
+     def _generate_diff(
+         self,
+         original: str,
+         fixed: str,
+         filename: str,
+     ) -> str:
+         """Generate unified diff for review.
+
+         Args:
+             original: Original file content
+             fixed: Fixed file content
+             filename: Name for diff headers
+
+         Returns:
+             Unified diff string
+
+         Example:
+             >>> diff = modifier._generate_diff("old content", "new content", "test.py")
+             >>> print(diff)
+         """
+         original_lines = original.splitlines(keepends=True)
+         fixed_lines = fixed.splitlines(keepends=True)
+
+         diff = difflib.unified_diff(
+             original_lines,
+             fixed_lines,
+             fromfile=f"{filename} (original)",
+             tofile=f"{filename} (fixed)",
+             lineterm="",
+         )
+
+         return "".join(diff)
@@ -0,0 +1,10 @@
+ """
+ Workflow orchestration for crackerjack.
+
+ This module provides high-level workflows that coordinate multiple
+ agents and services to accomplish complex tasks like iterative auto-fixing.
+ """
+
+ from .auto_fix import AutoFixWorkflow, FixIteration
+
+ __all__ = ["AutoFixWorkflow", "FixIteration"]
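
This second hunk is a conventional package initializer: it re-exports the workflow classes so callers can import them from the package itself, and __all__ pins the public names. A minimal consumer-side sketch follows, with the package path assumed, since the diff shows the file's contents but not its directory:

# Assumed path: the workflows package added in 0.40.1 is presumably importable
# as crackerjack.workflows, but this diff does not confirm the directory name.
from crackerjack.workflows import AutoFixWorkflow, FixIteration

# A star-import of this package would expose exactly these two names,
# because __all__ is set to ["AutoFixWorkflow", "FixIteration"].
print(AutoFixWorkflow.__name__, FixIteration.__name__)
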