ostruct-cli 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,483 @@
1
+ """Symlink resolution module.
2
+
3
+ This module provides secure symlink resolution with:
4
+ - Maximum depth enforcement
5
+ - Loop detection
6
+ - Security validation
7
+ - Windows path handling
8
+
9
+ Design Choices:
10
+ 1. Security First:
11
+ - Validate before resolving
12
+ - Check each step in chain
13
+ - Fail closed on errors
14
+
15
+ 2. Loop Detection:
16
+ - Track visited paths
17
+ - Check before traversal
18
+ - Handle all loop types
19
+
20
+ 3. Windows Support:
21
+ - Handle Windows-specific paths
22
+ - Support both APIs
23
+ - Our approach works with both behaviors
24
+ """
25
+
26
+ import logging
27
+ import os
28
+ from dataclasses import dataclass, field
29
+ from datetime import datetime
30
+ from pathlib import Path
31
+ from typing import List, Optional, Set
32
+
33
+ from .allowed_checker import is_path_in_allowed_dirs
34
+ from .errors import PathSecurityError, SecurityErrorReasons
35
+ from .normalization import normalize_path
36
+ from .windows_paths import is_windows_path, validate_windows_path
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
@dataclass
class SymlinkInfo:
    """Record of a single hop in a symlink resolution chain.

    Instances exist for logging and auditing: each one describes one step
    taken while a chain of symlinks is being resolved.

    Attributes:
        source: The symlink that was followed.
        target: The location the symlink points at.
        depth: Depth of this step, as supplied by whoever builds the record.
        timestamp: Creation time of the record; defaults to
            ``datetime.now()`` evaluated when the instance is built.
    """

    source: Path
    target: Path
    depth: int
    timestamp: datetime = field(default_factory=datetime.now)

    def __str__(self) -> str:
        """Render the hop as ``<source> -> <target>``."""
        endpoints = (str(self.source), str(self.target))
        return " -> ".join(endpoints)
56
+
57
+
58
def _debug_seen_set(seen: Set[Path], prefix: str = "") -> None:
    """Write the members of ``seen`` to the module logger at DEBUG level.

    Args:
        seen: Paths to list; each is rendered with ``str()`` on its own line.
        prefix: Text placed directly in front of the log message.
    """
    rendered = [str(entry) for entry in seen]
    logger.debug("%sSeen set contents:\n %s", prefix, "\n ".join(rendered))
62
+
63
+
64
def _follow_symlink_chain(
    path: Path, seen: Set[Path], max_depth: int = 16
) -> Optional[List[Path]]:
    """Follow a symlink chain with ``readlink`` and report a loop if one exists.

    Only link targets are read; existence of the targets is never checked.
    That lets a loop be recognised even when the filesystem reports the
    members of the loop as non-existent.

    How it works:
        1. The starting path is normalized and becomes the first chain entry.
        2. For up to ``max_depth`` steps the current link is read with
           ``os.readlink``; a relative target is made absolute against the
           directory of the link and then normalized.
        3. Each new target is compared against the paths already in the
           chain *as soon as it is computed*. A loop that closes on the last
           permitted step is therefore still detected instead of being
           reported as "no loop".
        4. An ``OSError`` while reading or normalizing (for example because
           the current path is not a symlink) ends the chain with no loop.

    Race Condition Warning:
        Nothing here is atomic. Links can be changed between the individual
        ``readlink`` calls and between this check and later use of the path,
        so filesystem permissions must be relied on to mitigate TOCTOU risks.

    Args:
        path: The path to start following from.
        seen: Paths already seen by the calling resolution; used only for
            log output, never for the loop decision itself.
        max_depth: Maximum number of links to read.

    Returns:
        The visited paths followed by the path that closes the loop when a
        loop is found, otherwise ``None`` (chain ended, an ``OSError``
        occurred, or ``max_depth`` links were read without a repeat).
    """
    logger.debug(
        "\n=== Following symlink chain ===\n"
        "Starting path: %s\n"
        "Resolution seen: %s",
        path,
        sorted(seen),
    )

    current = normalize_path(path)
    chain: List[Path] = [current]
    # Separate from ``seen``: only paths visited by *this* walk count.
    chain_seen: Set[Path] = {current}

    for depth in range(max_depth):
        try:
            # readlink follows the link without any existence check.
            target_str = os.readlink(str(current))
            if not os.path.isabs(target_str):
                target_str = os.path.normpath(
                    os.path.join(str(current.parent), target_str)
                )
            target = normalize_path(Path(target_str))
        except OSError as e:
            logger.debug(
                "Failed to read symlink at depth %d: %s - %s",
                depth,
                current,
                e,
            )
            return None

        logger.debug(
            "Chain step %d:\n"
            " Current: %s\n"
            " Target: %s\n"
            " Chain so far: %s\n"
            " Chain seen: %s\n"
            " Resolution seen: %s",
            depth,
            current,
            target,
            chain,
            sorted(chain_seen),
            sorted(seen),
        )

        # Check immediately so a loop closing on the final step is caught.
        if target in chain_seen:
            logger.warning(
                "\n=== Loop detected in chain! ===\n"
                "Target creating loop: %s\n"
                "Chain: %s\n"
                "Chain seen: %s\n"
                "Resolution seen: %s",
                target,
                chain,
                sorted(chain_seen),
                sorted(seen),
            )
            # Complete chain including the point that closes the loop.
            return chain + [target]

        chain.append(target)
        chain_seen.add(target)
        current = target

    logger.debug(
        "Chain exceeded max depth (%d) without finding loop", max_depth
    )
    return None
200
+
201
+
202
def _resolve_symlink(
    path: Path,
    max_depth: int,
    allowed_dirs: List[Path],
    seen: Optional[Set[Path]] = None,
    current_depth: int = 0,
) -> Path:
    """Internal security primitive for symlink resolution.

    INTERNAL API: This function is not part of the public interface.
    Use SecurityManager.resolve_path() for general path resolution.

    Checks are applied in this order; the first one that fails decides the
    error reason:
        1. Depth limit: ``current_depth >= max_depth`` raises
           SYMLINK_MAX_DEPTH.
        2. The path is normalized with ``normalize_path``.
        3. If the normalized path is not a symlink it is returned as is.
        4. ``_follow_symlink_chain`` walks the remaining chain using
           ``readlink`` only; a loop raises SYMLINK_LOOP. Because this runs
           before any existence check, SYMLINK_LOOP takes precedence over
           SYMLINK_BROKEN.
        5. The link target is read, made absolute against the directory of
           the *normalized* link (the same rule the loop check uses, so both
           look at the same chain) and normalized.
        6. On Windows (``os.name == "nt"``) the source path is validated
           with ``validate_windows_path``; a failure raises SYMLINK_ERROR.
        7. A target that does not exist raises SYMLINK_BROKEN.
        8. A target outside ``allowed_dirs`` raises
           SYMLINK_TARGET_NOT_ALLOWED.
        9. Resolution recurses on the target with ``current_depth + 1``.

    Visited paths are recorded in a private copy of ``seen`` so that the
    ``"chain"`` entries of error contexts and log records list the links
    followed so far. A set supplied by the caller is never mutated.

    Known Limitations:
        - TOCTOU: symlinks can change between the individual checks and
          between resolution and use; nothing here is atomic.
        - Relative targets are collapsed lexically with ``os.path.normpath``.
        - Windows handling is limited to the validation in step 6.

    Args:
        path: The starting Path object.
        max_depth: Maximum allowed resolution depth.
        allowed_dirs: List of allowed directories for the target.
        seen: Normalized paths already visited in this resolution; used for
            diagnostics (logs and error context).
        current_depth: Current depth in the resolution chain.

    Returns:
        A Path object for the resolved target.

    Raises:
        PathSecurityError: With context["reason"] indicating:
            - SYMLINK_MAX_DEPTH: Chain exceeds maximum depth
            - SYMLINK_LOOP: Cyclic reference detected
            - SYMLINK_BROKEN: Target doesn't exist
            - SYMLINK_TARGET_NOT_ALLOWED: Target outside allowed_dirs
            - SYMLINK_ERROR: Windows validation failed or an OSError
              occurred while resolving
    """
    logger.debug(
        "\n=== Starting symlink resolution ===\n"
        "Path: %s\n"
        "Depth: %d\n"
        "Seen paths: %s",
        path,
        current_depth,
        sorted(seen or ()),
    )

    # 1. Check maximum recursion depth first (highest precedence)
    if current_depth >= max_depth:
        logger.warning(
            "\n=== Maximum symlink depth exceeded ===\n"
            "Path: %s\n"
            "Current depth: %d\n"
            "Max depth: %d\n"
            "Chain: %s",
            path,
            current_depth,
            max_depth,
            sorted(seen or ()),
        )
        raise PathSecurityError(
            "Symlink security violation: maximum depth exceeded",
            path=str(path),
            context={
                "reason": SecurityErrorReasons.SYMLINK_MAX_DEPTH,
                "depth": current_depth,
                "max_depth": max_depth,
                "chain": [str(p) for p in (seen or set())],
            },
        )

    # 2. Work on a private copy so the caller's set is left untouched
    if seen is None:
        seen = set()
        logger.debug("Initialized new seen set")
    else:
        seen = set(seen)

    # 3. Normalize path for consistent comparison
    norm_path = normalize_path(path)
    logger.debug("Normalized path: %s", norm_path)

    # Record the visit so "chain" diagnostics reflect the links followed.
    seen.add(norm_path)

    # 4. Check if it's a symlink first
    try:
        if not norm_path.is_symlink():
            logger.debug(
                "Not a symlink, returning normalized path: %s", norm_path
            )
            return norm_path

        # 5. Check for loops using chain following (second highest precedence)
        chain = _follow_symlink_chain(
            norm_path, seen, max_depth - current_depth
        )
        if chain:
            logger.warning(
                "\n=== Loop detected in symlink chain! ===\n"
                "Starting path: %s\n"
                "Chain: %s\n"
                "Seen paths: %s",
                norm_path,
                chain,
                sorted(seen),
            )
            raise PathSecurityError(
                "Symlink security violation: loop detected",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.SYMLINK_LOOP,
                    "chain": [str(p) for p in chain],
                    "seen": [str(p) for p in seen],
                },
            )

        # 6. Read and normalize the target
        target_str = os.readlink(norm_path)
        logger.debug("Raw symlink target: %s", target_str)

        # Convert to absolute path if needed. The base is the normalized
        # link's directory, matching what _follow_symlink_chain does.
        if not os.path.isabs(target_str):
            target_str = os.path.normpath(
                os.path.join(str(norm_path.parent), target_str)
            )
            logger.debug("Absolute target path: %s", target_str)

        # Normalize the target path
        normalized_target = normalize_path(target_str)
        logger.debug(
            "\n=== Processing symlink target ===\n"
            "Original path: %s\n"
            "Target string: %s\n"
            "Normalized target: %s\n"
            "Current seen set: %s",
            path,
            target_str,
            normalized_target,
            sorted(seen),
        )

        # 7. Validate Windows-specific path features
        if os.name == "nt":
            if is_windows_path(path):
                error_msg = validate_windows_path(path)
                if error_msg:
                    logger.warning(
                        "Windows path validation failed: %s - %s",
                        path,
                        error_msg,
                    )
                    raise PathSecurityError(
                        f"Symlink security violation: {error_msg}",
                        path=str(path),
                        context={
                            "reason": SecurityErrorReasons.SYMLINK_ERROR,
                            "windows_specific": True,
                            "chain": [str(p) for p in seen],
                        },
                    )

        # 8. Check existence after confirming no loops (lowest precedence)
        if not normalized_target.exists():
            logger.debug(
                "\n=== Broken symlink detected ===\n"
                "Path: %s\n"
                "Target: %s\n"
                "Chain: %s",
                path,
                normalized_target,
                sorted(seen),
            )
            raise PathSecurityError(
                f"Symlink security violation: broken symlink target '{normalized_target}' does not exist",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.SYMLINK_BROKEN,
                    "source": str(path),
                    "target": str(normalized_target),
                    "chain": [str(p) for p in seen],
                },
            )

        # 9. Validate target is allowed
        if not is_path_in_allowed_dirs(normalized_target, allowed_dirs):
            logger.warning(
                "Symlink target not allowed: %s -> %s", path, normalized_target
            )
            raise PathSecurityError(
                "Symlink security violation: target not allowed",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.SYMLINK_TARGET_NOT_ALLOWED,
                    "source": str(path),
                    "target": str(normalized_target),
                    "chain": [str(p) for p in seen],
                },
            )

        # 10. Recurse with the normalized target
        logger.debug(
            "\n=== Recursing to target ===\n"
            "From path: %s\n"
            "To target: %s\n"
            "Current depth: %d\n"
            "Chain so far: %s",
            path,
            normalized_target,
            current_depth + 1,
            sorted(seen),
        )
        return _resolve_symlink(
            normalized_target, max_depth, allowed_dirs, seen, current_depth + 1
        )

    except OSError as e:
        logger.debug("OSError during symlink resolution: %s - %s", path, e)
        raise PathSecurityError(
            f"Symlink security violation: failed to resolve symlink - {e}",
            path=str(path),
            context={
                "reason": SecurityErrorReasons.SYMLINK_ERROR,
                "error": str(e),
                "chain": [str(p) for p in (seen or set())],
            },
        ) from e
@@ -0,0 +1,108 @@
1
+ """Security type definitions and protocols."""
2
+
3
+ from contextlib import AbstractContextManager
4
+ from pathlib import Path
5
+ from typing import List, Protocol, Union
6
+
7
+
8
class SecurityManagerProtocol(Protocol):
    """Structural interface that a security manager must provide.

    The protocol only declares members; every body is a placeholder and the
    behaviour described below is the contract implementations are expected
    to honour.
    """

    @property
    def base_dir(self) -> Path:
        """Base directory of the manager."""
        ...

    @property
    def allowed_dirs(self) -> List[Path]:
        """Directories that are currently allowed."""
        ...

    def add_allowed_directory(self, directory: Union[str, Path]) -> None:
        """Register ``directory`` as an additional allowed directory.

        Args:
            directory: Directory to allow.

        Raises:
            DirectoryNotFoundError: When the directory does not exist.
        """
        ...

    def is_temp_path(self, path: Union[str, Path]) -> bool:
        """Tell whether ``path`` lies in the system's temporary directory.

        Args:
            path: Path to examine.

        Returns:
            ``True`` for a temporary path, ``False`` otherwise.

        Raises:
            PathSecurityError: When the path cannot be checked.
        """
        ...

    def is_path_allowed(self, path: Union[str, Path]) -> bool:
        """Tell whether the security rules permit ``path``.

        Args:
            path: Path to examine.

        Returns:
            ``True`` when the path is allowed, ``False`` otherwise.
        """
        ...

    def validate_path(self, path: Union[str, Path]) -> Path:
        """Run ``path`` through the security rules and return the result.

        Expected steps:
            1. Normalize the path.
            2. Check for directory traversal.
            3. Verify that the path is allowed.
            4. Resolve symlinks securely when needed.

        Args:
            path: Path to validate.

        Returns:
            The validated path, resolved where applicable.

        Raises:
            PathSecurityError: When any security check fails.
        """
        ...

    def resolve_path(self, path: Union[str, Path]) -> Path:
        """Resolve ``path`` while applying the security checks.

        Expected steps:
            1. Normalize the input.
            2. Check that it exists.
            3. Validate it against the security rules.
            4. Resolve symlinks when needed.

        Args:
            path: Path to resolve.

        Returns:
            The validated and resolved path.

        Raises:
            FileNotFoundError: When the file does not exist.
            PathSecurityError: When validation fails.
        """
        ...

    def symlink_context(self) -> AbstractContextManager[None]:
        """Return a context manager that scopes symlink resolution state.

        The state used during symlink resolution is expected to be cleaned
        up on exit, including when resolution raises.

        Example:
            >>> with security_manager.symlink_context():
            ...     resolved = security_manager.resolve_path("/path/to/symlink")
        """
        ...