ostruct-cli 0.1.0 (ostruct_cli-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
+ """FileInfoList implementation providing smart file content access."""
+
+ from typing import List, Union
+
+ from .file_info import FileInfo
+
+ __all__ = ["FileInfoList", "FileInfo"]
+
+
+ class FileInfoList(List[FileInfo]):
+     """List of FileInfo objects with smart content access.
+
+     This class extends List[FileInfo] to provide convenient access to file contents
+     and metadata. When the list contains exactly one file from a single file mapping,
+     properties like content return the value directly. For multiple files or directory
+     mappings, properties return a list of values.
+
+     Examples:
+         Single file (--file):
+             files = FileInfoList([file_info], from_dir=False)
+             content = files.content  # Returns "file contents"
+
+         Multiple files or directory (--files or --dir):
+             files = FileInfoList([file1, file2])  # or FileInfoList([file1], from_dir=True)
+             content = files.content  # Returns ["contents1", "contents2"] or ["contents1"]
+
+         Backward compatibility:
+             content = files[0].content  # Still works
+
+     Properties:
+         content: File content(s) - string for single file mapping, list for multiple files or directory
+         path: File path(s)
+         abs_path: Absolute file path(s)
+         size: File size(s) in bytes
+
+     Raises:
+         ValueError: When accessing properties on an empty list
+     """
+
+     def __init__(self, files: List[FileInfo], from_dir: bool = False) -> None:
+         """Initialize FileInfoList.
+
+         Args:
+             files: List of FileInfo objects
+             from_dir: Whether this list was created from a directory mapping
+         """
+         super().__init__(files)
+         self._from_dir = from_dir
+
+     @property
+     def content(self) -> Union[str, List[str]]:
+         """Get the content of the file(s).
+
+         Returns:
+             Union[str, List[str]]: For a single file from file mapping, returns its content as a string.
+             For multiple files or directory mapping, returns a list of contents.
+
+         Raises:
+             ValueError: If the list is empty
+         """
+         if not self:
+             raise ValueError("No files in FileInfoList")
+         if len(self) == 1 and not self._from_dir:
+             return self[0].content
+         return [f.content for f in self]
+
+     @property
+     def path(self) -> Union[str, List[str]]:
+         """Get the path of the file(s).
+
+         Returns:
+             Union[str, List[str]]: For a single file from file mapping, returns its path as a string.
+             For multiple files or directory mapping, returns a list of paths.
+
+         Raises:
+             ValueError: If the list is empty
+         """
+         if not self:
+             raise ValueError("No files in FileInfoList")
+         if len(self) == 1 and not self._from_dir:
+             return self[0].path
+         return [f.path for f in self]
+
+     @property
+     def abs_path(self) -> Union[str, List[str]]:
+         """Get the absolute path of the file(s).
+
+         Returns:
+             Union[str, List[str]]: For a single file from file mapping, returns its absolute path as a string.
+             For multiple files or directory mapping, returns a list of absolute paths.
+
+         Raises:
+             ValueError: If the list is empty
+         """
+         if not self:
+             raise ValueError("No files in FileInfoList")
+         if len(self) == 1 and not self._from_dir:
+             return self[0].abs_path
+         return [f.abs_path for f in self]
+
+     @property
+     def size(self) -> Union[int, List[int]]:
+         """Get file size(s) in bytes.
+
+         Returns:
+             Union[int, List[int]]: For a single file from file mapping, returns its size in bytes.
+             For multiple files or directory mapping, returns a list of sizes.
+
+         Raises:
+             ValueError: If the list is empty or if any file size is None
+         """
+         if not self:
+             raise ValueError("No files in FileInfoList")
+
+         # For single file not from directory, return its size
+         if len(self) == 1 and not self._from_dir:
+             size = self[0].size
+             if size is None:
+                 raise ValueError(
+                     f"Could not get size for file: {self[0].path}"
+                 )
+             return size
+
+         # For multiple files, collect all sizes
+         sizes = []
+         for f in self:
+             size = f.size
+             if size is None:
+                 raise ValueError(f"Could not get size for file: {f.path}")
+             sizes.append(size)
+         return sizes
+
+     def __str__(self) -> str:
+         """Get string representation of the file list.
+
+         Returns:
+             str: String representation in format FileInfoList([paths])
+         """
+         if not self:
+             return "FileInfoList([])"
+         if len(self) == 1:
+             return f"FileInfoList(['{self[0].path}'])"
+         return f"FileInfoList({[f.path for f in self]})"
+
+     def __repr__(self) -> str:
+         """Get detailed string representation of the file list.
+
+         Returns:
+             str: Same as str() for consistency
+         """
+         return str(self)
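A minimal usage sketch of the FileInfoList class above (the first file in this diff, imported elsewhere as .file_list). FileInfo construction is elided; make_file_info below is a hypothetical stand-in for however FileInfo objects are created (for example FileInfo.from_path in the second file):

    # make_file_info is hypothetical; it stands in for FileInfo construction.
    single = FileInfoList([make_file_info("notes.txt")], from_dir=False)
    single.content        # the file's text as a plain string
    single.path           # "notes.txt"

    many = FileInfoList([make_file_info("a.txt"), make_file_info("b.txt")])
    many.content          # ["contents of a.txt", "contents of b.txt"]
    many[0].content       # normal list indexing still works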
@@ -0,0 +1,518 @@
+ """File utilities for the CLI.
+
+ This module provides utilities for file operations with security controls:
+
+ 1. File Information:
+    - FileInfo class for safe file access and metadata
+    - Support for file content caching
+    - Automatic encoding detection
+
+ 2. Path Handling:
+    - Supports ~ expansion for home directory
+    - Supports environment variable expansion
+    - Security checks for file access
+    - Requires explicit allowed directories for access outside CWD
+
+ 3. Security Features:
+    - Directory traversal prevention
+    - Explicit allowed directory configuration
+    - Temporary file access controls
+    - Path validation and normalization
+
+ Usage Examples:
+     Basic file access (from current directory):
+     >>> info = FileInfo.from_path("var_name", "local_file.txt")
+     >>> content = info.content
+
+     Access home directory files (requires --allowed-dir):
+     >>> info = FileInfo.from_path("var_name", "~/file.txt", allowed_dirs=["~/"])
+     >>> content = info.content
+
+     Multiple file collection:
+     >>> files = collect_files(
+     ...     file_args=["var=path.txt"],
+     ...     allowed_dirs=["/allowed/path"],
+     ...     recursive=True
+     ... )
+
+ Security Notes:
+     - Files must be in current directory or explicitly allowed directories
+     - Use --allowed-dir to access files outside current directory
+     - Home directory (~) is not automatically allowed
+     - Environment variables are expanded in paths
+ """
+
+ import codecs
+ import glob
+ import logging
+ import os
+ from typing import Any, Dict, List, Optional, Type, Union
+
+ import chardet
+
+ from .errors import (
+     DirectoryNotFoundError,
+     FileNotFoundError,
+     PathSecurityError,
+ )
+ from .file_info import FileInfo
+ from .file_list import FileInfoList
+ from .security import SecurityManager
+ from .security_types import SecurityManagerProtocol
+
+ __all__ = [
+     "FileInfo",  # Re-exported from file_info
+     "SecurityManager",  # Re-exported from security
+     "FileInfoList",  # Re-exported from file_list
+     "collect_files",
+     "collect_files_from_pattern",
+     "collect_files_from_directory",
+     "detect_encoding",
+     "expand_path",
+     "read_allowed_dirs_from_file",
+ ]
+
+ logger = logging.getLogger(__name__)
+
+ # Type for values in template context
+ TemplateValue = Union[str, List[str], Dict[str, str]]
+
+
+ def _get_security_manager() -> Type[SecurityManagerProtocol]:
+     """Get the SecurityManager class.
+
+     Returns:
+         The SecurityManager class type
+     """
+     return SecurityManager
+
+
+ def expand_path(path: str, force_absolute: bool = False) -> str:
+     """Expand user home directory and environment variables in path.
+
+     Args:
+         path: Path that may contain ~ or environment variables
+         force_absolute: Whether to force conversion to absolute path
+
+     Returns:
+         Expanded path, maintaining relative paths unless force_absolute=True
+         or the path contains ~ or environment variables
+     """
+     # First expand user and environment variables
+     expanded = os.path.expanduser(os.path.expandvars(path))
+
+     # If the path hasn't changed and we're not forcing absolute, keep it relative
+     if expanded == path and not force_absolute:
+         return path
+
+     # Otherwise return absolute path
+     return os.path.abspath(expanded)
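A quick illustration of the relative-path rule in expand_path above: a path is returned unchanged unless expanding ~ or environment variables alters it, or force_absolute is set. The absolute paths shown are illustrative only:

    expand_path("docs/readme.md")                        # "docs/readme.md" (kept relative)
    expand_path("docs/readme.md", force_absolute=True)   # e.g. "/home/user/project/docs/readme.md"
    expand_path("~/notes.txt")                           # e.g. "/home/user/notes.txt"
    expand_path("$HOME/notes.txt")                       # e.g. "/home/user/notes.txt"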
+
+
+ def collect_files_from_pattern(
+     pattern: str,
+     security_manager: SecurityManager,
+ ) -> List[FileInfo]:
+     """Collect files matching a glob pattern.
+
+     Args:
+         pattern: Glob pattern to match files
+         security_manager: Security manager for path validation
+
+     Returns:
+         List of FileInfo objects for matched files
+
+     Raises:
+         PathSecurityError: If any matched file is outside base directory
+     """
+     # Expand pattern
+     matched_paths = glob.glob(pattern, recursive=True)
+     if not matched_paths:
+         logger.debug("No files matched pattern: %s", pattern)
+         return []
+
+     # Create FileInfo objects
+     files = []
+     for path in matched_paths:
+         try:
+             file_info = FileInfo.from_path(path, security_manager)
+             files.append(file_info)
+         except PathSecurityError:
+             # Let security errors propagate
+             raise
+         except Exception:
+             logger.warning("Could not process file %s", path)
+
+     return files
+
+
+ def collect_files_from_directory(
+     directory: str,
+     security_manager: SecurityManager,
+     recursive: bool = False,
+     allowed_extensions: Optional[List[str]] = None,
+     **kwargs: Any,
+ ) -> List[FileInfo]:
+     """Collect files from directory.
+
+     Args:
+         directory: Directory to collect files from
+         security_manager: Security manager for path validation
+         recursive: Whether to collect files recursively
+         allowed_extensions: List of allowed file extensions without dots
+         **kwargs: Additional arguments passed to FileInfo.from_path
+
+     Returns:
+         List of FileInfo instances
+
+     Raises:
+         DirectoryNotFoundError: If directory does not exist
+         PathSecurityError: If directory is not allowed
+     """
+     # Validate directory exists and is allowed
+     try:
+         abs_dir = str(security_manager.resolve_path(directory))
+     except PathSecurityError:
+         # Let the original error propagate
+         raise
+
+     if not os.path.exists(abs_dir):
+         raise DirectoryNotFoundError(f"Directory not found: {directory}")
+     if not os.path.isdir(abs_dir):
+         raise DirectoryNotFoundError(f"Path is not a directory: {directory}")
+
+     # Collect files
+     files = []
+     for root, _, filenames in os.walk(abs_dir):
+         if not recursive and root != abs_dir:
+             continue
+
+         for filename in filenames:
+             # Get relative path from base directory
+             abs_path = os.path.join(root, filename)
+             try:
+                 rel_path = os.path.relpath(abs_path, security_manager.base_dir)
+             except ValueError:
+                 # Skip files that can't be made relative
+                 continue
+
+             # Check extension if filter is specified
+             if allowed_extensions is not None:
+                 ext = os.path.splitext(filename)[1].lstrip(".")
+                 if ext not in allowed_extensions:
+                     continue
+
+             try:
+                 file_info = FileInfo.from_path(
+                     rel_path, security_manager=security_manager, **kwargs
+                 )
+                 files.append(file_info)
+             except (FileNotFoundError, PathSecurityError):
+                 # Skip files that can't be accessed
+                 continue
+
+     return files
+
+
+ def _validate_and_split_mapping(
+     mapping: str, mapping_type: str
+ ) -> tuple[str, str]:
+     """Validate and split a name=value mapping.
+
+     Args:
+         mapping: The mapping string to validate (e.g. "name=value")
+         mapping_type: Type of mapping for error messages ("file", "pattern", or "directory")
+
+     Returns:
+         Tuple of (name, value)
+
+     Raises:
+         ValueError: If mapping format is invalid
+     """
+     try:
+         name, value = mapping.split("=", 1)
+     except ValueError:
+         raise ValueError(
+             f"Invalid {mapping_type} mapping format: {mapping!r} (missing '=' separator)"
+         )
+
+     if not name:
+         raise ValueError(f"Empty name in {mapping_type} mapping: {mapping!r}")
+     if not value:
+         raise ValueError(f"Empty value in {mapping_type} mapping: {mapping!r}")
+
+     return name, value
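Because _validate_and_split_mapping splits on the first '=' only, the value part may itself contain '='; for example:

    _validate_and_split_mapping("config=settings/prod.yaml", "file")   # ("config", "settings/prod.yaml")
    _validate_and_split_mapping("query=a=b.txt", "file")               # ("query", "a=b.txt")
    _validate_and_split_mapping("config", "file")                      # ValueError: missing '=' separator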
+
+
+ def collect_files(
+     file_mappings: Optional[List[str]] = None,
+     pattern_mappings: Optional[List[str]] = None,
+     dir_mappings: Optional[List[str]] = None,
+     dir_recursive: bool = False,
+     dir_extensions: Optional[List[str]] = None,
+     security_manager: Optional[SecurityManager] = None,
+     **kwargs: Any,
+ ) -> Dict[str, FileInfoList]:
+     """Collect files from multiple sources.
+
+     Args:
+         file_mappings: List of file mappings in the format "name=path"
+         pattern_mappings: List of pattern mappings in the format "name=pattern"
+         dir_mappings: List of directory mappings in the format "name=directory"
+         dir_recursive: Whether to process directories recursively
+         dir_extensions: List of file extensions to include in directory processing
+         security_manager: Security manager instance
+         **kwargs: Additional arguments passed to FileInfo.from_path
+
+     Returns:
+         Dictionary mapping variable names to FileInfoList instances
+
+     Raises:
+         ValueError: If no files are found or if there are duplicate mappings
+         PathSecurityError: If a path is outside the base directory
+         DirectoryNotFoundError: If a directory is not found
+     """
+     if security_manager is None:
+         security_manager = SecurityManager(base_dir=os.getcwd())
+
+     # Normalize extensions by removing leading dots
+     if dir_extensions:
+         dir_extensions = [ext.lstrip(".") for ext in dir_extensions]
+
+     files: Dict[str, FileInfoList] = {}
+
+     # Process file mappings
+     if file_mappings:
+         for mapping in file_mappings:
+             name, path = _validate_and_split_mapping(mapping, "file")
+             if name in files:
+                 raise ValueError(f"Duplicate file mapping: {name}")
+
+             file_info = FileInfo.from_path(
+                 path, security_manager=security_manager, **kwargs
+             )
+             files[name] = FileInfoList([file_info], from_dir=False)
+
+     # Process pattern mappings
+     if pattern_mappings:
+         for mapping in pattern_mappings:
+             name, pattern = _validate_and_split_mapping(mapping, "pattern")
+             if name in files:
+                 raise ValueError(f"Duplicate pattern mapping: {name}")
+
+             try:
+                 matched_files = collect_files_from_pattern(
+                     pattern, security_manager=security_manager, **kwargs
+                 )
+             except PathSecurityError as e:
+                 raise PathSecurityError(
+                     "Pattern mapping error: Access denied: "
+                     f"{pattern} is outside base directory and not in allowed directories"
+                 ) from e
+
+             if not matched_files:
+                 logger.warning("No files matched pattern: %s", pattern)
+                 continue
+
+             files[name] = FileInfoList(matched_files, from_dir=False)
+
+     # Process directory mappings
+     if dir_mappings:
+         for mapping in dir_mappings:
+             name, directory = _validate_and_split_mapping(mapping, "directory")
+             if name in files:
+                 raise ValueError(f"Duplicate directory mapping: {name}")
+
+             try:
+                 dir_files = collect_files_from_directory(
+                     directory=directory,
+                     security_manager=security_manager,
+                     recursive=dir_recursive,
+                     allowed_extensions=dir_extensions,
+                     **kwargs,
+                 )
+             except PathSecurityError as e:
+                 raise PathSecurityError(
+                     "Directory mapping error: Access denied: "
+                     f"{directory} is outside base directory and not in allowed directories"
+                 ) from e
+             except DirectoryNotFoundError:
+                 raise DirectoryNotFoundError(
+                     f"Directory not found: {directory}"
+                 )
+
+             if not dir_files:
+                 logger.warning("No files found in directory: %s", directory)
+                 files[name] = FileInfoList([], from_dir=True)
+             else:
+                 files[name] = FileInfoList(dir_files, from_dir=True)
+
+     if not files:
+         raise ValueError("No files found")
+
+     return files
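A sketch of how collect_files ties the three mapping kinds together. The paths are illustrative, and the SecurityManager shown comes from the sibling .security module that is not part of this diff:

    sm = SecurityManager(base_dir=os.getcwd())
    files = collect_files(
        file_mappings=["config=settings.yaml"],
        pattern_mappings=["logs=logs/*.txt"],
        dir_mappings=["src=src"],
        dir_recursive=True,
        dir_extensions=[".py"],
        security_manager=sm,
    )
    files["config"].content   # single file mapping -> one string
    files["logs"].content     # pattern mapping -> a list when several files match
    files["src"].path         # directory mapping -> always a list, even for one file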
+
+
+ def detect_encoding(file_path: str) -> str:
+     """Detect the encoding of a file.
+
+     Args:
+         file_path: Path to the file to check
+
+     Returns:
+         str: The detected encoding (e.g. 'utf-8', 'utf-16', etc.)
+
+     Raises:
+         OSError: If there is an error reading the file
+         ValueError: If the encoding cannot be detected
+     """
+     logger = logging.getLogger(__name__)
+     logger.debug("Detecting encoding for file: %s", file_path)
+
+     try:
+         with open(file_path, "rb") as f:
+             # Check for BOM markers first
+             raw_data = f.read(4)
+             if not raw_data:
+                 logger.debug("Empty file")
+                 return "utf-8"
+
+             # Check for common BOMs. The UTF-32 checks must come before the
+             # UTF-16 checks because BOM_UTF16_LE is a prefix of BOM_UTF32_LE.
+             if raw_data.startswith(codecs.BOM_UTF8):
+                 logger.debug("UTF-8 BOM detected")
+                 return "utf-8"
+             elif raw_data.startswith(codecs.BOM_UTF32_LE):
+                 logger.debug("UTF-32 LE BOM detected")
+                 return "utf-32-le"
+             elif raw_data.startswith(codecs.BOM_UTF32_BE):
+                 logger.debug("UTF-32 BE BOM detected")
+                 return "utf-32-be"
+             elif raw_data.startswith(codecs.BOM_UTF16_LE):
+                 logger.debug("UTF-16 LE BOM detected")
+                 return "utf-16-le"
+             elif raw_data.startswith(codecs.BOM_UTF16_BE):
+                 logger.debug("UTF-16 BE BOM detected")
+                 return "utf-16-be"
+
+             # Read more data for chardet (up to 1MB)
+             f.seek(0)
+             raw_data = f.read(
+                 1024 * 1024
+             )  # Read up to 1MB for better detection
+
+             # Try chardet detection
+             result = chardet.detect(raw_data)
+             logger.debug("Chardet detection result: %s", result)
+
+             if result and isinstance(result, dict) and result.get("encoding"):
+                 detected = str(result["encoding"]).lower()
+                 confidence = float(result.get("confidence", 0.0))
+
+                 # Handle ASCII detection
+                 if detected == "ascii":
+                     logger.debug(
+                         "ASCII detected, converting to UTF-8 (confidence: %f)",
+                         confidence,
+                     )
+                     return "utf-8"
+
+                 # High confidence detection
+                 if confidence > 0.9:
+                     logger.debug(
+                         "High confidence encoding detected: %s (confidence: %f)",
+                         detected,
+                         confidence,
+                     )
+                     return detected
+
+                 # Medium confidence - validate with UTF-8 attempt
+                 if confidence > 0.6:
+                     logger.debug(
+                         "Medium confidence for %s (confidence: %f), validating",
+                         detected,
+                         confidence,
+                     )
+                     try:
+                         raw_data.decode("utf-8")
+                         logger.debug("Successfully validated as UTF-8")
+                         return "utf-8"
+                     except UnicodeDecodeError:
+                         logger.debug(
+                             "UTF-8 validation failed, using detected encoding: %s",
+                             detected,
+                         )
+                         return detected
+
+             # Low confidence or no detection - try UTF-8
+             try:
+                 raw_data.decode("utf-8")
+                 logger.debug(
+                     "No confident detection, but UTF-8 decode successful"
+                 )
+                 return "utf-8"
+             except UnicodeDecodeError:
+                 if (
+                     result
+                     and isinstance(result, dict)
+                     and result.get("encoding")
+                 ):
+                     detected_encoding = str(result["encoding"]).lower()
+                     logger.debug(
+                         "Falling back to detected encoding with low confidence: %s",
+                         detected_encoding,
+                     )
+                     return detected_encoding
+
+                 logger.warning(
+                     "Could not confidently detect encoding for %s, defaulting to UTF-8",
+                     file_path,
+                 )
+                 return "utf-8"
+
+     except OSError as e:
+         logger.error("Error reading file %s: %s", file_path, e)
+         raise
+     except Exception as e:
+         logger.error(
+             "Unexpected error detecting encoding for %s: %s",
+             file_path,
+             e,
+         )
+         raise ValueError(f"Failed to detect encoding: {e}")
+
+
+ def read_allowed_dirs_from_file(filepath: str) -> List[str]:
+     """Reads a list of allowed directories from a file.
+
+     Args:
+         filepath: The path to the file.
+
+     Returns:
+         A list of allowed directories as absolute paths.
+
+     Raises:
+         FileNotFoundError: If the file does not exist.
+         ValueError: If the file contains invalid data.
+     """
+     try:
+         with open(filepath, "r") as f:
+             lines = f.readlines()
+     except OSError as e:
+         raise FileNotFoundError(
+             f"Error reading allowed directories from file: {filepath}: {e}"
+         )
+
+     allowed_dirs = []
+     for line in lines:
+         line = line.strip()
+         if line and not line.startswith(
+             "#"
+         ):  # Ignore empty lines and comments
+             abs_path = os.path.abspath(line)
+             if not os.path.isdir(abs_path):
+                 raise ValueError(
+                     f"Invalid directory in allowed directories file '{filepath}': "
+                     f"'{line}' is not a directory or does not exist."
+                 )
+             allowed_dirs.append(abs_path)
+     return allowed_dirs
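Finally, the allowed-directories file consumed by read_allowed_dirs_from_file holds one directory per line, with '#' comments and blank lines ignored; each entry is resolved with os.path.abspath and must already exist, and as written the function does not expand ~ or environment variables in these lines. The file name and paths below are illustrative:

    # Contents of a hypothetical allowed_dirs.txt:
    #     # directories the CLI may read from
    #     /srv/shared/templates
    #     /var/data/reports

    dirs = read_allowed_dirs_from_file("allowed_dirs.txt")
    # -> ["/srv/shared/templates", "/var/data/reports"] if both directories exist;
    #    a missing directory raises ValueError, an unreadable file raises FileNotFoundError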