ostruct-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/__init__.py +0 -0
- ostruct/cli/__init__.py +19 -0
- ostruct/cli/cache_manager.py +175 -0
- ostruct/cli/cli.py +2033 -0
- ostruct/cli/errors.py +329 -0
- ostruct/cli/file_info.py +316 -0
- ostruct/cli/file_list.py +151 -0
- ostruct/cli/file_utils.py +518 -0
- ostruct/cli/path_utils.py +123 -0
- ostruct/cli/progress.py +105 -0
- ostruct/cli/security.py +311 -0
- ostruct/cli/security_types.py +49 -0
- ostruct/cli/template_env.py +55 -0
- ostruct/cli/template_extensions.py +51 -0
- ostruct/cli/template_filters.py +650 -0
- ostruct/cli/template_io.py +261 -0
- ostruct/cli/template_rendering.py +347 -0
- ostruct/cli/template_schema.py +565 -0
- ostruct/cli/template_utils.py +288 -0
- ostruct/cli/template_validation.py +375 -0
- ostruct/cli/utils.py +31 -0
- ostruct/py.typed +0 -0
- ostruct_cli-0.1.0.dist-info/LICENSE +21 -0
- ostruct_cli-0.1.0.dist-info/METADATA +182 -0
- ostruct_cli-0.1.0.dist-info/RECORD +27 -0
- ostruct_cli-0.1.0.dist-info/WHEEL +4 -0
- ostruct_cli-0.1.0.dist-info/entry_points.txt +3 -0
ostruct/cli/file_list.py
ADDED
@@ -0,0 +1,151 @@
"""FileInfoList implementation providing smart file content access."""

from typing import List, Union

from .file_info import FileInfo

__all__ = ["FileInfoList", "FileInfo"]


class FileInfoList(List[FileInfo]):
    """List of FileInfo objects with smart content access.

    This class extends List[FileInfo] to provide convenient access to file contents
    and metadata. When the list contains exactly one file from a single file mapping,
    properties like content return the value directly. For multiple files or directory
    mappings, properties return a list of values.

    Examples:
        Single file (--file):
            files = FileInfoList([file_info], from_dir=False)
            content = files.content  # Returns "file contents"

        Multiple files or directory (--files or --dir):
            files = FileInfoList([file1, file2])  # or FileInfoList([file1], from_dir=True)
            content = files.content  # Returns ["contents1", "contents2"] or ["contents1"]

        Backward compatibility:
            content = files[0].content  # Still works

    Properties:
        content: File content(s) - string for single file mapping, list for multiple files or directory
        path: File path(s)
        abs_path: Absolute file path(s)
        size: File size(s) in bytes

    Raises:
        ValueError: When accessing properties on an empty list
    """

    def __init__(self, files: List[FileInfo], from_dir: bool = False) -> None:
        """Initialize FileInfoList.

        Args:
            files: List of FileInfo objects
            from_dir: Whether this list was created from a directory mapping
        """
        super().__init__(files)
        self._from_dir = from_dir

    @property
    def content(self) -> Union[str, List[str]]:
        """Get the content of the file(s).

        Returns:
            Union[str, List[str]]: For a single file from file mapping, returns its content as a string.
                For multiple files or directory mapping, returns a list of contents.

        Raises:
            ValueError: If the list is empty
        """
        if not self:
            raise ValueError("No files in FileInfoList")
        if len(self) == 1 and not self._from_dir:
            return self[0].content
        return [f.content for f in self]

    @property
    def path(self) -> Union[str, List[str]]:
        """Get the path of the file(s).

        Returns:
            Union[str, List[str]]: For a single file from file mapping, returns its path as a string.
                For multiple files or directory mapping, returns a list of paths.

        Raises:
            ValueError: If the list is empty
        """
        if not self:
            raise ValueError("No files in FileInfoList")
        if len(self) == 1 and not self._from_dir:
            return self[0].path
        return [f.path for f in self]

    @property
    def abs_path(self) -> Union[str, List[str]]:
        """Get the absolute path of the file(s).

        Returns:
            Union[str, List[str]]: For a single file from file mapping, returns its absolute path as a string.
                For multiple files or directory mapping, returns a list of absolute paths.

        Raises:
            ValueError: If the list is empty
        """
        if not self:
            raise ValueError("No files in FileInfoList")
        if len(self) == 1 and not self._from_dir:
            return self[0].abs_path
        return [f.abs_path for f in self]

    @property
    def size(self) -> Union[int, List[int]]:
        """Get file size(s) in bytes.

        Returns:
            Union[int, List[int]]: For a single file from file mapping, returns its size in bytes.
                For multiple files or directory mapping, returns a list of sizes.

        Raises:
            ValueError: If the list is empty or if any file size is None
        """
        if not self:
            raise ValueError("No files in FileInfoList")

        # For single file not from directory, return its size
        if len(self) == 1 and not self._from_dir:
            size = self[0].size
            if size is None:
                raise ValueError(
                    f"Could not get size for file: {self[0].path}"
                )
            return size

        # For multiple files, collect all sizes
        sizes = []
        for f in self:
            size = f.size
            if size is None:
                raise ValueError(f"Could not get size for file: {f.path}")
            sizes.append(size)
        return sizes

    def __str__(self) -> str:
        """Get string representation of the file list.

        Returns:
            str: String representation in format FileInfoList([paths])
        """
        if not self:
            return "FileInfoList([])"
        if len(self) == 1:
            return f"FileInfoList(['{self[0].path}'])"
        return f"FileInfoList({[f.path for f in self]})"

    def __repr__(self) -> str:
        """Get detailed string representation of the file list.

        Returns:
            str: Same as str() for consistency
        """
        return str(self)
ostruct/cli/file_utils.py
ADDED
@@ -0,0 +1,518 @@
"""File utilities for the CLI.

This module provides utilities for file operations with security controls:

1. File Information:
   - FileInfo class for safe file access and metadata
   - Support for file content caching
   - Automatic encoding detection

2. Path Handling:
   - Supports ~ expansion for home directory
   - Supports environment variable expansion
   - Security checks for file access
   - Requires explicit allowed directories for access outside CWD

3. Security Features:
   - Directory traversal prevention
   - Explicit allowed directory configuration
   - Temporary file access controls
   - Path validation and normalization

Usage Examples:
    Basic file access (from current directory):
    >>> info = FileInfo.from_path("var_name", "local_file.txt")
    >>> content = info.content

    Access home directory files (requires --allowed-dir):
    >>> info = FileInfo.from_path("var_name", "~/file.txt", allowed_dirs=["~/"])
    >>> content = info.content

    Multiple file collection:
    >>> files = collect_files(
    ...     file_args=["var=path.txt"],
    ...     allowed_dirs=["/allowed/path"],
    ...     recursive=True
    ... )

Security Notes:
    - Files must be in current directory or explicitly allowed directories
    - Use --allowed-dir to access files outside current directory
    - Home directory (~) is not automatically allowed
    - Environment variables are expanded in paths
"""

import codecs
import glob
import logging
import os
from typing import Any, Dict, List, Optional, Type, Union

import chardet

from .errors import (
    DirectoryNotFoundError,
    FileNotFoundError,
    PathSecurityError,
)
from .file_info import FileInfo
from .file_list import FileInfoList
from .security import SecurityManager
from .security_types import SecurityManagerProtocol

__all__ = [
    "FileInfo",  # Re-exported from file_info
    "SecurityManager",  # Re-exported from security
    "FileInfoList",  # Re-exported from file_list
    "collect_files",
    "collect_files_from_pattern",
    "collect_files_from_directory",
    "detect_encoding",
    "expand_path",
    "read_allowed_dirs_from_file",
]

logger = logging.getLogger(__name__)

# Type for values in template context
TemplateValue = Union[str, List[str], Dict[str, str]]


def _get_security_manager() -> Type[SecurityManagerProtocol]:
    """Get the SecurityManager class.

    Returns:
        The SecurityManager class type
    """
    return SecurityManager


def expand_path(path: str, force_absolute: bool = False) -> str:
    """Expand user home directory and environment variables in path.

    Args:
        path: Path that may contain ~ or environment variables
        force_absolute: Whether to force conversion to absolute path

    Returns:
        Expanded path, maintaining relative paths unless force_absolute=True
        or the path contains ~ or environment variables
    """
    # First expand user and environment variables
    expanded = os.path.expanduser(os.path.expandvars(path))

    # If the path hasn't changed and we're not forcing absolute, keep it relative
    if expanded == path and not force_absolute:
        return path

    # Otherwise return absolute path
    return os.path.abspath(expanded)


def collect_files_from_pattern(
    pattern: str,
    security_manager: SecurityManager,
) -> List[FileInfo]:
    """Collect files matching a glob pattern.

    Args:
        pattern: Glob pattern to match files
        security_manager: Security manager for path validation

    Returns:
        List of FileInfo objects for matched files

    Raises:
        PathSecurityError: If any matched file is outside base directory
    """
    # Expand pattern
    matched_paths = glob.glob(pattern, recursive=True)
    if not matched_paths:
        logger.debug("No files matched pattern: %s", pattern)
        return []

    # Create FileInfo objects
    files = []
    for path in matched_paths:
        try:
            file_info = FileInfo.from_path(path, security_manager)
            files.append(file_info)
        except PathSecurityError:
            # Let security errors propagate
            raise
        except Exception:
            logger.warning("Could not process file %s", path)

    return files


def collect_files_from_directory(
    directory: str,
    security_manager: SecurityManager,
    recursive: bool = False,
    allowed_extensions: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[FileInfo]:
    """Collect files from directory.

    Args:
        directory: Directory to collect files from
        security_manager: Security manager for path validation
        recursive: Whether to collect files recursively
        allowed_extensions: List of allowed file extensions without dots
        **kwargs: Additional arguments passed to FileInfo.from_path

    Returns:
        List of FileInfo instances

    Raises:
        DirectoryNotFoundError: If directory does not exist
        PathSecurityError: If directory is not allowed
    """
    # Validate directory exists and is allowed
    try:
        abs_dir = str(security_manager.resolve_path(directory))
    except PathSecurityError:
        # Let the original error propagate
        raise

    if not os.path.exists(abs_dir):
        raise DirectoryNotFoundError(f"Directory not found: {directory}")
    if not os.path.isdir(abs_dir):
        raise DirectoryNotFoundError(f"Path is not a directory: {directory}")

    # Collect files
    files = []
    for root, _, filenames in os.walk(abs_dir):
        if not recursive and root != abs_dir:
            continue

        for filename in filenames:
            # Get relative path from base directory
            abs_path = os.path.join(root, filename)
            try:
                rel_path = os.path.relpath(abs_path, security_manager.base_dir)
            except ValueError:
                # Skip files that can't be made relative
                continue

            # Check extension if filter is specified
            if allowed_extensions is not None:
                ext = os.path.splitext(filename)[1].lstrip(".")
                if ext not in allowed_extensions:
                    continue

            try:
                file_info = FileInfo.from_path(
                    rel_path, security_manager=security_manager, **kwargs
                )
                files.append(file_info)
            except (FileNotFoundError, PathSecurityError):
                # Skip files that can't be accessed
                continue

    return files


def _validate_and_split_mapping(
    mapping: str, mapping_type: str
) -> tuple[str, str]:
    """Validate and split a name=value mapping.

    Args:
        mapping: The mapping string to validate (e.g. "name=value")
        mapping_type: Type of mapping for error messages ("file", "pattern", or "directory")

    Returns:
        Tuple of (name, value)

    Raises:
        ValueError: If mapping format is invalid
    """
    try:
        name, value = mapping.split("=", 1)
    except ValueError:
        raise ValueError(
            f"Invalid {mapping_type} mapping format: {mapping!r} (missing '=' separator)"
        )

    if not name:
        raise ValueError(f"Empty name in {mapping_type} mapping: {mapping!r}")
    if not value:
        raise ValueError(f"Empty value in {mapping_type} mapping: {mapping!r}")

    return name, value


def collect_files(
    file_mappings: Optional[List[str]] = None,
    pattern_mappings: Optional[List[str]] = None,
    dir_mappings: Optional[List[str]] = None,
    dir_recursive: bool = False,
    dir_extensions: Optional[List[str]] = None,
    security_manager: Optional[SecurityManager] = None,
    **kwargs: Any,
) -> Dict[str, FileInfoList]:
    """Collect files from multiple sources.

    Args:
        file_mappings: List of file mappings in the format "name=path"
        pattern_mappings: List of pattern mappings in the format "name=pattern"
        dir_mappings: List of directory mappings in the format "name=directory"
        dir_recursive: Whether to process directories recursively
        dir_extensions: List of file extensions to include in directory processing
        security_manager: Security manager instance
        **kwargs: Additional arguments passed to FileInfo.from_path

    Returns:
        Dictionary mapping variable names to FileInfoList instances

    Raises:
        ValueError: If no files are found or if there are duplicate mappings
        PathSecurityError: If a path is outside the base directory
        DirectoryNotFoundError: If a directory is not found
    """
    if security_manager is None:
        security_manager = SecurityManager(base_dir=os.getcwd())

    # Normalize extensions by removing leading dots
    if dir_extensions:
        dir_extensions = [ext.lstrip(".") for ext in dir_extensions]

    files: Dict[str, FileInfoList] = {}

    # Process file mappings
    if file_mappings:
        for mapping in file_mappings:
            name, path = _validate_and_split_mapping(mapping, "file")
            if name in files:
                raise ValueError(f"Duplicate file mapping: {name}")

            file_info = FileInfo.from_path(
                path, security_manager=security_manager, **kwargs
            )
            files[name] = FileInfoList([file_info], from_dir=False)

    # Process pattern mappings
    if pattern_mappings:
        for mapping in pattern_mappings:
            name, pattern = _validate_and_split_mapping(mapping, "pattern")
            if name in files:
                raise ValueError(f"Duplicate pattern mapping: {name}")

            try:
                matched_files = collect_files_from_pattern(
                    pattern, security_manager=security_manager, **kwargs
                )
            except PathSecurityError as e:
                raise PathSecurityError(
                    "Pattern mapping error: Access denied: "
                    f"{pattern} is outside base directory and not in allowed directories"
                ) from e

            if not matched_files:
                logger.warning("No files matched pattern: %s", pattern)
                continue

            files[name] = FileInfoList(matched_files, from_dir=False)

    # Process directory mappings
    if dir_mappings:
        for mapping in dir_mappings:
            name, directory = _validate_and_split_mapping(mapping, "directory")
            if name in files:
                raise ValueError(f"Duplicate directory mapping: {name}")

            try:
                dir_files = collect_files_from_directory(
                    directory=directory,
                    security_manager=security_manager,
                    recursive=dir_recursive,
                    allowed_extensions=dir_extensions,
                    **kwargs,
                )
            except PathSecurityError as e:
                raise PathSecurityError(
                    "Directory mapping error: Access denied: "
                    f"{directory} is outside base directory and not in allowed directories"
                ) from e
            except DirectoryNotFoundError:
                raise DirectoryNotFoundError(
                    f"Directory not found: {directory}"
                )

            if not dir_files:
                logger.warning("No files found in directory: %s", directory)
                files[name] = FileInfoList([], from_dir=True)
            else:
                files[name] = FileInfoList(dir_files, from_dir=True)

    if not files:
        raise ValueError("No files found")

    return files


def detect_encoding(file_path: str) -> str:
    """Detect the encoding of a file.

    Args:
        file_path: Path to the file to check

    Returns:
        str: The detected encoding (e.g. 'utf-8', 'utf-16', etc.)

    Raises:
        OSError: If there is an error reading the file
        ValueError: If the encoding cannot be detected
    """
    logger = logging.getLogger(__name__)
    logger.debug("Detecting encoding for file: %s", file_path)

    try:
        with open(file_path, "rb") as f:
            # Check for BOM markers first
            raw_data = f.read(4)
            if not raw_data:
                logger.debug("Empty file")
                return "utf-8"

            # Check for common BOMs
            if raw_data.startswith(codecs.BOM_UTF8):
                logger.debug("UTF-8 BOM detected")
                return "utf-8"
            elif raw_data.startswith(codecs.BOM_UTF16_LE):
                logger.debug("UTF-16 LE BOM detected")
                return "utf-16-le"
            elif raw_data.startswith(codecs.BOM_UTF16_BE):
                logger.debug("UTF-16 BE BOM detected")
                return "utf-16-be"
            elif raw_data.startswith(codecs.BOM_UTF32_LE):
                logger.debug("UTF-32 LE BOM detected")
                return "utf-32-le"
            elif raw_data.startswith(codecs.BOM_UTF32_BE):
                logger.debug("UTF-32 BE BOM detected")
                return "utf-32-be"

            # Read more data for chardet (up to 1MB)
            f.seek(0)
            raw_data = f.read(
                1024 * 1024
            )  # Read up to 1MB for better detection

            # Try chardet detection
            result = chardet.detect(raw_data)
            logger.debug("Chardet detection result: %s", result)

            if result and isinstance(result, dict) and result.get("encoding"):
                detected = str(result["encoding"]).lower()
                confidence = float(result.get("confidence", 0.0))

                # Handle ASCII detection
                if detected == "ascii":
                    logger.debug(
                        "ASCII detected, converting to UTF-8 (confidence: %f)",
                        confidence,
                    )
                    return "utf-8"

                # High confidence detection
                if confidence > 0.9:
                    logger.debug(
                        "High confidence encoding detected: %s (confidence: %f)",
                        detected,
                        confidence,
                    )
                    return detected

                # Medium confidence - validate with UTF-8 attempt
                if confidence > 0.6:
                    logger.debug(
                        "Medium confidence for %s (confidence: %f), validating",
                        detected,
                        confidence,
                    )
                    try:
                        raw_data.decode("utf-8")
                        logger.debug("Successfully validated as UTF-8")
                        return "utf-8"
                    except UnicodeDecodeError:
                        logger.debug(
                            "UTF-8 validation failed, using detected encoding: %s",
                            detected,
                        )
                        return detected

            # Low confidence or no detection - try UTF-8
            try:
                raw_data.decode("utf-8")
                logger.debug(
                    "No confident detection, but UTF-8 decode successful"
                )
                return "utf-8"
            except UnicodeDecodeError:
                if (
                    result
                    and isinstance(result, dict)
                    and result.get("encoding")
                ):
                    detected_encoding = str(result["encoding"]).lower()
                    logger.debug(
                        "Falling back to detected encoding with low confidence: %s",
                        detected_encoding,
                    )
                    return detected_encoding

                logger.warning(
                    "Could not confidently detect encoding for %s, defaulting to UTF-8",
                    file_path,
                )
                return "utf-8"

    except OSError as e:
        logger.error("Error reading file %s: %s", file_path, e)
        raise
    except Exception as e:
        logger.error(
            "Unexpected error detecting encoding for %s: %s",
            file_path,
            e,
        )
        raise ValueError(f"Failed to detect encoding: {e}")


def read_allowed_dirs_from_file(filepath: str) -> List[str]:
    """Reads a list of allowed directories from a file.

    Args:
        filepath: The path to the file.

    Returns:
        A list of allowed directories as absolute paths.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file contains invalid data.
    """
    try:
        with open(filepath, "r") as f:
            lines = f.readlines()
    except OSError as e:
        raise FileNotFoundError(
            f"Error reading allowed directories from file: {filepath}: {e}"
        )

    allowed_dirs = []
    for line in lines:
        line = line.strip()
        if line and not line.startswith(
            "#"
        ):  # Ignore empty lines and comments
            abs_path = os.path.abspath(line)
            if not os.path.isdir(abs_path):
                raise ValueError(
                    f"Invalid directory in allowed directories file '{filepath}': "
                    f"'{line}' is not a directory or does not exist."
                )
            allowed_dirs.append(abs_path)
    return allowed_dirs
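
For reference, a minimal usage sketch of the collection helpers defined above, assuming a working directory that contains a config.yaml file and a docs/ subdirectory (both hypothetical). It follows the collect_files signature defined in this version of the file rather than the older parameter names (file_args, allowed_dirs, recursive) still shown in the module docstring, and it constructs a SecurityManager the same way collect_files does by default.

# Usage sketch only - not part of the packaged module. File names are
# hypothetical; SecurityManager may accept further options not shown here.
import os

from ostruct.cli.file_utils import collect_files, detect_encoding
from ostruct.cli.security import SecurityManager

sm = SecurityManager(base_dir=os.getcwd())

files = collect_files(
    file_mappings=["config=config.yaml"],  # single file mapping -> scalar access
    dir_mappings=["docs=docs"],            # directory mapping -> list access
    dir_recursive=True,
    dir_extensions=[".md", "txt"],         # leading dots are stripped internally
    security_manager=sm,
)

print(files["config"].content)  # contents of config.yaml as a single string
for doc in files["docs"]:       # FileInfoList still behaves as a list of FileInfo
    print(doc.path, doc.size)

print(detect_encoding("config.yaml"))  # e.g. "utf-8"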