ostruct-cli 0.7.1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- ostruct/cli/__init__.py +21 -3
- ostruct/cli/base_errors.py +1 -1
- ostruct/cli/cli.py +66 -1983
- ostruct/cli/click_options.py +460 -28
- ostruct/cli/code_interpreter.py +238 -0
- ostruct/cli/commands/__init__.py +32 -0
- ostruct/cli/commands/list_models.py +128 -0
- ostruct/cli/commands/quick_ref.py +50 -0
- ostruct/cli/commands/run.py +137 -0
- ostruct/cli/commands/update_registry.py +71 -0
- ostruct/cli/config.py +277 -0
- ostruct/cli/cost_estimation.py +134 -0
- ostruct/cli/errors.py +310 -6
- ostruct/cli/exit_codes.py +1 -0
- ostruct/cli/explicit_file_processor.py +548 -0
- ostruct/cli/field_utils.py +69 -0
- ostruct/cli/file_info.py +42 -9
- ostruct/cli/file_list.py +301 -102
- ostruct/cli/file_search.py +455 -0
- ostruct/cli/file_utils.py +47 -13
- ostruct/cli/mcp_integration.py +541 -0
- ostruct/cli/model_creation.py +150 -1
- ostruct/cli/model_validation.py +204 -0
- ostruct/cli/progress_reporting.py +398 -0
- ostruct/cli/registry_updates.py +14 -9
- ostruct/cli/runner.py +1418 -0
- ostruct/cli/schema_utils.py +113 -0
- ostruct/cli/services.py +626 -0
- ostruct/cli/template_debug.py +748 -0
- ostruct/cli/template_debug_help.py +162 -0
- ostruct/cli/template_env.py +15 -6
- ostruct/cli/template_filters.py +55 -3
- ostruct/cli/template_optimizer.py +474 -0
- ostruct/cli/template_processor.py +1080 -0
- ostruct/cli/template_rendering.py +69 -34
- ostruct/cli/token_validation.py +286 -0
- ostruct/cli/types.py +78 -0
- ostruct/cli/unattended_operation.py +269 -0
- ostruct/cli/validators.py +386 -3
- {ostruct_cli-0.7.1.dist-info → ostruct_cli-0.8.0.dist-info}/LICENSE +2 -0
- ostruct_cli-0.8.0.dist-info/METADATA +633 -0
- ostruct_cli-0.8.0.dist-info/RECORD +69 -0
- {ostruct_cli-0.7.1.dist-info → ostruct_cli-0.8.0.dist-info}/WHEEL +1 -1
- ostruct_cli-0.7.1.dist-info/METADATA +0 -369
- ostruct_cli-0.7.1.dist-info/RECORD +0 -45
- {ostruct_cli-0.7.1.dist-info → ostruct_cli-0.8.0.dist-info}/entry_points.txt +0 -0
ostruct/cli/file_search.py
ADDED
@@ -0,0 +1,455 @@
+"""File Search integration for ostruct CLI.
+
+This module provides support for uploading files to OpenAI's File Search
+(vector store) and integrating retrieval capabilities with the OpenAI Responses API.
+Includes retry logic for reliability improvements.
+"""
+
+import asyncio
+import logging
+import os
+import time
+from pathlib import Path
+from typing import Any, Dict, List
+
+from openai import AsyncOpenAI
+
+logger = logging.getLogger(__name__)
+
+
+class FileSearchManager:
+    """Manager for File Search vector store operations with retry logic."""
+
+    def __init__(self, client: AsyncOpenAI):
+        """Initialize File Search manager.
+
+        Args:
+            client: AsyncOpenAI client instance
+        """
+        self.client = client
+        self.uploaded_file_ids: List[str] = []
+        self.created_vector_stores: List[str] = []
+
+    async def create_vector_store_with_retry(
+        self,
+        name: str = "ostruct_vector_store",
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ) -> str:
+        """Create a vector store with retry logic for reliability.
+
+        Args:
+            name: Name for the vector store
+            max_retries: Maximum number of retry attempts
+            retry_delay: Initial delay between retries (exponential backoff)
+
+        Returns:
+            Vector store ID
+
+        Raises:
+            Exception: If creation fails after all retries
+        """
+        last_exception = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                logger.debug(
+                    f"Creating vector store '{name}' (attempt {attempt + 1}/{max_retries + 1})"
+                )
+
+                vector_store = await self.client.vector_stores.create(
+                    name=name,
+                    expires_after={
+                        "anchor": "last_active_at",
+                        "days": 7,  # Automatically expire after 7 days of inactivity
+                    },
+                )
+
+                self.created_vector_stores.append(vector_store.id)
+                logger.debug(
+                    f"Successfully created vector store: {vector_store.id}"
+                )
+                return vector_store.id
+
+            except Exception as e:
+                last_exception = e
+                logger.warning(
+                    f"Vector store creation attempt {attempt + 1} failed: {e}"
+                )
+
+                if attempt < max_retries:
+                    delay = retry_delay * (2**attempt)  # Exponential backoff
+                    logger.debug(f"Retrying in {delay:.1f} seconds...")
+                    await asyncio.sleep(delay)
+                else:
+                    logger.error(
+                        f"Vector store creation failed after {max_retries + 1} attempts"
+                    )
+
+        raise Exception(
+            f"Failed to create vector store after {max_retries + 1} attempts: {last_exception}"
+        )
+
+    async def upload_files_to_vector_store(
+        self,
+        vector_store_id: str,
+        files: List[str],
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ) -> List[str]:
+        """Upload files to vector store with retry logic.
+
+        Args:
+            vector_store_id: ID of the vector store
+            files: List of file paths to upload
+            max_retries: Maximum number of retry attempts per file
+            retry_delay: Initial delay between retries
+
+        Returns:
+            List of successfully uploaded file IDs
+
+        Raises:
+            Exception: If upload fails for any file after all retries
+        """
+        file_ids = []
+
+        for file_path in files:
+            try:
+                # Validate file exists
+                if not os.path.exists(file_path):
+                    raise FileNotFoundError(f"File not found: {file_path}")
+
+                # Upload file with retry logic
+                file_id = await self._upload_single_file_with_retry(
+                    file_path, max_retries, retry_delay
+                )
+                file_ids.append(file_id)
+
+            except Exception as e:
+                logger.error(f"Failed to upload file {file_path}: {e}")
+                # Clean up any successfully uploaded files on error
+                await self._cleanup_uploaded_files(file_ids)
+                raise
+
+        # Add files to vector store with retry logic
+        try:
+            await self._add_files_to_vector_store_with_retry(
+                vector_store_id, file_ids, max_retries, retry_delay
+            )
+        except Exception as e:
+            logger.error(f"Failed to add files to vector store: {e}")
+            await self._cleanup_uploaded_files(file_ids)
+            raise
+
+        # Store for potential cleanup
+        self.uploaded_file_ids.extend(file_ids)
+        return file_ids
+
+    async def _upload_single_file_with_retry(
+        self, file_path: str, max_retries: int, retry_delay: float
+    ) -> str:
+        """Upload a single file with retry logic.
+
+        Args:
+            file_path: Path to the file to upload
+            max_retries: Maximum number of retry attempts
+            retry_delay: Initial delay between retries
+
+        Returns:
+            File ID of uploaded file
+
+        Raises:
+            Exception: If upload fails after all retries
+        """
+        last_exception = None
+        file_size = os.path.getsize(file_path)
+
+        for attempt in range(max_retries + 1):
+            try:
+                logger.debug(
+                    f"Uploading {file_path} ({file_size} bytes) - attempt {attempt + 1}/{max_retries + 1}"
+                )
+
+                with open(file_path, "rb") as f:
+                    file_obj = await self.client.files.create(
+                        file=f,
+                        purpose="assistants",  # Required for File Search
+                    )
+
+                logger.debug(
+                    f"Successfully uploaded {file_path} with ID: {file_obj.id}"
+                )
+                return file_obj.id
+
+            except Exception as e:
+                last_exception = e
+                logger.warning(
+                    f"File upload attempt {attempt + 1} failed for {file_path}: {e}"
+                )
+
+                if attempt < max_retries:
+                    delay = retry_delay * (2**attempt)
+                    logger.debug(f"Retrying upload in {delay:.1f} seconds...")
+                    await asyncio.sleep(delay)
+                else:
+                    logger.error(
+                        f"File upload failed after {max_retries + 1} attempts: {file_path}"
+                    )
+
+        raise Exception(
+            f"Failed to upload {file_path} after {max_retries + 1} attempts: {last_exception}"
+        )
+
+    async def _add_files_to_vector_store_with_retry(
+        self,
+        vector_store_id: str,
+        file_ids: List[str],
+        max_retries: int,
+        retry_delay: float,
+    ) -> None:
+        """Add files to vector store with retry logic.
+
+        Args:
+            vector_store_id: ID of the vector store
+            file_ids: List of file IDs to add
+            max_retries: Maximum number of retry attempts
+            retry_delay: Initial delay between retries
+
+        Raises:
+            Exception: If adding files fails after all retries
+        """
+        last_exception = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                logger.debug(
+                    f"Adding {len(file_ids)} files to vector store - attempt {attempt + 1}/{max_retries + 1}"
+                )
+
+                await self.client.vector_stores.file_batches.create(
+                    vector_store_id=vector_store_id, file_ids=file_ids
+                )
+
+                logger.debug(
+                    f"Successfully added files to vector store: {vector_store_id}"
+                )
+                return
+
+            except Exception as e:
+                last_exception = e
+                logger.warning(
+                    f"Adding files to vector store attempt {attempt + 1} failed: {e}"
+                )
+
+                if attempt < max_retries:
+                    delay = retry_delay * (2**attempt)
+                    logger.debug(f"Retrying in {delay:.1f} seconds...")
+                    await asyncio.sleep(delay)
+                else:
+                    logger.error(
+                        f"Adding files to vector store failed after {max_retries + 1} attempts"
+                    )
+
+        raise Exception(
+            f"Failed to add files to vector store after {max_retries + 1} attempts: {last_exception}"
+        )
+
+    async def wait_for_vector_store_ready(
+        self,
+        vector_store_id: str,
+        timeout: float = 60.0,
+        poll_interval: float = 2.0,
+    ) -> bool:
+        """Wait for vector store to be ready for search.
+
+        Based on probe tests, indexing is typically instant (0.0s average),
+        but we include polling for reliability.
+
+        Args:
+            vector_store_id: ID of the vector store
+            timeout: Maximum time to wait in seconds
+            poll_interval: Time between status checks in seconds
+
+        Returns:
+            True if vector store is ready, False if timeout
+        """
+        start_time = time.time()
+
+        while time.time() - start_time < timeout:
+            try:
+                vector_store = await self.client.vector_stores.retrieve(
+                    vector_store_id
+                )
+
+                if vector_store.status == "completed":
+                    logger.debug(f"Vector store {vector_store_id} is ready")
+                    return True
+                elif vector_store.status == "failed":
+                    logger.error(
+                        f"Vector store {vector_store_id} failed to index"
+                    )
+                    return False
+
+                logger.debug(
+                    f"Vector store {vector_store_id} status: {vector_store.status}"
+                )
+                await asyncio.sleep(poll_interval)
+
+            except Exception as e:
+                logger.warning(f"Error checking vector store status: {e}")
+                await asyncio.sleep(poll_interval)
+
+        logger.warning(
+            f"Vector store {vector_store_id} not ready after {timeout}s timeout"
+        )
+        return False
+
+    def build_tool_config(self, vector_store_id: str) -> dict:
+        """Build File Search tool configuration.
+
+        Creates a tool configuration compatible with the OpenAI Responses API
+        for File Search functionality.
+
+        Args:
+            vector_store_id: ID of the vector store to search
+
+        Returns:
+            Tool configuration dict for Responses API
+        """
+        return {
+            "type": "file_search",
+            "vector_store_ids": [vector_store_id],
+        }
+
+    async def cleanup_resources(self) -> None:
+        """Clean up uploaded files and created vector stores.
+
+        This method removes files and vector stores that were created during
+        the session to avoid accumulating resources in the user's OpenAI account.
+        """
+        # Clean up uploaded files
+        await self._cleanup_uploaded_files(self.uploaded_file_ids)
+        self.uploaded_file_ids.clear()
+
+        # Clean up vector stores
+        await self._cleanup_vector_stores(self.created_vector_stores)
+        self.created_vector_stores.clear()
+
+    async def _cleanup_uploaded_files(self, file_ids: List[str]) -> None:
+        """Internal method to clean up specific file IDs.
+
+        Args:
+            file_ids: List of file IDs to delete
+        """
+        for file_id in file_ids:
+            try:
+                await self.client.files.delete(file_id)
+                logger.debug(f"Cleaned up uploaded file: {file_id}")
+            except Exception as e:
+                logger.warning(f"Failed to clean up file {file_id}: {e}")
+
+    async def _cleanup_vector_stores(
+        self, vector_store_ids: List[str]
+    ) -> None:
+        """Internal method to clean up specific vector store IDs.
+
+        Args:
+            vector_store_ids: List of vector store IDs to delete
+        """
+        for vs_id in vector_store_ids:
+            try:
+                await self.client.vector_stores.delete(vs_id)
+                logger.debug(f"Cleaned up vector store: {vs_id}")
+            except Exception as e:
+                logger.warning(f"Failed to clean up vector store {vs_id}: {e}")
+
+    def validate_files_for_file_search(self, files: List[str]) -> List[str]:
+        """Validate files are suitable for File Search upload.
+
+        Args:
+            files: List of file paths to validate
+
+        Returns:
+            List of validation error messages, empty if all files are valid
+        """
+        errors = []
+
+        # File types commonly supported by File Search
+        supported_extensions = {
+            ".txt",
+            ".md",
+            ".pdf",
+            ".docx",
+            ".doc",
+            ".rtf",
+            ".html",
+            ".xml",
+            ".csv",
+            ".json",
+            ".jsonl",
+            ".py",
+            ".js",
+            ".ts",
+            ".java",
+            ".cpp",
+            ".c",
+            ".h",
+            ".sql",
+            ".yaml",
+            ".yml",
+            ".toml",
+            ".ini",
+            ".log",
+        }
+
+        # Size limits for File Search (approximate)
+        max_file_size = 100 * 1024 * 1024  # 100MB
+
+        for file_path in files:
+            try:
+                if not os.path.exists(file_path):
+                    errors.append(f"File not found: {file_path}")
+                    continue
+
+                # Check file size
+                file_size = os.path.getsize(file_path)
+                if file_size > max_file_size:
+                    errors.append(
+                        f"File too large: {file_path} ({file_size / 1024 / 1024:.1f}MB > 100MB)"
+                    )
+
+                # Check file extension
+                file_ext = Path(file_path).suffix.lower()
+                if file_ext not in supported_extensions:
+                    logger.warning(
+                        f"File extension {file_ext} may not be optimal for File Search: {file_path}"
+                    )
+
+                # Check if file is empty
+                if file_size == 0:
+                    errors.append(f"File is empty: {file_path}")
+
+            except Exception as e:
+                errors.append(f"Error validating file {file_path}: {e}")
+
+        return errors
+
+    def get_performance_info(self) -> Dict[str, Any]:
+        """Get information about File Search performance characteristics.
+
+        Returns:
+            Dictionary with performance information
+        """
+        return {
+            "indexing_time": "typically instant (0.0s average based on probe tests)",
+            "reliability_note": "File Search can be intermittent - retry logic is essential",
+            "max_file_size_mb": 100,
+            "supported_formats": [
+                "PDF",
+                "text files",
+                "documents",
+                "code files",
+            ],
+            "vector_store_expiry": "7 days of inactivity",
+            "retry_strategy": "exponential backoff with 3 retries by default",
+        }
ostruct/cli/file_utils.py
CHANGED
@@ -51,11 +51,13 @@ from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
 import chardet
 
-from .errors import (
+from ostruct.cli.errors import (
     DirectoryNotFoundError,
+    DuplicateFileMappingError,
     OstructFileNotFoundError,
     PathSecurityError,
 )
+
 from .file_info import FileInfo
 from .file_list import FileInfoList
 from .security import SecurityManager
@@ -113,12 +115,14 @@ def expand_path(path: str, force_absolute: bool = False) -> str:
 def collect_files_from_pattern(
     pattern: str,
     security_manager: SecurityManager,
+    routing_type: Optional[str] = None,
 ) -> List[FileInfo]:
     """Collect files matching a glob pattern or exact file path.
 
     Args:
         pattern: Glob pattern or file path to match
         security_manager: Security manager for path validation
+        routing_type: How the file was routed
 
     Returns:
         List of FileInfo objects for matched files
@@ -129,7 +133,9 @@ def collect_files_from_pattern(
     # First check if it's an exact file path
     if os.path.isfile(pattern):
         try:
-            file_info = FileInfo.from_path(
+            file_info = FileInfo.from_path(
+                pattern, security_manager, routing_type=routing_type
+            )
             return [file_info]
         except PathSecurityError:
             raise
@@ -147,7 +153,9 @@ def collect_files_from_pattern(
     files: List[FileInfo] = []
     for path in matched_paths:
         try:
-            file_info = FileInfo.from_path(
+            file_info = FileInfo.from_path(
+                path, security_manager, routing_type=routing_type
+            )
             files.append(file_info)
         except PathSecurityError:
             # Let security errors propagate
@@ -163,6 +171,7 @@ def collect_files_from_directory(
     security_manager: SecurityManager,
     recursive: bool = False,
     allowed_extensions: Optional[List[str]] = None,
+    routing_type: Optional[str] = None,
     **kwargs: Any,
 ) -> List[FileInfo]:
     """Collect files from a directory.
@@ -172,6 +181,7 @@ def collect_files_from_directory(
         security_manager: Security manager for path validation
         recursive: Whether to process subdirectories
         allowed_extensions: List of allowed file extensions (without dot)
+        routing_type: How the file was routed
         **kwargs: Additional arguments passed to FileInfo.from_path
 
     Returns:
@@ -270,7 +280,10 @@ def collect_files_from_directory(
             try:
                 # Use absolute path when creating FileInfo
                 file_info = FileInfo.from_path(
-                    abs_path,
+                    abs_path,
+                    security_manager=security_manager,
+                    routing_type=routing_type,
+                    **kwargs,
                 )
                 files.append(file_info)
                 logger.debug("Added file to list: %s", abs_path)
@@ -333,6 +346,7 @@ def collect_files(
     dir_recursive: bool = False,
     dir_extensions: Optional[List[str]] = None,
     security_manager: Optional[SecurityManager] = None,
+    routing_type: Optional[str] = None,
     **kwargs: Any,
 ) -> Dict[str, FileInfoList]:
     """Collect files from multiple sources.
@@ -344,6 +358,7 @@ def collect_files(
         dir_recursive: Whether to process directories recursively
         dir_extensions: List of file extensions to include in directory processing
         security_manager: Security manager instance
+        routing_type: How the files were routed (passed to FileInfo)
        **kwargs: Additional arguments passed to FileInfo.from_path
 
     Returns:
@@ -388,12 +403,19 @@ def collect_files(
         logger.debug("Processing file mapping: %s", mapping)
         name, path = _validate_and_split_mapping(mapping, "file")
         if name in files:
-            raise
+            raise DuplicateFileMappingError(
+                f"Duplicate file mapping: {name}"
+            )
 
         file_info = FileInfo.from_path(
-            str(path),
+            str(path),
+            security_manager=security_manager,
+            routing_type=routing_type,
+            **kwargs,
+        )
+        files[name] = FileInfoList(
+            [file_info], from_dir=False, var_alias=name
         )
-        files[name] = FileInfoList([file_info], from_dir=False)
         logger.debug("Added single file mapping: %s -> %s", name, path)
 
     # Process pattern mappings
@@ -402,11 +424,16 @@ def collect_files(
         logger.debug("Processing pattern mapping: %s", mapping)
         name, pattern = _validate_and_split_mapping(mapping, "pattern")
         if name in files:
-            raise
+            raise DuplicateFileMappingError(
+                f"Duplicate pattern mapping: {name}"
+            )
 
         try:
             matched_files = collect_files_from_pattern(
-                str(pattern),
+                str(pattern),
+                security_manager=security_manager,
+                routing_type=routing_type,
+                **kwargs,
             )
         except PathSecurityError as e:
             logger.debug("Security error in pattern mapping: %s", str(e))
@@ -419,7 +446,9 @@ def collect_files(
             logger.warning("No files matched pattern: %s", pattern)
             continue
 
-        files[name] = FileInfoList(
+        files[name] = FileInfoList(
+            matched_files, from_dir=False, var_alias=name
+        )
         logger.debug(
             "Added pattern mapping: %s -> %s (%d files)",
             name,
@@ -433,7 +462,9 @@ def collect_files(
         logger.debug("Processing directory mapping: %s", mapping)
         name, directory = _validate_and_split_mapping(mapping, "directory")
         if name in files:
-            raise
+            raise DuplicateFileMappingError(
+                f"Duplicate directory mapping: {name}"
+            )
 
         logger.debug(
             "Processing directory mapping: %s -> %s", name, directory
@@ -444,6 +475,7 @@ def collect_files(
                 security_manager=security_manager,
                 recursive=dir_recursive,
                 allowed_extensions=dir_extensions,
+                routing_type=routing_type,
                 **kwargs,
             )
         except PathSecurityError as e:
@@ -461,9 +493,11 @@ def collect_files(
 
         if not dir_files:
             logger.warning("No files found in directory: %s", directory)
-            files[name] = FileInfoList([], from_dir=True)
+            files[name] = FileInfoList([], from_dir=True, var_alias=name)
         else:
-            files[name] = FileInfoList(
+            files[name] = FileInfoList(
+                dir_files, from_dir=True, var_alias=name
+            )
             logger.debug(
                 "Added directory mapping: %s -> %s (%d files)",
                 name,
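To illustrate the new routing_type parameter threaded through file_utils.py, here is a minimal sketch using collect_files_from_pattern, whose updated signature appears above. The no-argument SecurityManager construction and the "template" routing label are assumptions for the example, not taken from this diff.

from ostruct.cli.file_utils import collect_files_from_pattern
from ostruct.cli.security import SecurityManager

# Hypothetical default construction; real usage may require allowed paths.
security_manager = SecurityManager()

# routing_type is now forwarded to FileInfo.from_path for every matched file.
matched = collect_files_from_pattern(
    "src/**/*.py",
    security_manager,
    routing_type="template",  # hypothetical routing label
)
print(f"Matched {len(matched)} files")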
|