ostruct-cli 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. ostruct/cli/__init__.py +21 -3
  2. ostruct/cli/base_errors.py +1 -1
  3. ostruct/cli/cli.py +66 -1983
  4. ostruct/cli/click_options.py +460 -28
  5. ostruct/cli/code_interpreter.py +238 -0
  6. ostruct/cli/commands/__init__.py +32 -0
  7. ostruct/cli/commands/list_models.py +128 -0
  8. ostruct/cli/commands/quick_ref.py +50 -0
  9. ostruct/cli/commands/run.py +137 -0
  10. ostruct/cli/commands/update_registry.py +71 -0
  11. ostruct/cli/config.py +277 -0
  12. ostruct/cli/cost_estimation.py +134 -0
  13. ostruct/cli/errors.py +310 -6
  14. ostruct/cli/exit_codes.py +1 -0
  15. ostruct/cli/explicit_file_processor.py +548 -0
  16. ostruct/cli/field_utils.py +69 -0
  17. ostruct/cli/file_info.py +42 -9
  18. ostruct/cli/file_list.py +301 -102
  19. ostruct/cli/file_search.py +455 -0
  20. ostruct/cli/file_utils.py +47 -13
  21. ostruct/cli/mcp_integration.py +541 -0
  22. ostruct/cli/model_creation.py +150 -1
  23. ostruct/cli/model_validation.py +204 -0
  24. ostruct/cli/progress_reporting.py +398 -0
  25. ostruct/cli/registry_updates.py +14 -9
  26. ostruct/cli/runner.py +1418 -0
  27. ostruct/cli/schema_utils.py +113 -0
  28. ostruct/cli/services.py +626 -0
  29. ostruct/cli/template_debug.py +748 -0
  30. ostruct/cli/template_debug_help.py +162 -0
  31. ostruct/cli/template_env.py +15 -6
  32. ostruct/cli/template_filters.py +55 -3
  33. ostruct/cli/template_optimizer.py +474 -0
  34. ostruct/cli/template_processor.py +1080 -0
  35. ostruct/cli/template_rendering.py +69 -34
  36. ostruct/cli/token_validation.py +286 -0
  37. ostruct/cli/types.py +78 -0
  38. ostruct/cli/unattended_operation.py +269 -0
  39. ostruct/cli/validators.py +386 -3
  40. {ostruct_cli-0.7.2.dist-info → ostruct_cli-0.8.0.dist-info}/LICENSE +2 -0
  41. ostruct_cli-0.8.0.dist-info/METADATA +633 -0
  42. ostruct_cli-0.8.0.dist-info/RECORD +69 -0
  43. {ostruct_cli-0.7.2.dist-info → ostruct_cli-0.8.0.dist-info}/WHEEL +1 -1
  44. ostruct_cli-0.7.2.dist-info/METADATA +0 -370
  45. ostruct_cli-0.7.2.dist-info/RECORD +0 -45
  46. {ostruct_cli-0.7.2.dist-info → ostruct_cli-0.8.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1080 @@
1
+ """Template processing functions for ostruct CLI."""
2
+
3
+ import json
4
+ import logging
5
+ import re
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
9
+
10
+ import click
11
+ import jinja2
12
+ import yaml
13
+
14
+ from .errors import (
15
+ DirectoryNotFoundError,
16
+ DuplicateFileMappingError,
17
+ InvalidJSONError,
18
+ OstructFileNotFoundError,
19
+ PathSecurityError,
20
+ SystemPromptError,
21
+ TaskTemplateSyntaxError,
22
+ TaskTemplateVariableError,
23
+ VariableNameError,
24
+ )
25
+ from .explicit_file_processor import ProcessingResult
26
+ from .file_utils import FileInfoList, collect_files
27
+ from .path_utils import validate_path_mapping
28
+ from .security import SecurityManager
29
+ from .template_optimizer import (
30
+ is_optimization_beneficial,
31
+ optimize_template_for_llm,
32
+ )
33
+ from .template_utils import render_template
34
+ from .types import CLIParams
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+ DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
39
+
40
+
41
def _render_template_with_debug(
    template_content: str,
    context: Dict[str, Any],
    env: jinja2.Environment,
    no_optimization: bool = False,
    show_optimization_diff: bool = False,
    show_optimization_steps: bool = False,
    optimization_step_detail: str = "summary",
) -> str:
    """Render a template while honouring the optimization debug flags.

    Args:
        template_content: Raw template text to render
        context: Variables made available to the template
        env: Jinja2 environment used for rendering
        no_optimization: When true, render the raw template and skip
            every optimization code path
        show_optimization_diff: When true, display a before/after diff
        show_optimization_steps: When true, display optimization steps
        optimization_step_detail: "detailed" selects the detailed step
            listing; any other value selects the summary

    Returns:
        The rendered template text
    """
    from .template_debug import show_optimization_diff as show_diff

    # Optimization disabled: render exactly what the user wrote.
    if no_optimization:
        return env.from_string(template_content).render(**context)

    # No debug output requested: the standard renderer handles everything.
    debug_requested = show_optimization_diff or show_optimization_steps
    if not debug_requested:
        return render_template(template_content, context, env)

    if is_optimization_beneficial(template_content):
        tracker = None
        if show_optimization_steps:
            from .template_debug import OptimizationStepTracker

            tracker = OptimizationStepTracker(enabled=True)

        result = optimize_template_for_llm(template_content, tracker)

        if result.has_optimizations:
            if show_optimization_diff:
                show_diff(template_content, result.optimized_template)

            if show_optimization_steps and tracker:
                if optimization_step_detail == "detailed":
                    tracker.show_detailed_steps()
                else:
                    tracker.show_step_summary()

            # Render the optimized text directly so the output matches
            # what was just displayed.
            return env.from_string(result.optimized_template).render(
                **context
            )

    # Nothing was optimized; still emit the requested (empty) reports.
    if show_optimization_diff:
        show_diff(template_content, template_content)
    if show_optimization_steps:
        from .template_debug import (
            show_optimization_steps as show_steps_func,
        )

        show_steps_func([], optimization_step_detail)

    # Fall back to standard rendering (which includes optimization)
    return render_template(template_content, context, env)
120
+
121
+
122
def _frontmatter_prompt_parts(
    task_template: str,
    template_context: Dict[str, Any],
    env: jinja2.Environment,
    template_path: Optional[str],
) -> List[str]:
    """Return system prompt fragments declared in YAML frontmatter.

    Fragments are returned in order: the ``include_system`` file content
    (only when ``template_path`` is known), then the rendered
    ``system_prompt`` value. An empty list means the template has no
    usable frontmatter.
    """
    if not task_template.startswith("---\n"):
        return []
    end = task_template.find("\n---\n", 4)
    if end == -1:
        return []

    try:
        metadata = yaml.safe_load(task_template[4:end])
    except yaml.YAMLError as e:
        raise SystemPromptError(f"Invalid YAML frontmatter: {e}") from e
    if not isinstance(metadata, dict):
        return []

    parts: List[str] = []

    # include_system is resolved relative to the template file's directory
    inc = metadata.get("include_system")
    if inc and template_path:
        inc_path = (Path(template_path).parent / inc).resolve()
        if not inc_path.is_file():
            raise click.ClickException(
                f"include_system file not found: {inc}"
            )
        parts.append(inc_path.read_text(encoding="utf-8"))

    if "system_prompt" in metadata:
        try:
            rendered = (
                env.from_string(str(metadata["system_prompt"]))
                .render(**template_context)
                .strip()
            )
        except jinja2.TemplateError as e:
            raise SystemPromptError(
                f"Error rendering system prompt: {e}"
            ) from e
        parts.append(rendered)

    return parts


def process_system_prompt(
    task_template: str,
    system_prompt: Optional[str],
    system_prompt_file: Optional[str],
    template_context: Dict[str, Any],
    env: jinja2.Environment,
    ignore_task_sysprompt: bool = False,
    template_path: Optional[str] = None,
) -> str:
    """Process system prompt from various sources.

    Sources are tried in this order: ``system_prompt_file``,
    ``system_prompt``, then the task template's YAML frontmatter
    (``include_system`` followed by ``system_prompt``). When none of them
    yields content, ``DEFAULT_SYSTEM_PROMPT`` is used.

    Args:
        task_template: The task template string
        system_prompt: Optional system prompt string
        system_prompt_file: Optional path to system prompt file
        template_context: Template context for rendering
        env: Jinja2 environment
        ignore_task_sysprompt: Whether to ignore system prompt in task template
        template_path: Optional path to template file for include_system resolution

    Returns:
        The final system prompt string

    Raises:
        SystemPromptError: If both CLI options are given, or the system
            prompt cannot be loaded, parsed or rendered. Path security
            violations and missing-file errors reported by path validation
            are wrapped in this error. Other errors raised while reading
            ``system_prompt_file`` are not caught here.
    """
    # Check for conflicting arguments
    if system_prompt is not None and system_prompt_file is not None:
        raise SystemPromptError(
            "Cannot specify both --system-prompt and --system-prompt-file"
        )

    # CLI system prompt takes precedence and stops further processing
    if system_prompt_file is not None:
        try:
            _, path = validate_path_mapping(
                f"system_prompt={system_prompt_file}"
            )
            with open(path, "r", encoding="utf-8") as f:
                cli_system_prompt = f.read().strip()
        except OstructFileNotFoundError as e:
            raise SystemPromptError(
                f"Failed to load system prompt file: {e}"
            ) from e
        except PathSecurityError as e:
            raise SystemPromptError(f"Invalid system prompt file: {e}") from e

        try:
            template = env.from_string(cli_system_prompt)
            base_prompt = template.render(**template_context).strip()
        except jinja2.TemplateError as e:
            raise SystemPromptError(
                f"Error rendering system prompt: {e}"
            ) from e

    elif system_prompt is not None:
        try:
            template = env.from_string(system_prompt)
            base_prompt = template.render(**template_context).strip()
        except jinja2.TemplateError as e:
            raise SystemPromptError(
                f"Error rendering system prompt: {e}"
            ) from e

    else:
        frontmatter_parts: List[str] = []
        if not ignore_task_sysprompt:
            try:
                frontmatter_parts = _frontmatter_prompt_parts(
                    task_template, template_context, env, template_path
                )
            except Exception as e:
                # Every failure here (including the ClickException for a
                # missing include_system file) surfaces as SystemPromptError.
                raise SystemPromptError(
                    f"Error extracting system prompt from template: {e}"
                ) from e

        # The default prompt is only a stub: it is dropped as soon as the
        # template contributes any content of its own.
        if frontmatter_parts:
            base_prompt = "\n\n".join(frontmatter_parts).strip()
        else:
            base_prompt = DEFAULT_SYSTEM_PROMPT

    # Add web search tool instructions if web search is enabled
    web_search_enabled = template_context.get("web_search_enabled", False)
    if web_search_enabled:
        web_search_instructions = (
            "\n\nYou have access to a web search tool for finding up-to-date information. "
            "Use it when you need current events, recent data, or real-time information. "
            "When using web search results, cite your sources in any 'sources' or 'references' "
            "field in the JSON schema if available. Do not include [n] citation markers "
            "within other JSON fields - keep the main content clean and put citations "
            "in dedicated source fields."
        )
        base_prompt += web_search_instructions

    return base_prompt
267
+
268
+
269
def validate_task_template(
    task: Optional[str], task_file: Optional[str]
) -> str:
    """Validate and load a task template.

    Exactly one of ``task`` and ``task_file`` must be given. The template
    text is parsed with Jinja2 to check its syntax; it is not rendered.

    Args:
        task: The task template string
        task_file: Path to task template file

    Returns:
        The task template string

    Raises:
        TaskTemplateVariableError: If neither or both of task and task_file
            are provided, or if the template file cannot be read (missing
            file, permission denied, or any other read error)
        TaskTemplateSyntaxError: If the template has invalid syntax
    """
    if task is not None and task_file is not None:
        raise TaskTemplateVariableError(
            "Cannot specify both --task and --task-file"
        )

    if task is None and task_file is None:
        raise TaskTemplateVariableError(
            "Must specify either --task or --task-file"
        )

    template_content: str
    if task_file is not None:
        try:
            with open(task_file, "r", encoding="utf-8") as f:
                template_content = f.read()
        except FileNotFoundError as e:
            raise TaskTemplateVariableError(
                f"Task template file not found: {task_file}"
            ) from e
        except PermissionError as e:
            raise TaskTemplateVariableError(
                f"Permission denied reading task template file: {task_file}"
            ) from e
        except Exception as e:
            raise TaskTemplateVariableError(
                f"Error reading task template file: {e}"
            ) from e
    else:
        # The both-None case was rejected above, so task is a str here.
        template_content = cast(str, task)

    try:
        env = jinja2.Environment(undefined=jinja2.StrictUndefined)
        env.parse(template_content)
    except jinja2.TemplateSyntaxError as e:
        raise TaskTemplateSyntaxError(
            f"Invalid Jinja2 template syntax: {e.message}",
            context={
                "line": e.lineno,
                "template_file": task_file,
                "template_preview": template_content[:200],
            },
        ) from e
    return template_content
330
+
331
+
332
async def process_templates(
    args: CLIParams,
    task_template: str,
    template_context: Dict[str, Any],
    env: jinja2.Environment,
    template_path: Optional[str] = None,
) -> Tuple[str, str]:
    """Process system prompt and user prompt templates.

    Args:
        args: Command line arguments
        task_template: Validated task template
        template_context: Template context dictionary
        env: Jinja2 environment
        template_path: Optional path of the template file; passed to
            system prompt processing and included in debug output

    Returns:
        Tuple of (system_prompt, user_prompt)

    Raises:
        CLIError: For template processing errors
    """
    logger.debug("=== Template Processing Phase ===")

    # Add template debugging if enabled
    debug_enabled = args.get("debug", False)
    debug_templates_enabled = args.get("debug_templates", False)
    show_context = args.get("show_context", False)
    show_context_detailed = args.get("show_context_detailed", False)
    show_pre_optimization = args.get("show_pre_optimization", False)
    show_optimization_diff = args.get("show_optimization_diff", False)
    no_optimization = args.get("no_optimization", False)
    show_optimization_steps = args.get("show_optimization_steps", False)
    optimization_step_detail = args.get("optimization_step_detail", "summary")

    debugger = None
    if debug_enabled or debug_templates_enabled:
        from .template_debug import (
            TemplateDebugger,
            show_file_content_expansions,
        )

        # Initialize template debugger
        debugger = TemplateDebugger(enabled=True)

        # Log template context
        show_file_content_expansions(template_context)

        # Log raw template before expansion
        logger.debug("Raw task template:")
        logger.debug(task_template)

        # Log initial template state
        debugger.log_expansion_step(
            "Initial template loaded",
            "",
            task_template,
            {"template_path": template_path},
        )

    # Show context inspection if requested
    if show_context or show_context_detailed:
        from .template_debug import (
            detect_undefined_variables,
            display_context_detailed,
            display_context_summary,
        )

        # The detailed view takes priority when both flags are set.
        if show_context_detailed:
            display_context_detailed(template_context)
        else:
            display_context_summary(template_context)

        # Check for undefined variables if context inspection is enabled
        undefined_vars = detect_undefined_variables(
            task_template, template_context
        )
        if undefined_vars:
            click.echo(
                f"⚠️ Potentially undefined variables: {', '.join(undefined_vars)}",
                err=True,
            )
            click.echo(
                f" Available variables: {', '.join(sorted(template_context.keys()))}",
                err=True,
            )

    system_prompt = process_system_prompt(
        task_template,
        args.get("system_prompt"),
        args.get("system_prompt_file"),
        template_context,
        env,
        args.get("ignore_task_sysprompt", False),
        template_path,
    )

    # Log system prompt processing step
    if debugger:
        # Either CLI option overrides the template's own system prompt,
        # so both count as a "custom" source.
        cli_prompt_given = bool(
            args.get("system_prompt") or args.get("system_prompt_file")
        )
        debugger.log_expansion_step(
            "System prompt processed",
            task_template,
            system_prompt,
            {
                "system_prompt_source": (
                    "custom" if cli_prompt_given else "task_template"
                )
            },
        )

    # Handle pre-optimization template display
    if show_pre_optimization:
        from .template_debug import show_pre_optimization_template

        show_pre_optimization_template(task_template)

    # Handle optimization debugging with custom rendering
    if no_optimization or show_optimization_diff or show_optimization_steps:
        # We need custom handling for optimization debugging
        user_prompt = _render_template_with_debug(
            task_template,
            template_context,
            env,
            no_optimization=bool(no_optimization),
            show_optimization_diff=bool(show_optimization_diff),
            show_optimization_steps=bool(show_optimization_steps),
            optimization_step_detail=str(optimization_step_detail),
        )
    else:
        # Standard rendering with optimization
        user_prompt = render_template(task_template, template_context, env)

    # Log user prompt rendering step
    if debugger:
        debugger.log_expansion_step(
            "User prompt rendered",
            task_template,
            user_prompt,
            template_context,
        )

    # Log template expansion if debug enabled
    if debug_enabled or debug_templates_enabled:
        from .template_debug import log_template_expansion

        log_template_expansion(
            template_content=task_template,
            context=template_context,
            expanded=user_prompt,
            template_file=template_path,
        )

        # Show expansion summary and detailed steps
        if debugger:
            debugger.show_expansion_summary()
            debugger.show_detailed_expansion()

            # Show expansion statistics
            stats = debugger.get_expansion_stats()
            if stats:
                logger.debug(
                    "📊 Expansion Stats: %s steps, %s variables",
                    stats["total_steps"],
                    stats["unique_variables"],
                )

    return system_prompt, user_prompt
500
+
501
+
502
def collect_simple_variables(args: CLIParams) -> Dict[str, str]:
    """Collect simple string variables from --var arguments.

    Each entry of ``args["var"]`` is either a ``(name, value)`` tuple or a
    ``"name=value"`` string; only the first ``=`` separates name from value.

    Args:
        args: Command line arguments

    Returns:
        Dictionary mapping variable names to string values

    Raises:
        VariableNameError: If a mapping is malformed, or a variable name is
            invalid or duplicate
    """
    variables: Dict[str, str] = {}

    for mapping in args.get("var") or ():
        # Only the unpacking is guarded: a ValueError here means the entry
        # did not split into exactly a name and a value.
        try:
            if isinstance(mapping, tuple):
                name, value = mapping
            else:
                name, value = mapping.split("=", 1)
        except ValueError as e:
            raise VariableNameError(
                f"Invalid variable mapping (expected name=value format): {mapping!r}"
            ) from e

        if not name.isidentifier():
            raise VariableNameError(f"Invalid variable name: {name}")
        if name in variables:
            raise VariableNameError(f"Duplicate variable name: {name}")
        variables[name] = value

    return variables
538
+
539
+
540
def collect_json_variables(args: CLIParams) -> Dict[str, Any]:
    """Collect JSON variables from --json-var arguments.

    Each entry of ``args["json_var"]`` is either a ``(name, value)`` tuple,
    whose value is used as-is, or a ``"name=json"`` string whose right-hand
    side is parsed with ``json.loads``.

    Args:
        args: Command line arguments

    Returns:
        Dictionary mapping variable names to parsed JSON values

    Raises:
        VariableNameError: If a mapping is malformed, or a variable name is
            invalid or duplicate
        InvalidJSONError: If a JSON value is invalid
    """
    variables: Dict[str, Any] = {}

    for mapping in args.get("json_var") or ():
        # Tuples carry a value that is already parsed by the Click validator
        pre_parsed = isinstance(mapping, tuple)
        try:
            name, raw = mapping if pre_parsed else mapping.split("=", 1)
        except ValueError as e:
            raise VariableNameError(
                f"Invalid JSON variable mapping format: {mapping}. Expected name=json"
            ) from e

        if pre_parsed:
            value = raw
        else:
            try:
                value = json.loads(raw)
            except json.JSONDecodeError as e:
                raise InvalidJSONError(
                    f"Invalid JSON value for variable '{name}': {raw}",
                    context={"variable_name": name},
                ) from e

        if not name.isidentifier():
            raise VariableNameError(f"Invalid variable name: {name}")
        if name in variables:
            raise VariableNameError(f"Duplicate variable name: {name}")

        variables[name] = value

    return variables
590
+
591
+
592
def collect_template_files(
    args: CLIParams,
    security_manager: SecurityManager,
) -> Dict[str, Union[FileInfoList, str, List[str], Dict[str, str]]]:
    """Collect files from command line arguments.

    Reads the ``files``, ``dir`` and ``patterns`` mappings (and the
    ``recursive`` flag) from ``args`` and hands them to ``collect_files``
    with ``routing_type="template"``.

    Args:
        args: Command line arguments
        security_manager: Security manager for path validation

    Returns:
        Dictionary mapping variable names to file info objects

    Raises:
        PathSecurityError: If any file paths violate security constraints
        ValueError: If file mappings are invalid or files cannot be accessed
    """
    try:
        # Get files, directories, and patterns from args - they are already tuples from Click's nargs=2
        files = list(
            args.get("files", [])
        )  # List of (name, path) tuples from Click
        dirs = args.get("dir", [])  # List of (name, dir) tuples from Click
        patterns = args.get(
            "patterns", []
        )  # List of (name, pattern) tuples from Click

        # Collect files from directories and patterns
        dir_files = collect_files(
            file_mappings=cast(List[Tuple[str, Union[str, Path]]], files),
            dir_mappings=cast(List[Tuple[str, Union[str, Path]]], dirs),
            pattern_mappings=cast(
                List[Tuple[str, Union[str, Path]]], patterns
            ),
            dir_recursive=args.get("recursive", False),
            security_manager=security_manager,
            routing_type="template",  # Indicate these are primarily for template access
        )

        # Combine results
        return cast(
            Dict[str, Union[FileInfoList, str, List[str], Dict[str, str]]],
            dir_files,
        )

    except Exception as e:
        # Unwrap a security error that was chained as the explicit cause,
        # so callers see the security error itself. The class-name check
        # also matches security error types other than PathSecurityError.
        cause = e.__cause__
        if cause is not None and (
            "SecurityError" in str(cause.__class__)
            or isinstance(cause, PathSecurityError)
        ):
            raise cause
        # InvalidJSONError and DuplicateFileMappingError propagate without
        # an error log entry; everything else is logged, then re-raised.
        if not isinstance(e, (InvalidJSONError, DuplicateFileMappingError)):
            logger.error(
                "Error collecting template files: %s", str(e), exc_info=True
            )
        raise
662
+
663
+
664
def extract_template_file_paths(template_context: Dict[str, Any]) -> List[str]:
    """Extract actual file paths from template context for token validation.

    Only values that are FileInfoList instances contribute paths. Every
    other entry (for example the ``stdin`` content or ``current_model``
    name) is ignored.

    Args:
        template_context: Template context dictionary containing FileInfoList objects

    Returns:
        List of file paths that were included in template rendering
    """
    file_paths: List[str] = []

    for value in template_context.values():
        if not isinstance(value, FileInfoList):
            continue
        # Entries without a ``path`` attribute are skipped rather than
        # treated as an error.
        file_paths.extend(
            file_info.path for file_info in value if hasattr(file_info, "path")
        )

    return file_paths
689
+
690
+
691
def create_template_context(
    files: Optional[
        Dict[str, Union[FileInfoList, str, List[str], Dict[str, str]]]
    ] = None,
    variables: Optional[Dict[str, str]] = None,
    json_variables: Optional[Dict[str, Any]] = None,
    security_manager: Optional[SecurityManager] = None,
    stdin_content: Optional[str] = None,
) -> Dict[str, Any]:
    """Build the template context from files, variables and stdin.

    Sources are merged in the order files, simple variables, JSON
    variables; on a name clash the later source wins. ``stdin_content``,
    when not None, is stored under the ``"stdin"`` key last.
    ``security_manager`` is accepted but not used by this function.

    Returns:
        A new dictionary holding the merged context
    """
    merged: Dict[str, Any] = {}

    # File values are stored unchanged (FileInfoList wrappers are kept).
    for source in (files, variables, json_variables):
        if source:
            merged.update(source)

    if stdin_content is not None:
        merged["stdin"] = stdin_content

    return merged
721
+
722
+
723
+ def _generate_template_variable_name(file_path: str) -> str:
724
+ """Generate a template variable name from a file path.
725
+
726
+ Converts filename to a valid template variable name by:
727
+ 1. Taking the full filename (with extension)
728
+ 2. Replacing dots and other special characters with underscores
729
+ 3. Ensuring it starts with a letter or underscore
730
+
731
+ Examples:
732
+ data.csv -> data_csv
733
+ data.json -> data_json
734
+ my-file.txt -> my_file_txt
735
+ 123data.xml -> _123data_xml
736
+
737
+ Args:
738
+ file_path: Path to the file
739
+
740
+ Returns:
741
+ Valid template variable name
742
+ """
743
+ filename = Path(file_path).name
744
+ # Replace special characters with underscores
745
+ var_name = re.sub(r"[^a-zA-Z0-9_]", "_", filename)
746
+ # Ensure it starts with letter or underscore
747
+ if var_name and var_name[0].isdigit():
748
+ var_name = "_" + var_name
749
+ return var_name
750
+
751
+
752
+ async def create_template_context_from_routing(
753
+ args: CLIParams,
754
+ security_manager: SecurityManager,
755
+ routing_result: ProcessingResult,
756
+ ) -> Dict[str, Any]:
757
+ """Create template context from explicit file routing result.
758
+
759
+ Args:
760
+ args: Command line arguments
761
+ security_manager: Security manager for path validation
762
+ routing_result: Result from explicit file processor
763
+
764
+ Returns:
765
+ Template context dictionary
766
+
767
+ Raises:
768
+ PathSecurityError: If any file paths violate security constraints
769
+ VariableError: If variable mappings are invalid
770
+ ValueError: If file mappings are invalid or files cannot be accessed
771
+ """
772
+ try:
773
+ # Get files from routing result - include ALL routed files in template context
774
+ template_files = routing_result.validated_files.get("template", [])
775
+ code_interpreter_files = routing_result.validated_files.get(
776
+ "code-interpreter", []
777
+ )
778
+ file_search_files = routing_result.validated_files.get(
779
+ "file-search", []
780
+ )
781
+
782
+ # Convert to the format expected by create_template_context
783
+ # For legacy compatibility, we need (name, path) tuples
784
+ files_tuples = []
785
+ seen_files = set() # Track files to avoid duplicates
786
+
787
+ # Add template files - now single-argument auto-naming only
788
+ template_file_paths = args.get("template_files", [])
789
+ for template_file_path in template_file_paths:
790
+ if isinstance(template_file_path, (str, Path)):
791
+ # Auto-generate name for single-arg form: -ft config.yaml
792
+ file_name = _generate_template_variable_name(
793
+ str(template_file_path)
794
+ )
795
+ file_path_str = str(template_file_path)
796
+ if file_path_str not in seen_files:
797
+ files_tuples.append((file_name, file_path_str))
798
+ seen_files.add(file_path_str)
799
+
800
+ # Add template file aliases (from --fta) - two-argument explicit naming
801
+ template_file_aliases = args.get("template_file_aliases", [])
802
+ for name_path_tuple in template_file_aliases:
803
+ if (
804
+ isinstance(name_path_tuple, tuple)
805
+ and len(name_path_tuple) == 2
806
+ ):
807
+ custom_name, file_path_raw = name_path_tuple
808
+ file_path = str(file_path_raw)
809
+ file_name = str(
810
+ custom_name
811
+ ) # Always use custom name for aliases
812
+
813
+ if file_path not in seen_files:
814
+ files_tuples.append((file_name, file_path))
815
+ seen_files.add(file_path)
816
+
817
+ # Also process template_files from routing result (for compatibility)
818
+ for template_file_item in template_files:
819
+ if isinstance(template_file_item, (str, Path)):
820
+ file_name = _generate_template_variable_name(
821
+ str(template_file_item)
822
+ )
823
+ file_path_str = str(template_file_item)
824
+ if file_path_str not in seen_files:
825
+ files_tuples.append((file_name, file_path_str))
826
+ seen_files.add(file_path_str)
827
+ elif (
828
+ isinstance(template_file_item, tuple)
829
+ and len(template_file_item) == 2
830
+ ):
831
+ # Handle tuple format (name, path)
832
+ _, template_file_path = template_file_item
833
+ template_file_path_str = str(template_file_path)
834
+ file_name = _generate_template_variable_name(
835
+ template_file_path_str
836
+ )
837
+ if template_file_path_str not in seen_files:
838
+ files_tuples.append((file_name, template_file_path_str))
839
+ seen_files.add(template_file_path_str)
840
+
841
+ # Add code interpreter files - now single-argument auto-naming only
842
+ code_interpreter_file_paths = args.get("code_interpreter_files", [])
843
+ for ci_file_path in code_interpreter_file_paths:
844
+ if isinstance(ci_file_path, (str, Path)):
845
+ # Auto-generate name: -fc data.csv
846
+ file_name = _generate_template_variable_name(str(ci_file_path))
847
+ file_path_str = str(ci_file_path)
848
+ if file_path_str not in seen_files:
849
+ files_tuples.append((file_name, file_path_str))
850
+ seen_files.add(file_path_str)
851
+
852
+ # Add code interpreter file aliases (from --fca) - two-argument explicit naming
853
+ code_interpreter_file_aliases = args.get(
854
+ "code_interpreter_file_aliases", []
855
+ )
856
+ for name_path_tuple in code_interpreter_file_aliases:
857
+ if (
858
+ isinstance(name_path_tuple, tuple)
859
+ and len(name_path_tuple) == 2
860
+ ):
861
+ custom_name, file_path_raw = name_path_tuple
862
+ file_path = str(file_path_raw)
863
+ file_name = str(
864
+ custom_name
865
+ ) # Always use custom name for aliases
866
+
867
+ if file_path not in seen_files:
868
+ files_tuples.append((file_name, file_path))
869
+ seen_files.add(file_path)
870
+
871
+ # Also process code_interpreter_files from routing result (for compatibility)
872
+ for ci_file_item in code_interpreter_files:
873
+ if isinstance(ci_file_item, (str, Path)):
874
+ file_name = _generate_template_variable_name(str(ci_file_item))
875
+ file_path_str = str(ci_file_item)
876
+ if file_path_str not in seen_files:
877
+ files_tuples.append((file_name, file_path_str))
878
+ seen_files.add(file_path_str)
879
+ elif isinstance(ci_file_item, tuple) and len(ci_file_item) == 2:
880
+ # Handle tuple format (name, path)
881
+ _, ci_file_path = ci_file_item
882
+ ci_file_path_str = str(ci_file_path)
883
+ file_name = _generate_template_variable_name(ci_file_path_str)
884
+ if ci_file_path_str not in seen_files:
885
+ files_tuples.append((file_name, ci_file_path_str))
886
+ seen_files.add(ci_file_path_str)
887
+
888
+ # Add file search files - now single-argument auto-naming only
889
+ file_search_file_paths = args.get("file_search_files", [])
890
+ for fs_file_path in file_search_file_paths:
891
+ if isinstance(fs_file_path, (str, Path)):
892
+ # Auto-generate name: -fs docs.pdf
893
+ file_name = _generate_template_variable_name(str(fs_file_path))
894
+ file_path_str = str(fs_file_path)
895
+ if file_path_str not in seen_files:
896
+ files_tuples.append((file_name, file_path_str))
897
+ seen_files.add(file_path_str)
898
+
899
+ # Add file search file aliases (from --fsa) - two-argument explicit naming
900
+ file_search_file_aliases = args.get("file_search_file_aliases", [])
901
+ for name_path_tuple in file_search_file_aliases:
902
+ if (
903
+ isinstance(name_path_tuple, tuple)
904
+ and len(name_path_tuple) == 2
905
+ ):
906
+ custom_name, file_path_raw = name_path_tuple
907
+ file_path = str(file_path_raw)
908
+ file_name = str(
909
+ custom_name
910
+ ) # Always use custom name for aliases
911
+
912
+ if file_path not in seen_files:
913
+ files_tuples.append((file_name, file_path))
914
+ seen_files.add(file_path)
915
+
916
+ # Also process file_search_files from routing result (for compatibility)
917
+ for fs_file_item in file_search_files:
918
+ if isinstance(fs_file_item, (str, Path)):
919
+ file_name = _generate_template_variable_name(str(fs_file_item))
920
+ file_path_str = str(fs_file_item)
921
+ if file_path_str not in seen_files:
922
+ files_tuples.append((file_name, file_path_str))
923
+ seen_files.add(file_path_str)
924
+ elif isinstance(fs_file_item, tuple) and len(fs_file_item) == 2:
925
+ # Handle tuple format (name, path)
926
+ _, fs_file_path = fs_file_item
927
+ fs_file_path_str = str(fs_file_path)
928
+ file_name = _generate_template_variable_name(fs_file_path_str)
929
+ if fs_file_path_str not in seen_files:
930
+ files_tuples.append((file_name, fs_file_path_str))
931
+ seen_files.add(fs_file_path_str)
932
+
933
+ # Handle directory aliases - create stable template variables for directories
934
+ dir_mappings = []
935
+
936
+ # Get directory aliases from routing result (these are already processed from CLI args)
937
+ routing = routing_result.routing
938
+ for alias_name, dir_path in routing.template_dir_aliases:
939
+ dir_mappings.append((alias_name, str(dir_path)))
940
+ for alias_name, dir_path in routing.code_interpreter_dir_aliases:
941
+ dir_mappings.append((alias_name, str(dir_path)))
942
+ for alias_name, dir_path in routing.file_search_dir_aliases:
943
+ dir_mappings.append((alias_name, str(dir_path)))
944
+
945
+ # Auto-naming directories (from -dt, -dc, -ds)
946
+ template_dirs = args.get("template_dirs", [])
947
+ for dir_path in template_dirs:
948
+ dir_name = _generate_template_variable_name(str(dir_path))
949
+ dir_mappings.append((dir_name, str(dir_path)))
950
+
951
+ code_interpreter_dirs = args.get("code_interpreter_dirs", [])
952
+ for dir_path in code_interpreter_dirs:
953
+ dir_name = _generate_template_variable_name(str(dir_path))
954
+ dir_mappings.append((dir_name, str(dir_path)))
955
+
956
+ file_search_dirs = args.get("file_search_dirs", [])
957
+ for dir_path in file_search_dirs:
958
+ dir_name = _generate_template_variable_name(str(dir_path))
959
+ dir_mappings.append((dir_name, str(dir_path)))
960
+
961
+ # Process files from explicit routing
962
+ files_dict = collect_files(
963
+ file_mappings=cast(
964
+ List[Tuple[str, Union[str, Path]]], files_tuples
965
+ ),
966
+ dir_mappings=cast(
967
+ List[Tuple[str, Union[str, Path]]], dir_mappings
968
+ ),
969
+ dir_recursive=args.get("recursive", False),
970
+ security_manager=security_manager,
971
+ routing_type="template", # Explicitly set routing_type for files processed here
972
+ # This needs careful thought as files_tuples can come from various sources
973
+ # For now, we assume files directly added to files_tuples are 'template' routed
974
+ # if not overridden by a more specific tool routing later.
975
+ # This is a simplification. A more robust way would be to track routing type
976
+ # for each path as it's parsed from CLI args.
977
+ # For the large file warning, FileInfo will default routing_type to None
978
+ # which FileInfo.content interprets as potentially template-routed.
979
+ )
980
+
981
+ # Handle legacy files and directories separately to preserve variable names
982
+ legacy_files = args.get("files", [])
983
+ legacy_dirs = args.get("dir", [])
984
+ legacy_patterns = args.get("patterns", [])
985
+
986
+ if legacy_files or legacy_dirs or legacy_patterns:
987
+ legacy_files_dict = collect_files(
988
+ file_mappings=cast(
989
+ List[Tuple[str, Union[str, Path]]], legacy_files
990
+ ),
991
+ dir_mappings=cast(
992
+ List[Tuple[str, Union[str, Path]]], legacy_dirs
993
+ ),
994
+ pattern_mappings=cast(
995
+ List[Tuple[str, Union[str, Path]]], legacy_patterns
996
+ ),
997
+ dir_recursive=args.get("recursive", False),
998
+ security_manager=security_manager,
999
+ routing_type="template", # Legacy flags are considered template-only
1000
+ )
1001
+ # Merge legacy results into the main template context
1002
+ files_dict.update(legacy_files_dict)
1003
+
1004
+ # Collect simple variables
1005
+ variables = collect_simple_variables(args)
1006
+
1007
+ # Collect JSON variables
1008
+ json_variables = collect_json_variables(args)
1009
+
1010
+ # Get stdin content if available
1011
+ stdin_content = None
1012
+ try:
1013
+ if not sys.stdin.isatty():
1014
+ stdin_content = sys.stdin.read()
1015
+ except (OSError, IOError):
1016
+ # Skip stdin if it can't be read
1017
+ pass
1018
+
1019
+ context = create_template_context(
1020
+ files=cast(
1021
+ Dict[str, Union[FileInfoList, str, List[str], Dict[str, str]]],
1022
+ files_dict,
1023
+ ),
1024
+ variables=variables,
1025
+ json_variables=json_variables,
1026
+ security_manager=security_manager,
1027
+ stdin_content=stdin_content,
1028
+ )
1029
+
1030
+ # Add current model to context
1031
+ context["current_model"] = args["model"]
1032
+
1033
+ # Add web search enabled flag to context
1034
+ # Use the same logic as runner.py for consistency
1035
+ web_search_from_cli = args.get("web_search", False)
1036
+ no_web_search_from_cli = args.get("no_web_search", False)
1037
+
1038
+ # Load configuration to check defaults
1039
+ from .config import OstructConfig
1040
+
1041
+ config_path = cast(Union[str, Path, None], args.get("config"))
1042
+ config = OstructConfig.load(config_path)
1043
+ web_search_config = config.get_web_search_config()
1044
+
1045
+ # Determine if web search should be enabled
1046
+ if web_search_from_cli:
1047
+ # Explicit --web-search flag takes precedence
1048
+ web_search_enabled = True
1049
+ elif no_web_search_from_cli:
1050
+ # Explicit --no-web-search flag disables
1051
+ web_search_enabled = False
1052
+ else:
1053
+ # Use config default
1054
+ web_search_enabled = web_search_config.enable_by_default
1055
+
1056
+ context["web_search_enabled"] = web_search_enabled
1057
+
1058
+ return context
1059
+
1060
+ except PathSecurityError:
1061
+ # Let PathSecurityError propagate without wrapping
1062
+ raise
1063
+ except (FileNotFoundError, DirectoryNotFoundError) as e:
1064
+ # Convert FileNotFoundError to OstructFileNotFoundError
1065
+ if isinstance(e, FileNotFoundError):
1066
+ raise OstructFileNotFoundError(str(e))
1067
+ # Let DirectoryNotFoundError propagate
1068
+ raise
1069
+ except Exception as e:
1070
+ # Don't wrap InvalidJSONError
1071
+ if isinstance(e, InvalidJSONError):
1072
+ raise
1073
+ # Don't wrap DuplicateFileMappingError
1074
+ if isinstance(e, DuplicateFileMappingError):
1075
+ raise
1076
+ # Check if this is a wrapped security error
1077
+ if isinstance(e.__cause__, PathSecurityError):
1078
+ raise e.__cause__
1079
+ # Wrap other errors
1080
+ raise ValueError(f"Error collecting files: {e}")