ostruct-cli 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/cli/click_options.py +111 -8
- ostruct/cli/code_interpreter.py +210 -17
- ostruct/cli/commands/run.py +56 -0
- ostruct/cli/config.py +20 -1
- ostruct/cli/errors.py +2 -30
- ostruct/cli/file_info.py +55 -20
- ostruct/cli/file_utils.py +19 -3
- ostruct/cli/json_extract.py +75 -0
- ostruct/cli/model_creation.py +1 -1
- ostruct/cli/runner.py +461 -180
- ostruct/cli/sentinel.py +29 -0
- ostruct/cli/template_optimizer.py +11 -7
- ostruct/cli/template_processor.py +243 -115
- ostruct/cli/template_rendering.py +41 -1
- ostruct/cli/template_validation.py +41 -3
- ostruct/cli/types.py +14 -1
- {ostruct_cli-0.8.2.dist-info → ostruct_cli-0.8.3.dist-info}/METADATA +88 -2
- {ostruct_cli-0.8.2.dist-info → ostruct_cli-0.8.3.dist-info}/RECORD +21 -19
- {ostruct_cli-0.8.2.dist-info → ostruct_cli-0.8.3.dist-info}/LICENSE +0 -0
- {ostruct_cli-0.8.2.dist-info → ostruct_cli-0.8.3.dist-info}/WHEEL +0 -0
- {ostruct_cli-0.8.2.dist-info → ostruct_cli-0.8.3.dist-info}/entry_points.txt +0 -0
ostruct/cli/runner.py
CHANGED
@@ -5,7 +5,7 @@ import json
|
|
5
5
|
import logging
|
6
6
|
import os
|
7
7
|
from pathlib import Path
|
8
|
-
from typing import Any,
|
8
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
9
9
|
|
10
10
|
from openai import AsyncOpenAI, OpenAIError
|
11
11
|
from openai_model_registry import ModelRegistry
|
@@ -18,18 +18,18 @@ from .errors import (
|
|
18
18
|
APIErrorMapper,
|
19
19
|
CLIError,
|
20
20
|
SchemaValidationError,
|
21
|
-
StreamInterruptedError,
|
22
|
-
StreamParseError,
|
23
21
|
)
|
24
22
|
from .exit_codes import ExitCode
|
25
23
|
from .explicit_file_processor import ProcessingResult
|
26
24
|
from .file_search import FileSearchManager
|
25
|
+
from .json_extract import split_json_and_text
|
27
26
|
from .mcp_integration import MCPConfiguration, MCPServerManager
|
28
27
|
from .progress_reporting import (
|
29
28
|
configure_progress_reporter,
|
30
29
|
get_progress_reporter,
|
31
30
|
report_success,
|
32
31
|
)
|
32
|
+
from .sentinel import extract_json_block
|
33
33
|
from .serialization import LogSerializer
|
34
34
|
from .services import ServiceContainer
|
35
35
|
from .types import CLIParams
|
@@ -38,7 +38,7 @@ from .unattended_operation import (
|
|
38
38
|
)
|
39
39
|
|
40
40
|
|
41
|
-
# Error classes for
|
41
|
+
# Error classes for API operations
|
42
42
|
class APIResponseError(Exception):
|
43
43
|
pass
|
44
44
|
|
@@ -51,10 +51,6 @@ class InvalidResponseFormatError(Exception):
|
|
51
51
|
pass
|
52
52
|
|
53
53
|
|
54
|
-
class StreamBufferError(Exception):
|
55
|
-
pass
|
56
|
-
|
57
|
-
|
58
54
|
def make_strict(obj: Any) -> None:
|
59
55
|
"""Transform Pydantic schema for Responses API strict mode.
|
60
56
|
|
@@ -85,6 +81,18 @@ def supports_structured_output(model: str) -> bool:
|
|
85
81
|
return True
|
86
82
|
|
87
83
|
|
84
|
+
def _assistant_text(response: Any) -> str:
|
85
|
+
"""Extract text content from API response (Responses API format)."""
|
86
|
+
text_parts = []
|
87
|
+
for item in response.output:
|
88
|
+
if getattr(item, "type", None) == "message":
|
89
|
+
for content_block in item.content or []:
|
90
|
+
if hasattr(content_block, "text"):
|
91
|
+
# For Responses API, text content is directly in the text attribute
|
92
|
+
text_parts.append(content_block.text)
|
93
|
+
return "\n".join(text_parts)
|
94
|
+
|
95
|
+
|
88
96
|
logger = logging.getLogger(__name__)
|
89
97
|
|
90
98
|
|
@@ -243,8 +251,17 @@ async def process_code_interpreter_configuration(
|
|
243
251
|
if not files_to_upload:
|
244
252
|
return None
|
245
253
|
|
254
|
+
# Load configuration for Code Interpreter
|
255
|
+
from typing import Union, cast
|
256
|
+
|
257
|
+
from .config import OstructConfig
|
258
|
+
|
259
|
+
config_path = cast(Union[str, Path, None], args.get("config"))
|
260
|
+
config = OstructConfig.load(config_path)
|
261
|
+
ci_config = config.get_code_interpreter_config()
|
262
|
+
|
246
263
|
# Create Code Interpreter manager
|
247
|
-
manager = CodeInterpreterManager(client)
|
264
|
+
manager = CodeInterpreterManager(client, ci_config)
|
248
265
|
|
249
266
|
# Validate files before upload
|
250
267
|
validation_errors = manager.validate_files_for_upload(files_to_upload)
|
@@ -420,7 +437,7 @@ async def process_file_search_configuration(
|
|
420
437
|
raise mapped_error
|
421
438
|
|
422
439
|
|
423
|
-
async def
|
440
|
+
async def create_structured_output(
|
424
441
|
client: AsyncOpenAI,
|
425
442
|
model: str,
|
426
443
|
system_prompt: str,
|
@@ -429,8 +446,8 @@ async def stream_structured_output(
|
|
429
446
|
output_file: Optional[str] = None,
|
430
447
|
tools: Optional[List[dict]] = None,
|
431
448
|
**kwargs: Any,
|
432
|
-
) ->
|
433
|
-
"""
|
449
|
+
) -> BaseModel:
|
450
|
+
"""Create structured output from OpenAI Responses API.
|
434
451
|
|
435
452
|
This function uses the OpenAI Responses API with strict mode schema validation
|
436
453
|
to generate structured output that matches the provided Pydantic model.
|
@@ -441,16 +458,15 @@ async def stream_structured_output(
|
|
441
458
|
system_prompt: The system prompt to use
|
442
459
|
user_prompt: The user prompt to use
|
443
460
|
output_schema: The Pydantic model to validate responses against
|
444
|
-
output_file: Optional file to write output to
|
461
|
+
output_file: Optional file to write output to (unused, kept for compatibility)
|
445
462
|
tools: Optional list of tools (e.g., MCP, Code Interpreter) to include
|
446
463
|
**kwargs: Additional parameters to pass to the API
|
447
464
|
|
448
465
|
Returns:
|
449
|
-
|
466
|
+
A validated model instance
|
450
467
|
|
451
468
|
Raises:
|
452
469
|
ValueError: If the model does not support structured output or parameters are invalid
|
453
|
-
StreamInterruptedError: If the stream is interrupted
|
454
470
|
APIResponseError: If there is an API error
|
455
471
|
"""
|
456
472
|
try:
|
@@ -465,7 +481,7 @@ async def stream_structured_output(
|
|
465
481
|
on_log = kwargs.pop("on_log", None)
|
466
482
|
|
467
483
|
# Handle model-specific parameters
|
468
|
-
|
484
|
+
api_kwargs = {}
|
469
485
|
registry = ModelRegistry.get_instance()
|
470
486
|
capabilities = registry.get_capabilities(model)
|
471
487
|
|
@@ -474,7 +490,7 @@ async def stream_structured_output(
|
|
474
490
|
if param_name in capabilities.supported_parameters:
|
475
491
|
# Validate the parameter value
|
476
492
|
capabilities.validate_parameter(param_name, value)
|
477
|
-
|
493
|
+
api_kwargs[param_name] = value
|
478
494
|
else:
|
479
495
|
logger.warning(
|
480
496
|
f"Parameter {param_name} is not supported by model {model} and will be ignored"
|
@@ -503,8 +519,8 @@ async def stream_structured_output(
|
|
503
519
|
"strict": True,
|
504
520
|
}
|
505
521
|
},
|
506
|
-
"stream":
|
507
|
-
**
|
522
|
+
"stream": False,
|
523
|
+
**api_kwargs,
|
508
524
|
}
|
509
525
|
|
510
526
|
# Add tools if provided
|
@@ -516,7 +532,7 @@ async def stream_structured_output(
|
|
516
532
|
logger.debug("Making OpenAI Responses API request with:")
|
517
533
|
logger.debug("Model: %s", model)
|
518
534
|
logger.debug("Combined prompt: %s", combined_prompt)
|
519
|
-
logger.debug("Parameters: %s", json.dumps(
|
535
|
+
logger.debug("Parameters: %s", json.dumps(api_kwargs, indent=2))
|
520
536
|
logger.debug("Schema: %s", json.dumps(strict_schema, indent=2))
|
521
537
|
logger.debug("Tools being passed to API: %s", tools)
|
522
538
|
logger.debug(
|
@@ -524,114 +540,51 @@ async def stream_structured_output(
|
|
524
540
|
json.dumps(api_params, indent=2, default=str),
|
525
541
|
)
|
526
542
|
|
527
|
-
# Use the Responses API
|
528
|
-
|
543
|
+
# Use the Responses API
|
544
|
+
api_response = await client.responses.create(**api_params)
|
529
545
|
|
530
|
-
|
531
|
-
|
532
|
-
async for chunk in response:
|
533
|
-
if on_log:
|
534
|
-
on_log(logging.DEBUG, f"Received chunk: {chunk}", {})
|
546
|
+
if on_log:
|
547
|
+
on_log(logging.DEBUG, f"Received response: {api_response.id}", {})
|
535
548
|
|
536
|
-
|
537
|
-
|
538
|
-
choice = chunk.choices[0]
|
539
|
-
# Log tool calls if present
|
540
|
-
if (
|
541
|
-
hasattr(choice, "delta")
|
542
|
-
and hasattr(choice.delta, "tool_calls")
|
543
|
-
and choice.delta.tool_calls
|
544
|
-
):
|
545
|
-
for tool_call in choice.delta.tool_calls:
|
546
|
-
if (
|
547
|
-
hasattr(tool_call, "type")
|
548
|
-
and tool_call.type == "web_search_preview"
|
549
|
-
):
|
550
|
-
tool_id = getattr(tool_call, "id", "unknown")
|
551
|
-
logger.debug(
|
552
|
-
f"Web search tool invoked (id={tool_id})"
|
553
|
-
)
|
554
|
-
elif hasattr(tool_call, "function") and hasattr(
|
555
|
-
tool_call.function, "name"
|
556
|
-
):
|
557
|
-
# Handle other tool types for completeness
|
558
|
-
tool_name = tool_call.function.name
|
559
|
-
tool_id = getattr(tool_call, "id", "unknown")
|
560
|
-
logger.debug(
|
561
|
-
f"Tool '{tool_name}' invoked (id={tool_id})"
|
562
|
-
)
|
563
|
-
|
564
|
-
# Handle different response formats based on the chunk structure
|
565
|
-
content_added = False
|
549
|
+
# Get the complete response content directly
|
550
|
+
content = api_response.output_text
|
566
551
|
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
and hasattr(choice.delta, "content")
|
574
|
-
and choice.delta.content
|
575
|
-
):
|
576
|
-
accumulated_content += choice.delta.content
|
577
|
-
content_added = True
|
578
|
-
elif (
|
579
|
-
hasattr(choice, "message")
|
580
|
-
and hasattr(choice.message, "content")
|
581
|
-
and choice.message.content
|
582
|
-
):
|
583
|
-
accumulated_content += choice.message.content
|
584
|
-
content_added = True
|
585
|
-
elif hasattr(chunk, "response") and hasattr(
|
586
|
-
chunk.response, "body"
|
587
|
-
):
|
588
|
-
# Responses API format
|
589
|
-
accumulated_content += chunk.response.body
|
590
|
-
content_added = True
|
591
|
-
elif hasattr(chunk, "content"):
|
592
|
-
# Direct content
|
593
|
-
accumulated_content += chunk.content
|
594
|
-
content_added = True
|
595
|
-
elif hasattr(chunk, "text"):
|
596
|
-
# Text content
|
597
|
-
accumulated_content += chunk.text
|
598
|
-
content_added = True
|
599
|
-
|
600
|
-
if on_log and content_added:
|
601
|
-
on_log(
|
602
|
-
logging.DEBUG,
|
603
|
-
f"Added content, total length: {len(accumulated_content)}",
|
604
|
-
{},
|
605
|
-
)
|
552
|
+
if on_log:
|
553
|
+
on_log(
|
554
|
+
logging.DEBUG,
|
555
|
+
f"Response content length: {len(content)}",
|
556
|
+
{},
|
557
|
+
)
|
606
558
|
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
# Attempt to parse as complete JSON
|
611
|
-
data = json.loads(accumulated_content.strip())
|
612
|
-
validated = output_schema.model_validate(data)
|
613
|
-
yield validated
|
614
|
-
# Reset for next complete response (if any)
|
615
|
-
accumulated_content = ""
|
616
|
-
except (json.JSONDecodeError, ValueError):
|
617
|
-
# Not yet complete JSON, continue accumulating
|
618
|
-
continue
|
619
|
-
|
620
|
-
# Handle any remaining content
|
621
|
-
if accumulated_content.strip():
|
559
|
+
# Parse and validate the complete response
|
560
|
+
try:
|
561
|
+
# Try new JSON extraction logic first
|
622
562
|
try:
|
623
|
-
data =
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
563
|
+
data, markdown_text = split_json_and_text(content)
|
564
|
+
except ValueError:
|
565
|
+
# Fallback to original parsing for non-fenced JSON
|
566
|
+
data = json.loads(content.strip())
|
567
|
+
markdown_text = ""
|
568
|
+
|
569
|
+
validated = output_schema.model_validate(data)
|
570
|
+
|
571
|
+
# Store full raw text for downstream processing (debug logs, etc.)
|
572
|
+
setattr(validated, "_raw_text", content)
|
573
|
+
# Store markdown text for annotation processing
|
574
|
+
setattr(validated, "_markdown_text", markdown_text)
|
575
|
+
# Store full API response for file download access
|
576
|
+
setattr(validated, "_api_response", api_response)
|
577
|
+
|
578
|
+
return validated
|
579
|
+
|
580
|
+
except ValueError as e:
|
581
|
+
logger.error(f"Failed to parse response content: {e}")
|
582
|
+
raise InvalidResponseFormatError(
|
583
|
+
f"Failed to parse response as valid JSON: {e}"
|
584
|
+
)
|
631
585
|
|
632
586
|
except Exception as e:
|
633
587
|
# Map OpenAI errors using the error mapper
|
634
|
-
|
635
588
|
if isinstance(e, OpenAIError):
|
636
589
|
mapped_error = APIErrorMapper.map_openai_error(e)
|
637
590
|
logger.error(f"OpenAI API error mapped: {mapped_error}")
|
@@ -680,10 +633,6 @@ async def stream_structured_output(
|
|
680
633
|
else:
|
681
634
|
logger.error(f"Unmapped API error: {e}")
|
682
635
|
raise APIResponseError(str(e))
|
683
|
-
finally:
|
684
|
-
# Note: We don't close the client here as it may be reused
|
685
|
-
# The caller is responsible for client lifecycle management
|
686
|
-
pass
|
687
636
|
|
688
637
|
|
689
638
|
# Note: validation functions are defined in cli.py to avoid circular imports
|
@@ -709,29 +658,201 @@ async def process_templates(
|
|
709
658
|
)
|
710
659
|
|
711
660
|
|
712
|
-
async def
|
661
|
+
async def _execute_two_pass_sentinel(
|
662
|
+
client: AsyncOpenAI,
|
713
663
|
args: CLIParams,
|
714
|
-
|
664
|
+
system_prompt: str,
|
665
|
+
user_prompt: str,
|
715
666
|
output_model: Type[BaseModel],
|
667
|
+
tools: List[dict],
|
668
|
+
log_cb: Any,
|
669
|
+
ci_config: Dict[str, Any],
|
670
|
+
code_interpreter_info: Optional[Dict[str, Any]],
|
671
|
+
) -> tuple[BaseModel, List[str]]:
|
672
|
+
"""Execute two-pass sentinel approach for file downloads."""
|
673
|
+
import json
|
674
|
+
|
675
|
+
# ---- pass 1 (raw) ----
|
676
|
+
logger.debug("Starting two-pass execution: Pass 1 (raw mode)")
|
677
|
+
raw_resp = await client.responses.create(
|
678
|
+
model=args["model"],
|
679
|
+
input=f"{system_prompt}\n\n{user_prompt}",
|
680
|
+
tools=tools, # type: ignore[arg-type]
|
681
|
+
# No text format - this allows annotations
|
682
|
+
)
|
683
|
+
|
684
|
+
logger.debug(f"Raw response structure: {type(raw_resp)}")
|
685
|
+
logger.debug(
|
686
|
+
f"Raw response output: {getattr(raw_resp, 'output', 'No output attr')}"
|
687
|
+
)
|
688
|
+
raw_text = _assistant_text(raw_resp)
|
689
|
+
logger.debug(
|
690
|
+
f"Raw response from first pass (first 500 chars): {raw_text[:500]}"
|
691
|
+
)
|
692
|
+
data = extract_json_block(raw_text) or {}
|
693
|
+
logger.debug(f"Extracted JSON from sentinel markers: {bool(data)}")
|
694
|
+
|
695
|
+
# Validate sentinel extraction
|
696
|
+
if not data:
|
697
|
+
logger.warning(
|
698
|
+
"No sentinel JSON found in first pass, falling back to single-pass"
|
699
|
+
)
|
700
|
+
return await _fallback_single_pass(
|
701
|
+
client,
|
702
|
+
args,
|
703
|
+
system_prompt,
|
704
|
+
user_prompt,
|
705
|
+
output_model,
|
706
|
+
tools,
|
707
|
+
log_cb,
|
708
|
+
)
|
709
|
+
|
710
|
+
# download files from first pass
|
711
|
+
downloaded_files = []
|
712
|
+
if code_interpreter_info and code_interpreter_info.get("manager"):
|
713
|
+
cm = code_interpreter_info["manager"]
|
714
|
+
# Use output directory from config, fallback to args, then default
|
715
|
+
download_dir = (
|
716
|
+
ci_config.get("output_directory")
|
717
|
+
or args.get("code_interpreter_download_dir")
|
718
|
+
or "./downloads"
|
719
|
+
)
|
720
|
+
logger.debug(f"Downloading files to: {download_dir}")
|
721
|
+
downloaded_files = await cm.download_generated_files(
|
722
|
+
raw_resp, download_dir
|
723
|
+
)
|
724
|
+
if downloaded_files:
|
725
|
+
logger.info(
|
726
|
+
f"Downloaded {len(downloaded_files)} files from first pass"
|
727
|
+
)
|
728
|
+
|
729
|
+
# ---- pass 2 (strict) ----
|
730
|
+
logger.debug("Starting two-pass execution: Pass 2 (structured mode)")
|
731
|
+
strict_sys = (
|
732
|
+
system_prompt
|
733
|
+
+ "\n\nReuse ONLY these values; do not repeat external calls:\n"
|
734
|
+
+ json.dumps(data, indent=2)
|
735
|
+
)
|
736
|
+
|
737
|
+
# Prepare schema for strict mode
|
738
|
+
schema = output_model.model_json_schema()
|
739
|
+
strict_schema = copy.deepcopy(schema)
|
740
|
+
make_strict(strict_schema)
|
741
|
+
schema_name = output_model.__name__.lower()
|
742
|
+
|
743
|
+
strict_resp = await client.responses.create(
|
744
|
+
model=args["model"],
|
745
|
+
input=f"{strict_sys}\n\n{user_prompt}",
|
746
|
+
text={
|
747
|
+
"format": {
|
748
|
+
"type": "json_schema",
|
749
|
+
"name": schema_name,
|
750
|
+
"schema": strict_schema,
|
751
|
+
"strict": True,
|
752
|
+
}
|
753
|
+
},
|
754
|
+
tools=[], # No tools needed for formatting
|
755
|
+
stream=False,
|
756
|
+
)
|
757
|
+
|
758
|
+
# Parse and validate the structured response
|
759
|
+
content = strict_resp.output_text
|
760
|
+
try:
|
761
|
+
# Try new JSON extraction logic first
|
762
|
+
try:
|
763
|
+
data_final, markdown_text = split_json_and_text(content)
|
764
|
+
except ValueError:
|
765
|
+
# Fallback to original parsing for non-fenced JSON
|
766
|
+
data_final = json.loads(content.strip())
|
767
|
+
markdown_text = ""
|
768
|
+
|
769
|
+
validated = output_model.model_validate(data_final)
|
770
|
+
|
771
|
+
# Store full raw text for downstream processing (debug logs, etc.)
|
772
|
+
setattr(validated, "_raw_text", content)
|
773
|
+
# Store markdown text for annotation processing
|
774
|
+
setattr(validated, "_markdown_text", markdown_text)
|
775
|
+
# Store full API response for file download access
|
776
|
+
setattr(validated, "_api_response", strict_resp)
|
777
|
+
|
778
|
+
return validated, downloaded_files
|
779
|
+
|
780
|
+
except ValueError as e:
|
781
|
+
logger.error(f"Failed to parse structured response content: {e}")
|
782
|
+
raise InvalidResponseFormatError(
|
783
|
+
f"Failed to parse response as valid JSON: {e}"
|
784
|
+
)
|
785
|
+
|
786
|
+
|
787
|
+
async def _fallback_single_pass(
|
788
|
+
client: AsyncOpenAI,
|
789
|
+
args: CLIParams,
|
716
790
|
system_prompt: str,
|
717
791
|
user_prompt: str,
|
718
|
-
|
719
|
-
|
792
|
+
output_model: Type[BaseModel],
|
793
|
+
tools: List[dict],
|
794
|
+
log_cb: Any,
|
795
|
+
) -> tuple[BaseModel, List[str]]:
|
796
|
+
"""Fallback to single-pass execution."""
|
797
|
+
logger.debug("Executing single-pass fallback")
|
798
|
+
response = await create_structured_output(
|
799
|
+
client=client,
|
800
|
+
model=args["model"],
|
801
|
+
system_prompt=system_prompt,
|
802
|
+
user_prompt=user_prompt,
|
803
|
+
output_schema=output_model,
|
804
|
+
output_file=args.get("output_file"),
|
805
|
+
on_log=log_cb,
|
806
|
+
tools=tools,
|
807
|
+
)
|
808
|
+
return response, [] # No files downloaded in fallback
|
809
|
+
|
810
|
+
|
811
|
+
def _get_effective_download_strategy(
|
812
|
+
args: CLIParams, ci_config: Dict[str, Any]
|
813
|
+
) -> str:
|
814
|
+
"""Determine the effective download strategy from config and feature flags.
|
720
815
|
|
721
816
|
Args:
|
722
|
-
args:
|
723
|
-
|
724
|
-
output_model: Generated Pydantic model
|
725
|
-
system_prompt: Processed system prompt
|
726
|
-
user_prompt: Processed user prompt
|
817
|
+
args: CLI parameters including enabled_features and disabled_features
|
818
|
+
ci_config: Code interpreter configuration
|
727
819
|
|
728
820
|
Returns:
|
729
|
-
|
730
|
-
|
731
|
-
Raises:
|
732
|
-
CLIError: For execution errors
|
733
|
-
UnattendedOperationTimeoutError: For operation timeouts
|
821
|
+
Either "single_pass" or "two_pass_sentinel"
|
734
822
|
"""
|
823
|
+
# Start with config default
|
824
|
+
strategy: str = ci_config.get("download_strategy", "single_pass")
|
825
|
+
|
826
|
+
# Check for feature flag override
|
827
|
+
enabled_features = args.get("enabled_features", [])
|
828
|
+
disabled_features = args.get("disabled_features", [])
|
829
|
+
|
830
|
+
if enabled_features or disabled_features:
|
831
|
+
from .click_options import parse_feature_flags
|
832
|
+
|
833
|
+
try:
|
834
|
+
parsed_flags = parse_feature_flags(
|
835
|
+
tuple(enabled_features), tuple(disabled_features)
|
836
|
+
)
|
837
|
+
ci_hack_flag = parsed_flags.get("ci-download-hack")
|
838
|
+
if ci_hack_flag == "on":
|
839
|
+
strategy = "two_pass_sentinel"
|
840
|
+
elif ci_hack_flag == "off":
|
841
|
+
strategy = "single_pass"
|
842
|
+
except Exception as e:
|
843
|
+
logger.warning(f"Failed to parse feature flags: {e}")
|
844
|
+
|
845
|
+
return strategy
|
846
|
+
|
847
|
+
|
848
|
+
async def execute_model(
|
849
|
+
args: CLIParams,
|
850
|
+
params: Dict[str, Any],
|
851
|
+
output_model: Type[BaseModel],
|
852
|
+
system_prompt: str,
|
853
|
+
user_prompt: str,
|
854
|
+
) -> ExitCode:
|
855
|
+
"""Execute the model with the given parameters."""
|
735
856
|
logger.debug("=== Execution Phase ===")
|
736
857
|
|
737
858
|
# Initialize unattended operation manager
|
@@ -770,7 +891,7 @@ async def execute_model(
|
|
770
891
|
|
771
892
|
# Create detailed log callback
|
772
893
|
def log_callback(level: int, message: str, extra: dict[str, Any]) -> None:
|
773
|
-
if args.get("
|
894
|
+
if args.get("verbose", False):
|
774
895
|
if extra:
|
775
896
|
extra_str = LogSerializer.serialize_log_extra(extra)
|
776
897
|
if extra_str:
|
@@ -789,8 +910,31 @@ async def execute_model(
|
|
789
910
|
tools = []
|
790
911
|
nonlocal code_interpreter_info, file_search_info
|
791
912
|
|
913
|
+
# Get universal tool toggle overrides first
|
914
|
+
enabled_tools: set[str] = args.get("_enabled_tools", set()) # type: ignore[assignment]
|
915
|
+
disabled_tools: set[str] = args.get("_disabled_tools", set()) # type: ignore[assignment]
|
916
|
+
|
792
917
|
# Process MCP configuration if provided
|
793
|
-
|
918
|
+
# Apply universal tool toggle overrides for mcp
|
919
|
+
mcp_enabled_by_config = services.is_configured("mcp")
|
920
|
+
mcp_enabled_by_toggle = "mcp" in enabled_tools
|
921
|
+
mcp_disabled_by_toggle = "mcp" in disabled_tools
|
922
|
+
|
923
|
+
# Determine final enablement state
|
924
|
+
mcp_should_enable = False
|
925
|
+
if mcp_enabled_by_toggle:
|
926
|
+
# Universal --enable-tool takes highest precedence
|
927
|
+
mcp_should_enable = True
|
928
|
+
logger.debug("MCP enabled via --enable-tool")
|
929
|
+
elif mcp_disabled_by_toggle:
|
930
|
+
# Universal --disable-tool takes highest precedence
|
931
|
+
mcp_should_enable = False
|
932
|
+
logger.debug("MCP disabled via --disable-tool")
|
933
|
+
else:
|
934
|
+
# Fall back to config-based enablement
|
935
|
+
mcp_should_enable = mcp_enabled_by_config
|
936
|
+
|
937
|
+
if mcp_should_enable and services.is_configured("mcp"):
|
794
938
|
mcp_manager = await services.get_mcp_manager()
|
795
939
|
if mcp_manager:
|
796
940
|
tools.extend(mcp_manager.get_tools_for_responses_api())
|
@@ -804,10 +948,29 @@ async def execute_model(
|
|
804
948
|
routing_result_typed: Optional[ProcessingResult] = routing_result
|
805
949
|
|
806
950
|
# Process Code Interpreter configuration if enabled
|
807
|
-
|
951
|
+
# Apply universal tool toggle overrides for code-interpreter
|
952
|
+
ci_enabled_by_routing = (
|
808
953
|
routing_result_typed
|
809
954
|
and "code-interpreter" in routing_result_typed.enabled_tools
|
810
|
-
)
|
955
|
+
)
|
956
|
+
ci_enabled_by_toggle = "code-interpreter" in enabled_tools
|
957
|
+
ci_disabled_by_toggle = "code-interpreter" in disabled_tools
|
958
|
+
|
959
|
+
# Determine final enablement state
|
960
|
+
ci_should_enable = False
|
961
|
+
if ci_enabled_by_toggle:
|
962
|
+
# Universal --enable-tool takes highest precedence
|
963
|
+
ci_should_enable = True
|
964
|
+
logger.debug("Code Interpreter enabled via --enable-tool")
|
965
|
+
elif ci_disabled_by_toggle:
|
966
|
+
# Universal --disable-tool takes highest precedence
|
967
|
+
ci_should_enable = False
|
968
|
+
logger.debug("Code Interpreter disabled via --disable-tool")
|
969
|
+
else:
|
970
|
+
# Fall back to routing-based enablement
|
971
|
+
ci_should_enable = bool(ci_enabled_by_routing)
|
972
|
+
|
973
|
+
if ci_should_enable and routing_result_typed:
|
811
974
|
code_interpreter_files = routing_result_typed.validated_files.get(
|
812
975
|
"code-interpreter", []
|
813
976
|
)
|
@@ -854,10 +1017,29 @@ async def execute_model(
|
|
854
1017
|
)
|
855
1018
|
|
856
1019
|
# Process File Search configuration if enabled
|
857
|
-
|
1020
|
+
# Apply universal tool toggle overrides for file-search
|
1021
|
+
fs_enabled_by_routing = (
|
858
1022
|
routing_result_typed
|
859
1023
|
and "file-search" in routing_result_typed.enabled_tools
|
860
|
-
)
|
1024
|
+
)
|
1025
|
+
fs_enabled_by_toggle = "file-search" in enabled_tools
|
1026
|
+
fs_disabled_by_toggle = "file-search" in disabled_tools
|
1027
|
+
|
1028
|
+
# Determine final enablement state
|
1029
|
+
fs_should_enable = False
|
1030
|
+
if fs_enabled_by_toggle:
|
1031
|
+
# Universal --enable-tool takes highest precedence
|
1032
|
+
fs_should_enable = True
|
1033
|
+
logger.debug("File Search enabled via --enable-tool")
|
1034
|
+
elif fs_disabled_by_toggle:
|
1035
|
+
# Universal --disable-tool takes highest precedence
|
1036
|
+
fs_should_enable = False
|
1037
|
+
logger.debug("File Search disabled via --disable-tool")
|
1038
|
+
else:
|
1039
|
+
# Fall back to routing-based enablement
|
1040
|
+
fs_should_enable = bool(fs_enabled_by_routing)
|
1041
|
+
|
1042
|
+
if fs_should_enable and routing_result_typed:
|
861
1043
|
file_search_files = routing_result_typed.validated_files.get(
|
862
1044
|
"file-search", []
|
863
1045
|
)
|
@@ -920,7 +1102,15 @@ async def execute_model(
|
|
920
1102
|
|
921
1103
|
# Determine if web search should be enabled
|
922
1104
|
web_search_enabled = False
|
923
|
-
if
|
1105
|
+
if "web-search" in enabled_tools:
|
1106
|
+
# Universal --enable-tool web-search takes highest precedence
|
1107
|
+
web_search_enabled = True
|
1108
|
+
logger.debug("Web search enabled via --enable-tool")
|
1109
|
+
elif "web-search" in disabled_tools:
|
1110
|
+
# Universal --disable-tool web-search takes highest precedence
|
1111
|
+
web_search_enabled = False
|
1112
|
+
logger.debug("Web search disabled via --disable-tool")
|
1113
|
+
elif web_search_from_cli:
|
924
1114
|
# Explicit --web-search flag takes precedence
|
925
1115
|
web_search_enabled = True
|
926
1116
|
elif no_web_search_from_cli:
|
@@ -995,19 +1185,61 @@ async def execute_model(
|
|
995
1185
|
# Debug log the final tools array
|
996
1186
|
logger.debug(f"Final tools array being passed to API: {tools}")
|
997
1187
|
|
998
|
-
#
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
output_schema=output_model,
|
1006
|
-
output_file=args.get("output_file"),
|
1007
|
-
on_log=log_callback,
|
1008
|
-
tools=tools,
|
1188
|
+
# Check for two-pass sentinel mode
|
1189
|
+
ci_config = config.get_code_interpreter_config()
|
1190
|
+
effective_strategy = _get_effective_download_strategy(args, ci_config)
|
1191
|
+
if (
|
1192
|
+
effective_strategy == "two_pass_sentinel"
|
1193
|
+
and output_model
|
1194
|
+
and code_interpreter_info
|
1009
1195
|
):
|
1010
|
-
|
1196
|
+
try:
|
1197
|
+
logger.debug(
|
1198
|
+
"Using two-pass sentinel mode for Code Interpreter file downloads"
|
1199
|
+
)
|
1200
|
+
resp, downloaded_files = await _execute_two_pass_sentinel(
|
1201
|
+
client,
|
1202
|
+
args,
|
1203
|
+
system_prompt,
|
1204
|
+
user_prompt,
|
1205
|
+
output_model,
|
1206
|
+
tools,
|
1207
|
+
log_callback,
|
1208
|
+
ci_config,
|
1209
|
+
code_interpreter_info,
|
1210
|
+
)
|
1211
|
+
response = resp
|
1212
|
+
# Store downloaded files info for later use
|
1213
|
+
if downloaded_files:
|
1214
|
+
setattr(response, "_downloaded_files", downloaded_files)
|
1215
|
+
except Exception as e:
|
1216
|
+
logger.warning(
|
1217
|
+
f"Two-pass execution failed, falling back to single-pass: {e}"
|
1218
|
+
)
|
1219
|
+
resp, _ = await _fallback_single_pass(
|
1220
|
+
client,
|
1221
|
+
args,
|
1222
|
+
system_prompt,
|
1223
|
+
user_prompt,
|
1224
|
+
output_model,
|
1225
|
+
tools,
|
1226
|
+
log_callback,
|
1227
|
+
)
|
1228
|
+
response = resp
|
1229
|
+
else:
|
1230
|
+
# Create the response using the API (single-pass mode)
|
1231
|
+
logger.debug(f"Tools being passed to API: {tools}")
|
1232
|
+
response = await create_structured_output(
|
1233
|
+
client=client,
|
1234
|
+
model=args["model"],
|
1235
|
+
system_prompt=system_prompt,
|
1236
|
+
user_prompt=user_prompt,
|
1237
|
+
output_schema=output_model,
|
1238
|
+
output_file=args.get("output_file"),
|
1239
|
+
on_log=log_callback,
|
1240
|
+
tools=tools,
|
1241
|
+
)
|
1242
|
+
output_buffer.append(response)
|
1011
1243
|
|
1012
1244
|
# Handle final output
|
1013
1245
|
output_file = args.get("output_file")
|
@@ -1043,26 +1275,78 @@ async def execute_model(
|
|
1043
1275
|
print(json_output)
|
1044
1276
|
|
1045
1277
|
# Handle file downloads from Code Interpreter if any were generated
|
1046
|
-
if
|
1047
|
-
code_interpreter_info
|
1048
|
-
and hasattr(response, "file_ids")
|
1049
|
-
and response.file_ids
|
1050
|
-
):
|
1278
|
+
if code_interpreter_info and output_buffer:
|
1051
1279
|
try:
|
1052
|
-
|
1053
|
-
|
1054
|
-
)
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1280
|
+
# Get the API response from the last output item
|
1281
|
+
last_response = output_buffer[-1]
|
1282
|
+
if hasattr(last_response, "_api_response"):
|
1283
|
+
api_response = getattr(last_response, "_api_response")
|
1284
|
+
# Responses API has 'output' attribute, not 'messages'
|
1285
|
+
if hasattr(api_response, "output"):
|
1286
|
+
download_dir = args.get(
|
1287
|
+
"code_interpreter_download_dir", "./downloads"
|
1288
|
+
)
|
1289
|
+
manager = code_interpreter_info["manager"]
|
1290
|
+
|
1291
|
+
# Debug: Log response structure for Responses API
|
1292
|
+
logger.debug(
|
1293
|
+
f"Response has {len(api_response.output)} output items"
|
1294
|
+
)
|
1295
|
+
for i, item in enumerate(api_response.output):
|
1296
|
+
logger.debug(f"Output item {i}: {type(item)}")
|
1297
|
+
if hasattr(item, "type"):
|
1298
|
+
logger.debug(f" Type: {item.type}")
|
1299
|
+
if hasattr(item, "content"):
|
1300
|
+
content_str = (
|
1301
|
+
str(item.content)[:200] + "..."
|
1302
|
+
if len(str(item.content)) > 200
|
1303
|
+
else str(item.content)
|
1304
|
+
)
|
1305
|
+
logger.debug(
|
1306
|
+
f" Content preview: {content_str}"
|
1307
|
+
)
|
1308
|
+
# Debug tool call outputs for file detection
|
1309
|
+
if hasattr(item, "outputs"):
|
1310
|
+
logger.debug(
|
1311
|
+
f" Outputs: {len(item.outputs or [])} items"
|
1312
|
+
)
|
1313
|
+
for j, output in enumerate(item.outputs or []):
|
1314
|
+
logger.debug(
|
1315
|
+
f" Output {j}: {type(output)}"
|
1316
|
+
)
|
1317
|
+
if hasattr(output, "type"):
|
1318
|
+
logger.debug(
|
1319
|
+
f" Type: {output.type}"
|
1320
|
+
)
|
1321
|
+
if hasattr(output, "file_id"):
|
1322
|
+
logger.debug(
|
1323
|
+
f" File ID: {output.file_id}"
|
1324
|
+
)
|
1325
|
+
if hasattr(output, "filename"):
|
1326
|
+
logger.debug(
|
1327
|
+
f" Filename: {output.filename}"
|
1328
|
+
)
|
1329
|
+
|
1330
|
+
# Type ignore since we know this is a CodeInterpreterManager
|
1331
|
+
downloaded_files = await manager.download_generated_files( # type: ignore[attr-defined]
|
1332
|
+
api_response, download_dir
|
1333
|
+
)
|
1334
|
+
if downloaded_files:
|
1335
|
+
logger.info(
|
1336
|
+
f"Downloaded {len(downloaded_files)} generated files to {download_dir}"
|
1337
|
+
)
|
1338
|
+
for file_path in downloaded_files:
|
1339
|
+
logger.info(f" - {file_path}")
|
1340
|
+
else:
|
1341
|
+
logger.debug(
|
1342
|
+
"No files were downloaded from Code Interpreter"
|
1343
|
+
)
|
1344
|
+
else:
|
1345
|
+
logger.debug("API response has no output attribute")
|
1346
|
+
else:
|
1347
|
+
logger.debug(
|
1348
|
+
"Last response has no _api_response attribute"
|
1063
1349
|
)
|
1064
|
-
for file_path in downloaded_files:
|
1065
|
-
logger.info(f" - {file_path}")
|
1066
1350
|
except Exception as e:
|
1067
1351
|
logger.warning(f"Failed to download generated files: {e}")
|
1068
1352
|
|
@@ -1076,17 +1360,14 @@ async def execute_model(
|
|
1076
1360
|
# The result should be an ExitCode from execute_main_operation
|
1077
1361
|
return result # type: ignore[no-any-return]
|
1078
1362
|
except (
|
1079
|
-
StreamInterruptedError,
|
1080
|
-
StreamBufferError,
|
1081
|
-
StreamParseError,
|
1082
1363
|
APIResponseError,
|
1083
1364
|
EmptyResponseError,
|
1084
1365
|
InvalidResponseFormatError,
|
1085
1366
|
) as e:
|
1086
|
-
logger.error("
|
1367
|
+
logger.error("API error: %s", str(e))
|
1087
1368
|
raise CLIError(str(e), exit_code=ExitCode.API_ERROR)
|
1088
1369
|
except Exception as e:
|
1089
|
-
logger.exception("Unexpected error during
|
1370
|
+
logger.exception("Unexpected error during execution")
|
1090
1371
|
raise CLIError(str(e), exit_code=ExitCode.UNKNOWN_ERROR)
|
1091
1372
|
finally:
|
1092
1373
|
# Clean up Code Interpreter files if requested
|