ostruct-cli 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
ostruct/cli/runner.py CHANGED
@@ -5,7 +5,7 @@ import json
  import logging
  import os
  from pathlib import Path
- from typing import Any, AsyncGenerator, Dict, List, Optional, Type, Union
+ from typing import Any, Dict, List, Optional, Type, Union

  from openai import AsyncOpenAI, OpenAIError
  from openai_model_registry import ModelRegistry
@@ -18,27 +18,24 @@ from .errors import (
      APIErrorMapper,
      CLIError,
      SchemaValidationError,
-     StreamInterruptedError,
-     StreamParseError,
  )
  from .exit_codes import ExitCode
  from .explicit_file_processor import ProcessingResult
  from .file_search import FileSearchManager
+ from .json_extract import split_json_and_text
  from .mcp_integration import MCPConfiguration, MCPServerManager
  from .progress_reporting import (
      configure_progress_reporter,
      get_progress_reporter,
      report_success,
  )
+ from .sentinel import extract_json_block
  from .serialization import LogSerializer
  from .services import ServiceContainer
  from .types import CLIParams
- from .unattended_operation import (
-     UnattendedOperationManager,
- )


- # Error classes for streaming operations (duplicated from cli.py for now)
+ # Error classes for API operations
  class APIResponseError(Exception):
      pass

@@ -51,10 +48,6 @@ class InvalidResponseFormatError(Exception):
      pass


- class StreamBufferError(Exception):
-     pass
-
-
  def make_strict(obj: Any) -> None:
      """Transform Pydantic schema for Responses API strict mode.

@@ -85,6 +78,18 @@ def supports_structured_output(model: str) -> bool:
      return True


+ def _assistant_text(response: Any) -> str:
+     """Extract text content from API response (Responses API format)."""
+     text_parts = []
+     for item in response.output:
+         if getattr(item, "type", None) == "message":
+             for content_block in item.content or []:
+                 if hasattr(content_block, "text"):
+                     # For Responses API, text content is directly in the text attribute
+                     text_parts.append(content_block.text)
+     return "\n".join(text_parts)
+
+
  logger = logging.getLogger(__name__)

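Note: the new _assistant_text helper only collects text from "message" items in response.output; tool-call items are skipped. A minimal sketch of the expected behavior, using SimpleNamespace stand-ins for the Responses API objects (the real types come from the openai package):

    from types import SimpleNamespace

    # Attribute-compatible stand-ins for Responses API output items.
    block = SimpleNamespace(text="Hello from the model")
    message = SimpleNamespace(type="message", content=[block])
    tool_call = SimpleNamespace(type="code_interpreter_call")  # not a message: skipped

    response = SimpleNamespace(output=[tool_call, message])
    assert _assistant_text(response) == "Hello from the model"
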
@@ -243,8 +248,17 @@ async def process_code_interpreter_configuration(
      if not files_to_upload:
          return None

+     # Load configuration for Code Interpreter
+     from typing import Union, cast
+
+     from .config import OstructConfig
+
+     config_path = cast(Union[str, Path, None], args.get("config"))
+     config = OstructConfig.load(config_path)
+     ci_config = config.get_code_interpreter_config()
+
      # Create Code Interpreter manager
-     manager = CodeInterpreterManager(client)
+     manager = CodeInterpreterManager(client, ci_config)

      # Validate files before upload
      validation_errors = manager.validate_files_for_upload(files_to_upload)
@@ -420,7 +434,7 @@ async def process_file_search_configuration(
          raise mapped_error


- async def stream_structured_output(
+ async def create_structured_output(
      client: AsyncOpenAI,
      model: str,
      system_prompt: str,
@@ -429,8 +443,8 @@ async def stream_structured_output(
      output_file: Optional[str] = None,
      tools: Optional[List[dict]] = None,
      **kwargs: Any,
- ) -> AsyncGenerator[BaseModel, None]:
-     """Stream structured output from OpenAI API using Responses API.
+ ) -> BaseModel:
+     """Create structured output from OpenAI Responses API.

      This function uses the OpenAI Responses API with strict mode schema validation
      to generate structured output that matches the provided Pydantic model.
@@ -441,16 +455,15 @@ async def stream_structured_output(
          system_prompt: The system prompt to use
          user_prompt: The user prompt to use
          output_schema: The Pydantic model to validate responses against
-         output_file: Optional file to write output to
+         output_file: Optional file to write output to (unused, kept for compatibility)
          tools: Optional list of tools (e.g., MCP, Code Interpreter) to include
          **kwargs: Additional parameters to pass to the API

      Returns:
-         An async generator yielding validated model instances
+         A validated model instance

      Raises:
          ValueError: If the model does not support structured output or parameters are invalid
-         StreamInterruptedError: If the stream is interrupted
          APIResponseError: If there is an API error
      """
      try:
@@ -465,7 +478,7 @@ async def stream_structured_output(
          on_log = kwargs.pop("on_log", None)

          # Handle model-specific parameters
-         stream_kwargs = {}
+         api_kwargs = {}
          registry = ModelRegistry.get_instance()
          capabilities = registry.get_capabilities(model)

@@ -474,7 +487,7 @@ async def stream_structured_output(
              if param_name in capabilities.supported_parameters:
                  # Validate the parameter value
                  capabilities.validate_parameter(param_name, value)
-                 stream_kwargs[param_name] = value
+                 api_kwargs[param_name] = value
              else:
                  logger.warning(
                      f"Parameter {param_name} is not supported by model {model} and will be ignored"
@@ -503,8 +516,8 @@ async def stream_structured_output(
                      "strict": True,
                  }
              },
-             "stream": True,
-             **stream_kwargs,
+             "stream": False,
+             **api_kwargs,
          }

          # Add tools if provided
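Note: with "stream": False the request assembled above is sent once and the reply arrives as a single response object. The payload now looks roughly like this (a sketch; combined_prompt, schema_name, strict_schema, and api_kwargs are the variables built earlier in this function):

    api_params = {
        "model": model,
        "input": combined_prompt,  # system and user prompts, combined
        "text": {
            "format": {
                "type": "json_schema",
                "name": schema_name,      # derived from the Pydantic model name
                "schema": strict_schema,  # schema after make_strict()
                "strict": True,
            }
        },
        "stream": False,  # 0.8.4 no longer streams
        **api_kwargs,     # validated model-specific parameters
    }
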
@@ -516,7 +529,7 @@ async def stream_structured_output(
          logger.debug("Making OpenAI Responses API request with:")
          logger.debug("Model: %s", model)
          logger.debug("Combined prompt: %s", combined_prompt)
-         logger.debug("Parameters: %s", json.dumps(stream_kwargs, indent=2))
+         logger.debug("Parameters: %s", json.dumps(api_kwargs, indent=2))
          logger.debug("Schema: %s", json.dumps(strict_schema, indent=2))
          logger.debug("Tools being passed to API: %s", tools)
          logger.debug(
@@ -524,114 +537,51 @@ async def stream_structured_output(
              json.dumps(api_params, indent=2, default=str),
          )

-         # Use the Responses API with streaming
-         response = await client.responses.create(**api_params)
+         # Use the Responses API
+         api_response = await client.responses.create(**api_params)

-         # Process streaming response
-         accumulated_content = ""
-         async for chunk in response:
-             if on_log:
-                 on_log(logging.DEBUG, f"Received chunk: {chunk}", {})
+         if on_log:
+             on_log(logging.DEBUG, f"Received response: {api_response.id}", {})

-             # Check for tool calls (including web search)
-             if hasattr(chunk, "choices") and chunk.choices:
-                 choice = chunk.choices[0]
-                 # Log tool calls if present
-                 if (
-                     hasattr(choice, "delta")
-                     and hasattr(choice.delta, "tool_calls")
-                     and choice.delta.tool_calls
-                 ):
-                     for tool_call in choice.delta.tool_calls:
-                         if (
-                             hasattr(tool_call, "type")
-                             and tool_call.type == "web_search_preview"
-                         ):
-                             tool_id = getattr(tool_call, "id", "unknown")
-                             logger.debug(
-                                 f"Web search tool invoked (id={tool_id})"
-                             )
-                         elif hasattr(tool_call, "function") and hasattr(
-                             tool_call.function, "name"
-                         ):
-                             # Handle other tool types for completeness
-                             tool_name = tool_call.function.name
-                             tool_id = getattr(tool_call, "id", "unknown")
-                             logger.debug(
-                                 f"Tool '{tool_name}' invoked (id={tool_id})"
-                             )
+         # Get the complete response content directly
+         content = api_response.output_text

-             # Handle different response formats based on the chunk structure
-             content_added = False
-
-             # Try different possible response formats
-             if hasattr(chunk, "choices") and chunk.choices:
-                 # Standard chat completion format
-                 choice = chunk.choices[0]
-                 if (
-                     hasattr(choice, "delta")
-                     and hasattr(choice.delta, "content")
-                     and choice.delta.content
-                 ):
-                     accumulated_content += choice.delta.content
-                     content_added = True
-                 elif (
-                     hasattr(choice, "message")
-                     and hasattr(choice.message, "content")
-                     and choice.message.content
-                 ):
-                     accumulated_content += choice.message.content
-                     content_added = True
-             elif hasattr(chunk, "response") and hasattr(
-                 chunk.response, "body"
-             ):
-                 # Responses API format
-                 accumulated_content += chunk.response.body
-                 content_added = True
-             elif hasattr(chunk, "content"):
-                 # Direct content
-                 accumulated_content += chunk.content
-                 content_added = True
-             elif hasattr(chunk, "text"):
-                 # Text content
-                 accumulated_content += chunk.text
-                 content_added = True
-
-             if on_log and content_added:
-                 on_log(
-                     logging.DEBUG,
-                     f"Added content, total length: {len(accumulated_content)}",
-                     {},
-                 )
+         if on_log:
+             on_log(
+                 logging.DEBUG,
+                 f"Response content length: {len(content)}",
+                 {},
+             )

-             # Try to parse and validate accumulated content as complete JSON
-             try:
-                 if accumulated_content.strip():
-                     # Attempt to parse as complete JSON
-                     data = json.loads(accumulated_content.strip())
-                     validated = output_schema.model_validate(data)
-                     yield validated
-                     # Reset for next complete response (if any)
-                     accumulated_content = ""
-             except (json.JSONDecodeError, ValueError):
-                 # Not yet complete JSON, continue accumulating
-                 continue
-
-         # Handle any remaining content
-         if accumulated_content.strip():
+         # Parse and validate the complete response
+         try:
+             # Try new JSON extraction logic first
              try:
-                 data = json.loads(accumulated_content.strip())
-                 validated = output_schema.model_validate(data)
-                 yield validated
-             except (json.JSONDecodeError, ValueError) as e:
-                 logger.error(f"Failed to parse final accumulated content: {e}")
-                 raise StreamParseError(
-                     f"Failed to parse response as valid JSON: {e}"
-                 )
+                 data, markdown_text = split_json_and_text(content)
+             except ValueError:
+                 # Fallback to original parsing for non-fenced JSON
+                 data = json.loads(content.strip())
+                 markdown_text = ""
+
+             validated = output_schema.model_validate(data)
+
+             # Store full raw text for downstream processing (debug logs, etc.)
+             setattr(validated, "_raw_text", content)
+             # Store markdown text for annotation processing
+             setattr(validated, "_markdown_text", markdown_text)
+             # Store full API response for file download access
+             setattr(validated, "_api_response", api_response)
+
+             return validated
+
+         except ValueError as e:
+             logger.error(f"Failed to parse response content: {e}")
+             raise InvalidResponseFormatError(
+                 f"Failed to parse response as valid JSON: {e}"
+             )

      except Exception as e:
          # Map OpenAI errors using the error mapper
-
          if isinstance(e, OpenAIError):
              mapped_error = APIErrorMapper.map_openai_error(e)
              logger.error(f"OpenAI API error mapped: {mapped_error}")
@@ -680,10 +630,6 @@ async def stream_structured_output(
          else:
              logger.error(f"Unmapped API error: {e}")
              raise APIResponseError(str(e))
-     finally:
-         # Note: We don't close the client here as it may be reused
-         # The caller is responsible for client lifecycle management
-         pass


  # Note: validation functions are defined in cli.py to avoid circular imports
@@ -709,34 +655,202 @@ async def process_templates(
      )


- async def execute_model(
+ async def _execute_two_pass_sentinel(
+     client: AsyncOpenAI,
      args: CLIParams,
-     params: Dict[str, Any],
+     system_prompt: str,
+     user_prompt: str,
      output_model: Type[BaseModel],
+     tools: List[dict],
+     log_cb: Any,
+     ci_config: Dict[str, Any],
+     code_interpreter_info: Optional[Dict[str, Any]],
+ ) -> tuple[BaseModel, List[str]]:
+     """Execute two-pass sentinel approach for file downloads."""
+     import json
+
+     # ---- pass 1 (raw) ----
+     logger.debug("Starting two-pass execution: Pass 1 (raw mode)")
+     raw_resp = await client.responses.create(
+         model=args["model"],
+         input=f"{system_prompt}\n\n{user_prompt}",
+         tools=tools,  # type: ignore[arg-type]
+         # No text format - this allows annotations
+     )
+
+     logger.debug(f"Raw response structure: {type(raw_resp)}")
+     logger.debug(
+         f"Raw response output: {getattr(raw_resp, 'output', 'No output attr')}"
+     )
+     raw_text = _assistant_text(raw_resp)
+     logger.debug(
+         f"Raw response from first pass (first 500 chars): {raw_text[:500]}"
+     )
+     data = extract_json_block(raw_text) or {}
+     logger.debug(f"Extracted JSON from sentinel markers: {bool(data)}")
+
+     # Validate sentinel extraction
+     if not data:
+         logger.warning(
+             "No sentinel JSON found in first pass, falling back to single-pass"
+         )
+         return await _fallback_single_pass(
+             client,
+             args,
+             system_prompt,
+             user_prompt,
+             output_model,
+             tools,
+             log_cb,
+         )
+
+     # download files from first pass
+     downloaded_files = []
+     if code_interpreter_info and code_interpreter_info.get("manager"):
+         cm = code_interpreter_info["manager"]
+         # Use output directory from config, fallback to args, then default
+         download_dir = (
+             ci_config.get("output_directory")
+             or args.get("code_interpreter_download_dir")
+             or "./downloads"
+         )
+         logger.debug(f"Downloading files to: {download_dir}")
+         downloaded_files = await cm.download_generated_files(
+             raw_resp, download_dir
+         )
+         if downloaded_files:
+             logger.info(
+                 f"Downloaded {len(downloaded_files)} files from first pass"
+             )
+
+     # ---- pass 2 (strict) ----
+     logger.debug("Starting two-pass execution: Pass 2 (structured mode)")
+     strict_sys = (
+         system_prompt
+         + "\n\nReuse ONLY these values; do not repeat external calls:\n"
+         + json.dumps(data, indent=2)
+     )
+
+     # Prepare schema for strict mode
+     schema = output_model.model_json_schema()
+     strict_schema = copy.deepcopy(schema)
+     make_strict(strict_schema)
+     schema_name = output_model.__name__.lower()
+
+     strict_resp = await client.responses.create(
+         model=args["model"],
+         input=f"{strict_sys}\n\n{user_prompt}",
+         text={
+             "format": {
+                 "type": "json_schema",
+                 "name": schema_name,
+                 "schema": strict_schema,
+                 "strict": True,
+             }
+         },
+         tools=[],  # No tools needed for formatting
+         stream=False,
+     )
+
+     # Parse and validate the structured response
+     content = strict_resp.output_text
+     try:
+         # Try new JSON extraction logic first
+         try:
+             data_final, markdown_text = split_json_and_text(content)
+         except ValueError:
+             # Fallback to original parsing for non-fenced JSON
+             data_final = json.loads(content.strip())
+             markdown_text = ""
+
+         validated = output_model.model_validate(data_final)
+
+         # Store full raw text for downstream processing (debug logs, etc.)
+         setattr(validated, "_raw_text", content)
+         # Store markdown text for annotation processing
+         setattr(validated, "_markdown_text", markdown_text)
+         # Store full API response for file download access
+         setattr(validated, "_api_response", strict_resp)
+
+         return validated, downloaded_files
+
+     except ValueError as e:
+         logger.error(f"Failed to parse structured response content: {e}")
+         raise InvalidResponseFormatError(
+             f"Failed to parse response as valid JSON: {e}"
+         )
+
+
+ async def _fallback_single_pass(
+     client: AsyncOpenAI,
+     args: CLIParams,
      system_prompt: str,
      user_prompt: str,
- ) -> ExitCode:
-     """Execute the model and handle the response.
+     output_model: Type[BaseModel],
+     tools: List[dict],
+     log_cb: Any,
+ ) -> tuple[BaseModel, List[str]]:
+     """Fallback to single-pass execution."""
+     logger.debug("Executing single-pass fallback")
+     response = await create_structured_output(
+         client=client,
+         model=args["model"],
+         system_prompt=system_prompt,
+         user_prompt=user_prompt,
+         output_schema=output_model,
+         output_file=args.get("output_file"),
+         on_log=log_cb,
+         tools=tools,
+     )
+     return response, []  # No files downloaded in fallback
+
+
+ def _get_effective_download_strategy(
+     args: CLIParams, ci_config: Dict[str, Any]
+ ) -> str:
+     """Determine the effective download strategy from config and feature flags.

      Args:
-         args: Command line arguments
-         params: Validated model parameters
-         output_model: Generated Pydantic model
-         system_prompt: Processed system prompt
-         user_prompt: Processed user prompt
+         args: CLI parameters including enabled_features and disabled_features
+         ci_config: Code interpreter configuration

      Returns:
-         Exit code indicating success or failure
-
-     Raises:
-         CLIError: For execution errors
-         UnattendedOperationTimeoutError: For operation timeouts
+         Either "single_pass" or "two_pass_sentinel"
      """
-     logger.debug("=== Execution Phase ===")
+     # Start with config default
+     strategy: str = ci_config.get("download_strategy", "single_pass")
+
+     # Check for feature flag override
+     enabled_features = args.get("enabled_features", [])
+     disabled_features = args.get("disabled_features", [])
+
+     if enabled_features or disabled_features:
+         from .click_options import parse_feature_flags
+
+         try:
+             parsed_flags = parse_feature_flags(
+                 tuple(enabled_features), tuple(disabled_features)
+             )
+             ci_hack_flag = parsed_flags.get("ci-download-hack")
+             if ci_hack_flag == "on":
+                 strategy = "two_pass_sentinel"
+             elif ci_hack_flag == "off":
+                 strategy = "single_pass"
+         except Exception as e:
+             logger.warning(f"Failed to parse feature flags: {e}")

-     # Initialize unattended operation manager
-     timeout_seconds = int(args.get("timeout", 3600))
-     operation_manager = UnattendedOperationManager(timeout_seconds)
+     return strategy
+
+
+ async def execute_model(
+     args: CLIParams,
+     params: Dict[str, Any],
+     output_model: Type[BaseModel],
+     system_prompt: str,
+     user_prompt: str,
+ ) -> ExitCode:
+     """Execute the model with the given parameters."""
+     logger.debug("=== Execution Phase ===")

      # Pre-validate unattended compatibility
      # Note: MCP validation is handled during MCP configuration processing
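Note: _execute_two_pass_sentinel relies on extract_json_block from the new .sentinel module, whose implementation is not part of this diff. The idea is that the first-pass prompt asks the model to wrap its JSON answer in sentinel markers. A hypothetical sketch (the marker strings are assumed; the real ones live in ostruct/cli/sentinel.py):

    import json
    import re
    from typing import Any, Dict, Optional

    # Assumed delimiter format; the actual markers may differ.
    _SENTINEL = re.compile(r"===BEGIN_JSON===\s*(.*?)\s*===END_JSON===", re.DOTALL)

    def extract_json_block(text: str) -> Optional[Dict[str, Any]]:
        match = _SENTINEL.search(text)
        if match is None:
            return None  # caller falls back to single-pass mode
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            return None
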
@@ -753,12 +867,22 @@ async def execute_model(

      api_key = args.get("api_key") or os.getenv("OPENAI_API_KEY")
      if not api_key:
-         msg = "No API key provided. Set OPENAI_API_KEY environment variable or use --api-key"
+         msg = (
+             "No OpenAI API key found. Please:\n"
+             " • Set OPENAI_API_KEY environment variable, or\n"
+             " • Create a .env file with OPENAI_API_KEY=your-key-here, or\n"
+             " • Use --api-key option (not recommended for production)\n"
+             "\n"
+             "Get your API key from: https://platform.openai.com/api-keys"
+         )
          logger.error(msg)
          raise CLIError(msg, exit_code=ExitCode.API_ERROR)

+     # Get API timeout
+     api_timeout = args.get("timeout", 60.0)
      client = AsyncOpenAI(
-         api_key=api_key, timeout=min(args.get("timeout", 60.0), 300.0)
+         api_key=api_key,
+         timeout=min(float(api_timeout), 300.0),
      )  # Cap at 5 min for client timeout

      # Create service container for dependency management
@@ -770,7 +894,7 @@ async def execute_model(

      # Create detailed log callback
      def log_callback(level: int, message: str, extra: dict[str, Any]) -> None:
-         if args.get("debug_openai_stream", False):
+         if args.get("verbose", False):
              if extra:
                  extra_str = LogSerializer.serialize_log_extra(extra)
                  if extra_str:
@@ -789,8 +913,31 @@ async def execute_model(
          tools = []
          nonlocal code_interpreter_info, file_search_info

+         # Get universal tool toggle overrides first
+         enabled_tools: set[str] = args.get("_enabled_tools", set())  # type: ignore[assignment]
+         disabled_tools: set[str] = args.get("_disabled_tools", set())  # type: ignore[assignment]
+
          # Process MCP configuration if provided
-         if services.is_configured("mcp"):
+         # Apply universal tool toggle overrides for mcp
+         mcp_enabled_by_config = services.is_configured("mcp")
+         mcp_enabled_by_toggle = "mcp" in enabled_tools
+         mcp_disabled_by_toggle = "mcp" in disabled_tools
+
+         # Determine final enablement state
+         mcp_should_enable = False
+         if mcp_enabled_by_toggle:
+             # Universal --enable-tool takes highest precedence
+             mcp_should_enable = True
+             logger.debug("MCP enabled via --enable-tool")
+         elif mcp_disabled_by_toggle:
+             # Universal --disable-tool takes highest precedence
+             mcp_should_enable = False
+             logger.debug("MCP disabled via --disable-tool")
+         else:
+             # Fall back to config-based enablement
+             mcp_should_enable = mcp_enabled_by_config
+
+         if mcp_should_enable and services.is_configured("mcp"):
              mcp_manager = await services.get_mcp_manager()
              if mcp_manager:
                  tools.extend(mcp_manager.get_tools_for_responses_api())
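Note: the enable/disable resolution above is repeated almost verbatim for code-interpreter and file-search in the next two hunks. The precedence rule (--enable-tool wins over --disable-tool, which wins over the config or routing default) could be captured in one helper, sketched here:

    def resolve_tool_enablement(
        tool: str,
        enabled_tools: set[str],
        disabled_tools: set[str],
        default: bool,
    ) -> bool:
        """Apply the toggle precedence used inline above."""
        if tool in enabled_tools:
            return True   # --enable-tool takes highest precedence
        if tool in disabled_tools:
            return False  # --disable-tool comes next
        return default    # fall back to config/routing

    # e.g.:
    # mcp_should_enable = resolve_tool_enablement(
    #     "mcp", enabled_tools, disabled_tools, services.is_configured("mcp")
    # )
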
@@ -804,10 +951,29 @@ async def execute_model(
          routing_result_typed: Optional[ProcessingResult] = routing_result

          # Process Code Interpreter configuration if enabled
-         if (
+         # Apply universal tool toggle overrides for code-interpreter
+         ci_enabled_by_routing = (
              routing_result_typed
              and "code-interpreter" in routing_result_typed.enabled_tools
-         ):
+         )
+         ci_enabled_by_toggle = "code-interpreter" in enabled_tools
+         ci_disabled_by_toggle = "code-interpreter" in disabled_tools
+
+         # Determine final enablement state
+         ci_should_enable = False
+         if ci_enabled_by_toggle:
+             # Universal --enable-tool takes highest precedence
+             ci_should_enable = True
+             logger.debug("Code Interpreter enabled via --enable-tool")
+         elif ci_disabled_by_toggle:
+             # Universal --disable-tool takes highest precedence
+             ci_should_enable = False
+             logger.debug("Code Interpreter disabled via --disable-tool")
+         else:
+             # Fall back to routing-based enablement
+             ci_should_enable = bool(ci_enabled_by_routing)
+
+         if ci_should_enable and routing_result_typed:
              code_interpreter_files = routing_result_typed.validated_files.get(
                  "code-interpreter", []
              )
@@ -854,10 +1020,29 @@ async def execute_model(
              )

          # Process File Search configuration if enabled
-         if (
+         # Apply universal tool toggle overrides for file-search
+         fs_enabled_by_routing = (
              routing_result_typed
              and "file-search" in routing_result_typed.enabled_tools
-         ):
+         )
+         fs_enabled_by_toggle = "file-search" in enabled_tools
+         fs_disabled_by_toggle = "file-search" in disabled_tools
+
+         # Determine final enablement state
+         fs_should_enable = False
+         if fs_enabled_by_toggle:
+             # Universal --enable-tool takes highest precedence
+             fs_should_enable = True
+             logger.debug("File Search enabled via --enable-tool")
+         elif fs_disabled_by_toggle:
+             # Universal --disable-tool takes highest precedence
+             fs_should_enable = False
+             logger.debug("File Search disabled via --disable-tool")
+         else:
+             # Fall back to routing-based enablement
+             fs_should_enable = bool(fs_enabled_by_routing)
+
+         if fs_should_enable and routing_result_typed:
              file_search_files = routing_result_typed.validated_files.get(
                  "file-search", []
              )
@@ -920,7 +1105,15 @@ async def execute_model(

          # Determine if web search should be enabled
          web_search_enabled = False
-         if web_search_from_cli:
+         if "web-search" in enabled_tools:
+             # Universal --enable-tool web-search takes highest precedence
+             web_search_enabled = True
+             logger.debug("Web search enabled via --enable-tool")
+         elif "web-search" in disabled_tools:
+             # Universal --disable-tool web-search takes highest precedence
+             web_search_enabled = False
+             logger.debug("Web search disabled via --disable-tool")
+         elif web_search_from_cli:
              # Explicit --web-search flag takes precedence
              web_search_enabled = True
          elif no_web_search_from_cli:
@@ -995,19 +1188,61 @@ async def execute_model(
          # Debug log the final tools array
          logger.debug(f"Final tools array being passed to API: {tools}")

-         # Stream the response
-         logger.debug(f"Tools being passed to API: {tools}")
-         async for response in stream_structured_output(
-             client=client,
-             model=args["model"],
-             system_prompt=system_prompt,
-             user_prompt=user_prompt,
-             output_schema=output_model,
-             output_file=args.get("output_file"),
-             on_log=log_callback,
-             tools=tools,
+         # Check for two-pass sentinel mode
+         ci_config = config.get_code_interpreter_config()
+         effective_strategy = _get_effective_download_strategy(args, ci_config)
+         if (
+             effective_strategy == "two_pass_sentinel"
+             and output_model
+             and code_interpreter_info
          ):
-             output_buffer.append(response)
+             try:
+                 logger.debug(
+                     "Using two-pass sentinel mode for Code Interpreter file downloads"
+                 )
+                 resp, downloaded_files = await _execute_two_pass_sentinel(
+                     client,
+                     args,
+                     system_prompt,
+                     user_prompt,
+                     output_model,
+                     tools,
+                     log_callback,
+                     ci_config,
+                     code_interpreter_info,
+                 )
+                 response = resp
+                 # Store downloaded files info for later use
+                 if downloaded_files:
+                     setattr(response, "_downloaded_files", downloaded_files)
+             except Exception as e:
+                 logger.warning(
+                     f"Two-pass execution failed, falling back to single-pass: {e}"
+                 )
+                 resp, _ = await _fallback_single_pass(
+                     client,
+                     args,
+                     system_prompt,
+                     user_prompt,
+                     output_model,
+                     tools,
+                     log_callback,
+                 )
+                 response = resp
+         else:
+             # Create the response using the API (single-pass mode)
+             logger.debug(f"Tools being passed to API: {tools}")
+             response = await create_structured_output(
+                 client=client,
+                 model=args["model"],
+                 system_prompt=system_prompt,
+                 user_prompt=user_prompt,
+                 output_schema=output_model,
+                 output_file=args.get("output_file"),
+                 on_log=log_callback,
+                 tools=tools,
+             )
+         output_buffer.append(response)

          # Handle final output
          output_file = args.get("output_file")
@@ -1043,50 +1278,96 @@ async def execute_model(
              print(json_output)

          # Handle file downloads from Code Interpreter if any were generated
-         if (
-             code_interpreter_info
-             and hasattr(response, "file_ids")
-             and response.file_ids
-         ):
+         if code_interpreter_info and output_buffer:
              try:
-                 download_dir = args.get(
-                     "code_interpreter_download_dir", "./downloads"
-                 )
-                 manager = code_interpreter_info["manager"]
-                 # Type ignore since we know this is a CodeInterpreterManager
-                 downloaded_files = await manager.download_generated_files(  # type: ignore[attr-defined]
-                     response.file_ids, download_dir
-                 )
-                 if downloaded_files:
-                     logger.info(
-                         f"Downloaded {len(downloaded_files)} generated files to {download_dir}"
+                 # Get the API response from the last output item
+                 last_response = output_buffer[-1]
+                 if hasattr(last_response, "_api_response"):
+                     api_response = getattr(last_response, "_api_response")
+                     # Responses API has 'output' attribute, not 'messages'
+                     if hasattr(api_response, "output"):
+                         download_dir = args.get(
+                             "code_interpreter_download_dir", "./downloads"
+                         )
+                         manager = code_interpreter_info["manager"]
+
+                         # Debug: Log response structure for Responses API
+                         logger.debug(
+                             f"Response has {len(api_response.output)} output items"
+                         )
+                         for i, item in enumerate(api_response.output):
+                             logger.debug(f"Output item {i}: {type(item)}")
+                             if hasattr(item, "type"):
+                                 logger.debug(f"  Type: {item.type}")
+                             if hasattr(item, "content"):
+                                 content_str = (
+                                     str(item.content)[:200] + "..."
+                                     if len(str(item.content)) > 200
+                                     else str(item.content)
+                                 )
+                                 logger.debug(
+                                     f"  Content preview: {content_str}"
+                                 )
+                             # Debug tool call outputs for file detection
+                             if hasattr(item, "outputs"):
+                                 logger.debug(
+                                     f"  Outputs: {len(item.outputs or [])} items"
+                                 )
+                                 for j, output in enumerate(item.outputs or []):
+                                     logger.debug(
+                                         f"    Output {j}: {type(output)}"
+                                     )
+                                     if hasattr(output, "type"):
+                                         logger.debug(
+                                             f"      Type: {output.type}"
+                                         )
+                                     if hasattr(output, "file_id"):
+                                         logger.debug(
+                                             f"      File ID: {output.file_id}"
+                                         )
+                                     if hasattr(output, "filename"):
+                                         logger.debug(
+                                             f"      Filename: {output.filename}"
+                                         )
+
+                         # Type ignore since we know this is a CodeInterpreterManager
+                         downloaded_files = await manager.download_generated_files(  # type: ignore[attr-defined]
+                             api_response, download_dir
+                         )
+                         if downloaded_files:
+                             logger.info(
+                                 f"Downloaded {len(downloaded_files)} generated files to {download_dir}"
+                             )
+                             for file_path in downloaded_files:
+                                 logger.info(f"  - {file_path}")
+                         else:
+                             logger.debug(
+                                 "No files were downloaded from Code Interpreter"
+                             )
+                     else:
+                         logger.debug("API response has no output attribute")
+                 else:
+                     logger.debug(
+                         "Last response has no _api_response attribute"
                      )
-                     for file_path in downloaded_files:
-                         logger.info(f"  - {file_path}")
              except Exception as e:
                  logger.warning(f"Failed to download generated files: {e}")

          return ExitCode.SUCCESS

-     # Execute main operation with timeout safeguards
+     # Execute main operation
      try:
-         result = await operation_manager.execute_with_safeguards(
-             execute_main_operation, "model execution"
-         )
-         # The result should be an ExitCode from execute_main_operation
-         return result  # type: ignore[no-any-return]
+         result = await execute_main_operation()
+         return result
      except (
-         StreamInterruptedError,
-         StreamBufferError,
-         StreamParseError,
          APIResponseError,
          EmptyResponseError,
          InvalidResponseFormatError,
      ) as e:
-         logger.error("Stream error: %s", str(e))
+         logger.error("API error: %s", str(e))
          raise CLIError(str(e), exit_code=ExitCode.API_ERROR)
      except Exception as e:
-         logger.exception("Unexpected error during streaming")
+         logger.exception("Unexpected error during execution")
          raise CLIError(str(e), exit_code=ExitCode.UNKNOWN_ERROR)
      finally:
          # Clean up Code Interpreter files if requested