ostruct-cli 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
ostruct/cli/runner.py CHANGED
@@ -5,7 +5,7 @@ import json
5
5
  import logging
6
6
  import os
7
7
  from pathlib import Path
8
- from typing import Any, AsyncGenerator, Dict, List, Optional, Type, Union
8
+ from typing import Any, Dict, List, Optional, Type, Union
9
9
 
10
10
  from openai import AsyncOpenAI, OpenAIError
11
11
  from openai_model_registry import ModelRegistry
@@ -18,18 +18,18 @@ from .errors import (
18
18
  APIErrorMapper,
19
19
  CLIError,
20
20
  SchemaValidationError,
21
- StreamInterruptedError,
22
- StreamParseError,
23
21
  )
24
22
  from .exit_codes import ExitCode
25
23
  from .explicit_file_processor import ProcessingResult
26
24
  from .file_search import FileSearchManager
25
+ from .json_extract import split_json_and_text
27
26
  from .mcp_integration import MCPConfiguration, MCPServerManager
28
27
  from .progress_reporting import (
29
28
  configure_progress_reporter,
30
29
  get_progress_reporter,
31
30
  report_success,
32
31
  )
32
+ from .sentinel import extract_json_block
33
33
  from .serialization import LogSerializer
34
34
  from .services import ServiceContainer
35
35
  from .types import CLIParams
@@ -38,7 +38,7 @@ from .unattended_operation import (
38
38
  )
39
39
 
40
40
 
41
- # Error classes for streaming operations (duplicated from cli.py for now)
41
+ # Error classes for API operations
42
42
  class APIResponseError(Exception):
43
43
  pass
44
44
 
@@ -51,10 +51,6 @@ class InvalidResponseFormatError(Exception):
51
51
  pass
52
52
 
53
53
 
54
- class StreamBufferError(Exception):
55
- pass
56
-
57
-
58
54
  def make_strict(obj: Any) -> None:
59
55
  """Transform Pydantic schema for Responses API strict mode.
60
56
 
@@ -85,6 +81,18 @@ def supports_structured_output(model: str) -> bool:
85
81
  return True
86
82
 
87
83
 
84
+ def _assistant_text(response: Any) -> str:
85
+ """Extract text content from API response (Responses API format)."""
86
+ text_parts = []
87
+ for item in response.output:
88
+ if getattr(item, "type", None) == "message":
89
+ for content_block in item.content or []:
90
+ if hasattr(content_block, "text"):
91
+ # For Responses API, text content is directly in the text attribute
92
+ text_parts.append(content_block.text)
93
+ return "\n".join(text_parts)
94
+
95
+
88
96
  logger = logging.getLogger(__name__)
89
97
 
90
98
 
@@ -243,8 +251,17 @@ async def process_code_interpreter_configuration(
243
251
  if not files_to_upload:
244
252
  return None
245
253
 
254
+ # Load configuration for Code Interpreter
255
+ from typing import Union, cast
256
+
257
+ from .config import OstructConfig
258
+
259
+ config_path = cast(Union[str, Path, None], args.get("config"))
260
+ config = OstructConfig.load(config_path)
261
+ ci_config = config.get_code_interpreter_config()
262
+
246
263
  # Create Code Interpreter manager
247
- manager = CodeInterpreterManager(client)
264
+ manager = CodeInterpreterManager(client, ci_config)
248
265
 
249
266
  # Validate files before upload
250
267
  validation_errors = manager.validate_files_for_upload(files_to_upload)
@@ -420,7 +437,7 @@ async def process_file_search_configuration(
420
437
  raise mapped_error
421
438
 
422
439
 
423
- async def stream_structured_output(
440
+ async def create_structured_output(
424
441
  client: AsyncOpenAI,
425
442
  model: str,
426
443
  system_prompt: str,
@@ -429,8 +446,8 @@ async def stream_structured_output(
429
446
  output_file: Optional[str] = None,
430
447
  tools: Optional[List[dict]] = None,
431
448
  **kwargs: Any,
432
- ) -> AsyncGenerator[BaseModel, None]:
433
- """Stream structured output from OpenAI API using Responses API.
449
+ ) -> BaseModel:
450
+ """Create structured output from OpenAI Responses API.
434
451
 
435
452
  This function uses the OpenAI Responses API with strict mode schema validation
436
453
  to generate structured output that matches the provided Pydantic model.
@@ -441,16 +458,15 @@ async def stream_structured_output(
441
458
  system_prompt: The system prompt to use
442
459
  user_prompt: The user prompt to use
443
460
  output_schema: The Pydantic model to validate responses against
444
- output_file: Optional file to write output to
461
+ output_file: Optional file to write output to (unused, kept for compatibility)
445
462
  tools: Optional list of tools (e.g., MCP, Code Interpreter) to include
446
463
  **kwargs: Additional parameters to pass to the API
447
464
 
448
465
  Returns:
449
- An async generator yielding validated model instances
466
+ A validated model instance
450
467
 
451
468
  Raises:
452
469
  ValueError: If the model does not support structured output or parameters are invalid
453
- StreamInterruptedError: If the stream is interrupted
454
470
  APIResponseError: If there is an API error
455
471
  """
456
472
  try:
@@ -465,7 +481,7 @@ async def stream_structured_output(
465
481
  on_log = kwargs.pop("on_log", None)
466
482
 
467
483
  # Handle model-specific parameters
468
- stream_kwargs = {}
484
+ api_kwargs = {}
469
485
  registry = ModelRegistry.get_instance()
470
486
  capabilities = registry.get_capabilities(model)
471
487
 
@@ -474,7 +490,7 @@ async def stream_structured_output(
474
490
  if param_name in capabilities.supported_parameters:
475
491
  # Validate the parameter value
476
492
  capabilities.validate_parameter(param_name, value)
477
- stream_kwargs[param_name] = value
493
+ api_kwargs[param_name] = value
478
494
  else:
479
495
  logger.warning(
480
496
  f"Parameter {param_name} is not supported by model {model} and will be ignored"
@@ -503,8 +519,8 @@ async def stream_structured_output(
503
519
  "strict": True,
504
520
  }
505
521
  },
506
- "stream": True,
507
- **stream_kwargs,
522
+ "stream": False,
523
+ **api_kwargs,
508
524
  }
509
525
 
510
526
  # Add tools if provided
@@ -516,7 +532,7 @@ async def stream_structured_output(
516
532
  logger.debug("Making OpenAI Responses API request with:")
517
533
  logger.debug("Model: %s", model)
518
534
  logger.debug("Combined prompt: %s", combined_prompt)
519
- logger.debug("Parameters: %s", json.dumps(stream_kwargs, indent=2))
535
+ logger.debug("Parameters: %s", json.dumps(api_kwargs, indent=2))
520
536
  logger.debug("Schema: %s", json.dumps(strict_schema, indent=2))
521
537
  logger.debug("Tools being passed to API: %s", tools)
522
538
  logger.debug(
@@ -524,114 +540,51 @@ async def stream_structured_output(
524
540
  json.dumps(api_params, indent=2, default=str),
525
541
  )
526
542
 
527
- # Use the Responses API with streaming
528
- response = await client.responses.create(**api_params)
543
+ # Use the Responses API
544
+ api_response = await client.responses.create(**api_params)
529
545
 
530
- # Process streaming response
531
- accumulated_content = ""
532
- async for chunk in response:
533
- if on_log:
534
- on_log(logging.DEBUG, f"Received chunk: {chunk}", {})
546
+ if on_log:
547
+ on_log(logging.DEBUG, f"Received response: {api_response.id}", {})
535
548
 
536
- # Check for tool calls (including web search)
537
- if hasattr(chunk, "choices") and chunk.choices:
538
- choice = chunk.choices[0]
539
- # Log tool calls if present
540
- if (
541
- hasattr(choice, "delta")
542
- and hasattr(choice.delta, "tool_calls")
543
- and choice.delta.tool_calls
544
- ):
545
- for tool_call in choice.delta.tool_calls:
546
- if (
547
- hasattr(tool_call, "type")
548
- and tool_call.type == "web_search_preview"
549
- ):
550
- tool_id = getattr(tool_call, "id", "unknown")
551
- logger.debug(
552
- f"Web search tool invoked (id={tool_id})"
553
- )
554
- elif hasattr(tool_call, "function") and hasattr(
555
- tool_call.function, "name"
556
- ):
557
- # Handle other tool types for completeness
558
- tool_name = tool_call.function.name
559
- tool_id = getattr(tool_call, "id", "unknown")
560
- logger.debug(
561
- f"Tool '{tool_name}' invoked (id={tool_id})"
562
- )
563
-
564
- # Handle different response formats based on the chunk structure
565
- content_added = False
549
+ # Get the complete response content directly
550
+ content = api_response.output_text
566
551
 
567
- # Try different possible response formats
568
- if hasattr(chunk, "choices") and chunk.choices:
569
- # Standard chat completion format
570
- choice = chunk.choices[0]
571
- if (
572
- hasattr(choice, "delta")
573
- and hasattr(choice.delta, "content")
574
- and choice.delta.content
575
- ):
576
- accumulated_content += choice.delta.content
577
- content_added = True
578
- elif (
579
- hasattr(choice, "message")
580
- and hasattr(choice.message, "content")
581
- and choice.message.content
582
- ):
583
- accumulated_content += choice.message.content
584
- content_added = True
585
- elif hasattr(chunk, "response") and hasattr(
586
- chunk.response, "body"
587
- ):
588
- # Responses API format
589
- accumulated_content += chunk.response.body
590
- content_added = True
591
- elif hasattr(chunk, "content"):
592
- # Direct content
593
- accumulated_content += chunk.content
594
- content_added = True
595
- elif hasattr(chunk, "text"):
596
- # Text content
597
- accumulated_content += chunk.text
598
- content_added = True
599
-
600
- if on_log and content_added:
601
- on_log(
602
- logging.DEBUG,
603
- f"Added content, total length: {len(accumulated_content)}",
604
- {},
605
- )
552
+ if on_log:
553
+ on_log(
554
+ logging.DEBUG,
555
+ f"Response content length: {len(content)}",
556
+ {},
557
+ )
606
558
 
607
- # Try to parse and validate accumulated content as complete JSON
608
- try:
609
- if accumulated_content.strip():
610
- # Attempt to parse as complete JSON
611
- data = json.loads(accumulated_content.strip())
612
- validated = output_schema.model_validate(data)
613
- yield validated
614
- # Reset for next complete response (if any)
615
- accumulated_content = ""
616
- except (json.JSONDecodeError, ValueError):
617
- # Not yet complete JSON, continue accumulating
618
- continue
619
-
620
- # Handle any remaining content
621
- if accumulated_content.strip():
559
+ # Parse and validate the complete response
560
+ try:
561
+ # Try new JSON extraction logic first
622
562
  try:
623
- data = json.loads(accumulated_content.strip())
624
- validated = output_schema.model_validate(data)
625
- yield validated
626
- except (json.JSONDecodeError, ValueError) as e:
627
- logger.error(f"Failed to parse final accumulated content: {e}")
628
- raise StreamParseError(
629
- f"Failed to parse response as valid JSON: {e}"
630
- )
563
+ data, markdown_text = split_json_and_text(content)
564
+ except ValueError:
565
+ # Fallback to original parsing for non-fenced JSON
566
+ data = json.loads(content.strip())
567
+ markdown_text = ""
568
+
569
+ validated = output_schema.model_validate(data)
570
+
571
+ # Store full raw text for downstream processing (debug logs, etc.)
572
+ setattr(validated, "_raw_text", content)
573
+ # Store markdown text for annotation processing
574
+ setattr(validated, "_markdown_text", markdown_text)
575
+ # Store full API response for file download access
576
+ setattr(validated, "_api_response", api_response)
577
+
578
+ return validated
579
+
580
+ except ValueError as e:
581
+ logger.error(f"Failed to parse response content: {e}")
582
+ raise InvalidResponseFormatError(
583
+ f"Failed to parse response as valid JSON: {e}"
584
+ )
631
585
 
632
586
  except Exception as e:
633
587
  # Map OpenAI errors using the error mapper
634
-
635
588
  if isinstance(e, OpenAIError):
636
589
  mapped_error = APIErrorMapper.map_openai_error(e)
637
590
  logger.error(f"OpenAI API error mapped: {mapped_error}")
@@ -680,10 +633,6 @@ async def stream_structured_output(
680
633
  else:
681
634
  logger.error(f"Unmapped API error: {e}")
682
635
  raise APIResponseError(str(e))
683
- finally:
684
- # Note: We don't close the client here as it may be reused
685
- # The caller is responsible for client lifecycle management
686
- pass
687
636
 
688
637
 
689
638
  # Note: validation functions are defined in cli.py to avoid circular imports
@@ -709,29 +658,201 @@ async def process_templates(
709
658
  )
710
659
 
711
660
 
712
- async def execute_model(
661
+ async def _execute_two_pass_sentinel(
662
+ client: AsyncOpenAI,
713
663
  args: CLIParams,
714
- params: Dict[str, Any],
664
+ system_prompt: str,
665
+ user_prompt: str,
715
666
  output_model: Type[BaseModel],
667
+ tools: List[dict],
668
+ log_cb: Any,
669
+ ci_config: Dict[str, Any],
670
+ code_interpreter_info: Optional[Dict[str, Any]],
671
+ ) -> tuple[BaseModel, List[str]]:
672
+ """Execute two-pass sentinel approach for file downloads."""
673
+ import json
674
+
675
+ # ---- pass 1 (raw) ----
676
+ logger.debug("Starting two-pass execution: Pass 1 (raw mode)")
677
+ raw_resp = await client.responses.create(
678
+ model=args["model"],
679
+ input=f"{system_prompt}\n\n{user_prompt}",
680
+ tools=tools, # type: ignore[arg-type]
681
+ # No text format - this allows annotations
682
+ )
683
+
684
+ logger.debug(f"Raw response structure: {type(raw_resp)}")
685
+ logger.debug(
686
+ f"Raw response output: {getattr(raw_resp, 'output', 'No output attr')}"
687
+ )
688
+ raw_text = _assistant_text(raw_resp)
689
+ logger.debug(
690
+ f"Raw response from first pass (first 500 chars): {raw_text[:500]}"
691
+ )
692
+ data = extract_json_block(raw_text) or {}
693
+ logger.debug(f"Extracted JSON from sentinel markers: {bool(data)}")
694
+
695
+ # Validate sentinel extraction
696
+ if not data:
697
+ logger.warning(
698
+ "No sentinel JSON found in first pass, falling back to single-pass"
699
+ )
700
+ return await _fallback_single_pass(
701
+ client,
702
+ args,
703
+ system_prompt,
704
+ user_prompt,
705
+ output_model,
706
+ tools,
707
+ log_cb,
708
+ )
709
+
710
+ # download files from first pass
711
+ downloaded_files = []
712
+ if code_interpreter_info and code_interpreter_info.get("manager"):
713
+ cm = code_interpreter_info["manager"]
714
+ # Use output directory from config, fallback to args, then default
715
+ download_dir = (
716
+ ci_config.get("output_directory")
717
+ or args.get("code_interpreter_download_dir")
718
+ or "./downloads"
719
+ )
720
+ logger.debug(f"Downloading files to: {download_dir}")
721
+ downloaded_files = await cm.download_generated_files(
722
+ raw_resp, download_dir
723
+ )
724
+ if downloaded_files:
725
+ logger.info(
726
+ f"Downloaded {len(downloaded_files)} files from first pass"
727
+ )
728
+
729
+ # ---- pass 2 (strict) ----
730
+ logger.debug("Starting two-pass execution: Pass 2 (structured mode)")
731
+ strict_sys = (
732
+ system_prompt
733
+ + "\n\nReuse ONLY these values; do not repeat external calls:\n"
734
+ + json.dumps(data, indent=2)
735
+ )
736
+
737
+ # Prepare schema for strict mode
738
+ schema = output_model.model_json_schema()
739
+ strict_schema = copy.deepcopy(schema)
740
+ make_strict(strict_schema)
741
+ schema_name = output_model.__name__.lower()
742
+
743
+ strict_resp = await client.responses.create(
744
+ model=args["model"],
745
+ input=f"{strict_sys}\n\n{user_prompt}",
746
+ text={
747
+ "format": {
748
+ "type": "json_schema",
749
+ "name": schema_name,
750
+ "schema": strict_schema,
751
+ "strict": True,
752
+ }
753
+ },
754
+ tools=[], # No tools needed for formatting
755
+ stream=False,
756
+ )
757
+
758
+ # Parse and validate the structured response
759
+ content = strict_resp.output_text
760
+ try:
761
+ # Try new JSON extraction logic first
762
+ try:
763
+ data_final, markdown_text = split_json_and_text(content)
764
+ except ValueError:
765
+ # Fallback to original parsing for non-fenced JSON
766
+ data_final = json.loads(content.strip())
767
+ markdown_text = ""
768
+
769
+ validated = output_model.model_validate(data_final)
770
+
771
+ # Store full raw text for downstream processing (debug logs, etc.)
772
+ setattr(validated, "_raw_text", content)
773
+ # Store markdown text for annotation processing
774
+ setattr(validated, "_markdown_text", markdown_text)
775
+ # Store full API response for file download access
776
+ setattr(validated, "_api_response", strict_resp)
777
+
778
+ return validated, downloaded_files
779
+
780
+ except ValueError as e:
781
+ logger.error(f"Failed to parse structured response content: {e}")
782
+ raise InvalidResponseFormatError(
783
+ f"Failed to parse response as valid JSON: {e}"
784
+ )
785
+
786
+
787
+ async def _fallback_single_pass(
788
+ client: AsyncOpenAI,
789
+ args: CLIParams,
716
790
  system_prompt: str,
717
791
  user_prompt: str,
718
- ) -> ExitCode:
719
- """Execute the model and handle the response.
792
+ output_model: Type[BaseModel],
793
+ tools: List[dict],
794
+ log_cb: Any,
795
+ ) -> tuple[BaseModel, List[str]]:
796
+ """Fallback to single-pass execution."""
797
+ logger.debug("Executing single-pass fallback")
798
+ response = await create_structured_output(
799
+ client=client,
800
+ model=args["model"],
801
+ system_prompt=system_prompt,
802
+ user_prompt=user_prompt,
803
+ output_schema=output_model,
804
+ output_file=args.get("output_file"),
805
+ on_log=log_cb,
806
+ tools=tools,
807
+ )
808
+ return response, [] # No files downloaded in fallback
809
+
810
+
811
+ def _get_effective_download_strategy(
812
+ args: CLIParams, ci_config: Dict[str, Any]
813
+ ) -> str:
814
+ """Determine the effective download strategy from config and feature flags.
720
815
 
721
816
  Args:
722
- args: Command line arguments
723
- params: Validated model parameters
724
- output_model: Generated Pydantic model
725
- system_prompt: Processed system prompt
726
- user_prompt: Processed user prompt
817
+ args: CLI parameters including enabled_features and disabled_features
818
+ ci_config: Code interpreter configuration
727
819
 
728
820
  Returns:
729
- Exit code indicating success or failure
730
-
731
- Raises:
732
- CLIError: For execution errors
733
- UnattendedOperationTimeoutError: For operation timeouts
821
+ Either "single_pass" or "two_pass_sentinel"
734
822
  """
823
+ # Start with config default
824
+ strategy: str = ci_config.get("download_strategy", "single_pass")
825
+
826
+ # Check for feature flag override
827
+ enabled_features = args.get("enabled_features", [])
828
+ disabled_features = args.get("disabled_features", [])
829
+
830
+ if enabled_features or disabled_features:
831
+ from .click_options import parse_feature_flags
832
+
833
+ try:
834
+ parsed_flags = parse_feature_flags(
835
+ tuple(enabled_features), tuple(disabled_features)
836
+ )
837
+ ci_hack_flag = parsed_flags.get("ci-download-hack")
838
+ if ci_hack_flag == "on":
839
+ strategy = "two_pass_sentinel"
840
+ elif ci_hack_flag == "off":
841
+ strategy = "single_pass"
842
+ except Exception as e:
843
+ logger.warning(f"Failed to parse feature flags: {e}")
844
+
845
+ return strategy
846
+
847
+
848
+ async def execute_model(
849
+ args: CLIParams,
850
+ params: Dict[str, Any],
851
+ output_model: Type[BaseModel],
852
+ system_prompt: str,
853
+ user_prompt: str,
854
+ ) -> ExitCode:
855
+ """Execute the model with the given parameters."""
735
856
  logger.debug("=== Execution Phase ===")
736
857
 
737
858
  # Initialize unattended operation manager
@@ -770,7 +891,7 @@ async def execute_model(
770
891
 
771
892
  # Create detailed log callback
772
893
  def log_callback(level: int, message: str, extra: dict[str, Any]) -> None:
773
- if args.get("debug_openai_stream", False):
894
+ if args.get("verbose", False):
774
895
  if extra:
775
896
  extra_str = LogSerializer.serialize_log_extra(extra)
776
897
  if extra_str:
@@ -789,8 +910,31 @@ async def execute_model(
789
910
  tools = []
790
911
  nonlocal code_interpreter_info, file_search_info
791
912
 
913
+ # Get universal tool toggle overrides first
914
+ enabled_tools: set[str] = args.get("_enabled_tools", set()) # type: ignore[assignment]
915
+ disabled_tools: set[str] = args.get("_disabled_tools", set()) # type: ignore[assignment]
916
+
792
917
  # Process MCP configuration if provided
793
- if services.is_configured("mcp"):
918
+ # Apply universal tool toggle overrides for mcp
919
+ mcp_enabled_by_config = services.is_configured("mcp")
920
+ mcp_enabled_by_toggle = "mcp" in enabled_tools
921
+ mcp_disabled_by_toggle = "mcp" in disabled_tools
922
+
923
+ # Determine final enablement state
924
+ mcp_should_enable = False
925
+ if mcp_enabled_by_toggle:
926
+ # Universal --enable-tool takes highest precedence
927
+ mcp_should_enable = True
928
+ logger.debug("MCP enabled via --enable-tool")
929
+ elif mcp_disabled_by_toggle:
930
+ # Universal --disable-tool takes highest precedence
931
+ mcp_should_enable = False
932
+ logger.debug("MCP disabled via --disable-tool")
933
+ else:
934
+ # Fall back to config-based enablement
935
+ mcp_should_enable = mcp_enabled_by_config
936
+
937
+ if mcp_should_enable and services.is_configured("mcp"):
794
938
  mcp_manager = await services.get_mcp_manager()
795
939
  if mcp_manager:
796
940
  tools.extend(mcp_manager.get_tools_for_responses_api())
@@ -804,10 +948,29 @@ async def execute_model(
804
948
  routing_result_typed: Optional[ProcessingResult] = routing_result
805
949
 
806
950
  # Process Code Interpreter configuration if enabled
807
- if (
951
+ # Apply universal tool toggle overrides for code-interpreter
952
+ ci_enabled_by_routing = (
808
953
  routing_result_typed
809
954
  and "code-interpreter" in routing_result_typed.enabled_tools
810
- ):
955
+ )
956
+ ci_enabled_by_toggle = "code-interpreter" in enabled_tools
957
+ ci_disabled_by_toggle = "code-interpreter" in disabled_tools
958
+
959
+ # Determine final enablement state
960
+ ci_should_enable = False
961
+ if ci_enabled_by_toggle:
962
+ # Universal --enable-tool takes highest precedence
963
+ ci_should_enable = True
964
+ logger.debug("Code Interpreter enabled via --enable-tool")
965
+ elif ci_disabled_by_toggle:
966
+ # Universal --disable-tool takes highest precedence
967
+ ci_should_enable = False
968
+ logger.debug("Code Interpreter disabled via --disable-tool")
969
+ else:
970
+ # Fall back to routing-based enablement
971
+ ci_should_enable = bool(ci_enabled_by_routing)
972
+
973
+ if ci_should_enable and routing_result_typed:
811
974
  code_interpreter_files = routing_result_typed.validated_files.get(
812
975
  "code-interpreter", []
813
976
  )
@@ -854,10 +1017,29 @@ async def execute_model(
854
1017
  )
855
1018
 
856
1019
  # Process File Search configuration if enabled
857
- if (
1020
+ # Apply universal tool toggle overrides for file-search
1021
+ fs_enabled_by_routing = (
858
1022
  routing_result_typed
859
1023
  and "file-search" in routing_result_typed.enabled_tools
860
- ):
1024
+ )
1025
+ fs_enabled_by_toggle = "file-search" in enabled_tools
1026
+ fs_disabled_by_toggle = "file-search" in disabled_tools
1027
+
1028
+ # Determine final enablement state
1029
+ fs_should_enable = False
1030
+ if fs_enabled_by_toggle:
1031
+ # Universal --enable-tool takes highest precedence
1032
+ fs_should_enable = True
1033
+ logger.debug("File Search enabled via --enable-tool")
1034
+ elif fs_disabled_by_toggle:
1035
+ # Universal --disable-tool takes highest precedence
1036
+ fs_should_enable = False
1037
+ logger.debug("File Search disabled via --disable-tool")
1038
+ else:
1039
+ # Fall back to routing-based enablement
1040
+ fs_should_enable = bool(fs_enabled_by_routing)
1041
+
1042
+ if fs_should_enable and routing_result_typed:
861
1043
  file_search_files = routing_result_typed.validated_files.get(
862
1044
  "file-search", []
863
1045
  )
@@ -920,7 +1102,15 @@ async def execute_model(
920
1102
 
921
1103
  # Determine if web search should be enabled
922
1104
  web_search_enabled = False
923
- if web_search_from_cli:
1105
+ if "web-search" in enabled_tools:
1106
+ # Universal --enable-tool web-search takes highest precedence
1107
+ web_search_enabled = True
1108
+ logger.debug("Web search enabled via --enable-tool")
1109
+ elif "web-search" in disabled_tools:
1110
+ # Universal --disable-tool web-search takes highest precedence
1111
+ web_search_enabled = False
1112
+ logger.debug("Web search disabled via --disable-tool")
1113
+ elif web_search_from_cli:
924
1114
  # Explicit --web-search flag takes precedence
925
1115
  web_search_enabled = True
926
1116
  elif no_web_search_from_cli:
@@ -995,19 +1185,61 @@ async def execute_model(
995
1185
  # Debug log the final tools array
996
1186
  logger.debug(f"Final tools array being passed to API: {tools}")
997
1187
 
998
- # Stream the response
999
- logger.debug(f"Tools being passed to API: {tools}")
1000
- async for response in stream_structured_output(
1001
- client=client,
1002
- model=args["model"],
1003
- system_prompt=system_prompt,
1004
- user_prompt=user_prompt,
1005
- output_schema=output_model,
1006
- output_file=args.get("output_file"),
1007
- on_log=log_callback,
1008
- tools=tools,
1188
+ # Check for two-pass sentinel mode
1189
+ ci_config = config.get_code_interpreter_config()
1190
+ effective_strategy = _get_effective_download_strategy(args, ci_config)
1191
+ if (
1192
+ effective_strategy == "two_pass_sentinel"
1193
+ and output_model
1194
+ and code_interpreter_info
1009
1195
  ):
1010
- output_buffer.append(response)
1196
+ try:
1197
+ logger.debug(
1198
+ "Using two-pass sentinel mode for Code Interpreter file downloads"
1199
+ )
1200
+ resp, downloaded_files = await _execute_two_pass_sentinel(
1201
+ client,
1202
+ args,
1203
+ system_prompt,
1204
+ user_prompt,
1205
+ output_model,
1206
+ tools,
1207
+ log_callback,
1208
+ ci_config,
1209
+ code_interpreter_info,
1210
+ )
1211
+ response = resp
1212
+ # Store downloaded files info for later use
1213
+ if downloaded_files:
1214
+ setattr(response, "_downloaded_files", downloaded_files)
1215
+ except Exception as e:
1216
+ logger.warning(
1217
+ f"Two-pass execution failed, falling back to single-pass: {e}"
1218
+ )
1219
+ resp, _ = await _fallback_single_pass(
1220
+ client,
1221
+ args,
1222
+ system_prompt,
1223
+ user_prompt,
1224
+ output_model,
1225
+ tools,
1226
+ log_callback,
1227
+ )
1228
+ response = resp
1229
+ else:
1230
+ # Create the response using the API (single-pass mode)
1231
+ logger.debug(f"Tools being passed to API: {tools}")
1232
+ response = await create_structured_output(
1233
+ client=client,
1234
+ model=args["model"],
1235
+ system_prompt=system_prompt,
1236
+ user_prompt=user_prompt,
1237
+ output_schema=output_model,
1238
+ output_file=args.get("output_file"),
1239
+ on_log=log_callback,
1240
+ tools=tools,
1241
+ )
1242
+ output_buffer.append(response)
1011
1243
 
1012
1244
  # Handle final output
1013
1245
  output_file = args.get("output_file")
@@ -1043,26 +1275,78 @@ async def execute_model(
1043
1275
  print(json_output)
1044
1276
 
1045
1277
  # Handle file downloads from Code Interpreter if any were generated
1046
- if (
1047
- code_interpreter_info
1048
- and hasattr(response, "file_ids")
1049
- and response.file_ids
1050
- ):
1278
+ if code_interpreter_info and output_buffer:
1051
1279
  try:
1052
- download_dir = args.get(
1053
- "code_interpreter_download_dir", "./downloads"
1054
- )
1055
- manager = code_interpreter_info["manager"]
1056
- # Type ignore since we know this is a CodeInterpreterManager
1057
- downloaded_files = await manager.download_generated_files( # type: ignore[attr-defined]
1058
- response.file_ids, download_dir
1059
- )
1060
- if downloaded_files:
1061
- logger.info(
1062
- f"Downloaded {len(downloaded_files)} generated files to {download_dir}"
1280
+ # Get the API response from the last output item
1281
+ last_response = output_buffer[-1]
1282
+ if hasattr(last_response, "_api_response"):
1283
+ api_response = getattr(last_response, "_api_response")
1284
+ # Responses API has 'output' attribute, not 'messages'
1285
+ if hasattr(api_response, "output"):
1286
+ download_dir = args.get(
1287
+ "code_interpreter_download_dir", "./downloads"
1288
+ )
1289
+ manager = code_interpreter_info["manager"]
1290
+
1291
+ # Debug: Log response structure for Responses API
1292
+ logger.debug(
1293
+ f"Response has {len(api_response.output)} output items"
1294
+ )
1295
+ for i, item in enumerate(api_response.output):
1296
+ logger.debug(f"Output item {i}: {type(item)}")
1297
+ if hasattr(item, "type"):
1298
+ logger.debug(f" Type: {item.type}")
1299
+ if hasattr(item, "content"):
1300
+ content_str = (
1301
+ str(item.content)[:200] + "..."
1302
+ if len(str(item.content)) > 200
1303
+ else str(item.content)
1304
+ )
1305
+ logger.debug(
1306
+ f" Content preview: {content_str}"
1307
+ )
1308
+ # Debug tool call outputs for file detection
1309
+ if hasattr(item, "outputs"):
1310
+ logger.debug(
1311
+ f" Outputs: {len(item.outputs or [])} items"
1312
+ )
1313
+ for j, output in enumerate(item.outputs or []):
1314
+ logger.debug(
1315
+ f" Output {j}: {type(output)}"
1316
+ )
1317
+ if hasattr(output, "type"):
1318
+ logger.debug(
1319
+ f" Type: {output.type}"
1320
+ )
1321
+ if hasattr(output, "file_id"):
1322
+ logger.debug(
1323
+ f" File ID: {output.file_id}"
1324
+ )
1325
+ if hasattr(output, "filename"):
1326
+ logger.debug(
1327
+ f" Filename: {output.filename}"
1328
+ )
1329
+
1330
+ # Type ignore since we know this is a CodeInterpreterManager
1331
+ downloaded_files = await manager.download_generated_files( # type: ignore[attr-defined]
1332
+ api_response, download_dir
1333
+ )
1334
+ if downloaded_files:
1335
+ logger.info(
1336
+ f"Downloaded {len(downloaded_files)} generated files to {download_dir}"
1337
+ )
1338
+ for file_path in downloaded_files:
1339
+ logger.info(f" - {file_path}")
1340
+ else:
1341
+ logger.debug(
1342
+ "No files were downloaded from Code Interpreter"
1343
+ )
1344
+ else:
1345
+ logger.debug("API response has no output attribute")
1346
+ else:
1347
+ logger.debug(
1348
+ "Last response has no _api_response attribute"
1063
1349
  )
1064
- for file_path in downloaded_files:
1065
- logger.info(f" - {file_path}")
1066
1350
  except Exception as e:
1067
1351
  logger.warning(f"Failed to download generated files: {e}")
1068
1352
 
@@ -1076,17 +1360,14 @@ async def execute_model(
1076
1360
  # The result should be an ExitCode from execute_main_operation
1077
1361
  return result # type: ignore[no-any-return]
1078
1362
  except (
1079
- StreamInterruptedError,
1080
- StreamBufferError,
1081
- StreamParseError,
1082
1363
  APIResponseError,
1083
1364
  EmptyResponseError,
1084
1365
  InvalidResponseFormatError,
1085
1366
  ) as e:
1086
- logger.error("Stream error: %s", str(e))
1367
+ logger.error("API error: %s", str(e))
1087
1368
  raise CLIError(str(e), exit_code=ExitCode.API_ERROR)
1088
1369
  except Exception as e:
1089
- logger.exception("Unexpected error during streaming")
1370
+ logger.exception("Unexpected error during execution")
1090
1371
  raise CLIError(str(e), exit_code=ExitCode.UNKNOWN_ERROR)
1091
1372
  finally:
1092
1373
  # Clean up Code Interpreter files if requested