code-puppy 0.0.316__py3-none-any.whl → 0.0.325__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,11 +4,23 @@ import asyncio
4
4
  import json
5
5
  import math
6
6
  import signal
7
+ import sys
7
8
  import threading
8
9
  import uuid
9
10
  from abc import ABC, abstractmethod
10
11
  from collections.abc import AsyncIterable
11
- from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
12
+ from typing import (
13
+ Any,
14
+ Callable,
15
+ Dict,
16
+ List,
17
+ Optional,
18
+ Sequence,
19
+ Set,
20
+ Tuple,
21
+ Type,
22
+ Union,
23
+ )
12
24
 
13
25
  import mcp
14
26
  import pydantic
@@ -89,6 +101,9 @@ class BaseAgent(ABC):
89
101
  # Cache for MCP tool definitions (for token estimation)
90
102
  # This is populated after the first successful run when MCP tools are retrieved
91
103
  self._mcp_tool_definitions_cache: List[Dict[str, Any]] = []
104
+ # Shared console for streaming output - should be set by cli_runner
105
+ # to avoid conflicts between spinner's Live display and response streaming
106
+ self._console: Optional[Any] = None
92
107
 
93
108
  @property
94
109
  @abstractmethod
@@ -1227,6 +1242,74 @@ class BaseAgent(ABC):
1227
1242
  self._mcp_servers = mcp_servers
1228
1243
  return self._code_generation_agent
1229
1244
 
1245
+ def _create_agent_with_output_type(self, output_type: Type[Any]) -> PydanticAgent:
1246
+ """Create a temporary agent configured with a custom output_type.
1247
+
1248
+ This is used when structured output is requested via run_with_mcp.
1249
+ The agent is created fresh with the same configuration as the main agent
1250
+ but with the specified output_type instead of str.
1251
+
1252
+ Args:
1253
+ output_type: The Pydantic model or type for structured output.
1254
+
1255
+ Returns:
1256
+ A configured PydanticAgent (or DBOSAgent wrapper) with the custom output_type.
1257
+ """
1258
+ from code_puppy.model_utils import prepare_prompt_for_model
1259
+ from code_puppy.tools import register_tools_for_agent
1260
+
1261
+ model_name = self.get_model_name()
1262
+ models_config = ModelFactory.load_config()
1263
+ model, resolved_model_name = self._load_model_with_fallback(
1264
+ model_name, models_config, str(uuid.uuid4())
1265
+ )
1266
+
1267
+ instructions = self.get_system_prompt()
1268
+ puppy_rules = self.load_puppy_rules()
1269
+ if puppy_rules:
1270
+ instructions += f"\n{puppy_rules}"
1271
+
1272
+ mcp_servers = getattr(self, "_mcp_servers", []) or []
1273
+ model_settings = make_model_settings(resolved_model_name)
1274
+
1275
+ prepared = prepare_prompt_for_model(
1276
+ model_name, instructions, "", prepend_system_to_user=False
1277
+ )
1278
+ instructions = prepared.instructions
1279
+
1280
+ global _reload_count
1281
+ _reload_count += 1
1282
+
1283
+ if get_use_dbos():
1284
+ temp_agent = PydanticAgent(
1285
+ model=model,
1286
+ instructions=instructions,
1287
+ output_type=output_type,
1288
+ retries=3,
1289
+ toolsets=[],
1290
+ history_processors=[self.message_history_accumulator],
1291
+ model_settings=model_settings,
1292
+ )
1293
+ agent_tools = self.get_available_tools()
1294
+ register_tools_for_agent(temp_agent, agent_tools)
1295
+ dbos_agent = DBOSAgent(
1296
+ temp_agent, name=f"{self.name}-structured-{_reload_count}"
1297
+ )
1298
+ return dbos_agent
1299
+ else:
1300
+ temp_agent = PydanticAgent(
1301
+ model=model,
1302
+ instructions=instructions,
1303
+ output_type=output_type,
1304
+ retries=3,
1305
+ toolsets=mcp_servers,
1306
+ history_processors=[self.message_history_accumulator],
1307
+ model_settings=model_settings,
1308
+ )
1309
+ agent_tools = self.get_available_tools()
1310
+ register_tools_for_agent(temp_agent, agent_tools)
1311
+ return temp_agent
1312
+
1230
1313
  # It's okay to decorate it with DBOS.step even if not using DBOS; the decorator is a no-op in that case.
1231
1314
  @DBOS.step()
1232
1315
  def message_history_accumulator(self, ctx: RunContext, messages: List[Any]):
@@ -1257,47 +1340,45 @@ class BaseAgent(ABC):
1257
1340
  ) -> None:
1258
1341
  """Handle streaming events from the agent run.
1259
1342
 
1260
- This method processes streaming events and emits TextPart and ThinkingPart
1261
- content with styled banners as they stream in.
1343
+ This method processes streaming events and emits TextPart, ThinkingPart,
1344
+ and ToolCallPart content with styled banners/tokens as they stream in.
1262
1345
 
1263
1346
  Args:
1264
1347
  ctx: The run context.
1265
1348
  events: Async iterable of streaming events (PartStartEvent, PartDeltaEvent, etc.).
1266
1349
  """
1267
- import os
1268
- import time as time_module
1269
-
1270
1350
  from pydantic_ai import PartDeltaEvent, PartStartEvent
1271
- from pydantic_ai.messages import TextPartDelta, ThinkingPartDelta
1351
+ from pydantic_ai.messages import (
1352
+ TextPartDelta,
1353
+ ThinkingPartDelta,
1354
+ ToolCallPartDelta,
1355
+ )
1272
1356
  from rich.console import Console
1273
- from rich.live import Live
1274
1357
  from rich.markdown import Markdown
1275
1358
  from rich.markup import escape
1276
1359
 
1277
1360
  from code_puppy.messaging.spinner import pause_all_spinners
1278
1361
 
1279
- console = Console()
1280
-
1281
- # Disable Live display in test mode or non-interactive environments
1282
- # This fixes issues with pexpect PTY where Live() hangs
1283
- use_live_display = (
1284
- console.is_terminal
1285
- and os.environ.get("CODE_PUPPY_TEST_FAST", "").lower() not in ("1", "true")
1286
- and os.environ.get("CI", "").lower() not in ("1", "true")
1287
- )
1362
+ # IMPORTANT: Use the shared console (set by cli_runner) to avoid conflicts
1363
+ # with the spinner's Live display. Multiple Console instances with separate
1364
+ # Live displays cause cursor positioning chaos and line duplication.
1365
+ if self._console is not None:
1366
+ console = self._console
1367
+ else:
1368
+ # Fallback if console not set (shouldn't happen in normal use)
1369
+ console = Console()
1288
1370
 
1289
- # Track which part indices we're currently streaming (for Text/Thinking parts)
1371
+ # Track which part indices we're currently streaming (for Text/Thinking/Tool parts)
1290
1372
  streaming_parts: set[int] = set()
1291
1373
  thinking_parts: set[int] = (
1292
1374
  set()
1293
1375
  ) # Track which parts are thinking (for dim style)
1294
1376
  text_parts: set[int] = set() # Track which parts are text
1377
+ tool_parts: set[int] = set() # Track which parts are tool calls
1295
1378
  banner_printed: set[int] = set() # Track if banner was already printed
1296
- text_buffer: dict[int, list[str]] = {} # Buffer text for markdown
1297
- live_displays: dict[int, Live] = {} # Live displays for streaming markdown
1379
+ text_buffer: dict[int, list[str]] = {} # Buffer text for final markdown render
1380
+ token_count: dict[int, int] = {} # Track token count per text/tool part
1298
1381
  did_stream_anything = False # Track if we streamed any content
1299
- last_render_time: dict[int, float] = {} # Track last render time per part
1300
- render_interval = 0.1 # Only re-render markdown every 100ms (throttle)
1301
1382
 
1302
1383
  def _print_thinking_banner() -> None:
1303
1384
  """Print the THINKING banner with spinner pause and line clear."""
@@ -1362,9 +1443,20 @@ class BaseAgent(ABC):
1362
1443
  streaming_parts.add(event.index)
1363
1444
  text_parts.add(event.index)
1364
1445
  text_buffer[event.index] = [] # Initialize buffer
1446
+ token_count[event.index] = 0 # Initialize token counter
1365
1447
  # Buffer initial content if present
1366
1448
  if part.content and part.content.strip():
1367
1449
  text_buffer[event.index].append(part.content)
1450
+ # Use len(content) / 3 for token estimation (more accurate than chunk counting)
1451
+ token_count[event.index] += len(part.content) // 3
1452
+ elif isinstance(part, ToolCallPart):
1453
+ streaming_parts.add(event.index)
1454
+ tool_parts.add(event.index)
1455
+ token_count[event.index] = 0 # Initialize token counter
1456
+ # Track tool name for display
1457
+ banner_printed.add(
1458
+ event.index
1459
+ ) # Use banner_printed to track if we've shown tool info
1368
1460
 
1369
1461
  # PartDeltaEvent - stream the content as it arrives
1370
1462
  elif isinstance(event, PartDeltaEvent):
@@ -1372,43 +1464,26 @@ class BaseAgent(ABC):
1372
1464
  delta = event.delta
1373
1465
  if isinstance(delta, (TextPartDelta, ThinkingPartDelta)):
1374
1466
  if delta.content_delta:
1375
- # For text parts, stream markdown with Live display
1467
+ # For text parts, show token counter then render at end
1376
1468
  if event.index in text_parts:
1377
- # Print banner and start Live on first content
1469
+ import sys
1470
+
1471
+ # Print banner on first content
1378
1472
  if event.index not in banner_printed:
1379
1473
  _print_response_banner()
1380
1474
  banner_printed.add(event.index)
1381
- # Only use Live display if enabled (disabled in test/CI)
1382
- if use_live_display:
1383
- live = Live(
1384
- Markdown(""),
1385
- console=console,
1386
- refresh_per_second=10,
1387
- vertical_overflow="visible", # Allow scrolling for long content
1388
- )
1389
- live.start()
1390
- live_displays[event.index] = live
1391
- # Accumulate text and throttle markdown rendering
1392
- # (Markdown parsing is O(n), doing it on every token = O(n²) death)
1475
+ # Accumulate text for final markdown render
1393
1476
  text_buffer[event.index].append(delta.content_delta)
1394
- now = time_module.monotonic()
1395
- last_render = last_render_time.get(event.index, 0)
1396
-
1397
- # Only re-render if enough time has passed (throttle)
1398
- # Skip Live updates when not using live display
1399
- if (
1400
- use_live_display
1401
- and now - last_render >= render_interval
1402
- ):
1403
- content = "".join(text_buffer[event.index])
1404
- if event.index in live_displays:
1405
- try:
1406
- live_displays[event.index].update(
1407
- Markdown(content)
1408
- )
1409
- last_render_time[event.index] = now
1410
- except Exception:
1411
- pass
1477
+ # Use len(content) / 3 for token estimation
1478
+ token_count[event.index] += (
1479
+ len(delta.content_delta) // 3
1480
+ )
1481
+ # Update token counter in place (single line)
1482
+ count = token_count[event.index]
1483
+ sys.stdout.write(
1484
+ f"\r\x1b[K ⏳ Receiving... {count} tokens"
1485
+ )
1486
+ sys.stdout.flush()
1412
1487
  else:
1413
1488
  # For thinking parts, stream immediately (dim)
1414
1489
  if event.index not in banner_printed:
@@ -1416,56 +1491,69 @@ class BaseAgent(ABC):
1416
1491
  banner_printed.add(event.index)
1417
1492
  escaped = escape(delta.content_delta)
1418
1493
  console.print(f"[dim]{escaped}[/dim]", end="")
1494
+ elif isinstance(delta, ToolCallPartDelta):
1495
+ import sys
1496
+
1497
+ # For tool calls, show token counter (use string repr for estimation)
1498
+ token_count[event.index] += len(str(delta)) // 3
1499
+ # Get tool name if available
1500
+ tool_name = getattr(delta, "tool_name_delta", "")
1501
+ count = token_count[event.index]
1502
+ # Display with tool wrench icon and tool name
1503
+ if tool_name:
1504
+ sys.stdout.write(
1505
+ f"\r\x1b[K 🔧 Calling {tool_name}... {count} tokens"
1506
+ )
1507
+ else:
1508
+ sys.stdout.write(
1509
+ f"\r\x1b[K 🔧 Calling tool... {count} tokens"
1510
+ )
1511
+ sys.stdout.flush()
1419
1512
 
1420
1513
  # PartEndEvent - finish the streaming with a newline
1421
1514
  elif isinstance(event, PartEndEvent):
1422
1515
  if event.index in streaming_parts:
1423
- # For text parts, do final render then stop the Live display
1516
+ import sys
1517
+
1518
+ # For text parts, clear counter line and render markdown
1424
1519
  if event.index in text_parts:
1425
- # Final render to ensure we show complete content
1426
- # (throttling may have skipped the last few tokens)
1427
- if event.index in live_displays and event.index in text_buffer:
1428
- try:
1429
- final_content = "".join(text_buffer[event.index])
1430
- live_displays[event.index].update(
1431
- Markdown(final_content)
1432
- )
1433
- except Exception:
1434
- pass
1435
- if event.index in live_displays:
1436
- try:
1437
- live_displays[event.index].stop()
1438
- except Exception:
1439
- pass
1440
- del live_displays[event.index]
1441
- # When not using Live display, print the final content as markdown
1442
- elif event.index in text_buffer:
1520
+ # Clear the token counter line
1521
+ sys.stdout.write("\r\x1b[K")
1522
+ sys.stdout.flush()
1523
+ # Render the final markdown nicely
1524
+ if event.index in text_buffer:
1443
1525
  try:
1444
1526
  final_content = "".join(text_buffer[event.index])
1445
1527
  if final_content.strip():
1446
1528
  console.print(Markdown(final_content))
1447
1529
  except Exception:
1448
1530
  pass
1449
- if event.index in text_buffer:
1450
1531
  del text_buffer[event.index]
1451
- # Clean up render time tracking
1452
- last_render_time.pop(event.index, None)
1532
+ # For tool parts, clear the token counter line
1533
+ elif event.index in tool_parts:
1534
+ # Clear the token counter line
1535
+ sys.stdout.write("\r\x1b[K")
1536
+ sys.stdout.flush()
1453
1537
  # For thinking parts, just print newline
1454
1538
  elif event.index in banner_printed:
1455
1539
  console.print() # Final newline after streaming
1540
+
1541
+ # Clean up token count
1542
+ token_count.pop(event.index, None)
1456
1543
  # Clean up all tracking sets
1457
1544
  streaming_parts.discard(event.index)
1458
1545
  thinking_parts.discard(event.index)
1459
1546
  text_parts.discard(event.index)
1547
+ tool_parts.discard(event.index)
1460
1548
  banner_printed.discard(event.index)
1461
1549
 
1462
- # Resume spinner if next part is NOT text/thinking (avoid race condition)
1463
- # If next part is a tool call or None, it's safe to resume
1550
+ # Resume spinner if next part is NOT text/thinking/tool (avoid race condition)
1551
+ # If next part is None or handled differently, it's safe to resume
1464
1552
  # Note: spinner itself handles blank line before appearing
1465
1553
  from code_puppy.messaging.spinner import resume_all_spinners
1466
1554
 
1467
1555
  next_kind = getattr(event, "next_part_kind", None)
1468
- if next_kind not in ("text", "thinking"):
1556
+ if next_kind not in ("text", "thinking", "tool-call"):
1469
1557
  resume_all_spinners()
1470
1558
 
1471
1559
  # Spinner is resumed in PartEndEvent when appropriate (based on next_part_kind)
@@ -1624,6 +1712,7 @@ class BaseAgent(ABC):
1624
1712
  *,
1625
1713
  attachments: Optional[Sequence[BinaryContent]] = None,
1626
1714
  link_attachments: Optional[Sequence[Union[ImageUrl, DocumentUrl]]] = None,
1715
+ output_type: Optional[Type[Any]] = None,
1627
1716
  **kwargs,
1628
1717
  ) -> Any:
1629
1718
  """Run the agent with MCP servers, attachments, and full cancellation support.
@@ -1632,10 +1721,13 @@ class BaseAgent(ABC):
1632
1721
  prompt: Primary user prompt text (may be empty when attachments present).
1633
1722
  attachments: Local binary payloads (e.g., dragged images) to include.
1634
1723
  link_attachments: Remote assets (image/document URLs) to include.
1724
+ output_type: Optional Pydantic model or type for structured output.
1725
+ When provided, creates a temporary agent configured to return
1726
+ this type instead of the default string output.
1635
1727
  **kwargs: Additional arguments forwarded to `pydantic_ai.Agent.run`.
1636
1728
 
1637
1729
  Returns:
1638
- The agent's response.
1730
+ The agent's response (typed according to output_type if specified).
1639
1731
 
1640
1732
  Raises:
1641
1733
  asyncio.CancelledError: When execution is cancelled by user.
@@ -1659,6 +1751,11 @@ class BaseAgent(ABC):
1659
1751
  pydantic_agent = (
1660
1752
  self._code_generation_agent or self.reload_code_generation_agent()
1661
1753
  )
1754
+
1755
+ # If a custom output_type is specified, create a temporary agent with that type
1756
+ if output_type is not None:
1757
+ pydantic_agent = self._create_agent_with_output_type(output_type)
1758
+
1662
1759
  # Handle claude-code and chatgpt-codex models: prepend system prompt to first user message
1663
1760
  from code_puppy.model_utils import is_chatgpt_codex_model, is_claude_code_model
1664
1761
 
@@ -1855,30 +1952,74 @@ class BaseAgent(ABC):
1855
1952
  def graceful_sigint_handler(_sig, _frame):
1856
1953
  # When using keyboard-based cancel, SIGINT should be a no-op
1857
1954
  # (just show a hint to user about the configured cancel key)
1955
+ import sys
1956
+
1858
1957
  from code_puppy.keymap import get_cancel_agent_display_name
1859
1958
 
1860
1959
  cancel_key = get_cancel_agent_display_name()
1861
- emit_info(f"Use {cancel_key} to cancel the agent task.")
1960
+ if sys.platform == "win32":
1961
+ # On Windows, we use keyboard listener, so SIGINT might still fire
1962
+ # but we handle cancellation via the key listener
1963
+ pass # Silent on Windows - the key listener handles it
1964
+ else:
1965
+ emit_info(f"Use {cancel_key} to cancel the agent task.")
1862
1966
 
1863
1967
  original_handler = None
1864
1968
  key_listener_stop_event = None
1865
1969
  _key_listener_thread = None
1970
+ _windows_ctrl_handler = None # Store reference to prevent garbage collection
1866
1971
 
1867
1972
  try:
1868
- if cancel_agent_uses_signal():
1869
- # Use SIGINT-based cancellation (default Ctrl+C behavior)
1973
+ if sys.platform == "win32":
1974
+ # Windows: Use SetConsoleCtrlHandler for reliable Ctrl+C handling
1975
+ import ctypes
1976
+
1977
+ # Define the handler function type
1978
+ HANDLER_ROUTINE = ctypes.WINFUNCTYPE(ctypes.c_bool, ctypes.c_ulong)
1979
+
1980
+ def windows_ctrl_handler(ctrl_type):
1981
+ """Handle Windows console control events."""
1982
+ CTRL_C_EVENT = 0
1983
+ CTRL_BREAK_EVENT = 1
1984
+
1985
+ if ctrl_type in (CTRL_C_EVENT, CTRL_BREAK_EVENT):
1986
+ # Check if we're awaiting user input
1987
+ if is_awaiting_user_input():
1988
+ return False # Let default handler run
1989
+
1990
+ # Schedule agent cancellation
1991
+ schedule_agent_cancel()
1992
+ return True # We handled it, don't terminate
1993
+
1994
+ return False # Let other handlers process it
1995
+
1996
+ # Create the callback - must keep reference alive!
1997
+ _windows_ctrl_handler = HANDLER_ROUTINE(windows_ctrl_handler)
1998
+
1999
+ # Register the handler
2000
+ kernel32 = ctypes.windll.kernel32
2001
+ if not kernel32.SetConsoleCtrlHandler(_windows_ctrl_handler, True):
2002
+ emit_warning("Failed to set Windows Ctrl+C handler")
2003
+
2004
+ # Also spawn keyboard listener for Ctrl+X (shell cancel) and other keys
2005
+ key_listener_stop_event = threading.Event()
2006
+ _key_listener_thread = self._spawn_ctrl_x_key_listener(
2007
+ key_listener_stop_event,
2008
+ on_escape=lambda: None, # Ctrl+X handled by command_runner
2009
+ on_cancel_agent=None, # Ctrl+C handled by SetConsoleCtrlHandler above
2010
+ )
2011
+ elif cancel_agent_uses_signal():
2012
+ # Unix with Ctrl+C: Use SIGINT-based cancellation
1870
2013
  original_handler = signal.signal(
1871
2014
  signal.SIGINT, keyboard_interrupt_handler
1872
2015
  )
1873
2016
  else:
1874
- # Use keyboard listener for agent cancellation
1875
- # Set a graceful SIGINT handler that shows a hint
2017
+ # Unix with different cancel key: Use keyboard listener
1876
2018
  original_handler = signal.signal(signal.SIGINT, graceful_sigint_handler)
1877
- # Spawn keyboard listener with the cancel agent callback
1878
2019
  key_listener_stop_event = threading.Event()
1879
2020
  _key_listener_thread = self._spawn_ctrl_x_key_listener(
1880
2021
  key_listener_stop_event,
1881
- on_escape=lambda: None, # Ctrl+X handled by command_runner
2022
+ on_escape=lambda: None,
1882
2023
  on_cancel_agent=schedule_agent_cancel,
1883
2024
  )
1884
2025
 
@@ -1903,8 +2044,17 @@ class BaseAgent(ABC):
1903
2044
  # Stop keyboard listener if it was started
1904
2045
  if key_listener_stop_event is not None:
1905
2046
  key_listener_stop_event.set()
1906
- # Restore original signal handler
1907
- if (
1908
- original_handler is not None
1909
- ): # Explicit None check - SIG_DFL can be 0/falsy!
2047
+
2048
+ # Unregister Windows Ctrl handler
2049
+ if sys.platform == "win32" and _windows_ctrl_handler is not None:
2050
+ try:
2051
+ import ctypes
2052
+
2053
+ kernel32 = ctypes.windll.kernel32
2054
+ kernel32.SetConsoleCtrlHandler(_windows_ctrl_handler, False)
2055
+ except Exception:
2056
+ pass # Best effort cleanup
2057
+
2058
+ # Restore original signal handler (Unix)
2059
+ if original_handler is not None:
1910
2060
  signal.signal(signal.SIGINT, original_handler)
code_puppy/cli_runner.py CHANGED
@@ -706,6 +706,12 @@ async def run_prompt_with_attachments(
706
706
  attachments = [attachment.content for attachment in processed_prompt.attachments]
707
707
  link_attachments = [link.url_part for link in processed_prompt.link_attachments]
708
708
 
709
+ # IMPORTANT: Set the shared console on the agent so that streaming output
710
+ # uses the same console as the spinner. This prevents Live display conflicts
711
+ # that cause line duplication during markdown streaming.
712
+ if spinner_console is not None:
713
+ agent._console = spinner_console
714
+
709
715
  # Create the agent task first so we can track and cancel it
710
716
  agent_task = asyncio.create_task(
711
717
  agent.run_with_mcp(
@@ -784,5 +790,6 @@ def main_entry():
784
790
  DBOS.destroy()
785
791
  return 0
786
792
  finally:
787
- # Reset terminal on Unix-like systems (not Windows)
793
+ # Reset terminal on all platforms for clean state
794
+ reset_windows_terminal_full() # Safe no-op on non-Windows
788
795
  reset_unix_terminal()
@@ -571,6 +571,7 @@ class AddModelMenu:
571
571
  "cerebras": "cerebras",
572
572
  "cohere": "custom_openai",
573
573
  "perplexity": "custom_openai",
574
+ "minimax": "custom_anthropic",
574
575
  }
575
576
 
576
577
  # Determine the model type
@@ -600,6 +601,16 @@ class AddModelMenu:
600
601
  api_key_env = f"${provider.env[0]}" if provider.env else "$API_KEY"
601
602
  config["custom_endpoint"] = {"url": api_url, "api_key": api_key_env}
602
603
 
604
+ # Special handling for minimax: uses custom_anthropic but needs custom_endpoint
605
+ # and the URL needs /v1 stripped (comes as https://api.minimax.io/anthropic/v1)
606
+ if provider.id == "minimax" and provider.api:
607
+ api_url = provider.api
608
+ # Strip /v1 suffix if present
609
+ if api_url.endswith("/v1"):
610
+ api_url = api_url[:-3]
611
+ api_key_env = f"${provider.env[0]}" if provider.env else "$API_KEY"
612
+ config["custom_endpoint"] = {"url": api_url, "api_key": api_key_env}
613
+
603
614
  # Add context length if available
604
615
  if model.context_length and model.context_length > 0:
605
616
  config["context_length"] = model.context_length