multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,619 @@
|
|
|
1
|
+
"""LiteLLM client implementation.
|
|
2
|
+
|
|
3
|
+
Uses OpenAI SDK to communicate with LiteLLM endpoints (remote or local).
|
|
4
|
+
Supports both non-streaming and streaming completions with tool support.
|
|
5
|
+
|
|
6
|
+
GPT-5 Models:
|
|
7
|
+
GPT-5 family models use the Responses API which supports tools, verbosity
|
|
8
|
+
control, and reasoning_effort together. Chat Completions API is only used
|
|
9
|
+
for non-GPT-5 models (it does NOT support reasoning_effort with function
|
|
10
|
+
tools for GPT-5).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
import ssl
|
|
16
|
+
import time
|
|
17
|
+
from typing import Any, AsyncGenerator
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
from openai import AsyncOpenAI
|
|
21
|
+
from tenacity import (
|
|
22
|
+
retry,
|
|
23
|
+
retry_if_exception,
|
|
24
|
+
stop_after_attempt,
|
|
25
|
+
wait_exponential,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
from forge.core.models.catalog import get_model_spec, model_exists
|
|
29
|
+
from forge.runtime_config import get_runtime_config
|
|
30
|
+
|
|
31
|
+
from ..credentials import CredentialManager
|
|
32
|
+
from ..detection import ProviderType
|
|
33
|
+
from ..errors import AuthenticationError, ProviderError
|
|
34
|
+
from ..types import (
|
|
35
|
+
CompletionResponse,
|
|
36
|
+
Message,
|
|
37
|
+
ModelHyperparameters,
|
|
38
|
+
StreamEvent,
|
|
39
|
+
ToolCall,
|
|
40
|
+
ToolCallDelta,
|
|
41
|
+
)
|
|
42
|
+
from .base import estimate_message_tokens, merge_hyperparams
|
|
43
|
+
from .openai_compat import (
|
|
44
|
+
ToolCallAccumulator,
|
|
45
|
+
build_chat_completion_kwargs,
|
|
46
|
+
extract_cached_tokens,
|
|
47
|
+
is_retryable_error,
|
|
48
|
+
message_to_openai,
|
|
49
|
+
openai_response_to_completion,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
logger = logging.getLogger(__name__)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class LiteLLMClient:
|
|
56
|
+
"""LiteLLM client using OpenAI SDK.
|
|
57
|
+
|
|
58
|
+
Supports both remote LiteLLM and local LiteLLM instances.
|
|
59
|
+
Uses Chat Completions API for standard models, and Responses API for GPT-5
|
|
60
|
+
family models (which supports tools, verbosity, and reasoning_effort together).
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
def __init__(
    self,
    model: str,
    provider: ProviderType,
    credentials: CredentialManager | None = None,
    default_hyperparams: ModelHyperparameters | None = None,
) -> None:
    """Bind the client to one model and one LiteLLM provider.

    The OpenAI SDK client itself is not built here: ``_client`` starts
    out as ``None`` and is filled in by ``_get_client()``.

    Args:
        model: Model identifier (e.g., "openai/gpt-5.5").
        provider: Provider type (litellm_remote or litellm_local).
        credentials: Credential manager; when omitted (or falsy),
            ``CredentialManager.default()`` is used instead.
        default_hyperparams: Hyperparameters applied to every call
            unless overridden per request.
    """
    # Populated lazily by _get_client().
    self._client: AsyncOpenAI | None = None

    self._model = model
    self._provider = provider
    self._default_hyperparams = default_hyperparams
    self._credentials = credentials if credentials else CredentialManager.default()
|
|
83
|
+
|
|
84
|
+
@property
def model(self) -> str:
    """Return the model identifier this client was constructed with."""
    return self._model
|
|
88
|
+
|
|
89
|
+
async def _get_client(self) -> AsyncOpenAI:
    """Return the cached OpenAI SDK client, building it on first use.

    On a cache miss the provider's credentials are fetched from the
    credential manager and used for ``api_key`` and ``base_url``. When
    the credentials carry an ``ssl_cert`` entry, a dedicated
    ``httpx.AsyncClient`` verifying against that CA file is handed to
    the SDK; otherwise the SDK gets ``http_client=None``.

    Returns:
        The ``AsyncOpenAI`` instance stored on ``self._client``.
    """
    cached = self._client
    if cached is not None:
        return cached

    provider_creds = await self._credentials.get_credentials(self._provider)

    # Custom SSL certificate (e.g., remote proxy root CA)
    ca_file = provider_creds.get("ssl_cert")
    transport = (
        httpx.AsyncClient(verify=ssl.create_default_context(cafile=ca_file))
        if ca_file
        else None
    )

    cli_version = get_runtime_config().user_agent_claude_code_version or "unknown"
    user_agent = f"claude-cli/{cli_version} (external, cli)"
    sdk_client = AsyncOpenAI(
        api_key=provider_creds["api_key"],
        base_url=provider_creds["base_url"],
        http_client=transport,
        default_headers={"User-Agent": user_agent},
    )
    self._client = sdk_client
    return sdk_client
|
|
111
|
+
|
|
112
|
+
# Bound as a static attribute so the retry predicate on
# _make_completion_request can call it through the class
# (LiteLLMClient._is_retryable_error) without needing an instance.
_is_retryable_error = staticmethod(is_retryable_error)
|
|
113
|
+
|
|
114
|
+
def _is_gpt5_model(self) -> bool:
|
|
115
|
+
"""Check if the current model belongs to the GPT-5 family.
|
|
116
|
+
|
|
117
|
+
Used by the Chat Completions safety net (`_build_request_kwargs`) to
|
|
118
|
+
strip `reasoning_effort` when tools are present -- a Chat Completions
|
|
119
|
+
API limitation that affects all GPT-5 models regardless of whether
|
|
120
|
+
we route them to Responses API. Distinct from `_should_use_responses_api`
|
|
121
|
+
which determines routing.
|
|
122
|
+
"""
|
|
123
|
+
model_name = self._model.split("/")[-1].lower()
|
|
124
|
+
return model_name.startswith("gpt-5")
|
|
125
|
+
|
|
126
|
+
def _should_use_responses_api(
    self,
    tools: list[dict[str, Any]] | None,
    hyperparams: ModelHyperparameters,
) -> bool:
    """Decide whether this model is routed to the Responses API.

    The answer comes from the model catalog's `use_responses_api` flag,
    looked up by the lowercased last ``/`` segment of the model id.
    Models missing from the catalog yield ``False`` (graceful for
    OpenRouter's open model space).

    Args:
        tools: Tool definitions for the request (not consulted here).
        hyperparams: Request hyperparameters (not consulted here).

    Returns:
        The catalog's `use_responses_api` value, or ``False`` when the
        model is not in the catalog.
    """
    catalog_key = self._model.rpartition("/")[2].lower()
    if model_exists(catalog_key):
        return get_model_spec(catalog_key).use_responses_api
    return False
|
|
141
|
+
|
|
142
|
+
@staticmethod
def _convert_messages_for_responses(messages: list[Message]) -> list[dict[str, Any]]:
    """Translate canonical Messages into Responses API input items.

    Mapping applied per message:
    - list (multimodal) content: ``text`` parts become ``input_text``,
      ``image_url`` parts become ``input_image``; non-dict parts and
      other part types are dropped, and an empty result becomes ``""``
    - system/user: one role message
    - assistant: a role message when there is content, followed by one
      ``function_call`` item per tool call; an assistant message with
      neither content nor tool calls becomes an empty role message
    - tool: one ``function_call_output`` item (dict/list content is
      JSON-encoded, anything else is stringified)

    Messages with any other role produce no items.
    """
    converted: list[dict[str, Any]] = []

    for message in messages:
        body: Any = message.content

        # Convert multimodal content to Responses API format
        if isinstance(body, list):
            parts: list[dict[str, Any]] = []
            for part in body:
                if not isinstance(part, dict):
                    continue
                part_type = part.get("type")
                if part_type == "text":
                    parts.append({"type": "input_text", "text": part.get("text", "")})
                elif part_type == "image_url":
                    image_ref = part.get("image_url", {})
                    if isinstance(image_ref, dict):
                        url = image_ref.get("url", "")
                    else:
                        url = str(image_ref)
                    parts.append({"type": "input_image", "image_url": url})
            body = parts or ""

        role = message.role

        if role in ("system", "user"):
            converted.append({"role": role, "content": body or ""})

        elif role == "assistant":
            calls = message.tool_calls
            if body:
                converted.append({"role": "assistant", "content": body})
            if calls:
                # Convert tool_calls to Responses API function_call items
                converted.extend(
                    {
                        "type": "function_call",
                        "call_id": call.id,
                        "name": call.name,
                        "arguments": json.dumps(call.arguments),
                    }
                    for call in calls
                )
            elif not body:
                converted.append({"role": "assistant", "content": ""})

        elif role == "tool":
            # Convert tool result to Responses API function_call_output
            if isinstance(body, (dict, list)):
                output = json.dumps(body)
            elif body:
                output = str(body)
            else:
                output = ""
            converted.append(
                {
                    "type": "function_call_output",
                    "call_id": message.tool_call_id or "",
                    "output": output,
                }
            )

    return converted
|
|
206
|
+
|
|
207
|
+
@staticmethod
|
|
208
|
+
def _convert_tools_for_responses(tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
209
|
+
"""Convert Chat Completions tool format to Responses API format.
|
|
210
|
+
|
|
211
|
+
Chat Completions: {type: "function", function: {name, description, parameters}}
|
|
212
|
+
Responses API: {type: "function", name, description, parameters}
|
|
213
|
+
"""
|
|
214
|
+
responses_tools = []
|
|
215
|
+
for tool in tools:
|
|
216
|
+
if tool.get("type") == "function" and "function" in tool:
|
|
217
|
+
func = tool["function"]
|
|
218
|
+
resp_tool: dict[str, Any] = {
|
|
219
|
+
"type": "function",
|
|
220
|
+
"name": func.get("name"),
|
|
221
|
+
"parameters": func.get("parameters", {}),
|
|
222
|
+
}
|
|
223
|
+
if func.get("description"):
|
|
224
|
+
resp_tool["description"] = func["description"]
|
|
225
|
+
if func.get("strict") is not None:
|
|
226
|
+
resp_tool["strict"] = func["strict"]
|
|
227
|
+
responses_tools.append(resp_tool)
|
|
228
|
+
else:
|
|
229
|
+
responses_tools.append(tool)
|
|
230
|
+
return responses_tools
|
|
231
|
+
|
|
232
|
+
@staticmethod
def _parse_responses_output(response: Any, model: str) -> CompletionResponse:
    """Turn a Responses API result into a canonical CompletionResponse.

    Text is the concatenation of every ``output_text`` part found in
    ``message`` output items; each ``function_call`` item becomes a
    ToolCall. Tool-call arguments that are not already a dict are parsed
    as JSON, falling back to ``{}`` when empty or unparseable.

    The finish reason is derived from ``response.status``:
    ``incomplete`` -> ``length``, ``failed``/``cancelled`` -> ``error``,
    otherwise ``tool_calls`` when any were found, else ``stop``.

    Args:
        response: Object returned by the Responses API call; read via
            ``getattr`` so missing attributes fall back to defaults.
        model: Model identifier recorded in the ``raw`` payload.

    Returns:
        CompletionResponse with text, tool calls (``None`` when there
        are none), usage (``None`` when not reported) and a small
        ``raw`` dict.
    """
    chunks: list[str] = []
    calls: list[ToolCall] = []

    for entry in getattr(response, "output", []):
        kind = getattr(entry, "type", None)
        if kind == "message":
            chunks.extend(
                getattr(piece, "text", "")
                for piece in getattr(entry, "content", [])
                if getattr(piece, "type", None) == "output_text"
            )
        elif kind == "function_call":
            raw_args = getattr(entry, "arguments", "{}")
            if isinstance(raw_args, dict):
                parsed_args = raw_args
            else:
                try:
                    parsed_args = json.loads(raw_args) if raw_args else {}
                except (json.JSONDecodeError, TypeError):
                    # Unparseable arguments degrade to an empty mapping.
                    parsed_args = {}
            calls.append(
                ToolCall(
                    id=getattr(entry, "call_id", ""),
                    name=getattr(entry, "name", ""),
                    arguments=parsed_args,
                )
            )

    text = "".join(chunks)

    usage = None
    reported = getattr(response, "usage", None)
    if reported:
        prompt_count = getattr(reported, "input_tokens", 0) or 0
        completion_count = getattr(reported, "output_tokens", 0) or 0
        usage = {
            "prompt_tokens": prompt_count,
            "completion_tokens": completion_count,
            "total_tokens": prompt_count + completion_count,
        }
        cached = extract_cached_tokens(reported)
        if cached:
            usage["cached_tokens"] = cached

    # Start from the "normal completion" reason, then let an abnormal
    # response status take precedence over it.
    finish_reason = "tool_calls" if calls else "stop"
    status = getattr(response, "status", "completed")
    if status == "incomplete":
        finish_reason = "length"
    elif status in ("failed", "cancelled"):
        finish_reason = "error"

    raw_payload = {
        "id": getattr(response, "id", f"responses-{int(time.time())}"),
        "object": "responses",
        "model": model,
        "finish_reason": finish_reason,
    }
    return CompletionResponse(
        text=text,
        tool_calls=calls or None,
        usage=usage,
        raw=raw_payload,
    )
|
|
302
|
+
|
|
303
|
+
def _message_to_openai(self, msg: Message) -> dict[str, Any]:
    """Delegate to the shared ``message_to_openai`` converter for one Message."""
    return message_to_openai(msg)
|
|
306
|
+
|
|
307
|
+
def _openai_to_completion(self, response: Any) -> CompletionResponse:
    """Delegate to ``openai_response_to_completion`` using this client's provider."""
    return openai_response_to_completion(response, self._provider)
|
|
310
|
+
|
|
311
|
+
def _build_request_kwargs(
    self,
    messages: list[Message],
    tools: list[dict[str, Any]] | None,
    hyperparams: ModelHyperparameters,
) -> dict[str, Any]:
    """Assemble the kwargs for a Chat Completions request.

    Starts from ``build_chat_completion_kwargs`` and then removes
    ``reasoning_effort`` (with a warning) when tools are supplied for a
    GPT-5 family model.

    Args:
        messages: Conversation messages.
        tools: Optional tool definitions.
        hyperparams: Merged hyperparameters for this request.

    Returns:
        Keyword arguments for the chat completion call.
    """
    request_kwargs = build_chat_completion_kwargs(self._model, messages, tools, hyperparams)

    # GPT-5 Chat Completions API doesn't support reasoning_effort with
    # function tools. Runs AFTER extras merge so callers can't reintroduce it.
    must_strip = tools and "reasoning_effort" in request_kwargs and self._is_gpt5_model()
    if not must_strip:
        return request_kwargs

    removed = request_kwargs.pop("reasoning_effort")
    logger.warning(
        f"Stripped reasoning_effort={removed} - "
        f"not supported with function tools on Chat Completions API for {self._model}"
    )
    return request_kwargs
|
|
330
|
+
|
|
331
|
+
async def _complete_with_responses_api(
    self,
    client: AsyncOpenAI,
    messages: list[Message],
    hyperparams: ModelHyperparameters,
    tools: list[dict[str, Any]] | None = None,
) -> CompletionResponse:
    """Run one completion through the GPT-5 Responses API.

    The Responses API supports tools, verbosity, and reasoning_effort
    together. This method deliberately carries no retry decorator so
    that both complete() and stream() can call it without nesting
    retries (3x3=9).

    Args:
        client: OpenAI SDK client to issue the request with.
        messages: Conversation messages, converted to Responses input items.
        hyperparams: Merged hyperparameters; verbosity, reasoning effort
            and temperature are sent only when not ``None``.
        tools: Optional Chat Completions style tool definitions.

    Returns:
        The parsed CompletionResponse.
    """
    payload: dict[str, Any] = {
        "model": self._model,
        "input": self._convert_messages_for_responses(messages),
        # Responses API requires max_output_tokens >= 16
        "max_output_tokens": max(hyperparams.max_tokens, 16),
    }

    verbosity = hyperparams.verbosity
    if verbosity is not None:
        payload["text"] = {"verbosity": verbosity}

    effort = hyperparams.reasoning_effort
    if effort is not None:
        payload["reasoning"] = {"effort": effort}

    temperature = hyperparams.temperature
    if temperature is not None:
        payload["temperature"] = temperature

    if tools:
        payload["tools"] = self._convert_tools_for_responses(tools)

    # Forward extra_headers (e.g., User-Agent from incoming Claude Code request)
    forwarded_headers = hyperparams.extra.get("openai", {}).get("extra_headers")
    if forwarded_headers:
        payload["extra_headers"] = forwarded_headers

    tools_log = f", tools={len(tools)}" if tools else ""
    logger.info(
        f"GPT-5 Responses API call: model={self._model}, "
        f"verbosity={hyperparams.verbosity}, reasoning={hyperparams.reasoning_effort}{tools_log}"
    )

    api_response = await client.responses.create(**payload)
    return self._parse_responses_output(api_response, self._model)
|
|
381
|
+
|
|
382
|
+
@retry(
    retry=retry_if_exception(lambda exc: isinstance(exc, Exception) and LiteLLMClient._is_retryable_error(exc)),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    reraise=True,
)
async def _make_completion_request(
    self,
    client: AsyncOpenAI,
    messages: list[Message],
    tools: list[dict[str, Any]] | None,
    merged_params: ModelHyperparameters,
) -> CompletionResponse:
    """Issue one completion request, retried on retryable errors.

    The retry decorator lives on this method rather than on complete()
    so that tenacity observes the raw OpenAI exceptions
    (RateLimitError, APIStatusError) before complete() wraps them into
    ProviderError/AuthenticationError.

    Routing: the Responses API is used when `_should_use_responses_api`
    says so; every other model goes through Chat Completions.
    """
    if not self._should_use_responses_api(tools, merged_params):
        chat_kwargs = self._build_request_kwargs(messages, tools, merged_params)
        chat_response = await client.chat.completions.create(**chat_kwargs)
        return self._openai_to_completion(chat_response)

    return await self._complete_with_responses_api(client, messages, merged_params, tools=tools)
|
|
407
|
+
|
|
408
|
+
async def complete(
    self,
    messages: list[Message],
    *,
    tools: list[dict[str, Any]] | None = None,
    hyperparams: ModelHyperparameters | None = None,
) -> CompletionResponse:
    """Run a non-streaming completion and normalise any failure.

    The request itself is delegated to ``_make_completion_request``, which
    picks between the Responses API and the Chat Completions API.

    Args:
        messages: List of messages in the conversation.
        tools: Optional list of tool definitions.
        hyperparams: Optional hyperparameters to override defaults.

    Returns:
        CompletionResponse with text, optional tool_calls, and usage.

    Raises:
        ProviderError: If the API call fails.
        AuthenticationError: If the failure message mentions
            "authentication" or "unauthorized"; cached credentials are
            invalidated and the cached client is closed first.
    """
    merged_params = merge_hyperparams(self._default_hyperparams, hyperparams)
    api_client = await self._get_client()

    try:
        completion = await self._make_completion_request(api_client, messages, tools, merged_params)
    except (ProviderError, AuthenticationError):
        # Already one of our own error types: propagate untouched.
        raise
    except Exception as exc:
        lowered = str(exc).lower()
        auth_failure = any(marker in lowered for marker in ("authentication", "unauthorized"))
        if not auth_failure:
            raise ProviderError(self._provider, exc) from exc

        # Drop the stored credentials and the client built from them
        # before reporting the authentication failure.
        await self._credentials.invalidate(self._provider)
        await self._close_client()
        raise AuthenticationError(self._provider, str(exc)) from exc

    return completion
|
|
446
|
+
|
|
447
|
+
async def _close_client(self) -> None:
|
|
448
|
+
"""Close and discard the cached HTTP client.
|
|
449
|
+
|
|
450
|
+
Forces credential re-resolution on next request. Especially
|
|
451
|
+
important when a custom httpx.AsyncClient with SSL context was
|
|
452
|
+
created (remote LiteLLM with root CA).
|
|
453
|
+
"""
|
|
454
|
+
client = self._client
|
|
455
|
+
self._client = None
|
|
456
|
+
if client is not None:
|
|
457
|
+
try:
|
|
458
|
+
await client.close()
|
|
459
|
+
except Exception:
|
|
460
|
+
pass
|
|
461
|
+
|
|
462
|
+
async def stream(
    self,
    messages: list[Message],
    *,
    tools: list[dict[str, Any]] | None = None,
    hyperparams: ModelHyperparameters | None = None,
) -> AsyncGenerator[StreamEvent, None]:
    """Stream a completion as canonical StreamEvent objects.

    When ``_should_use_responses_api`` selects the Responses API, a single
    non-streaming call is made and its result is replayed as synthetic
    stream events. Otherwise the Chat Completions API is streamed chunk by
    chunk. For tool calls, accumulate ToolCallDelta events until
    response_end.

    Failures inside either request path are reported as an ``error`` event
    instead of being raised. If the message mentions "authentication" or
    "unauthorized", cached credentials are invalidated and the cached
    client is closed before the event is emitted.

    Args:
        messages: List of messages in the conversation.
        tools: Optional list of tool definitions.
        hyperparams: Optional hyperparameters to override defaults.

    Yields:
        StreamEvent objects (text_delta, tool_call_delta, response_end, usage, error).
    """
    merged_params = merge_hyperparams(self._default_hyperparams, hyperparams)
    api_client = await self._get_client()

    if self._should_use_responses_api(tools, merged_params):
        # Responses API path: one blocking call, then replay the result in
        # the same event order a real stream would produce.
        try:
            logger.info(
                f"GPT-5 Responses API (streaming fallback): model={self._model}, "
                f"verbosity={merged_params.verbosity}"
            )
            completion = await self._complete_with_responses_api(
                api_client, messages, merged_params, tools=tools
            )

            if completion.text:
                yield StreamEvent(type="text_delta", text=completion.text)

            # One delta per finished tool call so callers can accumulate them.
            for position, call in enumerate(completion.tool_calls or ()):
                replayed = ToolCallDelta(
                    index=position,
                    id=call.id,
                    name=call.name,
                    arguments_json=json.dumps(call.arguments),
                )
                yield StreamEvent(type="tool_call_delta", tool_call_delta=replayed)

            if completion.usage:
                yield StreamEvent(type="usage", usage=completion.usage)

            yield StreamEvent(
                type="response_end",
                tool_calls=completion.tool_calls,
                usage=completion.usage,
            )
        except Exception as exc:
            lowered = str(exc).lower()
            if any(marker in lowered for marker in ("authentication", "unauthorized")):
                await self._credentials.invalidate(self._provider)
                await self._close_client()
            yield StreamEvent(type="error", error=str(exc))
        # Success or failure, this path never falls through to the
        # Chat Completions streaming code below.
        return

    # Standard Chat Completions API streaming path
    pending_calls = ToolCallAccumulator()
    usage_summary: dict[str, int] | None = None

    try:
        request_kwargs = self._build_request_kwargs(messages, tools, merged_params)
        request_kwargs["stream"] = True
        request_kwargs["stream_options"] = {"include_usage": True}

        chunk_stream = await api_client.chat.completions.create(**request_kwargs)

        async for chunk in chunk_stream:
            # Capture usage whenever a chunk carries it.
            reported_usage = chunk.usage
            if reported_usage:
                usage_summary = {
                    "prompt_tokens": reported_usage.prompt_tokens,
                    "completion_tokens": reported_usage.completion_tokens,
                    "total_tokens": reported_usage.total_tokens,
                }
                cached_count = extract_cached_tokens(reported_usage)
                if cached_count:
                    usage_summary["cached_tokens"] = cached_count

            if not chunk.choices:
                continue

            delta = chunk.choices[0].delta

            if delta.content:
                yield StreamEvent(type="text_delta", text=delta.content)

            fragments = delta.tool_calls
            if not fragments:
                continue

            for fragment in fragments:
                slot = fragment.index
                # A lone fragment without an index takes the accumulator's
                # default slot.
                if slot is None and len(fragments) == 1:
                    slot = pending_calls.default_index()

                function_part = fragment.function
                if function_part:
                    function_name = function_part.name
                    function_args = function_part.arguments or ""
                else:
                    function_name = None
                    function_args = ""

                piece = ToolCallDelta(
                    index=slot,
                    id=fragment.id,
                    name=function_name,
                    arguments_json=function_args,
                )
                pending_calls.add_delta(piece)
                yield StreamEvent(type="tool_call_delta", tool_call_delta=piece)

        if usage_summary:
            yield StreamEvent(type="usage", usage=usage_summary)

        if pending_calls.has_pending():
            completed_calls = pending_calls.finalize()
        else:
            completed_calls = None

        yield StreamEvent(
            type="response_end",
            tool_calls=completed_calls,
            usage=usage_summary,
        )

    except Exception as exc:
        lowered = str(exc).lower()
        if any(marker in lowered for marker in ("authentication", "unauthorized")):
            await self._credentials.invalidate(self._provider)
            await self._close_client()
        yield StreamEvent(type="error", error=str(exc))
|
|
594
|
+
|
|
595
|
+
async def count_tokens(
    self,
    messages: list[Message],
    tools: list[dict[str, Any]] | None = None,
) -> int:
    """Estimate the token count of a request.

    This is an approximation, not a tokenizer result: messages are
    converted to the OpenAI message format and sized by
    ``estimate_message_tokens``, and tool definitions add one token per
    four characters of their JSON serialisation.

    Args:
        messages: List of messages to count.
        tools: Optional list of tool definitions to include in count.

    Returns:
        Estimated token count.
    """
    converted = list(map(self._message_to_openai, messages))
    message_estimate = estimate_message_tokens(converted)

    if not tools:
        return message_estimate

    # Integer division: four characters of serialised JSON per token.
    return message_estimate + len(json.dumps(tools)) // 4
|