multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,1206 @@
|
|
|
1
|
+
"""Format conversion utilities between Anthropic and OpenAI APIs.
|
|
2
|
+
|
|
3
|
+
This module handles the conversion between two API formats:
|
|
4
|
+
1. Anthropic Claude API format (client-facing)
|
|
5
|
+
2. OpenAI format (backend - used by LiteLLM)
|
|
6
|
+
|
|
7
|
+
Conversion Flow:
|
|
8
|
+
- Request: Anthropic → OpenAI
|
|
9
|
+
- Response: OpenAI → Anthropic
|
|
10
|
+
|
|
11
|
+
Key Components:
|
|
12
|
+
- Tool description enhancement with usage examples
|
|
13
|
+
- Streaming and non-streaming response handling
|
|
14
|
+
- Comprehensive tool event logging for diagnostics
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
import traceback
|
|
21
|
+
import uuid
|
|
22
|
+
from typing import Any, AsyncGenerator, Callable, Dict, List, Literal, Optional, Union
|
|
23
|
+
|
|
24
|
+
from forge.proxy.data_models import (
|
|
25
|
+
ContentBlock,
|
|
26
|
+
ContentBlockText,
|
|
27
|
+
ContentBlockToolUse,
|
|
28
|
+
MessagesRequest,
|
|
29
|
+
MessagesResponse,
|
|
30
|
+
Usage,
|
|
31
|
+
)
|
|
32
|
+
from forge.proxy.utils import (
|
|
33
|
+
log_tool_event,
|
|
34
|
+
smart_format_str,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
logger = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
# on_complete(usage, failed, error_type) -- called when SSE stream finishes
|
|
40
|
+
_OnCompleteCallback = Callable[[Dict[str, int], bool, Optional[str]], None]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Tool parameters that non-Claude models compulsively fill with empty values.
|
|
44
|
+
# Stripped before forwarding to Claude Code to prevent validation errors.
|
|
45
|
+
_STRIP_EMPTY_PARAMS: dict[str, dict[str, tuple[Any, ...]]] = {
|
|
46
|
+
"Read": {
|
|
47
|
+
"pages": (None, "", 0),
|
|
48
|
+
# Claude Code treats these as optional sectioning controls. GPT models
|
|
49
|
+
# often send 0 as a placeholder for "unset", which is better omitted.
|
|
50
|
+
"offset": (None, "", 0),
|
|
51
|
+
"limit": (None, "", 0),
|
|
52
|
+
},
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _is_pdf_path(value: Any) -> bool:
|
|
57
|
+
# Claude Code's Read tool accepts filesystem paths, so extension detection is sufficient here.
|
|
58
|
+
return isinstance(value, str) and value.lower().endswith(".pdf")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _should_buffer_streaming_tool_args(tool_name: str | None) -> bool:
    """Return True when *tool_name* has an entry in ``_STRIP_EMPTY_PARAMS``.

    NOTE(review): presumably the streaming path buffers these tools' argument
    fragments so the complete JSON can be sanitized in one piece -- confirm
    against the streaming caller.

    Args:
        tool_name: Tool name from the model's tool call, or None if unknown.
    """
    tools_with_strip_rules = _STRIP_EMPTY_PARAMS
    return tool_name in tools_with_strip_rules
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def sanitize_tool_input_with_report(tool_name: str, tool_input: dict[str, Any]) -> tuple[dict[str, Any], list[str]]:
    """Drop placeholder-valued optional parameters and report what was dropped.

    GPT-5.5 fills optional schema fields even when not needed (for example,
    pages="" or pages="1" on Read for non-PDF files). Claude Code rejects
    these, causing an unrecoverable retry loop.

    Args:
        tool_name: Name of the tool being called; selects the rule set in
            ``_STRIP_EMPTY_PARAMS``.
        tool_input: Arguments the model produced for the tool call. It is
            never mutated.

    Returns:
        A ``(cleaned_input, stripped_names)`` pair. When the tool has no rule
        set, the *same* ``tool_input`` object is returned with an empty list;
        otherwise ``cleaned_input`` is a shallow copy with the offending keys
        removed and ``stripped_names`` lists them in removal order.
    """
    rules = _STRIP_EMPTY_PARAMS.get(tool_name)
    if not rules:
        return tool_input, []

    sanitized = dict(tool_input)
    removed: list[str] = []

    # "pages" is dropped whatever its value when a Read call does not
    # target a .pdf path.
    if tool_name == "Read" and "pages" in sanitized and not _is_pdf_path(sanitized.get("file_path")):
        sanitized.pop("pages")
        removed.append("pages")

    # Remaining rules: drop a parameter only when it holds a placeholder value.
    for name, placeholder_values in rules.items():
        if name not in sanitized:
            continue
        if sanitized[name] in placeholder_values:
            del sanitized[name]
            removed.append(name)

    return sanitized, removed
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def sanitize_tool_input(tool_name: str, tool_input: dict[str, Any]) -> dict[str, Any]:
    """Return *tool_input* without the optional parameters non-Claude models add compulsively.

    Convenience wrapper around ``sanitize_tool_input_with_report`` that
    discards the list of stripped parameter names.
    """
    return sanitize_tool_input_with_report(tool_name, tool_input)[0]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _sanitize_tool_arguments_json(tool_name: str | None, args_json: str) -> str:
    """Return sanitized tool-call JSON arguments, leaving malformed JSON untouched.

    Convenience wrapper around ``_sanitize_tool_arguments_json_with_report``
    that discards the list of stripped parameter names.
    """
    return _sanitize_tool_arguments_json_with_report(tool_name, args_json)[0]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _sanitize_tool_arguments_json_with_report(tool_name: str | None, args_json: str) -> tuple[str, list[str]]:
|
|
107
|
+
"""Sanitize complete tool-call JSON arguments and report stripped parameter names."""
|
|
108
|
+
if not tool_name or tool_name not in _STRIP_EMPTY_PARAMS or not args_json:
|
|
109
|
+
return args_json, []
|
|
110
|
+
try:
|
|
111
|
+
parsed = json.loads(args_json)
|
|
112
|
+
except json.JSONDecodeError:
|
|
113
|
+
return args_json, []
|
|
114
|
+
if not isinstance(parsed, dict):
|
|
115
|
+
return args_json, []
|
|
116
|
+
cleaned, stripped_params = sanitize_tool_input_with_report(tool_name, parsed)
|
|
117
|
+
return json.dumps(cleaned, separators=(",", ":")), stripped_params
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _schedule_tool_args_sanitized_event(
|
|
121
|
+
request_id: str,
|
|
122
|
+
tool_name: str | None,
|
|
123
|
+
stripped_params: list[str],
|
|
124
|
+
*,
|
|
125
|
+
tool_id: str | None,
|
|
126
|
+
streaming: bool,
|
|
127
|
+
block_index: int | None = None,
|
|
128
|
+
) -> None:
|
|
129
|
+
"""Emit debug-only telemetry when proxy sanitization changes model-generated tool args."""
|
|
130
|
+
if not tool_name or not stripped_params:
|
|
131
|
+
return
|
|
132
|
+
|
|
133
|
+
details: dict[str, Any] = {
|
|
134
|
+
"event": "tool_args_sanitized",
|
|
135
|
+
"streaming": streaming,
|
|
136
|
+
"stripped_params": stripped_params,
|
|
137
|
+
}
|
|
138
|
+
if tool_id is not None:
|
|
139
|
+
details["tool_id"] = tool_id
|
|
140
|
+
if block_index is not None:
|
|
141
|
+
details["block_index"] = block_index
|
|
142
|
+
|
|
143
|
+
asyncio.create_task(
|
|
144
|
+
log_tool_event(
|
|
145
|
+
request_id=request_id,
|
|
146
|
+
tool_name=tool_name,
|
|
147
|
+
status="success",
|
|
148
|
+
stage="client_response",
|
|
149
|
+
details=details,
|
|
150
|
+
)
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def enhance_tool_description(tool_name: str, original_description: str, schema: Dict) -> str:
    """
    Append a concrete usage example to the description of selected tools.

    The examples exist to help Gemini generate proper tool calls for tools that
    have shown high failure rates in client execution reports. Each example
    mirrors the tool's JSON argument structure and marks required parameters.
    Tools without an entry in the table get their description back unchanged.

    Args:
        tool_name: The name of the tool (matched case-sensitively)
        original_description: The original tool description
        schema: The cleaned schema for this tool (not consulted by this function)

    Returns:
        The original description, followed by a usage example when one is
        defined for the tool
    """
    # tool name -> (example text appended to the description, debug log message)
    # Add more tool examples as needed based on failure patterns in logs
    examples_by_tool: Dict[str, tuple[str, str]] = {
        "Batch": (
            (
                "\n\nEXAMPLE USAGE (Always include the invocations array):\n"
                "{\n"
                ' "description": "Run multiple tools in parallel",\n'
                ' "invocations": [ // REQUIRED: Array of tool invocations to execute\n'
                " {\n"
                ' "tool_name": "Read", // Name of the tool to invoke\n'
                ' "input": { // Parameters for the tool\n'
                ' "file_path": "/path/to/file.txt"\n'
                " }\n"
                " },\n"
                " {\n"
                ' "tool_name": "Grep",\n'
                ' "input": {\n'
                ' "pattern": "search term",\n'
                ' "include": "*.py"\n'
                " }\n"
                " }\n"
                " ]\n"
                "}"
            ),
            "Enhanced Batch tool description with usage example",
        ),
        "Edit": (
            (
                "\n\nEXAMPLE USAGE:\n"
                "{\n"
                ' "file_path": "/path/to/file.py", // REQUIRED: Absolute path to the file\n'
                ' "old_string": "def old_function(x, y):\\n return x + y", // REQUIRED: Exact text to replace\n'
                ' "new_string": "def old_function(x, y):\\n # Add comment\\n return x + y", // REQUIRED: New text\n'
                ' "expected_replacements": 1 // Optional: Number of replacements to perform\n'
                "}"
            ),
            "Enhanced Edit tool description with usage example",
        ),
        "Read": (
            (
                "\n\nEXAMPLE USAGE:\n"
                "{\n"
                ' "file_path": "/path/to/file.txt" // REQUIRED: Absolute path to the file\n'
                "}"
            ),
            "Enhanced Read tool description with usage example",
        ),
        "Write": (
            (
                "\n\nEXAMPLE USAGE:\n"
                "{\n"
                ' "file_path": "/path/to/file.txt", // REQUIRED: Absolute path to the file\n'
                ' "content": "Contents to write to the file" // REQUIRED: Content to write\n'
                "}"
            ),
            "Enhanced Write tool description with usage example",
        ),
        "Glob": (
            (
                "\n\nEXAMPLE USAGE:\n"
                "{\n"
                ' "pattern": "**/*.py" // REQUIRED: The glob pattern to match files against\n'
                "}"
            ),
            "Enhanced Glob tool description with usage example",
        ),
        "Grep": (
            (
                "\n\nEXAMPLE USAGE:\n"
                "{\n"
                ' "pattern": "function", // REQUIRED: The regex pattern to search for\n'
                ' "include": "*.py" // Optional: File pattern to include in search\n'
                "}"
            ),
            "Enhanced Grep tool description with usage example",
        ),
        "MultiEdit": (
            (
                "\n\n⚠︎ CRITICAL: This is a TOOL CALL, not Python code! DO NOT use print(), default_api, or any Python syntax!\n"
                "✔ CORRECT JSON FORMAT:\n"
                "{\n"
                ' "file_path": "/absolute/path/to/file.py",\n'
                ' "edits": [\n'
                " {\n"
                ' "old_string": "exact text to find",\n'
                ' "new_string": "replacement text",\n'
                ' "replace_all": false\n'
                " }\n"
                " ]\n"
                "}\n\n"
                "✘ NEVER DO THIS:\n"
                "- print(default_api.MultiEdit(...))\n"
                "- default_api.MultieditEdits(...)\n"
                "- MultiEdit(file_path=..., edits=[...])\n"
                "- Any Python function call syntax\n\n"
                "Remember: You are calling a TOOL via JSON, not writing Python code!"
            ),
            "Enhanced MultiEdit tool description with usage example",
        ),
    }

    entry = examples_by_tool.get(tool_name)
    if entry is None:
        # No example registered for this tool: description is passed through.
        return original_description

    usage_example, log_message = entry
    enhanced_description = original_description + usage_example
    logger.debug(log_message)
    return enhanced_description
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _should_ignore_tool(tool_name: str) -> bool:
    """Return True if *tool_name* matches any configured ignore glob pattern.

    Patterns are read from ``config.proxy.tool_prefixes_to_ignore`` and
    compared with ``fnmatch``. If the configuration cannot be imported or
    read, the failure is logged at debug level and the tool is not ignored.
    """
    try:
        # Imported lazily inside the guard so a config failure cannot escape.
        from fnmatch import fnmatch

        from forge.config import config

        ignore_globs = config.proxy.tool_prefixes_to_ignore
    except Exception as exc:
        logger.debug("Cannot load tool ignore config: %s", exc)
        return False

    return any(fnmatch(tool_name, glob) for glob in ignore_globs)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _model_supports_cache_control(model_name: str) -> bool:
|
|
300
|
+
"""Check if model requires explicit cache_control in requests.
|
|
301
|
+
|
|
302
|
+
Anthropic/Bedrock: requires cache_control on content blocks to enable caching.
|
|
303
|
+
OpenAI/Deepseek: automatic caching (≥1024 tokens), no field needed.
|
|
304
|
+
Gemini: separate Context Caching API (not supported here).
|
|
305
|
+
|
|
306
|
+
For non-Anthropic models, cache_control is silently stripped to avoid 400 errors.
|
|
307
|
+
"""
|
|
308
|
+
if not model_name:
|
|
309
|
+
return False
|
|
310
|
+
name = model_name.lower()
|
|
311
|
+
return "anthropic/" in name or "claude" in name or "bedrock/anthropic" in name
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def convert_anthropic_to_openai(request: MessagesRequest, provider: str = "gemini") -> Dict[str, Any]:
    """Convert an Anthropic Messages API request to the intermediate OpenAI format.

    The intermediate dictionary is OpenAI chat-completions shaped and is what the
    provider-specific adapters consume. The conversion covers:

    * the system prompt (string or list of text blocks), optionally carrying
      ``cache_control`` for models that support it;
    * message content blocks: text, user images, assistant ``tool_use`` and user
      ``tool_result``; ``thinking`` / ``redacted_thinking`` blocks are dropped;
    * sampling parameters, stop sequences and metadata;
    * tool definitions and ``tool_choice``.

    Args:
        request: The validated Anthropic API request with messages and parameters.
        provider: Target provider. For ``"openai"``, ``"litellm"`` and
            ``"openrouter"`` the system prompt is emitted as a ``system`` message;
            for any other value (e.g. the default ``"gemini"``) it is only exposed
            through the ``"system_prompt"`` key of the result.

    Returns:
        Dict[str, Any]: Request in OpenAI-compatible format with mapped parameters.
        ``"system_prompt"`` is set whenever a non-empty system prompt exists,
        regardless of provider.
    """
    openai_messages: list[Dict[str, Any]] = []

    # system_cache_control is preserved and forwarded for Anthropic models only
    system_text = None
    system_cache_control = None

    if request.system:
        if isinstance(request.system, str):
            system_text = request.system
        else:
            text_parts = []
            for block in request.system:
                if block.type == "text":
                    text_parts.append(block.text)
                    if block.cache_control and _model_supports_cache_control(request.model):
                        # Last text block carrying cache_control wins.
                        system_cache_control = {"type": block.cache_control.type}
            system_text = "\n".join(text_parts) if text_parts else None

    if system_text:
        if provider in ("openai", "litellm", "openrouter"):
            # Auto-inject cache_control if configured and no explicit cache_control
            if not system_cache_control and _model_supports_cache_control(request.model):
                try:
                    from forge.config import config as forge_config

                    provider_cfg = forge_config.proxy.get_provider(forge_config.proxy.preferred_provider)
                    if provider_cfg.prompt_caching == "auto_inject":
                        # Rough heuristic: ~4 characters per token.
                        estimated_tokens = len(system_text) // 4
                        if estimated_tokens >= provider_cfg.auto_cache_min_tokens:
                            system_cache_control = {"type": "ephemeral"}
                            logger.debug(
                                f"Auto-injected cache_control for system prompt "
                                f"(~{estimated_tokens} tokens >= {provider_cfg.auto_cache_min_tokens})"
                            )
                except RuntimeError:
                    logger.debug("Config not loaded, skipping cache_control auto-injection")

            # Use content block array when cache_control present (Anthropic requirement)
            if system_cache_control:
                system_content = [
                    {
                        "type": "text",
                        "text": system_text,
                        "cache_control": system_cache_control,
                    }
                ]
                openai_messages.append({"role": "system", "content": system_content})
            else:
                openai_messages.append({"role": "system", "content": system_text})

            logger.debug(
                f"System prompt added as message for {provider}"
                + (" with cache_control" if system_cache_control else "")
            )
        else:
            # For Gemini: store separately
            logger.debug("System prompt extracted for Vertex SDK.")
    else:
        system_text = None  # Ensure it's None if empty

    for msg in request.messages:
        is_tool_response_message = False
        content_list: list[Dict[str, Any]] = []
        tool_calls_list: list[Dict[str, Any]] = []

        if isinstance(msg.content, str):
            content_list.append({"type": "text", "text": msg.content})
        elif isinstance(msg.content, list):
            for block in msg.content:  # type: ignore[assignment]  # Pydantic ContentBlock union
                if block.type in ("thinking", "redacted_thinking"):
                    # Anthropic thinking blocks appear in --resume history;
                    # non-Anthropic providers don't support them — strip for conversion.
                    logger.debug("Stripping %s block (unsupported by target provider)", block.type)
                    continue
                if block.type == "text":
                    text_block: Dict[str, Any] = {"type": "text", "text": block.text}
                    if block.cache_control and _model_supports_cache_control(request.model):
                        text_block["cache_control"] = {"type": block.cache_control.type}
                    content_list.append(text_block)
                elif block.type == "image" and msg.role == "user":  # Images only supported for user role
                    content_list.append(
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{block.source.media_type};base64,{block.source.data}"},
                        }
                    )
                    logger.debug("Image block added to intermediate format.")
                elif block.type == "tool_use" and msg.role == "assistant":
                    cleaned_input = (
                        sanitize_tool_input(block.name, block.input) if isinstance(block.input, dict) else block.input
                    )
                    tool_calls_list.append(
                        {
                            "id": block.id,
                            "type": "function",
                            "function": {
                                "name": block.name,
                                "arguments": json.dumps(cleaned_input),
                            },
                        }
                    )
                    logger.debug(f"Assistant tool_use '{block.name}' converted to intermediate tool_calls.")
                elif block.type == "tool_result" and msg.role == "user":
                    # Content collected so far is emitted as its own user message so
                    # that block order is preserved around the tool message.
                    if content_list:
                        openai_messages.append({"role": "user", "content": content_list})
                        content_list = []

                    tool_content = block.content
                    # Ensure content is a string (JSON if possible) for OpenAI format
                    if not isinstance(tool_content, str):
                        try:
                            tool_content = json.dumps(tool_content)
                        except Exception:
                            # Deliberate best-effort: never fail the request over an
                            # unserialisable tool result.
                            tool_content = str(tool_content)  # Fallback to string representation

                    openai_messages.append(
                        {
                            "role": "tool",
                            "tool_call_id": block.tool_use_id,
                            "content": tool_content,
                        }
                    )
                    logger.debug(f"User tool_result for '{block.tool_use_id}' converted to intermediate tool message.")
                    is_tool_response_message = True
                    # Don't break - process all tool_result blocks in this message

            # Flush any remaining content after tool_result blocks
            if is_tool_response_message and content_list:
                openai_messages.append({"role": "user", "content": content_list})
                content_list = []

        if not is_tool_response_message:
            openai_message: Dict[str, Any] = {"role": msg.role}
            # Simplify content if only text AND no extra metadata (like cache_control)
            first_item = content_list[0] if len(content_list) == 1 else None
            if (
                isinstance(first_item, dict)
                and first_item.get("type") == "text"
                and set(first_item.keys()) == {"type", "text"}
            ):
                openai_message["content"] = first_item.get("text", "")
            elif content_list:  # Keep as list for multimodal or when metadata present
                openai_message["content"] = content_list
            else:
                openai_message["content"] = None

            if tool_calls_list:
                openai_message["tool_calls"] = tool_calls_list

            if openai_message.get("content") is not None or openai_message.get("tool_calls"):
                openai_messages.append(openai_message)
            elif msg.role == "assistant" and not openai_message.get("content") and not openai_message.get("tool_calls"):
                # Handle case where assistant message might be empty (e.g., after tool call)
                # OpenAI format expects content: null or content: ""
                openai_message["content"] = ""
                openai_messages.append(openai_message)
            # Empty non-assistant messages are dropped.

    # --- Assemble OpenAI Request Dictionary ---
    # Note: request.model already contains the *mapped* Gemini ID from the validator
    openai_request: Dict[str, Any] = {
        "model": request.model,
        "messages": openai_messages,
        "max_tokens": request.max_tokens,
        "stream": request.stream or False,
    }
    if request.temperature is not None:
        openai_request["temperature"] = request.temperature
    if request.top_p is not None:
        openai_request["top_p"] = request.top_p
    if request.top_k is not None:
        openai_request["top_k"] = request.top_k
    if request.stop_sequences:
        openai_request["stop"] = request.stop_sequences
    if request.metadata:
        openai_request["metadata"] = request.metadata

    if system_text:
        openai_request["system_prompt"] = system_text

    if request.tools:
        openai_tools = []
        ignored_tool_names = []
        for tool in request.tools:
            if _should_ignore_tool(tool.name):
                ignored_tool_names.append(tool.name)
                continue

            input_schema = tool.input_schema.model_dump(exclude_unset=True)
            logger.debug(f"Cleaning schema for intermediate tool format: {tool.name}")
            logger.debug(f"Original schema for tool '{tool.name}': {smart_format_str(input_schema)}")

            tool_schema_details = {
                "tool_name": tool.name,
                # Snapshot the schema: `input_schema` is patched in place below and the
                # logging task only runs after this function yields to the event loop,
                # so sharing the object would log the patched schema as "original".
                # A shallow copy is enough for the top-level keys added here.
                "original_schema": dict(input_schema),
            }

            # Pass through original schema (no normalization needed for OpenAI/LiteLLM)
            cleaned_schema = input_schema
            logger.debug(f"[{provider.upper()}] Using original schema for tool '{tool.name}'")
            # NOTE(review): fire-and-forget task; requires a running event loop and no
            # reference to the task is kept — confirm that is acceptable for log events.
            asyncio.create_task(
                log_tool_event(
                    request_id="schema_" + str(uuid.uuid4())[:8],
                    tool_name=tool.name,
                    status="attempt",
                    stage="openai_request",
                    details=tool_schema_details,
                )
            )

            # Default to an empty object schema when unspecified.
            if "type" not in cleaned_schema:
                cleaned_schema["type"] = "object"
                logger.debug(f"Added missing 'type': 'object' to schema root for tool '{tool.name}'")
            if cleaned_schema.get("type") == "object" and "properties" not in cleaned_schema:
                cleaned_schema["properties"] = {}
                logger.debug(f"Added missing empty 'properties' object for tool '{tool.name}'")

            enhanced_description = enhance_tool_description(tool.name, tool.description or "", cleaned_schema)

            openai_tools.append(
                {
                    "type": "function",
                    "function": {
                        "name": tool.name,
                        "description": enhanced_description,
                        "parameters": cleaned_schema,
                    },
                }
            )

        if openai_tools:
            openai_request["tools"] = openai_tools
            logger.debug(f"Converted {len(openai_tools)} tools to intermediate OpenAI format.")

        if ignored_tool_names:
            logger.info(f"Skipping {len(ignored_tool_names)} tool(s) due to TOOL_PREFIXES_TO_IGNORE")
            ignored_names = ", ".join(ignored_tool_names)
            logger.debug(f"Skipped tool(s): {ignored_names}")

    # Note: Vertex has a different `tool_config`, this mapping might be approximate
    if request.tool_choice:
        choice_type = request.tool_choice.get("type")
        if choice_type in ("any", "auto"):
            # NOTE(review): Anthropic "any" forces a tool call, which is closer to
            # OpenAI's "required"; kept as "auto" because downstream adapters are not
            # visible from here — confirm before tightening.
            openai_request["tool_choice"] = "auto"
        elif choice_type == "tool" and "name" in request.tool_choice:
            openai_request["tool_choice"] = {
                "type": "function",
                "function": {"name": request.tool_choice["name"]},
            }
        else:  # Includes 'none' or other types
            openai_request["tool_choice"] = "none"
        logger.debug(f"Converted tool_choice '{choice_type}' to intermediate format '{openai_request['tool_choice']}'.")

    logger.debug(f"Intermediate OpenAI Request Prepared: {smart_format_str(openai_request)}")
    return openai_request
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def convert_openai_to_anthropic(
    response_chunk: Union[Dict, Any], original_model_name: Optional[str] = None
) -> Optional[MessagesResponse]:
    """Convert a completed OpenAI-format response to an Anthropic ``MessagesResponse``.

    Only the first choice is converted. Its text content becomes a text block and
    each ``function`` tool call becomes a ``tool_use`` block (arguments are parsed
    from JSON and sanitized). ``finish_reason`` is mapped to an Anthropic
    ``stop_reason``; unknown reasons map to ``"end_turn"``.

    Args:
        response_chunk: Response in OpenAI format. May be a dict, an object exposing
            ``model_dump()``, or a plain object whose attributes can be read with
            ``vars()``.
        original_model_name: The original Claude model name requested by the client;
            a built-in default name is used when omitted.

    Returns:
        Optional[MessagesResponse]: The converted response. On any conversion error
        the function does not raise and does not return ``None``: it logs the error
        and returns a ``MessagesResponse`` whose single text block describes it.
    """
    # The request id is only a log-correlation key. Reading it must never abort the
    # conversion, so tolerate inputs that are not dict-like (the normalisation below
    # explicitly supports them).
    try:
        request_id = response_chunk.get("request_id", "unknown")  # Get request ID if passed through
    except (AttributeError, TypeError):
        request_id = getattr(response_chunk, "request_id", "unknown")

    model_name = original_model_name if original_model_name else "claude-3.7-sonnet"

    logger.info(f"[{request_id}] Converting adapted OpenAI response to Anthropic MessagesResponse format.")
    try:
        # Ensure input is a dictionary
        if isinstance(response_chunk, dict):
            resp_dict = response_chunk
        elif hasattr(response_chunk, "model_dump"):
            resp_dict = response_chunk.model_dump()
        else:
            try:
                resp_dict = vars(response_chunk)  # Fallback for simple objects
            except TypeError as e:
                logger.error(f"[{request_id}] Cannot convert response_chunk of type {type(response_chunk)} to dict.")
                raise ValueError(
                    "Input response_chunk is not convertible to dict.",
                ) from e

        resp_id = resp_dict.get("id") or f"msg_{uuid.uuid4().hex[:24]}"
        choices = resp_dict.get("choices") or []
        usage_data = resp_dict.get("usage", {}) or {}

        anthropic_content: List[ContentBlock] = []
        stop_reason_map = {
            "stop": "end_turn",
            "length": "max_tokens",
            "tool_calls": "tool_use",
            "content_filter": "content_filtered",
        }
        openai_finish_reason = "stop"  # Default

        if choices:
            choice = choices[0]  # Assume only one choice
            # An explicit null finish_reason is treated like a missing one.
            openai_finish_reason = choice.get("finish_reason") or "stop"
            message = choice.get("message", {}) or {}

            text_content = message.get("content")
            tool_calls = message.get("tool_calls")

            if text_content and isinstance(text_content, str):
                anthropic_content.append(ContentBlockText(type="text", text=text_content))
                logger.debug(f"[{request_id}] Added text content block.")

            if tool_calls and isinstance(tool_calls, list):
                for tc in tool_calls:
                    if isinstance(tc, dict) and tc.get("type") == "function":
                        # Any of these fields may be present but null; fall back to the
                        # same defaults used when they are absent.
                        func = tc.get("function") or {}
                        args_str = func.get("arguments")
                        tool_id = tc.get("id") or f"toolu_{uuid.uuid4().hex[:12]}"
                        tool_name = func.get("name") or "unknown_tool"

                        try:
                            if isinstance(args_str, dict):
                                # Arguments already decoded by the upstream adapter.
                                args_input = args_str
                            else:
                                # Missing/empty arguments mean "no arguments", i.e. {}.
                                args_input = json.loads(args_str or "{}")
                            stripped_params: list[str] = []
                            if isinstance(args_input, dict):
                                args_input, stripped_params = sanitize_tool_input_with_report(tool_name, args_input)
                                _schedule_tool_args_sanitized_event(
                                    request_id,
                                    tool_name,
                                    stripped_params,
                                    tool_id=tool_id,
                                    streaming=False,
                                )
                        except json.JSONDecodeError:
                            logger.warning(
                                f"[{request_id}] Non-streaming: Failed to parse tool arguments JSON: {args_str}. Sending raw string."
                            )
                            args_input = {"raw_arguments": args_str}
                        except Exception as e:
                            logger.error(
                                f"[{request_id}] Non-streaming: Error parsing tool arguments: {e}. Args: {args_str}"
                            )
                            args_input = {
                                "error_parsing_arguments": str(e),
                                "raw_arguments": args_str,
                            }

                        anthropic_content.append(
                            ContentBlockToolUse(
                                type="tool_use",
                                id=tool_id,
                                name=tool_name,
                                input=args_input,
                            )
                        )
                        logger.debug(f"[{request_id}] Added tool_use content block: id={tool_id}, name={tool_name}")

                        # NOTE(review): fire-and-forget task; requires a running event
                        # loop and no reference to the task is kept.
                        asyncio.create_task(
                            log_tool_event(
                                request_id=request_id,
                                tool_name=tool_name,
                                status="success",
                                stage="client_response",
                                details={"tool_id": tool_id, "streaming": False},
                            )
                        )
                    else:
                        logger.warning(
                            f"[{request_id}] Skipping conversion of non-function tool_call in response: {tc}"
                        )

        # Ensure there's always at least one content block (even if empty text)
        # Anthropic requires content to be a non-empty list.
        if not anthropic_content:
            logger.warning(f"[{request_id}] No content generated, adding empty text block.")
            anthropic_content.append(ContentBlockText(type="text", text=""))

        anthropic_stop_reason = stop_reason_map.get(openai_finish_reason, "end_turn")
        logger.debug(
            f"[{request_id}] Mapped finish_reason '{openai_finish_reason}' to stop_reason '{anthropic_stop_reason}'."
        )

        return MessagesResponse(
            id=resp_id,
            model=model_name,
            type="message",
            role="assistant",
            content=anthropic_content,
            stop_reason=anthropic_stop_reason,  # type: ignore[arg-type]  # values from controlled stop_reason_map
            stop_sequence=None,  # not returned in OpenAI format
            usage=Usage(
                # Null counters are reported as 0 instead of failing the conversion.
                input_tokens=usage_data.get("prompt_tokens") or 0,
                output_tokens=usage_data.get("completion_tokens") or 0,
            ),
        )
    except Exception as e:
        # Top-level boundary: surface the failure to the client as a regular
        # assistant message rather than propagating the exception.
        logger.error(
            f"[{request_id}] Failed to convert adapted OpenAI response to Anthropic format: {e}",
            exc_info=True,
        )

        return MessagesResponse(
            id=f"error_{uuid.uuid4().hex[:24]}",
            model=model_name,
            type="message",
            role="assistant",
            content=[ContentBlockText(type="text", text=f"Error processing model response: {str(e)}")],
            stop_reason="end_turn",  # Or maybe a custom error reason?
            usage=Usage(input_tokens=0, output_tokens=0),
        )
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
async def convert_openai_to_anthropic_sse(
|
|
747
|
+
response_generator: AsyncGenerator[Dict[str, Any], None],
|
|
748
|
+
request: MessagesRequest,
|
|
749
|
+
request_id: str,
|
|
750
|
+
on_complete: Optional["_OnCompleteCallback"] = None,
|
|
751
|
+
):
|
|
752
|
+
"""Convert OpenAI streaming format to Anthropic Server-Sent Events (SSE) format.
|
|
753
|
+
|
|
754
|
+
Transforms a stream of OpenAI-format chunks into the Anthropic streaming format
|
|
755
|
+
using Server-Sent Events. Handles the complex event structure required by Anthropic:
|
|
756
|
+
- message_start/stop events
|
|
757
|
+
- content_block_start/stop events
|
|
758
|
+
- content_block_delta events
|
|
759
|
+
- message_delta events with finish information
|
|
760
|
+
- ping events for connection maintenance
|
|
761
|
+
|
|
762
|
+
Args:
|
|
763
|
+
response_generator: Async generator yielding OpenAI-format response chunks
|
|
764
|
+
request: The original MessagesRequest from the client
|
|
765
|
+
request_id: Unique identifier for logging and tracking this request
|
|
766
|
+
|
|
767
|
+
Yields:
|
|
768
|
+
SSE-formatted text chunks following the Anthropic streaming protocol
|
|
769
|
+
"""
|
|
770
|
+
message_id = f"msg_{uuid.uuid4().hex[:24]}"
|
|
771
|
+
response_model_name = request.original_model_name or request.model # fallback to mapped ID if original is missing
|
|
772
|
+
logger.info(
|
|
773
|
+
f"[{request_id}] Starting Anthropic SSE stream conversion (message {message_id}, model: {response_model_name})"
|
|
774
|
+
)
|
|
775
|
+
|
|
776
|
+
# --- Stream Initialization ---
|
|
777
|
+
start_event_data = {
|
|
778
|
+
"type": "message_start",
|
|
779
|
+
"message": {
|
|
780
|
+
"id": message_id,
|
|
781
|
+
"type": "message",
|
|
782
|
+
"role": "assistant",
|
|
783
|
+
"model": response_model_name,
|
|
784
|
+
"content": [], # Content starts empty
|
|
785
|
+
"stop_reason": None,
|
|
786
|
+
"stop_sequence": None,
|
|
787
|
+
"usage": {"input_tokens": 0, "output_tokens": 0},
|
|
788
|
+
},
|
|
789
|
+
}
|
|
790
|
+
yield f"event: message_start\ndata: {json.dumps(start_event_data)}\n\n"
|
|
791
|
+
logger.debug(f"[{request_id}] Sent message_start")
|
|
792
|
+
|
|
793
|
+
yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n"
|
|
794
|
+
logger.debug(f"[{request_id}] Sent initial ping")
|
|
795
|
+
|
|
796
|
+
# --- Stream Processing ---
|
|
797
|
+
content_block_index = -1
|
|
798
|
+
current_block_type: Optional[Literal["text", "tool_use"]] = None
|
|
799
|
+
text_started = False
|
|
800
|
+
tool_calls_buffer: Dict[int, Dict[str, Any]] = (
|
|
801
|
+
{}
|
|
802
|
+
) # {openai_tc_index: {id: str, name: str, args: str, block_idx: int}}
|
|
803
|
+
final_usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "cached_tokens": 0}
|
|
804
|
+
_stream_failed = False
|
|
805
|
+
_stream_error_type: Optional[str] = None
|
|
806
|
+
final_stop_reason: Optional[str] = None
|
|
807
|
+
|
|
808
|
+
def _build_sanitized_args_delta(tool_info: Dict[str, Any]) -> dict[str, Any] | None:
|
|
809
|
+
block_idx = tool_info.get("block_idx")
|
|
810
|
+
args_json = tool_info.get("args", "")
|
|
811
|
+
if block_idx is None or not args_json:
|
|
812
|
+
return None
|
|
813
|
+
partial_json, stripped_params = _sanitize_tool_arguments_json_with_report(tool_info.get("name"), args_json)
|
|
814
|
+
if not partial_json:
|
|
815
|
+
return None
|
|
816
|
+
_schedule_tool_args_sanitized_event(
|
|
817
|
+
request_id,
|
|
818
|
+
tool_info.get("name"),
|
|
819
|
+
stripped_params,
|
|
820
|
+
tool_id=tool_info.get("id"),
|
|
821
|
+
streaming=True,
|
|
822
|
+
block_index=block_idx,
|
|
823
|
+
)
|
|
824
|
+
return {
|
|
825
|
+
"type": "content_block_delta",
|
|
826
|
+
"index": block_idx,
|
|
827
|
+
"delta": {
|
|
828
|
+
"type": "input_json_delta",
|
|
829
|
+
"partial_json": partial_json,
|
|
830
|
+
},
|
|
831
|
+
}
|
|
832
|
+
|
|
833
|
+
def _build_tool_block_close_events() -> list[tuple[str, dict[str, Any]]]:
|
|
834
|
+
events: list[tuple[str, dict[str, Any]]] = []
|
|
835
|
+
started_tools = [
|
|
836
|
+
tool_info for tool_info in tool_calls_buffer.values() if tool_info.get("block_idx") is not None
|
|
837
|
+
]
|
|
838
|
+
for tool_info in sorted(started_tools, key=lambda item: item["block_idx"]):
|
|
839
|
+
if tool_info.get("buffer_args"):
|
|
840
|
+
args_delta_event = _build_sanitized_args_delta(tool_info)
|
|
841
|
+
if args_delta_event is not None:
|
|
842
|
+
events.append(("content_block_delta", args_delta_event))
|
|
843
|
+
events.append(
|
|
844
|
+
(
|
|
845
|
+
"content_block_stop",
|
|
846
|
+
{
|
|
847
|
+
"type": "content_block_stop",
|
|
848
|
+
"index": tool_info["block_idx"],
|
|
849
|
+
},
|
|
850
|
+
)
|
|
851
|
+
)
|
|
852
|
+
return events
|
|
853
|
+
|
|
854
|
+
stop_reason_map = {
|
|
855
|
+
"stop": "end_turn",
|
|
856
|
+
"length": "max_tokens",
|
|
857
|
+
"tool_calls": "tool_use",
|
|
858
|
+
"content_filter": "content_filtered",
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
try:
|
|
862
|
+
async for chunk in response_generator:
|
|
863
|
+
logger.debug(f"[{request_id}] Processing adapted OpenAI Chunk: {chunk}")
|
|
864
|
+
|
|
865
|
+
if not isinstance(chunk, dict):
|
|
866
|
+
logger.warning(f"[{request_id}] Skipping invalid chunk format: {type(chunk)}")
|
|
867
|
+
continue
|
|
868
|
+
|
|
869
|
+
# Handle error chunks from stream generator.
|
|
870
|
+
# stream_generator() catches ToolCallError/ProxyStreamError and yields
|
|
871
|
+
# error dicts instead of raising — so no exception reaches the except
|
|
872
|
+
# block below. We must set the failure flag here for metrics.
|
|
873
|
+
if "error" in chunk:
|
|
874
|
+
error_data = chunk["error"]
|
|
875
|
+
_stream_failed = True
|
|
876
|
+
_stream_error_type = error_data.get("type", "stream_error")
|
|
877
|
+
error_event = {
|
|
878
|
+
"type": "error",
|
|
879
|
+
"error": {
|
|
880
|
+
"type": error_data.get("type", "api_error"),
|
|
881
|
+
"message": error_data.get("message", "Unknown streaming error"),
|
|
882
|
+
},
|
|
883
|
+
}
|
|
884
|
+
yield f"event: error\ndata: {json.dumps(error_event)}\n\n"
|
|
885
|
+
return # End stream after error
|
|
886
|
+
|
|
887
|
+
# --- Check for usage-only chunk (LiteLLM sends usage in chunk with empty choices) ---
|
|
888
|
+
chunk_usage = chunk.get("usage")
|
|
889
|
+
if chunk_usage and isinstance(chunk_usage, dict):
|
|
890
|
+
prompt_tokens = chunk_usage.get("prompt_tokens", 0)
|
|
891
|
+
completion_tokens = chunk_usage.get("completion_tokens", 0)
|
|
892
|
+
|
|
893
|
+
if prompt_tokens > 0 and final_usage["input_tokens"] == 0:
|
|
894
|
+
# First time seeing input_tokens - send immediately
|
|
895
|
+
final_usage["input_tokens"] = prompt_tokens
|
|
896
|
+
usage_update_event = {
|
|
897
|
+
"type": "message_delta",
|
|
898
|
+
"delta": {},
|
|
899
|
+
"usage": {"input_tokens": prompt_tokens},
|
|
900
|
+
}
|
|
901
|
+
yield f"event: message_delta\ndata: {json.dumps(usage_update_event)}\n\n"
|
|
902
|
+
logger.debug(f"[{request_id}] Sent immediate message_delta with input_tokens={prompt_tokens}")
|
|
903
|
+
|
|
904
|
+
if completion_tokens > 0:
|
|
905
|
+
final_usage["output_tokens"] = completion_tokens
|
|
906
|
+
logger.debug(f"[{request_id}] Updated output_tokens={completion_tokens}")
|
|
907
|
+
|
|
908
|
+
# Accumulate cached_tokens (propagated from client_adapter since Step 2)
|
|
909
|
+
cached_tokens = chunk_usage.get("cached_tokens", 0)
|
|
910
|
+
if cached_tokens > 0:
|
|
911
|
+
final_usage["cached_tokens"] = cached_tokens
|
|
912
|
+
|
|
913
|
+
logger.debug(f"[{request_id}] Updated usage from chunk: {final_usage}")
|
|
914
|
+
|
|
915
|
+
choices = chunk.get("choices", [])
|
|
916
|
+
if not choices or not isinstance(choices, list):
|
|
917
|
+
# Skip chunk if no choices AND no usage (truly empty chunk)
|
|
918
|
+
if not chunk_usage:
|
|
919
|
+
logger.warning(f"[{request_id}] Skipping chunk with missing or invalid 'choices': {chunk}")
|
|
920
|
+
continue
|
|
921
|
+
|
|
922
|
+
if len(choices) == 0:
|
|
923
|
+
# Empty choices is OK if we just processed usage
|
|
924
|
+
if chunk_usage:
|
|
925
|
+
logger.debug(f"[{request_id}] Processed usage-only chunk (empty choices)")
|
|
926
|
+
continue
|
|
927
|
+
else:
|
|
928
|
+
logger.warning(f"[{request_id}] Skipping chunk with empty 'choices' list: {chunk}")
|
|
929
|
+
continue
|
|
930
|
+
|
|
931
|
+
choice = choices[0]
|
|
932
|
+
|
|
933
|
+
if not isinstance(choice, dict):
|
|
934
|
+
logger.warning(
|
|
935
|
+
f"[{request_id}] Skipping chunk with invalid choice format (type={type(choice)}): {choice}"
|
|
936
|
+
)
|
|
937
|
+
continue
|
|
938
|
+
|
|
939
|
+
delta = choice.get("delta", {}) or {}
|
|
940
|
+
finish_reason = choice.get("finish_reason")
|
|
941
|
+
|
|
942
|
+
# --- Process Delta Content ---
|
|
943
|
+
text_delta = delta.get("content")
|
|
944
|
+
tool_calls_delta = delta.get("tool_calls")
|
|
945
|
+
|
|
946
|
+
if text_delta and isinstance(text_delta, str):
|
|
947
|
+
# If currently in a tool_use block, stop it first
|
|
948
|
+
if current_block_type == "tool_use":
|
|
949
|
+
if tool_calls_buffer:
|
|
950
|
+
for event_name, event_data in _build_tool_block_close_events():
|
|
951
|
+
yield f"event: {event_name}\ndata: {json.dumps(event_data)}\n\n"
|
|
952
|
+
tool_calls_buffer.clear()
|
|
953
|
+
logger.debug(f"[{request_id}] Stopped tool block(s) due to incoming text.")
|
|
954
|
+
else:
|
|
955
|
+
logger.warning(
|
|
956
|
+
f"[{request_id}] current_block_type is 'tool_use' but tool_calls_buffer is empty"
|
|
957
|
+
)
|
|
958
|
+
current_block_type = None
|
|
959
|
+
|
|
960
|
+
if not text_started:
|
|
961
|
+
content_block_index += 1
|
|
962
|
+
current_block_type = "text"
|
|
963
|
+
text_started = True
|
|
964
|
+
start_event = {
|
|
965
|
+
"type": "content_block_start",
|
|
966
|
+
"index": content_block_index,
|
|
967
|
+
"content_block": {
|
|
968
|
+
"type": "text",
|
|
969
|
+
"text": "",
|
|
970
|
+
},
|
|
971
|
+
}
|
|
972
|
+
yield f"event: content_block_start\ndata: {json.dumps(start_event)}\n\n"
|
|
973
|
+
logger.debug(f"[{request_id}] Started text block {content_block_index}")
|
|
974
|
+
|
|
975
|
+
delta_event = {
|
|
976
|
+
"type": "content_block_delta",
|
|
977
|
+
"index": content_block_index,
|
|
978
|
+
"delta": {"type": "text_delta", "text": text_delta},
|
|
979
|
+
}
|
|
980
|
+
yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
|
|
981
|
+
logger.debug(f"[{request_id}] Sent text delta: '{text_delta[:50]}...'")
|
|
982
|
+
|
|
983
|
+
if tool_calls_delta and isinstance(tool_calls_delta, list):
|
|
984
|
+
logger.debug(f"[{request_id}] Received tool_calls_delta: {tool_calls_delta}")
|
|
985
|
+
if current_block_type == "text" and text_started:
|
|
986
|
+
yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
|
|
987
|
+
logger.debug(f"[{request_id}] Stopped text block {content_block_index} due to incoming tool call.")
|
|
988
|
+
current_block_type = None
|
|
989
|
+
text_started = False
|
|
990
|
+
|
|
991
|
+
for tc_delta in tool_calls_delta:
|
|
992
|
+
if not isinstance(tc_delta, dict):
|
|
993
|
+
continue # Skip invalid format
|
|
994
|
+
|
|
995
|
+
# OpenAI tool index (usually 0 for the first tool, 1 for second, etc.)
|
|
996
|
+
# We rely on this index to aggregate arguments for the *same* tool call.
|
|
997
|
+
tc_openai_index = tc_delta.get("index", 0)
|
|
998
|
+
tc_id = tc_delta.get("id")
|
|
999
|
+
func_delta = tc_delta.get("function", {}) or {}
|
|
1000
|
+
func_name = func_delta.get("name")
|
|
1001
|
+
args_delta = func_delta.get("arguments")
|
|
1002
|
+
|
|
1003
|
+
# --- Start a new tool_use block if necessary ---
|
|
1004
|
+
if tc_openai_index not in tool_calls_buffer:
|
|
1005
|
+
if tc_id and func_name:
|
|
1006
|
+
content_block_index += 1
|
|
1007
|
+
current_block_type = "tool_use"
|
|
1008
|
+
tool_calls_buffer[tc_openai_index] = {
|
|
1009
|
+
"id": tc_id,
|
|
1010
|
+
"name": func_name,
|
|
1011
|
+
"args": "",
|
|
1012
|
+
"block_idx": content_block_index,
|
|
1013
|
+
"buffer_args": _should_buffer_streaming_tool_args(func_name),
|
|
1014
|
+
}
|
|
1015
|
+
start_event = {
|
|
1016
|
+
"type": "content_block_start",
|
|
1017
|
+
"index": content_block_index,
|
|
1018
|
+
"content_block": {
|
|
1019
|
+
"type": "tool_use",
|
|
1020
|
+
"id": tc_id,
|
|
1021
|
+
"name": func_name,
|
|
1022
|
+
"input": {},
|
|
1023
|
+
},
|
|
1024
|
+
}
|
|
1025
|
+
yield f"event: content_block_start\ndata: {json.dumps(start_event)}\n\n"
|
|
1026
|
+
logger.debug(
|
|
1027
|
+
f"[{request_id}] Started tool_use block {content_block_index} (id: {tc_id}, name: {func_name})"
|
|
1028
|
+
)
|
|
1029
|
+
|
|
1030
|
+
# Log successful tool event for client in streaming
|
|
1031
|
+
asyncio.create_task(
|
|
1032
|
+
log_tool_event(
|
|
1033
|
+
request_id=request_id,
|
|
1034
|
+
tool_name=func_name,
|
|
1035
|
+
status="success",
|
|
1036
|
+
stage="client_response",
|
|
1037
|
+
details={
|
|
1038
|
+
"tool_id": tc_id,
|
|
1039
|
+
"streaming": True,
|
|
1040
|
+
"block_index": content_block_index,
|
|
1041
|
+
},
|
|
1042
|
+
)
|
|
1043
|
+
)
|
|
1044
|
+
# ID can arrive before name in some providers; buffer until name arrives
|
|
1045
|
+
elif tc_id and not func_name:
|
|
1046
|
+
tool_calls_buffer[tc_openai_index] = {
|
|
1047
|
+
"id": tc_id,
|
|
1048
|
+
"name": None,
|
|
1049
|
+
"args": "",
|
|
1050
|
+
"block_idx": None,
|
|
1051
|
+
"buffer_args": False,
|
|
1052
|
+
}
|
|
1053
|
+
logger.debug(
|
|
1054
|
+
f"[{request_id}] Received tool ID {tc_id} first for index {tc_openai_index}, waiting for name."
|
|
1055
|
+
)
|
|
1056
|
+
else:
|
|
1057
|
+
logger.warning(
|
|
1058
|
+
f"[{request_id}] Cannot start tool block for index {tc_openai_index} without ID and/or Name. Delta: {tc_delta}"
|
|
1059
|
+
)
|
|
1060
|
+
continue # Cannot start block yet
|
|
1061
|
+
|
|
1062
|
+
# --- If name arrives later for an existing ID ---
|
|
1063
|
+
elif (
|
|
1064
|
+
tc_openai_index in tool_calls_buffer
|
|
1065
|
+
and func_name
|
|
1066
|
+
and tool_calls_buffer[tc_openai_index]["name"] is None
|
|
1067
|
+
):
|
|
1068
|
+
tool_info = tool_calls_buffer[tc_openai_index]
|
|
1069
|
+
if tool_info["id"] == tc_id: # Ensure ID matches if provided again
|
|
1070
|
+
content_block_index += 1
|
|
1071
|
+
current_block_type = "tool_use"
|
|
1072
|
+
tool_info["name"] = func_name
|
|
1073
|
+
tool_info["block_idx"] = content_block_index
|
|
1074
|
+
tool_info["buffer_args"] = _should_buffer_streaming_tool_args(func_name)
|
|
1075
|
+
start_event = {
|
|
1076
|
+
"type": "content_block_start",
|
|
1077
|
+
"index": content_block_index,
|
|
1078
|
+
"content_block": {
|
|
1079
|
+
"type": "tool_use",
|
|
1080
|
+
"id": tool_info["id"],
|
|
1081
|
+
"name": func_name,
|
|
1082
|
+
"input": {},
|
|
1083
|
+
},
|
|
1084
|
+
}
|
|
1085
|
+
yield f"event: content_block_start\ndata: {json.dumps(start_event)}\n\n"
|
|
1086
|
+
logger.debug(
|
|
1087
|
+
f"[{request_id}] Started tool_use block {content_block_index} for index {tc_openai_index} after receiving name ({func_name})"
|
|
1088
|
+
)
|
|
1089
|
+
else:
|
|
1090
|
+
logger.warning(
|
|
1091
|
+
f"[{request_id}] Received name '{func_name}' for index {tc_openai_index}, but ID mismatch (expected {tool_info['id']}, got {tc_id}). Skipping."
|
|
1092
|
+
)
|
|
1093
|
+
|
|
1094
|
+
# --- Append argument fragments if block has started ---
|
|
1095
|
+
if (
|
|
1096
|
+
tc_openai_index in tool_calls_buffer
|
|
1097
|
+
and args_delta
|
|
1098
|
+
and tool_calls_buffer[tc_openai_index]["block_idx"] is not None
|
|
1099
|
+
):
|
|
1100
|
+
tool_info = tool_calls_buffer[tc_openai_index]
|
|
1101
|
+
tool_info["args"] += args_delta
|
|
1102
|
+
if tool_info.get("buffer_args"):
|
|
1103
|
+
logger.debug(
|
|
1104
|
+
f"[{request_id}] Buffered tool args delta for block {tool_info['block_idx']}: '{args_delta[:50]}...'"
|
|
1105
|
+
)
|
|
1106
|
+
else:
|
|
1107
|
+
delta_event = {
|
|
1108
|
+
"type": "content_block_delta",
|
|
1109
|
+
"index": tool_info["block_idx"],
|
|
1110
|
+
"delta": {
|
|
1111
|
+
"type": "input_json_delta",
|
|
1112
|
+
"partial_json": args_delta,
|
|
1113
|
+
},
|
|
1114
|
+
}
|
|
1115
|
+
yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
|
|
1116
|
+
logger.debug(
|
|
1117
|
+
f"[{request_id}] Sent tool args delta for block {tool_info['block_idx']}: '{args_delta[:50]}...'"
|
|
1118
|
+
)
|
|
1119
|
+
|
|
1120
|
+
# --- Process Finish Reason ---
|
|
1121
|
+
if finish_reason:
|
|
1122
|
+
final_stop_reason = stop_reason_map.get(finish_reason, "end_turn")
|
|
1123
|
+
logger.info(
|
|
1124
|
+
f"[{request_id}] Received final finish_reason: '{finish_reason}' -> Mapped to stop_reason: '{final_stop_reason}'"
|
|
1125
|
+
)
|
|
1126
|
+
break
|
|
1127
|
+
|
|
1128
|
+
# --- End of Stream ---
|
|
1129
|
+
if current_block_type == "text" and text_started:
|
|
1130
|
+
yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
|
|
1131
|
+
logger.debug(f"[{request_id}] Stopped final text block {content_block_index}")
|
|
1132
|
+
elif current_block_type == "tool_use":
|
|
1133
|
+
if tool_calls_buffer:
|
|
1134
|
+
for event_name, event_data in _build_tool_block_close_events():
|
|
1135
|
+
logger.debug(f"[{request_id}] Yielding {event_name} for tool_use: {json.dumps(event_data)}")
|
|
1136
|
+
yield f"event: {event_name}\ndata: {json.dumps(event_data)}\n\n"
|
|
1137
|
+
logger.debug(f"[{request_id}] Stopped final tool_use block(s)")
|
|
1138
|
+
else:
|
|
1139
|
+
logger.warning(
|
|
1140
|
+
f"[{request_id}] Current block type is tool_use, but buffer is empty. Cannot stop block."
|
|
1141
|
+
)
|
|
1142
|
+
|
|
1143
|
+
if final_stop_reason is None:
|
|
1144
|
+
logger.warning(
|
|
1145
|
+
f"[{request_id}] Stream finished without receiving a finish_reason. Defaulting to 'end_turn'."
|
|
1146
|
+
)
|
|
1147
|
+
final_stop_reason = "end_turn"
|
|
1148
|
+
|
|
1149
|
+
final_delta_event = {
|
|
1150
|
+
"type": "message_delta",
|
|
1151
|
+
"delta": {
|
|
1152
|
+
"stop_reason": final_stop_reason,
|
|
1153
|
+
"stop_sequence": None, # not returned in OpenAI stream format
|
|
1154
|
+
},
|
|
1155
|
+
}
|
|
1156
|
+
|
|
1157
|
+
# Only include usage if we have valid data (not zeros)
|
|
1158
|
+
# Sending zeros overwrites any previously displayed usage in Claude Code UI
|
|
1159
|
+
input_tokens = final_usage.get("input_tokens", 0)
|
|
1160
|
+
output_tokens = final_usage.get("output_tokens", 0)
|
|
1161
|
+
if input_tokens > 0 or output_tokens > 0:
|
|
1162
|
+
final_delta_event["usage"] = {
|
|
1163
|
+
"input_tokens": input_tokens,
|
|
1164
|
+
"output_tokens": output_tokens,
|
|
1165
|
+
}
|
|
1166
|
+
|
|
1167
|
+
logger.debug(f"[{request_id}] Yielding final message_delta: {json.dumps(final_delta_event)}")
|
|
1168
|
+
yield f"event: message_delta\ndata: {json.dumps(final_delta_event)}\n\n"
|
|
1169
|
+
logger.debug(
|
|
1170
|
+
f"[{request_id}] Sent final message_delta (stop_reason: {final_stop_reason}, "
|
|
1171
|
+
f"usage: {final_delta_event.get('usage', 'not included')})"
|
|
1172
|
+
)
|
|
1173
|
+
|
|
1174
|
+
stop_event_data = {"type": "message_stop"}
|
|
1175
|
+
logger.debug(f"[{request_id}] Yielding message_stop: {json.dumps(stop_event_data)}")
|
|
1176
|
+
yield f"event: message_stop\ndata: {json.dumps(stop_event_data)}\n\n"
|
|
1177
|
+
logger.debug(f"[{request_id}] Sent message_stop")
|
|
1178
|
+
|
|
1179
|
+
except Exception as e:
|
|
1180
|
+
_stream_failed = True
|
|
1181
|
+
_stream_error_type = "internal_error"
|
|
1182
|
+
logger.error(
|
|
1183
|
+
f"[{request_id}] Error during Anthropic SSE stream conversion: {e}, "
|
|
1184
|
+
f"Full traceback:\n{traceback.format_exc()}"
|
|
1185
|
+
)
|
|
1186
|
+
try:
|
|
1187
|
+
error_payload = {
|
|
1188
|
+
"type": "error",
|
|
1189
|
+
"error": {
|
|
1190
|
+
"type": "internal_server_error",
|
|
1191
|
+
"message": "Stream processing error",
|
|
1192
|
+
},
|
|
1193
|
+
}
|
|
1194
|
+
yield f"event: error\ndata: {json.dumps(error_payload)}\n\n"
|
|
1195
|
+
# Always send message_stop after an error
|
|
1196
|
+
yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
|
|
1197
|
+
logger.debug(f"[{request_id}] Sent error event and message_stop after exception.")
|
|
1198
|
+
except Exception as e2:
|
|
1199
|
+
logger.error(f"[{request_id}] Failed to send error event to client: {e2}")
|
|
1200
|
+
finally:
|
|
1201
|
+
logger.info(f"[{request_id}] Anthropic SSE stream conversion finished.")
|
|
1202
|
+
if on_complete is not None:
|
|
1203
|
+
try:
|
|
1204
|
+
on_complete(final_usage, _stream_failed, _stream_error_type)
|
|
1205
|
+
except Exception:
|
|
1206
|
+
logger.debug(f"[{request_id}] on_complete callback failed", exc_info=True)
|