multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""Shared helpers for OpenAI-compatible LLM clients.
|
|
2
|
+
|
|
3
|
+
Used by both LiteLLMClient and OpenRouterClient. Extracted here so the
|
|
4
|
+
OpenRouter client has no import dependency on LiteLLM.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from openai import APIError, APIStatusError, RateLimitError
|
|
12
|
+
|
|
13
|
+
from ..errors import ProviderError
|
|
14
|
+
from ..types import (
|
|
15
|
+
CompletionResponse,
|
|
16
|
+
Message,
|
|
17
|
+
ModelHyperparameters,
|
|
18
|
+
ToolCall,
|
|
19
|
+
ToolCallDelta,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def is_retryable_error(error: Exception) -> bool:
    """Return True if the error should trigger tenacity retry.

    Only transient failures are retried:

    * HTTP status errors with 408 (request timeout), 409 (conflict),
      429 (rate limit) or any 5xx status.
    * ``APIError`` instances that are not status errors (no HTTP status
      to inspect), which are treated as transient.

    Every other 4xx status (400, 401, 403, 404, 422, ...) will fail the
    same way on the next attempt, so retrying it only adds backoff delay
    before the caller sees the failure.

    Args:
        error: Exception raised while calling the provider.

    Returns:
        True when the request is worth retrying, False otherwise.
    """
    if isinstance(error, APIStatusError):
        status = error.status_code
        # Allow-list of transient statuses instead of a deny-list, so
        # permanent client errors such as 404 or 422 are not retried.
        return status in (408, 409, 429) or status >= 500
    # Rate-limit errors carry a status and were handled above; what is left
    # are API errors without an HTTP status.
    return isinstance(error, APIError)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def extract_cached_tokens(usage: object) -> int:
    """Read the cached-token count out of a usage payload.

    The count lives under ``prompt_tokens_details.cached_tokens``. Both the
    usage payload and the details entry may be either an attribute-style
    object or a plain dict, so each level is probed both ways.

    Args:
        usage: Usage object or dict from a completion response.

    Returns:
        The cached token count as an int, or 0 when no cache data exists.
    """
    details = getattr(usage, "prompt_tokens_details", None)
    if details is None and isinstance(usage, dict):
        details = usage.get("prompt_tokens_details")

    if not details:
        return 0

    cached = (
        details.get("cached_tokens", 0)
        if isinstance(details, dict)
        else getattr(details, "cached_tokens", 0)
    )
    # A present-but-null value counts as "no cached tokens".
    return int(cached or 0)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def message_to_openai(msg: Message) -> dict[str, Any]:
    """Render a canonical Message as an OpenAI chat-completion message dict.

    ``role`` and ``content`` are always emitted. ``tool_call_id`` and
    ``tool_calls`` are added only when the message carries a truthy value
    for them. Tool call arguments are serialized to a JSON string.
    """
    payload: dict[str, Any] = {"role": msg.role, "content": msg.content}

    if msg.tool_call_id:
        payload["tool_call_id"] = msg.tool_call_id

    if msg.tool_calls:
        serialized = []
        for call in msg.tool_calls:
            function_spec = {
                "name": call.name,
                "arguments": json.dumps(call.arguments),
            }
            serialized.append(
                {"id": call.id, "type": "function", "function": function_spec}
            )
        payload["tool_calls"] = serialized

    return payload
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def build_chat_completion_kwargs(
    model: str,
    messages: list[Message],
    tools: list[dict[str, Any]] | None,
    hyperparams: ModelHyperparameters,
) -> dict[str, Any]:
    """Assemble the keyword arguments for ``chat.completions.create()``.

    Args:
        model: Model identifier placed under ``"model"``.
        messages: Canonical messages, each converted with
            ``message_to_openai``.
        tools: Tool definitions; included only when non-empty.
        hyperparams: Source of ``max_tokens`` (always sent), the optional
            ``temperature``/``top_p``/``reasoning_effort``/``verbosity``
            values (sent only when not None), and ``extra``.

    Returns:
        The kwargs dict. Entries from ``hyperparams.extra["openai"]`` are
        merged last and therefore override earlier keys.
    """
    request: dict[str, Any] = {
        "model": model,
        "messages": [message_to_openai(m) for m in messages],
        "max_tokens": hyperparams.max_tokens,
    }

    # Optional knobs are forwarded only when explicitly set.
    for field in ("temperature", "top_p", "reasoning_effort", "verbosity"):
        value = getattr(hyperparams, field)
        if value is not None:
            request[field] = value

    if tools:
        request["tools"] = tools

    if "openai" in hyperparams.extra:
        request.update(hyperparams.extra["openai"])

    return request
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def openai_response_to_completion(response: Any, provider: str) -> CompletionResponse:
    """Convert OpenAI ChatCompletion response to canonical CompletionResponse.

    Args:
        response: Response object returned by ``chat.completions.create()``.
            It may carry an ``error`` payload instead of choices.
        provider: Provider identifier attached to any raised error.

    Returns:
        A CompletionResponse holding the first choice's text (empty string
        when the content is missing), its parsed tool calls (None when
        there are none), token usage (None when absent) and the raw
        response dump.

    Raises:
        ProviderError: If the response carries an error payload or has no
            choices.
    """
    error = getattr(response, "error", None)
    if error:
        # The error payload is normally a dict, but tolerate an
        # attribute-style object so error reporting itself cannot crash.
        if isinstance(error, dict):
            error_msg = error.get("message", "Unknown error")
            error_code = error.get("code", "unknown")
        else:
            error_msg = getattr(error, "message", "Unknown error")
            error_code = getattr(error, "code", "unknown")
        raise ProviderError(
            provider,
            Exception(f"API error (code={error_code}): {error_msg}"),
        )

    if not response.choices:
        raise ProviderError(
            provider,
            Exception("No choices in response"),
        )

    message = response.choices[0].message
    text = message.content or ""

    tool_calls = None
    if message.tool_calls:
        tool_calls = []
        for tc in message.tool_calls:
            raw_arguments = tc.function.arguments
            try:
                # Missing or empty arguments mean "no arguments", matching
                # how the streaming accumulator treats them.
                arguments = json.loads(raw_arguments) if raw_arguments else {}
            except (json.JSONDecodeError, TypeError):
                # Keep the tool call but make the data loss visible.
                logger.warning("Failed to parse tool call arguments: %s", raw_arguments)
                arguments = {}
            tool_calls.append(
                ToolCall(
                    id=tc.id,
                    name=tc.function.name,
                    arguments=arguments,
                )
            )

    usage = None
    if response.usage:
        usage = {
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
            "total_tokens": response.usage.total_tokens,
        }
        cached = extract_cached_tokens(response.usage)
        if cached:
            # Reported only when the provider returned a non-zero count.
            usage["cached_tokens"] = cached

    return CompletionResponse(
        text=text,
        tool_calls=tool_calls,
        usage=usage,
        raw=response.model_dump(),
    )
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class ToolCallAccumulator:
    """Assembles complete ToolCalls from streamed tool-call fragments.

    While streaming, a tool call arrives in pieces: an id, a name and
    chunks of its JSON arguments. OpenAI sends `id` only on the first
    chunk and correlates later chunks through `index`, so fragments are
    grouped by index rather than by id.
    """

    def __init__(self) -> None:
        # Partially assembled tool calls, keyed by their stream index.
        self._pending: dict[int, ToolCallDelta] = {}

    def add_delta(self, delta: ToolCallDelta) -> None:
        """Merge one streamed fragment into the entry for its index.

        Fragments without an index are ignored. A truthy id or name
        replaces the stored one; argument text is appended.
        """
        slot = delta.index
        if slot is None:
            return

        merged = self._pending.get(slot)
        if merged is None:
            merged = ToolCallDelta(index=slot)
            self._pending[slot] = merged

        if delta.id:
            merged.id = delta.id
        if delta.name:
            merged.name = delta.name
        merged.arguments_json += delta.arguments_json

    def finalize(self) -> list[ToolCall]:
        """Turn the accumulated fragments into ToolCalls, ordered by index.

        Entries missing an id or a name are dropped (with a warning when
        they had collected argument text). Arguments that fail to parse as
        JSON are replaced by an empty dict after a warning.
        """
        completed = []
        for idx, delta in sorted(self._pending.items()):
            if not (delta.id and delta.name):
                if delta.arguments_json:
                    logger.warning(
                        f"Dropping incomplete tool call at index {idx}: "
                        f"id={delta.id}, name={delta.name}, args_len={len(delta.arguments_json)}"
                    )
                continue

            try:
                arguments = json.loads(delta.arguments_json) if delta.arguments_json else {}
            except json.JSONDecodeError:
                logger.warning(f"Failed to parse tool call arguments: {delta.arguments_json}")
                arguments = {}

            completed.append(
                ToolCall(id=delta.id, name=delta.name, arguments=arguments)
            )
        return completed

    def has_pending(self) -> bool:
        """Report whether any tool call fragments have been recorded."""
        return bool(self._pending)

    def default_index(self) -> int | None:
        """Pick an index for a fragment that arrived without one.

        Returns 0 when nothing is pending (treated as the first tool
        call), the only pending index when exactly one exists (treated as
        its continuation), and None when several are pending, since the
        target is then ambiguous and the caller should drop the fragment.
        """
        pending_count = len(self._pending)
        if pending_count > 1:
            return None
        if pending_count == 1:
            (only_index,) = self._pending
            return only_index
        return 0
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""OpenRouter client implementation.
|
|
2
|
+
|
|
3
|
+
Uses OpenAI SDK to call OpenRouter's API directly (no LiteLLM).
|
|
4
|
+
OpenRouter is OpenAI-compatible, so this is a thin wrapper that adds
|
|
5
|
+
OpenRouter-specific headers and translates parameters to OpenRouter's format.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any, AsyncGenerator
|
|
11
|
+
|
|
12
|
+
from openai import AsyncOpenAI
|
|
13
|
+
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
|
|
14
|
+
|
|
15
|
+
from ..credentials import CredentialManager
|
|
16
|
+
from ..detection import ProviderType
|
|
17
|
+
from ..errors import AuthenticationError, ProviderError
|
|
18
|
+
from ..types import (
|
|
19
|
+
CompletionResponse,
|
|
20
|
+
Message,
|
|
21
|
+
ModelHyperparameters,
|
|
22
|
+
StreamEvent,
|
|
23
|
+
ToolCallDelta,
|
|
24
|
+
)
|
|
25
|
+
from .base import estimate_message_tokens, merge_hyperparams
|
|
26
|
+
from .openai_compat import (
|
|
27
|
+
ToolCallAccumulator,
|
|
28
|
+
build_chat_completion_kwargs,
|
|
29
|
+
extract_cached_tokens,
|
|
30
|
+
is_retryable_error,
|
|
31
|
+
message_to_openai,
|
|
32
|
+
openai_response_to_completion,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class OpenRouterClient:
    """OpenRouter client using OpenAI SDK.

    Calls OpenRouter's API directly at https://openrouter.ai/api/v1.
    Uses Chat Completions only (no Responses API).

    The API key, base URL and optional extra headers come from the
    credential manager. The underlying ``AsyncOpenAI`` client is created on
    first use and cached until an authentication failure discards it.
    """

    def __init__(
        self,
        model: str,
        provider: ProviderType,
        credentials: CredentialManager | None = None,
        default_hyperparams: ModelHyperparameters | None = None,
    ) -> None:
        """Store configuration; no network or credential access happens here.

        Args:
            model: Model identifier sent with every request.
            provider: Provider key used for credential lookup and errors.
            credentials: Credential manager; defaults to
                ``CredentialManager.default()``.
            default_hyperparams: Defaults merged with per-call values.
        """
        self._model = model
        self._provider = provider
        self._credentials = credentials or CredentialManager.default()
        self._default_hyperparams = default_hyperparams
        self._client: AsyncOpenAI | None = None

    @property
    def model(self) -> str:
        """Model identifier this client was created for."""
        return self._model

    async def _get_client(self) -> AsyncOpenAI:
        """Return the cached SDK client, creating it from credentials if needed."""
        if self._client is not None:
            return self._client

        creds = await self._credentials.get_credentials(self._provider)

        self._client = AsyncOpenAI(
            api_key=creds["api_key"],
            base_url=creds["base_url"],
            default_headers=creds.get("extra_headers", {}),
        )
        return self._client

    @staticmethod
    def _translate_params(kwargs: dict[str, Any]) -> dict[str, Any]:
        """Translate Forge params to OpenRouter's API format.

        OpenRouter uses ``reasoning: {effort: ...}`` (object) and top-level
        ``verbosity``, passed via ``extra_body`` since the OpenAI SDK does not
        accept them as direct kwargs.

        Args:
            kwargs: Request kwargs; ``reasoning_effort``, ``verbosity`` and
                ``extra_body`` are removed from it and folded into a new
                ``extra_body`` entry when there is anything to send.

        Returns:
            The same ``kwargs`` dict, updated in place.
        """
        # Copy before modifying: the popped dict can be the very object held
        # in the caller's hyperparameter extras, and writing into it would
        # leak this request's reasoning/verbosity into later requests.
        extra_body: dict[str, Any] = dict(kwargs.pop("extra_body", None) or {})
        effort = kwargs.pop("reasoning_effort", None)
        if effort is not None:
            extra_body["reasoning"] = {"effort": effort}
        verbosity = kwargs.pop("verbosity", None)
        if verbosity is not None:
            extra_body["verbosity"] = verbosity
        if extra_body:
            kwargs["extra_body"] = extra_body
        return kwargs

    _is_retryable_error = staticmethod(is_retryable_error)

    @staticmethod
    def _is_auth_error(error: Exception) -> bool:
        """Return True when ``error`` indicates rejected credentials.

        An HTTP 401 status (when the exception exposes ``status_code``) is
        checked first, because the message of a 401 response does not
        necessarily contain the words matched below.
        """
        if getattr(error, "status_code", None) == 401:
            return True
        error_str = str(error).lower()
        return "authentication" in error_str or "unauthorized" in error_str

    async def _reset_credentials(self) -> None:
        """Invalidate stored credentials and drop the cached client."""
        await self._credentials.invalidate(self._provider)
        await self._close_client()

    @retry(
        retry=retry_if_exception(lambda e: isinstance(e, Exception) and is_retryable_error(e)),
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        reraise=True,
    )
    async def _make_completion_request(
        self,
        client: AsyncOpenAI,
        messages: list[Message],
        tools: list[dict[str, Any]] | None,
        merged_params: ModelHyperparameters,
    ) -> CompletionResponse:
        """Send one non-streaming request; retried up to 3 attempts in total.

        Only errors accepted by ``is_retryable_error`` are retried; the last
        exception is re-raised unchanged.
        """
        kwargs = build_chat_completion_kwargs(self._model, messages, tools, merged_params)
        kwargs = self._translate_params(kwargs)
        response = await client.chat.completions.create(**kwargs)
        return openai_response_to_completion(response, self._provider)

    async def complete(
        self,
        messages: list[Message],
        *,
        tools: list[dict[str, Any]] | None = None,
        hyperparams: ModelHyperparameters | None = None,
    ) -> CompletionResponse:
        """Run a non-streaming completion.

        Args:
            messages: Conversation to send.
            tools: Optional tool definitions.
            hyperparams: Per-call overrides merged over the defaults.

        Returns:
            The canonical completion response.

        Raises:
            AuthenticationError: If the failure looks like rejected
                credentials; stored credentials are invalidated and the
                cached client is discarded first.
            ProviderError: For any other failure.
        """
        merged_params = merge_hyperparams(self._default_hyperparams, hyperparams)
        client = await self._get_client()

        try:
            return await self._make_completion_request(client, messages, tools, merged_params)
        except (ProviderError, AuthenticationError):
            # Already in canonical form; do not wrap twice.
            raise
        except Exception as e:
            if self._is_auth_error(e):
                await self._reset_credentials()
                raise AuthenticationError(self._provider, str(e)) from e
            raise ProviderError(self._provider, e) from e

    async def _close_client(self) -> None:
        """Close and discard the cached HTTP client.

        Forces credential re-resolution on next request, preventing
        stale credentials from being reused after invalidation.
        """
        client = self._client
        # Clear the reference first so a failing close() cannot leave a
        # half-closed client cached.
        self._client = None
        if client is not None:
            try:
                await client.close()
            except Exception:
                # Best-effort cleanup: the client is discarded either way,
                # but keep a trace for debugging.
                logger.debug("Ignoring error while closing OpenRouter client", exc_info=True)

    async def stream(
        self,
        messages: list[Message],
        *,
        tools: list[dict[str, Any]] | None = None,
        hyperparams: ModelHyperparameters | None = None,
    ) -> AsyncGenerator[StreamEvent, None]:
        """Run a streaming completion and yield canonical stream events.

        Yields ``text_delta`` and ``tool_call_delta`` events as chunks
        arrive, then an optional ``usage`` event and a final
        ``response_end`` carrying the assembled tool calls and usage.
        Failures are not raised: they are reported as a single ``error``
        event (after resetting credentials when the failure looks like an
        authentication problem).
        """
        merged_params = merge_hyperparams(self._default_hyperparams, hyperparams)
        client = await self._get_client()

        accumulator = ToolCallAccumulator()
        usage_data: dict[str, int] | None = None

        try:
            kwargs = build_chat_completion_kwargs(self._model, messages, tools, merged_params)
            kwargs = self._translate_params(kwargs)
            kwargs["stream"] = True
            # Ask for token usage to be included in the stream.
            kwargs["stream_options"] = {"include_usage": True}

            stream_resp = await client.chat.completions.create(**kwargs)

            async for chunk in stream_resp:
                if chunk.usage:
                    usage_data = {
                        "prompt_tokens": chunk.usage.prompt_tokens,
                        "completion_tokens": chunk.usage.completion_tokens,
                        "total_tokens": chunk.usage.total_tokens,
                    }
                    cached = extract_cached_tokens(chunk.usage)
                    if cached:
                        usage_data["cached_tokens"] = cached

                if not chunk.choices:
                    # Chunks without choices carry no content to process.
                    continue

                choice = chunk.choices[0]
                delta = choice.delta

                if delta.content:
                    yield StreamEvent(type="text_delta", text=delta.content)

                if delta.tool_calls:
                    for tc_delta in delta.tool_calls:
                        idx = tc_delta.index
                        # An unindexed fragment can only be placed when it
                        # is the sole fragment in this chunk.
                        if idx is None and len(delta.tool_calls) == 1:
                            idx = accumulator.default_index()
                        tool_delta = ToolCallDelta(
                            index=idx,
                            id=tc_delta.id,
                            name=tc_delta.function.name if tc_delta.function else None,
                            arguments_json=(tc_delta.function.arguments or "") if tc_delta.function else "",
                        )
                        accumulator.add_delta(tool_delta)
                        yield StreamEvent(type="tool_call_delta", tool_call_delta=tool_delta)

            if usage_data:
                yield StreamEvent(type="usage", usage=usage_data)

            final_tool_calls = accumulator.finalize() if accumulator.has_pending() else None
            yield StreamEvent(
                type="response_end",
                tool_calls=final_tool_calls,
                usage=usage_data,
            )

        except Exception as e:
            if self._is_auth_error(e):
                await self._reset_credentials()
            yield StreamEvent(type="error", error=str(e))

    async def count_tokens(
        self,
        messages: list[Message],
        tools: list[dict[str, Any]] | None = None,
    ) -> int:
        """Estimate the token count of a request without calling the API.

        Messages are estimated with ``estimate_message_tokens``; tool
        definitions add one token per four characters of their JSON
        serialization.
        """
        openai_messages = [message_to_openai(m) for m in messages]
        total = estimate_message_tokens(openai_messages)

        if tools:
            tools_json = json.dumps(tools)
            total += len(tools_json) // 4

        return total
|