multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
"""Adapter to use core.llm clients with proxy's converter pipeline.
|
|
2
|
+
|
|
3
|
+
This adapter bridges between:
|
|
4
|
+
- core.llm interface (StreamEvent, CompletionResponse)
|
|
5
|
+
- Proxy's expected interface (OpenAI dict format)
|
|
6
|
+
|
|
7
|
+
The adapter allows the proxy to use core.llm for LLM calls while keeping
|
|
8
|
+
the existing Anthropic ↔ OpenAI converters unchanged.
|
|
9
|
+
|
|
10
|
+
Supports LiteLLM providers via core.llm's get_client().
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
import time
|
|
16
|
+
from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
|
|
17
|
+
|
|
18
|
+
from forge.core.llm import (
|
|
19
|
+
CompletionResponse,
|
|
20
|
+
Message,
|
|
21
|
+
ModelHyperparameters,
|
|
22
|
+
get_client,
|
|
23
|
+
)
|
|
24
|
+
from forge.core.llm.types import ToolCall
|
|
25
|
+
from forge.proxy.base_client import ProxyStreamError
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
AdapterProviderType = Literal["litellm_remote", "litellm_local"]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _extract_cache_info(usage: dict[str, int] | None) -> dict[str, Any]:
|
|
33
|
+
"""Extract cache hit info from a usage dict.
|
|
34
|
+
|
|
35
|
+
Core.llm clients include ``cached_tokens`` in the usage dict when the
|
|
36
|
+
provider reports prompt caching metrics (via ``prompt_tokens_details``).
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
Dict with ``cached_tokens`` and ``cache_hit_rate`` (percentage),
|
|
40
|
+
or empty dict if no cache data is present.
|
|
41
|
+
"""
|
|
42
|
+
if not usage:
|
|
43
|
+
return {}
|
|
44
|
+
cached_tokens = usage.get("cached_tokens", 0)
|
|
45
|
+
if not cached_tokens:
|
|
46
|
+
return {}
|
|
47
|
+
prompt_tokens = usage.get("prompt_tokens", 0)
|
|
48
|
+
cache_hit_rate = (cached_tokens / prompt_tokens * 100) if prompt_tokens > 0 else 0
|
|
49
|
+
return {"cached_tokens": cached_tokens, "cache_hit_rate": cache_hit_rate}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _sanitize_header_value(value: str, max_length: int = 256) -> str:
|
|
53
|
+
"""Sanitize a header value to prevent header injection and cap length.
|
|
54
|
+
|
|
55
|
+
Strips all ASCII control characters (0x00-0x1F, 0x7F), not just CR/LF,
|
|
56
|
+
to prevent log injection and downstream parsing issues.
|
|
57
|
+
"""
|
|
58
|
+
sanitized = "".join(ch for ch in value if 0x20 <= ord(ch) < 0x7F or ord(ch) > 0x7F)
|
|
59
|
+
return sanitized[:max_length]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class CoreLLMClientAdapter:
    """Adapt a core.llm client to the dict-based interface the proxy expects.

    The proxy expects clients with:
    - create_completion(openai_request, request_id) -> dict
    - create_streaming_completion(openai_request, request_id) -> AsyncGenerator[dict, None]

    Requests and responses on that interface are OpenAI-format dictionaries;
    this class translates them to and from core.llm messages, hyperparameters
    and stream events. Supports LiteLLM providers (remote and local).
    """

    # Roles forwarded unchanged; any other role is sent as "user".
    _KNOWN_ROLES = ("system", "user", "assistant", "tool")
    # Request fields copied verbatim into the hyperparameters when set.
    _PASSTHROUGH_HYPERPARAM_KEYS = ("temperature", "top_p", "reasoning_effort", "verbosity")
    # Usage counters exposed on OpenAI-format responses and usage chunks.
    _USAGE_KEYS = ("prompt_tokens", "completion_tokens", "total_tokens", "cached_tokens")

    def __init__(
        self,
        model: str,
        provider: AdapterProviderType,
        max_tokens_override: Optional[int] = None,
        tier: str = "sonnet",
        default_hyperparams: ModelHyperparameters | None = None,
    ) -> None:
        """Initialize the adapter.

        Args:
            model: Model identifier (e.g., "openai/gpt-5.5").
            provider: Provider type (litellm_remote, litellm_local).
            max_tokens_override: Optional cap applied to a request's max_tokens.
            tier: Tier name for hyperparameter lookup.
            default_hyperparams: Default hyperparameters handed to the client.
        """
        self.model_name = model
        self._provider = provider
        self.max_tokens_override = max_tokens_override
        self.tier = tier
        self.default_hyperparams = default_hyperparams

        # Model includes vendor prefix (e.g., "openai/gpt-5.5")
        self._client = get_client(
            model,
            provider=provider,  # type: ignore  # AdapterProviderType is subset of ProviderType
            default_hyperparams=default_hyperparams,
        )

        logger.info(f"CoreLLMClientAdapter initialized: model={model}, provider={provider}, tier={tier}")

    # ------------------------------------------------------------------
    # Request conversion
    # ------------------------------------------------------------------

    @staticmethod
    def _openai_tool_calls_to_core(tool_calls: object) -> Optional[List[ToolCall]]:
        """Convert an OpenAI ``tool_calls`` list into core.llm ToolCall objects.

        Entries that are not function calls, or that lack a string id or name,
        are skipped. Returns ``None`` when nothing usable remains.
        """
        if not isinstance(tool_calls, list):
            return None

        converted: List[ToolCall] = []
        for tc in tool_calls:
            if not isinstance(tc, dict) or tc.get("type") != "function":
                continue

            func = tc.get("function") or {}
            if not isinstance(func, dict):
                continue

            tc_id = tc.get("id")
            name = func.get("name")
            if not isinstance(tc_id, str) or not isinstance(name, str):
                continue

            args_raw = func.get("arguments")
            arguments: Dict[str, Any]
            if isinstance(args_raw, str):
                try:
                    parsed = json.loads(args_raw) if args_raw.strip() else {}
                except json.JSONDecodeError:
                    # Keep the unparseable text instead of dropping the call.
                    arguments = {"raw_arguments": args_raw}
                else:
                    # Valid JSON that is not an object is wrapped under "_raw".
                    arguments = parsed if isinstance(parsed, dict) else {"_raw": parsed}
            elif isinstance(args_raw, dict):
                arguments = args_raw
            else:
                arguments = {}

            converted.append(ToolCall(id=tc_id, name=name, arguments=arguments))

        return converted or None

    def _openai_messages_to_core(self, openai_messages: List[Dict[str, Any]]) -> List[Message]:
        """Convert OpenAI format messages to core.llm Messages.

        Args:
            openai_messages: Messages in OpenAI format.

        Returns:
            Messages in core.llm format.
        """
        messages: List[Message] = []
        for msg in openai_messages:
            role = msg.get("role", "user")
            if role not in self._KNOWN_ROLES:
                role = "user"  # Fallback for unknown roles

            content = msg.get("content", "")
            if content is None:
                # OpenAI allows content=null when tool_calls are present; core.llm does not.
                content = ""

            messages.append(
                Message(
                    role=role,
                    content=content,
                    tool_call_id=msg.get("tool_call_id"),
                    tool_calls=self._openai_tool_calls_to_core(msg.get("tool_calls")),
                )
            )

        return messages

    def _build_hyperparam_kwargs(self, openai_request: Dict[str, Any]) -> Dict[str, Any]:
        """Collect the hyperparameter fields explicitly set on a request.

        IMPORTANT: only fields that the request actually provides are included.
        Otherwise, core.llm's merge_hyperparams() will treat unset defaults as
        overrides.

        Args:
            openai_request: Request in OpenAI format.

        Returns:
            Keyword arguments for ``ModelHyperparameters``.
        """
        kwargs: Dict[str, Any] = {}

        requested_max = openai_request.get("max_tokens")
        if requested_max is not None:
            max_tokens = int(requested_max)
            if self.max_tokens_override is not None:
                max_tokens = min(max_tokens, self.max_tokens_override)
            kwargs["max_tokens"] = max_tokens

        for key in self._PASSTHROUGH_HYPERPARAM_KEYS:
            value = openai_request.get(key)
            if value is not None:
                kwargs[key] = value

        # Forward User-Agent to upstream if server injected it
        user_agent = openai_request.get("_user_agent")
        if isinstance(user_agent, str) and user_agent:
            kwargs["extra"] = {
                "openai": {"extra_headers": {"User-Agent": _sanitize_header_value(user_agent)}}
            }

        return kwargs

    def _build_hyperparams(self, openai_request: Dict[str, Any]) -> ModelHyperparameters:
        """Build the per-request ModelHyperparameters for an OpenAI-format request."""
        return ModelHyperparameters(**self._build_hyperparam_kwargs(openai_request))

    # ------------------------------------------------------------------
    # Response conversion
    # ------------------------------------------------------------------

    @classmethod
    def _usage_to_openai(cls, usage: Optional[Dict[str, int]]) -> Dict[str, int]:
        """Return the OpenAI-format usage dict, with 0 for any missing counter."""
        counts = usage or {}
        return {key: counts.get(key, 0) for key in cls._USAGE_KEYS}

    def _core_response_to_openai(self, response: CompletionResponse, model: str) -> Dict[str, Any]:
        """Convert core.llm CompletionResponse to OpenAI format.

        Args:
            response: CompletionResponse from core.llm.
            model: Model identifier.

        Returns:
            Response in OpenAI format.
        """
        tool_calls = [
            {
                "id": tc.id,
                "type": "function",
                "function": {
                    "name": tc.name,
                    "arguments": json.dumps(tc.arguments),
                },
            }
            for tc in (response.tool_calls or [])
        ]

        message: Dict[str, Any] = {"role": "assistant", "content": response.text}
        if tool_calls:
            message["tool_calls"] = tool_calls

        # One timestamp for both fields so "id" and "created" always agree.
        # NOTE(review): ids have one-second resolution, so concurrent responses
        # can share an id -- confirm nothing downstream keys on it.
        created = int(time.time())
        return {
            "id": f"chatcmpl-{created}",
            "object": "chat.completion",
            "created": created,
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": message,
                    "finish_reason": "tool_calls" if tool_calls else "stop",
                }
            ],
            "usage": self._usage_to_openai(response.usage),
        }

    def _stream_chunk(
        self,
        response_id: str,
        choices: List[Dict[str, Any]],
        usage: Optional[Dict[str, int]] = None,
    ) -> Dict[str, Any]:
        """Build one OpenAI ``chat.completion.chunk`` envelope.

        The ``usage`` key is added only when a usage dict is supplied.
        """
        chunk: Dict[str, Any] = {
            "id": response_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": self.model_name,
            "choices": choices,
        }
        if usage is not None:
            chunk["usage"] = usage
        return chunk

    @staticmethod
    def _classify_stream_error(error_msg: str) -> str:
        """Map a stream error message to an error type by keyword matching.

        Checks run in order: authentication, rate limit, invalid request;
        anything else is reported as ``api_error``.
        """
        lowered = error_msg.lower()
        if "authentication" in lowered or "unauthorized" in lowered:
            return "authentication_error"
        if "rate limit" in lowered or "rate_limit" in lowered:
            return "rate_limit_error"
        if "invalid" in lowered or "bad request" in lowered:
            return "invalid_request_error"
        return "api_error"

    def _log_usage(self, request_id: str, label: str, usage: Dict[str, int]) -> None:
        """Log token usage, plus cache-hit details when the usage reports them."""
        cache_info = _extract_cache_info(usage)
        cache_log = ""
        if cache_info:
            cache_log = (
                f" | cached_tokens={cache_info['cached_tokens']}"
                f" ({cache_info['cache_hit_rate']:.1f}% cache hit)"
            )
        logger.info(
            f"[{request_id}] <<< {label} from {self.model_name} | "
            f"input_tokens={usage.get('prompt_tokens', 0)} | "
            f"output_tokens={usage.get('completion_tokens', 0)} | "
            f"total_tokens={usage.get('total_tokens', 0)}{cache_log}"
        )

    # ------------------------------------------------------------------
    # Proxy-facing interface
    # ------------------------------------------------------------------

    async def create_completion(self, openai_request: Dict[str, Any], request_id: str) -> Dict[str, Any]:
        """Create a non-streaming completion.

        Args:
            openai_request: Request in OpenAI format.
            request_id: Request ID for logging.

        Returns:
            Response in OpenAI format.
        """
        logger.debug(f"[{request_id}] CoreLLMClientAdapter.create_completion: model={self.model_name}")

        messages = self._openai_messages_to_core(openai_request.get("messages", []))
        tools = openai_request.get("tools")
        hyperparams = self._build_hyperparams(openai_request)

        response = await self._client.complete(messages, tools=tools, hyperparams=hyperparams)

        if response.usage:
            self._log_usage(request_id, "Response", response.usage)

        return self._core_response_to_openai(response, self.model_name)

    async def create_streaming_completion(
        self, openai_request: Dict[str, Any], request_id: str
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """Create a streaming completion.

        Args:
            openai_request: Request in OpenAI format.
            request_id: Request ID for logging.

        Yields:
            Streaming chunks in OpenAI format.

        Raises:
            ProxyStreamError: When the core client emits an ``error`` event.
        """
        logger.debug(f"[{request_id}] CoreLLMClientAdapter.create_streaming_completion: model={self.model_name}")

        messages = self._openai_messages_to_core(openai_request.get("messages", []))
        tools = openai_request.get("tools")
        hyperparams = self._build_hyperparams(openai_request)

        # Only the presence of tool calls matters here (it selects the final
        # finish_reason), so remember which OpenAI indices were seen rather than
        # re-assembling each call. Indexed by position, not id -- the id only
        # appears in the first chunk of a call.
        seen_tool_indices: set[int] = set()
        response_id = f"chatcmpl-{int(time.time())}"
        final_usage: Dict[str, int] = {}

        async for event in self._client.stream(messages, tools=tools, hyperparams=hyperparams):
            if event.type == "text_delta":
                yield self._stream_chunk(
                    response_id,
                    [{"index": 0, "delta": {"content": event.text}, "finish_reason": None}],
                )

            elif event.type == "tool_call_delta":
                delta = event.tool_call_delta
                if delta is None or delta.index is None:
                    # Core clients yield index=None for ambiguous fragments they can't
                    # route (e.g., late chunks in multi-tool streams). Mirrors
                    # ToolCallAccumulator.add_delta -- coercing to 0 corrupts tool 0.
                    continue
                tc_idx = delta.index
                seen_tool_indices.add(tc_idx)

                yield self._stream_chunk(
                    response_id,
                    [
                        {
                            "index": 0,
                            "delta": {
                                "tool_calls": [
                                    {
                                        "index": tc_idx,
                                        "id": delta.id,
                                        # "type" accompanies the id, i.e. the first chunk of a call.
                                        "type": "function" if delta.id else None,
                                        "function": {
                                            "name": delta.name,
                                            "arguments": delta.arguments_json,
                                        },
                                    }
                                ],
                            },
                            "finish_reason": None,
                        }
                    ],
                )

            elif event.type == "usage":
                if event.usage:
                    final_usage = event.usage
                yield self._stream_chunk(response_id, [], usage=self._usage_to_openai(event.usage))

            elif event.type == "response_end":
                finish_reason = "tool_calls" if seen_tool_indices else "stop"
                yield self._stream_chunk(
                    response_id,
                    [{"index": 0, "delta": {}, "finish_reason": finish_reason}],
                )

            elif event.type == "error":
                logger.error(f"[{request_id}] Stream error: {event.error}")
                # Detect error type from message for proper HTTP status mapping
                error_msg = event.error or "Unknown streaming error"
                raise ProxyStreamError(error_msg, error_type=self._classify_stream_error(error_msg))

        if final_usage:
            self._log_usage(request_id, "Stream complete", final_usage)

    async def count_tokens(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> int:
        """Count tokens for messages and tools.

        Args:
            messages: Messages in OpenAI format.
            tools: Optional tools.

        Returns:
            Token count reported by the underlying core.llm client.
        """
        core_messages = self._openai_messages_to_core(messages)
        return await self._client.count_tokens(core_messages, tools)

    async def aclose(self) -> None:
        """Clean up resources."""
        # core.llm clients don't have aclose yet, but we keep the interface
|