multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
"""Tier-aware client factory for proxy model routing.
|
|
2
|
+
|
|
3
|
+
Creates and caches LLM client instances keyed by (model_name, tier),
|
|
4
|
+
with resolved hyperparameters from env vars, tier overrides, and provider config.
|
|
5
|
+
Actual credential fetching is delegated to core.llm.CredentialManager.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import time
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from threading import Lock
|
|
14
|
+
from typing import Any, Dict, Optional
|
|
15
|
+
from urllib.parse import urlparse
|
|
16
|
+
|
|
17
|
+
from forge.config import config
|
|
18
|
+
from forge.core.llm.types import ModelHyperparameters
|
|
19
|
+
from forge.core.models import (
|
|
20
|
+
ModelCatalogError,
|
|
21
|
+
get_max_output_tokens,
|
|
22
|
+
model_exists,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
DEFAULT_MAX_OUTPUT_TOKENS = 16384
|
|
29
|
+
|
|
30
|
+
_LOCAL_HOSTS = ("localhost", "127.0.0.1", "0.0.0.0", "::1")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _is_local_url(url: str) -> bool:
|
|
34
|
+
"""Check if a URL points to a local host."""
|
|
35
|
+
try:
|
|
36
|
+
parsed = urlparse(url)
|
|
37
|
+
return (parsed.hostname or "") in _LOCAL_HOSTS
|
|
38
|
+
except Exception:
|
|
39
|
+
return False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _enforce_max_output_tokens_cap(model_name: str, requested: int | None, *, strict: bool = True) -> int:
    """Resolve the effective ``max_tokens`` for a model against the catalog ceiling.

    For a model present in the catalog, the catalog's max_output_tokens is a
    hard limit: a missing request resolves to the limit itself, a request at
    or below the limit is returned unchanged, and a request above it is
    rejected (it is not silently clamped).

    Args:
        model_name: Model ID (canonical or alias).
        requested: Requested max_tokens from config/env/request, or None to
            use the default for the model.
        strict: If True (default), a model missing from the catalog is an
            error. If False, such a model resolves to ``requested`` or, when
            that is None, to ``DEFAULT_MAX_OUTPUT_TOKENS`` (used by OpenRouter
            where the model space is open).

    Returns:
        The max_tokens value to use for the model.

    Raises:
        ModelCatalogError: If the model is unknown in strict mode, or if
            ``requested`` exceeds the catalog cap.
    """
    if model_exists(model_name):
        ceiling = get_max_output_tokens(model_name)

        if requested is None:
            return ceiling

        if requested > ceiling:
            raise ModelCatalogError(
                f"Requested max_tokens ({requested}) exceeds model {model_name!r} catalog cap ({ceiling}). "
                f"Update catalog or reduce max_tokens override."
            )

        return requested

    # From here on the model is not in the catalog.
    if strict:
        raise ModelCatalogError(f"Model {model_name!r} not in catalog. Add it to core/data/model_catalog.yaml.")

    logger.debug(f"Model {model_name!r} not in catalog, using default max_output_tokens")
    return DEFAULT_MAX_OUTPUT_TOKENS if requested is None else requested
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ModelProvider(Enum):
    """Provider backends that a model request can be routed to.

    Each member's value is the lowercase provider name as a string.
    """

    # Requests served through LiteLLM.
    LITELLM = "litellm"
    # Requests served through OpenRouter.
    OPENROUTER = "openrouter"
    # Placeholder for a provider that could not be identified.
    UNKNOWN = "unknown"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class TierClientFactory:
|
|
90
|
+
"""Tier-aware client factory for proxy model routing.
|
|
91
|
+
|
|
92
|
+
Creates and caches LLM client instances keyed by (model_name, tier)
|
|
93
|
+
with resolved hyperparameters. Delegates credential fetching to
|
|
94
|
+
core.llm.CredentialManager via CoreLLMClientAdapter.
|
|
95
|
+
|
|
96
|
+
Features:
|
|
97
|
+
- Automatic model type detection
|
|
98
|
+
- Tier-specific hyperparameter resolution (env > tier_override > config)
|
|
99
|
+
- Unified caching with configurable TTL
|
|
100
|
+
- Retry on authentication failure
|
|
101
|
+
- Thread-safe client management
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
_instance: "TierClientFactory | None" = None
|
|
105
|
+
_lock = Lock()
|
|
106
|
+
_initialized: bool = False
|
|
107
|
+
|
|
108
|
+
def __new__(cls):
|
|
109
|
+
"""Singleton pattern to ensure only one manager exists."""
|
|
110
|
+
with cls._lock:
|
|
111
|
+
if cls._instance is None:
|
|
112
|
+
cls._instance = super(TierClientFactory, cls).__new__(cls)
|
|
113
|
+
cls._instance._initialized = False
|
|
114
|
+
return cls._instance
|
|
115
|
+
|
|
116
|
+
def __init__(self, default_ttl: Optional[float] = 3600):
    """Initialize the tier client factory (effective on first construction only).

    The class is a singleton, so ``__init__`` runs on every
    ``TierClientFactory()`` call; the ``_initialized`` flag turns every call
    after the first into a no-op.

    Args:
        default_ttl: Cache lifetime in seconds, used when the
            ``CREDENTIAL_CACHE_TTL`` environment variable is unset or empty.

    Environment:
        CREDENTIAL_CACHE_TTL: Overrides ``default_ttl`` (seconds).
        LITELLM_CACHE_TTL: TTL for LiteLLM clients (seconds); falls back to
            the resolved default TTL when unset or empty.

    Raises:
        ValueError: If the resolved TTL text cannot be parsed as a float.
    """
    if self._initialized:
        return

    # A variable that is set but empty (e.g. ``VAR=`` in an env file) is
    # treated as unset. Passing the empty string straight to float() would
    # raise ValueError, and the logging below already treats empty as unset.
    default_ttl_env = os.getenv("CREDENTIAL_CACHE_TTL")
    litellm_ttl_env = os.getenv("LITELLM_CACHE_TTL")

    # Tier is included in key to support same model with different hyperparameters
    self._cache: Dict[tuple[str, str], tuple[Any, float, ModelProvider]] = {}

    self._default_ttl = float(default_ttl_env or str(default_ttl))  # 1 hour default
    self._litellm_ttl = float(litellm_ttl_env) if litellm_ttl_env else self._default_ttl
    self._upstream_base_url_cache: tuple[str, str | None] | None = None

    self._refresh_lock = asyncio.Lock()

    # Filled lazily by _import_client_class to avoid circular dependencies
    self._client_classes: Dict[ModelProvider, type] = {}

    self._initialized = True

    ttl_config = []
    if default_ttl_env:
        ttl_config.append(f"Default: {self._default_ttl}s")
    if litellm_ttl_env:
        ttl_config.append(f"LiteLLM (custom): {self._litellm_ttl}s")
    else:
        ttl_config.append(f"LiteLLM: {self._litellm_ttl}s (using default)")

    logger.info(f"TierClientFactory initialized - TTL configuration: {', '.join(ttl_config)}")
|
|
143
|
+
|
|
144
|
+
def _detect_provider(self, model_name: str) -> ModelProvider:
    """Work out which provider should serve ``model_name``.

    Environment settings that select OpenRouter win over model-name prefix
    detection. This prevents OpenRouter model IDs like
    ``anthropic/claude-sonnet-4.6`` from being misrouted to LiteLLM via the
    ``anthropic/`` prefix match.

    Resolution order:
        1. ``PREFERRED_PROVIDER`` equal to ``openrouter`` -> OpenRouter.
        2. ``MODEL_FAMILY`` equal to ``OPENROUTER`` (case-insensitive) -> OpenRouter.
        3. A known LiteLLM provider prefix on the model name -> LiteLLM.
        4. Otherwise LiteLLM, with a warning unless ``MODEL_FAMILY`` is ``LITELLM``.

    Args:
        model_name: The model identifier

    Returns:
        ModelProvider enum indicating the provider
    """
    family = os.getenv("MODEL_FAMILY", "").upper()
    if os.getenv("PREFERRED_PROVIDER", "") == "openrouter" or family == "OPENROUTER":
        return ModelProvider.OPENROUTER

    # Every prefix ends in "/", so a match already implies the name has one.
    litellm_prefixes = (
        "openai/",
        "anthropic/",
        "vertex_ai/",
        "bedrock/",
        "replicate/",
        "together_ai/",
        "gemini/",
    )
    if model_name.lower().startswith(litellm_prefixes):
        return ModelProvider.LITELLM

    if family != "LITELLM":
        logger.warning(f"Unknown model provider for model: {model_name}, defaulting to LiteLLM")
    return ModelProvider.LITELLM
|
|
187
|
+
|
|
188
|
+
def _get_upstream_base_url(self) -> str | None:
|
|
189
|
+
"""Get the proxy's upstream base URL from the instance config.
|
|
190
|
+
|
|
191
|
+
Reads the proxy.yaml for the current proxy instance to determine
|
|
192
|
+
whether the upstream is local or remote.
|
|
193
|
+
"""
|
|
194
|
+
proxy_id = os.getenv("FORGE_PROXY_ID")
|
|
195
|
+
if not proxy_id:
|
|
196
|
+
return None
|
|
197
|
+
if self._upstream_base_url_cache and self._upstream_base_url_cache[0] == proxy_id:
|
|
198
|
+
return self._upstream_base_url_cache[1]
|
|
199
|
+
try:
|
|
200
|
+
from forge.config.loader import load_proxy_instance_config
|
|
201
|
+
|
|
202
|
+
instance = load_proxy_instance_config(proxy_id)
|
|
203
|
+
upstream = instance.upstream_base_url if instance else None
|
|
204
|
+
if upstream:
|
|
205
|
+
self._upstream_base_url_cache = (proxy_id, upstream)
|
|
206
|
+
return upstream
|
|
207
|
+
except Exception:
|
|
208
|
+
logger.debug("Failed to resolve upstream base URL for proxy %s", proxy_id, exc_info=True)
|
|
209
|
+
return None
|
|
210
|
+
|
|
211
|
+
def _get_ttl_for_provider(self, provider: ModelProvider) -> float:
    """Return the cache TTL in seconds that applies to ``provider``.

    LiteLLM has its own TTL setting; every other provider uses the default.
    """
    return self._litellm_ttl if provider == ModelProvider.LITELLM else self._default_ttl
|
|
216
|
+
|
|
217
|
+
def _get_tier_for_model(self, model_name: str, provider: ModelProvider) -> Optional[str]:
    """Find the tier whose configured model matches ``model_name``.

    Tiers are checked in the order haiku, sonnet, opus. Each tier's model is
    read from the ``{PROVIDER}_{TIER}_MODEL`` environment variable (for
    example ``LITELLM_SONNET_MODEL``) and compared case-insensitively.

    Args:
        model_name: The model identifier (e.g., "openai/gpt-4o-mini")
        provider: The provider type

    Returns:
        Tier name (haiku/sonnet/opus), or None if no tier is configured with
        this model or the provider has no environment prefix.
    """
    env_prefix = {
        ModelProvider.LITELLM: "LITELLM",
        ModelProvider.OPENROUTER: "OPENROUTER",
    }.get(provider)
    if not env_prefix:
        return None

    for tier in ("haiku", "sonnet", "opus"):
        configured = os.getenv(f"{env_prefix}_{tier.upper()}_MODEL")
        if not configured:
            # Unset or empty tier variables never match.
            continue
        if configured.lower() == model_name.lower():
            return tier

    return None
|
|
241
|
+
|
|
242
|
+
def _import_client_class(self, provider: ModelProvider):
    """Register the client class for ``provider`` if not already registered.

    The adapter class is imported inside the method to avoid circular
    dependencies at module import time. Providers other than LiteLLM and
    OpenRouter are left unregistered.
    """
    if provider in self._client_classes:
        return
    if provider not in (ModelProvider.LITELLM, ModelProvider.OPENROUTER):
        return

    from forge.proxy.client_adapter import CoreLLMClientAdapter

    self._client_classes[provider] = CoreLLMClientAdapter
|
|
249
|
+
|
|
250
|
+
def detect_provider_for_model(self, model_name: str) -> ModelProvider:
    """Public entry point for provider detection.

    Lets the server learn the provider before it converts a request, so
    that provider-specific schema handling can be applied. The result is
    exactly what the internal ``_detect_provider`` returns.

    Args:
        model_name: The model identifier

    Returns:
        ModelProvider enum indicating the provider
    """
    detected = self._detect_provider(model_name)
    return detected
|
|
264
|
+
|
|
265
|
+
async def get_client(
    self, model_name: str, tier: Optional[str] = None
) -> Any:  # Returns AbstractLLMClient instances
    """Return a client for ``model_name``, reusing a cached one when still valid.

    Clients are cached per (model_name, tier). A cached client is reused
    while it is younger than the provider's TTL and was created for the
    same provider that is detected now; otherwise a new client is built
    under the refresh lock and replaces the cached entry.

    Args:
        model_name: The model identifier
        tier: The tier name (haiku/sonnet/opus) for tier-specific hyperparameters.
            If not provided, the tier is looked up from the environment and
            falls back to "sonnet".

    Returns:
        Client instance for the appropriate provider

    Raises:
        ValueError: If the detected provider is neither LiteLLM nor OpenRouter.
        ModelCatalogError: If resolving max_tokens for the model fails.

    NOTE(review): the previous docstring also listed AuthenticationError;
    nothing in this method raises it directly — presumably it comes from the
    client adapter. Confirm against forge.proxy.client_adapter.
    """
    provider = self._detect_provider(model_name)
    ttl = self._get_ttl_for_provider(provider)

    # Auto-detect tier as a fallback for backwards compatibility
    if tier is None:
        tier = self._get_tier_for_model(model_name, provider) or "sonnet"
        logger.debug(f"Auto-detected tier '{tier}' for model {model_name}")

    # Cache key includes tier to support same model with different hyperparameters
    cache_key = (model_name, tier)

    # Fast path: check the cache without taking the lock.
    entry = self._cache.get(cache_key)
    if entry is not None:
        cached_client, fetched_at, entry_provider = entry
        age = time.monotonic() - fetched_at

        if age < ttl and entry_provider == provider:
            logger.debug(f"Using cached client for {model_name} (tier={tier}, {provider.value}, age: {age:.0f}s)")
            return cached_client

        logger.info(
            f"Cache expired or provider changed for {model_name} (tier={tier}, age: {age:.0f}s, ttl: {ttl}s)"
        )

    async with self._refresh_lock:
        # Another task may have refreshed the entry while this one waited.
        entry = self._cache.get(cache_key)
        if entry is not None:
            cached_client, fetched_at, entry_provider = entry
            if time.monotonic() - fetched_at < ttl and entry_provider == provider:
                return cached_client

        if provider not in (ModelProvider.LITELLM, ModelProvider.OPENROUTER):
            raise ValueError(f"Unsupported provider: {provider}")

        self._import_client_class(provider)

        # Resolve hyperparameters via the single source of truth
        default_hyperparams = self._resolve_tier_hyperparams(provider, tier, model_name)

        if provider == ModelProvider.OPENROUTER:
            core_provider = "openrouter"
        else:
            from forge.core.llm.detection import (
                detect_provider as core_detect_provider,
            )

            core_provider = core_detect_provider(model_name)

            # detect_provider works from the model prefix (openai/ ->
            # litellm_remote), but local templates route through a local
            # LiteLLM that needs no API key. The proxy instance config's
            # upstream_base_url decides local vs remote.
            if core_provider == "litellm_remote":
                upstream = self._get_upstream_base_url()
                if upstream and _is_local_url(upstream):
                    core_provider = "litellm_local"

        client_cls = self._client_classes[provider]
        client = client_cls(
            model=model_name,
            provider=core_provider,
            max_tokens_override=default_hyperparams.max_tokens,
            tier=tier,
            default_hyperparams=default_hyperparams,
        )

        self._cache[cache_key] = (client, time.monotonic(), provider)
        logger.info(f"Cached new {provider.value} client (core.llm) for {model_name} (tier={tier})")

        return client
|
|
353
|
+
|
|
354
|
+
async def invalidate_and_retry(
    self, model_name: str, tier: Optional[str] = None
) -> Any:  # Returns AbstractLLMClient instances
    """Drop cached clients for a model and build a replacement.

    Called when authentication fails, indicating credentials may be expired.

    Args:
        model_name: The model whose cached client(s) should be discarded
        tier: The tier name (haiku/sonnet/opus). If None, entries for every
            tier of this model are discarded.

    Returns:
        The client returned by ``get_client(model_name, tier=tier)`` after
        the cache entries have been removed.
    """
    logger.warning(f"Invalidating cached credentials for {model_name} (tier={tier}) due to auth failure")

    async with self._refresh_lock:
        # One specific (model, tier) entry, or every tier cached for the model.
        if tier is None:
            stale_keys = [key for key in self._cache if key[0] == model_name]
        else:
            stale_keys = [(model_name, tier)]

        for key in stale_keys:
            self._cache.pop(key, None)

    # asyncio.Lock is not reentrant and get_client() takes the same lock on
    # a cache miss, so the rebuild has to happen after the lock is released.
    return await self.get_client(model_name, tier=tier)
|
|
383
|
+
|
|
384
|
+
def _resolve_tier_hyperparams(
    self,
    provider: ModelProvider,
    tier: str,
    model_name: str,
) -> ModelHyperparameters:
    """Single source of truth for tier-specific hyperparameters.

    Used by both get_client() (actual client creation) and
    get_default_hyperparams_for_tier() (runtime truth reporting).

    Resolution performed by this method, per field:
    - max_tokens: env ({PREFIX}_{TIER}_MAX_TOKENS), then passed through
      _enforce_max_output_tokens_cap() (strict for every provider except
      OpenRouter)
    - reasoning_effort: env ({PREFIX}_{TIER}_REASONING_EFFORT) > tier_override
    - verbosity: env ({PREFIX}_{TIER}_VERBOSITY) > tier_override
    - thinking: env ({PREFIX}_{TIER}_THINKING_TYPE plus
      {PREFIX}_{TIER}_THINKING_BUDGET_TOKENS, default 1024) >
      tier_override.thinking_budget_tokens (a value <= 0 leaves thinking unset)
    - temperature: tier_override only
    - top_p: provider config only

    An env var that is set to an empty string is treated as unset.

    NOTE(review): apart from top_p, no provider-level config fallback is
    applied here; confirm whether callers merge provider config separately.

    Fields left as None fall through to core.llm's own defaults.

    Args:
        provider: Provider whose proxy config and env prefix are used
            (LITELLM or OPENROUTER).
        tier: Tier name; used as the tier_overrides key and, upper-cased,
            as part of the env var names.
        model_name: Model name handed to _enforce_max_output_tokens_cap().

    Returns:
        The resolved ModelHyperparameters for this provider/tier/model.

    Raises:
        ValueError: If the provider is unsupported, or an integer-valued
            env var holds a value that is not a valid integer.
    """
    from forge.core.llm.types import ThinkingConfig

    def _env_int(var_name: str, default: "int | None" = None) -> "int | None":
        # Empty/unset -> default, matching how the string-valued env vars
        # below treat an empty value as "not configured".
        raw = os.getenv(var_name)
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError as exc:
            # Same exception type as before, but name the offending variable.
            raise ValueError(f"{var_name} must be an integer, got {raw!r}") from exc

    if provider == ModelProvider.LITELLM:
        provider_cfg = config.proxy.litellm
        env_prefix = "LITELLM"
    elif provider == ModelProvider.OPENROUTER:
        provider_cfg = config.proxy.openrouter
        env_prefix = "OPENROUTER"
    else:
        raise ValueError(f"Unsupported provider: {provider}")

    tier_upper = tier.upper()
    tier_override = provider_cfg.tier_overrides.get(tier)

    # max_tokens: env > catalog cap (lenient for OpenRouter's open model space)
    requested_max_tokens = _env_int(f"{env_prefix}_{tier_upper}_MAX_TOKENS")
    catalog_strict = provider != ModelProvider.OPENROUTER
    max_tokens_override = _enforce_max_output_tokens_cap(model_name, requested_max_tokens, strict=catalog_strict)

    # reasoning_effort: env > tier_override
    tier_reasoning: str | None
    tier_reasoning_env = os.getenv(f"{env_prefix}_{tier_upper}_REASONING_EFFORT")
    if tier_reasoning_env:
        tier_reasoning = tier_reasoning_env
    elif tier_override and tier_override.reasoning_effort is not None:
        tier_reasoning = tier_override.reasoning_effort
    else:
        tier_reasoning = None

    # verbosity: env > tier_override
    tier_verbosity: str | None
    tier_verbosity_env = os.getenv(f"{env_prefix}_{tier_upper}_VERBOSITY")
    if tier_verbosity_env:
        tier_verbosity = tier_verbosity_env
    elif tier_override and tier_override.verbosity is not None:
        tier_verbosity = tier_override.verbosity
    else:
        tier_verbosity = None

    # thinking: env > tier_override
    tier_thinking: dict[str, str | int] | None
    tier_thinking_type = os.getenv(f"{env_prefix}_{tier_upper}_THINKING_TYPE")
    if tier_thinking_type:
        tier_thinking = {
            "type": tier_thinking_type,
            # Budget defaults to 1024 when the env var is unset or empty.
            "budget_tokens": _env_int(f"{env_prefix}_{tier_upper}_THINKING_BUDGET_TOKENS", 1024),
        }
    elif tier_override and tier_override.thinking_budget_tokens is not None:
        if tier_override.thinking_budget_tokens <= 0:
            # A non-positive configured budget leaves thinking unset.
            tier_thinking = None
        else:
            tier_thinking = {
                "type": "enabled",
                "budget_tokens": tier_override.thinking_budget_tokens,
            }
    else:
        tier_thinking = None

    thinking_config = ThinkingConfig(**tier_thinking) if tier_thinking else None  # type: ignore[arg-type]

    default_hyperparams = ModelHyperparameters(
        max_tokens=max_tokens_override,
        reasoning_effort=tier_reasoning,  # type: ignore[arg-type]
        verbosity=tier_verbosity,  # type: ignore[arg-type]
        thinking=thinking_config,
    )

    # temperature: tier_override only
    if tier_override and tier_override.temperature is not None:
        default_hyperparams.temperature = tier_override.temperature

    # top_p: provider config only
    if provider_cfg.top_p is not None:
        default_hyperparams.top_p = provider_cfg.top_p

    return default_hyperparams
|
|
479
|
+
|
|
480
|
+
def get_default_hyperparams_for_tier(self, *, provider: str, tier: str, model_name: str) -> ModelHyperparameters:
    """Compute the baseline hyperparameters for a provider/tier pair.

    Intended for runtime truth reporting (GET /) and for any caller that
    wants the effective defaults without building a client.

    The provider name is matched case-insensitively against "litellm" and
    "openrouter"; the actual resolution is delegated to
    _resolve_tier_hyperparams(), the single source of truth.

    Args:
        provider: Provider name ("litellm" or "openrouter", any casing).
        tier: Tier name forwarded to _resolve_tier_hyperparams().
        model_name: Model name forwarded to _resolve_tier_hyperparams().

    Returns:
        Whatever _resolve_tier_hyperparams() returns for the mapped provider.

    Raises:
        ValueError: If the provider name is neither "litellm" nor "openrouter".
    """
    normalized = provider.lower()

    # Reject unknown providers up front so the happy path reads straight through.
    if normalized not in ("litellm", "openrouter"):
        raise ValueError(f"Unsupported provider for default hyperparams: {provider}")

    provider_enum = ModelProvider.LITELLM if normalized == "litellm" else ModelProvider.OPENROUTER
    return self._resolve_tier_hyperparams(provider_enum, tier, model_name)
|
|
496
|
+
|
|
497
|
+
def get_cache_status(self) -> Dict[str, Any]:
    """Build a monitoring snapshot of the credential cache.

    Returns:
        A dict with two keys:
        - "ttl_configuration": the default and litellm TTL values.
        - "cached_models": one entry per cache key, keyed by
          "model_name:tier", holding the model, tier, provider value,
          entry age, remaining TTL (never below 0), the TTL itself and
          whether the entry has expired (age >= ttl).
    """
    ttl_configuration = {
        "default": self._default_ttl,
        "litellm": self._litellm_ttl,
    }
    now = time.monotonic()

    cached_models: Dict[str, Any] = {}
    for (model_name, tier), (_, fetched_at, provider) in self._cache.items():
        ttl = self._get_ttl_for_provider(provider)
        age = now - fetched_at

        # "model_name:tier" keeps the report readable compared with a tuple key.
        cached_models[f"{model_name}:{tier}"] = {
            "model": model_name,
            "tier": tier,
            "provider": provider.value,
            "age_seconds": round(age, 1),
            "remaining_ttl_seconds": round(max(0, ttl - age), 1),
            "ttl_seconds": ttl,
            "expired": age >= ttl,
        }

    return {
        "ttl_configuration": ttl_configuration,
        "cached_models": cached_models,
    }
|
|
527
|
+
|
|
528
|
+
def clear_cache(self):
    """Drop every cached credential entry.

    Emits an info-level log message and then empties self._cache.
    """
    logger.info("Clearing all cached credentials")
    cache = self._cache
    cache.clear()
|