multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311):
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,531 @@
1
+ """Tier-aware client factory for proxy model routing.
2
+
3
+ Creates and caches LLM client instances keyed by (model_name, tier),
4
+ with resolved hyperparameters from env vars, tier overrides, and provider config.
5
+ Actual credential fetching is delegated to core.llm.CredentialManager.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import os
11
+ import time
12
+ from enum import Enum
13
+ from threading import Lock
14
+ from typing import Any, Dict, Optional
15
+ from urllib.parse import urlparse
16
+
17
+ from forge.config import config
18
+ from forge.core.llm.types import ModelHyperparameters
19
+ from forge.core.models import (
20
+ ModelCatalogError,
21
+ get_max_output_tokens,
22
+ model_exists,
23
+ )
24
+
25
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Fallback max_tokens returned by _enforce_max_output_tokens_cap() when a model
# is not in the catalog, strict mode is off, and no explicit value was requested.
DEFAULT_MAX_OUTPUT_TOKENS = 16384
29
+
30
+ _LOCAL_HOSTS = ("localhost", "127.0.0.1", "0.0.0.0", "::1")
31
+
32
+
33
+ def _is_local_url(url: str) -> bool:
34
+ """Check if a URL points to a local host."""
35
+ try:
36
+ parsed = urlparse(url)
37
+ return (parsed.hostname or "") in _LOCAL_HOSTS
38
+ except Exception:
39
+ return False
40
+
41
+
42
def _enforce_max_output_tokens_cap(model_name: str, requested: int | None, *, strict: bool = True) -> int:
    """Resolve the effective max_tokens for a model, bounded by the catalog.

    For a model present in the catalog, the catalog's max_output_tokens acts
    as a hard ceiling: no request means "use the ceiling", a request within
    the ceiling is honoured, and a request above it is rejected.

    Args:
        model_name: Model ID (canonical or alias).
        requested: max_tokens asked for by config/env/request, or None when
            nothing was requested.
        strict: When True (default), a model missing from the catalog is an
            error. When False, a missing model yields ``requested`` or, if
            that is None, ``DEFAULT_MAX_OUTPUT_TOKENS`` (used for OpenRouter,
            whose model space is open).

    Returns:
        The max_tokens value to use.

    Raises:
        ModelCatalogError: If the model is unknown in strict mode, or the
            requested value exceeds the catalog ceiling.
    """
    if model_exists(model_name):
        ceiling = get_max_output_tokens(model_name)

        if requested is None:
            return ceiling
        if requested <= ceiling:
            return requested

        raise ModelCatalogError(
            f"Requested max_tokens ({requested}) exceeds model {model_name!r} catalog cap ({ceiling}). "
            "Update catalog or reduce max_tokens override."
        )

    # Model is not in the catalog from here on.
    if strict:
        raise ModelCatalogError(f"Model {model_name!r} not in catalog. Add it to core/data/model_catalog.yaml.")

    logger.debug(f"Model {model_name!r} not in catalog, using default max_output_tokens")
    return DEFAULT_MAX_OUTPUT_TOKENS if requested is None else requested
79
+
80
+
81
class ModelProvider(Enum):
    """Providers the client factory can route a model to.

    Each member's value is the lowercase provider name; it is what appears
    in log messages that print ``provider.value``.
    """

    # Models served through LiteLLM.
    LITELLM = "litellm"
    # Models served through OpenRouter.
    OPENROUTER = "openrouter"
    # Placeholder for a provider that could not be determined.
    UNKNOWN = "unknown"
87
+
88
+
89
+ class TierClientFactory:
90
+ """Tier-aware client factory for proxy model routing.
91
+
92
+ Creates and caches LLM client instances keyed by (model_name, tier)
93
+ with resolved hyperparameters. Delegates credential fetching to
94
+ core.llm.CredentialManager via CoreLLMClientAdapter.
95
+
96
+ Features:
97
+ - Automatic model type detection
98
+ - Tier-specific hyperparameter resolution (env > tier_override > config)
99
+ - Unified caching with configurable TTL
100
+ - Retry on authentication failure
101
+ - Thread-safe client management
102
+ """
103
+
104
+ _instance: "TierClientFactory | None" = None
105
+ _lock = Lock()
106
+ _initialized: bool = False
107
+
108
def __new__(cls, *args, **kwargs):
    """Return the process-wide singleton, creating it on first use.

    Python passes the constructor arguments to ``__new__`` as well as to
    ``__init__``. They are accepted (and ignored) here so that calls such as
    ``TierClientFactory(default_ttl=600)`` reach ``__init__`` instead of
    failing with ``TypeError`` in ``__new__``.

    Returns:
        The single shared TierClientFactory instance.
    """
    # The class-level lock serialises first-time creation across threads.
    with cls._lock:
        if cls._instance is None:
            cls._instance = super(TierClientFactory, cls).__new__(cls)
            # Marks the fresh instance so __init__ runs its setup exactly once.
            cls._instance._initialized = False
        return cls._instance
115
+
116
def __init__(self, default_ttl: Optional[float] = 3600):
    """Set up caches and TTLs; a no-op on an already-initialized singleton.

    Args:
        default_ttl: Cache lifetime in seconds used when the
            ``CREDENTIAL_CACHE_TTL`` environment variable is unset.
            ``None`` means cached clients never expire by age.

    Environment:
        CREDENTIAL_CACHE_TTL: Overrides ``default_ttl`` (seconds).
        LITELLM_CACHE_TTL: TTL for LiteLLM clients; defaults to the
            resolved default TTL.

    Raises:
        ValueError: If either environment variable is set to a value that
            cannot be parsed as a float.
    """
    if self._initialized:
        return

    # Tier is included in key to support same model with different hyperparameters
    self._cache: Dict[tuple[str, str], tuple[Any, float, ModelProvider]] = {}

    default_ttl_env = os.getenv("CREDENTIAL_CACHE_TTL")
    if default_ttl_env is not None:
        self._default_ttl = float(default_ttl_env)
    elif default_ttl is None:
        # No TTL requested: "age < ttl" is then always true, so entries never expire.
        self._default_ttl = float("inf")
    else:
        self._default_ttl = float(default_ttl)
    self._litellm_ttl = float(os.getenv("LITELLM_CACHE_TTL", str(self._default_ttl)))

    # (proxy_id, upstream_base_url) memo used by _get_upstream_base_url().
    self._upstream_base_url_cache: tuple[str, str | None] | None = None

    self._refresh_lock = asyncio.Lock()

    # Lazy imports to avoid circular dependencies
    self._client_classes: Dict[ModelProvider, type] = {}

    ttl_config = []
    if os.getenv("CREDENTIAL_CACHE_TTL"):
        ttl_config.append(f"Default: {self._default_ttl}s")
    if os.getenv("LITELLM_CACHE_TTL"):
        ttl_config.append(f"LiteLLM (custom): {self._litellm_ttl}s")
    else:
        ttl_config.append(f"LiteLLM: {self._litellm_ttl}s (using default)")

    # Flip the flag only once every attribute exists, so a failed or
    # in-progress setup is never mistaken for a finished one.
    self._initialized = True

    logger.info(f"TierClientFactory initialized - TTL configuration: {', '.join(ttl_config)}")
143
+
144
def _detect_provider(self, model_name: str) -> ModelProvider:
    """Detect the model provider from the environment or the model name.

    Resolution order:
      1. ``PREFERRED_PROVIDER == "openrouter"`` -> OpenRouter.
      2. ``MODEL_FAMILY == "OPENROUTER"`` -> OpenRouter.
      3. Model name starting with a known LiteLLM prefix -> LiteLLM.
      4. ``MODEL_FAMILY == "LITELLM"`` -> LiteLLM.
      5. Otherwise LiteLLM, with a warning.

    The environment checks come first so that OpenRouter model IDs such as
    ``anthropic/claude-sonnet-4.6`` are not misrouted to LiteLLM by the
    ``anthropic/`` prefix match. Both environment variables are compared
    case-insensitively and ignoring surrounding whitespace.

    Args:
        model_name: The model identifier.

    Returns:
        ModelProvider enum indicating the provider.
    """
    preferred = os.getenv("PREFERRED_PROVIDER", "").strip().lower()
    if preferred == "openrouter":
        return ModelProvider.OPENROUTER

    model_family = os.getenv("MODEL_FAMILY", "").strip().upper()
    if model_family == "OPENROUTER":
        return ModelProvider.OPENROUTER

    # Every prefix contains "/", so no separate "/" check is needed.
    litellm_prefixes = (
        "openai/",
        "anthropic/",
        "vertex_ai/",
        "bedrock/",
        "replicate/",
        "together_ai/",
        "gemini/",
    )
    if model_name.lower().startswith(litellm_prefixes):
        return ModelProvider.LITELLM

    # Explicit family selection: same result as the fallback, minus the warning.
    if model_family == "LITELLM":
        return ModelProvider.LITELLM

    logger.warning(f"Unknown model provider for model: {model_name}, defaulting to LiteLLM")
    return ModelProvider.LITELLM
187
+
188
def _get_upstream_base_url(self) -> str | None:
    """Look up the upstream base URL of the current proxy instance.

    The proxy is identified by the ``FORGE_PROXY_ID`` environment variable
    and its instance config is loaded to read ``upstream_base_url``. A
    non-empty result is memoised per proxy ID; lookups that fail or yield
    nothing are not memoised and are retried on the next call.

    Returns:
        The configured upstream base URL, or None when no proxy ID is set,
        no URL is configured, or the config could not be loaded.
    """
    active_proxy = os.getenv("FORGE_PROXY_ID")
    if not active_proxy:
        return None

    memo = self._upstream_base_url_cache
    if memo and memo[0] == active_proxy:
        return memo[1]

    try:
        # Imported here rather than at module level.
        from forge.config.loader import load_proxy_instance_config

        proxy_instance = load_proxy_instance_config(active_proxy)
        resolved = proxy_instance.upstream_base_url if proxy_instance else None
        if resolved:
            self._upstream_base_url_cache = (active_proxy, resolved)
        return resolved
    except Exception:
        # Best effort: a failed lookup is only logged at debug level.
        logger.debug("Failed to resolve upstream base URL for proxy %s", active_proxy, exc_info=True)
    return None
210
+
211
def _get_ttl_for_provider(self, provider: ModelProvider) -> float:
    """Return the cache TTL (seconds) for *provider*.

    LiteLLM uses its own TTL; every other provider uses the default TTL.
    """
    return self._litellm_ttl if provider == ModelProvider.LITELLM else self._default_ttl
216
+
217
def _get_tier_for_model(self, model_name: str, provider: ModelProvider) -> Optional[str]:
    """Find the tier whose configured model matches *model_name*.

    For each tier, in the order haiku, sonnet, opus, the environment
    variable ``{PROVIDER}_{TIER}_MODEL`` (e.g. ``LITELLM_SONNET_MODEL``) is
    compared case-insensitively with *model_name*; the first match wins.

    Args:
        model_name: The model identifier (e.g., "openai/gpt-4o-mini").
        provider: The provider type; selects the env-var prefix.

    Returns:
        Tier name (haiku/sonnet/opus), or None if the provider has no
        env-var prefix or no tier is configured with this model.
    """
    env_prefix = {
        ModelProvider.LITELLM: "LITELLM",
        ModelProvider.OPENROUTER: "OPENROUTER",
    }.get(provider)
    if not env_prefix:
        return None

    return next(
        (
            tier_name
            for tier_name in ("haiku", "sonnet", "opus")
            if (configured := os.getenv(f"{env_prefix}_{tier_name.upper()}_MODEL"))
            and configured.lower() == model_name.lower()
        ),
        None,
    )
241
+
242
def _import_client_class(self, provider: ModelProvider):
    """Register the client class for *provider*, importing it on first use.

    The import is deferred to call time to avoid circular dependencies.
    Providers other than LiteLLM and OpenRouter are left unregistered.
    """
    if provider in self._client_classes:
        return  # already registered
    if provider not in (ModelProvider.LITELLM, ModelProvider.OPENROUTER):
        return

    from forge.proxy.client_adapter import CoreLLMClientAdapter

    self._client_classes[provider] = CoreLLMClientAdapter
249
+
250
def detect_provider_for_model(self, model_name: str) -> ModelProvider:
    """Public entry point for provider detection.

    Lets the server learn the provider before converting a request, so it
    can apply provider-specific schema handling.

    Args:
        model_name: The model identifier.

    Returns:
        ModelProvider enum indicating the provider.
    """
    detected = self._detect_provider(model_name)
    return detected
264
+
265
async def get_client(
    self, model_name: str, tier: Optional[str] = None
) -> Any:  # Returns AbstractLLMClient instances
    """Return a client for *model_name*, reusing a cached one when still valid.

    Clients are cached per (model_name, tier). A cached client is reused
    while it is younger than the provider's TTL and was created for the
    provider detected now; otherwise a new client is built under the
    refresh lock and replaces the cache entry.

    Args:
        model_name: The model identifier.
        tier: The tier name (haiku/sonnet/opus) selecting tier-specific
            hyperparameters. When omitted, the tier is looked up from the
            environment and falls back to "sonnet".

    Returns:
        Client instance for the detected provider.

    Raises:
        ValueError: If the detected provider is not LiteLLM or OpenRouter.
        Errors raised while resolving hyperparameters or constructing the
        client are propagated unchanged.
    """
    provider = self._detect_provider(model_name)
    ttl = self._get_ttl_for_provider(provider)

    # Auto-detect tier as a fallback for backwards compatibility
    if tier is None:
        tier = self._get_tier_for_model(model_name, provider) or "sonnet"
        logger.debug(f"Auto-detected tier '{tier}' for model {model_name}")

    # Tier is part of the key so one model can carry different hyperparameters
    cache_key = (model_name, tier)

    # Fast path: check the cache without taking the lock.
    entry = self._cache.get(cache_key)
    if entry is not None:
        cached_client, stored_at, stored_provider = entry
        age = time.monotonic() - stored_at

        if age < ttl and stored_provider == provider:
            logger.debug(f"Using cached client for {model_name} (tier={tier}, {provider.value}, age: {age:.0f}s)")
            return cached_client

        logger.info(
            f"Cache expired or provider changed for {model_name} (tier={tier}, age: {age:.0f}s, ttl: {ttl}s)"
        )

    async with self._refresh_lock:
        # Re-check under the lock: another task may have refreshed the entry
        # while this one was waiting.
        entry = self._cache.get(cache_key)
        if entry is not None:
            cached_client, stored_at, stored_provider = entry
            if time.monotonic() - stored_at < ttl and stored_provider == provider:
                return cached_client

        if provider not in (ModelProvider.LITELLM, ModelProvider.OPENROUTER):
            raise ValueError(f"Unsupported provider: {provider}")

        self._import_client_class(provider)

        # Resolve hyperparameters via the single source of truth
        default_hyperparams = self._resolve_tier_hyperparams(provider, tier, model_name)

        if provider == ModelProvider.OPENROUTER:
            core_provider = "openrouter"
        else:
            from forge.core.llm.detection import (
                detect_provider as core_detect_provider,
            )

            core_provider = core_detect_provider(model_name)

            # Override to litellm_local when upstream is localhost.
            # detect_provider uses model prefix (openai/ -> litellm_remote),
            # but local templates route through a local LiteLLM that needs
            # no API key. The proxy instance config's upstream_base_url is
            # the authoritative source for local vs remote.
            if core_provider == "litellm_remote":
                upstream = self._get_upstream_base_url()
                if upstream and _is_local_url(upstream):
                    core_provider = "litellm_local"

        client_cls = self._client_classes[provider]
        client = client_cls(
            model=model_name,
            provider=core_provider,
            max_tokens_override=default_hyperparams.max_tokens,
            tier=tier,
            default_hyperparams=default_hyperparams,
        )

        self._cache[cache_key] = (client, time.monotonic(), provider)
        logger.info(f"Cached new {provider.value} client (core.llm) for {model_name} (tier={tier})")

        return client
353
+
354
async def invalidate_and_retry(
    self, model_name: str, tier: Optional[str] = None
) -> Any:  # Returns AbstractLLMClient instances
    """Drop cached clients for a model and return a newly built one.

    Intended for use after an authentication failure, when the cached
    client's credentials may have expired.

    Args:
        model_name: The model whose cached client(s) should be discarded.
        tier: The tier name (haiku/sonnet/opus). If None, the entries for
            every tier of this model are discarded.

    Returns:
        The client returned by ``get_client(model_name, tier=tier)``.
    """
    logger.warning(f"Invalidating cached credentials for {model_name} (tier={tier}) due to auth failure")

    async with self._refresh_lock:
        if tier is None:
            # Snapshot the matching keys first; the dict is mutated below.
            stale_keys = [key for key in self._cache if key[0] == model_name]
            for stale in stale_keys:
                del self._cache[stale]
        else:
            self._cache.pop((model_name, tier), None)

    # Must run after the lock is released: get_client() acquires the same
    # asyncio.Lock on a cache miss, and that lock is not reentrant.
    return await self.get_client(model_name, tier=tier)
383
+
384
def _resolve_tier_hyperparams(
    self,
    provider: ModelProvider,
    tier: str,
    model_name: str,
) -> ModelHyperparameters:
    """Single source of truth for tier-specific hyperparameters.

    Used by both get_client() (actual client creation) and
    get_default_hyperparams_for_tier() (runtime truth reporting).

    Resolution implemented here, per field ({PREFIX} is LITELLM or
    OPENROUTER, {TIER} is the upper-cased tier name):
    - max_tokens: {PREFIX}_{TIER}_MAX_TOKENS, passed through
      _enforce_max_output_tokens_cap() (strict for LiteLLM, lenient for
      OpenRouter).
    - reasoning_effort / verbosity: {PREFIX}_{TIER}_REASONING_EFFORT /
      {PREFIX}_{TIER}_VERBOSITY, else the matching tier_override attribute.
    - thinking: {PREFIX}_{TIER}_THINKING_TYPE with the budget taken from
      {PREFIX}_{TIER}_THINKING_BUDGET_TOKENS (default 1024), else
      tier_override.thinking_budget_tokens (a value <= 0 yields no
      thinking config).
    - temperature: tier_override only.
    - top_p: provider config only.

    An environment variable that is set but empty is treated as unset.
    Fields left as None fall through to core.llm's own defaults.

    NOTE(review): an earlier revision of this docstring also listed
    provider-config fallbacks for max_tokens and temperature; this method
    does not apply them -- confirm whether they are layered on elsewhere.

    Args:
        provider: Provider whose config and env-var prefix are used.
        tier: Tier name; key into provider_cfg.tier_overrides and (upper-cased)
            part of the env-var names.
        model_name: Model name handed to the max-output-tokens cap check.

    Returns:
        The ModelHyperparameters resolved for this provider/tier/model.

    Raises:
        ValueError: If the provider is neither LITELLM nor OPENROUTER, or an
            integer environment variable holds a non-numeric value.
    """
    from forge.core.llm.types import ThinkingConfig

    if provider == ModelProvider.LITELLM:
        provider_cfg = config.proxy.litellm
        env_prefix = "LITELLM"
    elif provider == ModelProvider.OPENROUTER:
        provider_cfg = config.proxy.openrouter
        env_prefix = "OPENROUTER"
    else:
        raise ValueError(f"Unsupported provider: {provider}")

    tier_upper = tier.upper()
    tier_override = provider_cfg.tier_overrides.get(tier)

    # max_tokens: env > catalog cap (lenient for OpenRouter's open model space)
    tier_max_tokens = os.getenv(f"{env_prefix}_{tier_upper}_MAX_TOKENS")
    requested_max_tokens = int(tier_max_tokens) if tier_max_tokens else None
    catalog_strict = provider != ModelProvider.OPENROUTER
    max_tokens_override = _enforce_max_output_tokens_cap(model_name, requested_max_tokens, strict=catalog_strict)

    # reasoning_effort: env > tier_override
    tier_reasoning: str | None
    tier_reasoning_env = os.getenv(f"{env_prefix}_{tier_upper}_REASONING_EFFORT")
    if tier_reasoning_env:
        tier_reasoning = tier_reasoning_env
    elif tier_override and tier_override.reasoning_effort is not None:
        tier_reasoning = tier_override.reasoning_effort
    else:
        tier_reasoning = None

    # verbosity: env > tier_override
    tier_verbosity: str | None
    tier_verbosity_env = os.getenv(f"{env_prefix}_{tier_upper}_VERBOSITY")
    if tier_verbosity_env:
        tier_verbosity = tier_verbosity_env
    elif tier_override and tier_override.verbosity is not None:
        tier_verbosity = tier_override.verbosity
    else:
        tier_verbosity = None

    # thinking: env > tier_override
    tier_thinking: dict[str, str | int] | None
    tier_thinking_type = os.getenv(f"{env_prefix}_{tier_upper}_THINKING_TYPE")
    if tier_thinking_type:
        # os.getenv() only substitutes its default when the variable is
        # missing; a set-but-empty budget must fall back to 1024 as well
        # instead of crashing in int("").
        tier_thinking_budget = os.getenv(f"{env_prefix}_{tier_upper}_THINKING_BUDGET_TOKENS")
        tier_thinking = {
            "type": tier_thinking_type,
            "budget_tokens": int(tier_thinking_budget) if tier_thinking_budget else 1024,
        }
    elif tier_override and tier_override.thinking_budget_tokens is not None:
        if tier_override.thinking_budget_tokens <= 0:
            # A non-positive budget in the tier override means "no thinking".
            tier_thinking = None
        else:
            tier_thinking = {
                "type": "enabled",
                "budget_tokens": tier_override.thinking_budget_tokens,
            }
    else:
        tier_thinking = None

    thinking_config = ThinkingConfig(**tier_thinking) if tier_thinking else None  # type: ignore[arg-type]

    default_hyperparams = ModelHyperparameters(
        max_tokens=max_tokens_override,
        reasoning_effort=tier_reasoning,  # type: ignore[arg-type]
        verbosity=tier_verbosity,  # type: ignore[arg-type]
        thinking=thinking_config,
    )

    # temperature: tier_override only
    if tier_override and tier_override.temperature is not None:
        default_hyperparams.temperature = tier_override.temperature

    # top_p: provider config only
    if provider_cfg.top_p is not None:
        default_hyperparams.top_p = provider_cfg.top_p

    return default_hyperparams
479
+
480
def get_default_hyperparams_for_tier(self, *, provider: str, tier: str, model_name: str) -> ModelHyperparameters:
    """Compute the baseline hyperparameters for a provider/tier pair.

    Intended for runtime truth reporting (GET /) and other callers that
    want the effective defaults without creating a client. The provider
    name is matched case-insensitively against "litellm" and "openrouter",
    and the actual resolution is delegated to _resolve_tier_hyperparams().

    Args:
        provider: Provider name, "litellm" or "openrouter" (any casing).
        tier: Tier name, forwarded unchanged.
        model_name: Model name, forwarded unchanged.

    Returns:
        Whatever _resolve_tier_hyperparams() returns for the mapped provider.

    Raises:
        ValueError: If the provider name is not one of the supported values.
    """
    provider_key = provider.lower()

    # Reject unknown names up front so the mapping below only has two cases.
    if provider_key not in ("litellm", "openrouter"):
        raise ValueError(f"Unsupported provider for default hyperparams: {provider}")

    provider_enum = ModelProvider.LITELLM if provider_key == "litellm" else ModelProvider.OPENROUTER
    return self._resolve_tier_hyperparams(provider_enum, tier, model_name)
496
+
497
def get_cache_status(self) -> Dict[str, Any]:
    """Build a monitoring snapshot of the cache.

    Returns:
        A dict with two keys:
        - "ttl_configuration": the configured default and LiteLLM TTLs.
        - "cached_models": one entry per cache key, keyed "model_name:tier",
          holding the model, tier, provider value, age, remaining TTL
          (never below 0), the provider's TTL and an "expired" flag.
    """
    now = time.monotonic()
    cached_models: Dict[str, Any] = {}

    # Cache keys are (model_name, tier); values are 3-tuples whose second
    # and third items are the fetch timestamp and the provider.
    for (model_name, tier), (_, fetched_at, provider) in self._cache.items():
        ttl = self._get_ttl_for_provider(provider)
        age = now - fetched_at

        # "model_name:tier" is used as the display key for readability.
        cached_models[f"{model_name}:{tier}"] = {
            "model": model_name,
            "tier": tier,
            "provider": provider.value,
            "age_seconds": round(age, 1),
            "remaining_ttl_seconds": round(max(0, ttl - age), 1),
            "ttl_seconds": ttl,
            "expired": age >= ttl,
        }

    return {
        "ttl_configuration": {
            "default": self._default_ttl,
            "litellm": self._litellm_ttl,
        },
        "cached_models": cached_models,
    }
527
+
528
def clear_cache(self):
    """Drop every cached credential.

    Emits an INFO log line and then empties ``self._cache`` in place.
    """
    logger.info("Clearing all cached credentials")
    # Clear in place so the same cache object stays attached to the instance.
    self._cache.clear()