multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,1018 @@
|
|
|
1
|
+
"""Proxy orchestration.
|
|
2
|
+
|
|
3
|
+
This module implements the **active** proxy start workflow:
|
|
4
|
+
|
|
5
|
+
- Reuse an existing healthy proxy from the proxy registry.
|
|
6
|
+
- Adopt a healthy proxy found at the template's default port if it is not yet registered.
|
|
7
|
+
- Otherwise spawn a new proxy process and wait for it to become healthy.
|
|
8
|
+
|
|
9
|
+
The proxy registry persistence layer lives in `forge.proxy.proxies`.
|
|
10
|
+
|
|
11
|
+
NOTE: Proxy start is intentionally implemented as a synchronous, CLI-friendly
|
|
12
|
+
workflow (blocking + polling). The proxy server itself remains async.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
import os
|
|
19
|
+
import shutil
|
|
20
|
+
import socket
|
|
21
|
+
import subprocess
|
|
22
|
+
import sys
|
|
23
|
+
import time
|
|
24
|
+
from dataclasses import asdict, dataclass
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
import httpx
|
|
29
|
+
from rich.console import Console
|
|
30
|
+
|
|
31
|
+
from forge.config import TierOverride, load_config
|
|
32
|
+
from forge.config.loader import (
|
|
33
|
+
compute_template_digest,
|
|
34
|
+
get_proxy_file_path,
|
|
35
|
+
template_exists,
|
|
36
|
+
write_proxy_instance_config,
|
|
37
|
+
)
|
|
38
|
+
from forge.config.schema import (
|
|
39
|
+
BackendDependency,
|
|
40
|
+
ProxyInstanceConfig,
|
|
41
|
+
TierModels,
|
|
42
|
+
TierOverrides,
|
|
43
|
+
)
|
|
44
|
+
from forge.core.auth.template_secrets import resolve_env_or_credential
|
|
45
|
+
from forge.core.paths import get_forge_home
|
|
46
|
+
from forge.core.state import now_iso
|
|
47
|
+
from forge.proxy.proxies import ProxyEntry, ProxyRegistry, ProxyRegistryStore
|
|
48
|
+
|
|
49
|
+
logger = logging.getLogger(__name__)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ProxyStartError(ValueError):
    """Signal that starting (or preparing to start) a proxy failed.

    Subclasses ``ValueError`` so existing ``except ValueError`` handlers keep working.
    """
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class TierOverrideOptions:
    """Per-tier hyperparameter overrides supplied on the command line.

    Each field replaces the corresponding template default when a proxy is
    started. A value of ``None`` means "keep the template default".
    """

    # Reasoning-effort overrides, one per tier.
    haiku_reasoning_effort: str | None = None
    sonnet_reasoning_effort: str | None = None
    opus_reasoning_effort: str | None = None
    # Temperature overrides, one per tier.
    haiku_temperature: float | None = None
    sonnet_temperature: float | None = None
    opus_temperature: float | None = None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass(frozen=True)
class ProxyStartResult:
    """Immutable outcome of a proxy start: the proxy entry and how it was obtained."""

    proxy: ProxyEntry
    # One of: "reuse" | "adopt" | "spawn"
    source: str
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class PruneStaleProxiesResult:
    """Immutable summary of a stale-proxy pruning pass."""

    # Identifiers of the proxies that were pruned.
    pruned_proxy_ids: list[str]
    # Overlay directories that were deleted, as string paths.
    deleted_overlay_dirs: list[str]
    # (proxy_id, error message) pairs for overlay directories that could not be deleted.
    delete_errors: list[tuple[str, str]]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _get_proxy_overlay_dir(proxy_id: str) -> Path:
    """Return the overlay directory for ``proxy_id`` inside the Forge home ``proxies`` folder."""
    proxies_root = get_forge_home() / "proxies"
    return proxies_root / proxy_id
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _tier_override_to_dict(override: TierOverride | None) -> dict[str, Any] | None:
    """Flatten a tier override into a dict holding only its non-``None`` fields.

    Returns ``None`` when no override is given or when every field is ``None``.
    """
    if override is None:
        return None

    populated = {name: value for name, value in asdict(override).items() if value is not None}
    if not populated:
        return None
    return populated
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def create_proxy_file(
    *,
    proxy_id: str,
    template: str,
    base_url: str,
    port: int,
    cli_overrides: TierOverrideOptions | None = None,
    upstream_base_url: str | None = None,
) -> Path:
    """Write a complete proxy.yaml for ``proxy_id`` built from a template plus CLI overrides.

    The resulting file is self-contained: template values are copied in at
    creation time rather than merged at runtime. Per the original module
    documentation the file lives at ~/.forge/proxies/<proxy_id>/proxy.yaml.

    Args:
        proxy_id: Identifier of the proxy instance.
        template: Name of the template whose configuration is copied.
        base_url: This proxy's own endpoint URL.
        port: Port the proxy listens on.
        cli_overrides: Optional per-tier overrides layered over the template's
            tier overrides.
        upstream_base_url: Explicit upstream URL. When omitted, the template's
            provider base URL is used, then a stored/env value, then (for
            templates with a backend dependency) the dependency's port.

    Returns:
        Path to the written proxy.yaml, as returned by
        ``write_proxy_instance_config``.

    Raises:
        ProxyStartError: If no upstream URL can be resolved.
    """
    cfg = load_config(template=template)
    provider = cfg.proxy.get_provider()
    provider_name = cfg.proxy.preferred_provider or "litellm"

    template_digest = compute_template_digest(template)

    tiers = TierModels(
        haiku=provider.tiers.haiku,
        sonnet=provider.tiers.sonnet,
        opus=provider.tiers.opus,
    )

    # Template tier overrides form the base layer; CLI values win when present.
    template_overrides = provider.tier_overrides

    def _merge_tier(tier: str) -> TierOverride | None:
        merged = _tier_override_to_dict(template_overrides.get(tier)) or {}

        if cli_overrides:
            # CLI option names follow the "<tier>_<field>" convention.
            for field_name in ("reasoning_effort", "temperature"):
                cli_value = getattr(cli_overrides, f"{tier}_{field_name}", None)
                if cli_value is not None:
                    merged[field_name] = cli_value

        if not merged:
            return None
        return TierOverride(**merged)

    tier_overrides = TierOverrides(
        haiku=_merge_tier("haiku"),
        sonnet=_merge_tier("sonnet"),
        opus=_merge_tier("opus"),
    )

    # Carry over only the provider settings that deviate from their defaults.
    provider_settings: dict[str, Any] = {}
    api_mode = getattr(provider, "openai_api_mode", "auto")
    if api_mode != "auto":
        provider_settings["openai_api_mode"] = api_mode
    if getattr(provider, "error_hints", False):
        provider_settings["error_hints"] = True

    # Upstream precedence: explicit arg > template > env/credential file > backend port.
    upstream = upstream_base_url or provider.base_url
    backend_dep = cfg.proxy.backend_dependency
    if not upstream:
        if backend_dep is None:
            upstream = resolve_env_or_credential("LITELLM_BASE_URL") or ""
        else:
            # Templates with a backend dependency use the local-backend URL,
            # falling back to the dependency's declared port.
            upstream = resolve_env_or_credential("LITELLM_LOCAL_BASE_URL") or ""
            if not upstream and backend_dep.port:
                upstream = f"http://localhost:{backend_dep.port}"

    if not upstream:
        raise ProxyStartError(
            f"Template '{template}' has no upstream URL configured.\n"
            f"Use: forge proxy create {template} --base-url https://your-litellm-server/\n"
            f"Or store it: forge auth login -c litellm-remote"
        )

    instance_config = ProxyInstanceConfig(
        proxy_format=1,
        template=template,
        template_digest=template_digest,
        provider=provider_name,
        proxy_endpoint=base_url,
        port=port,
        upstream_base_url=upstream,
        tiers=tiers,
        family=cfg.proxy.family,
        tier_overrides=tier_overrides,
        model_alternatives=provider.model_alternatives,
        default_tier=cfg.proxy.default_tier or "sonnet",
        provider_settings=provider_settings,
        created_at=now_iso(),
    )

    return write_proxy_instance_config(proxy_id, instance_config)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def prune_stale_proxies(*, timeout_s: float = 5.0) -> PruneStaleProxiesResult:
    """Drop stale proxies from the registry, then remove their overlay directories.

    Staleness (normative, per the registry contract):
    - Only Forge-spawned proxies (pid != None) are eligible
    - A proxy is stale once its pid is no longer running

    The operation is best-effort by design:
    - The registry is pruned first (under lock)
    - Overlay directories are deleted afterwards (no lock held)
    - Failures to delete an overlay are collected, never raised

    Args:
        timeout_s: Timeout forwarded to the registry store's ``prune_dead_pids``.

    Returns:
        The pruned ids, the overlay directories removed, and any deletion errors.
    """
    registry_store = ProxyRegistryStore()
    stale_ids = registry_store.prune_dead_pids(timeout_s=timeout_s)

    removed_dirs: list[str] = []
    failures: list[tuple[str, str]] = []

    # Each overlay sits next to the registry file: ~/.forge/proxies/<proxy_id>/
    for stale_id in stale_ids:
        target = registry_store.registry_path.parent / stale_id
        if target.exists():
            try:
                shutil.rmtree(target)
            except OSError as exc:
                failures.append((stale_id, str(exc)))
            else:
                removed_dirs.append(str(target))

    return PruneStaleProxiesResult(
        pruned_proxy_ids=stale_ids,
        deleted_overlay_dirs=removed_dirs,
        delete_errors=failures,
    )
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _has_env_var(var_name: str) -> bool:
    """Report whether ``var_name`` is present in the process environment.

    An empty value still counts as present. Per the original note,
    ``load_config()`` loads .env files via ``load_dotenv()``, so consulting
    ``os.environ`` is enough once configuration has been loaded.
    """
    return os.getenv(var_name) is not None
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _ensure_template_credentials(template: str) -> None:
    """Abort early when a template's secret credentials cannot be resolved.

    Only secret variables (API keys) are verified. Variables flagged as
    connection values (such as LITELLM_BASE_URL) are skipped because they can
    also be supplied by CLI --base-url or by persisted proxy config. Intended
    for the spawn path only, after reuse/adoption checks have passed.

    Args:
        template: Template name used to look up the required secrets.

    Raises:
        ProxyStartError: If at least one required secret is unresolved.
    """
    from forge.core.auth.capabilities import (
        credential_for_env_var,
        credentials_for_template,
        format_missing_credential_error,
    )
    from forge.core.auth.template_secrets import TEMPLATE_SECRETS

    required = TEMPLATE_SECRETS.get(template, [])
    if not required:
        return

    def _is_connection_value(name: str) -> bool:
        # Connection values (base URLs) may come from CLI args, proxy config,
        # or backend_dependency, so they are never reported as missing.
        owner = credential_for_env_var(name)
        if not owner:
            return False
        spec = next((candidate for candidate in owner.env_vars if candidate.name == name), None)
        return bool(spec and spec.connection_value)

    missing: list[str] = [
        name
        for name in required
        if not _is_connection_value(name) and not resolve_env_or_credential(name)
    ]
    if not missing:
        return

    try:
        from forge.runtime_config import get_runtime_config

        env_ignored = get_runtime_config().auth_ignore_env
    except Exception as e:
        logger.debug("Could not read auth_ignore_env; formatting credential error without env-ignored note: %s", e)
        env_ignored = False

    creds = credentials_for_template(template)
    if not creds:
        raise ProxyStartError(
            f"Template '{template}' requires credentials: {', '.join(missing)}\n"
            f"Tip: Run 'forge auth login' to store them, or add to .env / shell exports."
        )

    raise ProxyStartError(
        format_missing_credential_error(
            creds[0], missing_vars=missing, template=template, env_ignored=env_ignored
        )
    )
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _ensure_dependency_backend(backend_dep: BackendDependency, template: str) -> None:
    """Ensure dependency backend is running before starting proxy.

    Auto-creates backend config if missing, then starts backend.
    Runs during start_proxy(), NOT during create.

    While the backend is being ensured, credential values for the required
    env vars are temporarily written into ``os.environ`` and restored
    afterwards, even if resolving or starting fails partway through.

    Args:
        backend_dep: Backend dependency declaration from template
        template: Template name (for error messages)

    Raises:
        ProxyStartError: If backend config creation fails, env vars missing, or backend fails to start
    """
    from forge.backend import BackendManager
    from forge.backend.adapters import get_adapter
    from forge.backend.creation import (
        create_backend_config,
        get_backend_config_path,
        is_backend_config_outdated,
    )
    from forge.backend.registry import BackendRegistryStore

    def _read_auth_ignore_env(fallback_message: str) -> bool:
        # Best-effort read of the runtime flag; any failure is logged at debug
        # level (with the caller's message) and treated as "not ignoring env".
        try:
            from forge.runtime_config import get_runtime_config

            return get_runtime_config().auth_ignore_env
        except Exception as e:
            logger.debug(fallback_message, e)
            return False

    console = Console(width=200)

    backend_id = f"{backend_dep.adapter}-{backend_dep.port}"

    backend_registry = BackendRegistryStore()
    backend_manager = BackendManager(backend_registry)
    backend_manager.register_adapter(backend_dep.adapter, get_adapter(backend_dep.adapter))

    backend_config = get_backend_config_path(backend_dep.adapter)

    if not backend_config.exists():
        # Auto-create backend config (copy from defaults/backends/, first use)
        console.print(f"[dim]Creating backend config for '{backend_dep.adapter}' (first use)...[/dim]")
        try:
            create_backend_config(adapter_type=backend_dep.adapter)
            console.print(f"[green]✓[/green] Backend config created at {backend_config}")
        except Exception as e:
            # Chain explicitly so the root cause is reported as the direct cause.
            raise ProxyStartError(f"Failed to create backend config: {e}") from e
    elif is_backend_config_outdated(backend_dep.adapter):
        # Config exists — check if default has been updated (new models available)
        console.print(
            f"[yellow]⚠︎[/yellow] Backend config differs from defaults (new models may be available).\n"
            f"[dim]Tip: Delete {backend_config} and restart to get latest defaults.[/dim]"
        )

    missing = [k for k in backend_dep.required_env_vars if not resolve_env_or_credential(k)]
    if missing:
        from forge.core.auth.capabilities import (
            credentials_for_template,
            format_missing_credential_error,
        )

        env_ignored = _read_auth_ignore_env(
            "Could not read auth_ignore_env; formatting credential error without env-ignored note: %s"
        )

        creds = credentials_for_template(template)
        if creds:
            raise ProxyStartError(
                format_missing_credential_error(
                    creds[0], missing_vars=missing, template=template, env_ignored=env_ignored
                )
            )
        raise ProxyStartError(
            f"Template '{template}' requires credentials: {', '.join(missing)}\n"
            f"Tip: Run 'forge auth login' to store them, or add to .env / shell exports."
        )

    # Inject credential-file values into os.environ for the backend subprocess
    # (LiteLLM adapter copies os.environ when spawning).
    # When auth_ignore_env is active, override even when env var is present
    # so the subprocess uses the credential-file value, not the ignored env var.
    ignore_env = _read_auth_ignore_env(
        "Could not read auth_ignore_env; using environment credentials for backend subprocess: %s"
    )

    _SENTINEL = object()  # marks "variable was not set before injection"
    originals: dict[str, str | object] = {}
    try:
        # Injection lives inside the try so the finally-clause restores the
        # environment even if resolving a later key raises.
        for key in backend_dep.required_env_vars:
            if ignore_env or not os.environ.get(key):
                val = resolve_env_or_credential(key)
                if val:
                    # setdefault keeps the first-seen original if a key is listed twice.
                    originals.setdefault(key, os.environ.get(key, _SENTINEL))
                    os.environ[key] = val

        try:
            result = backend_manager.ensure_backend(backend_id, backend_dep.adapter, backend_dep.port)
            if result.source == "start":
                console.print(f"[green]✓[/green] Backend '{backend_id}' started on port {backend_dep.port}")
            else:
                console.print(f"[dim]Backend '{backend_id}' already running on port {backend_dep.port}[/dim]")
        except Exception as e:
            raise ProxyStartError(
                f"Failed to start dependency backend for '{template}': {e}\n"
                f"Backend: {backend_dep.adapter} on port {backend_dep.port}"
            ) from e
    finally:
        # Put the environment back exactly as it was before injection.
        for key, original in originals.items():
            if original is _SENTINEL:
                os.environ.pop(key, None)
            else:
                os.environ[key] = str(original)
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def start_proxy(
    *,
    template: str,
    host: str = "localhost",
    proxy_id: str | None = None,
    port: int | None = None,
    timeout_s: float = 10.0,
    max_port_attempts: int = 20,
    tier_overrides: TierOverrideOptions | None = None,
    skip_proxy_file: bool = False,
    upstream_base_url: str | None = None,
) -> ProxyStartResult:
    """Start a proxy for the given template.

    Semantics:
    1) Reuse a registered healthy proxy (by proxy_id if given, otherwise any for the template).
    2) Adopt a healthy unregistered proxy running at the target port if not registered.
    3) Spawn a new proxy process and register it.

    When spawning a new proxy, creates a proxy config at
    ~/.forge/proxies/<proxy_id>/proxy.yaml with tier_overrides from the
    template, merged with any CLI overrides (unless skip_proxy_file=True).

    Args:
        template: Proxy template (e.g., "litellm-openai"). Must match an existing template overlay.
        host: Host to bind the proxy to and to connect healthchecks to.
        proxy_id: Optional proxy identity. When given, reuse checks only this ID (not any
            template proxy), and spawn uses this ID instead of generating one.
        port: Optional port. When given, used directly (no port scan). Fails loudly if in use.
        timeout_s: Total time to wait for the proxy to become healthy.
        max_port_attempts: Upper bound for port scanning (only used when port is None).
        tier_overrides: Optional per-tier hyperparameter overrides from CLI.
        skip_proxy_file: If True, skip creating proxy.yaml (for start_cmd where file exists).
        upstream_base_url: Optional upstream URL forwarded to create_proxy_file. When set,
            step 1 (registry reuse) is skipped.

    Returns:
        ProxyStartResult containing the proxy entry and the start source
        ("reuse", "adopt" or "spawn").

    Raises:
        ProxyStartError: On invalid template, no ports available, proxy start failure, timeout, etc.
        ProxyRegistryCorruptedError: If the registry exists but cannot be parsed.
    """

    _validate_template_exists(template)

    cfg = load_config(template=template)

    store = ProxyRegistryStore()
    registry = store.read()  # May raise ProxyRegistryCorruptedError

    # 1) Reuse a registered healthy proxy.
    # Skip registry reuse when an explicit upstream URL is requested,
    # since an existing proxy may point at a different gateway.
    reused = None
    if not upstream_base_url:
        reused = _try_reuse_registered_proxy(
            registry=registry,
            template=template,
            proxy_id=proxy_id,
            timeout_s=min(1.0, timeout_s),
        )
    if reused is not None:
        # Persist best-effort status updates without clobbering concurrent writers.
        def _persist_reuse(r: ProxyRegistry) -> None:
            entry = r.proxies.get(reused.proxy_id)
            if entry is None:
                return
            entry.last_seen_at = reused.last_seen_at
            entry.status = reused.status

        store.update(timeout_s=5.0, mutate=_persist_reuse)
        return ProxyStartResult(proxy=reused, source="reuse")

    target_port = port if port is not None else _get_template_default_port(template)
    target_base_url = _base_url(host, target_port)

    # 2) Adopt a healthy unregistered proxy at the target port if it is not registered.
    # If another Forge-managed proxy already owns the port, do not alias it
    # under a new proxy_id. That would let one FORGE_HOME silently point at
    # another home's proxy process and later fail identity checks.
    # Runs entirely under lock to prevent TOCTOU races: two concurrent callers
    # could both health-check the same orphan and create duplicate entries.
    adopted: ProxyEntry | None = None
    health_timeout = min(1.0, timeout_s)

    def _try_adopt_under_lock(r: ProxyRegistry) -> None:
        nonlocal adopted
        already_registered = any(
            entry.template == template and entry.base_url == target_base_url for entry in r.proxies.values()
        )
        if already_registered:
            return

        if not check_proxy_health(
            base_url=target_base_url,
            expected_template=template,
            timeout_s=health_timeout,
            require_unregistered=True,
        ):
            return

        now = now_iso()
        entry = ProxyEntry(
            proxy_id=proxy_id or _new_proxy_id(set(r.proxies.keys())),
            template=template,
            base_url=target_base_url,
            port=target_port,
            pid=None,
            created_at=now,
            last_seen_at=now,
            status="healthy",
        )
        r.proxies[entry.proxy_id] = entry
        adopted = entry

    # Lock timeout must exceed health check timeout (held inside lock)
    store.update(timeout_s=health_timeout + 5.0, mutate=_try_adopt_under_lock)
    if adopted is not None:
        return ProxyStartResult(proxy=adopted, source="adopt")

    # 3) Spawn a new proxy process.
    # Dependency backend + credential preflights run here (not earlier)
    # so reuse/adopt paths aren't blocked by missing credentials or
    # backend state in the current shell.
    if cfg.proxy.backend_dependency:
        _ensure_dependency_backend(cfg.proxy.backend_dependency, template)
    _ensure_template_credentials(template)

    # Port selection: honor explicit port or scan for available
    if port is not None:
        if _is_port_in_use(target_port):
            raise ProxyStartError(
                f"Port {target_port} is already in use and could not be adopted. "
                f"Stop the process using that port or choose a different one."
            )
        spawn_port = target_port
    else:
        start_port = target_port
        if _is_port_in_use(start_port):
            start_port = target_port + 1
        spawn_port = _find_available_port(start_port=start_port, max_attempts=max_port_attempts)

    base_url = _base_url(host, spawn_port)

    # ID selection: honor explicit proxy_id or generate one
    actual_proxy_id = proxy_id or _new_proxy_id(set(registry.proxies.keys()))

    # Create full proxy file (user owns the entire config)
    # Do this before spawning so the proxy can load it on startup
    # Skip when starting an existing proxy (start_cmd) to preserve user edits
    if not skip_proxy_file:
        create_proxy_file(
            proxy_id=actual_proxy_id,
            template=template,
            base_url=base_url,
            port=spawn_port,
            cli_overrides=tier_overrides,
            upstream_base_url=upstream_base_url,
        )

    # Register proxy BEFORE spawning so startup validation passes (B2.1.3)
    # Server validates that proxy_id exists in registry on startup
    now = now_iso()
    starting_proxy = ProxyEntry(
        proxy_id=actual_proxy_id,
        template=template,
        base_url=base_url,
        port=spawn_port,
        pid=None,  # Not known yet
        created_at=now,
        last_seen_at=None,
        status="starting",
    )

    def _register_starting(r: ProxyRegistry) -> None:
        r.proxies[actual_proxy_id] = starting_proxy

    store.update(timeout_s=5.0, mutate=_register_starting)

    # The spawn itself sits inside the try block: if it raises (missing
    # dependencies, Popen failure) the "starting" registry entry and the
    # freshly created proxy directory must be rolled back just like they are
    # when the health wait fails.
    proc: subprocess.Popen[bytes] | None = None
    stderr_capture: Path | None = None
    try:
        proc, stderr_capture = _spawn_proxy_process(
            template=template,
            host=host,
            port=spawn_port,
            proxy_id=actual_proxy_id,
            provider=cfg.proxy.preferred_provider,
        )
        _wait_until_healthy(
            base_url=base_url,
            expected_template=template,
            proc=proc,
            stderr_capture=stderr_capture,
            timeout_s=timeout_s,
            expected_proxy_id=actual_proxy_id,
        )
    except Exception:
        if proc is not None:
            _terminate_process(proc)
        # Clean up stderr capture on failure (best effort)
        if stderr_capture is not None and stderr_capture.exists():
            try:
                stderr_capture.unlink()
            except OSError:
                pass
        # Clean up the proxy directory AND registry entry on failure
        # Only clean proxy dir if we created it (not skip_proxy_file)
        if not skip_proxy_file:
            proxy_dir = get_proxy_file_path(actual_proxy_id).parent
            if proxy_dir.exists():
                shutil.rmtree(proxy_dir, ignore_errors=True)

        def _remove_failed(r: ProxyRegistry) -> None:
            r.proxies.pop(actual_proxy_id, None)

        store.update(timeout_s=5.0, mutate=_remove_failed)
        raise

    healthy_proxy = ProxyEntry(
        proxy_id=actual_proxy_id,
        template=template,
        base_url=base_url,
        port=spawn_port,
        pid=proc.pid,
        created_at=now,
        last_seen_at=now_iso(),
        status="healthy",
    )

    def _mark_healthy(r: ProxyRegistry) -> None:
        r.proxies[actual_proxy_id] = healthy_proxy

    store.update(timeout_s=5.0, mutate=_mark_healthy)
    return ProxyStartResult(proxy=healthy_proxy, source="spawn")
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def _validate_template_exists(template: str) -> None:
    """Raise ProxyStartError unless ``template_exists`` accepts ``template``."""
    if template_exists(template):
        return
    raise ProxyStartError(
        f"Unknown template '{template}'. Run 'forge proxy template list' to see available templates."
    )
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
def _get_template_default_port(template: str) -> int:
    """Return the template's ``proxy.default_port`` as an int.

    Raises:
        ProxyStartError: If the loaded config has no (or a falsy) default port.
    """
    configured = load_config(template=template).proxy.default_port
    if configured:
        return int(configured)
    raise ProxyStartError(f"Template '{template}' has no proxy.default_port configured")
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def _try_reuse_registered_proxy(
    *,
    registry: ProxyRegistry,
    template: str,
    proxy_id: str | None = None,
    timeout_s: float,
) -> ProxyEntry | None:
    """Find a registered proxy for ``template`` that passes a health check.

    With ``proxy_id`` set, only that registry entry is considered (and only if
    its template matches). Otherwise every entry for the template is probed in
    a fixed order until one is healthy. Probed entries are mutated in place:
    ``status`` becomes "healthy"/"unhealthy" and ``last_seen_at`` is refreshed
    on success. Persisting those changes is left to the caller.

    Returns:
        The healthy entry, or None when nothing reusable was found.
    """

    def _probe(candidate: ProxyEntry) -> bool:
        """Health-check one entry and record the outcome on it."""
        alive = check_proxy_health(
            base_url=candidate.base_url,
            expected_template=template,
            timeout_s=timeout_s,
            expected_proxy_id=candidate.proxy_id,
        )
        if alive:
            candidate.last_seen_at = now_iso()
            candidate.status = "healthy"
        else:
            # Update status so `forge proxy list` reflects reality (best effort).
            candidate.status = "unhealthy"
        return alive

    if proxy_id is not None:
        # Identity-specific reuse: look for THIS proxy only
        pinned = registry.proxies.get(proxy_id)
        if pinned is not None and pinned.template == template and _probe(pinned):
            return pinned
        return None

    # Template-wide reuse: entries that have been seen before come first,
    # ties broken by proxy_id, so the probing order is deterministic.
    pool = sorted(
        (item for item in registry.proxies.values() if item.template == template),
        key=lambda e: (e.last_seen_at is not None, e.proxy_id),
        reverse=True,
    )
    for candidate in pool:
        if _probe(candidate):
            return candidate

    return None
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
def check_proxy_health(
    *,
    base_url: str,
    expected_template: str,
    timeout_s: float,
    expected_proxy_id: str | None = None,
    require_unregistered: bool = False,
) -> bool:
    """Probe ``GET {base_url}/`` and decide whether it is the expected proxy.

    The endpoint must answer HTTP 200 with a JSON object whose ``is_proxy`` is
    True and whose ``template`` equals ``expected_template``.

    Args:
        base_url: Base URL of the candidate proxy (no trailing slash expected).
        expected_template: Template name the proxy must report.
        timeout_s: HTTP timeout for the probe, in seconds.
        expected_proxy_id: When given, the reported ``proxy.proxy_id`` must match.
        require_unregistered: When True, the response must NOT carry a proxy_id.

    Returns:
        True if every check passes; False on any transport error, non-200
        status, non-JSON or non-object body, or identity mismatch.
    """
    try:
        with httpx.Client(timeout=httpx.Timeout(timeout_s)) as client:
            resp = client.get(f"{base_url}/")
    except (httpx.RequestError, httpx.TimeoutException):
        return False

    if resp.status_code != 200:
        return False

    try:
        data = resp.json()
    except ValueError:
        return False

    # Any process may be listening on the probed port; valid JSON that is not
    # an object (list, string, number, null) is simply "not our proxy".
    if not isinstance(data, dict):
        return False

    if data.get("is_proxy") is not True:
        return False

    if data.get("template") != expected_template:
        return False

    proxy_block = data.get("proxy")
    actual_proxy_id = proxy_block.get("proxy_id") if isinstance(proxy_block, dict) else None

    # Missing proxy metadata is treated as "unregistered": adopt may proceed,
    # but identity-specific reuse/spawn validation must still fail.
    if expected_proxy_id is not None and actual_proxy_id != expected_proxy_id:
        return False

    if require_unregistered and actual_proxy_id is not None:
        return False

    return True
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
def smoke_test_proxy(*, base_url: str, timeout_s: float = 30.0) -> tuple[bool, str]:
    """Send a minimal completion request through the proxy to verify the upstream LLM.

    Posts a tiny request to ``{base_url}/v1/messages`` and looks for the first
    content block that carries non-empty text. Retries once (after a 2 second
    pause) on failure.

    Args:
        base_url: Base URL of the proxy; a trailing slash is tolerated.
        timeout_s: HTTP timeout per attempt, in seconds.

    Returns:
        (success, detail) where detail is the model response text on success
        or the last error message on failure.
    """
    # max_tokens must be large enough for thinking models (e.g., Gemini 2.5 Pro)
    # which consume tokens for internal reasoning before producing visible output
    payload = {
        "model": "sonnet",
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Say hi"}],
    }
    url = f"{base_url.rstrip('/')}/v1/messages"
    last_error = ""

    for attempt in range(2):
        if attempt > 0:
            time.sleep(2)
        try:
            with httpx.Client(timeout=httpx.Timeout(timeout_s)) as client:
                resp = client.post(url, json=payload)

            if resp.status_code != 200:
                last_error = f"HTTP {resp.status_code}: {resp.text[:200]}"
                continue

            data = resp.json()
            # Guard against JSON bodies that are not objects before using .get().
            content = data.get("content", []) if isinstance(data, dict) else None
            if content and isinstance(content, list):
                # Scan every block, not just the first: a response may lead
                # with a non-text block (e.g. thinking) and carry the visible
                # text in a later one.
                for block in content:
                    if not isinstance(block, dict):
                        continue
                    text = block.get("text", "")
                    if isinstance(text, str) and text:
                        return True, text.strip()
                # Valid structure but empty text — thinking models may consume
                # all tokens for reasoning. Report model + usage for diagnosis.
                model = data.get("model", "unknown")
                usage = data.get("usage")
                if not isinstance(usage, dict):
                    usage = {}
                last_error = (
                    f"Empty response from {model} "
                    f"(input={usage.get('input_tokens', '?')}, "
                    f"output={usage.get('output_tokens', '?')} tokens)"
                )
                continue

            last_error = f"Unexpected response shape: {resp.text[:200]}"
        except httpx.TimeoutException:
            last_error = f"Request timed out after {timeout_s}s"
        except (httpx.RequestError, ValueError) as e:
            last_error = str(e)

    return False, last_error
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def _find_available_port(*, start_port: int, max_attempts: int) -> int:
    """Return the first port in ``[start_port, start_port + max_attempts)`` that is not in use.

    Raises:
        ProxyStartError: If every port in the range is reported as in use.
    """
    free_port = next(
        (
            candidate
            for candidate in range(start_port, start_port + max_attempts)
            if not _is_port_in_use(candidate)
        ),
        None,
    )
    if free_port is None:
        raise ProxyStartError(f"Could not find an available port in range {start_port}-{start_port + max_attempts - 1}")
    return free_port
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
def _is_port_in_use(port: int) -> bool:
    """Report whether a TCP/IPv4 bind to ``port`` on all interfaces fails.

    Returns:
        True if binding raises OSError, False if the bind succeeds. The probe
        socket is always closed before returning.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("", port))
    except OSError:
        return True
    else:
        return False
    finally:
        probe.close()
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
def _check_proxy_dependencies(*, provider: str = "") -> None:
    """Verify that the packages needed to run the proxy can be imported.

    Args:
        provider: The preferred_provider from the template. When "openrouter",
            litellm is not required (direct API, no LiteLLM subprocess).

    Raises:
        ProxyStartError: If required proxy dependencies are missing.
    """
    required = ["fastapi", "uvicorn"]
    if provider != "openrouter":
        required.append("litellm")

    missing = []
    for module_name in required:
        try:
            __import__(module_name)
        except ImportError:
            missing.append(module_name)

    if not missing:
        return

    deps_str = ", ".join(missing)
    raise ProxyStartError(
        f"Missing required proxy dependencies: {deps_str}\n\n"
        "These are needed to run the model routing proxy.\n\n"
        "To install them:\n"
        " uv sync # If developing in the repo\n"
        " ./scripts/setup.sh --local # If you installed with --local\n\n"
        "Or use --no-start to create the config without starting the server."
    )
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
def _spawn_proxy_process(
    *, template: str, host: str, port: int, proxy_id: str, provider: str = ""
) -> tuple[subprocess.Popen[bytes], Path]:
    """Spawn a proxy subprocess with the given configuration.

    Runs ``python -m forge.proxy.server`` with the current interpreter and a
    copy of the current environment. stdout is discarded; stderr is redirected
    to a fresh temporary ``.log`` file so a startup failure can be reported.

    Args:
        template: Value passed as ``--template``.
        host: Value passed as ``--host``.
        port: Value passed as ``--port``.
        proxy_id: Value passed as ``--proxy-id``; also part of the temp-file prefix.
        provider: Forwarded to ``_check_proxy_dependencies``.

    Returns:
        Tuple of (process, stderr_capture_path) for error reporting.

    Raises:
        ProxyStartError: If required proxy dependencies are missing.
        Exception: Any error raised by ``subprocess.Popen`` is re-raised after
            the temporary stderr file has been removed.
    """
    _check_proxy_dependencies(provider=provider)

    cmd = [
        sys.executable,
        "-m",
        "forge.proxy.server",
        "--template",
        template,
        "--host",
        host,
        "--port",
        str(port),
        "--proxy-id",
        proxy_id,
    ]

    env = {**os.environ}

    # Create temp file for stderr capture (for error reporting)
    import tempfile

    stderr_fd, stderr_path = tempfile.mkstemp(suffix=".log", prefix=f"forge_proxy_{proxy_id}_")

    try:
        try:
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=stderr_fd,
                env=env,
            )
        finally:
            # Close our copy of the fd whether or not the spawn succeeded
            # (on success the child process has it open).
            os.close(stderr_fd)
    except Exception:
        # No process was started, so nobody will ever read or delete the
        # capture file: remove it instead of leaking it in the temp directory.
        try:
            os.unlink(stderr_path)
        except OSError:
            pass
        raise

    return proc, Path(stderr_path)
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
def _wait_until_healthy(
    *,
    base_url: str,
    expected_template: str,
    proc: subprocess.Popen[bytes],
    stderr_capture: Path,
    timeout_s: float,
    expected_proxy_id: str | None = None,
) -> None:
    """Poll a freshly spawned proxy until it passes ``check_proxy_health``.

    Polls every 0.25 s. The temporary stderr capture file is removed on every
    exit path (success, early process exit, timeout).

    Args:
        base_url: URL to health-check.
        expected_template: Template the proxy must report.
        proc: The spawned proxy process; its exit is detected via ``poll()``.
        stderr_capture: File receiving the process's stderr.
        timeout_s: Total time budget in seconds.
        expected_proxy_id: Proxy ID the health endpoint must report, if any.

    Raises:
        ProxyStartError: If the process exits before becoming healthy (the
            message includes captured stderr or a log tail when available),
            or if the time budget is exhausted.
    """

    def _discard_stderr_capture() -> None:
        """Best-effort removal of the temporary stderr capture file."""
        try:
            stderr_capture.unlink()
        except OSError:
            pass

    # Monotonic clock: the deadline must not move when the wall clock is adjusted.
    deadline = time.monotonic() + timeout_s

    while time.monotonic() < deadline:
        if proc.poll() is not None:
            error_msg = f"Proxy process exited before becoming healthy (exit_code={proc.returncode})"

            stderr_content = ""
            if stderr_capture.exists():
                try:
                    # errors="replace": undecodable bytes must not cost us the diagnostics.
                    stderr_content = stderr_capture.read_text(encoding="utf-8", errors="replace").strip()
                except OSError:
                    pass
                _discard_stderr_capture()

            # Also try to read from logs directory
            from forge.core.logging import find_latest_log

            log_hint = ""
            latest_log = find_latest_log("proxy", "proxy.*.log")
            if latest_log:
                log_hint = f"\n\nCheck log file for details: {latest_log}"
                # Try to read last 20 lines; only used when stderr gave us nothing
                try:
                    with open(latest_log, encoding="utf-8", errors="replace") as f:
                        lines = f.readlines()
                    if lines:
                        tail = "".join(lines[-20:]).strip()
                        if tail and not stderr_content:
                            stderr_content = tail
                except OSError:
                    pass

            if stderr_content:
                if len(stderr_content) > 500:
                    stderr_content = "..." + stderr_content[-500:]
                error_msg += f"\n\nError output:\n{stderr_content}"

                # Add helpful hint for common dependency errors
                if "ModuleNotFoundError" in stderr_content and any(
                    pkg in stderr_content for pkg in ["uvicorn", "fastapi", "litellm"]
                ):
                    error_msg += (
                        "\n\nTip: Proxy dependencies not installed. Run:\n"
                        " uv sync (if developing) or ./scripts/setup.sh --local (to reinstall)"
                    )

            error_msg += log_hint
            raise ProxyStartError(error_msg)

        if check_proxy_health(
            base_url=base_url,
            expected_template=expected_template,
            timeout_s=min(1.0, timeout_s),
            expected_proxy_id=expected_proxy_id,
        ):
            _discard_stderr_capture()
            return

        time.sleep(0.25)

    _discard_stderr_capture()

    raise ProxyStartError(f"Timed out waiting for proxy to become healthy at {base_url}")
|
|
993
|
+
|
|
994
|
+
|
|
995
|
+
def _terminate_process(proc: subprocess.Popen[bytes]) -> None:
    """Stop ``proc``: terminate first, escalate to kill if it does not exit.

    Sends ``terminate()`` and waits up to 2 seconds. If the wait fails or
    times out, sends ``kill()`` and waits again (bounded) so the child is
    reaped. OSError from ``terminate()``/``kill()`` ends the attempt quietly.
    """
    try:
        proc.terminate()
    except OSError:
        return

    try:
        proc.wait(timeout=2.0)
    except Exception:
        try:
            proc.kill()
        except OSError:
            return
        # Collect the exit status of the killed child; without this it stays
        # unreaped. Bounded so cleanup can never hang.
        try:
            proc.wait(timeout=2.0)
        except (subprocess.TimeoutExpired, OSError):
            pass
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
def _base_url(host: str, port: int) -> str:
    """Build the ``http://host:port`` base URL for a proxy.

    IPv6 literals (any host containing ``:``) are wrapped in square brackets
    so the port separator stays unambiguous; hosts that are already bracketed
    are left untouched.
    """
    if ":" in host and not host.startswith("["):
        host = f"[{host}]"
    return f"http://{host}:{port}"
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
def _new_proxy_id(existing: set[str] | None = None) -> str:
    """Generate a color-fruit proxy ID (e.g., 'teal-lemon').

    ``existing`` (or an empty set when it is None/empty) is handed to
    ``generate_unique_proxy_name``.
    """
    from forge.core.naming import generate_unique_proxy_name

    taken = existing if existing else set()
    return generate_unique_proxy_name(taken)
|