multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
"""Pydantic models for API request/response validation.
|
|
2
|
+
|
|
3
|
+
Defines data models for the proxy API, including models for:
|
|
4
|
+
- Content blocks (text, images, tool use)
|
|
5
|
+
- Messages
|
|
6
|
+
- API requests and responses
|
|
7
|
+
- Model name mapping from Anthropic (Claude) names to the configured backend provider
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel, Field, model_validator
|
|
14
|
+
|
|
15
|
+
from forge.config import config, is_openai_model
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _detect_tier(values: dict) -> dict:
    """Detect Claude tier (haiku/sonnet/opus) from model name in request dict.

    Sets ``original_model_name``, ``tier``, and ``has_explicit_tier`` on
    ``values`` (the dict is mutated in place) and returns it.
    Used by model_validator(mode="before") on request models.

    Payloads that are not a dict, have no ``"model"`` key, or carry a
    non-string model are returned untouched so that ordinary field validation
    reports the problem. Calling ``.lower()`` on such a value here would raise
    ``AttributeError`` rather than a validation error.

    Args:
        values: Raw request payload handed to the before-validator.

    Returns:
        The same ``values`` object, with the three derived keys set when a
        string model name was present.
    """
    if not isinstance(values, dict) or "model" not in values:
        return values

    model_name = values["model"]
    if not isinstance(model_name, str):
        # Let the `model: str` field reject it with a proper validation error.
        return values

    values["original_model_name"] = model_name

    model_lower = model_name.lower()
    # Precedence is haiku, then sonnet, then opus: the first keyword found wins.
    tier = next((t for t in ("haiku", "sonnet", "opus") if t in model_lower), None)
    values["tier"] = tier
    values["has_explicit_tier"] = tier is not None

    return values
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CacheControl(BaseModel):
    """Cache control directive for prompt caching (Anthropic API).

    The "ephemeral" type indicates content should be cached for the session.
    Only affects Anthropic/Bedrock models — other providers cache automatically
    or don't support the field.
    """

    # "ephemeral" is the only accepted value and is also the default when omitted.
    type: Literal["ephemeral"] = "ephemeral"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class ContentBlockText(BaseModel):
    """Plain-text content block, tagged with ``type == "text"``."""

    type: Literal["text"]
    text: str
    # Optional prompt-caching directive; None when the block carries none.
    cache_control: Optional[CacheControl] = None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ContentBlockImageSource(BaseModel):
    """Source payload of an image block; only the "base64" source type is modelled."""

    type: Literal["base64"]
    # presumably a MIME type such as "image/png" — not validated here, confirm with callers
    media_type: str
    # presumably the base64-encoded image bytes — not validated here
    data: str
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ContentBlockImage(BaseModel):
    """Image content block, tagged with ``type == "image"``."""

    type: Literal["image"]
    source: ContentBlockImageSource
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ContentBlockToolUse(BaseModel):
    """Tool invocation content block, tagged with ``type == "tool_use"``."""

    type: Literal["tool_use"]
    # Identifier of this invocation; ContentBlockToolResult.tool_use_id presumably refers back to it.
    id: str
    name: str
    # Arguments for the tool call as a free-form mapping (no schema enforced here).
    input: Dict[str, Any]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class ContentBlockToolResult(BaseModel):
    """Tool result content block, tagged with ``type == "tool_result"``."""

    type: Literal["tool_result"]
    tool_use_id: str
    # Either a plain string or a list of free-form dict blocks.
    content: Union[str, List[Dict[str, Any]]]
    # Defaults to False; None is also accepted.
    is_error: Optional[bool] = False
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ContentBlockThinking(BaseModel):
    """Anthropic extended thinking block (sent in conversation history on --resume)."""

    type: Literal["thinking"]
    # Both payload fields are optional on input: text defaults to "", signature to None.
    thinking: str = ""
    signature: Optional[str] = None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class ContentBlockRedactedThinking(BaseModel):
    """Anthropic redacted thinking block (opaque, sent back for continuity)."""

    type: Literal["redacted_thinking"]
    # Opaque payload; defaults to the empty string when omitted.
    data: str = ""
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Any content block accepted inside a message or response. Each member
# declares a distinct Literal value for its `type` field.
ContentBlock = Union[
    ContentBlockText,
    ContentBlockImage,
    ContentBlockToolUse,
    ContentBlockToolResult,
    ContentBlockThinking,
    ContentBlockRedactedThinking,
]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class SystemContent(BaseModel):
    """Text block of a system prompt, used when ``system`` is given as a list."""

    type: Literal["text"]
    text: str
    # Optional prompt-caching directive; None when the block carries none.
    cache_control: Optional[CacheControl] = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class Message(BaseModel):
    """Single conversation turn from either the user or the assistant."""

    role: Literal["user", "assistant"]
    # Either a plain string or a list of typed content blocks.
    content: Union[str, List[ContentBlock]]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class ToolInputSchema(BaseModel):
    """Schema describing a tool's input; the top-level type is always "object"."""

    type: Literal["object"] = "object"
    # Mapping of property name to its (unvalidated) schema fragment; required field.
    properties: Dict[str, Any]
    # Names of mandatory properties; None when not supplied.
    required: Optional[List[str]] = None
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class ToolDefinition(BaseModel):
    """Definition of a tool the model may call: name, optional description, input schema."""

    name: str
    description: Optional[str] = None
    input_schema: ToolInputSchema
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class MessagesRequest(BaseModel):
    """Request body for a messages call.

    ``original_model_name``, ``tier`` and ``has_explicit_tier`` are internal
    fields: the before-validator below fills them from ``model`` via
    ``_detect_tier`` whenever the payload is a dict containing a "model" key.
    """

    model: str  # Raw client-supplied model string; mapped in handler after config reload
    messages: List[Message]
    system: Optional[Union[str, List[SystemContent]]] = None
    max_tokens: int = Field(ge=1)  # Required; must be at least 1
    metadata: Optional[Dict[str, Any]] = None
    stop_sequences: Optional[List[str]] = None
    stream: Optional[bool] = False
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    # Reasoning/thinking overrides (explicit request overrides are allowed)
    reasoning_effort: Optional[str] = None
    verbosity: Optional[str] = None
    thinking: Optional[Dict[str, Any]] = None
    tools: Optional[List[ToolDefinition]] = None
    tool_choice: Optional[Dict[str, Any]] = None
    original_model_name: Optional[str] = None  # Internal field to store original name pre-mapping
    tier: Optional[str] = None  # Internal field to store detected tier (haiku/sonnet/opus)
    has_explicit_tier: bool = False  # Whether tier was explicit in model name (not defaulted)

    @model_validator(mode="before")
    @classmethod
    def store_original_model(cls, values):
        """Record the client-supplied model name and detected tier before field validation."""
        return _detect_tier(values)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class TokenCountRequest(BaseModel):
    """Request body for a token-count call.

    Carries the same internal tier fields as ``MessagesRequest``; they are
    filled by the before-validator below via ``_detect_tier``.
    """

    model: str  # Raw client-supplied model string; mapped in handler after config reload
    messages: List[Message]
    system: Optional[Union[str, List[SystemContent]]] = None
    original_model_name: Optional[str] = None  # Internal field
    tier: Optional[str] = None  # Internal field to store detected tier (haiku/sonnet/opus)
    has_explicit_tier: bool = False  # Whether tier was explicit in model name

    @model_validator(mode="before")
    @classmethod
    def store_original_model_token_count(cls, values):
        """Record the client-supplied model name and detected tier before field validation."""
        return _detect_tier(values)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class TokenCountResponse(BaseModel):
    """Response body for a token-count call: a single input token total."""

    input_tokens: int
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class Usage(BaseModel):
    """Input/output token counts attached to a ``MessagesResponse``."""

    input_tokens: int
    output_tokens: int
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class MessagesResponse(BaseModel):
    """Response body for a messages call.

    ``type`` and ``role`` are fixed literals with defaults, so only ``id``,
    ``model``, ``content`` and ``usage`` must be provided.
    """

    id: str
    type: Literal["message"] = "message"
    role: Literal["assistant"] = "assistant"
    model: str  # Original Anthropic model name
    content: List[ContentBlock]
    # None is allowed in addition to the listed stop reasons.
    stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use", "content_filtered"]] = None
    stop_sequence: Optional[str] = None
    usage: Usage
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def map_model_name(anthropic_model_name: str) -> str:
    """Map Anthropic model names (haiku, sonnet, opus) to backend models.

    Uses unified config for model mappings. Routing is decided in this order:

    1. ``preferred_provider == "openrouter"``: names containing "/" pass
       through unchanged; names with a tier keyword map to the OpenRouter tier
       model; anything else falls back to the OpenRouter sonnet tier.
    2. ``preferred_provider`` in ("openai", "gemini", "litellm"): names already
       belonging to that provider pass through (normalized, except LiteLLM
       which keeps the original string with its prefix); tier keywords map to
       the provider's tier model; anything else falls back to the provider's
       sonnet tier.
    3. Any other / unset preference: recognized LiteLLM, OpenAI and Gemini
       names pass through; names with a tier keyword map to the Gemini tier
       model; everything else is rejected.

    Args:
        anthropic_model_name: Model string as supplied by the client.

    Returns:
        The mapped model name for the backend provider.

    Raises:
        ValueError: No supported provider preference applies and the name is
            neither a recognized backend model nor contains a tier keyword.
    """
    original = anthropic_model_name
    # Empty/falsy preference is treated the same as "not configured".
    preferred = config.proxy.preferred_provider or None

    def _normalize(name: str) -> str:
        """Strip, lowercase, drop any "@..." suffix and one leading provider prefix."""
        n = name.strip().lower().split("@", 1)[0]
        for prefix in ("anthropic/", "openai/", "gemini/"):
            if n.startswith(prefix):
                n = n[len(prefix) :]
                break
        return n

    def _anthropic_flavor(name: str) -> str | None:
        """Return the first tier keyword found in ``name`` (haiku, sonnet, opus) or None."""
        if "haiku" in name:
            return "haiku"
        if "sonnet" in name:
            return "sonnet"
        if "opus" in name:
            return "opus"
        return None

    def _is_openai(name: str) -> bool:
        """Delegate OpenAI model detection to ``forge.config.is_openai_model``."""
        return is_openai_model(name)

    def _is_gemini(name: str) -> bool:
        """True for names starting with "gemini-" or equal to a configured Gemini tier model."""
        # Use unified config for Gemini model detection
        known = {
            config.proxy.gemini.tiers.haiku.lower(),
            config.proxy.gemini.tiers.sonnet.lower(),
            config.proxy.gemini.tiers.opus.lower(),
        }
        return name.startswith("gemini-") or name in known

    def _is_litellm(name: str) -> bool:
        """Check if model name is a LiteLLM model (has provider prefix)."""
        return "/" in name and any(
            name.startswith(prefix)
            for prefix in [
                "openai/",
                "anthropic/",
                "vertex_ai/",
                "bedrock/",
                "replicate/",
                "together_ai/",
                "gemini/",  # Local LiteLLM with Google GenAI SDK
            ]
        )

    def _get_provider_models(provider_name: str) -> dict[str, str]:
        """Get tier->model mappings from unified config."""
        provider = config.proxy.get_provider(provider_name)
        return {
            "haiku": provider.tiers.haiku,
            "sonnet": provider.tiers.sonnet,
            "opus": provider.tiers.opus,
            # The sonnet tier doubles as the fallback for names without a tier keyword.
            "default": provider.tiers.sonnet,
        }

    name = _normalize(original)
    flavor = _anthropic_flavor(name)

    # OpenRouter: pass-through model IDs as-is (OpenRouter handles routing)
    if preferred == "openrouter":
        # Any "/" in the raw string is taken as an already-qualified model ID.
        if "/" in original:
            logger.info(f"Using OpenRouter model: '{original}' (pass-through)")
            return original

        # Map Anthropic flavors to OpenRouter tier models
        provider_models = _get_provider_models("openrouter")
        if flavor:
            mapped = provider_models[flavor]
            logger.info(f"Mapping '{original}' ({flavor.title()}) -> OpenRouter '{mapped}'")
            return mapped

        mapped = provider_models["default"]
        logger.warning(f"Unknown model '{original}' with provider preference 'openrouter', defaulting to '{mapped}'")
        return mapped

    # Forced provider: symmetric handling for OpenAI, Gemini, and LiteLLM
    if preferred in ("openai", "gemini", "litellm"):
        target = preferred

        # Pass-through if already the target provider
        # (OpenAI/Gemini are tested on the normalized name, LiteLLM on the raw string.)
        if (
            (target == "openai" and _is_openai(name))
            or (target == "gemini" and _is_gemini(name))
            or (target == "litellm" and _is_litellm(original))
        ):
            # Return original for LiteLLM to preserve the provider prefix
            result = original if target == "litellm" else name
            logger.info(f"Using {target} model: '{result}' (provider preference: {target})")
            return result

        # Map Anthropic flavors to the target provider
        provider_models = _get_provider_models(target)
        if flavor:
            mapped = provider_models[flavor]
            logger.info(f"Mapping '{original}' ({flavor.title()}) -> {target.title()} '{mapped}'")
            return mapped

        # Otherwise default to target provider's default
        mapped = provider_models["default"]
        logger.warning(
            f"Unknown/other model '{original}' with provider preference '{target}', defaulting to '{mapped}'"
        )
        return mapped

    # No forced provider: pass-through known provider models
    if _is_litellm(original):
        logger.info(f"Detected LiteLLM model: '{original}'")
        return original
    if _is_openai(name):
        logger.info(f"Detected OpenAI model: '{original}' -> '{name}'")
        return name
    if _is_gemini(name):
        logger.info(f"Detected Gemini model: '{original}' -> '{name}'")
        return name

    # Anthropic-style names map by flavor onto the Gemini tiers; names without
    # a tier keyword are NOT defaulted here — they are rejected below.
    target = "gemini"
    provider_models = _get_provider_models(target)
    if flavor:
        mapped = provider_models[flavor]
        logger.info(f"Mapping '{original}' ({flavor.title()}) -> {target.title()} '{mapped}'")
        return mapped

    # Fail-closed: reject completely unknown models rather than silently routing to default
    raise ValueError(
        f"Unrecognized model '{original}'. Cannot route to backend. "
        "Check model name or configure a mapping in the proxy template."
    )
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Error hint enrichment for client-side tool failures.
|
|
2
|
+
|
|
3
|
+
Appends targeted hints to tool_result error content before forwarding
|
|
4
|
+
to the LLM, helping non-Claude models recover from common mistakes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
# Marker placed between the original error text and the hint. Its presence in
# content also signals that a hint was already appended.
_HINT_PREFIX = "\n\nHINT: "

# Hint texts shared by a tool-specific rule and its tool-agnostic fallback.
_EDIT_NOOP_HINT = "Edit requires old_string \u2260 new_string. To view code, use Read instead of Edit."
_TASK_ID_HINT = (
    "Task IDs are short hex strings returned by run_in_background. "
    "Do not append file extensions. If not found, stop retrying the same ID."
)

# Rule table, evaluated top to bottom; the first matching rule wins.
# Each entry is (tool_name_or_None, required_substrings, hint_text):
#   - tool_name None matches any tool
#   - every required substring must occur in the error content
_HINT_RULES: list[tuple[Optional[str], list[str], str]] = [
    # Edit: no-op (old_string == new_string) -- 57% of all failures
    ("Edit", ["old_string and new_string are exactly the same"], _EDIT_NOOP_HINT),
    # Edit: not unique match
    (
        "Edit",
        ["matches", "replace_all is false"],
        "Include more surrounding context in old_string to uniquely identify the target, or set replace_all=true.",
    ),
    # Bash: ruff F401 unused import
    (
        "Bash",
        ["F401", "imported but unused"],
        "Remove the unused import(s) listed above, then retry.",
    ),
    # Bash: ruff F811 redefinition of unused name
    (
        "Bash",
        ["F811", "redefinition of unused"],
        "Remove the duplicate definition listed above, then retry.",
    ),
    # TaskOutput: hallucinated task ID
    ("TaskOutput", ["No task found with ID"], _TASK_ID_HINT),
    # Read: invalid pages parameter (non-PDF files)
    (
        "Read",
        ["Invalid pages parameter"],
        "pages is only for PDF files. For non-PDF files, omit pages entirely. Retry with only file_path.",
    ),
    # Read: file not found
    (
        "Read",
        ["File does not exist"],
        "Verify the absolute file path is correct. Use Glob to search for the file.",
    ),
    # --- Fallback rules (tool_name=None) for when _find_tool_name() fails ---
    (None, ["old_string and new_string are exactly the same"], _EDIT_NOOP_HINT),
    (None, ["No task found with ID"], _TASK_ID_HINT),
]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def enrich_error_content(tool_name: Optional[str], error_content: str) -> str:
    """Return ``error_content`` with a HINT appended when a known failure pattern matches.

    Rules in ``_HINT_RULES`` are tried in order and only the first match is
    applied. Content that already contains the hint marker, or that matches no
    rule, is returned unchanged.

    Args:
        tool_name: Name of the tool that produced the error, or None if unknown.
        error_content: Error text of the tool result.

    Returns:
        The original text, optionally followed by ``_HINT_PREFIX`` and the hint.
    """
    # A marker already in the text means a hint was appended earlier.
    if _HINT_PREFIX in error_content:
        return error_content

    applicable_hints = (
        hint
        for rule_tool, needles, hint in _HINT_RULES
        # A rule applies to every tool (None) or to exactly the named tool...
        if (rule_tool is None or tool_name == rule_tool)
        # ...and only when all of its substrings occur in the error text.
        and all(needle in error_content for needle in needles)
    )
    first_hint = next(applicable_hints, None)
    if first_hint is None:
        return error_content
    return error_content + _HINT_PREFIX + first_hint
|
forge/proxy/metrics.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""In-memory per-proxy runtime metrics.
|
|
2
|
+
|
|
3
|
+
Each proxy process maintains a single ProxyMetrics instance that accumulates
|
|
4
|
+
request counts, token usage (including cached and failed), and latency.
|
|
5
|
+
Metrics reset on proxy restart — this is expected and correct since each
|
|
6
|
+
proxy is a separate subprocess.
|
|
7
|
+
|
|
8
|
+
Exposed via GET / (runtime truth endpoint) and ``forge proxy metrics`` CLI.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import threading
|
|
14
|
+
import time
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
|
|
17
|
+
from forge.core.state import now_iso
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class TierTokens:
    """Token, latency and cost totals accumulated for one tier or one model."""

    input_tokens: int = 0
    output_tokens: int = 0
    cached_tokens: int = 0
    total_latency_ms: float = 0.0
    # Divisor for the average latency; kept here rather than read from
    # requests_by_tier so the two can be reset independently.
    request_count: int = 0
    # Cost estimate in microdollars (1 USD = 1_000_000).
    estimated_cost_micros: int = 0

    def to_dict(self) -> dict[str, object]:
        """Return the totals as a plain dict with derived latency and USD cost.

        ``avg_latency_ms`` is ``total_latency_ms / request_count`` rounded to
        one decimal place, or ``0.0`` when no requests have been counted.
        ``estimated_cost_usd`` is the microdollar total converted to dollars
        and rounded to six decimal places. ``total_latency_ms`` and
        ``request_count`` themselves are not included in the result.
        """
        if self.request_count > 0:
            avg_latency = round(self.total_latency_ms / self.request_count, 1)
        else:
            avg_latency = 0.0

        cost_usd = round(self.estimated_cost_micros / 1_000_000, 6)

        summary: dict[str, object] = {}
        summary["input_tokens"] = self.input_tokens
        summary["output_tokens"] = self.output_tokens
        summary["cached_tokens"] = self.cached_tokens
        summary["avg_latency_ms"] = avg_latency
        summary["estimated_cost_usd"] = cost_usd
        summary["estimated_cost_micros"] = self.estimated_cost_micros
        return summary
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class ProxyMetrics:
    """Lock-guarded, in-memory counters for a single proxy process.

    ``record_request()`` is the only method that increments counters, and it
    does so while holding ``_lock``; ``snapshot()`` and ``reset()`` take the
    same lock. The critical sections consist of counter and dict updates plus
    one timestamp call, so the lock is only held briefly.
    """

    # Timestamps: wall-clock start for display, monotonic start for uptime.
    started_at: str = field(default_factory=now_iso)
    _started_mono: float = field(default_factory=time.monotonic)

    # Request counters
    total_requests: int = 0
    total_streaming: int = 0
    total_failures: int = 0

    # Token totals across all requests, successful or failed
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cached_tokens: int = 0

    # Tokens attributed to failed requests only (wasted spend)
    failed_input_tokens: int = 0
    failed_output_tokens: int = 0

    # Cost estimates in microdollars (1 USD = 1_000_000)
    total_cost_micros: int = 0
    failed_cost_micros: int = 0

    # Breakdown keyed by tier
    requests_by_tier: dict[str, int] = field(default_factory=dict)
    tokens_by_tier: dict[str, TierTokens] = field(default_factory=dict)

    # Breakdown keyed by model (actual_model_id, for cost comparison)
    requests_by_model: dict[str, int] = field(default_factory=dict)
    tokens_by_model: dict[str, TierTokens] = field(default_factory=dict)

    # Failure counts keyed by error_type rather than HTTP status
    # (streaming responses are always 200).
    failures_by_type: dict[str, int] = field(default_factory=dict)

    # Timestamp of the most recently recorded request, if any
    last_request_at: str | None = None

    # Guards every read and write of the fields above
    _lock: threading.Lock = field(default_factory=threading.Lock, repr=False)

    def record_request(
        self,
        *,
        tier: str,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cached_tokens: int,
        latency_ms: float,
        streaming: bool,
        failed: bool,
        error_type: str | None = None,
        cost_micros: int = 0,
    ) -> None:
        """Fold one completed request into the counters while holding the lock.

        Tokens and cost are added to the overall totals and to both the
        per-tier and per-model breakdowns whether or not the request failed.
        When ``failed`` is true they are additionally added to the failed
        totals, and a non-empty ``error_type`` bumps ``failures_by_type``.
        ``last_request_at`` is refreshed on every call.
        """
        with self._lock:
            self.total_requests += 1
            if streaming:
                self.total_streaming += 1

            # Overall totals are updated for successes and failures alike.
            self.total_input_tokens += input_tokens
            self.total_output_tokens += output_tokens
            self.total_cached_tokens += cached_tokens
            self.total_cost_micros += cost_micros

            # The tier and model breakdowns are maintained identically.
            breakdowns = (
                (self.requests_by_tier, self.tokens_by_tier, tier),
                (self.requests_by_model, self.tokens_by_model, model),
            )
            for counts, buckets, key in breakdowns:
                counts[key] = counts.get(key, 0) + 1
                bucket = buckets.get(key)
                if bucket is None:
                    bucket = buckets[key] = TierTokens()
                bucket.input_tokens += input_tokens
                bucket.output_tokens += output_tokens
                bucket.cached_tokens += cached_tokens
                bucket.total_latency_ms += latency_ms
                bucket.request_count += 1
                bucket.estimated_cost_micros += cost_micros

            if failed:
                self.total_failures += 1
                self.failed_input_tokens += input_tokens
                self.failed_output_tokens += output_tokens
                self.failed_cost_micros += cost_micros
                if error_type:
                    seen = self.failures_by_type.get(error_type, 0)
                    self.failures_by_type[error_type] = seen + 1

            self.last_request_at = now_iso()

    def snapshot(self) -> dict:
        """Build a dict of the current metrics plus derived values, under the lock.

        Derived values are ``uptime_seconds`` (monotonic time since start,
        one decimal), ``cache_hit_rate`` (cached tokens as a percentage of
        input tokens, one decimal, ``0.0`` when there are no input tokens)
        and the USD conversions of the microdollar cost totals.
        """

        def breakdown(counts: dict, buckets: dict) -> dict:
            # One entry per bucket: its request count merged with its token totals.
            return {
                key: {"requests": counts.get(key, 0), **bucket.to_dict()}
                for key, bucket in buckets.items()
            }

        with self._lock:
            uptime = time.monotonic() - self._started_mono

            if self.total_input_tokens > 0:
                ratio = self.total_cached_tokens / self.total_input_tokens
                hit_rate = round(ratio * 100, 1)
            else:
                hit_rate = 0.0

            token_totals = {
                "input": self.total_input_tokens,
                "output": self.total_output_tokens,
                "cached": self.total_cached_tokens,
                "failed_input": self.failed_input_tokens,
                "failed_output": self.failed_output_tokens,
            }
            cost_totals = {
                "total_usd": round(self.total_cost_micros / 1_000_000, 6),
                "failed_usd": round(self.failed_cost_micros / 1_000_000, 6),
                "total_micros": self.total_cost_micros,
                "failed_micros": self.failed_cost_micros,
            }

            return {
                "started_at": self.started_at,
                "uptime_seconds": round(uptime, 1),
                "total_requests": self.total_requests,
                "total_streaming": self.total_streaming,
                "total_failures": self.total_failures,
                "tokens": token_totals,
                "cache_hit_rate": hit_rate,
                "by_tier": breakdown(self.requests_by_tier, self.tokens_by_tier),
                "by_model": breakdown(self.requests_by_model, self.tokens_by_model),
                "failures_by_type": dict(self.failures_by_type),
                "costs": cost_totals,
                "last_request_at": self.last_request_at,
            }

    def reset(self) -> None:
        """Zero every counter and empty every breakdown. For test isolation.

        ``started_at`` and ``_started_mono`` are left untouched, so uptime
        keeps counting from the original start.
        """
        with self._lock:
            self.total_requests = self.total_streaming = self.total_failures = 0
            self.total_input_tokens = self.total_output_tokens = self.total_cached_tokens = 0
            self.failed_input_tokens = self.failed_output_tokens = 0
            self.total_cost_micros = self.failed_cost_micros = 0

            for table in (
                self.requests_by_tier,
                self.tokens_by_tier,
                self.requests_by_model,
                self.tokens_by_model,
                self.failures_by_type,
            ):
                table.clear()

            self.last_request_at = None
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# Module-level singleton — one ProxyMetrics instance per proxy process, created when this module is imported.
|
|
221
|
+
# Matches existing patterns: client_factory and PROXY_ID in server.py are also module globals.
|
|
222
|
+
proxy_metrics = ProxyMetrics()
|