multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,338 @@
1
+ """Pydantic models for API request/response validation.
2
+
3
+ Defines data models for the proxy API, including models for:
4
+ - Content blocks (text, images, tool use)
5
+ - Messages
6
+ - API requests and responses
7
+ - Model name mapping between Claude and Gemini
8
+ """
9
+
10
+ import logging
11
+ from typing import Any, Dict, List, Literal, Optional, Union
12
+
13
+ from pydantic import BaseModel, Field, model_validator
14
+
15
+ from forge.config import config, is_openai_model
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def _detect_tier(values: dict) -> dict:
    """Detect the Claude tier (haiku/sonnet/opus) from the request's model name.

    Intended for ``model_validator(mode="before")`` on request models, so it
    receives the raw, not-yet-validated input.

    When ``values`` is a dict whose ``"model"`` entry is a string, the dict is
    updated in place with:

    - ``original_model_name``: the model string exactly as supplied
    - ``tier``: ``"haiku"``, ``"sonnet"``, ``"opus"`` (first one found, in that
      order, case-insensitively) or ``None``
    - ``has_explicit_tier``: whether a tier name was found

    Args:
        values: Raw input handed to the validator (normally a dict).

    Returns:
        The same ``values`` object, updated as described above. It is returned
        untouched when it is not a dict, has no ``"model"`` key, or the model
        is not a string.
    """
    if not isinstance(values, dict) or "model" not in values:
        return values

    model_name = values["model"]
    if not isinstance(model_name, str):
        # This runs before field validation, so "model" may be None, a number,
        # etc. Calling .lower() on it would raise AttributeError; leave the
        # input alone and let the `model: str` field report the type error.
        return values

    values["original_model_name"] = model_name

    lowered = model_name.lower()
    # Order matters: the first tier name contained in the model string wins.
    tier = next((t for t in ("haiku", "sonnet", "opus") if t in lowered), None)
    values["tier"] = tier
    values["has_explicit_tier"] = tier is not None

    return values
45
+
46
+
47
class CacheControl(BaseModel):
    """Prompt-caching directive attached to a content block (Anthropic API).

    ``"ephemeral"`` is the only accepted type and marks content that should be
    cached for the session. The field only matters for Anthropic/Bedrock
    models; other providers either cache automatically or do not support it.
    """

    # Defaults to the single allowed value, so `{}` is a valid directive.
    type: Literal["ephemeral"] = "ephemeral"
56
+
57
+
58
class ContentBlockText(BaseModel):
    """Plain-text content block, optionally tagged with a cache directive."""

    type: Literal["text"]
    text: str
    cache_control: Optional[CacheControl] = None
62
+
63
+
64
class ContentBlockImageSource(BaseModel):
    """Image source descriptor; ``base64`` is the only accepted source type."""

    type: Literal["base64"]
    media_type: str
    data: str
68
+
69
+
70
class ContentBlockImage(BaseModel):
    """Image content block wrapping a ``ContentBlockImageSource``."""

    type: Literal["image"]
    source: ContentBlockImageSource
73
+
74
+
75
class ContentBlockToolUse(BaseModel):
    """Tool invocation block: the call id, the tool name and its input dict."""

    type: Literal["tool_use"]
    id: str
    name: str
    input: Dict[str, Any]
80
+
81
+
82
class ContentBlockToolResult(BaseModel):
    """Result of a tool call, linked to its invocation via ``tool_use_id``."""

    type: Literal["tool_result"]
    tool_use_id: str
    # Either a plain string or a list of raw (unvalidated) block dicts.
    content: Union[str, List[Dict[str, Any]]]
    is_error: Optional[bool] = False
87
+
88
+
89
class ContentBlockThinking(BaseModel):
    """Anthropic extended-thinking block.

    Shows up in conversation history when a session is resumed (``--resume``).
    """

    type: Literal["thinking"]
    thinking: str = ""
    signature: Optional[str] = None
95
+
96
+
97
class ContentBlockRedactedThinking(BaseModel):
    """Anthropic redacted-thinking block.

    The payload is opaque and is only carried so it can be sent back for
    continuity.
    """

    type: Literal["redacted_thinking"]
    data: str = ""
102
+
103
+
104
# Any block that may appear in a message's content list. Member order is kept
# as declared because it is part of how the union is defined for validation.
ContentBlock = Union[
    ContentBlockText,
    ContentBlockImage,
    ContentBlockToolUse,
    ContentBlockToolResult,
    ContentBlockThinking,
    ContentBlockRedactedThinking,
]
112
+
113
+
114
class SystemContent(BaseModel):
    """One text segment of a list-form system prompt, with optional caching."""

    type: Literal["text"]
    text: str
    cache_control: Optional[CacheControl] = None
118
+
119
+
120
class Message(BaseModel):
    """A conversation turn: a role plus string or block-list content."""

    role: Literal["user", "assistant"]
    content: Union[str, List[ContentBlock]]
123
+
124
+
125
class ToolInputSchema(BaseModel):
    """Schema describing a tool's input; the top-level type is always ``object``."""

    type: Literal["object"] = "object"
    properties: Dict[str, Any]
    required: Optional[List[str]] = None
129
+
130
+
131
class ToolDefinition(BaseModel):
    """A tool offered to the model: name, optional description, input schema."""

    name: str
    description: Optional[str] = None
    input_schema: ToolInputSchema
135
+
136
+
137
class MessagesRequest(BaseModel):
    """Request body for the messages endpoint.

    The three trailing fields (``original_model_name``, ``tier``,
    ``has_explicit_tier``) are internal: the before-validator fills them from
    ``model`` via ``_detect_tier``.
    """

    # Raw client-supplied model string; mapped in handler after config reload
    model: str
    messages: List[Message]
    system: Optional[Union[str, List[SystemContent]]] = None
    max_tokens: int = Field(ge=1)
    metadata: Optional[Dict[str, Any]] = None
    stop_sequences: Optional[List[str]] = None
    stream: Optional[bool] = False

    # Sampling parameters
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None

    # Reasoning/thinking overrides (explicit request overrides are allowed)
    reasoning_effort: Optional[str] = None
    verbosity: Optional[str] = None
    thinking: Optional[Dict[str, Any]] = None

    # Tool use
    tools: Optional[List[ToolDefinition]] = None
    tool_choice: Optional[Dict[str, Any]] = None

    # Internal: original name pre-mapping
    original_model_name: Optional[str] = None
    # Internal: detected tier (haiku/sonnet/opus)
    tier: Optional[str] = None
    # Internal: whether tier was explicit in model name (not defaulted)
    has_explicit_tier: bool = False

    @model_validator(mode="before")
    @classmethod
    def store_original_model(cls, values):
        """Record the original model name and detected tier before validation."""
        return _detect_tier(values)
162
+
163
+
164
class TokenCountRequest(BaseModel):
    """Request body for token counting.

    Carries the same internal tier fields as ``MessagesRequest``; they are
    filled from ``model`` by the before-validator via ``_detect_tier``.
    """

    # Raw client-supplied model string; mapped in handler after config reload
    model: str
    messages: List[Message]
    system: Optional[Union[str, List[SystemContent]]] = None

    # Internal: original name as supplied
    original_model_name: Optional[str] = None
    # Internal: detected tier (haiku/sonnet/opus)
    tier: Optional[str] = None
    # Internal: whether tier was explicit in model name
    has_explicit_tier: bool = False

    @model_validator(mode="before")
    @classmethod
    def store_original_model_token_count(cls, values):
        """Record the original model name and detected tier before validation."""
        return _detect_tier(values)
176
+
177
+
178
class TokenCountResponse(BaseModel):
    """Response body for token counting: the input token count."""

    input_tokens: int
180
+
181
+
182
class Usage(BaseModel):
    """Token usage reported with a response (input and output counts)."""

    input_tokens: int
    output_tokens: int
185
+
186
+
187
class MessagesResponse(BaseModel):
    """Response body for the messages endpoint.

    ``type`` and ``role`` are fixed to ``"message"`` and ``"assistant"``.
    """

    id: str
    type: Literal["message"] = "message"
    role: Literal["assistant"] = "assistant"
    # Original Anthropic model name
    model: str
    content: List[ContentBlock]
    stop_reason: Optional[
        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use", "content_filtered"]
    ] = None
    stop_sequence: Optional[str] = None
    usage: Usage
196
+
197
+
198
def map_model_name(anthropic_model_name: str) -> str:
    """Resolve a client-supplied model name to the backend model to call.

    Routing depends on ``config.proxy.preferred_provider``:

    - ``"openrouter"``: names containing ``/`` pass through unchanged. Otherwise
      an Anthropic tier found in the name (haiku/sonnet/opus) selects the
      OpenRouter tier model, and names without a tier fall back to the
      provider default (its sonnet tier) with a warning.
    - ``"openai"`` / ``"gemini"`` / ``"litellm"``: names already recognised as
      that provider's pass through (LiteLLM returns the original string so the
      provider prefix is preserved; the others return the normalized name).
      Tier names map to the provider's tier model; anything else falls back to
      the provider default with a warning.
    - anything else (including no preference): LiteLLM-prefixed, OpenAI and
      Gemini names pass through, and Anthropic tier names map to the Gemini
      tier models.

    Args:
        anthropic_model_name: Model string as sent by the client.

    Returns:
        The mapped model name for the backend provider.

    Raises:
        ValueError: When no forced provider applies and the name is neither a
            recognised provider model nor an Anthropic tier name (fail-closed).
    """
    original = anthropic_model_name
    preferred = config.proxy.preferred_provider or None

    # Prefixes removed during normalization (at most one, first match wins).
    strippable_prefixes = ("anthropic/", "openai/", "gemini/")
    # Provider prefixes that identify a LiteLLM-style model id.
    litellm_prefixes = (
        "openai/",
        "anthropic/",
        "vertex_ai/",
        "bedrock/",
        "replicate/",
        "together_ai/",
        "gemini/",  # Local LiteLLM with Google GenAI SDK
    )

    def _normalize(raw: str) -> str:
        """Trim, lowercase, drop any ``@...`` suffix and one known prefix."""
        bare = raw.strip().lower().split("@", 1)[0]
        return next(
            (bare[len(prefix) :] for prefix in strippable_prefixes if bare.startswith(prefix)),
            bare,
        )

    def _anthropic_flavor(candidate: str) -> str | None:
        """Return the first tier name contained in ``candidate``, if any."""
        return next((tier for tier in ("haiku", "sonnet", "opus") if tier in candidate), None)

    def _is_openai(candidate: str) -> bool:
        return is_openai_model(candidate)

    def _is_gemini(candidate: str) -> bool:
        # Use unified config for Gemini model detection
        gemini_tiers = {
            config.proxy.gemini.tiers.haiku.lower(),
            config.proxy.gemini.tiers.sonnet.lower(),
            config.proxy.gemini.tiers.opus.lower(),
        }
        if candidate.startswith("gemini-"):
            return True
        return candidate in gemini_tiers

    def _is_litellm(candidate: str) -> bool:
        """Check if model name is a LiteLLM model (has provider prefix)."""
        # Every prefix contains "/", so this also implies "/" is in the name.
        return candidate.startswith(litellm_prefixes)

    def _get_provider_models(provider_name: str) -> dict[str, str]:
        """Get tier->model mappings from unified config."""
        provider = config.proxy.get_provider(provider_name)
        mapping = {
            "haiku": provider.tiers.haiku,
            "sonnet": provider.tiers.sonnet,
            "opus": provider.tiers.opus,
        }
        # Names without a recognisable tier use the sonnet-tier model.
        mapping["default"] = provider.tiers.sonnet
        return mapping

    name = _normalize(original)
    flavor = _anthropic_flavor(name)

    # OpenRouter: pass-through model IDs as-is (OpenRouter handles routing)
    if preferred == "openrouter":
        if "/" in original:
            logger.info(f"Using OpenRouter model: '{original}' (pass-through)")
            return original

        provider_models = _get_provider_models("openrouter")
        if not flavor:
            mapped = provider_models["default"]
            logger.warning(f"Unknown model '{original}' with provider preference 'openrouter', defaulting to '{mapped}'")
            return mapped

        # Map Anthropic flavors to OpenRouter tier models
        mapped = provider_models[flavor]
        logger.info(f"Mapping '{original}' ({flavor.title()}) -> OpenRouter '{mapped}'")
        return mapped

    # Forced provider: symmetric handling for OpenAI, Gemini, and LiteLLM
    if preferred in ("openai", "gemini", "litellm"):
        target = preferred

        # Only the detector for the forced provider is evaluated.
        already_on_target = {
            "openai": lambda: _is_openai(name),
            "gemini": lambda: _is_gemini(name),
            "litellm": lambda: _is_litellm(original),
        }[target]

        # Pass-through if already the target provider
        if already_on_target():
            # Return original for LiteLLM to preserve the provider prefix
            result = name if target != "litellm" else original
            logger.info(f"Using {target} model: '{result}' (provider preference: {target})")
            return result

        provider_models = _get_provider_models(target)
        if not flavor:
            # Otherwise default to target provider's default
            mapped = provider_models["default"]
            logger.warning(
                f"Unknown/other model '{original}' with provider preference '{target}', defaulting to '{mapped}'"
            )
            return mapped

        # Map Anthropic flavors to the target provider
        mapped = provider_models[flavor]
        logger.info(f"Mapping '{original}' ({flavor.title()}) -> {target.title()} '{mapped}'")
        return mapped

    # No forced provider: pass-through known provider models
    if _is_litellm(original):
        logger.info(f"Detected LiteLLM model: '{original}'")
        return original
    if _is_openai(name):
        logger.info(f"Detected OpenAI model: '{original}' -> '{name}'")
        return name
    if _is_gemini(name):
        logger.info(f"Detected Gemini model: '{original}' -> '{name}'")
        return name

    # Anthropic tier names map to the Gemini tier models; anything else is rejected.
    target = "gemini"
    provider_models = _get_provider_models(target)
    if not flavor:
        # Fail-closed: reject completely unknown models rather than silently routing to default
        raise ValueError(
            f"Unrecognized model '{original}'. Cannot route to backend. "
            "Check model name or configure a mapping in the proxy template."
        )

    mapped = provider_models[flavor]
    logger.info(f"Mapping '{original}' ({flavor.title()}) -> {target.title()} '{mapped}'")
    return mapped
@@ -0,0 +1,92 @@
1
+ """Error hint enrichment for client-side tool failures.
2
+
3
+ Appends targeted hints to tool_result error content before forwarding
4
+ to the LLM, helping non-Claude models recover from common mistakes.
5
+ """
6
+
7
+ from typing import Optional
8
+
9
# Sentinel prefix to prevent double-appending hints
_HINT_PREFIX = "\n\nHINT: "

# Hint texts shared between a tool-specific rule and its tool-agnostic fallback.
_EDIT_NOOP_HINT = "Edit requires old_string \u2260 new_string. To view code, use Read instead of Edit."
_TASK_ID_HINT = (
    "Task IDs are short hex strings returned by run_in_background. "
    "Do not append file extensions. If not found, stop retrying the same ID."
)

# Each rule: (tool_name_or_None, list_of_required_substrings, hint_text)
# tool_name=None means match any tool. First matching rule wins.
_HINT_RULES: list[tuple[Optional[str], list[str], str]] = [
    # Edit: no-op (old_string == new_string) -- 57% of all failures
    ("Edit", ["old_string and new_string are exactly the same"], _EDIT_NOOP_HINT),
    # Edit: not unique match
    (
        "Edit",
        ["matches", "replace_all is false"],
        "Include more surrounding context in old_string to uniquely identify the target, or set replace_all=true.",
    ),
    # Bash: ruff F401 unused import
    ("Bash", ["F401", "imported but unused"], "Remove the unused import(s) listed above, then retry."),
    # Bash: ruff F811 redefinition of unused name
    ("Bash", ["F811", "redefinition of unused"], "Remove the duplicate definition listed above, then retry."),
    # TaskOutput: hallucinated task ID
    ("TaskOutput", ["No task found with ID"], _TASK_ID_HINT),
    # Read: invalid pages parameter (non-PDF files)
    (
        "Read",
        ["Invalid pages parameter"],
        "pages is only for PDF files. For non-PDF files, omit pages entirely. Retry with only file_path.",
    ),
    # Read: file not found
    (
        "Read",
        ["File does not exist"],
        "Verify the absolute file path is correct. Use Glob to search for the file.",
    ),
    # --- Fallback rules (tool_name=None) for when _find_tool_name() fails ---
    (None, ["old_string and new_string are exactly the same"], _EDIT_NOOP_HINT),
    (None, ["No task found with ID"], _TASK_ID_HINT),
]
75
+
76
+
77
def enrich_error_content(tool_name: Optional[str], error_content: str) -> str:
    """Return ``error_content`` with a HINT appended when a known pattern matches.

    Rules in ``_HINT_RULES`` are tried in order and the first match wins. A
    rule matches when its tool is ``None`` or equals ``tool_name``, and every
    one of its required substrings occurs in ``error_content``.

    Args:
        tool_name: Name of the tool that produced the error, or ``None``.
        error_content: Error text of the tool result.

    Returns:
        The content followed by ``_HINT_PREFIX`` and the hint text, or the
        content unchanged when no rule matches or it already contains
        ``_HINT_PREFIX``.
    """
    # The prefix doubles as a marker: content that has it was already enriched.
    already_hinted = _HINT_PREFIX in error_content
    if already_hinted:
        return error_content

    def _applies(rule_tool, needles) -> bool:
        tool_matches = rule_tool is None or tool_name == rule_tool
        return tool_matches and all(needle in error_content for needle in needles)

    hint = next(
        (text for rule_tool, needles, text in _HINT_RULES if _applies(rule_tool, needles)),
        None,
    )
    if hint is None:
        return error_content
    return error_content + _HINT_PREFIX + hint
forge/proxy/metrics.py ADDED
@@ -0,0 +1,222 @@
1
+ """In-memory per-proxy runtime metrics.
2
+
3
+ Each proxy process maintains a single ProxyMetrics instance that accumulates
4
+ request counts, token usage (including cached and failed), and latency.
5
+ Metrics reset on proxy restart — this is expected and correct since each
6
+ proxy is a separate subprocess.
7
+
8
+ Exposed via GET / (runtime truth endpoint) and ``forge proxy metrics`` CLI.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import threading
14
+ import time
15
+ from dataclasses import dataclass, field
16
+
17
+ from forge.core.state import now_iso
18
+
19
+
20
@dataclass
class TierTokens:
    """Token, latency and cost accumulator for one tier (or one model)."""

    input_tokens: int = 0
    output_tokens: int = 0
    cached_tokens: int = 0
    total_latency_ms: float = 0.0
    # Denominator for the average latency; kept on the bucket itself rather
    # than read from an external per-tier request counter.
    request_count: int = 0
    # Microdollars: 1 USD == 1_000_000.
    estimated_cost_micros: int = 0

    def to_dict(self) -> dict[str, object]:
        """Return a plain-dict view with derived average latency and USD cost.

        ``avg_latency_ms`` is ``total_latency_ms / request_count`` rounded to
        one decimal, or ``0.0`` when no request has been counted.
        ``estimated_cost_usd`` is the microdollar total converted to dollars
        and rounded to six decimals. ``total_latency_ms`` and
        ``request_count`` themselves are not included in the result.
        """
        if self.request_count > 0:
            avg_latency_ms = round(self.total_latency_ms / self.request_count, 1)
        else:
            # Avoid dividing by zero for a bucket that has seen no requests.
            avg_latency_ms = 0.0

        cost_usd = round(self.estimated_cost_micros / 1_000_000, 6)

        summary: dict[str, object] = {}
        summary["input_tokens"] = self.input_tokens
        summary["output_tokens"] = self.output_tokens
        summary["cached_tokens"] = self.cached_tokens
        summary["avg_latency_ms"] = avg_latency_ms
        summary["estimated_cost_usd"] = cost_usd
        summary["estimated_cost_micros"] = self.estimated_cost_micros
        return summary
41
+
42
+
43
@dataclass
class ProxyMetrics:
    """Thread-safe in-memory metrics for a single proxy process.

    Every mutation (``record_request``, ``reset``) and every read
    (``snapshot``) runs under one non-reentrant ``threading.Lock``, so a
    snapshot never observes a half-applied request. The work done while the
    lock is held is limited to counter/dict updates and building the snapshot
    dict, so hold times are expected to be short.
    """

    # Timestamps
    started_at: str = field(default_factory=now_iso)
    # Monotonic start reference used to compute uptime in snapshot().
    _started_mono: float = field(default_factory=time.monotonic)

    # Counters
    total_requests: int = 0
    total_streaming: int = 0
    total_failures: int = 0

    # Token accounting (success + failure)
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cached_tokens: int = 0

    # Failed request tokens (wasted spend)
    failed_input_tokens: int = 0
    failed_output_tokens: int = 0

    # Cost estimates (microdollars, 1 USD = 1_000_000)
    total_cost_micros: int = 0
    failed_cost_micros: int = 0

    # Per-tier breakdown
    requests_by_tier: dict[str, int] = field(default_factory=dict)
    tokens_by_tier: dict[str, TierTokens] = field(default_factory=dict)

    # Per-model breakdown (actual_model_id, for cost comparison)
    requests_by_model: dict[str, int] = field(default_factory=dict)
    tokens_by_model: dict[str, TierTokens] = field(default_factory=dict)

    # Failure classification (error_type, not HTTP status — streaming is always 200)
    failures_by_type: dict[str, int] = field(default_factory=dict)

    # Activity
    last_request_at: str | None = None

    # Lock guarding every field above; excluded from repr.
    _lock: threading.Lock = field(default_factory=threading.Lock, repr=False)

    @staticmethod
    def _accumulate(
        request_counts: dict[str, int],
        token_buckets: dict[str, TierTokens],
        key: str,
        *,
        input_tokens: int,
        output_tokens: int,
        cached_tokens: int,
        latency_ms: float,
        cost_micros: int,
    ) -> None:
        """Add one request to the breakdown entry *key* (a tier or a model).

        Does not take the lock itself: the lock is non-reentrant and the
        caller (``record_request``) already holds it.
        """
        request_counts[key] = request_counts.get(key, 0) + 1
        bucket = token_buckets.get(key)
        if bucket is None:
            # First request seen for this key: start from a zeroed bucket.
            bucket = TierTokens()
            token_buckets[key] = bucket
        bucket.input_tokens += input_tokens
        bucket.output_tokens += output_tokens
        bucket.cached_tokens += cached_tokens
        bucket.total_latency_ms += latency_ms
        bucket.request_count += 1
        bucket.estimated_cost_micros += cost_micros

    def record_request(
        self,
        *,
        tier: str,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cached_tokens: int,
        latency_ms: float,
        streaming: bool,
        failed: bool,
        error_type: str | None = None,
        cost_micros: int = 0,
    ) -> None:
        """Record a completed request. All fields updated atomically under lock.

        Args:
            tier: Key for the per-tier breakdown.
            model: Key for the per-model breakdown.
            input_tokens: Input tokens to add to the totals and breakdowns.
            output_tokens: Output tokens to add to the totals and breakdowns.
            cached_tokens: Cached tokens to add to the totals and breakdowns.
            latency_ms: Latency added to the tier and model latency sums.
            streaming: When true, the request also counts toward
                ``total_streaming``.
            failed: When true, the request also counts toward the failure
                counters (its tokens and cost are still included in the
                overall totals).
            error_type: Failure classification; only recorded when *failed*
                is true and the value is truthy.
            cost_micros: Estimated cost in microdollars.
        """
        with self._lock:
            self.total_requests += 1
            if streaming:
                self.total_streaming += 1

            # Tokens (always, success + failure)
            self.total_input_tokens += input_tokens
            self.total_output_tokens += output_tokens
            self.total_cached_tokens += cached_tokens

            # Cost
            self.total_cost_micros += cost_micros

            # Per-tier
            self._accumulate(
                self.requests_by_tier,
                self.tokens_by_tier,
                tier,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cached_tokens=cached_tokens,
                latency_ms=latency_ms,
                cost_micros=cost_micros,
            )

            # Per-model
            self._accumulate(
                self.requests_by_model,
                self.tokens_by_model,
                model,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cached_tokens=cached_tokens,
                latency_ms=latency_ms,
                cost_micros=cost_micros,
            )

            # Failures
            if failed:
                self.total_failures += 1
                self.failed_input_tokens += input_tokens
                self.failed_output_tokens += output_tokens
                self.failed_cost_micros += cost_micros
                if error_type:
                    self.failures_by_type[error_type] = self.failures_by_type.get(error_type, 0) + 1

            # Activity
            self.last_request_at = now_iso()

    def snapshot(self) -> dict[str, object]:
        """Return a JSON-serializable dict of all metrics plus derived values.

        Derived values are ``uptime_seconds`` (monotonic time since
        construction, one decimal), ``cache_hit_rate`` (cached tokens as a
        percentage of input tokens, ``0.0`` when no input tokens have been
        recorded) and the USD conversions under ``costs``. The whole dict is
        built while holding the lock so its values are mutually consistent.
        """
        with self._lock:
            total = self.total_requests
            uptime = time.monotonic() - self._started_mono

            return {
                "started_at": self.started_at,
                "uptime_seconds": round(uptime, 1),
                "total_requests": total,
                "total_streaming": self.total_streaming,
                "total_failures": self.total_failures,
                "tokens": {
                    "input": self.total_input_tokens,
                    "output": self.total_output_tokens,
                    "cached": self.total_cached_tokens,
                    "failed_input": self.failed_input_tokens,
                    "failed_output": self.failed_output_tokens,
                },
                "cache_hit_rate": (
                    round(self.total_cached_tokens / self.total_input_tokens * 100, 1)
                    if self.total_input_tokens > 0
                    else 0.0
                ),
                "by_tier": {
                    tier: {"requests": self.requests_by_tier.get(tier, 0), **tokens.to_dict()}
                    for tier, tokens in self.tokens_by_tier.items()
                },
                "by_model": {
                    model: {"requests": self.requests_by_model.get(model, 0), **tokens.to_dict()}
                    for model, tokens in self.tokens_by_model.items()
                },
                # Copy so callers cannot mutate the live counter dict.
                "failures_by_type": dict(self.failures_by_type),
                "costs": {
                    "total_usd": round(self.total_cost_micros / 1_000_000, 6),
                    "failed_usd": round(self.failed_cost_micros / 1_000_000, 6),
                    "total_micros": self.total_cost_micros,
                    "failed_micros": self.failed_cost_micros,
                },
                "last_request_at": self.last_request_at,
            }

    def reset(self) -> None:
        """Zero all counters. Preserves started_at for uptime. For test isolation.

        The breakdown dicts are cleared in place, and ``last_request_at`` goes
        back to ``None``. ``started_at`` and the monotonic start reference are
        left untouched.
        """
        with self._lock:
            self.total_requests = 0
            self.total_streaming = 0
            self.total_failures = 0
            self.total_input_tokens = 0
            self.total_output_tokens = 0
            self.total_cached_tokens = 0
            self.failed_input_tokens = 0
            self.failed_output_tokens = 0
            self.total_cost_micros = 0
            self.failed_cost_micros = 0
            self.requests_by_tier.clear()
            self.tokens_by_tier.clear()
            self.requests_by_model.clear()
            self.tokens_by_model.clear()
            self.failures_by_type.clear()
            self.last_request_at = None
218
+
219
+
220
+ # Module-level singleton — one per proxy process, created at import time with all counters at zero.
221
+ # Matches existing patterns: client_factory and PROXY_ID in server.py are also module globals.
222
+ proxy_metrics = ProxyMetrics()