multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
"""Model catalog loader and validator.
|
|
2
|
+
|
|
3
|
+
This module loads the repo-owned model_catalog.yaml and provides
|
|
4
|
+
strict validation and lookup functions. The catalog is cached at
|
|
5
|
+
module level for efficiency.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from importlib import resources
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import yaml
|
|
13
|
+
|
|
14
|
+
from forge.core.models.types import (
|
|
15
|
+
REQUIRED_TIERS,
|
|
16
|
+
ModelCatalog,
|
|
17
|
+
ModelSpec,
|
|
18
|
+
TemperatureSpec,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Logger named after this module's import path.
logger = logging.getLogger(__name__)

# Catalog schema versions this loader accepts; any other value is rejected.
SUPPORTED_SCHEMA_VERSIONS = frozenset((1,))

# Cache slot for the parsed catalog; stays None until the first load.
_catalog: ModelCatalog | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ModelCatalogError(ValueError):
    """Signal an invalid model catalog or a failed catalog lookup.

    Derives from ValueError, so handlers written for ValueError catch it too.
    """
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _load_catalog_yaml() -> dict[str, Any]:
    """Read and parse the bundled ``model_catalog.yaml``.

    The file is located through ``importlib.resources`` inside the
    ``forge.core.data`` package rather than by filesystem path, which is
    what lets the lookup work for editable installs and built wheels alike.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's text.
    """
    package, filename = "forge.core.data", "model_catalog.yaml"
    try:
        # Traversable-based API (Python 3.9+).
        text = resources.files(package).joinpath(filename).read_text(encoding="utf-8")
    except (TypeError, AttributeError):
        # Legacy API, used when the traversable lookup is unavailable.
        with resources.open_text(package, filename) as handle:
            text = handle.read()

    return yaml.safe_load(text)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _parse_temperature(model_id: str, temp_data: Any, constraint: str) -> TemperatureSpec:
    """Build a TemperatureSpec from a catalog entry's ``temperature`` field.

    Two shapes are accepted: a bare number, which pins min, default and max
    to that single value, or a dict carrying ``min``, ``default`` and ``max``.

    Args:
        model_id: The model ID (used only in error messages).
        temp_data: A float/int, or a dict with min/default/max entries.
        constraint: The temperature_constraint value ("fixed" or "range").
            Kept for signature compatibility; this function does not
            consult it.

    Returns:
        A TemperatureSpec instance.

    Raises:
        ModelCatalogError: If the value is neither a number nor a complete
            min/default/max dict, or if building the spec fails.
    """
    # bool is a subclass of int, so a YAML ``true``/``false`` would otherwise
    # be accepted silently as 1.0/0.0. Let it fall through to the
    # "expected number or dict" error instead.
    is_number = isinstance(temp_data, (int, float)) and not isinstance(temp_data, bool)

    if is_number:
        temp_val = float(temp_data)
        bounds = {"min": temp_val, "default": temp_val, "max": temp_val}
    elif isinstance(temp_data, dict):
        if not all(k in temp_data for k in ("min", "default", "max")):
            raise ModelCatalogError(
                f"Model {model_id!r} temperature dict must have min/default/max keys, got {temp_data}"
            )
        bounds = temp_data
    else:
        raise ModelCatalogError(f"Model {model_id!r} has invalid temperature: {temp_data!r} (expected number or dict)")

    # Both shapes share one guarded construction so that a conversion or
    # spec-level failure always surfaces as ModelCatalogError.
    try:
        return TemperatureSpec(
            min=float(bounds["min"]),
            default=float(bounds["default"]),
            max=float(bounds["max"]),
        )
    except (TypeError, ValueError) as e:
        raise ModelCatalogError(f"Model {model_id!r} temperature spec error: {e}") from e
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _parse_tuple_or_none(model_id: str, field_name: str, data: Any) -> tuple[str, ...] | None:
|
|
89
|
+
"""Parse a field that should be a list of strings or null.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
model_id: The model ID (for error messages).
|
|
93
|
+
field_name: The field name (for error messages).
|
|
94
|
+
data: The raw data from YAML.
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
A tuple of strings, or None if data is None/null.
|
|
98
|
+
|
|
99
|
+
Raises:
|
|
100
|
+
ModelCatalogError: If the data is invalid.
|
|
101
|
+
"""
|
|
102
|
+
if data is None:
|
|
103
|
+
return None
|
|
104
|
+
if not isinstance(data, list):
|
|
105
|
+
raise ModelCatalogError(f"Model {model_id!r} {field_name} must be a list or null, got {type(data).__name__}")
|
|
106
|
+
return tuple(str(item) for item in data)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _parse_model_spec(model_id: str, data: dict[str, Any]) -> ModelSpec:
    """Turn one catalog entry into a validated ModelSpec.

    Checks run in a fixed order: required keys, temperature_constraint,
    temperature, the optional list fields, tags, short_name, the
    system-prompt addendum reference, and finally ModelSpec construction.

    Args:
        model_id: The canonical model ID (used only in error messages).
        data: The raw YAML dict for this model.

    Returns:
        A validated ModelSpec instance.

    Raises:
        ModelCatalogError: If required fields are missing or invalid.
    """
    required_fields = {
        "friendly_name",
        "context_window_tokens",
        "max_output_tokens",
        "supports_thinking",
        "supports_images",
        "temperature_constraint",
        "temperature",
        "intelligence_score",
    }
    absent = required_fields.difference(data)
    if absent:
        raise ModelCatalogError(f"Model {model_id!r} missing required fields: {sorted(absent)}")

    valid_constraints = {"fixed", "range"}
    constraint = data["temperature_constraint"]
    if constraint not in valid_constraints:
        raise ModelCatalogError(
            f"Model {model_id!r} has invalid temperature_constraint: {constraint!r} "
            f"(must be one of {sorted(valid_constraints)})"
        )

    temperature = _parse_temperature(model_id, data["temperature"], constraint)

    # Optional list-valued fields: each becomes a tuple of strings, or None
    # when the key is absent or null. Parsed in this order.
    optional_lists = {
        key: _parse_tuple_or_none(model_id, key, data.get(key))
        for key in ("litellm_reasoning_efforts", "verbosity_levels", "thinking_levels", "thinking_modes")
    }

    tags_raw = data.get("tags", [])
    if not isinstance(tags_raw, list):
        raise ModelCatalogError(f"Model {model_id!r} tags must be a list, got {type(tags_raw).__name__}")
    tags = tuple(map(str, tags_raw))

    short_name = data.get("short_name")
    short_name = None if short_name is None else str(short_name)

    addendum = data.get("system_prompt_addendum")
    if addendum is not None:
        addendum = str(addendum)
        well_formed = addendum.startswith("system_prompt_addendums/") and addendum.endswith(".md")
        if not well_formed:
            raise ModelCatalogError(
                f"Model {model_id!r} system_prompt_addendum must be "
                f"'system_prompt_addendums/<name>.md', got {addendum!r}"
            )
        # Read the referenced resource once; only success/failure matters
        # here, the text itself is discarded.
        try:
            resources.files("forge.core.data").joinpath(*addendum.split("/")).read_text(encoding="utf-8")
        except Exception as exc:
            raise ModelCatalogError(
                f"Model {model_id!r} system_prompt_addendum resource not found: {addendum!r}"
            ) from exc

    try:
        thinking_cap = data.get("max_thinking_tokens")
        return ModelSpec(
            friendly_name=str(data["friendly_name"]),
            short_name=short_name,
            intelligence_score=int(data["intelligence_score"]),
            context_window_tokens=int(data["context_window_tokens"]),
            max_output_tokens=int(data["max_output_tokens"]),
            # Any falsy value (missing, null, 0) maps to None.
            max_thinking_tokens=int(thinking_cap) if thinking_cap else None,
            supports_thinking=bool(data["supports_thinking"]),
            supports_images=bool(data["supports_images"]),
            supports_verbosity=bool(data.get("supports_verbosity", False)),
            supports_top_p=bool(data.get("supports_top_p", True)),
            supports_sampling_overrides=bool(data.get("supports_sampling_overrides", True)),
            supports_1m_context=bool(data.get("supports_1m_context", False)),
            temperature_constraint=constraint,
            temperature=temperature,
            verbosity_levels=optional_lists["verbosity_levels"],
            use_responses_api=bool(data.get("use_responses_api", False)),
            native_thinking_param=data.get("native_thinking_param"),
            litellm_reasoning_efforts=optional_lists["litellm_reasoning_efforts"],
            default_reasoning_effort=data.get("default_reasoning_effort"),
            thinking_modes=optional_lists["thinking_modes"],
            thinking_levels=optional_lists["thinking_levels"],
            default_thinking_level=data.get("default_thinking_level"),
            token_estimate_multiplier=float(data.get("token_estimate_multiplier", 1.0)),
            system_prompt_addendum=addendum,
            tags=tags,
        )
    except (TypeError, ValueError) as exc:
        raise ModelCatalogError(f"Model {model_id!r} validation error: {exc}") from exc
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _validate_and_build_catalog(raw: dict[str, Any]) -> ModelCatalog:
    """Validate raw YAML data and build a ModelCatalog.

    Validation covers, in order: the top-level document type, the schema
    version, every model entry, every alias (whose target must be a key of
    ``models``), and the per-provider tier defaults (each provider must
    list all REQUIRED_TIERS and reference only known models).

    Args:
        raw: The parsed YAML document.

    Returns:
        A validated ModelCatalog instance.

    Raises:
        ModelCatalogError: If the catalog is invalid.
    """
    # An empty or malformed YAML document parses to None, a list or a
    # scalar; report that as a catalog error rather than failing on
    # ``raw.get`` with an AttributeError.
    if not isinstance(raw, dict):
        raise ModelCatalogError(f"Model catalog must be a dict at the top level, got {type(raw).__name__}")

    schema_version = raw.get("schema_version")
    if schema_version is None:
        raise ModelCatalogError("Model catalog missing required 'schema_version' field")
    if schema_version not in SUPPORTED_SCHEMA_VERSIONS:
        raise ModelCatalogError(
            f"Unsupported model catalog schema_version: {schema_version} "
            f"(supported: {sorted(SUPPORTED_SCHEMA_VERSIONS)})"
        )

    models_raw = raw.get("models", {})
    if not isinstance(models_raw, dict):
        raise ModelCatalogError(f"'models' must be a dict, got {type(models_raw).__name__}")

    models: dict[str, ModelSpec] = {}
    for model_id, model_data in models_raw.items():
        if not isinstance(model_data, dict):
            raise ModelCatalogError(f"Model {model_id!r} must be a dict, got {type(model_data).__name__}")
        models[model_id] = _parse_model_spec(model_id, model_data)

    aliases_raw = raw.get("aliases", {})
    if not isinstance(aliases_raw, dict):
        raise ModelCatalogError(f"'aliases' must be a dict, got {type(aliases_raw).__name__}")

    aliases: dict[str, str] = {}
    for alias, target in aliases_raw.items():
        if not isinstance(target, str):
            raise ModelCatalogError(f"Alias {alias!r} target must be a string, got {type(target).__name__}")
        # Every target must be a key of ``models``; an alias whose target is
        # not a model entry is rejected here.
        if target not in models:
            raise ModelCatalogError(f"Alias {alias!r} points to unknown model {target!r}")
        aliases[alias] = target

    # ``defaults`` is optional; a missing section yields an empty mapping.
    defaults_raw = raw.get("defaults", {})
    if not isinstance(defaults_raw, dict):
        raise ModelCatalogError(f"'defaults' must be a dict, got {type(defaults_raw).__name__}")

    defaults: dict[str, dict[str, str]] = {}
    for provider, tiers in defaults_raw.items():
        if not isinstance(tiers, dict):
            raise ModelCatalogError(f"defaults.{provider} must be a dict, got {type(tiers).__name__}")
        missing_tiers = REQUIRED_TIERS - set(tiers.keys())
        if missing_tiers:
            raise ModelCatalogError(f"defaults.{provider} missing required tiers: {sorted(missing_tiers)}")
        provider_defaults: dict[str, str] = {}
        for tier, default_id in tiers.items():
            if not isinstance(default_id, str):
                raise ModelCatalogError(f"defaults.{provider}.{tier} must be a string, got {type(default_id).__name__}")
            if default_id not in models:
                raise ModelCatalogError(f"defaults.{provider}.{tier} references unknown model {default_id!r}")
            provider_defaults[tier] = default_id
        defaults[provider] = provider_defaults

    # Lazy %-style arguments: the message is only formatted when INFO
    # logging is enabled.
    logger.info(
        "Loaded model catalog v%s: %d models, %d aliases",
        schema_version,
        len(models),
        len(aliases),
    )

    return ModelCatalog(
        schema_version=schema_version,
        models=models,
        aliases=aliases,
        defaults=defaults,
    )
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def load_model_catalog(*, force_reload: bool = False) -> ModelCatalog:
    """Return the model catalog, loading it on first use.

    The parsed catalog is kept in the module-level ``_catalog`` slot. The
    YAML is read and validated only when that slot is still empty or when
    a reload is requested; otherwise the cached instance is returned.

    Args:
        force_reload: If True, re-read and re-validate the YAML even when a
            cached catalog exists.

    Returns:
        The validated ModelCatalog.

    Raises:
        ModelCatalogError: If the catalog is invalid.
    """
    global _catalog

    if force_reload or _catalog is None:
        _catalog = _validate_and_build_catalog(_load_catalog_yaml())
    return _catalog
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def resolve_model_id(model_or_alias: str) -> str:
    """Map a model ID or alias onto its canonical model ID.

    Args:
        model_or_alias: A canonical model ID or an alias.

    Returns:
        The canonical model ID.

    Raises:
        ModelCatalogError: If the catalog's ``resolve`` raises KeyError for
            the given name.
    """
    cat = load_model_catalog()
    try:
        canonical = cat.resolve(model_or_alias)
    except KeyError as exc:
        raise ModelCatalogError(str(exc)) from exc
    return canonical
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def get_model_spec(model_or_alias: str) -> ModelSpec:
    """Look up the ModelSpec for a model ID or alias.

    Args:
        model_or_alias: A canonical model ID or an alias.

    Returns:
        The ModelSpec returned by the catalog for that name.

    Raises:
        ModelCatalogError: If the catalog's ``get`` raises KeyError for the
            given name.
    """
    cat = load_model_catalog()
    try:
        spec = cat.get(model_or_alias)
    except KeyError as exc:
        raise ModelCatalogError(str(exc)) from exc
    return spec
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def get_context_window_tokens(model_or_alias: str) -> int:
    """Return a model's context window size in tokens.

    Args:
        model_or_alias: A canonical model ID or an alias.

    Returns:
        The ``context_window_tokens`` value of the resolved ModelSpec.

    Raises:
        ModelCatalogError: If the model/alias is not found.
    """
    spec = get_model_spec(model_or_alias)
    return spec.context_window_tokens
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def get_max_output_tokens(model_or_alias: str) -> int:
    """Return a model's maximum output token count.

    Args:
        model_or_alias: A canonical model ID or an alias.

    Returns:
        The ``max_output_tokens`` value of the resolved ModelSpec.

    Raises:
        ModelCatalogError: If the model/alias is not found.
    """
    spec = get_model_spec(model_or_alias)
    return spec.max_output_tokens
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def model_exists(model_or_alias: str) -> bool:
    """Report whether the catalog contains the given model ID or alias.

    Unlike the lookup helpers, an unknown name yields False rather than an
    exception; the answer comes from a membership test on the catalog.

    Args:
        model_or_alias: A canonical model ID or an alias.

    Returns:
        True if the model/alias exists, False otherwise.
    """
    return model_or_alias in load_model_catalog()
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def get_default_model(provider: str, tier: str) -> str:
    """Return the canonical model ID for a provider+tier default.

    Args:
        provider: Provider name (e.g., "openai", "gemini", "anthropic").
        tier: Tier name (e.g., "haiku", "sonnet", "opus").

    Returns:
        The model ID the catalog's ``get_default`` returns for that pair.

    Raises:
        ModelCatalogError: If the provider or tier is not in defaults.
    """
    catalog = load_model_catalog()
    try:
        return catalog.get_default(provider, tier)
    except KeyError as e:
        # Chain explicitly, matching resolve_model_id/get_model_spec, so the
        # traceback names the KeyError as the cause.
        raise ModelCatalogError(f"No default model for {provider}/{tier}") from e
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def get_provider_defaults() -> dict[str, dict[str, str]]:
    """Snapshot the defaults table (provider -> tier -> canonical model ID).

    Both the outer mapping and each per-provider mapping are freshly built,
    so callers cannot mutate the cached catalog through the result.
    """
    snapshot: dict[str, dict[str, str]] = {}
    for provider, tiers in load_model_catalog().defaults.items():
        snapshot[provider] = dict(tiers)
    return snapshot
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def get_compact_name(model: str) -> str:
    """Produce a short display name for a model.

    The provider prefix is dropped first. If the remaining name is in the
    catalog and its spec carries a ``short_name``, that override wins;
    otherwise a trailing "-preview" is removed. Works for models that are
    not in the catalog.

    Args:
        model: Model ID, possibly with provider prefix (e.g., "vertex_ai/gemini-3.1-pro-preview").

    Returns:
        A compact display name (e.g., "gemini-3-pro").
    """
    # Keep only the segment after the last "/".
    bare_name = model.split("/")[-1] if "/" in model else model

    catalog = load_model_catalog()
    if bare_name in catalog:
        override = catalog.get(bare_name).short_name
        if override is not None:
            return override

    return bare_name.removesuffix("-preview")
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def get_system_prompt_addendum(model_or_alias: str) -> str | None:
    """Load the system prompt addendum text for a model, if it has one.

    Fails open: the result is None when the model is not in the catalog
    (common with OpenRouter custom routing), when its spec names no
    addendum, or when reading the packaged resource fails at runtime.
    """
    # Only the segment after the last "/" is used for the catalog lookup.
    lookup_key = model_or_alias.split("/")[-1] if "/" in model_or_alias else model_or_alias

    try:
        spec = get_model_spec(lookup_key)
    except (KeyError, ModelCatalogError):
        return None

    addendum_path = spec.system_prompt_addendum
    if not addendum_path:
        return None

    try:
        # The addendum is a "/"-separated path inside the forge.core.data package.
        data_root = resources.files("forge.core.data")
        return data_root.joinpath(*addendum_path.split("/")).read_text(encoding="utf-8")
    except Exception:
        logger.warning("Failed to load system prompt addendum: %s", addendum_path)
        return None
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Model pricing lookup and cost calculation.
|
|
2
|
+
|
|
3
|
+
Loads pricing.yaml (shipped with Forge) and provides cost estimates
|
|
4
|
+
in integer microdollars (1 USD = 1_000_000 microdollars) to avoid
|
|
5
|
+
float accumulation drift.
|
|
6
|
+
|
|
7
|
+
Cost formula:
|
|
8
|
+
(input_tokens - cached_tokens) * input_rate
|
|
9
|
+
+ cached_tokens * cached_input_rate
|
|
10
|
+
+ output_tokens * output_rate
|
|
11
|
+
|
|
12
|
+
Cached tokens are a SUBSET of input_tokens (prompt cache hits).
|
|
13
|
+
Subtracting prevents double-counting.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from importlib import resources
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
import yaml
|
|
24
|
+
|
|
25
|
+
from forge.core.models.catalog import model_exists, resolve_model_id
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)

# Values of the top-level ``schema_version`` key in pricing.yaml that this
# module accepts.
SUPPORTED_SCHEMA_VERSIONS = frozenset((1,))

# Conversion factor: 1 USD = 1_000_000 microdollars.
_MICROS_PER_DOLLAR = 1_000_000

# Parsed pricing.yaml contents; stays None until the first successful load.
_pricing_data: dict[str, Any] | None = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True)
class ModelPricing:
    """Per-million-token rates in USD (floats from YAML) and source label."""

    # USD per million input tokens that were not served from the prompt cache.
    input_per_mtok: float
    # USD per million output (completion) tokens.
    output_per_mtok: float
    # USD per million input tokens that were prompt-cache hits.
    cached_input_per_mtok: float
    # Where the rates came from. get_pricing() emits "catalog", "default", or
    # "hardcoded" (its built-in last-resort rates); "override" is also a
    # documented label, presumably set by callers outside this module.
    source: str
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _load_pricing_yaml() -> dict[str, Any]:
    """Read and parse the pricing.yaml bundled in the forge.core.data package."""
    package = "forge.core.data"
    filename = "pricing.yaml"
    try:
        text = resources.files(package).joinpath(filename).read_text(encoding="utf-8")
    except (TypeError, AttributeError):
        # Fall back to the older importlib.resources API when files() is unusable.
        with resources.open_text(package, filename) as handle:
            text = handle.read()
    return yaml.safe_load(text)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _get_pricing_data() -> dict[str, Any]:
    """Return cached pricing data (lazy-loaded singleton).

    The YAML is loaded and validated on first use; later calls return the
    same cached mapping until reset_pricing_cache() clears it.

    Returns:
        The parsed top-level mapping of pricing.yaml.

    Raises:
        ValueError: If pricing.yaml does not contain a top-level mapping, or
            if its schema_version is not in SUPPORTED_SCHEMA_VERSIONS.
    """
    global _pricing_data
    if _pricing_data is None:
        raw = _load_pricing_yaml()
        # yaml.safe_load yields None for an empty document and can yield a
        # list or scalar; fail with a clear message instead of an
        # AttributeError on .get below.
        if not isinstance(raw, dict):
            raise ValueError(
                f"pricing.yaml must contain a mapping at the top level, got {type(raw).__name__}"
            )
        version = raw.get("schema_version")
        if version not in SUPPORTED_SCHEMA_VERSIONS:
            raise ValueError(
                f"Unsupported pricing schema_version: {version} "
                f"(supported: {sorted(SUPPORTED_SCHEMA_VERSIONS)})"
            )
        # Cache only after validation so a bad file is never memoized.
        _pricing_data = raw
    return _pricing_data
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def reset_pricing_cache() -> None:
    """Clear the memoized pricing data so the next lookup reloads it.

    Intended for tests.
    """
    global _pricing_data
    _pricing_data = None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _parse_model_pricing(data: dict[str, Any], source: str) -> ModelPricing:
    """Build a ModelPricing from one pricing mapping.

    Args:
        data: Mapping with required "input" and "output" rates and an
            optional "cached_input" rate.
        source: Label stored on the result to record where the rates came from.

    Returns:
        A ModelPricing whose rates are all coerced to float.

    Raises:
        KeyError: If "input" or "output" is missing from ``data``.
    """
    # Coerce before doing arithmetic so a rate written as a string in the
    # YAML does not break the derived cached-input default.
    input_rate = float(data["input"])
    output_rate = float(data["output"])

    cached_raw = data.get("cached_input")
    # A missing or null cached rate defaults to 10% of the input rate.
    cached_rate = input_rate * 0.1 if cached_raw is None else float(cached_raw)

    return ModelPricing(
        input_per_mtok=input_rate,
        output_per_mtok=output_rate,
        cached_input_per_mtok=cached_rate,
        source=source,
    )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_pricing(model: str) -> ModelPricing:
    """Look up pricing for a model, resolving aliases.

    Resolution order:
    1. Exact match in pricing.yaml models
    2. Resolve via model catalog alias, then match
    3. Fall back to pricing.yaml default section
    4. Fall back to rates hardcoded in this function

    Steps 3 and 4 each log a warning, since the returned rates are then only
    an approximation for the requested model.

    Args:
        model: Model ID (canonical or alias, e.g. "anthropic/claude-sonnet-4.6").

    Returns:
        ModelPricing with per-MTok rates and source label ("catalog",
        "default", or "hardcoded").
    """
    data = _get_pricing_data()
    # A bare ``models:`` key parses to None; treat it as an empty table.
    models = data.get("models") or {}

    if model in models:
        return _parse_model_pricing(models[model], "catalog")

    if model_exists(model):
        try:
            canonical = resolve_model_id(model)
            if canonical in models:
                return _parse_model_pricing(models[canonical], "catalog")
        except Exception:
            # Best effort: alias resolution must never block a cost estimate,
            # but record why it failed instead of discarding the error.
            logger.debug("Alias-based pricing lookup failed for model %r", model, exc_info=True)

    default = data.get("default")
    if default:
        logger.warning("No catalog pricing for model %r; using default rates", model)
        return _parse_model_pricing(default, "default")

    logger.warning("No pricing data for model %r; using hardcoded fallback rates", model)
    return ModelPricing(
        input_per_mtok=3.0,
        output_per_mtok=15.0,
        cached_input_per_mtok=0.30,
        source="hardcoded",
    )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def calculate_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
    cached_tokens: int,
) -> int:
    """Calculate estimated cost in microdollars (integer, 1 USD = 1_000_000).

    Cached tokens are a subset of input_tokens. The formula avoids
    double-counting by charging cached tokens at the lower cached rate
    and only the remainder at the full input rate.

    Args:
        model: Model ID (canonical or alias).
        input_tokens: Total input/prompt tokens (includes cached).
        output_tokens: Completion tokens.
        cached_tokens: Prompt cache hit tokens (subset of input_tokens).
            Values outside ``0..input_tokens`` are clamped into that range.

    Returns:
        Estimated cost in microdollars (integer).
    """
    pricing = get_pricing(model)

    # Clamp so a bogus cached count can neither exceed the input total nor,
    # when negative, inflate the number of tokens billed at the full rate.
    cached = max(0, min(cached_tokens, input_tokens))
    fresh_input = input_tokens - cached

    # Rates are USD per 1_000_000 tokens and the result is in 1/1_000_000 USD,
    # so the two factors cancel: tokens * rate is already microdollars.
    # Skipping the divide-then-multiply round trip avoids float error that
    # could tip the final rounding by one microdollar.
    cost_micros = (
        fresh_input * pricing.input_per_mtok
        + cached * pricing.cached_input_per_mtok
        + output_tokens * pricing.output_per_mtok
    )

    return round(cost_micros)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def micros_to_usd(micros: int) -> float:
    """Express an integer microdollar amount as a USD float (display only)."""
    usd = micros / _MICROS_PER_DOLLAR
    return usd
|