PyPI - multi-forge - Versions diffs - 0.2.0__py3-none-any.whl - Mend

multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (311) hide show

forge/__init__.py +3 -0
forge/_extensions/agents/.gitkeep +0 -0
forge/_extensions/commands/.gitkeep +0 -0
forge/_extensions/skills/analyze/SKILL.md +87 -0
forge/_extensions/skills/challenge/SKILL.md +91 -0
forge/_extensions/skills/consensus/SKILL.md +120 -0
forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
forge/_extensions/skills/debate/SKILL.md +116 -0
forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
forge/_extensions/skills/panel/SKILL.md +141 -0
forge/_extensions/skills/panel/resources/synthesis.md +103 -0
forge/_extensions/skills/qa/SKILL.md +704 -0
forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
forge/_extensions/skills/qa/resources/checklist.md +103 -0
forge/_extensions/skills/qa/resources/report-template.md +62 -0
forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
forge/_extensions/skills/review/SKILL.md +125 -0
forge/_extensions/skills/review/references/claude-4.6.md +474 -0
forge/_extensions/skills/review/references/claude-4.7.md +710 -0
forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
forge/_extensions/skills/review/resources/code-gemini.md +184 -0
forge/_extensions/skills/review/resources/code-openai.md +203 -0
forge/_extensions/skills/review/resources/code.md +160 -0
forge/_extensions/skills/review-docs/SKILL.md +121 -0
forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
forge/_extensions/skills/review-docs/resources/docs.md +170 -0
forge/_extensions/skills/smoke-test/SKILL.md +27 -0
forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
forge/_extensions/skills/understand/SKILL.md +148 -0
forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
forge/_extensions/skills/understand/resources/code-openai.md +181 -0
forge/_extensions/skills/understand/resources/code.md +163 -0
forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
forge/_extensions/skills/understand/resources/docs.md +177 -0
forge/_extensions/skills/walkthrough/SKILL.md +599 -0
forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
forge/backend/__init__.py +174 -0
forge/backend/adapters/__init__.py +38 -0
forge/backend/adapters/litellm.py +158 -0
forge/backend/creation.py +89 -0
forge/backend/registry.py +178 -0
forge/cli/__init__.py +16 -0
forge/cli/auth.py +483 -0
forge/cli/backend.py +298 -0
forge/cli/claude.py +411 -0
forge/cli/config_cmd.py +303 -0
forge/cli/extensions.py +1001 -0
forge/cli/gc.py +165 -0
forge/cli/guard.py +1018 -0
forge/cli/guards.py +106 -0
forge/cli/handoff.py +110 -0
forge/cli/hooks/__init__.py +36 -0
forge/cli/hooks/_group.py +20 -0
forge/cli/hooks/_helpers.py +149 -0
forge/cli/hooks/commands.py +1677 -0
forge/cli/hooks/direct_commands.py +1304 -0
forge/cli/hooks/install.py +232 -0
forge/cli/hooks/policy.py +151 -0
forge/cli/hooks/read_hygiene.py +74 -0
forge/cli/hooks/verification.py +370 -0
forge/cli/logs.py +406 -0
forge/cli/main.py +292 -0
forge/cli/proxy.py +1821 -0
forge/cli/proxy_costs.py +313 -0
forge/cli/search.py +416 -0
forge/cli/session.py +892 -0
forge/cli/session_addendum.py +81 -0
forge/cli/session_fork.py +750 -0
forge/cli/session_handoff.py +141 -0
forge/cli/session_lifecycle.py +2053 -0
forge/cli/session_manage.py +1336 -0
forge/cli/session_memory.py +201 -0
forge/cli/status_line.py +1398 -0
forge/cli/workflow.py +1964 -0
forge/config/__init__.py +110 -0
forge/config/dataclass_utils.py +88 -0
forge/config/defaults/__init__.py +0 -0
forge/config/defaults/backends/__init__.py +0 -0
forge/config/defaults/backends/litellm.yaml +196 -0
forge/config/defaults/templates/__init__.py +0 -0
forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
forge/config/defaults/templates/litellm-gemini.yaml +21 -0
forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
forge/config/defaults/templates/litellm-openai.yaml +28 -0
forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
forge/config/defaults/templates/openrouter-glm.yaml +23 -0
forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
forge/config/defaults/templates/openrouter-openai.yaml +28 -0
forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
forge/config/loader.py +675 -0
forge/config/schema.py +448 -0
forge/core/__init__.py +5 -0
forge/core/auth/__init__.py +67 -0
forge/core/auth/capabilities.py +219 -0
forge/core/auth/credentials_file.py +244 -0
forge/core/auth/protocols.py +18 -0
forge/core/auth/secrets.py +243 -0
forge/core/auth/template_secrets.py +112 -0
forge/core/data/__init__.py +5 -0
forge/core/data/model_catalog.yaml +1522 -0
forge/core/data/pricing.yaml +140 -0
forge/core/data/system_prompt_addendums/__init__.py +0 -0
forge/core/data/system_prompt_addendums/gemini.md +330 -0
forge/core/data/system_prompt_addendums/openai.md +328 -0
forge/core/llm/__init__.py +231 -0
forge/core/llm/clients/__init__.py +14 -0
forge/core/llm/clients/base.py +115 -0
forge/core/llm/clients/litellm.py +619 -0
forge/core/llm/clients/openai_compat.py +244 -0
forge/core/llm/clients/openrouter.py +234 -0
forge/core/llm/credentials.py +439 -0
forge/core/llm/detection.py +86 -0
forge/core/llm/errors.py +44 -0
forge/core/llm/protocols.py +80 -0
forge/core/llm/types.py +176 -0
forge/core/logging.py +146 -0
forge/core/models/__init__.py +91 -0
forge/core/models/catalog.py +467 -0
forge/core/models/pricing.py +165 -0
forge/core/models/types.py +167 -0
forge/core/naming.py +212 -0
forge/core/ops/__init__.py +73 -0
forge/core/ops/context.py +141 -0
forge/core/ops/gc.py +802 -0
forge/core/ops/proxy.py +146 -0
forge/core/ops/resolution.py +135 -0
forge/core/ops/session.py +344 -0
forge/core/ops/session_context.py +548 -0
forge/core/paths.py +38 -0
forge/core/process.py +54 -0
forge/core/reactive/__init__.py +38 -0
forge/core/reactive/cost_tracking.py +300 -0
forge/core/reactive/env.py +180 -0
forge/core/reactive/proxy.py +78 -0
forge/core/reactive/routing.py +622 -0
forge/core/reactive/session_runner.py +185 -0
forge/core/reactive/structured_output.py +62 -0
forge/core/reactive/tagger.py +94 -0
forge/core/reactive/throttle.py +132 -0
forge/core/state/__init__.py +59 -0
forge/core/state/exceptions.py +59 -0
forge/core/state/io.py +140 -0
forge/core/state/lock.py +99 -0
forge/core/state/timestamps.py +60 -0
forge/core/transcript.py +78 -0
forge/core/typing_helpers.py +24 -0
forge/core/workqueue/__init__.py +67 -0
forge/core/workqueue/queue.py +552 -0
forge/core/workqueue/types.py +63 -0
forge/guard/__init__.py +26 -0
forge/guard/deterministic/__init__.py +26 -0
forge/guard/deterministic/base.py +158 -0
forge/guard/deterministic/coding_standards.py +256 -0
forge/guard/deterministic/registry.py +148 -0
forge/guard/deterministic/tdd.py +171 -0
forge/guard/engine.py +216 -0
forge/guard/protocols.py +91 -0
forge/guard/queries.py +96 -0
forge/guard/semantic/__init__.py +34 -0
forge/guard/semantic/promotion.py +18 -0
forge/guard/semantic/supervisor.py +813 -0
forge/guard/semantic/verdict.py +183 -0
forge/guard/store.py +124 -0
forge/guard/team/__init__.py +6 -0
forge/guard/team/config.py +24 -0
forge/guard/team/handlers.py +209 -0
forge/guard/team/prompts.py +41 -0
forge/guard/types.py +125 -0
forge/guard/workflow/__init__.py +17 -0
forge/guard/workflow/branches.py +67 -0
forge/guard/workflow/config.py +63 -0
forge/guard/workflow/divergence.py +113 -0
forge/guard/workflow/policy.py +87 -0
forge/guard/workflow/stages.py +205 -0
forge/install/__init__.py +55 -0
forge/install/cli.py +281 -0
forge/install/exceptions.py +163 -0
forge/install/hooks.py +109 -0
forge/install/installer.py +1037 -0
forge/install/models.py +321 -0
forge/install/preset.py +272 -0
forge/install/settings_merge.py +831 -0
forge/install/tracking.py +238 -0
forge/install/version.py +141 -0
forge/proxy/__init__.py +0 -0
forge/proxy/base_client.py +181 -0
forge/proxy/client_adapter.py +476 -0
forge/proxy/client_factory.py +531 -0
forge/proxy/converters.py +1206 -0
forge/proxy/cost_logger.py +132 -0
forge/proxy/cost_tracker.py +242 -0
forge/proxy/data_models.py +338 -0
forge/proxy/error_hints.py +92 -0
forge/proxy/metrics.py +222 -0
forge/proxy/model_spec.py +158 -0
forge/proxy/proxies.py +333 -0
forge/proxy/proxy_identity.py +134 -0
forge/proxy/proxy_orchestrator.py +1018 -0
forge/proxy/proxy_startup.py +54 -0
forge/proxy/server.py +1561 -0
forge/proxy/utils.py +537 -0
forge/review/__init__.py +6 -0
forge/review/adversarial.py +111 -0
forge/review/consensus.py +236 -0
forge/review/engine.py +356 -0
forge/review/models.py +437 -0
forge/review/resources/__init__.py +5 -0
forge/review/resources/codereview-performance.md +85 -0
forge/review/resources/codereview-quick.md +75 -0
forge/review/resources/codereview-security.md +92 -0
forge/review/resources/codereview.md +85 -0
forge/review/resources/docreview-quick.md +75 -0
forge/review/resources/docreview.md +86 -0
forge/review/resources/thinkdeep.md +89 -0
forge/review/routing.py +368 -0
forge/review/synthesis.py +73 -0
forge/runtime_config.py +438 -0
forge/search/__init__.py +55 -0
forge/search/bm25_store.py +264 -0
forge/search/content_store.py +197 -0
forge/search/engine.py +352 -0
forge/search/exceptions.py +51 -0
forge/search/extractor.py +234 -0
forge/search/index_state.py +295 -0
forge/search/store.py +215 -0
forge/search/tokenizer.py +24 -0
forge/session/__init__.py +130 -0
forge/session/active.py +339 -0
forge/session/artifacts.py +202 -0
forge/session/claude/__init__.py +50 -0
forge/session/claude/cleanup.py +105 -0
forge/session/claude/invoke.py +236 -0
forge/session/claude/paths.py +200 -0
forge/session/cleanup.py +216 -0
forge/session/config.py +34 -0
forge/session/direct_model.py +107 -0
forge/session/effective.py +169 -0
forge/session/exceptions.py +255 -0
forge/session/handoff.py +881 -0
forge/session/handoff_agent.py +544 -0
forge/session/hooks/__init__.py +35 -0
forge/session/hooks/models.py +73 -0
forge/session/hooks/session_start.py +507 -0
forge/session/identity.py +84 -0
forge/session/index.py +553 -0
forge/session/manager.py +1506 -0
forge/session/models.py +572 -0
forge/session/overrides.py +344 -0
forge/session/plan_resolution.py +286 -0
forge/session/prev_sessions.py +128 -0
forge/session/store.py +431 -0
forge/session/validation.py +47 -0
forge/session/worktree/__init__.py +65 -0
forge/session/worktree/cleanup.py +262 -0
forge/session/worktree/config_copy.py +203 -0
forge/session/worktree/create.py +332 -0
forge/sidecar/__init__.py +29 -0
forge/sidecar/container.py +161 -0
forge/sidecar/docker.py +86 -0
forge/sidecar/secrets.py +19 -0
multi_forge-0.2.0.dist-info/METADATA +242 -0
multi_forge-0.2.0.dist-info/RECORD +311 -0
multi_forge-0.2.0.dist-info/WHEEL +4 -0
multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0

forge/core/data/model_catalog.yaml ADDED Viewed

@@ -0,0 +1,1522 @@
+# Central Model Catalog - Intrinsic Properties & Safe Defaults
+# ============================================================
+# This file is the single source of truth for model capabilities.
+# Families reference models by ID; this defines what each model CAN do.
+#
+# DO NOT put operational config here (tier mappings, routing, etc.)
+# Those belong in template YAML overlays under config/defaults/templates/
+#
+# HYPERPARAMETER FIELDS:
+# - temperature_constraint: "fixed" (must be 1.0) or "range" (configurable)
+# - supports_top_p: Whether top_p sampling is supported (reasoning models don't support it)
+# - default_timeout_seconds: Safe default timeout (client can override)
+#
+# PROMPT CACHING (via LiteLLM - see docs.litellm.ai/docs/completion/prompt_caching):
+# LiteLLM reports cached_tokens for: OpenAI, Anthropic, Bedrock, DeepSeek (NOT Gemini)
+# - prompt_caching.supports: Whether LiteLLM supports prompt caching for this model
+# - prompt_caching.mechanism: How the provider implements caching
+#     "auto": Server-side automatic (OpenAI/DeepSeek) - no client action needed
+#     "cache_control": Requires explicit cache_control blocks (Anthropic)
+# - prompt_caching.min_tokens: Minimum tokens for caching to activate
+# - prompt_caching.max_cache_blocks: Max cache_control blocks (Anthropic = 4)
+#
+# NOTE: Only Anthropic caching can be controlled via input (cache_control markers).
+# OpenAI/DeepSeek cache automatically - client policies have no effect on them.
+schema_version: 1
+# =============================================================================
+# DEFAULTS - Per-provider edge picks at each tier
+# =============================================================================
+# Single source of truth for "which model should we use by default?"
+# Review defaults, tests, and proxy templates derive from these.
+# Update here when adopting a new model family.
+defaults:
+  openai:
+    haiku: gpt-5.4-mini
+    sonnet: gpt-5.5
+    opus: gpt-5.5
+  gemini:
+    haiku: gemini-3-flash-preview
+    sonnet: gemini-3.1-pro-preview
+    opus: gemini-3.1-pro-preview
+  anthropic:
+    haiku: claude-haiku-4-5-20251001
+    sonnet: claude-sonnet-4-6
+    opus: claude-opus-4-6
+  openrouter:
+    haiku: claude-haiku-4-5-20251001
+    sonnet: claude-sonnet-4-6
+    opus: claude-opus-4-6
+  deepseek:
+    haiku: deepseek-v4-flash
+    sonnet: deepseek-v4-pro
+    opus: deepseek-v4-pro
+  minimax:
+    haiku: gemma-4-31b-it
+    sonnet: minimax-m2.7
+    opus: minimax-m2.7
+  qwen:
+    haiku: qwen3.6-flash
+    sonnet: qwen3.6-plus
+    opus: qwen3.6-max-preview
+  glm:
+    haiku: glm-4.7-flash
+    sonnet: glm-5.1
+    opus: glm-5.1
+  kimi:
+    haiku: gemma-4-31b-it
+    sonnet: kimi-k2.6
+    opus: kimi-k2.6
+# =============================================================================
+# MODELS - Canonical IDs are provider-agnostic
+# =============================================================================
+models:
+  # ---------------------------------------------------------------------------
+  # OpenAI Models (via Azure or direct)
+  # ---------------------------------------------------------------------------
+  gpt-5.4:
+    friendly_name: GPT-5.4
+    context_window_tokens: 1050000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, low, medium, high, xhigh]
+    default_reasoning_effort: medium
+    use_responses_api: true
+    intelligence_score: 99
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5.5:
+    friendly_name: GPT-5.5
+    context_window_tokens: 1050000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, low, medium, high, xhigh]
+    default_reasoning_effort: medium
+    use_responses_api: true
+    intelligence_score: 99
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5.5-pro:
+    friendly_name: GPT-5.5 Pro
+    context_window_tokens: 1050000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 300
+    prompt_caching:
+      supports: false
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [low, medium, high, xhigh]
+    default_reasoning_effort: medium
+    use_responses_api: true
+    intelligence_score: 100
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5.4-pro:
+    friendly_name: GPT-5.4 Pro
+    context_window_tokens: 1050000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 300
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [medium, high, xhigh]
+    default_reasoning_effort: high
+    use_responses_api: true
+    intelligence_score: 100
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5.4-mini:
+    friendly_name: GPT-5.4 Mini
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, low, medium, high, xhigh]
+    default_reasoning_effort: medium
+    use_responses_api: true
+    intelligence_score: 85
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5.4-nano:
+    friendly_name: GPT-5.4 Nano
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 90
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, low, medium, high, xhigh]
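+    default_reasoning_effort: minimal  # NOTE(review): "minimal" is not in litellm_reasoning_efforts above ([none, low, medium, high, xhigh]) - confirm intended default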
+    use_responses_api: true
+    intelligence_score: 76
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5.2:
+    friendly_name: GPT-5.2
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false  # Reasoning models don't support top_p
+    default_timeout_seconds: 180   # Reasoning models need longer timeouts
+    prompt_caching:
+      supports: true
+      mechanism: auto           # OpenAI handles caching automatically
+      min_tokens: 1024     # OpenAI requires 1024+ tokens for caching
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: gpt-5.2 supports none, low, medium, high, xhigh (no minimal)
+    litellm_reasoning_efforts: [none, low, medium, high, xhigh]
+    default_reasoning_effort: medium
+    intelligence_score: 98
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5.2-pro:
+    friendly_name: GPT-5.2 Pro
+    context_window_tokens: 400000
+    max_output_tokens: 272000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false  # Reasoning models don't support top_p
+    default_timeout_seconds: 300   # Pro models need even longer timeouts
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # OpenAI matrix: gpt-5.2-pro supports medium, high, xhigh only (no none/minimal/low)
+    litellm_reasoning_efforts: [medium, high, xhigh]
+    default_reasoning_effort: high
+    use_responses_api: true
+    intelligence_score: 99
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5.3-codex:
+    friendly_name: GPT-5.3 Codex
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: false
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [low, medium, high, xhigh]
+    default_reasoning_effort: high
+    use_responses_api: true
+    intelligence_score: 95
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: [code-gen]
+  gpt-5.2-codex:
+    friendly_name: GPT-5.2 Codex
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: false  # Codex models don't support verbosity
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false  # Reasoning/codex models don't support top_p
+    default_timeout_seconds: 180   # Code generation can take time
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # OpenAI matrix: gpt-5.2-codex supports low, medium, high, xhigh
+    litellm_reasoning_efforts: [low, medium, high, xhigh]
+    default_reasoning_effort: high
+    use_responses_api: true
+    intelligence_score: 90
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: [code-gen]
+  gpt-5.1-codex:
+    friendly_name: GPT-5.1 Codex
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: false  # Codex models don't support verbosity
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false  # Reasoning/codex models don't support top_p
+    default_timeout_seconds: 180   # Code generation can take time
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: codex models support low, medium, high only (default: adaptive)
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: high
+    use_responses_api: true
+    intelligence_score: 82
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: [code-gen]
+  gpt-5.1-codex-mini:
+    friendly_name: GPT-5.1 Codex Mini
+    short_name: codex-mini
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    supports_verbosity: false  # Codex models don't support verbosity
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: codex-mini supports low, medium, high only (default: adaptive)
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 77
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: [code-gen]
+  gpt-5.1-codex-max:
+    friendly_name: GPT-5.1 Codex Max
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: false  # Codex models don't support verbosity
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 240   # Max variant needs more time
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: codex-max supports low, medium, high, xhigh (default: adaptive)
+    litellm_reasoning_efforts: [low, medium, high, xhigh]
+    default_reasoning_effort: high
+    use_responses_api: true
+    intelligence_score: 85
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: [code-gen]
+  gpt-5.1-mini:
+    friendly_name: GPT-5.1 Mini
+    context_window_tokens: 200000
+    max_output_tokens: 64000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false  # Reasoning models don't support top_p
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: gpt-5.1 series supports none, low, medium, high (no minimal)
+    litellm_reasoning_efforts: [none, low, medium, high]
+    default_reasoning_effort: none
+    intelligence_score: 76
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5-mini:
+    friendly_name: GPT-5 Mini
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: gpt-5-mini supports minimal, low, medium, high (no none)
+    litellm_reasoning_efforts: [minimal, low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 79
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5:
+    friendly_name: GPT-5
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: gpt-5 supports minimal, low, medium, high (no none)
+    litellm_reasoning_efforts: [minimal, low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 88
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5-pro:
+    friendly_name: GPT-5 Pro
+    context_window_tokens: 400000
+    max_output_tokens: 272000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 300  # Pro models need longer timeouts
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: gpt-5-pro ONLY supports high
+    litellm_reasoning_efforts: [high]
+    default_reasoning_effort: high
+    use_responses_api: true
+    intelligence_score: 91
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-5-codex:
+    friendly_name: GPT-5 Codex
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: false  # Codex models don't support verbosity
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # LiteLLM docs: codex models support low, medium, high only (default: adaptive)
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: medium
+    use_responses_api: true
+    intelligence_score: 80
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: [code-gen]
+  gpt-5-nano:
+    friendly_name: GPT-5 Nano
+    context_window_tokens: 400000
+    max_output_tokens: 128000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    supports_verbosity: true
+    verbosity_levels: [low, medium, high]
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 90  # Nano is faster
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # OpenAI matrix: gpt-5-nano supports minimal, low, medium, high (no none)
+    litellm_reasoning_efforts: [minimal, low, medium, high]
+    default_reasoning_effort: minimal
+    intelligence_score: 70
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-4o:
+    friendly_name: GPT-4o
+    context_window_tokens: 128000
+    max_output_tokens: 16384
+    max_thinking_tokens: null
+    supports_thinking: false
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true  # Non-reasoning models support top_p
+    default_timeout_seconds: 60
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    # No reasoning_effort support (non-reasoning model)
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 76
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-4o-mini:
+    friendly_name: GPT-4o Mini
+    context_window_tokens: 128000
+    max_output_tokens: 16384
+    max_thinking_tokens: null
+    supports_thinking: false
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 60  # Mini models are fast but need margin for P99
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 65
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  gpt-4.1:
+    friendly_name: GPT-4.1
+    context_window_tokens: 1000000
+    max_output_tokens: 32768
+    max_thinking_tokens: null
+    supports_thinking: false
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 90  # Large context can be slow
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 72
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  o4-mini:
+    friendly_name: O4 Mini
+    context_window_tokens: 200000
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false  # Reasoning models don't support top_p
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, minimal, low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 60
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  o3:
+    friendly_name: O3
+    context_window_tokens: 200000
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, minimal, low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 85
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  o3-pro:
+    friendly_name: O3 Pro
+    context_window_tokens: 200000
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 300  # Pro reasoning takes longer
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, minimal, low, medium, high]
+    default_reasoning_effort: high
+    use_responses_api: true
+    intelligence_score: 87
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  o3-mini:
+    friendly_name: O3 Mini
+    context_window_tokens: 200000
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 90
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, minimal, low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 67
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  o1:
+    friendly_name: O1
+    context_window_tokens: 128000
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [none, minimal, low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 84
+    system_prompt_addendum: system_prompt_addendums/openai.md
+    tags: []
+  # ---------------------------------------------------------------------------
+  # Gemini Models (via Vertex AI or Google AI Studio)
+  # ---------------------------------------------------------------------------
+  gemini-3.1-pro-preview:
+    friendly_name: Gemini 3.1 Pro Preview
+    context_window_tokens: 1048576
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: true  # Gemini supports top_p
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: false  # Gemini has Context Caching API but LiteLLM doesn't support it
+    # Native: thinking_level (low, high only for Pro)
+    native_thinking_param: thinking_level
+    thinking_levels: [low, high]
+    default_thinking_level: high
+    # LiteLLM mapping: reasoning_effort -> thinking_level
+    # minimal/low/none/disable -> low, medium/high -> high
+    litellm_reasoning_efforts: [none, disable, minimal, low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 94
+    system_prompt_addendum: system_prompt_addendums/gemini.md
+    tags: []
+  gemini-3.1-pro-preview-customtools:
+    friendly_name: Gemini 3.1 Pro Preview (Custom Tools)
+    short_name: gemini-3.1-pro-ct
+    context_window_tokens: 1048576
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: false
+    # Same thinking params as 3.1 Pro
+    native_thinking_param: thinking_level
+    thinking_levels: [low, high]
+    default_thinking_level: high
+    litellm_reasoning_efforts: [none, disable, minimal, low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 94
+    system_prompt_addendum: system_prompt_addendums/gemini.md
+    tags: [agentic, custom-tools]
+  gemini-3-flash-preview:
+    friendly_name: Gemini 3 Flash Preview
+    context_window_tokens: 1048576
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: true
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: false  # Gemini has Context Caching API but LiteLLM doesn't support it
+    # Native: thinking_level (minimal, low, medium, high for Flash)
+    native_thinking_param: thinking_level
+    thinking_levels: [minimal, low, medium, high]
+    default_thinking_level: high
+    # LiteLLM mapping: reasoning_effort -> thinking_level
+    litellm_reasoning_efforts: [none, disable, minimal, low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 80
+    system_prompt_addendum: system_prompt_addendums/gemini.md
+    tags: []
+  gemini-2.5-pro:
+    friendly_name: Gemini 2.5 Pro
+    context_window_tokens: 1048576
+    max_output_tokens: 65536
+    max_thinking_tokens: 32768
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: false  # Gemini has Context Caching API but LiteLLM doesn't support it
+    # Native: thinking.budget_tokens (128 to 32768, cannot disable)
+    native_thinking_param: thinking_budget
+    # LiteLLM mapping: reasoning_effort -> budget_tokens
+    # none=0, disable=0, low=1024, medium=2048, high=4096
+    # Note: thinking cannot be disabled on 2.5 Pro; `none` (and presumably
+    # `disable`, which maps to the same budget of 0) still uses dynamic thinking.
+    litellm_reasoning_efforts: [none, disable, low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 89
+    system_prompt_addendum: system_prompt_addendums/gemini.md
+    tags: []
+  gemini-2.5-flash:
+    friendly_name: Gemini 2.5 Flash
+    short_name: gemini-flash
+    context_window_tokens: 1048576
+    max_output_tokens: 65536
+    max_thinking_tokens: 24576
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 90
+    prompt_caching:
+      supports: false  # Gemini has Context Caching API but LiteLLM doesn't support it
+    # Native: thinking.budget_tokens (0 to 24576, can disable with 0)
+    native_thinking_param: thinking_budget
+    # LiteLLM mapping: reasoning_effort -> budget_tokens
+    # none=0, disable=0, low=1024, medium=2048, high=4096
+    litellm_reasoning_efforts: [none, disable, low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 73
+    system_prompt_addendum: system_prompt_addendums/gemini.md
+    tags: []
+  # ---------------------------------------------------------------------------
+  # Claude Models (4.7 + 4.6 + 4.5 series)
+  # ---------------------------------------------------------------------------
+  # --- Claude 4.6 Series ---
+  claude-opus-4-6:
+    friendly_name: Claude Opus 4.6
+    context_window_tokens: 200000
+    max_output_tokens: 128000
+    max_thinking_tokens: 32768
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 1.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: cache_control
+      min_tokens: 1024
+      max_cache_blocks: 4
+    native_thinking_param: output_config.effort
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 98
+    tags: []
+  # --- Claude 4.7 Series ---
+  # Opus 4.7 is intentionally opt-in. Keep opus/claude-opus defaults on 4.6
+  # until this profile is proven safe for resume-heavy and referent-heavy flows.
+  claude-opus-4-7:
+    friendly_name: Claude Opus 4.7
+    context_window_tokens: 1000000
+    max_output_tokens: 128000
+    max_thinking_tokens: 128000
+    supports_thinking: true
+    supports_images: true
+    supports_1m_context: true
+    temperature_constraint: fixed
+    temperature: 1.0
+    supports_top_p: false
+    supports_sampling_overrides: false
+    default_timeout_seconds: 240
+    prompt_caching:
+      supports: true
+      mechanism: cache_control
+      min_tokens: 1024
+      max_cache_blocks: 4
+    native_thinking_param: output_config.effort
+    thinking_modes: [adaptive]
+    litellm_reasoning_efforts: [low, medium, high, xhigh]
+    default_reasoning_effort: high
+    token_estimate_multiplier: 1.35
+    intelligence_score: 99
+    tags: [bounded-review, opt-in]
+  # --- Claude 4.6 Series (continued) ---
+  claude-sonnet-4-6:
+    friendly_name: Claude Sonnet 4.6
+    context_window_tokens: 200000
+    max_output_tokens: 64000
+    max_thinking_tokens: 32768
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 1.0
+    supports_top_p: true
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: true
+      mechanism: cache_control
+      min_tokens: 1024
+      max_cache_blocks: 4
+    native_thinking_param: output_config.effort
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 92
+    tags: []
+  # 1M context variants (these require the anthropic-beta header, sent via LiteLLM extra_headers)
+  claude-opus-4-6-1m:
+    friendly_name: Claude Opus 4.6 (1M)
+    context_window_tokens: 1000000
+    max_output_tokens: 128000
+    max_thinking_tokens: 32768
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 1.0
+    supports_top_p: true
+    default_timeout_seconds: 240
+    prompt_caching:
+      supports: true
+      mechanism: cache_control
+      min_tokens: 1024
+      max_cache_blocks: 4
+    native_thinking_param: output_config.effort
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 98
+    tags: []
+  claude-sonnet-4-6-1m:
+    friendly_name: Claude Sonnet 4.6 (1M)
+    context_window_tokens: 1000000
+    max_output_tokens: 64000
+    max_thinking_tokens: 32768
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 1.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: cache_control
+      min_tokens: 1024
+      max_cache_blocks: 4
+    native_thinking_param: output_config.effort
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 92
+    tags: []
+  # --- Claude 4.5 Series ---
+  claude-opus-4-5-20251101:
+    friendly_name: Claude Opus 4.5
+    context_window_tokens: 200000
+    max_output_tokens: 65536
+    max_thinking_tokens: 32768
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 1.0
+    supports_top_p: true  # Claude supports top_p
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: cache_control  # Requires cache_control: {type: ephemeral} on content blocks
+      min_tokens: 1024  # Effective with 1024+ tokens, max 4 cache_control blocks
+      max_cache_blocks: 4  # Anthropic limit
+    # Native: output_config.effort (beta; within the 4.5 series only Opus supports it)
+    native_thinking_param: output_config.effort
+    # LiteLLM mapping: reasoning_effort -> output_config.effort
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 96
+    tags: []
+  claude-sonnet-4-5-20250929:
+    friendly_name: Claude Sonnet 4.5
+    context_window_tokens: 200000
+    max_output_tokens: 65536
+    max_thinking_tokens: 32768
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 1.0
+    supports_top_p: true
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: true
+      mechanism: cache_control
+      min_tokens: 1024
+      max_cache_blocks: 4
+    # No effort param support (Sonnet 4.5 doesn't support effort beta)
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 90
+    tags: []
+  claude-haiku-4-5-20251001:
+    friendly_name: Claude Haiku 4.5
+    context_window_tokens: 200000
+    max_output_tokens: 64000
+    max_thinking_tokens: 32768
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 1.0
+    supports_top_p: true
+    default_timeout_seconds: 60  # Haiku is fast
+    prompt_caching:
+      supports: true
+      mechanism: cache_control
+      min_tokens: 1024
+      max_cache_blocks: 4
+    # No effort param support (Haiku 4.5 doesn't support effort beta)
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 81
+    tags: []
+  # ---------------------------------------------------------------------------
+  # Open-source models via OpenRouter
+  # ---------------------------------------------------------------------------
+  deepseek-v4-flash:
+    friendly_name: DeepSeek V4 Flash
+    context_window_tokens: 1048576
+    max_output_tokens: 384000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [high, xhigh]
+    default_reasoning_effort: high
+    intelligence_score: 82
+    tags: [code-gen]
+  deepseek-v4-pro:
+    friendly_name: DeepSeek V4 Pro
+    context_window_tokens: 1048576
+    max_output_tokens: 384000
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    # OpenRouter exposes only the upper effort tiers for V4 Pro
+    litellm_reasoning_efforts: [high, xhigh]
+    default_reasoning_effort: high
+    intelligence_score: 88
+    tags: [code-gen]
+  kimi-k2.5:
+    friendly_name: Kimi K2.5
+    context_window_tokens: 262144
+    max_output_tokens: 262144
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 86
+    tags: [agentic, code-gen, vision]
+  kimi-k2.6:
+    friendly_name: Kimi K2.6
+    context_window_tokens: 32768
+    max_output_tokens: 32768
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 89
+    tags: [agentic, code-gen, vision]
+  qwen3.6-plus:
+    friendly_name: Qwen3.6 Plus
+    context_window_tokens: 1000000
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: false
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 88
+    tags: [agentic, code-gen, vision]
+  qwen3.6-flash:
+    friendly_name: Qwen3.6 Flash
+    context_window_tokens: 1000000
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: false
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 82
+    tags: [agentic, code-gen, vision]
+  qwen3.6-max-preview:
+    friendly_name: Qwen3.6 Max Preview
+    context_window_tokens: 262144
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: false
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 90
+    tags: [agentic, code-gen]
+  qwen3-coder:
+    friendly_name: Qwen3 Coder 480B A35B
+    context_window_tokens: 262144
+    max_output_tokens: 65536
+    max_thinking_tokens: null
+    supports_thinking: false
+    supports_images: false
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: false
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 84
+    tags: [code-gen]
+  minimax-m2.5:
+    friendly_name: MiniMax M2.5
+    context_window_tokens: 196608
+    max_output_tokens: 196608
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: false
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: medium
+    intelligence_score: 84
+    tags: [agentic]
+  minimax-m2.7:
+    friendly_name: MiniMax M2.7
+    context_window_tokens: 196608
+    max_output_tokens: 131072
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: false
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 86
+    tags: [agentic]
+  glm-4.7-flash:
+    friendly_name: GLM 4.7 Flash
+    context_window_tokens: 202752
+    max_output_tokens: 16384
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 90
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 78
+    tags: [agentic]
+  glm-5.1:
+    friendly_name: GLM 5.1
+    context_window_tokens: 202752
+    max_output_tokens: 202752
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: false
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 180
+    prompt_caching:
+      supports: true
+      mechanism: auto
+      min_tokens: 1024
+    native_thinking_param: reasoning_effort
+    litellm_reasoning_efforts: [low, medium, high]
+    default_reasoning_effort: high
+    intelligence_score: 87
+    tags: [agentic, code-gen]
+  gemma-4-31b-it:
+    friendly_name: Gemma 4 31B
+    context_window_tokens: 262144
+    max_output_tokens: 16384
+    max_thinking_tokens: null
+    supports_thinking: true
+    supports_images: true
+    temperature_constraint: range
+    temperature:
+      min: 0.0
+      default: 1.0
+      max: 2.0
+    supports_top_p: true
+    default_timeout_seconds: 120
+    prompt_caching:
+      supports: false
+    native_thinking_param: null
+    litellm_reasoning_efforts: null
+    default_reasoning_effort: null
+    intelligence_score: 78
+    tags: [open-weights, vision]
+# =============================================================================
+# ALIASES - Provider-prefixed IDs and short/alternate names map to canonical models
+# =============================================================================
+aliases:
+  # OpenAI provider prefix (LiteLLM style)
+  openai/gpt-5.5: gpt-5.5
+  openai/gpt-5.5-pro: gpt-5.5-pro
+  openai/gpt-5.4: gpt-5.4
+  openai/gpt-5.4-pro: gpt-5.4-pro
+  openai/gpt-5.4-mini: gpt-5.4-mini
+  openai/gpt-5.4-nano: gpt-5.4-nano
+  openai/gpt-5.2: gpt-5.2
+  openai/gpt-5.2-pro: gpt-5.2-pro
+  openai/gpt-5.3-codex: gpt-5.3-codex
+  openai/gpt-5.2-codex: gpt-5.2-codex
+  openai/gpt-5.1-codex: gpt-5.1-codex
+  openai/gpt-5.1-codex-mini: gpt-5.1-codex-mini
+  openai/gpt-5.1-codex-max: gpt-5.1-codex-max
+  openai/gpt-5.1-mini: gpt-5.1-mini
+  openai/gpt-5-pro: gpt-5-pro
+  openai/gpt-5-mini: gpt-5-mini
+  openai/gpt-5: gpt-5
+  openai/gpt-5-codex: gpt-5-codex
+  openai/gpt-5-nano: gpt-5-nano
+  openai/gpt-4.1: gpt-4.1
+  openai/gpt-4o: gpt-4o
+  openai/gpt-4o-mini: gpt-4o-mini
+  openai/o4-mini: o4-mini
+  openai/o3: o3
+  openai/o3-pro: o3-pro
+  openai/o3-mini: o3-mini
+  openai/o1: o1
+  # Vertex AI provider prefix (LiteLLM style)
+  vertex_ai/gemini-3.1-pro-preview: gemini-3.1-pro-preview
+  vertex_ai/gemini-3.1-pro-preview-customtools: gemini-3.1-pro-preview-customtools
+  vertex_ai/gemini-3-flash-preview: gemini-3-flash-preview
+  vertex_ai/gemini-2.5-pro: gemini-2.5-pro
+  vertex_ai/gemini-2.5-flash: gemini-2.5-flash
+  # Google AI Studio provider prefix (LiteLLM style)
+  gemini/gemini-3.1-pro-preview: gemini-3.1-pro-preview
+  gemini/gemini-3.1-pro-preview-customtools: gemini-3.1-pro-preview-customtools
+  gemini/gemini-3-flash-preview: gemini-3-flash-preview
+  gemini/gemini-2.5-pro: gemini-2.5-pro
+  gemini/gemini-2.5-flash: gemini-2.5-flash
+  # Google provider prefix (OpenRouter style)
+  google/gemini-3.1-pro-preview: gemini-3.1-pro-preview
+  google/gemini-3-flash-preview: gemini-3-flash-preview
+  google/gemini-2.5-pro: gemini-2.5-pro
+  google/gemini-2.5-flash: gemini-2.5-flash
+  # Anthropic provider prefix + friendly aliases
+  # Opus 4.6
+  anthropic/claude-opus-4-6: claude-opus-4-6
+  anthropic/claude-opus-4-6-1m: claude-opus-4-6-1m
+  claude-opus: claude-opus-4-6
+  opus-4-6: claude-opus-4-6
+  opus-4-6-1m: claude-opus-4-6-1m
+  opus: claude-opus-4-6
+  # Opus 4.7 (explicit opt-in only)
+  anthropic/claude-opus-4-7: claude-opus-4-7
+  anthropic/claude-opus-4.7: claude-opus-4-7
+  claude-opus-4.7: claude-opus-4-7
+  opus-4-7: claude-opus-4-7
+  opus-4.7: claude-opus-4-7
+  # Sonnet 4.6
+  anthropic/claude-sonnet-4-6: claude-sonnet-4-6
+  anthropic/claude-sonnet-4-6-1m: claude-sonnet-4-6-1m
+  sonnet-4-6: claude-sonnet-4-6
+  sonnet-4-6-1m: claude-sonnet-4-6-1m
+  sonnet: claude-sonnet-4-6
+  # Opus 4.5
+  anthropic/claude-opus-4-5-20251101: claude-opus-4-5-20251101
+  anthropic/claude-opus-4-5: claude-opus-4-5-20251101
+  claude-opus-4-5: claude-opus-4-5-20251101
+  opus-4-5: claude-opus-4-5-20251101
+  # Sonnet 4.5
+  anthropic/claude-sonnet-4-5-20250929: claude-sonnet-4-5-20250929
+  anthropic/claude-sonnet-4-5: claude-sonnet-4-5-20250929
+  claude-sonnet-4-5: claude-sonnet-4-5-20250929
+  sonnet-4-5: claude-sonnet-4-5-20250929
+  # Haiku 4.5
+  anthropic/claude-haiku-4-5-20251001: claude-haiku-4-5-20251001
+  anthropic/claude-haiku-4-5: claude-haiku-4-5-20251001
+  claude-haiku-4-5: claude-haiku-4-5-20251001
+  haiku-4-5: claude-haiku-4-5-20251001
+  haiku: claude-haiku-4-5-20251001
+  # OpenRouter dot-form aliases (OpenRouter uses dots, Forge uses hyphens)
+  anthropic/claude-opus-4.6: claude-opus-4-6
+  anthropic/claude-sonnet-4.6: claude-sonnet-4-6
+  anthropic/claude-haiku-4.5: claude-haiku-4-5-20251001
+  # DeepSeek (via OpenRouter)
+  deepseek/deepseek-v4-pro: deepseek-v4-pro
+  deepseek/deepseek-v4-flash: deepseek-v4-flash
+  deepseek-v4: deepseek-v4-pro
+  v4-pro: deepseek-v4-pro
+  v4-flash: deepseek-v4-flash
+  # Kimi / MoonshotAI (via OpenRouter)
+  moonshotai/kimi-k2.5: kimi-k2.5
+  moonshotai/kimi-k2.6: kimi-k2.6
+  kimi-k2-5: kimi-k2.5
+  kimi-k2-6: kimi-k2.6
+  # Qwen (via OpenRouter)
+  qwen/qwen3.6-flash: qwen3.6-flash
+  qwen/qwen3.6-plus: qwen3.6-plus
+  qwen/qwen3.6-max-preview: qwen3.6-max-preview
+  qwen/qwen3-coder: qwen3-coder
+  qwen3-6-flash: qwen3.6-flash
+  qwen3-6-plus: qwen3.6-plus
+  qwen3-6-max-preview: qwen3.6-max-preview
+  qwen3-6-max: qwen3.6-max-preview
+  qwen-coder: qwen3-coder
+  # MiniMax / Z.ai / Google Gemma (via OpenRouter)
+  minimax/minimax-m2.5: minimax-m2.5
+  minimax/minimax-m2.7: minimax-m2.7
+  minimax-m2-5: minimax-m2.5
+  minimax-m2-7: minimax-m2.7
+  z-ai/glm-4.7-flash: glm-4.7-flash
+  z-ai/glm-5.1: glm-5.1
+  glm-4-7-flash: glm-4.7-flash
+  glm-5-1: glm-5.1
+  google/gemma-4-31b-it: gemma-4-31b-it
+  gemma-4-31b: gemma-4-31b-it