npm - @qa-gentic/stlc-agents - Versions diffs - 1.0.15 → 1.0.17 - Mend

@qa-gentic/stlc-agents 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/src/stlc_agents/agent_test_case_manager/server.py CHANGED Viewed

@@ -175,9 +175,33 @@ def _validate_linked_test_cases_response(result: dict) -> dict:
 # ---------------------------------------------------------------------------
-# Tool definitions
+# Deduplication helper
 # ---------------------------------------------------------------------------
# Filler words that carry no scenario meaning; dropped before comparing titles.
_TC_STOP_WORDS = frozenset({
    "verify", "ensure", "validate", "check", "test", "the", "a", "an",
    "that", "is", "are", "can", "user", "should", "able", "to", "with",
})


def _normalise_title(title: str) -> str:
    """Return a normalised key for comparing test-case titles.

    The title is stripped of an LLM-generated numbering prefix, lowercased,
    stripped of punctuation, and filtered of stop words, so that the same
    scenario yields a stable key across runs even when the LLM varies its
    numbering scheme.

    Recognised prefixes: ``TC_1_``, ``TC-2_``, ``1.``, ``1)``, ``1:``,
    ``(1)`` and ``#1``. A bare leading number with no delimiter is kept,
    because it is usually part of the scenario ("404 page is shown",
    "3 failed attempts lock the account") rather than numbering.

    Letters and digits from any script are preserved; only punctuation,
    symbols and underscores are removed.

    Args:
        title: Raw test-case title.

    Returns:
        Space-joined remaining tokens; may be empty when the title holds
        only stop words or punctuation.
    """
    import re

    s = title.strip()
    # Numbering prefix: a bare number must be followed by '.', ')' or ':'
    # to count as numbering; '#N' and '(N)' are unambiguous on their own.
    s = re.sub(
        r"^(?:TC[-_]?\d+[-_:]?|\(\d+\)|#\d+[.):]?|\d+[.):])\s*",
        "",
        s,
        flags=re.IGNORECASE,
    )
    # \w is Unicode-aware for str patterns, so non-ASCII letters survive.
    # Underscore is a word character, hence the explicit alternative.
    cleaned = re.sub(r"[^\w\s]|_", "", s.lower())
    tokens = [w for w in cleaned.split() if w not in _TC_STOP_WORDS]
    return " ".join(tokens)
 @app.list_tools()
 async def list_tools() -> list[types.Tool]:
     return [
@@ -273,6 +297,51 @@ async def list_tools() -> list[types.Tool]:
                 "required": ["work_item_id", "organization_url", "project_name"],
             },
         ),
+        types.Tool(
+            name="create_deduped_test_cases",
+            description=(
+                "Create test cases in ADO, skipping any whose title already exists as a "
+                "linked test case on the work item. "
+                "Internally calls get_linked_test_cases, filters the incoming batch against "
+                "existing titles (case-insensitive, stop-word-normalised), then calls "
+                "create_and_link_test_cases on the net-new subset only. "
+                "Use this instead of create_and_link_test_cases for webhook/headless runs "
+                "where re-triggers would otherwise produce duplicates. "
+                "Returns skipped_count, created_count, and the full create result."
+            ),
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "work_item_id": {"type": "integer", "description": "Work item to link test cases to"},
+                    "organization_url": {"type": "string"},
+                    "project_name": {"type": "string"},
+                    "test_cases": {
+                        "type": "array",
+                        "description": "Full proposed test case batch (duplicates will be filtered out)",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "title": {"type": "string"},
+                                "priority": {"type": "integer", "description": "1-4, default 2"},
+                                "steps": {
+                                    "type": "array",
+                                    "items": {
+                                        "type": "object",
+                                        "properties": {
+                                            "action": {"type": "string"},
+                                            "expected_result": {"type": "string"},
+                                        },
+                                        "required": ["action", "expected_result"],
+                                    },
+                                },
+                            },
+                            "required": ["title", "steps"],
+                        },
+                    },
+                },
+                "required": ["work_item_id", "organization_url", "project_name", "test_cases"],
+            },
+        ),
     ]
@@ -443,6 +512,56 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
             # ── Pre-output validation ─────────────────────────────────────
             result["_validation"] = _validate_linked_test_cases_response(result)
+        elif name == "create_deduped_test_cases":
+            org = arguments["organization_url"]
+            project = arguments["project_name"]
+            wi_id = arguments["work_item_id"]
+            proposed = arguments["test_cases"]
+            # Step 1: fetch existing linked TCs
+            existing_result = await asyncio.to_thread(
+                _get_linked_test_cases, org, project, wi_id
+            )
+            existing_titles: set[str] = {
+                _normalise_title(tc.get("title", ""))
+                for tc in existing_result.get("linked_test_cases", [])
+            }
+            # Step 2: filter — keep only net-new
+            net_new = [
+                tc for tc in proposed
+                if _normalise_title(tc.get("title", "")) not in existing_titles
+            ]
+            skipped = len(proposed) - len(net_new)
+            if not net_new:
+                result = {
+                    "status": "all_duplicates",
+                    "skipped_count": skipped,
+                    "created_count": 0,
+                    "message": (
+                        f"All {skipped} proposed test case(s) already exist as linked "
+                        "test cases on this work item. Nothing was created."
+                    ),
+                }
+            else:
+                # Step 3: create net-new batch via the existing tool logic
+                # Re-dispatch through call_tool to reuse input validation + Feature gate
+                create_result_raw = await call_tool("create_and_link_test_cases", {
+                    "work_item_id":     wi_id,
+                    "organization_url": org,
+                    "project_name":     project,
+                    "test_cases":       net_new,
+                    "confirmed":        True,
+                })
+                inner = json.loads(create_result_raw[0].text)
+                result = {
+                    "status": "ok",
+                    "skipped_count": skipped,
+                    "created_count": len(net_new),
+                    "create_result": inner,
+                }
         else:
             result = {"error": f"Unknown tool: {name}"}
@@ -480,4 +599,4 @@ def main():
 if __name__ == "__main__":
-    main()
+    main()

package/src/stlc_agents/shared/cost_tracker.py ADDED Viewed

@@ -0,0 +1,395 @@
+"""
+cost_tracker.py  —  stlc_agents.shared.cost_tracker
+─────────────────────────────────────────────────────
+Shared cost tracking injected into all 5 MCP servers at install time.
+MODEL AUTO-DETECTION
+─────────────────────
+The MCP server is a subprocess. It cannot see the coding agent's API
+response or token usage. Instead, each coding agent exposes the model
+it is running on via a known environment variable that the MCP config
+(`.mcp.json` / `.vscode/mcp.json`) passes through into the subprocess:
+  Agent              Env var set automatically         Value example
+  ─────────────────  ──────────────────────────────   ──────────────────────────
+  Claude Code        ANTHROPIC_MODEL                  claude-sonnet-4-6
+                     CLAUDE_MODEL (fallback)           claude-opus-4-6
+  GitHub Copilot     GITHUB_COPILOT_MODEL (if set)    gpt-4o
+  Cursor / Windsurf  (none — user sets manually)       —
+  Any agent          STLC_CODING_AGENT_MODEL           user-specified override
+Detection order (first match wins):
+  1. STLC_CODING_AGENT_MODEL   — explicit user override (always wins)
+  2. ANTHROPIC_MODEL            — set by Claude Code automatically
+  3. CLAUDE_MODEL               — older Claude Code versions
+  4. GITHUB_COPILOT_MODEL       — Copilot if configured
+  5. ~/.qa-stlc/agent-model     — saved preference from `qa-stlc cost --set-model`
+  6. "claude-sonnet-4-6"        — safe default (most common)
+TOKEN ESTIMATION
+─────────────────
+Because the server never sees the LLM's token usage, tokens are estimated
+from the ADO/Jira JSON payload size the server returns:
+  estimated_tokens = len(json_response_text) / 4    (chars-per-token heuristic)
+  input_tokens  = estimated_tokens * 0.70   (coding agent reading the result)
+  output_tokens = estimated_tokens * 0.30   (coding agent writing the artifact)
+This is conservative and consistent with how promptfoo's HTTP provider
+estimates tokens when the API doesn't return a usage block.
+WHAT GETS LOGGED (per tool call)
+──────────────────────────────────
+  _cost block injected into every tool response JSON — the coding agent
+  sees it inline alongside the tool result.
+  ~/.qa-stlc/cost-<session>.jsonl — machine-readable session log.
+  stderr live line — visible in Claude Code's MCP log pane, VS Code
+  Output > MCP, Cursor's tool output panel, etc.
+  atexit summary — printed when the MCP server process exits.
+"""
+from __future__ import annotations
+import atexit
+import json
+import os
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Optional
+from mcp import types
+from .pricing import ModelPricing, get_pricing
+# ── Model detection ────────────────────────────────────────────────────────
# Saved model preference written by `qa-stlc cost --set-model`.
_PREF_FILE = Path.home() / ".qa-stlc" / "agent-model"


def _detect_model() -> str:
    """Return the coding agent's model id.

    Sources are consulted in priority order; the first non-blank value wins:

      1. STLC_CODING_AGENT_MODEL  - explicit user override
      2. ANTHROPIC_MODEL          - set by Claude Code for MCP subprocesses
      3. CLAUDE_MODEL             - name used by older Claude Code versions
      4. GITHUB_COPILOT_MODEL     - Copilot, if the MCP config passes it on
      5. contents of ``_PREF_FILE``
      6. the default "claude-sonnet-4-6"

    A missing, unreadable, undecodable or blank preference file is treated
    as "no preference". This function runs at import time, so it must never
    raise.
    """
    for env_name in (
        "STLC_CODING_AGENT_MODEL",
        "ANTHROPIC_MODEL",
        "CLAUDE_MODEL",
        "GITHUB_COPILOT_MODEL",
    ):
        if value := os.getenv(env_name, "").strip():
            return value

    try:
        saved = _PREF_FILE.read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):
        # FileNotFoundError is an OSError; UnicodeDecodeError is a ValueError
        # and needs its own entry so a corrupt file cannot break the import.
        saved = ""
    if saved:
        return saved

    return "claude-sonnet-4-6"
+# ── Config ─────────────────────────────────────────────────────────────────
# Tracking is on unless STLC_COST_TRACKING is exactly "false" (any case).
_TRACKING_ENABLED = os.environ.get("STLC_COST_TRACKING", "true").lower() != "false"
# Directory for the per-session JSONL logs; overridable via STLC_COST_LOG_DIR.
_LOG_DIR = Path(
    os.environ.get("STLC_COST_LOG_DIR", str(Path.home() / ".qa-stlc"))
)
# Session id; defaults to a timestamp taken when this module is imported.
_SESSION_ID = os.environ.get("STLC_SESSION_ID", f"stlc-{int(time.time())}")

# Model and pricing are resolved once, at import time.
_MODEL_ID = _detect_model()
_PRICING = get_pricing(_MODEL_ID)

# ANSI colour codes; every code becomes "" when stderr is not a TTY
# (e.g. in the VS Code Output pane).
_TTY = sys.stderr.isatty()
_C = {
    "reset": "\x1b[0m",
    "bold": "\x1b[1m",
    "dim": "\x1b[2m",
    "cyan": "\x1b[36m",
    "green": "\x1b[32m",
    "yellow": "\x1b[33m",
}
if not _TTY:
    _C = dict.fromkeys(_C, "")
+# ── Session ────────────────────────────────────────────────────────────────
class _Session:
    """In-memory list of cost records, mirrored to a JSONL log file.

    Logging is strictly best-effort: neither a failure to create the log
    directory nor a failure to append to the log file may break a tool call.
    """

    def __init__(self):
        self.id = _SESSION_ID
        self.started_at = time.time()
        self.records: list[dict] = []
        self.log_path = _LOG_DIR / f"cost-{self.id}.jsonl"
        try:
            _LOG_DIR.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Unwritable or invalid log dir: keep tracking in memory only.
            # add() tolerates the resulting write failures.
            pass

    def add(self, record: dict) -> None:
        """Store *record* in memory and append it to the log as one JSON line."""
        self.records.append(record)
        try:
            with self.log_path.open("a", encoding="utf-8") as f:
                f.write(json.dumps(record) + "\n")
        except OSError:
            pass  # never break a tool call over logging

    def running_total(self) -> float:
        """Return the sum of ``cost_usd`` over all records (missing counts as 0)."""
        return sum(r.get("cost_usd", 0.0) for r in self.records)


# Process-wide session, created lazily by _get_session().
_session: Optional[_Session] = None


def _get_session() -> _Session:
    """Return the process-wide session, creating it on first use."""
    global _session
    if _session is None:
        _session = _Session()
    return _session
+# ── Token estimation ───────────────────────────────────────────────────────
def _estimate_tokens(payload: Any) -> tuple[int, int, int]:
    """Estimate token counts from the size of a tool-response payload.

    The MCP server cannot see the LLM's real token usage, so the count is
    derived from the text of the payload it returns:

      total  = max(1, len(text) // 4)   (chars-per-token heuristic)
      input  = int(total * 0.70)        (agent reading the result)
      output = total - input            (agent writing the next artifact)

    The output share is the remainder rather than a second truncated
    product, so ``input + output == total`` always holds and the priced
    tokens match the reported estimate.

    Args:
        payload: A string, used as-is, or any JSON-serialisable value,
            which is measured via ``json.dumps``.

    Returns:
        ``(total, input, output)`` token estimates.
    """
    text = payload if isinstance(payload, str) else json.dumps(payload)
    total = max(1, len(text) // 4)
    input_tokens = int(total * 0.70)
    output_tokens = total - input_tokens
    return total, input_tokens, output_tokens
+# ── Public API ─────────────────────────────────────────────────────────────
def track(
    result: Any,
    *,
    tool_name: str,
    server: str,
    t0: float,
) -> list[types.TextContent]:
    """
    Serialise a call_tool() result for the MCP client, attaching a cost estimate.

    Intended as the final return of each server's call_tool():

        return track(result, tool_name=name, server="qa-gherkin-generator", t0=t0)

    When tracking is enabled, a ``_cost`` entry is added to ``result`` (in
    place, and only if it is a dict), the call is recorded in the session
    log, and a live line is written to stderr. When tracking is disabled the
    result is serialised unchanged.

    Args:
        result:    The dict your tool function produced.
        tool_name: The MCP tool name, e.g. "fetch_work_item_for_gherkin".
        server:    The MCP server name, e.g. "qa-gherkin-generator".
        t0:        time.monotonic() captured at the start of call_tool().

    Returns:
        A one-element list holding the JSON text of ``result``.
    """
    def _as_content(obj):
        body = json.dumps(obj, indent=2, ensure_ascii=False)
        return [types.TextContent(type="text", text=body)]

    # Tracking switched off: pass the result straight through.
    if not _TRACKING_ENABLED:
        return _as_content(result)

    elapsed_ms = int((time.monotonic() - t0) * 1000)
    session = _get_session()
    est_total, est_in, est_out = _estimate_tokens(result)
    if _PRICING:
        call_cost = _PRICING.cost(input_tokens=est_in, output_tokens=est_out)
    else:
        # No pricing entry for the detected model: record a zero cost.
        call_cost = 0.0
    session_total = session.running_total() + call_cost

    cost_block = {
        "session_id":       session.id,
        "server":           server,
        "tool":             tool_name,
        "model":            _MODEL_ID,
        "model_source":     _model_source(),
        "input_tokens":     est_in,
        "output_tokens":    est_out,
        "estimated_tokens": est_total,
        "cost_usd":         round(call_cost, 8),
        "latency_ms":       elapsed_ms,
        "timestamp":        datetime.now(timezone.utc).isoformat(),
        "session_total_usd": round(session_total, 8),
        "token_method":     "estimated",
        "token_note": (
            "Estimated from ADO/Jira payload size (chars÷4, 70/30 split). "
            "The MCP server has no access to the coding agent's token usage. "
            "Set STLC_CODING_AGENT_MODEL if the detected model is wrong. "
            f"Detected via: {_model_source()}."
        ),
    }

    # Only dict results can carry the inline cost entry.
    if isinstance(result, dict):
        result["_cost"] = cost_block

    log_record = {"tool": tool_name, "server": server}
    log_record.update(cost_block)
    session.add(log_record)

    _print_live(server, tool_name, est_total, call_cost, elapsed_ms, session_total)
    return _as_content(result)
def track_healing(payload: dict) -> None:
    """
    Record an AI Vision healing call from LocatorHealer.ts.
    Called by healing_cost_server.py when LocatorHealer posts usage.

    The payload is externally supplied, so null or missing fields are
    tolerated: a null ``model_id`` falls back to the default model, a null
    or non-dict ``usage`` counts as no usage, null token counts count as 0,
    and a null ``latency_ms`` counts as 0.

    Args:
        payload: Posted usage report. Keys read: ``model_id``, ``provider``,
            ``usage`` (``input_tokens``/``output_tokens`` or
            ``prompt_tokens``/``completion_tokens``), ``latency_ms``,
            ``locator_key``, ``healed``, ``selector``.
    """
    if not _TRACKING_ENABLED:
        return

    def _first_count(counts: dict, *keys: str):
        # First key holding a non-null value wins; 0 is a valid count.
        for key in keys:
            value = counts.get(key)
            if value is not None:
                return value
        return 0

    sess    = _get_session()
    model   = payload.get("model_id") or "claude-sonnet-4-20250514"
    pricing = get_pricing(model)
    usage   = payload.get("usage")
    if not isinstance(usage, dict):
        usage = {}
    inp     = _first_count(usage, "input_tokens", "prompt_tokens")
    out     = _first_count(usage, "output_tokens", "completion_tokens")
    cost    = pricing.cost(input_tokens=inp, output_tokens=out) if pricing else 0.0
    latency_ms = payload.get("latency_ms") or 0

    record = {
        "tool":             f"ai-vision-{payload.get('provider', 'anthropic')}",
        "server":           "locator-healer",
        "session_id":       sess.id,
        "model":            model,
        "model_source":     "locator-healer-env",
        "input_tokens":     inp,
        "output_tokens":    out,
        "estimated_tokens": inp + out,
        "cost_usd":         round(cost, 8),
        "latency_ms":       latency_ms,
        "timestamp":        datetime.now(timezone.utc).isoformat(),
        "session_total_usd": round(sess.running_total() + cost, 8),
        "token_method":     "exact",
        "healing_meta": {
            "locator_key": payload.get("locator_key"),
            "healed":      payload.get("healed", False),
            "selector":    payload.get("selector"),
        },
    }
    sess.add(record)
    # The record is already stored, so running_total() includes this call.
    _print_live(
        "locator-healer",
        f"ai-vision ({payload.get('provider', '?')})",
        inp + out, cost, latency_ms,
        sess.running_total(),
    )
def _model_source() -> str:
    """Describe where the detected model came from.

    Applies the same priority order and the same "non-blank value" test as
    ``_detect_model``. The saved preference is reported only when the file
    can be read and holds a non-blank value; an empty or unreadable file
    yields "default fallback".
    """
    env_sources = (
        ("STLC_CODING_AGENT_MODEL", "STLC_CODING_AGENT_MODEL env var"),
        ("ANTHROPIC_MODEL", "ANTHROPIC_MODEL env var (set by Claude Code)"),
        ("CLAUDE_MODEL", "CLAUDE_MODEL env var (set by Claude Code)"),
        ("GITHUB_COPILOT_MODEL", "GITHUB_COPILOT_MODEL env var"),
    )
    for env_name, label in env_sources:
        if os.getenv(env_name, "").strip():
            return label

    try:
        has_saved = bool(_PREF_FILE.read_text(encoding="utf-8").strip())
    except (OSError, UnicodeDecodeError):
        # Missing, unreadable or undecodable file: no usable preference.
        has_saved = False
    if has_saved:
        return f"saved preference ({_PREF_FILE})"
    return "default fallback"
+# ── Live stderr line ───────────────────────────────────────────────────────
def _print_live(
    server: str, tool: str, tokens: int,
    cost: float, latency_ms: int, running: float,
) -> None:
    """Write a one-line cost report for a single call to stderr.

    Args:
        server:     Server name shown first on the line.
        tool:       Tool name shown after the separator.
        tokens:     Token count; values of 1000 or more are shown as "N.NK".
        cost:       Cost of this call in USD.
        latency_ms: Call latency in milliseconds.
        running:    Session total in USD.
    """
    dim, reset = _C["dim"], _C["reset"]
    cyan, green = _C["cyan"], _C["green"]

    if tokens >= 1000:
        token_label = f"{tokens/1000:.1f}K"
    else:
        token_label = str(tokens)

    segments = [
        f"{dim}[stlc-cost]{reset} ",
        f"{cyan}{server}{reset}{dim} · {reset}{tool}",
        f"  ~{token_label} tokens  {green}${cost:.6f}{reset}",
        f"  {dim}(session: ${running:.6f}  {latency_ms}ms){reset}",
    ]
    print("".join(segments), file=sys.stderr, flush=True)
+# ── Session summary on exit ────────────────────────────────────────────────
def _print_summary() -> None:
    """Print the end-of-session cost summary to stderr.

    Prints nothing when tracking is disabled, no session was created, or
    the session has no records. Otherwise prints a per-server table, a
    per-call table, totals, and the detected model.

    Cost cells are formatted to text first and then padded, so the padding
    width applies to the visible amount and not to the ANSI colour codes
    around it.
    """
    if not _TRACKING_ENABLED or _session is None or not _session.records:
        return

    def emit(line: str) -> None:
        print(line, file=sys.stderr)

    def fmt_tokens(count) -> str:
        return f"{count/1000:.1f}K" if count >= 1000 else str(count)

    def fmt_cost(amount) -> str:
        return f"${amount:.6f}"

    sess    = _session
    records = sess.records
    elapsed = time.time() - sess.started_at
    c       = _C

    # Aggregate calls, tokens and cost per server.
    by_server: dict[str, dict] = {}
    for r in records:
        bucket = by_server.setdefault(
            r.get("server", "unknown"),
            {"calls": 0, "tokens": 0, "cost_usd": 0.0},
        )
        bucket["calls"]    += 1
        bucket["tokens"]   += r.get("estimated_tokens", 0)
        bucket["cost_usd"] += r.get("cost_usd", 0.0)

    total_cost   = sum(r.get("cost_usd", 0.0) for r in records)
    total_tokens = sum(r.get("estimated_tokens", 0) for r in records)

    W = 68
    emit(f"\n{c['bold']}{'═'*W}{c['reset']}")
    emit(f"{c['bold']}  stlc-agents · Cost Summary  ·  {sess.id}{c['reset']}")
    emit(f"{c['bold']}{'═'*W}{c['reset']}")

    # Per-server
    emit(f"\n  {'Server':<30} {'Calls':>6} {'~Tokens':>10} {'Cost (USD)':>14}")
    emit(f"  {'─'*60}")
    for svr, d in sorted(by_server.items()):
        emit(
            f"  {svr:<30} {d['calls']:>6} {fmt_tokens(d['tokens']):>10} "
            f"{c['green']}{fmt_cost(d['cost_usd']):>14}{c['reset']}"
        )

    # Per-step
    emit(f"\n  {'Step':<26} {'Tool':<36} {'~Tok':>6}  {'Cost':>10}  {'ms':>6}")
    emit(f"  {'─'*W}")
    for r in records:
        emit(
            f"  {r.get('server','?'):<26} {r.get('tool','?'):<36} "
            f"{fmt_tokens(r.get('estimated_tokens',0)):>6}  "
            f"{fmt_cost(r.get('cost_usd',0)):>10}  {r.get('latency_ms',0):>6}"
        )

    # Totals
    emit(f"\n  {'─'*W}")
    emit(f"  {'Total tokens':<40} {fmt_tokens(total_tokens):>10}")
    emit(
        f"  {c['bold']}{'Total cost':<40} "
        f"{c['green']}{fmt_cost(total_cost):>10}{c['reset']}"
    )

    # Model info
    model_str = f"{_MODEL_ID}"
    if _PRICING:
        model_str += f"  (${_PRICING.input_per_mtok}/${_PRICING.output_per_mtok} per MTok in/out)"
    emit(f"  {c['dim']}Model: {model_str}{c['reset']}")
    emit(f"  {c['dim']}Model detected via: {_model_source()}{c['reset']}")
    emit(f"  {c['dim']}Token method: estimated from payload size (chars÷4){c['reset']}")
    emit(f"  {c['dim']}Duration: {elapsed:.1f}s  ·  Log: {sess.log_path}{c['reset']}")
    emit(f"\n  {c['dim']}To set model explicitly:{c['reset']}")
    emit(f"  {c['dim']}  qa-stlc cost --set-model claude-opus-4-6{c['reset']}")
    emit(f"  {c['dim']}  or add to .mcp.json env: STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}")
    emit(f"  {c['dim']}  or add to .env:          STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}")
    emit(f"\n{c['bold']}{'═'*W}{c['reset']}\n")
+atexit.register(_print_summary)

package/src/stlc_agents/shared/pricing.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""
+pricing.py  —  Model pricing registry for stlc-agents cost tracking.
+Prices: USD per million tokens (MTok).
+Source: Anthropic official docs, April 2026.
+Models this repo actually calls:
+  - claude-sonnet-4-20250514  (LocatorHealer AI Vision, default)
+  - gpt-4o                    (LocatorHealer AI Vision, copilot provider)
+  + whatever coding agent the user runs (Claude / Copilot / Cursor / Windsurf)
+    — the user declares this via STLC_CODING_AGENT_MODEL env var.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Optional
@dataclass(frozen=True)
class ModelPricing:
    """Price list for one model, in USD per million tokens (MTok)."""

    model_id: str
    display_name: str
    provider: str
    input_per_mtok: float        # USD / 1M input tokens
    output_per_mtok: float       # USD / 1M output tokens
    cache_write_per_mtok: float  # USD / 1M cache-write tokens
    cache_read_per_mtok: float   # USD / 1M cache-read tokens

    def cost(
        self,
        input_tokens: int = 0,
        output_tokens: int = 0,
        cache_write_tokens: int = 0,
        cache_read_tokens: int = 0,
    ) -> float:
        """Return the USD cost of the given token counts at this model's rates."""
        return (
            (input_tokens        / 1_000_000) * self.input_per_mtok
            + (output_tokens     / 1_000_000) * self.output_per_mtok
            + (cache_write_tokens/ 1_000_000) * self.cache_write_per_mtok
            + (cache_read_tokens / 1_000_000) * self.cache_read_per_mtok
        )


_REGISTRY: list[ModelPricing] = [
    # ── Anthropic ──────────────────────────────────────────────────────────
    ModelPricing("claude-sonnet-4-20250514", "Claude Sonnet 4",    "anthropic",  3.00, 15.00,  3.75, 0.30),
    ModelPricing("claude-sonnet-4-6",        "Claude Sonnet 4.6",  "anthropic",  3.00, 15.00,  3.75, 0.30),
    ModelPricing("claude-haiku-4-5-20251001","Claude Haiku 4.5",   "anthropic",  1.00,  5.00,  1.25, 0.10),
    ModelPricing("claude-opus-4-6",          "Claude Opus 4.6",    "anthropic",  5.00, 25.00,  6.25, 0.50),
    ModelPricing("claude-opus-4-7",          "Claude Opus 4.7",    "anthropic",  5.00, 25.00,  6.25, 0.50),
    # ── OpenAI / Copilot ──────────────────────────────────────────────────
    ModelPricing("gpt-4o",                   "GPT-4o",             "openai",     2.50, 10.00,  0.00, 0.00),
    ModelPricing("gpt-4o-mini",              "GPT-4o Mini",        "openai",     0.15,  0.60,  0.00, 0.00),
]

# Exact-id lookup table; registry ids are all lowercase.
_by_id: dict[str, ModelPricing] = {p.model_id: p for p in _REGISTRY}


def get_pricing(model_id: str) -> Optional[ModelPricing]:
    """Look up pricing for *model_id*.

    Matching is case-insensitive and ignores surrounding whitespace. An
    exact id match wins. Otherwise every registry entry whose id contains
    the key, or is contained in it, is a candidate, and the candidate with
    the longest overlap is returned. For example "gpt-4o-mini-2024-07-18"
    resolves to "gpt-4o-mini", not "gpt-4o". Ties go to the earliest
    registry entry.

    Args:
        model_id: Model identifier as reported by the agent or provider.

    Returns:
        The matching ``ModelPricing``, or ``None`` when the id is empty,
        blank, or matches nothing.
    """
    key = (model_id or "").lower().strip()
    if not key:
        # "" is a substring of every id, so it must not reach the
        # substring search below.
        return None

    exact = _by_id.get(key)
    if exact is not None:
        return exact

    # Substring: "claude-sonnet-4-20250514" ⊇ "sonnet-4"
    candidates = [p for p in _REGISTRY if key in p.model_id or p.model_id in key]
    if not candidates:
        return None

    def overlap(p: ModelPricing) -> int:
        # Shared length: the whole registry id when it sits inside the key,
        # otherwise the whole key sits inside the registry id.
        return len(p.model_id) if p.model_id in key else len(key)

    # max() returns the first maximal element, so ties keep registry order.
    return max(candidates, key=overlap)


def list_models() -> list[ModelPricing]:
    """Return a copy of the pricing registry, in registry order."""
    return list(_REGISTRY)