npm - @qa-gentic/stlc-agents - Versions diffs - 1.0.25 → 1.0.26 - Mend

@qa-gentic/stlc-agents 1.0.25 → 1.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/__init__.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/ado_attach.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/agent_test_case_manager/__pycache__/__init__.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/agent_test_case_manager/server.py CHANGED Viewed

@@ -30,6 +30,7 @@ from stlc_agents.agent_test_case_manager.tools.ado_workitem import (
     create_test_case as _create_test_case,
     link_test_cases_to_work_item as _link_test_cases,
     get_linked_test_cases as _get_linked_test_cases,
+    add_tag_to_work_item as _add_tag,
 )
 from stlc_agents.shared.cost_tracker import track
@@ -483,6 +484,16 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
                 except Exception as e:
                     link_result = {"error": str(e)}
+            # Add tag to the parent work item after linking
+            tag_result = {}
+            if created:
+                try:
+                    tag_result = await asyncio.to_thread(
+                        _add_tag, org, project, wi_id, "STLCAgentTestCases"
+                    )
+                except Exception as e:
+                    tag_result = {"error": str(e)}
             result = {
                 "summary": {
                     "requested": len(test_cases),
@@ -493,6 +504,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
                 "created_test_cases": created,
                 "failed": failed,
                 "link_result": link_result,
+                "tag_result": tag_result,
                 "_validation": {
                     "valid": len(failed) == 0 and bool(link_result.get("success", True)),
                     "input_validation": input_validation,

package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/__init__.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/ado_workitem.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/agent_test_case_manager/tools/ado_workitem.py CHANGED Viewed

@@ -8,6 +8,8 @@ Public API:
   create_test_case(org_url, project, title, steps, ...)  -> dict
   link_test_cases_to_work_item(org_url, project, wi_id, tc_ids) -> dict
   get_linked_test_cases(org_url, project, work_item_id)  -> dict
+  add_comment_to_work_item(org_url, project, work_item_id, text) -> dict
+  add_tag_to_work_item(org_url, project, work_item_id, tag)      -> dict
 """
 from __future__ import annotations
@@ -175,8 +177,13 @@ def link_test_cases_to_work_item(
     project: str,
     work_item_id: int,
     test_case_ids: List[int],
+    link_comment: str = "STLC-Agent generated test case",
 ) -> dict:
-    """Create TestedBy-Forward links from a work item to test cases."""
+    """Create TestedBy-Forward links from a work item to test cases.
+    link_comment is stored as attributes.comment on each relation and appears
+    in the Links tab Comments column in Azure DevOps.
+    """
     org_url = org_url.rstrip("/")
     headers = get_auth_headers("application/json-patch+json")
@@ -187,6 +194,7 @@ def link_test_cases_to_work_item(
             "value": {
                 "rel": "Microsoft.VSTS.Common.TestedBy-Forward",
                 "url": f"{org_url}/{project}/_apis/wit/workItems/{tc_id}",
+                "attributes": {"comment": link_comment},
             },
         }
         for tc_id in test_case_ids
@@ -249,6 +257,62 @@ def get_linked_test_cases(org_url: str, project: str, work_item_id: int) -> dict
     return {"work_item_id": work_item_id, "linked_test_cases": linked, "count": len(linked)}
+# ---------------------------------------------------------------------------
+# add_comment_to_work_item
+# ---------------------------------------------------------------------------
+def add_comment_to_work_item(org_url: str, project: str, work_item_id: int, text: str) -> dict:
+    """Post ``text`` as a new comment on a work item.
+
+    Sends a POST to the work item's ``comments`` endpoint using api-version
+    ``7.1-preview.3``.  ``raise_for_status()`` propagates an HTTP error
+    status as an exception.  On success returns
+    ``{"success": True, "comment_id": <"id" field of the response JSON>}``.
+    """
+    base = org_url.rstrip("/")
+    endpoint = f"{base}/{project}/_apis/wit/workitems/{work_item_id}/comments"
+    response = requests.post(
+        endpoint,
+        headers=get_auth_headers(),
+        params={"api-version": "7.1-preview.3"},
+        json={"text": text},
+        timeout=30,
+    )
+    response.raise_for_status()
+    comment_id = response.json().get("id")
+    return {"success": True, "comment_id": comment_id}
+# ---------------------------------------------------------------------------
+# add_tag_to_work_item
+# ---------------------------------------------------------------------------
+def add_tag_to_work_item(org_url: str, project: str, work_item_id: int, tag: str) -> dict:
+    """Append a tag to a work item's System.Tags field (no-op if already present).
+
+    Reads the current ``System.Tags`` value and only issues a PATCH when the
+    tag is missing.  The presence check ignores letter case and surrounding
+    whitespace: Azure DevOps resolves tag names case-insensitively, so a tag
+    that differs only by case is already on the item and must not trigger a
+    second write.
+
+    Returns ``{"success": True, "tag": ..., "already_present": True}`` when no
+    write was needed, otherwise
+    ``{"success": True, "tag": ..., "tags": <new System.Tags string>}``.
+
+    Raises ``ValueError`` for a blank tag; ``raise_for_status()`` propagates
+    an HTTP error status from either the GET or the PATCH.
+    """
+    tag = tag.strip()
+    if not tag:
+        # A blank tag would otherwise be written as a dangling "; " entry.
+        raise ValueError("tag must be a non-empty string")
+    org_url = org_url.rstrip("/")
+    item_url = f"{org_url}/{project}/_apis/wit/workitems/{work_item_id}"
+    fetch_resp = requests.get(
+        item_url,
+        headers=get_auth_headers(),
+        params={"api-version": _API},
+        timeout=30,
+    )
+    fetch_resp.raise_for_status()
+    # System.Tags may be absent or null on an untagged item; treat both as "".
+    existing_str = fetch_resp.json().get("fields", {}).get("System.Tags", "") or ""
+    existing = [t.strip() for t in existing_str.split(";") if t.strip()]
+    wanted = tag.casefold()
+    if any(t.casefold() == wanted for t in existing):
+        return {"success": True, "tag": tag, "already_present": True}
+    existing.append(tag)
+    new_tags_str = "; ".join(existing)
+    # The whole field is rewritten: the patch carries the full tag string.
+    patch = [{"op": "add", "path": "/fields/System.Tags", "value": new_tags_str}]
+    patch_resp = requests.patch(
+        item_url,
+        headers=get_auth_headers("application/json-patch+json"),
+        params={"api-version": _API},
+        json=patch,
+        timeout=30,
+    )
+    patch_resp.raise_for_status()
+    return {"success": True, "tag": tag, "tags": new_tags_str}
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------

package/src/stlc_agents/shared/__pycache__/__init__.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/shared/__pycache__/auth.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/shared/__pycache__/cost_tracker.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/shared/__pycache__/pricing.cpython-314.pyc ADDED Viewed

Binary file

package/src/stlc_agents/shared/cost_tracker.py CHANGED Viewed

@@ -1,15 +1,21 @@
 """
 cost_tracker.py  —  stlc_agents.shared.cost_tracker
 ─────────────────────────────────────────────────────
-Shared cost tracking injected into all 5 MCP servers at install time.
+Two tracking modes:
-MODEL AUTO-DETECTION
-─────────────────────
-The MCP server is a subprocess. It cannot see the coding agent's API
-response or token usage. Instead, each coding agent exposes the model
-it is running on via a known environment variable that the MCP config
-(`.mcp.json` / `.vscode/mcp.json`) passes through into the subprocess:
+  track()           — MCP server tool calls (coding-agent-driven flow).
+                      Token counts are ESTIMATED from payload size because
+                      the MCP server subprocess never sees the coding agent's
+                      API response.
+  track_llm_call()  — Webhook orchestrator LLM calls (agent_runner.py).
+                      Token counts are EXACT: taken directly from the LLM
+                      API response's usage block, matching how promptfoo
+                      tracks costs (input + output + cache per iteration,
+                      accumulated across all iterations in the agent loop).
+MODEL AUTO-DETECTION (for track() only)
+─────────────────────────────────────────
   Agent              Env var set automatically         Value example
   ─────────────────  ──────────────────────────────   ──────────────────────────
   Claude Code        ANTHROPIC_MODEL                  claude-sonnet-4-6
@@ -26,28 +32,11 @@ Detection order (first match wins):
   5. ~/.qa-stlc/agent-model     — saved preference from `qa-stlc cost --set-model`
   6. "claude-sonnet-4-6"        — safe default (most common)
-TOKEN ESTIMATION
-─────────────────
-Because the server never sees the LLM's token usage, tokens are estimated
-from the ADO/Jira JSON payload size the server returns:
-  estimated_tokens = len(json_response_text) / 4    (chars-per-token heuristic)
-  input_tokens  = estimated_tokens * 0.70   (coding agent reading the result)
-  output_tokens = estimated_tokens * 0.30   (coding agent writing the artifact)
-This is conservative and consistent with how promptfoo's HTTP provider
-estimates tokens when the API doesn't return a usage block.
-WHAT GETS LOGGED (per tool call)
-──────────────────────────────────
-  _cost block injected into every tool response JSON — the coding agent
-  sees it inline alongside the tool result.
+WHAT GETS LOGGED (per tool call / LLM call)
+─────────────────────────────────────────────
   ~/.qa-stlc/cost-<session>.jsonl — machine-readable session log.
-  stderr live line — visible in Claude Code's MCP log pane, VS Code
-  Output > MCP, Cursor's tool output panel, etc.
-  atexit summary — printed when the MCP server process exits.
+  stderr live line — visible in Claude Code MCP log, VS Code Output, etc.
+  atexit summary — printed when the process exits.
 """
 from __future__ import annotations
@@ -243,6 +232,94 @@ def track(
     )]
+def track_llm_call(
+    *,
+    model: str,
+    provider: str,
+    input_tokens: int,
+    output_tokens: int,
+    cache_write_tokens: int = 0,
+    cache_read_tokens: int = 0,
+    tool: str = "llm-agent-loop",
+    server: str = "agent-runner",
+    work_item_id: str = "",
+    iterations: int = 1,
+    latency_ms: int = 0,
+) -> float:
+    """
+    Log one LLM call with exact token counts and return its cost in USD.
+    Intended for the webhook / agent_runner path, where the caller has the
+    LLM API response's usage block and passes the counts in directly
+    (accumulated over all iterations of the agent loop, priced once).
+    Caller-side field mapping:
+      Anthropic: input_tokens, output_tokens,
+                 cache_creation_input_tokens → cache_write_tokens,
+                 cache_read_input_tokens     → cache_read_tokens
+      OpenAI:    prompt_tokens     → input_tokens,
+                 completion_tokens → output_tokens,
+                 prompt_tokens_details.cached_tokens → cache_read_tokens
+    Returns 0.0 without recording anything when tracking is disabled; the
+    cost is also 0.0 when no pricing entry exists for ``model``.
+    """
+    if not _TRACKING_ENABLED:
+        return 0.0
+    price_table = get_pricing(model)
+    if price_table:
+        usd = price_table.cost(
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            cache_write_tokens=cache_write_tokens,
+            cache_read_tokens=cache_read_tokens,
+        )
+    else:
+        usd = 0.0
+    session = _get_session()
+    # Session total as it will stand once this record has been added.
+    session_total = session.running_total() + usd
+    token_sum = input_tokens + output_tokens
+    if cache_write_tokens or cache_read_tokens:
+        cache_suffix = f" cache_write={cache_write_tokens} cache_read={cache_read_tokens}"
+    else:
+        cache_suffix = ""
+    note = (
+        f"Exact counts from {provider} API response. "
+        f"Iterations: {iterations}.{cache_suffix}"
+    )
+    entry = {
+        "tool":               tool,
+        "server":             server,
+        "session_id":         session.id,
+        "model":              model,
+        "model_source":       f"{provider}-api-response",
+        "input_tokens":       input_tokens,
+        "output_tokens":      output_tokens,
+        "cache_write_tokens": cache_write_tokens,
+        "cache_read_tokens":  cache_read_tokens,
+        # Same key the estimated path uses; here it is input + output.
+        "estimated_tokens":   token_sum,
+        "cost_usd":           round(usd, 8),
+        "latency_ms":         latency_ms,
+        "timestamp":          datetime.now(timezone.utc).isoformat(),
+        "session_total_usd":  round(session_total, 8),
+        "token_method":       "exact",
+        "token_note":         note,
+        "work_item_id":       work_item_id,
+        "iterations":         iterations,
+    }
+    session.add(entry)
+    _print_live(server, tool, token_sum, usd, latency_ms, session_total, exact=True)
+    return usd
+def get_session_id() -> str:
+    """Return the current session ID (used to pass STLC_SESSION_ID to subprocesses).
+
+    The value is the module-level ``_SESSION_ID``; this accessor only exposes it.
+    """
+    return _SESSION_ID
 def track_healing(payload: dict) -> None:
     """
     Record an AI Vision healing call from LocatorHealer.ts.
@@ -307,23 +384,277 @@ def _model_source() -> str:
 def _print_live(
     server: str, tool: str, tokens: int,
     cost: float, latency_ms: int, running: float,
+    *, exact: bool = False,
 ) -> None:
+    """Write a one-line cost record for a single call to stderr.
+
+    The line shows the server and tool, the token count (rendered as "x.yK"
+    from 1000 upwards), the call cost and the running session total in USD
+    with six decimals, and the latency in ms.  When ``exact`` is False the
+    token count gets a "~" prefix and a dim " [est]" marker follows the
+    cost; when True both are omitted.
+    """
+    # _C is indexed by 'dim', 'reset', 'cyan' and 'green' below — presumably
+    # terminal colour escape codes; confirm against its definition.
     c        = _C
+    prefix   = "" if exact else "~"
     tok_str  = f"{tokens/1000:.1f}K" if tokens >= 1000 else str(tokens)
     cost_str = f"${cost:.6f}"
     total    = f"${running:.6f}"
+    method   = "" if exact else f"{c['dim']} [est]{c['reset']}"
     print(
         f"{c['dim']}[stlc-cost]{c['reset']} "
         f"{c['cyan']}{server}{c['reset']}{c['dim']} · {c['reset']}{tool}"
-        f"  ~{tok_str} tokens  {c['green']}{cost_str}{c['reset']}"
+        f"  {prefix}{tok_str} tokens  {c['green']}{cost_str}{c['reset']}{method}"
         f"  {c['dim']}(session: {total}  {latency_ms}ms){c['reset']}",
         file=sys.stderr, flush=True,
     )
-# ── Session summary on exit ────────────────────────────────────────────────
+# ── Tool → artifact label ──────────────────────────────────────────────────
+# Tool name -> past-tense description of what the call produced.  Looked up
+# with the raw tool name as fallback, so unknown tools still get a label.
+_TOOL_ARTIFACT: dict[str, str] = {
+    "fetch_work_item_for_gherkin":  "Work item fetched",
+    "fetch_feature_hierarchy":      "Feature hierarchy fetched",
+    "generate_and_attach_gherkin":  "Gherkin attached to work item",
+    "attach_gherkin_to_feature":    "Gherkin attached to feature",
+    "attach_gherkin_to_work_item":  "Gherkin attached to work item",
+    "validate_gherkin_content":     "Gherkin validated",
+    "capture_app_context":          "App context captured",
+    "generate_playwright_code":     "Playwright code generated",
+    "scaffold_locator_repository":  "Locator repository scaffolded",
+    "attach_code_to_work_item":     "Code attached to work item",
+    "validate_gherkin_steps":       "Gherkin steps validated",
+    "pre_validate_cucumber_steps":  "Cucumber steps pre-validated",
+    "get_generated_files":          "Generated files retrieved",
+    "inspect_helix_project":        "Helix project inspected",
+    "write_helix_files":            "Files written to Helix",
+    "update_helix_file":            "Helix file updated",
+    "read_helix_file":              "Helix file read",
+    "list_helix_tree":              "Helix tree listed",
+    "fetch_work_item":              "Work item fetched",
+    "create_and_link_test_cases":   "Test cases created & linked",
+    "create_deduped_test_cases":    "Test cases created (deduped)",
+    "get_linked_test_cases":        "Linked test cases retrieved",
+    "llm-agent-loop":               "LLM orchestration",
+}
+# Server identifiers in a fixed sequence.
+# NOTE(review): no reader of this list is visible in this diff — presumably a
+# display/sort order for report rows; confirm against its usage.
+_SERVER_ORDER = [
+    "qa-test-case-manager", "qa-jira-manager",
+    "qa-gherkin-generator", "qa-playwright-generator", "qa-helix-writer",
+    "agent-runner",
+]
+# Server identifier -> human-readable agent name used in report step labels.
+_SERVER_FRIENDLY: dict[str, str] = {
+    "qa-test-case-manager":   "QA Test Case Manager",
+    "qa-jira-manager":        "QA Jira Manager",
+    "qa-gherkin-generator":   "QA Gherkin Generator",
+    "qa-playwright-generator": "QA Playwright Generator",
+    "qa-helix-writer":        "QA Helix Writer",
+    "agent-runner":           "Orchestrator",
+    "locator-healer":         "Locator Healer",
+}
+# Tool name -> short verb phrase shown in parentheses in report step labels.
+_TOOL_FRIENDLY: dict[str, str] = {
+    "fetch_work_item_for_gherkin":  "fetch WI",
+    "fetch_feature_hierarchy":      "fetch feature hierarchy",
+    "generate_and_attach_gherkin":  "generate & attach Gherkin",
+    "attach_gherkin_to_feature":    "attach Gherkin to feature",
+    "attach_gherkin_to_work_item":  "attach Gherkin to WI",
+    "validate_gherkin_content":     "validate Gherkin",
+    "capture_app_context":          "capture app context",
+    "generate_playwright_code":     "generate Playwright code",
+    "scaffold_locator_repository":  "scaffold locators",
+    "attach_code_to_work_item":     "attach code to WI",
+    "validate_gherkin_steps":       "validate Gherkin steps",
+    "pre_validate_cucumber_steps":  "pre-validate Cucumber steps",
+    "get_generated_files":          "retrieve generated files",
+    "inspect_helix_project":        "inspect Helix project",
+    "write_helix_files":            "write files to Helix",
+    "update_helix_file":            "update Helix file",
+    "read_helix_file":              "read Helix file",
+    "list_helix_tree":              "list Helix tree",
+    "fetch_work_item":              "fetch WI",
+    "create_and_link_test_cases":   "create & link test cases",
+    "create_deduped_test_cases":    "create test cases (deduped)",
+    "get_linked_test_cases":        "get linked test cases",
+    "llm-agent-loop":               "LLM agent loop",
+    "ai-vision-anthropic":          "AI Vision (Anthropic)",
+    "ai-vision-copilot":            "AI Vision (Copilot)",
+}
+def _step_label(record: dict) -> str:
+    """Build the "<agent> (<step>)" label for one cost record.
+
+    Missing "server"/"tool" keys show as "?"; names absent from the
+    friendly-name tables are shown unchanged.
+    """
+    server_key = record.get("server", "?")
+    tool_key = record.get("tool", "?")
+    step = _TOOL_FRIENDLY.get(tool_key, tool_key)
+    # agent-runner rows are always labelled "Orchestrator", whatever the table says.
+    if server_key == "agent-runner":
+        agent = "Orchestrator"
+    else:
+        agent = _SERVER_FRIENDLY.get(server_key, server_key)
+    return f"{agent} ({step})"
+def _model_display(model_id: str) -> str:
+    """Return the pricing entry's display name for ``model_id``.
+
+    Falls back to the raw ``model_id`` when ``get_pricing`` yields nothing.
+    """
+    pricing = get_pricing(model_id)
+    if not pricing:
+        return model_id
+    return pricing.display_name
+def _fmt_tok(n: int, exact: bool) -> str:
+    """Format a token count compactly: plain below 1000, then "x.yK" / "x.yM".
+
+    Estimated counts (``exact`` False) are prefixed with "~".  A value whose
+    K form would round up to "1000.0K" (just under one million) is promoted
+    to the M form so the unit never shows four integer digits.
+    """
+    pfx = "" if exact else "~"
+    if n < 1000:
+        return f"{pfx}{n}"
+    in_k = f"{n/1000:.1f}"
+    # Compare the rendered text, not the number, so the promotion happens
+    # exactly when the one-decimal rounding overflows.
+    if n >= 1_000_000 or in_k == "1000.0":
+        return f"{pfx}{n/1_000_000:.1f}M"
+    return f"{pfx}{in_k}K"
+# ── Unified pipeline summary (called by agent_runner after loop) ───────────
+def print_pipeline_summary(
+    session_id: str,
+    work_item_id: str = "",
+    elapsed_s: float = 0.0,
+    model: str = "",
+    artefacts: list[dict] | None = None,
+) -> None:
+    """
+    Print the two-section final report:
+      1. Artefact Summary  — what was produced and where it lives
+      2. Token and Cost Report — per-step token counts and USD cost
+
+    Records are read from the session's ``cost-<session_id>.jsonl`` log and
+    everything is written to stderr.  Nothing is printed when tracking is
+    disabled, the log cannot be opened, or it holds no parseable records.
+
+    session_id    selects the log file to read.
+    work_item_id  shown in the banner when non-empty.
+    elapsed_s     shown in the banner when non-zero.
+    model         banner / rate-note model; falls back to ``_MODEL_ID``.
+    artefacts     rows for section 1; each dict is read via the keys
+                  "name", "type", "location" and "detail".  Section 1 is
+                  omitted when this is empty or None.
+    """
+    if not _TRACKING_ENABLED:
+        return
+    log_path = _LOG_DIR / f"cost-{session_id}.jsonl"
+    records: list[dict] = []
+    try:
+        with log_path.open(encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if line:
+                    try:
+                        records.append(json.loads(line))
+                    except json.JSONDecodeError:
+                        # Skip a malformed line rather than lose the whole report.
+                        pass
+    except OSError:
+        # Log missing or unreadable: there is nothing to report.
+        return
+    if not records:
+        return
+    c   = _C
+    # Width of the horizontal rules.
+    W   = 108
+    m   = model or _MODEL_ID
+    wi  = f"WI {work_item_id}  ·  " if work_item_id else ""
+    dur = f"  ·  {elapsed_s:.1f}s" if elapsed_s else ""
+    print(f"\n{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)
+    print(
+        f"{c['bold']}  stlc-agents · Pipeline Report  ·  {wi}{m}{dur}{c['reset']}",
+        file=sys.stderr,
+    )
+    print(f"{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)
+    # ── Section 1: Artefact Summary ──────────────────────────────────────────
+    if artefacts:
+        print(f"\n{c['bold']}  1. Artefact Summary{c['reset']}", file=sys.stderr)
+        # Column widths: row number, artefact name, type, location/status.
+        C1, C2, C3, C4 = 3, 26, 22, 22
+        hdr = (
+            f"  {'#':<{C1}}  {'Artefact':<{C2}}  {'Type':<{C3}}"
+            f"  {'Location / Status':<{C4}}  Detail"
+        )
+        print(f"\n{hdr}", file=sys.stderr)
+        print(f"  {'─'*(W-2)}", file=sys.stderr)
+        for i, art in enumerate(artefacts, 1):
+            row = (
+                f"  {i:<{C1}}  {art.get('name',''):<{C2}}  "
+                f"{art.get('type',''):<{C3}}  "
+                f"{art.get('location',''):<{C4}}  "
+                f"{c['dim']}{art.get('detail','')}{c['reset']}"
+            )
+            print(row, file=sys.stderr)
+    # ── Section 2: Token and Cost Report ─────────────────────────────────────
+    print(f"\n{c['bold']}  2. Token and Cost Report{c['reset']}", file=sys.stderr)
+    # Column widths: row number, step label, model, input tokens, output tokens.
+    S1, S2, S3, S4, S5 = 3, 46, 20, 12, 12
+    hdr2 = (
+        f"\n  {'#':<{S1}}  {'Agent / Step':<{S2}}  {'Model':<{S3}}"
+        f"  {'Input':>{S4}}  {'Output':>{S5}}  {'Cost USD':>12}"
+    )
+    print(hdr2, file=sys.stderr)
+    print(f"  {'─'*(W-2)}", file=sys.stderr)
+    # These accumulators are filled from total_rows further down, not in the
+    # per-row printing loop that follows.
+    total_in = total_out = 0
+    total_cost = 0.0
+    for i, r in enumerate(records, 1):
+        exact  = r.get("token_method") == "exact"
+        inp    = r.get("input_tokens", 0)
+        out    = r.get("output_tokens", 0)
+        cost   = r.get("cost_usd", 0.0)
+        mdl    = _model_display(r.get("model", m))
+        label  = _step_label(r)
+        pfx    = "" if exact else "~"
+        in_s   = _fmt_tok(inp, exact)
+        out_s  = _fmt_tok(out, exact)
+        cost_s = f"{pfx}${cost:.6f}"
+        print(
+            f"  {i:<{S1}}  {label:<{S2}}  {mdl:<{S3}}"
+            f"  {in_s:>{S4}}  {out_s:>{S5}}  "
+            f"{c['green']}{cost_s:>12}{c['reset']}",
+            file=sys.stderr,
+        )
+    # Session total: use only exact agent-runner rows when present.
+    # Estimated (MCP) rows measure payload size — those bytes are already part of
+    # the LLM's context window, so summing them with the LLM total would double-count.
+    runner_indices = [i for i, r in enumerate(records) if r.get("server") == "agent-runner"]
+    total_rows     = [records[i] for i in runner_indices] if runner_indices else records
+    for r in total_rows:
+        total_in   += r.get("input_tokens", 0)
+        total_out  += r.get("output_tokens", 0)
+        total_cost += r.get("cost_usd", 0.0)
+    # The totals carry a "~" prefix unless every summed row is exact.
+    all_exact = all(r.get("token_method") == "exact" for r in total_rows)
+    in_tot    = _fmt_tok(total_in, all_exact)
+    out_tot   = _fmt_tok(total_out, all_exact)
+    pfx_tot   = "" if all_exact else "~"
+    cost_tot  = f"{pfx_tot}${total_cost:.6f}"
+    # Mixed log (agent-runner rows plus others): name the 1-based rows that
+    # make up the total and count the rows left out of it.
+    if runner_indices and len(runner_indices) < len(records):
+        row_nums  = [i + 1 for i in runner_indices]
+        est_count = len(records) - len(runner_indices)
+        row_label = f"row {row_nums[0]}" if len(row_nums) == 1 else f"rows {','.join(map(str, row_nums))}"
+        tot_label = f"Session Total ({row_label}; {est_count} MCP rows in LLM ctx)"
+    else:
+        tot_label = "Session Total"
+    p = get_pricing(m)
+    rate_note = (
+        f"  Model: {m} — rates applied: "
+        f"${p.input_per_mtok:.2f}/M input, ${p.output_per_mtok:.2f}/M output."
+        if p else f"  Model: {m}"
+    )
+    if runner_indices and len(runner_indices) < len(records):
+        rate_note += (
+            "\n  Session total = exact LLM API cost only."
+            " Estimated MCP rows are payload-size heuristics already included in the LLM context."
+        )
+    elif not all_exact:
+        rate_note += (
+            "\n  Estimates (~) based on payload size; "
+            "exact rows come directly from the LLM API response."
+        )
+    print(f"  {'─'*(W-2)}", file=sys.stderr)
+    print(
+        f"  {'':>{S1}}  {c['bold']}{tot_label:<{S2}}{c['reset']}  "
+        f"{'':>{S3}}  {c['bold']}{in_tot:>{S4}}  {out_tot:>{S5}}  "
+        f"{c['green']}{cost_tot:>12}{c['reset']}",
+        file=sys.stderr,
+    )
+    print(f"{c['dim']}\n{rate_note}{c['reset']}", file=sys.stderr)
+    print(f"{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
+# ── Session summary on exit (MCP server / Claude Code path) ───────────────
 def _print_summary() -> None:
+    # Suppressed when agent_runner will print the unified pipeline summary
+    if os.environ.get("STLC_COST_SUMMARY", "").lower() == "suppress":
+        return
     if not _TRACKING_ENABLED or _session is None or not _session.records:
         return
@@ -332,15 +663,6 @@ def _print_summary() -> None:
     elapsed = time.time() - sess.started_at
     c       = _C
-    by_server: dict[str, dict] = {}
-    for r in records:
-        k = r.get("server", "unknown")
-        if k not in by_server:
-            by_server[k] = {"calls": 0, "tokens": 0, "cost_usd": 0.0}
-        by_server[k]["calls"]    += 1
-        by_server[k]["tokens"]   += r.get("estimated_tokens", 0)
-        by_server[k]["cost_usd"] += r.get("cost_usd", 0.0)
     total_cost   = sum(r.get("cost_usd", 0.0) for r in records)
     total_tokens = sum(r.get("estimated_tokens", 0) for r in records)
@@ -349,47 +671,33 @@ def _print_summary() -> None:
     print(f"{c['bold']}  stlc-agents · Cost Summary  ·  {sess.id}{c['reset']}", file=sys.stderr)
     print(f"{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)
-    # Per-server
-    print(f"\n  {'Server':<30} {'Calls':>6} {'~Tokens':>10} {'Cost (USD)':>14}", file=sys.stderr)
-    print(f"  {'─'*60}", file=sys.stderr)
-    for svr, d in sorted(by_server.items()):
-        tok = f"{d['tokens']/1000:.1f}K" if d['tokens'] >= 1000 else str(d['tokens'])
-        print(
-            f"  {svr:<30} {d['calls']:>6} {tok:>10} "
-            f"{c['green']}${d['cost_usd']:.6f}{c['reset']:>14}",
-            file=sys.stderr,
-        )
-    # Per-step
-    print(f"\n  {'Step':<26} {'Tool':<36} {'~Tok':>6}  {'Cost':>10}  {'ms':>6}", file=sys.stderr)
+    print(f"\n  {'Agent':<28} {'Artifact':<34} {'~Tokens':>8}  {'Cost':>10}  {'ms':>6}", file=sys.stderr)
     print(f"  {'─'*W}", file=sys.stderr)
     for r in records:
-        tok = f"{r.get('estimated_tokens',0)/1000:.1f}K" if r.get('estimated_tokens',0) >= 1000 else str(r.get('estimated_tokens',0))
+        raw    = r.get("estimated_tokens", 0)
+        tok    = f"{raw/1000:.1f}K" if raw >= 1000 else str(raw)
+        pfx    = "" if r.get("token_method") == "exact" else "~"
+        art    = _TOOL_ARTIFACT.get(r.get("tool", ""), r.get("tool", "?"))
         print(
-            f"  {r.get('server','?'):<26} {r.get('tool','?'):<36} "
-            f"{tok:>6}  ${r.get('cost_usd',0):.6f}  {r.get('latency_ms',0):>6}",
+            f"  {r.get('server','?'):<28} {art:<34} "
+            f"{pfx}{tok:>8}  ${r.get('cost_usd',0):.6f}  {r.get('latency_ms',0):>6}",
             file=sys.stderr,
         )
-    # Totals
     tok_total = f"{total_tokens/1000:.1f}K" if total_tokens >= 1000 else str(total_tokens)
-    print(f"\n  {'─'*W}", file=sys.stderr)
-    print(f"  {'Total tokens':<40} {tok_total:>10}", file=sys.stderr)
-    print(f"  {c['bold']}{'Total cost':<40} {c['green']}${total_cost:.6f}{c['reset']}", file=sys.stderr)
+    print(f"  {'─'*W}", file=sys.stderr)
+    print(
+        f"  {c['bold']}{'TOTAL':<28} {'':<34} {tok_total:>9}  "
+        f"{c['green']}${total_cost:.6f}{c['reset']}  {elapsed:.1f}s",
+        file=sys.stderr,
+    )
-    # Model info
-    model_str = f"{_MODEL_ID}"
+    model_str = _MODEL_ID
     if _PRICING:
         model_str += f"  (${_PRICING.input_per_mtok}/${_PRICING.output_per_mtok} per MTok in/out)"
-    print(f"  {c['dim']}Model: {model_str}{c['reset']}", file=sys.stderr)
-    print(f"  {c['dim']}Model detected via: {_model_source()}{c['reset']}", file=sys.stderr)
-    print(f"  {c['dim']}Token method: estimated from payload size (chars÷4){c['reset']}", file=sys.stderr)
-    print(f"  {c['dim']}Duration: {elapsed:.1f}s  ·  Log: {sess.log_path}{c['reset']}", file=sys.stderr)
-    print(f"\n  {c['dim']}To set model explicitly:{c['reset']}", file=sys.stderr)
-    print(f"  {c['dim']}  qa-stlc cost --set-model claude-opus-4-6{c['reset']}", file=sys.stderr)
-    print(f"  {c['dim']}  or add to .mcp.json env: STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}", file=sys.stderr)
-    print(f"  {c['dim']}  or add to .env:          STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}", file=sys.stderr)
-    print(f"\n{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
+    print(f"\n  {c['dim']}Model: {model_str}{c['reset']}", file=sys.stderr)
+    print(f"  {c['dim']}Log: {sess.log_path}{c['reset']}", file=sys.stderr)
+    print(f"{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
 atexit.register(_print_summary)