@qa-gentic/stlc-agents 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +59 -314
  2. package/bin/postinstall.js +17 -1
  3. package/bin/qa-stlc.js +23 -0
  4. package/package.json +1 -1
  5. package/skills/write-helix-files/SKILL.md +6 -0
  6. package/src/cli/cmd-cost.js +253 -0
  7. package/src/cli/cmd-mcp-config.js +124 -59
  8. package/src/stlc_agents/agent_gherkin_generator/server.py +88 -4
  9. package/src/stlc_agents/agent_helix_writer/tools/helix_write.py +60 -28
  10. package/src/stlc_agents/agent_jira_manager/server.py +209 -2
  11. package/src/stlc_agents/agent_jira_manager/tools/jira_workitem.py +36 -0
  12. package/src/stlc_agents/agent_playwright_generator/server.py +968 -105
  13. package/src/stlc_agents/agent_test_case_manager/server.py +121 -2
  14. package/src/stlc_agents/shared/cost_tracker.py +395 -0
  15. package/src/stlc_agents/shared/pricing.py +72 -0
  16. package/src/stlc_agents/__pycache__/__init__.cpython-310.pyc +0 -0
  17. package/src/stlc_agents/agent_gherkin_generator/__pycache__/__init__.cpython-310.pyc +0 -0
  18. package/src/stlc_agents/agent_gherkin_generator/__pycache__/server.cpython-310.pyc +0 -0
  19. package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/__init__.cpython-310.pyc +0 -0
  20. package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/ado_gherkin.cpython-310.pyc +0 -0
  21. package/src/stlc_agents/agent_helix_writer/__pycache__/__init__.cpython-310.pyc +0 -0
  22. package/src/stlc_agents/agent_helix_writer/__pycache__/server.cpython-310.pyc +0 -0
  23. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/__init__.cpython-310.pyc +0 -0
  24. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/boilerplate.cpython-310.pyc +0 -0
  25. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/helix_write.cpython-310.pyc +0 -0
  26. package/src/stlc_agents/agent_jira_manager/__pycache__/__init__.cpython-310.pyc +0 -0
  27. package/src/stlc_agents/agent_jira_manager/__pycache__/server.cpython-310.pyc +0 -0
  28. package/src/stlc_agents/agent_jira_manager/tools/__pycache__/__init__.cpython-310.pyc +0 -0
  29. package/src/stlc_agents/agent_jira_manager/tools/__pycache__/jira_workitem.cpython-310.pyc +0 -0
  30. package/src/stlc_agents/agent_playwright_generator/__pycache__/__init__.cpython-310.pyc +0 -0
  31. package/src/stlc_agents/agent_playwright_generator/__pycache__/server.cpython-310.pyc +0 -0
  32. package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/__init__.cpython-310.pyc +0 -0
  33. package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/ado_attach.cpython-310.pyc +0 -0
  34. package/src/stlc_agents/agent_test_case_manager/__pycache__/__init__.cpython-310.pyc +0 -0
  35. package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-310.pyc +0 -0
  36. package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/__init__.cpython-310.pyc +0 -0
  37. package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/ado_workitem.cpython-310.pyc +0 -0
  38. package/src/stlc_agents/shared/__pycache__/__init__.cpython-310.pyc +0 -0
  39. package/src/stlc_agents/shared/__pycache__/auth.cpython-310.pyc +0 -0
  40. package/src/stlc_agents/shared_jira/__pycache__/__init__.cpython-310.pyc +0 -0
  41. package/src/stlc_agents/shared_jira/__pycache__/auth.cpython-310.pyc +0 -0
@@ -175,9 +175,33 @@ def _validate_linked_test_cases_response(result: dict) -> dict:
175
175
 
176
176
 
177
177
  # ---------------------------------------------------------------------------
178
- # Tool definitions
178
+ # Deduplication helper
179
179
  # ---------------------------------------------------------------------------
180
180
 
181
# Filler words dropped from titles before comparing them for duplicates.
_TC_STOP_WORDS = frozenset({
    "verify", "ensure", "validate", "check", "test", "the", "a", "an",
    "that", "is", "are", "can", "user", "should", "able", "to", "with",
})


def _normalise_title(title: str) -> str:
    """Reduce a test-case title to a stable key for duplicate detection.

    The title is trimmed, a leading LLM-style numbering prefix is removed
    (``TC_1_``, ``TC-2_``, ``1.``, ``1)``, ``(1)``, ``#1``, or a bare number
    followed by whitespace), the rest is lower-cased, every character other
    than ``a-z``, ``0-9`` and whitespace is dropped, and stop words are
    removed. The surviving tokens are joined with single spaces.

    A bare leading number is only treated as a numbering prefix when it is
    NOT glued to a letter or digit. Titles such as "3D view renders" or
    "2FA prompt appears" keep their leading token; stripping it would make
    "2D ..." and "3D ..." collapse to the same key and be skipped as
    duplicates.

    Args:
        title: Raw test-case title.

    Returns:
        The normalised key; an empty string when nothing survives.
    """
    import re

    s = title.strip()
    # Prefix alternatives, in order: TC_1_ / TC-2_ / TC3: | (1) | 1. 1) 1: #1
    # The negative lookahead keeps numbers that are part of a word ("3D").
    s = re.sub(
        r"^(?:TC[-_]?\d+[-_:]?|\(\d+\)|#?\d+(?:[.):]|(?![A-Za-z0-9])))\s*",
        "",
        s,
        flags=re.IGNORECASE,
    )
    cleaned = re.sub(r"[^a-z0-9\s]", "", s.lower())
    tokens = [w for w in cleaned.split() if w not in _TC_STOP_WORDS]
    return " ".join(tokens)
201
+
202
+
203
+
204
+
181
205
  @app.list_tools()
182
206
  async def list_tools() -> list[types.Tool]:
183
207
  return [
@@ -273,6 +297,51 @@ async def list_tools() -> list[types.Tool]:
273
297
  "required": ["work_item_id", "organization_url", "project_name"],
274
298
  },
275
299
  ),
300
+ types.Tool(
301
+ name="create_deduped_test_cases",
302
+ description=(
303
+ "Create test cases in ADO, skipping any whose title already exists as a "
304
+ "linked test case on the work item. "
305
+ "Internally calls get_linked_test_cases, filters the incoming batch against "
306
+ "existing titles (case-insensitive, stop-word-normalised), then calls "
307
+ "create_and_link_test_cases on the net-new subset only. "
308
+ "Use this instead of create_and_link_test_cases for webhook/headless runs "
309
+ "where re-triggers would otherwise produce duplicates. "
310
+ "Returns skipped_count, created_count, and the full create result."
311
+ ),
312
+ inputSchema={
313
+ "type": "object",
314
+ "properties": {
315
+ "work_item_id": {"type": "integer", "description": "Work item to link test cases to"},
316
+ "organization_url": {"type": "string"},
317
+ "project_name": {"type": "string"},
318
+ "test_cases": {
319
+ "type": "array",
320
+ "description": "Full proposed test case batch (duplicates will be filtered out)",
321
+ "items": {
322
+ "type": "object",
323
+ "properties": {
324
+ "title": {"type": "string"},
325
+ "priority": {"type": "integer", "description": "1-4, default 2"},
326
+ "steps": {
327
+ "type": "array",
328
+ "items": {
329
+ "type": "object",
330
+ "properties": {
331
+ "action": {"type": "string"},
332
+ "expected_result": {"type": "string"},
333
+ },
334
+ "required": ["action", "expected_result"],
335
+ },
336
+ },
337
+ },
338
+ "required": ["title", "steps"],
339
+ },
340
+ },
341
+ },
342
+ "required": ["work_item_id", "organization_url", "project_name", "test_cases"],
343
+ },
344
+ ),
276
345
  ]
277
346
 
278
347
 
@@ -443,6 +512,56 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
443
512
  # ── Pre-output validation ─────────────────────────────────────
444
513
  result["_validation"] = _validate_linked_test_cases_response(result)
445
514
 
515
+ elif name == "create_deduped_test_cases":
516
+ org = arguments["organization_url"]
517
+ project = arguments["project_name"]
518
+ wi_id = arguments["work_item_id"]
519
+ proposed = arguments["test_cases"]
520
+
521
+ # Step 1: fetch existing linked TCs
522
+ existing_result = await asyncio.to_thread(
523
+ _get_linked_test_cases, org, project, wi_id
524
+ )
525
+ existing_titles: set[str] = {
526
+ _normalise_title(tc.get("title", ""))
527
+ for tc in existing_result.get("linked_test_cases", [])
528
+ }
529
+
530
+ # Step 2: filter — keep only net-new
531
+ net_new = [
532
+ tc for tc in proposed
533
+ if _normalise_title(tc.get("title", "")) not in existing_titles
534
+ ]
535
+ skipped = len(proposed) - len(net_new)
536
+
537
+ if not net_new:
538
+ result = {
539
+ "status": "all_duplicates",
540
+ "skipped_count": skipped,
541
+ "created_count": 0,
542
+ "message": (
543
+ f"All {skipped} proposed test case(s) already exist as linked "
544
+ "test cases on this work item. Nothing was created."
545
+ ),
546
+ }
547
+ else:
548
+ # Step 3: create net-new batch via the existing tool logic
549
+ # Re-dispatch through call_tool to reuse input validation + Feature gate
550
+ create_result_raw = await call_tool("create_and_link_test_cases", {
551
+ "work_item_id": wi_id,
552
+ "organization_url": org,
553
+ "project_name": project,
554
+ "test_cases": net_new,
555
+ "confirmed": True,
556
+ })
557
+ inner = json.loads(create_result_raw[0].text)
558
+ result = {
559
+ "status": "ok",
560
+ "skipped_count": skipped,
561
+ "created_count": len(net_new),
562
+ "create_result": inner,
563
+ }
564
+
446
565
  else:
447
566
  result = {"error": f"Unknown tool: {name}"}
448
567
 
@@ -480,4 +599,4 @@ def main():
480
599
 
481
600
 
482
601
  if __name__ == "__main__":
483
- main()
602
+ main()
@@ -0,0 +1,395 @@
1
+ """
2
+ cost_tracker.py — stlc_agents.shared.cost_tracker
3
+ ─────────────────────────────────────────────────────
4
+ Shared cost tracking injected into all 5 MCP servers at install time.
5
+
6
+ MODEL AUTO-DETECTION
7
+ ─────────────────────
8
+ The MCP server is a subprocess. It cannot see the coding agent's API
9
+ response or token usage. Instead, each coding agent exposes the model
10
+ it is running on via a known environment variable that the MCP config
11
+ (`.mcp.json` / `.vscode/mcp.json`) passes through into the subprocess:
12
+
13
+ Agent Env var set automatically Value example
14
+ ───────────────── ────────────────────────────── ──────────────────────────
15
+ Claude Code ANTHROPIC_MODEL claude-sonnet-4-6
16
+ CLAUDE_MODEL (fallback) claude-opus-4-6
17
+ GitHub Copilot GITHUB_COPILOT_MODEL (if set) gpt-4o
18
+ Cursor / Windsurf (none — user sets manually) —
19
+ Any agent STLC_CODING_AGENT_MODEL user-specified override
20
+
21
+ Detection order (first match wins):
22
+ 1. STLC_CODING_AGENT_MODEL — explicit user override (always wins)
23
+ 2. ANTHROPIC_MODEL — set by Claude Code automatically
24
+ 3. CLAUDE_MODEL — older Claude Code versions
25
+ 4. GITHUB_COPILOT_MODEL — Copilot if configured
26
+ 5. ~/.qa-stlc/agent-model — saved preference from `qa-stlc cost --set-model`
27
+ 6. "claude-sonnet-4-6" — safe default (most common)
28
+
29
+ TOKEN ESTIMATION
30
+ ─────────────────
31
+ Because the server never sees the LLM's token usage, tokens are estimated
32
+ from the ADO/Jira JSON payload size the server returns:
33
+ estimated_tokens = len(json_response_text) / 4 (chars-per-token heuristic)
34
+ input_tokens = estimated_tokens * 0.70 (coding agent reading the result)
35
+ output_tokens = estimated_tokens * 0.30 (coding agent writing the artifact)
36
+
37
+ This is conservative and consistent with how promptfoo's HTTP provider
38
+ estimates tokens when the API doesn't return a usage block.
39
+
40
+ WHAT GETS LOGGED (per tool call)
41
+ ──────────────────────────────────
42
+ _cost block injected into every tool response JSON — the coding agent
43
+ sees it inline alongside the tool result.
44
+
45
+ ~/.qa-stlc/cost-<session>.jsonl — machine-readable session log.
46
+
47
+ stderr live line — visible in Claude Code's MCP log pane, VS Code
48
+ Output > MCP, Cursor's tool output panel, etc.
49
+
50
+ atexit summary — printed when the MCP server process exits.
51
+ """
52
+
53
+ from __future__ import annotations
54
+
55
+ import atexit
56
+ import json
57
+ import os
58
+ import sys
59
+ import time
60
+ from datetime import datetime, timezone
61
+ from pathlib import Path
62
+ from typing import Any, Optional
63
+
64
+ from mcp import types
65
+
66
+ from .pricing import ModelPricing, get_pricing
67
+
68
+ # ── Model detection ────────────────────────────────────────────────────────
69
+
70
+ _PREF_FILE = Path.home() / ".qa-stlc" / "agent-model"
71
+
72
+
73
def _detect_model() -> str:
    """Return the coding agent's model id, using the first source that has one.

    Sources are tried in this order:
      1. ``STLC_CODING_AGENT_MODEL`` - explicit user override
      2. ``ANTHROPIC_MODEL``
      3. ``CLAUDE_MODEL``
      4. ``GITHUB_COPILOT_MODEL``
      5. the saved preference file (``_PREF_FILE``), if it has content
      6. the default ``"claude-sonnet-4-6"``

    Environment values and the file content are whitespace-stripped; blank
    values are treated as unset. Reading the preference file is best-effort:
    a missing, unreadable, or non-UTF-8 file falls through to the default
    instead of raising, because this function runs at import time.
    """
    env_vars = (
        "STLC_CODING_AGENT_MODEL",
        "ANTHROPIC_MODEL",
        "CLAUDE_MODEL",
        "GITHUB_COPILOT_MODEL",
    )
    for var in env_vars:
        if value := os.getenv(var, "").strip():
            return value

    # Saved preference from `qa-stlc cost --set-model`.
    try:
        saved = _PREF_FILE.read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # listed explicitly or a corrupt file would abort server start-up.
        saved = ""
    if saved:
        return saved

    # Safe default
    return "claude-sonnet-4-6"
105
+
106
+
107
+ # ── Config ─────────────────────────────────────────────────────────────────
108
+
109
# Tracking is on unless STLC_COST_TRACKING is set to "false" (any letter case).
_TRACKING_ENABLED = os.getenv("STLC_COST_TRACKING", "true").lower() != "false"
# Directory that receives the per-session JSONL log; defaults to ~/.qa-stlc.
_LOG_DIR = Path(os.getenv("STLC_COST_LOG_DIR", str(Path.home() / ".qa-stlc")))
# Session id; falls back to "stlc-<unix seconds at import>" when unset.
_SESSION_ID = os.getenv("STLC_SESSION_ID", f"stlc-{int(time.time())}")

# Resolved once at import time
_MODEL_ID = _detect_model()
# None when the model id matches nothing in the pricing registry.
_PRICING = get_pricing(_MODEL_ID)

# ANSI colours (suppressed when not a TTY, e.g. in VS Code Output pane)
_TTY = sys.stderr.isatty()
# Every colour code maps to "" when stderr is not a TTY, so callers can
# interpolate unconditionally.
_C = {k: v if _TTY else "" for k, v in {
    "reset": "\x1b[0m", "bold": "\x1b[1m", "dim": "\x1b[2m",
    "cyan": "\x1b[36m", "green": "\x1b[32m", "yellow": "\x1b[33m",
}.items()}
123
+
124
+
125
+ # ── Session ────────────────────────────────────────────────────────────────
126
+
127
class _Session:
    """In-memory list of cost records mirrored to a JSONL log file.

    Logging is strictly best-effort: neither creating the log directory nor
    appending to the log file may ever make a tool call fail.
    """

    def __init__(self):
        self.id = _SESSION_ID
        self.started_at = time.time()
        self.records: list[dict] = []
        self.log_path = _LOG_DIR / f"cost-{self.id}.jsonl"
        try:
            _LOG_DIR.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Read-only or otherwise unusable log directory: keep tracking in
            # memory only; add() will skip the file write for the same reason.
            pass

    def add(self, record: dict) -> None:
        """Store *record* in memory and append it to the log as one JSON line."""
        self.records.append(record)
        try:
            with self.log_path.open("a", encoding="utf-8") as f:
                f.write(json.dumps(record) + "\n")
        except OSError:
            pass  # never break a tool call over logging

    def running_total(self) -> float:
        """Return the sum of ``cost_usd`` over all records (missing counts as 0.0)."""
        return sum(r.get("cost_usd", 0.0) for r in self.records)
145
+
146
+
147
+ _session: Optional[_Session] = None
148
+
149
+
150
def _get_session() -> _Session:
    """Return the module-wide session, creating it on first use."""
    global _session
    if _session is not None:
        return _session
    _session = _Session()
    return _session
155
+
156
+
157
+ # ── Token estimation ───────────────────────────────────────────────────────
158
+
159
def _estimate_tokens(payload: Any) -> tuple[int, int, int]:
    """Estimate token counts from the size of a tool response payload.

    The MCP server cannot see the LLM's real token usage, so the size of the
    payload text is used as a proxy:

        total  = max(1, len(text) // 4)   (chars-per-token heuristic)
        input  = 70% of total, rounded down
        output = total - input

    The split uses integer arithmetic and derives the output share as the
    remainder. Truncating two independent float products loses tokens
    (``int(90 * 0.70)`` is 62, not 63) and lets input + output drift below
    the reported total.

    Args:
        payload: A string (measured as-is) or any JSON-serialisable object
            (measured after ``json.dumps``).

    Returns:
        ``(total, input, output)`` with ``input + output == total``.
    """
    text = payload if isinstance(payload, str) else json.dumps(payload)
    total = max(1, len(text) // 4)
    input_tokens = total * 7 // 10
    return total, input_tokens, total - input_tokens
176
+
177
+
178
+ # ── Public API ─────────────────────────────────────────────────────────────
179
+
180
def track(
    result: Any,
    *,
    tool_name: str,
    server: str,
    t0: float,
) -> list[types.TextContent]:
    """
    Serialise a call_tool() result for the MCP client, attaching cost data.

    Intended as the last statement of each server's call_tool():

        return track(result, tool_name=name, server="qa-gherkin-generator", t0=t0)

    With tracking disabled the result is serialised untouched. Otherwise the
    call's estimated tokens, cost and latency are computed, appended to the
    session log, echoed to stderr, and - when ``result`` is a dict - stored
    on it under the ``_cost`` key (the dict is modified in place).

    Args:
        result:    The value the tool produced (normally a dict).
        tool_name: The MCP tool name, e.g. "fetch_work_item_for_gherkin".
        server:    The MCP server name, e.g. "qa-gherkin-generator".
        t0:        time.monotonic() captured at the start of call_tool().

    Returns:
        A one-element list holding the JSON text of ``result``.
    """
    if _TRACKING_ENABLED:
        elapsed_ms = int((time.monotonic() - t0) * 1000)
        session = _get_session()
        # Tokens are estimated before _cost is attached, so the cost block
        # itself is not counted.
        est_total, est_in, est_out = _estimate_tokens(result)

        if _PRICING:
            call_cost = _PRICING.cost(input_tokens=est_in, output_tokens=est_out)
        else:
            call_cost = 0.0
        session_total = session.running_total() + call_cost

        cost_block = {
            "session_id": session.id,
            "server": server,
            "tool": tool_name,
            "model": _MODEL_ID,
            "model_source": _model_source(),
            "input_tokens": est_in,
            "output_tokens": est_out,
            "estimated_tokens": est_total,
            "cost_usd": round(call_cost, 8),
            "latency_ms": elapsed_ms,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "session_total_usd": round(session_total, 8),
            "token_method": "estimated",
            "token_note": (
                "Estimated from ADO/Jira payload size (chars÷4, 70/30 split). "
                "The MCP server has no access to the coding agent's token usage. "
                "Set STLC_CODING_AGENT_MODEL if the detected model is wrong. "
                f"Detected via: {_model_source()}."
            ),
        }

        if isinstance(result, dict):
            result["_cost"] = cost_block

        log_record = {"tool": tool_name, "server": server}
        log_record.update(cost_block)
        session.add(log_record)
        _print_live(server, tool_name, est_total, call_cost, elapsed_ms, session_total)

    body = json.dumps(result, indent=2, ensure_ascii=False)
    return [types.TextContent(type="text", text=body)]
244
+
245
+
246
def track_healing(payload: dict) -> None:
    """
    Log one AI Vision healing call reported by LocatorHealer.ts.

    Token counts come from ``payload["usage"]`` (``input_tokens`` /
    ``output_tokens``, falling back to ``prompt_tokens`` /
    ``completion_tokens``), so the record is marked ``token_method: exact``.
    Does nothing when tracking is disabled.
    """
    if not _TRACKING_ENABLED:
        return

    session = _get_session()
    model_id = payload.get("model_id", "claude-sonnet-4-20250514")
    model_pricing = get_pricing(model_id)

    usage = payload.get("usage", {})
    tokens_in = usage.get("input_tokens", usage.get("prompt_tokens", 0))
    tokens_out = usage.get("output_tokens", usage.get("completion_tokens", 0))

    if model_pricing:
        call_cost = model_pricing.cost(input_tokens=tokens_in, output_tokens=tokens_out)
    else:
        call_cost = 0.0

    latency = payload.get("latency_ms", 0)
    healing_meta = {
        "locator_key": payload.get("locator_key"),
        "healed": payload.get("healed", False),
        "selector": payload.get("selector"),
    }

    session.add({
        "tool": f"ai-vision-{payload.get('provider', 'anthropic')}",
        "server": "locator-healer",
        "session_id": session.id,
        "model": model_id,
        "model_source": "locator-healer-env",
        "input_tokens": tokens_in,
        "output_tokens": tokens_out,
        "estimated_tokens": tokens_in + tokens_out,
        "cost_usd": round(call_cost, 8),
        "latency_ms": latency,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        # Computed before the record is stored, hence "+ call_cost".
        "session_total_usd": round(session.running_total() + call_cost, 8),
        "token_method": "exact",
        "healing_meta": healing_meta,
    })

    live_label = f"ai-vision ({payload.get('provider', '?')})"
    _print_live(
        "locator-healer",
        live_label,
        tokens_in + tokens_out,
        call_cost,
        latency,
        session.running_total(),
    )
288
+
289
+
290
def _model_source() -> str:
    """Describe where the detected model came from.

    Checks the same sources, in the same order, as ``_detect_model``. The
    saved preference is only reported when the file can be read and has
    non-blank content. An empty or unreadable file is not what
    ``_detect_model`` used, so the answer in that case is the default.

    Returns:
        A short human-readable description of the winning source.
    """
    env_sources = (
        ("STLC_CODING_AGENT_MODEL", "STLC_CODING_AGENT_MODEL env var"),
        ("ANTHROPIC_MODEL", "ANTHROPIC_MODEL env var (set by Claude Code)"),
        ("CLAUDE_MODEL", "CLAUDE_MODEL env var (set by Claude Code)"),
        ("GITHUB_COPILOT_MODEL", "GITHUB_COPILOT_MODEL env var"),
    )
    for var, label in env_sources:
        if os.getenv(var, "").strip():
            return label

    try:
        has_saved = bool(_PREF_FILE.read_text(encoding="utf-8").strip())
    except (OSError, UnicodeDecodeError):
        has_saved = False
    if has_saved:
        return f"saved preference ({_PREF_FILE})"
    return "default fallback"
303
+
304
+
305
+ # ── Live stderr line ───────────────────────────────────────────────────────
306
+
307
def _print_live(
    server: str, tool: str, tokens: int,
    cost: float, latency_ms: int, running: float,
) -> None:
    """Write a one-line cost report for a single call to stderr, flushed."""
    if tokens >= 1000:
        token_label = f"{tokens/1000:.1f}K"
    else:
        token_label = str(tokens)

    dim = _C["dim"]
    reset = _C["reset"]
    segments = [
        f"{dim}[stlc-cost]{reset} ",
        f"{_C['cyan']}{server}{reset}{dim} · {reset}{tool}",
        f" ~{token_label} tokens {_C['green']}${cost:.6f}{reset}",
        f" {dim}(session: ${running:.6f} {latency_ms}ms){reset}",
    ]
    print("".join(segments), file=sys.stderr, flush=True)
322
+
323
+
324
+ # ── Session summary on exit ────────────────────────────────────────────────
325
+
326
def _print_summary() -> None:
    """Print the end-of-session cost report to stderr.

    Registered with ``atexit``. Prints nothing when tracking is disabled, no
    session was ever created, or the session has no records. The report
    contains a per-server table, a per-call table, totals, and the
    model/pricing details.

    Cost cells are formatted first and then padded as a whole, so the values
    line up under their right-aligned column headers. Padding must not be
    applied to the ANSI reset code, which is what left the per-server cost
    column unaligned before.
    """
    if not _TRACKING_ENABLED or _session is None or not _session.records:
        return

    sess = _session
    records = sess.records
    elapsed = time.time() - sess.started_at
    c = _C

    def fmt_tokens(count) -> str:
        # 1500 -> "1.5K"; values under 1000 are shown as-is.
        return f"{count/1000:.1f}K" if count >= 1000 else str(count)

    by_server: dict[str, dict] = {}
    for r in records:
        bucket = by_server.setdefault(
            r.get("server", "unknown"),
            {"calls": 0, "tokens": 0, "cost_usd": 0.0},
        )
        bucket["calls"] += 1
        bucket["tokens"] += r.get("estimated_tokens", 0)
        bucket["cost_usd"] += r.get("cost_usd", 0.0)

    total_cost = sum(r.get("cost_usd", 0.0) for r in records)
    total_tokens = sum(r.get("estimated_tokens", 0) for r in records)

    W = 68
    print(f"\n{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)
    print(f"{c['bold']} stlc-agents · Cost Summary · {sess.id}{c['reset']}", file=sys.stderr)
    print(f"{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)

    # Per-server
    print(f"\n {'Server':<30} {'Calls':>6} {'~Tokens':>10} {'Cost (USD)':>14}", file=sys.stderr)
    print(f" {'─'*60}", file=sys.stderr)
    for svr, d in sorted(by_server.items()):
        tok = fmt_tokens(d["tokens"])
        cost_cell = f"${d['cost_usd']:.6f}"
        print(
            f" {svr:<30} {d['calls']:>6} {tok:>10} "
            f"{c['green']}{cost_cell:>14}{c['reset']}",
            file=sys.stderr,
        )

    # Per-step
    print(f"\n {'Step':<26} {'Tool':<36} {'~Tok':>6} {'Cost':>10} {'ms':>6}", file=sys.stderr)
    print(f" {'─'*W}", file=sys.stderr)
    for r in records:
        tok = fmt_tokens(r.get("estimated_tokens", 0))
        cost_cell = f"${r.get('cost_usd', 0):.6f}"
        print(
            f" {r.get('server','?'):<26} {r.get('tool','?'):<36} "
            f"{tok:>6} {cost_cell:>10} {r.get('latency_ms',0):>6}",
            file=sys.stderr,
        )

    # Totals
    tok_total = fmt_tokens(total_tokens)
    print(f"\n {'─'*W}", file=sys.stderr)
    print(f" {'Total tokens':<40} {tok_total:>10}", file=sys.stderr)
    print(f" {c['bold']}{'Total cost':<40} {c['green']}${total_cost:.6f}{c['reset']}", file=sys.stderr)

    # Model info
    model_str = f"{_MODEL_ID}"
    if _PRICING:
        model_str += f" (${_PRICING.input_per_mtok}/${_PRICING.output_per_mtok} per MTok in/out)"
    print(f" {c['dim']}Model: {model_str}{c['reset']}", file=sys.stderr)
    print(f" {c['dim']}Model detected via: {_model_source()}{c['reset']}", file=sys.stderr)
    print(f" {c['dim']}Token method: estimated from payload size (chars÷4){c['reset']}", file=sys.stderr)
    print(f" {c['dim']}Duration: {elapsed:.1f}s · Log: {sess.log_path}{c['reset']}", file=sys.stderr)
    print(f"\n {c['dim']}To set model explicitly:{c['reset']}", file=sys.stderr)
    print(f" {c['dim']} qa-stlc cost --set-model claude-opus-4-6{c['reset']}", file=sys.stderr)
    print(f" {c['dim']} or add to .mcp.json env: STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}", file=sys.stderr)
    print(f" {c['dim']} or add to .env: STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}", file=sys.stderr)
    print(f"\n{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
393
+
394
+
395
+ atexit.register(_print_summary)
@@ -0,0 +1,72 @@
1
+ """
2
+ pricing.py — Model pricing registry for stlc-agents cost tracking.
3
+
4
+ Prices: USD per million tokens (MTok).
5
+ Source: Anthropic official docs, April 2026.
6
+
7
+ Models this repo actually calls:
8
+ - claude-sonnet-4-20250514 (LocatorHealer AI Vision, default)
9
+ - gpt-4o (LocatorHealer AI Vision, copilot provider)
10
+ + whatever coding agent the user runs (Claude / Copilot / Cursor / Windsurf)
11
+ — the user declares this via STLC_CODING_AGENT_MODEL env var.
12
+ """
13
+
14
+ from __future__ import annotations
15
+ from dataclasses import dataclass
16
+ from typing import Optional
17
+
18
+
19
@dataclass(frozen=True)
class ModelPricing:
    """Per-model prices, all in USD per million tokens (MTok)."""

    model_id: str
    display_name: str
    provider: str
    input_per_mtok: float  # USD / 1M input tokens
    output_per_mtok: float  # USD / 1M output tokens
    cache_write_per_mtok: float  # USD / 1M cache-write tokens
    cache_read_per_mtok: float  # USD / 1M cache-read tokens

    def cost(
        self,
        input_tokens: int = 0,
        output_tokens: int = 0,
        cache_write_tokens: int = 0,
        cache_read_tokens: int = 0,
    ) -> float:
        """Return the USD cost of the given token counts at this model's rates."""
        return (
            (input_tokens / 1_000_000) * self.input_per_mtok
            + (output_tokens / 1_000_000) * self.output_per_mtok
            + (cache_write_tokens / 1_000_000) * self.cache_write_per_mtok
            + (cache_read_tokens / 1_000_000) * self.cache_read_per_mtok
        )


_REGISTRY: list[ModelPricing] = [
    # ── Anthropic ──────────────────────────────────────────────────────────
    ModelPricing("claude-sonnet-4-20250514", "Claude Sonnet 4", "anthropic", 3.00, 15.00, 3.75, 0.30),
    ModelPricing("claude-sonnet-4-6", "Claude Sonnet 4.6", "anthropic", 3.00, 15.00, 3.75, 0.30),
    ModelPricing("claude-haiku-4-5-20251001", "Claude Haiku 4.5", "anthropic", 1.00, 5.00, 1.25, 0.10),
    ModelPricing("claude-opus-4-6", "Claude Opus 4.6", "anthropic", 5.00, 25.00, 6.25, 0.50),
    ModelPricing("claude-opus-4-7", "Claude Opus 4.7", "anthropic", 5.00, 25.00, 6.25, 0.50),
    # ── OpenAI / Copilot ──────────────────────────────────────────────────
    ModelPricing("gpt-4o", "GPT-4o", "openai", 2.50, 10.00, 0.00, 0.00),
    ModelPricing("gpt-4o-mini", "GPT-4o Mini", "openai", 0.15, 0.60, 0.00, 0.00),
]

_by_id: dict[str, ModelPricing] = {p.model_id: p for p in _REGISTRY}


def get_pricing(model_id: str) -> Optional[ModelPricing]:
    """Look up pricing: exact match first, then the longest substring match.

    The id is lower-cased and stripped. If it is not an exact registry key,
    every entry whose id contains the key, or is contained in it, is a
    candidate. The candidate with the longest overlap wins, so a dated id
    such as "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini" rather than
    the shorter "gpt-4o". Ties keep registry order.

    Args:
        model_id: Model identifier as reported by the agent or the user.

    Returns:
        The matching ``ModelPricing``, or ``None`` when the id is blank or
        nothing matches. A blank id must not match: the empty string is a
        substring of every registry id.
    """
    key = (model_id or "").lower().strip()
    if not key:
        return None

    exact = _by_id.get(key)
    if exact is not None:
        return exact

    def overlap(p: ModelPricing) -> int:
        # Length of the shared substring between key and registry id.
        if p.model_id in key:
            return len(p.model_id)
        if key in p.model_id:
            return len(key)
        return 0

    # max() returns the first maximal element, preserving registry order on ties.
    best = max(_REGISTRY, key=overlap, default=None)
    if best is None or overlap(best) == 0:
        return None
    return best


def list_models() -> list[ModelPricing]:
    """Return a new list of every registered model pricing entry."""
    return list(_REGISTRY)