@qa-gentic/stlc-agents 1.0.25 → 1.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/package.json +1 -1
  2. package/skills/generate-test-cases/SKILL.md +5 -0
  3. package/src/cli/cmd-cost.js +61 -30
  4. package/src/cli/cmd-init.js +88 -8
  5. package/src/stlc_agents/__pycache__/__init__.cpython-314.pyc +0 -0
  6. package/src/stlc_agents/agent_gherkin_generator/__pycache__/__init__.cpython-314.pyc +0 -0
  7. package/src/stlc_agents/agent_gherkin_generator/__pycache__/server.cpython-314.pyc +0 -0
  8. package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  9. package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/ado_gherkin.cpython-314.pyc +0 -0
  10. package/src/stlc_agents/agent_helix_writer/__pycache__/__init__.cpython-314.pyc +0 -0
  11. package/src/stlc_agents/agent_helix_writer/__pycache__/server.cpython-314.pyc +0 -0
  12. package/src/stlc_agents/agent_helix_writer/server.py +41 -6
  13. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  14. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/boilerplate.cpython-314.pyc +0 -0
  15. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/helix_write.cpython-314.pyc +0 -0
  16. package/src/stlc_agents/agent_playwright_generator/__pycache__/__init__.cpython-314.pyc +0 -0
  17. package/src/stlc_agents/agent_playwright_generator/__pycache__/server.cpython-314.pyc +0 -0
  18. package/src/stlc_agents/agent_playwright_generator/server.py +419 -213
  19. package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  20. package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/ado_attach.cpython-314.pyc +0 -0
  21. package/src/stlc_agents/agent_test_case_manager/__pycache__/__init__.cpython-314.pyc +0 -0
  22. package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-314.pyc +0 -0
  23. package/src/stlc_agents/agent_test_case_manager/server.py +12 -0
  24. package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  25. package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/ado_workitem.cpython-314.pyc +0 -0
  26. package/src/stlc_agents/agent_test_case_manager/tools/ado_workitem.py +65 -1
  27. package/src/stlc_agents/shared/__pycache__/__init__.cpython-314.pyc +0 -0
  28. package/src/stlc_agents/shared/__pycache__/auth.cpython-314.pyc +0 -0
  29. package/src/stlc_agents/shared/__pycache__/cost_tracker.cpython-314.pyc +0 -0
  30. package/src/stlc_agents/shared/__pycache__/pricing.cpython-314.pyc +0 -0
  31. package/src/stlc_agents/shared/cost_tracker.py +378 -70
  32. package/src/stlc_agents/shared/pricing.py +115 -24
  33. package/src/stlc_agents/webhook_orchestrator/__init__.py +0 -0
  34. package/src/stlc_agents/webhook_orchestrator/agent_runner.py +599 -0
  35. package/src/stlc_agents/webhook_orchestrator/main.py +43 -0
  36. package/src/stlc_agents/webhook_orchestrator/models.py +63 -0
  37. package/src/stlc_agents/webhook_orchestrator/orchestrator.py +103 -0
  38. package/src/stlc_agents/webhook_orchestrator/pipelines/__init__.py +0 -0
  39. package/src/stlc_agents/webhook_orchestrator/pipelines/_base.py +57 -0
  40. package/src/stlc_agents/webhook_orchestrator/pipelines/ado_test_cases.py +55 -0
  41. package/src/stlc_agents/webhook_orchestrator/pipelines/full_pipeline.py +202 -0
  42. package/src/stlc_agents/webhook_orchestrator/pipelines/gherkin_playwright.py +156 -0
  43. package/src/stlc_agents/webhook_orchestrator/pipelines/jira_test_cases.py +48 -0
  44. package/src/stlc_agents/webhook_orchestrator/webhook_bridge.py +368 -0
  45. package/src/stlc_agents/agent_gherkin_generator/__pycache__/server.cpython-310.pyc +0 -0
  46. package/src/stlc_agents/agent_helix_writer/__pycache__/server.cpython-310.pyc +0 -0
  47. package/src/stlc_agents/agent_jira_manager/__pycache__/server.cpython-310.pyc +0 -0
  48. package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-310.pyc +0 -0
  49. package/src/stlc_agents/shared/__pycache__/cost_tracker.cpython-310.pyc +0 -0
  50. package/src/stlc_agents/shared/__pycache__/pricing.cpython-310.pyc +0 -0
@@ -30,6 +30,7 @@ from stlc_agents.agent_test_case_manager.tools.ado_workitem import (
30
30
  create_test_case as _create_test_case,
31
31
  link_test_cases_to_work_item as _link_test_cases,
32
32
  get_linked_test_cases as _get_linked_test_cases,
33
+ add_tag_to_work_item as _add_tag,
33
34
  )
34
35
  from stlc_agents.shared.cost_tracker import track
35
36
 
@@ -483,6 +484,16 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
483
484
  except Exception as e:
484
485
  link_result = {"error": str(e)}
485
486
 
487
+ # Add tag to the parent work item after linking
488
+ tag_result = {}
489
+ if created:
490
+ try:
491
+ tag_result = await asyncio.to_thread(
492
+ _add_tag, org, project, wi_id, "STLCAgentTestCases"
493
+ )
494
+ except Exception as e:
495
+ tag_result = {"error": str(e)}
496
+
486
497
  result = {
487
498
  "summary": {
488
499
  "requested": len(test_cases),
@@ -493,6 +504,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
493
504
  "created_test_cases": created,
494
505
  "failed": failed,
495
506
  "link_result": link_result,
507
+ "tag_result": tag_result,
496
508
  "_validation": {
497
509
  "valid": len(failed) == 0 and bool(link_result.get("success", True)),
498
510
  "input_validation": input_validation,
@@ -8,6 +8,8 @@ Public API:
8
8
  create_test_case(org_url, project, title, steps, ...) -> dict
9
9
  link_test_cases_to_work_item(org_url, project, wi_id, tc_ids) -> dict
10
10
  get_linked_test_cases(org_url, project, work_item_id) -> dict
11
+ add_comment_to_work_item(org_url, project, work_item_id, text) -> dict
12
+ add_tag_to_work_item(org_url, project, work_item_id, tag) -> dict
11
13
  """
12
14
  from __future__ import annotations
13
15
 
@@ -175,8 +177,13 @@ def link_test_cases_to_work_item(
175
177
  project: str,
176
178
  work_item_id: int,
177
179
  test_case_ids: List[int],
180
+ link_comment: str = "STLC-Agent generated test case",
178
181
  ) -> dict:
179
- """Create TestedBy-Forward links from a work item to test cases."""
182
+ """Create TestedBy-Forward links from a work item to test cases.
183
+
184
+ link_comment is stored as attributes.comment on each relation and appears
185
+ in the Links tab Comments column in Azure DevOps.
186
+ """
180
187
  org_url = org_url.rstrip("/")
181
188
  headers = get_auth_headers("application/json-patch+json")
182
189
 
@@ -187,6 +194,7 @@ def link_test_cases_to_work_item(
187
194
  "value": {
188
195
  "rel": "Microsoft.VSTS.Common.TestedBy-Forward",
189
196
  "url": f"{org_url}/{project}/_apis/wit/workItems/{tc_id}",
197
+ "attributes": {"comment": link_comment},
190
198
  },
191
199
  }
192
200
  for tc_id in test_case_ids
@@ -249,6 +257,62 @@ def get_linked_test_cases(org_url: str, project: str, work_item_id: int) -> dict
249
257
  return {"work_item_id": work_item_id, "linked_test_cases": linked, "count": len(linked)}
250
258
 
251
259
 
260
+ # ---------------------------------------------------------------------------
261
+ # add_comment_to_work_item
262
+ # ---------------------------------------------------------------------------
263
+
264
def add_comment_to_work_item(org_url: str, project: str, work_item_id: int, text: str) -> dict:
    """Post ``text`` as a new comment on a work item.

    Issues a POST against the work item's ``/comments`` endpoint using
    api-version ``7.1-preview.3`` and a 30 second timeout.

    Args:
        org_url: Organisation base URL; a trailing ``/`` is stripped.
        project: Project name placed in the URL path.
        work_item_id: ID of the work item receiving the comment.
        text: Comment body sent as the ``text`` field of the JSON payload.

    Returns:
        ``{"success": True, "comment_id": <"id" from the response JSON, or None>}``.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` on a non-success reply.
    """
    base_url = org_url.rstrip("/")
    auth_headers = get_auth_headers()
    endpoint = f"{base_url}/{project}/_apis/wit/workitems/{work_item_id}/comments"

    response = requests.post(
        endpoint,
        headers=auth_headers,
        params={"api-version": "7.1-preview.3"},
        json={"text": text},
        timeout=30,
    )
    response.raise_for_status()

    comment_id = response.json().get("id")
    return {"success": True, "comment_id": comment_id}
278
+
279
+
280
+ # ---------------------------------------------------------------------------
281
+ # add_tag_to_work_item
282
+ # ---------------------------------------------------------------------------
283
+
284
def add_tag_to_work_item(org_url: str, project: str, work_item_id: int, tag: str) -> dict:
    """Append a tag to a work item's System.Tags field (no-op if already present).

    The current tags are fetched, split on ``;`` and compared with ``tag``
    ignoring case and surrounding whitespace, so a tag that differs only in
    casing is treated as already present and no PATCH (and therefore no new
    work item revision) is issued.

    Args:
        org_url: Organisation base URL; a trailing ``/`` is stripped.
        project: Project name placed in the URL path.
        work_item_id: ID of the work item to tag.
        tag: Tag text to add.

    Returns:
        ``{"success": True, "tag": tag, "already_present": True}`` when the tag
        is already on the work item, otherwise
        ``{"success": True, "tag": tag, "tags": <new "; "-joined tag string>}``.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` if the GET or the PATCH
            returns a non-success status.
    """
    org_url = org_url.rstrip("/")
    item_url = f"{org_url}/{project}/_apis/wit/workitems/{work_item_id}"

    fetch_resp = requests.get(
        item_url,
        headers=get_auth_headers(),
        params={"api-version": _API},
        timeout=30,
    )
    fetch_resp.raise_for_status()
    # System.Tags may be missing or null on an untagged work item.
    existing_str = fetch_resp.json().get("fields", {}).get("System.Tags", "") or ""
    existing = [t.strip() for t in existing_str.split(";") if t.strip()]

    # Compare case-insensitively: re-adding "foo" when "Foo" exists would only
    # create a redundant revision.
    normalized = tag.strip()
    wanted = normalized.casefold()
    if any(t.casefold() == wanted for t in existing):
        return {"success": True, "tag": tag, "already_present": True}

    existing.append(normalized)
    new_tags_str = "; ".join(existing)

    patch = [{"op": "add", "path": "/fields/System.Tags", "value": new_tags_str}]
    patch_resp = requests.patch(
        item_url,
        headers=get_auth_headers("application/json-patch+json"),
        params={"api-version": _API},
        json=patch,
        timeout=30,
    )
    patch_resp.raise_for_status()
    return {"success": True, "tag": tag, "tags": new_tags_str}
314
+
315
+
252
316
  # ---------------------------------------------------------------------------
253
317
  # Helpers
254
318
  # ---------------------------------------------------------------------------
@@ -1,15 +1,21 @@
1
1
  """
2
2
  cost_tracker.py — stlc_agents.shared.cost_tracker
3
3
  ─────────────────────────────────────────────────────
4
- Shared cost tracking injected into all 5 MCP servers at install time.
4
+ Two tracking modes:
5
5
 
6
- MODEL AUTO-DETECTION
7
- ─────────────────────
8
- The MCP server is a subprocess. It cannot see the coding agent's API
9
- response or token usage. Instead, each coding agent exposes the model
10
- it is running on via a known environment variable that the MCP config
11
- (`.mcp.json` / `.vscode/mcp.json`) passes through into the subprocess:
6
+ track() — MCP server tool calls (coding-agent-driven flow).
7
+ Token counts are ESTIMATED from payload size because
8
+ the MCP server subprocess never sees the coding agent's
9
+ API response.
12
10
 
11
+ track_llm_call() — Webhook orchestrator LLM calls (agent_runner.py).
12
+ Token counts are EXACT: taken directly from the LLM
13
+ API response's usage block, matching how promptfoo
14
+ tracks costs (input + output + cache per iteration,
15
+ accumulated across all iterations in the agent loop).
16
+
17
+ MODEL AUTO-DETECTION (for track() only)
18
+ ─────────────────────────────────────────
13
19
  Agent Env var set automatically Value example
14
20
  ───────────────── ────────────────────────────── ──────────────────────────
15
21
  Claude Code ANTHROPIC_MODEL claude-sonnet-4-6
@@ -26,28 +32,11 @@ Detection order (first match wins):
26
32
  5. ~/.qa-stlc/agent-model — saved preference from `qa-stlc cost --set-model`
27
33
  6. "claude-sonnet-4-6" — safe default (most common)
28
34
 
29
- TOKEN ESTIMATION
30
- ─────────────────
31
- Because the server never sees the LLM's token usage, tokens are estimated
32
- from the ADO/Jira JSON payload size the server returns:
33
- estimated_tokens = len(json_response_text) / 4 (chars-per-token heuristic)
34
- input_tokens = estimated_tokens * 0.70 (coding agent reading the result)
35
- output_tokens = estimated_tokens * 0.30 (coding agent writing the artifact)
36
-
37
- This is conservative and consistent with how promptfoo's HTTP provider
38
- estimates tokens when the API doesn't return a usage block.
39
-
40
- WHAT GETS LOGGED (per tool call)
41
- ──────────────────────────────────
42
- _cost block injected into every tool response JSON — the coding agent
43
- sees it inline alongside the tool result.
44
-
35
+ WHAT GETS LOGGED (per tool call / LLM call)
36
+ ─────────────────────────────────────────────
45
37
  ~/.qa-stlc/cost-<session>.jsonl — machine-readable session log.
46
-
47
- stderr live line visible in Claude Code's MCP log pane, VS Code
48
- Output > MCP, Cursor's tool output panel, etc.
49
-
50
- atexit summary — printed when the MCP server process exits.
38
+ stderr live line — visible in Claude Code MCP log, VS Code Output, etc.
39
+ atexit summary — printed when the process exits.
51
40
  """
52
41
 
53
42
  from __future__ import annotations
@@ -243,6 +232,94 @@ def track(
243
232
  )]
244
233
 
245
234
 
235
def track_llm_call(
    *,
    model: str,
    provider: str,
    input_tokens: int,
    output_tokens: int,
    cache_write_tokens: int = 0,
    cache_read_tokens: int = 0,
    tool: str = "llm-agent-loop",
    server: str = "agent-runner",
    work_item_id: str = "",
    iterations: int = 1,
    latency_ms: int = 0,
) -> float:
    """
    Log exact token usage reported by an LLM API and return its USD cost.

    The caller supplies counts taken from the provider's usage block:

    Anthropic fields:
        input_tokens, output_tokens,
        cache_creation_input_tokens → cache_write_tokens,
        cache_read_input_tokens     → cache_read_tokens

    OpenAI fields:
        prompt_tokens                        → input_tokens,
        completion_tokens                    → output_tokens,
        prompt_tokens_details.cached_tokens  → cache_read_tokens

    The cost is computed from ``get_pricing(model)`` (0.0 when no pricing is
    found), one record is appended to the current session, and a live line is
    printed via ``_print_live(..., exact=True)``.

    Returns 0.0 without recording anything when tracking is disabled.
    """
    if not _TRACKING_ENABLED:
        return 0.0

    rates = get_pricing(model)
    if rates:
        usd = rates.cost(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_write_tokens=cache_write_tokens,
            cache_read_tokens=cache_read_tokens,
        )
    else:
        usd = 0.0

    session = _get_session()
    session_total = session.running_total() + usd
    # Cache tokens are reported separately and are not part of this sum.
    token_sum = input_tokens + output_tokens

    if cache_write_tokens or cache_read_tokens:
        cache_suffix = f" cache_write={cache_write_tokens} cache_read={cache_read_tokens}"
    else:
        cache_suffix = ""
    note = f"Exact counts from {provider} API response. Iterations: {iterations}.{cache_suffix}"

    entry = {
        "tool": tool,
        "server": server,
        "session_id": session.id,
        "model": model,
        "model_source": f"{provider}-api-response",
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cache_write_tokens": cache_write_tokens,
        "cache_read_tokens": cache_read_tokens,
        "estimated_tokens": token_sum,
        "cost_usd": round(usd, 8),
        "latency_ms": latency_ms,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "session_total_usd": round(session_total, 8),
        "token_method": "exact",
        "token_note": note,
        "work_item_id": work_item_id,
        "iterations": iterations,
    }
    session.add(entry)
    _print_live(server, tool, token_sum, usd, latency_ms, session_total, exact=True)
    return usd
316
+
317
+
318
def get_session_id() -> str:
    """Expose the module-level session ID.

    Intended for callers that forward it to subprocesses as STLC_SESSION_ID.
    """
    session_id = _SESSION_ID
    return session_id
321
+
322
+
246
323
  def track_healing(payload: dict) -> None:
247
324
  """
248
325
  Record an AI Vision healing call from LocatorHealer.ts.
@@ -307,23 +384,277 @@ def _model_source() -> str:
307
384
  def _print_live(
308
385
  server: str, tool: str, tokens: int,
309
386
  cost: float, latency_ms: int, running: float,
387
+ *, exact: bool = False,
310
388
  ) -> None:
311
389
  c = _C
390
+ prefix = "" if exact else "~"
312
391
  tok_str = f"{tokens/1000:.1f}K" if tokens >= 1000 else str(tokens)
313
392
  cost_str = f"${cost:.6f}"
314
393
  total = f"${running:.6f}"
394
+ method = "" if exact else f"{c['dim']} [est]{c['reset']}"
315
395
  print(
316
396
  f"{c['dim']}[stlc-cost]{c['reset']} "
317
397
  f"{c['cyan']}{server}{c['reset']}{c['dim']} · {c['reset']}{tool}"
318
- f" ~{tok_str} tokens {c['green']}{cost_str}{c['reset']}"
398
+ f" {prefix}{tok_str} tokens {c['green']}{cost_str}{c['reset']}{method}"
319
399
  f" {c['dim']}(session: {total} {latency_ms}ms){c['reset']}",
320
400
  file=sys.stderr, flush=True,
321
401
  )
322
402
 
323
403
 
324
- # ── Session summary on exit ────────────────────────────────────────────────
404
+ # ── Tool artifact label ──────────────────────────────────────────────────
405
+
406
+ _TOOL_ARTIFACT: dict[str, str] = {
407
+ "fetch_work_item_for_gherkin": "Work item fetched",
408
+ "fetch_feature_hierarchy": "Feature hierarchy fetched",
409
+ "generate_and_attach_gherkin": "Gherkin attached to work item",
410
+ "attach_gherkin_to_feature": "Gherkin attached to feature",
411
+ "attach_gherkin_to_work_item": "Gherkin attached to work item",
412
+ "validate_gherkin_content": "Gherkin validated",
413
+ "capture_app_context": "App context captured",
414
+ "generate_playwright_code": "Playwright code generated",
415
+ "scaffold_locator_repository": "Locator repository scaffolded",
416
+ "attach_code_to_work_item": "Code attached to work item",
417
+ "validate_gherkin_steps": "Gherkin steps validated",
418
+ "pre_validate_cucumber_steps": "Cucumber steps pre-validated",
419
+ "get_generated_files": "Generated files retrieved",
420
+ "inspect_helix_project": "Helix project inspected",
421
+ "write_helix_files": "Files written to Helix",
422
+ "update_helix_file": "Helix file updated",
423
+ "read_helix_file": "Helix file read",
424
+ "list_helix_tree": "Helix tree listed",
425
+ "fetch_work_item": "Work item fetched",
426
+ "create_and_link_test_cases": "Test cases created & linked",
427
+ "create_deduped_test_cases": "Test cases created (deduped)",
428
+ "get_linked_test_cases": "Linked test cases retrieved",
429
+ "llm-agent-loop": "LLM orchestration",
430
+ }
431
+
432
+ _SERVER_ORDER = [
433
+ "qa-test-case-manager", "qa-jira-manager",
434
+ "qa-gherkin-generator", "qa-playwright-generator", "qa-helix-writer",
435
+ "agent-runner",
436
+ ]
437
+
438
+ _SERVER_FRIENDLY: dict[str, str] = {
439
+ "qa-test-case-manager": "QA Test Case Manager",
440
+ "qa-jira-manager": "QA Jira Manager",
441
+ "qa-gherkin-generator": "QA Gherkin Generator",
442
+ "qa-playwright-generator": "QA Playwright Generator",
443
+ "qa-helix-writer": "QA Helix Writer",
444
+ "agent-runner": "Orchestrator",
445
+ "locator-healer": "Locator Healer",
446
+ }
447
+
448
+ _TOOL_FRIENDLY: dict[str, str] = {
449
+ "fetch_work_item_for_gherkin": "fetch WI",
450
+ "fetch_feature_hierarchy": "fetch feature hierarchy",
451
+ "generate_and_attach_gherkin": "generate & attach Gherkin",
452
+ "attach_gherkin_to_feature": "attach Gherkin to feature",
453
+ "attach_gherkin_to_work_item": "attach Gherkin to WI",
454
+ "validate_gherkin_content": "validate Gherkin",
455
+ "capture_app_context": "capture app context",
456
+ "generate_playwright_code": "generate Playwright code",
457
+ "scaffold_locator_repository": "scaffold locators",
458
+ "attach_code_to_work_item": "attach code to WI",
459
+ "validate_gherkin_steps": "validate Gherkin steps",
460
+ "pre_validate_cucumber_steps": "pre-validate Cucumber steps",
461
+ "get_generated_files": "retrieve generated files",
462
+ "inspect_helix_project": "inspect Helix project",
463
+ "write_helix_files": "write files to Helix",
464
+ "update_helix_file": "update Helix file",
465
+ "read_helix_file": "read Helix file",
466
+ "list_helix_tree": "list Helix tree",
467
+ "fetch_work_item": "fetch WI",
468
+ "create_and_link_test_cases": "create & link test cases",
469
+ "create_deduped_test_cases": "create test cases (deduped)",
470
+ "get_linked_test_cases": "get linked test cases",
471
+ "llm-agent-loop": "LLM agent loop",
472
+ "ai-vision-anthropic": "AI Vision (Anthropic)",
473
+ "ai-vision-copilot": "AI Vision (Copilot)",
474
+ }
475
+
476
+
477
def _step_label(record: dict) -> str:
    """Build the "Agent (step)" label for one cost record.

    The record's ``server`` and ``tool`` values (each defaulting to "?") are
    mapped through ``_SERVER_FRIENDLY`` and ``_TOOL_FRIENDLY``; a value with no
    mapping is shown as-is. Records from "agent-runner" are always labelled
    "Orchestrator".
    """
    server_key = record.get("server", "?")
    tool_key = record.get("tool", "?")
    agent_name = _SERVER_FRIENDLY.get(server_key, server_key)
    step_name = _TOOL_FRIENDLY.get(tool_key, tool_key)
    if server_key == "agent-runner":
        agent_name = "Orchestrator"
    return f"{agent_name} ({step_name})"
485
+
486
+
487
def _model_display(model_id: str) -> str:
    """Map a model ID to its pricing entry's display name.

    Falls back to ``model_id`` itself when ``get_pricing`` yields nothing.
    """
    pricing = get_pricing(model_id)
    if not pricing:
        return model_id
    return pricing.display_name
491
+
492
+
493
+ def _fmt_tok(n: int, exact: bool) -> str:
494
+ pfx = "" if exact else "~"
495
+ if n >= 1_000_000:
496
+ return f"{pfx}{n/1_000_000:.1f}M"
497
+ if n >= 1000:
498
+ return f"{pfx}{n/1000:.1f}K"
499
+ return f"{pfx}{n}"
500
+
501
+
502
+ # ── Unified pipeline summary (called by agent_runner after loop) ───────────
503
+
504
def print_pipeline_summary(
    session_id: str,
    work_item_id: str = "",
    elapsed_s: float = 0.0,
    model: str = "",
    artefacts: list[dict] | None = None,
) -> None:
    """
    Print the two-section final report to stderr:
    1. Artefact Summary — what was produced and where it lives
    2. Token and Cost Report — per-step token counts and USD cost

    Records are read from ``_LOG_DIR / "cost-<session_id>.jsonl"``. Nothing is
    printed when tracking is disabled, the log cannot be opened, or it holds
    no usable records.

    Args:
        session_id: Session whose JSONL cost log is reported.
        work_item_id: Optional work item ID shown in the banner.
        elapsed_s: Optional pipeline duration shown in the banner.
        model: Model ID for the banner and rate note; defaults to ``_MODEL_ID``.
        artefacts: Optional rows for section 1; each dict may carry ``name``,
            ``type``, ``location`` and ``detail``. Missing or ``None`` values
            are rendered as empty cells.
    """
    if not _TRACKING_ENABLED:
        return

    log_path = _LOG_DIR / f"cost-{session_id}.jsonl"
    records: list[dict] = []
    try:
        with log_path.open(encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    parsed = json.loads(line)
                except json.JSONDecodeError:
                    continue  # tolerate a corrupt or partially written line
                # A valid JSON line that is not an object cannot be a cost record.
                if isinstance(parsed, dict):
                    records.append(parsed)
    except OSError:
        return
    if not records:
        return

    def _cell(value: object) -> str:
        # Width format specs raise TypeError on None, so coerce every cell.
        return "" if value is None else str(value)

    c = _C
    W = 108
    m = model or _MODEL_ID
    wi = f"WI {work_item_id} · " if work_item_id else ""
    dur = f" · {elapsed_s:.1f}s" if elapsed_s else ""

    print(f"\n{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)
    print(
        f"{c['bold']} stlc-agents · Pipeline Report · {wi}{m}{dur}{c['reset']}",
        file=sys.stderr,
    )
    print(f"{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)

    # ── Section 1: Artefact Summary ──────────────────────────────────────────
    if artefacts:
        print(f"\n{c['bold']} 1. Artefact Summary{c['reset']}", file=sys.stderr)
        C1, C2, C3, C4 = 3, 26, 22, 22
        hdr = (
            f" {'#':<{C1}} {'Artefact':<{C2}} {'Type':<{C3}}"
            f" {'Location / Status':<{C4}} Detail"
        )
        print(f"\n{hdr}", file=sys.stderr)
        print(f" {'─'*(W-2)}", file=sys.stderr)
        for i, art in enumerate(artefacts, 1):
            row = (
                f" {i:<{C1}} {_cell(art.get('name')):<{C2}} "
                f"{_cell(art.get('type')):<{C3}} "
                f"{_cell(art.get('location')):<{C4}} "
                f"{c['dim']}{_cell(art.get('detail'))}{c['reset']}"
            )
            print(row, file=sys.stderr)

    # ── Section 2: Token and Cost Report ─────────────────────────────────────
    print(f"\n{c['bold']} 2. Token and Cost Report{c['reset']}", file=sys.stderr)
    S1, S2, S3, S4, S5 = 3, 46, 20, 12, 12
    hdr2 = (
        f"\n {'#':<{S1}} {'Agent / Step':<{S2}} {'Model':<{S3}}"
        f" {'Input':>{S4}} {'Output':>{S5}} {'Cost USD':>12}"
    )
    print(hdr2, file=sys.stderr)
    print(f" {'─'*(W-2)}", file=sys.stderr)

    for i, r in enumerate(records, 1):
        exact = r.get("token_method") == "exact"
        # `or 0` also covers records whose fields are present but null.
        inp = r.get("input_tokens") or 0
        out = r.get("output_tokens") or 0
        cost = r.get("cost_usd") or 0.0
        mdl = _model_display(r.get("model") or m)
        label = _step_label(r)
        pfx = "" if exact else "~"
        in_s = _fmt_tok(inp, exact)
        out_s = _fmt_tok(out, exact)
        cost_s = f"{pfx}${cost:.6f}"

        print(
            f" {i:<{S1}} {label:<{S2}} {mdl:<{S3}}"
            f" {in_s:>{S4}} {out_s:>{S5}} "
            f"{c['green']}{cost_s:>12}{c['reset']}",
            file=sys.stderr,
        )

    # Session total: use only agent-runner rows when present.
    # Estimated (MCP) rows measure payload size — those bytes are already part of
    # the LLM's context window, so summing them with the LLM total would double-count.
    runner_indices = [i for i, r in enumerate(records) if r.get("server") == "agent-runner"]
    total_rows = [records[i] for i in runner_indices] if runner_indices else records
    # True when the total deliberately leaves out some (non-runner) rows.
    partial_total = bool(runner_indices) and len(runner_indices) < len(records)

    total_in = sum(r.get("input_tokens") or 0 for r in total_rows)
    total_out = sum(r.get("output_tokens") or 0 for r in total_rows)
    total_cost = sum((r.get("cost_usd") or 0.0 for r in total_rows), 0.0)

    all_exact = all(r.get("token_method") == "exact" for r in total_rows)
    in_tot = _fmt_tok(total_in, all_exact)
    out_tot = _fmt_tok(total_out, all_exact)
    pfx_tot = "" if all_exact else "~"
    cost_tot = f"{pfx_tot}${total_cost:.6f}"

    if partial_total:
        row_nums = [i + 1 for i in runner_indices]
        est_count = len(records) - len(runner_indices)
        row_label = f"row {row_nums[0]}" if len(row_nums) == 1 else f"rows {','.join(map(str, row_nums))}"
        tot_label = f"Session Total ({row_label}; {est_count} MCP rows in LLM ctx)"
    else:
        tot_label = "Session Total"

    p = get_pricing(m)
    rate_note = (
        f" Model: {m} — rates applied: "
        f"${p.input_per_mtok:.2f}/M input, ${p.output_per_mtok:.2f}/M output."
        if p else f" Model: {m}"
    )
    if partial_total:
        rate_note += (
            "\n Session total = exact LLM API cost only."
            " Estimated MCP rows are payload-size heuristics already included in the LLM context."
        )
    elif not all_exact:
        rate_note += (
            "\n Estimates (~) based on payload size; "
            "exact rows come directly from the LLM API response."
        )

    print(f" {'─'*(W-2)}", file=sys.stderr)
    print(
        f" {'':>{S1}} {c['bold']}{tot_label:<{S2}}{c['reset']} "
        f"{'':>{S3}} {c['bold']}{in_tot:>{S4}} {out_tot:>{S5}} "
        f"{c['green']}{cost_tot:>12}{c['reset']}",
        file=sys.stderr,
    )
    print(f"{c['dim']}\n{rate_note}{c['reset']}", file=sys.stderr)
    print(f"{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
650
+
651
+
652
+ # ── Session summary on exit (MCP server / Claude Code path) ───────────────
325
653
 
326
654
  def _print_summary() -> None:
655
+ # Suppressed when agent_runner will print the unified pipeline summary
656
+ if os.environ.get("STLC_COST_SUMMARY", "").lower() == "suppress":
657
+ return
327
658
  if not _TRACKING_ENABLED or _session is None or not _session.records:
328
659
  return
329
660
 
@@ -332,15 +663,6 @@ def _print_summary() -> None:
332
663
  elapsed = time.time() - sess.started_at
333
664
  c = _C
334
665
 
335
- by_server: dict[str, dict] = {}
336
- for r in records:
337
- k = r.get("server", "unknown")
338
- if k not in by_server:
339
- by_server[k] = {"calls": 0, "tokens": 0, "cost_usd": 0.0}
340
- by_server[k]["calls"] += 1
341
- by_server[k]["tokens"] += r.get("estimated_tokens", 0)
342
- by_server[k]["cost_usd"] += r.get("cost_usd", 0.0)
343
-
344
666
  total_cost = sum(r.get("cost_usd", 0.0) for r in records)
345
667
  total_tokens = sum(r.get("estimated_tokens", 0) for r in records)
346
668
 
@@ -349,47 +671,33 @@ def _print_summary() -> None:
349
671
  print(f"{c['bold']} stlc-agents · Cost Summary · {sess.id}{c['reset']}", file=sys.stderr)
350
672
  print(f"{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)
351
673
 
352
- # Per-server
353
- print(f"\n {'Server':<30} {'Calls':>6} {'~Tokens':>10} {'Cost (USD)':>14}", file=sys.stderr)
354
- print(f" {'─'*60}", file=sys.stderr)
355
- for svr, d in sorted(by_server.items()):
356
- tok = f"{d['tokens']/1000:.1f}K" if d['tokens'] >= 1000 else str(d['tokens'])
357
- print(
358
- f" {svr:<30} {d['calls']:>6} {tok:>10} "
359
- f"{c['green']}${d['cost_usd']:.6f}{c['reset']:>14}",
360
- file=sys.stderr,
361
- )
362
-
363
- # Per-step
364
- print(f"\n {'Step':<26} {'Tool':<36} {'~Tok':>6} {'Cost':>10} {'ms':>6}", file=sys.stderr)
674
+ print(f"\n {'Agent':<28} {'Artifact':<34} {'~Tokens':>8} {'Cost':>10} {'ms':>6}", file=sys.stderr)
365
675
  print(f" {'─'*W}", file=sys.stderr)
366
676
  for r in records:
367
- tok = f"{r.get('estimated_tokens',0)/1000:.1f}K" if r.get('estimated_tokens',0) >= 1000 else str(r.get('estimated_tokens',0))
677
+ raw = r.get("estimated_tokens", 0)
678
+ tok = f"{raw/1000:.1f}K" if raw >= 1000 else str(raw)
679
+ pfx = "" if r.get("token_method") == "exact" else "~"
680
+ art = _TOOL_ARTIFACT.get(r.get("tool", ""), r.get("tool", "?"))
368
681
  print(
369
- f" {r.get('server','?'):<26} {r.get('tool','?'):<36} "
370
- f"{tok:>6} ${r.get('cost_usd',0):.6f} {r.get('latency_ms',0):>6}",
682
+ f" {r.get('server','?'):<28} {art:<34} "
683
+ f"{pfx}{tok:>8} ${r.get('cost_usd',0):.6f} {r.get('latency_ms',0):>6}",
371
684
  file=sys.stderr,
372
685
  )
373
686
 
374
- # Totals
375
687
  tok_total = f"{total_tokens/1000:.1f}K" if total_tokens >= 1000 else str(total_tokens)
376
- print(f"\n {'─'*W}", file=sys.stderr)
377
- print(f" {'Total tokens':<40} {tok_total:>10}", file=sys.stderr)
378
- print(f" {c['bold']}{'Total cost':<40} {c['green']}${total_cost:.6f}{c['reset']}", file=sys.stderr)
688
+ print(f" {'─'*W}", file=sys.stderr)
689
+ print(
690
+ f" {c['bold']}{'TOTAL':<28} {'':<34} {tok_total:>9} "
691
+ f"{c['green']}${total_cost:.6f}{c['reset']} {elapsed:.1f}s",
692
+ file=sys.stderr,
693
+ )
379
694
 
380
- # Model info
381
- model_str = f"{_MODEL_ID}"
695
+ model_str = _MODEL_ID
382
696
  if _PRICING:
383
697
  model_str += f" (${_PRICING.input_per_mtok}/${_PRICING.output_per_mtok} per MTok in/out)"
384
- print(f" {c['dim']}Model: {model_str}{c['reset']}", file=sys.stderr)
385
- print(f" {c['dim']}Model detected via: {_model_source()}{c['reset']}", file=sys.stderr)
386
- print(f" {c['dim']}Token method: estimated from payload size (chars÷4){c['reset']}", file=sys.stderr)
387
- print(f" {c['dim']}Duration: {elapsed:.1f}s · Log: {sess.log_path}{c['reset']}", file=sys.stderr)
388
- print(f"\n {c['dim']}To set model explicitly:{c['reset']}", file=sys.stderr)
389
- print(f" {c['dim']} qa-stlc cost --set-model claude-opus-4-6{c['reset']}", file=sys.stderr)
390
- print(f" {c['dim']} or add to .mcp.json env: STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}", file=sys.stderr)
391
- print(f" {c['dim']} or add to .env: STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}", file=sys.stderr)
392
- print(f"\n{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
698
+ print(f"\n {c['dim']}Model: {model_str}{c['reset']}", file=sys.stderr)
699
+ print(f" {c['dim']}Log: {sess.log_path}{c['reset']}", file=sys.stderr)
700
+ print(f"{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
393
701
 
394
702
 
395
703
  atexit.register(_print_summary)