@qa-gentic/stlc-agents 1.0.25 → 1.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/generate-test-cases/SKILL.md +5 -0
- package/src/cli/cmd-cost.js +61 -30
- package/src/cli/cmd-init.js +88 -8
- package/src/stlc_agents/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_gherkin_generator/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_gherkin_generator/__pycache__/server.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/ado_gherkin.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_helix_writer/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_helix_writer/__pycache__/server.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_helix_writer/server.py +41 -6
- package/src/stlc_agents/agent_helix_writer/tools/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_helix_writer/tools/__pycache__/boilerplate.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_helix_writer/tools/__pycache__/helix_write.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_playwright_generator/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_playwright_generator/__pycache__/server.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_playwright_generator/server.py +419 -213
- package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/ado_attach.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_test_case_manager/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_test_case_manager/server.py +12 -0
- package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/ado_workitem.cpython-314.pyc +0 -0
- package/src/stlc_agents/agent_test_case_manager/tools/ado_workitem.py +65 -1
- package/src/stlc_agents/shared/__pycache__/__init__.cpython-314.pyc +0 -0
- package/src/stlc_agents/shared/__pycache__/auth.cpython-314.pyc +0 -0
- package/src/stlc_agents/shared/__pycache__/cost_tracker.cpython-314.pyc +0 -0
- package/src/stlc_agents/shared/__pycache__/pricing.cpython-314.pyc +0 -0
- package/src/stlc_agents/shared/cost_tracker.py +378 -70
- package/src/stlc_agents/shared/pricing.py +115 -24
- package/src/stlc_agents/webhook_orchestrator/__init__.py +0 -0
- package/src/stlc_agents/webhook_orchestrator/agent_runner.py +599 -0
- package/src/stlc_agents/webhook_orchestrator/main.py +43 -0
- package/src/stlc_agents/webhook_orchestrator/models.py +63 -0
- package/src/stlc_agents/webhook_orchestrator/orchestrator.py +103 -0
- package/src/stlc_agents/webhook_orchestrator/pipelines/__init__.py +0 -0
- package/src/stlc_agents/webhook_orchestrator/pipelines/_base.py +57 -0
- package/src/stlc_agents/webhook_orchestrator/pipelines/ado_test_cases.py +55 -0
- package/src/stlc_agents/webhook_orchestrator/pipelines/full_pipeline.py +202 -0
- package/src/stlc_agents/webhook_orchestrator/pipelines/gherkin_playwright.py +156 -0
- package/src/stlc_agents/webhook_orchestrator/pipelines/jira_test_cases.py +48 -0
- package/src/stlc_agents/webhook_orchestrator/webhook_bridge.py +368 -0
- package/src/stlc_agents/agent_gherkin_generator/__pycache__/server.cpython-310.pyc +0 -0
- package/src/stlc_agents/agent_helix_writer/__pycache__/server.cpython-310.pyc +0 -0
- package/src/stlc_agents/agent_jira_manager/__pycache__/server.cpython-310.pyc +0 -0
- package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-310.pyc +0 -0
- package/src/stlc_agents/shared/__pycache__/cost_tracker.cpython-310.pyc +0 -0
- package/src/stlc_agents/shared/__pycache__/pricing.cpython-310.pyc +0 -0
|
Binary file
|
package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/ado_attach.cpython-314.pyc
ADDED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -30,6 +30,7 @@ from stlc_agents.agent_test_case_manager.tools.ado_workitem import (
|
|
|
30
30
|
create_test_case as _create_test_case,
|
|
31
31
|
link_test_cases_to_work_item as _link_test_cases,
|
|
32
32
|
get_linked_test_cases as _get_linked_test_cases,
|
|
33
|
+
add_tag_to_work_item as _add_tag,
|
|
33
34
|
)
|
|
34
35
|
from stlc_agents.shared.cost_tracker import track
|
|
35
36
|
|
|
@@ -483,6 +484,16 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
|
|
|
483
484
|
except Exception as e:
|
|
484
485
|
link_result = {"error": str(e)}
|
|
485
486
|
|
|
487
|
+
# Add tag to the parent work item after linking
|
|
488
|
+
tag_result = {}
|
|
489
|
+
if created:
|
|
490
|
+
try:
|
|
491
|
+
tag_result = await asyncio.to_thread(
|
|
492
|
+
_add_tag, org, project, wi_id, "STLCAgentTestCases"
|
|
493
|
+
)
|
|
494
|
+
except Exception as e:
|
|
495
|
+
tag_result = {"error": str(e)}
|
|
496
|
+
|
|
486
497
|
result = {
|
|
487
498
|
"summary": {
|
|
488
499
|
"requested": len(test_cases),
|
|
@@ -493,6 +504,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
|
|
|
493
504
|
"created_test_cases": created,
|
|
494
505
|
"failed": failed,
|
|
495
506
|
"link_result": link_result,
|
|
507
|
+
"tag_result": tag_result,
|
|
496
508
|
"_validation": {
|
|
497
509
|
"valid": len(failed) == 0 and bool(link_result.get("success", True)),
|
|
498
510
|
"input_validation": input_validation,
|
|
Binary file
|
package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/ado_workitem.cpython-314.pyc
ADDED
|
Binary file
|
|
@@ -8,6 +8,8 @@ Public API:
|
|
|
8
8
|
create_test_case(org_url, project, title, steps, ...) -> dict
|
|
9
9
|
link_test_cases_to_work_item(org_url, project, wi_id, tc_ids) -> dict
|
|
10
10
|
get_linked_test_cases(org_url, project, work_item_id) -> dict
|
|
11
|
+
add_comment_to_work_item(org_url, project, work_item_id, text) -> dict
|
|
12
|
+
add_tag_to_work_item(org_url, project, work_item_id, tag) -> dict
|
|
11
13
|
"""
|
|
12
14
|
from __future__ import annotations
|
|
13
15
|
|
|
@@ -175,8 +177,13 @@ def link_test_cases_to_work_item(
|
|
|
175
177
|
project: str,
|
|
176
178
|
work_item_id: int,
|
|
177
179
|
test_case_ids: List[int],
|
|
180
|
+
link_comment: str = "STLC-Agent generated test case",
|
|
178
181
|
) -> dict:
|
|
179
|
-
"""Create TestedBy-Forward links from a work item to test cases.
|
|
182
|
+
"""Create TestedBy-Forward links from a work item to test cases.
|
|
183
|
+
|
|
184
|
+
link_comment is stored as attributes.comment on each relation and appears
|
|
185
|
+
in the Links tab Comments column in Azure DevOps.
|
|
186
|
+
"""
|
|
180
187
|
org_url = org_url.rstrip("/")
|
|
181
188
|
headers = get_auth_headers("application/json-patch+json")
|
|
182
189
|
|
|
@@ -187,6 +194,7 @@ def link_test_cases_to_work_item(
|
|
|
187
194
|
"value": {
|
|
188
195
|
"rel": "Microsoft.VSTS.Common.TestedBy-Forward",
|
|
189
196
|
"url": f"{org_url}/{project}/_apis/wit/workItems/{tc_id}",
|
|
197
|
+
"attributes": {"comment": link_comment},
|
|
190
198
|
},
|
|
191
199
|
}
|
|
192
200
|
for tc_id in test_case_ids
|
|
@@ -249,6 +257,62 @@ def get_linked_test_cases(org_url: str, project: str, work_item_id: int) -> dict
|
|
|
249
257
|
return {"work_item_id": work_item_id, "linked_test_cases": linked, "count": len(linked)}
|
|
250
258
|
|
|
251
259
|
|
|
260
|
+
# ---------------------------------------------------------------------------
|
|
261
|
+
# add_comment_to_work_item
|
|
262
|
+
# ---------------------------------------------------------------------------
|
|
263
|
+
|
|
264
|
+
def add_comment_to_work_item(org_url: str, project: str, work_item_id: int, text: str) -> dict:
    """Post ``text`` as a new comment on a work item.

    Sends a POST to the work item's ``comments`` endpoint (api-version
    ``7.1-preview.3``) with a 30 second timeout.

    Returns:
        ``{"success": True, "comment_id": <id>}`` where ``<id>`` is the ``id``
        field of the JSON response body (``None`` when the body has no ``id``).

    Raises:
        Whatever ``raise_for_status()`` raises for an unsuccessful response.
    """
    base_url = org_url.rstrip("/")  # tolerate a trailing slash on the org URL
    auth_headers = get_auth_headers()
    endpoint = f"{base_url}/{project}/_apis/wit/workitems/{work_item_id}/comments"

    response = requests.post(
        endpoint,
        headers=auth_headers,
        params={"api-version": "7.1-preview.3"},
        json={"text": text},
        timeout=30,
    )
    response.raise_for_status()

    comment_id = response.json().get("id")
    return {"success": True, "comment_id": comment_id}
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# ---------------------------------------------------------------------------
|
|
281
|
+
# add_tag_to_work_item
|
|
282
|
+
# ---------------------------------------------------------------------------
|
|
283
|
+
|
|
284
|
+
def add_tag_to_work_item(org_url: str, project: str, work_item_id: int, tag: str) -> dict:
    """Append a tag to a work item's System.Tags field (no-op if already present).

    The work item is fetched first so the existing ``;``-separated tag list can
    be preserved; the new tag is then written back together with the old ones
    through a JSON Patch ``add`` on ``/fields/System.Tags``.

    The presence check ignores case and surrounding whitespace, so a tag that
    differs from an existing one only by case is treated as already present
    instead of being written a second time.

    Returns:
        ``{"success": True, "tag": tag, "already_present": True}`` when nothing
        had to be written, otherwise
        ``{"success": True, "tag": tag, "tags": <new tag string>}``.

    Raises:
        Whatever ``raise_for_status()`` raises when the fetch or the patch
        request is unsuccessful.
    """
    org_url = org_url.rstrip("/")
    item_url = f"{org_url}/{project}/_apis/wit/workitems/{work_item_id}"

    fetch_resp = requests.get(
        item_url,
        headers=get_auth_headers(),
        params={"api-version": _API},
        timeout=30,
    )
    fetch_resp.raise_for_status()
    # System.Tags may be missing or null on an untagged item; normalise to "".
    existing_str = fetch_resp.json().get("fields", {}).get("System.Tags", "") or ""
    existing = [t.strip() for t in existing_str.split(";") if t.strip()]

    wanted = tag.strip()
    if wanted.casefold() in {t.casefold() for t in existing}:
        return {"success": True, "tag": tag, "already_present": True}

    existing.append(wanted)
    new_tags_str = "; ".join(existing)

    patch = [{"op": "add", "path": "/fields/System.Tags", "value": new_tags_str}]
    patch_resp = requests.patch(
        item_url,
        headers=get_auth_headers("application/json-patch+json"),
        params={"api-version": _API},
        json=patch,
        timeout=30,
    )
    patch_resp.raise_for_status()
    return {"success": True, "tag": tag, "tags": new_tags_str}
|
|
314
|
+
|
|
315
|
+
|
|
252
316
|
# ---------------------------------------------------------------------------
|
|
253
317
|
# Helpers
|
|
254
318
|
# ---------------------------------------------------------------------------
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,15 +1,21 @@
|
|
|
1
1
|
"""
|
|
2
2
|
cost_tracker.py — stlc_agents.shared.cost_tracker
|
|
3
3
|
─────────────────────────────────────────────────────
|
|
4
|
-
|
|
4
|
+
Two tracking modes:
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
response
|
|
10
|
-
it is running on via a known environment variable that the MCP config
|
|
11
|
-
(`.mcp.json` / `.vscode/mcp.json`) passes through into the subprocess:
|
|
6
|
+
track() — MCP server tool calls (coding-agent-driven flow).
|
|
7
|
+
Token counts are ESTIMATED from payload size because
|
|
8
|
+
the MCP server subprocess never sees the coding agent's
|
|
9
|
+
API response.
|
|
12
10
|
|
|
11
|
+
track_llm_call() — Webhook orchestrator LLM calls (agent_runner.py).
|
|
12
|
+
Token counts are EXACT: taken directly from the LLM
|
|
13
|
+
API response's usage block, matching how promptfoo
|
|
14
|
+
tracks costs (input + output + cache per iteration,
|
|
15
|
+
accumulated across all iterations in the agent loop).
|
|
16
|
+
|
|
17
|
+
MODEL AUTO-DETECTION (for track() only)
|
|
18
|
+
─────────────────────────────────────────
|
|
13
19
|
Agent Env var set automatically Value example
|
|
14
20
|
───────────────── ────────────────────────────── ──────────────────────────
|
|
15
21
|
Claude Code ANTHROPIC_MODEL claude-sonnet-4-6
|
|
@@ -26,28 +32,11 @@ Detection order (first match wins):
|
|
|
26
32
|
5. ~/.qa-stlc/agent-model — saved preference from `qa-stlc cost --set-model`
|
|
27
33
|
6. "claude-sonnet-4-6" — safe default (most common)
|
|
28
34
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
Because the server never sees the LLM's token usage, tokens are estimated
|
|
32
|
-
from the ADO/Jira JSON payload size the server returns:
|
|
33
|
-
estimated_tokens = len(json_response_text) / 4 (chars-per-token heuristic)
|
|
34
|
-
input_tokens = estimated_tokens * 0.70 (coding agent reading the result)
|
|
35
|
-
output_tokens = estimated_tokens * 0.30 (coding agent writing the artifact)
|
|
36
|
-
|
|
37
|
-
This is conservative and consistent with how promptfoo's HTTP provider
|
|
38
|
-
estimates tokens when the API doesn't return a usage block.
|
|
39
|
-
|
|
40
|
-
WHAT GETS LOGGED (per tool call)
|
|
41
|
-
──────────────────────────────────
|
|
42
|
-
_cost block injected into every tool response JSON — the coding agent
|
|
43
|
-
sees it inline alongside the tool result.
|
|
44
|
-
|
|
35
|
+
WHAT GETS LOGGED (per tool call / LLM call)
|
|
36
|
+
─────────────────────────────────────────────
|
|
45
37
|
~/.qa-stlc/cost-<session>.jsonl — machine-readable session log.
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
Output > MCP, Cursor's tool output panel, etc.
|
|
49
|
-
|
|
50
|
-
atexit summary — printed when the MCP server process exits.
|
|
38
|
+
stderr live line — visible in Claude Code MCP log, VS Code Output, etc.
|
|
39
|
+
atexit summary — printed when the process exits.
|
|
51
40
|
"""
|
|
52
41
|
|
|
53
42
|
from __future__ import annotations
|
|
@@ -243,6 +232,94 @@ def track(
|
|
|
243
232
|
)]
|
|
244
233
|
|
|
245
234
|
|
|
235
|
+
def track_llm_call(
    *,
    model: str,
    provider: str,
    input_tokens: int,
    output_tokens: int,
    cache_write_tokens: int = 0,
    cache_read_tokens: int = 0,
    tool: str = "llm-agent-loop",
    server: str = "agent-runner",
    work_item_id: str = "",
    iterations: int = 1,
    latency_ms: int = 0,
) -> float:
    """
    Record exact LLM token usage from an API response (webhook / agent_runner path).

    Token counts come directly from the LLM API response.usage block —
    same approach as promptfoo: capture per-iteration, accumulate across all
    iterations, compute cost once at the end.

    Anthropic fields:
    input_tokens, output_tokens,
    cache_creation_input_tokens → cache_write_tokens,
    cache_read_input_tokens → cache_read_tokens

    OpenAI fields:
    prompt_tokens → input_tokens,
    completion_tokens → output_tokens,
    prompt_tokens_details.cached_tokens → cache_read_tokens

    A counter passed as ``None`` (a usage field the provider left unset) is
    treated as 0 so that one missing optional field cannot break cost tracking.

    One record is appended to the current session and a live line is printed
    via ``_print_live``. When tracking is disabled nothing is recorded.

    Returns the cost in USD (0.0 when tracking is disabled or the model has no
    pricing entry).
    """
    if not _TRACKING_ENABLED:
        return 0.0

    # Normalise unset usage counters before any arithmetic or serialisation.
    input_tokens = input_tokens or 0
    output_tokens = output_tokens or 0
    cache_write_tokens = cache_write_tokens or 0
    cache_read_tokens = cache_read_tokens or 0

    pricing = get_pricing(model)
    cost = (
        pricing.cost(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_write_tokens=cache_write_tokens,
            cache_read_tokens=cache_read_tokens,
        )
        if pricing else 0.0
    )

    sess = _get_session()
    running = sess.running_total() + cost
    # Cache tokens are reported separately and are not part of this total.
    total = input_tokens + output_tokens

    cache_note = ""
    if cache_write_tokens or cache_read_tokens:
        cache_note = f" cache_write={cache_write_tokens} cache_read={cache_read_tokens}"

    record = {
        "tool": tool,
        "server": server,
        "session_id": sess.id,
        "model": model,
        "model_source": f"{provider}-api-response",
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cache_write_tokens": cache_write_tokens,
        "cache_read_tokens": cache_read_tokens,
        # Key name is shared with estimated records; here the value is exact.
        "estimated_tokens": total,
        "cost_usd": round(cost, 8),
        "latency_ms": latency_ms,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "session_total_usd": round(running, 8),
        "token_method": "exact",
        "token_note": (
            f"Exact counts from {provider} API response. "
            f"Iterations: {iterations}.{cache_note}"
        ),
        "work_item_id": work_item_id,
        "iterations": iterations,
    }
    sess.add(record)
    _print_live(server, tool, total, cost, latency_ms, running, exact=True)
    return cost
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def get_session_id() -> str:
    """Expose this process's cost-tracking session ID.

    Callers use it to hand the ID to subprocesses as STLC_SESSION_ID.
    """
    return _SESSION_ID
|
|
321
|
+
|
|
322
|
+
|
|
246
323
|
def track_healing(payload: dict) -> None:
|
|
247
324
|
"""
|
|
248
325
|
Record an AI Vision healing call from LocatorHealer.ts.
|
|
@@ -307,23 +384,277 @@ def _model_source() -> str:
|
|
|
307
384
|
def _print_live(
    server: str, tool: str, tokens: int,
    cost: float, latency_ms: int, running: float,
    *, exact: bool = False,
) -> None:
    """Write a one-line cost update for a single call to stderr.

    Args:
        server: Name of the server/agent that produced the call.
        tool: Name of the tool (or LLM loop) that was invoked.
        tokens: Token count to display; shown as ``N.NK`` from 1000 upwards.
        cost: Cost of this call in USD.
        latency_ms: Duration of the call in milliseconds.
        running: Session total in USD including this call.
        exact: When False the count is prefixed with ``~`` and the line is
            tagged ``[est]``; when True both markers are omitted.
    """
    dim, cyan, green, reset = _C["dim"], _C["cyan"], _C["green"], _C["reset"]

    if tokens >= 1000:
        shown_tokens = f"{tokens/1000:.1f}K"
    else:
        shown_tokens = str(tokens)

    if exact:
        approx_mark, est_tag = "", ""
    else:
        approx_mark, est_tag = "~", f"{dim} [est]{reset}"

    segments = (
        f"{dim}[stlc-cost]{reset} ",
        f"{cyan}{server}{reset}{dim} · {reset}{tool}",
        f" {approx_mark}{shown_tokens} tokens {green}${cost:.6f}{reset}{est_tag}",
        f" {dim}(session: ${running:.6f} {latency_ms}ms){reset}",
    )
    # flush so the line shows up immediately in the host's log view
    print("".join(segments), file=sys.stderr, flush=True)
|
|
322
402
|
|
|
323
403
|
|
|
324
|
-
# ──
|
|
404
|
+
# ── Tool → artifact label ──────────────────────────────────────────────────
|
|
405
|
+
|
|
406
|
+
# Tool name -> past-tense description of what the call produced.
# Looked up by tool name with the raw name as fallback for unknown tools.
_TOOL_ARTIFACT: dict[str, str] = {
    # Gherkin
    "fetch_work_item_for_gherkin": "Work item fetched",
    "fetch_feature_hierarchy": "Feature hierarchy fetched",
    "generate_and_attach_gherkin": "Gherkin attached to work item",
    "attach_gherkin_to_feature": "Gherkin attached to feature",
    "attach_gherkin_to_work_item": "Gherkin attached to work item",
    "validate_gherkin_content": "Gherkin validated",
    # Playwright
    "capture_app_context": "App context captured",
    "generate_playwright_code": "Playwright code generated",
    "scaffold_locator_repository": "Locator repository scaffolded",
    "attach_code_to_work_item": "Code attached to work item",
    "validate_gherkin_steps": "Gherkin steps validated",
    "pre_validate_cucumber_steps": "Cucumber steps pre-validated",
    "get_generated_files": "Generated files retrieved",
    # Helix
    "inspect_helix_project": "Helix project inspected",
    "write_helix_files": "Files written to Helix",
    "update_helix_file": "Helix file updated",
    "read_helix_file": "Helix file read",
    "list_helix_tree": "Helix tree listed",
    # Test cases
    "fetch_work_item": "Work item fetched",
    "create_and_link_test_cases": "Test cases created & linked",
    "create_deduped_test_cases": "Test cases created (deduped)",
    "get_linked_test_cases": "Linked test cases retrieved",
    # LLM loop
    "llm-agent-loop": "LLM orchestration",
}

# Server names in a fixed order.
_SERVER_ORDER = [
    "qa-test-case-manager",
    "qa-jira-manager",
    "qa-gherkin-generator",
    "qa-playwright-generator",
    "qa-helix-writer",
    "agent-runner",
]

# Server name -> human-readable name used in report rows.
_SERVER_FRIENDLY: dict[str, str] = {
    "qa-test-case-manager": "QA Test Case Manager",
    "qa-jira-manager": "QA Jira Manager",
    "qa-gherkin-generator": "QA Gherkin Generator",
    "qa-playwright-generator": "QA Playwright Generator",
    "qa-helix-writer": "QA Helix Writer",
    "agent-runner": "Orchestrator",
    "locator-healer": "Locator Healer",
}

# Tool name -> short imperative label used in report rows.
_TOOL_FRIENDLY: dict[str, str] = {
    # Gherkin
    "fetch_work_item_for_gherkin": "fetch WI",
    "fetch_feature_hierarchy": "fetch feature hierarchy",
    "generate_and_attach_gherkin": "generate & attach Gherkin",
    "attach_gherkin_to_feature": "attach Gherkin to feature",
    "attach_gherkin_to_work_item": "attach Gherkin to WI",
    "validate_gherkin_content": "validate Gherkin",
    # Playwright
    "capture_app_context": "capture app context",
    "generate_playwright_code": "generate Playwright code",
    "scaffold_locator_repository": "scaffold locators",
    "attach_code_to_work_item": "attach code to WI",
    "validate_gherkin_steps": "validate Gherkin steps",
    "pre_validate_cucumber_steps": "pre-validate Cucumber steps",
    "get_generated_files": "retrieve generated files",
    # Helix
    "inspect_helix_project": "inspect Helix project",
    "write_helix_files": "write files to Helix",
    "update_helix_file": "update Helix file",
    "read_helix_file": "read Helix file",
    "list_helix_tree": "list Helix tree",
    # Test cases
    "fetch_work_item": "fetch WI",
    "create_and_link_test_cases": "create & link test cases",
    "create_deduped_test_cases": "create test cases (deduped)",
    "get_linked_test_cases": "get linked test cases",
    # LLM loop and AI Vision
    "llm-agent-loop": "LLM agent loop",
    "ai-vision-anthropic": "AI Vision (Anthropic)",
    "ai-vision-copilot": "AI Vision (Copilot)",
}
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def _step_label(record: dict) -> str:
    """Build the "Agent (step)" label shown for one cost record.

    The record's ``server`` and ``tool`` values (each defaulting to "?") are
    mapped through the friendly-name tables; names without an entry are shown
    unchanged. Records from ``agent-runner`` are always labelled
    "Orchestrator".
    """
    server_key = record.get("server", "?")
    tool_key = record.get("tool", "?")
    step_name = _TOOL_FRIENDLY.get(tool_key, tool_key)
    if server_key == "agent-runner":
        agent_name = "Orchestrator"
    else:
        agent_name = _SERVER_FRIENDLY.get(server_key, server_key)
    return f"{agent_name} ({step_name})"
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _model_display(model_id: str) -> str:
    """Return the pricing entry's display name, or ``model_id`` when none exists."""
    pricing = get_pricing(model_id)
    if not pricing:
        return model_id
    return pricing.display_name
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def _fmt_tok(n: int, exact: bool) -> str:
    """Format a token count compactly, e.g. ``999``, ``1.5K`` or ``2.5M``.

    Args:
        n: Token count.
        exact: When False the result is prefixed with ``~`` to mark an estimate.

    Returns:
        The plain number below 1000, one-decimal thousands (``K``) up to
        999,949, and one-decimal millions (``M``) above that.
    """
    pfx = "" if exact else "~"
    # Switch to millions at 999,950: from there on the one-decimal thousands
    # figure would round up to "1000.0K".
    if n >= 999_950:
        return f"{pfx}{n/1_000_000:.1f}M"
    if n >= 1000:
        return f"{pfx}{n/1000:.1f}K"
    return f"{pfx}{n}"
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
# ── Unified pipeline summary (called by agent_runner after loop) ───────────
|
|
503
|
+
|
|
504
|
+
def _read_cost_records(log_path) -> list:
    """Load the JSON-object lines of a session cost log; return [] if unreadable."""
    loaded = []
    try:
        with log_path.open(encoding="utf-8") as fh:
            for raw in fh:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    entry = json.loads(raw)
                except json.JSONDecodeError:
                    # Best-effort: a corrupt or partially written line is skipped.
                    continue
                # The report reads every record with .get(); a line that is
                # valid JSON but not an object cannot be rendered, so drop it.
                if isinstance(entry, dict):
                    loaded.append(entry)
    except OSError:
        return []
    return loaded


def _artefact_cell(artefact: dict, key: str) -> str:
    """Return an artefact field as text so width format specs never fail."""
    value = artefact.get(key, "")
    return "" if value is None else str(value)


def print_pipeline_summary(
    session_id: str,
    work_item_id: str = "",
    elapsed_s: float = 0.0,
    model: str = "",
    artefacts: list[dict] | None = None,
) -> None:
    """
    Print the two-section final report to stderr:
    1. Artefact Summary — what was produced and where it lives
    2. Token and Cost Report — per-step token counts and USD cost

    Records are read from the session's ``cost-<session_id>.jsonl`` log.
    Nothing is printed when tracking is disabled, the log cannot be read, or
    it contains no usable records.

    Args:
        session_id: Session whose cost log is reported.
        work_item_id: Shown in the report title when non-empty.
        elapsed_s: Shown in the report title when non-zero.
        model: Model named in the title and rate note; falls back to the
            module's detected model when empty.
        artefacts: Optional rows for section 1; each dict may carry ``name``,
            ``type``, ``location`` and ``detail``. Missing or ``None`` fields
            are rendered as empty cells.
    """
    if not _TRACKING_ENABLED:
        return

    records = _read_cost_records(_LOG_DIR / f"cost-{session_id}.jsonl")
    if not records:
        return

    c = _C
    W = 108
    m = model or _MODEL_ID
    wi = f"WI {work_item_id} · " if work_item_id else ""
    dur = f" · {elapsed_s:.1f}s" if elapsed_s else ""

    print(f"\n{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)
    print(
        f"{c['bold']} stlc-agents · Pipeline Report · {wi}{m}{dur}{c['reset']}",
        file=sys.stderr,
    )
    print(f"{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)

    # ── Section 1: Artefact Summary ──────────────────────────────────────────
    if artefacts:
        print(f"\n{c['bold']} 1. Artefact Summary{c['reset']}", file=sys.stderr)
        C1, C2, C3, C4 = 3, 26, 22, 22
        hdr = (
            f" {'#':<{C1}} {'Artefact':<{C2}} {'Type':<{C3}}"
            f" {'Location / Status':<{C4}} Detail"
        )
        print(f"\n{hdr}", file=sys.stderr)
        print(f" {'─'*(W-2)}", file=sys.stderr)
        for i, art in enumerate(artefacts, 1):
            row = (
                f" {i:<{C1}} {_artefact_cell(art, 'name'):<{C2}} "
                f"{_artefact_cell(art, 'type'):<{C3}} "
                f"{_artefact_cell(art, 'location'):<{C4}} "
                f"{c['dim']}{_artefact_cell(art, 'detail')}{c['reset']}"
            )
            print(row, file=sys.stderr)

    # ── Section 2: Token and Cost Report ─────────────────────────────────────
    print(f"\n{c['bold']} 2. Token and Cost Report{c['reset']}", file=sys.stderr)
    S1, S2, S3, S4, S5 = 3, 46, 20, 12, 12
    hdr2 = (
        f"\n {'#':<{S1}} {'Agent / Step':<{S2}} {'Model':<{S3}}"
        f" {'Input':>{S4}} {'Output':>{S5}} {'Cost USD':>12}"
    )
    print(hdr2, file=sys.stderr)
    print(f" {'─'*(W-2)}", file=sys.stderr)

    for i, r in enumerate(records, 1):
        exact = r.get("token_method") == "exact"
        pfx = "" if exact else "~"
        in_s = _fmt_tok(r.get("input_tokens", 0), exact)
        out_s = _fmt_tok(r.get("output_tokens", 0), exact)
        cost_s = f"{pfx}${r.get('cost_usd', 0.0):.6f}"
        mdl = _model_display(r.get("model", m))
        label = _step_label(r)

        print(
            f" {i:<{S1}} {label:<{S2}} {mdl:<{S3}}"
            f" {in_s:>{S4}} {out_s:>{S5}} "
            f"{c['green']}{cost_s:>12}{c['reset']}",
            file=sys.stderr,
        )

    # Session total: use only exact agent-runner rows when present.
    # Estimated (MCP) rows measure payload size — those bytes are already part of
    # the LLM's context window, so summing them with the LLM total would double-count.
    runner_indices = [i for i, r in enumerate(records) if r.get("server") == "agent-runner"]
    total_rows = [records[i] for i in runner_indices] if runner_indices else records
    # True when the total covers only a subset of the rows listed above.
    runner_only = bool(runner_indices) and len(runner_indices) < len(records)

    total_in = sum(r.get("input_tokens", 0) for r in total_rows)
    total_out = sum(r.get("output_tokens", 0) for r in total_rows)
    total_cost = 0.0
    for r in total_rows:
        total_cost += r.get("cost_usd", 0.0)

    all_exact = all(r.get("token_method") == "exact" for r in total_rows)
    in_tot = _fmt_tok(total_in, all_exact)
    out_tot = _fmt_tok(total_out, all_exact)
    pfx_tot = "" if all_exact else "~"
    cost_tot = f"{pfx_tot}${total_cost:.6f}"

    if runner_only:
        row_nums = [i + 1 for i in runner_indices]
        est_count = len(records) - len(runner_indices)
        row_label = f"row {row_nums[0]}" if len(row_nums) == 1 else f"rows {','.join(map(str, row_nums))}"
        tot_label = f"Session Total ({row_label}; {est_count} MCP rows in LLM ctx)"
    else:
        tot_label = "Session Total"

    p = get_pricing(m)
    rate_note = (
        f" Model: {m} — rates applied: "
        f"${p.input_per_mtok:.2f}/M input, ${p.output_per_mtok:.2f}/M output."
        if p else f" Model: {m}"
    )
    if runner_only:
        rate_note += (
            "\n Session total = exact LLM API cost only."
            " Estimated MCP rows are payload-size heuristics already included in the LLM context."
        )
    elif not all_exact:
        rate_note += (
            "\n Estimates (~) based on payload size; "
            "exact rows come directly from the LLM API response."
        )

    print(f" {'─'*(W-2)}", file=sys.stderr)
    print(
        f" {'':>{S1}} {c['bold']}{tot_label:<{S2}}{c['reset']} "
        f"{'':>{S3}} {c['bold']}{in_tot:>{S4}} {out_tot:>{S5}} "
        f"{c['green']}{cost_tot:>12}{c['reset']}",
        file=sys.stderr,
    )
    print(f"{c['dim']}\n{rate_note}{c['reset']}", file=sys.stderr)
    print(f"{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
# ── Session summary on exit (MCP server / Claude Code path) ───────────────
|
|
325
653
|
|
|
326
654
|
def _print_summary() -> None:
|
|
655
|
+
# Suppressed when agent_runner will print the unified pipeline summary
|
|
656
|
+
if os.environ.get("STLC_COST_SUMMARY", "").lower() == "suppress":
|
|
657
|
+
return
|
|
327
658
|
if not _TRACKING_ENABLED or _session is None or not _session.records:
|
|
328
659
|
return
|
|
329
660
|
|
|
@@ -332,15 +663,6 @@ def _print_summary() -> None:
|
|
|
332
663
|
elapsed = time.time() - sess.started_at
|
|
333
664
|
c = _C
|
|
334
665
|
|
|
335
|
-
by_server: dict[str, dict] = {}
|
|
336
|
-
for r in records:
|
|
337
|
-
k = r.get("server", "unknown")
|
|
338
|
-
if k not in by_server:
|
|
339
|
-
by_server[k] = {"calls": 0, "tokens": 0, "cost_usd": 0.0}
|
|
340
|
-
by_server[k]["calls"] += 1
|
|
341
|
-
by_server[k]["tokens"] += r.get("estimated_tokens", 0)
|
|
342
|
-
by_server[k]["cost_usd"] += r.get("cost_usd", 0.0)
|
|
343
|
-
|
|
344
666
|
total_cost = sum(r.get("cost_usd", 0.0) for r in records)
|
|
345
667
|
total_tokens = sum(r.get("estimated_tokens", 0) for r in records)
|
|
346
668
|
|
|
@@ -349,47 +671,33 @@ def _print_summary() -> None:
|
|
|
349
671
|
print(f"{c['bold']} stlc-agents · Cost Summary · {sess.id}{c['reset']}", file=sys.stderr)
|
|
350
672
|
print(f"{c['bold']}{'═'*W}{c['reset']}", file=sys.stderr)
|
|
351
673
|
|
|
352
|
-
|
|
353
|
-
print(f"\n {'Server':<30} {'Calls':>6} {'~Tokens':>10} {'Cost (USD)':>14}", file=sys.stderr)
|
|
354
|
-
print(f" {'─'*60}", file=sys.stderr)
|
|
355
|
-
for svr, d in sorted(by_server.items()):
|
|
356
|
-
tok = f"{d['tokens']/1000:.1f}K" if d['tokens'] >= 1000 else str(d['tokens'])
|
|
357
|
-
print(
|
|
358
|
-
f" {svr:<30} {d['calls']:>6} {tok:>10} "
|
|
359
|
-
f"{c['green']}${d['cost_usd']:.6f}{c['reset']:>14}",
|
|
360
|
-
file=sys.stderr,
|
|
361
|
-
)
|
|
362
|
-
|
|
363
|
-
# Per-step
|
|
364
|
-
print(f"\n {'Step':<26} {'Tool':<36} {'~Tok':>6} {'Cost':>10} {'ms':>6}", file=sys.stderr)
|
|
674
|
+
print(f"\n {'Agent':<28} {'Artifact':<34} {'~Tokens':>8} {'Cost':>10} {'ms':>6}", file=sys.stderr)
|
|
365
675
|
print(f" {'─'*W}", file=sys.stderr)
|
|
366
676
|
for r in records:
|
|
367
|
-
|
|
677
|
+
raw = r.get("estimated_tokens", 0)
|
|
678
|
+
tok = f"{raw/1000:.1f}K" if raw >= 1000 else str(raw)
|
|
679
|
+
pfx = "" if r.get("token_method") == "exact" else "~"
|
|
680
|
+
art = _TOOL_ARTIFACT.get(r.get("tool", ""), r.get("tool", "?"))
|
|
368
681
|
print(
|
|
369
|
-
f" {r.get('server','?'):<
|
|
370
|
-
f"{tok:>
|
|
682
|
+
f" {r.get('server','?'):<28} {art:<34} "
|
|
683
|
+
f"{pfx}{tok:>8} ${r.get('cost_usd',0):.6f} {r.get('latency_ms',0):>6}",
|
|
371
684
|
file=sys.stderr,
|
|
372
685
|
)
|
|
373
686
|
|
|
374
|
-
# Totals
|
|
375
687
|
tok_total = f"{total_tokens/1000:.1f}K" if total_tokens >= 1000 else str(total_tokens)
|
|
376
|
-
print(f"
|
|
377
|
-
print(
|
|
378
|
-
|
|
688
|
+
print(f" {'─'*W}", file=sys.stderr)
|
|
689
|
+
print(
|
|
690
|
+
f" {c['bold']}{'TOTAL':<28} {'':<34} {tok_total:>9} "
|
|
691
|
+
f"{c['green']}${total_cost:.6f}{c['reset']} {elapsed:.1f}s",
|
|
692
|
+
file=sys.stderr,
|
|
693
|
+
)
|
|
379
694
|
|
|
380
|
-
|
|
381
|
-
model_str = f"{_MODEL_ID}"
|
|
695
|
+
model_str = _MODEL_ID
|
|
382
696
|
if _PRICING:
|
|
383
697
|
model_str += f" (${_PRICING.input_per_mtok}/${_PRICING.output_per_mtok} per MTok in/out)"
|
|
384
|
-
print(f" {c['dim']}Model: {model_str}{c['reset']}", file=sys.stderr)
|
|
385
|
-
print(f" {c['dim']}
|
|
386
|
-
print(f"
|
|
387
|
-
print(f" {c['dim']}Duration: {elapsed:.1f}s · Log: {sess.log_path}{c['reset']}", file=sys.stderr)
|
|
388
|
-
print(f"\n {c['dim']}To set model explicitly:{c['reset']}", file=sys.stderr)
|
|
389
|
-
print(f" {c['dim']} qa-stlc cost --set-model claude-opus-4-6{c['reset']}", file=sys.stderr)
|
|
390
|
-
print(f" {c['dim']} or add to .mcp.json env: STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}", file=sys.stderr)
|
|
391
|
-
print(f" {c['dim']} or add to .env: STLC_CODING_AGENT_MODEL=claude-opus-4-6{c['reset']}", file=sys.stderr)
|
|
392
|
-
print(f"\n{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
|
|
698
|
+
print(f"\n {c['dim']}Model: {model_str}{c['reset']}", file=sys.stderr)
|
|
699
|
+
print(f" {c['dim']}Log: {sess.log_path}{c['reset']}", file=sys.stderr)
|
|
700
|
+
print(f"{c['bold']}{'═'*W}{c['reset']}\n", file=sys.stderr)
|
|
393
701
|
|
|
394
702
|
|
|
395
703
|
atexit.register(_print_summary)
|