@qa-gentic/stlc-agents 1.0.23 → 1.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/package.json +1 -1
  2. package/skills/generate-test-cases/SKILL.md +5 -0
  3. package/src/cli/cmd-cost.js +61 -30
  4. package/src/cli/cmd-init.js +88 -8
  5. package/src/stlc_agents/__pycache__/__init__.cpython-314.pyc +0 -0
  6. package/src/stlc_agents/agent_gherkin_generator/__pycache__/__init__.cpython-314.pyc +0 -0
  7. package/src/stlc_agents/agent_gherkin_generator/__pycache__/server.cpython-314.pyc +0 -0
  8. package/src/stlc_agents/agent_gherkin_generator/server.py +8 -7
  9. package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  10. package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/ado_gherkin.cpython-314.pyc +0 -0
  11. package/src/stlc_agents/agent_helix_writer/__pycache__/__init__.cpython-314.pyc +0 -0
  12. package/src/stlc_agents/agent_helix_writer/__pycache__/server.cpython-314.pyc +0 -0
  13. package/src/stlc_agents/agent_helix_writer/server.py +48 -12
  14. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  15. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/boilerplate.cpython-314.pyc +0 -0
  16. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/helix_write.cpython-314.pyc +0 -0
  17. package/src/stlc_agents/agent_jira_manager/server.py +9 -8
  18. package/src/stlc_agents/agent_playwright_generator/__pycache__/__init__.cpython-314.pyc +0 -0
  19. package/src/stlc_agents/agent_playwright_generator/__pycache__/server.cpython-314.pyc +0 -0
  20. package/src/stlc_agents/agent_playwright_generator/server.py +419 -213
  21. package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  22. package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/ado_attach.cpython-314.pyc +0 -0
  23. package/src/stlc_agents/agent_test_case_manager/__pycache__/__init__.cpython-314.pyc +0 -0
  24. package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-314.pyc +0 -0
  25. package/src/stlc_agents/agent_test_case_manager/server.py +21 -8
  26. package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  27. package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/ado_workitem.cpython-314.pyc +0 -0
  28. package/src/stlc_agents/agent_test_case_manager/tools/ado_workitem.py +65 -1
  29. package/src/stlc_agents/shared/__pycache__/__init__.cpython-314.pyc +0 -0
  30. package/src/stlc_agents/shared/__pycache__/auth.cpython-314.pyc +0 -0
  31. package/src/stlc_agents/shared/__pycache__/cost_tracker.cpython-314.pyc +0 -0
  32. package/src/stlc_agents/shared/__pycache__/pricing.cpython-314.pyc +0 -0
  33. package/src/stlc_agents/shared/cost_tracker.py +378 -70
  34. package/src/stlc_agents/shared/pricing.py +115 -24
  35. package/src/stlc_agents/webhook_orchestrator/__init__.py +0 -0
  36. package/src/stlc_agents/webhook_orchestrator/agent_runner.py +599 -0
  37. package/src/stlc_agents/webhook_orchestrator/main.py +43 -0
  38. package/src/stlc_agents/webhook_orchestrator/models.py +63 -0
  39. package/src/stlc_agents/webhook_orchestrator/orchestrator.py +103 -0
  40. package/src/stlc_agents/webhook_orchestrator/pipelines/__init__.py +0 -0
  41. package/src/stlc_agents/webhook_orchestrator/pipelines/_base.py +57 -0
  42. package/src/stlc_agents/webhook_orchestrator/pipelines/ado_test_cases.py +55 -0
  43. package/src/stlc_agents/webhook_orchestrator/pipelines/full_pipeline.py +202 -0
  44. package/src/stlc_agents/webhook_orchestrator/pipelines/gherkin_playwright.py +156 -0
  45. package/src/stlc_agents/webhook_orchestrator/pipelines/jira_test_cases.py +48 -0
  46. package/src/stlc_agents/webhook_orchestrator/webhook_bridge.py +368 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@qa-gentic/stlc-agents",
3
- "version": "1.0.23",
3
+ "version": "1.0.26",
4
4
  "description": "QA STLC Agents — five MCP servers + skills for AI-powered test case, Gherkin, Playwright generation, and Helix-QA file writing against Azure DevOps and Jira Cloud. Full pipeline for both: fetch → test cases → Gherkin → Playwright → Helix-QA. Works with Claude Code, GitHub Copilot, Cursor, Windsurf.",
5
5
  "keywords": [
6
6
  "playwright",
package/skills/generate-test-cases/SKILL.md CHANGED
@@ -152,6 +152,11 @@ Each test case:
152
152
  - `steps` — array of `{ action, expected_result }` — at least 2 steps per TC
153
153
  - `priority` — 1=Critical, 2=High (default), 3=Medium, 4=Low
154
154
 
155
+ **Automatic post-creation actions (server-side — no skill action required):**
156
+ - Each `TestedBy-Forward` relation is created with `"attributes": {"comment": "STLC-Agent generated test case"}` — this comment appears in the Links tab Comments column in ADO.
157
+ - The tag `STLCAgentTestCases` is appended to the parent work item (PBI/Bug/Feature).
158
+ Both are best-effort; a failure does not roll back test case creation.
159
+
155
160
  ## Example tool call
156
161
 
157
162
  ```json
package/src/cli/cmd-cost.js CHANGED
@@ -124,64 +124,88 @@ function readLogs() {
124
124
 
125
125
  // ── Printing ───────────────────────────────────────────────────────────────
126
126
 
127
+ function fmtTokens(r) {
128
+ const n = r.estimated_tokens || 0;
129
+ const exact = r.token_method === "exact";
130
+ const prefix = exact ? "" : "~";
131
+ return prefix + fmtTok(n);
132
+ }
133
+
127
134
  function printSession(sess) {
128
135
  const { sessionId, records } = sess;
129
136
  if (!records.length) return;
130
137
 
131
138
  const byServer = {};
132
- let totalCost = 0, totalTokens = 0;
139
+ let totalCost = 0, totalExactTokens = 0, totalEstTokens = 0;
140
+ let hasExact = false, hasEst = false;
141
+
133
142
  for (const r of records) {
134
143
  const k = r.server || "unknown";
135
- if (!byServer[k]) byServer[k] = { calls: 0, tokens: 0, cost: 0 };
144
+ if (!byServer[k]) byServer[k] = { calls: 0, tokens: 0, cost: 0, exact: false };
136
145
  byServer[k].calls++;
137
146
  byServer[k].tokens += r.estimated_tokens || 0;
138
147
  byServer[k].cost += r.cost_usd || 0;
139
- totalCost += r.cost_usd || 0;
140
- totalTokens += r.estimated_tokens || 0;
148
+ if (r.token_method === "exact") { byServer[k].exact = true; hasExact = true; }
149
+ else { hasEst = true; }
150
+ totalCost += r.cost_usd || 0;
151
+ if (r.token_method === "exact") totalExactTokens += r.estimated_tokens || 0;
152
+ else totalEstTokens += r.estimated_tokens || 0;
141
153
  }
142
154
 
143
- const model = records[0]?.model || "unknown";
144
- const source = records[0]?.model_source || "";
145
- const method = records.some((r) => r.token_method === "estimated")
146
- ? "estimated (payload chars÷4)" : "exact";
155
+ const methodLabel = hasExact && hasEst ? "mixed (exact + estimated)"
156
+ : hasExact ? "exact (from API response)"
157
+ : "estimated (payload chars÷4)";
158
+
159
+ const models = [...new Set(records.map((r) => r.model).filter(Boolean))].join(", ");
160
+ const sources = [...new Set(records.map((r) => r.model_source).filter(Boolean))].join(", ");
147
161
  const ts = records[0]?.timestamp?.slice(0, 19).replace("T", " ") || "";
148
162
  const tsEnd = records[records.length - 1]?.timestamp?.slice(0, 19).replace("T", " ") || "";
149
163
 
150
- console.log(`\n${"═".repeat(68)}`);
164
+ console.log(`\n${"═".repeat(72)}`);
151
165
  console.log(b(` stlc-agents · Cost Report · ${sessionId}`));
152
- console.log(`${"═".repeat(68)}`);
166
+ console.log(`${"═".repeat(72)}`);
153
167
  console.log(dim(` ${ts} → ${tsEnd}`));
154
- console.log(dim(` Model: ${model} (detected via: ${source})`));
155
- console.log(dim(` Token method: ${method}`));
168
+ console.log(dim(` Model(s): ${models} (via: ${sources})`));
169
+ console.log(dim(` Token method: ${methodLabel}`));
156
170
 
157
171
  // Per-server
158
- console.log(`\n ${"Server".padEnd(30)} ${"Calls".padStart(6)} ${"~Tokens".padStart(10)} ${"Cost (USD)".padStart(14)}`);
159
- console.log(` ${"─".repeat(60)}`);
172
+ console.log(`\n ${"Server".padEnd(30)} ${"Calls".padStart(6)} ${"Tokens".padStart(10)} ${"Cost (USD)".padStart(14)} Method`);
173
+ console.log(` ${"─".repeat(70)}`);
160
174
  for (const [svr, d] of Object.entries(byServer)) {
175
+ const meth = d.exact ? "" : dim(" [est]");
176
+ const prefix = d.exact ? "" : "~";
161
177
  console.log(
162
178
  ` ${cyn(svr.padEnd(30))} ${String(d.calls).padStart(6)} ` +
163
- `${fmtTok(d.tokens).padStart(10)} ${grn(fmtUsd(d.cost).padStart(14))}`
179
+ `${(prefix + fmtTok(d.tokens)).padStart(10)} ${grn(fmtUsd(d.cost).padStart(14))}${meth}`
164
180
  );
165
181
  }
166
182
 
167
183
  // Per-step
168
184
  console.log(`\n ${"Step detail"}`);
169
- console.log(` ${"─".repeat(68)}`);
185
+ console.log(` ${"─".repeat(72)}`);
170
186
  for (const r of records) {
187
+ const exact = r.token_method === "exact";
188
+ const methTag = exact ? "" : dim(" [est]");
189
+ const cacheNote = (r.cache_write_tokens || r.cache_read_tokens)
190
+ ? dim(` cw=${r.cache_write_tokens||0} cr=${r.cache_read_tokens||0}`) : "";
191
+ const iters = r.iterations > 1 ? dim(` ×${r.iterations}`) : "";
171
192
  console.log(
172
- ` ${(r.server || "?").padEnd(26)} ${(r.tool || "?").padEnd(36)} ` +
173
- `${fmtTok(r.estimated_tokens || 0).padStart(6)} ` +
174
- `${grn(fmtUsd(r.cost_usd || 0))} ${fmtMs(r.latency_ms || 0)} ` +
175
- dim(`[${r.token_method || "?"}]`)
193
+ ` ${(r.server || "?").padEnd(26)} ${(r.tool || "?").padEnd(34)} ` +
194
+ `${fmtTokens(r).padStart(8)} ` +
195
+ `${grn(fmtUsd(r.cost_usd || 0))} ${fmtMs(r.latency_ms || 0)}${methTag}${cacheNote}${iters}`
176
196
  );
177
197
  }
178
198
 
179
199
  // Totals
180
- console.log(`\n ${"─".repeat(68)}`);
181
- console.log(` ${"Total tokens".padEnd(40)} ${fmtTok(totalTokens).padStart(10)}`);
200
+ const totalTokens = totalExactTokens + totalEstTokens;
201
+ const tokenNote = hasExact && hasEst
202
+ ? ` (${fmtTok(totalExactTokens)} exact + ~${fmtTok(totalEstTokens)} est)`
203
+ : hasExact ? " (exact)" : " (estimated)";
204
+ console.log(`\n ${"─".repeat(72)}`);
205
+ console.log(` ${"Total tokens".padEnd(40)} ${fmtTok(totalTokens).padStart(10)}${dim(tokenNote)}`);
182
206
  console.log(` ${b("Total cost".padEnd(40))} ${grn(fmtUsd(totalCost))}`);
183
207
  console.log(dim(` Log: ${sess.file}`));
184
- console.log(`${"═".repeat(68)}\n`);
208
+ console.log(`${"═".repeat(72)}\n`);
185
209
  }
186
210
 
187
211
  // ── Main ───────────────────────────────────────────────────────────────────
@@ -235,16 +259,23 @@ module.exports = async function cost(opts) {
235
259
 
236
260
  if (opts.all) {
237
261
  for (const s of sessions) printSession(s);
238
- const allRecords = sessions.flatMap((s) => s.records);
239
- const grandTotal = allRecords.reduce((a, r) => a + (r.cost_usd || 0), 0);
240
- const grandTokens = allRecords.reduce((a, r) => a + (r.estimated_tokens || 0), 0);
241
- console.log(`${"═".repeat(68)}`);
262
+ const allRecords = sessions.flatMap((s) => s.records);
263
+ const grandTotal = allRecords.reduce((a, r) => a + (r.cost_usd || 0), 0);
264
+ const grandExact = allRecords.filter((r) => r.token_method === "exact")
265
+ .reduce((a, r) => a + (r.estimated_tokens || 0), 0);
266
+ const grandEst = allRecords.filter((r) => r.token_method !== "exact")
267
+ .reduce((a, r) => a + (r.estimated_tokens || 0), 0);
268
+ const grandTokens = grandExact + grandEst;
269
+ const tokenBreakdown = grandExact && grandEst
270
+ ? ` (${fmtTok(grandExact)} exact + ~${fmtTok(grandEst)} est)`
271
+ : grandExact ? " (exact)" : " (estimated)";
272
+ console.log(`${"═".repeat(72)}`);
242
273
  console.log(b(` All sessions — grand total`));
243
- console.log(`${"═".repeat(68)}`);
274
+ console.log(`${"═".repeat(72)}`);
244
275
  console.log(` Sessions : ${sessions.length}`);
245
- console.log(` Total tokens: ${fmtTok(grandTokens)}`);
276
+ console.log(` Total tokens: ${fmtTok(grandTokens)}${dim(tokenBreakdown)}`);
246
277
  console.log(b(` TOTAL COST : ${grn(fmtUsd(grandTotal))}`));
247
- console.log(`${"═".repeat(68)}\n`);
278
+ console.log(`${"═".repeat(72)}\n`);
248
279
  return;
249
280
  }
250
281
 
package/src/cli/cmd-init.js CHANGED
@@ -80,14 +80,94 @@ module.exports = async function init(opts) {
80
80
 
81
81
  // ── 3. pip install qa-gentic-stlc-agents ──────────────────────────────────
82
82
  info("Installing qa-gentic-stlc-agents (pip)…");
83
- const pip = spawnSync(python, ["-m", "pip", "install", "qa-gentic-stlc-agents>=1.0.1", "--quiet"], {
84
- stdio: "inherit",
85
- encoding: "utf8",
86
- });
87
- if (pip.status !== 0) {
88
- die("pip install failed. Run manually: pip install qa-gentic-stlc-agents");
83
+
84
+ const IS_WIN = process.platform === "win32";
85
+ const VENV_DIR = path.join(os.homedir(), ".qa-stlc", "venv");
86
+ const venvPython = IS_WIN
87
+ ? path.join(VENV_DIR, "Scripts", "python.exe")
88
+ : path.join(VENV_DIR, "bin", "python3");
89
+
90
+ // Helper: check if the package is importable by a given python binary
91
+ function isImportable(pyBin) {
92
+ if (!fs.existsSync(pyBin) && pyBin !== python) return false;
93
+ const r = spawnSync(pyBin, ["-c", "import qa_gentic_stlc_agents"], { encoding: "utf8" });
94
+ return r.status === 0;
95
+ }
96
+
97
+ // Find a Python 3.10–3.13 binary compatible with qa-gentic-stlc-agents.
98
+ // The package declares Requires-Python >=3.10,<3.14 so we must avoid 3.14+.
99
+ function findCompatiblePython(preferred) {
100
+ const candidates = IS_WIN
101
+ ? ["py", "python", "python3"]
102
+ : ["python3.13", "python3.12", "python3.11", "python3.10", preferred];
103
+
104
+ for (const bin of candidates) {
105
+ const r = spawnSync(bin, ["--version"], { encoding: "utf8" });
106
+ if (r.status !== 0) continue;
107
+ const ver = (r.stdout || r.stderr || "").trim();
108
+ const m = ver.match(/Python (\d+)\.(\d+)/);
109
+ if (!m) continue;
110
+ const major = parseInt(m[1]), minor = parseInt(m[2]);
111
+ if (major === 3 && minor >= 10 && minor <= 13) return bin;
112
+ }
113
+ return null;
89
114
  }
90
- ok("qa-gentic-stlc-agents installed.");
115
+
116
+ // Determine which python to use for MCP servers — may be updated to venv python below
117
+ let resolvedPython = python;
118
+
119
+ if (isImportable(python)) {
120
+ // Already importable by the user-supplied / system python (CI, active venv, etc.)
121
+ ok("qa-gentic-stlc-agents already installed — skipping pip install.");
122
+ } else if (isImportable(venvPython)) {
123
+ // Package found in the persistent qa-stlc venv from a previous run
124
+ ok("qa-gentic-stlc-agents found in ~/.qa-stlc/venv — skipping pip install.");
125
+ resolvedPython = venvPython;
126
+ } else {
127
+ // Create (or reuse) a dedicated venv and install there.
128
+ // Bypasses PEP 668 on Mac/Linux Homebrew Python and works on Windows & CI
129
+ // without requiring elevated permissions or breaking the system Python.
130
+
131
+ // Find a Python 3.10–3.13 binary (package does not support 3.14+ yet)
132
+ const compatPython = findCompatiblePython(python);
133
+ if (!compatPython) {
134
+ die(
135
+ "qa-gentic-stlc-agents requires Python 3.10–3.13 but none was found.\n" +
136
+ " Mac: brew install python@3.13\n" +
137
+ " Linux: sudo apt install python3.13\n" +
138
+ " Windows: install Python 3.13 from python.org\n" +
139
+ " Then re-run: qa-stlc init --python python3.13 --vscode --integration ado"
140
+ );
141
+ }
142
+
143
+ const compatPythonVer = (spawnSync(compatPython, ["--version"], { encoding: "utf8" }).stdout || "").trim();
144
+ info(`Using ${compatPython} (${compatPythonVer}) for venv…`);
145
+
146
+ if (!fs.existsSync(VENV_DIR)) {
147
+ info(`Creating Python venv at ${VENV_DIR}…`);
148
+ const mkVenv = spawnSync(compatPython, ["-m", "venv", VENV_DIR], { stdio: "inherit", encoding: "utf8" });
149
+ if (mkVenv.status !== 0) {
150
+ die(`Failed to create venv. Ensure python3-venv is installed:\n ${compatPython} -m venv ${VENV_DIR}`);
151
+ }
152
+ ok("Venv created.");
153
+ }
154
+
155
+ info("Installing qa-gentic-stlc-agents into venv…");
156
+ const pip = spawnSync(
157
+ venvPython,
158
+ ["-m", "pip", "install", "qa-gentic-stlc-agents>=1.0.1", "--quiet"],
159
+ { stdio: "inherit", encoding: "utf8" }
160
+ );
161
+
162
+ if (pip.status !== 0) {
163
+ die(`pip install into venv failed. Try manually:\n ${venvPython} -m pip install qa-gentic-stlc-agents`);
164
+ }
165
+ ok("qa-gentic-stlc-agents installed into ~/.qa-stlc/venv.");
166
+ resolvedPython = venvPython;
167
+ }
168
+
169
+ // Propagate the resolved python so MCP config points to the correct interpreter
170
+ opts.python = resolvedPython;
91
171
 
92
172
  // ── 4. Copy ORCHESTRATION_RULES.md to project root ─────────────────────────
93
173
  info("Installing ORCHESTRATION_RULES.md to project root…");
@@ -115,7 +195,7 @@ module.exports = async function init(opts) {
115
195
  await cmdMcpConfig({
116
196
  vscode: opts.vscode || false,
117
197
  print: false,
118
- python: python,
198
+ python: resolvedPython,
119
199
  playwrightPort: "8931",
120
200
  integration,
121
201
  });
package/src/stlc_agents/agent_gherkin_generator/server.py CHANGED
@@ -17,6 +17,7 @@ Skills: see skills/generate-gherkin.md
17
17
  import asyncio
18
18
  import json
19
19
  import sys
20
+ import time
20
21
 
21
22
  from dotenv import load_dotenv
22
23
  from mcp.server import Server
@@ -31,6 +32,7 @@ from stlc_agents.agent_gherkin_generator.tools.ado_gherkin import (
31
32
  attach_work_item_file as _attach_wi_file,
32
33
  validate_gherkin_content as _validate_gherkin,
33
34
  )
35
+ from stlc_agents.shared.cost_tracker import track
34
36
 
35
37
  load_dotenv()
36
38
 
@@ -377,6 +379,7 @@ async def list_tools() -> list[types.Tool]:
377
379
 
378
380
  @app.call_tool()
379
381
  async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
382
+ t0 = time.monotonic()
380
383
  try:
381
384
  if name == "fetch_feature_hierarchy":
382
385
  result = await asyncio.to_thread(
@@ -415,7 +418,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
415
418
  "Fix the errors below before attaching to ADO."
416
419
  ),
417
420
  }
418
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
421
+ return track(result, tool_name=name, server="qa-gherkin-generator", t0=t0)
419
422
 
420
423
  result = await asyncio.to_thread(
421
424
  _attach_feature,
@@ -445,7 +448,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
445
448
  "Fix the errors below before attaching to ADO."
446
449
  ),
447
450
  }
448
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
451
+ return track(result, tool_name=name, server="qa-gherkin-generator", t0=t0)
449
452
 
450
453
  result = await asyncio.to_thread(
451
454
  _attach_wi_file,
@@ -506,13 +509,11 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
506
509
  else:
507
510
  result = {"error": f"Unknown tool: {name}"}
508
511
 
509
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
512
+ return track(result, tool_name=name, server="qa-gherkin-generator", t0=t0)
510
513
 
511
514
  except Exception as exc:
512
- return [types.TextContent(
513
- type="text",
514
- text=json.dumps({"error": str(exc), "tool": name}, indent=2),
515
- )]
515
+ err_result = {"error": str(exc), "tool": name}
516
+ return track(err_result, tool_name=name, server="qa-gherkin-generator", t0=t0)
516
517
 
517
518
 
518
519
  async def _run():
package/src/stlc_agents/agent_helix_writer/server.py CHANGED
@@ -23,6 +23,9 @@ import asyncio
23
23
  import json
24
24
  import re
25
25
  import sys
26
+ import tempfile
27
+ import time
28
+ from pathlib import Path
26
29
 
27
30
  from dotenv import load_dotenv
28
31
  from mcp.server import Server
@@ -36,6 +39,7 @@ from stlc_agents.agent_helix_writer.tools.helix_write import (
36
39
  list_helix_tree as _list_tree,
37
40
  update_helix_file as _update_file,
38
41
  )
42
+ from stlc_agents.shared.cost_tracker import track
39
43
 
40
44
  load_dotenv()
41
45
 
@@ -296,8 +300,9 @@ async def list_tools() -> list[types.Tool]:
296
300
  description=(
297
301
  "Write generated TypeScript/Gherkin files into the Helix-QA directory layout "
298
302
  "with full deduplication and interface adaptation.\n\n"
299
- "Pass the 'files' dict from qa-playwright-generator:generate_playwright_code "
300
- "or scaffold_locator_repository directly.\n\n"
303
+ "Preferred: pass cache_key from generate_playwright_code "
304
+ "the server loads the files from disk automatically. "
305
+ "Alternative: pass the 'files' dict directly.\n\n"
301
306
  "mode='tests_only' (default, safe to run repeatedly):\n"
302
307
  " Writes locators.ts, *.page.ts, *.steps.ts, *.feature only.\n"
303
308
  " Infrastructure files (LocatorHealer.ts etc.) are always skipped.\n"
@@ -324,11 +329,20 @@ async def list_tools() -> list[types.Tool]:
324
329
  "type": "string",
325
330
  "description": "Absolute path to the Helix-QA project root.",
326
331
  },
332
+ "cache_key": {
333
+ "type": "string",
334
+ "description": (
335
+ "Cache key returned by generate_playwright_code or "
336
+ "scaffold_locator_repository. Preferred over passing 'files' directly — "
337
+ "the server loads the files from disk automatically."
338
+ ),
339
+ },
327
340
  "files": {
328
341
  "type": "object",
329
342
  "description": (
330
343
  "Dict of { file_key: file_content } as returned by "
331
- "generate_playwright_code or scaffold_locator_repository."
344
+ "generate_playwright_code or scaffold_locator_repository. "
345
+ "Use cache_key instead whenever possible."
332
346
  ),
333
347
  "additionalProperties": {"type": "string"},
334
348
  },
@@ -349,7 +363,7 @@ async def list_tools() -> list[types.Tool]:
349
363
  ),
350
364
  },
351
365
  },
352
- "required": ["helix_root", "files"],
366
+ "required": ["helix_root"],
353
367
  },
354
368
  ),
355
369
  types.Tool(
@@ -448,6 +462,7 @@ async def list_tools() -> list[types.Tool]:
448
462
 
449
463
  @app.call_tool()
450
464
  async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
465
+ t0 = time.monotonic()
451
466
  try:
452
467
  if name == "inspect_helix_project":
453
468
  result = await asyncio.to_thread(_inspect, arguments["helix_root"])
@@ -455,8 +470,31 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
455
470
  result["_validation"] = _validate_inspect_result(result)
456
471
 
457
472
  elif name == "write_helix_files":
473
+ # ── Resolve files: cache_key takes priority over inline dict ──
474
+ files = arguments.get("files") or {}
475
+ # Normalise: LLMs sometimes send [{file_name, content}] instead of {path: content}
476
+ if isinstance(files, list):
477
+ files = {
478
+ item.get("file_name") or item.get("path") or item.get("name", ""): item.get("content", "")
479
+ for item in files
480
+ if isinstance(item, dict)
481
+ }
482
+ cache_key = arguments.get("cache_key", "").strip()
483
+ if cache_key and not files:
484
+ _cache_dir = Path(tempfile.gettempdir()) / "stlc_file_cache"
485
+ cache_file = _cache_dir / f"{cache_key}.json"
486
+ if cache_file.exists():
487
+ files = json.loads(cache_file.read_text())
488
+ else:
489
+ result = {
490
+ "success": False,
491
+ "error": f"cache_key '{cache_key}' not found — file does not exist at {cache_file}. "
492
+ "Either pass the files dict directly or call get_generated_files first.",
493
+ "_validation": {"valid": False, "errors": [f"cache_key '{cache_key}' not found"], "warnings": []},
494
+ }
495
+ return track(result, tool_name=name, server="qa-helix-writer", t0=t0)
458
496
  # ── Pre-write input validation ────────────────────────────────
459
- input_validation = _validate_write_inputs(arguments.get("files", {}))
497
+ input_validation = _validate_write_inputs(files)
460
498
  if not input_validation["valid"]:
461
499
  result = {
462
500
  "success": False,
@@ -467,12 +505,12 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
467
505
  "below and retry. No files were written to disk."
468
506
  ),
469
507
  }
470
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
508
+ return track(result, tool_name=name, server="qa-helix-writer", t0=t0)
471
509
 
472
510
  result = await asyncio.to_thread(
473
511
  _write_files,
474
512
  arguments["helix_root"],
475
- arguments["files"],
513
+ files,
476
514
  arguments.get("mode", "tests_only"),
477
515
  arguments.get("force_scaffold", False),
478
516
  )
@@ -507,13 +545,11 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
507
545
  else:
508
546
  result = {"error": f"Unknown tool: {name}"}
509
547
 
510
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
548
+ return track(result, tool_name=name, server="qa-helix-writer", t0=t0)
511
549
 
512
550
  except Exception as exc:
513
- return [types.TextContent(
514
- type="text",
515
- text=json.dumps({"error": str(exc), "tool": name}, indent=2),
516
- )]
551
+ err_result = {"error": str(exc), "tool": name}
552
+ return track(err_result, tool_name=name, server="qa-helix-writer", t0=t0)
517
553
 
518
554
 
519
555
  async def _run():
package/src/stlc_agents/agent_jira_manager/server.py CHANGED
@@ -22,6 +22,7 @@ Skills: see skills/qa-jira-manager.md
22
22
  import asyncio
23
23
  import json
24
24
  import sys
25
+ import time
25
26
 
26
27
  from dotenv import load_dotenv
27
28
  from mcp.server import Server
@@ -36,6 +37,7 @@ from stlc_agents.agent_jira_manager.tools.jira_workitem import (
36
37
  get_linked_test_cases as _get_linked_test_cases,
37
38
  attach_gherkin_to_issue as _attach_gherkin,
38
39
  )
40
+ from stlc_agents.shared.cost_tracker import track
39
41
 
40
42
  load_dotenv()
41
43
 
@@ -411,6 +413,7 @@ async def list_tools() -> list[types.Tool]:
411
413
 
412
414
  @app.call_tool()
413
415
  async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
416
+ t0 = time.monotonic()
414
417
  try:
415
418
  cloud_id = (arguments.get("cloud_id") or "").strip() or get_cloud_id()
416
419
 
@@ -442,7 +445,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
442
445
  "below and retry. No test cases were created in Jira."
443
446
  ),
444
447
  }
445
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
448
+ return track(result, tool_name=name, server="qa-jira-manager", t0=t0)
446
449
 
447
450
  # ── Peek at the issue to get type and project_key ─────────────
448
451
  issue_data: dict = {}
@@ -484,7 +487,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
484
487
  "Reply 'yes' or 'confirm' to proceed, or 'no' / 'cancel' to abort."
485
488
  ),
486
489
  }
487
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
490
+ return track(result, tool_name=name, server="qa-jira-manager", t0=t0)
488
491
 
489
492
  if not project_key:
490
493
  result = {
@@ -494,7 +497,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
494
497
  "Pass project_key explicitly, e.g. 'PROJ'."
495
498
  ),
496
499
  }
497
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
500
+ return track(result, tool_name=name, server="qa-jira-manager", t0=t0)
498
501
 
499
502
  # ── Create test cases ─────────────────────────────────────────
500
503
  created = []
@@ -638,13 +641,11 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
638
641
  else:
639
642
  result = {"error": f"Unknown tool: {name}"}
640
643
 
641
- return [types.TextContent(type="text", text=json.dumps(result, indent=2, ensure_ascii=False))]
644
+ return track(result, tool_name=name, server="qa-jira-manager", t0=t0)
642
645
 
643
646
  except Exception as exc:
644
- return [types.TextContent(
645
- type="text",
646
- text=json.dumps({"error": str(exc), "tool": name}, indent=2),
647
- )]
647
+ err_result = {"error": str(exc), "tool": name}
648
+ return track(err_result, tool_name=name, server="qa-jira-manager", t0=t0)
648
649
 
649
650
 
650
651
  # ---------------------------------------------------------------------------