@qa-gentic/stlc-agents 1.0.25 → 1.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/package.json +1 -1
  2. package/skills/generate-test-cases/SKILL.md +5 -0
  3. package/src/cli/cmd-cost.js +61 -30
  4. package/src/cli/cmd-init.js +88 -8
  5. package/src/stlc_agents/__pycache__/__init__.cpython-314.pyc +0 -0
  6. package/src/stlc_agents/agent_gherkin_generator/__pycache__/__init__.cpython-314.pyc +0 -0
  7. package/src/stlc_agents/agent_gherkin_generator/__pycache__/server.cpython-314.pyc +0 -0
  8. package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  9. package/src/stlc_agents/agent_gherkin_generator/tools/__pycache__/ado_gherkin.cpython-314.pyc +0 -0
  10. package/src/stlc_agents/agent_helix_writer/__pycache__/__init__.cpython-314.pyc +0 -0
  11. package/src/stlc_agents/agent_helix_writer/__pycache__/server.cpython-314.pyc +0 -0
  12. package/src/stlc_agents/agent_helix_writer/server.py +41 -6
  13. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  14. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/boilerplate.cpython-314.pyc +0 -0
  15. package/src/stlc_agents/agent_helix_writer/tools/__pycache__/helix_write.cpython-314.pyc +0 -0
  16. package/src/stlc_agents/agent_playwright_generator/__pycache__/__init__.cpython-314.pyc +0 -0
  17. package/src/stlc_agents/agent_playwright_generator/__pycache__/server.cpython-314.pyc +0 -0
  18. package/src/stlc_agents/agent_playwright_generator/server.py +419 -213
  19. package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  20. package/src/stlc_agents/agent_playwright_generator/tools/__pycache__/ado_attach.cpython-314.pyc +0 -0
  21. package/src/stlc_agents/agent_test_case_manager/__pycache__/__init__.cpython-314.pyc +0 -0
  22. package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-314.pyc +0 -0
  23. package/src/stlc_agents/agent_test_case_manager/server.py +12 -0
  24. package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/__init__.cpython-314.pyc +0 -0
  25. package/src/stlc_agents/agent_test_case_manager/tools/__pycache__/ado_workitem.cpython-314.pyc +0 -0
  26. package/src/stlc_agents/agent_test_case_manager/tools/ado_workitem.py +65 -1
  27. package/src/stlc_agents/shared/__pycache__/__init__.cpython-314.pyc +0 -0
  28. package/src/stlc_agents/shared/__pycache__/auth.cpython-314.pyc +0 -0
  29. package/src/stlc_agents/shared/__pycache__/cost_tracker.cpython-314.pyc +0 -0
  30. package/src/stlc_agents/shared/__pycache__/pricing.cpython-314.pyc +0 -0
  31. package/src/stlc_agents/shared/cost_tracker.py +378 -70
  32. package/src/stlc_agents/shared/pricing.py +115 -24
  33. package/src/stlc_agents/webhook_orchestrator/__init__.py +0 -0
  34. package/src/stlc_agents/webhook_orchestrator/agent_runner.py +599 -0
  35. package/src/stlc_agents/webhook_orchestrator/main.py +43 -0
  36. package/src/stlc_agents/webhook_orchestrator/models.py +63 -0
  37. package/src/stlc_agents/webhook_orchestrator/orchestrator.py +103 -0
  38. package/src/stlc_agents/webhook_orchestrator/pipelines/__init__.py +0 -0
  39. package/src/stlc_agents/webhook_orchestrator/pipelines/_base.py +57 -0
  40. package/src/stlc_agents/webhook_orchestrator/pipelines/ado_test_cases.py +55 -0
  41. package/src/stlc_agents/webhook_orchestrator/pipelines/full_pipeline.py +202 -0
  42. package/src/stlc_agents/webhook_orchestrator/pipelines/gherkin_playwright.py +156 -0
  43. package/src/stlc_agents/webhook_orchestrator/pipelines/jira_test_cases.py +48 -0
  44. package/src/stlc_agents/webhook_orchestrator/webhook_bridge.py +368 -0
  45. package/src/stlc_agents/agent_gherkin_generator/__pycache__/server.cpython-310.pyc +0 -0
  46. package/src/stlc_agents/agent_helix_writer/__pycache__/server.cpython-310.pyc +0 -0
  47. package/src/stlc_agents/agent_jira_manager/__pycache__/server.cpython-310.pyc +0 -0
  48. package/src/stlc_agents/agent_test_case_manager/__pycache__/server.cpython-310.pyc +0 -0
  49. package/src/stlc_agents/shared/__pycache__/cost_tracker.cpython-310.pyc +0 -0
  50. package/src/stlc_agents/shared/__pycache__/pricing.cpython-310.pyc +0 -0
@@ -0,0 +1,599 @@
1
+ """
2
+ agent_runner.py — Multi-provider LLM agent loop that drives MCP stdio servers directly.
3
+
4
+ Replaces `claude --mcp-config … -p -- <prompt>` so the webhook pipeline works with
5
+ any provider configured in .env (Anthropic, OpenAI, Copilot, Grok, DeepSeek, Azure
6
+ OpenAI, Ollama, LM Studio).
7
+
8
+ Flow
9
+ ────
10
+ 1. Start each MCP server subprocess via stdio
11
+ 2. Collect all tools from every server
12
+ 3. Call the configured LLM with those tools and the task prompt
13
+ 4. Loop: execute tool calls → feed results back → repeat until stop/end_turn
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import logging
19
+ import os
20
+ import time
21
+ from contextlib import AsyncExitStack
22
+ from typing import Any
23
+
24
+ from mcp import ClientSession
25
+ from mcp.client.stdio import StdioServerParameters, stdio_client
26
+
27
+ from stlc_agents.shared.cost_tracker import get_session_id, print_pipeline_summary, track_llm_call
28
+
29
# Module-level logger used by the agent loops and the MCP tool helpers in this file.
logger = logging.getLogger("stlc.agent_runner")

# Upper bound on LLM round-trips per run; the loops give up with an error after this many.
MAX_ITERATIONS = 60
# Output-token cap sent with every LLM request (max_tokens / max_completion_tokens).
MAX_TOKENS = 8192
33
+
34
+ # ──────────────────────────────────────────────────────────────────────────────
35
+ # Provider resolution
36
+ # ──────────────────────────────────────────────────────────────────────────────
37
+
38
# Registry of OpenAI-compatible providers, keyed by lowercase provider name.
# Each value is a (base_url, key_env, default_model) tuple:
#   base_url      - endpoint to pass to the client; None means no base_url is passed
#   key_env       - name of the env var holding the API key; "" means no env lookup
#                   (the resolver then falls back to a placeholder key)
#   default_model - model used when the caller does not supply one
_OPENAI_COMPAT: dict[str, tuple[str | None, str, str]] = {
    # provider base_url key_env default_model
    "openai": (None, "OPENAI_API_KEY", "gpt-4o-mini"),
    "copilot": ("https://api.githubcopilot.com", "GITHUB_TOKEN", "gpt-4o"),
    "grok": ("https://api.x.ai/v1", "XAI_API_KEY", "grok-3-mini"),
    "deepseek": ("https://api.deepseek.com/v1", "DEEPSEEK_API_KEY","deepseek-chat"),
    "azure-openai": (None, "AZURE_OPENAI_KEY","gpt-4o-mini"),
    "ollama": ("http://localhost:11434/v1", "", "llama3.2"),
    "lm-studio": ("http://localhost:1234/v1", "", "local-model"),
}
48
+
49
+
50
def active_provider() -> str:
    """Return the active LLM provider name.

    Resolution order:
      1. ``LLM_PROVIDER`` env var (stripped, lowercased) if non-empty.
      2. ``AI_HEALING_PROVIDER`` env var (stripped, lowercased) if non-empty.
      3. The prefix of ``AI_HEALING_API_KEY``:
         ``sk-ant-`` -> "anthropic", a GitHub token prefix -> "copilot",
         any other ``sk-`` key -> "openai".
      4. "ollama" when nothing above matches.
    """
    for var in ("LLM_PROVIDER", "AI_HEALING_PROVIDER"):
        explicit = os.environ.get(var, "").strip().lower()
        if explicit:
            return explicit

    # GitHub token prefixes: classic PAT, OAuth, user-to-server, server-to-server,
    # refresh and fine-grained PAT. "gh_" is kept for backward compatibility.
    github_prefixes = ("gh_", "ghp_", "gho_", "ghu_", "ghs_", "ghr_", "github_pat_")

    key = os.environ.get("AI_HEALING_API_KEY", "").strip()
    # Anthropic keys also start with "sk-", so they must be tested first.
    if key.startswith("sk-ant-"):
        return "anthropic"
    if key.startswith(github_prefixes):
        return "copilot"
    # "sk-" also covers project-scoped "sk-proj-" keys.
    if key.startswith("sk-"):
        return "openai"
    return "ollama"
67
+
68
+
69
def _model_for(provider: str) -> str:
    """Return the model to use for *provider*.

    ``LLM_MODEL`` (stripped) wins when set. Otherwise the provider default is
    returned: the Claude default for the Anthropic family, the ``default_model``
    slot of ``_OPENAI_COMPAT`` for known OpenAI-compatible providers, and
    "gpt-4o-mini" for anything else.
    """
    configured = os.environ.get("LLM_MODEL", "").strip()
    if configured:
        return configured

    # Same normalisation as _resolve_provider: an empty provider means Anthropic.
    name = (provider or "anthropic").lower().strip()
    # The Anthropic family is not in the OpenAI-compatible table; without this
    # branch it would inherit the OpenAI fallback model name.
    if name in ("anthropic", "claude", "claude-code"):
        return "claude-haiku-4-5-20251001"

    _, _, default_model = _OPENAI_COMPAT.get(name, (None, "", "gpt-4o-mini"))
    return default_model
76
+
77
+
78
def _normalise_copilot_model(model: str) -> str:
    """Map a model name onto the form used for the GitHub Copilot API.

    * Empty names and names that already contain a ``/`` are returned untouched.
    * Names mentioning "claude" (case-insensitive) are lowercased, no prefix.
    * Every other name is prefixed with ``openai/``.
    """
    needs_no_change = not model or "/" in model
    if needs_no_change:
        return model

    lowered = model.lower()
    return lowered if "claude" in lowered else f"openai/{model}"
90
+
91
+
92
def _ollama_base_url(host: str) -> str:
    """Turn an ``OLLAMA_HOST`` value into an OpenAI-compatible base URL.

    Adds ``http://`` when no scheme is present and ``/v1`` when no path is
    present. Values that already carry a path are left as they are, and an
    empty value yields "" so the caller can fall through to its default.
    """
    url = host.strip().rstrip("/")
    if not url:
        return ""
    if "://" not in url:
        url = f"http://{url}"
    # Only append the OpenAI-compat prefix when the user supplied no path at all.
    if "/" not in url.split("://", 1)[1]:
        url = f"{url}/v1"
    return url


def _resolve_provider(
    provider: str, model: str, api_key: str, base_url: str
) -> tuple[Any, str, str]:
    """Build the async SDK client for *provider*.

    Args:
        provider: Provider name; empty means "anthropic".
        model: Explicit model (or Azure deployment) name; empty selects the default.
        api_key: Explicit API key; empty falls back to the provider's env var.
        base_url: Explicit endpoint; empty falls back to env vars / table default.

    Returns:
        ``(async_client, model_name, family)`` where family is "anthropic" or "openai".

    Raises:
        RuntimeError: the required SDK package is not installed (chained from
            the underlying ImportError).
    """
    p = (provider or "anthropic").lower().strip()

    if p in ("anthropic", "claude", "claude-code"):
        try:
            from anthropic import AsyncAnthropic
        except ImportError as exc:
            raise RuntimeError(
                "anthropic package missing — run: pip install 'qa-stlc-agents[webhook]'"
            ) from exc
        key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")
        client = AsyncAnthropic(api_key=key)
        return client, model or "claude-haiku-4-5-20251001", "anthropic"

    if p == "azure-openai":
        try:
            from openai import AsyncAzureOpenAI
        except ImportError as exc:
            raise RuntimeError(
                "openai package missing — run: pip install 'qa-stlc-agents[webhook]'"
            ) from exc
        endpoint = base_url or os.environ.get("AZURE_OPENAI_ENDPOINT", "")
        key = (api_key or os.environ.get("AZURE_OPENAI_KEY")
               or os.environ.get("AZURE_OPENAI_API_KEY", ""))
        api_ver = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-02-01")
        # For Azure the "model" is the deployment name.
        dep = model or os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-4o-mini")
        client = AsyncAzureOpenAI(api_key=key, azure_endpoint=endpoint, api_version=api_ver)
        return client, dep, "openai"

    # All other OpenAI-compatible providers
    try:
        from openai import AsyncOpenAI
    except ImportError as exc:
        raise RuntimeError(
            "openai package missing — run: pip install 'qa-stlc-agents[webhook]'"
        ) from exc
    default_url, key_env, default_model = _OPENAI_COMPAT.get(p, (None, "", "gpt-4o-mini"))

    # Provider-specific endpoint env vars only apply to their own provider.
    env_url = None
    if p == "lm-studio":
        env_url = os.environ.get("LM_STUDIO_URL")
    elif p == "ollama":
        # OLLAMA_HOST is commonly a bare "host:port"; normalise it to a usable URL.
        env_url = _ollama_base_url(os.environ.get("OLLAMA_HOST") or "")
    url = base_url or env_url or default_url

    # Providers without a key still get a placeholder key.
    key = api_key or (os.environ.get(key_env) if key_env else "") or "dummy"
    m = model or default_model
    if p == "copilot":
        m = _normalise_copilot_model(m)

    kw: dict = {"api_key": key}
    if url:
        kw["base_url"] = url
    return AsyncOpenAI(**kw), m, "openai"
148
+
149
+
150
+ # ──────────────────────────────────────────────────────────────────────────────
151
+ # MCP server management
152
+ # ──────────────────────────────────────────────────────────────────────────────
153
+
154
async def _start_servers(
    mcp_config: dict, env: dict[str, str], stack: AsyncExitStack
) -> dict[str, tuple[ClientSession, Any]]:
    """Launch every configured MCP stdio server and index the tools they expose.

    Servers are read from ``mcp_config["mcpServers"]``. Each started transport
    and session is registered on *stack*, so they live until the caller's exit
    stack is closed. A server that has no command, fails to start, or fails to
    list its tools is logged and skipped.

    Returns:
        ``{tool_name: (session, mcp_tool)}``; when two servers expose the same
        tool name the later server replaces the earlier entry.
    """
    registry: dict[str, tuple[ClientSession, Any]] = {}

    for server_name, cfg in mcp_config.get("mcpServers", {}).items():
        command = cfg.get("command", "")
        if not command:
            logger.warning("MCP server '%s' has no command — skipping", server_name)
            continue

        params = StdioServerParameters(command=command, args=cfg.get("args", []), env=env)

        # Phase 1: spawn the subprocess and complete the MCP handshake.
        try:
            streams = await stack.enter_async_context(stdio_client(params))
            reader, writer = streams
            session = await stack.enter_async_context(ClientSession(reader, writer))
            await session.initialize()
        except Exception as exc:
            logger.error("Failed to start MCP server '%s': %s", server_name, exc)
            continue

        # Phase 2: record every tool the server advertises.
        try:
            listing = await session.list_tools()
            for tool in listing.tools:
                if tool.name in registry:
                    logger.debug(
                        "Tool name collision: '%s' already registered; '%s' wins",
                        tool.name, server_name,
                    )
                registry[tool.name] = (session, tool)
        except Exception as exc:
            logger.error("Failed to list tools for '%s': %s", server_name, exc)

    return registry
190
+
191
+
192
+
193
def _extract_artefact(tool_name: str, result_text: str, wi_id: str) -> dict | None:
    """Turn a tool result into a summary-report row, or None if there is nothing to report.

    Args:
        tool_name: Name of the MCP tool that produced the result.
        result_text: Raw tool output; only JSON objects are considered.
        wi_id: Work item ID used in the row's location.

    Returns:
        A dict with "name", "type", "location" and "detail" keys for recognised
        tools. None when the text is not a JSON object, when the payload has
        ``success`` set to False (``inspect_helix_project`` is exempt from that
        check), or when the tool is not one this function knows how to summarise.
    """
    try:
        data = json.loads(result_text)
    except (TypeError, ValueError):  # JSONDecodeError is a ValueError subclass
        return None
    if not isinstance(data, dict):
        return None
    if data.get("success") is False and tool_name != "inspect_helix_project":
        return None

    def _basename(entry: Any) -> str:
        # Written-file entries are either dicts (dest / file_key) or plain paths.
        if isinstance(entry, dict):
            entry = entry.get("dest") or entry.get("file_key") or ""
        return str(entry).rsplit("/", 1)[-1]

    def _with_overflow(names: list[str], total: int) -> str:
        # Show at most three names, then a "+N more" suffix.
        shown = ", ".join(names[:3])
        if total > 3:
            shown += f" +{total - 3} more"
        return shown

    if tool_name in ("fetch_work_item_for_gherkin", "fetch_work_item", "fetch_feature_hierarchy"):
        title = data.get("title") or data.get("work_item_title") or f"WI {wi_id}"
        wi_type = data.get("work_item_type") or data.get("type") or "Work Item"
        state = data.get("state") or ""
        return {
            # str() guards against a non-string title breaking the slice.
            "name": str(title)[:26],
            "type": wi_type,
            "location": f"WI #{wi_id}",
            "detail": f"State: {state}" if state else "",
        }

    if tool_name in (
        "generate_and_attach_gherkin", "attach_gherkin_to_feature", "attach_gherkin_to_work_item"
    ):
        gherkin = data.get("gherkin_content") or data.get("content") or ""
        lowered = gherkin.lower() if isinstance(gherkin, str) else ""
        # Count plain scenarios plus outlines/templates, which "scenario:" alone misses.
        n_sc = sum(
            lowered.count(keyword)
            for keyword in ("scenario:", "scenario outline:", "scenario template:")
        )
        att_id = data.get("attachment_id") or data.get("attachment_url") or ""
        detail = f"{n_sc} scenario(s)" if n_sc else ""
        if att_id:
            detail = (detail + f" id:{att_id}").strip()
        return {
            "name": "Gherkin Scenarios",
            "type": "Gherkin / attachment",
            "location": f"WI #{wi_id}",
            "detail": detail,
        }

    if tool_name in ("create_and_link_test_cases", "create_deduped_test_cases"):
        tcs = data.get("test_cases") or data.get("created_test_cases") or []
        if isinstance(tcs, list) and tcs:
            n = len(tcs)
        else:
            # No usable list: fall back to an explicit integer count when present.
            count = data.get("count")
            n = count if isinstance(count, int) else 0
        return {
            "name": "Test Cases",
            "type": "Test Cases",
            "location": f"WI #{wi_id}",
            "detail": f"{n} test case(s) created",
        }

    if tool_name == "capture_app_context":
        ctx_map = data.get("context_map") or {}
        n_pages = len(ctx_map) if isinstance(ctx_map, dict) else 0
        src = data.get("locator_source") or ""
        ck = data.get("cache_key") or ""
        return {
            "name": "App Context",
            "type": "Browser Snapshot",
            "location": f"cache:{ck}" if ck else "in-memory",
            "detail": f"{n_pages} page(s)" + (f" source:{src}" if src else ""),
        }

    if tool_name == "generate_playwright_code":
        ck = data.get("cache_key") or ""
        files = data.get("files") or {}
        file_names = list(files) if isinstance(files, dict) else []
        n = len(file_names)
        names = _with_overflow(file_names, n)
        return {
            "name": "Playwright Tests",
            "type": "TypeScript",
            "location": f"cache:{ck}" if ck else "in-memory",
            "detail": f"{n} file(s): {names}" if names else f"{n} file(s)",
        }

    if tool_name == "write_helix_files":
        helix_root = data.get("helix_root") or ""
        written = data.get("written") or data.get("files_written") or []
        if not isinstance(written, list):
            written = []
        n = len(written)
        names = _with_overflow([_basename(p) for p in written[:3]], n)
        return {
            "name": "Helix QA Files",
            "type": "TypeScript / Gherkin",
            "location": helix_root or "Helix disk",
            "detail": f"{n} file(s): {names}" if names else f"{n} file(s)",
        }

    return None
287
+
288
+
289
async def _call_mcp_tool(
    tool_map: dict, tool_name: str, arguments: dict, wi_id: str,
    artefacts: list[dict] | None = None,
) -> str:
    """Call an MCP tool and return its result as a string.

    Args:
        tool_map: ``{tool_name: (session, mcp_tool)}`` registry.
        tool_name: Tool requested by the model.
        arguments: Arguments to pass to the tool.
        wi_id: Work item ID, used for log correlation and artefact rows.
        artefacts: Optional list that receives a summary row for recognised tools.

    Returns:
        The tool's text output, or an ``ERROR ...`` string when the tool is
        unknown or the call itself raised. Errors are returned rather than
        raised so the model can see them and react.
    """
    if tool_name not in tool_map:
        return f"ERROR: unknown tool '{tool_name}'"
    session, _ = tool_map[tool_name]

    try:
        logger.info("claude[wi=%s][tool] %s(%s)", wi_id, tool_name, _abbrev(arguments))
        result = await session.call_tool(tool_name, arguments)
        text = _extract_text(result)
        logger.info("claude[wi=%s][tool-result] %s → %s", wi_id, tool_name, text[:200])
    except Exception as exc:
        logger.error("MCP tool '%s' raised %s", tool_name, exc)
        return f"ERROR calling {tool_name}: {exc}"

    # Artefact extraction only feeds the summary report. It is kept outside the
    # call's try-block so a reporting failure cannot turn a successful tool
    # result into an error message sent back to the model.
    if artefacts is not None:
        try:
            art = _extract_artefact(tool_name, text, wi_id)
        except Exception as exc:
            logger.warning("Could not summarise result of '%s': %s", tool_name, exc)
        else:
            if art is not None:
                artefacts.append(art)
    return text
310
+
311
+
312
+ # ──────────────────────────────────────────────────────────────────────────────
313
+ # Anthropic agent loop
314
+ # ──────────────────────────────────────────────────────────────────────────────
315
+
316
def _anthropic_tools(tool_map: dict) -> list[dict]:
    """Render the MCP tool registry as Anthropic ``tools`` definitions.

    A missing description becomes "" and a missing input schema becomes an
    empty object schema.
    """
    definitions: list[dict] = []
    for name, entry in tool_map.items():
        _session, tool = entry
        schema = tool.inputSchema or {"type": "object", "properties": {}}
        definitions.append(
            {
                "name": name,
                "description": tool.description or "",
                "input_schema": schema,
            }
        )
    return definitions
325
+
326
+
327
async def _run_anthropic(
    client: Any, model: str, prompt: str,
    tool_map: dict, wi_id: str,
    artefacts: list[dict] | None = None,
) -> tuple[bool, str]:
    """Run the tool-calling loop against the Anthropic Messages API.

    Each round sends the conversation, executes every ``tool_use`` block in the
    reply through the MCP tool registry, and feeds the results back as a user
    message. Token usage is accumulated over all rounds and reported once via
    ``track_llm_call`` just before returning.

    Returns:
        ``(True, "")`` when the model stops with ``end_turn`` or
        ``stop_sequence``; otherwise ``(False, reason)`` for an unexpected stop
        reason, a ``tool_use`` stop without tool blocks, or hitting
        ``MAX_ITERATIONS``.
    """
    system_prompt = (
        "You are a QA automation agent. Execute the requested pipeline steps "
        "by calling the available tools in sequence. Auto-confirm all actions."
    )
    conversation = [{"role": "user", "content": prompt}]
    tool_defs = _anthropic_tools(tool_map)

    # Running totals across every round of this run.
    totals = {"input": 0, "output": 0, "cache_write": 0, "cache_read": 0}
    started = time.monotonic()

    def _record(rounds: int) -> None:
        # Emit one cost record covering the whole run so far.
        track_llm_call(
            model=model, provider="anthropic",
            input_tokens=totals["input"],
            output_tokens=totals["output"],
            cache_write_tokens=totals["cache_write"],
            cache_read_tokens=totals["cache_read"],
            work_item_id=wi_id,
            iterations=rounds,
            latency_ms=int((time.monotonic() - started) * 1000),
        )

    for step in range(MAX_ITERATIONS):
        reply = await client.messages.create(
            model=model,
            max_tokens=MAX_TOKENS,
            system=system_prompt,
            messages=conversation,
            tools=tool_defs or None,
        )

        usage = reply.usage
        if usage:
            totals["input"] += usage.input_tokens or 0
            totals["output"] += usage.output_tokens or 0
            # Cache counters may be absent on the usage object, hence getattr.
            totals["cache_write"] += getattr(usage, "cache_creation_input_tokens", 0) or 0
            totals["cache_read"] += getattr(usage, "cache_read_input_tokens", 0) or 0

        logger.info(
            "claude[wi=%s][llm] iteration=%d stop=%s in=%d out=%d cache_write=%d cache_read=%d",
            wi_id, step, reply.stop_reason,
            totals["input"], totals["output"],
            totals["cache_write"], totals["cache_read"],
        )

        conversation.append({"role": "assistant", "content": reply.content})

        stop = reply.stop_reason
        if stop in ("end_turn", "stop_sequence"):
            _record(step + 1)
            return True, ""
        if stop != "tool_use":
            _record(step + 1)
            return False, f"Unexpected stop reason: {stop}"

        # Execute the requested tools one at a time, in the order they appear.
        outputs = []
        for block in reply.content:
            if block.type == "tool_use":
                output = await _call_mcp_tool(tool_map, block.name, block.input, wi_id, artefacts)
                outputs.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": output,
                })

        if not outputs:
            _record(step + 1)
            return False, "tool_use stop reason but no tool_use blocks"

        conversation.append({"role": "user", "content": outputs})

    _record(MAX_ITERATIONS)
    return False, f"Max iterations ({MAX_ITERATIONS}) reached"
407
+
408
+
409
+ # ──────────────────────────────────────────────────────────────────────────────
410
+ # OpenAI-compatible agent loop
411
+ # ──────────────────────────────────────────────────────────────────────────────
412
+
413
def _openai_tools(tool_map: dict) -> list[dict]:
    """Render the MCP tool registry as OpenAI function-tool definitions.

    A missing description becomes "" and a missing input schema becomes an
    empty object schema.
    """
    definitions: list[dict] = []
    for name, entry in tool_map.items():
        _session, tool = entry
        function_spec = {
            "name": name,
            "description": tool.description or "",
            "parameters": tool.inputSchema or {"type": "object", "properties": {}},
        }
        definitions.append({"type": "function", "function": function_spec})
    return definitions
425
+
426
+
427
async def _run_openai(
    client: Any, model: str, prompt: str,
    tool_map: dict, wi_id: str,
    artefacts: list[dict] | None = None,
) -> tuple[bool, str]:
    """Run the tool-calling loop against an OpenAI-compatible chat completions API.

    Each round sends the conversation, executes every requested tool call
    through the MCP tool registry, and appends one ``tool`` message per call.
    Token usage is accumulated over all rounds and reported once via
    ``track_llm_call`` just before returning.

    Returns:
        ``(True, "")`` when the model finishes with ``stop``; otherwise
        ``(False, reason)`` for an unexpected finish reason, a ``tool_calls``
        finish without tool calls, or hitting ``MAX_ITERATIONS``.
    """
    conversation: list[dict] = [
        {
            "role": "system",
            "content": (
                "You are a QA automation agent. Execute the requested pipeline steps "
                "by calling the available tools in sequence. Auto-confirm all actions."
            ),
        },
        {"role": "user", "content": prompt},
    ]
    tool_defs = _openai_tools(tool_map) or None

    # Running totals across every round of this run.
    totals = {"input": 0, "output": 0, "cached": 0}
    started = time.monotonic()

    def _record(rounds: int) -> None:
        # Emit one cost record covering the whole run so far.
        track_llm_call(
            model=model, provider="openai",
            input_tokens=totals["input"],
            output_tokens=totals["output"],
            cache_read_tokens=totals["cached"],
            work_item_id=wi_id,
            iterations=rounds,
            latency_ms=int((time.monotonic() - started) * 1000),
        )

    for step in range(MAX_ITERATIONS):
        completion = await client.chat.completions.create(
            model=model,
            messages=conversation,
            tools=tool_defs,
            tool_choice="auto" if tool_defs else None,
            max_completion_tokens=MAX_TOKENS,
        )
        choice = completion.choices[0]
        msg, finish = choice.message, choice.finish_reason

        usage = completion.usage
        if usage:
            # prompt_tokens covers cached and uncached input together; the cached
            # share is split out so the two buckets are tracked separately.
            details = getattr(usage, "prompt_tokens_details", None)
            cached = (getattr(details, "cached_tokens", 0) or 0) if details else 0
            totals["input"] += (usage.prompt_tokens or 0) - cached
            totals["output"] += usage.completion_tokens or 0
            totals["cached"] += cached

        logger.info(
            "claude[wi=%s][llm] iteration=%d finish=%s in=%d out=%d cached=%d",
            wi_id, step, finish,
            totals["input"], totals["output"], totals["cached"],
        )

        conversation.append(msg.model_dump(exclude_unset=True))

        if finish == "stop":
            _record(step + 1)
            return True, ""
        if finish != "tool_calls":
            _record(step + 1)
            return False, f"Unexpected finish reason: {finish}"
        if not msg.tool_calls:
            _record(step + 1)
            return False, "finish_reason=tool_calls but no tool_calls"

        for call in msg.tool_calls:
            # Arguments that are not valid JSON are replaced by an empty dict.
            try:
                call_args = json.loads(call.function.arguments or "{}")
            except json.JSONDecodeError:
                call_args = {}
            output = await _call_mcp_tool(tool_map, call.function.name, call_args, wi_id, artefacts)
            conversation.append({
                "role": "tool",
                "tool_call_id": call.id,
                "content": output,
            })

    _record(MAX_ITERATIONS)
    return False, f"Max iterations ({MAX_ITERATIONS}) reached"
514
+
515
+
516
+ # ──────────────────────────────────────────────────────────────────────────────
517
+ # Public entry point
518
+ # ──────────────────────────────────────────────────────────────────────────────
519
+
520
async def run_agent_loop(
    mcp_config: dict,
    prompt: str,
    llm_provider: str,
    llm_model: str,
    llm_api_key: str,
    llm_base_url: str,
    env: dict[str, str],
    work_item_id: str,
) -> tuple[bool, str]:
    """Run the STLC pipeline prompt through the configured LLM and MCP servers.

    Resolves the provider client, starts the MCP servers listed in
    *mcp_config* with a copy of *env*, runs the provider-family agent loop,
    and prints the pipeline summary once the servers have been shut down.

    Returns:
        ``(success, error_message)``. Failure is also reported when the
        provider SDK is missing or when no MCP tool could be loaded; in those
        two cases no summary is printed.
    """
    try:
        client, model, family = _resolve_provider(
            llm_provider, llm_model, llm_api_key, llm_base_url
        )
    except RuntimeError as exc:
        return False, str(exc)

    logger.info(
        "agent_runner: wi=%s provider=%s model=%s",
        work_item_id, llm_provider or "anthropic", model,
    )

    started = time.time()
    async with AsyncExitStack() as stack:
        # The subprocesses get a copy of env carrying a session ID (added only if
        # the caller did not set one) and a flag telling them not to print their
        # own cost summaries; the parent prints the combined one below.
        child_env = dict(env)
        if "STLC_SESSION_ID" not in child_env:
            child_env["STLC_SESSION_ID"] = get_session_id()
        child_env["STLC_COST_SUMMARY"] = "suppress"

        tool_map = await _start_servers(mcp_config, child_env, stack)
        if not tool_map:
            return False, "No MCP tools available — all servers failed to start"

        logger.info(
            "agent_runner: wi=%s tools=[%s]",
            work_item_id, ", ".join(tool_map.keys()),
        )

        artefacts: list[dict] = []
        loop = _run_anthropic if family == "anthropic" else _run_openai
        success, error = await loop(client, model, prompt, tool_map, work_item_id, artefacts)

    # The exit stack has closed every MCP server by this point.
    elapsed = time.time() - started
    print_pipeline_summary(
        get_session_id(), str(work_item_id), elapsed, model,
        artefacts or None,
    )
    return success, error
578
+
579
+
580
+ # ──────────────────────────────────────────────────────────────────────────────
581
+ # Helpers
582
+ # ──────────────────────────────────────────────────────────────────────────────
583
+
584
def _abbrev(d: dict, limit: int = 120) -> str:
    """Serialise *d* to JSON for logging, cut to *limit* characters plus an ellipsis.

    Values that are not JSON-native are rendered with ``str``.
    """
    encoded = json.dumps(d, default=str)
    if len(encoded) <= limit:
        return encoded
    return encoded[:limit] + "…"
587
+
588
+
589
def _extract_text(result: Any) -> str:
    """Extract a plain-text representation from an MCP CallToolResult.

    Blocks with a string ``text`` attribute contribute that text. Any other
    block is serialised to JSON (via ``model_dump()`` when available, otherwise
    its ``str`` form). Pieces are joined with newlines. Objects without a
    ``content`` attribute are returned as ``str(result)``.
    """
    if not hasattr(result, "content"):
        return str(result)

    parts: list[str] = []
    # "or []" tolerates a result whose content is None.
    for block in result.content or []:
        text = getattr(block, "text", None)
        if isinstance(text, str):
            parts.append(text)
            continue
        payload = block.model_dump() if hasattr(block, "model_dump") else str(block)
        # default=str keeps serialisation from raising on values that are not
        # JSON-native, matching what _abbrev does.
        parts.append(json.dumps(payload, default=str))
    return "\n".join(parts)
@@ -0,0 +1,43 @@
1
+ """
2
+ Entry point for the qa-stlc-webhook CLI command.
3
+
4
+ Usage:
5
+ qa-stlc-webhook [--port PORT] [--host HOST]
6
+
7
+ Or via uvicorn directly:
8
+ uvicorn stlc_agents.webhook_orchestrator.webhook_bridge:app --port 8080
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import sys
14
+
15
+
16
def main() -> None:
    """Parse the CLI options and serve the webhook bridge app with uvicorn.

    Exits with status 1 and a message on stderr when uvicorn is not installed.
    """
    cli = argparse.ArgumentParser(description="STLC Agents Webhook Bridge")
    option_specs = (
        ("--port", {"type": int, "default": 8080, "help": "Port to listen on (default: 8080)"}),
        ("--host", {"default": "0.0.0.0", "help": "Host to bind (default: 0.0.0.0)"}),
        ("--reload", {"action": "store_true", "help": "Enable auto-reload (dev mode)"}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    # Arguments are parsed before uvicorn is imported, so --help works without it.
    options = cli.parse_args()

    try:
        import uvicorn
    except ImportError:
        message = (
            "ERROR: uvicorn is not installed. "
            "Install the webhook extra: pip install 'qa-stlc-agents[webhook]'"
        )
        print(message, file=sys.stderr)
        sys.exit(1)

    # The app is passed as an import string rather than an object.
    uvicorn.run(
        "stlc_agents.webhook_orchestrator.webhook_bridge:app",
        host=options.host,
        port=options.port,
        reload=options.reload,
        log_level="info",
    )


if __name__ == "__main__":
    main()