@qa-gentic/stlc-agents 1.0.17 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ORCHESTRATION_RULES.md +283 -0
- package/README.md +250 -57
- package/bin/postinstall.js +9 -1
- package/package.json +15 -2
- package/src/cli/cmd-init.js +19 -2
- package/src/cli/cmd-mcp-config.js +10 -14
- package/src/cli/cmd-skills.js +21 -4
- package/src/stlc_agents/shared/install_hook.py +154 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
# Orchestration Rules — Multi-Step Pipeline Agents
|
|
2
|
+
|
|
3
|
+
> **Universal rules file for coding agents (Claude, Copilot, Cursor, Windsurf, Gemini, etc.)**
|
|
4
|
+
> Place this file in your project root or `.ai/` folder. Reference it in your prompt with:
|
|
5
|
+
> `"Apply all rules from ORCHESTRATION_RULES.md before executing any step."`
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## 1. Core Principle
|
|
10
|
+
|
|
11
|
+
Every intermediate output is an **input contract** for the next step — not a done state.
|
|
12
|
+
A step is only complete when its output has been validated against its spec and confirmed
|
|
13
|
+
fit for downstream consumption. Never proceed on a "good enough" assumption.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## 2. Mandatory Behaviours
|
|
18
|
+
|
|
19
|
+
### 2.1 Explicit Task Breakdown
|
|
20
|
+
|
|
21
|
+
- Before executing, decompose the full task into named steps using a todo/task list tool
|
|
22
|
+
(e.g. `manage_todo_list`, GitHub Copilot Tasks, Cursor task panel).
|
|
23
|
+
- Each step must have a declared **input**, **action**, and **output spec**.
|
|
24
|
+
- Mark steps as `[ ] pending`, `[~] in-progress`, `[x] done`, `[!] blocked` — update in real time.
|
|
25
|
+
- Never treat a step as in-progress and done simultaneously.
|
|
26
|
+
|
|
27
|
+
### 2.2 No Skipping Intermediate Steps
|
|
28
|
+
|
|
29
|
+
- If a step produces data that the next step consumes, that data must be:
|
|
30
|
+
1. **Extracted** from raw output (not left embedded)
|
|
31
|
+
2. **Structured** into the agreed schema
|
|
32
|
+
3. **Validated** against the checkpoint gate
|
|
33
|
+
4. **Explicitly handed off** as a named artefact
|
|
34
|
+
- Do NOT jump ahead assuming the downstream step can infer missing data.
|
|
35
|
+
- Do NOT proceed if a required input is absent or malformed.
|
|
36
|
+
|
|
37
|
+
### 2.3 Checkpoint Gates Are Blocking — Pre-Flight Required
|
|
38
|
+
|
|
39
|
+
Gates are not a post-generation reflection. They run **before** output is produced.
|
|
40
|
+
|
|
41
|
+
**Before generating output for any step, you MUST:**
|
|
42
|
+
1. Output the pre-flight checklist with your intended answers filled in.
|
|
43
|
+
2. Only if all items are YES — proceed to generate the output.
|
|
44
|
+
3. If any item is NO — stop, state what is missing, and wait for the user.
|
|
45
|
+
|
|
46
|
+
This is not optional. Generating output first and checking after is a rule violation.
|
|
47
|
+
|
|
48
|
+
Pre-flight format (required before every step):
|
|
49
|
+
```
|
|
50
|
+
PRE-FLIGHT: Step [N] — [Step Name]
|
|
51
|
+
[ ] Input artefact "[name]" received from Step [N-1]?
|
|
52
|
+
[ ] Input matches expected schema?
|
|
53
|
+
[ ] [Step-specific countable check, e.g. "11 scenarios in scenario_inventory?"]
|
|
54
|
+
[ ] [Any tool or selector availability check]
|
|
55
|
+
→ PROCEED / → BLOCKED: [state what is missing]
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 2.4 Data Handoff Must Be Explicit
|
|
59
|
+
|
|
60
|
+
- Output data in full — not summarised, not truncated.
|
|
61
|
+
- Use a consistent schema (JSON, YAML, or named list) — do not change shape between steps.
|
|
62
|
+
- Name the artefact (e.g. `context_map`, `test_case_list`, `scenario_inventory`).
|
|
63
|
+
- The receiving step must reference the artefact by name, not re-derive it.
|
|
64
|
+
|
|
65
|
+
### 2.5 No Placeholder or Stub Outputs
|
|
66
|
+
|
|
67
|
+
This rule applies at generation time, not reflection time. The agent must not produce
|
|
68
|
+
stubs and then acknowledge the violation — it must prevent them before generating.
|
|
69
|
+
|
|
70
|
+
- Never produce output containing `TODO`, `placeholder`, `// implement later`,
|
|
71
|
+
`throw new Error('pending')`, or any empty method/step body.
|
|
72
|
+
- Before generating a file, state the **expected item count** (e.g. number of step
|
|
73
|
+
definitions, number of test cases). The generated file must match that count exactly.
|
|
74
|
+
- If a step cannot produce a complete output, declare it `[!] blocked` and stop.
|
|
75
|
+
- Partial outputs passed downstream cause compounding failures and wasted tokens.
|
|
76
|
+
|
|
77
|
+
**Countable verification pattern (required for code generation steps):**
|
|
78
|
+
```
|
|
79
|
+
Expected: [N] step definitions (from scenario_inventory)
|
|
80
|
+
Generating: [N] step definitions
|
|
81
|
+
Verify after: count implemented bodies — must equal [N], zero empty
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### 2.6 Query-Driven Data Capture (Snapshot / Scraping Steps)
|
|
85
|
+
|
|
86
|
+
- Navigation is NOT the deliverable — **structured data extraction** is.
|
|
87
|
+
- For every screen or page visited, immediately extract all required fields before moving on.
|
|
88
|
+
- Do not defer extraction to a later step.
|
|
89
|
+
- Capture only what downstream steps need (defined by the step's output spec).
|
|
90
|
+
- Validate coverage: every field required by downstream must be present in the captured data.
|
|
91
|
+
|
|
92
|
+
### 2.7 Split Generation Steps to Prevent Silent Stubs
|
|
93
|
+
|
|
94
|
+
For any step that generates code or structured output consumed by a subsequent step,
|
|
95
|
+
split it into two sub-steps:
|
|
96
|
+
|
|
97
|
+
- **[N]a — Signatures only:** generate method/step signatures (names, parameters) with no bodies.
|
|
98
|
+
Output as an inventory list. This makes the expected count explicit and visible.
|
|
99
|
+
- **[N]b — Implement each signature:** implement every item from the [N]a inventory.
|
|
100
|
+
No body may be left empty. Reference `context_map` or equivalent for all selectors/data.
|
|
101
|
+
|
|
102
|
+
This forces a visible count before implementation begins, eliminating silent stub generation.
|
|
103
|
+
|
|
104
|
+
### 2.8 Token Efficiency
|
|
105
|
+
|
|
106
|
+
- Avoid re-deriving data already produced in a prior step.
|
|
107
|
+
- Reference prior artefacts by name; do not re-fetch or re-generate unless a gate failed.
|
|
108
|
+
- If rework is needed, state which gate failed, what was missing, and what the corrected output is.
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## 3. Error Handling
|
|
113
|
+
|
|
114
|
+
| Situation | Required Action |
|
|
115
|
+
|---|---|
|
|
116
|
+
| Gate fails | STOP. Report failed items. Wait for user input or resolution. |
|
|
117
|
+
| Required input missing | STOP. Name the missing input. Do not guess. |
|
|
118
|
+
| Tool call returns empty | STOP. Report. Do not silently continue. |
|
|
119
|
+
| Partial output produced | Mark step `[!] blocked`. Do not pass partial output downstream. |
|
|
120
|
+
| Schema mismatch | STOP. Show expected vs actual schema. Do not transform silently. |
|
|
121
|
+
| Ambiguous instruction | Ask one clarifying question before proceeding. Do not assume. |
|
|
122
|
+
| Stub/TODO found in output | STOP. Do not accept the output. Regenerate from signatures. |
|
|
123
|
+
| Count mismatch (generated vs expected) | STOP. List which items are missing. Do not proceed. |
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## 4. Checkpoint Gate Template
|
|
128
|
+
|
|
129
|
+
Run this **before** generating output — not after.
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
CHECKPOINT GATE [N] — [Step Name]
|
|
133
|
+
---------------------------------------
|
|
134
|
+
PRE-FLIGHT (run before generating):
|
|
135
|
+
[ ] Input artefact received and named
|
|
136
|
+
[ ] Input matches expected schema
|
|
137
|
+
[ ] Expected output count stated: [N items]
|
|
138
|
+
[ ] All required tools/selectors available
|
|
139
|
+
|
|
140
|
+
POST-GENERATION (run before handing off):
|
|
141
|
+
[ ] Actual output count matches expected: [N of N]
|
|
142
|
+
[ ] No stubs, TODOs, or empty bodies in output
|
|
143
|
+
[ ] All items from upstream list are accounted for
|
|
144
|
+
[ ] Output is in agreed schema / format
|
|
145
|
+
|
|
146
|
+
RESULT: [ ] PASS — hand off artefact to Step [N+1]
|
|
147
|
+
[ ] FAIL — stop and report: [list what failed]
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## 5. Step Definition Template
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
STEP [N] — [Name]
|
|
156
|
+
---------------------------------------
|
|
157
|
+
Tool / Agent: [name of tool, MCP server, or agent]
|
|
158
|
+
|
|
159
|
+
Input (required):
|
|
160
|
+
- [Named artefact from Step N-1]
|
|
161
|
+
- [Any other required input]
|
|
162
|
+
|
|
163
|
+
Pre-flight check:
|
|
164
|
+
- State expected output count before generating
|
|
165
|
+
- Confirm all inputs available and schema-valid
|
|
166
|
+
|
|
167
|
+
Action:
|
|
168
|
+
[Precise description — not vague verbs like "process" or "handle"]
|
|
169
|
+
If code generation: split into [N]a (signatures) and [N]b (implementations)
|
|
170
|
+
|
|
171
|
+
Output spec (the contract):
|
|
172
|
+
- Artefact name: [e.g. context_map]
|
|
173
|
+
- Format: [JSON | YAML | list | file | etc.]
|
|
174
|
+
- Required fields: [enumerate them]
|
|
175
|
+
- Coverage requirement: [e.g. one implementation per Gherkin step]
|
|
176
|
+
|
|
177
|
+
Checkpoint Gate: → run Gate [N] template above
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## 6. Anti-Patterns (Never Do These)
|
|
183
|
+
|
|
184
|
+
| Anti-Pattern | Why It Fails | Correct Behaviour |
|
|
185
|
+
|---|---|---|
|
|
186
|
+
| Generating output then checking the gate | Gate runs after stubs already exist — violation acknowledged but not prevented | Run pre-flight checklist before generating |
|
|
187
|
+
| Treating gate as a reflection step | Agent notices violation after the fact; output is already committed | Gate is a pre-condition, not a review |
|
|
188
|
+
| Skipping data extraction after capture | Downstream step receives raw/unstructured input and must infer | Extract and structure data immediately after capture |
|
|
189
|
+
| Jumping to generation without verified inputs | Output based on inference, not facts — stubs and errors result | Validate inputs at gate before calling the generator |
|
|
190
|
+
| Treating "good enough" output as done | Errors compound; rework costs more tokens than doing it right | Validate against spec before marking a step complete |
|
|
191
|
+
| Producing stubs with TODO | Downstream steps receive incomplete contracts and silently fail | Block the step; declare it incomplete; stop |
|
|
192
|
+
| Re-deriving upstream data in a downstream step | Wasted tokens; divergence risk if re-derivation differs | Reference the named artefact from the prior step |
|
|
193
|
+
| Proceeding past a failed gate | Snowballing failures requiring full rework | Stop at the gate; surface the gap; wait for resolution |
|
|
194
|
+
| Single atomic generation step for code | No visible count before generation — stubs go undetected | Split into signatures ([N]a) then implementations ([N]b) |
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## 7. Orchestration Health Checks
|
|
199
|
+
|
|
200
|
+
Run at the start of any multi-step task:
|
|
201
|
+
|
|
202
|
+
- [ ] Are all steps named and sequenced in the task list?
|
|
203
|
+
- [ ] Does each step have a declared input and output spec?
|
|
204
|
+
- [ ] Does each step have a defined pre-flight and post-generation gate?
|
|
205
|
+
- [ ] Are code generation steps split into signatures + implementations?
|
|
206
|
+
- [ ] Are all required tools / MCP servers available?
|
|
207
|
+
- [ ] Are named artefacts from prior steps available as inputs?
|
|
208
|
+
|
|
209
|
+
If any health check fails before execution begins, resolve it first.
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## 8. Agent-Specific Integration Notes
|
|
214
|
+
|
|
215
|
+
### Claude (claude.ai / API)
|
|
216
|
+
- Reference this file in your system prompt or project instructions.
|
|
217
|
+
- Use `manage_todo_list` for step tracking.
|
|
218
|
+
- Attach this file as a project document so it persists across sessions.
|
|
219
|
+
- Pre-flight checklists work reliably because Claude outputs its reasoning before making tool calls.
|
|
220
|
+
|
|
221
|
+
### GitHub Copilot (VS Code / JetBrains)
|
|
222
|
+
- Add to `.github/copilot-instructions.md` or reference in your workspace prompt.
|
|
223
|
+
- **Critical:** Copilot treats rules as advisory context — gates are not enforced at runtime.
|
|
224
|
+
Mitigate by scoping rules to file types (e.g. `When generating *.steps.ts files, you must...`).
|
|
225
|
+
- Always include the pre-flight checklist directly in your chat message for the current step,
|
|
226
|
+
not just in the rules file. Copilot applies in-message instructions more reliably than
|
|
227
|
+
file-level rules for generation constraints.
|
|
228
|
+
- Use the countable verification pattern (section 2.5) explicitly in each chat prompt:
|
|
229
|
+
"There are 11 scenarios. Generate exactly 11 step definitions. State the count before writing."
|
|
230
|
+
- For step definition files: add a file-type-scoped rule to `.github/copilot-instructions.md`:
|
|
231
|
+
```
|
|
232
|
+
When generating Playwright step definition files (*.steps.ts):
|
|
233
|
+
1. Count Given/When/Then steps in the linked .feature file and state the count first.
|
|
234
|
+
2. Every step body must contain real implementation — no TODO, no throw pending, no empty bodies.
|
|
235
|
+
3. If a selector is missing from context_map, name the missing step and stop. Do not stub it.
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Cursor
|
|
239
|
+
- Place in `.cursor/rules/` as `orchestration.mdc` (set scope: `always`).
|
|
240
|
+
- Or add to `.cursorrules` in the project root.
|
|
241
|
+
- Cursor applies project-level rules more consistently than Copilot for generation steps.
|
|
242
|
+
- Use `@file` references in chat to explicitly pull the rules into context per step.
|
|
243
|
+
|
|
244
|
+
### Windsurf (Codeium)
|
|
245
|
+
- Place in `.windsurf/rules.md` or reference in the global rules panel.
|
|
246
|
+
- Windsurf's Cascade agent picks up project-level markdown rules automatically.
|
|
247
|
+
|
|
248
|
+
### Gemini CLI / Vertex AI Agent Builder
|
|
249
|
+
- Reference via system instruction or as a grounding document.
|
|
250
|
+
- Use the Step Definition Template when constructing task configs.
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## 9. Quick Reference Card
|
|
255
|
+
|
|
256
|
+
```
|
|
257
|
+
BEFORE EACH STEP:
|
|
258
|
+
1. Output the pre-flight checklist — fill in all items.
|
|
259
|
+
2. If all YES → state expected output count → generate.
|
|
260
|
+
3. If any NO → stop and report.
|
|
261
|
+
|
|
262
|
+
AFTER EACH STEP:
|
|
263
|
+
1. Run post-generation gate — count actual vs expected.
|
|
264
|
+
2. If PASS → hand off named artefact to next step.
|
|
265
|
+
3. If FAIL → stop, report, regenerate.
|
|
266
|
+
|
|
267
|
+
FOR CODE GENERATION:
|
|
268
|
+
1. Generate signatures/names only first ([N]a).
|
|
269
|
+
2. State the count from [N]a.
|
|
270
|
+
3. Implement every item ([N]b) — zero empty bodies allowed.
|
|
271
|
+
|
|
272
|
+
NEVER:
|
|
273
|
+
- Generate output then check the gate.
|
|
274
|
+
- Proceed past a failed gate.
|
|
275
|
+
- Pass unstructured or partial data downstream.
|
|
276
|
+
- Produce stubs and acknowledge them — prevent them.
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
*Version 1.1 — Updated to add pre-flight gate enforcement, countable verification,
|
|
282
|
+
split generation step pattern, and Copilot-specific stub prevention guidance.
|
|
283
|
+
Root cause addressed: gates were post-generation reflections, not pre-generation blockers.*
|
package/README.md
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# @qa-gentic/stlc-agents
|
|
2
2
|
|
|
3
|
-
> AI-powered QA STLC automation — from Azure DevOps **or Jira Cloud** work item to self-healing Playwright TypeScript in a Helix-QA project.
|
|
3
|
+
> AI-powered QA STLC automation — from Azure DevOps **or Jira Cloud** work item state change to self-healing Playwright TypeScript in a Helix-QA project.
|
|
4
4
|
|
|
5
|
-
Works with **GitHub Copilot** (VS Code Agent mode), **Claude Code**, **Cursor**, and **Windsurf**.
|
|
5
|
+
Works with **GitHub Copilot** (VS Code Agent mode), **Claude Code**, **Cursor**, and **Windsurf**.
|
|
6
|
+
Also runs fully headless via the **webhook bridge** — no human in the loop required.
|
|
6
7
|
|
|
7
|
-

|
|
9
|
+

|
|
9
10
|
[](LICENSE)
|
|
10
11
|
[](https://nodejs.org)
|
|
11
12
|
[](https://python.org)
|
|
@@ -14,19 +15,103 @@ Works with **GitHub Copilot** (VS Code Agent mode), **Claude Code**, **Cursor**,
|
|
|
14
15
|
|
|
15
16
|
## What It Does
|
|
16
17
|
|
|
17
|
-
Five Python MCP servers cover the full QA Software Test Life Cycle
|
|
18
|
+
Five Python MCP servers cover the full QA Software Test Life Cycle. A sixth server (Playwright MCP) drives a real browser during code generation.
|
|
18
19
|
|
|
19
|
-
| Agent | Input | Output |
|
|
20
|
-
|
|
21
|
-
| `qa-test-case-manager` | ADO PBI / Bug / Feature
|
|
22
|
-
| `qa-gherkin-generator` | ADO
|
|
23
|
-
| `qa-playwright-generator` | Gherkin +
|
|
24
|
-
| `qa-helix-writer` | Generated `.ts` files + `helix_root` | Files written to Helix-QA directory layout
|
|
25
|
-
| `qa-jira-manager` | Jira Story / Bug / Task
|
|
20
|
+
| Agent | Server name | Input | Output |
|
|
21
|
+
|---|---|---|---|
|
|
22
|
+
| Agent 1 | `qa-test-case-manager` | ADO PBI / Bug / Feature | Manual test cases created & linked (TestedBy-Forward), deduped on re-trigger |
|
|
23
|
+
| Agent 2 | `qa-gherkin-generator` | ADO Feature / PBI / Bug | `.feature` file validated and attached to work item |
|
|
24
|
+
| Agent 3 | `qa-playwright-generator` | Gherkin + optional AX-tree `context_map` | `locators.ts` + page objects + step defs (cached, retrieved via `get_generated_files`) |
|
|
25
|
+
| Agent 4 | `qa-helix-writer` | Generated `.ts` files + `helix_root` | Files written to Helix-QA directory layout, never overwrites |
|
|
26
|
+
| Agent 5 | `qa-jira-manager` | Jira Story / Bug / Task | Test cases created & linked in Jira, `.feature` attached to issue, deduped on re-trigger |
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## End-to-End Flow
|
|
31
|
+
|
|
32
|
+
### Webhook-triggered (headless)
|
|
33
|
+
|
|
34
|
+
A work item state change in ADO or Jira fires a webhook POST to the bridge. The bridge normalises the payload and routes it through the pipeline automatically.
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
ADO Service Hook / Jira Webhook
|
|
38
|
+
│ POST
|
|
39
|
+
▼
|
|
40
|
+
webhook_bridge/server.py (FastAPI — qa-stlc-serve)
|
|
41
|
+
│
|
|
42
|
+
├─ parsers.py raw payload → normalised event dict
|
|
43
|
+
├─ state_router.py event → STAGE_TEST_CASES | STAGE_FULL_PIPELINE | SKIP
|
|
44
|
+
│
|
|
45
|
+
└─ ci_runner/pipeline.py
|
|
46
|
+
├─ run_test_cases() → Agent 1 (ADO) or Agent 5 (Jira)
|
|
47
|
+
└─ run_post_done() → Agent 2 → Agent 3 → Agent 4
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### State → stage mapping
|
|
26
51
|
|
|
27
|
-
|
|
52
|
+
| Platform | Work item state | Stage | Agents called |
|
|
53
|
+
|---|---|---|---|
|
|
54
|
+
| ADO | `Approved` / `Committed` | `STAGE_TEST_CASES` | Agent 1 only |
|
|
55
|
+
| ADO | `Done` | `STAGE_FULL_PIPELINE` | Agents 2 → 3 → 4 |
|
|
56
|
+
| Jira | `In Progress` / `Selected for Development` | `STAGE_TEST_CASES` | Agent 5 only |
|
|
57
|
+
| Jira | `Done` | `STAGE_FULL_PIPELINE` | Agents 2 → 3 → 4 |
|
|
28
58
|
|
|
29
|
-
|
|
59
|
+
Any other state is silently dropped. State names are configurable in `state_router.py`.
|
|
60
|
+
|
|
61
|
+
### STAGE_TEST_CASES — Agent 1 / Agent 5
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
fetch_work_item / fetch_jira_issue
|
|
65
|
+
↓
|
|
66
|
+
[LLM] generate_test_cases() ← pipeline bridges fetch → LLM → create
|
|
67
|
+
↓
|
|
68
|
+
create_deduped_test_cases() ← internally: get_linked → filter dupes → create_and_link
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
On re-trigger, test cases whose titles are already linked are skipped (matching is case-insensitive and stop-word normalised). Only net-new test cases are created.
|
|
72
|
+
|
|
73
|
+
### STAGE_FULL_PIPELINE — Agents 2 → 3 → 4
|
|
74
|
+
|
|
75
|
+
The first error stops the chain and is surfaced in the result dict; subsequent agents are not called.
|
|
76
|
+
|
|
77
|
+
**Agent 2 — Gherkin**
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
ADO Feature : fetch_feature_hierarchy → [LLM] generate_gherkin
|
|
81
|
+
ADO PBI/Bug : fetch_work_item_for_gherkin → [LLM] generate_gherkin
|
|
82
|
+
Jira : fetch_jira_issue (via Agent 5) → [LLM] generate_gherkin
|
|
83
|
+
↓
|
|
84
|
+
validate_gherkin_content() ← structural check before attach
|
|
85
|
+
↓ (if invalid → pipeline stops, returns validation errors)
|
|
86
|
+
ADO Feature : attach_gherkin_to_feature()
|
|
87
|
+
ADO PBI/Bug : attach_gherkin_to_work_item()
|
|
88
|
+
Jira : attach_gherkin_to_issue() ← uploads via Jira attachment API
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The `generate_and_attach_gherkin` composite tool wraps validate + attach for CI/headless callers.
|
|
92
|
+
|
|
93
|
+
**Agent 3 — Playwright**
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
generate_playwright_code(gherkin_content, page_class_name)
|
|
97
|
+
↓ returns cache_key (files held in-memory)
|
|
98
|
+
get_generated_files(cache_key)
|
|
99
|
+
↓ returns { "path/to/file.ts": "content", ... }
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
`page_class_name` is derived from `event["title"]` (ADO) or `event["summary"]` (Jira) — camel-cased, max 4 words. Without a `context_map`, locators are Gherkin-inferred (stability=0). Pass `app_url` to embed a snapshot hint comment in `locators.ts`.
|
|
103
|
+
|
|
104
|
+
**Agent 4 — Helix writer**
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
inspect_helix_project(helix_root)
|
|
108
|
+
↓ framework_state: absent | partial | present
|
|
109
|
+
write_helix_files(helix_root, files, mode)
|
|
110
|
+
mode = "scaffold_and_tests" if absent or partial
|
|
111
|
+
mode = "tests_only" if present
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Agent 4 handles all deduplication and conflict renaming internally. No file is ever overwritten.
|
|
30
115
|
|
|
31
116
|
---
|
|
32
117
|
|
|
@@ -34,11 +119,9 @@ A sixth server — **Playwright MCP** (`http://localhost:8931/mcp`) — drives a
|
|
|
34
119
|
|
|
35
120
|
```bash
|
|
36
121
|
# 1. Install the CLI + npm package globally
|
|
37
|
-
#
|
|
122
|
+
# Prompted to choose integration: ado / jira / both
|
|
38
123
|
npm install -g @qa-gentic/stlc-agents
|
|
39
|
-
```
|
|
40
124
|
|
|
41
|
-
```bash
|
|
42
125
|
# 2. Bootstrap your project
|
|
43
126
|
qa-stlc init --vscode --integration ado # GitHub Copilot / VS Code — ADO
|
|
44
127
|
qa-stlc init --vscode --integration jira # GitHub Copilot / VS Code — Jira
|
|
@@ -49,16 +132,30 @@ qa-stlc init --integration ado
|
|
|
49
132
|
# 3. Scaffold a new Playwright + Cucumber + TypeScript QA project
|
|
50
133
|
qa-stlc scaffold --name my-qa-project
|
|
51
134
|
|
|
52
|
-
# 4. Start the Playwright browser server (required for
|
|
135
|
+
# 4. Start the Playwright browser server (required for live-locator generation)
|
|
53
136
|
npx @playwright/mcp@latest --port 8931
|
|
54
137
|
```
|
|
55
138
|
|
|
56
|
-
|
|
139
|
+
### Cost Tracking Activation
|
|
140
|
+
|
|
141
|
+
**npm install** (`npm install -g @qa-gentic/stlc-agents`): Cost tracking is activated automatically after the Python servers are installed. No manual step required.
|
|
142
|
+
|
|
143
|
+
**pip install** (`pip install qa-gentic-stlc-agents`): Cost tracking is **not** activated automatically. You must run one of the following after pip install:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
qa-stlc-apply-cost
|
|
147
|
+
# or
|
|
148
|
+
python -m stlc_agents.shared.install_hook
|
|
149
|
+
```
|
|
150
|
+
Either command patches all MCP servers to log tokens and cost for every tool call.
|
|
151
|
+
|
|
152
|
+
`qa-stlc init` does five things:
|
|
57
153
|
|
|
58
|
-
1. `pip install qa-gentic-stlc-agents` — installs all five Python MCP servers
|
|
154
|
+
1. `pip install qa-gentic-stlc-agents` — installs all five Python MCP servers + rules files
|
|
59
155
|
2. Copies skill files to `.github/copilot-instructions/` (and `.claude/` if not `--vscode`)
|
|
60
156
|
3. Copies custom agent files to `.github/agents/`
|
|
61
|
-
4.
|
|
157
|
+
4. Copies `ORCHESTRATION_RULES.md` to project root for reference during multi-step tasks
|
|
158
|
+
5. Writes `.vscode/mcp.json` (or `.mcp.json`) with all six servers configured
|
|
62
159
|
|
|
63
160
|
---
|
|
64
161
|
|
|
@@ -78,12 +175,15 @@ npx @playwright/mcp@latest --port 8931
|
|
|
78
175
|
ADO_ORGANIZATION_URL=https://dev.azure.com/your-org
|
|
79
176
|
ADO_PROJECT_NAME=YourProject
|
|
80
177
|
ADO_PAT=your-personal-access-token
|
|
81
|
-
APP_BASE_URL=your-app
|
|
82
|
-
|
|
83
|
-
|
|
178
|
+
APP_BASE_URL=https://your-app.example.com
|
|
179
|
+
|
|
180
|
+
# LLM — pick one provider:
|
|
181
|
+
AI_HEALING_PROVIDER=anthropic
|
|
182
|
+
AI_HEALING_API_KEY=sk-ant-...
|
|
183
|
+
# or: OPENAI_API_KEY / AZURE_OPENAI_API_KEY / GITHUB_TOKEN (Copilot)
|
|
84
184
|
```
|
|
85
185
|
|
|
86
|
-
**Jira `.env` vars
|
|
186
|
+
**Jira additional `.env` vars:**
|
|
87
187
|
|
|
88
188
|
```env
|
|
89
189
|
JIRA_CLIENT_ID=your-atlassian-oauth-client-id
|
|
@@ -91,6 +191,14 @@ JIRA_CLIENT_SECRET=your-atlassian-oauth-client-secret
|
|
|
91
191
|
JIRA_CLOUD_ID=your-atlassian-cloud-id
|
|
92
192
|
```
|
|
93
193
|
|
|
194
|
+
**Webhook bridge additional `.env` vars:**
|
|
195
|
+
|
|
196
|
+
```env
|
|
197
|
+
WEBHOOK_SECRET=your-shared-secret
|
|
198
|
+
HELIX_PROJECT_ROOT=/path/to/helix-qa # where Agent 4 writes files
|
|
199
|
+
PLAYWRIGHT_MCP_URL=http://localhost:8931/mcp # leave blank to skip Agent 3
|
|
200
|
+
```
|
|
201
|
+
|
|
94
202
|
---
|
|
95
203
|
|
|
96
204
|
## CLI Commands
|
|
@@ -101,56 +209,83 @@ JIRA_CLOUD_ID=your-atlassian-cloud-id
|
|
|
101
209
|
| `qa-stlc scaffold [--name n] [--dir path]` | Copy full Playwright + Cucumber + TypeScript boilerplate to a new project |
|
|
102
210
|
| `qa-stlc skills [--target claude\|vscode\|cursor\|windsurf]` | Copy skill files to the correct AI coding agent directory |
|
|
103
211
|
| `qa-stlc mcp-config [--vscode] [--print]` | Write `.vscode/mcp.json` or `.mcp.json` with all servers configured |
|
|
104
|
-
| `qa-stlc verify` | Check that all
|
|
212
|
+
| `qa-stlc verify` | Check that all MCP servers are reachable and auth is cached |
|
|
213
|
+
| `qa-stlc-serve [--host] [--port] [--reload]` | Start the webhook bridge (FastAPI) |
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## Orchestration Rules
|
|
218
|
+
|
|
219
|
+
When `qa-stlc init` is run, `ORCHESTRATION_RULES.md` is copied to your project root. A copy is also present in both the npm (`node_modules/@qa-gentic/stlc-agents/`) and pip (`site-packages/stlc_agents/`) installations.
|
|
220
|
+
|
|
221
|
+
Refer to this file in multi-step QA workflows to ensure:
|
|
222
|
+
- **Step breakdown:** every task is decomposed into named steps with explicit inputs/outputs
|
|
223
|
+
- **Pre-flight gates:** gate checks run *before* output is generated, not after
|
|
224
|
+
- **No skipped steps:** intermediate data is structured and handed off explicitly, never inferred
|
|
225
|
+
- **Countable verification:** code generation steps state expected item counts before implementation
|
|
226
|
+
- **Zero stubs:** no partial outputs with TODOs or empty bodies passed downstream
|
|
227
|
+
|
|
228
|
+
Integration per AI coding agent:
|
|
229
|
+
|
|
230
|
+
- **GitHub Copilot (VS Code):** Add to `.github/copilot-instructions.md` or reference in your workspace prompt
|
|
231
|
+
- **Claude Code:** Reference in `.claude/instructions.md` or as a project document
|
|
232
|
+
- **Cursor:** Add to `.cursor/rules/orchestration.mdc` (scope: always)
|
|
233
|
+
- **Windsurf:** Reference in `.windsurf/rules.md` or global rules panel
|
|
234
|
+
|
|
235
|
+
See section 8 of `ORCHESTRATION_RULES.md` for agent-specific integration notes.
|
|
105
236
|
|
|
106
237
|
---
|
|
107
238
|
|
|
108
239
|
## Tool Reference
|
|
109
240
|
|
|
110
|
-
### qa-test-case-manager _(Azure DevOps)_
|
|
241
|
+
### Agent 1 — `qa-test-case-manager` _(Azure DevOps)_
|
|
111
242
|
|
|
112
243
|
| Tool | Description |
|
|
113
244
|
|---|---|
|
|
114
|
-
| `fetch_work_item` | Fetch a PBI, Bug, or Feature with acceptance criteria, coverage hints, and existing TC count. Returns `epic_not_supported` for Epics
|
|
115
|
-
| `get_linked_test_cases` | List all test cases
|
|
116
|
-
| `create_and_link_test_cases` | Create structured manual test cases
|
|
245
|
+
| `fetch_work_item` | Fetch a PBI, Bug, or Feature with acceptance criteria, coverage hints, and existing TC count. Returns `epic_not_supported` for Epics. |
|
|
246
|
+
| `get_linked_test_cases` | List all test cases linked via TestedBy-Forward (used by dedup). |
|
|
247
|
+
| `create_and_link_test_cases` | Create structured manual test cases and link to work item. Feature confirmation gate fires unless `confirmed=true`. |
|
|
248
|
+
| `create_deduped_test_cases` | **Headless/webhook tool.** Internally calls `get_linked_test_cases`, filters duplicates (normalised title match), then calls `create_and_link_test_cases` on net-new only. Safe to call on every re-trigger. |
|
|
117
249
|
|
|
118
|
-
### qa-gherkin-generator _(Azure DevOps)_
|
|
250
|
+
### Agent 2 — `qa-gherkin-generator` _(Azure DevOps)_
|
|
119
251
|
|
|
120
252
|
| Tool | Description |
|
|
121
253
|
|---|---|
|
|
122
|
-
| `fetch_feature_hierarchy` | Fetch a Feature
|
|
123
|
-
| `fetch_work_item_for_gherkin` | Fetch a PBI or Bug with parent Feature context
|
|
124
|
-
| `
|
|
125
|
-
| `
|
|
126
|
-
| `
|
|
254
|
+
| `fetch_feature_hierarchy` | Fetch a Feature + all child PBIs/Bugs + coverage hints. |
|
|
255
|
+
| `fetch_work_item_for_gherkin` | Fetch a single PBI or Bug with parent Feature context. |
|
|
256
|
+
| `validate_gherkin_content` | Structural check: @smoke/@regression tags, scenario count (5–10 feature / 3–9 work_item), every scenario has `When`, no duplicate titles. |
|
|
257
|
+
| `attach_gherkin_to_feature` | Validate + upload + link `.feature` to a Feature work item. |
|
|
258
|
+
| `attach_gherkin_to_work_item` | Validate + upload + link `.feature` to a PBI or Bug. |
|
|
259
|
+
| `generate_and_attach_gherkin` | **Headless/webhook composite.** Accepts pre-generated `gherkin_content`, validates, attaches. Returns `status: validation_failed` with errors if invalid — pipeline must re-generate. |
|
|
127
260
|
|
|
128
|
-
### qa-playwright-generator _(ADO + Jira)_
|
|
261
|
+
### Agent 3 — `qa-playwright-generator` _(ADO + Jira)_
|
|
129
262
|
|
|
130
263
|
| Tool | Description |
|
|
131
264
|
|---|---|
|
|
132
|
-
| `generate_playwright_code` | Generate `locators.ts`, `*Page.ts`, `*.steps.ts`,
|
|
133
|
-
| `
|
|
265
|
+
| `generate_playwright_code` | Generate `locators.ts`, `*Page.ts`, `*.steps.ts`, `cucumber-profile.js`. Returns `cache_key`. Optional `context_map` for AX-tree-verified locators; optional `app_url` embeds snapshot hint. |
|
|
266
|
+
| `get_generated_files` | Retrieve full file content by `cache_key` from Agent 3's in-memory cache. |
|
|
267
|
+
| `scaffold_locator_repository` | Generate the five Helix-QA healing infrastructure files (`LocatorHealer`, `TimingHealer`, `VisualIntentChecker`, `LocatorRepository`, `HealingDashboard`). Call once per project. |
|
|
134
268
|
| `validate_gherkin_steps` | Check for duplicate step strings and missing `When` steps. |
|
|
135
|
-
| `attach_code_to_work_item` | Attach
|
|
269
|
+
| `attach_code_to_work_item` | Attach generated TypeScript delta files to an ADO work item. |
|
|
136
270
|
|
|
137
|
-
### qa-helix-writer _(ADO + Jira)_
|
|
271
|
+
### Agent 4 — `qa-helix-writer` _(ADO + Jira)_
|
|
138
272
|
|
|
139
273
|
| Tool | Description |
|
|
140
274
|
|---|---|
|
|
141
|
-
| `inspect_helix_project` | Returns `framework_state
|
|
275
|
+
| `inspect_helix_project` | Returns `framework_state`: `present` / `partial` / `absent`. Drives `mode` selection in pipeline. |
|
|
276
|
+
| `write_helix_files` | Write files to Helix-QA layout. `mode=scaffold_and_tests` for new/partial projects; `mode=tests_only` for existing. Never overwrites; deduplicates and conflict-renames. |
|
|
142
277
|
| `list_helix_tree` | Full directory listing of a Helix-QA project. |
|
|
143
278
|
| `read_helix_file` | Read an existing file for overlap detection. |
|
|
144
|
-
| `write_helix_files` | Write generated files to the correct Helix-QA paths. |
|
|
145
279
|
|
|
146
|
-
### qa-jira-manager _(Jira Cloud)_
|
|
280
|
+
### Agent 5 — `qa-jira-manager` _(Jira Cloud)_
|
|
147
281
|
|
|
148
282
|
| Tool | Description |
|
|
149
283
|
|---|---|
|
|
150
284
|
| `fetch_jira_issue` | Fetch a Story, Bug, or Task with acceptance criteria and coverage hints. Returns `epic_use_hierarchy` for Epics. |
|
|
151
|
-
| `
|
|
152
|
-
| `create_and_link_test_cases` | Create test case issues in Jira (type `Test`, falls back to `Task`) and link
|
|
153
|
-
| `
|
|
285
|
+
| `get_linked_test_cases` | List all issues linked via `is tested by` / `Test` link type (used by dedup). |
|
|
286
|
+
| `create_and_link_test_cases` | Create test case issues in Jira (type `Test`, falls back to `Task`) and link. Steps stored as ADF table — no Xray required. Epic confirmation gate fires unless `confirmed=true`. |
|
|
287
|
+
| `create_deduped_test_cases` | **Headless/webhook tool.** Internally calls `get_linked_test_cases`, filters duplicates (normalised summary match), then calls `create_and_link_test_cases` on net-new only. Safe to call on every re-trigger. |
|
|
288
|
+
| `attach_gherkin_to_issue` | Upload a `.feature` file as an attachment to a Jira issue via the Jira attachment API. File is named `{issue_key}_{summary_kebab}_regression.feature`. |
|
|
154
289
|
|
|
155
290
|
---
|
|
156
291
|
|
|
@@ -166,25 +301,84 @@ Healed selectors persist in `LocatorRepository`. All AI suggestions require huma
|
|
|
166
301
|
|
|
167
302
|
---
|
|
168
303
|
|
|
169
|
-
##
|
|
304
|
+
## Webhook / Auto-Trigger Setup
|
|
305
|
+
|
|
306
|
+
### Local dev
|
|
307
|
+
|
|
308
|
+
```bash
|
|
309
|
+
# 1. Install webhook extras
|
|
310
|
+
pip install "qa-gentic-stlc-agents[webhook]"
|
|
311
|
+
|
|
312
|
+
# 2. Start the bridge
|
|
313
|
+
make serve
|
|
314
|
+
|
|
315
|
+
# 3. Expose via ngrok
|
|
316
|
+
ngrok http 8080
|
|
317
|
+
|
|
318
|
+
# 4. Register hooks
|
|
319
|
+
make register-ado-hooks BRIDGE_URL=https://xxxx.ngrok.io
|
|
320
|
+
make register-jira-hooks BRIDGE_URL=https://xxxx.ngrok.io
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### Azure Functions deploy
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
make deploy-azure FUNC_APP=stlc-webhook-bridge
|
|
327
|
+
make register-ado-hooks BRIDGE_URL=https://stlc-webhook-bridge.azurewebsites.net/api
|
|
328
|
+
make register-jira-hooks BRIDGE_URL=https://stlc-webhook-bridge.azurewebsites.net/api
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
### Docker
|
|
332
|
+
|
|
333
|
+
```bash
|
|
334
|
+
make docker-build-webhook
|
|
335
|
+
make docker-run-webhook
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
### LLM provider selection
|
|
339
|
+
|
|
340
|
+
| Provider | Env var(s) | Default model |
|
|
341
|
+
|---|---|---|
|
|
342
|
+
| `anthropic` | `AI_HEALING_API_KEY` or `ANTHROPIC_API_KEY` | `claude-haiku-4-5-20251001` |
|
|
343
|
+
| `copilot` | `GITHUB_TOKEN` | `gpt-4o` |
|
|
344
|
+
| `openai` | `OPENAI_API_KEY` | `gpt-4o-mini` |
|
|
345
|
+
| `azure-openai` | `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_ENDPOINT` | `gpt-4o-mini` |
|
|
346
|
+
| `ollama` | `OLLAMA_HOST` (optional) | `llama3.2` |
|
|
347
|
+
|
|
348
|
+
Override model at any time: `LLM_MODEL=claude-sonnet-4-6`
|
|
349
|
+
|
|
350
|
+
### Token cost (headless, Haiku default)
|
|
351
|
+
|
|
352
|
+
| Stage | Tokens / work item | Approx cost |
|
|
353
|
+
|---|---|---|
|
|
354
|
+
| Test case generation (state → Approved) | ~8,000–15,000 | $0.01–$0.03 |
|
|
355
|
+
| Gherkin generation (state → Done) | ~10,000–18,000 | $0.01–$0.04 |
|
|
356
|
+
| Playwright TS generation (state → Done) | ~20,000–35,000 | $0.03–$0.07 |
|
|
357
|
+
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
## ADO vs Jira — Side by Side
|
|
170
361
|
|
|
171
362
|
| Step | Azure DevOps | Jira Cloud |
|
|
172
363
|
|---|---|---|
|
|
364
|
+
| Trigger | Service Hook: `workitem.updated` | Webhook: `jira:issue_updated` |
|
|
173
365
|
| Fetch issue | `fetch_work_item` | `fetch_jira_issue` |
|
|
174
|
-
|
|
|
175
|
-
|
|
|
176
|
-
| Create
|
|
177
|
-
|
|
|
178
|
-
|
|
|
366
|
+
| Check duplicates | `get_linked_test_cases` | `get_linked_test_cases` |
|
|
367
|
+
| Create test cases (interactive) | `create_and_link_test_cases` | `create_and_link_test_cases` |
|
|
368
|
+
| Create test cases (headless) | `create_deduped_test_cases` | `create_deduped_test_cases` |
|
|
369
|
+
| Gherkin attach | `attach_gherkin_to_feature` / `attach_gherkin_to_work_item` | `attach_gherkin_to_issue` |
|
|
370
|
+
| Headless Gherkin composite | `generate_and_attach_gherkin` | `generate_and_attach_gherkin` + `attach_gherkin_to_issue` |
|
|
371
|
+
| Playwright generation | `generate_playwright_code` (shared) | `generate_playwright_code` (shared) |
|
|
179
372
|
| Write to disk | `write_helix_files` (shared) | `write_helix_files` (shared) |
|
|
180
|
-
|
|
|
181
|
-
|
|
|
373
|
+
| Link relation | `TestedBy-Forward` | `is tested by` / `Test` |
|
|
374
|
+
| Auth | MSAL silent + browser (`~/.msal-cache/`) | OAuth 2.0 3LO + browser (`~/.jira-cache/`) |
|
|
182
375
|
|
|
183
376
|
---
|
|
184
377
|
|
|
185
378
|
## Run Tests
|
|
186
379
|
|
|
187
380
|
```bash
|
|
381
|
+
# Full suite
|
|
188
382
|
ENABLE_SELF_HEALING=true \
|
|
189
383
|
HEALING_DASHBOARD_PORT=7890 \
|
|
190
384
|
APP_BASE_URL=<your-app-base-url> \
|
|
@@ -204,8 +398,7 @@ cucumber-js --config=config/cucumber.js -p <feature_profile> --tags "@smoke"
|
|
|
204
398
|
- [ARCHITECTURE-JIRA.md](ARCHITECTURE-JIRA.md) — Full technical architecture, Jira pipeline
|
|
205
399
|
- [WALKTHROUGH-ADO.md](WALKTHROUGH-ADO.md) — End-to-end walkthrough, ADO pipeline
|
|
206
400
|
- [WALKTHROUGH-JIRA.md](WALKTHROUGH-JIRA.md) — End-to-end walkthrough, Jira pipeline
|
|
207
|
-
- [
|
|
208
|
-
- [MANAGEMENT-ROI.md](MANAGEMENT-ROI.md) — ROI, quality impact, and cost analysis
|
|
401
|
+
- [WEBHOOK.md](WEBHOOK.md) — Webhook bridge setup, deployment, and state trigger customisation
|
|
209
402
|
|
|
210
403
|
---
|
|
211
404
|
|
package/bin/postinstall.js
CHANGED
|
@@ -52,7 +52,15 @@ const info = (s) => console.log(`${C.cyan}→${C.reset} ${s}`);
|
|
|
52
52
|
const warn = (s) => console.log(`${C.yellow}⚠${C.reset} ${s}`);
|
|
53
53
|
const d = (s) => `${C.dim}${s}${C.reset}`;
|
|
54
54
|
|
|
55
|
-
console.log(
|
|
55
|
+
console.log(`
|
|
56
|
+
${b("QA STLC Agents")} v${pkg.version} — post-install
|
|
57
|
+
|
|
58
|
+
${d("This npm package includes:")}
|
|
59
|
+
• Five Python MCP servers for Azure DevOps + Jira Cloud
|
|
60
|
+
• Skill files for AI coding agents (Claude Code, Copilot, Cursor, Windsurf)
|
|
61
|
+
• ORCHESTRATION_RULES.md — reference guide for multi-step QA workflows
|
|
62
|
+
• Command-line tools: qa-stlc init, qa-stlc scaffold, qa-stlc skills, etc.
|
|
63
|
+
`);
|
|
56
64
|
|
|
57
65
|
// ── 1. Find Python ────────────────────────────────────────────────────────────
|
|
58
66
|
const pythonCandidates = ["python3", "python"];
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@qa-gentic/stlc-agents",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.19",
|
|
4
4
|
"description": "QA STLC Agents — five MCP servers + skills for AI-powered test case, Gherkin, Playwright generation, and Helix-QA file writing against Azure DevOps and Jira Cloud. Full pipeline for both: fetch → test cases → Gherkin → Playwright → Helix-QA. Works with Claude Code, GitHub Copilot, Cursor, Windsurf.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"playwright",
|
|
@@ -37,11 +37,24 @@
|
|
|
37
37
|
"src/",
|
|
38
38
|
"skills/",
|
|
39
39
|
".github/agents/",
|
|
40
|
-
"README.md"
|
|
40
|
+
"README.md",
|
|
41
|
+
"ORCHESTRATION_RULES.md"
|
|
41
42
|
],
|
|
42
43
|
"scripts": {
|
|
43
44
|
"postinstall": "node ./bin/postinstall.js"
|
|
44
45
|
},
|
|
46
|
+
"_comment": "Diff to apply to package.json — add the cost command to qa-stlc.js",
|
|
47
|
+
|
|
48
|
+
"diff": {
|
|
49
|
+
"bin/qa-stlc.js": {
|
|
50
|
+
"add_require": "const cmdCost = require('../src/cli/cmd-cost');",
|
|
51
|
+
"add_command": {
|
|
52
|
+
"after": "// ── scaffold (or any existing last command) ──────────────────────────────",
|
|
53
|
+
"insert": "\n// ── cost ─────────────────────────────────────────────────────────────────\nprogram\n .command('cost')\n .description('Show token usage and cost for the current or past sessions.\\n' +\n 'Reads session logs from ~/.qa-stlc/cost-*.jsonl\\n' +\n 'Written automatically by the MCP servers on every tool call.')\n .option('--all', 'Show all sessions (default: last session only)')\n .option('--session <id>', 'Show a specific session by ID')\n .option('--json', 'Output raw JSON')\n .action(cmdCost);\n"
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
},
|
|
57
|
+
"full_command_to_add_to_qa-stlc.js": "// ── cost ─────────────────────────────────────────────────────────────────\nprogram\n .command('cost')\n .description(\n 'Show token usage and cost for the current or past pipeline sessions.\\n' +\n 'Reads logs from ~/.qa-stlc/cost-*.jsonl written by the MCP servers.\\n' +\n 'Each MCP tool call logs tokens, cost, and latency automatically.'\n )\n .option('--all', 'Show all sessions (not just the last one)')\n .option('--session <id>', 'Show a specific session by its ID')\n .option('--json', 'Emit raw JSON instead of a formatted table')\n .action(cmdCost);",
|
|
45
58
|
"dependencies": {
|
|
46
59
|
"commander": "^12.0.0",
|
|
47
60
|
"which": "^4.0.0"
|
package/src/cli/cmd-init.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* cmd-init.js — `qa-stlc init`
|
|
3
3
|
*
|
|
4
|
-
* Full bootstrap: install Python agents + skills + MCP config.
|
|
4
|
+
* Full bootstrap: install Python agents + skills + ORCHESTRATION_RULES.md + MCP config.
|
|
5
5
|
* Accepts --integration <ado|jira|both>. When omitted, reads
|
|
6
6
|
* ~/.qa-stlc/integration (written by postinstall) or prompts interactively.
|
|
7
7
|
*/
|
|
@@ -89,7 +89,23 @@ module.exports = async function init(opts) {
|
|
|
89
89
|
}
|
|
90
90
|
ok("qa-gentic-stlc-agents installed.");
|
|
91
91
|
|
|
92
|
-
// ── 4.
|
|
92
|
+
// ── 4. Copy ORCHESTRATION_RULES.md to project root ─────────────────────────
|
|
93
|
+
info("Installing ORCHESTRATION_RULES.md to project root…");
|
|
94
|
+
try {
|
|
95
|
+
const npmPkgDir = path.join(path.dirname(require.resolve("@qa-gentic/stlc-agents/package.json")));
|
|
96
|
+
const srcRules = path.join(npmPkgDir, "ORCHESTRATION_RULES.md");
|
|
97
|
+
const destRules = path.join(process.cwd(), "ORCHESTRATION_RULES.md");
|
|
98
|
+
if (fs.existsSync(srcRules)) {
|
|
99
|
+
fs.copyFileSync(srcRules, destRules);
|
|
100
|
+
ok("ORCHESTRATION_RULES.md copied to project root.");
|
|
101
|
+
} else {
|
|
102
|
+
warn("ORCHESTRATION_RULES.md not found in npm package (expected in development).");
|
|
103
|
+
}
|
|
104
|
+
} catch (e) {
|
|
105
|
+
// Silently skip if not available (common in dev environments before full build)
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// ── 5. Install skills ─────────────────────────────────────────────────────
|
|
93
109
|
info("Installing skills…");
|
|
94
110
|
const skillTarget = opts.vscode ? "vscode" : "claude";
|
|
95
111
|
await cmdSkills({ target: skillTarget, integration });
|
|
@@ -132,6 +148,7 @@ ${C.bold}Setup complete.${C.reset}
|
|
|
132
148
|
${C.dim}Integration:${C.reset} ${C.bold}${integration}${C.reset}
|
|
133
149
|
${C.dim}MCP config :${C.reset} ${mcpLocation}
|
|
134
150
|
${C.dim}Skills :${C.reset} ${skillsLocation}
|
|
151
|
+
${C.dim}Rules :${C.reset} ORCHESTRATION_RULES.md ${C.dim}(project root — reference for multi-step workflows)${C.reset}
|
|
135
152
|
|
|
136
153
|
${C.bold}Start Playwright MCP${C.reset} ${C.dim}(keep running in a separate terminal):${C.reset}
|
|
137
154
|
|
|
@@ -169,7 +169,10 @@ function buildClaudeConfig(pythonBin, playwrightPort, integration) {
|
|
|
169
169
|
}
|
|
170
170
|
}
|
|
171
171
|
|
|
172
|
-
servers["playwright"] = {
|
|
172
|
+
servers["playwright"] = {
|
|
173
|
+
command: "npx",
|
|
174
|
+
args: ["@playwright/mcp@latest", "--isolated"],
|
|
175
|
+
};
|
|
173
176
|
|
|
174
177
|
return { config: { mcpServers: servers }, missing };
|
|
175
178
|
}
|
|
@@ -190,14 +193,7 @@ function buildVscodeConfig(pythonBin, playwrightPort, integration) {
|
|
|
190
193
|
type: "stdio",
|
|
191
194
|
command: bin,
|
|
192
195
|
args: [],
|
|
193
|
-
env: {
|
|
194
|
-
// Azure DevOps auth passthrough (original)
|
|
195
|
-
"AZURE_TENANT_ID": "${env:AZURE_TENANT_ID}",
|
|
196
|
-
"AZURE_CLIENT_ID": "${env:AZURE_CLIENT_ID}",
|
|
197
|
-
"AZURE_CLIENT_SECRET": "${env:AZURE_CLIENT_SECRET}",
|
|
198
|
-
// Cost tracking passthrough (new)
|
|
199
|
-
...COST_ENV,
|
|
200
|
-
},
|
|
196
|
+
env: { ...COST_ENV },
|
|
201
197
|
};
|
|
202
198
|
} else {
|
|
203
199
|
missing.push(name);
|
|
@@ -236,8 +232,9 @@ function buildVscodeConfig(pythonBin, playwrightPort, integration) {
|
|
|
236
232
|
}
|
|
237
233
|
|
|
238
234
|
servers["playwright"] = {
|
|
239
|
-
type: "
|
|
240
|
-
|
|
235
|
+
type: "stdio",
|
|
236
|
+
command: "npx",
|
|
237
|
+
args: ["@playwright/mcp@latest", "--isolated"],
|
|
241
238
|
};
|
|
242
239
|
|
|
243
240
|
return { config: { servers }, missing };
|
|
@@ -246,9 +243,8 @@ function buildVscodeConfig(pythonBin, playwrightPort, integration) {
|
|
|
246
243
|
function printNextSteps(mode, playwrightPort) {
|
|
247
244
|
const isVscode = mode === "vscode";
|
|
248
245
|
console.log(`
|
|
249
|
-
${C.dim}
|
|
250
|
-
|
|
251
|
-
${C.dim}headless (CI): npx @playwright/mcp@latest --headless --port ${playwrightPort}${C.reset}
|
|
246
|
+
${C.dim}Playwright MCP is auto-started by the MCP framework (--isolated, no manual start needed).
|
|
247
|
+
For CI/headless: set PLAYWRIGHT_MCP_URL or start manually with --headless --isolated --port ${playwrightPort}${C.reset}
|
|
252
248
|
|
|
253
249
|
${isVscode
|
|
254
250
|
? `Reload VS Code window — all MCP servers will appear in the MCP panel.`
|
package/src/cli/cmd-skills.js
CHANGED
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
* windsurf → .windsurf/rules/
|
|
10
10
|
* both → claude + vscode
|
|
11
11
|
* print → stdout only
|
|
12
|
+
*
|
|
13
|
+
* Also installs ORCHESTRATION_RULES.md to project root (multi-step workflow reference).
|
|
12
14
|
*/
|
|
13
15
|
"use strict";
|
|
14
16
|
|
|
@@ -35,10 +37,11 @@ function readIntegrationPrefSk() {
|
|
|
35
37
|
}
|
|
36
38
|
|
|
37
39
|
// Resolve the skills bundled with this npm package
|
|
38
|
-
const PKG_ROOT
|
|
39
|
-
const SKILLS_DIR
|
|
40
|
-
const BEHAVIOR_MD
|
|
41
|
-
const
|
|
40
|
+
const PKG_ROOT = path.resolve(__dirname, "../..");
|
|
41
|
+
const SKILLS_DIR = path.join(PKG_ROOT, "skills");
|
|
42
|
+
const BEHAVIOR_MD = path.join(SKILLS_DIR, "AGENT-BEHAVIOR.md");
|
|
43
|
+
const ORCHESTRATION_MD = path.join(PKG_ROOT, "ORCHESTRATION_RULES.md");
|
|
44
|
+
const AGENTS_DIR = path.join(PKG_ROOT, ".github", "agents");
|
|
42
45
|
|
|
43
46
|
/** Copy a file, creating parent dirs as needed. */
|
|
44
47
|
function cp(src, dest) {
|
|
@@ -105,6 +108,15 @@ function installAgents(integration) {
|
|
|
105
108
|
agentFiles(integration).forEach((f) => info(path.basename(f)));
|
|
106
109
|
}
|
|
107
110
|
|
|
111
|
+
/**
 * Copy the bundled ORCHESTRATION_RULES.md into the current project root.
 *
 * Does nothing when the rules file is not shipped with this package.
 * On success, reports the install and a short usage hint.
 */
function installOrchestrationRules() {
  if (fs.existsSync(ORCHESTRATION_MD)) {
    cp(ORCHESTRATION_MD, path.join(CWD, "ORCHESTRATION_RULES.md"));
    ok(`ORCHESTRATION_RULES.md installed → project root`);
    info("Reference this file for multi-step QA workflow best practices");
  }
}
|
|
119
|
+
|
|
108
120
|
function installClaude(integration) {
|
|
109
121
|
const dest = path.join(CWD, ".claude", "skills");
|
|
110
122
|
// Copy entire skill directory (preserves references/ subdirectory)
|
|
@@ -118,6 +130,7 @@ function installClaude(integration) {
|
|
|
118
130
|
info("AGENT-BEHAVIOR.md → .claude/AGENT-BEHAVIOR.md");
|
|
119
131
|
skillEntries(integration).forEach((e) => info(`${e.name}/SKILL.md`));
|
|
120
132
|
installAgents(integration);
|
|
133
|
+
installOrchestrationRules();
|
|
121
134
|
printPlaywrightHint();
|
|
122
135
|
}
|
|
123
136
|
|
|
@@ -134,6 +147,7 @@ function installVscode(integration) {
|
|
|
134
147
|
info("AGENT-BEHAVIOR.md → .github/copilot-instructions/AGENT-BEHAVIOR.md");
|
|
135
148
|
skillEntries(integration).forEach((e) => info(`${e.name}/SKILL.md`));
|
|
136
149
|
installAgents(integration);
|
|
150
|
+
installOrchestrationRules();
|
|
137
151
|
printPlaywrightHint();
|
|
138
152
|
}
|
|
139
153
|
|
|
@@ -146,6 +160,7 @@ function installCursor(integration) {
|
|
|
146
160
|
cp(BEHAVIOR_MD, path.join(dest, "AGENT-BEHAVIOR.md"));
|
|
147
161
|
ok(`Skills installed → .cursor/rules/`);
|
|
148
162
|
installAgents(integration);
|
|
163
|
+
installOrchestrationRules();
|
|
149
164
|
printPlaywrightHint();
|
|
150
165
|
}
|
|
151
166
|
|
|
@@ -158,6 +173,7 @@ function installWindsurf(integration) {
|
|
|
158
173
|
cp(BEHAVIOR_MD, path.join(dest, "AGENT-BEHAVIOR.md"));
|
|
159
174
|
ok(`Skills installed → .windsurf/rules/`);
|
|
160
175
|
installAgents(integration);
|
|
176
|
+
installOrchestrationRules();
|
|
161
177
|
printPlaywrightHint();
|
|
162
178
|
}
|
|
163
179
|
|
|
@@ -165,6 +181,7 @@ function printSkills() {
|
|
|
165
181
|
console.log("\nAvailable skills:\n");
|
|
166
182
|
skillEntries().forEach((e) => console.log(` ${e.name}/SKILL.md`));
|
|
167
183
|
console.log(` AGENT-BEHAVIOR.md`);
|
|
184
|
+
console.log(` ORCHESTRATION_RULES.md (workflow reference)`);
|
|
168
185
|
}
|
|
169
186
|
|
|
170
187
|
/** Recursively copy a directory tree. */
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
install_hook.py — stlc_agents.shared.install_hook
|
|
3
|
+
─────────────────────────────────────────────────────
|
|
4
|
+
Called automatically by postinstall.js after `pip install qa-gentic-stlc-agents`.
|
|
5
|
+
Also callable manually: python -m stlc_agents.shared.install_hook
|
|
6
|
+
|
|
7
|
+
Applies the cost tracking patch to all 5 MCP server files by embedding
|
|
8
|
+
and running the same logic as scripts/apply_cost_tracking.py, but resolved
|
|
9
|
+
relative to the installed package location (works in site-packages, .venv, etc).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
import re
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
SERVERS = [
|
|
19
|
+
("agent_gherkin_generator", "qa-gherkin-generator"),
|
|
20
|
+
("agent_test_case_manager", "qa-test-case-manager"),
|
|
21
|
+
("agent_playwright_generator", "qa-playwright-generator"),
|
|
22
|
+
("agent_helix_writer", "qa-helix-writer"),
|
|
23
|
+
("agent_jira_manager", "qa-jira-manager"),
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
IMPORT_MARKER = "from stlc_agents.shared.cost_tracker import track"
|
|
27
|
+
TIME_IMPORT = "import time"
|
|
28
|
+
|
|
29
|
+
OLD_RETURN = (
|
|
30
|
+
'return [types.TextContent(type="text", '
|
|
31
|
+
'text=json.dumps(result, indent=2, ensure_ascii=False))]'
|
|
32
|
+
)
|
|
33
|
+
NEW_RETURN = "return track(result, tool_name=name, server={server!r}, t0=t0)"
|
|
34
|
+
|
|
35
|
+
OLD_ERR_BLOCK = """\
|
|
36
|
+
return [types.TextContent(
|
|
37
|
+
type="text",
|
|
38
|
+
text=json.dumps({"error": str(exc), "tool": name}, indent=2),
|
|
39
|
+
)]"""
|
|
40
|
+
|
|
41
|
+
NEW_ERR_BLOCK = """\
|
|
42
|
+
err_result = {{"error": str(exc), "tool": name}}
|
|
43
|
+
return track(err_result, tool_name=name, server={server!r}, t0=t0)"""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _root() -> Path:
|
|
47
|
+
"""Resolve the stlc_agents package root regardless of install method."""
|
|
48
|
+
import stlc_agents
|
|
49
|
+
return Path(stlc_agents.__file__).parent
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Matches the head of the MCP ``call_tool`` handler up to its opening ``try:``.
# Group 2 captures the indentation of ``try:`` so the timer line can reuse it
# instead of assuming a fixed indent width.
_CALL_TOOL_HEAD = re.compile(
    r"(@app\.call_tool\(\)\n"
    r"async def call_tool\(name: str, arguments: dict\)"
    r" -> list\[types\.TextContent\]:\n)"
    r"([ \t]+)(try:\n)"
)


def _import_end(src: str, match: re.Match) -> int:
    """Return the offset just past the import statement that *match* starts.

    A parenthesised import spans several lines; inserting text right after
    its first line would break the file, so skip to the line that closes it.
    Returns -1 when the closing line cannot be located.
    """
    code = match.group(0).split("#", 1)[0]  # ignore a trailing comment
    end = match.end()
    if "(" in code and ")" not in code:
        close = src.find(")", end)
        newline = src.find("\n", close) if close != -1 else -1
        if newline == -1:
            return -1
        end = newline + 1
    return end


def patch_server(agent_dir: str, server_name: str, root: Path) -> str:
    """Patch one MCP server file so its tool-call returns go through ``track``.

    The file is only rewritten when every piece of the patch can be applied:
    the timer line inside ``call_tool``, at least one rewritten return, and
    the ``track`` / ``time`` imports. A partial patch would leave the server
    referencing undefined names, so in that case nothing is written.

    Args:
        agent_dir: Name of the agent directory under *root*.
        server_name: Server name embedded in the generated ``track`` calls.
        root: Directory that contains the agent directories.

    Returns:
        ``'patched'`` when the file was rewritten, ``'already_patched'`` when
        the tracker import is already present, ``'not_found'`` when
        ``server.py`` does not exist, and ``'no_change'`` when the expected
        patterns were not found (the file is left untouched).
    """
    path = root / agent_dir / "server.py"
    if not path.exists():
        return "not_found"

    src = path.read_text(encoding="utf-8")
    if IMPORT_MARKER in src:
        return "already_patched"

    # 1. Start the timer at the top of call_tool(). The rewritten returns
    #    below reference ``t0``, so without this line they must not be applied.
    src, timer_hits = _CALL_TOOL_HEAD.subn(
        r"\g<1>\g<2>t0 = time.monotonic()\n\g<2>\g<3>", src, count=1
    )
    if not timer_hits:
        return "no_change"

    # 2. Route the result and error returns through track(). Matching the
    #    statement text alone covers every indentation level.
    if OLD_RETURN not in src and OLD_ERR_BLOCK not in src:
        return "no_change"
    src = src.replace(OLD_RETURN, NEW_RETURN.format(server=server_name))
    src = src.replace(OLD_ERR_BLOCK, NEW_ERR_BLOCK.format(server=server_name))

    # 3. Import track: after the last top-level `from stlc_agents...` import,
    #    otherwise after the dotenv import.
    tracker_import = IMPORT_MARKER + "\n"
    dotenv_import = "from dotenv import load_dotenv\n"
    pos = -1
    local_imports = list(
        re.finditer(r"^from stlc_agents\..+\n", src, re.MULTILINE)
    )
    if local_imports:
        pos = _import_end(src, local_imports[-1])
    if pos != -1:
        src = src[:pos] + tracker_import + src[pos:]
    elif dotenv_import in src:
        src = src.replace(dotenv_import, dotenv_import + tracker_import, 1)
    else:
        # Nowhere safe to put the import; the rewritten returns would fail.
        return "no_change"

    # 4. Make sure `import time` exists as a real top-level statement. A
    #    plain substring test would be fooled by e.g. `import timeit`.
    time_line = re.compile(
        rf"^{re.escape(TIME_IMPORT)}[ \t]*(?:#.*)?$", re.MULTILINE
    )
    if not time_line.search(src):
        src, sys_hits = re.subn(
            r"^import sys\n",
            "import sys\nimport time\n",
            src,
            count=1,
            flags=re.MULTILINE,
        )
        if not sys_hits:
            # No `import sys` anchor: keep it next to the tracker import.
            src = src.replace(tracker_import, "import time\n" + tracker_import, 1)

    path.write_text(src, encoding="utf-8")
    return "patched"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def apply_cost_tracking() -> None:
    """Entry point — called by postinstall.js and the console_script.

    Runs ``patch_server`` for every entry in ``SERVERS``, prints one status
    line per server, then prints a summary that reflects what happened:
    usage notes when at least one server was patched, an "already active"
    note only when every server was already patched, and otherwise a notice
    that some servers could not be activated.
    """
    root = _root()

    ok = "\x1b[32m✓\x1b[0m"
    skip = "\x1b[33m–\x1b[0m"
    err = "\x1b[31m✗\x1b[0m"

    print("\n stlc-agents · Activating cost tracking on MCP servers...\n")

    statuses = []
    for agent_dir, server_name in SERVERS:
        status = patch_server(agent_dir, server_name, root)
        statuses.append(status)
        if status == "patched":
            print(f" {ok} {agent_dir} ({server_name})")
        elif status == "already_patched":
            print(f" {skip} {agent_dir} — already active")
        elif status == "not_found":
            print(f" {err} {agent_dir}/server.py — not found (skip)")
        elif status == "no_change":
            print(f" {skip} {agent_dir} — no matching pattern (manual patch needed)")

    print()
    if "patched" in statuses:
        print(" Cost tracking is now active. On every MCP tool call you will see:")
        print(" [stlc-cost] <server> · <tool> ~<N>K tokens $<cost> (session: $<total>)")
        print()
        print(" Session logs: ~/.qa-stlc/cost-<session-id>.jsonl")
        print(" View report: qa-stlc cost")
        print(" View all: qa-stlc cost --all")
        print()
        print(" Environment variables:")
        print(" STLC_COST_TRACKING=false disable output")
        print(" STLC_CODING_AGENT_MODEL=<model> set your agent's model for exact pricing")
        print(" e.g. claude-sonnet-4-6 | claude-opus-4-6 | gpt-4o")
        print(" STLC_COST_LOG_DIR=<path> change log directory")
    elif all(status == "already_patched" for status in statuses):
        print(" All servers already have cost tracking active.")
    else:
        # Nothing was patched, but not because everything was already done:
        # at least one server was missing or did not match the patterns.
        print(" Cost tracking was not activated on every server — see the statuses above.")

    print()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def main() -> None:
    """Run the cost-tracking install hook (script / ``python -m`` entry)."""
    apply_cost_tracking()
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
if __name__ == "__main__":
|
|
154
|
+
main()
|