@trygentic/agentloop 0.16.0-alpha.11 → 0.18.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -12
- package/package.json +3 -3
- package/templates/agents/_base/proactive.bt.json +43 -0
- package/templates/agents/_base/reactive-delegation.bt.json +73 -0
- package/templates/agents/_base/reactive-message.bt.json +58 -0
- package/templates/agents/_base/reactive-task.bt.json +51 -0
- package/templates/agents/chat/chat.bt.json +70 -20
- package/templates/agents/chat/chat.md +36 -19
- package/templates/agents/engineer/engineer.bt.json +951 -346
- package/templates/agents/engineer/engineer.md +86 -33
- package/templates/agents/merge-resolver/merge-resolver.bt.json +217 -0
- package/templates/agents/merge-resolver/merge-resolver.md +297 -0
- package/templates/agents/orchestrator/orchestrator.bt.json +1 -0
- package/templates/agents/orchestrator/orchestrator.md +17 -92
- package/templates/agents/product-manager/product-manager.bt.json +215 -25
- package/templates/agents/product-manager/product-manager.md +86 -13
- package/templates/agents/qa-tester/qa-tester.bt.json +299 -88
- package/templates/agents/qa-tester/qa-tester.md +59 -12
- package/templates/agents/release/release.bt.json +219 -0
- package/templates/agents/release/release.md +164 -0
- package/templates/examples/engineer.md.example +4 -4
- package/templates/examples/example-custom-agent.md.example +4 -4
- package/templates/examples/example-plugin.js.example +1 -1
- package/templates/plugins/qa-e2e-maestro/qa-e2e-maestro.bt.json +1191 -0
- package/templates/plugins/qa-e2e-maestro/qa-e2e-maestro.md +923 -0
- package/templates/plugins/qa-e2e-scenario/qa-e2e-scenario.md +85 -0
- package/templates/non-core-templates/container.md +0 -173
- package/templates/non-core-templates/dag-planner.md +0 -96
- package/templates/non-core-templates/internal/cli-tester.md +0 -218
- package/templates/non-core-templates/internal/qa-tester.md +0 -300
- package/templates/non-core-templates/internal/tui-designer.md +0 -370
- package/templates/non-core-templates/internal/tui-tester.md +0 -125
- package/templates/non-core-templates/maestro-qa.md +0 -240
- package/templates/non-core-templates/merge-resolver.md +0 -150
- package/templates/non-core-templates/project-detection.md +0 -75
- package/templates/non-core-templates/questionnaire.md +0 -124
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
"name": "qa-tester-continuous-agent-tree",
|
|
3
3
|
"description": "Continuous behavior tree for the QA Tester agent. Loops forever, waiting for task assignments from the orchestrator. Handles test execution, environment issues, and provides feedback to engineers without process restarts.",
|
|
4
4
|
"version": "1.0.0",
|
|
5
|
+
"mode": "reactive",
|
|
5
6
|
"tree": {
|
|
6
7
|
"type": "root",
|
|
7
8
|
"child": {
|
|
@@ -89,7 +90,7 @@
|
|
|
89
90
|
},
|
|
90
91
|
{
|
|
91
92
|
"type": "action",
|
|
92
|
-
"call": "
|
|
93
|
+
"call": "ReportTriggerPass"
|
|
93
94
|
}
|
|
94
95
|
]
|
|
95
96
|
},
|
|
@@ -105,18 +106,41 @@
|
|
|
105
106
|
"type": "llm-action",
|
|
106
107
|
"name": "AnalyzeChanges",
|
|
107
108
|
"prompt": "You are a QA agent analyzing changes. Review the task and git diff.\n\nTask: {{taskDescription}}\nGit Diff: {{gitDiff}}\nProject Info: {{projectInfo}}\n\nBriefly summarize what was changed.",
|
|
108
|
-
"contextKeys": [
|
|
109
|
+
"contextKeys": [
|
|
110
|
+
"taskDescription",
|
|
111
|
+
"taskTitle",
|
|
112
|
+
"gitDiff",
|
|
113
|
+
"projectInfo"
|
|
114
|
+
],
|
|
109
115
|
"outputSchema": {
|
|
110
116
|
"type": "object",
|
|
111
117
|
"properties": {
|
|
112
|
-
"changesSummary": {
|
|
113
|
-
|
|
114
|
-
|
|
118
|
+
"changesSummary": {
|
|
119
|
+
"type": "string"
|
|
120
|
+
},
|
|
121
|
+
"affectedAreas": {
|
|
122
|
+
"type": "array",
|
|
123
|
+
"items": {
|
|
124
|
+
"type": "string"
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
"riskLevel": {
|
|
128
|
+
"type": "string",
|
|
129
|
+
"enum": [
|
|
130
|
+
"low",
|
|
131
|
+
"medium",
|
|
132
|
+
"high"
|
|
133
|
+
]
|
|
134
|
+
}
|
|
115
135
|
},
|
|
116
|
-
"required": [
|
|
136
|
+
"required": [
|
|
137
|
+
"changesSummary",
|
|
138
|
+
"riskLevel"
|
|
139
|
+
]
|
|
117
140
|
},
|
|
118
141
|
"outputKey": "changeAnalysis",
|
|
119
|
-
"temperature": 0.3
|
|
142
|
+
"temperature": 0.3,
|
|
143
|
+
"allowedTools": []
|
|
120
144
|
},
|
|
121
145
|
{
|
|
122
146
|
"type": "action",
|
|
@@ -138,18 +162,49 @@
|
|
|
138
162
|
"type": "llm-action",
|
|
139
163
|
"name": "DetermineTestCommand",
|
|
140
164
|
"prompt": "Determine the correct test command for this project.\n\nProject Info: {{projectInfo}}\n\nEngineer Test Setup (from engineer's completion comment): {{engineerTestSetup}}\n\nIMPORTANT: If engineerTestSetup is provided by the engineer, PREFER using their testCommand and testDirectory. The engineer already verified these work. Only override if you detect an obvious error.\n\nIf engineerTestSetup.testDirectory is set (e.g., \"frontend\"), the test command must be run from that subdirectory. Prefix with: cd <testDirectory> && <testCommand>\n\nCRITICAL: Check the runtime/package manager FIRST before choosing a test command.\n\nRuntime detection priority (check in this order):\n1. If projectInfo.primaryType is 'bun' OR detectedFiles include 'bun.lockb', 'bun.lock', or 'bunfig.toml' OR packageManager is 'bun': this is a BUN project. Use 'bun test'. Set projectType to 'bun'.\n2. If detectedFiles include 'yarn.lock': use 'yarn test'. Set projectType to 'node-yarn'.\n3. If detectedFiles include 'pnpm-lock.yaml': use 'pnpm test'. Set projectType to 'node-pnpm'.\n4. If only 'package.json' is detected with no specific lock file: use 'npm test'. Set projectType to 'node'.\n5. If projectInfo.primaryType is 'expo' OR detectedFiles include 'app.json' with 'expo' key, 'app.config.js', 'app.config.ts', OR package.json has 'expo' dependency: this is an EXPO/React Native project. Set projectType to 'expo'.\n - If package.json has a 'test' script that is valid (not a placeholder), use that (e.g., 'npm test' or 'jest')\n - If jest-expo is in dependencies/devDependencies, use 'npx jest'\n - If @testing-library/react-native is present, use 'npx jest'\n - If no test runner is found, use 'npx jest --passWithNoTests'\n\nOther project types:\n- Rust (Cargo.toml): cargo test\n- Python (pyproject.toml, setup.py): pytest\n- Go (go.mod): go test ./...\n\nDo NOT default to 'npm test' when Bun indicators are present. A project with bun.lock or bun.lockb is a Bun project, not a Node.js project.\nDo NOT default to 'npm test' for Expo projects that have no test script. Use 'npx jest' instead.",
|
|
141
|
-
"contextKeys": [
|
|
165
|
+
"contextKeys": [
|
|
166
|
+
"projectInfo",
|
|
167
|
+
"testFilesFound",
|
|
168
|
+
"engineerTestSetup"
|
|
169
|
+
],
|
|
142
170
|
"outputSchema": {
|
|
143
171
|
"type": "object",
|
|
144
172
|
"properties": {
|
|
145
|
-
"projectType": {
|
|
146
|
-
|
|
147
|
-
|
|
173
|
+
"projectType": {
|
|
174
|
+
"type": "string",
|
|
175
|
+
"enum": [
|
|
176
|
+
"bun",
|
|
177
|
+
"node",
|
|
178
|
+
"node-yarn",
|
|
179
|
+
"node-pnpm",
|
|
180
|
+
"expo",
|
|
181
|
+
"rust",
|
|
182
|
+
"python",
|
|
183
|
+
"go",
|
|
184
|
+
"java-maven",
|
|
185
|
+
"java-gradle",
|
|
186
|
+
"ruby",
|
|
187
|
+
"php",
|
|
188
|
+
"elixir",
|
|
189
|
+
"other"
|
|
190
|
+
]
|
|
191
|
+
},
|
|
192
|
+
"testCommand": {
|
|
193
|
+
"type": "string"
|
|
194
|
+
},
|
|
195
|
+
"reasoning": {
|
|
196
|
+
"type": "string"
|
|
197
|
+
}
|
|
148
198
|
},
|
|
149
|
-
"required": [
|
|
199
|
+
"required": [
|
|
200
|
+
"projectType",
|
|
201
|
+
"testCommand",
|
|
202
|
+
"reasoning"
|
|
203
|
+
]
|
|
150
204
|
},
|
|
151
205
|
"outputKey": "testCommandInfo",
|
|
152
|
-
"temperature": 0.2
|
|
206
|
+
"temperature": 0.2,
|
|
207
|
+
"allowedTools": []
|
|
153
208
|
},
|
|
154
209
|
{
|
|
155
210
|
"type": "action",
|
|
@@ -180,36 +235,100 @@
|
|
|
180
235
|
{
|
|
181
236
|
"type": "llm-action",
|
|
182
237
|
"name": "AnalyzeTestResults",
|
|
183
|
-
"prompt": "Analyze the test results in the context of what files were changed.\n\nTest Output: {{testResults}}\nTest Command: {{testCommandInfo}}\nGit Diff (files changed by engineer): {{gitDiff}}\nTask Files: {{taskFiles}}\nChange Analysis: {{changeAnalysis}}\n\nYour job is to determine if the engineer's changes CAUSED any test failures. You MUST distinguish between:\n\n1. **Task-related failures**: Tests that fail because of code the engineer changed or added. These are in files listed in the git diff or task files, or test files that directly import/test those changed modules. These are legitimate failures.\n\n2. **Pre-existing/unrelated failures**: Tests that fail in modules the engineer did NOT touch. These failures existed BEFORE the engineer's changes and are NOT the engineer's responsibility. Do NOT count these as failures.\n\n3. **Environment issues**: Test runner not found (exit code 127), dependencies not installed, 'command not found' errors, missing optional dependencies (@rollup/rollup-*, @esbuild/*), module resolution errors. These are QA environment issues, NOT code issues.\n\nIMPORTANT: If ONLY environment issues occurred and there are NO indications of task-related failures (taskRelatedFailures is 0 or null), set 'passed' to true
|
|
184
|
-
"contextKeys": [
|
|
238
|
+
"prompt": "Analyze the test results in the context of what files were changed.\n\nTest Output: {{testResults}}\nTest Command: {{testCommandInfo}}\nGit Diff (files changed by engineer): {{gitDiff}}\nTask Files: {{taskFiles}}\nChange Analysis: {{changeAnalysis}}\n\nYour job is to determine if the engineer's changes CAUSED any test failures. You MUST distinguish between:\n\n1. **Task-related failures**: Tests that fail because of code the engineer changed or added. These are in files listed in the git diff or task files, or test files that directly import/test those changed modules. These are legitimate failures.\n\n2. **Pre-existing/unrelated failures**: Tests that fail in modules the engineer did NOT touch. These failures existed BEFORE the engineer's changes and are NOT the engineer's responsibility. Do NOT count these as failures.\n\n3. **Environment issues**: Test runner not found (exit code 127), dependencies not installed, 'command not found' errors, missing optional dependencies (@rollup/rollup-*, @esbuild/*), module resolution errors. These are QA environment issues, NOT code issues.\n\nIMPORTANT: If ONLY environment issues occurred and there are NO indications of task-related failures (taskRelatedFailures is 0 or null), set 'passed' to true \u2014 the engineer's code is not at fault for environment problems. Classify failures as 'environment'.\n\nSet 'passed' to true if:\n- Tests actually executed AND there are NO task-related failures, OR\n- Tests did NOT execute due to environment issues AND there are NO task-related failures detected\n\nSet 'passed' to false if:\n- There are task-related failures (regardless of whether other environment issues exist)\n\nFor each failure, classify it as 'task-related', 'pre-existing', or 'environment' in the classification field.",
|
|
239
|
+
"contextKeys": [
|
|
240
|
+
"testResults",
|
|
241
|
+
"testCommandInfo",
|
|
242
|
+
"changeAnalysis",
|
|
243
|
+
"gitDiff",
|
|
244
|
+
"taskFiles",
|
|
245
|
+
"engineerTestSetup"
|
|
246
|
+
],
|
|
185
247
|
"outputSchema": {
|
|
186
248
|
"type": "object",
|
|
187
249
|
"properties": {
|
|
188
|
-
"passed": {
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
"
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
250
|
+
"passed": {
|
|
251
|
+
"type": "boolean",
|
|
252
|
+
"description": "true if no task-related failures exist (even if tests did not run due to environment issues). false only if there are task-related failures."
|
|
253
|
+
},
|
|
254
|
+
"testsActuallyRan": {
|
|
255
|
+
"type": "boolean",
|
|
256
|
+
"description": "true if tests actually executed, false if blocked by environment issues"
|
|
257
|
+
},
|
|
258
|
+
"totalTests": {
|
|
259
|
+
"type": [
|
|
260
|
+
"number",
|
|
261
|
+
"null"
|
|
262
|
+
]
|
|
263
|
+
},
|
|
264
|
+
"passedTests": {
|
|
265
|
+
"type": [
|
|
266
|
+
"number",
|
|
267
|
+
"null"
|
|
268
|
+
]
|
|
269
|
+
},
|
|
270
|
+
"failedTests": {
|
|
271
|
+
"type": [
|
|
272
|
+
"number",
|
|
273
|
+
"null"
|
|
274
|
+
]
|
|
275
|
+
},
|
|
276
|
+
"taskRelatedFailures": {
|
|
277
|
+
"type": [
|
|
278
|
+
"number",
|
|
279
|
+
"null"
|
|
280
|
+
],
|
|
281
|
+
"description": "Number of failures caused by the engineer's changes"
|
|
282
|
+
},
|
|
283
|
+
"preExistingFailures": {
|
|
284
|
+
"type": [
|
|
285
|
+
"number",
|
|
286
|
+
"null"
|
|
287
|
+
],
|
|
288
|
+
"description": "Number of failures that existed before the engineer's changes"
|
|
289
|
+
},
|
|
290
|
+
"environmentIssues": {
|
|
291
|
+
"type": [
|
|
292
|
+
"number",
|
|
293
|
+
"null"
|
|
294
|
+
],
|
|
295
|
+
"description": "Number of environment-related issues (missing deps, command not found, etc.)"
|
|
296
|
+
},
|
|
196
297
|
"failures": {
|
|
197
298
|
"type": "array",
|
|
198
299
|
"items": {
|
|
199
300
|
"type": "object",
|
|
200
301
|
"properties": {
|
|
201
|
-
"testName": {
|
|
202
|
-
|
|
203
|
-
|
|
302
|
+
"testName": {
|
|
303
|
+
"type": "string"
|
|
304
|
+
},
|
|
305
|
+
"error": {
|
|
306
|
+
"type": "string"
|
|
307
|
+
},
|
|
308
|
+
"classification": {
|
|
309
|
+
"type": "string",
|
|
310
|
+
"enum": [
|
|
311
|
+
"task-related",
|
|
312
|
+
"pre-existing",
|
|
313
|
+
"environment"
|
|
314
|
+
],
|
|
315
|
+
"description": "Whether this failure is caused by the engineer's changes, pre-existing, or an environment issue"
|
|
316
|
+
}
|
|
204
317
|
}
|
|
205
318
|
}
|
|
206
319
|
},
|
|
207
|
-
"summary": {
|
|
320
|
+
"summary": {
|
|
321
|
+
"type": "string"
|
|
322
|
+
}
|
|
208
323
|
},
|
|
209
|
-
"required": [
|
|
324
|
+
"required": [
|
|
325
|
+
"passed",
|
|
326
|
+
"summary"
|
|
327
|
+
]
|
|
210
328
|
},
|
|
211
329
|
"outputKey": "analyzedTestResults",
|
|
212
|
-
"temperature": 0.2
|
|
330
|
+
"temperature": 0.2,
|
|
331
|
+
"allowedTools": []
|
|
213
332
|
},
|
|
214
333
|
{
|
|
215
334
|
"type": "action",
|
|
@@ -296,25 +415,39 @@
|
|
|
296
415
|
{
|
|
297
416
|
"type": "llm-condition",
|
|
298
417
|
"name": "TestsPassed",
|
|
299
|
-
"prompt": "Based on the analyzed test results, did the engineer's changes pass QA?\n\nTest Results: {{analyzedTestResults}}\nGit Diff: {{gitDiff}}\nTask Files: {{taskFiles}}\nEnvironment Retry Count: {{envRetryCount}}\n\nReturn true if the engineer's changes did NOT introduce any new test failures. Environment issues alone should NOT cause rejection.\n\nCRITICAL RULES:\n1. If 'passed' is true in analyzedTestResults, return true.\n2. If 'taskRelatedFailures' is 0 or null, return true
|
|
300
|
-
"contextKeys": [
|
|
418
|
+
"prompt": "Based on the analyzed test results, did the engineer's changes pass QA?\n\nTest Results: {{analyzedTestResults}}\nGit Diff: {{gitDiff}}\nTask Files: {{taskFiles}}\nEnvironment Retry Count: {{envRetryCount}}\n\nReturn true if the engineer's changes did NOT introduce any new test failures. Environment issues alone should NOT cause rejection.\n\nCRITICAL RULES:\n1. If 'passed' is true in analyzedTestResults, return true.\n2. If 'taskRelatedFailures' is 0 or null, return true \u2014 even if there are pre-existing failures or environment issues. The engineer's code is not at fault.\n3. Pre-existing failures (tests failing in code the engineer did NOT touch) do NOT count as the engineer's fault. Return true.\n4. ONLY return false if there are actual task-related failures (classification: 'task-related') \u2014 failures directly caused by the engineer's changes.\n5. If 'testsActuallyRan' is false AND 'taskRelatedFailures' is 0 or null, return true \u2014 environment issues that prevented tests from running are NOT the engineer's fault. The task should be approved.\n6. If 'testsActuallyRan' is false AND there ARE task-related failures detected, return false.",
|
|
419
|
+
"contextKeys": [
|
|
420
|
+
"analyzedTestResults",
|
|
421
|
+
"gitDiff",
|
|
422
|
+
"taskFiles",
|
|
423
|
+
"envRetryCount"
|
|
424
|
+
],
|
|
301
425
|
"confidenceThreshold": 0.8,
|
|
302
|
-
"fallbackValue": true
|
|
426
|
+
"fallbackValue": true,
|
|
427
|
+
"allowedTools": []
|
|
303
428
|
},
|
|
304
429
|
{
|
|
305
430
|
"type": "llm-action",
|
|
306
431
|
"name": "WriteApprovalComment",
|
|
307
432
|
"prompt": "Write a brief approval comment.\n\nTask: {{taskDescription}}\nTest Results: {{analyzedTestResults}}\n\nKeep it short. If there were pre-existing test failures (not caused by the engineer), mention them briefly as known pre-existing issues that are not blocking.",
|
|
308
|
-
"contextKeys": [
|
|
433
|
+
"contextKeys": [
|
|
434
|
+
"taskDescription",
|
|
435
|
+
"analyzedTestResults"
|
|
436
|
+
],
|
|
309
437
|
"outputSchema": {
|
|
310
438
|
"type": "object",
|
|
311
439
|
"properties": {
|
|
312
|
-
"comment": {
|
|
440
|
+
"comment": {
|
|
441
|
+
"type": "string"
|
|
442
|
+
}
|
|
313
443
|
},
|
|
314
|
-
"required": [
|
|
444
|
+
"required": [
|
|
445
|
+
"comment"
|
|
446
|
+
]
|
|
315
447
|
},
|
|
316
448
|
"outputKey": "approvalComment",
|
|
317
|
-
"temperature": 0.4
|
|
449
|
+
"temperature": 0.4,
|
|
450
|
+
"allowedTools": []
|
|
318
451
|
},
|
|
319
452
|
{
|
|
320
453
|
"type": "action",
|
|
@@ -331,7 +464,7 @@
|
|
|
331
464
|
"children": [
|
|
332
465
|
{
|
|
333
466
|
"type": "sequence",
|
|
334
|
-
"comment": "Lint passed (or no linter) -
|
|
467
|
+
"comment": "Lint passed (or no linter) - approve (PR creation handled by release agent)",
|
|
335
468
|
"children": [
|
|
336
469
|
{
|
|
337
470
|
"type": "condition",
|
|
@@ -339,15 +472,7 @@
|
|
|
339
472
|
},
|
|
340
473
|
{
|
|
341
474
|
"type": "action",
|
|
342
|
-
"call": "
|
|
343
|
-
},
|
|
344
|
-
{
|
|
345
|
-
"type": "action",
|
|
346
|
-
"call": "GitCreatePR"
|
|
347
|
-
},
|
|
348
|
-
{
|
|
349
|
-
"type": "action",
|
|
350
|
-
"call": "RequestStatusDone"
|
|
475
|
+
"call": "ReportTriggerPass"
|
|
351
476
|
}
|
|
352
477
|
]
|
|
353
478
|
},
|
|
@@ -365,7 +490,7 @@
|
|
|
365
490
|
},
|
|
366
491
|
{
|
|
367
492
|
"type": "action",
|
|
368
|
-
"call": "
|
|
493
|
+
"call": "ReportTriggerFail"
|
|
369
494
|
}
|
|
370
495
|
]
|
|
371
496
|
}
|
|
@@ -373,6 +498,27 @@
|
|
|
373
498
|
}
|
|
374
499
|
]
|
|
375
500
|
},
|
|
501
|
+
{
|
|
502
|
+
"type": "sequence",
|
|
503
|
+
"comment": "ALL failures are pre-existing (unrelated to task) and block testing completely - escalate rather than bouncing to engineer",
|
|
504
|
+
"children": [
|
|
505
|
+
{
|
|
506
|
+
"type": "condition",
|
|
507
|
+
"call": "IsPreExistingBugBlocking",
|
|
508
|
+
"comment": "Check if ALL failures are pre-existing with 0 task-related failures"
|
|
509
|
+
},
|
|
510
|
+
{
|
|
511
|
+
"type": "action",
|
|
512
|
+
"call": "EscalatePreExistingBug",
|
|
513
|
+
"comment": "Create bug-fix task, add DAG dependency blocking original task, notify PM and merge-resolver"
|
|
514
|
+
},
|
|
515
|
+
{
|
|
516
|
+
"type": "action",
|
|
517
|
+
"call": "ReportTriggerFail",
|
|
518
|
+
"comment": "Move task to todo - DAG dependency on the bug-fix task prevents orchestrator from re-assigning until fix is done. NOT a rejection of engineer's work."
|
|
519
|
+
}
|
|
520
|
+
]
|
|
521
|
+
},
|
|
376
522
|
{
|
|
377
523
|
"type": "sequence",
|
|
378
524
|
"comment": "Tests failed or did not execute - handle appropriately",
|
|
@@ -397,19 +543,41 @@
|
|
|
397
543
|
"type": "llm-action",
|
|
398
544
|
"name": "DocumentEnvironmentBlocker",
|
|
399
545
|
"prompt": "Document that the task is blocked due to unresolved environment issues.\n\nTask: {{taskDescription}}\nTest Results: {{analyzedTestResults}}\nRaw Test Output: {{testResults}}\nFix Attempts: {{environmentFixResults}}\nRetry Count: {{envRetryCount}}\n\nExplain that:\n1. The QA agent attempted to run tests but encountered environment issues\n2. Multiple attempts to fix the environment (installing dependencies, etc.) failed\n3. This is NOT a code issue with the engineer's changes\n4. Human intervention is needed to resolve the test environment\n\nBe specific about what environment issue occurred (missing dependency, command not found, etc.).",
|
|
400
|
-
"contextKeys": [
|
|
546
|
+
"contextKeys": [
|
|
547
|
+
"taskDescription",
|
|
548
|
+
"analyzedTestResults",
|
|
549
|
+
"testResults",
|
|
550
|
+
"environmentFixResults",
|
|
551
|
+
"envRetryCount"
|
|
552
|
+
],
|
|
401
553
|
"outputSchema": {
|
|
402
554
|
"type": "object",
|
|
403
555
|
"properties": {
|
|
404
|
-
"rejectionReason": {
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
"
|
|
556
|
+
"rejectionReason": {
|
|
557
|
+
"type": "string"
|
|
558
|
+
},
|
|
559
|
+
"environmentIssue": {
|
|
560
|
+
"type": "string"
|
|
561
|
+
},
|
|
562
|
+
"fixAttempts": {
|
|
563
|
+
"type": "array",
|
|
564
|
+
"items": {
|
|
565
|
+
"type": "string"
|
|
566
|
+
}
|
|
567
|
+
},
|
|
568
|
+
"comment": {
|
|
569
|
+
"type": "string"
|
|
570
|
+
}
|
|
408
571
|
},
|
|
409
|
-
"required": [
|
|
572
|
+
"required": [
|
|
573
|
+
"rejectionReason",
|
|
574
|
+
"environmentIssue",
|
|
575
|
+
"comment"
|
|
576
|
+
]
|
|
410
577
|
},
|
|
411
578
|
"outputKey": "rejectionDetails",
|
|
412
|
-
"temperature": 0.3
|
|
579
|
+
"temperature": 0.3,
|
|
580
|
+
"allowedTools": []
|
|
413
581
|
},
|
|
414
582
|
{
|
|
415
583
|
"type": "action",
|
|
@@ -417,7 +585,7 @@
|
|
|
417
585
|
},
|
|
418
586
|
{
|
|
419
587
|
"type": "action",
|
|
420
|
-
"call": "
|
|
588
|
+
"call": "ReportTriggerFail"
|
|
421
589
|
}
|
|
422
590
|
]
|
|
423
591
|
},
|
|
@@ -428,29 +596,54 @@
|
|
|
428
596
|
{
|
|
429
597
|
"type": "llm-action",
|
|
430
598
|
"name": "DocumentRejection",
|
|
431
|
-
"prompt": "Document why the task is rejected based ONLY on task-related test failures.\n\nTask: {{taskDescription}}\nTest Results: {{analyzedTestResults}}\nGit Diff: {{gitDiff}}\nTask Files: {{taskFiles}}\n\nExplain what failed and what needs fixing. ONLY include failures that are classified as 'task-related'
|
|
432
|
-
"contextKeys": [
|
|
599
|
+
"prompt": "Document why the task is rejected based ONLY on task-related test failures.\n\nTask: {{taskDescription}}\nTest Results: {{analyzedTestResults}}\nGit Diff: {{gitDiff}}\nTask Files: {{taskFiles}}\n\nExplain what failed and what needs fixing. ONLY include failures that are classified as 'task-related' \u2014 failures in code the engineer actually changed.\n\nCRITICAL RULES:\n1. NEVER reject for pre-existing failures (tests failing in code the engineer did NOT touch).\n2. NEVER reject because dependencies were not installed, test runners were not found (exit code 127), or the test environment was not set up.\n3. ONLY reject for actual code failures in the engineer's changed files: tests that fail due to bugs, missing implementations, incorrect logic, or code that does not meet acceptance criteria.\n4. If the only failures are pre-existing or environment-related, this rejection should NOT have been reached \u2014 but if it was, explain that the failures are not task-related and recommend approval.",
|
|
600
|
+
"contextKeys": [
|
|
601
|
+
"taskDescription",
|
|
602
|
+
"analyzedTestResults",
|
|
603
|
+
"testResults",
|
|
604
|
+
"gitDiff",
|
|
605
|
+
"taskFiles"
|
|
606
|
+
],
|
|
433
607
|
"outputSchema": {
|
|
434
608
|
"type": "object",
|
|
435
609
|
"properties": {
|
|
436
|
-
"rejectionReason": {
|
|
610
|
+
"rejectionReason": {
|
|
611
|
+
"type": "string"
|
|
612
|
+
},
|
|
437
613
|
"requiredFixes": {
|
|
438
614
|
"type": "array",
|
|
439
615
|
"items": {
|
|
440
616
|
"type": "object",
|
|
441
617
|
"properties": {
|
|
442
|
-
"issue": {
|
|
443
|
-
|
|
444
|
-
|
|
618
|
+
"issue": {
|
|
619
|
+
"type": "string"
|
|
620
|
+
},
|
|
621
|
+
"suggestedFix": {
|
|
622
|
+
"type": "string"
|
|
623
|
+
},
|
|
624
|
+
"priority": {
|
|
625
|
+
"type": "string",
|
|
626
|
+
"enum": [
|
|
627
|
+
"high",
|
|
628
|
+
"medium",
|
|
629
|
+
"low"
|
|
630
|
+
]
|
|
631
|
+
}
|
|
445
632
|
}
|
|
446
633
|
}
|
|
447
634
|
},
|
|
448
|
-
"comment": {
|
|
635
|
+
"comment": {
|
|
636
|
+
"type": "string"
|
|
637
|
+
}
|
|
449
638
|
},
|
|
450
|
-
"required": [
|
|
639
|
+
"required": [
|
|
640
|
+
"rejectionReason",
|
|
641
|
+
"comment"
|
|
642
|
+
]
|
|
451
643
|
},
|
|
452
644
|
"outputKey": "rejectionDetails",
|
|
453
|
-
"temperature": 0.3
|
|
645
|
+
"temperature": 0.3,
|
|
646
|
+
"allowedTools": []
|
|
454
647
|
},
|
|
455
648
|
{
|
|
456
649
|
"type": "action",
|
|
@@ -458,7 +651,7 @@
|
|
|
458
651
|
},
|
|
459
652
|
{
|
|
460
653
|
"type": "action",
|
|
461
|
-
"call": "
|
|
654
|
+
"call": "ReportTriggerFail"
|
|
462
655
|
}
|
|
463
656
|
]
|
|
464
657
|
}
|
|
@@ -478,9 +671,13 @@
|
|
|
478
671
|
"type": "llm-condition",
|
|
479
672
|
"name": "IsLowRisk",
|
|
480
673
|
"prompt": "Is this a low-risk change that can be approved without tests?\n\nChange Analysis: {{changeAnalysis}}\nTask: {{taskDescription}}\n\nLow risk = docs, config, minor refactoring.\nHigh risk = core logic, security, data.\n\nReturn true if low risk.",
|
|
481
|
-
"contextKeys": [
|
|
674
|
+
"contextKeys": [
|
|
675
|
+
"changeAnalysis",
|
|
676
|
+
"taskDescription"
|
|
677
|
+
],
|
|
482
678
|
"confidenceThreshold": 0.7,
|
|
483
|
-
"fallbackValue": false
|
|
679
|
+
"fallbackValue": false,
|
|
680
|
+
"allowedTools": []
|
|
484
681
|
},
|
|
485
682
|
{
|
|
486
683
|
"type": "action",
|
|
@@ -497,7 +694,7 @@
|
|
|
497
694
|
"children": [
|
|
498
695
|
{
|
|
499
696
|
"type": "sequence",
|
|
500
|
-
"comment": "Lint passed (or no linter) -
|
|
697
|
+
"comment": "Lint passed (or no linter) - approve (PR creation handled by release agent)",
|
|
501
698
|
"children": [
|
|
502
699
|
{
|
|
503
700
|
"type": "condition",
|
|
@@ -505,15 +702,7 @@
|
|
|
505
702
|
},
|
|
506
703
|
{
|
|
507
704
|
"type": "action",
|
|
508
|
-
"call": "
|
|
509
|
-
},
|
|
510
|
-
{
|
|
511
|
-
"type": "action",
|
|
512
|
-
"call": "GitCreatePR"
|
|
513
|
-
},
|
|
514
|
-
{
|
|
515
|
-
"type": "action",
|
|
516
|
-
"call": "RequestStatusDone"
|
|
705
|
+
"call": "ReportTriggerPass"
|
|
517
706
|
}
|
|
518
707
|
]
|
|
519
708
|
},
|
|
@@ -531,7 +720,7 @@
|
|
|
531
720
|
},
|
|
532
721
|
{
|
|
533
722
|
"type": "action",
|
|
534
|
-
"call": "
|
|
723
|
+
"call": "ReportTriggerFail"
|
|
535
724
|
}
|
|
536
725
|
]
|
|
537
726
|
}
|
|
@@ -547,28 +736,50 @@
|
|
|
547
736
|
"type": "llm-action",
|
|
548
737
|
"name": "DocumentTestsRequired",
|
|
549
738
|
"prompt": "Explain why tests are needed.\n\nTask: {{taskDescription}}\nChange Analysis: {{changeAnalysis}}\n\nThis is a high-risk change that needs tests.",
|
|
550
|
-
"contextKeys": [
|
|
739
|
+
"contextKeys": [
|
|
740
|
+
"taskDescription",
|
|
741
|
+
"changeAnalysis"
|
|
742
|
+
],
|
|
551
743
|
"outputSchema": {
|
|
552
744
|
"type": "object",
|
|
553
745
|
"properties": {
|
|
554
|
-
"rejectionReason": {
|
|
746
|
+
"rejectionReason": {
|
|
747
|
+
"type": "string"
|
|
748
|
+
},
|
|
555
749
|
"requiredFixes": {
|
|
556
750
|
"type": "array",
|
|
557
751
|
"items": {
|
|
558
752
|
"type": "object",
|
|
559
753
|
"properties": {
|
|
560
|
-
"issue": {
|
|
561
|
-
|
|
562
|
-
|
|
754
|
+
"issue": {
|
|
755
|
+
"type": "string"
|
|
756
|
+
},
|
|
757
|
+
"suggestedFix": {
|
|
758
|
+
"type": "string"
|
|
759
|
+
},
|
|
760
|
+
"priority": {
|
|
761
|
+
"type": "string",
|
|
762
|
+
"enum": [
|
|
763
|
+
"high",
|
|
764
|
+
"medium",
|
|
765
|
+
"low"
|
|
766
|
+
]
|
|
767
|
+
}
|
|
563
768
|
}
|
|
564
769
|
}
|
|
565
770
|
},
|
|
566
|
-
"comment": {
|
|
771
|
+
"comment": {
|
|
772
|
+
"type": "string"
|
|
773
|
+
}
|
|
567
774
|
},
|
|
568
|
-
"required": [
|
|
775
|
+
"required": [
|
|
776
|
+
"rejectionReason",
|
|
777
|
+
"comment"
|
|
778
|
+
]
|
|
569
779
|
},
|
|
570
780
|
"outputKey": "rejectionDetails",
|
|
571
|
-
"temperature": 0.3
|
|
781
|
+
"temperature": 0.3,
|
|
782
|
+
"allowedTools": []
|
|
572
783
|
},
|
|
573
784
|
{
|
|
574
785
|
"type": "action",
|
|
@@ -576,7 +787,7 @@
|
|
|
576
787
|
},
|
|
577
788
|
{
|
|
578
789
|
"type": "action",
|
|
579
|
-
"call": "
|
|
790
|
+
"call": "ReportTriggerFail"
|
|
580
791
|
}
|
|
581
792
|
]
|
|
582
793
|
}
|
|
@@ -598,7 +809,7 @@
|
|
|
598
809
|
},
|
|
599
810
|
{
|
|
600
811
|
"type": "action",
|
|
601
|
-
"call": "
|
|
812
|
+
"call": "ReportTriggerFail"
|
|
602
813
|
}
|
|
603
814
|
]
|
|
604
815
|
}
|