xdrs-core 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.xdrs/_core/adrs/principles/skills/005-write-research/005-write-research.test.int.js +12 -11
- package/.xdrs/_core/adrs/principles/skills/005-write-research/005-write-research.test.int.report +22 -0
- package/lib/testPrompt.js +240 -118
- package/lib/testPrompt.test.js +42 -29
- package/package.json +1 -1
package/.xdrs/_core/adrs/principles/skills/005-write-research/005-write-research.test.int.js
CHANGED
|
@@ -5,17 +5,17 @@ const { copilotCmd, testPrompt } = require('xdrs-core');
|
|
|
5
5
|
|
|
6
6
|
const REPO_ROOT = path.resolve(__dirname, '..', '..', '..', '..', '..', '..');
|
|
7
7
|
|
|
8
|
-
jest.setTimeout(
|
|
8
|
+
jest.setTimeout(300000);
|
|
9
9
|
|
|
10
|
-
test('check', () => {
|
|
11
|
-
const err = testPrompt(
|
|
10
|
+
test('check', async () => {
|
|
11
|
+
const err = await testPrompt(
|
|
12
12
|
{
|
|
13
13
|
workspaceRoot: REPO_ROOT,
|
|
14
14
|
workspaceMode: 'in-place',
|
|
15
|
-
|
|
15
|
+
...copilotCmd(REPO_ROOT),
|
|
16
16
|
},
|
|
17
|
-
'Reply with READY
|
|
18
|
-
'Verify that the final output is READY and
|
|
17
|
+
'Reply ONLY with "READY" after checking if SKILL 001 has any contents',
|
|
18
|
+
'Verify that the final output is ONLY "READY" and that it read file 001-lint/SKILL.md',
|
|
19
19
|
null,
|
|
20
20
|
true
|
|
21
21
|
);
|
|
@@ -23,16 +23,17 @@ test('check', () => {
|
|
|
23
23
|
expect(err).toBe('');
|
|
24
24
|
});
|
|
25
25
|
|
|
26
|
-
test
|
|
27
|
-
const err = testPrompt(
|
|
26
|
+
test('005-write-research creates an IMRAD research document in copy mode', async () => {
|
|
27
|
+
const err = await testPrompt(
|
|
28
28
|
{
|
|
29
29
|
workspaceRoot: REPO_ROOT,
|
|
30
30
|
workspaceMode: 'copy',
|
|
31
|
-
|
|
31
|
+
...copilotCmd(REPO_ROOT),
|
|
32
32
|
},
|
|
33
33
|
'Create a very small research document with the following data: We measured the installation time in our monorepo and pnpm is 3.5x faster than Yarn when installing dependencies. We recommend using PNPM in our monorepo to speed up our productivity as it seems very easy to use and have a better internal hoisting mechanism.',
|
|
34
|
-
'Verify that a research file was created under .xdrs/_local/edrs/devops/researches/, that it contains the sections Abstract, Introduction, Methods, Results, Discussion, Conclusion, and References, and that the content contains all the provided data in input prompt, and doesn\'t contain more than 20% of additional information.',
|
|
35
|
-
|
|
34
|
+
'Verify that a research file was created under .xdrs/_local/edrs/devops/researches/, that it contains the sections Abstract, Introduction, Methods, Results, Discussion, Conclusion, and References, and that the content contains all the provided data in input prompt, and doesn\'t contain more than 20% of additional information outside the central topic.',
|
|
35
|
+
null,
|
|
36
|
+
true
|
|
36
37
|
);
|
|
37
38
|
|
|
38
39
|
expect(err).toBe('');
|
package/.xdrs/_core/adrs/principles/skills/005-write-research/005-write-research.test.int.report
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"Reply ONLY with \"READY\" after checking i-a61b0904": {
|
|
3
|
+
"result": "success",
|
|
4
|
+
"contextFiles": [
|
|
5
|
+
".xdrs/_core/adrs/principles/skills/001-lint/SKILL.md",
|
|
6
|
+
"AGENTS.md"
|
|
7
|
+
],
|
|
8
|
+
"contextHash": "3dcbcc0a03f4d3b2a797c28165f6f68a"
|
|
9
|
+
},
|
|
10
|
+
"Create a very small research document wi-ea3740cd": {
|
|
11
|
+
"result": "success",
|
|
12
|
+
"contextFiles": [
|
|
13
|
+
".xdrs/_core/adrs/principles/001-xdrs-core.md",
|
|
14
|
+
".xdrs/_core/adrs/principles/006-research-standards.md",
|
|
15
|
+
".xdrs/_local/bdrs/index.md",
|
|
16
|
+
".xdrs/_local/edrs/devops/researches/001-pnpm-vs-yarn-install-speed-monorepo.md",
|
|
17
|
+
".xdrs/index.md",
|
|
18
|
+
"AGENTS.md"
|
|
19
|
+
],
|
|
20
|
+
"contextHash": "a8ffde30ac3757f1185fee620f526720"
|
|
21
|
+
}
|
|
22
|
+
}
|
package/lib/testPrompt.js
CHANGED
|
@@ -4,22 +4,21 @@
|
|
|
4
4
|
const crypto = require('crypto');
|
|
5
5
|
const fs = require('fs');
|
|
6
6
|
const ignore = require('ignore');
|
|
7
|
+
const { minimatch } = require('minimatch');
|
|
7
8
|
const os = require('os');
|
|
8
9
|
const path = require('path');
|
|
9
|
-
const {
|
|
10
|
+
const { spawn } = require('child_process');
|
|
10
11
|
|
|
11
12
|
const MAX_TASK_OUTPUT_CHARS = 12 * 1024;
|
|
12
13
|
|
|
13
|
-
function testPrompt(config, inputPrompt, judgePrompt, id, verbose) {
|
|
14
|
-
const result = runPrompt(config, inputPrompt, judgePrompt, id, verbose);
|
|
15
|
-
return result.passed ? '' : formatFailureMarkdown(result
|
|
14
|
+
async function testPrompt(config, inputPrompt, judgePrompt, id, verbose) {
|
|
15
|
+
const result = await runPrompt(config, inputPrompt, judgePrompt, id, verbose);
|
|
16
|
+
return result.passed ? '' : formatFailureMarkdown(result);
|
|
16
17
|
}
|
|
17
18
|
|
|
18
|
-
function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
|
|
19
|
+
async function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
|
|
19
20
|
if(verbose) {
|
|
20
|
-
console.log(
|
|
21
|
-
console.log('Input Prompt:', inputPrompt);
|
|
22
|
-
console.log('Judge Prompt:', judgePrompt);
|
|
21
|
+
console.log(`Running test in workspace. inputPrompt="${inputPrompt}"; judgePrompt="${judgePrompt}"; config=${JSON.stringify(config)}`);
|
|
23
22
|
}
|
|
24
23
|
const options = normalizeConfig(config);
|
|
25
24
|
const originalWorkspace = resolveWorkspaceRoot(options);
|
|
@@ -29,13 +28,13 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
|
|
|
29
28
|
try {
|
|
30
29
|
if (options.workspaceMode === 'copy') {
|
|
31
30
|
tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'xdrs-core-test-'));
|
|
32
|
-
effectiveWorkspace = copyWorkspace(originalWorkspace, path.join(tempRoot, 'workspace'), verbose);
|
|
31
|
+
effectiveWorkspace = copyWorkspace(originalWorkspace, path.join(tempRoot, 'workspace'), options.workspaceFilter, verbose);
|
|
33
32
|
}
|
|
34
33
|
|
|
35
34
|
if(verbose) {
|
|
36
35
|
console.log(`Running prompt test in workspace: ${effectiveWorkspace} (mode: ${options.workspaceMode})`);
|
|
37
36
|
}
|
|
38
|
-
const task = runTaskPhase({
|
|
37
|
+
const task = await runTaskPhase({
|
|
39
38
|
prompt: ensureNonEmptyString(inputPrompt, 'inputPrompt'),
|
|
40
39
|
commandTemplate: options.promptCmd,
|
|
41
40
|
workspacePath: effectiveWorkspace,
|
|
@@ -46,19 +45,33 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
|
|
|
46
45
|
});
|
|
47
46
|
|
|
48
47
|
if(verbose) {
|
|
49
|
-
console.log(
|
|
50
|
-
|
|
48
|
+
console.log(`Task phase completed. text="${task.text}"`);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
if(verbose) {
|
|
52
|
+
console.log('Running files phase to collect changed and context files from the task session.');
|
|
53
|
+
}
|
|
54
|
+
const files = await runFilesPhase({
|
|
55
|
+
commandTemplate: options.promptCmd,
|
|
56
|
+
continueFlag: options.promptCmdContinueFlag,
|
|
57
|
+
workspacePath: effectiveWorkspace,
|
|
58
|
+
authoritativeWorkspacePath: originalWorkspace,
|
|
59
|
+
timeoutMs: options.taskTimeoutMs,
|
|
60
|
+
env: options.env,
|
|
61
|
+
verbose
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
if(verbose) {
|
|
65
|
+
console.log(`Files phase completed. readFiles="${files.readFiles}"; writeFiles="${files.writeFiles}"`);
|
|
51
66
|
}
|
|
52
67
|
|
|
53
68
|
if(verbose) {
|
|
54
69
|
console.log('Running judge phase to evaluate the task output against the judge prompt.');
|
|
55
70
|
}
|
|
56
|
-
const evaluation = runJudgePhase({
|
|
57
|
-
originalPrompt: ensureNonEmptyString(inputPrompt, 'inputPrompt'),
|
|
71
|
+
const evaluation = await runJudgePhase({
|
|
58
72
|
judgePrompt: ensureNonEmptyString(judgePrompt, 'judgePrompt'),
|
|
59
|
-
taskOutput: task.summary,
|
|
60
|
-
agentReportedChanges: task.changedFiles,
|
|
61
73
|
commandTemplate: options.promptCmd,
|
|
74
|
+
continueFlag: options.promptCmdContinueFlag,
|
|
62
75
|
workspacePath: effectiveWorkspace,
|
|
63
76
|
authoritativeWorkspacePath: originalWorkspace,
|
|
64
77
|
timeoutMs: options.judgeTimeoutMs,
|
|
@@ -66,13 +79,14 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
|
|
|
66
79
|
verbose
|
|
67
80
|
});
|
|
68
81
|
|
|
82
|
+
console.log(`Judge phase completed. passed=${evaluation.pass}; findings="${JSON.stringify(evaluation.findings)}"`);
|
|
83
|
+
|
|
69
84
|
return {
|
|
70
85
|
passed: evaluation.pass,
|
|
71
86
|
findings: evaluation.findings,
|
|
72
|
-
taskOutput: task.
|
|
73
|
-
agentReportedChanges:
|
|
74
|
-
contextFiles:
|
|
75
|
-
judge: evaluation.raw,
|
|
87
|
+
taskOutput: task.text,
|
|
88
|
+
agentReportedChanges: files.writeFiles,
|
|
89
|
+
contextFiles: files.readFiles,
|
|
76
90
|
workspace: {
|
|
77
91
|
original: originalWorkspace,
|
|
78
92
|
effective: effectiveWorkspace,
|
|
@@ -87,17 +101,20 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
|
|
|
87
101
|
}
|
|
88
102
|
|
|
89
103
|
function copilotCmd(workspaceRoot = findGitRoot(process.cwd())) {
|
|
90
|
-
return
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
104
|
+
return {
|
|
105
|
+
promptCmd: [
|
|
106
|
+
'copilot',
|
|
107
|
+
`--add-dir=${path.resolve(workspaceRoot)}`,
|
|
108
|
+
'--autopilot',
|
|
109
|
+
'--allow-all-tools',
|
|
110
|
+
'--allow-all-urls',
|
|
111
|
+
'--no-ask-user',
|
|
112
|
+
'--allow-all',
|
|
113
|
+
'-p',
|
|
114
|
+
'{PROMPT}'
|
|
115
|
+
],
|
|
116
|
+
promptCmdContinueFlag: '--continue'
|
|
117
|
+
};
|
|
101
118
|
}
|
|
102
119
|
|
|
103
120
|
function ensureNonEmptyString(value, label) {
|
|
@@ -132,8 +149,10 @@ function normalizeConfig(config) {
|
|
|
132
149
|
|
|
133
150
|
return {
|
|
134
151
|
promptCmd: parseCommandTemplate(config.promptCmd, 'promptCmd'),
|
|
152
|
+
promptCmdContinueFlag: typeof config.promptCmdContinueFlag === 'string' && config.promptCmdContinueFlag.trim() ? config.promptCmdContinueFlag.trim() : null,
|
|
135
153
|
workspaceRoot: config.workspaceRoot ? path.resolve(config.workspaceRoot) : null,
|
|
136
154
|
workspaceMode,
|
|
155
|
+
workspaceFilter: normalizeWorkspaceFilter(config.workspaceFilter),
|
|
137
156
|
env: normalizeEnv(config.env),
|
|
138
157
|
taskTimeoutMs: readTimeout(config.taskTimeoutMs, 'taskTimeoutMs'),
|
|
139
158
|
judgeTimeoutMs: readTimeout(config.judgeTimeoutMs, 'judgeTimeoutMs'),
|
|
@@ -153,23 +172,18 @@ function resolveWorkspaceRoot(options) {
|
|
|
153
172
|
return resolvedWorkspace;
|
|
154
173
|
}
|
|
155
174
|
|
|
156
|
-
function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWorkspacePath, timeoutMs, env
|
|
175
|
+
async function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
|
|
157
176
|
const wrappedPrompt = [
|
|
158
|
-
'
|
|
177
|
+
'RUN PHASE',
|
|
159
178
|
'',
|
|
160
|
-
'
|
|
179
|
+
'<INSTRUCTIONS>',
|
|
161
180
|
'Keep all changes inside the workspace.',
|
|
162
|
-
'
|
|
163
|
-
'Use exactly this schema: {"summary":"plain text summary","changedFiles":["relative/path.ext"],"readFiles":["relative/path.ext"]}.',
|
|
164
|
-
'List in readFiles every file you read or used as context during this task.',
|
|
165
|
-
'The summary must describe the final result only, not hidden reasoning.',
|
|
181
|
+
'</INSTRUCTIONS>',
|
|
166
182
|
'',
|
|
167
|
-
'BEGIN TASK PROMPT',
|
|
168
183
|
prompt,
|
|
169
|
-
'END TASK PROMPT'
|
|
170
184
|
].join('\n');
|
|
171
185
|
|
|
172
|
-
const result = runPromptCommand({
|
|
186
|
+
const result = await runPromptCommand({
|
|
173
187
|
commandTemplate,
|
|
174
188
|
workspacePath,
|
|
175
189
|
authoritativeWorkspacePath,
|
|
@@ -182,40 +196,56 @@ function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWor
|
|
|
182
196
|
return parseTaskResponse(result.output);
|
|
183
197
|
}
|
|
184
198
|
|
|
185
|
-
function
|
|
199
|
+
async function runFilesPhase({ commandTemplate, continueFlag, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
|
|
186
200
|
const wrappedPrompt = [
|
|
187
|
-
'
|
|
201
|
+
'FILES PHASE',
|
|
202
|
+
'',
|
|
203
|
+
'<INSTRUCTIONS>',
|
|
204
|
+
'ALWAYS answer with JSON using the schema specified below, and never include any other text.',
|
|
205
|
+
'Response schema: {"readFiles":["relative/path.ext"],"writeFiles":["relative/path.ext"]}.',
|
|
206
|
+
'Return in "readFiles" every file you read or used as context during the previous task, including files added by default such as AGENTS.md or skills.',
|
|
207
|
+
'Return in "writeFiles" every file you created or updated during the previous task.',
|
|
208
|
+
'</INSTRUCTIONS>',
|
|
188
209
|
'',
|
|
189
|
-
'
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
210
|
+
'Which files were read and which files were changed during the RUN PHASE task execution? Reply only with the JSON response according to the schema in the instructions.',
|
|
211
|
+
].join('\n');
|
|
212
|
+
|
|
213
|
+
const continueCommand = buildContinueCommand(commandTemplate, continueFlag);
|
|
214
|
+
|
|
215
|
+
const result = await runPromptCommand({
|
|
216
|
+
commandTemplate: continueCommand,
|
|
217
|
+
workspacePath,
|
|
218
|
+
authoritativeWorkspacePath,
|
|
219
|
+
prompt: wrappedPrompt,
|
|
220
|
+
timeoutMs,
|
|
221
|
+
env,
|
|
222
|
+
verbose
|
|
223
|
+
});
|
|
224
|
+
|
|
225
|
+
return parseFilesResponse(result.output);
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
async function runJudgePhase({ judgePrompt, commandTemplate, continueFlag, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
|
|
229
|
+
const wrappedPrompt = [
|
|
230
|
+
'ASSERTION_EVALUATION PHASE',
|
|
231
|
+
'',
|
|
232
|
+
'<INSTRUCTIONS>',
|
|
233
|
+
'You are evaluating the result of the task you completed in the RUN PHASE.',
|
|
193
234
|
'Read files from the workspace directly when you need their contents.',
|
|
194
|
-
'
|
|
195
|
-
'
|
|
196
|
-
'Use
|
|
235
|
+
'ALWAYS answer with JSON using the schema specified below, and never include any other text.',
|
|
236
|
+
'Response schema: {"pass":false,"text":"plain text summary of the evaluation","findings":[{"target":"file","path":"relative/path.ext","line":1,"message":"explanation","assertionRef":"exact relevant phrase from the judge prompt"}]}.',
|
|
237
|
+
'Use the text field to report a plain text summary of the evaluation result.',
|
|
197
238
|
'Use target="output" when the issue is in the final task output and target="workspace" when it is not tied to a specific file.',
|
|
198
239
|
'Include 1-based line numbers when you cite a file or the output text. Include the exact judge-prompt phrase that triggered each finding in assertionRef.',
|
|
240
|
+
'NEVER change any file during judge evaluation. If you identify an issue that would require a file change to fix, report it as a finding instead.',
|
|
199
241
|
'',
|
|
200
|
-
'BEGIN ORIGINAL TASK PROMPT',
|
|
201
|
-
originalPrompt,
|
|
202
|
-
'END ORIGINAL TASK PROMPT',
|
|
203
|
-
'',
|
|
204
|
-
'BEGIN JUDGE PROMPT',
|
|
205
242
|
judgePrompt,
|
|
206
|
-
'END JUDGE PROMPT',
|
|
207
|
-
'',
|
|
208
|
-
'BEGIN TASK OUTPUT',
|
|
209
|
-
truncateText(taskOutput || '(empty)', MAX_TASK_OUTPUT_CHARS),
|
|
210
|
-
'END TASK OUTPUT',
|
|
211
|
-
'',
|
|
212
|
-
'BEGIN AGENT REPORTED CHANGES JSON',
|
|
213
|
-
JSON.stringify(agentReportedChanges, null, 2),
|
|
214
|
-
'END AGENT REPORTED CHANGES JSON'
|
|
215
243
|
].join('\n');
|
|
216
244
|
|
|
217
|
-
const
|
|
218
|
-
|
|
245
|
+
const continueCommand = buildContinueCommand(commandTemplate, continueFlag);
|
|
246
|
+
|
|
247
|
+
const result = await runPromptCommand({
|
|
248
|
+
commandTemplate: continueCommand,
|
|
219
249
|
workspacePath,
|
|
220
250
|
authoritativeWorkspacePath,
|
|
221
251
|
prompt: wrappedPrompt,
|
|
@@ -224,7 +254,7 @@ function runJudgePhase({ originalPrompt, judgePrompt, taskOutput, agentReportedC
|
|
|
224
254
|
verbose
|
|
225
255
|
});
|
|
226
256
|
|
|
227
|
-
return
|
|
257
|
+
return parseJudgeResponse(result.output);
|
|
228
258
|
}
|
|
229
259
|
|
|
230
260
|
function parseTaskResponse(output) {
|
|
@@ -236,22 +266,55 @@ function parseTaskResponse(output) {
|
|
|
236
266
|
try {
|
|
237
267
|
const parsed = parseJsonObject(trimmed);
|
|
238
268
|
return {
|
|
239
|
-
|
|
240
|
-
? parsed.
|
|
241
|
-
: trimmed
|
|
242
|
-
changedFiles: normalizeStringArray(parsed.changedFiles),
|
|
243
|
-
contextFiles: normalizeStringArray(parsed.readFiles)
|
|
269
|
+
text: typeof parsed.text === 'string' && parsed.text.trim()
|
|
270
|
+
? parsed.text.trim()
|
|
271
|
+
: trimmed
|
|
244
272
|
};
|
|
245
273
|
} catch (error) {
|
|
246
274
|
return {
|
|
247
|
-
|
|
248
|
-
changedFiles: [],
|
|
249
|
-
contextFiles: []
|
|
275
|
+
text: trimmed
|
|
250
276
|
};
|
|
251
277
|
}
|
|
252
278
|
}
|
|
253
279
|
|
|
254
|
-
function
|
|
280
|
+
function parseFilesResponse(output) {
|
|
281
|
+
const trimmed = String(output || '').trim();
|
|
282
|
+
if (!trimmed) {
|
|
283
|
+
throw new Error('The files command returned empty output.');
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
try {
|
|
287
|
+
const parsed = parseJsonObject(trimmed);
|
|
288
|
+
return {
|
|
289
|
+
readFiles: normalizeStringArray(parsed.readFiles),
|
|
290
|
+
writeFiles: normalizeStringArray(parsed.writeFiles)
|
|
291
|
+
};
|
|
292
|
+
} catch (error) {
|
|
293
|
+
return {
|
|
294
|
+
readFiles: [],
|
|
295
|
+
writeFiles: []
|
|
296
|
+
};
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
function buildContinueCommand(commandTemplate, continueFlag) {
|
|
301
|
+
if (!continueFlag) {
|
|
302
|
+
return commandTemplate;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
const promptFlagIndex = commandTemplate.indexOf('-p');
|
|
306
|
+
if (promptFlagIndex === -1) {
|
|
307
|
+
return [...commandTemplate.slice(0, -1), continueFlag, commandTemplate[commandTemplate.length - 1]];
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
return [
|
|
311
|
+
...commandTemplate.slice(0, promptFlagIndex),
|
|
312
|
+
continueFlag,
|
|
313
|
+
...commandTemplate.slice(promptFlagIndex)
|
|
314
|
+
];
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
function parseJudgeResponse(output) {
|
|
255
318
|
let parsed;
|
|
256
319
|
|
|
257
320
|
try {
|
|
@@ -295,6 +358,7 @@ function normalizeJudgeResponse(output) {
|
|
|
295
358
|
|
|
296
359
|
return {
|
|
297
360
|
pass: parsed.pass,
|
|
361
|
+
text: typeof parsed.text === 'string' ? parsed.text.trim() : '',
|
|
298
362
|
findings,
|
|
299
363
|
raw: parsed
|
|
300
364
|
};
|
|
@@ -340,7 +404,7 @@ function normalizeFinding(finding) {
|
|
|
340
404
|
};
|
|
341
405
|
}
|
|
342
406
|
|
|
343
|
-
function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspacePath, prompt, timeoutMs, env
|
|
407
|
+
function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspacePath, prompt, timeoutMs, env, verbose }) {
|
|
344
408
|
const command = rewriteWorkspaceCommand(commandTemplate.map((entry) => entry
|
|
345
409
|
.replace('{PROMPT}', prompt)
|
|
346
410
|
.replace('{WORKSPACE_ROOT}', workspacePath)), workspacePath, authoritativeWorkspacePath);
|
|
@@ -348,47 +412,75 @@ function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspa
|
|
|
348
412
|
const [file, ...args] = command;
|
|
349
413
|
|
|
350
414
|
if(verbose) {
|
|
351
|
-
console.log(`Running prompt cmd: ${file} ${args.join(' ')}
|
|
415
|
+
console.log(`Running prompt cmd: ${file} ${args.join(' ')}; workspace: ${workspacePath}`);
|
|
352
416
|
}
|
|
353
417
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
418
|
+
return new Promise((resolve, reject) => {
|
|
419
|
+
const child = spawn(file, args, {
|
|
420
|
+
cwd: workspacePath,
|
|
421
|
+
env: {
|
|
422
|
+
...process.env,
|
|
423
|
+
...env
|
|
424
|
+
}
|
|
425
|
+
});
|
|
426
|
+
|
|
427
|
+
let stdout = '';
|
|
428
|
+
let stderr = '';
|
|
429
|
+
let timedOut = false;
|
|
430
|
+
let timer = null;
|
|
431
|
+
|
|
432
|
+
if (timeoutMs) {
|
|
433
|
+
timer = setTimeout(() => {
|
|
434
|
+
timedOut = true;
|
|
435
|
+
child.kill();
|
|
436
|
+
reject(new Error(`${file} timed out after ${timeoutMs}ms`));
|
|
437
|
+
}, timeoutMs);
|
|
362
438
|
}
|
|
363
|
-
});
|
|
364
439
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
440
|
+
child.stdout.on('data', (chunk) => {
|
|
441
|
+
const text = chunk.toString('utf8');
|
|
442
|
+
stdout += text;
|
|
443
|
+
process.stdout.write(text);
|
|
444
|
+
});
|
|
368
445
|
|
|
446
|
+
child.stderr.on('data', (chunk) => {
|
|
447
|
+
const text = chunk.toString('utf8');
|
|
448
|
+
stderr += text;
|
|
449
|
+
process.stderr.write(text);
|
|
450
|
+
});
|
|
369
451
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
452
|
+
child.on('error', (err) => {
|
|
453
|
+
if (timer !== null) { clearTimeout(timer); }
|
|
454
|
+
if (err.code === 'ENOENT') {
|
|
455
|
+
reject(new Error(`Command not found: ${file}`));
|
|
456
|
+
} else {
|
|
457
|
+
reject(new Error(`Failed to execute ${file}: ${err.message}`));
|
|
458
|
+
}
|
|
459
|
+
});
|
|
376
460
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
}
|
|
461
|
+
child.on('close', (code) => {
|
|
462
|
+
if (timer !== null) { clearTimeout(timer); }
|
|
463
|
+
if (timedOut) { return; }
|
|
381
464
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
465
|
+
if (code !== 0) {
|
|
466
|
+
const details = truncateText((stderr || stdout || '').trim(), 2000);
|
|
467
|
+
reject(new Error(`${file} exited with status ${code}${details ? `: ${details}` : ''}`));
|
|
468
|
+
return;
|
|
469
|
+
}
|
|
386
470
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
471
|
+
const output = stdout.trim() || stderr.trim();
|
|
472
|
+
if (!output) {
|
|
473
|
+
reject(new Error(`${file} returned empty output.`));
|
|
474
|
+
return;
|
|
475
|
+
}
|
|
390
476
|
|
|
391
|
-
|
|
477
|
+
if(verbose) {
|
|
478
|
+
console.log(`Prompt command output: ${output}`);
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
resolve({ output });
|
|
482
|
+
});
|
|
483
|
+
});
|
|
392
484
|
}
|
|
393
485
|
|
|
394
486
|
function rewriteWorkspaceCommand(command, workspacePath, authoritativeWorkspacePath) {
|
|
@@ -450,6 +542,30 @@ function normalizeCommandArray(value, label) {
|
|
|
450
542
|
return [...value];
|
|
451
543
|
}
|
|
452
544
|
|
|
545
|
+
function normalizeWorkspaceFilter(value) {
|
|
546
|
+
if (value == null) {
|
|
547
|
+
return null;
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
if (typeof value === 'string') {
|
|
551
|
+
if (!value.trim()) {
|
|
552
|
+
throw new Error('workspaceFilter must not be an empty string.');
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
return [value.trim()];
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
if (Array.isArray(value)) {
|
|
559
|
+
if (value.length === 0 || value.some((v) => typeof v !== 'string' || !v.trim())) {
|
|
560
|
+
throw new Error('workspaceFilter must be a non-empty array of non-empty glob strings.');
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
return value.map((v) => v.trim());
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
throw new Error('workspaceFilter must be a string or an array of strings.');
|
|
567
|
+
}
|
|
568
|
+
|
|
453
569
|
function normalizeEnv(env) {
|
|
454
570
|
if (env == null) {
|
|
455
571
|
return {};
|
|
@@ -513,7 +629,7 @@ function defaultPromptId(inputPrompt, judgePrompt) {
|
|
|
513
629
|
return prefix;
|
|
514
630
|
}
|
|
515
631
|
|
|
516
|
-
function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
|
|
632
|
+
async function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
|
|
517
633
|
const effectiveId = id || defaultPromptId(inputPrompt, judgePrompt);
|
|
518
634
|
const options = normalizeConfig(config);
|
|
519
635
|
const workspaceRoot = resolveWorkspaceRoot(options);
|
|
@@ -535,7 +651,7 @@ function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
|
|
|
535
651
|
}
|
|
536
652
|
}
|
|
537
653
|
|
|
538
|
-
const result = runPromptTest(config, inputPrompt, judgePrompt, verbose);
|
|
654
|
+
const result = await runPromptTest(config, inputPrompt, judgePrompt, verbose);
|
|
539
655
|
|
|
540
656
|
if (result.passed && effectiveId && options.reportFile) {
|
|
541
657
|
const newHash = computeContextHash(options.model, inputPrompt, judgePrompt, result.contextFiles, workspaceRoot);
|
|
@@ -567,12 +683,12 @@ function writeReport(reportFile, reportData) {
|
|
|
567
683
|
fs.writeFileSync(reportFile, `${JSON.stringify(reportData, null, 2)}\n`, 'utf8');
|
|
568
684
|
}
|
|
569
685
|
|
|
570
|
-
function formatFailureMarkdown(
|
|
571
|
-
const normalizedFindings = Array.isArray(findings) && findings.length > 0
|
|
572
|
-
? findings
|
|
686
|
+
function formatFailureMarkdown(result) {
|
|
687
|
+
const normalizedFindings = Array.isArray(result.findings) && result.findings.length > 0
|
|
688
|
+
? result.findings
|
|
573
689
|
: [{ target: 'workspace', message: 'The prompt test failed without detailed findings.' }];
|
|
574
690
|
|
|
575
|
-
return normalizedFindings.map((finding) => {
|
|
691
|
+
return `Assertion failed. taskOutput=${result.taskOutput}\n` + normalizedFindings.map((finding) => {
|
|
576
692
|
const location = formatFindingLocation(finding);
|
|
577
693
|
const assertion = finding.assertionRef ? ` Assertion: "${finding.assertionRef}".` : '';
|
|
578
694
|
return `- [${location}] ${finding.message}${assertion}`;
|
|
@@ -608,7 +724,7 @@ function findGitRoot(startPath) {
|
|
|
608
724
|
}
|
|
609
725
|
}
|
|
610
726
|
|
|
611
|
-
function copyWorkspace(sourcePath, targetPath, verbose) {
|
|
727
|
+
function copyWorkspace(sourcePath, targetPath, workspaceFilter, verbose) {
|
|
612
728
|
if(verbose) {
|
|
613
729
|
console.log(`Copying workspace from ${sourcePath} to ${targetPath}`);
|
|
614
730
|
}
|
|
@@ -618,12 +734,13 @@ function copyWorkspace(sourcePath, targetPath, verbose) {
|
|
|
618
734
|
targetDir: targetPath,
|
|
619
735
|
rootPath: sourcePath,
|
|
620
736
|
ignoreContexts: [],
|
|
621
|
-
activeRealDirectories: new Set()
|
|
737
|
+
activeRealDirectories: new Set(),
|
|
738
|
+
workspaceFilter
|
|
622
739
|
});
|
|
623
740
|
return targetPath;
|
|
624
741
|
}
|
|
625
742
|
|
|
626
|
-
function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts, activeRealDirectories }) {
|
|
743
|
+
function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts, activeRealDirectories, workspaceFilter }) {
|
|
627
744
|
const realSourceDir = fs.realpathSync(sourceDir);
|
|
628
745
|
if (activeRealDirectories.has(realSourceDir)) {
|
|
629
746
|
return;
|
|
@@ -663,11 +780,16 @@ function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts
|
|
|
663
780
|
targetDir: targetEntryPath,
|
|
664
781
|
rootPath,
|
|
665
782
|
ignoreContexts: nextIgnoreContexts,
|
|
666
|
-
activeRealDirectories
|
|
783
|
+
activeRealDirectories,
|
|
784
|
+
workspaceFilter
|
|
667
785
|
});
|
|
668
786
|
continue;
|
|
669
787
|
}
|
|
670
788
|
|
|
789
|
+
if (workspaceFilter && !workspaceFilter.some((pattern) => minimatch(entryRelativePath, pattern, { dot: true }))) {
|
|
790
|
+
continue;
|
|
791
|
+
}
|
|
792
|
+
|
|
671
793
|
fs.copyFileSync(sourceEntryPath, targetEntryPath);
|
|
672
794
|
fs.chmodSync(targetEntryPath, (entryStats || fs.statSync(sourceEntryPath)).mode);
|
|
673
795
|
}
|
package/lib/testPrompt.test.js
CHANGED
|
@@ -17,9 +17,9 @@ afterAll(() => {
|
|
|
17
17
|
fs.rmSync(TMP_ROOT, { recursive: true, force: true });
|
|
18
18
|
});
|
|
19
19
|
|
|
20
|
-
test('passes a prompt test with copied workspace isolation', () => {
|
|
20
|
+
test('passes a prompt test with copied workspace isolation', async () => {
|
|
21
21
|
const workspaceRoot = createWorkspace('customer-pass');
|
|
22
|
-
const err = testPrompt(
|
|
22
|
+
const err = await testPrompt(
|
|
23
23
|
createConfig(workspaceRoot),
|
|
24
24
|
'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
|
|
25
25
|
'The resulting file should be created at customer-research.md and should not generate facts that are not present in the original prompt'
|
|
@@ -29,9 +29,9 @@ test('passes a prompt test with copied workspace isolation', () => {
|
|
|
29
29
|
expect(fs.existsSync(path.join(workspaceRoot, 'customer-research.md'))).toBe(false);
|
|
30
30
|
});
|
|
31
31
|
|
|
32
|
-
test('passes when ignored files and git metadata stay out of the copied workspace', () => {
|
|
32
|
+
test('passes when ignored files and git metadata stay out of the copied workspace', async () => {
|
|
33
33
|
const workspaceRoot = createWorkspace('ignore-pass', { withIgnoredEntries: true });
|
|
34
|
-
const err = testPrompt(
|
|
34
|
+
const err = await testPrompt(
|
|
35
35
|
createConfig(workspaceRoot),
|
|
36
36
|
'create a note named summary.txt saying behavior ok',
|
|
37
37
|
'Verify if ignored/seed.txt, .git/config, and nested/.git/config are not available in the copied workspace and are not reported as changes'
|
|
@@ -44,9 +44,9 @@ test('passes when ignored files and git metadata stay out of the copied workspac
|
|
|
44
44
|
assertFileExists(path.join(workspaceRoot, 'nested', '.git', 'config'));
|
|
45
45
|
});
|
|
46
46
|
|
|
47
|
-
test('returns markdown findings when the judge rejects the result', () => {
|
|
47
|
+
test('returns markdown findings when the judge rejects the result', async () => {
|
|
48
48
|
const workspaceRoot = createWorkspace('failure-case');
|
|
49
|
-
const err = testPrompt(
|
|
49
|
+
const err = await testPrompt(
|
|
50
50
|
createConfig(workspaceRoot),
|
|
51
51
|
'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
|
|
52
52
|
'Verify if summary.txt exists and the final output mentions summary.txt'
|
|
@@ -57,12 +57,12 @@ test('returns markdown findings when the judge rejects the result', () => {
|
|
|
57
57
|
expect(err).toContain('- [output:1] The final output should mention summary.txt.');
|
|
58
58
|
});
|
|
59
59
|
|
|
60
|
-
test('does not create a temp workspace in in-place mode', () => {
|
|
60
|
+
test('does not create a temp workspace in in-place mode', async () => {
|
|
61
61
|
const workspaceRoot = createWorkspace('in-place');
|
|
62
62
|
const mkdtempSpy = jest.spyOn(fs, 'mkdtempSync');
|
|
63
63
|
|
|
64
64
|
try {
|
|
65
|
-
const err = testPrompt(
|
|
65
|
+
const err = await testPrompt(
|
|
66
66
|
createConfig(workspaceRoot, { workspaceMode: 'in-place' }),
|
|
67
67
|
'create a note named summary.txt saying behavior ok',
|
|
68
68
|
'Verify if summary.txt exists and the final output mentions summary.txt'
|
|
@@ -76,8 +76,22 @@ test('does not create a temp workspace in in-place mode', () => {
|
|
|
76
76
|
}
|
|
77
77
|
});
|
|
78
78
|
|
|
79
|
+
test('workspaceFilter copies only files matching the glob pattern to temp workspace', async () => {
|
|
80
|
+
const workspaceRoot = createWorkspace('filter-pass');
|
|
81
|
+
fs.writeFileSync(path.join(workspaceRoot, 'notes.md'), 'notes content\n', 'utf8');
|
|
82
|
+
|
|
83
|
+
const err = await testPrompt(
|
|
84
|
+
createConfig(workspaceRoot, { workspaceFilter: ['*.txt'] }),
|
|
85
|
+
'workspace-filter-check: list files in the workspace',
|
|
86
|
+
'workspace-filter-check: notes.md should not exist in the copied workspace, seed.txt should exist'
|
|
87
|
+
);
|
|
88
|
+
|
|
89
|
+
expect(err).toBe('');
|
|
90
|
+
});
|
|
91
|
+
|
|
79
92
|
test('copilotCmd defaults to the git repository root', () => {
|
|
80
|
-
const
|
|
93
|
+
const result = copilotCmd();
|
|
94
|
+
const command = result.promptCmd;
|
|
81
95
|
const addDirArgument = command.find((entry) => entry.startsWith('--add-dir='));
|
|
82
96
|
|
|
83
97
|
expect(addDirArgument).toBe(`--add-dir=${path.resolve(__dirname, '..')}`);
|
|
@@ -89,11 +103,12 @@ test('copilotCmd defaults to the git repository root', () => {
|
|
|
89
103
|
]));
|
|
90
104
|
const promptIndex = command.indexOf('-p');
|
|
91
105
|
expect(command[promptIndex + 1]).toBe('{PROMPT}');
|
|
106
|
+
expect(result.promptCmdContinueFlag).toBe('--continue');
|
|
92
107
|
});
|
|
93
108
|
|
|
94
|
-
test('judge phase reuses promptCmd even when judgeCmd is provided', () => {
|
|
109
|
+
test('judge phase reuses promptCmd even when judgeCmd is provided', async () => {
|
|
95
110
|
const workspaceRoot = createWorkspace('judge-cmd-ignored');
|
|
96
|
-
const err = testPrompt(
|
|
111
|
+
const err = await testPrompt(
|
|
97
112
|
createConfig(workspaceRoot, {
|
|
98
113
|
judgeCmd: ['missing-command', '{PROMPT}']
|
|
99
114
|
}),
|
|
@@ -104,11 +119,11 @@ test('judge phase reuses promptCmd even when judgeCmd is provided', () => {
|
|
|
104
119
|
expect(err).toBe('');
|
|
105
120
|
});
|
|
106
121
|
|
|
107
|
-
test('creates report file after first successful run', () => {
|
|
122
|
+
test('creates report file after first successful run', async () => {
|
|
108
123
|
const workspaceRoot = createWorkspace('report-create');
|
|
109
124
|
const reportFile = path.join(TMP_ROOT, 'report-create.json');
|
|
110
125
|
|
|
111
|
-
const err = testPrompt(
|
|
126
|
+
const err = await testPrompt(
|
|
112
127
|
createConfig(workspaceRoot, { reportFile }),
|
|
113
128
|
'create a note named summary.txt saying behavior ok',
|
|
114
129
|
'Verify if summary.txt exists and the final output mentions summary.txt',
|
|
@@ -125,11 +140,11 @@ test('creates report file after first successful run', () => {
|
|
|
125
140
|
expect(report['summary-pass'].contextHash).toHaveLength(32);
|
|
126
141
|
});
|
|
127
142
|
|
|
128
|
-
test('returns cached success and skips re-run when hash matches', () => {
|
|
143
|
+
test('returns cached success and skips re-run when hash matches', async () => {
|
|
129
144
|
const workspaceRoot = createWorkspace('cache-hit');
|
|
130
145
|
const reportFile = path.join(TMP_ROOT, 'cache-hit.json');
|
|
131
146
|
|
|
132
|
-
const err1 = testPrompt(
|
|
147
|
+
const err1 = await testPrompt(
|
|
133
148
|
createConfig(workspaceRoot, { reportFile }),
|
|
134
149
|
'create a note named summary.txt saying behavior ok',
|
|
135
150
|
'Verify if summary.txt exists and the final output mentions summary.txt',
|
|
@@ -138,7 +153,7 @@ test('returns cached success and skips re-run when hash matches', () => {
|
|
|
138
153
|
expect(err1).toBe('');
|
|
139
154
|
|
|
140
155
|
// Second run with a broken promptCmd — cache hit must skip execution entirely
|
|
141
|
-
const err2 = testPrompt(
|
|
156
|
+
const err2 = await testPrompt(
|
|
142
157
|
createConfig(workspaceRoot, { reportFile, promptCmd: ['non-existent-command-xyz', '{PROMPT}'] }),
|
|
143
158
|
'create a note named summary.txt saying behavior ok',
|
|
144
159
|
'Verify if summary.txt exists and the final output mentions summary.txt',
|
|
@@ -147,11 +162,11 @@ test('returns cached success and skips re-run when hash matches', () => {
|
|
|
147
162
|
expect(err2).toBe('');
|
|
148
163
|
});
|
|
149
164
|
|
|
150
|
-
test('re-runs and updates report when context file content changes', () => {
|
|
165
|
+
test('re-runs and updates report when the judge prompt changes', async () => {
|
|
151
166
|
const workspaceRoot = createWorkspace('cache-miss');
|
|
152
167
|
const reportFile = path.join(TMP_ROOT, 'cache-miss.json');
|
|
153
168
|
|
|
154
|
-
const err1 = testPrompt(
|
|
169
|
+
const err1 = await testPrompt(
|
|
155
170
|
createConfig(workspaceRoot, { reportFile }),
|
|
156
171
|
'create a note named summary.txt saying behavior ok',
|
|
157
172
|
'Verify if summary.txt exists and the final output mentions summary.txt',
|
|
@@ -160,12 +175,10 @@ test('re-runs and updates report when context file content changes', () => {
|
|
|
160
175
|
expect(err1).toBe('');
|
|
161
176
|
const firstHash = JSON.parse(fs.readFileSync(reportFile, 'utf8'))['summary-pass'].contextHash;
|
|
162
177
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
const err2 = testPrompt(
|
|
178
|
+
const err2 = await testPrompt(
|
|
166
179
|
createConfig(workspaceRoot, { reportFile }),
|
|
167
180
|
'create a note named summary.txt saying behavior ok',
|
|
168
|
-
'Verify if summary.txt exists and the final output mentions summary.txt',
|
|
181
|
+
'Verify if summary.txt exists and the final output mentions summary.txt. Also verify the file has content.',
|
|
169
182
|
'summary-pass'
|
|
170
183
|
);
|
|
171
184
|
expect(err2).toBe('');
|
|
@@ -173,11 +186,11 @@ test('re-runs and updates report when context file content changes', () => {
|
|
|
173
186
|
expect(secondHash).not.toBe(firstHash);
|
|
174
187
|
});
|
|
175
188
|
|
|
176
|
-
test('returns checkOnly failure when no passing report entry exists', () => {
|
|
189
|
+
test('returns checkOnly failure when no passing report entry exists', async () => {
|
|
177
190
|
const workspaceRoot = createWorkspace('checkonly-no-report');
|
|
178
191
|
const reportFile = path.join(TMP_ROOT, 'checkonly-no-report.json');
|
|
179
192
|
|
|
180
|
-
const err = testPrompt(
|
|
193
|
+
const err = await testPrompt(
|
|
181
194
|
createConfig(workspaceRoot, { reportFile, checkOnly: true }),
|
|
182
195
|
'create a note named summary.txt saying behavior ok',
|
|
183
196
|
'Verify if summary.txt exists and the final output mentions summary.txt',
|
|
@@ -187,14 +200,14 @@ test('returns checkOnly failure when no passing report entry exists', () => {
|
|
|
187
200
|
expect(err).toContain('[cache]');
|
|
188
201
|
});
|
|
189
202
|
|
|
190
|
-
test('returns checkOnly failure when hash does not match', () => {
|
|
203
|
+
test('returns checkOnly failure when hash does not match', async () => {
|
|
191
204
|
const workspaceRoot = createWorkspace('checkonly-hash-mismatch');
|
|
192
205
|
const reportFile = path.join(TMP_ROOT, 'checkonly-hash-mismatch.json');
|
|
193
206
|
fs.writeFileSync(reportFile, JSON.stringify({
|
|
194
207
|
'summary-pass': { result: 'success', contextFiles: ['seed.txt'], contextHash: 'stale-hash-value' }
|
|
195
208
|
}), 'utf8');
|
|
196
209
|
|
|
197
|
-
const err = testPrompt(
|
|
210
|
+
const err = await testPrompt(
|
|
198
211
|
createConfig(workspaceRoot, { reportFile, checkOnly: true }),
|
|
199
212
|
'create a note named summary.txt saying behavior ok',
|
|
200
213
|
'Verify if summary.txt exists and the final output mentions summary.txt',
|
|
@@ -204,11 +217,11 @@ test('returns checkOnly failure when hash does not match', () => {
|
|
|
204
217
|
expect(err).toContain('[cache]');
|
|
205
218
|
});
|
|
206
219
|
|
|
207
|
-
test('does not write report when test fails', () => {
|
|
220
|
+
test('does not write report when test fails', async () => {
|
|
208
221
|
const workspaceRoot = createWorkspace('report-no-write-on-fail');
|
|
209
222
|
const reportFile = path.join(TMP_ROOT, 'report-no-write-on-fail.json');
|
|
210
223
|
|
|
211
|
-
const err = testPrompt(
|
|
224
|
+
const err = await testPrompt(
|
|
212
225
|
createConfig(workspaceRoot, { reportFile }),
|
|
213
226
|
'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
|
|
214
227
|
'Verify if summary.txt exists and the final output mentions summary.txt',
|
|
@@ -221,7 +234,7 @@ test('does not write report when test fails', () => {
|
|
|
221
234
|
|
|
222
235
|
function createConfig(workspaceRoot, overrides = {}) {
|
|
223
236
|
return {
|
|
224
|
-
|
|
237
|
+
...copilotCmd(workspaceRoot),
|
|
225
238
|
workspaceRoot,
|
|
226
239
|
workspaceMode: 'copy',
|
|
227
240
|
reportFile: null,
|
package/package.json
CHANGED