xdrs-core 0.12.0 → 0.13.0

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -5,17 +5,17 @@ const { copilotCmd, testPrompt } = require('xdrs-core');
5
5
 
6
6
  const REPO_ROOT = path.resolve(__dirname, '..', '..', '..', '..', '..', '..');
7
7
 
8
- jest.setTimeout(60000);
8
+ jest.setTimeout(300000);
9
9
 
10
- test('check', () => {
11
- const err = testPrompt(
10
+ test('check', async () => {
11
+ const err = await testPrompt(
12
12
  {
13
13
  workspaceRoot: REPO_ROOT,
14
14
  workspaceMode: 'in-place',
15
- promptCmd: copilotCmd(REPO_ROOT),
15
+ ...copilotCmd(REPO_ROOT),
16
16
  },
17
- 'Reply with READY and nothing else.',
18
- 'Verify that the final output is READY and nothing else.',
17
+ 'Reply ONLY with "READY" after checking if SKILL 001 has any contents',
18
+ 'Verify that the final output is ONLY "READY" and that it read file 001-lint/SKILL.md',
19
19
  null,
20
20
  true
21
21
  );
@@ -23,16 +23,17 @@ test('check', () => {
23
23
  expect(err).toBe('');
24
24
  });
25
25
 
26
- test.skip('005-write-research creates an IMRAD research document in copy mode', () => {
27
- const err = testPrompt(
26
+ test('005-write-research creates an IMRAD research document in copy mode', async () => {
27
+ const err = await testPrompt(
28
28
  {
29
29
  workspaceRoot: REPO_ROOT,
30
30
  workspaceMode: 'copy',
31
- promptCmd: copilotCmd(REPO_ROOT),
31
+ ...copilotCmd(REPO_ROOT),
32
32
  },
33
33
  'Create a very small research document with the following data: We measured the installation time in our monorepo and pnpm is 3.5x faster than Yarn when installing dependencies. We recommend using PNPM in our monorepo to speed up our productivity as it seems very easy to use and have a better internal hoisting mechanism.',
34
- 'Verify that a research file was created under .xdrs/_local/edrs/devops/researches/, that it contains the sections Abstract, Introduction, Methods, Results, Discussion, Conclusion, and References, and that the content contains all the provided data in input prompt, and doesn\'t contain more than 20% of additional information.',
35
- '005-write-research-imrad-copy'
34
+ 'Verify that a research file was created under .xdrs/_local/edrs/devops/researches/, that it contains the sections Abstract, Introduction, Methods, Results, Discussion, Conclusion, and References, and that the content contains all the provided data in input prompt, and doesn\'t contain more than 20% of additional information outside the central topic.',
35
+ null,
36
+ true
36
37
  );
37
38
 
38
39
  expect(err).toBe('');
@@ -0,0 +1,22 @@
1
+ {
2
+ "Reply ONLY with \"READY\" after checking i-a61b0904": {
3
+ "result": "success",
4
+ "contextFiles": [
5
+ ".xdrs/_core/adrs/principles/skills/001-lint/SKILL.md",
6
+ "AGENTS.md"
7
+ ],
8
+ "contextHash": "3dcbcc0a03f4d3b2a797c28165f6f68a"
9
+ },
10
+ "Create a very small research document wi-ea3740cd": {
11
+ "result": "success",
12
+ "contextFiles": [
13
+ ".xdrs/_core/adrs/principles/001-xdrs-core.md",
14
+ ".xdrs/_core/adrs/principles/006-research-standards.md",
15
+ ".xdrs/_local/bdrs/index.md",
16
+ ".xdrs/_local/edrs/devops/researches/001-pnpm-vs-yarn-install-speed-monorepo.md",
17
+ ".xdrs/index.md",
18
+ "AGENTS.md"
19
+ ],
20
+ "contextHash": "a8ffde30ac3757f1185fee620f526720"
21
+ }
22
+ }
package/lib/testPrompt.js CHANGED
@@ -4,22 +4,21 @@
4
4
  const crypto = require('crypto');
5
5
  const fs = require('fs');
6
6
  const ignore = require('ignore');
7
+ const { minimatch } = require('minimatch');
7
8
  const os = require('os');
8
9
  const path = require('path');
9
- const { spawnSync } = require('child_process');
10
+ const { spawn } = require('child_process');
10
11
 
11
12
  const MAX_TASK_OUTPUT_CHARS = 12 * 1024;
12
13
 
13
- function testPrompt(config, inputPrompt, judgePrompt, id, verbose) {
14
- const result = runPrompt(config, inputPrompt, judgePrompt, id, verbose);
15
- return result.passed ? '' : formatFailureMarkdown(result.findings);
14
+ async function testPrompt(config, inputPrompt, judgePrompt, id, verbose) {
15
+ const result = await runPrompt(config, inputPrompt, judgePrompt, id, verbose);
16
+ return result.passed ? '' : formatFailureMarkdown(result);
16
17
  }
17
18
 
18
- function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
19
+ async function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
19
20
  if(verbose) {
20
- console.log('Running prompt test with config:', JSON.stringify(config, null, 2));
21
- console.log('Input Prompt:', inputPrompt);
22
- console.log('Judge Prompt:', judgePrompt);
21
+ console.log(`Running test in workspace. inputPrompt="${inputPrompt}"; judgePrompt="${judgePrompt}"; config=${JSON.stringify(config)}`);
23
22
  }
24
23
  const options = normalizeConfig(config);
25
24
  const originalWorkspace = resolveWorkspaceRoot(options);
@@ -29,13 +28,13 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
29
28
  try {
30
29
  if (options.workspaceMode === 'copy') {
31
30
  tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'xdrs-core-test-'));
32
- effectiveWorkspace = copyWorkspace(originalWorkspace, path.join(tempRoot, 'workspace'), verbose);
31
+ effectiveWorkspace = copyWorkspace(originalWorkspace, path.join(tempRoot, 'workspace'), options.workspaceFilter, verbose);
33
32
  }
34
33
 
35
34
  if(verbose) {
36
35
  console.log(`Running prompt test in workspace: ${effectiveWorkspace} (mode: ${options.workspaceMode})`);
37
36
  }
38
- const task = runTaskPhase({
37
+ const task = await runTaskPhase({
39
38
  prompt: ensureNonEmptyString(inputPrompt, 'inputPrompt'),
40
39
  commandTemplate: options.promptCmd,
41
40
  workspacePath: effectiveWorkspace,
@@ -46,19 +45,33 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
46
45
  });
47
46
 
48
47
  if(verbose) {
49
- console.log('Task phase completed. Summary:', task.summary);
50
- console.log('Agent reported changed files:', task.changedFiles);
48
+ console.log(`Task phase completed. text="${task.text}"`);
49
+ }
50
+
51
+ if(verbose) {
52
+ console.log('Running files phase to collect changed and context files from the task session.');
53
+ }
54
+ const files = await runFilesPhase({
55
+ commandTemplate: options.promptCmd,
56
+ continueFlag: options.promptCmdContinueFlag,
57
+ workspacePath: effectiveWorkspace,
58
+ authoritativeWorkspacePath: originalWorkspace,
59
+ timeoutMs: options.taskTimeoutMs,
60
+ env: options.env,
61
+ verbose
62
+ });
63
+
64
+ if(verbose) {
65
+ console.log(`Files phase completed. readFiles="${files.readFiles}"; writeFiles="${files.writeFiles}"`);
51
66
  }
52
67
 
53
68
  if(verbose) {
54
69
  console.log('Running judge phase to evaluate the task output against the judge prompt.');
55
70
  }
56
- const evaluation = runJudgePhase({
57
- originalPrompt: ensureNonEmptyString(inputPrompt, 'inputPrompt'),
71
+ const evaluation = await runJudgePhase({
58
72
  judgePrompt: ensureNonEmptyString(judgePrompt, 'judgePrompt'),
59
- taskOutput: task.summary,
60
- agentReportedChanges: task.changedFiles,
61
73
  commandTemplate: options.promptCmd,
74
+ continueFlag: options.promptCmdContinueFlag,
62
75
  workspacePath: effectiveWorkspace,
63
76
  authoritativeWorkspacePath: originalWorkspace,
64
77
  timeoutMs: options.judgeTimeoutMs,
@@ -66,13 +79,14 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
66
79
  verbose
67
80
  });
68
81
 
82
+ console.log(`Judge phase completed. passed=${evaluation.pass}; findings="${JSON.stringify(evaluation.findings)}"`);
83
+
69
84
  return {
70
85
  passed: evaluation.pass,
71
86
  findings: evaluation.findings,
72
- taskOutput: task.summary,
73
- agentReportedChanges: task.changedFiles,
74
- contextFiles: task.contextFiles,
75
- judge: evaluation.raw,
87
+ taskOutput: task.text,
88
+ agentReportedChanges: files.writeFiles,
89
+ contextFiles: files.readFiles,
76
90
  workspace: {
77
91
  original: originalWorkspace,
78
92
  effective: effectiveWorkspace,
@@ -87,17 +101,20 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
87
101
  }
88
102
 
89
103
  function copilotCmd(workspaceRoot = findGitRoot(process.cwd())) {
90
- return [
91
- 'copilot',
92
- `--add-dir=${path.resolve(workspaceRoot)}`,
93
- '--autopilot',
94
- '--allow-all-tools',
95
- '--allow-all-urls',
96
- '--no-ask-user',
97
- '--allow-all',
98
- '-p',
99
- '{PROMPT}'
100
- ];
104
+ return {
105
+ promptCmd: [
106
+ 'copilot',
107
+ `--add-dir=${path.resolve(workspaceRoot)}`,
108
+ '--autopilot',
109
+ '--allow-all-tools',
110
+ '--allow-all-urls',
111
+ '--no-ask-user',
112
+ '--allow-all',
113
+ '-p',
114
+ '{PROMPT}'
115
+ ],
116
+ promptCmdContinueFlag: '--continue'
117
+ };
101
118
  }
102
119
 
103
120
  function ensureNonEmptyString(value, label) {
@@ -132,8 +149,10 @@ function normalizeConfig(config) {
132
149
 
133
150
  return {
134
151
  promptCmd: parseCommandTemplate(config.promptCmd, 'promptCmd'),
152
+ promptCmdContinueFlag: typeof config.promptCmdContinueFlag === 'string' && config.promptCmdContinueFlag.trim() ? config.promptCmdContinueFlag.trim() : null,
135
153
  workspaceRoot: config.workspaceRoot ? path.resolve(config.workspaceRoot) : null,
136
154
  workspaceMode,
155
+ workspaceFilter: normalizeWorkspaceFilter(config.workspaceFilter),
137
156
  env: normalizeEnv(config.env),
138
157
  taskTimeoutMs: readTimeout(config.taskTimeoutMs, 'taskTimeoutMs'),
139
158
  judgeTimeoutMs: readTimeout(config.judgeTimeoutMs, 'judgeTimeoutMs'),
@@ -153,23 +172,18 @@ function resolveWorkspaceRoot(options) {
153
172
  return resolvedWorkspace;
154
173
  }
155
174
 
156
- function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWorkspacePath, timeoutMs, env }, verbose) {
175
+ async function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
157
176
  const wrappedPrompt = [
158
- 'XDRS-CORE TEST PHASE: TASK',
177
+ 'RUN PHASE',
159
178
  '',
160
- 'Execute the following task in the current workspace.',
179
+ '<INSTRUCTIONS>',
161
180
  'Keep all changes inside the workspace.',
162
- 'Respond with JSON only and no code fences.',
163
- 'Use exactly this schema: {"summary":"plain text summary","changedFiles":["relative/path.ext"],"readFiles":["relative/path.ext"]}.',
164
- 'List in readFiles every file you read or used as context during this task.',
165
- 'The summary must describe the final result only, not hidden reasoning.',
181
+ '</INSTRUCTIONS>',
166
182
  '',
167
- 'BEGIN TASK PROMPT',
168
183
  prompt,
169
- 'END TASK PROMPT'
170
184
  ].join('\n');
171
185
 
172
- const result = runPromptCommand({
186
+ const result = await runPromptCommand({
173
187
  commandTemplate,
174
188
  workspacePath,
175
189
  authoritativeWorkspacePath,
@@ -182,40 +196,56 @@ function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWor
182
196
  return parseTaskResponse(result.output);
183
197
  }
184
198
 
185
- function runJudgePhase({ originalPrompt, judgePrompt, taskOutput, agentReportedChanges, commandTemplate, workspacePath, authoritativeWorkspacePath, timeoutMs, env }, verbose) {
199
+ async function runFilesPhase({ commandTemplate, continueFlag, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
186
200
  const wrappedPrompt = [
187
- 'XDRS-CORE TEST PHASE: ASSERTION_EVALUATION',
201
+ 'FILES PHASE',
202
+ '',
203
+ '<INSTRUCTIONS>',
204
+ 'ALWAYS answer with JSON using the schema specified below, and never include any other text.',
205
+ 'Response schema: {"readFiles":["relative/path.ext"],"writeFiles":["relative/path.ext"]}.',
206
+ 'Return in "readFiles" every file you read or used as context during the previous task, including files added by default such as AGENTS.md or skills.',
207
+ 'Return in "writeFiles" every file you created or updated during the previous task.',
208
+ '</INSTRUCTIONS>',
188
209
  '',
189
- 'You are evaluating the result of a separate agent task run.',
190
- 'Treat this as a fresh session. Do not assume any hidden history.',
191
- 'Use the original task prompt, the judge prompt, the final task output, the reported changed file paths, and the current workspace state to decide whether the result passes.',
192
- 'Trust the reported changed file path list as the authoritative change report for this task run.',
210
+ 'Which files were read and which files were changed during the RUN PHASE task execution? Reply only with the JSON response according to the schema in the instructions.',
211
+ ].join('\n');
212
+
213
+ const continueCommand = buildContinueCommand(commandTemplate, continueFlag);
214
+
215
+ const result = await runPromptCommand({
216
+ commandTemplate: continueCommand,
217
+ workspacePath,
218
+ authoritativeWorkspacePath,
219
+ prompt: wrappedPrompt,
220
+ timeoutMs,
221
+ env,
222
+ verbose
223
+ });
224
+
225
+ return parseFilesResponse(result.output);
226
+ }
227
+
228
+ async function runJudgePhase({ judgePrompt, commandTemplate, continueFlag, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
229
+ const wrappedPrompt = [
230
+ 'ASSERTION_EVALUATION PHASE',
231
+ '',
232
+ '<INSTRUCTIONS>',
233
+ 'You are evaluating the result of the task you completed in the RUN PHASE.',
193
234
  'Read files from the workspace directly when you need their contents.',
194
- 'Inspect files in the workspace directly when needed.',
195
- 'Respond with JSON only and no code fences.',
196
- 'Use exactly this schema: {"pass":true,"findings":[]} or {"pass":false,"findings":[{"target":"file","path":"relative/path.ext","line":1,"message":"explanation","assertionRef":"exact relevant phrase from the judge prompt"}]}.',
235
+ 'ALWAYS answer with JSON using the schema specified below, and never include any other text.',
236
+ 'Response schema: {"pass":false,"text":"plain text summary of the evaluation","findings":[{"target":"file","path":"relative/path.ext","line":1,"message":"explanation","assertionRef":"exact relevant phrase from the judge prompt"}]}.',
237
+ 'Use the text field to report a plain text summary of the evaluation result.',
197
238
  'Use target="output" when the issue is in the final task output and target="workspace" when it is not tied to a specific file.',
198
239
  'Include 1-based line numbers when you cite a file or the output text. Include the exact judge-prompt phrase that triggered each finding in assertionRef.',
240
+ 'NEVER change any file during judge evaluation. If you identify an issue that would require a file change to fix, report it as a finding instead.',
199
241
  '',
200
- 'BEGIN ORIGINAL TASK PROMPT',
201
- originalPrompt,
202
- 'END ORIGINAL TASK PROMPT',
203
- '',
204
- 'BEGIN JUDGE PROMPT',
205
242
  judgePrompt,
206
- 'END JUDGE PROMPT',
207
- '',
208
- 'BEGIN TASK OUTPUT',
209
- truncateText(taskOutput || '(empty)', MAX_TASK_OUTPUT_CHARS),
210
- 'END TASK OUTPUT',
211
- '',
212
- 'BEGIN AGENT REPORTED CHANGES JSON',
213
- JSON.stringify(agentReportedChanges, null, 2),
214
- 'END AGENT REPORTED CHANGES JSON'
215
243
  ].join('\n');
216
244
 
217
- const result = runPromptCommand({
218
- commandTemplate,
245
+ const continueCommand = buildContinueCommand(commandTemplate, continueFlag);
246
+
247
+ const result = await runPromptCommand({
248
+ commandTemplate: continueCommand,
219
249
  workspacePath,
220
250
  authoritativeWorkspacePath,
221
251
  prompt: wrappedPrompt,
@@ -224,7 +254,7 @@ function runJudgePhase({ originalPrompt, judgePrompt, taskOutput, agentReportedC
224
254
  verbose
225
255
  });
226
256
 
227
- return normalizeJudgeResponse(result.output);
257
+ return parseJudgeResponse(result.output);
228
258
  }
229
259
 
230
260
  function parseTaskResponse(output) {
@@ -236,22 +266,55 @@ function parseTaskResponse(output) {
236
266
  try {
237
267
  const parsed = parseJsonObject(trimmed);
238
268
  return {
239
- summary: typeof parsed.summary === 'string' && parsed.summary.trim()
240
- ? parsed.summary.trim()
241
- : trimmed,
242
- changedFiles: normalizeStringArray(parsed.changedFiles),
243
- contextFiles: normalizeStringArray(parsed.readFiles)
269
+ text: typeof parsed.text === 'string' && parsed.text.trim()
270
+ ? parsed.text.trim()
271
+ : trimmed
244
272
  };
245
273
  } catch (error) {
246
274
  return {
247
- summary: trimmed,
248
- changedFiles: [],
249
- contextFiles: []
275
+ text: trimmed
250
276
  };
251
277
  }
252
278
  }
253
279
 
254
- function normalizeJudgeResponse(output) {
280
+ function parseFilesResponse(output) {
281
+ const trimmed = String(output || '').trim();
282
+ if (!trimmed) {
283
+ throw new Error('The files command returned empty output.');
284
+ }
285
+
286
+ try {
287
+ const parsed = parseJsonObject(trimmed);
288
+ return {
289
+ readFiles: normalizeStringArray(parsed.readFiles),
290
+ writeFiles: normalizeStringArray(parsed.writeFiles)
291
+ };
292
+ } catch (error) {
293
+ return {
294
+ readFiles: [],
295
+ writeFiles: []
296
+ };
297
+ }
298
+ }
299
+
300
+ function buildContinueCommand(commandTemplate, continueFlag) {
301
+ if (!continueFlag) {
302
+ return commandTemplate;
303
+ }
304
+
305
+ const promptFlagIndex = commandTemplate.indexOf('-p');
306
+ if (promptFlagIndex === -1) {
307
+ return [...commandTemplate.slice(0, -1), continueFlag, commandTemplate[commandTemplate.length - 1]];
308
+ }
309
+
310
+ return [
311
+ ...commandTemplate.slice(0, promptFlagIndex),
312
+ continueFlag,
313
+ ...commandTemplate.slice(promptFlagIndex)
314
+ ];
315
+ }
316
+
317
+ function parseJudgeResponse(output) {
255
318
  let parsed;
256
319
 
257
320
  try {
@@ -295,6 +358,7 @@ function normalizeJudgeResponse(output) {
295
358
 
296
359
  return {
297
360
  pass: parsed.pass,
361
+ text: typeof parsed.text === 'string' ? parsed.text.trim() : '',
298
362
  findings,
299
363
  raw: parsed
300
364
  };
@@ -340,7 +404,7 @@ function normalizeFinding(finding) {
340
404
  };
341
405
  }
342
406
 
343
- function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspacePath, prompt, timeoutMs, env }, verbose) {
407
+ function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspacePath, prompt, timeoutMs, env, verbose }) {
344
408
  const command = rewriteWorkspaceCommand(commandTemplate.map((entry) => entry
345
409
  .replace('{PROMPT}', prompt)
346
410
  .replace('{WORKSPACE_ROOT}', workspacePath)), workspacePath, authoritativeWorkspacePath);
@@ -348,47 +412,75 @@ function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspa
348
412
  const [file, ...args] = command;
349
413
 
350
414
  if(verbose) {
351
- console.log(`Running prompt cmd: ${file} ${args.join(' ')} in workspace: ${workspacePath}`);
415
+ console.log(`Running prompt cmd: ${file} ${args.join(' ')}; workspace: ${workspacePath}`);
352
416
  }
353
417
 
354
- const result = spawnSync(file, args, {
355
- encoding: 'utf8',
356
- cwd: workspacePath,
357
- timeout: timeoutMs || undefined,
358
- maxBuffer: 10 * 1024 * 1024,
359
- env: {
360
- ...process.env,
361
- ...env
418
+ return new Promise((resolve, reject) => {
419
+ const child = spawn(file, args, {
420
+ cwd: workspacePath,
421
+ env: {
422
+ ...process.env,
423
+ ...env
424
+ }
425
+ });
426
+
427
+ let stdout = '';
428
+ let stderr = '';
429
+ let timedOut = false;
430
+ let timer = null;
431
+
432
+ if (timeoutMs) {
433
+ timer = setTimeout(() => {
434
+ timedOut = true;
435
+ child.kill();
436
+ reject(new Error(`${file} timed out after ${timeoutMs}ms`));
437
+ }, timeoutMs);
362
438
  }
363
- });
364
439
 
365
- if(verbose) {
366
- console.log(`Prompt command output: ${result.stdout || result.stderr}`);
367
- }
440
+ child.stdout.on('data', (chunk) => {
441
+ const text = chunk.toString('utf8');
442
+ stdout += text;
443
+ process.stdout.write(text);
444
+ });
368
445
 
446
+ child.stderr.on('data', (chunk) => {
447
+ const text = chunk.toString('utf8');
448
+ stderr += text;
449
+ process.stderr.write(text);
450
+ });
369
451
 
370
- if (result.error) {
371
- if (result.error.code === 'ENOENT') {
372
- throw new Error(`Command not found: ${file}`);
373
- }
374
- throw new Error(`Failed to execute ${file}: ${result.error.message}`);
375
- }
452
+ child.on('error', (err) => {
453
+ if (timer !== null) { clearTimeout(timer); }
454
+ if (err.code === 'ENOENT') {
455
+ reject(new Error(`Command not found: ${file}`));
456
+ } else {
457
+ reject(new Error(`Failed to execute ${file}: ${err.message}`));
458
+ }
459
+ });
376
460
 
377
- if (result.status !== 0) {
378
- const details = truncateText((result.stderr || result.stdout || '').trim(), 2000);
379
- throw new Error(`${file} exited with status ${result.status}${details ? `: ${details}` : ''}`);
380
- }
461
+ child.on('close', (code) => {
462
+ if (timer !== null) { clearTimeout(timer); }
463
+ if (timedOut) { return; }
381
464
 
382
- const output = (result.stdout || '').trim() || (result.stderr || '').trim();
383
- if (!output) {
384
- throw new Error(`${file} returned empty output.`);
385
- }
465
+ if (code !== 0) {
466
+ const details = truncateText((stderr || stdout || '').trim(), 2000);
467
+ reject(new Error(`${file} exited with status ${code}${details ? `: ${details}` : ''}`));
468
+ return;
469
+ }
386
470
 
387
- if(verbose) {
388
- console.log(`Prompt command output: ${output}`);
389
- }
471
+ const output = stdout.trim() || stderr.trim();
472
+ if (!output) {
473
+ reject(new Error(`${file} returned empty output.`));
474
+ return;
475
+ }
390
476
 
391
- return { output };
477
+ if(verbose) {
478
+ console.log(`Prompt command output: ${output}`);
479
+ }
480
+
481
+ resolve({ output });
482
+ });
483
+ });
392
484
  }
393
485
 
394
486
  function rewriteWorkspaceCommand(command, workspacePath, authoritativeWorkspacePath) {
@@ -450,6 +542,30 @@ function normalizeCommandArray(value, label) {
450
542
  return [...value];
451
543
  }
452
544
 
545
+ function normalizeWorkspaceFilter(value) {
546
+ if (value == null) {
547
+ return null;
548
+ }
549
+
550
+ if (typeof value === 'string') {
551
+ if (!value.trim()) {
552
+ throw new Error('workspaceFilter must not be an empty string.');
553
+ }
554
+
555
+ return [value.trim()];
556
+ }
557
+
558
+ if (Array.isArray(value)) {
559
+ if (value.length === 0 || value.some((v) => typeof v !== 'string' || !v.trim())) {
560
+ throw new Error('workspaceFilter must be a non-empty array of non-empty glob strings.');
561
+ }
562
+
563
+ return value.map((v) => v.trim());
564
+ }
565
+
566
+ throw new Error('workspaceFilter must be a string or an array of strings.');
567
+ }
568
+
453
569
  function normalizeEnv(env) {
454
570
  if (env == null) {
455
571
  return {};
@@ -513,7 +629,7 @@ function defaultPromptId(inputPrompt, judgePrompt) {
513
629
  return prefix;
514
630
  }
515
631
 
516
- function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
632
+ async function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
517
633
  const effectiveId = id || defaultPromptId(inputPrompt, judgePrompt);
518
634
  const options = normalizeConfig(config);
519
635
  const workspaceRoot = resolveWorkspaceRoot(options);
@@ -535,7 +651,7 @@ function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
535
651
  }
536
652
  }
537
653
 
538
- const result = runPromptTest(config, inputPrompt, judgePrompt, verbose);
654
+ const result = await runPromptTest(config, inputPrompt, judgePrompt, verbose);
539
655
 
540
656
  if (result.passed && effectiveId && options.reportFile) {
541
657
  const newHash = computeContextHash(options.model, inputPrompt, judgePrompt, result.contextFiles, workspaceRoot);
@@ -567,12 +683,12 @@ function writeReport(reportFile, reportData) {
567
683
  fs.writeFileSync(reportFile, `${JSON.stringify(reportData, null, 2)}\n`, 'utf8');
568
684
  }
569
685
 
570
- function formatFailureMarkdown(findings) {
571
- const normalizedFindings = Array.isArray(findings) && findings.length > 0
572
- ? findings
686
+ function formatFailureMarkdown(result) {
687
+ const normalizedFindings = Array.isArray(result.findings) && result.findings.length > 0
688
+ ? result.findings
573
689
  : [{ target: 'workspace', message: 'The prompt test failed without detailed findings.' }];
574
690
 
575
- return normalizedFindings.map((finding) => {
691
+ return `Assertion failed. taskOutput=${result.taskOutput}\n` + normalizedFindings.map((finding) => {
576
692
  const location = formatFindingLocation(finding);
577
693
  const assertion = finding.assertionRef ? ` Assertion: "${finding.assertionRef}".` : '';
578
694
  return `- [${location}] ${finding.message}${assertion}`;
@@ -608,7 +724,7 @@ function findGitRoot(startPath) {
608
724
  }
609
725
  }
610
726
 
611
- function copyWorkspace(sourcePath, targetPath, verbose) {
727
+ function copyWorkspace(sourcePath, targetPath, workspaceFilter, verbose) {
612
728
  if(verbose) {
613
729
  console.log(`Copying workspace from ${sourcePath} to ${targetPath}`);
614
730
  }
@@ -618,12 +734,13 @@ function copyWorkspace(sourcePath, targetPath, verbose) {
618
734
  targetDir: targetPath,
619
735
  rootPath: sourcePath,
620
736
  ignoreContexts: [],
621
- activeRealDirectories: new Set()
737
+ activeRealDirectories: new Set(),
738
+ workspaceFilter
622
739
  });
623
740
  return targetPath;
624
741
  }
625
742
 
626
- function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts, activeRealDirectories }) {
743
+ function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts, activeRealDirectories, workspaceFilter }) {
627
744
  const realSourceDir = fs.realpathSync(sourceDir);
628
745
  if (activeRealDirectories.has(realSourceDir)) {
629
746
  return;
@@ -663,11 +780,16 @@ function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts
663
780
  targetDir: targetEntryPath,
664
781
  rootPath,
665
782
  ignoreContexts: nextIgnoreContexts,
666
- activeRealDirectories
783
+ activeRealDirectories,
784
+ workspaceFilter
667
785
  });
668
786
  continue;
669
787
  }
670
788
 
789
+ if (workspaceFilter && !workspaceFilter.some((pattern) => minimatch(entryRelativePath, pattern, { dot: true }))) {
790
+ continue;
791
+ }
792
+
671
793
  fs.copyFileSync(sourceEntryPath, targetEntryPath);
672
794
  fs.chmodSync(targetEntryPath, (entryStats || fs.statSync(sourceEntryPath)).mode);
673
795
  }
@@ -17,9 +17,9 @@ afterAll(() => {
17
17
  fs.rmSync(TMP_ROOT, { recursive: true, force: true });
18
18
  });
19
19
 
20
- test('passes a prompt test with copied workspace isolation', () => {
20
+ test('passes a prompt test with copied workspace isolation', async () => {
21
21
  const workspaceRoot = createWorkspace('customer-pass');
22
- const err = testPrompt(
22
+ const err = await testPrompt(
23
23
  createConfig(workspaceRoot),
24
24
  'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
25
25
  'The resulting file should be created at customer-research.md and should not generate facts that are not present in the original prompt'
@@ -29,9 +29,9 @@ test('passes a prompt test with copied workspace isolation', () => {
29
29
  expect(fs.existsSync(path.join(workspaceRoot, 'customer-research.md'))).toBe(false);
30
30
  });
31
31
 
32
- test('passes when ignored files and git metadata stay out of the copied workspace', () => {
32
+ test('passes when ignored files and git metadata stay out of the copied workspace', async () => {
33
33
  const workspaceRoot = createWorkspace('ignore-pass', { withIgnoredEntries: true });
34
- const err = testPrompt(
34
+ const err = await testPrompt(
35
35
  createConfig(workspaceRoot),
36
36
  'create a note named summary.txt saying behavior ok',
37
37
  'Verify if ignored/seed.txt, .git/config, and nested/.git/config are not available in the copied workspace and are not reported as changes'
@@ -44,9 +44,9 @@ test('passes when ignored files and git metadata stay out of the copied workspac
44
44
  assertFileExists(path.join(workspaceRoot, 'nested', '.git', 'config'));
45
45
  });
46
46
 
47
- test('returns markdown findings when the judge rejects the result', () => {
47
+ test('returns markdown findings when the judge rejects the result', async () => {
48
48
  const workspaceRoot = createWorkspace('failure-case');
49
- const err = testPrompt(
49
+ const err = await testPrompt(
50
50
  createConfig(workspaceRoot),
51
51
  'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
52
52
  'Verify if summary.txt exists and the final output mentions summary.txt'
@@ -57,12 +57,12 @@ test('returns markdown findings when the judge rejects the result', () => {
57
57
  expect(err).toContain('- [output:1] The final output should mention summary.txt.');
58
58
  });
59
59
 
60
- test('does not create a temp workspace in in-place mode', () => {
60
+ test('does not create a temp workspace in in-place mode', async () => {
61
61
  const workspaceRoot = createWorkspace('in-place');
62
62
  const mkdtempSpy = jest.spyOn(fs, 'mkdtempSync');
63
63
 
64
64
  try {
65
- const err = testPrompt(
65
+ const err = await testPrompt(
66
66
  createConfig(workspaceRoot, { workspaceMode: 'in-place' }),
67
67
  'create a note named summary.txt saying behavior ok',
68
68
  'Verify if summary.txt exists and the final output mentions summary.txt'
@@ -76,8 +76,22 @@ test('does not create a temp workspace in in-place mode', () => {
76
76
  }
77
77
  });
78
78
 
79
+ test('workspaceFilter copies only files matching the glob pattern to temp workspace', async () => {
80
+ const workspaceRoot = createWorkspace('filter-pass');
81
+ fs.writeFileSync(path.join(workspaceRoot, 'notes.md'), 'notes content\n', 'utf8');
82
+
83
+ const err = await testPrompt(
84
+ createConfig(workspaceRoot, { workspaceFilter: ['*.txt'] }),
85
+ 'workspace-filter-check: list files in the workspace',
86
+ 'workspace-filter-check: notes.md should not exist in the copied workspace, seed.txt should exist'
87
+ );
88
+
89
+ expect(err).toBe('');
90
+ });
91
+
79
92
  test('copilotCmd defaults to the git repository root', () => {
80
- const command = copilotCmd();
93
+ const result = copilotCmd();
94
+ const command = result.promptCmd;
81
95
  const addDirArgument = command.find((entry) => entry.startsWith('--add-dir='));
82
96
 
83
97
  expect(addDirArgument).toBe(`--add-dir=${path.resolve(__dirname, '..')}`);
@@ -89,11 +103,12 @@ test('copilotCmd defaults to the git repository root', () => {
89
103
  ]));
90
104
  const promptIndex = command.indexOf('-p');
91
105
  expect(command[promptIndex + 1]).toBe('{PROMPT}');
106
+ expect(result.promptCmdContinueFlag).toBe('--continue');
92
107
  });
93
108
 
94
- test('judge phase reuses promptCmd even when judgeCmd is provided', () => {
109
+ test('judge phase reuses promptCmd even when judgeCmd is provided', async () => {
95
110
  const workspaceRoot = createWorkspace('judge-cmd-ignored');
96
- const err = testPrompt(
111
+ const err = await testPrompt(
97
112
  createConfig(workspaceRoot, {
98
113
  judgeCmd: ['missing-command', '{PROMPT}']
99
114
  }),
@@ -104,11 +119,11 @@ test('judge phase reuses promptCmd even when judgeCmd is provided', () => {
104
119
  expect(err).toBe('');
105
120
  });
106
121
 
107
- test('creates report file after first successful run', () => {
122
+ test('creates report file after first successful run', async () => {
108
123
  const workspaceRoot = createWorkspace('report-create');
109
124
  const reportFile = path.join(TMP_ROOT, 'report-create.json');
110
125
 
111
- const err = testPrompt(
126
+ const err = await testPrompt(
112
127
  createConfig(workspaceRoot, { reportFile }),
113
128
  'create a note named summary.txt saying behavior ok',
114
129
  'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -125,11 +140,11 @@ test('creates report file after first successful run', () => {
125
140
  expect(report['summary-pass'].contextHash).toHaveLength(32);
126
141
  });
127
142
 
128
- test('returns cached success and skips re-run when hash matches', () => {
143
+ test('returns cached success and skips re-run when hash matches', async () => {
129
144
  const workspaceRoot = createWorkspace('cache-hit');
130
145
  const reportFile = path.join(TMP_ROOT, 'cache-hit.json');
131
146
 
132
- const err1 = testPrompt(
147
+ const err1 = await testPrompt(
133
148
  createConfig(workspaceRoot, { reportFile }),
134
149
  'create a note named summary.txt saying behavior ok',
135
150
  'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -138,7 +153,7 @@ test('returns cached success and skips re-run when hash matches', () => {
138
153
  expect(err1).toBe('');
139
154
 
140
155
  // Second run with a broken promptCmd — cache hit must skip execution entirely
141
- const err2 = testPrompt(
156
+ const err2 = await testPrompt(
142
157
  createConfig(workspaceRoot, { reportFile, promptCmd: ['non-existent-command-xyz', '{PROMPT}'] }),
143
158
  'create a note named summary.txt saying behavior ok',
144
159
  'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -147,11 +162,11 @@ test('returns cached success and skips re-run when hash matches', () => {
147
162
  expect(err2).toBe('');
148
163
  });
149
164
 
150
- test('re-runs and updates report when context file content changes', () => {
165
+ test('re-runs and updates report when the judge prompt changes', async () => {
151
166
  const workspaceRoot = createWorkspace('cache-miss');
152
167
  const reportFile = path.join(TMP_ROOT, 'cache-miss.json');
153
168
 
154
- const err1 = testPrompt(
169
+ const err1 = await testPrompt(
155
170
  createConfig(workspaceRoot, { reportFile }),
156
171
  'create a note named summary.txt saying behavior ok',
157
172
  'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -160,12 +175,10 @@ test('re-runs and updates report when context file content changes', () => {
160
175
  expect(err1).toBe('');
161
176
  const firstHash = JSON.parse(fs.readFileSync(reportFile, 'utf8'))['summary-pass'].contextHash;
162
177
 
163
- fs.writeFileSync(path.join(workspaceRoot, 'seed.txt'), 'changed content\n', 'utf8');
164
-
165
- const err2 = testPrompt(
178
+ const err2 = await testPrompt(
166
179
  createConfig(workspaceRoot, { reportFile }),
167
180
  'create a note named summary.txt saying behavior ok',
168
- 'Verify if summary.txt exists and the final output mentions summary.txt',
181
+ 'Verify if summary.txt exists and the final output mentions summary.txt. Also verify the file has content.',
169
182
  'summary-pass'
170
183
  );
171
184
  expect(err2).toBe('');
@@ -173,11 +186,11 @@ test('re-runs and updates report when context file content changes', () => {
173
186
  expect(secondHash).not.toBe(firstHash);
174
187
  });
175
188
 
176
- test('returns checkOnly failure when no passing report entry exists', () => {
189
+ test('returns checkOnly failure when no passing report entry exists', async () => {
177
190
  const workspaceRoot = createWorkspace('checkonly-no-report');
178
191
  const reportFile = path.join(TMP_ROOT, 'checkonly-no-report.json');
179
192
 
180
- const err = testPrompt(
193
+ const err = await testPrompt(
181
194
  createConfig(workspaceRoot, { reportFile, checkOnly: true }),
182
195
  'create a note named summary.txt saying behavior ok',
183
196
  'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -187,14 +200,14 @@ test('returns checkOnly failure when no passing report entry exists', () => {
187
200
  expect(err).toContain('[cache]');
188
201
  });
189
202
 
190
- test('returns checkOnly failure when hash does not match', () => {
203
+ test('returns checkOnly failure when hash does not match', async () => {
191
204
  const workspaceRoot = createWorkspace('checkonly-hash-mismatch');
192
205
  const reportFile = path.join(TMP_ROOT, 'checkonly-hash-mismatch.json');
193
206
  fs.writeFileSync(reportFile, JSON.stringify({
194
207
  'summary-pass': { result: 'success', contextFiles: ['seed.txt'], contextHash: 'stale-hash-value' }
195
208
  }), 'utf8');
196
209
 
197
- const err = testPrompt(
210
+ const err = await testPrompt(
198
211
  createConfig(workspaceRoot, { reportFile, checkOnly: true }),
199
212
  'create a note named summary.txt saying behavior ok',
200
213
  'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -204,11 +217,11 @@ test('returns checkOnly failure when hash does not match', () => {
204
217
  expect(err).toContain('[cache]');
205
218
  });
206
219
 
207
- test('does not write report when test fails', () => {
220
+ test('does not write report when test fails', async () => {
208
221
  const workspaceRoot = createWorkspace('report-no-write-on-fail');
209
222
  const reportFile = path.join(TMP_ROOT, 'report-no-write-on-fail.json');
210
223
 
211
- const err = testPrompt(
224
+ const err = await testPrompt(
212
225
  createConfig(workspaceRoot, { reportFile }),
213
226
  'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
214
227
  'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -221,7 +234,7 @@ test('does not write report when test fails', () => {
221
234
 
222
235
  function createConfig(workspaceRoot, overrides = {}) {
223
236
  return {
224
- promptCmd: copilotCmd(workspaceRoot),
237
+ ...copilotCmd(workspaceRoot),
225
238
  workspaceRoot,
226
239
  workspaceMode: 'copy',
227
240
  reportFile: null,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xdrs-core",
3
- "version": "0.12.0",
3
+ "version": "0.13.0",
4
4
  "description": "A standard way to organize Decision Records (XDRs) across scopes, subjects, and teams so that AI agents can reliably query and follow them.",
5
5
  "repository": {
6
6
  "type": "git",