npm - xdrs-core - Versions diffs - 0.12.0 → 0.13.0 - Mend

xdrs-core 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/.xdrs/_core/adrs/principles/skills/005-write-research/005-write-research.test.int.js +12 -11
package/.xdrs/_core/adrs/principles/skills/005-write-research/005-write-research.test.int.report +22 -0
package/lib/testPrompt.js +240 -118
package/lib/testPrompt.test.js +42 -29
package/package.json +1 -1

package/.xdrs/_core/adrs/principles/skills/005-write-research/005-write-research.test.int.js CHANGED

@@ -5,17 +5,17 @@ const { copilotCmd, testPrompt } = require('xdrs-core');
 const REPO_ROOT = path.resolve(__dirname, '..', '..', '..', '..', '..', '..');
-jest.setTimeout(60000);
+jest.setTimeout(300000);
-test('check', () => {
-	const err = testPrompt(
+test('check', async () => {
+	const err = await testPrompt(
 		{
 			workspaceRoot: REPO_ROOT,
 			workspaceMode: 'in-place',
-			promptCmd: copilotCmd(REPO_ROOT),
+			...copilotCmd(REPO_ROOT),
 		},
-		'Reply with READY and nothing else.',
-		'Verify that the final output is READY and nothing else.',
+		'Reply ONLY with "READY" after checking if SKILL 001 has any contents',
+		'Verify that the final output is ONLY "READY" and that it read file 001-lint/SKILL.md',
 		null,
 		true
 	);
@@ -23,16 +23,17 @@ test('check', () => {
 	expect(err).toBe('');
 });
-test.skip('005-write-research creates an IMRAD research document in copy mode', () => {
-	const err = testPrompt(
+test('005-write-research creates an IMRAD research document in copy mode', async () => {
+	const err = await testPrompt(
 		{
 			workspaceRoot: REPO_ROOT,
 			workspaceMode: 'copy',
-			promptCmd: copilotCmd(REPO_ROOT),
+			...copilotCmd(REPO_ROOT),
 		},
 		'Create a very small research document with the following data: We measured the installation time in our monorepo and pnpm is 3.5x faster than Yarn when installing dependencies. We recommend using PNPM in our monorepo to speed up our productivity as it seems very easy to use and have a better internal hoisting mechanism.',
-		'Verify that a research file was created under .xdrs/_local/edrs/devops/researches/, that it contains the sections Abstract, Introduction, Methods, Results, Discussion, Conclusion, and References, and that the content contains all the provided data in input prompt, and doesn\'t contain more than 20% of additional information.',
-		'005-write-research-imrad-copy'
+		'Verify that a research file was created under .xdrs/_local/edrs/devops/researches/, that it contains the sections Abstract, Introduction, Methods, Results, Discussion, Conclusion, and References, and that the content contains all the provided data in input prompt, and doesn\'t contain more than 20% of additional information outside the central topic.',
+		null,
+		true
 	);
 	expect(err).toBe('');

package/.xdrs/_core/adrs/principles/skills/005-write-research/005-write-research.test.int.report ADDED

@@ -0,0 +1,22 @@
+{
+  "Reply ONLY with \"READY\" after checking i-a61b0904": {
+    "result": "success",
+    "contextFiles": [
+      ".xdrs/_core/adrs/principles/skills/001-lint/SKILL.md",
+      "AGENTS.md"
+    ],
+    "contextHash": "3dcbcc0a03f4d3b2a797c28165f6f68a"
+  },
+  "Create a very small research document wi-ea3740cd": {
+    "result": "success",
+    "contextFiles": [
+      ".xdrs/_core/adrs/principles/001-xdrs-core.md",
+      ".xdrs/_core/adrs/principles/006-research-standards.md",
+      ".xdrs/_local/bdrs/index.md",
+      ".xdrs/_local/edrs/devops/researches/001-pnpm-vs-yarn-install-speed-monorepo.md",
+      ".xdrs/index.md",
+      "AGENTS.md"
+    ],
+    "contextHash": "a8ffde30ac3757f1185fee620f526720"
+  }
+}

package/lib/testPrompt.js CHANGED

@@ -4,22 +4,21 @@
 const crypto = require('crypto');
 const fs = require('fs');
 const ignore = require('ignore');
+const { minimatch } = require('minimatch');
 const os = require('os');
 const path = require('path');
-const { spawnSync } = require('child_process');
+const { spawn } = require('child_process');
 const MAX_TASK_OUTPUT_CHARS = 12 * 1024;
-function testPrompt(config, inputPrompt, judgePrompt, id, verbose) {
-	const result = runPrompt(config, inputPrompt, judgePrompt, id, verbose);
-	return result.passed ? '' : formatFailureMarkdown(result.findings);
+async function testPrompt(config, inputPrompt, judgePrompt, id, verbose) {
+	const result = await runPrompt(config, inputPrompt, judgePrompt, id, verbose);
+	return result.passed ? '' : formatFailureMarkdown(result);
 }
-function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
+async function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
 	if(verbose) {
-		console.log('Running prompt test with config:', JSON.stringify(config, null, 2));
-		console.log('Input Prompt:', inputPrompt);
-		console.log('Judge Prompt:', judgePrompt);
+		console.log(`Running test in workspace. inputPrompt="${inputPrompt}"; judgePrompt="${judgePrompt}"; config=${JSON.stringify(config)}`);
 	}
 	const options = normalizeConfig(config);
 	const originalWorkspace = resolveWorkspaceRoot(options);
@@ -29,13 +28,13 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
 	try {
 		if (options.workspaceMode === 'copy') {
 			tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'xdrs-core-test-'));
-			effectiveWorkspace = copyWorkspace(originalWorkspace, path.join(tempRoot, 'workspace'), verbose);
+			effectiveWorkspace = copyWorkspace(originalWorkspace, path.join(tempRoot, 'workspace'), options.workspaceFilter, verbose);
 		}
 		if(verbose) {
 			console.log(`Running prompt test in workspace: ${effectiveWorkspace} (mode: ${options.workspaceMode})`);
 		}
-		const task = runTaskPhase({
+		const task = await runTaskPhase({
 			prompt: ensureNonEmptyString(inputPrompt, 'inputPrompt'),
 			commandTemplate: options.promptCmd,
 			workspacePath: effectiveWorkspace,
@@ -46,19 +45,33 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
 		});
 		if(verbose) {
-			console.log('Task phase completed. Summary:', task.summary);
-			console.log('Agent reported changed files:', task.changedFiles);
+			console.log(`Task phase completed. text="${task.text}"`);
+		}
+		if(verbose) {
+			console.log('Running files phase to collect changed and context files from the task session.');
+		}
+		const files = await runFilesPhase({
+			commandTemplate: options.promptCmd,
+			continueFlag: options.promptCmdContinueFlag,
+			workspacePath: effectiveWorkspace,
+			authoritativeWorkspacePath: originalWorkspace,
+			timeoutMs: options.taskTimeoutMs,
+			env: options.env,
+			verbose
+		});
+		if(verbose) {
+			console.log(`Files phase completed. readFiles="${files.readFiles}"; writeFiles="${files.writeFiles}"`);
 		}
 		if(verbose) {
 			console.log('Running judge phase to evaluate the task output against the judge prompt.');
 		}
-		const evaluation = runJudgePhase({
-			originalPrompt: ensureNonEmptyString(inputPrompt, 'inputPrompt'),
+		const evaluation = await runJudgePhase({
 			judgePrompt: ensureNonEmptyString(judgePrompt, 'judgePrompt'),
-			taskOutput: task.summary,
-			agentReportedChanges: task.changedFiles,
 			commandTemplate: options.promptCmd,
+			continueFlag: options.promptCmdContinueFlag,
 			workspacePath: effectiveWorkspace,
 			authoritativeWorkspacePath: originalWorkspace,
 			timeoutMs: options.judgeTimeoutMs,
@@ -66,13 +79,14 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
 			verbose
 		});
+		console.log(`Judge phase completed. passed=${evaluation.pass}; findings="${JSON.stringify(evaluation.findings)}"`);
 		return {
 			passed: evaluation.pass,
 			findings: evaluation.findings,
-			taskOutput: task.summary,
-			agentReportedChanges: task.changedFiles,
-			contextFiles: task.contextFiles,
-			judge: evaluation.raw,
+			taskOutput: task.text,
+			agentReportedChanges: files.writeFiles,
+			contextFiles: files.readFiles,
 			workspace: {
 				original: originalWorkspace,
 				effective: effectiveWorkspace,
@@ -87,17 +101,20 @@ function runPromptTest(config, inputPrompt, judgePrompt, verbose) {
 }
 function copilotCmd(workspaceRoot = findGitRoot(process.cwd())) {
-	return [
-		'copilot',
-		`--add-dir=${path.resolve(workspaceRoot)}`,
-		'--autopilot',
-		'--allow-all-tools',
-		'--allow-all-urls',
-		'--no-ask-user',
-		'--allow-all',
-		'-p',
-		'{PROMPT}'
-	];
+	return {
+		promptCmd: [
+			'copilot',
+			`--add-dir=${path.resolve(workspaceRoot)}`,
+			'--autopilot',
+			'--allow-all-tools',
+			'--allow-all-urls',
+			'--no-ask-user',
+			'--allow-all',
+			'-p',
+			'{PROMPT}'
+		],
+		promptCmdContinueFlag: '--continue'
+	};
 }
 function ensureNonEmptyString(value, label) {
@@ -132,8 +149,10 @@ function normalizeConfig(config) {
 	return {
 		promptCmd: parseCommandTemplate(config.promptCmd, 'promptCmd'),
+		promptCmdContinueFlag: typeof config.promptCmdContinueFlag === 'string' && config.promptCmdContinueFlag.trim() ? config.promptCmdContinueFlag.trim() : null,
 		workspaceRoot: config.workspaceRoot ? path.resolve(config.workspaceRoot) : null,
 		workspaceMode,
+		workspaceFilter: normalizeWorkspaceFilter(config.workspaceFilter),
 		env: normalizeEnv(config.env),
 		taskTimeoutMs: readTimeout(config.taskTimeoutMs, 'taskTimeoutMs'),
 		judgeTimeoutMs: readTimeout(config.judgeTimeoutMs, 'judgeTimeoutMs'),
@@ -153,23 +172,18 @@ function resolveWorkspaceRoot(options) {
 	return resolvedWorkspace;
 }
-function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWorkspacePath, timeoutMs, env }, verbose) {
+async function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
 	const wrappedPrompt = [
-		'XDRS-CORE TEST PHASE: TASK',
+		'RUN PHASE',
 		'',
-		'Execute the following task in the current workspace.',
+		'<INSTRUCTIONS>',
 		'Keep all changes inside the workspace.',
-		'Respond with JSON only and no code fences.',
-		'Use exactly this schema: {"summary":"plain text summary","changedFiles":["relative/path.ext"],"readFiles":["relative/path.ext"]}.',
-		'List in readFiles every file you read or used as context during this task.',
-		'The summary must describe the final result only, not hidden reasoning.',
+		'</INSTRUCTIONS>',
 		'',
-		'BEGIN TASK PROMPT',
 		prompt,
-		'END TASK PROMPT'
 	].join('\n');
-	const result = runPromptCommand({
+	const result = await runPromptCommand({
 		commandTemplate,
 		workspacePath,
 		authoritativeWorkspacePath,
@@ -182,40 +196,56 @@ function runTaskPhase({ prompt, commandTemplate, workspacePath, authoritativeWor
 	return parseTaskResponse(result.output);
 }
-function runJudgePhase({ originalPrompt, judgePrompt, taskOutput, agentReportedChanges, commandTemplate, workspacePath, authoritativeWorkspacePath, timeoutMs, env }, verbose) {
+async function runFilesPhase({ commandTemplate, continueFlag, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
 	const wrappedPrompt = [
-		'XDRS-CORE TEST PHASE: ASSERTION_EVALUATION',
+		'FILES PHASE',
+		'',
+		'<INSTRUCTIONS>',
+		'ALWAYS answer with JSON using the schema specified below, and never include any other text.',
+		'Response schema: {"readFiles":["relative/path.ext"],"writeFiles":["relative/path.ext"]}.',
+		'Return in "readFiles" every file you read or used as context during the previous task, including files added by default such as AGENTS.md or skills.',
+		'Return in "writeFiles" every file you created or updated during the previous task.',
+		'</INSTRUCTIONS>',
 		'',
-		'You are evaluating the result of a separate agent task run.',
-		'Treat this as a fresh session. Do not assume any hidden history.',
-		'Use the original task prompt, the judge prompt, the final task output, the reported changed file paths, and the current workspace state to decide whether the result passes.',
-		'Trust the reported changed file path list as the authoritative change report for this task run.',
+		'Which files were read and which files were changed during the RUN PHASE task execution? Reply only with the JSON response according to the schema in the instructions.',
+	].join('\n');
+	const continueCommand = buildContinueCommand(commandTemplate, continueFlag);
+	const result = await runPromptCommand({
+		commandTemplate: continueCommand,
+		workspacePath,
+		authoritativeWorkspacePath,
+		prompt: wrappedPrompt,
+		timeoutMs,
+		env,
+		verbose
+	});
+	return parseFilesResponse(result.output);
+}
+async function runJudgePhase({ judgePrompt, commandTemplate, continueFlag, workspacePath, authoritativeWorkspacePath, timeoutMs, env, verbose }) {
+	const wrappedPrompt = [
+		'ASSERTION_EVALUATION PHASE',
+		'',
+		'<INSTRUCTIONS>',
+		'You are evaluating the result of the task you completed in the RUN PHASE.',
 		'Read files from the workspace directly when you need their contents.',
-		'Inspect files in the workspace directly when needed.',
-		'Respond with JSON only and no code fences.',
-		'Use exactly this schema: {"pass":true,"findings":[]} or {"pass":false,"findings":[{"target":"file","path":"relative/path.ext","line":1,"message":"explanation","assertionRef":"exact relevant phrase from the judge prompt"}]}.',
+		'ALWAYS answer with JSON using the schema specified below, and never include any other text.',
+		'Response schema: {"pass":false,"text":"plain text summary of the evaluation","findings":[{"target":"file","path":"relative/path.ext","line":1,"message":"explanation","assertionRef":"exact relevant phrase from the judge prompt"}]}.',
+		'Use the text field to report a plain text summary of the evaluation result.',
 		'Use target="output" when the issue is in the final task output and target="workspace" when it is not tied to a specific file.',
 		'Include 1-based line numbers when you cite a file or the output text. Include the exact judge-prompt phrase that triggered each finding in assertionRef.',
+		'NEVER change any file during judge evaluation. If you identify an issue that would require a file change to fix, report it as a finding instead.',
 		'',
-		'BEGIN ORIGINAL TASK PROMPT',
-		originalPrompt,
-		'END ORIGINAL TASK PROMPT',
-		'',
-		'BEGIN JUDGE PROMPT',
 		judgePrompt,
-		'END JUDGE PROMPT',
-		'',
-		'BEGIN TASK OUTPUT',
-		truncateText(taskOutput || '(empty)', MAX_TASK_OUTPUT_CHARS),
-		'END TASK OUTPUT',
-		'',
-		'BEGIN AGENT REPORTED CHANGES JSON',
-		JSON.stringify(agentReportedChanges, null, 2),
-		'END AGENT REPORTED CHANGES JSON'
 	].join('\n');
-	const result = runPromptCommand({
-		commandTemplate,
+	const continueCommand = buildContinueCommand(commandTemplate, continueFlag);
+	const result = await runPromptCommand({
+		commandTemplate: continueCommand,
 		workspacePath,
 		authoritativeWorkspacePath,
 		prompt: wrappedPrompt,
@@ -224,7 +254,7 @@ function runJudgePhase({ originalPrompt, judgePrompt, taskOutput, agentReportedC
 		verbose
 	});
-	return normalizeJudgeResponse(result.output);
+	return parseJudgeResponse(result.output);
 }
 function parseTaskResponse(output) {
@@ -236,22 +266,55 @@ function parseTaskResponse(output) {
 	try {
 		const parsed = parseJsonObject(trimmed);
 		return {
-			summary: typeof parsed.summary === 'string' && parsed.summary.trim()
-				? parsed.summary.trim()
-				: trimmed,
-			changedFiles: normalizeStringArray(parsed.changedFiles),
-			contextFiles: normalizeStringArray(parsed.readFiles)
+			text: typeof parsed.text === 'string' && parsed.text.trim()
+				? parsed.text.trim()
+				: trimmed
 		};
 	} catch (error) {
 		return {
-			summary: trimmed,
-			changedFiles: [],
-			contextFiles: []
+			text: trimmed
 		};
 	}
 }
-function normalizeJudgeResponse(output) {
+function parseFilesResponse(output) {
+	const trimmed = String(output || '').trim();
+	if (!trimmed) {
+		throw new Error('The files command returned empty output.');
+	}
+	try {
+		const parsed = parseJsonObject(trimmed);
+		return {
+			readFiles: normalizeStringArray(parsed.readFiles),
+			writeFiles: normalizeStringArray(parsed.writeFiles)
+		};
+	} catch (error) {
+		return {
+			readFiles: [],
+			writeFiles: []
+		};
+	}
+}
+function buildContinueCommand(commandTemplate, continueFlag) {
+	if (!continueFlag) {
+		return commandTemplate;
+	}
+	const promptFlagIndex = commandTemplate.indexOf('-p');
+	if (promptFlagIndex === -1) {
+		return [...commandTemplate.slice(0, -1), continueFlag, commandTemplate[commandTemplate.length - 1]];
+	}
+	return [
+		...commandTemplate.slice(0, promptFlagIndex),
+		continueFlag,
+		...commandTemplate.slice(promptFlagIndex)
+	];
+}
+function parseJudgeResponse(output) {
 	let parsed;
 	try {
@@ -295,6 +358,7 @@ function normalizeJudgeResponse(output) {
 	return {
 		pass: parsed.pass,
+		text: typeof parsed.text === 'string' ? parsed.text.trim() : '',
 		findings,
 		raw: parsed
 	};
@@ -340,7 +404,7 @@ function normalizeFinding(finding) {
 	};
 }
-function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspacePath, prompt, timeoutMs, env }, verbose) {
+function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspacePath, prompt, timeoutMs, env, verbose }) {
 	const command = rewriteWorkspaceCommand(commandTemplate.map((entry) => entry
 		.replace('{PROMPT}', prompt)
 		.replace('{WORKSPACE_ROOT}', workspacePath)), workspacePath, authoritativeWorkspacePath);
@@ -348,47 +412,75 @@ function runPromptCommand({ commandTemplate, workspacePath, authoritativeWorkspa
 	const [file, ...args] = command;
 	if(verbose) {
-		console.log(`Running prompt cmd: ${file} ${args.join(' ')} in workspace: ${workspacePath}`);
+		console.log(`Running prompt cmd: ${file} ${args.join(' ')}; workspace: ${workspacePath}`);
 	}
-	const result = spawnSync(file, args, {
-		encoding: 'utf8',
-		cwd: workspacePath,
-		timeout: timeoutMs || undefined,
-		maxBuffer: 10 * 1024 * 1024,
-		env: {
-			...process.env,
-			...env
+	return new Promise((resolve, reject) => {
+		const child = spawn(file, args, {
+			cwd: workspacePath,
+			env: {
+				...process.env,
+				...env
+			}
+		});
+		let stdout = '';
+		let stderr = '';
+		let timedOut = false;
+		let timer = null;
+		if (timeoutMs) {
+			timer = setTimeout(() => {
+				timedOut = true;
+				child.kill();
+				reject(new Error(`${file} timed out after ${timeoutMs}ms`));
+			}, timeoutMs);
 		}
-	});
-	if(verbose) {
-		console.log(`Prompt command output: ${result.stdout || result.stderr}`);
-	}
+		child.stdout.on('data', (chunk) => {
+			const text = chunk.toString('utf8');
+			stdout += text;
+			process.stdout.write(text);
+		});
+		child.stderr.on('data', (chunk) => {
+			const text = chunk.toString('utf8');
+			stderr += text;
+			process.stderr.write(text);
+		});
-	if (result.error) {
-		if (result.error.code === 'ENOENT') {
-			throw new Error(`Command not found: ${file}`);
-		}
-		throw new Error(`Failed to execute ${file}: ${result.error.message}`);
-	}
+		child.on('error', (err) => {
+			if (timer !== null) { clearTimeout(timer); }
+			if (err.code === 'ENOENT') {
+				reject(new Error(`Command not found: ${file}`));
+			} else {
+				reject(new Error(`Failed to execute ${file}: ${err.message}`));
+			}
+		});
-	if (result.status !== 0) {
-		const details = truncateText((result.stderr || result.stdout || '').trim(), 2000);
-		throw new Error(`${file} exited with status ${result.status}${details ? `: ${details}` : ''}`);
-	}
+		child.on('close', (code) => {
+			if (timer !== null) { clearTimeout(timer); }
+			if (timedOut) { return; }
-	const output = (result.stdout || '').trim() || (result.stderr || '').trim();
-	if (!output) {
-		throw new Error(`${file} returned empty output.`);
-	}
+			if (code !== 0) {
+				const details = truncateText((stderr || stdout || '').trim(), 2000);
+				reject(new Error(`${file} exited with status ${code}${details ? `: ${details}` : ''}`));
+				return;
+			}
-	if(verbose) {
-		console.log(`Prompt command output: ${output}`);
-	}
+			const output = stdout.trim() || stderr.trim();
+			if (!output) {
+				reject(new Error(`${file} returned empty output.`));
+				return;
+			}
-	return { output };
+			if(verbose) {
+				console.log(`Prompt command output: ${output}`);
+			}
+			resolve({ output });
+		});
+	});
 }
 function rewriteWorkspaceCommand(command, workspacePath, authoritativeWorkspacePath) {
@@ -450,6 +542,30 @@ function normalizeCommandArray(value, label) {
 	return [...value];
 }
+function normalizeWorkspaceFilter(value) {
+	if (value == null) {
+		return null;
+	}
+	if (typeof value === 'string') {
+		if (!value.trim()) {
+			throw new Error('workspaceFilter must not be an empty string.');
+		}
+		return [value.trim()];
+	}
+	if (Array.isArray(value)) {
+		if (value.length === 0 || value.some((v) => typeof v !== 'string' || !v.trim())) {
+			throw new Error('workspaceFilter must be a non-empty array of non-empty glob strings.');
+		}
+		return value.map((v) => v.trim());
+	}
+	throw new Error('workspaceFilter must be a string or an array of strings.');
+}
 function normalizeEnv(env) {
 	if (env == null) {
 		return {};
@@ -513,7 +629,7 @@ function defaultPromptId(inputPrompt, judgePrompt) {
 	return prefix;
 }
-function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
+async function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
 	const effectiveId = id || defaultPromptId(inputPrompt, judgePrompt);
 	const options = normalizeConfig(config);
 	const workspaceRoot = resolveWorkspaceRoot(options);
@@ -535,7 +651,7 @@ function runPrompt(config, inputPrompt, judgePrompt, id, verbose) {
 		}
 	}
-	const result = runPromptTest(config, inputPrompt, judgePrompt, verbose);
+	const result = await runPromptTest(config, inputPrompt, judgePrompt, verbose);
 	if (result.passed && effectiveId && options.reportFile) {
 		const newHash = computeContextHash(options.model, inputPrompt, judgePrompt, result.contextFiles, workspaceRoot);
@@ -567,12 +683,12 @@ function writeReport(reportFile, reportData) {
 	fs.writeFileSync(reportFile, `${JSON.stringify(reportData, null, 2)}\n`, 'utf8');
 }
-function formatFailureMarkdown(findings) {
-	const normalizedFindings = Array.isArray(findings) && findings.length > 0
-		? findings
+function formatFailureMarkdown(result) {
+	const normalizedFindings = Array.isArray(result.findings) && result.findings.length > 0
+		? result.findings
 		: [{ target: 'workspace', message: 'The prompt test failed without detailed findings.' }];
-	return normalizedFindings.map((finding) => {
+	return `Assertion failed. taskOutput=${result.taskOutput}\n` + normalizedFindings.map((finding) => {
 		const location = formatFindingLocation(finding);
 		const assertion = finding.assertionRef ? ` Assertion: "${finding.assertionRef}".` : '';
 		return `- [${location}] ${finding.message}${assertion}`;
@@ -608,7 +724,7 @@ function findGitRoot(startPath) {
 	}
 }
-function copyWorkspace(sourcePath, targetPath, verbose) {
+function copyWorkspace(sourcePath, targetPath, workspaceFilter, verbose) {
 	if(verbose) {
 		console.log(`Copying workspace from ${sourcePath} to ${targetPath}`);
 	}
@@ -618,12 +734,13 @@ function copyWorkspace(sourcePath, targetPath, verbose) {
 		targetDir: targetPath,
 		rootPath: sourcePath,
 		ignoreContexts: [],
-		activeRealDirectories: new Set()
+		activeRealDirectories: new Set(),
+		workspaceFilter
 	});
 	return targetPath;
 }
-function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts, activeRealDirectories }) {
+function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts, activeRealDirectories, workspaceFilter }) {
 	const realSourceDir = fs.realpathSync(sourceDir);
 	if (activeRealDirectories.has(realSourceDir)) {
 		return;
@@ -663,11 +780,16 @@ function copyWorkspaceDirectory({ sourceDir, targetDir, rootPath, ignoreContexts
 					targetDir: targetEntryPath,
 					rootPath,
 					ignoreContexts: nextIgnoreContexts,
-					activeRealDirectories
+					activeRealDirectories,
+					workspaceFilter
 				});
 				continue;
 			}
+			if (workspaceFilter && !workspaceFilter.some((pattern) => minimatch(entryRelativePath, pattern, { dot: true }))) {
+				continue;
+			}
 			fs.copyFileSync(sourceEntryPath, targetEntryPath);
 			fs.chmodSync(targetEntryPath, (entryStats || fs.statSync(sourceEntryPath)).mode);
 		}

package/lib/testPrompt.test.js CHANGED

@@ -17,9 +17,9 @@ afterAll(() => {
 	fs.rmSync(TMP_ROOT, { recursive: true, force: true });
 });
-test('passes a prompt test with copied workspace isolation', () => {
+test('passes a prompt test with copied workspace isolation', async () => {
 	const workspaceRoot = createWorkspace('customer-pass');
-	const err = testPrompt(
+	const err = await testPrompt(
 		createConfig(workspaceRoot),
 		'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
 		'The resulting file should be created at customer-research.md and should not generate facts that are not present in the original prompt'
@@ -29,9 +29,9 @@ test('passes a prompt test with copied workspace isolation', () => {
 	expect(fs.existsSync(path.join(workspaceRoot, 'customer-research.md'))).toBe(false);
 });
-test('passes when ignored files and git metadata stay out of the copied workspace', () => {
+test('passes when ignored files and git metadata stay out of the copied workspace', async () => {
 	const workspaceRoot = createWorkspace('ignore-pass', { withIgnoredEntries: true });
-	const err = testPrompt(
+	const err = await testPrompt(
 		createConfig(workspaceRoot),
 		'create a note named summary.txt saying behavior ok',
 		'Verify if ignored/seed.txt, .git/config, and nested/.git/config are not available in the copied workspace and are not reported as changes'
@@ -44,9 +44,9 @@ test('passes when ignored files and git metadata stay out of the copied workspac
 	assertFileExists(path.join(workspaceRoot, 'nested', '.git', 'config'));
 });
-test('returns markdown findings when the judge rejects the result', () => {
+test('returns markdown findings when the judge rejects the result', async () => {
 	const workspaceRoot = createWorkspace('failure-case');
-	const err = testPrompt(
+	const err = await testPrompt(
 		createConfig(workspaceRoot),
 		'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
 		'Verify if summary.txt exists and the final output mentions summary.txt'
@@ -57,12 +57,12 @@ test('returns markdown findings when the judge rejects the result', () => {
 	expect(err).toContain('- [output:1] The final output should mention summary.txt.');
 });
-test('does not create a temp workspace in in-place mode', () => {
+test('does not create a temp workspace in in-place mode', async () => {
 	const workspaceRoot = createWorkspace('in-place');
 	const mkdtempSpy = jest.spyOn(fs, 'mkdtempSync');
 	try {
-		const err = testPrompt(
+		const err = await testPrompt(
 			createConfig(workspaceRoot, { workspaceMode: 'in-place' }),
 			'create a note named summary.txt saying behavior ok',
 			'Verify if summary.txt exists and the final output mentions summary.txt'
@@ -76,8 +76,22 @@ test('does not create a temp workspace in in-place mode', () => {
 	}
 });
+test('workspaceFilter copies only files matching the glob pattern to temp workspace', async () => {
+	const workspaceRoot = createWorkspace('filter-pass');
+	fs.writeFileSync(path.join(workspaceRoot, 'notes.md'), 'notes content\n', 'utf8');
+	const err = await testPrompt(
+		createConfig(workspaceRoot, { workspaceFilter: ['*.txt'] }),
+		'workspace-filter-check: list files in the workspace',
+		'workspace-filter-check: notes.md should not exist in the copied workspace, seed.txt should exist'
+	);
+	expect(err).toBe('');
+});
 test('copilotCmd defaults to the git repository root', () => {
-	const command = copilotCmd();
+	const result = copilotCmd();
+	const command = result.promptCmd;
 	const addDirArgument = command.find((entry) => entry.startsWith('--add-dir='));
 	expect(addDirArgument).toBe(`--add-dir=${path.resolve(__dirname, '..')}`);
@@ -89,11 +103,12 @@ test('copilotCmd defaults to the git repository root', () => {
 	]));
 	const promptIndex = command.indexOf('-p');
 	expect(command[promptIndex + 1]).toBe('{PROMPT}');
+	expect(result.promptCmdContinueFlag).toBe('--continue');
 });
-test('judge phase reuses promptCmd even when judgeCmd is provided', () => {
+test('judge phase reuses promptCmd even when judgeCmd is provided', async () => {
 	const workspaceRoot = createWorkspace('judge-cmd-ignored');
-	const err = testPrompt(
+	const err = await testPrompt(
 		createConfig(workspaceRoot, {
 			judgeCmd: ['missing-command', '{PROMPT}']
 		}),
@@ -104,11 +119,11 @@ test('judge phase reuses promptCmd even when judgeCmd is provided', () => {
 	expect(err).toBe('');
 });
-test('creates report file after first successful run', () => {
+test('creates report file after first successful run', async () => {
 	const workspaceRoot = createWorkspace('report-create');
 	const reportFile = path.join(TMP_ROOT, 'report-create.json');
-	const err = testPrompt(
+	const err = await testPrompt(
 		createConfig(workspaceRoot, { reportFile }),
 		'create a note named summary.txt saying behavior ok',
 		'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -125,11 +140,11 @@ test('creates report file after first successful run', () => {
 	expect(report['summary-pass'].contextHash).toHaveLength(32);
 });
-test('returns cached success and skips re-run when hash matches', () => {
+test('returns cached success and skips re-run when hash matches', async () => {
 	const workspaceRoot = createWorkspace('cache-hit');
 	const reportFile = path.join(TMP_ROOT, 'cache-hit.json');
-	const err1 = testPrompt(
+	const err1 = await testPrompt(
 		createConfig(workspaceRoot, { reportFile }),
 		'create a note named summary.txt saying behavior ok',
 		'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -138,7 +153,7 @@ test('returns cached success and skips re-run when hash matches', () => {
 	expect(err1).toBe('');
 	// Second run with a broken promptCmd — cache hit must skip execution entirely
-	const err2 = testPrompt(
+	const err2 = await testPrompt(
 		createConfig(workspaceRoot, { reportFile, promptCmd: ['non-existent-command-xyz', '{PROMPT}'] }),
 		'create a note named summary.txt saying behavior ok',
 		'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -147,11 +162,11 @@ test('returns cached success and skips re-run when hash matches', () => {
 	expect(err2).toBe('');
 });
-test('re-runs and updates report when context file content changes', () => {
+test('re-runs and updates report when the judge prompt changes', async () => {
 	const workspaceRoot = createWorkspace('cache-miss');
 	const reportFile = path.join(TMP_ROOT, 'cache-miss.json');
-	const err1 = testPrompt(
+	const err1 = await testPrompt(
 		createConfig(workspaceRoot, { reportFile }),
 		'create a note named summary.txt saying behavior ok',
 		'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -160,12 +175,10 @@ test('re-runs and updates report when context file content changes', () => {
 	expect(err1).toBe('');
 	const firstHash = JSON.parse(fs.readFileSync(reportFile, 'utf8'))['summary-pass'].contextHash;
-	fs.writeFileSync(path.join(workspaceRoot, 'seed.txt'), 'changed content\n', 'utf8');
-	const err2 = testPrompt(
+	const err2 = await testPrompt(
 		createConfig(workspaceRoot, { reportFile }),
 		'create a note named summary.txt saying behavior ok',
-		'Verify if summary.txt exists and the final output mentions summary.txt',
+		'Verify if summary.txt exists and the final output mentions summary.txt. Also verify the file has content.',
 		'summary-pass'
 	);
 	expect(err2).toBe('');
@@ -173,11 +186,11 @@ test('re-runs and updates report when context file content changes', () => {
 	expect(secondHash).not.toBe(firstHash);
 });
-test('returns checkOnly failure when no passing report entry exists', () => {
+test('returns checkOnly failure when no passing report entry exists', async () => {
 	const workspaceRoot = createWorkspace('checkonly-no-report');
 	const reportFile = path.join(TMP_ROOT, 'checkonly-no-report.json');
-	const err = testPrompt(
+	const err = await testPrompt(
 		createConfig(workspaceRoot, { reportFile, checkOnly: true }),
 		'create a note named summary.txt saying behavior ok',
 		'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -187,14 +200,14 @@ test('returns checkOnly failure when no passing report entry exists', () => {
 	expect(err).toContain('[cache]');
 });
-test('returns checkOnly failure when hash does not match', () => {
+test('returns checkOnly failure when hash does not match', async () => {
 	const workspaceRoot = createWorkspace('checkonly-hash-mismatch');
 	const reportFile = path.join(TMP_ROOT, 'checkonly-hash-mismatch.json');
 	fs.writeFileSync(reportFile, JSON.stringify({
 		'summary-pass': { result: 'success', contextFiles: ['seed.txt'], contextHash: 'stale-hash-value' }
 	}), 'utf8');
-	const err = testPrompt(
+	const err = await testPrompt(
 		createConfig(workspaceRoot, { reportFile, checkOnly: true }),
 		'create a note named summary.txt saying behavior ok',
 		'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -204,11 +217,11 @@ test('returns checkOnly failure when hash does not match', () => {
 	expect(err).toContain('[cache]');
 });
-test('does not write report when test fails', () => {
+test('does not write report when test fails', async () => {
 	const workspaceRoot = createWorkspace('report-no-write-on-fail');
 	const reportFile = path.join(TMP_ROOT, 'report-no-write-on-fail.json');
-	const err = testPrompt(
+	const err = await testPrompt(
 		createConfig(workspaceRoot, { reportFile }),
 		'create a research about our customer base. We have 30% of customer > 50 years; 90% > 20',
 		'Verify if summary.txt exists and the final output mentions summary.txt',
@@ -221,7 +234,7 @@ test('does not write report when test fails', () => {
 	function createConfig(workspaceRoot, overrides = {}) {
 	return {
-		promptCmd: copilotCmd(workspaceRoot),
+		...copilotCmd(workspaceRoot),
 		workspaceRoot,
 		workspaceMode: 'copy',
 		reportFile: null,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "xdrs-core",
-  "version": "0.12.0",
+  "version": "0.13.0",
   "description": "A standard way to organize Decision Records (XDRs) across scopes, subjects, and teams so that AI agents can reliably query and follow them.",
   "repository": {
     "type": "git",