mustflow 2.22.16 → 2.22.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/run.js +10 -0
- package/dist/cli/commands/verify.js +2 -1
- package/dist/cli/i18n/en.js +3 -0
- package/dist/cli/i18n/es.js +3 -0
- package/dist/cli/i18n/fr.js +3 -0
- package/dist/cli/i18n/hi.js +3 -0
- package/dist/cli/i18n/ko.js +3 -0
- package/dist/cli/i18n/zh.js +3 -0
- package/dist/cli/lib/git-changes.js +2 -0
- package/dist/cli/lib/run-plan.js +20 -7
- package/dist/cli/lib/run-root-trust.js +33 -2
- package/dist/cli/lib/validation/test-selection.js +11 -1
- package/dist/core/command-intent-eligibility.js +7 -0
- package/dist/core/line-endings.js +2 -0
- package/dist/core/run-write-drift.js +12 -9
- package/dist/core/test-selection.js +13 -2
- package/dist/core/test-target-paths.js +17 -0
- package/dist/core/validation-ratchet.js +2 -0
- package/package.json +1 -1
- package/templates/default/i18n.toml +2 -2
- package/templates/default/locales/en/.mustflow/skills/security-privacy-review/SKILL.md +22 -7
- package/templates/default/locales/en/.mustflow/skills/security-regression-tests/SKILL.md +31 -20
- package/templates/default/manifest.toml +1 -1
package/dist/cli/commands/run.js
CHANGED
|
@@ -36,6 +36,9 @@ function reportRunPlanFailure(plan, reporter, lang) {
|
|
|
36
36
|
case 'stdin_not_closed':
|
|
37
37
|
message = t(lang, 'run.error.stdin', { intent: plan.intentName });
|
|
38
38
|
break;
|
|
39
|
+
case 'agent_shell_requires_allow':
|
|
40
|
+
message = t(lang, 'run.error.agentShellRequiresAllow', { intent: plan.intentName });
|
|
41
|
+
break;
|
|
39
42
|
case 'missing_timeout':
|
|
40
43
|
message = t(lang, 'run.error.timeout', { intent: plan.intentName });
|
|
41
44
|
break;
|
|
@@ -66,6 +69,12 @@ function reportRunPlanFailure(plan, reporter, lang) {
|
|
|
66
69
|
detail: getRunPlanDetail(plan, lang, 'run.error.cwdOutsideProjectDetail'),
|
|
67
70
|
});
|
|
68
71
|
break;
|
|
72
|
+
case 'invalid_test_target':
|
|
73
|
+
message = t(lang, 'run.error.invalidTestTarget', {
|
|
74
|
+
intent: plan.intentName,
|
|
75
|
+
detail: getRunPlanDetail(plan, lang, 'run.error.invalidTestTargetDetail'),
|
|
76
|
+
});
|
|
77
|
+
break;
|
|
69
78
|
case 'max_output_bytes_exceeds_limit':
|
|
70
79
|
message = t(lang, 'run.error.maxOutputBytes', {
|
|
71
80
|
intent: plan.intentName,
|
|
@@ -225,6 +234,7 @@ export async function runRun(args, reporter, lang = 'en', options = {}) {
|
|
|
225
234
|
const env = profiler.measure('environment', () => createCommandEnv(projectRoot, { policy: plan.envPolicy, allowlist: plan.envAllowlist }));
|
|
226
235
|
const writeTracker = profiler.measure('write_drift_before', () => startRunWriteTracking(projectRoot, contract, intentName, {
|
|
227
236
|
additionalDeclaredPaths: options.additionalDeclaredWritePaths,
|
|
237
|
+
env,
|
|
228
238
|
}));
|
|
229
239
|
const stdoutTailBytes = Math.min(runReceiptPolicy.stdoutTailBytes, plan.maxOutputBytes);
|
|
230
240
|
const stderrTailBytes = Math.min(runReceiptPolicy.stderrTailBytes, plan.maxOutputBytes);
|
|
@@ -11,6 +11,7 @@ import { createVerifyEvidenceModel } from '../../core/verification-evidence.js';
|
|
|
11
11
|
import { createScopeDiffRisks } from '../../core/scope-risk.js';
|
|
12
12
|
import { countValidationRatchetVerdictEffects, createValidationRatchetRisks, } from '../../core/validation-ratchet.js';
|
|
13
13
|
import { finishRunWriteBatchTracking, startRunWriteBatchTracking, } from '../../core/run-write-drift.js';
|
|
14
|
+
import { createCommandEnv } from '../../core/command-env.js';
|
|
14
15
|
import { readCommandContract } from '../../core/config-loading.js';
|
|
15
16
|
import { evaluateCommandPreconditions, } from '../../core/command-preconditions.js';
|
|
16
17
|
import { DEFAULT_VERIFY_PARALLELISM, parseVerifyArgs, resolveVerifyParallelism, } from './verify/args.js';
|
|
@@ -198,7 +199,7 @@ async function runVerificationEntriesInParallelChunks(projectRoot, entries, para
|
|
|
198
199
|
const results = [];
|
|
199
200
|
for (let index = 0; index < entries.length; index += parallelism) {
|
|
200
201
|
const chunk = entries.slice(index, index + parallelism);
|
|
201
|
-
const batchTracker = startRunWriteBatchTracking(projectRoot);
|
|
202
|
+
const batchTracker = startRunWriteBatchTracking(projectRoot, createCommandEnv(projectRoot, { policy: 'minimal', allowlist: [] }));
|
|
202
203
|
const chunkResults = await Promise.all(chunk.map((entry) => runVerificationIntent(entry.intent, lang, verificationPlanId, correlationId, scheduledTestTargets.get(entry.intent) ?? [])));
|
|
203
204
|
const writeDriftByIntent = finishRunWriteBatchTracking(batchTracker, chunk.map((entry) => ({
|
|
204
205
|
intentName: entry.intent,
|
package/dist/cli/i18n/en.js
CHANGED
|
@@ -671,6 +671,7 @@ Read these files before working:
|
|
|
671
671
|
"run.error.lifecycleNotOneshot": 'Refused: command "{intent}" has lifecycle = "{lifecycle}"; mf run only executes oneshot commands',
|
|
672
672
|
"run.error.runPolicy": 'Command "{intent}" requires run_policy = "agent_allowed" for mf run',
|
|
673
673
|
"run.error.stdin": 'Command "{intent}" must set stdin = "closed"',
|
|
674
|
+
"run.error.agentShellRequiresAllow": 'Command "{intent}" must set allow_shell = true when mode = "shell"',
|
|
674
675
|
"run.error.timeout": 'Command "{intent}" must define timeout_seconds',
|
|
675
676
|
"run.error.commandSource": 'Command "{intent}" must define argv or mode = "shell" with cmd',
|
|
676
677
|
"run.error.unsafeIntent": 'Intent "{intent}" has an unsafe name. {detail}',
|
|
@@ -681,6 +682,8 @@ Read these files before working:
|
|
|
681
682
|
"run.error.blockedLongRunningCommandDetail": "Command argv must describe a finite one-shot command, not a development server, watcher, shell wrapper, interpreter loop, or background process.",
|
|
682
683
|
"run.error.cwdOutsideProject": 'Command "{intent}" has an invalid cwd: {detail}',
|
|
683
684
|
"run.error.cwdOutsideProjectDetail": "Intent cwd must stay inside the current root.",
|
|
685
|
+
"run.error.invalidTestTarget": 'Command "{intent}" received an invalid test target. {detail}',
|
|
686
|
+
"run.error.invalidTestTargetDetail": "Test targets must be relative file paths and cannot start with '-'.",
|
|
684
687
|
"run.error.maxOutputBytes": 'Command "{intent}" has invalid max_output_bytes. {detail}',
|
|
685
688
|
"run.error.maxOutputBytesDetail": "The output limit must stay within the allowed maximum.",
|
|
686
689
|
"run.error.conflictingPreviewModes": "Use either --dry-run or --plan-only, not both",
|
package/dist/cli/i18n/es.js
CHANGED
|
@@ -671,6 +671,7 @@ Lee estos archivos antes de trabajar:
|
|
|
671
671
|
"run.error.lifecycleNotOneshot": 'Rechazado: el comando "{intent}" tiene lifecycle = "{lifecycle}"; mf run sólo ejecuta comandos oneshot',
|
|
672
672
|
"run.error.runPolicy": 'El comando "{intent}" requiere run_policy = "agent_allowed" para mf run',
|
|
673
673
|
"run.error.stdin": 'El comando "{intent}" debe establecer stdin = "closed"',
|
|
674
|
+
"run.error.agentShellRequiresAllow": 'El comando "{intent}" debe establecer allow_shell = true cuando mode = "shell"',
|
|
674
675
|
"run.error.timeout": 'El comando "{intent}" debe definir timeout_seconds',
|
|
675
676
|
"run.error.commandSource": 'El comando "{intent}" debe definir argv o mode = "shell" con cmd',
|
|
676
677
|
"run.error.unsafeIntent": 'La intención "{intent}" tiene un nombre no seguro. {detail}',
|
|
@@ -681,6 +682,8 @@ Lee estos archivos antes de trabajar:
|
|
|
681
682
|
"run.error.blockedLongRunningCommandDetail": "argv debe describir un comando finito de una sola ejecución, no un servidor de desarrollo, watcher, envoltorio de shell, bucle de intérprete o proceso en segundo plano.",
|
|
682
683
|
"run.error.cwdOutsideProject": 'El comando "{intent}" tiene un cwd no válido: {detail}',
|
|
683
684
|
"run.error.cwdOutsideProjectDetail": "El cwd de la intención debe permanecer dentro de la raíz actual.",
|
|
685
|
+
"run.error.invalidTestTarget": 'El comando "{intent}" recibió un objetivo de prueba no válido. {detail}',
|
|
686
|
+
"run.error.invalidTestTargetDetail": "Los objetivos de prueba deben ser rutas relativas y no pueden empezar con '-'.",
|
|
684
687
|
"run.error.maxOutputBytes": 'El comando "{intent}" tiene max_output_bytes no válido. {detail}',
|
|
685
688
|
"run.error.maxOutputBytesDetail": "El límite de salida debe permanecer dentro del máximo permitido.",
|
|
686
689
|
"run.error.conflictingPreviewModes": "Usa --dry-run o --plan-only, no ambos",
|
package/dist/cli/i18n/fr.js
CHANGED
|
@@ -671,6 +671,7 @@ Lisez ces fichiers avant de travailler :
|
|
|
671
671
|
"run.error.lifecycleNotOneshot": 'Refusé : la commande "{intent}" a lifecycle = "{lifecycle}" ; mf run exécute uniquement les commandes oneshot',
|
|
672
672
|
"run.error.runPolicy": 'La commande "{intent}" nécessite run_policy = "agent_allowed" pour mf run',
|
|
673
673
|
"run.error.stdin": 'La commande "{intent}" doit définir stdin = "closed"',
|
|
674
|
+
"run.error.agentShellRequiresAllow": 'La commande "{intent}" doit définir allow_shell = true lorsque mode = "shell"',
|
|
674
675
|
"run.error.timeout": 'La commande "{intent}" doit définir timeout_seconds',
|
|
675
676
|
"run.error.commandSource": 'La commande "{intent}" doit définir argv ou mode = "shell" avec cmd',
|
|
676
677
|
"run.error.unsafeIntent": 'L’intention "{intent}" a un nom non sûr. {detail}',
|
|
@@ -681,6 +682,8 @@ Lisez ces fichiers avant de travailler :
|
|
|
681
682
|
"run.error.blockedLongRunningCommandDetail": "argv doit décrire une commande ponctuelle finie, pas un serveur de développement, un watcher, un wrapper shell, une boucle d'interpréteur ou un processus en arrière-plan.",
|
|
682
683
|
"run.error.cwdOutsideProject": 'La commande "{intent}" a un cwd non valide : {detail}',
|
|
683
684
|
"run.error.cwdOutsideProjectDetail": "Le cwd de l’intention doit rester dans la racine actuelle.",
|
|
685
|
+
"run.error.invalidTestTarget": 'La commande "{intent}" a reçu une cible de test invalide. {detail}',
|
|
686
|
+
"run.error.invalidTestTargetDetail": "Les cibles de test doivent être des chemins relatifs et ne peuvent pas commencer par '-'.",
|
|
684
687
|
"run.error.maxOutputBytes": 'La commande "{intent}" a une valeur max_output_bytes non valide. {detail}',
|
|
685
688
|
"run.error.maxOutputBytesDetail": "La limite de sortie doit rester dans le maximum autorisé.",
|
|
686
689
|
"run.error.conflictingPreviewModes": "Utilisez --dry-run ou --plan-only, pas les deux",
|
package/dist/cli/i18n/hi.js
CHANGED
|
@@ -671,6 +671,7 @@ export const hiMessages = {
|
|
|
671
671
|
"run.error.lifecycleNotOneshot": 'अस्वीकृत: कमांड "{intent}" का lifecycle = "{lifecycle}" है; mf run केवल oneshot कमांड चलाता है',
|
|
672
672
|
"run.error.runPolicy": 'mf run के लिए कमांड "{intent}" में run_policy = "agent_allowed" चाहिए',
|
|
673
673
|
"run.error.stdin": 'कमांड "{intent}" को stdin = "closed" सेट करना होगा',
|
|
674
|
+
"run.error.agentShellRequiresAllow": 'कमांड "{intent}" में mode = "shell" होने पर allow_shell = true सेट होना चाहिए',
|
|
674
675
|
"run.error.timeout": 'कमांड "{intent}" को timeout_seconds परिभाषित करना होगा',
|
|
675
676
|
"run.error.commandSource": 'कमांड "{intent}" को argv या mode = "shell" के साथ cmd परिभाषित करना होगा',
|
|
676
677
|
"run.error.unsafeIntent": 'इंटेंट "{intent}" का नाम सुरक्षित नहीं है। {detail}',
|
|
@@ -681,6 +682,8 @@ export const hiMessages = {
|
|
|
681
682
|
"run.error.blockedLongRunningCommandDetail": "argv में finite one-shot command होना चाहिए, development server, watcher, shell wrapper, interpreter loop, या background process नहीं।",
|
|
682
683
|
"run.error.cwdOutsideProject": 'कमांड "{intent}" का cwd अमान्य है: {detail}',
|
|
683
684
|
"run.error.cwdOutsideProjectDetail": "Intent cwd current root के अंदर रहना चाहिए।",
|
|
685
|
+
"run.error.invalidTestTarget": 'कमांड "{intent}" को अमान्य test target मिला। {detail}',
|
|
686
|
+
"run.error.invalidTestTargetDetail": "Test targets relative file paths होने चाहिए और '-' से शुरू नहीं हो सकते।",
|
|
684
687
|
"run.error.maxOutputBytes": 'कमांड "{intent}" में max_output_bytes अमान्य है। {detail}',
|
|
685
688
|
"run.error.maxOutputBytesDetail": "Output limit अनुमत maximum के अंदर रहनी चाहिए।",
|
|
686
689
|
"run.error.conflictingPreviewModes": "--dry-run या --plan-only में से एक इस्तेमाल करें, दोनों नहीं",
|
package/dist/cli/i18n/ko.js
CHANGED
|
@@ -671,6 +671,7 @@ export const koMessages = {
|
|
|
671
671
|
"run.error.lifecycleNotOneshot": '거부됨: 명령 "{intent}"의 수명 주기(lifecycle)는 "{lifecycle}"입니다. mf run은 일회성(oneshot) 명령만 실행합니다',
|
|
672
672
|
"run.error.runPolicy": '명령 "{intent}"는 mf run에서 실행하려면 run_policy = "agent_allowed"가 필요합니다',
|
|
673
673
|
"run.error.stdin": '명령 "{intent}"는 stdin = "closed"를 설정해야 합니다',
|
|
674
|
+
"run.error.agentShellRequiresAllow": '명령 "{intent}"는 mode = "shell"일 때 allow_shell = true를 설정해야 합니다',
|
|
674
675
|
"run.error.timeout": '명령 "{intent}"는 timeout_seconds를 정의해야 합니다',
|
|
675
676
|
"run.error.commandSource": '명령 "{intent}"는 argv를 정의하거나 mode = "shell"과 cmd를 함께 정의해야 합니다',
|
|
676
677
|
"run.error.unsafeIntent": '명령 의도 "{intent}"의 이름이 안전하지 않습니다. {detail}',
|
|
@@ -681,6 +682,8 @@ export const koMessages = {
|
|
|
681
682
|
"run.error.blockedLongRunningCommandDetail": "argv는 개발 서버, 감시 명령, 셸 래퍼, 인터프리터 반복 작업, 백그라운드 프로세스가 아니라 끝나는 단발성 명령이어야 합니다.",
|
|
682
683
|
"run.error.cwdOutsideProject": '명령 "{intent}"의 실행 위치(cwd)가 올바르지 않습니다: {detail}',
|
|
683
684
|
"run.error.cwdOutsideProjectDetail": "명령 실행 위치(cwd)는 현재 루트 안에 있어야 합니다.",
|
|
685
|
+
"run.error.invalidTestTarget": '명령 "{intent}"에 올바르지 않은 테스트 대상이 전달되었습니다. {detail}',
|
|
686
|
+
"run.error.invalidTestTargetDetail": "테스트 대상은 상대 파일 경로여야 하며 '-'로 시작할 수 없습니다.",
|
|
684
687
|
"run.error.maxOutputBytes": '명령 "{intent}"의 max_output_bytes 값이 올바르지 않습니다. {detail}',
|
|
685
688
|
"run.error.maxOutputBytesDetail": "출력 상한은 허용된 최댓값 안에 있어야 합니다.",
|
|
686
689
|
"run.error.conflictingPreviewModes": "--dry-run과 --plan-only 중 하나만 사용하세요",
|
package/dist/cli/i18n/zh.js
CHANGED
|
@@ -671,6 +671,7 @@ export const zhMessages = {
|
|
|
671
671
|
"run.error.lifecycleNotOneshot": '已拒绝:命令 "{intent}" 的 lifecycle = "{lifecycle}";mf run 只执行 oneshot 命令',
|
|
672
672
|
"run.error.runPolicy": '命令 "{intent}" 需要 run_policy = "agent_allowed" 才能通过 mf run 执行',
|
|
673
673
|
"run.error.stdin": '命令 "{intent}" 必须设置 stdin = "closed"',
|
|
674
|
+
"run.error.agentShellRequiresAllow": '当 mode = "shell" 时,命令 "{intent}" 必须设置 allow_shell = true',
|
|
674
675
|
"run.error.timeout": '命令 "{intent}" 必须定义 timeout_seconds',
|
|
675
676
|
"run.error.commandSource": '命令 "{intent}" 必须定义 argv,或定义 mode = "shell" 并提供 cmd',
|
|
676
677
|
"run.error.unsafeIntent": '意图 "{intent}" 的名称不安全。{detail}',
|
|
@@ -681,6 +682,8 @@ export const zhMessages = {
|
|
|
681
682
|
"run.error.blockedLongRunningCommandDetail": "argv 必须描述会结束的单次命令,而不是开发服务器、监听命令、shell 包装器、解释器循环或后台进程。",
|
|
682
683
|
"run.error.cwdOutsideProject": '命令 "{intent}" 的 cwd 无效:{detail}',
|
|
683
684
|
"run.error.cwdOutsideProjectDetail": "意图 cwd 必须位于当前根目录内。",
|
|
685
|
+
"run.error.invalidTestTarget": '命令 "{intent}" 收到无效测试目标。{detail}',
|
|
686
|
+
"run.error.invalidTestTargetDetail": "测试目标必须是相对文件路径,且不能以 '-' 开头。",
|
|
684
687
|
"run.error.maxOutputBytes": '命令 "{intent}" 的 max_output_bytes 无效。{detail}',
|
|
685
688
|
"run.error.maxOutputBytesDetail": "输出限制必须保持在允许的最大值内。",
|
|
686
689
|
"run.error.conflictingPreviewModes": "只能使用 --dry-run 或 --plan-only,不能同时使用",
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { spawnSync } from 'node:child_process';
|
|
2
2
|
import { parseGitStatusOutput } from '../../core/change-classification.js';
|
|
3
|
+
import { createCommandEnv } from '../../core/command-env.js';
|
|
3
4
|
const GIT_STATUS_TIMEOUT_MS = 10_000;
|
|
4
5
|
const GIT_STATUS_MAX_BUFFER_BYTES = 16 * 1024 * 1024;
|
|
5
6
|
export class GitChangedFilesError extends Error {
|
|
@@ -14,6 +15,7 @@ export function readGitChangedFiles(projectRoot) {
|
|
|
14
15
|
const result = spawnSync('git', ['status', '--porcelain=v1', '-z', '--untracked-files=all'], {
|
|
15
16
|
cwd: projectRoot,
|
|
16
17
|
encoding: 'utf8',
|
|
18
|
+
env: createCommandEnv(projectRoot, { policy: 'minimal', allowlist: [] }),
|
|
17
19
|
input: '',
|
|
18
20
|
maxBuffer: GIT_STATUS_MAX_BUFFER_BYTES,
|
|
19
21
|
stdio: ['ignore', 'pipe', 'pipe'],
|
package/dist/cli/lib/run-plan.js
CHANGED
|
@@ -7,6 +7,7 @@ import { inspectActiveRunLocks, } from '../../core/active-run-locks.js';
|
|
|
7
7
|
import { isRecord, readPositiveInteger, readString, readStringArray, } from '../../core/config-loading.js';
|
|
8
8
|
import { DEFAULT_COMMAND_MAX_OUTPUT_BYTES, COMMAND_OUTPUT_LIMIT_SCOPE, MAX_COMMAND_OUTPUT_BYTES, commandMaxOutputBytesLimitMessage, } from '../../core/command-output-limits.js';
|
|
9
9
|
import { normalizeSuccessExitCodes } from '../../core/success-exit-codes.js';
|
|
10
|
+
import { normalizeSafeTestTargetPath, TEST_TARGET_PATH_ERROR } from '../../core/test-target-paths.js';
|
|
10
11
|
import { evaluateCommandPreconditions, } from '../../core/command-preconditions.js';
|
|
11
12
|
import { t } from './i18n.js';
|
|
12
13
|
function getSuccessExitCodes(intent) {
|
|
@@ -28,12 +29,18 @@ function getRelativeProjectPath(projectRoot, targetPath) {
|
|
|
28
29
|
return relativePath.length > 0 ? toPosixPath(relativePath) : '.';
|
|
29
30
|
}
|
|
30
31
|
/**
 * Validates and canonicalizes a list of requested test targets.
 *
 * Each entry is passed through normalizeSafeTestTargetPath. The first entry
 * that is rejected aborts processing and is reported back to the caller.
 *
 * @param {Iterable<unknown> | null | undefined} values - Requested targets; a nullish value is treated as an empty list.
 * @returns {{ ok: true, values: string[] } | { ok: false, detail: string }}
 *   On success, the de-duplicated targets sorted with localeCompare; on
 *   failure, a detail message naming the offending entry.
 */
function normalizeTestTargets(values) {
    // A Set keeps first-occurrence order, so de-duplicating while collecting
    // yields the same sequence as de-duplicating the finished list.
    const accepted = new Set();
    for (const candidate of values ?? []) {
        const safePath = normalizeSafeTestTargetPath(candidate);
        if (safePath === null) {
            return { ok: false, detail: `Test target ${JSON.stringify(candidate)} is invalid: ${TEST_TARGET_PATH_ERROR}.` };
        }
        accepted.add(safePath);
    }
    const ordered = [...accepted];
    ordered.sort((left, right) => left.localeCompare(right));
    return { ok: true, values: ordered };
}
|
|
38
45
|
function commandAcceptsTestTargets(intent) {
|
|
39
46
|
return isRecord(intent.selection) && intent.selection.accepts_test_targets === true;
|
|
@@ -190,7 +197,13 @@ export function createRunPlan(projectRoot, contract, intentName, options = {}) {
|
|
|
190
197
|
catch (error) {
|
|
191
198
|
return createBlockedRunPlan(contract, intentName, rawIntent, eligibility, 'cwd_outside_project', error instanceof Error ? error.message : String(error), preconditions);
|
|
192
199
|
}
|
|
193
|
-
const
|
|
200
|
+
const normalizedTestTargets = commandAcceptsTestTargets(rawIntent) ?
|
|
201
|
+
normalizeTestTargets(options.testTargets) :
|
|
202
|
+
{ ok: true, values: [] };
|
|
203
|
+
if (!normalizedTestTargets.ok) {
|
|
204
|
+
return createBlockedRunPlan(contract, intentName, rawIntent, eligibility, 'invalid_test_target', normalizedTestTargets.detail, preconditions);
|
|
205
|
+
}
|
|
206
|
+
const testTargets = normalizedTestTargets.values;
|
|
194
207
|
const commandArgv = metadata.commandArgv && testTargets.length > 0 ? [...metadata.commandArgv, ...testTargets] : metadata.commandArgv;
|
|
195
208
|
if (!metadata.timeoutSeconds || !metadata.mode) {
|
|
196
209
|
return createBlockedRunPlan(contract, intentName, rawIntent, eligibility, !metadata.timeoutSeconds ? 'missing_timeout' : 'missing_command_source', !metadata.timeoutSeconds ? 'Intent timeout_seconds is missing or invalid.' : 'Intent does not define argv or shell cmd.', preconditions);
|
|
@@ -1,8 +1,39 @@
|
|
|
1
|
-
import { MANIFEST_LOCK_RELATIVE_PATH,
|
|
1
|
+
import { MANIFEST_LOCK_RELATIVE_PATH, inspectManifestLock } from './manifest-lock.js';
|
|
2
2
|
export const ALLOW_UNTRUSTED_ROOT_OPTION = '--allow-untrusted-root';
|
|
3
|
+
const REQUIRED_RUN_TRUST_LOCK_PATHS = [
|
|
4
|
+
'AGENTS.md',
|
|
5
|
+
'.mustflow/config/commands.toml',
|
|
6
|
+
];
|
|
3
7
|
export function assessRunRootTrust(projectRoot) {
|
|
4
|
-
const
|
|
8
|
+
const inspection = inspectManifestLock(projectRoot);
|
|
9
|
+
const { readResult } = inspection;
|
|
5
10
|
if (readResult.kind === 'present') {
|
|
11
|
+
if (readResult.lock.files.length === 0) {
|
|
12
|
+
return {
|
|
13
|
+
trusted: false,
|
|
14
|
+
reason: 'manifest_lock_invalid',
|
|
15
|
+
manifestLockPath: readResult.lockPath,
|
|
16
|
+
detail: 'Manifest lock must track at least one file.',
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
const trackedPaths = new Set(readResult.lock.files.map((file) => file.relativePath));
|
|
20
|
+
const missingRequiredPath = REQUIRED_RUN_TRUST_LOCK_PATHS.find((relativePath) => !trackedPaths.has(relativePath));
|
|
21
|
+
if (missingRequiredPath) {
|
|
22
|
+
return {
|
|
23
|
+
trusted: false,
|
|
24
|
+
reason: 'manifest_lock_invalid',
|
|
25
|
+
manifestLockPath: readResult.lockPath,
|
|
26
|
+
detail: `Manifest lock must track ${missingRequiredPath}.`,
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
if (inspection.issues.length > 0) {
|
|
30
|
+
return {
|
|
31
|
+
trusted: false,
|
|
32
|
+
reason: 'manifest_lock_invalid',
|
|
33
|
+
manifestLockPath: readResult.lockPath,
|
|
34
|
+
detail: inspection.issues[0] ?? 'Manifest lock does not match the current workflow files.',
|
|
35
|
+
};
|
|
36
|
+
}
|
|
6
37
|
return {
|
|
7
38
|
trusted: true,
|
|
8
39
|
reason: 'manifest_lock_present',
|
|
@@ -2,9 +2,19 @@ import { existsSync } from 'node:fs';
|
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { isRecord } from '../command-contract.js';
|
|
4
4
|
import { readMustflowTomlFile } from '../toml.js';
|
|
5
|
+
import { normalizeSafeTestTargetPath, TEST_TARGET_PATH_ERROR } from '../../../core/test-target-paths.js';
|
|
5
6
|
import { ALLOWED_TEST_SELECTION_RISKS, FORBIDDEN_TEST_SELECTION_COMMAND_AUTHORITY_FIELDS, TEST_SELECTION_CONFIG_PATH, } from './constants.js';
|
|
6
7
|
import { isConfiguredCommandIntent, isDeclaredCommandIntent } from './command-intents.js';
|
|
7
8
|
import { hasOwn, pushStrictIssue, validateAllowedStringField, validateNestedTable, validatePathArrayField, validateRequiredStringField, validateStringArrayField, } from './primitives.js';
|
|
9
|
+
/**
 * Checks an optional array-of-test-target-paths field on a config table.
 *
 * Nothing is reported when the key is absent. When the key is present, the
 * value must be a non-empty array in which every entry is accepted by
 * normalizeSafeTestTargetPath; otherwise a single issue is appended.
 *
 * @param {object} table - Config table that may own the field.
 * @param {string} key - Field name to look up on the table.
 * @param {string} label - Prefix used in the reported issue message.
 * @param {Array<{ message: string }>} issues - Issue list that is appended to in place.
 * @returns {void}
 */
function validateTestTargetPathArrayField(table, key, label, issues) {
    if (!hasOwn(table, key)) {
        return;
    }
    const candidate = table[key];
    const isAcceptable = Array.isArray(candidate) &&
        candidate.length !== 0 &&
        candidate.every((entry) => normalizeSafeTestTargetPath(entry) !== null);
    if (isAcceptable) {
        return;
    }
    issues.push({ message: `${label} ${TEST_TARGET_PATH_ERROR}` });
}
|
|
8
18
|
function validateNoTestSelectionCommandAuthorityFields(label, table, issues) {
|
|
9
19
|
for (const field of FORBIDDEN_TEST_SELECTION_COMMAND_AUTHORITY_FIELDS) {
|
|
10
20
|
if (hasOwn(table, field)) {
|
|
@@ -59,7 +69,7 @@ function validateTestSelectionRule(rule, index, commandsToml, issues) {
|
|
|
59
69
|
validateNoTestSelectionCommandAuthorityFields(`${label}.select`, select, issues);
|
|
60
70
|
validateTestSelectionIntentReference(select.intent, `${label}.select.intent`, commandsToml, issues);
|
|
61
71
|
validateTestSelectionIntentReference(select.fallback_intent, `${label}.select.fallback_intent`, commandsToml, issues);
|
|
62
|
-
|
|
72
|
+
validateTestTargetPathArrayField(select, 'test_targets', `${TEST_SELECTION_CONFIG_PATH} ${label}.select.test_targets`, issues);
|
|
63
73
|
}
|
|
64
74
|
}
|
|
65
75
|
export function validateStrictTestSelectionConfig(projectRoot, commandsToml, issues) {
|
|
@@ -76,6 +76,13 @@ export function evaluateCommandIntentEligibility(intentName, rawIntent) {
|
|
|
76
76
|
detail: blockedPattern.detail,
|
|
77
77
|
};
|
|
78
78
|
}
|
|
79
|
+
if (rawIntent.mode === 'shell' && rawIntent.allow_shell !== true) {
|
|
80
|
+
return {
|
|
81
|
+
ok: false,
|
|
82
|
+
code: 'agent_shell_requires_allow',
|
|
83
|
+
detail: `Agent-runnable shell intent ${intentName} must set allow_shell = true.`,
|
|
84
|
+
};
|
|
85
|
+
}
|
|
79
86
|
return {
|
|
80
87
|
ok: true,
|
|
81
88
|
code: 'ok',
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { spawnSync } from 'node:child_process';
|
|
2
2
|
import { existsSync } from 'node:fs';
|
|
3
3
|
import path from 'node:path';
|
|
4
|
+
import { createCommandEnv } from './command-env.js';
|
|
4
5
|
import { readFileInsideWithoutSymlinks, writeFileInsideWithoutSymlinks } from './safe-filesystem.js';
|
|
5
6
|
const GITATTRIBUTES_PATH = '.gitattributes';
|
|
6
7
|
function toPosixPath(value) {
|
|
@@ -18,6 +19,7 @@ function gitList(projectRoot, args) {
|
|
|
18
19
|
const result = spawnSync('git', [...args, '-z'], {
|
|
19
20
|
cwd: projectRoot,
|
|
20
21
|
encoding: 'buffer',
|
|
22
|
+
env: createCommandEnv(projectRoot, { policy: 'minimal', allowlist: [] }),
|
|
21
23
|
stdio: ['ignore', 'pipe', 'pipe'],
|
|
22
24
|
windowsHide: true,
|
|
23
25
|
});
|
|
@@ -75,8 +75,8 @@ function collectSnapshotEntries(projectRoot, currentPath, entries) {
|
|
|
75
75
|
entries.set(relativePath, signatureForPath(fullPath));
|
|
76
76
|
}
|
|
77
77
|
}
|
|
78
|
-
function captureSnapshot(projectRoot) {
|
|
79
|
-
const gitSnapshot = captureGitStatusSnapshot(projectRoot);
|
|
78
|
+
function captureSnapshot(projectRoot, env) {
|
|
79
|
+
const gitSnapshot = captureGitStatusSnapshot(projectRoot, env);
|
|
80
80
|
if (gitSnapshot) {
|
|
81
81
|
return gitSnapshot;
|
|
82
82
|
}
|
|
@@ -102,9 +102,10 @@ function captureSnapshot(projectRoot) {
|
|
|
102
102
|
};
|
|
103
103
|
}
|
|
104
104
|
}
|
|
105
|
-
function captureGitStatusSnapshot(projectRoot) {
|
|
105
|
+
function captureGitStatusSnapshot(projectRoot, env) {
|
|
106
106
|
const result = spawnSync('git', ['-C', projectRoot, 'status', '--porcelain=v1', '-z', `--untracked-files=${GIT_STATUS_UNTRACKED_MODE}`], {
|
|
107
107
|
encoding: 'utf8',
|
|
108
|
+
env,
|
|
108
109
|
input: '',
|
|
109
110
|
maxBuffer: GIT_STATUS_MAX_BUFFER_BYTES,
|
|
110
111
|
stdio: ['ignore', 'pipe', 'pipe'],
|
|
@@ -206,21 +207,23 @@ function createUnavailableWriteDriftReceipt(declaredPaths, reason) {
|
|
|
206
207
|
reason,
|
|
207
208
|
};
|
|
208
209
|
}
|
|
209
|
-
export function startRunWriteTracking(projectRoot, contract, intentName, options
|
|
210
|
+
/**
 * Begins write-drift tracking for a single intent run.
 *
 * Collects the declared write paths for the intent plus any additional paths
 * supplied by the caller, and captures a "before" snapshot of the project
 * using the supplied environment.
 *
 * @param {string} projectRoot - Project root that is snapshotted.
 * @param {object} contract - Command contract forwarded to listDeclaredWritePaths.
 * @param {string} intentName - Intent whose declared write paths are listed.
 * @param {{ additionalDeclaredPaths?: string[], env?: object }} options -
 *   Required options object; `env` is forwarded to captureSnapshot and stored
 *   on the tracker.
 * @returns {{ projectRoot: string, env: object | undefined, declaredPaths: string[], before: object }}
 *   Tracker carrying the de-duplicated, sorted declared paths and the initial snapshot.
 */
export function startRunWriteTracking(projectRoot, contract, intentName, options) {
    const intentPaths = listDeclaredWritePaths(projectRoot, contract, intentName);
    const extraPaths = (options.additionalDeclaredPaths ?? []).map(normalizeRelativePath);
    const { env } = options;
    // De-duplicate first, then sort, so the declared path list is stable.
    const declaredPaths = [...new Set([...intentPaths, ...extraPaths])];
    declaredPaths.sort((left, right) => left.localeCompare(right));
    return {
        projectRoot,
        env,
        declaredPaths,
        before: captureSnapshot(projectRoot, env),
    };
}
|
|
220
|
-
export function startRunWriteBatchTracking(projectRoot) {
|
|
222
|
+
/**
 * Begins write-drift tracking for a batch of intents that run together.
 *
 * @param {string} projectRoot - Project root that is snapshotted.
 * @param {object} env - Environment forwarded to captureSnapshot and stored on the tracker.
 * @returns {{ projectRoot: string, env: object, before: object }}
 *   Tracker holding the snapshot captured before the batch starts.
 */
export function startRunWriteBatchTracking(projectRoot, env) {
    const before = captureSnapshot(projectRoot, env);
    return { projectRoot, env, before };
}
|
|
226
229
|
export function finishRunWriteBatchTracking(tracker, intents) {
|
|
@@ -232,7 +235,7 @@ export function finishRunWriteBatchTracking(tracker, intents) {
|
|
|
232
235
|
if (tracker.before.status === 'unavailable') {
|
|
233
236
|
return fallbackReceipts;
|
|
234
237
|
}
|
|
235
|
-
const after = captureSnapshot(tracker.projectRoot);
|
|
238
|
+
const after = captureSnapshot(tracker.projectRoot, tracker.env);
|
|
236
239
|
if (after.status === 'unavailable') {
|
|
237
240
|
return new Map(intents.map((intent) => [
|
|
238
241
|
intent.intentName,
|
|
@@ -309,7 +312,7 @@ export function finishRunWriteTracking(tracker) {
|
|
|
309
312
|
if (tracker.before.status === 'unavailable') {
|
|
310
313
|
return createUnavailableWriteDriftReceipt(tracker.declaredPaths, tracker.before.reason);
|
|
311
314
|
}
|
|
312
|
-
const after = captureSnapshot(tracker.projectRoot);
|
|
315
|
+
const after = captureSnapshot(tracker.projectRoot, tracker.env);
|
|
313
316
|
if (after.status === 'unavailable') {
|
|
314
317
|
return createUnavailableWriteDriftReceipt(tracker.declaredPaths, after.reason);
|
|
315
318
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { existsSync } from 'node:fs';
|
|
2
2
|
import { isRecord, readMustflowOwnedTomlFile, readStringArray, resolveMustflowConfigPath, } from './config-loading.js';
|
|
3
3
|
import { classifyVerificationCandidate, } from './verification-plan.js';
|
|
4
|
+
import { normalizeSafeTestTargetPath } from './test-target-paths.js';
|
|
4
5
|
export const TEST_SELECTION_CONFIG_RELATIVE_PATH = '.mustflow/config/test-selection.toml';
|
|
5
6
|
const STALE_OR_MISSING_RULES_NOTE = 'Project-declared test selection rules did not cover the current changed files; review .mustflow/config/test-selection.toml for stale or missing rules.';
|
|
6
7
|
function uniqueSorted(values) {
|
|
@@ -47,8 +48,18 @@ function readRule(value) {
|
|
|
47
48
|
const surfaces = readStringArray(value.match, 'surfaces');
|
|
48
49
|
const intent = readStringField(value.select, 'intent');
|
|
49
50
|
const fallbackIntent = readStringField(value.select, 'fallback_intent');
|
|
50
|
-
const
|
|
51
|
-
|
|
51
|
+
const rawTestTargets = readStringArray(value.select, 'test_targets') ?? [];
|
|
52
|
+
const testTargets = rawTestTargets.map((target) => normalizeSafeTestTargetPath(target));
|
|
53
|
+
if (!id ||
|
|
54
|
+
!risk ||
|
|
55
|
+
!reason ||
|
|
56
|
+
!paths ||
|
|
57
|
+
paths.length === 0 ||
|
|
58
|
+
!surfaces ||
|
|
59
|
+
surfaces.length === 0 ||
|
|
60
|
+
!intent ||
|
|
61
|
+
!fallbackIntent ||
|
|
62
|
+
!testTargets.every((target) => target !== null)) {
|
|
52
63
|
return null;
|
|
53
64
|
}
|
|
54
65
|
return {
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
export const TEST_TARGET_PATH_ERROR = "entries must be non-empty relative paths that do not start with '-'";
|
|
3
|
+
/**
 * Validates a single test target and returns it in normalized form.
 *
 * The value is trimmed and backslashes are converted to forward slashes.
 * The result is rejected (null) when it:
 *   - is not a string, or is empty after trimming;
 *   - starts with '-' (presumably so it cannot be read as a command-line
 *     option by the receiving command);
 *   - contains a NUL byte, which Node's child_process APIs refuse in arguments;
 *   - starts with a Windows drive prefix such as "C:" — this covers
 *     drive-relative forms like "C:..\\x" that path.win32.isAbsolute does not
 *     flag and whose first segment ("C:..") would slip past the ".." check;
 *   - is absolute under POSIX or Windows rules;
 *   - contains an empty, "." or ".." segment.
 *
 * Note: a POSIX file whose name literally begins with "<letter>:" is also
 * rejected; that is the accepted cost of closing the drive-relative bypass.
 *
 * @param {unknown} value - Candidate test target.
 * @returns {string | null} Normalized forward-slash relative path, or null when rejected.
 */
export function normalizeSafeTestTargetPath(value) {
    if (typeof value !== 'string') {
        return null;
    }
    const raw = value.trim();
    const normalized = raw.replace(/\\/g, '/');
    if (normalized.length === 0 ||
        normalized.startsWith('-') ||
        normalized.includes('\0') ||
        /^[A-Za-z]:/.test(normalized) ||
        path.posix.isAbsolute(normalized) ||
        path.win32.isAbsolute(raw)) {
        return null;
    }
    const segments = normalized.split('/');
    return segments.every((segment) => segment.length > 0 && segment !== '.' && segment !== '..') ? normalized : null;
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { spawnSync } from 'node:child_process';
|
|
2
2
|
import { existsSync, readFileSync } from 'node:fs';
|
|
3
3
|
import path from 'node:path';
|
|
4
|
+
import { createCommandEnv } from './command-env.js';
|
|
4
5
|
const TEST_CHANGE_KINDS = new Set(['test', 'test_fixture']);
|
|
5
6
|
const SKIP_OR_ONLY_MARKER = /\b(?:describe|it|test)\s*\.\s*(?:skip|only)\s*\(/u;
|
|
6
7
|
const TODO_OR_PENDING_MARKER = /\b(?:describe|it|test)\s*\.\s*(?:todo|pending)\s*\(/u;
|
|
@@ -52,6 +53,7 @@ function gitDiffLines(projectRoot, relativePath) {
|
|
|
52
53
|
const result = spawnSync('git', ['diff', '--no-ext-diff', '--unified=0', '--', relativePath], {
|
|
53
54
|
cwd: projectRoot,
|
|
54
55
|
encoding: 'utf8',
|
|
56
|
+
env: createCommandEnv(projectRoot, { policy: 'minimal', allowlist: [] }),
|
|
55
57
|
windowsHide: true,
|
|
56
58
|
});
|
|
57
59
|
if (result.status !== 0 || typeof result.stdout !== 'string' || result.stdout.length === 0) {
|
package/package.json
CHANGED
|
@@ -325,13 +325,13 @@ translations = {}
|
|
|
325
325
|
[documents."skill.security-privacy-review"]
|
|
326
326
|
source = "locales/en/.mustflow/skills/security-privacy-review/SKILL.md"
|
|
327
327
|
source_locale = "en"
|
|
328
|
-
revision =
|
|
328
|
+
revision = 19
|
|
329
329
|
translations = {}
|
|
330
330
|
|
|
331
331
|
[documents."skill.security-regression-tests"]
|
|
332
332
|
source = "locales/en/.mustflow/skills/security-regression-tests/SKILL.md"
|
|
333
333
|
source_locale = "en"
|
|
334
|
-
revision =
|
|
334
|
+
revision = 11
|
|
335
335
|
translations = {}
|
|
336
336
|
|
|
337
337
|
[documents."skill.search-ad-content-authoring"]
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
mustflow_doc: skill.security-privacy-review
|
|
3
3
|
locale: en
|
|
4
4
|
canonical: true
|
|
5
|
-
revision:
|
|
5
|
+
revision: 19
|
|
6
6
|
lifecycle: mustflow-owned
|
|
7
7
|
authority: procedure
|
|
8
8
|
name: security-privacy-review
|
|
@@ -48,6 +48,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
|
|
|
48
48
|
- A change uses cache, Redis, generated state, search documents, or read models to make authorization, ownership, subscription, entitlement, payment, inventory, or admin decisions.
|
|
49
49
|
- A change adds external URL fetching, webhook callbacks, redirects, browser previews, remote downloads, database-as-a-service rules, security headers, CORS, CSRF handling, or rate limits.
|
|
50
50
|
- A change stores webhook payloads, external API requests or responses, retry errors, dead-letter jobs, AI prompts or outputs, email bodies, or provider diagnostic data.
|
|
51
|
+
- A change adds or modifies public intake surfaces such as action handlers, webhook handlers, callback receivers, job enqueue endpoints, idempotency stores, replay APIs, or default verifier, authenticator, authorizer, normalizer, or deduplication collaborators.
|
|
51
52
|
- A change records AI usage, model pricing, token counts, cache keys, feature metadata, prompt hashes, provider call metadata, retry cost, or failed AI calls that could include confidential content or identify users.
|
|
52
53
|
- A change records AI budgets, feature policies, policy decisions, blocked reasons, model downgrades, agent steps, tool calls, provider budget status, or emergency disables that could reveal customer behavior, sensitive feature use, or regulated processing.
|
|
53
54
|
- A change touches cookies, JWTs, reset tokens, invite tokens, OAuth callbacks, file upload or download, browser storage, business rules, pricing, entitlements, database queries, ORM bulk operations, or deployment configuration.
|
|
@@ -56,6 +57,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
|
|
|
56
57
|
- A change touches cryptography, password hashing, token generation, random number generation, TLS/HTTPS, certificate validation, scanner gates, or a security invariant that could drift across architecture boundaries.
|
|
57
58
|
- A change adds, imports, recommends, or installs third-party dependencies that may affect the software supply chain.
|
|
58
59
|
- A change introduces or edits agent configuration, MCP/tool configuration, prompt files, model instructions, or repository-local rule files.
|
|
60
|
+
- A change adds or modifies policy engines, architecture linters, rule catalogs, validators, generated compliance reports, or governance gates that can approve sensitive data, payment, API, AI, tier, or deployment boundaries.
|
|
59
61
|
- A change affects CI/CD workflow permissions, fork pull-request handling, build scripts, package lifecycle scripts, deployment secrets, container users, storage buckets, debug flags, or public admin, metrics, GraphQL, cache, or search endpoints.
|
|
60
62
|
- Documentation, templates, examples, tests, or final reports mention sensitive data handling, privacy behavior, secret handling, or user-identifying data.
|
|
61
63
|
- A diff could expose data through filenames, paths, command output, screenshots, generated artifacts, package contents, or public docs.
|
|
@@ -82,6 +84,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
|
|
|
82
84
|
- Cookie, JWT, OAuth, file upload, file download, business-value, database mutation, ORM bulk operation, CI/CD permission, deployment setting, or secret-source surface involved.
|
|
83
85
|
- Cryptographic primitive, password hashing, random-token, secure transport, certificate validation, scanner gate, or security invariant involved.
|
|
84
86
|
- Existing project rules for secrets, privacy, generated state, public docs, package contents, and command output.
|
|
87
|
+
- Policy or rule-catalog source of truth, trusted metadata source, fallback behavior when a rule file is missing, and any untrusted repository-local fields that might be treated as ownership, tier, role, or exemption evidence.
|
|
85
88
|
- Admin operation list, role or capability model, audit-log fields, cache visibility policy, and cache invalidation surface when those are involved.
|
|
86
89
|
- Behavior analytics event names, event versions, actor identifiers, anonymous identifiers, properties, retention period, deletion or anonymization policy, and whether event writes can be delayed or lost.
|
|
87
90
|
- Core event ownership, including which signup, login failure, account recovery, payment, refund, subscription, entitlement, permission, file, search, admin, webhook, queue, and security events must remain internally stored instead of only in a SaaS dashboard.
|
|
@@ -91,6 +94,8 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
|
|
|
91
94
|
- Data classification policy when available, including sensitive personal data, ordinary personal data, product usage data, public content, AI prompts or outputs, and which classes may enter logs, analytics, support tools, AI providers, or cross-region backups.
|
|
92
95
|
- Runtime and dependency patch policy, including supported or LTS version requirement, end-of-life ban, lockfile expectation, vulnerability scan source, patch response target, smoke-test surface, canary or rollback route, and whether experimental runtime choices are kept off survival paths.
|
|
93
96
|
- Webhook and external-call record policy, including signature verification, processed-event deduplication, safe request hashes, redacted provider responses, unknown-result reconciliation, dead-letter retention, and whether raw payloads are needed or should be replaced by bounded metadata.
|
|
97
|
+
- Public intake default policy, including whether verifiers, authenticators, authorizers, deduplication stores, idempotency stores, and normalizers are required by registration, explicit opt-in, or silently replaced by permissive defaults.
|
|
98
|
+
- Attacker-controlled key and header limits for idempotency keys, webhook event ids, provider names, action names, replay ids, dedupe keys, request ids, and any in-memory map or queue keyed by public request data.
|
|
94
99
|
- AI record policy, including prompt and output retention, cache-key hashing, provider request id handling, feature-key properties, pricing snapshots, token usage, failed-call errors, user or account identifiers, and whether raw prompts or generated text are omitted, redacted, encrypted, or retained under a narrow rule.
|
|
95
100
|
- AI budget and gateway policy, including whether provider budgets are hard stops or only alerts, whether product-owned hard limits exist, which identifiers are recorded for user, organization, feature, model, request, provider call, policy decision, and whether blocked or downgraded decisions are logged without exposing prompt text.
|
|
96
101
|
- Cache authority boundary, including which data is final source of truth and which values are disposable, stale, private, or shared.
|
|
@@ -154,6 +159,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
|
|
|
154
159
|
20. For cache purge, search reindex, ranking refresh, and generated-state rebuild endpoints, treat them as privileged state-changing operations with authorization, rate limiting, audit logs, idempotency, and bounded target selection.
|
|
155
160
|
21. For external URL, webhook, preview, redirect, download, or callback behavior, check allowlists, protocol restrictions, redirect handling, DNS/IP re-resolution, private network ranges, link-local metadata endpoints, webhook signatures, timeout limits, retry limits, and open redirect parameters such as `next` or `redirect`.
|
|
156
161
|
- For webhooks, verify the signature against the raw body before trusting parsed data. Store only the raw body reference or bounded raw payload when replay, verification, or support needs justify it.
|
|
162
|
+
- Do not silently install allow-all, unsigned, no-op, nil, null-object, or test-only verifiers for public webhook or callback endpoints. Missing verifier, authenticator, or authorization configuration should fail registration unless the caller explicitly selects a clearly named unsafe or local-only mode.
|
|
157
163
|
- Store processed event identifiers to avoid duplicate effects. Keep provider event payloads, request bodies, and response bodies out of ordinary logs and dead-letter records unless they are redacted and have a retention rule.
|
|
158
164
|
22. For database-as-a-service, storage bucket, or realtime rules, check that server-side policies are default-deny, ownership-scoped, and not left in public read/write development mode.
|
|
159
165
|
23. For input sinks, check parameterized queries, ORM binding, static command maps, output encoding, HTML/Markdown rendering boundaries, unsafe dynamic evaluation, XML/YAML/Markdown parser options, redirect and sort parameters, page-size limits, and framework escape hatches.
|
|
@@ -163,6 +169,8 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
|
|
|
163
169
|
- For direct-to-object-storage uploads, authorize the target resource before issuing the signed upload URL, confirm upload completion before making the asset usable, and keep pending, uploaded, processing, ready, failed, and deleted states separate.
|
|
164
170
|
- Inspect actual file bytes instead of trusting extension or `Content-Type`. Re-encode images and strip metadata when practical before serving user uploads.
|
|
165
171
|
25. For business logic, check that server code does not trust client-supplied prices, discounts, roles, owners, entitlement state, plan limits, usage counters, inventory, seats, refunds, credits, or coupon state. Inspect idempotency, transactions, uniqueness, and concurrent requests for repeated side effects.
|
|
172
|
+
- For public action or intake endpoints, run cheap request-shape validation and validate attacker-controlled idempotency keys before permanently claiming the key. If a request is rejected before the trusted side effect starts, release or avoid storing the key so malformed traffic cannot poison future valid retries.
|
|
173
|
+
- Bound default in-memory idempotency, deduplication, replay, rate-limit, and request-tracking stores by key length, entry count, TTL, eviction, or a durable backend contract. A process-memory map keyed by unbounded public headers or event ids is an availability boundary, not just an implementation detail.
|
|
166
174
|
26. For API responses, check that the response contains only fields the caller may see and needs for the use case. Do not expose password hashes, internal storage keys, permanent private URLs, raw billing provider ids, internal moderation notes, private IP data, privileged flags, or database columns merely because they are present on the model.
|
|
167
175
|
27. For dependency failure policy, distinguish fail-closed from degraded behavior. Authentication, authorization, payment, entitlement, and destructive admin decisions should usually fail closed; analytics, recommendation, statistics, AI summaries, and email should usually avoid exposing private data or blocking core state changes.
|
|
168
176
|
- For dead-letter queues, retry logs, and external API call records, check that errors contain safe codes and bounded metadata rather than full prompts, email bodies, payment details, tokens, private files, or personal data.
|
|
@@ -184,12 +192,19 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
|
|
|
184
192
|
38. For runtime and framework security updates, check that supported versions are documented, end-of-life versions are rejected, dependency locks exist where appropriate, security patches can be tested and deployed quickly, and rollback or redeploy can happen without manual dashboard memory. Do not treat a fashionable or high-performance runtime as safe unless the patch path is operationally credible.
|
|
185
193
|
39. For transport security, check HTTPS/TLS requirements, certificate validation, insecure HTTP downgrade paths, disabled verification flags, and whether sensitive traffic can bypass the secure channel.
|
|
186
194
|
40. For cryptography, reject custom cryptography and tutorial-grade shortcuts. Check password hashing uses a password-hashing primitive such as bcrypt, scrypt, or Argon2id where supported by the project; random tokens use secure randomness; keys are separated from encrypted data; and weak hashes such as MD5, SHA-1, or bare SHA-256 are not used for password storage.
|
|
187
|
-
41. For
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
195
|
+
41. For policy engines, architecture linters, compliance validators, and generated governance gates, identify the canonical policy source and the canonical object identity before trusting a pass result.
|
|
196
|
+
- Do not let repository-controlled advisory fields, nested duplicates, labels, components, owners, stages, tiers, or exemption fields override a trusted catalog, server-derived identity, or central registration.
|
|
197
|
+
- When two fields can describe the same security decision, such as top-level and nested owner values, validate their consistency or choose the canonical source explicitly instead of reading the first convenient path.
|
|
198
|
+
- Treat missing, wrong, or fallback rule catalogs as fail-closed or explicitly degraded; a misplaced rule file should not silently disable validation for public API, payment, AI, tier, deployment, or data-boundary controls.
|
|
199
|
+
- Required security-control declarations should validate meaningful values, not merely non-null presence. Reject `false`, `0`, empty objects, empty arrays, empty strings, or type-mismatched placeholders unless the policy specifically allows that value.
|
|
200
|
+
- Derive deny decisions from metadata classes when possible instead of only from static name denylists that can miss newly introduced repositories, services, tenants, roles, or providers.
|
|
201
|
+
42. For read-only commands that inspect repositories, remember that the underlying tool can still execute configured helpers. Disable or neutralize repository-local hooks, fsmonitor helpers, credential helpers, package lifecycle hooks, and executable lookup through untrusted PATH when the command is meant to be safe inspection.
|
|
202
|
+
43. For architecture drift, name the security invariant before accepting the generated structure. Confirm the invariant still holds across UI, handler, service, repository, database policy, workflow, and deployment boundaries.
|
|
203
|
+
44. For SAST, SCA, or scanner output, treat scanner output as evidence rather than command authority. Map the finding to a repository-owned boundary, configured verification intent, dependency metadata, or regression test before claiming the issue is fixed.
|
|
204
|
+
45. Verify that examples, fixtures, screenshots, command outputs, and final reports do not expose real-looking secrets or unnecessary personal data.
|
|
205
|
+
46. Prefer omission or minimal metadata over masking when the sensitive value is not needed for the user to understand the result.
|
|
206
|
+
47. If the change affects an authorization, SSRF, CSRF, rate-limit, upload, download, token, business-logic, injection, logging, telemetry, cache authority, cache disclosure, admin operation, agent permission, cryptography, transport, scanner, policy-engine, rule-catalog, or abuse boundary, activate `security-regression-tests` for test selection instead of folding test generation into this review.
|
|
207
|
+
48. Run the narrowest configured verification that covers the changed docs, templates, package, or mustflow contract.
|
|
193
208
|
|
|
194
209
|
<!-- mustflow-section: postconditions -->
|
|
195
210
|
## Postconditions
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
mustflow_doc: skill.security-regression-tests
|
|
3
3
|
locale: en
|
|
4
4
|
canonical: true
|
|
5
|
-
revision:
|
|
5
|
+
revision: 11
|
|
6
6
|
lifecycle: mustflow-owned
|
|
7
7
|
authority: procedure
|
|
8
8
|
name: security-regression-tests
|
|
@@ -32,11 +32,13 @@ Convert security-sensitive behavior changes into safe negative tests that preser
|
|
|
32
32
|
|
|
33
33
|
- Authentication, authorization, session, CSRF, rate-limit, admin, payment, credit, subscription, personal-data, or tenant-boundary behavior changes.
|
|
34
34
|
- Input validation, output encoding, file upload, path handling, webhook callback, redirect, or external URL handling changes.
|
|
35
|
+
- Public action, webhook, callback, replay, job enqueue, idempotency, deduplication, or in-memory intake-store behavior changes.
|
|
35
36
|
- Cookie, JWT, OAuth callback, reset token, invite token, logout, reauthentication, file download, upload processing, business-rule, entitlement, pricing, inventory, database query, ORM bulk operation, or deployment-configuration behavior changes.
|
|
36
37
|
- AI-generated or vibe-coded routes, data access, external fetchers, admin screens, or database rules need denied-case coverage beyond a happy-path test.
|
|
37
38
|
- Cryptography, password hashing, secure randomness, HTTPS/TLS, certificate validation, scanner-gate, or security-invariant behavior changes.
|
|
38
39
|
- Command construction, command recommendation, executable resolution, command-contract linting, or copy-to-clipboard command behavior changes.
|
|
39
40
|
- Filesystem containment, symlink handling, package publishing, build pipeline, or release automation behavior changes.
|
|
41
|
+
- Policy engines, architecture linters, rule-catalog loaders, schema validators, generated compliance reports, or governance gates change how security-sensitive data, API, AI, payment, tier, deployment, ownership, or repository boundaries are approved.
|
|
40
42
|
- A bug fix closes an abuse case and the fix needs a regression test to prevent reintroduction.
|
|
41
43
|
- A review identifies a concrete security-sensitive boundary that can be expressed as a deterministic test.
|
|
42
44
|
- A static analysis alert identifies a concrete data flow, permission boundary, command boundary, artifact boundary, or input-handling bug that can be locked with a local test.
|
|
@@ -96,13 +98,18 @@ Convert security-sensitive behavior changes into safe negative tests that preser
|
|
|
96
98
|
- unsafe external URL, callback, redirect, or server-side request target
|
|
97
99
|
- SSRF-style private network, localhost, link-local metadata, redirect, or DNS re-resolution target
|
|
98
100
|
- missing webhook signature validation or unsafe retry behavior for external callbacks
|
|
101
|
+
- insecure verifier, authenticator, authorizer, normalizer, or dedupe default where missing configuration silently becomes allow-all, unsigned, no-op, or test-only behavior on a public endpoint
|
|
99
102
|
- CSRF-style state change that relies on browser credentials without an origin, token, or same-site boundary
|
|
100
103
|
- missing rate limit or lockout on login, signup, token reset, invitation, webhook, or expensive generation endpoints
|
|
101
104
|
- client-supplied price, discount, role, owner, entitlement, plan, inventory, seat, refund, coupon, or usage value trusted by the server
|
|
102
105
|
- ORM mass assignment, unscoped `findMany`, `updateMany`, `deleteMany`, unsafe migration default, or missing database policy enforcement
|
|
103
106
|
- unsafe shell command construction, command name interpolation, clipboard command output, or executable lookup
|
|
104
|
-
-
|
|
107
|
+
- repository inspection command that unexpectedly executes local Git helpers, hooks, credential helpers, package lifecycle hooks, PATH shims, or other repository-controlled executables
|
|
108
|
+
- filesystem escape through symlinks, path traversal, archive entries, Git tree entries, generated state, or package contents
|
|
105
109
|
- mismatch between two validators, linters, dashboards, schemas, or release gates that claim the same policy
|
|
110
|
+
- policy-source mismatch where a validator loads a security rule from the wrong catalog, silently disables a missing rule, or accepts a legacy fallback without an explicit compatibility test
|
|
111
|
+
- untrusted metadata override where a repository-controlled field, nested duplicate, component, owner, stage, tier, role, or exemption value is treated as trusted ownership or authorization evidence
|
|
112
|
+
- invalid-but-present security control values where `false`, `0`, `{}`, `[]`, empty strings, or type-mismatched placeholders satisfy required policy fields
|
|
106
113
|
- release or package-publishing pipeline code execution before artifact publication
|
|
107
114
|
- incomplete escaping, quoting, encoding, or sanitization where the safe behavior can be asserted without invoking a real shell or network target
|
|
108
115
|
- stack trace or internal error exposure through a user-visible API, report, dashboard, or command output
|
|
@@ -116,6 +123,7 @@ Convert security-sensitive behavior changes into safe negative tests that preser
|
|
|
116
123
|
- missing capability or scoped permission object where a sensitive operation depends on broad user, role, or global authorization state
|
|
117
124
|
- missing invariant policy where a sensitive state change could violate a non-negotiable rule such as last-owner, entitlement, paid-order, refund, or retention constraints
|
|
118
125
|
- missing idempotency key, action ledger, or outbox/inbox record where repeated execution of a side effect could charge, refund, notify, grant, revoke, publish, or delete more than once
|
|
126
|
+
- unbounded public request identifiers, idempotency keys, webhook event ids, replay ids, provider names, request ids, or header values retained in process memory without length, entry-count, TTL, eviction, or cleanup on rejected requests
|
|
119
127
|
- exposed debug, admin, metrics, storage, GraphQL, development console, root container user, default credential, or fork pull-request secret path that can be checked locally
|
|
120
128
|
3. Search for existing tests that already cover the same boundary. Strengthen the existing test when that gives clearer coverage than adding a new one.
|
|
121
129
|
4. Build the smallest safe negative test data: at least one allowed control case when useful, and one denied case that proves the boundary rejects the abuse condition.
|
|
@@ -124,24 +132,27 @@ Convert security-sensitive behavior changes into safe negative tests that preser
|
|
|
124
132
|
7. For CSRF and browser-credential state changes, assert that the mutating operation rejects missing or mismatched token, origin, or same-site evidence according to the project framework.
|
|
125
133
|
8. For rate limits and lockouts, use injected time, local stores, or fake counters to prove repeated attempts are bounded without slowing the suite.
|
|
126
134
|
9. For session, JWT, OAuth, reset, invite, logout, or reauthentication boundaries, assert the denied condition directly: invalid signature, expired token, wrong issuer, wrong audience, missing state, revoked token, reused token, or missing recent authentication.
|
|
127
|
-
10. For
|
|
128
|
-
11. For
|
|
129
|
-
12. For
|
|
130
|
-
13. For
|
|
131
|
-
14. For
|
|
132
|
-
15. For
|
|
133
|
-
16. For
|
|
134
|
-
17.
|
|
135
|
-
18.
|
|
136
|
-
19.
|
|
137
|
-
20.
|
|
138
|
-
21.
|
|
139
|
-
22.
|
|
140
|
-
23.
|
|
141
|
-
24.
|
|
142
|
-
25. For
|
|
143
|
-
26. For
|
|
144
|
-
27.
|
|
135
|
+
10. For public webhook, callback, and action-intake defaults, assert missing verifier, authenticator, or authorizer configuration fails registration or setup. If an intentionally unsafe local/test mode exists, require the test to pass that mode explicitly and name it in the fixture.
|
|
136
|
+
11. For public idempotency, deduplication, replay, rate-limit, and request-tracking stores, assert malformed rejected requests do not permanently claim keys, oversized keys are rejected or normalized before retention, and default memory stores have observable bounds such as entry eviction, TTL, or a configured capacity.
|
|
137
|
+
12. For upload and download boundaries, use local fixture files and fake storage. Assert authorization, content signature, MIME, size, filename, path, metadata stripping, and conversion resource-limit behavior without using live user files.
|
|
138
|
+
13. For business-rule boundaries, use server-side fixtures that try manipulated price, discount, role, owner, entitlement, plan, inventory, seat, refund, coupon, or usage fields. Assert that state remains unchanged or is recalculated from trusted server data.
|
|
139
|
+
14. For database and ORM boundaries, assert scoped queries or policies through observable behavior: cross-tenant rows stay invisible, bulk update or delete affects only owned rows, mass-assigned privileged fields are ignored, and unsafe migration defaults cannot create elevated access.
|
|
140
|
+
15. For cryptography and token-generation boundaries, assert behavior through the project-owned API rather than hard-coding private implementation details: password verifiers reject plaintext or fast-hash storage, token generation uses injected secure randomness or a deterministic test double, and custom cryptography shortcuts are absent where the project exposes that decision.
|
|
141
|
+
16. For transport-security boundaries, assert configuration rejects disabled certificate validation or insecure HTTP for sensitive endpoints when the project owns that configuration.
|
|
142
|
+
17. For architecture-drift boundaries, write the test around the security invariant, not the refactor shape: unauthorized access stays denied, sensitive output stays omitted, and side effects remain scoped after the generated structure changes.
|
|
143
|
+
18. For policy-engine or governance-linter boundaries, add denied cases that prove the invariant cannot be bypassed by newly named entities, spoofed duplicate fields, self-declared ownership metadata, missing or misplaced rule files, or invalid-but-present values. Include an allowed control case when it clarifies the intended trusted source.
|
|
144
|
+
19. For parser, validator, serializer, path, command, or workflow boundaries, consider a bounded property-based or fuzz-style regression when the invariant is clearer than a list of hand-written examples. Keep generators local, deterministic under the test runner, size-limited, and focused on the defensive invariant.
|
|
145
|
+
20. When adding a fuzzing or property-based testing dependency, keep dependency metadata, lockfiles, test selection rules, and package tests synchronized. Prefer an existing project dependency when it can express the invariant cleanly.
|
|
146
|
+
21. Use mocks or local fakes for external requests, uploads, redirects, webhooks, payment providers, file systems, shell commands, package registries, Git helpers, and CI workflows. Do not contact live suspicious endpoints or publish real artifacts.
|
|
147
|
+
22. Name the test after the defensive expectation, such as `cannot_read_other_users_invoice` or `rejects_private_network_callback_url`.
|
|
148
|
+
23. Keep assertions tied to observable behavior: status code, returned error shape, unchanged database state, missing side effect, sanitized output, rejected job, or invariant preserved for all generated cases.
|
|
149
|
+
24. Avoid dumping long exploit strings into the test. Use minimal representative inputs or generated values that prove the validation or boundary rule without becoming an offensive payload corpus.
|
|
150
|
+
25. For command and filesystem boundaries, assert the denied side effect directly: no injected command appears in a runnable recommendation, no repository-local shim or Git helper is executed, no background shell pattern is counted as runnable, no symlink target outside the root is read or written, and no Git tree or archive path writes outside the intended destination.
|
|
151
|
+
26. For plan/apply, capability, invariant, time, and idempotency boundaries, assert the safety contract directly: planning produces no side effect, commit rejects stale or unauthorized capability, invalid transitions preserve state, injected time controls expiry, and repeated side-effect keys do not repeat the effect.
|
|
152
|
+
27. For workflow scanner fixes, prefer repository-local assertions for durable contracts: action references are pinned to commit SHAs or to digest-pinned container images, privileged permissions are job-scoped, fork pull requests do not receive secrets, deployment or scanner jobs can be manually rerun when useful, and dependency scans exclude fixture-only manifests unless intentionally included.
|
|
153
|
+
28. For deployment and configuration fixes, prefer local config assertions: debug flags are off for production, sample credentials are absent, public admin or metrics endpoints are not enabled by default, storage is not public, containers do not run as root when the project controls that setting, and HTTPS requirements are preserved.
|
|
154
|
+
29. For scanner-driven fixes, include a regression only when the rule reflects a durable project contract. Do not add brittle tests that merely assert the scanner's current wording, line number, or severity.
|
|
155
|
+
30. If the project lacks enough context to write a deterministic test, output a concrete test proposal instead of inventing fixtures or behavior.
|
|
145
156
|
|
|
146
157
|
<!-- mustflow-section: postconditions -->
|
|
147
158
|
## Postconditions
|