npm - mustflow - Versions diffs - 2.22.16 → 2.22.17 - Mend

mustflow 2.22.16 → 2.22.17

This diff shows the changes between publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (23) hide show

package/dist/cli/commands/run.js +10 -0
package/dist/cli/commands/verify.js +2 -1
package/dist/cli/i18n/en.js +3 -0
package/dist/cli/i18n/es.js +3 -0
package/dist/cli/i18n/fr.js +3 -0
package/dist/cli/i18n/hi.js +3 -0
package/dist/cli/i18n/ko.js +3 -0
package/dist/cli/i18n/zh.js +3 -0
package/dist/cli/lib/git-changes.js +2 -0
package/dist/cli/lib/run-plan.js +20 -7
package/dist/cli/lib/run-root-trust.js +33 -2
package/dist/cli/lib/validation/test-selection.js +11 -1
package/dist/core/command-intent-eligibility.js +7 -0
package/dist/core/line-endings.js +2 -0
package/dist/core/run-write-drift.js +12 -9
package/dist/core/test-selection.js +13 -2
package/dist/core/test-target-paths.js +17 -0
package/dist/core/validation-ratchet.js +2 -0
package/package.json +1 -1
package/templates/default/i18n.toml +2 -2
package/templates/default/locales/en/.mustflow/skills/security-privacy-review/SKILL.md +22 -7
package/templates/default/locales/en/.mustflow/skills/security-regression-tests/SKILL.md +31 -20
package/templates/default/manifest.toml +1 -1

package/dist/cli/commands/run.js CHANGED Viewed

@@ -36,6 +36,9 @@ function reportRunPlanFailure(plan, reporter, lang) {
         case 'stdin_not_closed':
             message = t(lang, 'run.error.stdin', { intent: plan.intentName });
             break;
+        case 'agent_shell_requires_allow':
+            message = t(lang, 'run.error.agentShellRequiresAllow', { intent: plan.intentName });
+            break;
         case 'missing_timeout':
             message = t(lang, 'run.error.timeout', { intent: plan.intentName });
             break;
@@ -66,6 +69,12 @@ function reportRunPlanFailure(plan, reporter, lang) {
                 detail: getRunPlanDetail(plan, lang, 'run.error.cwdOutsideProjectDetail'),
             });
             break;
+        case 'invalid_test_target':
+            message = t(lang, 'run.error.invalidTestTarget', {
+                intent: plan.intentName,
+                detail: getRunPlanDetail(plan, lang, 'run.error.invalidTestTargetDetail'),
+            });
+            break;
         case 'max_output_bytes_exceeds_limit':
             message = t(lang, 'run.error.maxOutputBytes', {
                 intent: plan.intentName,
@@ -225,6 +234,7 @@ export async function runRun(args, reporter, lang = 'en', options = {}) {
         const env = profiler.measure('environment', () => createCommandEnv(projectRoot, { policy: plan.envPolicy, allowlist: plan.envAllowlist }));
         const writeTracker = profiler.measure('write_drift_before', () => startRunWriteTracking(projectRoot, contract, intentName, {
             additionalDeclaredPaths: options.additionalDeclaredWritePaths,
+            env,
         }));
         const stdoutTailBytes = Math.min(runReceiptPolicy.stdoutTailBytes, plan.maxOutputBytes);
         const stderrTailBytes = Math.min(runReceiptPolicy.stderrTailBytes, plan.maxOutputBytes);

package/dist/cli/commands/verify.js CHANGED Viewed

@@ -11,6 +11,7 @@ import { createVerifyEvidenceModel } from '../../core/verification-evidence.js';
 import { createScopeDiffRisks } from '../../core/scope-risk.js';
 import { countValidationRatchetVerdictEffects, createValidationRatchetRisks, } from '../../core/validation-ratchet.js';
 import { finishRunWriteBatchTracking, startRunWriteBatchTracking, } from '../../core/run-write-drift.js';
+import { createCommandEnv } from '../../core/command-env.js';
 import { readCommandContract } from '../../core/config-loading.js';
 import { evaluateCommandPreconditions, } from '../../core/command-preconditions.js';
 import { DEFAULT_VERIFY_PARALLELISM, parseVerifyArgs, resolveVerifyParallelism, } from './verify/args.js';
@@ -198,7 +199,7 @@ async function runVerificationEntriesInParallelChunks(projectRoot, entries, para
     const results = [];
     for (let index = 0; index < entries.length; index += parallelism) {
         const chunk = entries.slice(index, index + parallelism);
-        const batchTracker = startRunWriteBatchTracking(projectRoot);
+        const batchTracker = startRunWriteBatchTracking(projectRoot, createCommandEnv(projectRoot, { policy: 'minimal', allowlist: [] }));
         const chunkResults = await Promise.all(chunk.map((entry) => runVerificationIntent(entry.intent, lang, verificationPlanId, correlationId, scheduledTestTargets.get(entry.intent) ?? [])));
         const writeDriftByIntent = finishRunWriteBatchTracking(batchTracker, chunk.map((entry) => ({
             intentName: entry.intent,

package/dist/cli/i18n/en.js CHANGED Viewed

@@ -671,6 +671,7 @@ Read these files before working:
     "run.error.lifecycleNotOneshot": 'Refused: command "{intent}" has lifecycle = "{lifecycle}"; mf run only executes oneshot commands',
     "run.error.runPolicy": 'Command "{intent}" requires run_policy = "agent_allowed" for mf run',
     "run.error.stdin": 'Command "{intent}" must set stdin = "closed"',
+    "run.error.agentShellRequiresAllow": 'Command "{intent}" must set allow_shell = true when mode = "shell"',
     "run.error.timeout": 'Command "{intent}" must define timeout_seconds',
     "run.error.commandSource": 'Command "{intent}" must define argv or mode = "shell" with cmd',
     "run.error.unsafeIntent": 'Intent "{intent}" has an unsafe name. {detail}',
@@ -681,6 +682,8 @@ Read these files before working:
     "run.error.blockedLongRunningCommandDetail": "Command argv must describe a finite one-shot command, not a development server, watcher, shell wrapper, interpreter loop, or background process.",
     "run.error.cwdOutsideProject": 'Command "{intent}" has an invalid cwd: {detail}',
     "run.error.cwdOutsideProjectDetail": "Intent cwd must stay inside the current root.",
+    "run.error.invalidTestTarget": 'Command "{intent}" received an invalid test target. {detail}',
+    "run.error.invalidTestTargetDetail": "Test targets must be relative file paths and cannot start with '-'.",
     "run.error.maxOutputBytes": 'Command "{intent}" has invalid max_output_bytes. {detail}',
     "run.error.maxOutputBytesDetail": "The output limit must stay within the allowed maximum.",
     "run.error.conflictingPreviewModes": "Use either --dry-run or --plan-only, not both",

package/dist/cli/i18n/es.js CHANGED Viewed

@@ -671,6 +671,7 @@ Lee estos archivos antes de trabajar:
     "run.error.lifecycleNotOneshot": 'Rechazado: el comando "{intent}" tiene lifecycle = "{lifecycle}"; mf run sólo ejecuta comandos oneshot',
     "run.error.runPolicy": 'El comando "{intent}" requiere run_policy = "agent_allowed" para mf run',
     "run.error.stdin": 'El comando "{intent}" debe establecer stdin = "closed"',
+    "run.error.agentShellRequiresAllow": 'El comando "{intent}" debe establecer allow_shell = true cuando mode = "shell"',
     "run.error.timeout": 'El comando "{intent}" debe definir timeout_seconds',
     "run.error.commandSource": 'El comando "{intent}" debe definir argv o mode = "shell" con cmd',
     "run.error.unsafeIntent": 'La intención "{intent}" tiene un nombre no seguro. {detail}',
@@ -681,6 +682,8 @@ Lee estos archivos antes de trabajar:
     "run.error.blockedLongRunningCommandDetail": "argv debe describir un comando finito de una sola ejecución, no un servidor de desarrollo, watcher, envoltorio de shell, bucle de intérprete o proceso en segundo plano.",
     "run.error.cwdOutsideProject": 'El comando "{intent}" tiene un cwd no válido: {detail}',
     "run.error.cwdOutsideProjectDetail": "El cwd de la intención debe permanecer dentro de la raíz actual.",
+    "run.error.invalidTestTarget": 'El comando "{intent}" recibió un objetivo de prueba no válido. {detail}',
+    "run.error.invalidTestTargetDetail": "Los objetivos de prueba deben ser rutas relativas y no pueden empezar con '-'.",
     "run.error.maxOutputBytes": 'El comando "{intent}" tiene max_output_bytes no válido. {detail}',
     "run.error.maxOutputBytesDetail": "El límite de salida debe permanecer dentro del máximo permitido.",
     "run.error.conflictingPreviewModes": "Usa --dry-run o --plan-only, no ambos",

package/dist/cli/i18n/fr.js CHANGED Viewed

@@ -671,6 +671,7 @@ Lisez ces fichiers avant de travailler :
     "run.error.lifecycleNotOneshot": 'Refusé : la commande "{intent}" a lifecycle = "{lifecycle}" ; mf run exécute uniquement les commandes oneshot',
     "run.error.runPolicy": 'La commande "{intent}" nécessite run_policy = "agent_allowed" pour mf run',
     "run.error.stdin": 'La commande "{intent}" doit définir stdin = "closed"',
+    "run.error.agentShellRequiresAllow": 'La commande "{intent}" doit définir allow_shell = true lorsque mode = "shell"',
     "run.error.timeout": 'La commande "{intent}" doit définir timeout_seconds',
     "run.error.commandSource": 'La commande "{intent}" doit définir argv ou mode = "shell" avec cmd',
     "run.error.unsafeIntent": 'L’intention "{intent}" a un nom non sûr. {detail}',
@@ -681,6 +682,8 @@ Lisez ces fichiers avant de travailler :
     "run.error.blockedLongRunningCommandDetail": "argv doit décrire une commande ponctuelle finie, pas un serveur de développement, un watcher, un wrapper shell, une boucle d'interpréteur ou un processus en arrière-plan.",
     "run.error.cwdOutsideProject": 'La commande "{intent}" a un cwd non valide : {detail}',
     "run.error.cwdOutsideProjectDetail": "Le cwd de l’intention doit rester dans la racine actuelle.",
+    "run.error.invalidTestTarget": 'La commande "{intent}" a reçu une cible de test invalide. {detail}',
+    "run.error.invalidTestTargetDetail": "Les cibles de test doivent être des chemins relatifs et ne peuvent pas commencer par '-'.",
     "run.error.maxOutputBytes": 'La commande "{intent}" a une valeur max_output_bytes non valide. {detail}',
     "run.error.maxOutputBytesDetail": "La limite de sortie doit rester dans le maximum autorisé.",
     "run.error.conflictingPreviewModes": "Utilisez --dry-run ou --plan-only, pas les deux",

package/dist/cli/i18n/hi.js CHANGED Viewed

@@ -671,6 +671,7 @@ export const hiMessages = {
     "run.error.lifecycleNotOneshot": 'अस्वीकृत: कमांड "{intent}" का lifecycle = "{lifecycle}" है; mf run केवल oneshot कमांड चलाता है',
     "run.error.runPolicy": 'mf run के लिए कमांड "{intent}" में run_policy = "agent_allowed" चाहिए',
     "run.error.stdin": 'कमांड "{intent}" को stdin = "closed" सेट करना होगा',
+    "run.error.agentShellRequiresAllow": 'कमांड "{intent}" में mode = "shell" होने पर allow_shell = true सेट होना चाहिए',
     "run.error.timeout": 'कमांड "{intent}" को timeout_seconds परिभाषित करना होगा',
     "run.error.commandSource": 'कमांड "{intent}" को argv या mode = "shell" के साथ cmd परिभाषित करना होगा',
     "run.error.unsafeIntent": 'इंटेंट "{intent}" का नाम सुरक्षित नहीं है। {detail}',
@@ -681,6 +682,8 @@ export const hiMessages = {
     "run.error.blockedLongRunningCommandDetail": "argv में finite one-shot command होना चाहिए, development server, watcher, shell wrapper, interpreter loop, या background process नहीं।",
     "run.error.cwdOutsideProject": 'कमांड "{intent}" का cwd अमान्य है: {detail}',
     "run.error.cwdOutsideProjectDetail": "Intent cwd current root के अंदर रहना चाहिए।",
+    "run.error.invalidTestTarget": 'कमांड "{intent}" को अमान्य test target मिला। {detail}',
+    "run.error.invalidTestTargetDetail": "Test targets relative file paths होने चाहिए और '-' से शुरू नहीं हो सकते।",
     "run.error.maxOutputBytes": 'कमांड "{intent}" में max_output_bytes अमान्य है। {detail}',
     "run.error.maxOutputBytesDetail": "Output limit अनुमत maximum के अंदर रहनी चाहिए।",
     "run.error.conflictingPreviewModes": "--dry-run या --plan-only में से एक इस्तेमाल करें, दोनों नहीं",

package/dist/cli/i18n/ko.js CHANGED Viewed

@@ -671,6 +671,7 @@ export const koMessages = {
     "run.error.lifecycleNotOneshot": '거부됨: 명령 "{intent}"의 수명 주기(lifecycle)는 "{lifecycle}"입니다. mf run은 일회성(oneshot) 명령만 실행합니다',
     "run.error.runPolicy": '명령 "{intent}"는 mf run에서 실행하려면 run_policy = "agent_allowed"가 필요합니다',
     "run.error.stdin": '명령 "{intent}"는 stdin = "closed"를 설정해야 합니다',
+    "run.error.agentShellRequiresAllow": '명령 "{intent}"는 mode = "shell"일 때 allow_shell = true를 설정해야 합니다',
     "run.error.timeout": '명령 "{intent}"는 timeout_seconds를 정의해야 합니다',
     "run.error.commandSource": '명령 "{intent}"는 argv를 정의하거나 mode = "shell"과 cmd를 함께 정의해야 합니다',
     "run.error.unsafeIntent": '명령 의도 "{intent}"의 이름이 안전하지 않습니다. {detail}',
@@ -681,6 +682,8 @@ export const koMessages = {
     "run.error.blockedLongRunningCommandDetail": "argv는 개발 서버, 감시 명령, 셸 래퍼, 인터프리터 반복 작업, 백그라운드 프로세스가 아니라 끝나는 단발성 명령이어야 합니다.",
     "run.error.cwdOutsideProject": '명령 "{intent}"의 실행 위치(cwd)가 올바르지 않습니다: {detail}',
     "run.error.cwdOutsideProjectDetail": "명령 실행 위치(cwd)는 현재 루트 안에 있어야 합니다.",
+    "run.error.invalidTestTarget": '명령 "{intent}"에 올바르지 않은 테스트 대상이 전달되었습니다. {detail}',
+    "run.error.invalidTestTargetDetail": "테스트 대상은 상대 파일 경로여야 하며 '-'로 시작할 수 없습니다.",
     "run.error.maxOutputBytes": '명령 "{intent}"의 max_output_bytes 값이 올바르지 않습니다. {detail}',
     "run.error.maxOutputBytesDetail": "출력 상한은 허용된 최댓값 안에 있어야 합니다.",
     "run.error.conflictingPreviewModes": "--dry-run과 --plan-only 중 하나만 사용하세요",

package/dist/cli/i18n/zh.js CHANGED Viewed

@@ -671,6 +671,7 @@ export const zhMessages = {
     "run.error.lifecycleNotOneshot": '已拒绝：命令 "{intent}" 的 lifecycle = "{lifecycle}"；mf run 只执行 oneshot 命令',
     "run.error.runPolicy": '命令 "{intent}" 需要 run_policy = "agent_allowed" 才能通过 mf run 执行',
     "run.error.stdin": '命令 "{intent}" 必须设置 stdin = "closed"',
+    "run.error.agentShellRequiresAllow": '当 mode = "shell" 时，命令 "{intent}" 必须设置 allow_shell = true',
     "run.error.timeout": '命令 "{intent}" 必须定义 timeout_seconds',
     "run.error.commandSource": '命令 "{intent}" 必须定义 argv，或定义 mode = "shell" 并提供 cmd',
     "run.error.unsafeIntent": '意图 "{intent}" 的名称不安全。{detail}',
@@ -681,6 +682,8 @@ export const zhMessages = {
     "run.error.blockedLongRunningCommandDetail": "argv 必须描述会结束的单次命令，而不是开发服务器、监听命令、shell 包装器、解释器循环或后台进程。",
     "run.error.cwdOutsideProject": '命令 "{intent}" 的 cwd 无效：{detail}',
     "run.error.cwdOutsideProjectDetail": "意图 cwd 必须位于当前根目录内。",
+    "run.error.invalidTestTarget": '命令 "{intent}" 收到无效测试目标。{detail}',
+    "run.error.invalidTestTargetDetail": "测试目标必须是相对文件路径，且不能以 '-' 开头。",
     "run.error.maxOutputBytes": '命令 "{intent}" 的 max_output_bytes 无效。{detail}',
     "run.error.maxOutputBytesDetail": "输出限制必须保持在允许的最大值内。",
     "run.error.conflictingPreviewModes": "只能使用 --dry-run 或 --plan-only，不能同时使用",

package/dist/cli/lib/git-changes.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { spawnSync } from 'node:child_process';
 import { parseGitStatusOutput } from '../../core/change-classification.js';
+import { createCommandEnv } from '../../core/command-env.js';
 const GIT_STATUS_TIMEOUT_MS = 10_000;
 const GIT_STATUS_MAX_BUFFER_BYTES = 16 * 1024 * 1024;
 export class GitChangedFilesError extends Error {
@@ -14,6 +15,7 @@ export function readGitChangedFiles(projectRoot) {
     const result = spawnSync('git', ['status', '--porcelain=v1', '-z', '--untracked-files=all'], {
         cwd: projectRoot,
         encoding: 'utf8',
+        env: createCommandEnv(projectRoot, { policy: 'minimal', allowlist: [] }),
         input: '',
         maxBuffer: GIT_STATUS_MAX_BUFFER_BYTES,
         stdio: ['ignore', 'pipe', 'pipe'],

package/dist/cli/lib/run-plan.js CHANGED Viewed

@@ -7,6 +7,7 @@ import { inspectActiveRunLocks, } from '../../core/active-run-locks.js';
 import { isRecord, readPositiveInteger, readString, readStringArray, } from '../../core/config-loading.js';
 import { DEFAULT_COMMAND_MAX_OUTPUT_BYTES, COMMAND_OUTPUT_LIMIT_SCOPE, MAX_COMMAND_OUTPUT_BYTES, commandMaxOutputBytesLimitMessage, } from '../../core/command-output-limits.js';
 import { normalizeSuccessExitCodes } from '../../core/success-exit-codes.js';
+import { normalizeSafeTestTargetPath, TEST_TARGET_PATH_ERROR } from '../../core/test-target-paths.js';
 import { evaluateCommandPreconditions, } from '../../core/command-preconditions.js';
 import { t } from './i18n.js';
 function getSuccessExitCodes(intent) {
@@ -28,12 +29,18 @@ function getRelativeProjectPath(projectRoot, targetPath) {
     return relativePath.length > 0 ? toPosixPath(relativePath) : '.';
 }
 function normalizeTestTargets(values) {
-    return [
-        ...new Set((values ?? [])
-            .map((value) => value.trim().replace(/\\/g, '/'))
-            .filter((value) => value.length > 0 && !path.posix.isAbsolute(value) && !path.win32.isAbsolute(value))
-            .filter((value) => value.split('/').every((segment) => segment.length > 0 && segment !== '.' && segment !== '..'))),
-    ].sort((left, right) => left.localeCompare(right));
+    const normalizedValues = [];
+    for (const value of values ?? []) {
+        const normalized = normalizeSafeTestTargetPath(value);
+        if (normalized === null) {
+            return { ok: false, detail: `Test target ${JSON.stringify(value)} is invalid: ${TEST_TARGET_PATH_ERROR}.` };
+        }
+        normalizedValues.push(normalized);
+    }
+    return {
+        ok: true,
+        values: [...new Set(normalizedValues)].sort((left, right) => left.localeCompare(right)),
+    };
 }
 function commandAcceptsTestTargets(intent) {
     return isRecord(intent.selection) && intent.selection.accepts_test_targets === true;
@@ -190,7 +197,13 @@ export function createRunPlan(projectRoot, contract, intentName, options = {}) {
     catch (error) {
         return createBlockedRunPlan(contract, intentName, rawIntent, eligibility, 'cwd_outside_project', error instanceof Error ? error.message : String(error), preconditions);
     }
-    const testTargets = commandAcceptsTestTargets(rawIntent) ? normalizeTestTargets(options.testTargets) : [];
+    const normalizedTestTargets = commandAcceptsTestTargets(rawIntent) ?
+        normalizeTestTargets(options.testTargets) :
+        { ok: true, values: [] };
+    if (!normalizedTestTargets.ok) {
+        return createBlockedRunPlan(contract, intentName, rawIntent, eligibility, 'invalid_test_target', normalizedTestTargets.detail, preconditions);
+    }
+    const testTargets = normalizedTestTargets.values;
     const commandArgv = metadata.commandArgv && testTargets.length > 0 ? [...metadata.commandArgv, ...testTargets] : metadata.commandArgv;
     if (!metadata.timeoutSeconds || !metadata.mode) {
         return createBlockedRunPlan(contract, intentName, rawIntent, eligibility, !metadata.timeoutSeconds ? 'missing_timeout' : 'missing_command_source', !metadata.timeoutSeconds ? 'Intent timeout_seconds is missing or invalid.' : 'Intent does not define argv or shell cmd.', preconditions);

package/dist/cli/lib/run-root-trust.js CHANGED Viewed

@@ -1,8 +1,39 @@
-import { MANIFEST_LOCK_RELATIVE_PATH, readManifestLock } from './manifest-lock.js';
+import { MANIFEST_LOCK_RELATIVE_PATH, inspectManifestLock } from './manifest-lock.js';
 export const ALLOW_UNTRUSTED_ROOT_OPTION = '--allow-untrusted-root';
+const REQUIRED_RUN_TRUST_LOCK_PATHS = [
+    'AGENTS.md',
+    '.mustflow/config/commands.toml',
+];
 export function assessRunRootTrust(projectRoot) {
-    const readResult = readManifestLock(projectRoot);
+    const inspection = inspectManifestLock(projectRoot);
+    const { readResult } = inspection;
     if (readResult.kind === 'present') {
+        if (readResult.lock.files.length === 0) {
+            return {
+                trusted: false,
+                reason: 'manifest_lock_invalid',
+                manifestLockPath: readResult.lockPath,
+                detail: 'Manifest lock must track at least one file.',
+            };
+        }
+        const trackedPaths = new Set(readResult.lock.files.map((file) => file.relativePath));
+        const missingRequiredPath = REQUIRED_RUN_TRUST_LOCK_PATHS.find((relativePath) => !trackedPaths.has(relativePath));
+        if (missingRequiredPath) {
+            return {
+                trusted: false,
+                reason: 'manifest_lock_invalid',
+                manifestLockPath: readResult.lockPath,
+                detail: `Manifest lock must track ${missingRequiredPath}.`,
+            };
+        }
+        if (inspection.issues.length > 0) {
+            return {
+                trusted: false,
+                reason: 'manifest_lock_invalid',
+                manifestLockPath: readResult.lockPath,
+                detail: inspection.issues[0] ?? 'Manifest lock does not match the current workflow files.',
+            };
+        }
         return {
             trusted: true,
             reason: 'manifest_lock_present',

package/dist/cli/lib/validation/test-selection.js CHANGED Viewed

@@ -2,9 +2,19 @@ import { existsSync } from 'node:fs';
 import path from 'node:path';
 import { isRecord } from '../command-contract.js';
 import { readMustflowTomlFile } from '../toml.js';
+import { normalizeSafeTestTargetPath, TEST_TARGET_PATH_ERROR } from '../../../core/test-target-paths.js';
 import { ALLOWED_TEST_SELECTION_RISKS, FORBIDDEN_TEST_SELECTION_COMMAND_AUTHORITY_FIELDS, TEST_SELECTION_CONFIG_PATH, } from './constants.js';
 import { isConfiguredCommandIntent, isDeclaredCommandIntent } from './command-intents.js';
 import { hasOwn, pushStrictIssue, validateAllowedStringField, validateNestedTable, validatePathArrayField, validateRequiredStringField, validateStringArrayField, } from './primitives.js';
+/**
+ * Record a validation issue when an optional test-target array field is malformed.
+ *
+ * Nothing is reported when `key` is absent from `table`. When the key is present,
+ * the value must be a non-empty array whose every entry is accepted by
+ * `normalizeSafeTestTargetPath`; otherwise one issue is appended to `issues`.
+ *
+ * @param {object} table - Parsed table that may carry the field.
+ * @param {string} key - Name of the field to check.
+ * @param {string} label - Prefix used in the issue message.
+ * @param {Array<{message: string}>} issues - Collector that receives the issue.
+ */
+function validateTestTargetPathArrayField(table, key, label, issues) {
+    if (!hasOwn(table, key)) {
+        return;
+    }
+    const candidates = table[key];
+    const isSafeTargetList = Array.isArray(candidates) &&
+        candidates.length > 0 &&
+        candidates.every((candidate) => normalizeSafeTestTargetPath(candidate) !== null);
+    if (isSafeTargetList) {
+        return;
+    }
+    issues.push({ message: `${label} ${TEST_TARGET_PATH_ERROR}` });
+}
 function validateNoTestSelectionCommandAuthorityFields(label, table, issues) {
     for (const field of FORBIDDEN_TEST_SELECTION_COMMAND_AUTHORITY_FIELDS) {
         if (hasOwn(table, field)) {
@@ -59,7 +69,7 @@ function validateTestSelectionRule(rule, index, commandsToml, issues) {
         validateNoTestSelectionCommandAuthorityFields(`${label}.select`, select, issues);
         validateTestSelectionIntentReference(select.intent, `${label}.select.intent`, commandsToml, issues);
         validateTestSelectionIntentReference(select.fallback_intent, `${label}.select.fallback_intent`, commandsToml, issues);
-        validatePathArrayField(select, 'test_targets', `${TEST_SELECTION_CONFIG_PATH} ${label}.select.test_targets`, issues);
+        validateTestTargetPathArrayField(select, 'test_targets', `${TEST_SELECTION_CONFIG_PATH} ${label}.select.test_targets`, issues);
     }
 }
 export function validateStrictTestSelectionConfig(projectRoot, commandsToml, issues) {

package/dist/core/command-intent-eligibility.js CHANGED Viewed

@@ -76,6 +76,13 @@ export function evaluateCommandIntentEligibility(intentName, rawIntent) {
             detail: blockedPattern.detail,
         };
     }
+    if (rawIntent.mode === 'shell' && rawIntent.allow_shell !== true) {
+        return {
+            ok: false,
+            code: 'agent_shell_requires_allow',
+            detail: `Agent-runnable shell intent ${intentName} must set allow_shell = true.`,
+        };
+    }
     return {
         ok: true,
         code: 'ok',

package/dist/core/line-endings.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { spawnSync } from 'node:child_process';
 import { existsSync } from 'node:fs';
 import path from 'node:path';
+import { createCommandEnv } from './command-env.js';
 import { readFileInsideWithoutSymlinks, writeFileInsideWithoutSymlinks } from './safe-filesystem.js';
 const GITATTRIBUTES_PATH = '.gitattributes';
 function toPosixPath(value) {
@@ -18,6 +19,7 @@ function gitList(projectRoot, args) {
     const result = spawnSync('git', [...args, '-z'], {
         cwd: projectRoot,
         encoding: 'buffer',
+        env: createCommandEnv(projectRoot, { policy: 'minimal', allowlist: [] }),
         stdio: ['ignore', 'pipe', 'pipe'],
         windowsHide: true,
     });

package/dist/core/run-write-drift.js CHANGED Viewed

@@ -75,8 +75,8 @@ function collectSnapshotEntries(projectRoot, currentPath, entries) {
         entries.set(relativePath, signatureForPath(fullPath));
     }
 }
-function captureSnapshot(projectRoot) {
-    const gitSnapshot = captureGitStatusSnapshot(projectRoot);
+function captureSnapshot(projectRoot, env) {
+    const gitSnapshot = captureGitStatusSnapshot(projectRoot, env);
     if (gitSnapshot) {
         return gitSnapshot;
     }
@@ -102,9 +102,10 @@ function captureSnapshot(projectRoot) {
         };
     }
 }
-function captureGitStatusSnapshot(projectRoot) {
+function captureGitStatusSnapshot(projectRoot, env) {
     const result = spawnSync('git', ['-C', projectRoot, 'status', '--porcelain=v1', '-z', `--untracked-files=${GIT_STATUS_UNTRACKED_MODE}`], {
         encoding: 'utf8',
+        env,
         input: '',
         maxBuffer: GIT_STATUS_MAX_BUFFER_BYTES,
         stdio: ['ignore', 'pipe', 'pipe'],
@@ -206,21 +207,23 @@ function createUnavailableWriteDriftReceipt(declaredPaths, reason) {
         reason,
     };
 }
-export function startRunWriteTracking(projectRoot, contract, intentName, options = {}) {
+export function startRunWriteTracking(projectRoot, contract, intentName, options) {
     const declaredPaths = [
         ...listDeclaredWritePaths(projectRoot, contract, intentName),
         ...(options.additionalDeclaredPaths ?? []).map(normalizeRelativePath),
     ];
     return {
         projectRoot,
+        env: options.env,
         declaredPaths: [...new Set(declaredPaths)].sort((left, right) => left.localeCompare(right)),
-        before: captureSnapshot(projectRoot),
+        before: captureSnapshot(projectRoot, options.env),
     };
 }
-export function startRunWriteBatchTracking(projectRoot) {
+export function startRunWriteBatchTracking(projectRoot, env) {
     return {
         projectRoot,
-        before: captureSnapshot(projectRoot),
+        env,
+        before: captureSnapshot(projectRoot, env),
     };
 }
 export function finishRunWriteBatchTracking(tracker, intents) {
@@ -232,7 +235,7 @@ export function finishRunWriteBatchTracking(tracker, intents) {
     if (tracker.before.status === 'unavailable') {
         return fallbackReceipts;
     }
-    const after = captureSnapshot(tracker.projectRoot);
+    const after = captureSnapshot(tracker.projectRoot, tracker.env);
     if (after.status === 'unavailable') {
         return new Map(intents.map((intent) => [
             intent.intentName,
@@ -309,7 +312,7 @@ export function finishRunWriteTracking(tracker) {
     if (tracker.before.status === 'unavailable') {
         return createUnavailableWriteDriftReceipt(tracker.declaredPaths, tracker.before.reason);
     }
-    const after = captureSnapshot(tracker.projectRoot);
+    const after = captureSnapshot(tracker.projectRoot, tracker.env);
     if (after.status === 'unavailable') {
         return createUnavailableWriteDriftReceipt(tracker.declaredPaths, after.reason);
     }

package/dist/core/test-selection.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { existsSync } from 'node:fs';
 import { isRecord, readMustflowOwnedTomlFile, readStringArray, resolveMustflowConfigPath, } from './config-loading.js';
 import { classifyVerificationCandidate, } from './verification-plan.js';
+import { normalizeSafeTestTargetPath } from './test-target-paths.js';
 export const TEST_SELECTION_CONFIG_RELATIVE_PATH = '.mustflow/config/test-selection.toml';
 const STALE_OR_MISSING_RULES_NOTE = 'Project-declared test selection rules did not cover the current changed files; review .mustflow/config/test-selection.toml for stale or missing rules.';
 function uniqueSorted(values) {
@@ -47,8 +48,18 @@ function readRule(value) {
     const surfaces = readStringArray(value.match, 'surfaces');
     const intent = readStringField(value.select, 'intent');
     const fallbackIntent = readStringField(value.select, 'fallback_intent');
-    const testTargets = readStringArray(value.select, 'test_targets') ?? [];
-    if (!id || !risk || !reason || !paths || paths.length === 0 || !surfaces || surfaces.length === 0 || !intent || !fallbackIntent) {
+    const rawTestTargets = readStringArray(value.select, 'test_targets') ?? [];
+    const testTargets = rawTestTargets.map((target) => normalizeSafeTestTargetPath(target));
+    if (!id ||
+        !risk ||
+        !reason ||
+        !paths ||
+        paths.length === 0 ||
+        !surfaces ||
+        surfaces.length === 0 ||
+        !intent ||
+        !fallbackIntent ||
+        !testTargets.every((target) => target !== null)) {
         return null;
     }
     return {

package/dist/core/test-target-paths.js ADDED Viewed

@@ -0,0 +1,17 @@
+import path from 'node:path';
+export const TEST_TARGET_PATH_ERROR = "entries must be non-empty relative paths that do not start with '-'";
+/**
+ * Normalize a test target into a safe, forward-slash relative path.
+ *
+ * The input is trimmed and every backslash is converted to '/'. The target is
+ * rejected (null is returned) when it:
+ *   - is not a string, or is empty after trimming;
+ *   - starts with '-';
+ *   - is absolute under POSIX or Windows rules;
+ *   - starts with a Windows drive prefix such as "C:" (drive-relative paths
+ *     like "C:..\\x" are not absolute, but on Windows they resolve against
+ *     that drive's current directory rather than the working directory);
+ *   - contains an empty, "." or ".." segment.
+ *
+ * @param {unknown} value - Candidate test target.
+ * @returns {string | null} The normalized path, or null when the target is unsafe.
+ */
+export function normalizeSafeTestTargetPath(value) {
+    if (typeof value !== 'string') {
+        return null;
+    }
+    const raw = value.trim();
+    const normalized = raw.replace(/\\/g, '/');
+    if (normalized.length === 0 ||
+        normalized.startsWith('-') ||
+        // path.win32.isAbsolute() is false for drive-relative input ("C:foo"), so check the prefix directly.
+        /^[A-Za-z]:/.test(normalized) ||
+        path.posix.isAbsolute(normalized) ||
+        path.win32.isAbsolute(raw)) {
+        return null;
+    }
+    const segments = normalized.split('/');
+    return segments.every((segment) => segment.length > 0 && segment !== '.' && segment !== '..') ? normalized : null;
+}

package/dist/core/validation-ratchet.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { spawnSync } from 'node:child_process';
 import { existsSync, readFileSync } from 'node:fs';
 import path from 'node:path';
+import { createCommandEnv } from './command-env.js';
 const TEST_CHANGE_KINDS = new Set(['test', 'test_fixture']);
 const SKIP_OR_ONLY_MARKER = /\b(?:describe|it|test)\s*\.\s*(?:skip|only)\s*\(/u;
 const TODO_OR_PENDING_MARKER = /\b(?:describe|it|test)\s*\.\s*(?:todo|pending)\s*\(/u;
@@ -52,6 +53,7 @@ function gitDiffLines(projectRoot, relativePath) {
     const result = spawnSync('git', ['diff', '--no-ext-diff', '--unified=0', '--', relativePath], {
         cwd: projectRoot,
         encoding: 'utf8',
+        env: createCommandEnv(projectRoot, { policy: 'minimal', allowlist: [] }),
         windowsHide: true,
     });
     if (result.status !== 0 || typeof result.stdout !== 'string' || result.stdout.length === 0) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mustflow",
-  "version": "2.22.16",
+  "version": "2.22.17",
   "description": "Agent workflow documents and CLI for mustflow repository roots.",
   "type": "module",
   "license": "MIT-0",

package/templates/default/i18n.toml CHANGED Viewed

@@ -325,13 +325,13 @@ translations = {}
 [documents."skill.security-privacy-review"]
 source = "locales/en/.mustflow/skills/security-privacy-review/SKILL.md"
 source_locale = "en"
-revision = 17
+revision = 19
 translations = {}
 [documents."skill.security-regression-tests"]
 source = "locales/en/.mustflow/skills/security-regression-tests/SKILL.md"
 source_locale = "en"
-revision = 9
+revision = 11
 translations = {}
 [documents."skill.search-ad-content-authoring"]

package/templates/default/locales/en/.mustflow/skills/security-privacy-review/SKILL.md CHANGED Viewed

@@ -2,7 +2,7 @@
 mustflow_doc: skill.security-privacy-review
 locale: en
 canonical: true
-revision: 17
+revision: 19
 lifecycle: mustflow-owned
 authority: procedure
 name: security-privacy-review
@@ -48,6 +48,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 - A change uses cache, Redis, generated state, search documents, or read models to make authorization, ownership, subscription, entitlement, payment, inventory, or admin decisions.
 - A change adds external URL fetching, webhook callbacks, redirects, browser previews, remote downloads, database-as-a-service rules, security headers, CORS, CSRF handling, or rate limits.
 - A change stores webhook payloads, external API requests or responses, retry errors, dead-letter jobs, AI prompts or outputs, email bodies, or provider diagnostic data.
+- A change adds or modifies public intake surfaces such as action handlers, webhook handlers, callback receivers, job enqueue endpoints, idempotency stores, or replay APIs, or changes their default verifier, authenticator, authorizer, normalizer, or deduplication collaborators.
 - A change records AI usage, model pricing, token counts, cache keys, feature metadata, prompt hashes, provider call metadata, retry cost, or failed AI calls that could include confidential content or identify users.
 - A change records AI budgets, feature policies, policy decisions, blocked reasons, model downgrades, agent steps, tool calls, provider budget status, or emergency disables that could reveal customer behavior, sensitive feature use, or regulated processing.
 - A change touches cookies, JWTs, reset tokens, invite tokens, OAuth callbacks, file upload or download, browser storage, business rules, pricing, entitlements, database queries, ORM bulk operations, or deployment configuration.
@@ -56,6 +57,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 - A change touches cryptography, password hashing, token generation, random number generation, TLS/HTTPS, certificate validation, scanner gates, or a security invariant that could drift across architecture boundaries.
 - A change adds, imports, recommends, or installs third-party dependencies that may affect the software supply chain.
 - A change introduces or edits agent configuration, MCP/tool configuration, prompt files, model instructions, or repository-local rule files.
+- A change adds or modifies policy engines, architecture linters, rule catalogs, validators, generated compliance reports, or governance gates that can approve sensitive data, payment, API, AI, tier, or deployment boundaries.
 - A change affects CI/CD workflow permissions, fork pull-request handling, build scripts, package lifecycle scripts, deployment secrets, container users, storage buckets, debug flags, or public admin, metrics, GraphQL, cache, or search endpoints.
 - Documentation, templates, examples, tests, or final reports mention sensitive data handling, privacy behavior, secret handling, or user-identifying data.
 - A diff could expose data through filenames, paths, command output, screenshots, generated artifacts, package contents, or public docs.
@@ -82,6 +84,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 - Cookie, JWT, OAuth, file upload, file download, business-value, database mutation, ORM bulk operation, CI/CD permission, deployment setting, or secret-source surface involved.
 - Cryptographic primitive, password hashing, random-token, secure transport, certificate validation, scanner gate, or security invariant involved.
 - Existing project rules for secrets, privacy, generated state, public docs, package contents, and command output.
+- Policy or rule-catalog source of truth, trusted metadata source, fallback behavior when a rule file is missing, and any untrusted repository-local fields that might be treated as ownership, tier, role, or exemption evidence.
 - Admin operation list, role or capability model, audit-log fields, cache visibility policy, and cache invalidation surface when those are involved.
 - Behavior analytics event names, event versions, actor identifiers, anonymous identifiers, properties, retention period, deletion or anonymization policy, and whether event writes can be delayed or lost.
 - Core event ownership, including which signup, login failure, account recovery, payment, refund, subscription, entitlement, permission, file, search, admin, webhook, queue, and security events must remain internally stored instead of only in a SaaS dashboard.
@@ -91,6 +94,8 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 - Data classification policy when available, including sensitive personal data, ordinary personal data, product usage data, public content, AI prompts or outputs, and which classes may enter logs, analytics, support tools, AI providers, or cross-region backups.
 - Runtime and dependency patch policy, including supported or LTS version requirement, end-of-life ban, lockfile expectation, vulnerability scan source, patch response target, smoke-test surface, canary or rollback route, and whether experimental runtime choices are kept off survival paths.
 - Webhook and external-call record policy, including signature verification, processed-event deduplication, safe request hashes, redacted provider responses, unknown-result reconciliation, dead-letter retention, and whether raw payloads are needed or should be replaced by bounded metadata.
+- Public intake default policy, including whether verifiers, authenticators, authorizers, deduplication stores, idempotency stores, and normalizers are enforced through required registration or explicit opt-in, or are silently replaced by permissive defaults.
+- Limits on attacker-controlled keys and headers, including idempotency keys, webhook event ids, provider names, action names, replay ids, dedupe keys, request ids, and any in-memory map or queue keyed by public request data.
 - AI record policy, including prompt and output retention, cache-key hashing, provider request id handling, feature-key properties, pricing snapshots, token usage, failed-call errors, user or account identifiers, and whether raw prompts or generated text are omitted, redacted, encrypted, or retained under a narrow rule.
 - AI budget and gateway policy, including whether provider budgets are hard stops or only alerts, whether product-owned hard limits exist, which identifiers are recorded for user, organization, feature, model, request, provider call, policy decision, and whether blocked or downgraded decisions are logged without exposing prompt text.
 - Cache authority boundary, including which data is final source of truth and which values are disposable, stale, private, or shared.
@@ -154,6 +159,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 20. For cache purge, search reindex, ranking refresh, and generated-state rebuild endpoints, treat them as privileged state-changing operations with authorization, rate limiting, audit logs, idempotency, and bounded target selection.
 21. For external URL, webhook, preview, redirect, download, or callback behavior, check allowlists, protocol restrictions, redirect handling, DNS/IP re-resolution, private network ranges, link-local metadata endpoints, webhook signatures, timeout limits, retry limits, and open redirect parameters such as `next` or `redirect`.
    - For webhooks, verify the signature against the raw body before trusting parsed data. Store only the raw body reference or bounded raw payload when replay, verification, or support needs justify it.
+   - Do not silently install allow-all, unsigned, no-op, nil, null-object, or test-only verifiers for public webhook or callback endpoints. Missing verifier, authenticator, or authorization configuration should fail registration unless the caller explicitly selects a clearly named unsafe or local-only mode.
    - Store processed event identifiers to avoid duplicate effects. Keep provider event payloads, request bodies, and response bodies out of ordinary logs and dead-letter records unless they are redacted and have a retention rule.
 22. For database-as-a-service, storage bucket, or realtime rules, check that server-side policies are default-deny, ownership-scoped, and not left in public read/write development mode.
 23. For input sinks, check parameterized queries, ORM binding, static command maps, output encoding, HTML/Markdown rendering boundaries, unsafe dynamic evaluation, XML/YAML/Markdown parser options, redirect and sort parameters, page-size limits, and framework escape hatches.
@@ -163,6 +169,8 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
    - For direct-to-object-storage uploads, authorize the target resource before issuing the signed upload URL, confirm upload completion before making the asset usable, and keep pending, uploaded, processing, ready, failed, and deleted states separate.
    - Inspect actual file bytes instead of trusting extension or `Content-Type`. Re-encode images and strip metadata when practical before serving user uploads.
 25. For business logic, check that server code does not trust client-supplied prices, discounts, roles, owners, entitlement state, plan limits, usage counters, inventory, seats, refunds, credits, or coupon state. Inspect idempotency, transactions, uniqueness, and concurrent requests for repeated side effects.
+   - For public action or intake endpoints, run cheap request-shape validation and check attacker-controlled idempotency keys before permanently claiming the key. If a request is rejected before the trusted side effect starts, release or avoid storing the key so malformed traffic cannot poison future valid retries.
+   - Bound default in-memory idempotency, deduplication, replay, rate-limit, and request-tracking stores by key length, entry count, TTL, eviction, or a durable backend contract. A process-memory map keyed by unbounded public headers or event ids is an availability boundary, not just an implementation detail.
 26. For API responses, check that the response contains only fields the caller may see and needs for the use case. Do not expose password hashes, internal storage keys, permanent private URLs, raw billing provider ids, internal moderation notes, private IP data, privileged flags, or database columns merely because they are present on the model.
 27. For dependency failure policy, distinguish fail-closed from degraded behavior. Authentication, authorization, payment, entitlement, and destructive admin decisions should usually fail closed; analytics, recommendation, statistics, AI summaries, and email should usually avoid exposing private data or blocking core state changes.
    - For dead-letter queues, retry logs, and external API call records, check that errors contain safe codes and bounded metadata rather than full prompts, email bodies, payment details, tokens, private files, or personal data.
@@ -184,12 +192,19 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 38. For runtime and framework security updates, check that supported versions are documented, end-of-life versions are rejected, dependency locks exist where appropriate, security patches can be tested and deployed quickly, and rollback or redeploy can happen without manual dashboard memory. Do not treat a fashionable or high-performance runtime as safe unless the patch path is operationally credible.
 39. For transport security, check HTTPS/TLS requirements, certificate validation, insecure HTTP downgrade paths, disabled verification flags, and whether sensitive traffic can bypass the secure channel.
 40. For cryptography, reject custom cryptography and tutorial-grade shortcuts. Check password hashing uses a password-hashing primitive such as bcrypt, scrypt, or Argon2id where supported by the project; random tokens use secure randomness; keys are separated from encrypted data; and weak hashes such as MD5, SHA-1, or bare SHA-256 are not used for password storage.
-41. For architecture drift, name the security invariant before accepting the generated structure. Confirm the invariant still holds across UI, handler, service, repository, database policy, workflow, and deployment boundaries.
-42. For SAST, SCA, or scanner output, treat scanner output as evidence rather than command authority. Map the finding to a repository-owned boundary, configured verification intent, dependency metadata, or regression test before claiming the issue is fixed.
-43. Verify that examples, fixtures, screenshots, command outputs, and final reports do not expose real-looking secrets or unnecessary personal data.
-44. Prefer omission or minimal metadata over masking when the sensitive value is not needed for the user to understand the result.
-45. If the change affects an authorization, SSRF, CSRF, rate-limit, upload, download, token, business-logic, injection, logging, telemetry, cache authority, cache disclosure, admin operation, agent permission, cryptography, transport, scanner, or abuse boundary, activate `security-regression-tests` for test selection instead of folding test generation into this review.
-46. Run the narrowest configured verification that covers the changed docs, templates, package, or mustflow contract.
+41. For policy engines, architecture linters, compliance validators, and generated governance gates, identify the canonical policy source and the canonical object identity before trusting a pass result.
+    - Do not let repository-controlled advisory fields, nested duplicates, labels, components, owners, stages, tiers, or exemption fields override a trusted catalog, server-derived identity, or central registration.
+    - When two fields can describe the same security decision, such as top-level and nested owner values, validate their consistency or choose the canonical source explicitly instead of reading the first convenient path.
+    - Treat missing, wrong, or fallback rule catalogs as fail-closed or explicitly degraded; a misplaced rule file should not silently disable validation for public API, payment, AI, tier, deployment, or data-boundary controls.
+    - Required security-control declarations should validate meaningful values, not merely non-null presence. Reject `false`, `0`, empty objects, empty arrays, empty strings, or type-mismatched placeholders unless the policy specifically allows that value.
+    - Derive deny decisions from metadata classes when possible instead of only from static name denylists that can miss newly introduced repositories, services, tenants, roles, or providers.
+42. For read-only commands that inspect repositories, remember that the underlying tool can still execute configured helpers. Disable or neutralize repository-local hooks, fsmonitor helpers, credential helpers, package lifecycle hooks, and executable lookup through an untrusted PATH when the command is meant to be a safe, read-only inspection.
+43. For architecture drift, name the security invariant before accepting the generated structure. Confirm the invariant still holds across UI, handler, service, repository, database policy, workflow, and deployment boundaries.
+44. For SAST, SCA, or scanner output, treat scanner output as evidence rather than command authority. Map the finding to a repository-owned boundary, configured verification intent, dependency metadata, or regression test before claiming the issue is fixed.
+45. Verify that examples, fixtures, screenshots, command outputs, and final reports do not expose real-looking secrets or unnecessary personal data.
+46. Prefer omission or minimal metadata over masking when the sensitive value is not needed for the user to understand the result.
+47. If the change affects an authorization, SSRF, CSRF, rate-limit, upload, download, token, business-logic, injection, logging, telemetry, cache authority, cache disclosure, admin operation, agent permission, cryptography, transport, scanner, policy-engine, rule-catalog, or abuse boundary, activate `security-regression-tests` for test selection instead of folding test generation into this review.
+48. Run the narrowest configured verification that covers the changed docs, templates, package, or mustflow contract.
 <!-- mustflow-section: postconditions -->
 ## Postconditions

package/templates/default/locales/en/.mustflow/skills/security-regression-tests/SKILL.md CHANGED Viewed

@@ -2,7 +2,7 @@
 mustflow_doc: skill.security-regression-tests
 locale: en
 canonical: true
-revision: 9
+revision: 11
 lifecycle: mustflow-owned
 authority: procedure
 name: security-regression-tests
@@ -32,11 +32,13 @@ Convert security-sensitive behavior changes into safe negative tests that preser
 - Authentication, authorization, session, CSRF, rate-limit, admin, payment, credit, subscription, personal-data, or tenant-boundary behavior changes.
 - Input validation, output encoding, file upload, path handling, webhook callback, redirect, or external URL handling changes.
+- Public action, webhook, callback, replay, job enqueue, idempotency, deduplication, or in-memory intake-store behavior changes.
 - Cookie, JWT, OAuth callback, reset token, invite token, logout, reauthentication, file download, upload processing, business-rule, entitlement, pricing, inventory, database query, ORM bulk operation, or deployment-configuration behavior changes.
 - AI-generated or vibe-coded routes, data access, external fetchers, admin screens, or database rules need denied-case coverage beyond a happy-path test.
 - Cryptography, password hashing, secure randomness, HTTPS/TLS, certificate validation, scanner-gate, or security-invariant behavior changes.
 - Command construction, command recommendation, executable resolution, command-contract linting, or copy-to-clipboard command behavior changes.
 - Filesystem containment, symlink handling, package publishing, build pipeline, or release automation behavior changes.
+- Policy engines, architecture linters, rule-catalog loaders, schema validators, generated compliance reports, or governance gates change how security-sensitive data, API, AI, payment, tier, deployment, ownership, or repository boundaries are approved.
 - A bug fix closes an abuse case and the fix needs a regression test to prevent reintroduction.
 - A review identifies a concrete security-sensitive boundary that can be expressed as a deterministic test.
 - A static analysis alert identifies a concrete data flow, permission boundary, command boundary, artifact boundary, or input-handling bug that can be locked with a local test.
@@ -96,13 +98,18 @@ Convert security-sensitive behavior changes into safe negative tests that preser
    - unsafe external URL, callback, redirect, or server-side request target
    - SSRF-style private network, localhost, link-local metadata, redirect, or DNS re-resolution target
    - missing webhook signature validation or unsafe retry behavior for external callbacks
+   - insecure verifier, authenticator, authorizer, normalizer, or dedupe default where missing configuration silently becomes allow-all, unsigned, no-op, or test-only behavior on a public endpoint
    - CSRF-style state change that relies on browser credentials without an origin, token, or same-site boundary
    - missing rate limit or lockout on login, signup, token reset, invitation, webhook, or expensive generation endpoints
    - client-supplied price, discount, role, owner, entitlement, plan, inventory, seat, refund, coupon, or usage value trusted by the server
    - ORM mass assignment, unscoped `findMany`, `updateMany`, `deleteMany`, unsafe migration default, or missing database policy enforcement
    - unsafe shell command construction, command name interpolation, clipboard command output, or executable lookup
-   - filesystem escape through symlinks, path traversal, archive entries, generated state, or package contents
+   - repository inspection command that unexpectedly executes local Git helpers, hooks, credential helpers, package lifecycle hooks, PATH shims, or other repository-controlled executables
+   - filesystem escape through symlinks, path traversal, archive entries, Git tree entries, generated state, or package contents
    - mismatch between two validators, linters, dashboards, schemas, or release gates that claim the same policy
+   - policy-source mismatch where a validator loads a security rule from the wrong catalog, silently disables a missing rule, or accepts a legacy fallback without an explicit compatibility test
+   - untrusted metadata override where a repository-controlled field, nested duplicate, component, owner, stage, tier, role, or exemption value is treated as trusted ownership or authorization evidence
+   - invalid-but-present security control values where `false`, `0`, `{}`, `[]`, empty strings, or type-mismatched placeholders satisfy required policy fields
    - release or package-publishing pipeline code execution before artifact publication
    - incomplete escaping, quoting, encoding, or sanitization where the safe behavior can be asserted without invoking a real shell or network target
    - stack trace or internal error exposure through a user-visible API, report, dashboard, or command output
@@ -116,6 +123,7 @@ Convert security-sensitive behavior changes into safe negative tests that preser
    - missing capability or scoped permission object where a sensitive operation depends on broad user, role, or global authorization state
    - missing invariant policy where a sensitive state change could violate a non-negotiable rule such as last-owner, entitlement, paid-order, refund, or retention constraints
    - missing idempotency key, action ledger, or outbox/inbox record where repeated execution of a side effect could charge, refund, notify, grant, revoke, publish, or delete more than once
+   - unbounded public request identifiers, idempotency keys, webhook event ids, replay ids, provider names, request ids, or header values retained in process memory without a length limit, entry-count limit, TTL, eviction, or cleanup on rejected requests
    - exposed debug, admin, metrics, storage, GraphQL, development console, root container user, default credential, or fork pull-request secret path that can be checked locally
 3. Search for existing tests that already cover the same boundary. Strengthen the existing test when that gives clearer coverage than adding a new one.
 4. Build the smallest safe negative test data: at least one allowed control case when useful, and one denied case that proves the boundary rejects the abuse condition.
@@ -124,24 +132,27 @@ Convert security-sensitive behavior changes into safe negative tests that preser
 7. For CSRF and browser-credential state changes, assert that the mutating operation rejects missing or mismatched token, origin, or same-site evidence according to the project framework.
 8. For rate limits and lockouts, use injected time, local stores, or fake counters to prove repeated attempts are bounded without slowing the suite.
 9. For session, JWT, OAuth, reset, invite, logout, or reauthentication boundaries, assert the denied condition directly: invalid signature, expired token, wrong issuer, wrong audience, missing state, revoked token, reused token, or missing recent authentication.
-10. For upload and download boundaries, use local fixture files and fake storage. Assert authorization, content signature, MIME, size, filename, path, metadata stripping, and conversion resource-limit behavior without using live user files.
-11. For business-rule boundaries, use server-side fixtures that try manipulated price, discount, role, owner, entitlement, plan, inventory, seat, refund, coupon, or usage fields. Assert that state remains unchanged or is recalculated from trusted server data.
-12. For database and ORM boundaries, assert scoped queries or policies through observable behavior: cross-tenant rows stay invisible, bulk update or delete affects only owned rows, mass-assigned privileged fields are ignored, and unsafe migration defaults cannot create elevated access.
-13. For cryptography and token-generation boundaries, assert behavior through the project-owned API rather than hard-coding private implementation details: password verifiers reject plaintext or fast-hash storage, token generation uses injected secure randomness or a deterministic test double, and custom cryptography shortcuts are absent where the project exposes that decision.
-14. For transport-security boundaries, assert configuration rejects disabled certificate validation or insecure HTTP for sensitive endpoints when the project owns that configuration.
-15. For architecture-drift boundaries, write the test around the security invariant, not the refactor shape: unauthorized access stays denied, sensitive output stays omitted, and side effects remain scoped after the generated structure changes.
-16. For parser, validator, serializer, path, command, or workflow boundaries, consider a bounded property-based or fuzz-style regression when the invariant is clearer than a list of hand-written examples. Keep generators local, deterministic under the test runner, size-limited, and focused on the defensive invariant.
-17. When adding a fuzzing or property-based testing dependency, keep dependency metadata, lockfiles, test selection rules, and package tests synchronized. Prefer an existing project dependency when it can express the invariant cleanly.
-18. Use mocks or local fakes for external requests, uploads, redirects, webhooks, payment providers, file systems, shell commands, package registries, and CI workflows. Do not contact live suspicious endpoints or publish real artifacts.
-19. Name the test after the defensive expectation, such as `cannot_read_other_users_invoice` or `rejects_private_network_callback_url`.
-20. Keep assertions tied to observable behavior: status code, returned error shape, unchanged database state, missing side effect, sanitized output, rejected job, or invariant preserved for all generated cases.
-21. Avoid dumping long exploit strings into the test. Use minimal representative inputs or generated values that prove the validation or boundary rule without becoming an offensive payload corpus.
-22. For command and filesystem boundaries, assert the denied side effect directly: no injected command appears in a runnable recommendation, no repository-local shim is executed, no background shell pattern is counted runnable, no symlink target outside the root is read or written.
-23. For plan/apply, capability, invariant, time, and idempotency boundaries, assert the safety contract directly: planning produces no side effect, commit rejects stale or unauthorized capability, invalid transitions preserve state, injected time controls expiry, and repeated side-effect keys do not repeat the effect.
-24. For workflow scanner fixes, prefer repository-local assertions for durable contracts: action references are pinned to commit SHAs or digest-pinned containers, privileged permissions are job-scoped, fork pull requests do not receive secrets, deployment or scanner jobs can be manually rerun when useful, and dependency scans exclude fixture-only manifests unless intentionally included.
-25. For deployment and configuration fixes, prefer local config assertions: debug flags are off for production, sample credentials are absent, public admin or metrics endpoints are not enabled by default, storage is not public, containers do not run as root when the project controls that setting, and HTTPS requirements are preserved.
-26. For scanner-driven fixes, include a regression only when the rule reflects a durable project contract. Do not add brittle tests that merely assert the scanner's current wording, line number, or severity.
-27. If the project lacks enough context to write a deterministic test, output a concrete test proposal instead of inventing fixtures or behavior.
+10. For public webhook, callback, and action-intake defaults, assert missing verifier, authenticator, or authorizer configuration fails registration or setup. If an intentionally unsafe local/test mode exists, require the test to pass that mode explicitly and name it in the fixture.
+11. For public idempotency, deduplication, replay, rate-limit, and request-tracking stores, assert malformed rejected requests do not permanently claim keys, oversized keys are rejected or normalized before retention, and default memory stores have observable bounds such as entry eviction, TTL, or a configured capacity.
+12. For upload and download boundaries, use local fixture files and fake storage. Assert authorization, content signature, MIME, size, filename, path, metadata stripping, and conversion resource-limit behavior without using live user files.
+13. For business-rule boundaries, use server-side fixtures that try manipulated price, discount, role, owner, entitlement, plan, inventory, seat, refund, coupon, or usage fields. Assert that state remains unchanged or is recalculated from trusted server data.
+14. For database and ORM boundaries, assert scoped queries or policies through observable behavior: cross-tenant rows stay invisible, bulk update or delete affects only owned rows, mass-assigned privileged fields are ignored, and unsafe migration defaults cannot create elevated access.
+15. For cryptography and token-generation boundaries, assert behavior through the project-owned API rather than hard-coding private implementation details: password verifiers reject plaintext or fast-hash storage, token generation uses injected secure randomness or a deterministic test double, and custom cryptography shortcuts are absent where the project exposes that decision.
+16. For transport-security boundaries, assert configuration rejects disabled certificate validation or insecure HTTP for sensitive endpoints when the project owns that configuration.
+17. For architecture-drift boundaries, write the test around the security invariant, not the refactor shape: unauthorized access stays denied, sensitive output stays omitted, and side effects remain scoped after the generated structure changes.
+18. For policy-engine or governance-linter boundaries, add denied cases that prove the invariant cannot be bypassed by newly named entities, spoofed duplicate fields, self-declared ownership metadata, missing or misplaced rule files, or invalid-but-present values. Include an allowed control case when it clarifies the intended trusted source.
+19. For parser, validator, serializer, path, command, or workflow boundaries, consider a bounded property-based or fuzz-style regression when the invariant is clearer than a list of hand-written examples. Keep generators local, deterministic under the test runner, size-limited, and focused on the defensive invariant.
+20. When adding a fuzzing or property-based testing dependency, keep dependency metadata, lockfiles, test selection rules, and package tests synchronized. Prefer an existing project dependency when it can express the invariant cleanly.
+21. Use mocks or local fakes for external requests, uploads, redirects, webhooks, payment providers, file systems, shell commands, package registries, Git helpers, and CI workflows. Do not contact live suspicious endpoints or publish real artifacts.
+22. Name the test after the defensive expectation, such as `cannot_read_other_users_invoice` or `rejects_private_network_callback_url`.
+23. Keep assertions tied to observable behavior: status code, returned error shape, unchanged database state, missing side effect, sanitized output, rejected job, or invariant preserved for all generated cases.
+24. Avoid dumping long exploit strings into the test. Use minimal representative inputs or generated values that prove the validation or boundary rule without becoming an offensive payload corpus.
+25. For command and filesystem boundaries, assert the denied side effect directly: no injected command appears in a runnable recommendation, no repository-local shim or Git helper is executed, no background shell pattern is counted runnable, no symlink target outside the root is read or written, and no Git tree or archive entry is written outside the intended destination.
+26. For plan/apply, capability, invariant, time, and idempotency boundaries, assert the safety contract directly: planning produces no side effect, commit rejects stale or unauthorized capability, invalid transitions preserve state, injected time controls expiry, and repeated side-effect keys do not repeat the effect.
+27. For workflow scanner fixes, prefer repository-local assertions for durable contracts: action references are pinned to commit SHAs or digest-pinned containers, privileged permissions are job-scoped, fork pull requests do not receive secrets, deployment or scanner jobs can be manually rerun when useful, and dependency scans exclude fixture-only manifests unless intentionally included.
+28. For deployment and configuration fixes, prefer local config assertions: debug flags are off for production, sample credentials are absent, public admin or metrics endpoints are not enabled by default, storage is not public, containers do not run as root when the project controls that setting, and HTTPS requirements are preserved.
+29. For scanner-driven fixes, include a regression only when the rule reflects a durable project contract. Do not add brittle tests that merely assert the scanner's current wording, line number, or severity.
+30. If the project lacks enough context to write a deterministic test, output a concrete test proposal instead of inventing fixtures or behavior.
 <!-- mustflow-section: postconditions -->
 ## Postconditions

package/templates/default/manifest.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 id = "default"
 name = "default"
-version = "2.22.16"
+version = "2.22.17"
 description = "Minimal workflow for LLM agents to read, edit, and verify their work in a repository."
 common_root = "common"
 locales_root = "locales"