npm - @yasserkhanorg/e2e-agents - Versions diffs - 1.4.0 → 1.5.0 - Mend

@yasserkhanorg/e2e-agents 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/dist/agent/feedback.d.ts +16 -0
package/dist/agent/feedback.d.ts.map +1 -1
package/dist/agent/feedback.js +62 -0
package/dist/agent/process_runner.d.ts +1 -1
package/dist/agent/process_runner.d.ts.map +1 -1
package/dist/agent/process_runner.js +3 -3
package/dist/api.d.ts.map +1 -1
package/dist/api.js +5 -2
package/dist/engine/plan_builder.d.ts +2 -1
package/dist/engine/plan_builder.d.ts.map +1 -1
package/dist/engine/plan_builder.js +22 -9
package/dist/esm/agent/feedback.js +61 -0
package/dist/esm/agent/process_runner.js +3 -3
package/dist/esm/api.js +5 -2
package/dist/esm/engine/plan_builder.js +22 -9
package/dist/esm/index.js +1 -1
package/dist/esm/pipeline/spec_verifier.js +75 -0
package/dist/esm/pipeline/stage3_generation.js +122 -4
package/dist/esm/pipeline/stage4_heal.js +146 -3
package/dist/esm/prompts/heal.js +4 -0
package/dist/esm/qa-agent/phase2/agent_loop.js +60 -24
package/dist/esm/qa-agent/phase2/exploration_state.js +21 -0
package/dist/esm/qa-agent/phase2/tools.js +99 -1
package/dist/esm/qa-agent/phase3/reporter.js +31 -4
package/dist/esm/validation/guardrails.js +1 -0
package/dist/index.d.ts +2 -2
package/dist/index.d.ts.map +1 -1
package/dist/index.js +3 -2
package/dist/pipeline/orchestrator.d.ts.map +1 -1
package/dist/pipeline/spec_verifier.d.ts +20 -0
package/dist/pipeline/spec_verifier.d.ts.map +1 -0
package/dist/pipeline/spec_verifier.js +79 -0
package/dist/pipeline/stage3_generation.d.ts +10 -0
package/dist/pipeline/stage3_generation.d.ts.map +1 -1
package/dist/pipeline/stage3_generation.js +120 -2
package/dist/pipeline/stage4_heal.d.ts +4 -0
package/dist/pipeline/stage4_heal.d.ts.map +1 -1
package/dist/pipeline/stage4_heal.js +145 -2
package/dist/prompts/heal.d.ts +2 -0
package/dist/prompts/heal.d.ts.map +1 -1
package/dist/prompts/heal.js +4 -0
package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -1
package/dist/qa-agent/phase2/agent_loop.js +60 -24
package/dist/qa-agent/phase2/exploration_state.d.ts.map +1 -1
package/dist/qa-agent/phase2/exploration_state.js +21 -0
package/dist/qa-agent/phase2/tools.d.ts.map +1 -1
package/dist/qa-agent/phase2/tools.js +99 -1
package/dist/qa-agent/phase3/reporter.js +31 -4
package/dist/qa-agent/types.d.ts +9 -1
package/dist/qa-agent/types.d.ts.map +1 -1
package/dist/validation/guardrails.d.ts +2 -0
package/dist/validation/guardrails.d.ts.map +1 -1
package/dist/validation/guardrails.js +4 -1
package/package.json +1 -1

package/dist/agent/feedback.d.ts CHANGED Viewed

@@ -71,5 +71,21 @@ export declare function appendFeedbackAndRecompute(appRoot: string, input: Recom
     calibration: CalibrationSummary;
 };
 export declare function readCalibration(appRoot: string): CalibrationSummary | null;
+export interface AdaptiveThresholds {
+    minConfidenceForTargeted: number;
+    safeMergeMinConfidence: number;
+    /** Subsystems that should always be included regardless of confidence */
+    alwaysIncludeSubsystems: string[];
+    /** Human-readable adjustment reasons for logging */
+    adjustmentReasons: string[];
+}
+/**
+ * Compute adaptive thresholds based on calibration data.
+ * - If recent recall < 0.8: lower minConfidence (catch more escapes)
+ * - If recent precision > 0.9: raise minConfidence (fewer unnecessary tests)
+ * - Per-subsystem: if falseNegativeRate > 0.3 in 30d, always include tests
+ * Returns defaults if no calibration data exists.
+ */
+export declare function getAdaptiveThresholds(appRoot: string): AdaptiveThresholds;
 export declare function readFlakyTests(appRoot: string): FlakySummary | null;
 //# sourceMappingURL=feedback.d.ts.map

package/dist/agent/feedback.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,
MAAM,GAAG,YAAY,GAAG,IAAI,CAEnE"}
1	+ {"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,MAAM,WAAW,kBAAkB;IAC/B,wBAA
wB,EAAE,MAAM,CAAC;IACjC,sBAAsB,EAAE,MAAM,CAAC;IAC/B,yEAAyE;IACzE,uBAAuB,EAAE,MAAM,EAAE,CAAC;IAClC,oDAAoD;IACpD,iBAAiB,EAAE,MAAM,EAAE,CAAC;CAC/B;AAOD;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,CA6DzE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAEnE"}

package/dist/agent/feedback.js CHANGED Viewed

@@ -4,6 +4,7 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.appendFeedbackAndRecompute = appendFeedbackAndRecompute;
 exports.readCalibration = readCalibration;
+exports.getAdaptiveThresholds = getAdaptiveThresholds;
 exports.readFlakyTests = readFlakyTests;
 const fs_1 = require("fs");
 const path_1 = require("path");
@@ -256,6 +257,67 @@ function appendFeedbackAndRecompute(appRoot, input) {
 function readCalibration(appRoot) {
     return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'calibration.json'));
 }
+const DEFAULT_MIN_CONFIDENCE = 60;
+const DEFAULT_SAFE_MERGE = 85;
+const MIN_CONFIDENCE_FLOOR = 40;
+const MIN_CONFIDENCE_CEILING = 80;
+/**
+ * Compute adaptive thresholds based on calibration data.
+ * - If recent recall < 0.8: lower minConfidence (catch more escapes)
+ * - If recent precision > 0.9: raise minConfidence (fewer unnecessary tests)
+ * - Per-subsystem: if falseNegativeRate > 0.3 in 30d, always include tests
+ * Returns defaults if no calibration data exists.
+ */
+function getAdaptiveThresholds(appRoot) {
+    const calibration = readCalibration(appRoot);
+    const reasons = [];
+    const alwaysInclude = [];
+    if (!calibration || calibration.samples === 0) {
+        return {
+            minConfidenceForTargeted: DEFAULT_MIN_CONFIDENCE,
+            safeMergeMinConfidence: DEFAULT_SAFE_MERGE,
+            alwaysIncludeSubsystems: [],
+            adjustmentReasons: ['No calibration data — using defaults'],
+        };
+    }
+    let minConfidence = DEFAULT_MIN_CONFIDENCE;
+    let safeMerge = DEFAULT_SAFE_MERGE;
+    // Adjust based on 7-day recall
+    if (calibration.recent7d.samples >= 3) {
+        if (calibration.recent7d.recall < 0.8) {
+            const adjustment = 10;
+            minConfidence -= adjustment;
+            safeMerge -= adjustment;
+            reasons.push(`Lowering confidence threshold by ${adjustment} (7d recall: ${calibration.recent7d.recall.toFixed(2)})`);
+        }
+        else if (calibration.recent7d.precision > 0.9) {
+            const adjustment = 5;
+            minConfidence += adjustment;
+            safeMerge += adjustment;
+            reasons.push(`Raising confidence threshold by ${adjustment} (7d precision: ${calibration.recent7d.precision.toFixed(2)})`);
+        }
+    }
+    // Clamp to safe ranges
+    minConfidence = Math.max(MIN_CONFIDENCE_FLOOR, Math.min(MIN_CONFIDENCE_CEILING, minConfidence));
+    safeMerge = Math.max(70, Math.min(95, safeMerge));
+    // Per-subsystem blind spot detection (30-day window)
+    for (const [subsystem, metrics] of Object.entries(calibration.bySubsystem)) {
+        const recent = metrics.recent30d;
+        if (recent.samples >= 3 && recent.falseNegativeRate > 0.3) {
+            alwaysInclude.push(subsystem);
+            reasons.push(`Always including ${subsystem} tests (30d false-negative rate: ${recent.falseNegativeRate.toFixed(2)})`);
+        }
+    }
+    if (reasons.length === 0) {
+        reasons.push('Calibration data within normal range — using defaults');
+    }
+    return {
+        minConfidenceForTargeted: minConfidence,
+        safeMergeMinConfidence: safeMerge,
+        alwaysIncludeSubsystems: alwaysInclude,
+        adjustmentReasons: reasons,
+    };
+}
 function readFlakyTests(appRoot) {
     return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
 }

package/dist/agent/process_runner.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import type { PipelineConfig } from './config.js';
 import type { CommandResult } from './pipeline_types.js';
 export declare function resolvePlaywrightBinary(testsRoot: string): string | null;
 export declare function summarizeCommandOutput(stdout: string, stderr: string): string;
-export declare function runCommand(command: string, args: string[], cwd: string, timeoutMs?: number): CommandResult;
+export declare function runCommand(command: string, args: string[], cwd: string, timeoutMs?: number, envOverride?: NodeJS.ProcessEnv): CommandResult;
 export declare function resolveMcpCommandTimeoutMs(pipeline: PipelineConfig): number;
 export declare function resolveMcpRetries(pipeline: PipelineConfig): number;
 export declare function isRetryableMcpFailure(result: CommandResult): boolean;

package/dist/agent/process_runner.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"process_runner.d.ts","sourceRoot":"","sources":["../../src/agent/process_runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,qBAAqB,CAAC;AAEvD,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAUxE;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAO7E;AAED,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,SAAiB,GAAG,aAAa,~~CAsBlH~~;AAED,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAM3E;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAMlE;AAED,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAQpE;AAED,wBAAgB,qBAAqB,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EAAE,EACd,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,GAChB,aAAa,CAYf"}
1	+ {"version":3,"file":"process_runner.d.ts","sourceRoot":"","sources":["../../src/agent/process_runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,qBAAqB,CAAC;AAEvD,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAUxE;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAO7E;AAED,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,SAAiB,EAAE,WAAW,CAAC,EAAE,MAAM,CAAC,UAAU,GAAG,aAAa,CAsBnJ;AAED,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAM3E;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAMlE;AAED,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAQpE;AAED,wBAAgB,qBAAqB,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EAAE,EACd,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,GAChB,aAAa,CAYf"}

package/dist/agent/process_runner.js CHANGED Viewed

@@ -31,12 +31,12 @@ function summarizeCommandOutput(stdout, stderr) {
     const lines = combined.split('\n').slice(-20);
     return lines.join('\n').slice(0, 2000);
 }
-function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000) {
+function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000, envOverride) {
     // When spawning `claude`, unset CLAUDECODE so nested invocations are allowed.
     // Claude Code sets this variable to block nested sessions; child processes
     // that spawn their own claude instance must run without it.
-    let env;
-    if (command === 'claude') {
+    let env = envOverride;
+    if (!env && command === 'claude') {
         const { CLAUDECODE: _, ...rest } = process.env;
         env = rest;
     }

package/dist/api.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAIA,OAAO,EAAgB,KAAK,eAAe,EAAC,MAAM,mBAAmB,CAAC;AACtE,OAAO,EAEH,KAAK,UAAU,EAClB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAmC,KAAK,YAAY,EAAC,MAAM,2BAA2B,CAAC;~~AAS9F~~,OAAO,EAAqB,KAAK,kBAAkB,EAAC,MAAM,2BAA2B,CAAC;AAEtF,OAAO,EAAyB,KAAK,6BAA6B,EAAE,KAAK,4BAA4B,EAAC,MAAM,oBAAoB,CAAC;AACjI,OAAO,EAEH,KAAK,yBAAyB,EAC9B,KAAK,wBAAwB,EAChC,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAGH,KAAK,yBAAyB,EACjC,MAAM,iCAAiC,CAAC;AAEzC,MAAM,WAAW,eAAgB,SAAQ,IAAI,CAAC,eAAe,EAAE,MAAM,CAAC;IAClE,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,4BAA4B;IACzC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACvC;AAED,MAAM,WAAW,6BAA6B;IAC1C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAcD,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,6BAA6B,GAAG,4BAA4B,CAE1G;AAED,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,4BAA4B,GAAG,wBAAwB,CASlG;AAED,MAAM,WAAW,sBAAsB;IACnC,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,0BAA0B,CAAC,OAAO,GAAE,eAAoB,GAAG,YAAY,CAQtF;AAED,wBAAgB,2BAA2B,CAAC,OAAO,GAAE,eAAoB,GAAG,sBAAsB,~~CAcjG~~;AAED,wBAAsB,gBAAgB,CAAC,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,sBAAsB,GAAG;IAAE,YAAY,CAAC,EAAE,kBAAkB,CAAA;CAAE,CAAC,~~CAgD7I~~;AAED,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,6BAA6B,GAAG,yBAAyB,CAkBrG"}
1	+ {"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAIA,OAAO,EAAgB,KAAK,eAAe,EAAC,MAAM,mBAAmB,CAAC;AACtE,OAAO,EAEH,KAAK,UAAU,EAClB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAmC,KAAK,YAAY,EAAC,MAAM,2BAA2B,CAAC;AAU9F,OAAO,EAAqB,KAAK,kBAAkB,EAAC,MAAM,2BAA2B,CAAC;AAEtF,OAAO,EAAyB,KAAK,6BAA6B,EAAE,KAAK,4BAA4B,EAAC,MAAM,oBAAoB,CAAC;AACjI,OAAO,EAEH,KAAK,yBAAyB,EAC9B,KAAK,wBAAwB,EAChC,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAGH,KAAK,yBAAyB,EACjC,MAAM,iCAAiC,CAAC;AAEzC,MAAM,WAAW,eAAgB,SAAQ,IAAI,CAAC,eAAe,EAAE,MAAM,CAAC;IAClE,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,4BAA4B;IACzC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACvC;AAED,MAAM,WAAW,6BAA6B;IAC1C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAcD,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,6BAA6B,GAAG,4BAA4B,CAE1G;AAED,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,4BAA4B,GAAG,wBAAwB,CASlG;AAED,MAAM,WAAW,sBAAsB;IACnC,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,0BAA0B,CAAC,OAAO,GAAE,eAAoB,GAAG,YAAY,CAQtF;AAED,wBAAgB,2BAA2B,CAAC,OAAO,GAAE,eAAoB,GAAG,sBAAsB,CAejG;AAED,wBAAsB,gBAAgB,CAAC,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,sBAAsB,GAAG;IAAE,YAAY,CAAC,EAAE,kBAAkB,CAAA;CAAE,CAAC,CAiD7I;AAED,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,6BAA6B,GAAG,yBAAyB,CAkBrG"}

package/dist/api.js CHANGED Viewed

@@ -13,6 +13,7 @@ const plan_js_1 = require("./agent/plan.js");
 const impact_engine_js_1 = require("./engine/impact_engine.js");
 const plan_builder_js_1 = require("./engine/plan_builder.js");
 const git_js_1 = require("./agent/git.js");
+const feedback_js_1 = require("./agent/feedback.js");
 const diff_loader_js_1 = require("./engine/diff_loader.js");
 const ai_enrichment_js_1 = require("./engine/ai_enrichment.js");
 const anthropic_provider_js_1 = require("./anthropic_provider.js");
@@ -60,7 +61,8 @@ function recommendTestsDeterministic(options = {}) {
         testsRoot: reportRoot,
         routeFamilies: config.routeFamilies,
     });
-    const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy);
+    const adaptive = (0, feedback_js_1.getAdaptiveThresholds)(reportRoot);
+    const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy, undefined, adaptive);
     const planPath = (0, plan_builder_js_1.writePlanReport)(reportRoot, plan);
     const ciSummaryMarkdown = (0, plan_builder_js_1.renderCiSummaryMarkdown)(plan);
     const ciSummaryPath = (0, plan_builder_js_1.writeCiSummary)(reportRoot, ciSummaryMarkdown);
@@ -106,7 +108,8 @@ async function recommendTestsAI(options = {}) {
             specDetails: [...specDetailsMap.values()],
         });
     }
-    const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy, aiEnrichment);
+    const adaptive = (0, feedback_js_1.getAdaptiveThresholds)(reportRoot);
+    const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy, aiEnrichment, adaptive);
     const planPath = (0, plan_builder_js_1.writePlanReport)(reportRoot, plan);
     const ciSummaryMarkdown = (0, plan_builder_js_1.renderCiSummaryMarkdown)(plan);
     const ciSummaryPath = (0, plan_builder_js_1.writeCiSummary)(reportRoot, ciSummaryMarkdown);

package/dist/engine/plan_builder.d.ts CHANGED Viewed

@@ -1,9 +1,10 @@
 import type { PolicyConfig } from '../agent/config.js';
 import type { ImpactResult } from './impact_engine.js';
 import type { AIEnrichmentResult } from './ai_enrichment.js';
+import type { AdaptiveThresholds } from '../agent/feedback.js';
 import type { PlanReport, GapDetail, CoveredFlowSummary } from '../agent/plan.js';
 export type { PlanReport, GapDetail, CoveredFlowSummary };
-export declare function buildPlanFromImpact(impact: ImpactResult, policyOverride?: Partial<PolicyConfig>, aiEnrichment?: AIEnrichmentResult): PlanReport;
+export declare function buildPlanFromImpact(impact: ImpactResult, policyOverride?: Partial<PolicyConfig>, aiEnrichment?: AIEnrichmentResult, adaptiveThresholds?: AdaptiveThresholds): PlanReport;
 export declare function writePlanReport(appRoot: string, plan: PlanReport): string;
 export declare function renderCiSummaryMarkdown(plan: PlanReport): string;
 export declare function writeCiSummary(appRoot: string, markdown: string, relativePath?: string): string;

package/dist/engine/plan_builder.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"plan_builder.d.ts","sourceRoot":"","sources":["../../src/engine/plan_builder.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;~~AACrD~~,OAAO,KAAK,EAAC,YAAY,EAAkB,MAAM,oBAAoB,CAAC;AAEtE,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,oBAAoB,CAAC;~~AAG3D~~,OAAO,KAAK,EACR,UAAU,EACV,SAAS,EACT,kBAAkB,EAIrB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EAAC,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAC,CAAC;~~AAiPxD~~,wBAAgB,mBAAmB,CAC/B,MAAM,EAAE,YAAY,EACpB,cAAc,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,EACtC,YAAY,CAAC,EAAE,kBAAkB,~~GAClC~~,UAAU,~~CA2IZ~~;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,MAAM,CAMzE;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAwHhE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,SAAiC,GAAG,MAAM,CAMvH"}
1	+ {"version":3,"file":"plan_builder.d.ts","sourceRoot":"","sources":["../../src/engine/plan_builder.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;AAErD,OAAO,KAAK,EAAC,YAAY,EAAkB,MAAM,oBAAoB,CAAC;AAEtE,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,oBAAoB,CAAC;AAC3D,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,sBAAsB,CAAC;AAG7D,OAAO,KAAK,EACR,UAAU,EACV,SAAS,EACT,kBAAkB,EAIrB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EAAC,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAC,CAAC;AAoPxD,wBAAgB,mBAAmB,CAC/B,MAAM,EAAE,YAAY,EACpB,cAAc,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,EACtC,YAAY,CAAC,EAAE,kBAAkB,EACjC,kBAAkB,CAAC,EAAE,kBAAkB,GACxC,UAAU,CAsJZ;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,MAAM,CAMzE;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAwHhE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,SAAiC,GAAG,MAAM,CAMvH"}

package/dist/engine/plan_builder.js CHANGED Viewed

@@ -9,6 +9,7 @@ exports.writeCiSummary = writeCiSummary;
 const fs_1 = require("fs");
 const path_1 = require("path");
 const minimatch_1 = require("minimatch");
+const test_path_js_1 = require("../agent/test_path.js");
 const impact_engine_js_1 = require("./impact_engine.js");
 const DEFAULT_POLICY = {
     minConfidenceForTargeted: 60,
@@ -196,22 +197,34 @@ function evaluateEnforcement(decision, policy) {
 }
 /**
  * Build recommended test list from impacted features' Playwright specs.
+ * When alwaysIncludeSubsystems is provided, specs from those subsystems are
+ * included regardless of their coverage status (blind-spot protection).
  */
-function buildRecommendedTests(impact) {
-    const tests = [];
+function buildRecommendedTests(impact, alwaysIncludeSubsystems = []) {
+    const tests = new Set();
+    const alwaysSet = new Set(alwaysIncludeSubsystems);
     for (const feature of impact.impactedFeatures) {
-        if (feature.coverageStatus !== 'uncovered') {
+        const shouldInclude = feature.coverageStatus !== 'uncovered' ||
+            feature.playwrightSpecs.some((spec) => alwaysSet.has((0, test_path_js_1.inferSubsystemFromTestPath)(spec)));
+        if (shouldInclude) {
             for (const spec of feature.playwrightSpecs) {
-                if (!tests.includes(spec)) {
-                    tests.push(spec);
-                }
+                tests.add(spec);
             }
         }
     }
-    return tests;
+    return [...tests];
 }
-function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
+function buildPlanFromImpact(impact, policyOverride, aiEnrichment, adaptiveThresholds) {
     const policy = { ...DEFAULT_POLICY, ...(policyOverride || {}) };
+    // Apply adaptive calibration overrides (if available and not explicitly overridden)
+    if (adaptiveThresholds && policyOverride?.minConfidenceForTargeted === undefined) {
+        policy.minConfidenceForTargeted = adaptiveThresholds.minConfidenceForTargeted;
+    }
+    if (adaptiveThresholds && policyOverride?.safeMergeMinConfidence === undefined) {
+        policy.safeMergeMinConfidence = adaptiveThresholds.safeMergeMinConfidence;
+    }
+    // Apply alwaysIncludeSubsystems: force their tests into the recommended set
+    const alwaysIncludeSubsystems = adaptiveThresholds?.alwaysIncludeSubsystems ?? [];
     const confidence = computeConfidence(impact);
     const runSetResult = pickRunSet(impact, confidence, policy);
     const decision = buildDecision(impact, runSetResult.runSet, confidence, policy);
@@ -294,7 +307,7 @@ function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
             advisoryScenarios,
         };
     });
-    const recommendedTests = buildRecommendedTests(impact);
+    const recommendedTests = buildRecommendedTests(impact, alwaysIncludeSubsystems);
     const requiredNewTests = gaps.map((f) => `${featureLabel(f)}: Add E2E tests`);
     const p0 = impact.impactedFeatures.filter((f) => f.priority === 'P0').length;
     const p1 = impact.impactedFeatures.filter((f) => f.priority === 'P1').length;

package/dist/esm/agent/feedback.js CHANGED Viewed

@@ -251,6 +251,67 @@ export function appendFeedbackAndRecompute(appRoot, input) {
 export function readCalibration(appRoot) {
     return readJson(join(appRoot, '.e2e-ai-agents', 'calibration.json'));
 }
+const DEFAULT_MIN_CONFIDENCE = 60;
+const DEFAULT_SAFE_MERGE = 85;
+const MIN_CONFIDENCE_FLOOR = 40;
+const MIN_CONFIDENCE_CEILING = 80;
+/**
+ * Compute adaptive thresholds based on calibration data.
+ * - If recent recall < 0.8: lower minConfidence (catch more escapes)
+ * - If recent precision > 0.9: raise minConfidence (fewer unnecessary tests)
+ * - Per-subsystem: if falseNegativeRate > 0.3 in 30d, always include tests
+ * Returns defaults if no calibration data exists.
+ */
+export function getAdaptiveThresholds(appRoot) {
+    const calibration = readCalibration(appRoot);
+    const reasons = [];
+    const alwaysInclude = [];
+    if (!calibration || calibration.samples === 0) {
+        return {
+            minConfidenceForTargeted: DEFAULT_MIN_CONFIDENCE,
+            safeMergeMinConfidence: DEFAULT_SAFE_MERGE,
+            alwaysIncludeSubsystems: [],
+            adjustmentReasons: ['No calibration data — using defaults'],
+        };
+    }
+    let minConfidence = DEFAULT_MIN_CONFIDENCE;
+    let safeMerge = DEFAULT_SAFE_MERGE;
+    // Adjust based on 7-day recall
+    if (calibration.recent7d.samples >= 3) {
+        if (calibration.recent7d.recall < 0.8) {
+            const adjustment = 10;
+            minConfidence -= adjustment;
+            safeMerge -= adjustment;
+            reasons.push(`Lowering confidence threshold by ${adjustment} (7d recall: ${calibration.recent7d.recall.toFixed(2)})`);
+        }
+        else if (calibration.recent7d.precision > 0.9) {
+            const adjustment = 5;
+            minConfidence += adjustment;
+            safeMerge += adjustment;
+            reasons.push(`Raising confidence threshold by ${adjustment} (7d precision: ${calibration.recent7d.precision.toFixed(2)})`);
+        }
+    }
+    // Clamp to safe ranges
+    minConfidence = Math.max(MIN_CONFIDENCE_FLOOR, Math.min(MIN_CONFIDENCE_CEILING, minConfidence));
+    safeMerge = Math.max(70, Math.min(95, safeMerge));
+    // Per-subsystem blind spot detection (30-day window)
+    for (const [subsystem, metrics] of Object.entries(calibration.bySubsystem)) {
+        const recent = metrics.recent30d;
+        if (recent.samples >= 3 && recent.falseNegativeRate > 0.3) {
+            alwaysInclude.push(subsystem);
+            reasons.push(`Always including ${subsystem} tests (30d false-negative rate: ${recent.falseNegativeRate.toFixed(2)})`);
+        }
+    }
+    if (reasons.length === 0) {
+        reasons.push('Calibration data within normal range — using defaults');
+    }
+    return {
+        minConfidenceForTargeted: minConfidence,
+        safeMergeMinConfidence: safeMerge,
+        alwaysIncludeSubsystems: alwaysInclude,
+        adjustmentReasons: reasons,
+    };
+}
 export function readFlakyTests(appRoot) {
     return readJson(join(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
 }

package/dist/esm/agent/process_runner.js CHANGED Viewed

@@ -22,12 +22,12 @@ export function summarizeCommandOutput(stdout, stderr) {
     const lines = combined.split('\n').slice(-20);
     return lines.join('\n').slice(0, 2000);
 }
-export function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000) {
+export function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000, envOverride) {
     // When spawning `claude`, unset CLAUDECODE so nested invocations are allowed.
     // Claude Code sets this variable to block nested sessions; child processes
     // that spawn their own claude instance must run without it.
-    let env;
-    if (command === 'claude') {
+    let env = envOverride;
+    if (!env && command === 'claude') {
         const { CLAUDECODE: _, ...rest } = process.env;
         env = rest;
     }

package/dist/esm/api.js CHANGED Viewed

@@ -5,6 +5,7 @@ import { appendPlanMetrics, } from './agent/plan.js';
 import { analyzeImpact as analyzeImpactV2 } from './engine/impact_engine.js';
 import { buildPlanFromImpact, renderCiSummaryMarkdown, writeCiSummary, writePlanReport, } from './engine/plan_builder.js';
 import { getChangedFiles } from './agent/git.js';
+import { getAdaptiveThresholds } from './agent/feedback.js';
 import { loadDiffs } from './engine/diff_loader.js';
 import { enrichImpactWithAI } from './engine/ai_enrichment.js';
 import { AnthropicProvider } from './anthropic_provider.js';
@@ -52,7 +53,8 @@ export function recommendTestsDeterministic(options = {}) {
         testsRoot: reportRoot,
         routeFamilies: config.routeFamilies,
     });
-    const plan = buildPlanFromImpact(impact, config.policy);
+    const adaptive = getAdaptiveThresholds(reportRoot);
+    const plan = buildPlanFromImpact(impact, config.policy, undefined, adaptive);
     const planPath = writePlanReport(reportRoot, plan);
     const ciSummaryMarkdown = renderCiSummaryMarkdown(plan);
     const ciSummaryPath = writeCiSummary(reportRoot, ciSummaryMarkdown);
@@ -98,7 +100,8 @@ export async function recommendTestsAI(options = {}) {
             specDetails: [...specDetailsMap.values()],
         });
     }
-    const plan = buildPlanFromImpact(impact, config.policy, aiEnrichment);
+    const adaptive = getAdaptiveThresholds(reportRoot);
+    const plan = buildPlanFromImpact(impact, config.policy, aiEnrichment, adaptive);
     const planPath = writePlanReport(reportRoot, plan);
     const ciSummaryMarkdown = renderCiSummaryMarkdown(plan);
     const ciSummaryPath = writeCiSummary(reportRoot, ciSummaryMarkdown);

package/dist/esm/engine/plan_builder.js CHANGED Viewed

@@ -3,6 +3,7 @@
 import { mkdirSync, writeFileSync } from 'fs';
 import { dirname, join } from 'path';
 import { minimatch } from 'minimatch';
+import { inferSubsystemFromTestPath } from '../agent/test_path.js';
 import { getGaps, getPartialGaps } from './impact_engine.js';
 const DEFAULT_POLICY = {
     minConfidenceForTargeted: 60,
@@ -190,22 +191,34 @@ function evaluateEnforcement(decision, policy) {
 }
 /**
  * Build recommended test list from impacted features' Playwright specs.
+ * When alwaysIncludeSubsystems is provided, specs from those subsystems are
+ * included regardless of their coverage status (blind-spot protection).
+ * Inclusion is decided per feature: when any spec of a feature maps to one of
+ * those subsystems, every spec of that feature is added. Specs are collected
+ * in a Set, so each spec appears only once in the returned array.
  */
-function buildRecommendedTests(impact) {
-    const tests = [];
+function buildRecommendedTests(impact, alwaysIncludeSubsystems = []) {
+    const tests = new Set();
+    const alwaysSet = new Set(alwaysIncludeSubsystems);
     for (const feature of impact.impactedFeatures) {
-        if (feature.coverageStatus !== 'uncovered') {
+        const shouldInclude = feature.coverageStatus !== 'uncovered' ||
+            feature.playwrightSpecs.some((spec) => alwaysSet.has(inferSubsystemFromTestPath(spec)));
+        if (shouldInclude) {
             for (const spec of feature.playwrightSpecs) {
-                if (!tests.includes(spec)) {
-                    tests.push(spec);
-                }
+                tests.add(spec);
             }
         }
     }
-    return tests;
+    return [...tests];
 }
-export function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
+export function buildPlanFromImpact(impact, policyOverride, aiEnrichment, adaptiveThresholds) {
     const policy = { ...DEFAULT_POLICY, ...(policyOverride || {}) };
+    // Apply adaptive calibration overrides (if available and not explicitly overridden)
+    if (adaptiveThresholds && policyOverride?.minConfidenceForTargeted === undefined) {
+        policy.minConfidenceForTargeted = adaptiveThresholds.minConfidenceForTargeted;
+    }
+    if (adaptiveThresholds && policyOverride?.safeMergeMinConfidence === undefined) {
+        policy.safeMergeMinConfidence = adaptiveThresholds.safeMergeMinConfidence;
+    }
+    // Apply alwaysIncludeSubsystems: force their tests into the recommended set
+    const alwaysIncludeSubsystems = adaptiveThresholds?.alwaysIncludeSubsystems ?? [];
     const confidence = computeConfidence(impact);
     const runSetResult = pickRunSet(impact, confidence, policy);
     const decision = buildDecision(impact, runSetResult.runSet, confidence, policy);
@@ -288,7 +301,7 @@ export function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
             advisoryScenarios,
         };
     });
-    const recommendedTests = buildRecommendedTests(impact);
+    const recommendedTests = buildRecommendedTests(impact, alwaysIncludeSubsystems);
     const requiredNewTests = gaps.map((f) => `${featureLabel(f)}: Add E2E tests`);
     const p0 = impact.impactedFeatures.filter((f) => f.priority === 'P0').length;
     const p1 = impact.impactedFeatures.filter((f) => f.priority === 'P1').length;

package/dist/esm/index.js CHANGED Viewed

@@ -14,7 +14,7 @@ export { analyzeImpactDeterministic, recommendTestsDeterministic, handoffGenerat
 export { analyzeImpact as analyzeImpactV2, getGaps, getPartialGaps } from './engine/impact_engine.js';
 export { extractScenarios } from './engine/impact_engine.js';
 export { buildPlanFromImpact } from './engine/plan_builder.js';
-export { appendFeedbackAndRecompute, readCalibration, readFlakyTests } from './agent/feedback.js';
+export { appendFeedbackAndRecompute, readCalibration, readFlakyTests, getAdaptiveThresholds } from './agent/feedback.js';
 export { finalizeGeneratedTests } from './agent/handoff.js';
 export { ingestTraceabilityInput } from './agent/traceability_ingest.js';
 export { captureTraceabilityInput } from './agent/traceability_capture.js';

package/dist/esm/pipeline/spec_verifier.js ADDED Viewed

@@ -0,0 +1,75 @@
+// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
+// See LICENSE.txt for license information.
+import { resolve } from 'path';
+import { runCommand } from '../agent/process_runner.js';
+/**
+ * Env var prefixes/names stripped when running LLM-generated specs.
+ * buildRestrictedEnv compares every entry with startsWith, so a full name
+ * listed here (e.g. GITHUB_TOKEN) also removes longer variables that begin
+ * with it.
+ */
+const SENSITIVE_ENV_PREFIXES = [
+    'AWS_', 'AZURE_', 'GCP_', 'GOOGLE_', 'ANTHROPIC_', 'OPENAI_',
+    'GITHUB_TOKEN', 'NPM_TOKEN', 'SSH_', 'SECRET_', 'PRIVATE_',
+    'DATABASE_URL', 'DB_', 'REDIS_', 'POSTGRES_', 'MYSQL_', 'MONGO_',
+    'API_KEY', 'API_SECRET', 'AUTH_', 'JWT_', 'STRIPE_', 'TWILIO_',
+    'SENDGRID_', 'SLACK_TOKEN', 'SLACK_BOT', 'MATTERMOST_',
+];
+/**
+ * Build a restricted environment for running LLM-generated spec files.
+ * Strips credentials and secrets to limit damage from malicious generated code.
+ */
+function buildRestrictedEnv() {
+    const env = {};
+    for (const [key, value] of Object.entries(process.env)) {
+        const isSensitive = SENSITIVE_ENV_PREFIXES.some((prefix) => key.startsWith(prefix));
+        if (!isSensitive) {
+            env[key] = value;
+        }
+    }
+    return env;
+}
+/**
+ * Validate and normalize a spec path to prevent argument injection.
+ * Rejects raw input that starts with '-' (could be interpreted as flags by tsc/playwright).
+ */
+function sanitizeSpecPath(specPath) {
+    if (specPath.startsWith('-')) {
+        throw new Error(`Invalid spec path: "${specPath}" — path must not start with a dash`);
+    }
+    return resolve(specPath);
+}
+/**
+ * Compile-check a generated spec file using tsc --noEmit.
+ * Returns success: true if compilation succeeds, or errors array on failure.
+ */
+export function compileCheckSpec(specPath, testsRoot) {
+    const safeSpecPath = sanitizeSpecPath(specPath);
+    const result = runCommand('npx', ['tsc', '--noEmit', '--esModuleInterop', '--resolveJsonModule', '--moduleResolution', 'node', '--target', 'ES2020', safeSpecPath], testsRoot, 30000, buildRestrictedEnv());
+    if (result.status === 0) {
+        return { success: true, errors: [] };
+    }
+    const output = [result.stdout, result.stderr].filter(Boolean).join('\n');
+    const errorLines = output.split('\n')
+        .filter((l) => l.includes('error TS') || l.includes('Error:'))
+        .slice(0, 10);
+    return {
+        success: false,
+        errors: errorLines.length > 0 ? errorLines : [output.slice(0, 500) || 'Compilation failed'],
+    };
+}
+/**
+ * Smoke-run a generated spec against a running app.
+ * Runs in a restricted environment with sensitive env vars stripped.
+ * Returns success: true if the test passes with retries.
+ */
+export function smokeRunSpec(specPath, testsRoot, playwrightBinary) {
+    const safeSpecPath = sanitizeSpecPath(specPath);
+    const result = runCommand(playwrightBinary, ['test', safeSpecPath, '--retries', '2', '--reporter', 'list'], testsRoot, 120000, buildRestrictedEnv());
+    if (result.status === 0) {
+        return { success: true };
+    }
+    const output = [result.stdout, result.stderr].filter(Boolean).join('\n');
+    const errorLines = output.split('\n')
+        .filter((l) => l.includes('Error') || l.includes('FAILED') || l.includes('Timeout'))
+        .slice(0, 5);
+    return {
+        success: false,
+        error: errorLines.join('\n') || result.error || 'Smoke run failed',
+    };
+}