@yasserkhanorg/e2e-agents 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/agent/feedback.d.ts +16 -0
  2. package/dist/agent/feedback.d.ts.map +1 -1
  3. package/dist/agent/feedback.js +62 -0
  4. package/dist/agent/process_runner.d.ts +1 -1
  5. package/dist/agent/process_runner.d.ts.map +1 -1
  6. package/dist/agent/process_runner.js +3 -3
  7. package/dist/api.d.ts.map +1 -1
  8. package/dist/api.js +5 -2
  9. package/dist/engine/plan_builder.d.ts +2 -1
  10. package/dist/engine/plan_builder.d.ts.map +1 -1
  11. package/dist/engine/plan_builder.js +22 -9
  12. package/dist/esm/agent/feedback.js +61 -0
  13. package/dist/esm/agent/process_runner.js +3 -3
  14. package/dist/esm/api.js +5 -2
  15. package/dist/esm/engine/plan_builder.js +22 -9
  16. package/dist/esm/index.js +1 -1
  17. package/dist/esm/pipeline/spec_verifier.js +75 -0
  18. package/dist/esm/pipeline/stage3_generation.js +122 -4
  19. package/dist/esm/pipeline/stage4_heal.js +146 -3
  20. package/dist/esm/prompts/heal.js +4 -0
  21. package/dist/esm/qa-agent/phase2/agent_loop.js +60 -24
  22. package/dist/esm/qa-agent/phase2/exploration_state.js +21 -0
  23. package/dist/esm/qa-agent/phase2/tools.js +99 -1
  24. package/dist/esm/qa-agent/phase3/reporter.js +31 -4
  25. package/dist/esm/validation/guardrails.js +1 -0
  26. package/dist/index.d.ts +2 -2
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +3 -2
  29. package/dist/pipeline/orchestrator.d.ts.map +1 -1
  30. package/dist/pipeline/spec_verifier.d.ts +20 -0
  31. package/dist/pipeline/spec_verifier.d.ts.map +1 -0
  32. package/dist/pipeline/spec_verifier.js +79 -0
  33. package/dist/pipeline/stage3_generation.d.ts +10 -0
  34. package/dist/pipeline/stage3_generation.d.ts.map +1 -1
  35. package/dist/pipeline/stage3_generation.js +120 -2
  36. package/dist/pipeline/stage4_heal.d.ts +4 -0
  37. package/dist/pipeline/stage4_heal.d.ts.map +1 -1
  38. package/dist/pipeline/stage4_heal.js +145 -2
  39. package/dist/prompts/heal.d.ts +2 -0
  40. package/dist/prompts/heal.d.ts.map +1 -1
  41. package/dist/prompts/heal.js +4 -0
  42. package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -1
  43. package/dist/qa-agent/phase2/agent_loop.js +60 -24
  44. package/dist/qa-agent/phase2/exploration_state.d.ts.map +1 -1
  45. package/dist/qa-agent/phase2/exploration_state.js +21 -0
  46. package/dist/qa-agent/phase2/tools.d.ts.map +1 -1
  47. package/dist/qa-agent/phase2/tools.js +99 -1
  48. package/dist/qa-agent/phase3/reporter.js +31 -4
  49. package/dist/qa-agent/types.d.ts +9 -1
  50. package/dist/qa-agent/types.d.ts.map +1 -1
  51. package/dist/validation/guardrails.d.ts +2 -0
  52. package/dist/validation/guardrails.d.ts.map +1 -1
  53. package/dist/validation/guardrails.js +4 -1
  54. package/package.json +1 -1
@@ -71,5 +71,21 @@ export declare function appendFeedbackAndRecompute(appRoot: string, input: Recom
71
71
  calibration: CalibrationSummary;
72
72
  };
73
73
  export declare function readCalibration(appRoot: string): CalibrationSummary | null;
74
+ export interface AdaptiveThresholds {
75
+ minConfidenceForTargeted: number;
76
+ safeMergeMinConfidence: number;
77
+ /** Subsystems that should always be included regardless of confidence */
78
+ alwaysIncludeSubsystems: string[];
79
+ /** Human-readable adjustment reasons for logging */
80
+ adjustmentReasons: string[];
81
+ }
82
+ /**
83
+ * Compute adaptive thresholds based on calibration data.
84
+ * - If recent recall < 0.8: lower minConfidence (catch more escapes)
85
+ * - If recent precision > 0.9: raise minConfidence (fewer unnecessary tests)
86
+ * - Per-subsystem: if falseNegativeRate > 0.3 in 30d, always include tests
87
+ * Returns defaults if no calibration data exists.
88
+ */
89
+ export declare function getAdaptiveThresholds(appRoot: string): AdaptiveThresholds;
74
90
  export declare function readFlakyTests(appRoot: string): FlakySummary | null;
75
91
  //# sourceMappingURL=feedback.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAEnE"}
1
+ {"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,MAAM,WAAW,kBAAkB;IAC/B,wBAAwB,EAAE,MAAM,CAAC;IACjC,sBAAsB,EAAE,MAAM,CAAC;IAC/B,yEAAyE;IACzE,uBAAuB,EAAE,MAAM,EAAE,CAAC;IAClC,oDAAoD;IACpD,iBAAiB,EAAE,MAAM,EAAE,CAAC;CAC/B;AAOD;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,CA6DzE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAEnE"}
@@ -4,6 +4,7 @@
4
4
  Object.defineProperty(exports, "__esModule", { value: true });
5
5
  exports.appendFeedbackAndRecompute = appendFeedbackAndRecompute;
6
6
  exports.readCalibration = readCalibration;
7
+ exports.getAdaptiveThresholds = getAdaptiveThresholds;
7
8
  exports.readFlakyTests = readFlakyTests;
8
9
  const fs_1 = require("fs");
9
10
  const path_1 = require("path");
@@ -256,6 +257,67 @@ function appendFeedbackAndRecompute(appRoot, input) {
256
257
  function readCalibration(appRoot) {
257
258
  return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'calibration.json'));
258
259
  }
260
+ const DEFAULT_MIN_CONFIDENCE = 60;
261
+ const DEFAULT_SAFE_MERGE = 85;
262
+ const MIN_CONFIDENCE_FLOOR = 40;
263
+ const MIN_CONFIDENCE_CEILING = 80;
264
+ /**
265
+ * Compute adaptive thresholds based on calibration data.
266
+ * - If recent recall < 0.8: lower minConfidence (catch more escapes)
267
+ * - If recent precision > 0.9: raise minConfidence (fewer unnecessary tests)
268
+ * - Per-subsystem: if falseNegativeRate > 0.3 in 30d, always include tests
269
+ * Returns defaults if no calibration data exists.
270
+ */
271
+ function getAdaptiveThresholds(appRoot) {
272
+ const calibration = readCalibration(appRoot);
273
+ const reasons = [];
274
+ const alwaysInclude = [];
275
+ if (!calibration || calibration.samples === 0) {
276
+ return {
277
+ minConfidenceForTargeted: DEFAULT_MIN_CONFIDENCE,
278
+ safeMergeMinConfidence: DEFAULT_SAFE_MERGE,
279
+ alwaysIncludeSubsystems: [],
280
+ adjustmentReasons: ['No calibration data — using defaults'],
281
+ };
282
+ }
283
+ let minConfidence = DEFAULT_MIN_CONFIDENCE;
284
+ let safeMerge = DEFAULT_SAFE_MERGE;
285
+ // Adjust based on 7-day recall
286
+ if (calibration.recent7d.samples >= 3) {
287
+ if (calibration.recent7d.recall < 0.8) {
288
+ const adjustment = 10;
289
+ minConfidence -= adjustment;
290
+ safeMerge -= adjustment;
291
+ reasons.push(`Lowering confidence threshold by ${adjustment} (7d recall: ${calibration.recent7d.recall.toFixed(2)})`);
292
+ }
293
+ else if (calibration.recent7d.precision > 0.9) {
294
+ const adjustment = 5;
295
+ minConfidence += adjustment;
296
+ safeMerge += adjustment;
297
+ reasons.push(`Raising confidence threshold by ${adjustment} (7d precision: ${calibration.recent7d.precision.toFixed(2)})`);
298
+ }
299
+ }
300
+ // Clamp to safe ranges
301
+ minConfidence = Math.max(MIN_CONFIDENCE_FLOOR, Math.min(MIN_CONFIDENCE_CEILING, minConfidence));
302
+ safeMerge = Math.max(70, Math.min(95, safeMerge));
303
+ // Per-subsystem blind spot detection (30-day window)
304
+ for (const [subsystem, metrics] of Object.entries(calibration.bySubsystem)) {
305
+ const recent = metrics.recent30d;
306
+ if (recent.samples >= 3 && recent.falseNegativeRate > 0.3) {
307
+ alwaysInclude.push(subsystem);
308
+ reasons.push(`Always including ${subsystem} tests (30d false-negative rate: ${recent.falseNegativeRate.toFixed(2)})`);
309
+ }
310
+ }
311
+ if (reasons.length === 0) {
312
+ reasons.push('Calibration data within normal range — using defaults');
313
+ }
314
+ return {
315
+ minConfidenceForTargeted: minConfidence,
316
+ safeMergeMinConfidence: safeMerge,
317
+ alwaysIncludeSubsystems: alwaysInclude,
318
+ adjustmentReasons: reasons,
319
+ };
320
+ }
259
321
  function readFlakyTests(appRoot) {
260
322
  return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
261
323
  }
@@ -2,7 +2,7 @@ import type { PipelineConfig } from './config.js';
2
2
  import type { CommandResult } from './pipeline_types.js';
3
3
  export declare function resolvePlaywrightBinary(testsRoot: string): string | null;
4
4
  export declare function summarizeCommandOutput(stdout: string, stderr: string): string;
5
- export declare function runCommand(command: string, args: string[], cwd: string, timeoutMs?: number): CommandResult;
5
+ export declare function runCommand(command: string, args: string[], cwd: string, timeoutMs?: number, envOverride?: NodeJS.ProcessEnv): CommandResult;
6
6
  export declare function resolveMcpCommandTimeoutMs(pipeline: PipelineConfig): number;
7
7
  export declare function resolveMcpRetries(pipeline: PipelineConfig): number;
8
8
  export declare function isRetryableMcpFailure(result: CommandResult): boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"process_runner.d.ts","sourceRoot":"","sources":["../../src/agent/process_runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,qBAAqB,CAAC;AAEvD,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAUxE;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAO7E;AAED,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,SAAiB,GAAG,aAAa,CAsBlH;AAED,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAM3E;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAMlE;AAED,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAQpE;AAED,wBAAgB,qBAAqB,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EAAE,EACd,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,GAChB,aAAa,CAYf"}
1
+ {"version":3,"file":"process_runner.d.ts","sourceRoot":"","sources":["../../src/agent/process_runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,qBAAqB,CAAC;AAEvD,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAUxE;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAO7E;AAED,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,SAAiB,EAAE,WAAW,CAAC,EAAE,MAAM,CAAC,UAAU,GAAG,aAAa,CAsBnJ;AAED,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAM3E;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAMlE;AAED,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAQpE;AAED,wBAAgB,qBAAqB,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EAAE,EACd,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,GAChB,aAAa,CAYf"}
@@ -31,12 +31,12 @@ function summarizeCommandOutput(stdout, stderr) {
31
31
  const lines = combined.split('\n').slice(-20);
32
32
  return lines.join('\n').slice(0, 2000);
33
33
  }
34
- function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000) {
34
+ function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000, envOverride) {
35
35
  // When spawning `claude`, unset CLAUDECODE so nested invocations are allowed.
36
36
  // Claude Code sets this variable to block nested sessions; child processes
37
37
  // that spawn their own claude instance must run without it.
38
- let env;
39
- if (command === 'claude') {
38
+ let env = envOverride;
39
+ if (!env && command === 'claude') {
40
40
  const { CLAUDECODE: _, ...rest } = process.env;
41
41
  env = rest;
42
42
  }
package/dist/api.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAIA,OAAO,EAAgB,KAAK,eAAe,EAAC,MAAM,mBAAmB,CAAC;AACtE,OAAO,EAEH,KAAK,UAAU,EAClB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAmC,KAAK,YAAY,EAAC,MAAM,2BAA2B,CAAC;AAS9F,OAAO,EAAqB,KAAK,kBAAkB,EAAC,MAAM,2BAA2B,CAAC;AAEtF,OAAO,EAAyB,KAAK,6BAA6B,EAAE,KAAK,4BAA4B,EAAC,MAAM,oBAAoB,CAAC;AACjI,OAAO,EAEH,KAAK,yBAAyB,EAC9B,KAAK,wBAAwB,EAChC,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAGH,KAAK,yBAAyB,EACjC,MAAM,iCAAiC,CAAC;AAEzC,MAAM,WAAW,eAAgB,SAAQ,IAAI,CAAC,eAAe,EAAE,MAAM,CAAC;IAClE,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,4BAA4B;IACzC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACvC;AAED,MAAM,WAAW,6BAA6B;IAC1C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAcD,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,6BAA6B,GAAG,4BAA4B,CAE1G;AAED,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,4BAA4B,GAAG,wBAAwB,CASlG;AAED,MAAM,WAAW,sBAAsB;IACnC,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,0BAA0B,CAAC,OAAO,GAAE,eAAoB,GAAG,YAAY,CAQtF;AAED,wBAAgB,2BAA2B,CAAC,OAAO,GAAE,eAAoB,GAAG,sBAAsB,CAcjG;AAED,wBAAsB,gBAAgB,CAAC,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,sBAAsB,GAAG;IAAE,YAAY,CAAC,EAAE,kBAAkB,CAAA;CAAE,CAAC,CAgD7I;AAED,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,6BAA6B,GAAG,yBAAyB,CAkBrG"}
1
+ {"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAIA,OAAO,EAAgB,KAAK,eAAe,EAAC,MAAM,mBAAmB,CAAC;AACtE,OAAO,EAEH,KAAK,UAAU,EAClB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAmC,KAAK,YAAY,EAAC,MAAM,2BAA2B,CAAC;AAU9F,OAAO,EAAqB,KAAK,kBAAkB,EAAC,MAAM,2BAA2B,CAAC;AAEtF,OAAO,EAAyB,KAAK,6BAA6B,EAAE,KAAK,4BAA4B,EAAC,MAAM,oBAAoB,CAAC;AACjI,OAAO,EAEH,KAAK,yBAAyB,EAC9B,KAAK,wBAAwB,EAChC,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAGH,KAAK,yBAAyB,EACjC,MAAM,iCAAiC,CAAC;AAEzC,MAAM,WAAW,eAAgB,SAAQ,IAAI,CAAC,eAAe,EAAE,MAAM,CAAC;IAClE,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,4BAA4B;IACzC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACvC;AAED,MAAM,WAAW,6BAA6B;IAC1C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAcD,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,6BAA6B,GAAG,4BAA4B,CAE1G;AAED,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,4BAA4B,GAAG,wBAAwB,CASlG;AAED,MAAM,WAAW,sBAAsB;IACnC,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,0BAA0B,CAAC,OAAO,GAAE,eAAoB,GAAG,YAAY,CAQtF;AAED,wBAAgB,2BAA2B,CAAC,OAAO,GAAE,eAAoB,GAAG,sBAAsB,CAejG;AAED,wBAAsB,gBAAgB,CAAC,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,sBAAsB,GAAG;IAAE,YAAY,CAAC,EAAE,kBAAkB,CAAA;CAAE,CAAC,CAiD7I;AAED,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,6BAA6B,GAAG,yBAAyB,CAkBrG"}
package/dist/api.js CHANGED
@@ -13,6 +13,7 @@ const plan_js_1 = require("./agent/plan.js");
13
13
  const impact_engine_js_1 = require("./engine/impact_engine.js");
14
14
  const plan_builder_js_1 = require("./engine/plan_builder.js");
15
15
  const git_js_1 = require("./agent/git.js");
16
+ const feedback_js_1 = require("./agent/feedback.js");
16
17
  const diff_loader_js_1 = require("./engine/diff_loader.js");
17
18
  const ai_enrichment_js_1 = require("./engine/ai_enrichment.js");
18
19
  const anthropic_provider_js_1 = require("./anthropic_provider.js");
@@ -60,7 +61,8 @@ function recommendTestsDeterministic(options = {}) {
60
61
  testsRoot: reportRoot,
61
62
  routeFamilies: config.routeFamilies,
62
63
  });
63
- const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy);
64
+ const adaptive = (0, feedback_js_1.getAdaptiveThresholds)(reportRoot);
65
+ const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy, undefined, adaptive);
64
66
  const planPath = (0, plan_builder_js_1.writePlanReport)(reportRoot, plan);
65
67
  const ciSummaryMarkdown = (0, plan_builder_js_1.renderCiSummaryMarkdown)(plan);
66
68
  const ciSummaryPath = (0, plan_builder_js_1.writeCiSummary)(reportRoot, ciSummaryMarkdown);
@@ -106,7 +108,8 @@ async function recommendTestsAI(options = {}) {
106
108
  specDetails: [...specDetailsMap.values()],
107
109
  });
108
110
  }
109
- const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy, aiEnrichment);
111
+ const adaptive = (0, feedback_js_1.getAdaptiveThresholds)(reportRoot);
112
+ const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy, aiEnrichment, adaptive);
110
113
  const planPath = (0, plan_builder_js_1.writePlanReport)(reportRoot, plan);
111
114
  const ciSummaryMarkdown = (0, plan_builder_js_1.renderCiSummaryMarkdown)(plan);
112
115
  const ciSummaryPath = (0, plan_builder_js_1.writeCiSummary)(reportRoot, ciSummaryMarkdown);
@@ -1,9 +1,10 @@
1
1
  import type { PolicyConfig } from '../agent/config.js';
2
2
  import type { ImpactResult } from './impact_engine.js';
3
3
  import type { AIEnrichmentResult } from './ai_enrichment.js';
4
+ import type { AdaptiveThresholds } from '../agent/feedback.js';
4
5
  import type { PlanReport, GapDetail, CoveredFlowSummary } from '../agent/plan.js';
5
6
  export type { PlanReport, GapDetail, CoveredFlowSummary };
6
- export declare function buildPlanFromImpact(impact: ImpactResult, policyOverride?: Partial<PolicyConfig>, aiEnrichment?: AIEnrichmentResult): PlanReport;
7
+ export declare function buildPlanFromImpact(impact: ImpactResult, policyOverride?: Partial<PolicyConfig>, aiEnrichment?: AIEnrichmentResult, adaptiveThresholds?: AdaptiveThresholds): PlanReport;
7
8
  export declare function writePlanReport(appRoot: string, plan: PlanReport): string;
8
9
  export declare function renderCiSummaryMarkdown(plan: PlanReport): string;
9
10
  export declare function writeCiSummary(appRoot: string, markdown: string, relativePath?: string): string;
@@ -1 +1 @@
1
- {"version":3,"file":"plan_builder.d.ts","sourceRoot":"","sources":["../../src/engine/plan_builder.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAC,YAAY,EAAkB,MAAM,oBAAoB,CAAC;AAEtE,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,oBAAoB,CAAC;AAG3D,OAAO,KAAK,EACR,UAAU,EACV,SAAS,EACT,kBAAkB,EAIrB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EAAC,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAC,CAAC;AAiPxD,wBAAgB,mBAAmB,CAC/B,MAAM,EAAE,YAAY,EACpB,cAAc,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,EACtC,YAAY,CAAC,EAAE,kBAAkB,GAClC,UAAU,CA2IZ;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,MAAM,CAMzE;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAwHhE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,SAAiC,GAAG,MAAM,CAMvH"}
1
+ {"version":3,"file":"plan_builder.d.ts","sourceRoot":"","sources":["../../src/engine/plan_builder.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;AAErD,OAAO,KAAK,EAAC,YAAY,EAAkB,MAAM,oBAAoB,CAAC;AAEtE,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,oBAAoB,CAAC;AAC3D,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,sBAAsB,CAAC;AAG7D,OAAO,KAAK,EACR,UAAU,EACV,SAAS,EACT,kBAAkB,EAIrB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EAAC,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAC,CAAC;AAoPxD,wBAAgB,mBAAmB,CAC/B,MAAM,EAAE,YAAY,EACpB,cAAc,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,EACtC,YAAY,CAAC,EAAE,kBAAkB,EACjC,kBAAkB,CAAC,EAAE,kBAAkB,GACxC,UAAU,CAsJZ;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,MAAM,CAMzE;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAwHhE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,SAAiC,GAAG,MAAM,CAMvH"}
@@ -9,6 +9,7 @@ exports.writeCiSummary = writeCiSummary;
9
9
  const fs_1 = require("fs");
10
10
  const path_1 = require("path");
11
11
  const minimatch_1 = require("minimatch");
12
+ const test_path_js_1 = require("../agent/test_path.js");
12
13
  const impact_engine_js_1 = require("./impact_engine.js");
13
14
  const DEFAULT_POLICY = {
14
15
  minConfidenceForTargeted: 60,
@@ -196,22 +197,34 @@ function evaluateEnforcement(decision, policy) {
196
197
  }
197
198
  /**
198
199
  * Build recommended test list from impacted features' Playwright specs.
200
+ * When alwaysIncludeSubsystems is provided, specs from those subsystems are
201
+ * included regardless of their coverage status (blind-spot protection).
199
202
  */
200
- function buildRecommendedTests(impact) {
201
- const tests = [];
203
+ function buildRecommendedTests(impact, alwaysIncludeSubsystems = []) {
204
+ const tests = new Set();
205
+ const alwaysSet = new Set(alwaysIncludeSubsystems);
202
206
  for (const feature of impact.impactedFeatures) {
203
- if (feature.coverageStatus !== 'uncovered') {
207
+ const shouldInclude = feature.coverageStatus !== 'uncovered' ||
208
+ feature.playwrightSpecs.some((spec) => alwaysSet.has((0, test_path_js_1.inferSubsystemFromTestPath)(spec)));
209
+ if (shouldInclude) {
204
210
  for (const spec of feature.playwrightSpecs) {
205
- if (!tests.includes(spec)) {
206
- tests.push(spec);
207
- }
211
+ tests.add(spec);
208
212
  }
209
213
  }
210
214
  }
211
- return tests;
215
+ return [...tests];
212
216
  }
213
- function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
217
+ function buildPlanFromImpact(impact, policyOverride, aiEnrichment, adaptiveThresholds) {
214
218
  const policy = { ...DEFAULT_POLICY, ...(policyOverride || {}) };
219
+ // Apply adaptive calibration overrides (if available and not explicitly overridden)
220
+ if (adaptiveThresholds && policyOverride?.minConfidenceForTargeted === undefined) {
221
+ policy.minConfidenceForTargeted = adaptiveThresholds.minConfidenceForTargeted;
222
+ }
223
+ if (adaptiveThresholds && policyOverride?.safeMergeMinConfidence === undefined) {
224
+ policy.safeMergeMinConfidence = adaptiveThresholds.safeMergeMinConfidence;
225
+ }
226
+ // Apply alwaysIncludeSubsystems: force their tests into the recommended set
227
+ const alwaysIncludeSubsystems = adaptiveThresholds?.alwaysIncludeSubsystems ?? [];
215
228
  const confidence = computeConfidence(impact);
216
229
  const runSetResult = pickRunSet(impact, confidence, policy);
217
230
  const decision = buildDecision(impact, runSetResult.runSet, confidence, policy);
@@ -294,7 +307,7 @@ function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
294
307
  advisoryScenarios,
295
308
  };
296
309
  });
297
- const recommendedTests = buildRecommendedTests(impact);
310
+ const recommendedTests = buildRecommendedTests(impact, alwaysIncludeSubsystems);
298
311
  const requiredNewTests = gaps.map((f) => `${featureLabel(f)}: Add E2E tests`);
299
312
  const p0 = impact.impactedFeatures.filter((f) => f.priority === 'P0').length;
300
313
  const p1 = impact.impactedFeatures.filter((f) => f.priority === 'P1').length;
@@ -251,6 +251,67 @@ export function appendFeedbackAndRecompute(appRoot, input) {
251
251
  export function readCalibration(appRoot) {
252
252
  return readJson(join(appRoot, '.e2e-ai-agents', 'calibration.json'));
253
253
  }
254
+ const DEFAULT_MIN_CONFIDENCE = 60;
255
+ const DEFAULT_SAFE_MERGE = 85;
256
+ const MIN_CONFIDENCE_FLOOR = 40;
257
+ const MIN_CONFIDENCE_CEILING = 80;
258
+ /**
259
+ * Compute adaptive thresholds based on calibration data.
260
+ * - If recent recall < 0.8: lower minConfidence (catch more escapes)
261
+ * - If recent precision > 0.9: raise minConfidence (fewer unnecessary tests)
262
+ * - Per-subsystem: if falseNegativeRate > 0.3 in 30d, always include tests
263
+ * Returns defaults if no calibration data exists.
264
+ */
265
+ export function getAdaptiveThresholds(appRoot) {
266
+ const calibration = readCalibration(appRoot);
267
+ const reasons = [];
268
+ const alwaysInclude = [];
269
+ if (!calibration || calibration.samples === 0) {
270
+ return {
271
+ minConfidenceForTargeted: DEFAULT_MIN_CONFIDENCE,
272
+ safeMergeMinConfidence: DEFAULT_SAFE_MERGE,
273
+ alwaysIncludeSubsystems: [],
274
+ adjustmentReasons: ['No calibration data — using defaults'],
275
+ };
276
+ }
277
+ let minConfidence = DEFAULT_MIN_CONFIDENCE;
278
+ let safeMerge = DEFAULT_SAFE_MERGE;
279
+ // Adjust based on 7-day recall
280
+ if (calibration.recent7d.samples >= 3) {
281
+ if (calibration.recent7d.recall < 0.8) {
282
+ const adjustment = 10;
283
+ minConfidence -= adjustment;
284
+ safeMerge -= adjustment;
285
+ reasons.push(`Lowering confidence threshold by ${adjustment} (7d recall: ${calibration.recent7d.recall.toFixed(2)})`);
286
+ }
287
+ else if (calibration.recent7d.precision > 0.9) {
288
+ const adjustment = 5;
289
+ minConfidence += adjustment;
290
+ safeMerge += adjustment;
291
+ reasons.push(`Raising confidence threshold by ${adjustment} (7d precision: ${calibration.recent7d.precision.toFixed(2)})`);
292
+ }
293
+ }
294
+ // Clamp to safe ranges
295
+ minConfidence = Math.max(MIN_CONFIDENCE_FLOOR, Math.min(MIN_CONFIDENCE_CEILING, minConfidence));
296
+ safeMerge = Math.max(70, Math.min(95, safeMerge));
297
+ // Per-subsystem blind spot detection (30-day window)
298
+ for (const [subsystem, metrics] of Object.entries(calibration.bySubsystem)) {
299
+ const recent = metrics.recent30d;
300
+ if (recent.samples >= 3 && recent.falseNegativeRate > 0.3) {
301
+ alwaysInclude.push(subsystem);
302
+ reasons.push(`Always including ${subsystem} tests (30d false-negative rate: ${recent.falseNegativeRate.toFixed(2)})`);
303
+ }
304
+ }
305
+ if (reasons.length === 0) {
306
+ reasons.push('Calibration data within normal range — using defaults');
307
+ }
308
+ return {
309
+ minConfidenceForTargeted: minConfidence,
310
+ safeMergeMinConfidence: safeMerge,
311
+ alwaysIncludeSubsystems: alwaysInclude,
312
+ adjustmentReasons: reasons,
313
+ };
314
+ }
254
315
  export function readFlakyTests(appRoot) {
255
316
  return readJson(join(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
256
317
  }
@@ -22,12 +22,12 @@ export function summarizeCommandOutput(stdout, stderr) {
22
22
  const lines = combined.split('\n').slice(-20);
23
23
  return lines.join('\n').slice(0, 2000);
24
24
  }
25
- export function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000) {
25
+ export function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000, envOverride) {
26
26
  // When spawning `claude`, unset CLAUDECODE so nested invocations are allowed.
27
27
  // Claude Code sets this variable to block nested sessions; child processes
28
28
  // that spawn their own claude instance must run without it.
29
- let env;
30
- if (command === 'claude') {
29
+ let env = envOverride;
30
+ if (!env && command === 'claude') {
31
31
  const { CLAUDECODE: _, ...rest } = process.env;
32
32
  env = rest;
33
33
  }
package/dist/esm/api.js CHANGED
@@ -5,6 +5,7 @@ import { appendPlanMetrics, } from './agent/plan.js';
5
5
  import { analyzeImpact as analyzeImpactV2 } from './engine/impact_engine.js';
6
6
  import { buildPlanFromImpact, renderCiSummaryMarkdown, writeCiSummary, writePlanReport, } from './engine/plan_builder.js';
7
7
  import { getChangedFiles } from './agent/git.js';
8
+ import { getAdaptiveThresholds } from './agent/feedback.js';
8
9
  import { loadDiffs } from './engine/diff_loader.js';
9
10
  import { enrichImpactWithAI } from './engine/ai_enrichment.js';
10
11
  import { AnthropicProvider } from './anthropic_provider.js';
@@ -52,7 +53,8 @@ export function recommendTestsDeterministic(options = {}) {
52
53
  testsRoot: reportRoot,
53
54
  routeFamilies: config.routeFamilies,
54
55
  });
55
- const plan = buildPlanFromImpact(impact, config.policy);
56
+ const adaptive = getAdaptiveThresholds(reportRoot);
57
+ const plan = buildPlanFromImpact(impact, config.policy, undefined, adaptive);
56
58
  const planPath = writePlanReport(reportRoot, plan);
57
59
  const ciSummaryMarkdown = renderCiSummaryMarkdown(plan);
58
60
  const ciSummaryPath = writeCiSummary(reportRoot, ciSummaryMarkdown);
@@ -98,7 +100,8 @@ export async function recommendTestsAI(options = {}) {
98
100
  specDetails: [...specDetailsMap.values()],
99
101
  });
100
102
  }
101
- const plan = buildPlanFromImpact(impact, config.policy, aiEnrichment);
103
+ const adaptive = getAdaptiveThresholds(reportRoot);
104
+ const plan = buildPlanFromImpact(impact, config.policy, aiEnrichment, adaptive);
102
105
  const planPath = writePlanReport(reportRoot, plan);
103
106
  const ciSummaryMarkdown = renderCiSummaryMarkdown(plan);
104
107
  const ciSummaryPath = writeCiSummary(reportRoot, ciSummaryMarkdown);
@@ -3,6 +3,7 @@
3
3
  import { mkdirSync, writeFileSync } from 'fs';
4
4
  import { dirname, join } from 'path';
5
5
  import { minimatch } from 'minimatch';
6
+ import { inferSubsystemFromTestPath } from '../agent/test_path.js';
6
7
  import { getGaps, getPartialGaps } from './impact_engine.js';
7
8
  const DEFAULT_POLICY = {
8
9
  minConfidenceForTargeted: 60,
@@ -190,22 +191,34 @@ function evaluateEnforcement(decision, policy) {
190
191
  }
191
192
  /**
192
193
  * Build recommended test list from impacted features' Playwright specs.
194
+ * When alwaysIncludeSubsystems is provided, specs from those subsystems are
195
+ * included regardless of their coverage status (blind-spot protection).
193
196
  */
194
- function buildRecommendedTests(impact) {
195
- const tests = [];
197
+ function buildRecommendedTests(impact, alwaysIncludeSubsystems = []) {
198
+ const tests = new Set();
199
+ const alwaysSet = new Set(alwaysIncludeSubsystems);
196
200
  for (const feature of impact.impactedFeatures) {
197
- if (feature.coverageStatus !== 'uncovered') {
201
+ const shouldInclude = feature.coverageStatus !== 'uncovered' ||
202
+ feature.playwrightSpecs.some((spec) => alwaysSet.has(inferSubsystemFromTestPath(spec)));
203
+ if (shouldInclude) {
198
204
  for (const spec of feature.playwrightSpecs) {
199
- if (!tests.includes(spec)) {
200
- tests.push(spec);
201
- }
205
+ tests.add(spec);
202
206
  }
203
207
  }
204
208
  }
205
- return tests;
209
+ return [...tests];
206
210
  }
207
- export function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
211
+ export function buildPlanFromImpact(impact, policyOverride, aiEnrichment, adaptiveThresholds) {
208
212
  const policy = { ...DEFAULT_POLICY, ...(policyOverride || {}) };
213
+ // Apply adaptive calibration overrides (if available and not explicitly overridden)
214
+ if (adaptiveThresholds && policyOverride?.minConfidenceForTargeted === undefined) {
215
+ policy.minConfidenceForTargeted = adaptiveThresholds.minConfidenceForTargeted;
216
+ }
217
+ if (adaptiveThresholds && policyOverride?.safeMergeMinConfidence === undefined) {
218
+ policy.safeMergeMinConfidence = adaptiveThresholds.safeMergeMinConfidence;
219
+ }
220
+ // Apply alwaysIncludeSubsystems: force their tests into the recommended set
221
+ const alwaysIncludeSubsystems = adaptiveThresholds?.alwaysIncludeSubsystems ?? [];
209
222
  const confidence = computeConfidence(impact);
210
223
  const runSetResult = pickRunSet(impact, confidence, policy);
211
224
  const decision = buildDecision(impact, runSetResult.runSet, confidence, policy);
@@ -288,7 +301,7 @@ export function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
288
301
  advisoryScenarios,
289
302
  };
290
303
  });
291
- const recommendedTests = buildRecommendedTests(impact);
304
+ const recommendedTests = buildRecommendedTests(impact, alwaysIncludeSubsystems);
292
305
  const requiredNewTests = gaps.map((f) => `${featureLabel(f)}: Add E2E tests`);
293
306
  const p0 = impact.impactedFeatures.filter((f) => f.priority === 'P0').length;
294
307
  const p1 = impact.impactedFeatures.filter((f) => f.priority === 'P1').length;
package/dist/esm/index.js CHANGED
@@ -14,7 +14,7 @@ export { analyzeImpactDeterministic, recommendTestsDeterministic, handoffGenerat
14
14
  export { analyzeImpact as analyzeImpactV2, getGaps, getPartialGaps } from './engine/impact_engine.js';
15
15
  export { extractScenarios } from './engine/impact_engine.js';
16
16
  export { buildPlanFromImpact } from './engine/plan_builder.js';
17
- export { appendFeedbackAndRecompute, readCalibration, readFlakyTests } from './agent/feedback.js';
17
+ export { appendFeedbackAndRecompute, readCalibration, readFlakyTests, getAdaptiveThresholds } from './agent/feedback.js';
18
18
  export { finalizeGeneratedTests } from './agent/handoff.js';
19
19
  export { ingestTraceabilityInput } from './agent/traceability_ingest.js';
20
20
  export { captureTraceabilityInput } from './agent/traceability_capture.js';
@@ -0,0 +1,75 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
+ import { resolve } from 'path';
4
+ import { runCommand } from '../agent/process_runner.js';
5
+ /** Env var prefixes/names stripped when running LLM-generated specs */
6
+ const SENSITIVE_ENV_PREFIXES = [
7
+ 'AWS_', 'AZURE_', 'GCP_', 'GOOGLE_', 'ANTHROPIC_', 'OPENAI_',
8
+ 'GITHUB_TOKEN', 'NPM_TOKEN', 'SSH_', 'SECRET_', 'PRIVATE_',
9
+ 'DATABASE_URL', 'DB_', 'REDIS_', 'POSTGRES_', 'MYSQL_', 'MONGO_',
10
+ 'API_KEY', 'API_SECRET', 'AUTH_', 'JWT_', 'STRIPE_', 'TWILIO_',
11
+ 'SENDGRID_', 'SLACK_TOKEN', 'SLACK_BOT', 'MATTERMOST_',
12
+ ];
13
+ /**
14
+ * Build a restricted environment for running LLM-generated spec files.
15
+ * Strips credentials and secrets to limit damage from malicious generated code.
16
+ */
17
+ function buildRestrictedEnv() {
18
+ const env = {};
19
+ for (const [key, value] of Object.entries(process.env)) {
20
+ const isSensitive = SENSITIVE_ENV_PREFIXES.some((prefix) => key.startsWith(prefix));
21
+ if (!isSensitive) {
22
+ env[key] = value;
23
+ }
24
+ }
25
+ return env;
26
+ }
27
+ /**
28
+ * Validate and normalize a spec path to prevent argument injection.
29
+ * Rejects raw input that starts with '-' (could be interpreted as flags by tsc/playwright).
30
+ */
31
+ function sanitizeSpecPath(specPath) {
32
+ if (specPath.startsWith('-')) {
33
+ throw new Error(`Invalid spec path: "${specPath}" — path must not start with a dash`);
34
+ }
35
+ return resolve(specPath);
36
+ }
37
+ /**
38
+ * Compile-check a generated spec file using tsc --noEmit.
39
+ * Returns success: true if compilation succeeds, or errors array on failure.
40
+ */
41
+ export function compileCheckSpec(specPath, testsRoot) {
42
+ const safeSpecPath = sanitizeSpecPath(specPath);
43
+ const result = runCommand('npx', ['tsc', '--noEmit', '--esModuleInterop', '--resolveJsonModule', '--moduleResolution', 'node', '--target', 'ES2020', safeSpecPath], testsRoot, 30000, buildRestrictedEnv());
44
+ if (result.status === 0) {
45
+ return { success: true, errors: [] };
46
+ }
47
+ const output = [result.stdout, result.stderr].filter(Boolean).join('\n');
48
+ const errorLines = output.split('\n')
49
+ .filter((l) => l.includes('error TS') || l.includes('Error:'))
50
+ .slice(0, 10);
51
+ return {
52
+ success: false,
53
+ errors: errorLines.length > 0 ? errorLines : [output.slice(0, 500) || 'Compilation failed'],
54
+ };
55
+ }
56
+ /**
57
+ * Smoke-run a generated spec against a running app.
58
+ * Runs in a restricted environment with sensitive env vars stripped.
59
+ * Returns success: true if the test passes with retries.
60
+ */
61
+ export function smokeRunSpec(specPath, testsRoot, playwrightBinary) {
62
+ const safeSpecPath = sanitizeSpecPath(specPath);
63
+ const result = runCommand(playwrightBinary, ['test', safeSpecPath, '--retries', '2', '--reporter', 'list'], testsRoot, 120000, buildRestrictedEnv());
64
+ if (result.status === 0) {
65
+ return { success: true };
66
+ }
67
+ const output = [result.stdout, result.stderr].filter(Boolean).join('\n');
68
+ const errorLines = output.split('\n')
69
+ .filter((l) => l.includes('Error') || l.includes('FAILED') || l.includes('Timeout'))
70
+ .slice(0, 5);
71
+ return {
72
+ success: false,
73
+ error: errorLines.join('\n') || result.error || 'Smoke run failed',
74
+ };
75
+ }